These documents are for the HEAD of the CVS repository on July 19, 2007
Api docs for previous releases
Modware::Feature
CONTIG
Modware::Feature::CONTIG - Modware representation of a contig
|
No package variables defined. |
NEVER INSTANTIATE THIS OBJECT, USE Modware::Feature
USE CASE: write a genbank file WITH FEATURE ANNOTATIONS
use Modware::Feature; my $feat = new Modware::Feature( -primary_id => SOME_CONTIG_PRIMARY_ID );
$feat->to_file( -format => 'genbank', -file => '>CONTIG_DUMP.gb' );
|
If you want to create a standalone contig attached to a chromosome,
my $feat = new Modware::Feature( -primary_id => 'some primary_id' );
When the contig is created, the bioperl method returns a Bio::SeqFeature::Generic object attached to a Bio::Seq object representing the chromosome. All features are relative to the chromosome.
If you would like to transform to 'relative' coordinates, use the method 'float'.
to_file takes the same arguments as Bio::SeqIO (i.e. -format and -file). First it 'floats' the contig through $contig->float(). It then fetches fresh copies of all of the cds features and manipulates them so that the bioperl objects contain the correct annotation information and location formats that SeqIO uses to write the object. After writing, it erases the modified features so that fresh copies are fetched the next time they are requested.
|
Methods description
Title : _get_genes
Function : fetches all genes on this contig and attaches their mRNA features
: onto the bioperl object
Returns : nothing
Args : none
Title : _init
Note : sets attributes specific to Contig features
Usage : called internally by new
Function :
Returns : nothing
Args : none
Title : cds_features
Note : Fetches all CDS features located on this contig
Usage : To print the primary_id of all CDS features associated with this feature
: print map { $_->primary_id()."\n" } @{ $self->cds_features() };
Function : gets/sets the cds_features array of the contig
Returns : reference to an array of feature objects
Args : optional: array of features object
Title : check_straddlers
Note : Fetches all CDS features that overlap a boundary of this contig
Usage : To print the primary_id of all CDS features straddling this contig
: print map { $_->primary_id()."\n" } $self->check_straddlers();
Returns : array of features
Args : optional: none
Title : float
Note : creates 'free contig'. bioperl seq object is the sequence of the contig and
: the features on it become relative to the contig not the whole
: chromosome; chromosome_no gets set to undef until it is added to some chromosome
Returns : self
Args : optional: none
Title : genes
Note : Fetches all gene objects with features located on this contig
Usage : To print the name of all genes on this contig
: print map { $_->name()."\n" } @{ $self->genes() };
Returns : reference to an array of gene objects
Args : nothing
Title : to_file
Usage : $feat->to_file( -format => 'genbank', -file => '>CONTIG_DUMP.gb' );
Function : writes the contig WITH ANNOTATIONS (cds's) in any format that
: Bio::SeqIO can write.
Returns : $self
Args : named:
: -format => any format that Bio::SeqIO will write
: -file => optional, must be read/write. If this is not passed,
: writes to STDOUT
Methods code
#
# Title    : _get_genes
# Function : asks Modware::Search::Gene for every gene overlapping this
#          : contig's range on its reference feature, keeps only the genes
#          : lying entirely inside the contig, and stores them through the
#          : genes() setter. The mRNA-typed features of those genes are
#          : stored through the cds_features() setter.
# Returns  : whatever the cds_features() setter returns
# Args     : none
#
sub _get_genes {
    my ($self) = @_;

    my @overlapping = Modware::Search::Gene->Search_overlapping_by_range(
        $self->reference_feature(), $self->start(), $self->end() );

    # a gene that sticks out past either end of the contig is not kept
    my @contained;
    for my $gene (@overlapping) {
        next unless $gene->start() >= $self->start();
        next unless $gene->end() <= $self->end();
        push @contained, $gene;
    }

    # gather every feature of the retained genes, then keep the mRNA ones
    my @gene_features;
    for my $gene (@contained) {
        push @gene_features, @{ $gene->features() };
    }
    my @transcripts = grep { $_->type() eq 'mRNA' } @gene_features;

    $self->genes( \@contained );
    return $self->cds_features( \@transcripts );
}
#
# Title    : _init
# Function : marks the feature as type 'contig' and, when no source has been
#          : set yet, defaults the source to "Sequencing Center"
# Args     : none are used (any extra arguments are ignored)
#
sub _init {
    my ($self) = @_;

    $self->type('contig');

    # an already-assigned source is left untouched
    if ( !$self->source() ) {
        $self->source('Sequencing Center');
    }
}
#
# Title    : cds_features
# Function : gets/sets the list of cds features cached in
#          : $self->{_cds_features}.
#          : Getter: when nothing is cached and no genes are cached either,
#          : genes() is called first (its loader fills this list too).
#          : Setter: stores the list, passes each feature's bioperl object
#          : to $self->bioperl(), then adds each one to the sequence object
#          : (the reference feature's bioperl object when there is a
#          : reference feature, this contig's own otherwise).
# Returns  : reference to the array of feature objects
# Args     : optional: reference to an array of feature objects
# Throws   : dies with "ERROR with <primary_id>" when $self->bioperl()
#          : fails for a feature
#
sub cds_features {
    my ( $self, $obj ) = @_;
    my $is_setter = @_ > 1;

    if ( !$is_setter ) {

        # lazy load: only hit the database when neither cache is filled
        if ( !exists $self->{_cds_features} && !$self->{genes} ) {
            $self->genes();
        }
        return $self->{_cds_features};
    }

    $self->{_cds_features} = $obj;

    for my $feature ( @{$obj} ) {
        eval { $self->bioperl( $feature->bioperl ); };
        if ($@) {
            die "ERROR with ".$feature->primary_id()."\n$@";
        }
    }

    # features go onto the reference feature's sequence when there is one
    my $seq_obj;
    if ( $self->reference_feature() ) {
        $seq_obj = $self->reference_feature->bioperl;
    }
    else {
        $seq_obj = $self->bioperl();
    }

    for my $feature ( @{$obj} ) {
        $seq_obj->add_SeqFeature( $feature->bioperl );
    }

    return $self->{_cds_features};
}
#
# Title    : check_straddlers
# Function : calls reference_feature() with this contig's start and end,
#          : builds a Modware::Feature for every id in the returned array
#          : reference, and keeps the ones whose type is "mRNA"
# Returns  : array of features
# Args     : none are used
#
sub check_straddlers {
    my ($self) = @_;

    my $feature_ids = $self->reference_feature( $self->start(), $self->end() );

    # instantiate every candidate first, then filter on type
    my @candidates = map { Modware::Feature->new( -feature_id => $_ ) } @{$feature_ids};
    my @straddlers = grep { $_->type eq 'mRNA' } @candidates;

    return @straddlers;
}
#
# Title    : float
# Function : turns this contig into a 'free contig': its cds features are
#          : detached from their reference feature, all coordinates are
#          : shifted so the contig starts at position 1, and the contig's
#          : own reference feature is cleared.
# Returns  : $self
# Args     : none are used ($obj is unpacked but never read)
#
sub float
{ my ($self, $obj) = @_;
# pull cds_features off of chromosome via bioperl
#
my $features = $self->cds_features();
# detach every feature from its reference feature and blank the '_gsf_seq'
# slot of its bioperl object
# NOTE(review): '_gsf_seq' is presumably the internal slot where
# Bio::SeqFeature::Generic keeps its attached sequence - confirm
map { $_->reference_feature( undef ); $_->bioperl->{_gsf_seq} = undef; } @$features;
#
# replace Bio::Seq object with contig sequence instead by first
# 1. converting features to contig coordinates,
# 2. attach the contig sequence to the bioperl object (instead of chromosome)
# 3. removing contig from chromosome
#
# offset that maps the contig's current start onto coordinate 1
my $offset = -1*$self->start() + 1;
# new Bio::Seq built from the sequence string of the current bioperl object
my $seq = new Bio::Seq( -seq => $self->bioperl->seq->seq );
# shift the current bioperl object's own start/end by the offset
my $bioperl = $self->bioperl;
$bioperl->start( $bioperl->start() + $offset );
$bioperl->end ( $bioperl->end() + $offset );
# hand the new sequence object to the bioperl accessor
# NOTE(review): the accessor's behaviour with an argument is not visible
# here - presumably it installs $seq as the contig's sequence; confirm
$self->bioperl( $seq );
# move every cds feature into contig-relative coordinates
foreach my $feature ( @{ $self->cds_features() } ) {
$feature->shift_feature( $offset );
}
# pass each feature's bioperl object back to the bioperl accessor
# NOTE(review): presumably this re-attaches the shifted features to the
# new sequence - confirm against the accessor
map {
$self->bioperl($_->bioperl);
} @$features;
# the contig no longer belongs to a reference feature
$self->reference_feature( undef );
return $self;
}
#
# Title    : genes
# Function : gets/sets the gene list cached in $self->{genes}. When called
#          : as a getter with no cached list, _get_genes() is run first to
#          : fetch the genes; when called as a setter nothing is fetched.
# Returns  : reference to the array of gene objects
# Args     : optional: reference to an array of gene objects
#
sub genes {
    my ( $self, $obj ) = @_;
    my $is_setter = @_ > 1;

    if ($is_setter) {
        $self->{genes} = $obj;
    }
    elsif ( !exists $self->{genes} ) {

        # nothing cached and the caller is not supplying genes: load them
        $self->_get_genes();
    }

    return $self->{genes};
}
#
# Title    : to_apollo_file
# Usage    : $contig->to_apollo_file( -format => 'genbank', -file => '>CONTIG.gb' );
# Function : writes the contig's sequence through Bio::SeqIO with a leading
#          : 'source' feature followed by every cds feature whose source
#          : matches /curat/i. Unlike to_file, the contig is not floated
#          : first.
# Returns  : $self
# Args     : @args are passed unchanged to Bio::SeqIO->new
#
sub to_apollo_file {
    my ( $self, @args ) = @_;

    #
    # 'reset' cds_features and genes, to make sure we have a fresh copy from
    # the database. The cds_features() accessor caches under '_cds_features',
    # so that is the key that has to go; the old 'cds_features' key is
    # still removed in case anything ever stored a value there.
    #
    delete @{$self}{qw( genes _cds_features cds_features )};

    my $seq_writer = Bio::SeqIO->new(@args);
    $seq_writer->start_l( $self->start() );
    $seq_writer->end_l( $self->end() );

    my $seqobj = $self->bioperl();
    $seqobj->display_id( $self->primary_id );

    # 'source' feature spanning the entire sequence
    my $source_feat = Bio::SeqFeature::Generic->new( -start => 1, -end => $seqobj->length() );
    $source_feat->primary_tag('source');
    $source_feat->add_tag_value( 'organism',   'Dictyostelium discoideum' );
    $source_feat->add_tag_value( 'mol_type',   'genomic DNA' );
    $source_feat->add_tag_value( 'chromosome', $self->reference_feature() );

    # fetch the features BEFORE clearing the sequence object: the feature
    # list must be in hand before the sequence is stripped and rebuilt
    my $features = $self->cds_features();

    # strip everything currently on the sequence; the removed features are
    # deliberately discarded so only 'source' plus curated cds remain
    my @discarded = $seqobj->remove_SeqFeatures();
    $seqobj->add_SeqFeature($source_feat);

    for my $feature ( @{$features} ) {
        next unless $feature->source() =~ /curat/i;

        $feature->bioperl("CDS");

        # the writer needs one split location built from the feature's parts
        my $loc = Bio::Location::Split->new();
        for my $part ( $feature->bioperl() ) {
            $loc->add_sub_Location( $part->location );
        }
        $feature->bioperl($loc);

        $feature->bioperl( 'locus_tag',  $feature->gene() );
        $feature->bioperl( 'primary_id', $feature->primary_id() );
        $seqobj->add_SeqFeature( $feature->bioperl() );

        # Disabled handling for non-curated features, kept for reference:
        # else
        # {
        #
        # $feature->bioperl->primary_tag("GENE PREDICTION");
        # my $loc = new Bio::Location::Split();
        # map { $loc->add_sub_Location( $_->location ) } $feature->bioperl->exons();
        # $feature->bioperl->location( $loc );
        # $feature->bioperl->add_tag_value( 'locus_tag', $feature->gene->name() );
        # $feature->bioperl->add_tag_value( 'primary_id', $feature->primary_id() );
        # $seqobj->add_SeqFeature($feature->bioperl());
        # }
    }

    $seq_writer->write_seq($seqobj);

    # the features were modified for output; drop them so the next request
    # fetches fresh copies
    delete @{$self}{qw( genes _cds_features cds_features )};

    return $self;
}
#
# Title    : to_file
# Usage    : $feat->to_file( -format => 'genbank', -file => '>CONTIG_DUMP.gb' );
# Function : floats the contig, rewrites each cds feature's bioperl data
#          : for output (split location, 'CDS', protein_id and locus_tag
#          : values), puts a 'source' feature spanning the sequence in front
#          : of the existing features, and writes the sequence through
#          : Bio::SeqIO.
# Returns  : $self
# Args     : @args are passed unchanged to Bio::SeqIO->new
#
sub to_file {
    my ( $self, @args ) = @_;

    #
    # 'reset' cds_features and genes, to make sure we have a fresh copy from
    # the database. The cds_features() accessor caches under '_cds_features',
    # so that is the key that has to go - otherwise features already shifted
    # by an earlier call would be floated a second time. The old
    # 'cds_features' key is still removed in case anything stored a value
    # there.
    #
    delete @{$self}{qw( genes _cds_features cds_features )};

    $self->float();

    #
    # here we have to manipulate the bioperl object to be
    # what we expect it to be for the writer:
    #
    for my $feature ( @{ $self->cds_features() } ) {

        #
        # to write 'cds' feature, need a split location
        # so create a split location for each cds feature out of the
        # locations of the objects returned by the feature's bioperl() call
        #
        my $loc = Bio::Location::Split->new();
        for my $part ( $feature->bioperl() ) {
            $loc->add_sub_Location( $part->location );
        }
        $feature->bioperl($loc);

        # Disabled alternatives, kept for reference:
        # $_->bioperl->primary_tag('gene');
        # $_->bioperl->add_tag_value( 'gene', $_->gene->name() );
        # $_->bioperl->add_tag_value( 'locus_tag', $_->gene->name() );
        #
        # $_->bioperl->primary_tag('mRNA');
        # $_->bioperl->add_tag_value( 'locus_tag', $_->gene->name() );
        $feature->bioperl('CDS');

        #
        # now populate the 'tags' of the bioperl object for each feature
        #
        $feature->bioperl( 'protein_id', $feature->primary_id() );
        $feature->bioperl( 'locus_tag',  $feature->gene() );

        # $_->bioperl->add_tag_value('product', $_->gene->gene_products() );
        # $_->bioperl->add_tag_value('prot_desc', $_->gene->name_description() );
    }

    my $seq_writer = Bio::SeqIO->new(@args);
    $seq_writer->{'start_l'} = $self->start();
    $seq_writer->{'end_l'}   = $self->end();

    my $seqobj = $self->bioperl();
    $seqobj->display_id( $self->primary_id );

    #
    # need a 'source' feature which spans entire sequence
    #
    my $source_feat = Bio::SeqFeature::Generic->new( -start => 1, -end => $seqobj->length() );
    $source_feat->primary_tag('source');

    #
    # have to add the $source_feat AT THE BEGINNING OF THE ARRAY
    # so it is written first. The method used here is the 'correct'
    # way to do that using public methods. If it is a performance hindrance
    # use :
    #
    # unshift(@{$seqobj->{'_as_feat'}},$source_feat);
    #
    my @feats = $seqobj->remove_SeqFeatures();
    for my $feat ( $source_feat, @feats ) {
        $seqobj->add_SeqFeature($feat);
    }

    $seq_writer->write_seq($seqobj);

    #
    # the features were modified for output; drop them so the next request
    # fetches fresh copies from the database
    #
    delete @{$self}{qw( genes _cds_features cds_features )};

    return $self;
}
General documentation
Copyright © 2006, Northwestern University
All rights reserved.
|
|