These documents are for the HEAD of the CVS repository on July 19, 2007. API docs for previous releases

Modware::Feature

CONTIG

Summary Included libraries Package variables Synopsis Description General documentation Methods

Summary
   Modware::Feature::CONTIG - Modware representation of a contig
Package variables top
No package variables defined.
Included modulestop
Bio::SeqFeature::Generic
Bio::SeqIO
Data::Dumper
Modware::Feature::GENERIC
Modware::Search::Gene
Inherit top
Modware::Feature::GENERIC
Synopsistop
  NEVER INSTANTIATE THIS OBJECT, USE Modware::Feature

USE CASE: write a genbank file WITH FEATURE ANNOTATIONS

use Modware::Feature;
my $feat = new Modware::Feature( -primary_id => SOME_CONTIG_PRIMARY_ID );

$feat->to_file( -format => 'genbank', -file => '>CONTIG_DUMP.gb' );
Descriptiontop
  To create a contig object (initially attached to its chromosome), instantiate it through Modware::Feature:

my $feat = new Modware::Feature( -primary_id => 'some primary_id' );

When the contig is created, the bioperl method returns a Bio::SeqFeature::Generic object attached to a
Bio::Seq object representing the chromosome. All features are relative to the chromosome.

If you would like to transform to 'relative' coordinates, use the method 'float'.

to_file takes the same arguments as Bio::SeqIO (i.e. -format and -file).
First it 'floats' the contig through $contig->float(). It fetches fresh
copies of all of the cds features and manipulates them so that the bioperl objects contain
the correct annotation information and location formats that SeqIO uses to write the object.
After writing it erases the modified features so that fresh copies are fetched next time they are requested.
Methodstop
_get_genesDescriptionCode
_initDescriptionCode
cds_featuresDescriptionCode
check_straddlersDescriptionCode
floatDescriptionCode
genesDescriptionCode
to_apollo_fileNo descriptionCode
to_fileDescriptionCode

Methods description

_get_genescodetopprevnext
 Title    : _get_genes
Function : fetches all genes on this contig and attaches their mRNA features
: onto the bioperl object
Returns : nothing
Args : none
_initcodetopprevnext
 Title    : _init
Note : sets attributes specific to Contig features
Usage : called internally by new
Function :
Returns : nothing
Args : none
cds_featurescodetopprevnext
 Title    : cds_features
Note : Fetches all CDS features located on this contig
Usage : To print the primary_id of all CDS features associated with this feature
: print map { $_->primary_id()."\n" } @{ $self->cds_features() };
Function : gets/sets the cds_features array of the contig
Returns : reference to an array of feature objects
Args : optional: reference to an array of feature objects
check_straddlerscodetopprevnext
 Title    : check_straddlers
Note : Fetches all CDS features that overlap a boundary of this contig
Usage : To print the primary_id of all CDS features straddling this contig
: print map { $_->primary_id()."\n" } $self->check_straddlers();
Returns : array of features
Args : optional: none
floatcodetopprevnext
 Title    : float
Note : creates 'free contig'. bioperl seq object is the sequence of the contig and
: the features on it become relative to the contig not the whole
: chromosome; chromosome_no gets set to undef until it is added to some chromosome
Returns : self
Args : optional: none
genescodetopprevnext
 Title    : genes
Note : Fetches all gene objects with features located on this contig
Usage : To print the name of all genes on this contig
: print map { $_->name()."\n" } @{ $self->genes() };
Returns : reference to an array of gene objects
Args : nothing
to_filecodetopprevnext
 Title    : to_file
Usage : $feat->to_file( -format => 'genbank', -file => '>CONTIG_DUMP.gb' );
Function : writes the contig WITH ANNOTATIONS (cds's) in any format that
: Bio::SeqIO can write.
Returns : $self
Args : named:
: -format => any format that Bio::SeqIO will write
: -file => optional, must be read/write. If this is not passed,
: writes to STDOUT

Methods code

_get_genesdescriptiontopprevnext
# _get_genes
#
# Finds the genes that lie entirely inside this contig and stores them,
# together with their mRNA sub-features, on the object through the
# genes() and cds_features() setters.
#
# Args    : none
# Returns : the value of the final cds_features() setter call; callers in
#           this file invoke it only for its side effects.
sub _get_genes {
    my $self = shift;

    # The search reports every gene overlapping the contig's range.
    my @overlapping = Modware::Search::Gene->Search_overlapping_by_range(
        $self->reference_feature(),
        $self->start(),
        $self->end(),
    );

    # Keep only genes fully contained in the contig; a gene that starts
    # before the contig or ends after it is dropped.
    my @contained;
    for my $gene (@overlapping) {
        next if !( $gene->start() >= $self->start() );
        next if !( $gene->end() <= $self->end() );
        push @contained, $gene;
    }

    # Flatten every gene's feature list, then keep the mRNA features.
    my @all_features = map { @{ $_->features() } } @contained;
    my @transcripts  = grep { $_->type() eq "mRNA" } @all_features;

    $self->genes( \@contained );
    return $self->cds_features( \@transcripts );
}
_initdescriptiontopprevnext
# _init
#
# Sets the attributes specific to contig features: the type is always
# 'contig', and the source defaults to "Sequencing Center" when none has
# been set yet.
#
# Args    : any extra arguments are accepted but not used here
# Returns : nothing meaningful
sub _init {
    my ($self, @args) = @_;

    $self->type('contig');

    # Only fill in a source when none is set; never overwrite one.
    if ( !$self->source() ) {
        $self->source("Sequencing Center");
    }
}
cds_featuresdescriptiontopprevnext
# cds_features
#
# Getter/setter for the list of CDS (mRNA) features on this contig.
#
# Getter: when neither the feature cache ($self->{_cds_features}) nor the
#         gene cache ($self->{genes}) is populated, genes() is called so
#         that the lookup can fill both caches.
# Setter: stores the array reference, passes each feature's bioperl object
#         to $self->bioperl(...), and adds each one to the sequence object
#         of the reference feature (or to this contig's own bioperl object
#         when there is no reference feature).
#
# Args    : optional reference to an array of feature objects
# Returns : the stored array reference (undef when nothing is cached)
# Dies    : "ERROR with <primary_id>\n<reason>" when handing a feature's
#           bioperl object to $self->bioperl(...) fails
sub cds_features {
    my ($self, $obj) = @_;

    my $is_setter = @_ > 1;

    # Fetch from the database only on a plain read with both caches empty.
    if ( !$is_setter && !exists $self->{_cds_features} && !$self->{genes} ) {
        $self->genes();
    }

    if ($is_setter) {
        $self->{_cds_features} = $obj;

        for my $feature (@$obj) {
            # Test eval's own result, not just $@, so a failure that leaves
            # $@ false is still reported.
            my $attached = eval { $self->bioperl( $feature->bioperl ); 1 };
            next if $attached;

            # Capture $@ before primary_id() runs, in case that call
            # resets it.
            my $reason = $@ || 'unknown error';
            die "ERROR with " . $feature->primary_id() . "\n$reason";
        }

        my $seq_obj = $self->reference_feature()
            ? $self->reference_feature->bioperl
            : $self->bioperl();

        for my $feature (@$obj) {
            $seq_obj->add_SeqFeature( $feature->bioperl );
        }
    }

    return $self->{_cds_features};
}
check_straddlersdescriptiontopprevnext
# check_straddlers
#
# Builds a Modware::Feature for every id returned by
# $self->reference_feature( start, end ) and returns those whose type is
# "mRNA".
#
# NOTE(review): reference_feature() is called here with the contig's start
# and end and its result is dereferenced as an array of feature ids; that
# behaviour is defined outside this file - confirm.
#
# Args    : none (a second positional argument is accepted but ignored)
# Returns : list of Modware::Feature objects
sub check_straddlers {
    my ($self, $obj) = @_;

    my @candidates =
        map { Modware::Feature->new( -feature_id => $_ ) }
        @{ $self->reference_feature( $self->start(), $self->end() ) };

    my @straddlers = grep { $_->type eq "mRNA" } @candidates;

    return @straddlers;
}
floatdescriptiontopprevnext
# float
#
# Turns the contig into a 'free contig': coordinates become relative to the
# contig itself (its own start becomes 1) instead of to the chromosome.
#
# Steps, in the order the code performs them:
#   1. detach every CDS feature from its reference feature and clear the
#      '_gsf_seq' slot of its bioperl object;
#   2. shift the contig's bioperl start/end by the offset (1 - start);
#   3. pass a new Bio::Seq holding the contig sequence to $self->bioperl;
#   4. shift every CDS feature by the same offset;
#   5. pass each CDS feature's bioperl object to $self->bioperl;
#   6. clear the contig's reference feature.
#
# NOTE(review): what $self->bioperl(...) does with a Bio::Seq versus a
# feature object is defined outside this file - presumably it swaps the
# sequence and re-attaches the features; confirm.
#
# Args    : none (a second positional argument is accepted but ignored)
# Returns : $self
sub float {
    my ($self, $obj) = @_;

    # Pull the CDS features off the chromosome.
    my $features = $self->cds_features();
    for my $feature (@$features) {
        $feature->reference_feature(undef);
        $feature->bioperl->{_gsf_seq} = undef;
    }

    # Offset that maps the contig's first base to position 1.
    my $offset = 1 - $self->start();

    # Copy the sequence before the coordinates change, then move the
    # contig's own location and install the standalone sequence.
    my $seq     = Bio::Seq->new( -seq => $self->bioperl->seq->seq );
    my $bioperl = $self->bioperl;
    $bioperl->start( $bioperl->start() + $offset );
    $bioperl->end( $bioperl->end() + $offset );
    $self->bioperl($seq);

    # Convert the features to contig coordinates.
    for my $feature ( @{ $self->cds_features() } ) {
        $feature->shift_feature($offset);
    }

    # Hand the detached features back to the contig's bioperl accessor.
    for my $feature (@$features) {
        $self->bioperl( $feature->bioperl );
    }

    $self->reference_feature(undef);

    return $self;
}
genesdescriptiontopprevnext
# genes
#
# Getter/setter for the genes located on this contig.
#
# Getter: when $self->{genes} has never been set, _get_genes() is called
#         first to fetch the genes from the database.
# Setter: stores the supplied value without fetching anything.
#
# Args    : optional value to store (the usage example dereferences the
#           result as an array reference)
# Returns : the stored value of $self->{genes}
sub genes {
    my ($self, $obj) = @_;

    if ( @_ > 1 ) {
        # Explicit assignment: never hit the database.
        $self->{genes} = $obj;
    }
    elsif ( !exists $self->{genes} ) {
        # First read: _get_genes fills the cache through this same setter.
        $self->_get_genes();
    }

    return $self->{genes};
}
to_apollo_filedescriptiontopprevnext
# to_apollo_file
#
# Writes the contig through a Bio::SeqIO writer built from @args.
#
# A 'source' feature spanning the whole sequence is added first, then every
# CDS feature whose source matches /curat/i.
#
# The gene and CDS caches are cleared before and after writing so that
# fresh copies come from the database. The CDS cache lives under the key
# '_cds_features' (the key cds_features() reads and writes); the legacy
# 'cds_features' key is cleared as well for backward compatibility.
#
# NOTE(review): start_l()/end_l() are called as methods on the writer here,
# whereas to_file() assigns the same names as hash slots - confirm which
# form the writer class in use supports.
# NOTE(review): $feature->bioperl(...) is called with 'CDS', with a split
# location and with tag/value pairs; how the accessor treats these
# arguments is defined outside this file - confirm.
#
# Args    : the arguments accepted by Bio::SeqIO->new (e.g. -format, -file)
# Returns : $self
sub to_apollo_file {
    my ($self, @args) = @_;

    # Reset the caches so the features are fetched fresh from the database.
    delete @{$self}{qw(genes cds_features _cds_features)};

    my $seq_writer = Bio::SeqIO->new(@args);
    $seq_writer->start_l( $self->start() );
    $seq_writer->end_l( $self->end() );

    my $seqobj = $self->bioperl();
    $seqobj->display_id( $self->primary_id );

    # 'source' feature spanning the entire sequence.
    my $source_feat = Bio::SeqFeature::Generic->new(
        -start => 1,
        -end   => $seqobj->length(),
    );
    $source_feat->primary_tag('source');
    $source_feat->add_tag_value( 'organism',   'Dictyostelium discoideum' );
    $source_feat->add_tag_value( 'mol_type',   'genomic DNA' );
    $source_feat->add_tag_value( 'chromosome', $self->reference_feature() );

    my $features = $self->cds_features();

    # Strip whatever is attached to the sequence object (the removed
    # features are discarded) and start again from the source feature.
    my @discarded = $seqobj->remove_SeqFeatures();
    $seqobj->add_SeqFeature($source_feat);

    for my $feature (@$features) {
        # Only curated features are written.
        next unless $feature->source() =~ /curat/i;

        $feature->bioperl("CDS");

        # A CDS is written with a split location assembled from the
        # locations of the objects returned by $feature->bioperl().
        my $loc = Bio::Location::Split->new();
        for my $part ( $feature->bioperl() ) {
            $loc->add_sub_Location( $part->location );
        }
        $feature->bioperl($loc);

        $feature->bioperl( 'locus_tag',  $feature->gene() );
        $feature->bioperl( 'primary_id', $feature->primary_id() );

        $seqobj->add_SeqFeature( $feature->bioperl() );

        # Disabled handling for non-curated features, kept for reference:
        # else
        # {
        #
        # $feature->bioperl->primary_tag("GENE PREDICTION");
        # my $loc = new Bio::Location::Split();
        # map { $loc->add_sub_Location( $_->location ) } $feature->bioperl->exons();
        # $feature->bioperl->location( $loc );
        # $feature->bioperl->add_tag_value( 'locus_tag', $feature->gene->name() );
        # $feature->bioperl->add_tag_value( 'primary_id', $feature->primary_id() );
        # $seqobj->add_SeqFeature($feature->bioperl());
        # }
    }

    $seq_writer->write_seq($seqobj);

    # Drop the modified features so the next request fetches fresh copies.
    delete @{$self}{qw(genes cds_features _cds_features)};

    return $self;
}
to_filedescriptiontopprevnext
# to_file
#
# Writes the contig WITH its CDS annotations in any format Bio::SeqIO can
# write.
#
# The contig is first 'floated' (see float) so coordinates are relative to
# the contig. Each CDS feature is then given a split location plus the
# 'CDS', 'protein_id' and 'locus_tag' values through its bioperl accessor.
# A 'source' feature spanning the whole sequence is placed ahead of all
# other features, and the sequence is written.
#
# The gene and CDS caches are cleared before and after writing so that
# fresh copies come from the database. The CDS cache lives under the key
# '_cds_features' (the key cds_features() reads and writes); the legacy
# 'cds_features' key is cleared as well for backward compatibility.
#
# NOTE(review): $cds->bioperl(...) is called with a split location, with
# 'CDS' and with tag/value pairs; how the accessor treats these arguments
# is defined outside this file - confirm.
#
# Args    : named, passed straight to Bio::SeqIO->new:
#             -format => any format that Bio::SeqIO will write
#             -file   => optional output file
# Returns : $self
sub to_file {
    my ($self, @args) = @_;

    # Reset the caches so the features are fetched fresh from the database.
    delete @{$self}{qw(genes cds_features _cds_features)};

    $self->float();

    # Shape each feature's bioperl object into what the writer expects.
    for my $cds ( @{ $self->cds_features() } ) {

        # To write a 'cds' feature we need a split location, so build one
        # from the locations of the objects returned by $cds->bioperl().
        my $loc = Bio::Location::Split->new();
        for my $part ( $cds->bioperl() ) {
            $loc->add_sub_Location( $part->location );
        }
        $cds->bioperl($loc);

        # Disabled alternatives, kept for reference:
        # $_->bioperl->primary_tag('gene');
        # $_->bioperl->add_tag_value( 'gene', $_->gene->name() );
        # $_->bioperl->add_tag_value( 'locus_tag', $_->gene->name() );
        #
        # $_->bioperl->primary_tag('mRNA');
        # $_->bioperl->add_tag_value( 'locus_tag', $_->gene->name() );
        $cds->bioperl('CDS');

        # Now populate the 'tags' of the bioperl object for this feature.
        $cds->bioperl( 'protein_id', $cds->primary_id() );
        $cds->bioperl( 'locus_tag',  $cds->gene() );

        # $_->bioperl->add_tag_value('product', $_->gene->gene_products() );
        # $_->bioperl->add_tag_value('prot_desc', $_->gene->name_description() );
    }

    my $seq_writer = Bio::SeqIO->new(@args);
    $seq_writer->{'start_l'} = $self->start();
    $seq_writer->{'end_l'}   = $self->end();

    my $seqobj = $self->bioperl();
    $seqobj->display_id( $self->primary_id );

    # Need a 'source' feature which spans the entire sequence.
    my $source_feat = Bio::SeqFeature::Generic->new(
        -start => 1,
        -end   => $seqobj->length(),
    );
    $source_feat->primary_tag('source');

    # The source feature has to be AT THE BEGINNING of the feature array so
    # it is written first. Removing and re-adding everything is the
    # 'correct' way to do that using public methods. If it is a
    # performance hindrance use:
    #
    # unshift(@{$seqobj->{'_as_feat'}},$source_feat);
    #
    my @feats = $seqobj->remove_SeqFeatures();
    unshift @feats, $source_feat;
    for my $feat (@feats) {
        $seqobj->add_SeqFeature($feat);
    }

    $seq_writer->write_seq($seqobj);

    # Drop the modified features so the next request fetches fresh copies.
    delete @{$self}{qw(genes cds_features _cds_features)};

    return $self;
}

General documentation

AUTHOR - Eric Just top
   Eric Just e-just@northwestern.edu
APPENDIX top
   The rest of the documentation details each of the object
methods. Internal methods are usually preceded with a _