These documents are for the HEAD of the CVS repository on July 19, 2007
Api docs for previous releases
Modware::Feature
Segment
Modware::Feature::Segment - Modware representation of arbitrary segment of some reference feature.
|
No package variables defined. |
NEVER INSTANTIATE THIS OBJECT, USE Modware::Segment
USE CASE: write a GenBank file WITH FEATURE ANNOTATIONS given a chromosome and start and end coordinates.
use Modware::Feature::Segment; my $feat = new Modware::Feature::Segment( -CHROMOSOME_NAME => "2", -START => 1031, -END => 2000 );
$feat->to_file( -format => 'genbank', -file => '>SEGMENT_DUMP.gb' );
|
If you want to create a Segment attached to a chromosome:
my $feat = new Modware::Segment( -CHROMOSOME_NAME => "2", -START => 1031, -END => 2000 );
When the contig is created, the bioperl method returns a Bio::SeqFeature::Generic object attached to a Bio::Seq object representing the chromosome. All features are relative to the chromosome.
If you would like to transform to 'relative' coordinates, use the method 'float'.
to_file takes the same arguments as Bio::SeqIO (i.e. -format and -file). First it 'floats' the contig through $contig->float(). It fetches fresh copies of all of the cds features and manipulates them so that the bioperl objects contain the correct annotation information and location formats that SeqIO uses to write the object. After writing, it erases the modified features so that fresh copies are fetched the next time they are requested.
|
Methods description
Title : _get_bioperl
Note : creates a bioperl object representing this Segment (Bio::SeqFeature::Generic)
Usage : called internally by lazy evaluated 'bioperl' method
Function : creates a bioperl object with a location on the chromosome's bioperl object.
Returns : nothing
Args : none
Title : _get_reference_feature
Usage : $feature->_get_reference_feature();
Function : gets the reference_feature object for this feature (called from reference_feature)
Returns : nothing
Args : none
Title : bioperl
Note : Fetches the bioperl object associated with this feature
: This is lazy evaluated
: each feature type will have its own _get_bioperl method
Usage : To print the bioperl name of the bioperl that this feature belongs to
: print $self->bioperl->bioperl_name();
Function : gets/sets the bioperl attribute of the feature
Returns : a Bio::SeqFeatureI implementing object
Args : optional: bioperl object
Title : float
Note : creates 'free contig'. bioperl seq object is the sequence of the contig and
: the features on it become relative to the contig not the whole
: chromosome; chromosome_no gets set to undef until it is added to some chromosome
Returns : self
Args : none
Title : reference_feature
Note : Fetches the reference_feature object associated with this feature
: This is lazy evaluated
Usage : to print reference_feature name
: print $feature->reference_feature->name();
Function : gets/sets the reference_feature attribute of the feature
Returns : Modware::Feature::reference_feature
Args : optional: Modware::Feature::reference_feature object
Title : to_file
Usage : $feat->to_file( -format => 'genbank', -file => '>CONTIG_DUMP.gb' );
Function : writes the contig WITH ANNOTATIONS (cds's) in any format that
: Bio::SeqIO can write.
Returns : $self
Args : named:
: -format => any format that Bio::SeqIO will write
: -file => optional, must be read/write. If this is not passed,
: writes to STDOUT
Methods code
#
# _get_bioperl
#
# Builds a Bio::SeqFeature::Generic for this Segment (start, end and the
# primary tag 'Segment'), attaches the bioperl object of the chromosome
# named by reference_name() to it, and stores the result through the
# 'bioperl' accessor.
#
# Args    : none
# Returns : the value returned by $self->bioperl( $bioperl )
#
sub _get_bioperl {
    my ($self) = @_;

    #
    # Class->new(...) is used instead of 'new Class(...)': the indirect
    # form is parsed heuristically and can be misread in a package that
    # defines its own 'new' sub.
    #
    my $bioperl = Bio::SeqFeature::Generic->new();
    $bioperl->start( $self->start() );
    $bioperl->end( $self->end() );
    $bioperl->primary_tag('Segment');

    my $chromosome = Modware::Chromosome->new( -name => $self->reference_name() );

    #
    # Note carried over from the original author:
    # returning a Segment object having a chromosome attached to it
    # to chromosome ( in _get_contig_features() )
    # results in weird behavior: if there are more than 8 contigs in the
    # array the code breaks, but does not throw an error.
    #
    $bioperl->attach_seq( $chromosome->bioperl );

    return $self->bioperl($bioperl);
}
#
# _get_reference_feature
#
# Creates a Modware::Chromosome named after reference_name() and stores it
# through the 'reference_feature' accessor.
#
# Args    : none
# Returns : the value returned by $self->reference_feature( $chromosome )
#
sub _get_reference_feature {
    my ($self) = @_;

    #
    # Class->new(...) is used instead of 'new Class(...)': the indirect
    # form is parsed heuristically and can be misread in a package that
    # defines its own 'new' sub.
    #
    my $chromosome = Modware::Chromosome->new( -name => $self->reference_name() );

    return $self->reference_feature($chromosome);
}
#
# _update_cached_sequence
#
# Intentionally does nothing for a Segment. The body only unpacks its
# arguments and has no explicit return statement.
#
# Args    : $obj - accepted but not used
#
sub _update_cached_sequence
{ my ($self, $obj) = @_;
#
# not updating any seqs
#
}
#
# bioperl
#
# Combined getter/setter for the cached bioperl object.
#
#   setter : any call with an argument (even undef) stores that argument
#   getter : when the 'bioperl' key has never been stored, _get_bioperl()
#            is called first to populate it
#
# Args    : optional: bioperl object
# Returns : the stored bioperl value
#
sub bioperl {
    my $self = shift;

    if (@_) {
        # explicit set: never triggers the lazy fetch
        $self->{bioperl} = $_[0];
    }
    elsif ( !exists $self->{bioperl} ) {
        # nothing stored yet, build it on demand
        $self->_get_bioperl();
    }

    return $self->{bioperl};
}
#
# chromosome
#
# Deprecated alias for reference_feature. Emits a warning on every call,
# then forwards to reference_feature as a setter when an argument was
# passed (even undef) and as a getter otherwise.
#
# Args    : optional: value handed to reference_feature
# Returns : whatever reference_feature returns
#
sub chromosome {
    my $self = shift;

    warn "chromosome method in Modware::Segement is deprecated, use reference_feature instead\n";

    return $self->reference_feature() unless @_;
    return $self->reference_feature( $_[0] );
}
#
# chromosome_name
#
# Deprecated alias for reference_name. Emits a warning on every call,
# then forwards to reference_name as a setter when an argument was
# passed (even undef) and as a getter otherwise.
#
# Args    : optional: value handed to reference_name
# Returns : whatever reference_name returns
#
sub chromosome_name {
    my $self = shift;

    warn "chromosome_name method in Modware::Segement is deprecated, use reference_name instead\n";

    return $self->reference_name() unless @_;
    return $self->reference_name( $_[0] );
}
#
# end
#
# Getter/setter for the end coordinate of the segment.
#
# A value is stored whenever a defined, non-empty argument is passed.
# This includes 0, which the constructor's defined() validation accepts
# but a plain truthiness test would silently drop.
# Passing undef (or nothing) leaves the stored value untouched.
#
# Args    : optional: end coordinate
# Returns : the stored end coordinate
#
sub end {
    my ( $self, $obj ) = @_;

    if ( defined $obj && $obj ne '' ) {
        $self->{end} = $obj;
    }

    return $self->{end};
}
#
# float
#
# Re-bases this segment and its cds features by the offset (1 - start):
#
#   1. every cds feature gets reference_feature(undef) and has the
#      '_gsf_seq' slot of its bioperl object cleared
#   2. a new Bio::Seq is built from the sequence string of the current
#      bioperl object
#   3. start/end of the current bioperl object are shifted by the offset
#   4. the new Bio::Seq is stored as this segment's bioperl object
#   5. every feature returned by a second cds_features() call is shifted
#      via shift_feature( $offset )
#   6. the bioperl object of each feature from step 1 is passed to
#      $self->bioperl(...)
#   7. reference_feature is set to undef
#
# NOTE(review): step 6 hands each feature's bioperl object to the
# 'bioperl' accessor in turn, so each call replaces the previous value.
# This looks like it was meant to add the features to the sequence -
# confirm against the original module before relying on it.
#
# Args    : none
# Returns : $self
#
sub float {
    my ($self) = @_;

    # detach the cds features from their reference feature and sequence
    my $cds_features = $self->cds_features();
    for my $cds (@$cds_features) {
        $cds->reference_feature(undef);
        $cds->bioperl->{_gsf_seq} = undef;
    }

    # offset that moves the segment start to coordinate 1
    my $offset = -1 * $self->start() + 1;

    my $segment_seq  = Bio::Seq->new( -seq => $self->bioperl->seq->seq );
    my $segment_feat = $self->bioperl;
    $segment_feat->start( $segment_feat->start() + $offset );
    $segment_feat->end( $segment_feat->end() + $offset );

    # from here on the segment's bioperl object is the standalone sequence
    $self->bioperl($segment_seq);

    for my $feature ( @{ $self->cds_features() } ) {
        $feature->shift_feature($offset);
    }

    for my $cds (@$cds_features) {
        $self->bioperl( $cds->bioperl );
    }

    $self->reference_feature(undef);

    return $self;
}
#
# new
#
# Constructor. Named arguments (unpacked through _rearrange):
#
#   -REFERENCE_NAME  : name of the reference feature
#   -CHROMOSOME_NAME : used as the reference name when -REFERENCE_NAME is
#                      missing or empty
#   -START           : start coordinate
#   -END             : end coordinate
#
# The -CHROMOSOME_NAME fallback tests for a defined, non-empty value
# rather than truthiness, so a name of "0" is not mistaken for a missing
# one.
#
# Throws  : via $self->throw when reference name, start or end is undefined
# Returns : the new object
#
sub new {
    my ( $type, @args ) = @_;

    my $self = bless {}, $type;

    my ( $reference_name, $start, $end, $chromosome_name ) =
        $self->_rearrange( [ qw( REFERENCE_NAME START END CHROMOSOME_NAME ) ], @args );

    my $has_reference  = defined $reference_name  && $reference_name ne '';
    my $has_chromosome = defined $chromosome_name && $chromosome_name ne '';
    if ( $has_chromosome && !$has_reference ) {
        $reference_name = $chromosome_name;
    }

    if ( !defined $reference_name || !defined $start || !defined $end ) {
        $self->throw( "need -reference_name, -start, and -end passed to the constructor of Modware::Feature::Segment." );
    }

    $self->reference_name($reference_name);
    $self->start($start);
    $self->end($end);

    # $self->bioperl($bioperl) if defined $bioperl;
    # $self->type ( 'contig' );
    return $self;
}
#
# reference_feature
#
# Combined getter/setter for the reference feature object.
#
#   setter : any call with an argument (even undef) stores that argument.
#            If the argument is true, a bioperl object is already cached
#            on $self, and $self is not a Modware::Feature::Aligned, the
#            argument's bioperl object is also passed to $self->bioperl.
#   getter : when the 'reference_feature' key has never been stored,
#            _get_reference_feature() is called first to populate it
#
# Args    : optional: reference feature object
# Returns : the stored reference feature
#
sub reference_feature {
    my $self = shift;

    if (@_) {
        my $feature = $_[0];
        $self->{reference_feature} = $feature;

        my $propagate = $feature
            && $self->{bioperl}
            && !$self->isa('Modware::Feature::Aligned');
        if ($propagate) {
            $self->bioperl( $feature->bioperl() );
        }
    }
    elsif ( !exists $self->{reference_feature} ) {
        # nothing stored yet, build it on demand
        $self->_get_reference_feature();
    }

    return $self->{reference_feature};
}
#
# reference_name
#
# Getter/setter for the name of the reference feature.
#
# A value is stored whenever a defined, non-empty argument is passed.
# This includes the name "0", which the constructor's defined()
# validation accepts but a plain truthiness test would silently drop.
# Passing undef (or nothing) leaves the stored value untouched.
#
# Args    : optional: reference name
# Returns : the stored reference name
#
sub reference_name {
    my ( $self, $obj ) = @_;

    if ( defined $obj && $obj ne '' ) {
        $self->{reference_name} = $obj;
    }

    return $self->{reference_name};
}
#
# start
#
# Getter/setter for the start coordinate of the segment.
#
# A value is stored whenever a defined, non-empty argument is passed.
# This includes 0, which the constructor's defined() validation accepts
# but a plain truthiness test would silently drop.
# Passing undef (or nothing) leaves the stored value untouched.
#
# Args    : optional: start coordinate
# Returns : the stored start coordinate
#
sub start {
    my ( $self, $obj ) = @_;

    if ( defined $obj && $obj ne '' ) {
        $self->{start} = $obj;
    }

    return $self->{start};
}
#
# to_apollo_file
#
# Writes this segment through a Bio::SeqIO writer built from @args.
# Steps, in order:
#   - drops the cached 'genes' and 'cds_features'
#   - sets start_l / end_l on the writer from start() / end()
#   - sets the display id of the bioperl object to
#     "<reference_name>:<start>..<end>"
#   - builds a 'source' feature spanning 1..length with organism,
#     mol_type and chromosome tags
#   - removes the existing features from the bioperl object, adds the
#     source feature, then adds the bioperl object of every cds feature
#     whose source() matches /curat/i
#   - writes the sequence and drops the caches again
# Unlike to_file, this sub does not call float().
#
# Args    : passed through unchanged to the Bio::SeqIO constructor
# Returns : $self
#
sub to_apollo_file
{
my ($self, @args) = @_;
#
# 'reset' cds_features and genes, to make sure we have a fresh copy from the database
#
delete $self->{genes};
delete $self->{cds_features};
my $seq_writer = new Bio::SeqIO( @args );
$seq_writer->start_l($self->start());
$seq_writer->end_l($self->end());
my $seqobj = $self->bioperl();
# print Dumper($self->bioperl());
my $segmentid = $self->reference_name().":".$self->start()."..".$self->end();
$seqobj->display_id( $segmentid);
# @feature_array is declared but never used in this sub
my @feature_array;
my $source_feat = new Bio::SeqFeature::Generic( -start => 1, -end => $seqobj->length() );
$source_feat->primary_tag( 'source' );
$source_feat->add_tag_value( 'organism', 'Dictyostelium discoideum');
$source_feat->add_tag_value( 'mol_type', 'genomic DNA');
# NOTE(review): the tag value is whatever reference_feature() returns, not
# reference_name() - confirm that this is the intended value
$source_feat->add_tag_value( 'chromosome', $self->reference_feature());
my $features = $self->cds_features();
# the removed features are kept in @feats but are not added back in this sub
my @feats = $seqobj->remove_SeqFeatures();
$seqobj->add_SeqFeature($source_feat);
foreach my $feature (@$features)
{
# only features whose source matches /curat/i are written
if($feature->source() =~ /curat/i)
{
#
# NOTE(review): the calls below pass a tag name, a location and tag/value
# pairs straight to $feature->bioperl(...). The commented-out 'else'
# branch further down performs the same steps as method calls on the
# bioperl object (->primary_tag, ->exons, ->location, ->add_tag_value).
# The live code may have lost those method names - verify against the
# original module before changing anything here.
#
$feature->bioperl("CDS");
my $loc = new Bio::Location::Split();
map { $loc->add_sub_Location( $_->location ) } $feature->bioperl();
$feature->bioperl( $loc );
$feature->bioperl( 'locus_tag', $feature->gene() );
$feature->bioperl( 'primary_id', $feature->primary_id() );
$seqobj->add_SeqFeature($feature->bioperl());
}
# else
# {
#
# $feature->bioperl->primary_tag("GENE PREDICTION");
# my $loc = new Bio::Location::Split();
# map { $loc->add_sub_Location( $_->location ) } $feature->bioperl->exons();
# $feature->bioperl->location( $loc );
# $feature->bioperl->add_tag_value( 'locus_tag', $feature->gene->gene_name() );
# $feature->bioperl->add_tag_value( 'primary_id', $feature->primary_id() );
# $seqobj->add_SeqFeature($feature->bioperl());
# }
}
# print Dumper($seqobj);
$seq_writer->write_seq( $seqobj );
# drop the caches again so the modified features are not reused
delete $self->{genes};
delete $self->{cds_features};
return $self;
}
#
# to_file
#
# Writes this segment through a Bio::SeqIO writer built from @args.
# Steps, in order:
#   - drops the cached 'genes' and 'cds_features'
#   - calls float()
#   - prepares every cds feature for writing (split location, 'CDS',
#     protein_id and locus_tag)
#   - stores start() / end() in the writer's 'start_l' / 'end_l' hash slots
#   - sets the display id of the bioperl object to
#     "<reference_name>:<start>..<end>"
#   - puts a 'source' feature spanning 1..length in front of the existing
#     features, writes the sequence and drops the caches again
#
# Args    : passed through unchanged to the Bio::SeqIO constructor
# Returns : $self
#
sub to_file
{ my ($self, @args) = @_;
#
# 'reset' cds_features and genes, to make sure we have a fresh copy from the database
#
delete $self->{genes};
delete $self->{cds_features};
$self->float();
#
# here we have to manipulate the bioperl object to be
# what we expect it to be for the writer:
#
map {
#
# to write 'cds' feature, need a split location
# so create a split location for each cds feature out of the array of
# Bio::Location::Simple locations returned by 'exons' method
#
# NOTE(review): the comment above mentions an 'exons' method, and the
# commented-out lines below call methods on $_->bioperl (->primary_tag,
# ->add_tag_value). The live code instead passes the location, 'CDS' and
# tag/value pairs straight to $_->bioperl(...). The method names may have
# been lost - verify against the original module before changing
# anything here.
#
my $loc = new Bio::Location::Split();
map { $loc->add_sub_Location( $_->location ) } $_->bioperl();
$_->bioperl( $loc );
# $_->bioperl->primary_tag('gene');
# $_->bioperl->add_tag_value( 'gene', $_->gene->gene_name() );
# $_->bioperl->add_tag_value( 'locus_tag', $_->gene->gene_name() );
#
# $_->bioperl->primary_tag('mRNA');
# $_->bioperl->add_tag_value( 'locus_tag', $_->gene->gene_name() );
$_->bioperl('CDS');
#
# now populate the 'tags' of the bioperl object for each feature
#
$_->bioperl( 'protein_id', $_->primary_id() );
$_->bioperl( 'locus_tag', $_->gene() );
# $_->bioperl->add_tag_value('product', $_->gene->gene_products() );
# $_->bioperl->add_tag_value('prot_desc', $_->gene->name_description() );
} @{ $self->cds_features() };
my $seq_writer = new Bio::SeqIO( @args );
# the coordinates are written directly into the writer's hash (to_apollo_file
# calls start_l / end_l as methods instead)
$seq_writer->{'start_l'} = $self->start();
$seq_writer->{'end_l'} = $self->end();
my $seqobj = $self->bioperl();
### We had to make up this id because Segment is not a feature in dictyBase
my $segmentid = $self->reference_name().":".$self->start()."..".$self->end();
$seqobj->display_id( $segmentid);
#
# need a 'source' feature which spans entire sequence
#
my $source_feat = new Bio::SeqFeature::Generic( -start => 1, -end => $seqobj->length() );
$source_feat->primary_tag( 'source' );
#
# have to add the $source_feat AT THE BEGINNING OF THE ARRAY
# so it is written first. The method used here is the 'correct'
# way to do that using public methods. If it is a performance hindrance
# use :
#
# unshift(@{$seqobj->{'_as_feat'}},$source_feat);
#
my @feats = $seqobj->remove_SeqFeatures();
unshift(@feats,$source_feat);
map { $seqobj->add_SeqFeature( $_ ) } @feats;
$seq_writer->write_seq( $seqobj );
#
# 'reset' cds_features and genes, to make sure we have a fresh copy from the database
#
delete $self->{genes};
delete $self->{cds_features};
return $self;
}
General documentation
Copyright © 2006, Northwestern University
All rights reserved.
|
|