These documents are for the HEAD of the CVS repository on July 19, 2007
Api docs for previous releases
Modware::Feature
TRANSCRIPT
Modware::Feature::TRANSCRIPT - Parent class for all transcript classes (mRNA, ncRNA, tRNA)
|
No package variables defined. |
NEVER INSTANTIATE THIS OBJECT, USE Modware::Feature
|
This class currently functions as an abstract class for transcript features. Currently, mRNA and tRNA are subclasses which inherit from this.
In general a transcript object is modeled as a Bio::SeqFeature::Gene::Transcript object and contains exons. This class handles the synchronization of these exons with the database and some sequence functionality as well.
Sequences are made available: genomic : the unspliced transcript plus 1000 bases upstream and 1000 bases downstream. spliced_transcript : the transcript after introns have been removed
|
Methods description
Title : _exon_type
Function : different 'Transcript' subclasses store their subfeatures
: as different 'types' of exons
: i.e. pseudogene exon type is 'pseudogene_exon'
Returns : the exon type name currently stored for this transcript
Args : optional exon type name (string) to store
Title : _genomic_flank_down
Function : gets/sets the number of bases of the genomic sequence downstream of the transcript end
: gets set automatically when genomic_seq is being calculated
Returns : number
Args : optional number
Title : _genomic_flank_up
Function : gets/sets the number of bases of the genomic sequence upstream of the transcript start
: gets set automatically when genomic_seq is being calculated
Returns : number
Args : optional number
Title : _get_bioperl
Note : creates a bioperl object representing this transcript (Bio::SeqFeature::Gene::Transcript)
Usage : called internally by lazy evaluated 'bioperl' method
Function : creates a bioperl object with a location on the chromosome's bioperl object.
Returns : nothing
Args : none
Title : _get_cached_sequences
Function : Calculates all available sequences and stores in a hash.
: Useful for comparing state of feature before and after a genome change
: so you could call cached_sequence(), then apply some change
: then compare the sequences in cached_sequences vs. the sequences
: calculated by calculate_SOME_seq().
Returns : nothing
Args : none
Title : _init
Note : sets attributes specific to transcript features (default exon type and display type)
Usage : called internally by new
Function :
Returns : nothing
Args : none
Title : _update_exons
Function : Updating exons is a bit complicated
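: 1) for every exon in the object, insert or update the matching exon subfeature
: 2) for every stored exon that is no longer in the object, delete it, or delete only its relationship to this transcript when another feature still uses it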
Returns : nothing
Args : none
Title : _update_gene_association
Function : calls SUPER::update to associate the feature with the gene
: but then also calls a method on the gene to update
: its coordinates based on the associated transcripts.
Returns : nothing
Args : none
Title : bioperl
Function : calls SUPER->bioperl
: if associating new bioperl object, attach it to the chromosome Bio::Seq object
Returns : Bio::SeqFeature::Gene::Transcript
Args : optional Bio::SeqFeature::Gene::Transcript object
Title : calculate_genomic_seq
Function : returns genomic sequence as calculated from the chromosome sequence
: sets the private attributes _genomic_flank_up and _genomic_flank_down
: which store the number of available bases upstream and downstream of the sequence
: most of the time, both will be 1000 bases.
Returns : dna string
Args : none
Title : calculate_spliced_transcript_seq
Function : Calculates spliced transcript (exons and no introns) for a transcript
Returns : DNA sequence string
Args : none
Title : insert
Function : calls Feature::insert but then inserts the intron/exon subfeatures
: which are specific to Transcript features, and also
: updates the associated Gene Feature's coordinates
Returns : nothing
Args : none
Title : sequence
Function : returns sequence of a given type and formats in a given way
Returns : formatted sequence
Args : named arguments
:
: -type => type of sequence ( in display_seq_type )
: -format => string (used by Bio::SeqIO) optional
:
Title : shift_feature
Note :
Usage : To move a feature 125 bases toward higher coordinates:
: $feature->shift_feature( 125 );
Function : moves a feature by a specified amount
Returns : nothing
Args : integer ( + or - )
Title : update
Function : calls Feature::update but then updates the intron/exon subfeatures
: which are specific to Transcript features, and also
: updates the associated Gene Feature's coordinates
Returns : nothing
Args : none
Methods code
# Function : gets/sets the exon type name used for this transcript's
#          : exon subfeatures
# Returns  : the stored exon type name
# Args     : optional exon type name; any second argument (even undef)
#          : replaces the stored value
sub _exon_type {
    my $self = shift;
    $self->{_exon_type} = shift if @_;
    return $self->{_exon_type};
}
# Function : gets/sets the number of bases stored as the downstream
#          : genomic flank
# Returns  : the stored number
# Args     : optional number; when a second argument is passed it is stored
sub _genomic_flank_down {
    my ( $self, @new_value ) = @_;
    ( $self->{_genomic_flank_down} ) = @new_value if @new_value;
    return $self->{_genomic_flank_down};
}
# Function : gets/sets the number of bases stored as the upstream
#          : genomic flank
# Returns  : the stored number
# Args     : optional number; when a second argument is passed it is stored
sub _genomic_flank_up {
    my $self = $_[0];

    # called as a plain getter
    return $self->{_genomic_flank_up} if @_ < 2;

    # called as a setter: store and hand back the new value
    return $self->{_genomic_flank_up} = $_[1];
}
# Function : builds a bioperl feature for this transcript from its stored
#          : location, adds one sub-feature per stored exon (sorted by
#          : start) and registers the result through bioperl()
# Returns  : whatever $self->bioperl( $new_object ) returns
# Args     : none
# Throws   : if an exon has no location, or more than one location
sub _get_bioperl {
    my ($self) = @_;

    # NOTE(review): assumes _featureloc() returns a featureloc row exposing
    # strand/fmin/fmax, like the exon locations read below - confirm.
    my $featureloc = $self->_featureloc();

    # chado stores interbase coordinates, so add 1 to fmin to get a
    # 1-based start
    # NOTE(review): the API docs describe the result as a
    # Bio::SeqFeature::Gene::Transcript; the class used here is kept as found.
    my $bioperl = Bio::SeqFeature::Generic->new(
        -strand => $featureloc->strand(),
        -start  => $featureloc->fmin() + 1,
        -end    => $featureloc->fmax(),
    );

    my $exon_type = $self->_exon_type();
    my @exons = grep { $_->type_id->name eq $exon_type } $self->subfeatures();

    my @bp_exons;
    foreach my $exon (@exons) {
        my $locs     = $exon->featureloc_feature_ids();
        my $location = $locs->next();
        $self->throw( "no location for feature_id: " . $exon->feature_id )
            if !$location;
        $self->throw( "more than one location for feature_id: " . $exon->feature_id )
            if $locs->next();

        # chado is interbase coordinates, so add 1 to start of exons
        my $bp_exon = Bio::SeqFeature::Generic->new(
            -start  => $location->fmin + 1,
            -end    => $location->fmax,
            -strand => $location->strand(),
        );

        # remember which stored exon this bioperl exon came from
        $bp_exon->add_tag_value( 'feature_id', $exon->feature_id() );
        push @bp_exons, $bp_exon;
    }

    # add the exons to the transcript object in order of start coordinate
    foreach my $bp_exon ( sort { $a->start() <=> $b->start() } @bp_exons ) {
        $bioperl->add_SeqFeature($bp_exon);
    }

    return $self->bioperl($bioperl);
}
# Function : calculates every available sequence type and stores the
#          : results, keyed by lower-cased type name, via cached_sequences()
# Returns  : whatever $self->cached_sequences( $hashref ) returns
# Args     : none
sub _get_cached_sequences {
    my $self = shift;

    my %sequence_for;
    for my $label ( 'Genomic', 'Spliced transcript' ) {
        my $key = lc $label;

        # 'spliced transcript' -> calculate_spliced_transcript_seq
        ( my $stem = $key ) =~ s/ /_/g;
        my $calculator = "calculate_" . $stem . "_seq";

        $sequence_for{$key} = $self->$calculator;
    }

    return $self->cached_sequences( \%sequence_for );
}
# Function : sets the defaults for a transcript feature: the exon type
#          : becomes 'exon' and the display type mirrors type()
# Returns  : whatever $self->display_type( ... ) returns
# Args     : none (extra arguments are ignored)
sub _init {
    my $self = shift;

    # subfeatures of a plain transcript are stored as 'exon'
    $self->_exon_type('exon');

    # by default a transcript is displayed under its own type
    return $self->display_type( $self->type() );
}
# Function : synchronizes the exons of the bioperl object with the stored
#          : exon subfeatures:
#          :  1) every exon of the bioperl object is inserted or updated
#          :  2) every stored exon that is no longer part of the bioperl
#          :     object is deleted, or - when another feature still uses
#          :     it - only its relationship to this transcript is deleted
# Returns  : nothing
# Args     : none
# Throws   : if the bioperl object has no exons
sub _update_exons {
    my ($self) = @_;

    # Exons are attached to the bioperl object as sub-features (see
    # _get_bioperl / shift_feature), so read them back the same way.
    # NOTE(review): if the bioperl object is a
    # Bio::SeqFeature::Gene::Transcript this also returns non-exon
    # sub-features - confirm.
    my @exons = $self->bioperl->get_SeqFeatures();
    $self->throw( "There are no exons in this feature: ".$self->primary_id ) if !@exons;

    my %is_current_exon;
    foreach my $exon (@exons) {
        $self->_insert_or_update_subfeature( $exon, $self->_exon_type() );

        # the feature_id tag is read back right after the call above,
        # which is expected to have tagged the exon with it
        my ($exon_feature_id) = $exon->get_tag_values('feature_id');
        $is_current_exon{$exon_feature_id} = 1;

        # NOTE(review): reason for dropping the annotation slot is not
        # visible here - kept as found.
        delete $exon->{'annotation'};
    }

    my @subfeatures  = $self->_relationship_subjects('part_of');
    my @stored_exons = grep { $_->type_id->name eq $self->_exon_type() } @subfeatures;

    foreach my $stored_exon (@stored_exons) {
        next if $is_current_exon{ $stored_exon->feature_id() };

        # this whole bit could be simplified with some sql
        # or Class::DBI::AbstractSearch
        my @feature_rels = Chado::Feature_Relationship->search( subject_id => $stored_exon->feature_id() );

        if ( !grep { $_->object_id ne $self->feature_id() } @feature_rels ) {
            # the exon belongs to this transcript only: remove it entirely
            $stored_exon->delete();
            warn "deleting exon for ".$self->gene()->name()."\n" if DEBUG;
        }
        else {
            # the exon is shared: only detach it from this transcript
            my ( $feature_rel, $other ) = grep { $_->object_id eq $self->feature_id() } @feature_rels;
            die "more than one subject object relationship between feature and exon??\n" if $other;
            warn "can''t delete exon, used by another object, just delete feature_rel\n" if DEBUG;
            $feature_rel->delete();
        }
    }

    return;
}
# Function : calls SUPER::_update_gene_association to associate the
#          : feature with its gene, then makes sure this transcript is
#          : listed among the gene's features
# Returns  : whatever the final $self->gene() call returns
# Args     : none
sub _update_gene_association {
    my ($self) = @_;

    $self->SUPER::_update_gene_association();

    #
    # if the feature has not already been added to the gene's feature array,
    # add it here so that start/stop can be calculated
    #
    my $gene_features = $self->gene->features;
    if ( !grep { $_->primary_id eq $self->primary_id } @{$gene_features} ) {
        push @{$gene_features}, $self;
    }

    # the location of the gene is dependent on the location of the features
    # that compose it
    # NOTE(review): as written this only reads the gene; a call that
    # recalculates the gene's coordinates may be missing here - confirm.
    return $self->gene();
}
# Function : gets/sets the bioperl object through SUPER::bioperl; when a
#          : new object is passed in, it is attached to the reference
#          : feature's sequence and the strand of this feature is
#          : refreshed from it
# Returns  : the bioperl object as returned by SUPER::bioperl()
# Args     : optional bioperl feature object
sub bioperl {
    my ( $self, @args ) = @_;

    $self->SUPER::bioperl(@args);

    if (@args) {
        my $bioperl = $self->SUPER::bioperl();
        if ( defined $bioperl ) {
            # Attach the stored object to the chromosome sequence. Calling
            # $self->bioperl(...) here instead would replace the transcript
            # with the chromosome object and re-enter this method forever.
            $bioperl->attach_seq( $self->reference_feature->bioperl )
                if $self->{'reference_feature'};

            $self->strand( $bioperl->strand() );
        }
    }

    return $self->SUPER::bioperl();
}
# Function : returns the genomic sequence of the transcript including up
#          : to 1000 flanking bases on each side, cut from the attached
#          : chromosome sequence (reverse complemented for strand -1);
#          : stores the number of flanking bases actually available
#          : upstream/downstream (relative to the transcript's strand) in
#          : _genomic_flank_up and _genomic_flank_down
# Returns  : dna string
# Args     : none
# Throws   : if no chromosome sequence is attached and none can be attached
sub calculate_genomic_seq {
    my ($self) = @_;

    my $max_flank = 1000;
    my $bioperl   = $self->bioperl();

    #
    # make sure that bioperl object is attached to Bio::Seq representing chromosome
    #
    if ( !$bioperl->entire_seq() && $self->reference_feature() ) {
        $bioperl->attach_seq( $self->reference_feature->bioperl );
    }

    my $chromosome = $bioperl->entire_seq();
    $self->throw( "no chromosome sequence attached to feature: " . $self->primary_id )
        if !$chromosome;

    my $chromosome_length = $chromosome->length;
    my $start             = $bioperl->start();
    my $end               = $bioperl->end();

    # clip the flanks at the ends of the chromosome
    my ( $genomic_start, $flank_up ) =
        $start > $max_flank
        ? ( $start - $max_flank, $max_flank )
        : ( 1, $start - 1 );

    my ( $genomic_end, $flank_down ) =
        $chromosome_length - $end > $max_flank
        ? ( $end + $max_flank, $max_flank )
        : ( $chromosome_length, $chromosome_length - $end );

    # on the reverse strand upstream and downstream swap sides
    my $on_reverse = $bioperl->strand eq "-1";
    $self->_genomic_flank_up( $on_reverse ? $flank_down : $flank_up );
    $self->_genomic_flank_down( $on_reverse ? $flank_up : $flank_down );

    my $seq = $chromosome->trunc( $genomic_start, $genomic_end );
    $seq = $seq->revcom if $on_reverse;

    return $seq->seq;
}
# Function : calculates the spliced transcript (exons without introns) by
#          : joining the sequences of the exon sub-features in
#          : transcription order
# Returns  : DNA sequence string ('' when there are no exons)
# Args     : none
sub calculate_spliced_transcript_seq {
    my ($self) = @_;

    my $bioperl = $self->bioperl();

    #
    # make sure that bioperl object is attached to Bio::Seq representing chromosome
    #
    if ( !$bioperl->entire_seq() && $self->reference_feature() ) {
        $bioperl->attach_seq( $self->reference_feature->bioperl );
    }

    # Multiplying start by strand orders forward-strand exons by ascending
    # start and reverse-strand exons by descending start.
    my @exons =
        sort { $a->strand() * $a->start() <=> $b->strand() * $b->start() }
        $bioperl->get_SeqFeatures();

    # a feature's seq() yields a sequence object; its own seq() gives the
    # string of bases
    return join '', map { $_->seq->seq } @exons;
}
# Function : returns the end coordinate of the transcript as held by its
#          : bioperl object
# Returns  : number
# Args     : none (extra arguments are ignored)
sub end {
    my ($self) = @_;
    return $self->bioperl->end();
}
# Function : calls SUPER::insert, then inserts the location and the exon
#          : subfeatures which are specific to Transcript features, and
#          : registers the transcript with its gene
# Returns  : whatever $self->_update_qualifiers() returns
# Args     : none (extra arguments are ignored)
sub insert {
    my ($self) = @_;

    $self->SUPER::insert();
    $self->_insert_featureloc();
    $self->_update_exons();
    $self->_update_gene_association() if ( exists $self->{gene} );

    my $gene = $self->gene();
    if ( !$gene ) {
        # warn only; without a gene there is no feature list to update
        $self->warn( $self->type()." Feature should be associated with a gene when it is inserted" );
    }
    else {
        #
        # if the feature has not already been added to the gene's feature
        # array, add it here so that start/stop can be calculated
        #
        my $gene_features = $gene->features;
        if ( !grep { $_->primary_id eq $self->primary_id } @{$gene_features} ) {
            push @{$gene_features}, $self;
        }
    }

    # the location of the gene is dependent on the location of the features
    # that compose it
    # NOTE(review): as written this only reads the gene; a call that
    # recalculates the gene's coordinates may be missing here - confirm.
    $self->gene();

    # need to update tags here, because calculating sequence can add tags
    return $self->_update_qualifiers();
}
# Function : constructor; blesses a new object into the given class, sets
#          : the supplied attributes and calls _init()
# Returns  : the new object
# Args     : named arguments
#          :   -source, -type, -bioperl, -external_ids, -description,
#          :   -reference_feature
#          :   -refseq_id (deprecated, use -reference_feature)
# Throws   : if -refseq_id does not resolve to an object implementing
#          : Modware::Feature::REFERENCE_FEATURE
sub new {
    my ( $class, @args ) = @_;

    # bless into the class the constructor was invoked on, so subclasses
    # get objects of their own type
    my $self = bless {}, $class;

    my @arglist = qw(
        SOURCE TYPE BIOPERL
        EXTERNAL_IDS DESCRIPTION REFSEQ_ID REFERENCE_FEATURE
    );

    my (
        $source,       $type,        $bioperl,
        $external_ids, $description, $refseq_id, $reference_feature
    ) = $self->_rearrange( [@arglist], @args );

    if ($refseq_id) {
        $self->warn("Instead of passing -refseq_id to a constructure, please pass on object reference through the arguement -reference_feature; Support for -refseq_id will be removed in the next version.");

        $reference_feature = Modware::Feature->new( -primary_id => $refseq_id );

        # validate the feature that was just looked up
        if ( !$reference_feature->isa('Modware::Feature::REFERENCE_FEATURE') ) {
            $self->throw( "Currently you can only define a new feature relative to an object that implements Modware::Feature::REFERENCE_FEATURE.");
        }
    }

    $self->source($source)                       if defined $source;
    $self->type($type)                           if defined $type;
    $self->bioperl($bioperl)                     if defined $bioperl;
    $self->external_ids($external_ids)           if defined $external_ids;
    $self->reference_feature($reference_feature) if $reference_feature;
    $self->description($description)             if defined $description;

    $self->qualifiers( [] );
    $self->_init();

    return $self;
}
# Function : returns the cached sequence of a given type, optionally
#          : formatted with a descriptive header
# Returns  : the raw sequence when no format is given, otherwise the
#          : result of _formatted_seq()
# Args     : named arguments
#          :   -type   => type of sequence (key into cached_sequences)
#          :   -format => string handed on to _formatted_seq, optional
# Throws   : if no sequence of the requested type is cached
sub sequence {
    my ( $self, @args ) = @_;

    my ( $format, $display_seq_type ) = $self->_rearrange( [qw(FORMAT TYPE)], @args );

    my $seq = $self->cached_sequences->{ lc($display_seq_type) };
    if ( !$seq ) {
        $self->throw(" no seqeunce of type: $display_seq_type exists for ". $self->primary_id);
    }

    # without a format the bare sequence is handed back
    return $seq if !$format;

    # genomic sequences additionally report their flanks and orientation
    my $is_genomic = $display_seq_type =~ /genomic/i;
    my ( $flankup, $flankdown );
    if ($is_genomic) {
        $flankup   = $self->_genomic_flank_up();
        $flankdown = $self->_genomic_flank_down();
    }

    # header parts shared by every sequence type
    my $header = "|".$display_seq_type."|";
    $header .= " gene: ".$self->gene->name if $self->gene;
    if ( $self->reference_feature ) {
        $header .= " on chromosome: ".$self->reference_feature->name;
    }
    if ( $self->reference_feature ) {
        $header .= " position ".$self->bioperl->start." to ".$self->bioperl->end;
    }

    if ($is_genomic) {
        $header .= " plus ".$flankup. " upstream and ".$flankdown." downstream basepairs";
        if ( $self->{strand} && $self->strand eq '-1' ) {
            $header .= ", reverse complement";
        }
    }

    return $self->_formatted_seq( \$seq, $header, $format );
}
# Function : moves the transcript and each of its sub-features by the same
#          : offset; start and end are both increased by the offset
# Returns  : nothing meaningful
# Args     : integer ( + or - )
sub shift_feature {
    my ( $self, $offset ) = @_;

    # apply the offset to both coordinates of one feature
    my $move = sub {
        my ($feature) = @_;
        $feature->start( $feature->start() + $offset );
        $feature->end( $feature->end() + $offset );
    };

    my $transcript = $self->bioperl;

    # the transcript itself first, then every sub-feature it holds
    $move->($transcript);
    $move->($_) for $transcript->get_SeqFeatures();
}
# Function : returns the start coordinate of the transcript as held by its
#          : bioperl object
# Returns  : number
# Args     : none (extra arguments are ignored)
sub start {
    my ($self) = @_;
    return $self->bioperl->start();
}
# Function : calls SUPER::update, refreshes the association with the gene
#          : when one is set, and - when a bioperl object is present -
#          : updates the stored location and the exon subfeatures
# Returns  : nothing meaningful
# Args     : none (extra arguments are ignored)
sub update {
    my $self = shift;

    $self->SUPER::update();

    # besides linking the transcript to its gene, this is where the gene's
    # coordinates are derived from the transcript start/stop
    if ( exists $self->{gene} ) {
        $self->_update_gene_association();
    }

    # location and exons can only be synchronized from a bioperl object
    if ( $self->{'bioperl'} ) {
        $self->_update_featureloc();
        $self->_update_exons();
    }
}
General documentation
Copyright © 2006, Northwestern University
All rights reserved.
|
|