TRANSCRIPT documentation.

No package variables defined.

Included modules

Bio::SeqFeature::Generic
Modware::Feature

Inherit

Modware::Feature

Synopsis

  NEVER INSTANTIATE THIS OBJECT, USE Modware::Feature

Description

  This class currently functions as an abstract class for transcript features.  Currently,
  mRNA and tRNA are subclasses which inherit from this.

  In general a transcript object is modeled as a Bio::SeqFeature::Gene::Transcript object
  and contains exons.  This class handles the synchronization of these exons with the database
  and some sequence functionality as well.

  Sequences are made available:
      genomic            : the unspliced transcript plus 1000 bases upstream and 
                           1000 bases downstream.
      spliced_transcript : the transcript after introns have been removed

Methods

_exon_type	Description	Code
_genomic_flank_down	Description	Code
_genomic_flank_up	Description	Code
_get_bioperl	Description	Code
_get_cached_sequences	Description	Code
_init	Description	Code
_update_exons	Description	Code
_update_gene_association	Description	Code
bioperl	Description	Code
calculate_genomic_seq	Description	Code
calculate_spliced_transcript_seq	Description	Code
end	No description	Code
insert	Description	Code
new	No description	Code
sequence	Description	Code
shift_feature	Description	Code
start	No description	Code
update	Description	Code

Methods description

_exon_type

 Title    : _exon_type
 Function : different 'Transcript' subclasses store their subfeatures
          : as different 'types' of exons
          : i.e. pseudogene exon type is 'pseudogene_exon'
 Returns  : the current exon type name (string)
 Args     : optional exon type name to set

_genomic_flank_down

 Title    : _genomic_flank_down
 Function : gets/sets the number of bases of the genomic sequence downstream of the transcript end
          : gets set automatically when genomic_seq is being calculated
 Returns  : number
 Args     : optional number

_genomic_flank_up

 Title    : _genomic_flank_up
 Function : gets/sets the number of bases of the genomic sequence upstream of the transcript start
          : gets set automatically when genomic_seq is being calculated
 Returns  : number
 Args     : optional number

_get_bioperl

 Title    : _get_bioperl
 Note     : creates a bioperl object representing this transcript (Bio::SeqFeature::Gene::Transcript)
 Usage    : called internally by lazy evaluated 'bioperl' method 
 Function : creates a bioperl object with a location on the chromosome's bioperl object.
 Returns  : nothing
 Args     : none

_get_cached_sequences

 Title    : _get_cached_sequences
 Function : Calculates all available sequences and stores in a hash.
          : Useful for comparing state of feature before and after a genome change
          : so you could call cached_sequence(), then apply some change
          : then compare the sequences in cached_sequences vs. the sequences
          : calculated by calculate_SOME_seq().
 Returns  : nothing
 Args     : none

_init

 Title    : _init
 Note     : sets default attributes for transcript features (exon type and display type)
 Usage    : called internally by new
 Function : 
 Returns  : nothing
 Args     : none

_update_exons

 Title    : _update_exons
 Function : Updating exons is a bit complicated
          : 1) for every exon in the object
 Returns  : nothing
 Args     : none

_update_gene_association

 Title    : _update_gene_association
 Function : calls SUPER::update to associate the feature with the gene
          :  but then also calls a method on the gene to update 
          :  its coordinates based on the associated transcripts.
 Returns  : nothing
 Args     : none

bioperl

 
 Title    : bioperl
 Function : calls SUPER->bioperl
          : if associating new bioperl object, attach it to the chromosome Bio::Seq object
 Returns  : Bio::SeqFeature::Gene::Transcript
 Args     : optional Bio::SeqFeature::Gene::Transcript object

calculate_genomic_seq

 Title    : calculate_genomic_seq
 Function : returns genomic sequence as calculated from the chromosome sequence
          : sets the private attributes _genomic_flank_up and _genomic_flank_down
          : which store the number of available bases upstream and downstream of the sequence
          : most of the time, both will be 1000 bases.
 Returns  : dna string
 Args     : none

calculate_spliced_transcript_seq

 Title    : calculate_spliced_transcript_seq
 Function : Calculates spliced transcript (exons and no introns) for a transcript
 Returns  : DNA sequence string
 Args     : none

insert

 Title    : insert
 Function : calls Feature::insert but then inserts the intron/exon subfeatures 
          : which are specific to Transcript features, and also
          : updates the associated Gene Feature's coordinates
 Returns  : nothing
 Args     : none

sequence

 Title    : sequence
 Function : returns sequence of a given type and formats in a given way
 Returns  : formatted sequence
 Args     : named arguments
          :
          : -type => type of sequence ( in display_seq_type )
          : -format => string (used by Bio::SeqIO) optional
          :

shift_feature

 Title    : shift_feature
 Note     : 
 Usage    : To move a feature upstream by 125 bases:
          : $feature->shift_feature( 125 );
 Function : moves a feature by a specified amount
 Returns  : nothing
 Args     : integer ( + or - )

update

 Title    : update
 Function : calls Feature::update but then updates the intron/exon subfeatures 
          : which are specific to Transcript features, and also
          : updates the associated Gene Feature's coordinates
 Returns  : nothing
 Args     : none

Methods code

_exon_type

#
# Combined getter/setter for the subfeature type name under which this
# transcript's exons are stored (for example 'exon').
#
#   Args    : optional new exon type name
#   Returns : the exon type name currently stored on the object
#
sub _exon_type {
   my $self = shift;

   # any additional argument, even an explicit undef, replaces the value
   $self->{_exon_type} = shift if @_;

   return $self->{_exon_type};
}

_genomic_flank_down

#
# Combined getter/setter for the number of bases stored as the downstream
# flank of the genomic sequence.
#
#   Args    : optional number of bases
#   Returns : the number currently stored on the object
#
sub _genomic_flank_down {
   my $self = shift;

   # any additional argument, even an explicit undef, replaces the value
   $self->{_genomic_flank_down} = shift if @_;

   return $self->{_genomic_flank_down};
}

_genomic_flank_up

#
# Combined getter/setter for the number of bases stored as the upstream
# flank of the genomic sequence.
#
#   Args    : optional number of bases
#   Returns : the number currently stored on the object
#
sub _genomic_flank_up {
   my $self = shift;

   # any additional argument, even an explicit undef, replaces the value
   $self->{_genomic_flank_up} = shift if @_;

   return $self->{_genomic_flank_up};
}

_get_bioperl

#
# Build the BioPerl representation of this transcript and register it
# through $self->bioperl().
#
# The transcript becomes a Bio::SeqFeature::Generic positioned at this
# feature's featureloc.  Every subfeature whose type name equals
# $self->_exon_type() is converted into a Bio::SeqFeature::Generic, tagged
# with its feature_id, and added to the transcript ordered by start.
#
#   Args    : none (additional arguments are ignored)
#   Returns : whatever $self->bioperl( $bioperl ) returns
#   Throws  : through $self->throw when an exon has no location or more
#             than one location
#
sub _get_bioperl {

   my ($self) = @_;

   # chado stores interbase coordinates while BioPerl is 1-based inclusive,
   # so the start is fmin + 1 and the end is fmax.
   # NOTE(review): assumes _featureloc() returns a row offering the same
   # strand/fmin/fmax accessors as the exon locations used below -- confirm.
   my $featureloc = $self->_featureloc();

   my $bioperl = Bio::SeqFeature::Generic->new(
      -strand => $featureloc->strand(),
      -start  => $featureloc->fmin() + 1,
      -end    => $featureloc->fmax()
   );

   my $exon_type = $self->_exon_type();
   my @exons     = grep { $_->type_id->name eq $exon_type } $self->subfeatures();

   my @bp_exons;

   foreach my $exon ( @exons ) {
      my $locs     = $exon->featureloc_feature_ids();
      my $location = $locs->next();

      # an exon must have exactly one location
      $self->throw("no location for feature_id: ".$exon->feature_id ) if !$location;
      $self->throw("more than one location for feature_id: ".$exon->feature_id ) if $locs->next();

      # chado is interbase coordinates, so add 1 to start of exons
      my $bp_exon = Bio::SeqFeature::Generic->new(
         -start  => $location->fmin + 1,
         -end    => $location->fmax,
         -strand => $location->strand()
      );

      # remember which database row this exon came from
      $bp_exon->add_tag_value('feature_id', $exon->feature_id() );
      push @bp_exons, $bp_exon;
   }

   # add the exons to the transcript object, sorted by start
   foreach my $bp_exon ( sort { $a->start() <=> $b->start() } @bp_exons ) {
      $bioperl->add_SeqFeature( $bp_exon );
   }

   $self->bioperl( $bioperl );

}

_get_cached_sequences

#
# Calculate every available sequence type and hand the results, keyed by
# the lower-cased type name, to $self->cached_sequences().
#
# For each type label the matching calculate_<label>_seq method is called,
# where <label> is the lower-cased label with spaces turned into
# underscores ('Spliced transcript' => calculate_spliced_transcript_seq).
#
#   Args    : none
#   Returns : whatever $self->cached_sequences( $hashref ) returns
#
sub _get_cached_sequences {
   my $self = shift;

   my %sequence_for;

   for my $label ( 'Genomic', 'Spliced transcript' ) {
      my $key = lc $label;

      ( my $stem = $key ) =~ s/ /_/g;
      my $calculator = "calculate_" . $stem . "_seq";

      $sequence_for{$key} = $self->$calculator;
   }

   $self->cached_sequences( \%sequence_for );
}

_init

#
# Apply the defaults of a transcript feature: exons are stored under the
# type 'exon' and the display type mirrors the feature type.
#
#   Args    : none (additional arguments are ignored)
#   Returns : whatever $self->display_type(...) returns
#
sub _init {
   my $self = shift;

   # exons of a plain transcript are stored as 'exon' subfeatures
   $self->_exon_type( 'exon' );

   # the display type of a transcript feature is identical to its type
   return $self->display_type( $self->type() );
}

_update_exons

#
# Synchronise the exons held by this object with the exon subfeatures
# stored in the database.
#
#   1) every exon obtained from the bioperl object is passed to
#      _insert_or_update_subfeature together with the exon type, and the
#      feature_id it is tagged with afterwards is remembered
#   2) every stored 'part_of' subfeature of the exon type whose feature_id
#      was not seen in step 1 is removed: the exon row itself when no
#      relationship points at another object, otherwise only the
#      relationship that links it to this feature
#
#   Args    : none (additional arguments are ignored)
#   Throws  : through $self->throw when there are no exons; dies when more
#             than one relationship links this feature to the same exon
#
sub _update_exons {

   my ($self,  @args) = @_;


  #
  # collect the exons of the in-memory object
  #
  # NOTE(review): both branches of this ternary are identical and yield
  # whatever $self->bioperl() returns in list context.  It looks as if the
  # accessor calls that fetch the exons (one for mRNA, one for other types)
  # were lost -- confirm against the upstream source before relying on it.
  #

   my @exons        = $self->type() eq 'mRNA' ? $self->bioperl() : $self->bioperl();

   $self->throw( "There are no exons in this feature: ".$self->primary_id ) if ! ( scalar @exons );

   # hash ref (autovivified below) of the feature_ids that are still in use
   my $current_exon_ids;
   foreach my $exon ( @exons ) {

      $self->_insert_or_update_subfeature( $exon, $self->_exon_type() );
      # _insert_or_update_subfeature tags exon with feature_id 
      # store current feature_ids in a hash for later use
      my ($exon_feature_id) = $exon->get_tag_values( 'feature_id' ); 
      $current_exon_ids->{ $exon_feature_id } = 1;
      # drop the 'annotation' entry from the exon's underlying hash
      delete $exon->{'annotation'};
   }

   # stored subfeatures related to this feature by 'part_of', restricted to
   # the exon type of this transcript
   my @subfeatures  = $self->_relationship_subjects( 'part_of' );
   my @stored_exons = grep{ $_->type_id->name eq $self->_exon_type() } @subfeatures;

   foreach my $stored_exon ( @stored_exons ) {
      # only stored exons that are no longer part of the object are touched
      if ( !$current_exon_ids->{ $stored_exon->feature_id() } ) {

         # this whole bit could be simplified with some sql
         # or Class::DBI::AbstractSearch

         # all relationships in which the stored exon is the subject

         my @feature_rels = Chado::Feature_Relationship->search( subject_id => $stored_exon->feature_id() );


         # no relationship points at an object other than this feature:
         # the exon row itself can be deleted
         if  ( ! grep { $_->object_id ne $self->feature_id() } @feature_rels ) {
            $stored_exon->delete();
            warn "deleting exon for ".$self->gene()->name()."\n" if DEBUG;
         }
         else {
            # the exon is shared: remove only the relationship to this feature
            my ($feature_rel, $other) = grep { $_->object_id eq $self->feature_id() } @feature_rels;
            die "more than one subject object relationship between feature and exon??\n" if $other;
            warn "can''t delete exon, used by another object, just delete feature_rel\n" if DEBUG;
            $feature_rel->delete();
         }
      }

   }

}

_update_gene_association

#
# Associate this feature with its gene and make sure the gene knows about
# it.
#
# SUPER::_update_gene_association does the association itself; afterwards
# this feature is appended to the gene's feature array unless a feature
# with the same primary_id is already listed there.
#
#   Args    : none
#   Returns : the result of the final $self->gene() call
#
sub _update_gene_association {

   my ($self) = @_;

   $self->SUPER::_update_gene_association();

  #
  # if the feature has not already been added to the genes feature array, add it here
  #  so that start/stop can be calculated
  #
   my $gene   = $self->gene();
   my $own_id = $self->primary_id;

   # the array to search is the gene's feature list (the same array that is
   # pushed onto below), not the gene object itself
   if ( !grep { $_->primary_id eq $own_id } @{ $gene->features } ) {
      push @{ $gene->features }, $self;
   }

   # the location of the gene is dependent on the location of the features that compose it
   # NOTE(review): this only reads the gene; a call that recalculates the
   # gene's coordinates appears to be missing here -- confirm upstream.
   $self->gene();

}

bioperl

#
# Getter/setter for the BioPerl object of this transcript.
#
# Storage is delegated to SUPER::bioperl.  When a new object is supplied
# it is additionally attached to the BioPerl object of the reference
# feature (if one is set) and its strand is copied onto this feature.
#
#   Args    : optional BioPerl feature object
#   Returns : the BioPerl object as returned by SUPER::bioperl()
#
sub bioperl {

   my ($self, @args) = @_;

   $self->SUPER::bioperl( @args );

   if ( @args ) {
      # Work on the stored object through SUPER.  Calling $self->bioperl(...)
      # with arguments from inside this branch would re-enter the setter and
      # never terminate.
      my $bioperl = $self->SUPER::bioperl();

      if ( $bioperl ) {
         # attach the feature to the chromosome sequence object
         # NOTE(review): assumes reference_feature->bioperl yields the
         # sequence object that attach_seq expects -- confirm.
         $bioperl->attach_seq( $self->reference_feature->bioperl ) if $self->{'reference_feature'};
         $self->strand( $bioperl->strand() );
      }
   }
   return  $self->SUPER::bioperl();

}

calculate_genomic_seq

#
# Return the genomic sequence of the transcript: the unspliced region plus
# up to 1000 bases on either side, cut from the chromosome sequence.
#
# The number of flanking bases actually available is stored through
# _genomic_flank_up and _genomic_flank_down; both are expressed relative
# to the transcript, so they are swapped for features on strand -1, whose
# sequence is returned reverse-complemented.
#
#   Args    : none
#   Returns : DNA string
#
sub calculate_genomic_seq {

   my ($self) = @_;

   # number of flanking bases requested on each side
   my $max_flank = 1000;

   my $bioperl = $self->bioperl();

  #
  # make sure that bioperl object is attached to Bio::Seq representing chromosome
  #
   $bioperl->attach_seq( $self->reference_feature->bioperl )
      if !$bioperl->entire_seq() && $self->reference_feature();

   my $chromosome        = $bioperl->entire_seq;
   my $chromosome_length = $chromosome->length;
   my $start             = $bioperl->start();
   my $end               = $bioperl->end();

   my ($genomic_start, $genomic_end, $flank_up, $flank_down);

   # clip the left flank at the first base of the chromosome
   if ( $start > $max_flank ) {
      $genomic_start = $start - $max_flank;
      $flank_up      = $max_flank;
   }
   else {
      $genomic_start = 1;
      $flank_up      = $start - 1;
   }

   # clip the right flank at the last base of the chromosome
   if ( $chromosome_length - $end > $max_flank ) {
      $genomic_end = $end + $max_flank;
      $flank_down  = $max_flank;
   }
   else {
      $genomic_end = $chromosome_length;
      $flank_down  = $chromosome_length - $end;
   }

   my $is_reverse = $bioperl->strand eq "-1";

   # on the reverse strand "upstream" lies to the right on the chromosome
   $self->_genomic_flank_up  ( $is_reverse ? $flank_down : $flank_up   );
   $self->_genomic_flank_down( $is_reverse ? $flank_up   : $flank_down );

   my $seq = $chromosome->trunc( $genomic_start, $genomic_end );
   $seq    = $seq->revcom if $is_reverse;

   return $seq->seq;

}

calculate_spliced_transcript_seq

#
# Return the spliced transcript: the sequences of all exons joined in
# transcription order, introns left out.
#
# Exons are ordered by strand * start, which is ascending start on the
# forward strand and descending start on the reverse strand.
#
#   Args    : none
#   Returns : DNA string, or undef when the transcript has no exons
#
sub calculate_spliced_transcript_seq {

   my ($self) = @_;

   my $bioperl = $self->bioperl();

  #
  # make sure that bioperl object is attached to Bio::Seq representing chromosome
  #
   $bioperl->attach_seq( $self->reference_feature->bioperl )
      if !$bioperl->entire_seq() && $self->reference_feature();

   # process exons sorted by strand
   # NOTE(review): assumes the exons are the sub-features of the bioperl
   # object (as in shift_feature) -- confirm for subclasses.
   my @exons = sort { $a->strand() * $a->start() <=> $b->strand() * $b->start() }
               $bioperl->get_SeqFeatures();

   # $exon->seq() is a sequence object; its seq() is the DNA string
   my $seq;
   $seq .= $_->seq->seq() for @exons;

   return $seq;

}

end

description

#
# End coordinate of the transcript, read from its BioPerl object.
#
#   Args    : none (additional arguments are ignored)
#   Returns : the value of $self->bioperl->end()
#
sub end {

   my ($self, @args) = @_;

   # return the coordinate, not the BioPerl object that carries it
   return $self->bioperl->end();

}

insert

#
# Insert the transcript into the database.
#
# After SUPER::insert the feature location and the exon subfeatures are
# written, the gene association is updated when a gene has been set on the
# object, the feature is registered in the gene's feature array, and
# finally the qualifiers are written.
#
#   Args    : none (additional arguments are ignored)
#   Returns : whatever $self->_update_qualifiers() returns
#
sub insert {

   my ($self,  @args) = @_;

   $self->SUPER::insert();
   $self->_insert_featureloc();
   $self->_update_exons();

   $self->_update_gene_association() if ( exists $self->{gene} );

   my $gene = $self->gene();

   if ( !$gene ) {
      # a missing gene is reported but is not fatal, so nothing below may
      # touch the gene in this case
      $self->warn( $self->type()." Feature should be associated with a gene when it is inserted" );
   }
   else {
     #
     # if the feature has not already been added to the genes feature array, add it here
     #  so that start/stop can be calculated
     #
      my $own_id = $self->primary_id;

      # search the gene's feature list (the array pushed onto below), not
      # the gene object itself
      if ( !grep { $_->primary_id eq $own_id } @{ $gene->features } ) {
         push @{ $gene->features }, $self;
      }

      # the location of the gene is dependent on the location of the features that compose it
      # NOTE(review): this only reads the gene; a call that recalculates
      # the gene's coordinates appears to be missing -- confirm upstream.
      $self->gene();
   }

   $self->_update_qualifiers();  # need to update tags here, because calculating sequence can add a tags

}

new

description

#
# Constructor.
#
# Named arguments (all optional):
#   -source, -type, -bioperl, -external_ids, -description,
#   -reference_feature : passed to the accessor of the same name
#   -refseq_id         : deprecated; a Modware::Feature is created from
#                        this primary id and used as the reference feature
#
#   Returns : the new object, blessed into $class
#   Throws  : through $self->throw when the feature created from
#             -refseq_id is not a Modware::Feature::REFERENCE_FEATURE
#
sub new {

   my ($class, @args) = @_;

  #
  #  bless into the class new() was called on, so that subclasses
  #  inheriting this constructor get objects of their own type
  #
   my $self = {};
   bless $self,$class;
   my @arglist = qw(
      SOURCE       TYPE        BIOPERL
      EXTERNAL_IDS DESCRIPTION REFSEQ_ID REFERENCE_FEATURE
   );

   my ( 
      $source,      $type,         $bioperl,
      $external_ids, $description, $refseq_id, $reference_feature
    )   = $self->_rearrange( [@arglist], @args );

   if ( $refseq_id ) {
      $self->warn("Instead of passing -refseq_id to a constructure, please pass on object reference through the arguement -reference_feature; Support for -refseq_id will be removed in the next version.");
      $reference_feature = Modware::Feature->new( -primary_id => $refseq_id );

      # validate the feature that was just created from the id
      if ( !$reference_feature->isa('Modware::Feature::REFERENCE_FEATURE') ) {
         $self->throw( "Currently you can only define a new feature relative to an object that implements Modware::Feature::REFERENCE_FEATURE.");
      }
   }

   $self->source       ( $source         ) if defined $source;
   $self->type         ( $type           ) if defined $type;
   $self->bioperl      ( $bioperl        ) if defined $bioperl;
   $self->external_ids ( $external_ids   ) if defined $external_ids;
   $self->reference_feature ( $reference_feature         ) if $reference_feature;
   $self->description  ( $description  ) if defined $description;
   $self->qualifiers( [] );
   $self->_init();
   return $self;

}

sequence

#
# Return one of the cached sequences, optionally formatted.
#
# Named arguments:
#   -type   => sequence type; looked up lower-cased in cached_sequences
#   -format => optional format string; without it the raw sequence is
#              returned, with it the sequence and a descriptive header are
#              handed to _formatted_seq
#
# For types matching /genomic/i the header also states the number of
# flanking bases and, for features on strand -1, that the sequence is
# reverse complemented.
#
#   Returns : the raw sequence, or the result of _formatted_seq
#   Throws  : through $self->throw when no sequence of that type is cached
#
sub sequence {

   my ($self, @named) = @_;

   my ( $format, $display_seq_type ) =  $self->_rearrange([qw(FORMAT TYPE)], @named);

   my $seq = $self->cached_sequences->{lc($display_seq_type)};

   $seq or $self->throw(" no seqeunce of type: $display_seq_type exists for ". $self->primary_id);

   # without a format the raw sequence is all that is wanted
   return $seq unless $format;

   my $is_genomic = $display_seq_type =~ /genomic/i;

   my ($flankup, $flankdown);
   if ( $is_genomic ) {
      $flankup   = $self->_genomic_flank_up();
      $flankdown = $self->_genomic_flank_down();
   }

   # header part shared by every sequence type
   my $header = "|".$display_seq_type."|";
   if ( $self->gene ) {
      $header .= " gene: ".$self->gene->name;
   }
   if ( $self->reference_feature ) {
      $header .= " on chromosome: ".$self->reference_feature->name;
   }
   if ( $self->reference_feature ) {
      $header .= " position ".$self->bioperl->start." to ".$self->bioperl->end;
   }

   # genomic sequences additionally describe their flanks and orientation
   if ( $is_genomic ) {
      $header .= " plus ".$flankup. " upstream and ".$flankdown." downstream basepairs";
      if ( $self->{strand} && $self->strand eq '-1' ) {
         $header .= ", reverse complement";
      }
   }

   return $self->_formatted_seq(\$seq, $header, $format);

}

shift_feature

#
# Move the transcript and all of its exons along the reference by a fixed
# number of bases.
#
#   Args    : integer offset (positive or negative), added to every start
#             and end coordinate
#   Returns : nothing meaningful
#
sub shift_feature {

   my ($self, $offset) = @_;

   # add the offset to both ends of one BioPerl feature, start first
   my $move = sub {
      my ($feature) = @_;
      $feature->start( $feature->start() + $offset );
      $feature->end  ( $feature->end()   + $offset );
   };

   my $transcript = $self->bioperl;

   # the transcript itself moves first, then each of its sub-features
   $move->( $transcript );
   $move->( $_ ) for $transcript->get_SeqFeatures();

}

start

description

#
# Start coordinate of the transcript, read from its BioPerl object.
#
#   Args    : none (additional arguments are ignored)
#   Returns : the value of $self->bioperl->start()
#
sub start {

   my ($self, @args) = @_;

   # return the coordinate, not the BioPerl object that carries it
   return $self->bioperl->start();

}

update