These documents are For the HEAD of the CVS repository on July 19, 2007
Api docs for previous releases
Modware::Feature
REFERENCE_FEATURE
Modware::Feature::REFERENCE_FEATURE - Parent class for all features serving as coordinate system for other features (usually chromosome or contig)
|
No package variables defined. |
Abstract class representing
ANOTHER INTERFACE TO CREATE THESE OBJECTS IS IN Modware::Chromosome where you can specify -name or -feature_id as arguments
USE CASE: write a fasta file for the chromsome
my $chromosome = new Modware::Chromosome( -name => '2' ); print $chromosome->sequence( -format => 'fasta' );
|
THis object represents a chromosome.
The bioperl method returns a Bio::Seq object representing the chromosome. -------------------------------------------------- Sequence editing --------------------------------------------------
This is also the object where, currenlty, all sequence editing takes place.
Contig reshuffling edits should use remove_contig and insert_contig. They both try to take care of things like inserting and removing adjacent gaps accordingly.
change_bases is used for smaller scale sequence edits where you want to insert, delete, or replace some bases: CONTIGS UPSTREAM OF THE CHANGE WILL REMAIN UNCHANGED CONTIGS DOWNSTREAM OF THE CHANGE WILL BE SHIFTED BY THE APPROPRIATE AMOUNT THE CONTIG OVERLAPPING THIS CONTIG WILL HAVE ITS END MOVED BY THE APPROPRIATE AMOUNT CDS FEATURES DOWNSTREAM OF THE CHANGE WILL BE SHIFTED BY THE APPROPRIATE AMOUNT CDS FEATURES ENCLOSED BY CHANGE WILL BE DELETED! CDS FEATURES OVERLAPPING, BUT NOT ENCLOSED BY THE CHANGE WILL BE MARKED 'Corrupted' and otherwise be unchnaged
Examples: $obj->change_bases('', 10); delete nucleotide #10 $obj->change_bases('', 10, 2); delete two nucleotides starting from #10 $obj->change_bases('G', 10); change nuc #10 to 'G' $obj->change_bases('GA', 10, 4); replace #10 and 3 following with 'GA' $obj->change_bases('GA', 10, 2)); is same as $obj->change_bases('GA', 10); $obj->change_bases('GA', 10, 0 ); insert 'GA' before nucleotide at #10 $obj->change_bases('GA', 10, 1); GA inserted before #10, #10 deleted $obj->change_bases('GATC', 10, 2); GATC inserted before #10, #10 deleted $obj->change_bases('GATC', 10, 6); GATC inserted before #10, #10-#15 deleted
-------------------------------------------------- Sequence dumping --------------------------------------------------
The sequence can be dumped in any format through the 'sequence' method which takes one argument: -format if Bio::SeqIO can do it, you can specify it. THIS WILL ONLY DUMP
The 'to_file' method is similar but it makes sure that all seqFeatures are included in the to_file, so that annotated files ( embl, genbank, game ) can include all of these.
|
Methods description
Title : _change_seq
Usage : $substring = $obj->change_bases('AA', 10);
Note : interface taken from Bio::LiveSeq
Function: changes the chromosomal sequence in the database
Examples:
$obj->change_bases('', 10); delete nucleotide #10
$obj->change_bases('', 10, 2); delete two nucleotides starting from #10
$obj->change_bases('G', 10); change nuc #10 to 'G'
$obj->change_bases('GA', 10, 4); replace #10 and 3 following with 'GA'
$obj->change_bases('GA', 10, 2)); is same as $obj->change_bases('GA', 10);
$obj->change_bases('GA', 10, 0 ); insert 'GA' before nucleotide at #10
$obj->change_bases('GA', 10, 1); GA inserted before #10, #10 deleted
$obj->change_bases('GATC', 10, 2); GATC inserted before #10, #10 deleted
$obj->change_bases('GATC', 10, 6); GATC inserted before #10, #10-#15 deleted
Returns : Nothing
Args : seq, string, or '' ('' = undef = 0 = deletion)
start, integer
length, integer (optional)
Title : _get_bioperl
Usage : Interface mehtod
Function : requires subclasses to implement this method
Returns : nothing
Args : none
Title : get_contig_features
Function : fetches all contig features for this object and places them
: onto the bioperl object
Returns : nothing
Args : none
Title : get_gap_features
Function : fetches all gap features for this object and places them
: onto the bioperl object
Returns : nothing
Args : none
Title : _init
Note : sets attributes specific to Contig features
Usage : called internally by new
Function :
Returns : nothing
Args : none
Title : _insert_feature
Function : overridden from Modware::Feature - inserts into feature table, gives primary_id as dbxrefid
: chromosome_name as name, and uniquename
Returns : nothing
Args : none
Title : change_bases
Usage : $substring = $obj->change_bases('AA', 10);
Note : interface taken from Bio::LiveSeq
Function: changes the chromosomal sequence in the database
: and properly updates all features affected by this change
: CONTIGS UPSTREAM OF THE CHANGE WILL REMAIN UNCHANGED
: CONTIGS DOWNSTREAM OF THE CHANGE WILL BE SHIFTED BY THE APPROPRIATE AMOUNT
: THE CONTIG OVERLAPPING THIS CONTIG WILL HAVE ITS END MOVED BY THE APPROPRIATE AMOUNT
: CDS FEATURES DOWNSTREAM OF THE CHANGE WILL BE SHIFTED BY THE APPROPRIATE AMOUNT
: CDS FEATURES ENCLOSED BY CHANGE WILL BE DELETED!
: CDS FEATURES OVERLAPPING, BUT NOT ENCLOSED BY THE CHANGE WILL BE MARKED 'Corrupted' and otherwise be unchnaged
Examples:
$obj->change_bases('', 10); delete nucleotide #10
$obj->change_bases('', 10, 2); delete two nucleotides starting from #10
$obj->change_bases('G', 10); change nuc #10 to 'G'
$obj->change_bases('GA', 10, 4); replace #10 and 3 following with 'GA'
$obj->change_bases('GA', 10, 2)); is same as $obj->change_bases('GA', 10);
$obj->change_bases('GA', 10, 0 ); insert 'GA' before nucleotide at #10
$obj->change_bases('GA', 10, 1); GA inserted before #10, #10 deleted
$obj->change_bases('GATC', 10, 2); GATC inserted before #10, #10 deleted
$obj->change_bases('GATC', 10, 6); GATC inserted before #10, #10-#15 deleted
Returns : Nothing
Args : seq, string, or '' ('' = undef = 0 = deletion)
start, integer
length, integer (optional)
Title : chromosome_name
Usage : $self->chromosome_name( $some_chromosome_name );
: or
: print $self->chromosome_name();
Function : gets/sets chromosome_name
Returns : chromosome_name string
Args : chromosome_name ( optional )
Title : contig_features
Note : Fetches all contig features located on this chromosome
Usage : To print the primary_id of all contig features associated with this feature
: print map { print $_->primary_id()."\n" } @{ $self->contig_features() };
Function : gets/sets the contig_features array of the gene
Returns : string
Args : optional: array of features object
Title : gap_features
Note : Fetches all gap features located on this chromosome
Usage : To print the primary_id of all gap features associated with this feature
: print map { print $_->primary_id()."\n" } @{ $self->gap_features() };
Function : gets/sets the gap_features array of the gene
Returns : string
Args : optional: array of features object
Title : insert_contig
Usage : $floating_contig = $chromosome_obj->remove_contig( -primary_id => 'DDBXXXXXXX' );
Function : inserts a free contig ( with features relative to it ) into a given position
: of a chromosome. THe contig will be inserted just before the position given
: so you have to give a position that is the first base of another contig OR
: one past the last base of the LAST contig on the chromosome. This method will
: automatically add padding between the inserted contig and the subsequent ( or previous )
: contig. Everything gets written to the database.
:
Args : named :
: -contig => free contig containig cds features
: -position => contig will be inserted before this coordinate
: -pad => OPTIONAL number of N's to pad with ( default 100)
Title : name
Usage : $self->name( $some_name );
: or
: print $self->name();
Function : gets/sets name
Returns : name string
Args : name ( optional )
Title : remove_contig
Usage : $floating_contig = $chromosome_obj->remove_contig( -primary_id => 'DDBXXXXXXX' );
Function : removes a contig from the sequence of a chromosome. Also removes an adjacent gap.
: The gap removed is always the downstream gap unless the upstream gap is smaller.
: This is to make sure that after removal, the longest gap is still there.
Returns : The removed contig, with its own sequence as the bioperl->entire_seq
: and all of its features attached and relative to its coordinates
:
Note : *************************** IMPORTANT **************************************************
:
: CHANGES TO THE FLOATING CONTIG AND FEATURES ON THE FLOATING CONTIG ARE NOT WRITTEN TO THE
: DATABASE. IT IS UP TO THE CALLER TO DECIDE HOW TO UPDATE THE FEATURES. ALL FEATURES ON
: THE CONTIG ARE MARKED 'DELETED' IN THE DATABASE (BY CHANGE_BASES).
:
Args : named :
: primary_id
Title : reset_contigs
Note : Useful when temporary operations are performed on contigs
: for example the 'float' method for contigs changes the coordinates
: for all features on a contig to be relative to the contig. You may call float
: on the contig_features to do an someting with them in contig coordinates
: but later want to do something with contig_features in chromosome coordinates (the 'natural' state)
: So you would call reset_contigs between the two tasks so that the second time around
: you are fetching a 'fresh' set of contigs.
Function : deletes hash key pointing to contig features
Returns : string
Args : optional: array of features object
Title : sequence
Usage : my $fasta = $chromosome_obj->sequence( -format => 'fasta' );
Function : writes sequence in a given format (used for generating fasta files )
Args : named :
: -format => format of seqeunce: fasta, etc
Title : update
Function : inherits feature->update and also updates 'version'
Returns : nothing
Args : none
Title : version
Note : Genome version of chromosome
Function : gets the version of the feature
Returns : string
Args : optional: version string
Methods code
sub _change_seq
{
my ($self, $string, $start, $length_removed ) = @_;
#
# Here we are actully changeing the chromosome seqeunce and updating the database
#
#
my $seq = $self->bioperl();
substr( $seq, $start - 1 , $length_removed ) = $string;
# the Bio::Seq object contains its sequence in the primary_seq object
$self->bioperl( $seq );
$self->_database_object( $self->bioperl->primary_seq->length );
$self->_database_object();
}
sub _get_bioperl
{ my ($self) = @_;
my $primary_seq = Modware::Bioseq->new( -primary_id => $self->feature_id() );
my $bioperl = new Bio::Seq( -primary_id => $self->primary_id() );
$bioperl->primary_seq( $primary_seq );
$self->bioperl( $bioperl );
}
sub _get_contig_features
{ my ($self) = @_;
my @features = Modware::Search::Feature->Search_contig_features_by_reference_feature_name( $self->name() );
$self->contig_features(\@ features );
}
sub _get_gap_features
{ my ($self) = @_;
my @features = Modware::Search::Feature->Search_gap_features_by_reference_feature_name( $self->name() );
$self->gap_features(\@ features );
}
sub _init
{ my ($self, @args) = @_;
$self->source( "Sequencing Center" ) if (!$self->source());
$self->name ( $self->_database_object() ) if $self->_database_object;
}
sub _insert_feature
{ my ($self, @args) = @_;
my $id_dbxref = Modware::DBH->get_public_id();
$self->primary_id( $id_dbxref->accession() );
my ($cvterm) = Chado::Cvterm->get_single_row(
cv_id => Modware::Constant->Sequence_ontology_id(),
name => $self->type
);
$self->throw( "cannot insert a feature with out a valid SO type.") if !$cvterm;
my $type_id = $cvterm->cvterm_id;
my $feautre_row = Chado::Feature->create({
organism_id => Modware::Constant->Organism_id(),
dbxref_id => $id_dbxref->dbxref_id,
name => $self->name(),
uniquename => $self->primary_id(),
residues => $self->bioperl->seq,
seqlen => $self->bioperl->length,
type_id => $type_id
});
$self->feature_id( $feautre_row->feature_id );
}
sub change_bases
{ my ($self, $string, $start, $length_removed ) = @_;
my $original_length = $self->bioperl->length;
warn "warning: change_bases in REFERENCE_FEATURE is an experimental method. It is not production ready and is for testing only.\n";
#The following line should maybe be moved after we move away from caching sequences
$self->_change_seq( $string, $start, $length_removed );
#
# make a seqfeature that represents section of original genome
# which is changed or removed
#
my $changed_feat = new Bio::SeqFeature::Generic ( -start => $start,
-end => $start + $length_removed - 1 );
my $length_added = length( $string );
my $shift_amount = $length_added - $length_removed;
my @overlapping_generic = ( Modware::Search::Feature->Search_overlapping_feats_by_range( $self->name(), $changed_feat->start(), $changed_feat->end(), 'repeat_region' ),
Modware::Search::Feature->Search_overlapping_feats_by_range( $self->name(), $changed_feat->start(), $changed_feat->end(), 'contig'),
Modware::Search::Feature->Search_overlapping_feats_by_range( $self->name(), $changed_feat->start(), $changed_feat->end(), 'gap')
);
my @overlapping_mrnas = Modware::Search::Feature->Search_overlapping_feats_by_range( $self->name(), $changed_feat->start(), $changed_feat->end(), 'mRNA');
my @enclosed_features = grep { $changed_feat->contains( $_->bioperl() ) } @overlapping_mrnas;
my @straddling_features;
foreach my $feat ( @overlapping_mrnas ) {
push @straddling_features, $feat if ( ! grep{ $feat->primary_id eq $_->primary_id } @enclosed_features);
}
foreach my $feature ( @enclosed_features ) {
$feature->is_deleted(1);
#
# want to delete feature, but keep cached sequences for archival purposes,
# only update database table otherwise all cached_sequecnes will get overwrittten
#
$feature->description( $feature->description()."; DELETED for ".$self->version() );
$feature->_update_feature();
print "DELETED ENCLOSED FEATURE: ".$feature->primary_id()."\n";
if ( !$feature->gene() ) {
#my $note = new Modware::Note(
# -text => "Deleted, change to sequence has introduced an internal stop codon.",
# -is_public => 1
#);
$feature->gene( $note );
$feature->gene(1);
$feature->gene();
}
}
foreach my $feature ( @straddling_features ) {
if ( $feature->cached_sequences->{'cds'} ne $feature->calculate_cds_seq() ) {
$feature->add_qualifier( 'Changed' );
$feature->_update_qualifiers();
}
}
foreach my $feature ( grep { $_->end() > $changed_feat->end() } @overlapping_generic ) {
#print "shifting ".$feature->type()."\n";
$feature->end( $feature->end() + $shift_amount );
$feature->update();
}
#
# shift everything downstream of the 'change'
#
if ( $shift_amount != 0 ) {
$self->shift_features_in_section( $changed_feat->end() + 1, $original_length, $shift_amount );
}
}
sub chromosome_name
{ my ($self, $obj) = @_;
warn "chromosome_name is deprecated, use name instead";
return $self->name();
}
sub contig_features
{ my ($self, $obj) = @_;
#
# fetches contig_features from database (_get_contig_features) if contig_features is not yet defined
# and the user is not attempting to set the contig_features
#
exists $self->{_contig_features} || scalar @_ > 1 || $self->_get_contig_features();
if(scalar @_ > 1) {
$self->{_contig_features} = $obj;
map{ $self->bioperl( $_->bioperl ); } @{ $self->{'_contig_features'} };
}
return $self->{_contig_features};
}
sub end
{ my ($self, @args) = @_;
return $self->_database_object();
}
sub gap_features
{ my ($self, $obj) = @_;
#
# fetches gap_features from database (_get_gap_features) if gap_features is not yet defined
# and the user is not attempting to set the gap_features
#
exists $self->{_gap_features} || scalar @_ > 1 || $self->_get_gap_features();
if(scalar @_ > 1) {
$self->{_gap_features} = $obj;
map{ $self->bioperl( $_->bioperl ); } @{ $self->{'_gap_features'} };
}
return $self->{_gap_features};
}
sub insert_contig
{ my ($self, @args ) = @_;
#
# $pad_length is the length of N's to be added, (defaults to 100)
#
# The N's will be added after the inserted contig unless the position is the
# end of the last contig, in which case they will be added before
#
my ( $contig, $position, $pad_length, $gap_type ) = Modware::Root->_rearrange([qw(CONTIG POSITION PAD GAP_TYPE)], @args);
$pad_length ||= 100;
$self->throw( ' -contig must be of type contig' ) if !($contig->type() eq 'contig');
$self->throw( 'gap_type must be specified' ) if !$gap_type;
#
# check that position is end of a contig or one before the first contig
#
#
# $contig_start will be true if the position is at the beginning of a contig
# $contig_end will be true if the position is the end of the LAST contig
#
my $contigs = $self->contig_features();
my $contig_start = grep { $_->start() eq $position } @$contigs;
my $contig_end = $contigs->[ scalar @$contigs - 1 ]->end + 1 eq $position;
$self->throw("must pass position on edge of contig") if !($contig_end xor $contig_start);
my $contig_seq = $contig->bioperl();
my $gap_bioperl; # bioperl object representing gap being inserted
#
# if we are adding the contig at the end of the last contig, pad the end of that contig
if ( $contig_end ) {
$gap_bioperl = new Bio::SeqFeature::Generic(
-start => 1,
-end => $pad_length
);
$self->change_bases( "N"x$pad_length, $position, $self->bioperl()->length() + 1 );
$position += $pad_length;
}
#
# other wise we pad at the end of new contig
#
else {
$contig->bioperl( $contig_seq."N"x$pad_length );
$gap_bioperl = new Bio::SeqFeature::Generic(
-start => $contig->bioperl() + 1,
-end => $contig->bioperl() + $pad_length
);
}
die "Error on padding" if ( $contig->bioperl() ne $contig_seq );
#
# add the contig sequence to the chromosome
#
$self->change_bases( $contig->bioperl(), $position, 0 );
#
# no give contig and ( all cds features on it ) chromosomal features
#
map {
$_->shift_feature( $position - 1 );
$_->reference_feature( $self );
$_->feature_id ? $_->update() : $_->insert();
} @{ $contig->cds_features() };
bless $contig, 'Modware::Feature::CONTIG';
$contig->shift_feature( $position - 1 );
$gap_bioperl->start( $gap_bioperl->start() + $position - 1 );
$gap_bioperl->end ( $gap_bioperl->end() + $position - 1 );
$contig->reference_feature( $self );
$contig->bioperl( $self->bioperl );
$gap_bioperl->attach_seq( $self->bioperl );
die "Contig not correctly moved" if ( $contig->bioperl() ne $contig_seq );
$contig->feature_id() ? $contig->update() : $contig->insert();
#
# now create the gap feature and insert it
#
delete $self->{_contig_features};
my $gap_feature = new Modware::Feature(
-type => 'gap',
-start => $gap_bioperl->start(),
-end => $gap_bioperl->end(),
-gap_type => $gap_type,
-description => 'Added as padding for addition of contig: '.$contig->primary_id(),
-reference_feature => $self
);
$gap_feature->insert();
return ( $contig, $gap_feature );
}
sub name
{ my ($self, $obj) = @_;
if($obj) {
$self->{name} = $obj;
}
return $self->{name};
}
sub new
{ my ($type, @args) = @_;
#
# do not bless it here, assume subclass will bless as a subclass type
#
my $self = {};
bless $self,$type;
my @arglist = qw(
BIOPERL SOURCE DESCRIPTION CHROMOSOME_NAME NAME
);
my (
$bioperl, $source, $description, $chromosome_name, $name
) = $self->_rearrange( [@arglist], @args );
$name = $chromosome_name if ($chromosome_name && !$name);
$self->warn("-chromosome_name argument to Modware::Feature for chromsomes constructor is deprecated, use -name\n") if $chromosome_name;
$self->source ( $source ) if defined $source;
$self->bioperl ( $bioperl ) if defined $bioperl;
$self->description ( $description ) if defined $description;
$self->name ( $name );
$self->qualifiers( [] );
$self->_init();
return $self;
}
sub remove_contig
{ my ($self, @args ) = @_;
my ( $contig_ddb ) = Modware::Root->_rearrange([qw(PRIMARY_ID)], @args);
die "Modware::Chromosome::remove_contig requires 1 named argument:\n".
" -primary_id ( the primary_id of the contig)\n" if ( !$contig_ddb );
my $contig = new Modware::Feature( -primary_id => $contig_ddb );
my $cds_features = $contig->cds_features();
my $gap_features = $self->gap_features();
#
# get surrounding gaps
#
my ( $upstream_gap ) = grep { $_->bioperl() eq ( $contig->start() - 1 ) } @{ $gap_features };
my ( $downstream_gap ) = grep { $_->bioperl() eq ( $contig->end() + 1 ) } @{ $gap_features };
#
# extend range removed from chromosome by downstream gap unless
# the upstream gap is smaller
#
# ( $start, $end ) : the coordinates to delete from full chromosome
#
my ( $start, $end );
if ( !$downstream_gap || ( $upstream_gap->bioperl() < $downstream_gap->bioperl() ) ) {
$upstream_gap->is_deleted(1); #remove the gap
$upstream_gap->update(); #...
$start = $upstream_gap->start();
$end = $contig->end();
}
else {
$downstream_gap->is_deleted(1); #remove the gap
$downstream_gap->update(); #...
$start = $contig->start();
$end = $downstream_gap->end();
}
$contig->float();
$self->change_bases( '', $start, $end - $start + 1 );
#
# change_bases deletes features which are enclosed by a change
# here we just undelete them
#
map {
$downstream_gap->is_deleted(0);
$_->_update_qualifiers();
} @$cds_features;
$contig->is_deleted(0);
#
# dont want to return gap features
#
delete $self->{_gap_features};
return $contig;
## use Data::Dumper;
## print "Contig start,end\n";
## print Dumper( $contig->start, $contig->end);
## print "upstream, downstream gap lengths\n";
## print Dumper( $upstream_gap->length, $downstream_gap->length);
## print "upstream, start and end\n";
## print Dumper( $upstream_gap->start, $upstream_gap->end);
## print "downstream, start and end\n";
## print Dumper( $downstream_gap->start, $downstream_gap->end);
## print "final coords\n";
## print Dumper( $start, $end);
#
}
sub reset_contigs
{ my ($self, $obj) = @_;
delete $self->{_contig_features};
delete $self->{_gap_features};
}
sub sequence
{ my ($self, @args) = @_;
$self->throw("Feature must have a feature_id to call sequence method") if ( !$self->feature_id );
my ( $format, $display_seq_type ) = $self->_rearrange([qw(FORMAT TYPE)], @args);
$display_seq_type ||= $self->display_type() ." Sequence";
my $seq = $self->bioperl();
my $header;
if ( $display_seq_type =~ /Masked/ ) {
my @mask_regions = Modware::Search::Feature->Search_overlapping_feats_by_range( $self->name(), 1, $self->bioperl(), 'repeat_region' );
foreach my $mask_reg ( @mask_regions ) {
my $start = $mask_reg->start();
my $end = $mask_reg->end();
my $length = $mask_reg->length();
# print "masking $start to $end \n";
substr( $seq, $start, $length, "N"x$length );
}
}
if ( !$format ) {
return $seq;
}
$header .= "|".$display_seq_type."|";
$header .= " ".$self->display_type().": ".$self->name ;
$header .= " position 1 to ".$self->end();
return $self->_formatted_seq(\$ seq, $header, $format);
}
sub shift_features_in_section
{ my ($self, $range_start, $range_end, $offset) = @_;
my $dbh = new Modware::DBH;
my @results;
#
# convert to 'interbase'
#
$range_start -= 1;
my $sth = $dbh->prepare("
UPDATE FEATURELOC
SET FMIN = FMIN + ?,
FMAX = FMAX + ?
WHERE FMIN >= ?
AND FMAX <= ?
AND SRCFEATURE_ID = ?
AND EXISTS (
SELECT 'A' FROM V_NOTDELETED_FEATURE V
WHERE V.FEATURE_ID = FEATURELOC.FEATURE_ID
)
");
$sth->execute( $offset, $offset, $range_start, $range_end, $self->feature_id() );
$sth->finish();
}
sub start
{ my ($self, @args) = @_;
return 1;
}
sub update
{ my ($self, @args) = @_;
$self->SUPER::update();
$self->_insert_or_update_featureprop( 'version', $self->version() ) if exists $self->{version};
}
sub version
{ my ($self, $obj) = @_;
#
# fetches version from database (_get_version) if version is not yet defined
# and the user is not attempting to set the version options
#
exists $self->{version} || scalar @_ > 1 || $self->version( $self->_get_featureprop( 'version' ) );
if(scalar @_ > 1) {
$self->{version} = $obj;
}
return $self->{version};
}
General documentation
Copyright © 2006, Northwestern University
All rights reserved.
|
|