Class Bio::GCG::Msf
In: lib/bio/appl/gcg/msf.rb
Parent: Object

MSF is a multiple sequence alignment format that originated with the Wisconsin (GCG) package. Bio::GCG::Msf is a parser for the MSF format.

Methods

Constants

DELIMITER = RS = nil   delimiter used by Bio::FlatFile

Attributes

checksum  [R]  checksum
date  [R]  date
description  [R]  description
entry_id  [R]  ID of the alignment
heading  [R]  heading line (for example ’!!NA_MULTIPLE_ALIGNMENT 1.0’)
length  [R]  alignment length
seq_type  [R]  sequence type ("N" for DNA/RNA or "P" for protein)

Public Class methods

Creates a new Msf object.

[Source]

    # File lib/bio/appl/gcg/msf.rb, line 31
# Creates a new Msf object from the text of one MSF entry.
#
# The text is consumed front to back in four parts:
# 1. an optional heading line such as '!!NA_MULTIPLE_ALIGNMENT 1.0'
#    (stored in @heading),
# 2. everything up to the last line ending in "..": the free-form
#    description (@description) plus the "MSF: ... Check: ... .." line,
#    from which @entry_id, @length, @seq_type, @date and @checksum are read,
# 3. everything up to the last line ending in "//": the per-sequence
#    "Name:" lines, stored as an array of hashes in @seq_info,
# 4. whatever remains, kept unparsed in @data.
#
# ---
# *Arguments*:
# * _str_: (String) MSF formatted text; it is not modified
def initialize(str)
  # work on a copy with leading blank lines removed
  str = str.sub(/\A[\r\n]+/, '')

  # Heading line. ALIGNMENT must be spelled correctly here; otherwise the
  # heading is never recognized and ends up inside the description.
  first_line = str[/.*/]
  if /^\!\![A-Z]+\_MULTIPLE\_ALIGNMENT/ =~ first_line
    @heading = first_line # '!!NA_MULTIPLE_ALIGNMENT 1.0' or like this
    str.sub!(/.*/, '')
  end

  # Cut off the header: greedy match up to the last line ending in "..".
  str.sub!(/.*\.\.$/m, '')
  header = $&.to_s

  # Dropping the first line that ends in ".." leaves the description;
  # $& then holds the dropped line, i.e. the "MSF:" line.
  @description = header.sub(/^.*\.\.$/, '')
  msf_line = $&.to_s

  m = /(.+)\s+MSF\:\s+(\d+)\s+Type\:\s+(\w)\s+(.+)\s+(Comp)?Check\:\s+(\d+)/.match(msf_line)
  if m
    @entry_id = m[1].to_s.strip
    @length   = (m[2] ? m[2].to_i : nil)
    @seq_type = m[3]
    @date     = m[4].to_s.strip
    @checksum = (m[6] ? m[6].to_i : nil)
  end

  # Cut off the sequence list: greedy match up to the last line ending in "//".
  str.sub!(/.*\/\/$/m, '')
  names_block = $&.to_s

  # Each "Name:" line becomes a hash of its "Key: value" pairs.
  @seq_info = []
  names_block.split(/^/).each do |line|
    next unless /Name\: / =~ line
    info = {}
    line.scan(/(\S+)\: +(\S*)/) { |key, value| info[key] = value }
    @seq_info << info
  end

  @data = str
  # drop the single line break left at the start of the description
  @description.sub!(/\A(\r\n|\r|\n)/, '')
  @align = nil
end

Public Instance methods

returns Bio::Alignment object.

[Source]

     # File lib/bio/appl/gcg/msf.rb, line 179
# Returns the alignment for this entry (a Bio::Alignment object according
# to the class documentation).
# do_parse runs first; it is defined elsewhere and presumably fills in
# @align on first use -- confirm against its definition.
def alignment
  do_parse
  @align
end

CompCheck field

[Source]

     # File lib/bio/appl/gcg/msf.rb, line 122
# CompCheck field.
# Returns the integer following "CompCheck:" in the description, or nil
# when the description has no such field. The result (including nil) is
# memoized after the first call.
def compcheck
  return @compcheck if defined?(@compcheck)
  found = /CompCheck\: +(\d+)/.match(@description)
  @compcheck = found ? found[1].to_i : nil
end

gap length weight

[Source]

     # File lib/bio/appl/gcg/msf.rb, line 113
# gap length weight
# Returns the text following "GapLengthWeight:" in the description as a
# String, or nil when the field is absent. Memoized after the first call.
def gap_length_weight
  return @gap_length_weight if defined?(@gap_length_weight)
  found = /GapLengthWeight\: +(\S+)/.match(@description)
  @gap_length_weight = found ? found[1] : nil
end

gap weight

[Source]

     # File lib/bio/appl/gcg/msf.rb, line 104
# gap weight
# Returns the text following "GapWeight:" in the description as a String,
# or nil when the field is absent. Memoized after the first call.
def gap_weight
  return @gap_weight if defined?(@gap_weight)
  found = /GapWeight\: +(\S+)/.match(@description)
  @gap_weight = found ? found[1] : nil
end

Gets the sequence data (used internally; this method will become obsolete).

[Source]

     # File lib/bio/appl/gcg/msf.rb, line 185
# Gets the sequence data (used internally; slated to become obsolete).
# do_parse runs first; it is defined elsewhere and presumably fills in
# @seq_data on first use -- confirm against its definition.
def seq_data
  do_parse
  @seq_data
end

symbol comparison table

[Source]

     # File lib/bio/appl/gcg/msf.rb, line 95
 # symbol comparison table
 # Returns the text following "Symbol comparison table:" in the description
 # as a String, or nil when the field is absent. Memoized after the first call.
 def symbol_comparison_table
   return @symbol_comparison_table if defined?(@symbol_comparison_table)
   found = /Symbol comparison table\: +(\S+)/.match(@description)
   @symbol_comparison_table = found ? found[1] : nil
 end

validates checksum

[Source]

     # File lib/bio/appl/gcg/msf.rb, line 191
# validates checksum
# Returns true when every sequence's computed checksum equals the "Check"
# value on its "Name:" line and, unless the entry-level checksum is 0, the
# sum of those checksums modulo 10000 equals the entry-level checksum.
# Returns false otherwise.
def validate_checksum
  do_parse
  running_total = 0
  @seq_data.each_with_index do |seq, idx|
    computed = Bio::GCG::Seq.calc_checksum(seq)
    # the first per-sequence mismatch settles the answer
    return false if computed != @seq_info[idx]['Check'].to_i
    running_total += computed
  end
  # "Check:" field of BioPerl is always 0, so 0 means nothing to compare
  return true if @checksum == 0
  (running_total % 10000) == @checksum
end

[Validate]