Class | Bio::GCG::Msf |
In: |
lib/bio/appl/gcg/msf.rb
|
Parent: | Object |
MSF is a multiple sequence alignment format used by the GCG Wisconsin Package. Bio::GCG::Msf is a parser for the MSF format.
DELIMITER | = | RS = nil | delimiter used by Bio::FlatFile |
# File lib/bio/appl/gcg/msf.rb, line 31
#
# Creates a new Msf object from the text of a single MSF entry.
#
# The text is consumed front to back in three steps:
# 1. an optional heading line such as '!!NA_MULTIPLE_ALIGNMENT 1.0',
# 2. everything up to the last '..' found at the end of a line (the
#    free-form description plus the "MSF: .. Type: .. Check: .." line),
# 3. everything up to the last '//' found at the end of a line (the
#    per-sequence "Name:" lines).
# Whatever is left over is stored unparsed in @data; @align starts as nil.
#
# ---
# *Arguments*:
# * (required) _str_: String containing the MSF entry
def initialize(str)
  # String#sub returns a new string, so the caller's string is never mutated
  # by the destructive sub! calls below.
  str = str.sub(/\A[\r\n]+/, '')

  first_line = str[/.*/]
  if /^\!\![A-Z]+\_MULTIPLE\_ALIGNMENT/ =~ first_line
    @heading = first_line # '!!NA_MULTIPLE_ALIGNMENT 1.0' or like this
    str.sub!(/.*/, '')
  end

  # With /m the dot also matches newlines, so this removes everything up to
  # the last '..' at the end of a line and leaves the removed text in $&.
  str.sub!(/.*\.\.$/m, '')
  # Order matters here: the inner sub strips the first line ending in '..'
  # from the description AND re-sets $&, so the next statement picks up
  # exactly that stripped line.
  @description = $&.to_s.sub(/^.*\.\.$/, '').to_s
  d = $&.to_s
  if m = /(.+)\s+MSF\:\s+(\d+)\s+Type\:\s+(\w)\s+(.+)\s+(Comp)?Check\:\s+(\d+)/.match(d)
    @entry_id = m[1].to_s.strip
    @length   = (m[2] ? m[2].to_i : nil)
    @seq_type = m[3]
    @date     = m[4].to_s.strip
    @checksum = (m[6] ? m[6].to_i : nil)
  end

  # Remove everything up to the last '//' at the end of a line; the removed
  # text (in $&) holds the "Name: ..." lines.
  str.sub!(/.*\/\/$/m, '')
  a = $&.to_s.split(/^/)
  @seq_info = []
  a.each do |x|
    if /Name\: / =~ x
      s = {}
      # Collect every "Key: value" pair on the line, e.g. Name, Len, Check.
      x.scan(/(\S+)\: +(\S*)/) { |key, value| s[key] = value }
      @seq_info << s
    end
  end

  @data = str
  @description.sub!(/\A(\r\n|\r|\n)/, '')
  @align = nil
end
Returns a Bio::Alignment object.
# File lib/bio/appl/gcg/msf.rb, line 179
#
# Runs do_parse and then hands back @align, which the accompanying
# documentation describes as a Bio::Alignment object.
# NOTE(review): do_parse is defined elsewhere in the file; presumably it is
# what fills in @align -- confirm there.
def alignment
  do_parse
  return @align
end
CompCheck field
# File lib/bio/appl/gcg/msf.rb, line 122
#
# CompCheck field: the integer following "CompCheck:" in @description, or
# nil when the description has no such field. The result (including nil) is
# computed once and cached in @compcheck.
def compcheck
  return @compcheck if defined?(@compcheck)

  found = /CompCheck\: +(\d+)/.match(@description)
  @compcheck = found ? found[1].to_i : nil
end
gap length weight
# File lib/bio/appl/gcg/msf.rb, line 113
#
# Gap length weight: the non-blank token following "GapLengthWeight:" in
# @description, returned as a String, or nil when the field is absent.
# The result (including nil) is computed once and cached.
def gap_length_weight
  return @gap_length_weight if defined?(@gap_length_weight)

  found = /GapLengthWeight\: +(\S+)/.match(@description)
  @gap_length_weight = found && found[1]
end
gap weight
# File lib/bio/appl/gcg/msf.rb, line 104
#
# Gap weight: the non-blank token following "GapWeight:" in @description,
# returned as a String, or nil when the field is absent.
# The result (including nil) is computed once and cached.
def gap_weight
  return @gap_weight if defined?(@gap_weight)

  found = /GapWeight\: +(\S+)/.match(@description)
  @gap_weight = found && found[1]
end
Gets the sequence data (used internally; will become obsolete).
# File lib/bio/appl/gcg/msf.rb, line 185
#
# Runs do_parse and then returns @seq_data. Intended for internal use and
# slated to become obsolete.
# NOTE(review): do_parse is defined elsewhere in the file; presumably it is
# what fills in @seq_data -- confirm there.
def seq_data
  do_parse
  return @seq_data
end
symbol comparison table
# File lib/bio/appl/gcg/msf.rb, line 95
#
# Symbol comparison table: the non-blank token following
# "Symbol comparison table:" in @description, returned as a String, or nil
# when the field is absent. The result (including nil) is computed once and
# cached.
def symbol_comparison_table
  return @symbol_comparison_table if defined?(@symbol_comparison_table)

  found = /Symbol comparison table\: +(\S+)/.match(@description)
  @symbol_comparison_table = found && found[1]
end
validates checksum
# File lib/bio/appl/gcg/msf.rb, line 191
#
# Validates checksums. Returns true or false.
#
# After do_parse, each entry of @seq_data is run through
# Bio::GCG::Seq.calc_checksum and compared with the integer value of the
# 'Check' field of the @seq_info entry at the same index; the first mismatch
# makes the result false. If all sequences agree and @checksum is not 0, the
# sum of the per-sequence checksums modulo 10000 must also equal @checksum.
def validate_checksum
  do_parse
  total = 0
  @seq_data.each_with_index do |seq, idx|
    sum = Bio::GCG::Seq.calc_checksum(seq)
    return false unless sum == @seq_info[idx]['Check'].to_i
    total += sum
  end

  # "Check:" field of BioPerl is always 0, so 0 means "nothing to compare".
  return true if @checksum == 0

  (total % 10000) == @checksum
end