Module | Bio::NCBIDB::Common |
In: |
lib/bio/db/genbank/common.rb
|
This module defines a common framework shared by GenBank, GenPept, RefSeq, and DDBJ. For more details, see the documentation in each of the genbank/*.rb files.
DELIMITER | = | RS = "\n//\n" |
TAGSIZE | = | 12 |
# File lib/bio/db/genbank/common.rb, line 30
#
# Creates the entry object by delegating to the parent initializer, handing
# it the raw +entry+ together with this module's TAGSIZE constant.
#
# NOTE(review): how the parent uses TAGSIZE is not visible here; presumably
# it is the width of the tag column -- confirm against the superclass.
def initialize(entry)
  super(entry, TAGSIZE)
end
Returns the ACCESSION part of the acc_version.
# File lib/bio/db/genbank/common.rb, line 62
#
# Returns the ACCESSION part of acc_version, i.e. the text in front of the
# first '.'.  When splitting acc_version yields no pieces at all, an empty
# String is returned.
def accession
  head, = acc_version.split(/\./)
  head.to_s
end
ACCESSION — Returns contents of the ACCESSION record as an Array.
# File lib/bio/db/genbank/common.rb, line 46
#
# ACCESSION -- Returns the contents of the ACCESSION record as an Array of
# Strings: the fetched field is stripped of surrounding whitespace and then
# split on runs of whitespace.
def accessions
  record = field_fetch('ACCESSION')
  record.strip.split(/\s+/)
end
# File lib/bio/db/genbank/common.rb, line 120
#
# Returns the 'common_name' entry of the Hash produced by #source.
def common_name
  source_record = source
  source_record['common_name']
end
FEATURES — Returns contents of the FEATURES record as an array of Bio::Feature objects.
# File lib/bio/db/genbank/common.rb, line 207
#
# FEATURES -- Returns the contents of the FEATURES record as an array whose
# elements are the results of parse_qualifiers (one per feature).
#
# The record is parsed on the first call only; the result is cached in
# @data['FEATURES'].  With a block, each cached element is yielded in turn;
# without a block, the cached array itself is returned.
def features
  @data['FEATURES'] ||= begin
    rows = []
    quote_open = false

    get('FEATURES').each_line do |row|
      # skip the "FEATURES ..." header line
      next if row =~ /^FEATURES/

      # feature type (source, CDS, ...): the first 20 characters
      key = row[0, 20].to_s.strip

      # feature value (position or /qualifier=): up to 60 characters
      # starting at offset 20
      value = row[20, 60].to_s.chomp

      if row =~ /^ {5}\S/
        # new feature: start a sub-array [ feature type, position, ... ]
        rows.push([key, value])
      elsif value =~ /^ \// && !quote_open # gb:IRO125195
        # qualifier start (/q="data..., /q="data...", /q=data, /q)
        rows.last.push(value)

        # remember an opening quote that is not closed on this line
        quote_open = true if value =~ /="/ && value !~ /"$/
      else
        # continuation of the previous qualifier (or position) text
        rows.last.last << value

        # a trailing quote closes the multi-line value
        quote_open = false if value =~ /"$/
      end
    end

    rows.map! { |row| parse_qualifiers(row) }
    rows.extend(Bio::Features::BackwardCompatibility)
  end

  return @data['FEATURES'] unless block_given?

  @data['FEATURES'].each { |feature| yield feature }
end
KEYWORDS — Returns contents of the KEYWORDS record as an Array of Strings.
# File lib/bio/db/genbank/common.rb, line 84
#
# KEYWORDS -- Returns the contents of the KEYWORDS record as an Array of
# Strings.  A single trailing '.' is removed and the remainder is split on
# "; ".  The result is cached in @data['KEYWORDS'].
def keywords
  return @data['KEYWORDS'] if @data['KEYWORDS']

  record = fetch('KEYWORDS')
  @data['KEYWORDS'] = record.chomp('.').split(/; /)
end
LOCUS — Locus class must be defined in child classes.
# File lib/bio/db/genbank/common.rb, line 35
#
# LOCUS -- Placeholder that returns nil.  It must be overridden in each
# subclass.
def locus
  nil
end
ORIGIN — Returns contents of the ORIGIN record as a String.
# File lib/bio/db/genbank/common.rb, line 261
#
# ORIGIN -- Returns the contents of the ORIGIN record as a String.
#
# On the first call the record is split at its first newline: the first
# line (after tag_cut and truncate) is cached as @data['ORIGIN'], and the
# remainder, with digits, spaces, tabs, newlines, carriage returns and '/'
# removed, is cached as @data['SEQUENCE'].
def origin
  return @data['ORIGIN'] if @data['ORIGIN']

  header, residues = get('ORIGIN').split("\n", 2)
  residues ||= ""

  @data['ORIGIN'] = truncate(tag_cut(header))
  @data['SEQUENCE'] = residues.tr("0-9 \t\n\r\/", '')
  @data['ORIGIN']
end
REFERENCE — Returns contents of the REFERENCE records as an Array of Bio::Reference objects.
# File lib/bio/db/genbank/common.rb, line 136
#
# REFERENCE -- Returns the contents of the REFERENCE records as an Array of
# Reference objects, one per top-level REFERENCE block.
#
# The records are parsed on the first call only; the result is cached in
# @data['REFERENCE'].  With a block, each cached element is yielded in
# turn; without a block, the cached array itself is returned.
def references
  unless @data['REFERENCE']
    parsed = []

    toptag2array(get('REFERENCE')).each do |record|
      entry = {}

      subtag2array(record).each do |field|
        case tag_get(field)
        when /REFERENCE/
          # "<number>" optionally followed by "(<span>)"
          if /(\d+)(\s*\((.+)\))?/m =~ tag_cut(field)
            number = Regexp.last_match(1)
            span = Regexp.last_match(3)
            entry['embl_gb_record_number'] = number.to_i

            if span && span != 'sites'
              # normalise "bases 1 to 10; 20 to 30" into "1-10, 20-30"
              span.sub!(/\A\s*bases\s+/, '')
              span.gsub!(/(\d+)\s+to\s+(\d+)/, "\\1-\\2")
              span.gsub!(/\s*\;\s*/, ', ')
              entry['sequence_position'] = span
            end
          end
        when /AUTHORS/
          names = truncate(tag_cut(field)).split(/, /)
          # the final item may hold two names joined by "and"
          names[-1] = names[-1].split(/\s+and\s+/) if names[-1]
          # put a space after the first comma inside each name
          entry['authors'] = names.flatten.map { |name| name.sub(/,/, ', ') }
        when /TITLE/
          entry['title'] = truncate(tag_cut(field)) + '.'
        when /JOURNAL/
          journal = truncate(tag_cut(field))
          # "<journal> <volume> (<issue>), <from>-<to> (<year>)"
          if journal =~ /(.*) (\d+) \((\d+)\), (\d+-\d+) \((\d+)\)$/
            entry['journal'] = Regexp.last_match(1)
            entry['volume'] = Regexp.last_match(2)
            entry['issue'] = Regexp.last_match(3)
            entry['pages'] = Regexp.last_match(4)
            entry['year'] = Regexp.last_match(5)
          else
            entry['journal'] = journal
          end
        when /MEDLINE/
          entry['medline'] = truncate(tag_cut(field))
        when /PUBMED/
          entry['pubmed'] = truncate(tag_cut(field))
        when /REMARK/
          (entry['comments'] ||= []).push(truncate(tag_cut(field)))
        end
      end

      parsed.push(Reference.new(entry))
    end

    @data['REFERENCE'] = parsed.extend(Bio::References::BackwardCompatibility)
  end

  return @data['REFERENCE'] unless block_given?

  @data['REFERENCE'].each { |reference| yield reference }
end
SOURCE — Returns contents of the SOURCE record as a Hash.
# File lib/bio/db/genbank/common.rb, line 96
#
# SOURCE -- Returns the contents of the SOURCE record as a Hash with the
# keys 'common_name', 'organism' and 'taxonomy'.  Any other key yields ''
# because the Hash's default value is set to ''.
#
# The record is split at the literal 'ORGANISM': the part before it becomes
# the common name.  The part after it is divided at the first
# non-whitespace token ending in ';' (or, failing that, ending in '.',
# cf. rs:NC_001741): the text before that token is the organism, and the
# token plus everything after it is the taxonomy.  If neither token is
# found, the whole part is the organism and the taxonomy is ''.
#
# The result is cached in @data['SOURCE'].
def source
  return @data['SOURCE'] if @data['SOURCE']

  name, org = get('SOURCE').split('ORGANISM')
  org ||= ""

  boundary = org.match(/\S+;/) || org.match(/\S+\./) # rs:NC_001741
  if boundary
    organism = boundary.pre_match
    taxonomy = boundary[0] + boundary.post_match
  else
    organism = org
    taxonomy = ''
  end

  record = {
    'common_name' => truncate(tag_cut(name)),
    'organism' => truncate(organism),
    'taxonomy' => truncate(taxonomy),
  }
  record.default = ''
  @data['SOURCE'] = record
end
Returns the VERSION part of the acc_version as an Integer.
# File lib/bio/db/genbank/common.rb, line 67
#
# Returns the VERSION part of acc_version as an Integer: the text after
# the last '.', converted with to_i.
#
# Returns 0 when acc_version contains no version part (no '.' separating
# two pieces).  Without this check an unversioned, digits-only accession
# such as "123456" would itself be reported as the version.
def version
  pieces = acc_version.split(/\./)
  return 0 if pieces.size < 2

  pieces.last.to_i
end