Class Bio::NCBI::REST
In: lib/bio/io/ncbirest.rb
Parent: Object

Methods

Classes and Modules

Class Bio::NCBI::REST::EFetch
Class Bio::NCBI::REST::ESearch

Constants

NCBI_INTERVAL = 3   Make no more than one request every 3 seconds.

Public Class methods

[Source]

     # File lib/bio/io/ncbirest.rb, line 245
# Class-level shortcut for #efetch: builds a fresh instance and forwards
# every argument to its efetch method, returning whatever that call returns.
def self.efetch(*args)
  client = new
  client.efetch(*args)
end

[Source]

     # File lib/bio/io/ncbirest.rb, line 233
# Class-level shortcut for #einfo: builds a fresh instance and returns the
# result of calling einfo on it.
def self.einfo
  client = new
  client.einfo
end

[Source]

     # File lib/bio/io/ncbirest.rb, line 237
# Class-level shortcut for #esearch: builds a fresh instance and forwards
# every argument to its esearch method, returning whatever that call returns.
def self.esearch(*args)
  client = new
  client.esearch(*args)
end

[Source]

     # File lib/bio/io/ncbirest.rb, line 241
# Class-level shortcut for #esearch_count: builds a fresh instance and
# forwards every argument to its esearch_count method, returning whatever
# that call returns.
def self.esearch_count(*args)
  client = new
  client.esearch_count(*args)
end

Public Instance methods

Retrieve database entries by given IDs and using E-Utils (efetch) service.

For information on the possible arguments, see the NCBI E-utilities documentation for EFetch.

Usage

 ncbi = Bio::NCBI::REST.new
 ncbi.efetch("185041", {"db"=>"nucleotide", "rettype"=>"gb", "retmode" => "xml"})
 ncbi.efetch("J00231", {"db"=>"nuccore", "rettype"=>"gb", "retmode"=>"xml"})
 ncbi.efetch("AAA52805", {"db"=>"protein", "rettype"=>"gb"})

 Bio::NCBI::REST.efetch("185041", {"db"=>"nucleotide", "rettype"=>"gb", "retmode" => "xml"})
 Bio::NCBI::REST.efetch("J00231", {"db"=>"nuccore", "rettype"=>"gb"})
 Bio::NCBI::REST.efetch("AAA52805", {"db"=>"protein", "rettype"=>"gb"})

Arguments:

  • ids: list of NCBI entry IDs (required)
  • hash: hash of E-Utils option {"db" => "nuccore", "rettype" => "gb"}
    • db: "sequences", "nucleotide", "protein", "pubmed", "omim", …
    • retmode: "text", "xml", "html", …
    • rettype: "gb", "gbc", "medline", "count",…
  • step: maximum number of entries retrieved at a time
Returns:String

[Source]

     # File lib/bio/io/ncbirest.rb, line 205
# Fetch database entries by ID through the E-Utils efetch service.
#
# ids::  an Array of IDs, or any other object whose to_s form is a
#        comma-separated list of IDs
# hash:: extra request parameters; merged over the defaults
#        ("tool" => "bioruby", "retmode" => "text")
# step:: maximum number of IDs sent in a single request
#
# Returns one String: the response bodies of all batches concatenated in
# order, with leading and trailing whitespace stripped.
def efetch(ids, hash = {}, step = 100)
  endpoint = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
  params = { "tool" => "bioruby", "retmode" => "text" }.merge(hash)

  id_list = ids.is_a?(Array) ? ids : ids.to_s.split(/\s*,\s*/)

  fetched = ""
  0.step(id_list.size, step) do |offset|
    params["id"] = id_list[offset, step].join(',')
    # The final offset can land exactly on the end of the list, giving an
    # empty batch; no request is made for it.
    next if params["id"].empty?

    ncbi_access_wait
    fetched += Bio::Command.post_form(endpoint, params).body
  end
  fetched.strip
end

List the NCBI database names using the E-Utils (einfo) service.

 pubmed protein nucleotide nuccore nucgss nucest structure genome
 books cancerchromosomes cdd gap domains gene genomeprj gensat geo
 gds homologene journals mesh ncbisearch nlmcatalog omia omim pmc
 popset probe proteinclusters pcassay pccompound pcsubstance snp
 taxonomy toolkit unigene unists

Usage

 ncbi = Bio::NCBI::REST.new
 ncbi.einfo

 Bio::NCBI::REST.einfo

Returns:array of string (database names)

[Source]

    # File lib/bio/io/ncbirest.rb, line 66
# List the database names reported by the E-Utils einfo service.
#
# Like the other request methods of this class, the call identifies itself
# with "tool" => "bioruby" and goes through ncbi_access_wait before
# contacting the server, so that it takes part in the same request pacing.
#
# Returns an Array of Strings: the text of every <DbName> element found in
# the response body, in document order.
def einfo
  serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/einfo.fcgi"
  opts = { "tool" => "bioruby" }
  ncbi_access_wait
  response = Bio::Command.post_form(serv, opts)
  response.body.scan(/<DbName>(.*?)<\/DbName>/m).flatten
end

Search the NCBI database by given keywords using E-Utils (esearch) service and returns an array of entry IDs.

For information on the possible arguments, see the NCBI E-utilities documentation for ESearch.

Usage

 ncbi = Bio::NCBI::REST.new
 ncbi.esearch("tardigrada", {"db"=>"nucleotide", "rettype"=>"count"})
 ncbi.esearch("tardigrada", {"db"=>"nucleotide", "rettype"=>"gb"})
 ncbi.esearch("yeast kinase", {"db"=>"nuccore", "rettype"=>"gb", "retmax"=>5})

 Bio::NCBI::REST.esearch("tardigrada", {"db"=>"nucleotide", "rettype"=>"count"})
 Bio::NCBI::REST.esearch("tardigrada", {"db"=>"nucleotide", "rettype"=>"gb"})
 Bio::NCBI::REST.esearch("yeast kinase", {"db"=>"nuccore", "rettype"=>"gb", "retmax"=>5})

Arguments:

  • str: query string (required)
  • hash: hash of E-Utils option {"db" => "nuccore", "rettype" => "gb"}
    • db: "sequences", "nucleotide", "protein", "pubmed", "taxonomy", …
    • retmode: "text", "xml", "html", …
    • rettype: "gb", "medline", "count", …
    • retmax: integer (default 100)
    • retstart: integer
    • field:
      • "titl": Title [TI]
      • "tiab": Title/Abstract [TIAB]
      • "word": Text words [TW]
      • "auth": Author [AU]
      • "affl": Affiliation [AD]
      • "jour": Journal [TA]
      • "vol": Volume [VI]
      • "iss": Issue [IP]
      • "page": First page [PG]
      • "pdat": Publication date [DP]
      • "ptyp": Publication type [PT]
      • "lang": Language [LA]
      • "mesh": MeSH term [MH]
      • "majr": MeSH major topic [MAJR]
      • "subh": MeSH subheadings [SH]
      • "mhda": MeSH date [MHDA]
      • "ecno": EC/RN Number [rn]
      • "si": Secondary source ID [SI]
      • "uid": PubMed ID (PMID) [UI]
      • "fltr": Filter [FILTER] [SB]
      • "subs": Subset [SB]
    • reldate: 365
    • mindate: 2001
    • maxdate: 2002/01/01
    • datetype: "edat"
  • limit: maximum number of entries to be returned (0 for unlimited)
  • step: maximum number of entries retrieved at a time
Returns:array of entry IDs or a number of results

[Source]

     # File lib/bio/io/ncbirest.rb, line 133
# Search an NCBI database through the E-Utils esearch service.
#
# str::   query string
# hash::  extra request parameters; merged over the defaults
#         ("tool" => "bioruby", "term" => str). "retstart" is honored as the
#         offset of the first hit to return, and "retmax" is used as the
#         limit when no explicit limit argument is given.
# limit:: maximum number of IDs to return. 0 means "all hits" (the total is
#         obtained from esearch_count). When omitted, "retmax" from hash is
#         used if present, otherwise 100.
# step::  maximum number of IDs requested per HTTP call
#
# Returns the hit count (whatever esearch_count returns) when
# hash["rettype"] is "count"; otherwise an Array of ID strings collected
# from the <Id> elements of each response.
def esearch(str, hash = {}, limit = nil, step = 10000)
  serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
  opts = {
    "tool"   => "bioruby",
    "term"   => str,
  }
  opts.update(hash)

  return esearch_count(str, opts) if opts["rettype"] == "count"

  # Caller-supplied paging options must be read before the loop below
  # overwrites "retmax"/"retstart" for each batch.
  offset = opts["retstart"].to_i
  if limit.nil?
    limit = opts.key?("retmax") ? opts["retmax"].to_i : 100
  elsif limit == 0   # unlimited: everything from the offset onwards
    limit = [esearch_count(str, opts) - offset, 0].max
  end

  list = []
  0.step(limit, step) do |i|
    retmax = [step, limit - i].min
    # When limit is a multiple of step (or is 0) the last iteration has
    # nothing left to ask for; skip the pointless request and its wait.
    break if retmax <= 0

    opts.update("retmax" => retmax, "retstart" => offset + i)
    ncbi_access_wait
    response = Bio::Command.post_form(serv, opts)
    list += response.body.scan(/<Id>(.*?)<\/Id>/m).flatten
  end
  list
end
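Search the NCBI database by given keywords using E-Utils (esearch) service and return the number of matching entries.

Arguments:same as esearch method
Returns:number of results (integer)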

[Source]

     # File lib/bio/io/ncbirest.rb, line 163
# Ask the E-Utils esearch service how many entries match a query.
#
# str::  query string
# hash:: extra request parameters; merged over the defaults
#        ("tool" => "bioruby", "term" => str). Any "rettype" given here is
#        replaced by "count".
#
# Returns an Integer taken from the first <Count> element of the response
# body (0 when no such element is present).
def esearch_count(str, hash = {})
  endpoint = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
  params = { "tool" => "bioruby", "term" => str }
  params.update(hash)
  params["rettype"] = "count"
  # NOTE: ncbi_access_wait is intentionally not called here; the call was
  # left disabled in the original implementation.
  xml = Bio::Command.post_form(endpoint, params).body
  xml[/<Count>(.*?)<\/Count>/m, 1].to_i
end

[Validate]