Bionode.io - Modular and universal bioinformatics
Pipeable UNIX command line tools and JavaScript / Node.js APIs for bioinformatic analysis workflows on the server and browser.
#bionode
gitter.im/bionode/bionode
Difficulty getting relevant description and datasets from NCBI API using bio* libs
Python example: URL for the Acromyrmex assembly?
ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCA_000188075.1_Si_gnG
# The "hard way" with Biopython: fetch the Acromyrmex assembly metadata
# from NCBI Entrez and print each metadata entry.
import xml.etree.ElementTree as ET
from Bio import Entrez

Entrez.email = "mail@bmpvieira.com"

esearch_handle = Entrez.esearch(db="assembly", term="Acromyrmex")
esearch_record = Entrez.read(esearch_handle)

for id in esearch_record['IdList']:
    esummary_handle = Entrez.esummary(db="assembly", id=id)
    esummary_record = Entrez.read(esummary_handle)
    documentSummarySet = esummary_record['DocumentSummarySet']
    document = documentSummarySet['DocumentSummary'][0]
    # 'Meta' is a fragment of XML with no single root element, so wrap it
    # before parsing (the wrapper tags were lost in the original rendering).
    metadata_XML = document['Meta'].encode('utf-8')
    metadata = ET.fromstring('<root>' + metadata_XML + '</root>')
    # Fixed: the original iterated over `Metadata` (capital M), a NameError.
    for entry in metadata[1]:
        print(entry.text)
Solution: bionode-ncbi
Better way with Bionode - 4 approaches
JavaScript
// Three equivalent ways to get assembly URLs with the bionode meta-package.
var bio = require('bionode')

// Callback pattern
bio.ncbi.urls('assembly', 'Acromyrmex', function (urls) {
  console.log(urls[0].genomic.fna)
})

// Event pattern
bio.ncbi.urls('assembly', 'Acromyrmex').on('data', printGenomeURL)
function printGenomeURL (url) {
  console.log(url.genomic.fna)
}

// Pipe pattern
var tool = require('tool-stream')
bio.ncbi.urls('assembly', 'Acromyrmex')
  .pipe(tool.extractProperty('genomic.fna'))
  .pipe(process.stdout)
BASH
bionode ncbi urls assembly Acromyrmex | tool-stream extractProperty genomic.fna
Complex pipelines with forks
// Search SRA for the fire ant, then fork the result stream three ways:
// raw reads, linked biosamples, and linked publications.
ncbi
  .search('sra', 'Solenopsis invicta')
  .pipe(fork1)
  .pipe(dat.reads)

fork1
  .pipe(tool.extractProperty('expxml.Biosample.id'))
  .pipe(ncbi.search('biosample'))
  .pipe(dat.samples)

fork1
  .pipe(tool.extractProperty('uid'))
  .pipe(ncbi.link('sra', 'pubmed'))
  .pipe(ncbi.search('pubmed'))
  .pipe(fork2)
  .pipe(dat.papers)
# Mac
brew install n
n stable
# Ubuntu
sudo apt-get install npm
npm install -g n
n stable
# Windows
Go to http://nodejs.org
npm install -g bionode-ncbi bionode-fasta json
# Progressively explore NCBI genome records for "spiders".
# (These are four separate commands; they were fused onto one line.)
bionode-ncbi search genome spiders                            # full JSON documents
bionode-ncbi search genome spiders | wc                       # how much data?
bionode-ncbi search genome spiders | head -n 1 | json         # pretty-print the first record
bionode-ncbi search genome spiders | json -ga organism_name   # one field per record
bionode-ncbi search genome spiders | \ json -ga uid | \ bionode-ncbi link genome pubmed - | \ json -ga destUID | \ bionode-ncbi search pubmed - | \ json -ga title
# Download the Guillardia theta assembly and keep only scaffolds > 10 kb.
# The two-word species name must be quoted so it is passed as one argument.
bionode-ncbi download assembly "Guillardia theta" | \
json -ga -c 'this.status === "completed"' | \
json -ga path | \
bionode-fasta -f | \
json -ga -c 'this.seq.length > 10000' | \
bionode-fasta --write > gtheta-big-scaffolds.fasta
// Synchronous through2 transform stream: uppercase the `name` property of
// every object that flows through.
var through = require('through2')
// Fixed: the module is bound to `through`, but the original called
// `through2.obj(...)` — an undefined name.
var stream = through.obj(transform)
function transform (obj, enc, next) {
// do things, example:
obj.name = obj.name.toUpperCase()
// Push downstream
this.push(obj)
// Callback to fetch next object
next()
}
// Asynchronous through2 transform stream: attach data fetched from a DB to
// each object before pushing it downstream.
var through = require('through2')
// Fixed: the module is bound to `through`, but the original called
// `through2.obj(...)` — an undefined name.
var stream = through.obj(transform)
function transform (obj, enc, next) {
// Keep a reference to the stream: `this` is rebound inside the callback.
var self = this
requestSomethingFromDB(obj.name, function(data) {
obj.data = data
self.push(obj)
// Only request the next object once the async work is done.
next()
})
}
Bash
mkdir project
cd project
npm install bionode-ncbi through2
JavaScript
// Search NCBI assemblies for "spiders" and emit a small newline-delimited
// JSON summary (species + submitting organization) for each record.
var ncbi = require('bionode-ncbi')
var through = require('through2')
var json = require('ndjson')

var myStream = through.obj(transform)

function transform (obj, enc, next) {
  // Fixed typos in the output keys: "specie" -> "species",
  // "organisazation" -> "organization".
  var result = {
    species: obj.organism,
    organization: obj.meta['submitter-organization']
  }
  this.push(result)
  next()
}

ncbi.search('assembly', 'spiders')
  .pipe(myStream)
  .pipe(json.stringify())
  .pipe(process.stdout)
// Count every object that flows through the stream; report the total once
// the stream ends.
var counter = 0
myStream
  .on('data', function (data) {
    counter++
  })
  .on('end', function () {
    console.log('Processed ' + counter)
  })
// Same counter as above, with the handlers defined up front as named
// functions and attached in a single chain.
var counter = 0
function count (data) { counter++ }
function log () { console.log('Processed ' + counter) }
myStream.on('data', count).on('end', log)
Bionode
Hackday
Same code client/server side
CoffeeScript pipeline and a new format?
ncbi.search 'genome', 'rodentia' .pipe ncbi.expand 'assembly' .pipe ncbi.expand 'tax' .pipe getLineage() .pipe ncbi.link 'tax', 'sra' .pipe ncbi.expand 'sra' .pipe through.obj (obj, enc, next) -> async.map obj.sra, expandBiosample, (error, sra)=> obj.sra = sra @push obj next()
pipeline1 ncbi.search genome rodentia ncbi.expand assembly ncbi.expand tax getLineage ncbi.link tax sra ncbi.expand sra stream (obj, next) -> async.map obj.sra expandBiosample (sra) => obj.sra = sra @push obj next()
Pipelines and alternatives to Makefiles?
Name | Type | Status | People |
---|---|---|---|
ncbi | Data access | ||
fasta | Parser | ||
seq | Wrangling | IM | |
ensembl | Data access | ||
blast-parser | Parser |
Name | Type | Status | People |
---|---|---|---|
template | Documentation | ||
JS pipeline | Documentation | ||
Gasket pipeline | Documentation | ||
Dat/Bionode workshop | Documentation |
Name | Type | Status | People |
---|---|---|---|
sra | Wrappers | ||
bwa | Wrappers | ||
sam | Wrappers | ||
bbi | Parser |
Name | Type | People |
---|---|---|
ebi | Data access | |
semantic | Data access | |
vcf | Parser | |
gff | Parser | |
bowtie | Wrappers | |
sge | Wrappers | |
blast | Wrappers |
Name | Type | People |
---|---|---|
vsearch | Wrappers | |
khmer | Wrappers | |
rsem | Wrappers | |
gmap | Wrappers | |
star | Wrappers | |
go | Wrappers |