Bionode.io - Modular and universal bioinformatics
Pipeable UNIX command line tools and JavaScript / Node.js APIs for bioinformatic analysis workflows on the server and browser.
#bionode
gitter.im/bionode/bionode
Difficulty getting relevant description and datasets from NCBI API using bio* libs
Python example: URL for the Acromyrmex assembly?
ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCA_000188075.1_Si_gnG
# The "hard way" with Biopython: fetch the Acromyrmex assembly metadata
# from NCBI Entrez and print each metadata entry.
import xml.etree.ElementTree as ET
from Bio import Entrez

Entrez.email = "mail@bmpvieira.com"

esearch_handle = Entrez.esearch(db="assembly", term="Acromyrmex")
esearch_record = Entrez.read(esearch_handle)

for id in esearch_record['IdList']:
    esummary_handle = Entrez.esummary(db="assembly", id=id)
    esummary_record = Entrez.read(esummary_handle)
    documentSummarySet = esummary_record['DocumentSummarySet']
    document = documentSummarySet['DocumentSummary'][0]
    # 'Meta' is a fragment of XML with no single root element, so wrap it
    # before parsing (the wrapper tags were lost in the original rendering).
    metadata_XML = document['Meta'].encode('utf-8')
    metadata = ET.fromstring('<root>' + metadata_XML + '</root>')
    # Fixed: the original iterated over `Metadata` (capital M), a NameError.
    for entry in metadata[1]:
        print(entry.text)
Solution: bionode-ncbi
Better way with Bionode - 4 approaches
JavaScript
// Three equivalent ways to get assembly URLs with the bionode meta-package.
var bio = require('bionode')

// Callback pattern
bio.ncbi.urls('assembly', 'Acromyrmex', function (urls) {
  console.log(urls[0].genomic.fna)
})

// Event pattern
bio.ncbi.urls('assembly', 'Acromyrmex').on('data', printGenomeURL)
function printGenomeURL (url) {
  console.log(url.genomic.fna)
}

// Pipe pattern
var tool = require('tool-stream')
bio.ncbi.urls('assembly', 'Acromyrmex')
  .pipe(tool.extractProperty('genomic.fna'))
  .pipe(process.stdout)
BASH
bionode ncbi urls assembly Acromyrmex | tool-stream extractProperty genomic.fna
Complex pipelines with forks
// Search SRA for the fire ant, then fork the result stream three ways:
// raw reads, linked biosamples, and linked publications.
ncbi
  .search('sra', 'Solenopsis invicta')
  .pipe(fork1)
  .pipe(dat.reads)

fork1
  .pipe(tool.extractProperty('expxml.Biosample.id'))
  .pipe(ncbi.search('biosample'))
  .pipe(dat.samples)

fork1
  .pipe(tool.extractProperty('uid'))
  .pipe(ncbi.link('sra', 'pubmed'))
  .pipe(ncbi.search('pubmed'))
  .pipe(fork2)
  .pipe(dat.papers)
# Mac
brew install n
n stable
# Ubuntu
sudo apt-get install npm
npm install -g n
n stable
# Windows
Go to http://nodejs.org
npm install -g bionode-ncbi bionode-fasta json
# Progressively explore NCBI genome records for "spiders".
# (These are four separate commands; they were fused onto one line.)
bionode-ncbi search genome spiders                            # full JSON documents
bionode-ncbi search genome spiders | wc                       # how much data?
bionode-ncbi search genome spiders | head -n 1 | json         # pretty-print the first record
bionode-ncbi search genome spiders | json -ga organism_name   # one field per record
bionode-ncbi search genome spiders | \ json -ga uid | \ bionode-ncbi link genome pubmed - | \ json -ga destUID | \ bionode-ncbi search pubmed - | \ json -ga title
# Download the Guillardia theta assembly and keep only scaffolds > 10 kb.
# The two-word species name must be quoted so it is passed as one argument.
bionode-ncbi download assembly "Guillardia theta" | \
json -ga -c 'this.status === "completed"' | \
json -ga path | \
bionode-fasta -f | \
json -ga -c 'this.seq.length > 10000' | \
bionode-fasta --write > gtheta-big-scaffolds.fasta
// Synchronous through2 transform stream: uppercase the `name` property of
// every object that flows through.
var through = require('through2')
// Fixed: the module is bound to `through`, but the original called
// `through2.obj(...)` — an undefined name.
var stream = through.obj(transform)
function transform (obj, enc, next) {
// do things, example:
obj.name = obj.name.toUpperCase()
// Push downstream
this.push(obj)
// Callback to fetch next object
next()
}
// Asynchronous through2 transform stream: attach data fetched from a DB to
// each object before pushing it downstream.
var through = require('through2')
// Fixed: the module is bound to `through`, but the original called
// `through2.obj(...)` — an undefined name.
var stream = through.obj(transform)
function transform (obj, enc, next) {
// Keep a reference to the stream: `this` is rebound inside the callback.
var self = this
requestSomethingFromDB(obj.name, function(data) {
obj.data = data
self.push(obj)
// Only request the next object once the async work is done.
next()
})
}
Bash
mkdir project
cd project
npm install bionode-ncbi through2
JavaScript
// Search NCBI assemblies for "spiders" and emit a small newline-delimited
// JSON summary (species + submitting organization) for each record.
var ncbi = require('bionode-ncbi')
var through = require('through2')
var json = require('ndjson')

var myStream = through.obj(transform)

function transform (obj, enc, next) {
  // Fixed typos in the output keys: "specie" -> "species",
  // "organisazation" -> "organization".
  var result = {
    species: obj.organism,
    organization: obj.meta['submitter-organization']
  }
  this.push(result)
  next()
}

ncbi.search('assembly', 'spiders')
  .pipe(myStream)
  .pipe(json.stringify())
  .pipe(process.stdout)
// Count every object that flows through the stream; report the total once
// the stream ends.
var counter = 0
myStream
  .on('data', function (data) {
    counter++
  })
  .on('end', function () {
    console.log('Processed ' + counter)
  })
// Same counter as above, with the handlers defined up front as named
// functions and attached in a single chain.
var counter = 0
function count (data) { counter++ }
function log () { console.log('Processed ' + counter) }
myStream.on('data', count).on('end', log)
Bionode
Hackday
Same code client/server side
CoffeeScript pipeline and a new format?
ncbi.search 'genome', 'rodentia' .pipe ncbi.expand 'assembly' .pipe ncbi.expand 'tax' .pipe getLineage() .pipe ncbi.link 'tax', 'sra' .pipe ncbi.expand 'sra' .pipe through.obj (obj, enc, next) -> async.map obj.sra, expandBiosample, (error, sra)=> obj.sra = sra @push obj next()
pipeline1 ncbi.search genome rodentia ncbi.expand assembly ncbi.expand tax getLineage ncbi.link tax sra ncbi.expand sra stream (obj, next) -> async.map obj.sra expandBiosample (sra) => obj.sra = sra @push obj next()
Pipelines and alternatives to Makefiles?
Name | Type | Status | People |
---|---|---|---|
ncbi | Data access | ||
fasta | Parser | ||
seq | Wrangling | IM | |
ensembl | Data access | ||
blast-parser | Parser |
Name | Type | Status | People |
---|---|---|---|
template | Documentation | ||
JS pipeline | Documentation | ||
Gasket pipeline | Documentation | ||
Dat/Bionode workshop | Documentation |
Name | Type | Status | People |
---|---|---|---|
sra | Wrappers | ||
bwa | Wrappers | ||
sam | Wrappers | ||
bbi | Parser |
Name | Type | People |
---|---|---|
ebi | Data access | |
semantic | Data access | |
vcf | Parser | |
gff | Parser | |
bowtie | Wrappers | |
sge | Wrappers | |
blast | Wrappers |
Name | Type | People |
---|---|---|
vsearch | Wrappers | |
khmer | Wrappers | |
rsem | Wrappers | |
gmap | Wrappers | |
star | Wrappers | |
go | Wrappers |