# Source code for biokit.viz.volcano

"""Volcano plot"""

import numpy as np
import pylab
import pandas as pd


__all__ = ['Volcano']


class Volcano(object):
    """Volcano plot

    In essence, just a scatter plot with annotations.

    .. plot::
        :width: 80%
        :include-source:

        import numpy as np
        fc = np.random.randn(1000)
        pvalue = np.random.randn(1000)
        from biokit import Volcano
        v = Volcano(fc, -np.log10(pvalue**2))
        v.plot(pvalue_threshold=3)

    """

    def __init__(self, fold_changes=None, pvalues=None, color=None):
        """.. rubric:: constructor

        :param list fold_changes: 1D array or list (x-axis values).
        :param list pvalues: 1D array or list with the same length as
            *fold_changes*. The values are plotted as provided on the
            y-axis; no transformation is applied here.
        :param color: optional per-point colors (1D array or list with
            the same length as *fold_changes*). A single color string is
            applied to every point. Defaults to ``'blue'`` for all points.

        The data are also stored in the :attr:`df` dataframe with the
        columns ``fold_change``, ``pvalue`` and ``color``.
        """
        # TODO: optionally compute the fold changes and p-values from raw
        # data when they are not provided.
        self.fold_changes = np.array(fold_changes)
        self.pvalues = np.array(pvalues)
        assert len(self.fold_changes) == len(self.pvalues), \
            "fold_changes and pvalues must have the same length"

        if color is None:
            self.color = ['blue'] * len(self.pvalues)
        else:
            if isinstance(color, str):
                # A lone color name would become a 0-d array (no len());
                # broadcast it so that every point gets that color.
                color = [color] * len(self.pvalues)
            self.color = np.array(color)
        assert len(self.fold_changes) == len(self.color), \
            "color must have the same length as fold_changes"

        self.df = pd.DataFrame({"fold_change": self.fold_changes,
                                "pvalue": self.pvalues,
                                'color': self.color})

    def plot(self, size=100, alpha=0.5, marker='o', fontsize=16,
             xlabel='fold change', ylabel='p-value',
             pvalue_threshold=1.5, fold_change_threshold=1):
        """Draw the volcano plot in the current figure (cleared first).

        :param size: size of the markers
        :param alpha: transparency of the marker
        :param marker: marker style, used for all the points
        :param fontsize: font size of the axis labels
        :param xlabel: label of the x-axis
        :param ylabel: label of the y-axis
        :param pvalue_threshold: adds an horizontal dashed line at the
            threshold provided.
        :param fold_change_threshold: colors in grey the absolute fold
            changes below a given threshold. Vertical dashed lines are
            also added at plus and minus this threshold.
        """
        pylab.clf()

        below = abs(self.fold_changes) < fold_change_threshold
        above = abs(self.fold_changes) >= fold_change_threshold

        # Use a plain array so that the boolean selection is purely
        # positional and does not carry a pandas index into matplotlib.
        colors = self.df["color"].values

        pylab.scatter(self.fold_changes[below], self.pvalues[below],
                      s=size, alpha=alpha, c='grey', marker=marker)
        # The marker is forwarded here as well so that both groups of
        # points honour the *marker* argument.
        pylab.scatter(self.fold_changes[above], self.pvalues[above],
                      s=size, alpha=alpha, c=colors[above], marker=marker)

        pylab.grid()
        pylab.xlabel(xlabel, fontsize=fontsize)
        pylab.ylabel(ylabel, fontsize=fontsize)

        pylab.axhline(pvalue_threshold, color='red', linestyle='--')
        pylab.axvline(fold_change_threshold, color='red', linestyle='--')
        pylab.axvline(-1 * fold_change_threshold, color='red',
                      linestyle='--')