Source code for biokit.viz.heatmap

"""Heatmap and dendograms"""
import matplotlib
import pylab
import scipy.cluster.hierarchy as hierarchy
import scipy.spatial.distance as distance
import numpy as np # get rid of this dependence

import easydev
import colormap
from biokit.viz.linkage import Linkage

__all__ = ['Heatmap']


def get_heatmap_df():
    """a simple example to play with and perform test"""
    import pandas as pd
    df = pd.DataFrame(
            {'A':[1,0,1,1],
             'B':[.9,0.1,.6,1],
             'C':[.5,.2,0,1],
             'D':[.5,.2,0,1]})
    return df


#def heatmap(data, *args, **kargs):
#    """alias to Heatmap class"""
#    h = Heatmap(data, *args, **kargs)
#    h.plot()
#    return h


[docs]class Heatmap(Linkage): """Heatmap and dendograms of an input matrix A heat map is an image representation of a matrix with a dendrogram added to the left side and to the top. Typically, reordering of the rows and columns according to some set of values (row or column means) within the restrictions imposed by the dendrogram is carried out. .. plot:: :include-source: :width: 50% from biokit.viz import heatmap df = heatmap.get_heatmap_df() h = heatmap.Heatmap(df) h.plot() .. warning:: in progress """ def __init__(self, data=None, row_method='complete', column_method='complete', row_metric='euclidean',column_metric='euclidean', cmap='yellow_black_blue', col_side_colors=None, row_side_colors=None, verbose=True ): """.. rubric:: constructor :param data: a dataframe or possibly a numpy matrix. .. todo:: if row_method id none, no ordering in the dendogram """ # should be a copy since it may be reshuffled ? try: if data is None and verbose is True: print("No data provided, please fill the `df` attribute manually") else: self._df = data.copy() except AttributeError as err: print("input must be a pandas data frame or numpy matrix") raise(err) self._row_method = row_method self._column_method = column_method self._column_metric = column_metric self._row_metric = row_metric # some default parameters self.cluster_criterion = 'distance' self.params = easydev.AttrDict() self.params.col_side_colors = ['r', 'g', 'b', 'y', 'w', 'k', 'm'] self.params.row_side_colors = ['r', 'g', 'b', 'y', 'w', 'k', 'm'] self.params.cmap = cmap self.category_row = None self.category_column = None if col_side_colors: self.params.col_side_colors = col_side_colors if row_side_colors: self.params.row_side_colors = row_side_colors def _get_df(self): return self._df def _set_df(self, data): self._df = data.copy() df = property(_get_df, _set_df) frame = property(_get_df, _set_df) def _get_row_method(self): return self._row_method def _set_row_method(self, value): self.check_method(value) self._row_method = value row_method = property(_get_row_method, _set_row_method) def _get_col_method(self): return self._column_method def _set_col_method(self, value): self.check_method(value) self._column_method = value column_method = property(_get_col_method, _set_col_method) def _get_col_metric(self): return self._column_metric def _set_col_metric(self, value): self.check_metric(value) self._column_metric = value column_metric = property(_get_col_metric, _set_col_metric) def _get_row_metric(self): return self._row_metric def _set_row_metric(self, value): self.check_metric(value) self._row_metric = value row_metric = property(_get_row_metric, _set_row_metric)
[docs] def plot(self, num=1, cmap=None, colorbar=True, vmin=None, vmax=None, colorbar_position='right', gradient_span='None' ): """ :param gradient_span: None is default in R Using:: df = pd.DataFrame({'A':[1,0,1,1], 'B':[.9,0.1,.6,1], 'C':[.5,.2,0,1], 'D':[.5,.2,0,1]}) and :: h = Heatmap(df) h.plot(vmin=0, vmax=1.1) we seem to get the same as in R wiht :: df = data.frame(A=c(1,0,1,1), B=c(.9,.1,.6,1), C=c(.5,.2,0,1), D=c(.5,.2,0,1)) heatmap((as.matrix(df)), scale='none') .. todo:: right now, the order of cols and rows is random somehow. could be ordered like in heatmap (r) byt mean of the row and col or with a set of vector for col and rows. heatmap((as.matrix(df)), Rowv=c(3,2), Colv=c(1), scale='none') gives same as:: df = get_heatmap_df() h = heatmap.Heatmap(df) h.plot(vmin=-0, vmax=1.1) """ # save all parameters in a dict layout = {} if cmap is None: cmap = self.params.cmap try:cmap = colormap.cmap_builder(cmap) except:pass # keep track of row and column names for later. row_header = self.frame.index column_header = self.frame.columns # FIXME something clever for the fontsize if len(row_header) > 100 or len(column_header) > 100: matplotlib.rcParams['font.size'] = 6 if len(row_header) > 50 or len(column_header) > 50: matplotlib.rcParams['font.size'] = 7 else: matplotlib.rcParams['font.size'] = 12 # scaling min/max range self.gradient_span = gradient_span #'only_max' # min_to_max, min_to_max_centered, only_max, only_min if self.gradient_span == 'min_to_max_centered': vmax = max([vmax, abs(vmin)]) vmin = vmax * -1 if self.gradient_span == 'only_max': vmin = 0 vmax = self.frame.max().max() if self.gradient_span == 'only_min': vmin = self.frame.min().min() vmax = 0 norm = matplotlib.colors.Normalize(vmin, vmax) # Scale the figure window size # fig = pylab.figure(num=num, figsize=(12, 8)) fig.clf() # LAYOUT -------------------------------------------------- # ax1 (dendrogram 1) on the left of the heatmap [ax1_x, ax1_y, ax1_w, ax1_h] = [0.05, 0.22, 0.2, 0.6] width_between_ax1_axr = 0.004 # distance between the top color bar axis and the matrix height_between_ax1_axc = 0.004 # Sufficient size to show color_bar_w = 0.015 # axr, placement of row side colorbar # second to last controls the width of the side color bar - 0.015 when showing [axr_x, axr_y, axr_w, axr_h] = [0.31, 0.1, color_bar_w, 0.6] axr_x = ax1_x + ax1_w + width_between_ax1_axr axr_y = ax1_y; axr_h = ax1_h width_between_axr_axm = 0.004 # axc, placement of column side colorbar # # last one controls the hight of the top color bar - 0.015 when showing [axc_x, axc_y, axc_w, axc_h] = [0.4, 0.63, 0.5, color_bar_w] axc_x = axr_x + axr_w + width_between_axr_axm axc_y = ax1_y + ax1_h + height_between_ax1_axc height_between_axc_ax2 = 0.004 # axm, placement of heatmap for the data matrix # why larger than 1? [axm_x, axm_y, axm_w, axm_h] = [0.4, 0.9, 2.5, 0.5] axm_x = axr_x + axr_w + width_between_axr_axm axm_y = ax1_y; axm_h = ax1_h axm_w = axc_w # ax2 (dendrogram 2), on the top of the heatmap # [ax2_x, ax2_y, ax2_w, ax2_h] = [0.3, 0.72, 0.6, 0.15] ax2_x = axr_x + axr_w + width_between_axr_axm ax2_y = ax1_y + ax1_h + height_between_ax1_axc + axc_h + height_between_axc_ax2 ax2_w = axc_w # axcb - placement of the color legend # if colorbar_position == 'top left': [axcb_x, axcb_y, axcb_w, axcb_h] = [0.07, 0.88, 0.18, 0.09] elif colorbar_position == 'right': [axcb_x, axcb_y, axcb_w, axcb_h] = [0.85, 0.2, 0.08, 0.6] else: raise ValueError("'top left' or 'right' accepted for now") # COMPUTATION DENDOGRAM 1 ------------------------------------- if self.column_method: Y = self.linkage(self.frame.transpose(),self.column_method, self.column_metric ) ax2 = fig.add_axes([ax2_x, ax2_y, ax2_w, ax2_h], frame_on=True) Z = hierarchy.dendrogram(Y) ind2 = hierarchy.fcluster(Y, 0.7*max(Y[:,2]), self.cluster_criterion) ax2.set_xticks([]) ax2.set_yticks([]) # apply the clustering for the array-dendrograms to the actual matrix data idx2 = Z['leaves'] self.frame = self.frame.iloc[:,idx2] # reorder the flat cluster to match the order of the leaves the dendrogram ind2 = ind2[idx2] layout['dendogram2'] = ax2 else: idx2 = range(self.frame.shape[1]) # COMPUTATION DENDOGRAM 2 --------------------------------- if self.row_method: Y = self.linkage(self.frame, self.row_method, self.row_metric ) ax1 = fig.add_axes([ax1_x, ax1_y, ax1_w, ax1_h], frame_on=True) Z = hierarchy.dendrogram(Y, orientation='right') ind1 = hierarchy.fcluster(Y, 0.7*max(Y[:,2]), self.cluster_criterion) ax1.set_xticks([]) ax1.set_yticks([]) # apply the clustering for the array-dendrograms to the actual matrix data idx1 = Z['leaves'] self.frame = self.frame.iloc[idx1,:] # reorder the flat cluster to match the order of the leaves the dendrogram ind1 = ind1[idx1] layout['dendogram1'] = ax1 else: idx1 = range(self.frame.shape[0]) # HEATMAP itself axm = fig.add_axes([axm_x, axm_y, axm_w, axm_h]) axm.imshow(self.frame, aspect='auto', origin='lower', interpolation='None', cmap=cmap, norm=norm) axm.set_xticks([]) axm.set_yticks([]) layout['heatmap'] = axm # TEXT new_row_header = [] new_column_header = [] for i in range(self.frame.shape[0]): axm.text(self.frame.shape[1]-0.5, i, ' ' + str(row_header[idx1[i]]), verticalalignment="center") new_row_header.append(row_header[idx1[i]] if self.row_method else row_header[i]) for i in range(self.frame.shape[1]): axm.text(i, -0.9, ' '+str(column_header[idx2[i]]), rotation=90, verticalalignment="top", horizontalalignment="center") new_column_header.append(column_header[idx2[i]] if self.column_method else column_header[i]) # CATEGORY column ------------------------------ if self.category_column: axc = fig.add_axes([axc_x, axc_y, axc_w, axc_h]) cmap_c = matplotlib.colors.ListedColormap(self.params.col_side_colors) category_col = [self.category_column[self.df.columns[i]] for i in idx2] dc = np.array(category_col, dtype=int) dc.shape = (1,len(ind2)) axc.matshow(dc, aspect='auto', origin='lower', cmap=cmap_c) axc.set_xticks([]) axc.set_yticks([]) layout['category_column'] = axc # CATEGORY row ------------------------------- if self.category_row: axr = fig.add_axes([axr_x, axr_y, axr_w, axr_h]) # self.category_row must be a dictionary with names as found in the columns # of the dataframe. category_row = [self.category_row[self.df.columns[i]] for i in idx1] dr = np.array(category_row, dtype=int) dr.shape = (len(category_row),1) cmap_r = matplotlib.colors.ListedColormap(self.params.col_side_colors) axr.matshow(dr, aspect='auto', origin='lower', cmap=cmap_r) axr.set_xticks([]) axr.set_yticks([]) layout['category_row'] = axr # COLORBAR ---------------------- if colorbar == True: axcb = fig.add_axes([axcb_x, axcb_y, axcb_w, axcb_h], frame_on=False) if colorbar_position == 'right': orientation = 'vertical' else: orientation = 'horizontal' cb = matplotlib.colorbar.ColorbarBase(axcb, cmap=cmap, norm=norm, orientation=orientation) #axcb.set_title("whatever") #max_cb_ticks = 5 #axcb.xaxis.set_major_locator(matplotlib.ticker.MaxNLocator(max_cb_ticks)) layout['colorbar'] = cb # could be useful self.d = {'ordered': self.frame.copy(), 'rorder': idx1, 'corder': idx2} return layout