""".. rubric:: Scatter plots
:author: Thomas Cokelaer
"""
from pylab import scatter, hist, axes, clf
import pylab
import pandas as pd
from biokit.viz.core import VizInput2D
# Names exported by a star-import of this module.
__all__ = ["ScatterHist"]
class ScatterHist(VizInput2D):
    """Scatter plot of two vectors together with their two histograms.

    The data handling is delegated to :class:`VizInput2D`; this class only
    reads ``self.df`` (columns ``x`` and ``y``, optionally ``size`` and
    ``color``) and ``self.xy_names`` (the two axis labels).
    """

    # Accepted values for the *scatter_position* argument of :meth:`plot`.
    _POSITIONS = ('bottom left', 'bottom right', 'top left', 'top right')

    def __init__(self, x, y=None, verbose=True):
        """.. rubric:: constructor

        :param x: if only x is provided, it should be a dataframe with 2
            columns. The first one will be used as your X data, and the
            second one as the Y data. If **size** and/or **color** columns
            are found in the dataframe, :meth:`plot` uses them for the
            marker sizes and colors.
        :param y: if x is a list or an array, then y must also be provided
            as a list or an array.
        :param verbose: forwarded unchanged to the parent constructor.
        """
        super(ScatterHist, self).__init__(x, y, verbose)

    @staticmethod
    def _layout(scatter_position, width, height, offset_x, offset_y, gap):
        """Return the three axes rectangles for a given scatter position.

        :return: a tuple ``(scatter_rect, histx_rect, histy_rect)`` where
            each item is ``(left, bottom, width, height)`` in figure
            fraction. The caller must have validated *scatter_position*.
        """
        # The histograms take whatever is left once the margins (counted
        # on both sides), the scatter plot and the gap are removed.
        hist_w = 1 - offset_x * 2 - width - gap
        hist_h = 1 - offset_y * 2 - height - gap

        vertical, horizontal = scatter_position.split()

        if horizontal == 'left':
            scatter_left = offset_x
            hist_left = offset_x + width + gap
        else:
            scatter_left = offset_x + hist_w + gap
            hist_left = offset_x

        if vertical == 'bottom':
            scatter_bottom = offset_y
            hist_bottom = offset_y + height + gap
        else:
            scatter_bottom = offset_y + hist_h + gap
            hist_bottom = offset_y

        return (
            (scatter_left, scatter_bottom, width, height),
            (scatter_left, hist_bottom, width, hist_h),
            (hist_left, scatter_bottom, hist_w, height),
        )

    def plot(self,
             kargs_scatter=None,
             kargs_grids=None,
             kargs_histx=None,
             kargs_histy=None,
             scatter_position='bottom left',
             width=.5,
             height=.5,
             offset_x=.10,
             offset_y=.10,
             gap=0.06,
             facecolor='lightgrey',
             grid=True,
             show_labels=True,
             **kargs):
        """Scatter plot of set of 2 vectors and their histograms.

        None of the dictionaries given as arguments is modified: the method
        works on private copies.

        :param kargs_scatter: a dictionary with pairs of key/value accepted
            by the matplotlib scatter function, for example a list of colors
            or a list of sizes as shown in the examples below. Defaults to
            ``{'s': 20, 'c': 'b'}``. If the data contain a **size** and/or
            a **color** column, those columns override the **s** and **c**
            keys.
        :param kargs_grids: a dictionary with pairs of key/value accepted by
            the matplotlib grid function (applied on the scatter plot and
            on both histograms).
        :param kargs_histx: a dictionary with pairs of key/value accepted by
            the matplotlib hist function (histogram of the X data).
        :param kargs_histy: a dictionary with pairs of key/value accepted by
            the matplotlib hist function (histogram of the Y data). The
            **orientation** key is always forced to ``'horizontal'``.
        :param scatter_position: can be
            'bottom right/bottom left/top left/top right'
        :param width: width of the scatter plot (value between 0 and 1)
        :param height: height of the scatter plot (value between 0 and 1)
        :param offset_x: horizontal margin, as a fraction of the figure
            width, kept on each side of the layout.
        :param offset_y: vertical margin, as a fraction of the figure
            height, kept on each side of the layout.
        :param gap: gap between the scatter and histogram plots.
        :param facecolor: background color of the three axes.
        :param grid: if True (default), draw the major grid on the three
            axes.
        :param show_labels: if True (default), label the scatter plot axes
            with the names of the two variables.
        :param kargs: the only optional parameter read is **hold**; unless
            it is set, the current figure is cleared before plotting.
        :return: the scatter, histogram1 and histogram2 axes.
        :raises ValueError: if *scatter_position* is not one of the four
            accepted values.

        .. plot::
            :include-source:
            :width: 50%

            import pylab
            import pandas as pd
            X = pylab.randn(1000)
            Y = pylab.randn(1000)
            df = pd.DataFrame({'X':X, 'Y':Y})

            from biokit.viz import ScatterHist
            ScatterHist(df).plot()

        .. plot::
            :include-source:
            :width: 50%

            from biokit.viz import ScatterHist
            ScatterHist(x=[1,2,3,4], y=[3,5,6,4]).plot(
                kargs_scatter={
                    's':[200,400,600,800],
                    'c': ['red', 'green', 'blue', 'yellow'],
                    'alpha':0.5},
                kargs_histx={'color': 'red'},
                kargs_histy={'color': 'green'})

        .. seealso:: `notebook <http://nbviewer.ipython.org/github/biokit/biokit/blob/master/notebooks/viz/biokit.viz examples.ipynb>`_

        """
        # Validate before touching the figure so that a bad argument does
        # not wipe an existing plot.
        if scatter_position not in self._POSITIONS:
            raise ValueError("scatter_position must be 'top left', 'top right', 'bottom left', 'bottom right'")

        # Work on copies: these dictionaries are updated below, and neither
        # the caller's objects nor shared defaults must be altered.
        if kargs_scatter is None:
            kargs_scatter = {'s': 20, 'c': 'b'}
        else:
            kargs_scatter = dict(kargs_scatter)
        kargs_grids = dict(kargs_grids) if kargs_grids is not None else {}
        kargs_histx = dict(kargs_histx) if kargs_histx is not None else {}
        kargs_histy = dict(kargs_histy) if kargs_histy is not None else {}

        df = self.df

        # Optional per-point marker size / color stored alongside the data.
        for column, key in (('size', 's'), ('color', 'c')):
            if column in df.columns:
                kargs_scatter[key] = df[column]

        if kargs.get("hold", False) is False:
            pylab.clf()

        scatter_rect, histx_rect, histy_rect = self._layout(
            scatter_position, width, height, offset_x, offset_y, gap)

        ax_scatter = axes(scatter_rect, facecolor=facecolor,
                          xscale='linear', yscale='linear')
        if show_labels:
            ax_scatter.set_xlabel(self.xy_names[0])
            ax_scatter.set_ylabel(self.xy_names[1])

        ax_hist_x = axes(histx_rect, facecolor=facecolor,
                         xscale='linear', yscale='linear')
        ax_hist_y = axes(histy_rect, facecolor=facecolor,
                         xscale='linear', yscale='linear')

        # Move ticks away from the neighbouring axes where needed.
        ax_hist_x.xaxis.set_ticks_position('top')
        if scatter_position == 'bottom left':
            ax_scatter.yaxis.set_ticks_position('left')
            ax_hist_x.yaxis.set_ticks_position('right')
        elif scatter_position == 'bottom right':
            ax_hist_y.yaxis.set_ticks_position('left')
        elif scatter_position == 'top right':
            ax_scatter.xaxis.set_ticks_position('top')
            ax_scatter.yaxis.set_ticks_position('right')
            ax_hist_y.yaxis.set_ticks_position('left')
            ax_hist_x.xaxis.set_ticks_position('bottom')
        else:  # 'top left'
            ax_scatter.xaxis.set_ticks_position('top')
            ax_hist_y.yaxis.set_ticks_position('right')
            ax_hist_x.xaxis.set_ticks_position('bottom')

        ax_scatter.scatter(df.x, df.y, **kargs_scatter)
        ax_hist_x.hist(df.x, **kargs_histx)

        # fixme: user may not want that ?
        kargs_histy['orientation'] = 'horizontal'
        ax_hist_y.hist(df.y, **kargs_histy)

        # Freeze the tick locations of the Y histogram and rotate their
        # labels; done on the axes itself rather than on the current axes.
        ax_hist_y.set_xticks(ax_hist_y.get_xticks())
        for tick_label in ax_hist_y.get_xticklabels():
            tick_label.set_rotation(90)

        if grid:
            # The on/off flag is passed positionally because its keyword
            # name differs between matplotlib versions.
            for axis in (ax_scatter, ax_hist_x, ax_hist_y):
                axis.grid(True, which='major', axis='both', **kargs_grids)

        return (ax_scatter, ax_hist_x, ax_hist_y)