import matplotlib.pyplot as plt
import seaborn as sns
# make graphics sharper on a good screen
from matplotlib_inline.backend_inline import set_matplotlib_formats
# request both the 'retina' and 'png' inline figure formats
set_matplotlib_formats('retina', 'png')
Plots with matplotlib
plt.scatter(penguins.bill_length_mm, penguins.flipper_length_mm) ; # semi colon makes last value None
# NOTE(review): the style name was missing from this call in the source;
# "ticks" is presumed because it is the only built-in seaborn style that is
# not listed among the commented-out alternatives below -- confirm.
sns.set_style("ticks")
# sns.set_style("darkgrid")
# sns.set_style("whitegrid")
# sns.set_style("white")
# sns.set_style("dark")
plt.scatter(penguins.bill_length_mm, penguins.flipper_length_mm)
sns.despine()
plt.hist(penguins.bill_length_mm) ;
Better plotting with seaborn (on top of matplotlib)
# plain scatter plot of flipper length against bill length
sns.scatterplot(data=penguins, x="bill_length_mm", y="flipper_length_mm") ;
# same plot, with hue mapped to the species column
sns.scatterplot(data=penguins, x="bill_length_mm", y="flipper_length_mm", hue="species") ;
# additionally map marker style to the sex column
sns.scatterplot(data=penguins, x="bill_length_mm", y="flipper_length_mm", hue="species", style="sex") ;
# additionally map marker size to the body_mass_g column
sns.scatterplot(data=penguins, x="bill_length_mm", y="flipper_length_mm", hue="species",
                style="sex", size="body_mass_g") ;
def legend_outside():
    """Draw the legend with its upper-left corner anchored at (1.05, 1).

    Calls ``plt.legend`` with ``bbox_to_anchor=(1.05, 1)``,
    ``loc='upper left'`` and ``borderaxespad=0``. The function takes no
    arguments and returns ``None``.
    """
    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0)
sns.scatterplot(data=penguins, x="bill_length_mm", y="flipper_length_mm", hue="species", style="sex", size="body_mass_g") ;
# title and axis labels for the plot drawn above
plt.title("Penguin measurements")
plt.ylabel("flipper length (mm)")
plt.xlabel("bill length in (mm)") ;
Wide or long data
Wide format data:
penguins[['bill_length_mm', 'bill_depth_mm']]
| bill_length_mm | bill_depth_mm |
0 | 39.1 | 18.7 |
1 | 39.5 | 17.4 |
2 | 40.3 | 18.0 |
3 | NaN | NaN |
4 | 36.7 | 19.3 |
... | ... | ... |
339 | NaN | NaN |
340 | 46.8 | 14.3 |
341 | 50.4 | 15.7 |
342 | 45.2 | 14.8 |
343 | 49.9 | 16.1 |
344 rows × 2 columns
Long format data:
# melt the two bill columns into 'variable' / 'value' columns
long_df = penguins.melt(value_vars=['bill_length_mm', 'bill_depth_mm'])
long_df
| variable | value |
0 | bill_length_mm | 39.1 |
1 | bill_length_mm | 39.5 |
2 | bill_length_mm | 40.3 |
3 | bill_length_mm | NaN |
4 | bill_length_mm | 36.7 |
... | ... | ... |
683 | bill_depth_mm | NaN |
684 | bill_depth_mm | 14.3 |
685 | bill_depth_mm | 15.7 |
686 | bill_depth_mm | 14.8 |
687 | bill_depth_mm | 16.1 |
688 rows × 2 columns
Long format is required when you want a number of columns to appear as a single “variable” in the plot, as in the example below, where the color “variable” reflects whether the point is bill_depth_mm or bill_length_mm.
Retain other information for each observation:
# keep species, body_mass_g and island as identifier columns while melting
long_df = penguins.melt(id_vars=['species', 'body_mass_g', 'island'], value_vars=['bill_length_mm', 'bill_depth_mm'])
long_df
| species | body_mass_g | island | variable | value |
0 | Adelie | 3750.0 | Torgersen | bill_length_mm | 39.1 |
1 | Adelie | 3800.0 | Torgersen | bill_length_mm | 39.5 |
2 | Adelie | 3250.0 | Torgersen | bill_length_mm | 40.3 |
3 | Adelie | NaN | Torgersen | bill_length_mm | NaN |
4 | Adelie | 3450.0 | Torgersen | bill_length_mm | 36.7 |
... | ... | ... | ... | ... | ... |
683 | Gentoo | NaN | Biscoe | bill_depth_mm | NaN |
684 | Gentoo | 4850.0 | Biscoe | bill_depth_mm | 14.3 |
685 | Gentoo | 5750.0 | Biscoe | bill_depth_mm | 15.7 |
686 | Gentoo | 5200.0 | Biscoe | bill_depth_mm | 14.8 |
687 | Gentoo | 5400.0 | Biscoe | bill_depth_mm | 16.1 |
688 rows × 5 columns
sns.scatterplot(data=long_df, x='body_mass_g', y='value', hue='variable', style='species')
legend_outside()
sns.boxplot(data=long_df, x='species', y='value', hue='variable') ;
sns.boxplot(data=long_df, x='variable', y='value', hue='species') ;
sns.boxplot(data=long_df, x='species', y='value', hue='variable') ;
Plots with multiple facets
# one facet per value of the island column; nothing is plotted on them yet
g = sns.FacetGrid(penguins, col="island")
g ;
Map plotting to each facet:
g = sns.FacetGrid(penguins, col="island", hue="species") ;
g.map(sns.scatterplot, "bill_length_mm", "flipper_length_mm") ;
Grid of facets representing combinations of two variables:
g = sns.FacetGrid(penguins, row="sex", col="island", hue="species") ;
g.map(sns.scatterplot, "bill_length_mm", "flipper_length_mm") ;
g = sns.FacetGrid(penguins, row="sex", col="island", hue="species") ;
g.map(sns.regplot, "bill_length_mm", "flipper_length_mm") ;
sns.lmplot(data=penguins, x="bill_length_mm", y="flipper_length_mm", row="sex", col="island", hue="species", height=3) ;
FacetGrid.map vs. FacetGrid.map_dataframe
When you use FacetGrid.map(func, "col1", "col2", ...), the function func is passed the values of the columns "col1" and "col2" (and more if needed) as parameters 1 and 2 (args[0], args[1], …). In addition, the function always receives a keyword argument named color=.
def scatter(*args, **kwargs):
    """Scatter-plot the first two positional arguments with matplotlib.

    Args:
        *args: ``args[0]`` is used as the x data and ``args[1]`` as the y
            data; any further positional arguments are ignored.
        **kwargs: forwarded unchanged to ``plt.scatter``.

    Returns:
        Whatever ``plt.scatter`` returns.
    """
    return plt.scatter(args[0], args[1], **kwargs)
g = sns.FacetGrid(penguins, row="sex", col="island", hue="species") ;
g.map(scatter, "bill_length_mm", "flipper_length_mm") ;
When you use FacetGrid.map_dataframe(func, "col1", "col2", ...), the function func is passed the names "col1" and "col2" (and more if needed) as parameters 1 and 2 (args[0], args[1], …), and the filtered dataframe as keyword argument data=. In addition, the function always receives a keyword argument named color=.
def scatterplot(*args, **kwargs):
    """Draw a seaborn scatter plot from the first two positional arguments.

    Args:
        *args: ``args[0]`` is passed as ``x=`` and ``args[1]`` as ``y=``;
            any further positional arguments are ignored.
        **kwargs: forwarded unchanged to ``sns.scatterplot``.

    Returns:
        Whatever ``sns.scatterplot`` returns.
    """
    return sns.scatterplot(x=args[0], y=args[1], **kwargs)
g = sns.FacetGrid(penguins, row="sex", col="island", hue="species") ;
g.map_dataframe(scatterplot, "bill_length_mm", "flipper_length_mm") ;
g = sns.FacetGrid(penguins, row="sex", col="island", hue="species") ;
g.map(sns.histplot, "bill_length_mm") ;
g = sns.FacetGrid(penguins, row="sex", col="island", hue="species") ;
g.map(sns.kdeplot, "bill_length_mm") ;
sns.pairplot(penguins, hue="species") ;
sns.pairplot(penguins, hue="sex") ;