Plots with matplotlib

import matplotlib.pyplot as plt
import seaborn as sns
# Start with seaborn's "darkgrid" look for the first plots.
sns.set_style("darkgrid")

# Make graphics sharper on a good (high-resolution) screen by requesting
# 'retina' output in addition to plain 'png'.
from matplotlib_inline.backend_inline import set_matplotlib_formats
set_matplotlib_formats('retina', 'png')
plt.scatter(penguins.bill_length_mm, penguins.flipper_length_mm) ;  # trailing semicolon keeps the notebook from echoing the call's return value

# Switch the seaborn style to "ticks" for the plots that follow.
sns.set_style("ticks")
# Other style names that can be passed to sns.set_style instead:
# sns.set_style("darkgrid")
# sns.set_style("whitegrid")
# sns.set_style("white")
# sns.set_style("dark")

# Same scatter plot as before, now drawn with the "ticks" style.
plt.scatter(penguins.bill_length_mm, penguins.flipper_length_mm)
# With no arguments, despine removes the top and right axes spines
# (seaborn's documented default).
sns.despine()

# Histogram of a single column.
plt.hist(penguins.bill_length_mm) ;

Better plotting with seaborn (on top of matplotlib)

# Columns are referenced by name; seaborn looks them up in `data`.
sns.scatterplot(data=penguins, x="bill_length_mm", y="flipper_length_mm") ;

# hue: color each point by its species.
sns.scatterplot(data=penguins, x="bill_length_mm", y="flipper_length_mm", hue="species") ;

# style: additionally vary the marker by sex.
sns.scatterplot(data=penguins, x="bill_length_mm", y="flipper_length_mm", hue="species", style="sex") ;

# size: additionally scale the marker size by body mass.
sns.scatterplot(data=penguins, x="bill_length_mm", y="flipper_length_mm", hue="species", 
                style="sex", size="body_mass_g") ;

def legend_outside():
    """Place the legend via plt.legend, anchored at (1.05, 1).

    The legend's 'upper left' corner is pinned to the bbox_to_anchor
    point (1.05, 1) with no padding between the axes and the legend
    border, which moves the legend out of the plotting area.
    """
    placement = {
        "bbox_to_anchor": (1.05, 1),
        "loc": 'upper left',
        "borderaxespad": 0,
    }
    plt.legend(**placement)
# Scatter plot using hue, style and size together, so the legend has many entries.
sns.scatterplot(data=penguins, x="bill_length_mm", y="flipper_length_mm", hue="species", style="sex", size="body_mass_g") ;

# Re-place the legend using the helper's anchor at (1.05, 1).
legend_outside()

# Title and axis labels; both axis labels use the same
# "<quantity> (mm)" form (the x label previously had a stray "in").
plt.title("Penguin measurements")
plt.ylabel("flipper length (mm)")
plt.xlabel("bill length (mm)") ;

Wide or long data

Wide format data:

# Wide format: each measurement lives in its own column.
penguins[['bill_length_mm', 'bill_depth_mm']]
bill_length_mm bill_depth_mm
0 39.1 18.7
1 39.5 17.4
2 40.3 18.0
3 NaN NaN
4 36.7 19.3
... ... ...
339 NaN NaN
340 46.8 14.3
341 50.4 15.7
342 45.2 14.8
343 49.9 16.1

344 rows × 2 columns

Long format data:

# Stack the two bill columns into long format: the former column name goes
# into "variable" and the measurement into "value".
long_df = penguins.melt(value_vars=['bill_length_mm', 'bill_depth_mm'])
long_df
variable value
0 bill_length_mm 39.1
1 bill_length_mm 39.5
2 bill_length_mm 40.3
3 bill_length_mm NaN
4 bill_length_mm 36.7
... ... ...
683 bill_depth_mm NaN
684 bill_depth_mm 14.3
685 bill_depth_mm 15.7
686 bill_depth_mm 14.8
687 bill_depth_mm 16.1

688 rows × 2 columns

Long format is required when you want several columns to appear as a single “variable” in the plot, as in the example below, where the color “variable” reflects whether a point is a bill_depth_mm or a bill_length_mm measurement.

Retain other information for each observation:

# id_vars are carried along as identifier columns next to each
# variable/value pair, so every melted row keeps its species, mass and island.
long_df = penguins.melt(id_vars=['species', 'body_mass_g', 'island'], value_vars=['bill_length_mm', 'bill_depth_mm'])
long_df
species body_mass_g island variable value
0 Adelie 3750.0 Torgersen bill_length_mm 39.1
1 Adelie 3800.0 Torgersen bill_length_mm 39.5
2 Adelie 3250.0 Torgersen bill_length_mm 40.3
3 Adelie NaN Torgersen bill_length_mm NaN
4 Adelie 3450.0 Torgersen bill_length_mm 36.7
... ... ... ... ... ...
683 Gentoo NaN Biscoe bill_depth_mm NaN
684 Gentoo 4850.0 Biscoe bill_depth_mm 14.3
685 Gentoo 5750.0 Biscoe bill_depth_mm 15.7
686 Gentoo 5200.0 Biscoe bill_depth_mm 14.8
687 Gentoo 5400.0 Biscoe bill_depth_mm 16.1

688 rows × 5 columns

# The melted "variable" column drives the color, so bill length and bill
# depth appear as two hue groups on the same axes.
sns.scatterplot(data=long_df, x='body_mass_g', y='value', hue='variable', style='species')
legend_outside()

# Box plots grouped by species on the x axis, split by measurement via hue.
sns.boxplot(data=long_df, x='species', y='value', hue='variable') ;

# Roles swapped: measurement on the x axis, species as hue.
sns.boxplot(data=long_df, x='variable', y='value', hue='species') ;

# Same call as the first box plot above.
sns.boxplot(data=long_df, x='species', y='value', hue='variable') ;

Plots with multiple facets

# Build the grid only (one column per island); no plotting function has
# been mapped onto it yet.
g = sns.FacetGrid(penguins, col="island")
g ;

Map plotting to each facet:

# One column per island, colored by species; g.map calls sns.scatterplot
# for each facet with the two named columns.
g = sns.FacetGrid(penguins, col="island", hue="species") ;
g.map(sns.scatterplot, "bill_length_mm", "flipper_length_mm") ;

Grid of facets representing combinations of two variables:

# Rows split by sex, columns by island; species still sets the color.
g = sns.FacetGrid(penguins, row="sex", col="island", hue="species") ;
g.map(sns.scatterplot, "bill_length_mm", "flipper_length_mm") ;

# Same grid layout, with sns.regplot mapped onto each facet instead.
g = sns.FacetGrid(penguins, row="sex", col="island", hue="species") ;
g.map(sns.regplot, "bill_length_mm", "flipper_length_mm") ;

# sns.lmplot takes the faceting arguments (row/col/hue) directly in one call.
sns.lmplot(data=penguins, x="bill_length_mm", y="flipper_length_mm", row="sex", col="island", hue="species", height=3) ;

FacetGrid.map vs. FacetGrid.map_dataframe

When you use FacetGrid.map(func, "col1", "col2", ...), the function func is passed the values of the columns "col1" and "col2" (and more if needed) as parameters 1 and 2 (args[0], args[1], …). In addition, the function always receives a keyword argument named color=.

def scatter(*args, **kwargs):
    """Adapter for FacetGrid.map: scatter the first two positional arguments.

    Only args[0] and args[1] are used (as the x and y inputs to
    plt.scatter); any further positional arguments are ignored. All keyword
    arguments are forwarded unchanged. Returns whatever plt.scatter returns.
    """
    x_values = args[0]
    y_values = args[1]
    return plt.scatter(x_values, y_values, **kwargs)
    
# Map the custom scatter() wrapper onto every facet of the grid.
g = sns.FacetGrid(penguins, row="sex", col="island", hue="species") ;
g.map(scatter, "bill_length_mm", "flipper_length_mm") ;

When you use FacetGrid.map_dataframe(func, "col1", "col2", ...), the function func is passed the names "col1" and "col2" (and more if needed) as parameters 1 and 2 (args[0], args[1], …), and the filtered dataframe as keyword argument data=. In addition, the function always receives a keyword argument named color=.

def scatterplot(*args, **kwargs):
    """Adapter for FacetGrid.map_dataframe: forward to sns.scatterplot.

    args[0] and args[1] are passed on as the x= and y= keyword arguments;
    any further positional arguments are ignored. All keyword arguments are
    forwarded unchanged. Returns whatever sns.scatterplot returns.
    """
    x_name = args[0]
    y_name = args[1]
    return sns.scatterplot(x=x_name, y=y_name, **kwargs)

# Map the custom scatterplot() wrapper onto every facet via map_dataframe.
g = sns.FacetGrid(penguins, row="sex", col="island", hue="species") ;
g.map_dataframe(scatterplot, "bill_length_mm", "flipper_length_mm") ;

# Single-variable plots per facet: histogram of bill length ...
g = sns.FacetGrid(penguins, row="sex", col="island", hue="species") ;
g.map(sns.histplot, "bill_length_mm") ;

# ... and the same column mapped with sns.kdeplot.
g = sns.FacetGrid(penguins, row="sex", col="island", hue="species") ;
g.map(sns.kdeplot, "bill_length_mm") ;

# pairplot on the whole dataframe, colored by species.
sns.pairplot(penguins, hue="species") ;

# Same pairplot, colored by sex instead.
sns.pairplot(penguins, hue="sex") ;