Hans Rosling
Most people do not understand the current world situation
Chimpanzee test
Wrapper around Matplotlib
import pandas as pd
sample = pd.Series([1,2,4,8,16])
sample.plot()
<AxesSubplot:>
# Each inner list becomes one row: row 0 holds powers of two, row 1 powers of three.
# NOTE(review): 3**4 is 81, so the trailing 71 looks like a typo -- confirm.
sample_df = pd.DataFrame( [[1,2,4,8,16],[1,3,9,27,71]])
sample_df.plot()
<AxesSubplot:>
sample_df.T.plot.line()
<AxesSubplot:>
# Each dict key becomes a column, so every column plots as its own line.
# NOTE(review): 'three' follows powers of three except the last entry
# (3**4 is 81, not 71) -- confirm whether 71 is intentional.
sample_df = pd.DataFrame( {'two': [1,2,4,8,16],
'three': [1,3,9,27,71]})
sample_df.plot.line()
<AxesSubplot:>
sample_df.plot()
<AxesSubplot:>
sample_df.head()
two | three | |
---|---|---|
0 | 1 | 1 |
1 | 2 | 3 |
2 | 4 | 9 |
3 | 8 | 27 |
4 | 16 | 71 |
DataFrame.plot([x, y, kind, ax, …])
DataFrame.plot.area([x, y])
DataFrame.plot.bar([x, y])
DataFrame.plot.barh([x, y])
DataFrame.plot.box([by])
DataFrame.plot.density([bw_method, ind])
DataFrame.plot.hexbin(x, y[, C, …])
DataFrame.plot.hist([by, bins])
DataFrame.plot.kde([bw_method, ind])
DataFrame.plot.line([x, y])
DataFrame.plot.pie([y])
DataFrame.plot.scatter(x, y[, s, c])
DataFrame.boxplot([column, by, ax, …])
DataFrame.hist([column, by, grid, …])
State machine
Function and Object interface
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
figure = plt.figure(figsize=(20,6)) # figsize is (width, height) in inches: 20 wide, 6 tall
axes1 = figure.add_subplot(2, 2, 1) # add_subplot(nrows, ncols, index, **kwargs): slot 1 of a 2x2 grid
axes2 = figure.add_subplot(2, 2, 2)
axes3 = figure.add_subplot(2, 2, 3)
# Object interface: draw on explicitly chosen axes; `_` discards hist's return value.
_ = axes1.hist(np.random.randn(100), bins=20, color='k', alpha=0.3)
axes2.scatter(np.arange(30), np.arange(30) + 3 * np.random.randn(30))
# pyplot (state-machine) interface: plt.plot draws on the current axes,
# presumably axes3 here since it was added last -- confirm.
plt.plot(np.random.randn(50).cumsum(), 'k--')
[<matplotlib.lines.Line2D at 0x7fe1e96528e0>]
figure = plt.figure(figsize=(20,6),dpi=500)
plt.plot(np.random.randn(50).cumsum(), 'k--')
[<matplotlib.lines.Line2D at 0x7fe1e96c2a00>]
Main entry point for object interface
Everything you see
x = np.arange(0, 10, 0.2)
y = np.sin(x)
figure, axes = plt.subplots(figsize=(20,6))
axes.plot(x, y)
plt.show()
matplotlib.pyplot.subplots(nrows=1, ncols=1, sharex=False, sharey=False, squeeze=True, subplot_kw=None, gridspec_kw=None, **fig_kw)
x = np.arange(0, 10, 0.2)
y = np.sin(x)
figure = plt.figure(figsize=(20,6))
axes = figure.subplots()
axes.plot(x, y)
plt.show()
savefig saves the current active figure to a file
x = np.arange(0, 10, 0.2)
y = np.sin(x)
figure, axes = plt.subplots(figsize=(20,6))
axes.plot(x, y)
plt.savefig('sample.png', dpi=400)
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
pokemon_df = pd.read_csv('Pokemon.csv', index_col=0)
pokemon_df.head()
Name | Type 1 | Type 2 | Total | HP | Attack | Defense | Sp. Atk | Sp. Def | Speed | Stage | Legendary | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
# | ||||||||||||
1 | Bulbasaur | Grass | Poison | 318 | 45 | 49 | 49 | 65 | 65 | 45 | 1 | False |
2 | Ivysaur | Grass | Poison | 405 | 60 | 62 | 63 | 80 | 80 | 60 | 2 | False |
3 | Venusaur | Grass | Poison | 525 | 80 | 82 | 83 | 100 | 100 | 80 | 3 | False |
4 | Charmander | Fire | NaN | 309 | 39 | 52 | 43 | 60 | 50 | 65 | 1 | False |
5 | Charmeleon | Fire | NaN | 405 | 58 | 64 | 58 | 80 | 65 | 80 | 2 | False |
sns.lmplot(x='Attack', y='Defense', data=pokemon_df)
<seaborn.axisgrid.FacetGrid at 0x7fe20074c940>
seaborn.lmplot( x, y, data, hue=None, col=None, row=None,
palette=None, col_wrap=None, height=5, aspect=1,
markers='o', sharex=True, sharey=True, hue_order=None,
col_order=None, row_order=None, legend=True, legend_out=True,
x_estimator=None, x_bins=None, x_ci='ci', scatter=True, fit_reg=True,
ci=95, n_boot=1000, units=None, order=1, logistic=False, lowess=False,
robust=False, logx=False, x_partial=None, y_partial=None, truncate=False,
x_jitter=None, y_jitter=None, scatter_kws=None, line_kws=None, size=None)
sns.lmplot(x='Attack', y='Defense', data=pokemon_df, height=7)
<seaborn.axisgrid.FacetGrid at 0x7fe218300070>
sns.lmplot(x='Attack', y='Defense', data=pokemon_df, aspect=0.5, height=7 )
<seaborn.axisgrid.FacetGrid at 0x7fe1e02f3280>
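Setting figsize does not work with lmplot: it is a figure-level function that creates its own figure, so the figure made by plt.figure stays empty. Use lmplot's height and aspect parameters to control the size instead.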
plt.figure(figsize=(20,6))
sns.lmplot(x='Attack', y='Defense', data=pokemon_df)
<seaborn.axisgrid.FacetGrid at 0x7fe20a188f40>
<Figure size 1440x432 with 0 Axes>
plt.figure(figsize=(20,6))
sns.regplot(x='Attack', y='Defense', data=pokemon_df)
<AxesSubplot:xlabel='Attack', ylabel='Defense'>
plt.figure(figsize=(20,6))
sns.residplot(x='Attack', y='Defense', data=pokemon_df)
<AxesSubplot:xlabel='Attack', ylabel='Defense'>
sns.lmplot(x='Attack', y='Defense', data=pokemon_df,
fit_reg=False, # No regression line
hue='Stage') # Color by evolution stage
<seaborn.axisgrid.FacetGrid at 0x7fe20a1bcd60>
sns.lmplot(x='Attack', y='Defense', data=pokemon_df,
fit_reg=False,
hue='Stage')
# Tweak using Matplotlib
plt.ylim(0, None)
plt.xlim(0, None)
(0.0, 140.45)
Use pandas to process the data before plotting
pokemon_df = pd.read_csv('Pokemon.csv', index_col=0)
plt.figure(figsize=(20,6)) # x axis = 20, y axis = 6
sns.boxplot(data= pokemon_df)
<AxesSubplot:>
plt.figure(figsize=(20,6))
stats_df = pokemon_df.drop(['Total', 'Stage', 'Legendary'], axis=1)
# New boxplot using stats_df
sns.boxplot(data=stats_df)
<AxesSubplot:>
Jupyter resets plot settings at the end of each cell
Except for some settings, e.g. a seaborn style chosen with sns.set_style stays in effect for later cells
Plot pairwise relationships in a dataset
pokemon_df = pd.read_csv('Pokemon.csv', index_col=0)
sns.pairplot(data=pokemon_df, vars=['Attack', 'Defense', 'Speed'], diag_kind='kde', plot_kws={'alpha': 0.2})
<seaborn.axisgrid.PairGrid at 0x7fe1d87babb0>
Available seaborn styles for sns.set_style: "white", "dark", "whitegrid", "darkgrid", "ticks"
plt.figure(figsize=(20,6))
sns.set_style('darkgrid')
# Violin plot
sns.violinplot(x='Type 1', y='Attack', data=pokemon_df)
<AxesSubplot:xlabel='Type 1', ylabel='Attack'>
/anaconda/lib/python3.6/site-packages/scipy/stats/stats.py:1713: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use arr[tuple(seq)]
instead of arr[seq]
. In the future this will be interpreted as an array index, arr[np.array(seq)]
, which will result either in an error or a different result.
return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval
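This warning comes from running an old SciPy (1.1 or earlier) with a newer NumPy (1.15 or later), which deprecated indexing with a non-tuple sequence; upgrading SciPy removes it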
# Hex color for each Pokemon type, keyed by type name.  Insertion order is
# significant: the palette list below keeps exactly this order.
_PKMN_COLOR_BY_TYPE = {
    'Grass': '#78C850',
    'Fire': '#F08030',
    'Water': '#6890F0',
    'Bug': '#A8B820',
    'Normal': '#A8A878',
    'Poison': '#A040A0',
    'Electric': '#F8D030',
    'Ground': '#E0C068',
    'Fairy': '#EE99AC',
    'Fighting': '#C03028',
    'Psychic': '#F85888',
    'Rock': '#B8A038',
    'Ghost': '#705898',
    'Ice': '#98D8D8',
    'Dragon': '#7038F8',
}

# Palette as a plain list of hex strings, in the order declared above.
pkmn_type_colors = list(_PKMN_COLOR_BY_TYPE.values())
plt.figure(figsize=(20,6))
sns.violinplot(x='Type 1', y='Attack', data=pokemon_df,
palette=pkmn_type_colors)
<AxesSubplot:xlabel='Type 1', ylabel='Attack'>
# First plot
plt.figure(figsize=(20,6))
sns.swarmplot(x='Type 1', y='Attack', data=pokemon_df,
palette=pkmn_type_colors)
<AxesSubplot:xlabel='Type 1', ylabel='Attack'>
plt.figure(figsize=(20,6))
sns.violinplot(x='Type 1', y='Attack', data=pokemon_df,
palette=pkmn_type_colors)
<AxesSubplot:xlabel='Type 1', ylabel='Attack'>
plt.figure(figsize=(20,6))
# Create plot
sns.violinplot(x='Type 1',
y='Attack',
data=pokemon_df,
inner=None, # Remove the bars inside the violins
palette=pkmn_type_colors)
sns.swarmplot(x='Type 1',
y='Attack',
data=pokemon_df,
color='k', # Make points black
alpha=0.7) # and slightly transparent
# Set title with matplotlib
plt.title('Attack by Type')
Text(0.5, 1.0, 'Attack by Type')
pokemon_df = pd.read_csv('Pokemon.csv', index_col=0)
stats_df = pokemon_df.drop(['Total', 'Stage', 'Legendary'], axis=1)
corr = stats_df.corr()
plt.figure(figsize=(20,6))
# Heatmap
sns.heatmap(corr)
<AxesSubplot:>
plt.figure(figsize=(20,6))
# NOTE: distplot is deprecated (it emits a FutureWarning when called); the
# warning recommends histplot (axes-level) or displot (figure-level) instead.
sns.distplot(pokemon_df.Attack)
/Users/rwhitney/opt/anaconda3/lib/python3.9/site-packages/seaborn/distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). warnings.warn(msg, FutureWarning)
<AxesSubplot:xlabel='Attack', ylabel='Density'>
plt.figure(figsize=(20,6))
sns.countplot(x='Type 1', data=pokemon_df, palette=pkmn_type_colors)
# Rotate x-labels
plt.xticks(rotation=-45)
(array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]), [Text(0, 0, 'Grass'), Text(1, 0, 'Fire'), Text(2, 0, 'Water'), Text(3, 0, 'Bug'), Text(4, 0, 'Normal'), Text(5, 0, 'Poison'), Text(6, 0, 'Electric'), Text(7, 0, 'Ground'), Text(8, 0, 'Fairy'), Text(9, 0, 'Fighting'), Text(10, 0, 'Psychic'), Text(11, 0, 'Rock'), Text(12, 0, 'Ghost'), Text(13, 0, 'Ice'), Text(14, 0, 'Dragon')])
sns.jointplot(x='Attack', y='Defense', data=pokemon_df)
<seaborn.axisgrid.JointGrid at 0x7fe1e0678b80>
mpg = sns.load_dataset("mpg")
mpg.tail()
mpg | cylinders | displacement | horsepower | weight | acceleration | model_year | origin | name | |
---|---|---|---|---|---|---|---|---|---|
393 | 27.0 | 4 | 140.0 | 86.0 | 2790 | 15.6 | 82 | usa | ford mustang gl |
394 | 44.0 | 4 | 97.0 | 52.0 | 2130 | 24.6 | 82 | europe | vw pickup |
395 | 32.0 | 4 | 135.0 | 84.0 | 2295 | 11.6 | 82 | usa | dodge rampage |
396 | 28.0 | 4 | 120.0 | 79.0 | 2625 | 18.6 | 82 | usa | ford ranger |
397 | 31.0 | 4 | 119.0 | 82.0 | 2720 | 19.4 | 82 | usa | chevy s-10 |
sns.lmplot(x='mpg', y='horsepower', data=mpg,fit_reg=False, hue='origin')
<seaborn.axisgrid.FacetGrid at 0x7fe20ad62670>
sns.lmplot(x='mpg', y='horsepower', data=mpg,fit_reg=False, hue='origin', height=6)
<seaborn.axisgrid.FacetGrid at 0x7fe20ad698b0>
sns.lmplot(x='mpg', y='weight', data=mpg, fit_reg=False,
hue='origin', height=6)
<seaborn.axisgrid.FacetGrid at 0x7fe20ac46fa0>
sns.lmplot(y='mpg', x='model_year', data=mpg,
hue='origin', height=6)
<seaborn.axisgrid.FacetGrid at 0x7fe1ea589f70>
plt.figure(figsize=(20,6))
# Box plot of the mpg column, one box per origin value
sns.boxplot(data=mpg, x='origin', y ='mpg')
<AxesSubplot:xlabel='origin', ylabel='mpg'>
mpg2019_df = pd.read_csv('2019mpg.csv', index_col=0)
mpg2019_df.head()
MfrName | Division | Carline | Verify Mfr Cd | Index (Model Type Index) | Eng Displ | # Cyl | Transmission | City FE (Guide) - Conventional Fuel | Hwy FE (Guide) - Conventional Fuel | ... | 120V Charge time at 120 Volts (hours) | PHEV Total Driving Range (rounded to nearest 10 miles)DISTANCE | City PHEV Composite MPGe | Hwy PHEV Composite MPGe | Comb PHEV Composite MPGe | Unnamed: 162 | Unnamed: 163 | Unnamed: 164 | Unnamed: 165 | Unnamed: 166 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Model Year | |||||||||||||||||||||
2019.0 | Honda | Acura | NSX | HNX | 41.0 | 3.5 | 6.0 | Auto(AM-S9) | 21.0 | 22.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
2019.0 | FCA US LLC | ALFA ROMEO | 4C | CRX | 403.0 | 1.8 | 4.0 | Auto(AM6) | 24.0 | 34.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
2019.0 | aston martin | Aston Martin Lagonda Ltd | Vantage V8 | ASX | 3.0 | 4.0 | 8.0 | Auto(S8) | 18.0 | 25.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
2019.0 | Volkswagen Group of | Audi | TT Roadster quattro | VGA | 51.0 | 2.0 | 4.0 | Auto(AM-S7) | 23.0 | 31.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
2019.0 | BMW | BMW | Z4 sDrive30i | BMX | 350.0 | 2.0 | 4.0 | Auto(S8) | 25.0 | 32.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
5 rows × 166 columns
plt.figure(figsize=(35,10))
sns.set(style="whitegrid")
sns.boxplot(data=mpg2019_df, x='MfrName', y ='mpg')
<AxesSubplot:xlabel='MfrName', ylabel='mpg'>