import numpy as np
import pandas as pd
# --- Creating a Series ----------------------------------------------------
# From a plain list; no index is given, so pandas supplies the default one.
ints = pd.Series([1, 3, 5, 6])
ints

# A scalar is repeated for every label of the explicit index.
pd.Series(5, index=list('abc'))

# From a NumPy array of random values; the index may be passed by keyword
# or as the second positional argument (spelled by keyword here).
pd.Series(np.random.randn(3), index=list('abc'))
pd.Series(np.random.randn(3), index=['a', 'c', 'd'])

# From a dict: the keys become the labels.
d = dict(a=0., b=1., c=2.)
pd.Series(d)
# An explicit index selects and orders the dict entries; 'd' is not a key
# of the dict, so that label has no value to pick up.
pd.Series(d, index=['b', 'c', 'd', 'a'])
# --- Reading and writing entries on the default integer index -------------
ints = pd.Series([1, 3, 5, 6])
ints[2]
ints[2] = 11
ints
ints.get(2)
ints.index
ints.dtype

# --- Missing labels: .get() versus [] --------------------------------------
ints = pd.Series([1, 3, 5, 6])
# .get() returns None (or the supplied default) when the label is absent.
x = ints.get(10)
print(x)
# [] raises KeyError for an absent label.  Catch it so the demonstration
# reports the error instead of aborting everything that follows.
try:
    ints[10]
except KeyError as err:
    print(f"KeyError: {err}")
ints.get(10, -1)
# --- Indexing a Series that has string labels ------------------------------
ints = pd.Series([1, 3, 5, 7], index=['a', 'b', 'c', 'd'])
ints
ints['b']                 # single label
ints[['a', 'c', 'd']]     # list of labels
# Positional access goes through .iloc.  Passing integers to [] on a
# label-indexed Series is deprecated as positional lookup (pandas >= 2.1)
# and is interpreted as a label lookup in later versions.
ints.iloc[0]
ints.iloc[[1, 2]]

# --- Membership tests -------------------------------------------------------
ints = pd.Series([1, 3, 5, 7], index=['a', 'b', 'c', 'd'])
'b' in ints   # `in` checks the index labels ...
3 in ints     # ... not the stored values
# --- Slicing by label --------------------------------------------------------
ints = pd.Series([1, 3, 5, 7], index=['a', 'b', 'c', 'd'])
ints['a':'c']
ints['c':]

# --- Slicing by position, then assigning through selections -----------------
ints = pd.Series([1, 3, 5, 7], index=['a', 'b', 'c', 'd'])
ints[1:3]
ints['a':'c'] = 0
ints
# Several labels must be given as a list.  The original `ints['b','d']`
# passed one tuple key ('b', 'd') rather than selecting the two labels.
ints[['b', 'd']] = 11
ints

# --- Assigning through a positional slice -----------------------------------
ints = pd.Series([1, 3, 5, 7], index=['a', 'b', 'c', 'd'])
ints[0:2] = 42
ints

# --- Label slice versus positional slice, same bounds "a..c" / "0..2" -------
ints = pd.Series([1, 3, 5, 7], index=['a', 'b', 'c', 'd'])
ints['a':'c'] = 0
ints
ints = pd.Series([1, 3, 5, 7], index=['a', 'b', 'c', 'd'])
ints[0:2] = 0
ints
# --- Boolean (mask) indexing -------------------------------------------------
ints = pd.Series([1, 3, 5, 7], index=['a', 'b', 'c', 'd'])
ints[ints > 3]                       # keep entries where the mask is True
ints                                 # the original Series is unchanged
ints[ints > ints.median()]
ints > ints.median()                 # the mask itself
ints[[False, False, True, True]]     # a plain list of booleans also works
# NOTE(review): the original ended with a dangling `ints.`, which is a
# syntax error.  It was presumably left over from an interactive
# tab-completion demo, so it is dropped here.
# Arithmetic on a Series is done element-wise.
# Four odd numbers labeled 'a'..'d', used for the arithmetic examples.
odd = pd.Series([1, 3, 5, 7], index=list('abcd'))
odd + 1        # Series with a scalar
odd * 2
odd + odd      # Series with a Series
odd * odd
np.sin(odd)    # NumPy function applied to a Series
# np.sin is a NumPy function.
# A pandas Series can be used in place of an ndarray in most NumPy functions.
# Comparing the NumPy result with a scalar (reads `odd` defined earlier).
np.sin(odd) < 0.2

# 1000 random values indexed by consecutive dates starting 1/1/2019.
gain_loss = pd.Series(
    np.random.randn(1000),
    index=pd.date_range('1/1/2019', periods=1000),
)
gain_loss.head()
gain_loss.plot()

# Running total of the values above, plotted the same way.
cumulative_gain = gain_loss.cumsum()
cumulative_gain.plot()
# Two Series whose labels only partly overlap ('b' and 'd' are shared).
odd = pd.Series([1, 3, 5, 7], index=list('abcd'))
even = pd.Series([2, 4, 6], index=['d', 'b', 'e'])
odd + even

# A Series that already contains a missing value ...
sample = pd.Series([1, 2, 3, np.nan])
sample
# ... and a second one written in by assignment.
sample[0] = np.nan
sample
# The result of any operation involving NaN is NaN.
# 'b' and 'c' appear in both Series; 'a' and 'd' appear in only one.
odd = pd.Series([1, 3, 5], index=list('abc'))
even = pd.Series([2, 4, 6], index=['d', 'b', 'c'])
result = odd + even
result
result + 1       # further arithmetic on the sum
result.mean()    # aggregate over the sum
# What should the mean of result be?
# --- Arithmetic when index labels repeat -------------------------------------
# Case 1: both operands carry the same labels, with 'a' duplicated.
even = pd.Series([2, 4, 6], index=['a', 'a', 'b'])
even
odd = pd.Series([1, 3, 5], index=['a', 'a', 'b'])
odd + even

# Case 2: duplicated 'a' in both, but the last label differs ('b' vs 'c').
odd = pd.Series([1, 3, 5], index=['a', 'a', 'b'])
even = pd.Series([2, 4, 6], index=['a', 'a', 'c'])
odd + even

# Case 3: nothing but the duplicated label 'a' on both sides.
odd = pd.Series([1, 3], index=['a', 'a'])
even = pd.Series([2, 4], index=['a', 'a'])
odd + even
# You may want to avoid duplicate index values.
# --- Reordering and extending an index with reindex --------------------------
d = dict(a=0., b=1., c=2.)
# Labels deliberately out of order; 'd' is not a key of the dict.
odd_order = pd.Series(d, index=['b', 'c', 'd', 'a'])
odd_order
# Sorted labels plus a new one, 'e'.
better_order = odd_order.reindex(list('abcde'))
better_order
# Integer labels, none of which exist in odd_order.
int_order = odd_order.reindex([1, 2, 3, 4])
int_order

# --- Replacing the index in place --------------------------------------------
odd_order = pd.Series([1., 2., 3.], index=['b', 'c', 'd'])
odd_order
odd_order.index = [1, 2, 3]
odd_order
odd_order.index = ['a', 2, "cat"]    # labels of mixed types
odd_order

# --- Filling the gaps that reindex creates -----------------------------------
gaps = pd.Series(list('abc'), index=[1, 4, 6])
gaps
gaps.reindex(range(7))                      # no filling
gaps.reindex(range(7), method='ffill')      # forward fill
gaps.reindex(range(7), method='bfill')      # backward fill
gaps.reindex(range(7), fill_value="cat")    # constant string fill
gaps.reindex(range(7), fill_value=0)        # constant integer fill
# A Series is not restricted to a single data type.
# DataFrame: a 2D data structure
# Rows and columns are labeled
# Columns can have different data types
# Create from (e.g. a dict of Series):
# --- Building a DataFrame from a dict of Series ------------------------------
# 'one' has no entry for label 'd'; 'two' covers all four labels.  The same
# dict feeds every constructor call below.
data = {
    'one': pd.Series([1., 2., 3.], index=['a', 'b', 'c']),
    'two': pd.Series([1., 2., 3., 4.], index=['a', 'b', 'c', 'd']),
}
sample_df = pd.DataFrame(data)
sample_df

# Choose and order the rows with `index`.
sample_df = pd.DataFrame(data, index=['d', 'c', 'b'])
sample_df

# Choose and order the columns as well with `columns`.
sample_df = pd.DataFrame(data, index=['d', 'c', 'b'], columns=['two', 'one'])
sample_df

# --- Reading and writing single cells ----------------------------------------
sample_df = pd.DataFrame(data)
sample_df['one']         # a column, returned as a Series
sample_df['one']['a']    # then one entry of that column
# Write through a single .loc call.  The chained form
# `sample_df['one']['a'] = 42` may assign to a temporary object and has no
# effect on sample_df when Copy-on-Write is enabled.
sample_df.loc['a', 'one'] = 42
sample_df
# [] on a DataFrame looks up a column label and there is no column 1, so
# this raises KeyError.  Catch it so the demonstration reports the error
# instead of terminating the script.
try:
    sample_df[1]
except KeyError as err:
    print(f"KeyError: {err}")