import numpy as np
import pandas as pd
ints = pd.Series([1,3,5,6])
ints
0 1 1 3 2 5 3 6 dtype: int64
pd.Series(5, index=['a','b','c'])
a 5 b 5 c 5 dtype: int64
pd.Series(np.random.randn(3), index=['a', 'b', 'c'])
a -0.033369 b -0.436952 c 0.030384 dtype: float64
pd.Series(np.random.randn(3), ['a', 'c', 'd'])
a -1.123177 c -0.183864 d -1.150105 dtype: float64
d = {'a': 0., 'b': 1., 'c': 2.}
pd.Series(d)
a 0.0 b 1.0 c 2.0 dtype: float64
pd.Series(d, ['b', 'c', 'd', 'a'])
b 1.0 c 2.0 d NaN a 0.0 dtype: float64
ints = pd.Series([1,3,5,6])
ints[2]
5
ints[2] = 11
ints
0 1 1 3 2 11 3 6 dtype: int64
ints.get(2)
11
ints.index
RangeIndex(start=0, stop=4, step=1)
ints.dtype
dtype('int64')
ints = pd.Series([1,3,5,6])
x = ints.get(10)
print(x)
None
ints[10]
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) ~/opt/anaconda3/lib/python3.7/site-packages/pandas/core/indexes/range.py in get_loc(self, key, method, tolerance) 350 try: --> 351 return self._range.index(new_key) 352 except ValueError as err: ValueError: 10 is not in range The above exception was the direct cause of the following exception: KeyError Traceback (most recent call last) <ipython-input-12-249e29dff803> in <module> ----> 1 ints[10] ~/opt/anaconda3/lib/python3.7/site-packages/pandas/core/series.py in __getitem__(self, key) 822 823 elif key_is_scalar: --> 824 return self._get_value(key) 825 826 if is_hashable(key): ~/opt/anaconda3/lib/python3.7/site-packages/pandas/core/series.py in _get_value(self, label, takeable) 930 931 # Similar to Index.get_value, but we do not fall back to positional --> 932 loc = self.index.get_loc(label) 933 return self.index._get_values_for_loc(self, loc, label) 934 ~/opt/anaconda3/lib/python3.7/site-packages/pandas/core/indexes/range.py in get_loc(self, key, method, tolerance) 351 return self._range.index(new_key) 352 except ValueError as err: --> 353 raise KeyError(key) from err 354 raise KeyError(key) 355 return super().get_loc(key, method=method, tolerance=tolerance) KeyError: 10
ints.get(10, -1)
-1
ints = pd.Series([1,3,5,7], index=['a','b','c','d'])
ints
a 1 b 3 c 5 d 7 dtype: int64
ints['b']
3
ints[['a','c','d']]
a 1 c 5 d 7 dtype: int64
ints[0]
1
ints[[1,2]]
b 3 c 5 dtype: int64
ints = pd.Series([1,3,5,7], index=['a','b','c','d'])
'b' in ints
True
3 in ints
False
ints = pd.Series([1,3,5,7], index=['a','b','c','d'])
ints['a':'c']
a 1 b 3 c 5 dtype: int64
ints['c':]
c 5 d 7 dtype: int64
ints = pd.Series([1,3,5,7], index=['a','b','c','d'])
# An integer slice on a label-indexed Series is positional: rows 1 and 2.
ints[1:3]
b 3 c 5 dtype: int64
ints['a':'c'] = 0
ints
a 0 b 0 c 0 d 7 dtype: int64
ints[['b','d']] = 11
ints
a 0 b 11 c 0 d 11 dtype: int64
ints = pd.Series([1,3,5,7], index=['a','b','c','d'])
ints[0:2] = 42
ints
a 42 b 42 c 5 d 7 dtype: int64
ints = pd.Series([1,3,5,7])
ints[1:3] = 0
ints
0 1 1 0 2 0 3 7 dtype: int64
ints = pd.Series([1,3,5,7], index=['a','b','c','d'])
ints[1:2] = 0
ints
a 1 b 0 c 5 d 7 dtype: int64
ints = pd.Series([1,3,5,7], index=['a','b','c', 'd'])
ints[ints > 3]
c 5 d 7 dtype: int64
ints
a 1 b 3 c 5 d 7 dtype: int64
ints[ints > ints.median()]
c 5 d 7 dtype: int64
import numpy as np
import pandas as pd
ints > ints.median()
a False b False c True d True dtype: bool
ints[[False, False, True,True]]
c 5 d 7 dtype: int64
Fun With Indexing
data = pd.Series(['a', 'b', 'c'], index=[1, 3, 5])
data
1 a 3 b 5 c dtype: object
data[1] # Two possible meanings - position or index
'a'
data[1:3] # Two possible meanings
3 b 5 c dtype: object
data[1] looks up by index label; data[1:3] slices by position
loc - use index
data = pd.Series(['a', 'b', 'c'], index=[1, 3, 5])
data.loc[1]
'a'
data.loc[1:3]
1 a 3 b dtype: object
iloc - use position
data = pd.Series(['a', 'b', 'c'], index=[1, 3, 5])
data.iloc[1]
'b'
data.iloc[1:3]
3 b 5 c dtype: object
ints.t
Arithmetic operations on a Series are done element-wise
odd = pd.Series([1,3,5,7],['a','b','c','d'])
odd + 1
a 2 b 4 c 6 d 8 dtype: int64
odd * 2
a 2 b 6 c 10 d 14 dtype: int64
odd + odd
a 2 b 6 c 10 d 14 dtype: int64
odd * odd
a 1 b 9 c 25 d 49 dtype: int64
np.sin(odd)
a 0.841471 b 0.141120 c -0.958924 d 0.656987 dtype: float64
np.sin is a NumPy function
A pandas Series can be used in place of an ndarray in most NumPy functions
np.sin(odd) < 0.2
a False b True c True d False dtype: bool
import numpy as np
import pandas as pd
def speed_up(N, p):
    """Return the speed-up factor 1 / ((1 - p) + p / N).

    The expression has the form of Amdahl's law; presumably ``N`` is the
    number of parallel workers and ``p`` the fraction of the work that can
    be parallelized (TODO confirm with the author).

    Only arithmetic operators are used, so ``N`` may be a scalar or an
    array-like such as a pandas Series, in which case the result is
    computed element-wise and returned with the same index.

    Parameters
    ----------
    N : number or array-like
        Divisor applied to the ``p`` share. Must be non-zero.
    p : number
        Share of the work that is divided by ``N``.

    Returns
    -------
    number or array-like
        ``1 / (1 - p + p / N)``, matching the type of ``N``.
    """
    return 1 / (1 - p + p / N)
speed_up(5, 0.6)
1.923076923076923
max_N = 50
N_series = pd.Series(range(1,max_N), index=range(1,max_N))
N_series.head(5)
1 1 2 2 3 3 4 4 5 5 dtype: int64
speed_up(N_series,0.5).head(3)
1 1.000000 2 1.333333 3 1.500000 dtype: float64
speed_up(N_series,0.5).plot()
<AxesSubplot:>
odd = pd.Series([1,3,5,7],['a','b','c','d'])
even = pd.Series([2,4,6],['d','b','e'])
odd + even
a NaN b 7.0 c NaN d 9.0 e NaN dtype: float64
sample = pd.Series([1,2,3,np.nan])
sample
0 1.0 1 2.0 2 3.0 3 NaN dtype: float64
sample[0] = np.nan
sample
0 NaN 1 2.0 2 3.0 3 NaN dtype: float64
The result of any arithmetic operation involving NaN is NaN
odd = pd.Series([1,3,5],['a','b','c'])
even = pd.Series([2,4,6],['d','b','c'])
result = odd + even
result
a NaN b 7.0 c 11.0 d NaN dtype: float64
result + 1
a NaN b 8.0 c 12.0 d NaN dtype: float64
result.mean()
9.0
What should the mean of result be? By default mean() skips NaN values, so it is (7.0 + 11.0) / 2 = 9.0
even = pd.Series([2,4,6],['a','a','b'])
even
a 2 a 4 b 6 dtype: int64
odd = pd.Series([1,3,5],['a','a','b'])
odd + even
a 3 a 7 b 11 dtype: int64
odd = pd.Series([1,3,5],['a','a','b'])
even = pd.Series([2,4,6],['a','a','c'])
odd + even
a 3.0 a 5.0 a 5.0 a 7.0 b NaN c NaN dtype: float64
odd = pd.Series([1,3],['a','a'])
even = pd.Series([2,4],['a','a'])
odd + even
a 3 a 7 dtype: int64
You might want to avoid having duplicate index values
d = {'a': 0., 'b': 1., 'c': 2.}
odd_order = pd.Series(d, ['b', 'c', 'd', 'a'])
odd_order
b 1.0 c 2.0 d NaN a 0.0 dtype: float64
better_order = odd_order.reindex(['a','b','c','d','e'])
better_order
a 0.0 b 1.0 c 2.0 d NaN e NaN dtype: float64
int_order = odd_order.reindex([1,2,3,4])
int_order
1 NaN 2 NaN 3 NaN 4 NaN dtype: float64
odd_order = pd.Series([1.,2.,3.], ['b', 'c', 'd'])
odd_order
b 1.0 c 2.0 d 3.0 dtype: float64
odd_order.index = [1,2,3]
odd_order
1 1.0 2 2.0 3 3.0 dtype: float64
odd_order.index = ['a',2,"cat"]
odd_order
a 1.0 2 2.0 cat 3.0 dtype: float64
gaps = pd.Series(['a','b','c'], [1, 4, 6])
gaps
1 a 4 b 6 c dtype: object
gaps.reindex(range(7))
0 NaN 1 a 2 NaN 3 NaN 4 b 5 NaN 6 c dtype: object
gaps.reindex(range(7), method='ffill') #forward fill
0 NaN 1 a 2 a 3 a 4 b 5 b 6 c dtype: object
gaps.reindex(range(7), method='bfill') #backward fill
0 a 1 a 2 b 3 b 4 b 5 c 6 c dtype: object
gaps.reindex(range(7), fill_value="cat")
0 cat 1 a 2 cat 3 cat 4 b 5 cat 6 c dtype: object
gaps
1 a 4 b 6 c dtype: object