pandas.Seriesでのデータ作成・データへのアクセス

次のコードで色々試してみた。

#coding:utf-8
import numpy as np
import pandas as pd

### Series ###
## 1-1. Create a Series
# Fix the seed so the 100 standard-normal samples are reproducible.
np.random.seed(1)
s = pd.Series(np.random.randn(100))

# print(x) with a single argument behaves the same on Python 2 and Python 3,
# whereas the statement form `print x` is a SyntaxError on Python 3.
print(s)

## 1-1-1. Select from the Series
# Extract a single element as a scalar.
print(s[0])

# Extract subsets as a new Series: by a list of labels, then by a slice.
print(s[[0, 2, 4]])
print(s[0:4])

# Get the index (a RangeIndex, because no explicit index was passed).
print(s.index)

# Get the values (a NumPy ndarray).
print(s.values)

## 1-2. Create a Series with an explicit string index
s2 = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
print(s2)

## 1-2-1. Select from the Series
# Extract a single element as a scalar.
# Positional access goes through .iloc: plain s2[0] on a string-labelled
# index relies on a deprecated fallback from label lookup to position.
print(s2.iloc[0])
# Label-based access.
print(s2['a'])

# Get the index (a plain Index holding the string labels, not a RangeIndex).
print(s2.index)

# Get the values (a NumPy ndarray).
print(s2.values)

## 1-3. Create a Series containing duplicate values
s3 = pd.Series([1, 1, 3, 4, 5, 5, 1, 13, 2, 3, 4, 5, 8, 1, 1, 1, 2, 11, 13])

# Get the distinct values (returned in order of first appearance).
# print(x) with a single argument works on both Python 2 and Python 3.
print(s3.unique())

# Get how many times each distinct value occurs (most frequent first).
print(s3.value_counts())

## 1-4. Arithmetic between Series
# Same labels in both Series, but stored in opposite order.
s4_1 = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
s4_2 = pd.Series([4, 3, 2, 1], index=['d', 'c', 'b', 'a'])

# Sort by index label.
# print(x) with a single argument works on both Python 2 and Python 3.
print(s4_2.sort_index(ascending=True))

# Arithmetic is aligned on index labels, not on position.
print(s4_1 + s4_2)

実行結果は次。

### print s
Backend TkAgg is interactive backend. Turning interactive mode on.
0 1.624345
1 -0.611756
2 -0.528172
3 -1.072969
4 0.865408
5 -2.301539
6 1.744812
7 -0.761207
8 0.319039
9 -0.249370
10 1.462108
11 -2.060141
12 -0.322417
13 -0.384054
14 1.133769
15 -1.099891
16 -0.172428
17 -0.877858
18 0.042214
19 0.582815
20 -1.100619
21 1.144724
22 0.901591
23 0.502494
24 0.900856
25 -0.683728
26 -0.122890
27 -0.935769
28 -0.267888
29 0.530355
...
70 -1.444114
71 -0.504466
72 0.160037
73 0.876169
74 0.315635
75 -2.022201
76 -0.306204
77 0.827975
78 0.230095
79 0.762011
80 -0.222328
81 -0.200758
82 0.186561
83 0.410052
84 0.198300
85 0.119009
86 -0.670662
87 0.377564
88 0.121821
89 1.129484
90 1.198918
91 0.185156
92 -0.375285
93 -0.638730
94 0.423494
95 0.077340
96 -0.343854
97 0.043597
98 -0.620001
99 0.698032
dtype: float64
### print s[0]
1.62434536366

### print s[[0, 2, 4]]
0 1.624345
2 -0.528172
4 0.865408
dtype: float64

### print s[0:4]
0 1.624345
1 -0.611756
2 -0.528172
3 -1.072969
dtype: float64

### print s.index
RangeIndex(start=0, stop=100, step=1)

### print s.values
[ 1.62434536 -0.61175641 -0.52817175 -1.07296862 0.86540763 -2.3015387
1.74481176 -0.7612069 0.3190391 -0.24937038 1.46210794 -2.06014071
-0.3224172 -0.38405435 1.13376944 -1.09989127 -0.17242821 -0.87785842
0.04221375 0.58281521 -1.10061918 1.14472371 0.90159072 0.50249434
0.90085595 -0.68372786 -0.12289023 -0.93576943 -0.26788808 0.53035547
-0.69166075 -0.39675353 -0.6871727 -0.84520564 -0.67124613 -0.0126646
-1.11731035 0.2344157 1.65980218 0.74204416 -0.19183555 -0.88762896
-0.74715829 1.6924546 0.05080775 -0.63699565 0.19091548 2.10025514
0.12015895 0.61720311 0.30017032 -0.35224985 -1.1425182 -0.34934272
-0.20889423 0.58662319 0.83898341 0.93110208 0.28558733 0.88514116
-0.75439794 1.25286816 0.51292982 -0.29809284 0.48851815 -0.07557171
1.13162939 1.51981682 2.18557541 -1.39649634 -1.44411381 -0.50446586
0.16003707 0.87616892 0.31563495 -2.02220122 -0.30620401 0.82797464
0.23009474 0.76201118 -0.22232814 -0.20075807 0.18656139 0.41005165
0.19829972 0.11900865 -0.67066229 0.37756379 0.12182127 1.12948391
1.19891788 0.18515642 -0.37528495 -0.63873041 0.42349435 0.07734007
-0.34385368 0.04359686 -0.62000084 0.69803203]

### print s2
a 1
b 2
c 3
d 4
dtype: int64

### print s2[0]
1
### print s2['a']
1

### print s2.index
Index([u'a', u'b', u'c', u'd'], dtype='object')

### print s2.values
[1 2 3 4]

### print s3.unique()
[ 1 3 4 5 13 2 8 11]

### print s3.value_counts()
1 6
5 3
13 2
4 2
3 2
2 2
11 1
8 1
dtype: int64

### print s4_2.sort_index(ascending=True)
a 1
b 2
c 3
d 4
dtype: int64

### print s4_1+s4_2
a 2
b 4
c 6
d 8
dtype: int64