由数组字典构造DataFrame
from pandas import Series,DataFrame
import numpy as np
import pandas as pd

data={'state':['ohio','ohio','ohio','Nevada','Nevada'],
      'year':[2000,2001,2002,2001,2002],
      'pop':[1.5,1.7,3.6,2.4,2.9]}
frame=DataFrame(data)
frame
Out[103]:
   pop   state  year
0  1.5    ohio  2000
1  1.7    ohio  2001
2  3.6    ohio  2002
3  2.4  Nevada  2001
4  2.9  Nevada  2002

改变列顺序(通过 columns 参数指定)
DataFrame(data,columns=['year','state','pop'])
Out[104]:
   year   state  pop
0  2000    ohio  1.5
1  2001    ohio  1.7
2  2002    ohio  3.6
3  2001  Nevada  2.4
4  2002  Nevada  2.9

改变行标签(index);新增的列如果没有数据,则显示为 NaN
frame2=DataFrame(data,columns=['year','state','pop','debt'],
                 index=['one','two','three','four','five'])
frame2
Out[108]:
       year   state  pop debt
one    2000    ohio  1.5  NaN
two    2001    ohio  1.7  NaN
three  2002    ohio  3.6  NaN
four   2001  Nevada  2.4  NaN
five   2002  Nevada  2.9  NaN

frame2.columns
Out[111]: Index(['year', 'state', 'pop', 'debt'], dtype='object')

frame2.state 与 frame2['state'] 用法相同,都返回同一列
frame2.state
Out[112]:
one        ohio
two        ohio
three      ohio
four     Nevada
five     Nevada
Name: state, dtype: object

frame2['state']
Out[113]:
one        ohio
two        ohio
three      ohio
four     Nevada
five     Nevada
Name: state, dtype: object

给 debt 列赋值
frame2['debt']=16.5
frame2
Out[115]:
       year   state  pop  debt
one    2000    ohio  1.5  16.5
two    2001    ohio  1.7  16.5
three  2002    ohio  3.6  16.5
four   2001  Nevada  2.4  16.5
five   2002  Nevada  2.9  16.5

frame2['debt']=np.arange(5)
frame2.debt=np.arange(5)
frame2
Out[120]:
       year   state  pop  debt
one    2000    ohio  1.5     0
two    2001    ohio  1.7     1
three  2002    ohio  3.6     2
four   2001  Nevada  2.4     3
five   2002  Nevada  2.9     4

val=Series([-1.2,-1.5,-1.7],index=['two','four','five'])
frame2['debt']=val
frame2
Out[127]:
       year   state  pop  debt
one    2000    ohio  1.5   NaN
two    2001    ohio  1.7  -1.2
three  2002    ohio  3.6   NaN
four   2001  Nevada  2.4  -1.5
five   2002  Nevada  2.9  -1.7

frame2['eastern']=frame2.state=='ohio'
frame2
Out[129]:
       year   state  pop  debt  eastern
one    2000    ohio  1.5   NaN     True
two    2001    ohio  1.7  -1.2     True
three  2002    ohio  3.6   NaN     True
four   2001  Nevada  2.4  -1.5    False
five   2002  Nevada  2.9  -1.7    False

删除列

del frame2['eastern']
frame2
Out[131]:
       year   state  pop  debt
one    2000    ohio  1.5   NaN
two    2001    ohio  1.7  -1.2
three  2002    ohio  3.6   NaN
four   2001  Nevada  2.4  -1.5
five   2002  Nevada  2.9  -1.7

frame2.columns
Out[132]: Index(['year', 'state', 'pop', 'debt'], dtype='object')