"""
@author: XiangguoSun
@contact: sunxiangguodut@qq.com
@file: learn_pandas.py
@time: 2017/3/8 8:18
@software: PyCharm
"""
import numpy as np
import pandas as pd
from pandas import DataFrame, Series
# 1. Basic data structures

# 1.1 Series: combines dict-style label lookup with a one-dimensional array.
# NOTE: every print below uses the single-argument call form so the script
# produces the same output on Python 2 and Python 3.
obj_dic = {'a': 1, 'b': 2, 'c': 3}
objd = Series(obj_dic)  # built from a dict: keys are used as index labels

obj = Series([4, 7, -5, 3], index=['a', 'b', 'c', 'd'])
print('{} {}'.format(obj.index, obj.values))
print(obj[['a', 'c']])  # select several labels at once
print('b' in obj)       # membership is tested against the index labels

# Re-create the Series against a longer index; 'add' has no source value.
obj_na = Series(obj, index=['a', 'b', 'c', 'd', 'add'])
print(obj_na)
print(obj_na.isnull())
print(obj_na.notnull())
print(obj_na.name)
print(obj_na.index.name)

# Relabel the index in place, then name both the Series and its index.
obj_na.index = ['x', 'y', 'z', 'o', 'p']
obj_na.name = 'my_table'
obj_na.index.name = 'my_index'
print(obj_na)
# 1.2 DataFrame
data = {
    'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada'],
    'year': [2000, 2001, 2002, 2001, 2002],
    'pop': [1.5, 1.7, 3.6, 2.4, 2.9],
}
# 'debt' is requested as a column but is absent from `data`.
df = DataFrame(data,
               columns=['year', 'state', 'pop', 'debt'],
               index=['one', 'two', 'three', 'four', 'five'])
print(df)
# Label-based row lookup; `.loc` replaces the removed `.ix` indexer.
print(df.loc['three'])

# Assigning to a new key adds a column (here literally named 'five').
df['five'] = np.arange(5)
print(df)

df['new_column'] = df.state == 'Ohio'
print(df)
del df['new_column']
print(df.columns)

# Nested dict: outer keys become columns, inner keys become row labels.
pop = {
    'Nevada': {2001: 2.4, 2002: 2.9},
    'Ohio': {2000: 1.5, 2001: 1.7, 2002: 3.6},
}
data = DataFrame(pop)
print(data)
print(data.T)
print(DataFrame(pop, index=[2001, 2002, 2003]))

data.index.name = 'sunxiangguo'
data.columns.name = 'state'
print(data)
print(data.values)
# Index objects are immutable.
# 2. Essential functionality

# Reindexing: conform a Series to a new set of labels.
# Single-argument print(...) calls keep the output identical on Python 2 and 3.
obj = Series([4.5, 7, -2, 4], index=['b', 'a', 'c', 'd'])
print(obj)

obj2 = obj.reindex(['a', 'b', 'c', 'd', 'e'])
print(obj2)

# Same reindex, but the label with no existing value ('e') is filled with 0.
obj3 = obj.reindex(['a', 'b', 'c', 'd', 'e'], fill_value=0)
print(obj3)

# Filling gaps while reindexing ordered data.
obj = Series(['blue', 'perple', 'yellow'], index=[0, 2, 4])
print(obj)

obj2 = obj.reindex(range(8), method='ffill')
print(obj2)
print(obj.reindex(range(7), method='pad'))
print(obj.reindex(range(7), method='bfill'))
print(obj.reindex(range(7), method='backfill'))
# Dropping entries from an axis.
# Single-argument print(...) calls keep the output identical on Python 2 and 3.
obj = Series(np.arange(5), index=['a', 'b', 'c', 'd', 'e'])
new_obj = obj.drop('c')
print(obj)      # printed alongside new_obj to compare before and after
print(new_obj)
print(obj.drop(['c', 'd']))

data = DataFrame(
    np.arange(16).reshape((4, 4)),
    index=['Ohio', 'Colorado', 'Utah', 'New York'],
    columns=['one', 'two', 'three', 'four'],
)
print(data)
print(data.drop(['Colorado', 'Ohio']))       # drop rows by label
print(data.drop('two', axis=1))              # drop a single column
print(data.drop(['two', 'four'], axis=1))    # drop several columns
# Indexing and selection on a Series.
# Single-argument print(...) calls keep the output identical on Python 2 and 3.
data = Series(np.arange(4), index=['a', 'b', 'c', 'd'])
print(data)
print(data['b'])                 # by label

# Integer keys on a string-labelled Series used to fall back to positions;
# `.iloc` states that intent explicitly and matches the `.iloc` usage
# elsewhere in this file.
print(data.iloc[1])
print(data.iloc[2:4])

print(data[['b', 'a', 'd']])     # list of labels
print(data.iloc[[1, 3]])         # list of positions
print(data[data < 2])            # boolean mask
print(data['a':'c'])             # label slice
# Indexing and selection on a DataFrame.
# Single-argument print(...) calls keep the output identical on Python 2 and 3.
data = DataFrame(
    np.arange(16).reshape((4, 4)),
    index=['Ohio', 'Colorado', 'Utah', 'New York'],
    columns=['one', 'two', 'three', 'four'],
)
print(data)
print(data['two'])                  # single column
print(data[['three', 'one']])       # several columns, in the given order
print(data[:2])                     # row slice
print(data[data['three'] > 5])      # rows selected by a boolean mask

# The `.ix` indexer has been removed from pandas. Both original calls passed
# integer slices against string labels, so `.iloc` is the positional
# equivalent.
print(data.iloc[:2, :2])
print(data.iloc[1:3])

print(data.xs('Ohio'))              # one row, looked up by label

# The original called xs(range(1, 4), axis=1), i.e. integer positions passed
# to a label-based lookup. NOTE(review): presumably this selected the columns
# at positions 1..3 on old pandas -- confirm; it is spelled positionally here.
print(data.iloc[:, 1:4])

# `data.icol(2)` and `data.irow(0)` have been superseded by the two lines
# below.
print(data.iloc[:, 2])
print(data.iloc[0])
# When reposting, please credit the original source: https://ju.6miu.com/read-9932.html