# The following code was written for Python 3.5.0.
import numpy as np
import pandas as pd
# Load the Titanic training set into a DataFrame. The path is relative, so the
# CSV is looked up from the current working directory.
train_csv_path = "titanic_train.csv"
titanic_survival = pd.read_csv(train_csv_path)
# ---- Count how many values in the "Age" column are missing ----
age = titanic_survival["Age"]
# Boolean mask with True wherever the age is missing.
age_is_null = pd.isnull(age)
# Keep only the missing entries; their count is the number of null ages.
age_null_true = age[age_is_null]
age_null_count = len(age_null_true)
print(age_null_count)
# ---- Mean age, method one: filter out missing values by hand ----
# `age_is_null` is the boolean missing-value mask built earlier in this script;
# negating it selects the rows whose age is present.
good_ages = titanic_survival["Age"][~age_is_null]
# Plain sum / count over the non-missing ages, spelled out step by step.
correct_mean_age = sum(good_ages) / len(good_ages)
print(correct_mean_age)
# ---- Mean age, method two: let pandas handle the missing values ----
# Series.mean() skips missing (NaN) entries by default, so no manual
# filtering is needed before averaging.
age_column = titanic_survival["Age"]
correct_mean_age = age_column.mean()
print(correct_mean_age)
# ---- Average fare for each passenger class ----
passenger_classes = [1, 2, 3]
fares_by_class = {}
for this_class in passenger_classes:
    # All rows whose Pclass equals the class currently being processed.
    pclass_rows = titanic_survival[titanic_survival["Pclass"] == this_class]
    # The Fare column of just those rows.
    pclass_fares = pclass_rows["Fare"]
    fare_for_class = pclass_fares.mean()
    fares_by_class[this_class] = fare_for_class
# NOTE(review): the flattened source lost its indentation; the print is placed
# after the loop so the completed mapping is shown once -- confirm intent.
print(fares_by_class)
# ---- pivot_table: mean of "Survived" for each Pclass value ----
# The aggregation is named by string: recent pandas releases emit a
# FutureWarning when a NumPy callable such as np.mean is passed as aggfunc.
passenger_survival = titanic_survival.pivot_table(
    index="Pclass", values="Survived", aggfunc="mean"
)
print(passenger_survival)
# Mean of "Age" for each Pclass value. The aggregation is named by string
# because recent pandas releases emit a FutureWarning when a NumPy callable
# such as np.mean is passed as aggfunc.
passenger_age = titanic_survival.pivot_table(
    index="Pclass", values="Age", aggfunc="mean"
)
print(passenger_age)
# Totals of "Fare" and "Survived" for each "Embarked" value. The aggregation
# is named by string because recent pandas releases emit a FutureWarning when
# a NumPy callable such as np.sum is passed as aggfunc.
port_stats = titanic_survival.pivot_table(
    index="Embarked", values=["Fare", "Survived"], aggfunc="sum"
)
print(port_stats)
# When reposting, please credit the original source: https://ju.6miu.com/read-38871.html