import numpy as np
import pandas as pd
"""
数据要求:read_num, book_id
"""
pf = pd.read_csv(
'new_data.csv', encoding=
'gbk')
print type(pf)
unit = pf[
'read_unit']
unit = unit.str.split(
' ')
dapartment = unit.str[
0]
major = unit.str[
1]
data = pf[[
'read_sex',
'book_id']]
print type(data)
data.insert(
2,
'dapartment', dapartment)
data.columns = [
'sex',
'book',
'dapartment']
print type(data)
print '------------------------------------------------------'
"""
算法:获取标签
"""
def add_label(s):
l = []
m = []
for i
in range(len(s)):
if i ==
0:
m = []
l = [
1]
else:
m.append(s[i -
1])
if s[i]
in m:
if m.index(s[i]) ==
0:
l.append(
1)
else:
l.append(l[m.index(s[i])])
else:
l.append(max(l) +
1)
return l
# Replace each column of `data` with the integer labels returned by
# add_label(); after this, sex / dapartment / book are plain lists holding
# one label per row.
sex = data['sex']
print(type(sex))  # call form: same output on Python 2, valid on Python 3
sex = add_label(sex)
dapartment = add_label(data['dapartment'])
book = add_label(data['book'])
"""
不同学院的学生借阅书籍的不同
"""
diff_dep = []
diff_dep.append(sex)
diff_dep.append(dapartment)
m = np.array(diff_dep).T
print m
print m[:
10]
n = book
print n[
150000:]
"""
决策树训练数据和预测数据
"""
train_data = m[:
150000]
test_data = m[
150000:]
train_target = n[:
150000]
test_target = n[
150000:]
from sklearn.tree
import DecisionTreeClassifier
clf = DecisionTreeClassifier()
clf.fit(train_data, train_target)
print clf
predict_target = clf.predict(test_data)
print predict_target
print predict_target == test_target
print sum(predict_target == test_target)
from sklearn
import metrics
print metrics.classification_report(test_target, predict_target)
# When reposting, please credit the original source: https://ju.6miu.com/read-16227.html