【数据分析】图书馆数据-08决策树

    xiaoxiao2021-03-25  115

    # -*- coding: utf-8 -*-
# Library data analysis, part 08: decision tree.
#
# Trains sklearn's DecisionTreeClassifier to predict which book a reader
# borrows from two features: the reader's sex and the reader's department.
#
# Input file: new_data.csv (read with GBK encoding). Columns used by this
# script: read_sex, book_id and read_unit. read_unit is split on a single
# space; the first token is taken as the department and the second as the
# major (the major is not used as a feature here).
import numpy as np
import pandas as pd

# Number of leading rows used for training; all remaining rows form the test
# set. (The original author noted the full data set has 182508 rows.)
TRAIN_SIZE = 150000


def add_label(s):
    """Encode values as integer labels in order of first appearance.

    The first distinct value seen gets label 1, the next new distinct value
    gets label 2, and so on; a repeated value reuses the label it was given
    the first time.

    Args:
        s: An iterable of hashable values (a list or a pandas Series).
           Values are read by iteration, so a Series does not need a
           default 0..n-1 index.

    Returns:
        A list of ints with one label per input value; ``[]`` for empty
        input.
    """
    codes = {}
    # setdefault stores ``len(codes) + 1`` only when the value is new, so one
    # dictionary lookup per element replaces the repeated list scans
    # (``in`` / ``index`` / ``max``) that made the earlier version quadratic.
    return [codes.setdefault(value, len(codes) + 1) for value in s]


def load_data(path='new_data.csv'):
    """Read the CSV and return a frame with columns sex, book, dapartment.

    Args:
        path: Location of the GBK-encoded CSV file.

    Returns:
        A pandas DataFrame whose columns are renamed to
        ``['sex', 'book', 'dapartment']``.
    """
    pf = pd.read_csv(path, encoding='gbk')
    print(type(pf))

    # read_unit was originally space separated: "<department> <major>".
    unit = pf['read_unit'].str.split(' ')
    department = unit.str[0]
    # unit.str[1] would be the major; it is not used as a feature.

    # Copy so that insert() modifies an independent frame, not a slice of pf.
    data = pf[['read_sex', 'book_id']].copy()
    print(type(data))
    data.insert(2, 'dapartment', department)
    data.columns = ['sex', 'book', 'dapartment']
    print(type(data))
    return data


def main():
    """Run the whole pipeline: load, encode, train, predict, report."""
    data = load_data()
    print('------------------------------------------------------')

    # Turn every categorical column into integer labels.
    sex = data['sex']
    print(type(sex))
    sex = add_label(sex)
    department = add_label(data['dapartment'])
    book = add_label(data['book'])

    # Feature matrix: one row per borrowing record, columns (sex, department).
    features = np.array([sex, department]).T
    print(features)
    print(features[:10])

    # Target: the encoded book id.
    target = book
    print(target[TRAIN_SIZE:])

    # Split into training and test data by position, not at random.
    train_data = features[:TRAIN_SIZE]
    test_data = features[TRAIN_SIZE:]
    train_target = target[:TRAIN_SIZE]
    test_target = np.array(target[TRAIN_SIZE:])

    # Imported here so that importing this module does not require sklearn.
    from sklearn import metrics
    from sklearn.tree import DecisionTreeClassifier

    # Train on the training rows and their class labels only.
    clf = DecisionTreeClassifier()
    clf.fit(train_data, train_target)
    print(clf)

    # Predict the held-out rows.
    predict_target = clf.predict(test_data)
    print(predict_target)

    # Element-wise comparison of predictions with the true labels, then the
    # number of correct predictions.
    hits = predict_target == test_target
    print(hits)
    print(hits.sum())

    # Precision, recall and F-score per class.
    print(metrics.classification_report(test_target, predict_target))
    # Optional, disabled in the original as well:
    # print(metrics.confusion_matrix(test_target, predict_target))


if __name__ == '__main__':
    main()
    转载请注明原文地址: https://ju.6miu.com/read-16227.html

    最新回复(0)