VGG Face Descriptor is work from the VGG group at the University of Oxford; the trained network definition and model weights have been open-sourced. This post fine-tunes that model in Caffe on our own face data, then extracts features with it and verifies the accuracy. Dataset: CASIA-WebFace. Model: http://www.robots.ox.ac.uk/~vgg/software/vgg_face/
1. Download the pre-trained VGG-Face model and network definition from the URL above, then derive a train_val.prototxt from the deploy.prototxt. The main changes are: replace the input with data layers, and replace the last fully connected layer and the loss layer. Note that the fc8 layer must be renamed, so that Caffe re-initializes it for the new classes instead of trying to load the original 2622-way weights. Specifically:
name: "VGG_FACE_16_layers" layer { name: "data" type: "Data" top: "data" top: "label" include { phase: TRAIN } transform_param { mirror: true crop_size: 224 # mean_file: "data/ilsvrc12/imagenet_mean.binaryproto" } data_param { source: "vggface/webface_train_lmdb" batch_size: 32 backend: LMDB } } layer { name: "data" type: "Data" top: "data" top: "label" include { phase: TEST } transform_param { mirror: false crop_size: 224 # mean_file: "data/ilsvrc12/imagenet_mean.binaryproto" } data_param { source: "vggface/webface_val_lmdb" batch_size: 32 backend: LMDB } } ...... layer { bottom: "fc7" top: "fc8_s" name: "fc8_s" type: "InnerProduct" param { lr_mult: 10 decay_mult: 1 } param { lr_mult: 20 decay_mult: 0 } inner_product_param { num_output: 2031 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0.1 } } } layer { name: "accuracy" type: "Accuracy" bottom: "fc8_s" bottom: "label" top: "accuracy" include { phase: TEST } } layer { name: "loss" type: "SoftmaxWithLoss" bottom: "fc8_s" bottom: "label" top: "loss" }本次训练,我采用的是webface中人脸数量在50张以上的个人类别,总共有两千多个。按照caffe的工具转换成lmdb格式即可开始训练。
After training finishes, verify the model on LFW. The LFW image pairs come from the officially provided txt file, but I found its format inconvenient, so I reworked it slightly with a script. pairs.txt:
```
Abel_Pacheco	1	4
Akhmed_Zakayev	1	3
Akhmed_Zakayev	2	3
Amber_Tamblyn	1	2
Anders_Fogh_Rasmussen	1	3
Anders_Fogh_Rasmussen	1	4
```
After the rewrite, the two image paths of a pair sit on the same line (tab-separated), followed by 1 if both images show the same person and 0 otherwise:
```
Abel_Pacheco/Abel_Pacheco_0001.jpg	Abel_Pacheco/Abel_Pacheco_0004.jpg	1
Akhmed_Zakayev/Akhmed_Zakayev_0001.jpg	Akhmed_Zakayev/Akhmed_Zakayev_0003.jpg	1
Akhmed_Zakayev/Akhmed_Zakayev_0002.jpg	Akhmed_Zakayev/Akhmed_Zakayev_0003.jpg	1
Amber_Tamblyn/Amber_Tamblyn_0001.jpg	Amber_Tamblyn/Amber_Tamblyn_0002.jpg	1
```
The conversion script:
```python
#!/usr/bin/python
# -*- coding: utf-8 -*-
'''
Created on Thur Mar 2 10:17:38 2017
Goal: parse pairs.txt of the LFW database into label.txt for the
face recognition test
@author: wujiyang
'''
import sys

def get_all_images(filename):
    file = open(filename)
    lines = file.readlines()
    pairs = []
    for line in lines:
        line_split = line.strip('\n').split('\t')
        # same-person pair: name, image index, image index
        if len(line_split) == 3:
            line_split[-1] = line_split[-1].zfill(4)
            line_split[-2] = line_split[-2].zfill(4)
        # different-person pair: name, index, name, index
        if len(line_split) == 4:
            line_split[-1] = line_split[-1].zfill(4)
            line_split[-3] = line_split[-3].zfill(4)
        pairs.append(line_split)
    file.close()
    return pairs

def save2labelfile(pairs):
    file = open('label.txt', 'w')
    labellines = []
    for p in pairs:
        if len(p) == 3:
            labelline = p[0] + '/' + p[0] + '_' + p[1] + '.jpg' + '\t' \
                      + p[0] + '/' + p[0] + '_' + p[2] + '.jpg' + '\t' + '1\n'
            labellines.append(labelline)
        elif len(p) == 4:
            labelline = p[0] + '/' + p[0] + '_' + p[1] + '.jpg' + '\t' \
                      + p[2] + '/' + p[2] + '_' + p[3] + '.jpg' + '\t' + '0\n'
            labellines.append(labelline)
    file.writelines(labellines)
    file.close()

'''
Usage: python pair2label.py pairs.txt
'''
if __name__ == "__main__":
    if len(sys.argv) != 2:
        print "Format Error! Usage: python %s pairs.txt" % (sys.argv[0])
        sys.exit()
    pairs = get_all_images(sys.argv[1])
    save2labelfile(pairs)
    print "Done!"
```
The following script then uses Caffe's Python interface to extract features and run the verification.
```python
# -*- coding: utf-8 -*-
"""
Created on Mon Apr 20 16:55:55 2015
@author: wujiyang
@brief: verify the fine-tuned network on the LFW database
"""
import numpy as np
import skimage.io
import skimage.transform

caffe_root = '/home/wujiyang/caffe/'
import sys
sys.path.insert(0, caffe_root + 'python')
import caffe
import sklearn.metrics.pairwise as pw

def initilize():
    # load the network from the deploy definition and the trained weights
    print 'model initializing...'
    deployPrototxt = "./vgg-face-deploy.prototxt"
    modelFile = "./vgg-face.caffemodel"
    caffe.set_mode_gpu()
    caffe.set_device(0)
    net = caffe.Net(deployPrototxt, modelFile, caffe.TEST)
    return net

def read_imagelist(labelfile):
    '''
    @brief: read the image pairs listed in labelfile into arrays
    @param: labelfile  image pair list file
    @return: two 4-D arrays (left/right images) and the label list
    '''
    file = open(labelfile)
    lines = file.readlines()
    test_num = len(lines)
    file.close()
    x = np.empty((test_num, 3, 224, 224))
    y = np.empty((test_num, 3, 224, 224))
    labels = []
    i = 0
    for line in lines:
        path = line.strip('\n').split('\t')
        # read left image
        filename = path[0]
        img = skimage.io.imread(filename, as_grey=False)
        image = skimage.transform.resize(img, (224, 224)) * 255
        if image.ndim < 3:
            print 'gray:' + filename
            x[i, 0, :, :] = image[:, :]
            x[i, 1, :, :] = image[:, :]
            x[i, 2, :, :] = image[:, :]
        else:
            x[i, 0, :, :] = image[:, :, 0]
            x[i, 1, :, :] = image[:, :, 1]
            x[i, 2, :, :] = image[:, :, 2]
        # read right image
        filename = path[1]
        img = skimage.io.imread(filename, as_grey=False)
        image = skimage.transform.resize(img, (224, 224)) * 255
        if image.ndim < 3:
            print 'gray:' + filename
            y[i, 0, :, :] = image[:, :]
            y[i, 1, :, :] = image[:, :]
            y[i, 2, :, :] = image[:, :]
        else:
            y[i, 0, :, :] = image[:, :, 0]
            y[i, 1, :, :] = image[:, :, 1]
            y[i, 2, :, :] = image[:, :, 2]
        # read label
        labels.append(int(path[2]))
        i = i + 1
    return x, y, labels

def extractFeature(leftdata, rightdata):
    # extract fc7 features for the left images; 'data' is the input layer name
    test_num = np.shape(leftdata)[0]
    out = net.forward_all(data=leftdata)
    feature1 = np.float64(out['fc7'])
    featureleft = np.reshape(feature1, (test_num, 4096))
    #np.savetxt('feature1.txt', feature1, delimiter=',')
    # extract fc7 features for the right images
    out = net.forward_all(data=rightdata)
    feature2 = np.float64(out['fc7'])
    featureright = np.reshape(feature2, (test_num, 4096))
    #np.savetxt('feature2.txt', feature2, delimiter=',')
    return featureleft, featureright

def calculate_accuracy(distance, labels, num):
    '''
    Sweep the threshold from 0.1 to 0.9 in steps of 0.001 and return the
    highest accuracy together with the threshold that achieves it.
    '''
    accuracy = {}
    predict = np.empty((num,))
    threshold = 0.1
    while threshold <= 0.9:
        for i in range(num):
            if distance[i] >= threshold:
                predict[i] = 0
            else:
                predict[i] = 1
        predict_right = 0.0
        for i in range(num):
            if predict[i] == labels[i]:
                predict_right += 1.0
        current_accuracy = predict_right / num
        accuracy[str(threshold)] = current_accuracy
        threshold = threshold + 0.001
    # sort the dict by value, descending
    temp = sorted(accuracy.items(), key=lambda d: d[1], reverse=True)
    highestAccuracy = temp[0][1]
    thres = temp[0][0]
    return highestAccuracy, thres

if __name__ == '__main__':
    # initialize the model
    net = initilize()
    print 'network input :', net.inputs
    print 'network output: ', net.outputs
    # read the image pairs (list file in the label.txt format built above)
    leftdata, rightdata, labels = read_imagelist('tmp.txt')
    # extract features
    featureleft, featureright = extractFeature(leftdata, rightdata)
    # cosine distance between the two features of each pair
    test_num = len(labels)
    mt = pw.pairwise_distances(featureleft, featureright, metric='cosine')
    distance = np.empty((test_num,))
    for i in range(test_num):
        distance[i] = mt[i][i]
    print 'Distance before normalization:\n', distance
    print 'Distance max:', np.max(distance), ' Distance min:', np.min(distance), '\n'
    # normalize distances to [0, 1] so they are comparable with the 0/1 labels
    distance_norm = np.empty((test_num,))
    for i in range(test_num):
        distance_norm[i] = (distance[i] - np.min(distance)) / (np.max(distance) - np.min(distance))
    print 'Distance after normalization:\n', distance_norm
    print 'Distance_norm max:', np.max(distance_norm), ' Distance_norm min:', np.min(distance_norm), '\n'
    # compute accuracy from distance_norm and the labels
    highestAccuracy, threshold = calculate_accuracy(distance_norm, labels, len(labels))
    print "the highest accuracy is : %.4f, and the corresponding threshold is %s \n" % (highestAccuracy, threshold)
```
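As an aside, the hand-rolled threshold sweep in calculate_accuracy can be replaced with scikit-learn's ROC utilities, which evaluate every distinct threshold in one call. The following is a minimal sketch, not part of the original post; it assumes featureleft, featureright, and labels from the script above are already in memory, and that positive and negative pairs are balanced (as in the standard LFW protocol).

```python
import numpy as np
from sklearn.metrics import roc_curve, auc
import sklearn.metrics.pairwise as pw

# cosine similarity = 1 - cosine distance; higher means "same person",
# which matches the score convention roc_curve expects
scores = 1.0 - np.diag(pw.pairwise_distances(featureleft, featureright,
                                             metric='cosine'))
fpr, tpr, thresholds = roc_curve(labels, scores)
# accuracy at each candidate threshold, assuming balanced pos/neg pairs
accuracy = (tpr + (1.0 - fpr)) / 2.0
best = np.argmax(accuracy)
print 'AUC: %.4f' % auc(fpr, tpr)
print 'best accuracy %.4f at similarity threshold %.4f' % (accuracy[best], thresholds[best])
```

Working on raw cosine similarities also avoids the min-max normalization step, whose output depends on the particular batch of pairs being evaluated.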