1. 利用有监督的离散化算法对数据集的属性进行离散化，并保存离散化后的数据集。
import java.io.File;
import weka.filters.SupervisedFilter; import java.io.IOException; import weka.core.Instances; import weka.core.converters.CSVLoader; import weka.core.converters.ConverterUtils.DataSink; import weka.core.converters.ConverterUtils.DataSource; import weka.filters.*; import weka.filters.supervised.attribute.Discretize; //unsupervised import weka.filters.unsupervised.attribute.AddID; public class preprocess { /** * @param args * @throws Exception */ public static void main(String[] args) throws Exception { // TODO Auto-generated method stub Instances instances = DataSource.read("C:/Users/PC/Desktop/segment-challenge.arff"); instances.setClassIndex(instances.numAttributes() - 1); Discretize discretize = new Discretize(); System.err.println(instances.toSummaryString()); AddID filter = new AddID(); String[] options = new String[6]; options[0] = "-B"; options[1] = "8"; options[2] = "-M"; options[3] = "-1.0"; options[4] = "-R"; options[5] = "2-last"; discretize.setOptions(options); discretize.setInputFormat(instances); Instances newInstances2 = Filter.useFilter(instances, discretize); System.err.println(newInstances2.toSummaryString()); DataSink.write("data/1.arff", newInstances2); }}
2. 利用 Weka 中的算法对 segment-challenge.arff 数据集进行标准化处理，并保存标准化后的数据集。
import java.io.File; import weka.filters.SupervisedFilter; import java.io.IOException; import weka.core.Instances; import weka.core.converters.CSVLoader; import weka.core.converters.ConverterUtils.DataSink; import weka.core.converters.ConverterUtils.DataSource; import weka.filters.*; import weka.filters.unsupervised.attribute.*; //unsupervised import weka.filters.unsupervised.attribute.AddID; public class preprocess { /** * @param args * @throws Exception */ public static void main(String[] args) throws Exception { // TODO Auto-generated method stub Instances instances = DataSource.read("C:/Users/PC/Desktop/segment-challenge.arff"); instances.setClassIndex(instances.numAttributes() - 1); Normalize normalize = new Normalize (); System.err.println(instances.toSummaryString()); String[] options = new String[6]; options[0] = "-B"; options[1] = "8"; options[2] = "-M"; options[3] = "-1.0"; options[4] = "-R"; options[5] = "2-last"; normalize.setOptions(options); normalize.setInputFormat(instances); Instances newInstances2 = Filter.useFilter(instances, normalize); System.err.println(newInstances2.toSummaryString()); DataSink.write("C:/Users/PC/Desktop/2.arff", newInstances2); } }
http://blog.sina.com.cn/s/blog_6f611c30010185kz.html
http://blog.163.com/shen_960124/blog/static/60730984201502884651349/