weka数据集预处理

    xiaoxiao2023-03-24  2

    1.     利用有监督的离散算法对数据集的属性进行离散,并保存离散后的数据集;

    import java.io.File;

    import weka.filters.SupervisedFilter; import java.io.IOException; import weka.core.Instances; import weka.core.converters.CSVLoader; import weka.core.converters.ConverterUtils.DataSink; import weka.core.converters.ConverterUtils.DataSource; import weka.filters.*; import weka.filters.supervised.attribute.Discretize; //unsupervised import weka.filters.unsupervised.attribute.AddID; public class preprocess { /** * @param args * @throws Exception */ public static void main(String[] args) throws Exception { // TODO Auto-generated method stub Instances instances = DataSource.read("C:/Users/PC/Desktop/segment-challenge.arff"); instances.setClassIndex(instances.numAttributes() - 1);  Discretize discretize = new Discretize(); System.err.println(instances.toSummaryString());  AddID filter = new AddID();  String[] options = new String[6];  options[0] = "-B";  options[1] = "8";  options[2] = "-M";  options[3] = "-1.0";  options[4] = "-R";  options[5] = "2-last";  discretize.setOptions(options);  discretize.setInputFormat(instances);  Instances newInstances2 = Filter.useFilter(instances, discretize);  System.err.println(newInstances2.toSummaryString());  DataSink.write("data/1.arff", newInstances2);  }

    }

    2.     利用 Weka 中的算法对 segment-challenge.arff 数据集进行标准化处理,并保存标准化后的数据集;

    import java.io.File; import weka.filters.SupervisedFilter; import java.io.IOException; import weka.core.Instances; import weka.core.converters.CSVLoader; import weka.core.converters.ConverterUtils.DataSink; import weka.core.converters.ConverterUtils.DataSource; import weka.filters.*; import weka.filters.unsupervised.attribute.*; //unsupervised import weka.filters.unsupervised.attribute.AddID; public class preprocess { /** * @param args * @throws Exception */ public static void main(String[] args) throws Exception { // TODO Auto-generated method stub Instances instances = DataSource.read("C:/Users/PC/Desktop/segment-challenge.arff"); instances.setClassIndex(instances.numAttributes() - 1);  Normalize normalize  = new Normalize (); System.err.println(instances.toSummaryString());  String[] options = new String[6];  options[0] = "-B";  options[1] = "8";  options[2] = "-M";  options[3] = "-1.0";  options[4] = "-R";  options[5] = "2-last";  normalize.setOptions(options);  normalize.setInputFormat(instances);  Instances newInstances2 = Filter.useFilter(instances, normalize);  System.err.println(newInstances2.toSummaryString());  DataSink.write("C:/Users/PC/Desktop/2.arff", newInstances2);  } }

    http://blog.sina.com.cn/s/blog_6f611c30010185kz.html

    http://blog.163.com/shen_960124/blog/static/60730984201502884651349/

    转载请注明原文地址: https://ju.6miu.com/read-1200049.html
    最新回复(0)