Hitachi Vantara Pentaho Community Forums
Results 1 to 2 of 2

Thread: Help understanding and implementing percentage split for evaluation using WEKA API

  1. #1
    Join Date
    Mar 2016
    Posts
    1

    Default Help please percentage split not evaluating instances

    package SpamD_Training;
    import weka.classifiers.trees.J48;
    import java.io.BufferedReader;
    import java.io.FileNotFoundException;
    import java.io.FileReader;
    import java.io.IOException;
    import java.util.Random;
    import weka.classifiers.Evaluation;
    import weka.core.Instance;
    import weka.core.Instances;
    import weka.core.Utils;
    import java.lang.Math;


    public class J48Classifiertestnew
    {
    static String J48ModelPath = "/Users/Z/Desktop/Pro/Models/Spambase/J48.model";

    public static void main(String[] args) throws FileNotFoundException, IOException, Exception {

    double percent = 60.0;
    BufferedReader breader;
    breader =new BufferedReader (new FileReader("/Users/Z/Dropbox/Pro/datasets/spam_dataset.arff"));
    Instances inst = new Instances(breader);

    inst.setClassIndex(inst.numAttributes() - 1); //set the last column as the class attribute
    ///PARAMETERS are set

    J48 J4tree = new J48();
    int seed=1;
    Random rnd = new Random(seed);
    inst.randomize(rnd);

    String[] options;

    options = weka.core.Utils.splitOptions("-U -M 2");
    J4tree.setOptions(options);

    System.out.println("Performing " +percent +"%split evaluation");



    int trainSize = (int) Math.round(inst.numInstances()*percent*100);

    int testSize = inst.numInstances()-trainSize;


    Instances train = new Instances (inst, trainSize);
    Instances test = new Instances (inst, trainSize,testSize);

    J4tree.buildClassifier(train);

    Evaluation eval = new Evaluation(train );
    eval.evaluateModel(J4tree, test);


    breader.close();


    System.out.println(eval.toSummaryString("\nResults\n======\n", false));


    System.out.println("training performance results of: " + J4tree.getClass().getSimpleName()
    + "\n---------------------------------");
    System.out.println(eval.toSummaryString("\nResults",true));
    System.out.println("fmeasure: " +eval.fMeasure(1) + " Precision: " + eval.precision(1)+ " Recall: "+ eval.recall(1));
    System.out.println(eval.toMatrixString());
    System.out.println(eval.toClassDetailsString());
    System.out.println("AUC = " +eval.areaUnderROC(1));
    System.out.println("Training complete, please validate trained model");
    // weka.core.SerializationHelper.write(J48ModelPath,J4tree);

    }
    }
    Last edited by zibz2008; 04-19-2016 at 05:57 AM.

  2. #2
    Join Date
    Aug 2006
    Posts
    1,741

    Default

    Your training size computation needs to be:

    int trainSize = (int) Math.round(inst.numInstances()*percent/100);

    Your training set construction needs to be:

    Instances train = new Instances (inst, 0, trainSize);

    Cheers,
    Mark.

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •  
Privacy Policy | Legal Notices | Safe Harbor Privacy Policy

Copyright © 2005 - 2019 Hitachi Vantara Corporation. All Rights Reserved.