CrossValidation.Create<TModel, TLearner, TInput, TOutput> Method
Namespace: Accord.MachineLearning
public static CrossValidation<TModel, TLearner, TInput, TOutput> Create<TModel, TLearner, TInput, TOutput>( int k, CreateLearnerFromSubset<TLearner, TInput, TOutput> learner, LearnNewModel<TLearner, TInput, TOutput, TModel> fit, ComputeLoss<TOutput, SetResult<TModel>> loss, TInput[] x, TOutput[] y ) where TModel : class, Object, ITransform<TInput, TOutput> where TLearner : class, Object, ISupervisedLearning<TModel, TInput, TOutput>
// Fix the random seed so that repeated runs give the same numbers
Accord.Math.Random.Generator.Seed = 0;

// Sample: using cross-validation to estimate how well a linear
// Support Vector Machine performs on a small binary problem.

// The problem is XOR: the same four points, repeated four times.
double[][] samples =
{
    new double[] { -1, -1 }, new double[] { 1, -1 }, new double[] { -1, 1 }, new double[] { 1, 1 },
    new double[] { -1, -1 }, new double[] { 1, -1 }, new double[] { -1, 1 }, new double[] { 1, 1 },
    new double[] { -1, -1 }, new double[] { 1, -1 }, new double[] { -1, 1 }, new double[] { 1, 1 },
    new double[] { -1, -1 }, new double[] { 1, -1 }, new double[] { -1, 1 }, new double[] { 1, 1 },
};

// XOR label of each sample above, in the same order
int[] labels =
{
    -1, 1, 1, -1,
    -1, 1, 1, -1,
    -1, 1, 1, -1,
    -1, 1, 1, -1,
};

// Set up the cross-validation procedure for a linear SVM over double[] inputs
var validator = new CrossValidation<SupportVectorMachine<Linear, double[]>, double[]>();

// Number of folds
validator.K = 3;

// Factory that builds the learning algorithm used on each fold
validator.Learner = (subset) => new SequentialMinimalOptimization<Linear, double[]>()
{
    Complexity = 100
};

// How the performance of each learned model is scored
validator.Loss = (expected, actual, p) => new ZeroOneLoss(expected).Loss(actual);

// Do not force the classes to be balanced across folds
validator.Stratify = false;

// Optionally restrict the degree of parallelism
validator.ParallelOptions.MaxDegreeOfParallelism = 1;

// Run the cross-validation
var outcome = validator.Learn(samples, labels);

// Read back the measured performance
double meanTrainingError = outcome.Training.Mean;     // should be 0.30606060606060609 (+/- var. 0.083498622589531682)
double meanValidationError = outcome.Validation.Mean; // should be 0.3666666666666667 (+/- var. 0.023333333333333334)

// Optionally, build one confusion matrix aggregated over the validation sets
GeneralConfusionMatrix confusion = outcome.ToConfusionMatrix(samples, labels);
double overallAccuracy = confusion.Accuracy; // should be 0.625
double overallError = confusion.Error;       // should be 0.375
// Fix the random seed so that repeated runs give the same numbers
Accord.Math.Random.Generator.Seed = 0;

// Sample: using cross-validation to estimate the performance
// of a classifier based on Hidden Markov Models.

// Observation sequences used as test data, five per class
int[][] sequences = new int[][]
{
    // Class 0
    new int[] { 0,1,1,0 },
    new int[] { 0,0,1,0 },
    new int[] { 0,1,1,1,0 },
    new int[] { 0,1,1,1,0 },
    new int[] { 0,1,1,0 },

    // Class 1
    new int[] { 0,0,0,0,0 },
    new int[] { 0,0,0,1,0 },
    new int[] { 0,0,0,0,0 },
    new int[] { 0,0,0 },
    new int[] { 0,0,0,0 },

    // Class 2
    new int[] { 1,0,0,1 },
    new int[] { 1,1,0,1 },
    new int[] { 1,0,0,0,1 },
    new int[] { 1,0,1 },
    new int[] { 1,1,0,1 },
};

// Class label of each sequence above, in the same order
int[] labels = new int[]
{
    0,0,0,0,0, // first five sequences belong to class 0
    1,1,1,1,1, // middle five sequences belong to class 1
    2,2,2,2,2, // last five sequences belong to class 2
};

// Set up the cross-validation procedure for a HiddenMarkovClassifier over int[] inputs
var validator = new CrossValidation<HiddenMarkovClassifier, int[]>();

// Number of folds
validator.K = 3;

// Factory that builds the classifier teacher used on each fold; every
// inner model is trained by Baum-Welch with three states
validator.Learner = (subset) => new HiddenMarkovClassifierLearning()
{
    Learner = (modelIndex) => new BaumWelchLearning() { NumberOfStates = 3 }
};

// Score each fold through a confusion matrix: the matrix variance is stored
// in the fold result and Kappa is both stored and returned as the value
validator.Loss = (expected, actual, p) =>
{
    var foldMatrix = new GeneralConfusionMatrix(
        classes: p.Model.NumberOfClasses,
        expected: expected,
        predicted: actual);

    p.Variance = foldMatrix.Variance;
    return p.Value = foldMatrix.Kappa;
};

validator.Stratify = false;

// Optionally restrict the degree of parallelism
validator.ParallelOptions.MaxDegreeOfParallelism = 1;

// Run the cross-validation
var outcome = validator.Learn(sequences, labels);

// Optionally, build one confusion matrix aggregated over the validation sets
GeneralConfusionMatrix confusion = outcome.ToConfusionMatrix(sequences, labels);

// Read back the measured performance
double meanTraining = outcome.Training.Mean;
double meanValidation = outcome.Validation.Mean;

double trainingVariance = outcome.Training.Variance;
double validationVariance = outcome.Validation.Variance;

double trainingPooledVariance = outcome.Training.PooledVariance;
double validationPooledVariance = outcome.Validation.PooledVariance;
// Ensure we have reproducible results
Accord.Math.Random.Generator.Seed = 0;

// Get some data to be learned. We will be using the Wisconsin
// (Diagnostic) Breast Cancer dataset, where the goal is to determine
// whether the characteristics extracted from a breast cancer exam
// correspond to a malignant or benign type of cancer:
var data = new WisconsinDiagnosticBreastCancer();
double[][] input = data.Features; // 569 samples, 30-dimensional features
int[] output = data.ClassLabels;  // 569 samples, 2 different class labels

// Let's say we want to measure the cross-validation performance of
// a decision tree with a maximum tree height of 5 and where variables
// are able to join the decision path at most 2 times during evaluation:
var cv = CrossValidation.Create(

    k: 10, // We will be using 10-fold cross validation

    learner: (p) => new C45Learning() // here we create the learning algorithm
    {
        Join = 2,
        MaxHeight = 5
    },

    // Now we have to specify how the tree performance should be measured.
    // The parameters are named (expected, actual, p), matching the order
    // used by the other cross-validation samples, so that the loss object
    // is built from the ground truth and then applied to the predictions:
    loss: (expected, actual, p) => new ZeroOneLoss(expected).Loss(actual),

    // This function can be used to perform any special
    // operations before the actual learning is done, but
    // here we will just leave it as simple as it can be:
    fit: (teacher, x, y, w) => teacher.Learn(x, y, w),

    // Finally, we have to pass the input and output data
    // that will be used in cross-validation.
    x: input, y: output
);

// After the cross-validation object has been created,
// we can call its .Learn method with the input and
// output data that will be partitioned into the folds:
var result = cv.Learn(input, output);

// We can grab some information about the problem:
int numberOfSamples = result.NumberOfSamples; // should be 569
int numberOfInputs = result.NumberOfInputs;   // should be 30
int numberOfOutputs = result.NumberOfOutputs; // should be 2

double trainingError = result.Training.Mean;     // should be 0.017771153143274855
double validationError = result.Validation.Mean; // should be 0.0755952380952381

// If desired, compute an aggregate confusion matrix for the validation sets:
GeneralConfusionMatrix gcm = result.ToConfusionMatrix(input, output);
double accuracy = gcm.Accuracy; // result should be 0.92442882249560632