I want to do 10-fold cross-validation for my one-vs-all support vector machine classification in MATLAB.
I tried to somehow mix these two related answers:
- Multi-class classification in LIBSVM
- Example of 10-fold SVM classification in MATLAB
but as I am new to MATLAB and its syntax, I haven't managed to make it work so far.
On the other hand, I found just the following few lines about cross-validation in the LIBSVM README file, and I could not find any related example there:
option -v randomly splits the data into n parts and calculates cross validation accuracy/mean squared error on them.
See the LIBSVM FAQ for the meaning of outputs.
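If I understand that correctly, the call would look something like the line below (just my guess based on the README, assuming the MEX interface is compiled under its default name svmtrain):
acc = svmtrain(labels, data, '-s 0 -t 2 -c 1 -v 10');  %# with -v, returns CV accuracy instead of a model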
Could anyone provide me with an example of 10-fold cross-validation and one-vs-all classification?
Mainly, there are two reasons we do cross-validation:
- as a testing method, which gives us a nearly unbiased estimate of the generalization power of our model (by avoiding overfitting)
- as a way of model selection (e.g.: find the best C and gamma parameters over the training data; see this post for an example)
For the first case, which is the one we are interested in, the process involves training k models, one for each fold, and then training one final model over the entire training set. We report the average accuracy over the k folds.
Now since we are using a one-vs-all approach to handle the multi-class problem, each model consists of N support vector machines (one for each class).
The following are wrapper functions implementing the one-vs-all approach:
function mdl = libsvmtrain_ova(y, X, opts)
    if nargin < 3, opts = ''; end

    %# classes
    labels = unique(y);
    numLabels = numel(labels);

    %# train one-against-all models
    models = cell(numLabels,1);
    for k=1:numLabels
        models{k} = libsvmtrain(double(y==labels(k)), X, strcat(opts,' -b 1 -q'));
    end
    mdl = struct('models',{models}, 'labels',labels);
end
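For example, assuming y holds the class labels and X the instance matrix, training is a one-liner:
mdl = libsvmtrain_ova(y, X, '-s 0 -t 2 -c 1 -g 0.25');  %# RBF kernel, C=1, gamma=0.25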
function [pred,acc,prob] = libsvmpredict_ova(y, X, mdl)
    %# classes
    labels = mdl.labels;
    numLabels = numel(labels);

    %# get probability estimates of test instances using each 1-vs-all model
    prob = zeros(size(X,1), numLabels);
    for k=1:numLabels
        [~,~,p] = libsvmpredict(double(y==labels(k)), X, mdl.models{k}, '-b 1 -q');
        prob(:,k) = p(:, mdl.models{k}.Label==1);
    end

    %# predict the class with the highest probability
    [~,pred] = max(prob, [], 2);

    %# compute classification accuracy
    acc = mean(pred == y);
end
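A matching prediction call on held-out data might look like this (yTest/XTest are placeholder names for the test split):
[pred,acc,prob] = libsvmpredict_ova(yTest, XTest, mdl);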
Next come the functions to support cross-validation:
function acc = libsvmcrossval_ova(y, X, opts, nfold, indices)
    if nargin < 3, opts = ''; end
    if nargin < 4, nfold = 10; end
    if nargin < 5, indices = crossvalidation(y, nfold); end

    %# N-fold cross-validation testing
    acc = zeros(nfold,1);
    for i=1:nfold
        testIdx = (indices == i); trainIdx = ~testIdx;
        mdl = libsvmtrain_ova(y(trainIdx), X(trainIdx,:), opts);
        [~,acc(i)] = libsvmpredict_ova(y(testIdx), X(testIdx,:), mdl);
    end
    acc = mean(acc);    %# average accuracy
end
function indices = crossvalidation(y, nfold)
    %# stratified n-fold cross-validation
    %#indices = crossvalind('Kfold', y, nfold); %# Bioinformatics toolbox
    cv = cvpartition(y, 'kfold',nfold);         %# Statistics toolbox
    indices = zeros(size(y));
    for i=1:nfold
        indices(cv.test(i)) = i;
    end
end
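If neither toolbox is available, a plain random fold assignment can stand in; here is a minimal sketch of such a fallback (my addition, note it is NOT stratified):
function indices = crossvalidation_simple(y, nfold)
    %# random, roughly balanced fold assignment 1..nfold
    indices = mod(randperm(numel(y)), nfold)' + 1;
end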
Finally, here is a simple demo to illustrate the usage:
%# load dataset
S = load('fisheriris');
data = zscore(S.meas);
labels = grp2idx(S.species);
%# cross-validate using one-vs-all approach
opts = '-s 0 -t 2 -c 1 -g 0.25'; %# libsvm training options
nfold = 10;
acc = libsvmcrossval_ova(labels, data, opts, nfold);
fprintf('Cross Validation Accuracy = %.4f%%\n', 100*mean(acc));
%# compute final model over the entire dataset
mdl = libsvmtrain_ova(labels, data, opts);
Compare that against the one-vs-one approach which is used by default by LIBSVM:
acc = libsvmtrain(labels, data, sprintf('%s -v %d -q',opts,nfold));
model = libsvmtrain(labels, data, strcat(opts,' -q'));
It may be confusing you that one of the two questions you linked is not about LIBSVM. You should try adjusting this answer and ignore the other one.
You should select the folds, and do the rest exactly as in the linked question. Assume the data has been loaded into data and the class labels into labels:
n = size(data,1);
ns = floor(n/10);
numLabels = numel(unique(labels));  %# number of classes
for fold=1:10,
    %# pick out the test/train indices for this fold
    if fold==1,
        testindices = ((fold-1)*ns+1):fold*ns;
        trainindices = fold*ns+1:n;
    elseif fold==10,
        testindices = ((fold-1)*ns+1):n;
        trainindices = 1:(fold-1)*ns;
    else
        testindices = ((fold-1)*ns+1):fold*ns;
        trainindices = [1:(fold-1)*ns, fold*ns+1:n];
    end

    %# use testindices only for testing and trainindices only for training
    trainLabel = labels(trainindices);
    trainData = data(trainindices,:);
    testLabel = labels(testindices);
    testData = data(testindices,:);

    %# train one-against-all models
    model = cell(numLabels,1);
    for k=1:numLabels
        model{k} = svmtrain(double(trainLabel==k), trainData, '-c 1 -g 0.2 -b 1');
    end

    %# get probability estimates of test instances using each model
    prob = zeros(size(testData,1),numLabels);
    for k=1:numLabels
        [~,~,p] = svmpredict(double(testLabel==k), testData, model{k}, '-b 1');
        prob(:,k) = p(:,model{k}.Label==1); %# probability of class==k
    end

    %# predict the class with the highest probability
    [~,pred] = max(prob,[],2);
    acc = sum(pred == testLabel) ./ numel(testLabel) %# accuracy
    C = confusionmat(testLabel, pred) %# confusion matrix
end
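Note that the loop above prints a separate accuracy for each fold; to report a single cross-validation figure you would typically collect the per-fold accuracies and average them afterwards. A minimal sketch (the accPerFold vector is my addition, not part of the original answer):
accPerFold = zeros(10,1);                                      %# before the loop
accPerFold(fold) = sum(pred == testLabel) / numel(testLabel);  %# inside the loop
fprintf('Mean CV accuracy = %.4f%%\n', 100*mean(accPerFold));  %# after the loop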