I am training an OpenCV Normal Bayes classifier on various features of people for re-identification. The classifier appears to be working and predict() gives an output, but the predictProb() function returns very large values for some feature sets and 0 for others.
Data of the form: (this is one row of the matrix)
0.2523284, 0.027687496, 0.0042156572, 0.0018417788, 5.1221455e-06, 0.00030639244, 3.1830291e-07;
gives probabilities on the order of 1.0710769e+21
Data of the form: (this is one row of the matrix)
0.95060945, 0.0046671298, 0.0089099752, 0.0036064184, 0.0046671298, 0.010394971, 0.053247705, 0.096736871, 0.1947466, 0.18817018, 0.06449125, 0.041579884, 0.047732007, 0.040094886, 0.02821492, 0;
gives probabilities of 0
Full example data :
[0.24554592, 0.030083558, 0.001872576, 0.00072512549, 8.3897498e-07, 0.00011867422, 1.0044554e-07;
0.24195954, 0.027969711, 0.0020806724, 0.00063155976, 7.0596019e-07, 9.0558395e-05, 1.6049883e-07;
0.2330683, 0.02543075, 0.0010663052, 0.00027030293, 1.3644078e-07, 3.482226e-05, 4.942391e-08;
0.24967246, 0.033870667, 0.00022691712, 0.00010154386, 1.5067682e-08, 1.6858012e-05, 3.248853e-09;
0.24420726, 0.03094162, 4.4175573e-05, 2.8687055e-05, 9.5080277e-10, 4.590403e-06, 3.7264986e-10;
0.24473086, 0.030979391, 0.0012599876, 0.00030005348, 1.7481725e-07, 3.5889527e-05, -5.8964279e-08;
0.24217123, 0.028877074, 0.0016002082, 0.00047409788, 4.0964099e-07, 6.9703383e-05, -5.2118221e-08;
0.25637382, 0.035073806, 0.0022825941, 0.00075897016, 9.9729755e-07, 0.00013538048, -5.7738678e-08;
0.26366508, 0.036608532, 0.0038970483, 0.0017170503, 4.4291824e-06, 0.00032849875, 3.3230174e-07;
0.24577436, 0.027840961, 0.0022851552, 0.00086983148, 1.2244786e-06, 0.00014513655, 6.7524446e-08;
0.29990238, 0.057243217, 0.001743517, 0.00086817687, 1.0518384e-06, 0.0002015764, -1.8586159e-07;
0.2999571, 0.057276569, 0.0017433959, 0.00086798106, 1.0514048e-06, 0.0002015565, -1.8602559e-07;
0.29948989, 0.0570211, 0.001791846, 0.00088352599, 1.0937558e-06, 0.00020415963, -1.9881658e-07;
0.2999571, 0.057276569, 0.0017433959, 0.00086798106, 1.0514048e-06, 0.0002015565, -1.8602559e-07;
0.29948989, 0.0570211, 0.001791846, 0.00088352599, 1.0937558e-06, 0.00020415963, -1.9881658e-07;
0.29948989, 0.0570211, 0.001791846, 0.00088352599, 1.0937558e-06, 0.00020415963, -1.9881658e-07;
0.29948989, 0.0570211, 0.001791846, 0.00088352599, 1.0937558e-06, 0.00020415963, -1.9881658e-07;
0.29948989, 0.0570211, 0.001791846, 0.00088352599, 1.0937558e-06, 0.00020415963, -1.9881658e-07;
0.29948989, 0.0570211, 0.001791846, 0.00088352599, 1.0937558e-06, 0.00020415963, -1.9881658e-07;
0.29948989, 0.0570211, 0.001791846, 0.00088352599, 1.0937558e-06, 0.00020415963, -1.9881658e-07;
0.26770356, 0.037844498, 0.0025187179, 0.00095610513, 1.4737503e-06, 0.00017293372, 1.7160167e-07;
0.27160931, 0.038864531, 0.0042026751, 0.0016995093, 4.5410925e-06, 0.00032149046, 9.1359716e-08;
0.27844539, 0.041910287, 0.0040488713, 0.0017748536, 4.7307567e-06, 0.00035996895, -5.0703198e-07;
0.25730094, 0.033653006, 0.001680501, 0.0008100156, 9.3013892e-07, 0.00014856602, -1.6727309e-07;
0.24316898, 0.029878205, 0.00053204619, 0.00023361837, 8.01667e-08, 3.8614409e-05, -1.8895969e-08;
0.25522304, 0.036376934, 2.5822179e-05, 4.4587466e-07, -1.0168869e-12, 6.5386686e-08, 1.1202072e-12;
0.25790453, 0.034891039, 0.00034801796, 1.0062397e-05, 5.0446247e-10, 1.6149079e-06, -3.163714e-10;
0.24671364, 0.028603083, 0.00054841745, 1.7253473e-05, -1.111465e-09, -2.9053133e-06, 1.2575164e-09]
Associated responses:
[0;
0;
0;
0;
0;
0;
0;
0;
0;
0;
1;
1;
1;
1;
1;
1;
1;
1;
1;
1;
2;
2;
2;
2;
2;
2;
2;
2]
Each response value identifies a different target, and 10 feature vectors are stored per target (only 8 for target 2, as it has only been seen 8 times in this iteration).
When passed a new feature vector, the output is given as:
[0] //the target it has been classified as
[1.4380369e+21, 0, 5.2847116e+19] //the probabilities of all three targets
As previously stated, when using different features, the probabilities become all zero.
The code for this section is below:
Ptr<NormalBayesClassifier> bayesActive;
Ptr<TrainData> trainData;
Mat data;
Mat responses;
Mat outputs;
Mat probabilities;
/*
Data and responses populated - code omitted here
*/
int nsamples_all = data.rows;
data.convertTo(data, CV_32F);
responses.convertTo(responses, CV_32F);
Mat sample_idx = Mat::zeros( 1, data.rows, CV_8U );
sample_idx = sample_idx.colRange(0, nsamples_all);
sample_idx.setTo(Scalar::all(1));
int nvars = data.cols;
Mat var_type( nvars + 1, 1, CV_8U );
var_type.setTo(Scalar::all(VAR_ORDERED));
var_type.at<uchar>(nvars) = VAR_CATEGORICAL;
trainData = TrainData::create(data, ROW_SAMPLE, responses, noArray(), sample_idx, noArray(), var_type);
cout << data << endl;
cout << responses << endl;
bayes = NormalBayesClassifier::create();
bayes->train(trainData);
bayes->predictProb(feature,outputs,probabilities);
cout << outputs << endl;
cout << probabilities << endl;