View Javadoc

1   package de.bwaldvogel.liblinear;
2   
3   import static de.bwaldvogel.liblinear.Linear.copyOf;
4   import static de.bwaldvogel.liblinear.Linear.info;
5   import static de.bwaldvogel.liblinear.Linear.swap;
6   
7   
8   /**
9    * A coordinate descent algorithm for
10   * multi-class support vector machines by Crammer and Singer
11   *
12   * <pre>
13   * min_{\alpha} 0.5 \sum_m ||w_m(\alpha)||^2 + \sum_i \sum_m e^m_i alpha^m_i
14   * s.t. \alpha^m_i <= C^m_i \forall m,i , \sum_m \alpha^m_i=0 \forall i
15   *
16   * where e^m_i = 0 if y_i = m,
17   * e^m_i = 1 if y_i != m,
18   * C^m_i = C if m = y_i,
19   * C^m_i = 0 if m != y_i,
20   * and w_m(\alpha) = \sum_i \alpha^m_i x_i
21   *
22   * Given:
23   * x, y, C
24   * eps is the stopping tolerance
25   *
26   * solution will be put in w
27   *
28   * See Appendix of LIBLINEAR paper, Fan et al. (2008)
29   * </pre>
30   */
31  class SolverMCSVM_CS {
32  
33      private final double[] B;
34      private final double[] C;
35      private final double   eps;
36      private final double[] G;
37      private final int      max_iter;
38      private final int      w_size, l;
39      private final int      nr_class;
40      private final Problem  prob;
41  
42      public SolverMCSVM_CS( Problem prob, int nr_class, double[] C ) {
43          this(prob, nr_class, C, 0.1);
44      }
45  
46      public SolverMCSVM_CS( Problem prob, int nr_class, double[] C, double eps ) {
47          this(prob, nr_class, C, eps, 100000);
48      }
49  
50  
51      public SolverMCSVM_CS( Problem prob, int nr_class, double[] weighted_C, double eps, int max_iter ) {
52          this.w_size = prob.n;
53          this.l = prob.l;
54          this.nr_class = nr_class;
55          this.eps = eps;
56          this.max_iter = max_iter;
57          this.prob = prob;
58          this.C = weighted_C;
59          this.B = new double[nr_class];
60          this.G = new double[nr_class];
61      }
62  
63      private int GETI(int i) {
64          return prob.y[i];
65      }
66  
67      private boolean be_shrunk(int i, int m, int yi, double alpha_i, double minG) {
68          double bound = 0;
69          if (m == yi) bound = C[GETI(i)];
70          if (alpha_i == bound && G[m] < minG) return true;
71          return false;
72      }
73  
74      public void solve(double[] w) {
75          int i, m, s;
76          int iter = 0;
77          double[] alpha = new double[l * nr_class];
78          double[] alpha_new = new double[nr_class];
79          int[] index = new int[l];
80          double[] QD = new double[l];
81          int[] d_ind = new int[nr_class];
82          double[] d_val = new double[nr_class];
83          int[] alpha_index = new int[nr_class * l];
84          int[] y_index = new int[l];
85          int active_size = l;
86          int[] active_size_i = new int[l];
87          double eps_shrink = Math.max(10.0 * eps, 1.0); // stopping tolerance for shrinking
88          boolean start_from_all = true;
89          // initial
90          for (i = 0; i < l * nr_class; i++)
91              alpha[i] = 0;
92          for (i = 0; i < w_size * nr_class; i++)
93              w[i] = 0;
94          for (i = 0; i < l; i++) {
95              for (m = 0; m < nr_class; m++)
96                  alpha_index[i * nr_class + m] = m;
97              QD[i] = 0;
98              for (FeatureNode xi : prob.x[i]) {
99                  QD[i] += xi.value * xi.value;
100             }
101             active_size_i[i] = nr_class;
102             y_index[i] = prob.y[i];
103             index[i] = i;
104         }
105 
106         DoubleArrayPointer alpha_i = new DoubleArrayPointer(alpha, 0);
107         IntArrayPointer alpha_index_i = new IntArrayPointer(alpha_index, 0);
108 
109         while (iter < max_iter) {
110             double stopping = Double.NEGATIVE_INFINITY;
111 
112             for (i = 0; i < active_size; i++) {
113                 // int j = i+rand()%(active_size-i);
114                 int j = i + Linear.random.nextInt(active_size - i);
115                 swap(index, i, j);
116             }
117             for (s = 0; s < active_size; s++) {
118 
119                 i = index[s];
120                 double Ai = QD[i];
121                 // double *alpha_i = &alpha[i*nr_class];
122                 alpha_i.setOffset(i * nr_class);
123 
124                 // int *alpha_index_i = &alpha_index[i*nr_class];
125                 alpha_index_i.setOffset(i * nr_class);
126 
127                 if (Ai > 0) {
128                     for (m = 0; m < active_size_i[i]; m++)
129                         G[m] = 1;
130                     if (y_index[i] < active_size_i[i]) G[y_index[i]] = 0;
131 
132                     for (FeatureNode xi : prob.x[i]) {
133                         // double *w_i = &w[(xi.index-1)*nr_class];
134                         int w_offset = (xi.index - 1) * nr_class;
135                         for (m = 0; m < active_size_i[i]; m++)
136                             // G[m] += w_i[alpha_index_i[m]]*(xi.value);
137                             G[m] += w[w_offset + alpha_index_i.get(m)] * (xi.value);
138 
139                     }
140 
141                     double minG = Double.POSITIVE_INFINITY;
142                     double maxG = Double.NEGATIVE_INFINITY;
143                     for (m = 0; m < active_size_i[i]; m++) {
144                         if (alpha_i.get(alpha_index_i.get(m)) < 0 && G[m] < minG) minG = G[m];
145                         if (G[m] > maxG) maxG = G[m];
146                     }
147                     if (y_index[i] < active_size_i[i]) {
148                         if (alpha_i.get(prob.y[i]) < C[GETI(i)] && G[y_index[i]] < minG) {
149                             minG = G[y_index[i]];
150                         }
151                     }
152 
153                     for (m = 0; m < active_size_i[i]; m++) {
154                         if (be_shrunk(i, m, y_index[i], alpha_i.get(alpha_index_i.get(m)), minG)) {
155                             active_size_i[i]--;
156                             while (active_size_i[i] > m) {
157                                 if (!be_shrunk(i, active_size_i[i], y_index[i], alpha_i.get(alpha_index_i.get(active_size_i[i])), minG)) {
158                                     swap(alpha_index_i, m, active_size_i[i]);
159                                     swap(G, m, active_size_i[i]);
160                                     if (y_index[i] == active_size_i[i])
161                                         y_index[i] = m;
162                                     else if (y_index[i] == m) y_index[i] = active_size_i[i];
163                                     break;
164                                 }
165                                 active_size_i[i]--;
166                             }
167                         }
168                     }
169 
170                     if (active_size_i[i] <= 1) {
171                         active_size--;
172                         swap(index, s, active_size);
173                         s--;
174                         continue;
175                     }
176 
177                     if (maxG - minG <= 1e-12)
178                         continue;
179                     else
180                         stopping = Math.max(maxG - minG, stopping);
181 
182                     for (m = 0; m < active_size_i[i]; m++)
183                         B[m] = G[m] - Ai * alpha_i.get(alpha_index_i.get(m));
184 
185                     solve_sub_problem(Ai, y_index[i], C[GETI(i)], active_size_i[i], alpha_new);
186                     int nz_d = 0;
187                     for (m = 0; m < active_size_i[i]; m++) {
188                         double d = alpha_new[m] - alpha_i.get(alpha_index_i.get(m));
189                         alpha_i.set(alpha_index_i.get(m), alpha_new[m]);
190                         if (Math.abs(d) >= 1e-12) {
191                             d_ind[nz_d] = alpha_index_i.get(m);
192                             d_val[nz_d] = d;
193                             nz_d++;
194                         }
195                     }
196 
197                     for (FeatureNode xi : prob.x[i]) {
198                         // double *w_i = &w[(xi->index-1)*nr_class];
199                         int w_offset = (xi.index - 1) * nr_class;
200                         for (m = 0; m < nz_d; m++) {
201                             w[w_offset + d_ind[m]] += d_val[m] * xi.value;
202                         }
203                     }
204                 }
205             }
206 
207             iter++;
208 
209             if (iter % 10 == 0) {
210                 info(".");
211             }
212 
213             if (stopping < eps_shrink) {
214                 if (stopping < eps && start_from_all == true)
215                     break;
216                 else {
217                     active_size = l;
218                     for (i = 0; i < l; i++)
219                         active_size_i[i] = nr_class;
220                     info("*");
221                     eps_shrink = Math.max(eps_shrink / 2, eps);
222                     start_from_all = true;
223                 }
224             } else
225                 start_from_all = false;
226         }
227 
228         info("%noptimization finished, #iter = %d%n", iter);
229         if (iter >= max_iter) info("%nWARNING: reaching max number of iterations%n");
230 
231         // calculate objective value
232         double v = 0;
233         int nSV = 0;
234         for (i = 0; i < w_size * nr_class; i++)
235             v += w[i] * w[i];
236         v = 0.5 * v;
237         for (i = 0; i < l * nr_class; i++) {
238             v += alpha[i];
239             if (Math.abs(alpha[i]) > 0) nSV++;
240         }
241         for (i = 0; i < l; i++)
242             v -= alpha[i * nr_class + prob.y[i]];
243         info("Objective value = %f%n", v);
244         info("nSV = %d%n", nSV);
245 
246     }
247 
248     private void solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double[] alpha_new) {
249 
250         int r;
251         assert active_i <= B.length; // no padding
252         double[] D = copyOf(B, active_i);
253         // clone(D, B, active_i);
254 
255         if (yi < active_i) D[yi] += A_i * C_yi;
256 
257         // qsort(D, active_i, sizeof(double), compare_double);
258         ArraySorter.reversedMergesort(D);
259 
260         double beta = D[0] - A_i * C_yi;
261         for (r = 1; r < active_i && beta < r * D[r]; r++)
262             beta += D[r];
263 
264         beta /= r;
265         for (r = 0; r < active_i; r++) {
266             if (r == yi)
267                 alpha_new[r] = Math.min(C_yi, (beta - B[r]) / A_i);
268             else
269                 alpha_new[r] = Math.min(0.0, (beta - B[r]) / A_i);
270         }
271     }
272 }