View Javadoc

1   package liblinear;
2   
3   import static liblinear.Linear.copyOf;
4   import static liblinear.Linear.info;
5   import static liblinear.Linear.swap;
6   
7   
8   /**
9    * A coordinate descent algorithm for
10   * multi-class support vector machines by Crammer and Singer
11   *
12   * <pre>
13   * min_{\alpha} 0.5 \sum_m ||w_m(\alpha)||^2 + \sum_i \sum_m e^m_i alpha^m_i
14   * s.t. \alpha^m_i <= C^m_i \forall m,i , \sum_m \alpha^m_i=0 \forall i
15   *
16   * where e^m_i = 0 if y_i = m,
17   * e^m_i = 1 if y_i != m,
18   * C^m_i = C if m = y_i,
19   * C^m_i = 0 if m != y_i,
20   * and w_m(\alpha) = \sum_i \alpha^m_i x_i
21   *
22   * Given:
23   * x, y, C
24   * eps is the stopping tolerance
25   *
26   * solution will be put in w
27   * </pre>
28   */
29  class SolverMCSVM_CS {
30  
31      private final double[] B;
32      private final double[] C;
33      private final double   eps;
34      private final double[] G;
35      private final int      max_iter;
36      private final int      w_size, l;
37      private final int      nr_class;
38      private final Problem  prob;
39  
40      public SolverMCSVM_CS( Problem prob, int nr_class, double[] C ) {
41          this(prob, nr_class, C, 0.1);
42      }
43  
44      public SolverMCSVM_CS( Problem prob, int nr_class, double[] C, double eps ) {
45          this(prob, nr_class, C, eps, 100000);
46      }
47  
48  
49      public SolverMCSVM_CS( Problem prob, int nr_class, double[] weighted_C, double eps, int max_iter ) {
50          this.w_size = prob.n;
51          this.l = prob.l;
52          this.nr_class = nr_class;
53          this.eps = eps;
54          this.max_iter = max_iter;
55          this.prob = prob;
56          this.C = weighted_C;
57          this.B = new double[nr_class];
58          this.G = new double[nr_class];
59      }
60  
61      private int GETI(int i) {
62          return prob.y[i];
63      }
64  
65      private boolean be_shrunk(int i, int m, int yi, double alpha_i, double minG) {
66          double bound = 0;
67          if (m == yi) bound = C[GETI(i)];
68          if (alpha_i == bound && G[m] < minG) return true;
69          return false;
70      }
71  
72      public void solve(double[] w) {
73          int i, m, s;
74          int iter = 0;
75          double[] alpha = new double[l * nr_class];
76          double[] alpha_new = new double[nr_class];
77          int[] index = new int[l];
78          double[] QD = new double[l];
79          int[] d_ind = new int[nr_class];
80          double[] d_val = new double[nr_class];
81          int[] alpha_index = new int[nr_class * l];
82          int[] y_index = new int[l];
83          int active_size = l;
84          int[] active_size_i = new int[l];
85          double eps_shrink = Math.max(10.0 * eps, 1.0); // stopping tolerance for shrinking
86          boolean start_from_all = true;
87          // initial
88          for (i = 0; i < l * nr_class; i++)
89              alpha[i] = 0;
90          for (i = 0; i < w_size * nr_class; i++)
91              w[i] = 0;
92          for (i = 0; i < l; i++) {
93              for (m = 0; m < nr_class; m++)
94                  alpha_index[i * nr_class + m] = m;
95              QD[i] = 0;
96              for (FeatureNode xi : prob.x[i]) {
97                  QD[i] += xi.value * xi.value;
98              }
99              active_size_i[i] = nr_class;
100             y_index[i] = prob.y[i];
101             index[i] = i;
102         }
103 
104         DoubleArrayPointer alpha_i = new DoubleArrayPointer(alpha, 0);
105         IntArrayPointer alpha_index_i = new IntArrayPointer(alpha_index, 0);
106 
107         while (iter < max_iter) {
108             double stopping = Double.NEGATIVE_INFINITY;
109 
110             for (i = 0; i < active_size; i++) {
111                 // int j = i+rand()%(active_size-i);
112                 int j = i + Linear.random.nextInt(active_size - i);
113                 swap(index, i, j);
114             }
115             for (s = 0; s < active_size; s++) {
116 
117                 i = index[s];
118                 double Ai = QD[i];
119                 // double *alpha_i = &alpha[i*nr_class];
120                 alpha_i.setOffset(i * nr_class);
121 
122                 // int *alpha_index_i = &alpha_index[i*nr_class];
123                 alpha_index_i.setOffset(i * nr_class);
124 
125                 if (Ai > 0) {
126                     for (m = 0; m < active_size_i[i]; m++)
127                         G[m] = 1;
128                     if (y_index[i] < active_size_i[i]) G[y_index[i]] = 0;
129 
130                     for (FeatureNode xi : prob.x[i]) {
131                         // double *w_i = &w[(xi.index-1)*nr_class];
132                         int w_offset = (xi.index - 1) * nr_class;
133                         for (m = 0; m < active_size_i[i]; m++)
134                             // G[m] += w_i[alpha_index_i[m]]*(xi.value);
135                             G[m] += w[w_offset + alpha_index_i.get(m)] * (xi.value);
136 
137                     }
138 
139                     double minG = Double.POSITIVE_INFINITY;
140                     double maxG = Double.NEGATIVE_INFINITY;
141                     for (m = 0; m < active_size_i[i]; m++) {
142                         if (alpha_i.get(alpha_index_i.get(m)) < 0 && G[m] < minG) minG = G[m];
143                         if (G[m] > maxG) maxG = G[m];
144                     }
145                     if (y_index[i] < active_size_i[i]) {
146                         if (alpha_i.get(prob.y[i]) < C[GETI(i)] && G[y_index[i]] < minG) {
147                             minG = G[y_index[i]];
148                         }
149                     }
150 
151                     for (m = 0; m < active_size_i[i]; m++) {
152                         if (be_shrunk(i, m, y_index[i], alpha_i.get(alpha_index_i.get(m)), minG)) {
153                             active_size_i[i]--;
154                             while (active_size_i[i] > m) {
155                                 if (!be_shrunk(i, active_size_i[i], y_index[i], alpha_i.get(alpha_index_i.get(active_size_i[i])), minG)) {
156                                     swap(alpha_index_i, m, active_size_i[i]);
157                                     swap(G, m, active_size_i[i]);
158                                     if (y_index[i] == active_size_i[i])
159                                         y_index[i] = m;
160                                     else if (y_index[i] == m) y_index[i] = active_size_i[i];
161                                     break;
162                                 }
163                                 active_size_i[i]--;
164                             }
165                         }
166                     }
167 
168                     if (active_size_i[i] <= 1) {
169                         active_size--;
170                         swap(index, s, active_size);
171                         s--;
172                         continue;
173                     }
174 
175                     if (maxG - minG <= 1e-12)
176                         continue;
177                     else
178                         stopping = Math.max(maxG - minG, stopping);
179 
180                     for (m = 0; m < active_size_i[i]; m++)
181                         B[m] = G[m] - Ai * alpha_i.get(alpha_index_i.get(m));
182 
183                     solve_sub_problem(Ai, y_index[i], C[GETI(i)], active_size_i[i], alpha_new);
184                     int nz_d = 0;
185                     for (m = 0; m < active_size_i[i]; m++) {
186                         double d = alpha_new[m] - alpha_i.get(alpha_index_i.get(m));
187                         alpha_i.set(alpha_index_i.get(m), alpha_new[m]);
188                         if (Math.abs(d) >= 1e-12) {
189                             d_ind[nz_d] = alpha_index_i.get(m);
190                             d_val[nz_d] = d;
191                             nz_d++;
192                         }
193                     }
194 
195                     for (FeatureNode xi : prob.x[i]) {
196                         // double *w_i = &w[(xi->index-1)*nr_class];
197                         int w_offset = (xi.index - 1) * nr_class;
198                         for (m = 0; m < nz_d; m++) {
199                             w[w_offset + d_ind[m]] += d_val[m] * xi.value;
200                         }
201                     }
202                 }
203             }
204 
205             iter++;
206 
207             if (iter % 10 == 0) {
208                 info(".");
209             }
210 
211             if (stopping < eps_shrink) {
212                 if (stopping < eps && start_from_all == true)
213                     break;
214                 else {
215                     active_size = l;
216                     for (i = 0; i < l; i++)
217                         active_size_i[i] = nr_class;
218                     info("*");
219                     eps_shrink = Math.max(eps_shrink / 2, eps);
220                     start_from_all = true;
221                 }
222             } else
223                 start_from_all = false;
224         }
225 
226         info("\noptimization finished, #iter = %d\n", iter);
227         if (iter >= max_iter) info("Warning: reaching max number of iterations\n");
228 
229         // calculate objective value
230         double v = 0;
231         int nSV = 0;
232         for (i = 0; i < w_size * nr_class; i++)
233             v += w[i] * w[i];
234         v = 0.5 * v;
235         for (i = 0; i < l * nr_class; i++) {
236             v += alpha[i];
237             if (Math.abs(alpha[i]) > 0) nSV++;
238         }
239         for (i = 0; i < l; i++)
240             v -= alpha[i * nr_class + prob.y[i]];
241         info("Objective value = %f\n", v);
242         info("nSV = %d\n", nSV);
243 
244     }
245 
246     private void solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double[] alpha_new) {
247 
248         int r;
249         assert active_i <= B.length; // no padding
250         double[] D = copyOf(B, active_i);
251         // clone(D, B, active_i);
252 
253         if (yi < active_i) D[yi] += A_i * C_yi;
254 
255         // qsort(D, active_i, sizeof(double), compare_double);
256         ArraySorter.reversedMergesort(D);
257 
258         double beta = D[0] - A_i * C_yi;
259         for (r = 1; r < active_i && beta < r * D[r]; r++)
260             beta += D[r];
261 
262         beta /= r;
263         for (r = 0; r < active_i; r++) {
264             if (r == yi)
265                 alpha_new[r] = Math.min(C_yi, (beta - B[r]) / A_i);
266             else
267                 alpha_new[r] = Math.min(0.0, (beta - B[r]) / A_i);
268         }
269     }
270 }