hazy_sgd.hxx

Go to the documentation of this file.
00001 
00012 template <class T>
00013 Hazy_Sgd<T>::Hazy_Sgd(Storage_Manager<T> *sm, IncrementalSGD<T> *sgd, Skiing &_ski, hazy_model::strategy s) 
00014   : ski(_ski), hm(sgd->getModel(),s) {
00015   
00016   // set _dim
00017   pthread_rwlock_init(&_rwlock, NULL);
00018   this->sgd = sgd;
00019   _high_low_water_valid = true;
00020   _last_sort_model = sgd->getModel();
00021   _dim = _last_sort_model.dim;
00022   st_man = sm;
00023   if(hazy_model::isHazy(s))
00024      st_man->resort(hm);
00025 }
00026 
00030 template<class T>
00031 void
00032 Hazy_Sgd<T>::updateModel() {
00033   //pthread_mutex_lock( &_mutex );
00034   pthread_rwlock_wrlock(&_rwlock);
00035   
00036   hm._model = sgd->getModel();
00037   
00038   // If we are lazy, there is nothing more to do:
00039   if( !hm.isEager() ) {
00040     
00041     if(hm._strategy == hazy_model::LAZY_HAZY) { _high_low_water_valid = false; }
00042     //pthread_mutex_unlock( &_mutex );
00043     pthread_rwlock_unlock(&_rwlock);
00044     return;
00045   }
00046   
00047   if( hm._strategy == hazy_model::EAGER_NAIVE) {
00048     assert(hm._strategy == hazy_model::EAGER_NAIVE);
00049     double _notused = 0.0;
00050     st_man->incrementalUpdate(hm, _notused);
00051     //pthread_mutex_unlock( &_mutex );
00052     pthread_rwlock_unlock(&_rwlock);
00053     return;
00054   }
00055 
00056   assert(hm._strategy == hazy_model::EAGER_HAZY);
00057   // Here, we may assume that it is eager, hazy
00058   
00059   update_low_high_water();
00060   
00061   if(ski.shouldResort()) {
00062     Timer t(true);
00063     st_man->resort(hm);
00064     _last_sort_model = model(hm._model);
00065     
00066     ski.doResort(t.stop());
00067     hm.low_water = 0.0, hm.high_water = 0.0;
00068   } else {
00069     LOGGING_ONLY(std::cout << "storage manager update with eager hazy" << std::endl;);
00070     double waste_time = 0.0;
00071     st_man->incrementalUpdate(hm, waste_time);
00072     ski.updateAccCost(waste_time);
00073     LOGGING_ONLY(std::cout << "acc cost become: " << ski.getAccCost() << std::endl;);
00074   }
00075   
00076   //pthread_mutex_unlock( &_mutex );
00077   pthread_rwlock_unlock(&_rwlock);
00078 }
00079 
00089 template <class T>
00090 void
00091 Hazy_Sgd<T>::updateModel(T featureArray, int classOfExample) {
00092   LOGGING_ONLY(std::cout << "update model called" << std::endl;);
00093   
00094   LOGGING_ONLY(std::cout << "[hazy_sgd] ski.acc_cost = " << ski.getAccCost() << ", resort cost: " << ski.getResortCost() << ", baseline: " << ski.getSVMBaselineUpdate() << std::endl;);
00095   
00096   //pthread_mutex_lock( &_mutex );
00097   pthread_rwlock_wrlock(&_rwlock);
00098   // If the model doesn't change, we simply return.
00099   bool model_changed = sgd->addExample(classOfExample, featureArray); 
00100 
00101   if(model_changed) {    
00102     // Model changed and so is invalid
00103     LOGGING_ONLY(std::cout << "model changed" << std::endl;);
00104     hm.invalidate_db_model(); 
00105   } else {
00106     LOGGING_ONLY(std::cout << "model didn't change" << std::endl;);
00107     //   pthread_mutex_unlock( &_mutex );
00108     pthread_rwlock_unlock(&_rwlock);
00109     return; 
00110   } 
00111   //pthread_mutex_unlock( &_mutex );
00112   pthread_rwlock_unlock(&_rwlock);
00113   
00114   updateModel();
00115   LOGGING_ONLY(std::cout << "[hazy_sgd] {after update} ski.acc_cost = " << ski.getAccCost() << ", resort cost: " << ski.getResortCost() << ", baseline: " << ski.getSVMBaselineUpdate() << std::endl;);
00116 }
00117 
00118 template<class T>
00119 struct _id_label_vector_entry {
00120   int id;
00121   T x;
00122   int label;
00123   static int parse_tuple(PGresult *res, int index, _id_label_vector_entry &c) {
00124     c.id = atoi(PQgetvalue(res, index, 0));
00125     c.label = atoi(PQgetvalue(res, index, 1));
00126     std::string f_vec = PQgetvalue(res, index, 2);
00127     int r = c.x.fromPSQL(f_vec);
00128     return r;
00129   }
00130   static bool _entity_compare(const _id_label_vector_entry &x, const _id_label_vector_entry &y) {
00131     return x.eps < y.eps;
00132   }
00133 };
00134 
00135 template <class T>
00136 void
00137 Hazy_Sgd<T>::deleteModel(std::string db_name, std::string table_name) {
00138   _connection_map::iterator i = connections.find(db_name);
00139   Hazy_Database *db_conn;
00140   //there is no connection to specified db
00141   if(i == connections.end()) {
00142     db_conn = new Hazy_Database(db_name);
00143     connections[db_name] = db_conn;
00144   }
00145   else
00146     db_conn = i->second;
00147 
00148   std::vector< _id_label_vector_entry<T> > _tuples;
00149   LOGGING_ONLY(Timer retrieve_parse_timer(true););
00150   std::string retrieve_query = "SELECT id, label, feature_vector FROM " + table_name + ";";
00151   db_conn->postgresTupleParser<_id_label_vector_entry <T> >(retrieve_query.c_str(), _id_label_vector_entry<T>::parse_tuple, 
00152                                                             _tuples);
00153   LOGGING_ONLY(std::cout << "[deleteModel] time elapsed for retrieve & parse from training table: " << retrieve_parse_timer.stop() << std::endl;);
00154   LOGGING_ONLY(std::cout << "[deleteModel] entity size: " << _tuples.size() << std::endl;);
00155 
00156   LOGGING_ONLY(std::cout << "delete model called" << std::endl;);
00157   
00158   LOGGING_ONLY(std::cout << "[hazy_sgd] ski.acc_cost = " << ski.getAccCost() << ", resort cost: " << ski.getResortCost() << ", baseline: " << ski.getSVMBaselineUpdate() << std::endl;);
00159   
00160   //pthread_mutex_lock( &_mutex );
00161   pthread_rwlock_wrlock(&_rwlock);
00162   // If the model doesn't change, we simply return.
00163   bool model_changed = false;
00164 
00165   //reset model
00166   LOGGING_ONLY(Timer reset_model_timer(true););
00167   sgd->resetModel();
00168   LOGGING_ONLY(std::cout << "[deleteModel] reset model timer: " << reset_model_timer.stop() << std::endl;);
00169 
00170   LOGGING_ONLY(Timer learn_timer(true););
00171   for(unsigned int i = 0; i < _tuples.size(); i ++) {
00172     int classOfExample = _tuples[i].label;
00173     T featureArray = _tuples[i].x;
00174     model_changed = sgd->addExample(classOfExample, featureArray); 
00175   }
00176   LOGGING_ONLY(std::cout << "[deleteModel] time for retraining: " << learn_timer.stop() << std::endl;);
00177   
00178   if(model_changed) {    
00179     // Model changed and so is invalid
00180     LOGGING_ONLY(std::cout << "model changed" << std::endl;);
00181     hm.invalidate_db_model(); 
00182   } else {
00183     LOGGING_ONLY(std::cout << "model didn't change" << std::endl;);
00184     //   pthread_mutex_unlock( &_mutex );
00185     pthread_rwlock_unlock(&_rwlock);
00186     return; 
00187   } 
00188   //pthread_mutex_unlock( &_mutex );
00189   pthread_rwlock_unlock(&_rwlock);
00190   
00191   updateModel();
00192   LOGGING_ONLY(std::cout << "[hazy_sgd] {after update} ski.acc_cost = " << ski.getAccCost() << ", resort cost: " << ski.getResortCost() << ", baseline: " << ski.getSVMBaselineUpdate() << std::endl;);
00193 }
00194 
00200 template <class T>
00201 void
00202 Hazy_Sgd<T>::readEntityClass(key entity_id, sClass &c) {
00203   pthread_rwlock_rdlock(&_rwlock);
00204   if(hm._strategy == hazy_model::LAZY_HAZY && !_high_low_water_valid) {  update_low_high_water(); _high_low_water_valid = true; }
00205   st_man->getEntityClass(entity_id, c, hm);
00206   pthread_rwlock_unlock(&_rwlock);
00207 }
00208 
00214 template <class T>
00215 void
00216 Hazy_Sgd<T>::readEntityClass(T vec, sClass &c) {
00217   // This only called on LAZY approaches.. is this a good assert?
00218   assert(hm.isLazy()); 
00219   //pthread_mutex_lock( &_mutex );
00220   pthread_rwlock_rdlock(&_rwlock);
00221   c = sgd->classifyExample(vec) ? 1 : 0;
00222   //pthread_mutex_unlock( &_mutex );
00223   pthread_rwlock_unlock(&_rwlock);
00224 }
00225 
00232 template <class T>
00233 void
00234 Hazy_Sgd<T>::readEntityClass(T vec, double eps, sClass &c) {
00235   // This only called on LAZY HAZY
00236   // THIS DOESN'T NEED TO LOCK!
00237   assert(hm._strategy == hazy_model::LAZY_HAZY);
00238   if(!_high_low_water_valid) {  update_low_high_water(); _high_low_water_valid = true; }
00239   if(eps > hm.high_water) { c = 1; return; }
00240   if(eps < hm.low_water)  { c = 0; return; }
00241   // AND LEVENT, IF YOU LOCK THIS IT WILL DEADLOCK.
00242   readEntityClass(vec, c);
00243 }
00244 
00250 template <class T>
00251 void
00252 Hazy_Sgd<T>::readNumInClass(sClass c, int &nClass) {
00253   double waste_time = 0;
00254   //pthread_mutex_lock( &_mutex );
00255   pthread_rwlock_rdlock(&_rwlock);
00256   if(!_high_low_water_valid) {  update_low_high_water(); _high_low_water_valid = true; }
00257   
00258   st_man->getNumInClass(c, nClass, hm, waste_time);
00259   
00260   if(hm._strategy == hazy_model::LAZY_HAZY) {
00261     ski.updateAccCost(waste_time);
00262   }
00263   //pthread_mutex_unlock( &_mutex );
00264   pthread_rwlock_unlock(&_rwlock);
00265 }
00266 
00272 template <class T>
00273 void
00274 Hazy_Sgd<T>::holder_difference_models(double &delta_w, double &delta_b) {// outputs
00275   struct model current = sgd->getModel();
00276   delta_b = current.bias - _last_sort_model.bias;
00277   delta_w = 0.0;
00278   for(int d = 1; d < _dim; d ++) {
00279     delta_w = std::max(delta_w, fabs((current.w.get(d) * current.wscale - _last_sort_model.w.get(d)*_last_sort_model.wscale)));
00280   }
00281   // Multiply with the max-1 norm of the dataset (why is this stored in skiing?)
00282   // Seems like this should be stored here....
00283   delta_w *= ski.getMFactor();
00284 }

Generated on Wed Dec 15 10:46:15 2010 for Hazy_System by  doxygen 1.4.7