main_memory_storage_manager.hxx

Go to the documentation of this file.
00001 //#include "main_memory_storage_manager.h"
00002 // This file is included in a header file now.
#include "timer.h"
#include <algorithm>
#include <iterator>
00005 
00010 template<class T>
00011 STLMainMemory_Storage_Manager<T>::STLMainMemory_Storage_Manager(struct main_memory_storage_spec *s) {
00012   strategy = s->strategy;
00013   low_water_it_cached = _itable.end();
00014   high_water_it_cached = _itable.begin();
00015 }
00016 
00023 template<class T>
00024 STLMainMemory_Storage_Manager<T>::STLMainMemory_Storage_Manager(Hazy_Database *db_conn, struct main_memory_storage_spec *s, struct hazy_model &hm) : Storage_Manager<T>(db_conn) {
00025   strategy = s->strategy;
00026   this->entity_table_name = s->entity_table_name;
00027   if(s->bRebuild)
00028     this->loadFromDatabase(hm);
00029   
00030   if(hm.isHazy()) {
00031     std::sort(_itable.begin(), _itable.end(), STLMainMemory_Storage_Manager<T>::cluster_sort_predicate);
00032     
00033     struct internal_record l, h;
00034     
00035     l.eps = hm.low_water; h.eps = hm.high_water;
00036     
00037     low_water_it_cached  = std::lower_bound( _itable.begin() , _itable.end(), l,
00038                                              STLMainMemory_Storage_Manager<T>::cluster_sort_predicate);
00039     high_water_it_cached = std::upper_bound( _itable.begin() , _itable.end(), h,
00040                                              STLMainMemory_Storage_Manager<T>::cluster_sort_predicate);
00041   }
00042 }
00043 
00048 template<class T>
00049 void
00050 STLMainMemory_Storage_Manager<T>::
00051 resort(struct hazy_model &hm) {
00052   internal_iterator it;
00053   LOGGING_ONLY(Timer t1(true););
00054   switch( strategy ) {
00055     // Only hazy resorts the table
00056   case hazy_model::EAGER_HAZY:   
00057     {
00058       LOGGING_ONLY(Timer update_eps(true); int nClassified = 0; double classify_time = 0.0; double update_ex_table_time = 0.0;);
00059       // Now we update the epsilon values of the internal table
00060       for(it = _itable.begin(); it != _itable.end(); it ++) {
00061         LOGGING_ONLY(Timer classify_timer(true););
00062         double _eps = classify(hm._model, it->v);
00063         LOGGING_ONLY(classify_time += classify_timer.stop(););
00064         // If the entities label may have changed, then update it.
00065         if(it->eps >= hm.low_water || it->eps <= hm.high_water) {
00066           LOGGING_ONLY(Timer update_ex_table_timer(true););
00067           _eager_external_table[it->k] = _eps > 0;
00068           LOGGING_ONLY(update_ex_table_time += update_ex_table_timer.stop(););
00069         }
00070         // update the label in _table for every entity
00071         it->eps = _eps ;
00072         LOGGING_ONLY(nClassified++;);
00073       }    
00074       LOGGING_ONLY(std::cout << "time for only classification: " << classify_time << std::endl);
00075       LOGGING_ONLY(std::cout << "time for only update external table: " << update_ex_table_time << std::endl;);
00076       LOGGING_ONLY(std::cout << "\tUpdated the epsilon value" << nClassified << " in " << update_eps.stop() << std::endl;  );
00077       
00078       // And now resort with the new epsilon values...
00079       LOGGING_ONLY(Timer internal_resort(true););
00080       std::sort(_itable.begin(), _itable.end(), STLMainMemory_Storage_Manager<T>::cluster_sort_predicate);
00081       LOGGING_ONLY(std::cout << "\t Internal Resort: " << internal_resort.stop() << std::endl;);
00082       
00083       struct internal_record l, h;
00084       
00085       l.eps = 0.0; h.eps = 0.0;
00086       
00087       LOGGING_ONLY(Timer bound_finder(true););
00088       low_water_it_cached  = std::lower_bound( _itable.begin() , _itable.end(), l,
00089                                                STLMainMemory_Storage_Manager<T>::cluster_sort_predicate);
00090       high_water_it_cached = std::upper_bound( _itable.begin() , _itable.end(), h,
00091                                                STLMainMemory_Storage_Manager<T>::cluster_sort_predicate);
00092       LOGGING_ONLY(std::cout << "\t Found the bound: " << bound_finder.stop() << std::endl;);
00093       LOGGING_ONLY(std::cout << "model is: " << hm._model << std::endl;);
00094     }
00095     break;
00096   case hazy_model::LAZY_HAZY:    
00097     {
00098       // Now we update the epsilon values of the internal table
00099       for(it = _itable.begin(); it != _itable.end(); it ++) {
00100         double _eps = classify(hm._model, it->v);
00101         // update the label in _table for every entity
00102         it->eps = _eps ;
00103       }    
00104       // And now resort with the new epsilon values...
00105       std::sort(_itable.begin(), _itable.end(), STLMainMemory_Storage_Manager<T>::cluster_sort_predicate);
00106       
00107       struct internal_record l, h;
00108       
00109       l.eps = 0.0; h.eps = 0.0;
00110       
00111       low_water_it_cached  = std::lower_bound( _itable.begin() , _itable.end(), l,
00112                                                STLMainMemory_Storage_Manager<T>::cluster_sort_predicate);
00113       high_water_it_cached = std::upper_bound( _itable.begin() , _itable.end(), h,
00114                                                STLMainMemory_Storage_Manager<T>::cluster_sort_predicate);
00115     }
00116     break;    
00117   case hazy_model::EAGER_NAIVE:
00118   case hazy_model::LAZY_NAIVE:
00119     break;
00120   default:
00121     throw UnknownStrategyException(__FILE__, __LINE__, "[resort]");
00122   }
00123   LOGGING_ONLY(std::cout << "resort takes: " << t1.stop() << std::endl;);
00124 }
00125 
00131 template<class T>
00132 void
00133 STLMainMemory_Storage_Manager<T>::
00134 incrementalUpdate(struct hazy_model &hm, double &waste_time) {
00135   LOGGING_ONLY(std::cout << "in mm storage manager, incremental update, strategy: " << strategy << std::endl;);
00136   waste_time = 0.0;
00137   LOGGING_ONLY(Timer total_timer(true););
00138   switch (strategy) {
00139   case hazy_model::EAGER_HAZY:
00140     {
00141       Timer waste_timer;
00142       internal_iterator low_water_it, high_water_it;
00143       struct internal_record l,h;
00144       int nTotalTuples = 0, nWastedTuples = 0;  
00145       LOGGING_ONLY(int tuplesChangeLabels = 0;);
00146       l.eps = hm.low_water; h.eps = hm.high_water;
00147       low_water_it  = std::lower_bound( _itable.begin() , low_water_it_cached, l, 
00148                                         STLMainMemory_Storage_Manager<T>::cluster_sort_predicate);
00149       high_water_it = std::upper_bound( high_water_it_cached, _itable.end(), h,
00150                                         STLMainMemory_Storage_Manager<T>::cluster_sort_predicate);
00151       
00152       //set new iterators
00153       low_water_it_cached = low_water_it;
00154       high_water_it_cached = high_water_it;
00155       
00156       waste_timer.start();
00157       for(; low_water_it != high_water_it; low_water_it++ ) {
00158         nTotalTuples++;      
00159         bool nVal = (classify(hm._model, low_water_it->v) > 0.0);
00160         LOGGING_ONLY(if(nVal != _eager_external_table[low_water_it->k]) tuplesChangeLabels ++; );
00161         VERBOSE_ONLY(std::cout << "\t\t Updating a label: " << low_water_it->k << " to " << nVal << " from " << _eager_external_table[low_water_it->k] << " v=" << low_water_it->v << std::endl;);
00162         _eager_external_table[low_water_it->k] = nVal;
00163         // if it's 0 then it's a wasted tuple
00164         nWastedTuples += (1 - (int) nVal );
00165       }
00166       waste_time = (nTotalTuples > 0) ? waste_timer.stop() * (double) nWastedTuples/ (double) nTotalTuples : 0.0;
00167       LOGGING_ONLY(std::cout << "incremental update: tuples between low & high water: " << nTotalTuples << std::endl;);
00168       LOGGING_ONLY(std::cout << "\nTotal Tuples=" << nTotalTuples << " nChangedLabels=" << tuplesChangeLabels << std::endl;);
00169       LOGGING_ONLY(std::cout << "\tWaste Ratio: " << ((double) nWastedTuples / (double) nTotalTuples) << " waste time is: " << waste_time << " of " << waste_timer.getElapsedTime() << std::endl;);
00170     }
00171     break;
00172   case hazy_model::EAGER_NAIVE:
00173     {
00174       key_iterator_type lz;
00175       LOGGING_ONLY(Timer t1(true););
00176 
00177       for(lz = _entity_table.begin(); lz != _entity_table.end(); lz++) {
00178         bool nVal = classify(hm._model, lz->second) > 0.0;      
00179         _eager_external_table[lz->first] = nVal;
00180       }
00181 
00182       LOGGING_ONLY(std::cout << "n-rescan update: " << t1.stop(););
00183     }
00184     break;      
00185   case hazy_model::LAZY_HAZY:
00186   case hazy_model::LAZY_NAIVE:
00187     break;
00188   default:
00189     throw UnknownStrategyException(__FILE__, __LINE__, "[incrementalUpdate]");
00190   }
00191   LOGGING_ONLY(std::cout << "\ttotal time is: " << total_timer.stop() << std::endl;);
00192 }
00193 
00194 
00201 template<class T>
00202 void
00203 STLMainMemory_Storage_Manager<T>::
00204 insertEntity(struct hazy_model &m, key e, T x)
00205 {
00206   struct internal_record ir;
00207   if(m.isHazy() ) {
00208     ir.k   = e;
00209     ir.eps = classify(m._model, x);
00210     ir.v   = x;
00211   }
00212   switch(strategy) {
00213   case hazy_model::LAZY_NAIVE:
00214     _entity_table[e] = x;       
00215     break;
00216   case hazy_model::LAZY_HAZY:
00217     _entity_table[e] = x;
00218     _itable.push_back(ir);
00219     break;
00220   case hazy_model::EAGER_NAIVE:
00221     _eager_external_table[e] = classify(m._model, x) > 0;
00222     _entity_table[e] = x;
00223     break;
00224   case hazy_model::EAGER_HAZY:
00225     {      
00226       _eager_external_table[e] = ir.eps > 0;
00227       _itable.push_back(ir);
00228     }
00229     break;
00230   default:
00231     throw UnknownStrategyException(__FILE__, __LINE__, "[insertEntity]");
00232     return;
00233   }
00234   VERBOSE_ONLY(std::cout << "entity inserted, id: " << e << std::endl;);
00235 }
00236 
00243 template<class T>
00244 void 
00245 STLMainMemory_Storage_Manager<T>::
00246 getEntityClass(key e, sClass &c, struct hazy_model &hm) {
00247   switch (strategy) {
00248   case hazy_model::LAZY_NAIVE:
00249     c = classify(hm._model, _entity_table[e]) > 0.0 ? 1 : 0;
00250     break;
00251   case hazy_model::LAZY_HAZY:
00252     // We could potentially avoid the dot product here...
00253     c = classify(hm._model, _entity_table[e]) > 0.0 ? 1 : 0;
00254     break;
00255   case hazy_model::EAGER_NAIVE:
00256   case hazy_model::EAGER_HAZY:
00257     c = _eager_external_table[e];      
00258     break;
00259   default:
00260     throw UnknownStrategyException(__FILE__, __LINE__, "[insert entity]");
00261   }
00262   LOGGING_ONLY(std::cout << "label of " << e << " is: " << c << std::endl;);
00263 }
00264 
00272 template<class T>
00273 void 
00274 STLMainMemory_Storage_Manager<T>::
00275 getNumInClass(sClass c, int &nClass, struct hazy_model &hm, double &waste_time) {
00276   
00277   
00278   switch (strategy) {
00279   case hazy_model::LAZY_NAIVE: 
00280     {
00281       LOGGING_ONLY(std::cout << "in get num in class for lazy naive" << std::endl;);
00282       key_iterator_type lz;
00283       for(lz = _entity_table.begin(); lz != _entity_table.end(); lz++) {
00284         nClass += classify(hm._model, lz->second) > 0.0 ? 1 : 0;
00285       }
00286       break;
00287     }
00288   case hazy_model::LAZY_HAZY:      
00289     {
00290       int nTotalTuples = 0, nWastedTuples = 0;  
00291       struct internal_record l;
00292       l.eps = hm.low_water; 
00293       internal_iterator  lazy_hazy_it = std::lower_bound( _itable.begin() , _itable.end(), l, 
00294                                                           STLMainMemory_Storage_Manager<T>::cluster_sort_predicate);
00295       Timer waste_timer(true);
00296       bool past_high_water = false;
00297       double high_water = hm.high_water;
00298       LOGGING_ONLY(int first_item = 0;);
00299       for(; lazy_hazy_it != _itable.end(); lazy_hazy_it++) {
00300         nTotalTuples++;
00301         LOGGING_ONLY(if(first_item == 0) std::cout << "first scanned tuple id: " << lazy_hazy_it->k << std::endl;);
00302         if(!past_high_water) { 
00303           nClass += classify(hm._model, lazy_hazy_it->v) > 0.0 ? 1 : 0; 
00304           past_high_water = lazy_hazy_it->eps > high_water;
00305         } else {
00306           nClass++;
00307         }
00308         LOGGING_ONLY(first_item ++;);
00309       }
00310       nWastedTuples = nTotalTuples - nClass;
00311       
00312       waste_time = (nTotalTuples > 0) ? waste_timer.stop() * ((double) nWastedTuples) / ((double) nTotalTuples): 0.0;
00313       LOGGING_ONLY(std::cout << "tuples between low&high water: " << nTotalTuples << std::endl;);
00314       LOGGING_ONLY(std::cout << "\tWaste Ratio: " << ((double) nWastedTuples) / ((double) nTotalTuples) <<  " waste time: " << waste_time << std::endl;);
00315       break;
00316     }
00317   case hazy_model::EAGER_NAIVE: 
00318     {    
00319       for(  std::map<key,bool>::iterator it  = _eager_external_table.begin();
00320             it != _eager_external_table.end();
00321             it++) {
00322         VERBOSE_ONLY(std::cout << it->second << " ";); 
00323         nClass += it->second ? 1 : 0;
00324       }
00325       
00326       VERBOSE_ONLY(std::cout << std::endl;);
00327       break;
00328     }
00329     
00330     // There does not appear to be an easy way to fix this, because it requires clustering...
00331   case hazy_model::EAGER_HAZY:
00332     {
00333       LOGGING_ONLY(int nScanned = 0;);
00334       for(  internal_iterator it = low_water_it_cached;
00335             it != high_water_it_cached; 
00336             it++) {
00337         nClass += _eager_external_table[it->k] ? 1 : 0;
00338         LOGGING_ONLY(if(nScanned == 0) std::cout << "first scanned tuple id: " << it->k << std::endl;);
00339         LOGGING_ONLY(nScanned++;);
00340       }
00341       for(  internal_iterator it = high_water_it_cached;
00342             it != _itable.end(); 
00343             it++) {
00344         nClass++;
00345       }
00346             
00347       LOGGING_ONLY(std::cout << "[Eager:Hazy] scanned " << nScanned << " number " << nClass << std::endl;);
00348       break;
00349     }
00350   default:
00351     throw UnknownStrategyException(__FILE__, __LINE__, "[numInclass]");
00352   }
00353   LOGGING_ONLY(std::cout << "num in class " << c << " is: " << nClass << std::endl;);
00354 }
00355 

Generated on Wed Dec 15 10:46:15 2010 for Hazy_System by  doxygen 1.4.7