00001
00002
00003 #include "timer.h"
00004 #include <algorithm>
00005
00010 template<class T>
00011 STLMainMemory_Storage_Manager<T>::STLMainMemory_Storage_Manager(struct main_memory_storage_spec *s) {
00012 strategy = s->strategy;
00013 low_water_it_cached = _itable.end();
00014 high_water_it_cached = _itable.begin();
00015 }
00016
00023 template<class T>
00024 STLMainMemory_Storage_Manager<T>::STLMainMemory_Storage_Manager(Hazy_Database *db_conn, struct main_memory_storage_spec *s, struct hazy_model &hm) : Storage_Manager<T>(db_conn) {
00025 strategy = s->strategy;
00026 this->entity_table_name = s->entity_table_name;
00027 if(s->bRebuild)
00028 this->loadFromDatabase(hm);
00029
00030 if(hm.isHazy()) {
00031 std::sort(_itable.begin(), _itable.end(), STLMainMemory_Storage_Manager<T>::cluster_sort_predicate);
00032
00033 struct internal_record l, h;
00034
00035 l.eps = hm.low_water; h.eps = hm.high_water;
00036
00037 low_water_it_cached = std::lower_bound( _itable.begin() , _itable.end(), l,
00038 STLMainMemory_Storage_Manager<T>::cluster_sort_predicate);
00039 high_water_it_cached = std::upper_bound( _itable.begin() , _itable.end(), h,
00040 STLMainMemory_Storage_Manager<T>::cluster_sort_predicate);
00041 }
00042 }
00043
00048 template<class T>
00049 void
00050 STLMainMemory_Storage_Manager<T>::
00051 resort(struct hazy_model &hm) {
00052 internal_iterator it;
00053 LOGGING_ONLY(Timer t1(true););
00054 switch( strategy ) {
00055
00056 case hazy_model::EAGER_HAZY:
00057 {
00058 LOGGING_ONLY(Timer update_eps(true); int nClassified = 0; double classify_time = 0.0; double update_ex_table_time = 0.0;);
00059
00060 for(it = _itable.begin(); it != _itable.end(); it ++) {
00061 LOGGING_ONLY(Timer classify_timer(true););
00062 double _eps = classify(hm._model, it->v);
00063 LOGGING_ONLY(classify_time += classify_timer.stop(););
00064
00065 if(it->eps >= hm.low_water || it->eps <= hm.high_water) {
00066 LOGGING_ONLY(Timer update_ex_table_timer(true););
00067 _eager_external_table[it->k] = _eps > 0;
00068 LOGGING_ONLY(update_ex_table_time += update_ex_table_timer.stop(););
00069 }
00070
00071 it->eps = _eps ;
00072 LOGGING_ONLY(nClassified++;);
00073 }
00074 LOGGING_ONLY(std::cout << "time for only classification: " << classify_time << std::endl);
00075 LOGGING_ONLY(std::cout << "time for only update external table: " << update_ex_table_time << std::endl;);
00076 LOGGING_ONLY(std::cout << "\tUpdated the epsilon value" << nClassified << " in " << update_eps.stop() << std::endl; );
00077
00078
00079 LOGGING_ONLY(Timer internal_resort(true););
00080 std::sort(_itable.begin(), _itable.end(), STLMainMemory_Storage_Manager<T>::cluster_sort_predicate);
00081 LOGGING_ONLY(std::cout << "\t Internal Resort: " << internal_resort.stop() << std::endl;);
00082
00083 struct internal_record l, h;
00084
00085 l.eps = 0.0; h.eps = 0.0;
00086
00087 LOGGING_ONLY(Timer bound_finder(true););
00088 low_water_it_cached = std::lower_bound( _itable.begin() , _itable.end(), l,
00089 STLMainMemory_Storage_Manager<T>::cluster_sort_predicate);
00090 high_water_it_cached = std::upper_bound( _itable.begin() , _itable.end(), h,
00091 STLMainMemory_Storage_Manager<T>::cluster_sort_predicate);
00092 LOGGING_ONLY(std::cout << "\t Found the bound: " << bound_finder.stop() << std::endl;);
00093 LOGGING_ONLY(std::cout << "model is: " << hm._model << std::endl;);
00094 }
00095 break;
00096 case hazy_model::LAZY_HAZY:
00097 {
00098
00099 for(it = _itable.begin(); it != _itable.end(); it ++) {
00100 double _eps = classify(hm._model, it->v);
00101
00102 it->eps = _eps ;
00103 }
00104
00105 std::sort(_itable.begin(), _itable.end(), STLMainMemory_Storage_Manager<T>::cluster_sort_predicate);
00106
00107 struct internal_record l, h;
00108
00109 l.eps = 0.0; h.eps = 0.0;
00110
00111 low_water_it_cached = std::lower_bound( _itable.begin() , _itable.end(), l,
00112 STLMainMemory_Storage_Manager<T>::cluster_sort_predicate);
00113 high_water_it_cached = std::upper_bound( _itable.begin() , _itable.end(), h,
00114 STLMainMemory_Storage_Manager<T>::cluster_sort_predicate);
00115 }
00116 break;
00117 case hazy_model::EAGER_NAIVE:
00118 case hazy_model::LAZY_NAIVE:
00119 break;
00120 default:
00121 throw UnknownStrategyException(__FILE__, __LINE__, "[resort]");
00122 }
00123 LOGGING_ONLY(std::cout << "resort takes: " << t1.stop() << std::endl;);
00124 }
00125
00131 template<class T>
00132 void
00133 STLMainMemory_Storage_Manager<T>::
00134 incrementalUpdate(struct hazy_model &hm, double &waste_time) {
00135 LOGGING_ONLY(std::cout << "in mm storage manager, incremental update, strategy: " << strategy << std::endl;);
00136 waste_time = 0.0;
00137 LOGGING_ONLY(Timer total_timer(true););
00138 switch (strategy) {
00139 case hazy_model::EAGER_HAZY:
00140 {
00141 Timer waste_timer;
00142 internal_iterator low_water_it, high_water_it;
00143 struct internal_record l,h;
00144 int nTotalTuples = 0, nWastedTuples = 0;
00145 LOGGING_ONLY(int tuplesChangeLabels = 0;);
00146 l.eps = hm.low_water; h.eps = hm.high_water;
00147 low_water_it = std::lower_bound( _itable.begin() , low_water_it_cached, l,
00148 STLMainMemory_Storage_Manager<T>::cluster_sort_predicate);
00149 high_water_it = std::upper_bound( high_water_it_cached, _itable.end(), h,
00150 STLMainMemory_Storage_Manager<T>::cluster_sort_predicate);
00151
00152
00153 low_water_it_cached = low_water_it;
00154 high_water_it_cached = high_water_it;
00155
00156 waste_timer.start();
00157 for(; low_water_it != high_water_it; low_water_it++ ) {
00158 nTotalTuples++;
00159 bool nVal = (classify(hm._model, low_water_it->v) > 0.0);
00160 LOGGING_ONLY(if(nVal != _eager_external_table[low_water_it->k]) tuplesChangeLabels ++; );
00161 VERBOSE_ONLY(std::cout << "\t\t Updating a label: " << low_water_it->k << " to " << nVal << " from " << _eager_external_table[low_water_it->k] << " v=" << low_water_it->v << std::endl;);
00162 _eager_external_table[low_water_it->k] = nVal;
00163
00164 nWastedTuples += (1 - (int) nVal );
00165 }
00166 waste_time = (nTotalTuples > 0) ? waste_timer.stop() * (double) nWastedTuples/ (double) nTotalTuples : 0.0;
00167 LOGGING_ONLY(std::cout << "incremental update: tuples between low & high water: " << nTotalTuples << std::endl;);
00168 LOGGING_ONLY(std::cout << "\nTotal Tuples=" << nTotalTuples << " nChangedLabels=" << tuplesChangeLabels << std::endl;);
00169 LOGGING_ONLY(std::cout << "\tWaste Ratio: " << ((double) nWastedTuples / (double) nTotalTuples) << " waste time is: " << waste_time << " of " << waste_timer.getElapsedTime() << std::endl;);
00170 }
00171 break;
00172 case hazy_model::EAGER_NAIVE:
00173 {
00174 key_iterator_type lz;
00175 LOGGING_ONLY(Timer t1(true););
00176
00177 for(lz = _entity_table.begin(); lz != _entity_table.end(); lz++) {
00178 bool nVal = classify(hm._model, lz->second) > 0.0;
00179 _eager_external_table[lz->first] = nVal;
00180 }
00181
00182 LOGGING_ONLY(std::cout << "n-rescan update: " << t1.stop(););
00183 }
00184 break;
00185 case hazy_model::LAZY_HAZY:
00186 case hazy_model::LAZY_NAIVE:
00187 break;
00188 default:
00189 throw UnknownStrategyException(__FILE__, __LINE__, "[incrementalUpdate]");
00190 }
00191 LOGGING_ONLY(std::cout << "\ttotal time is: " << total_timer.stop() << std::endl;);
00192 }
00193
00194
00201 template<class T>
00202 void
00203 STLMainMemory_Storage_Manager<T>::
00204 insertEntity(struct hazy_model &m, key e, T x)
00205 {
00206 struct internal_record ir;
00207 if(m.isHazy() ) {
00208 ir.k = e;
00209 ir.eps = classify(m._model, x);
00210 ir.v = x;
00211 }
00212 switch(strategy) {
00213 case hazy_model::LAZY_NAIVE:
00214 _entity_table[e] = x;
00215 break;
00216 case hazy_model::LAZY_HAZY:
00217 _entity_table[e] = x;
00218 _itable.push_back(ir);
00219 break;
00220 case hazy_model::EAGER_NAIVE:
00221 _eager_external_table[e] = classify(m._model, x) > 0;
00222 _entity_table[e] = x;
00223 break;
00224 case hazy_model::EAGER_HAZY:
00225 {
00226 _eager_external_table[e] = ir.eps > 0;
00227 _itable.push_back(ir);
00228 }
00229 break;
00230 default:
00231 throw UnknownStrategyException(__FILE__, __LINE__, "[insertEntity]");
00232 return;
00233 }
00234 VERBOSE_ONLY(std::cout << "entity inserted, id: " << e << std::endl;);
00235 }
00236
00243 template<class T>
00244 void
00245 STLMainMemory_Storage_Manager<T>::
00246 getEntityClass(key e, sClass &c, struct hazy_model &hm) {
00247 switch (strategy) {
00248 case hazy_model::LAZY_NAIVE:
00249 c = classify(hm._model, _entity_table[e]) > 0.0 ? 1 : 0;
00250 break;
00251 case hazy_model::LAZY_HAZY:
00252
00253 c = classify(hm._model, _entity_table[e]) > 0.0 ? 1 : 0;
00254 break;
00255 case hazy_model::EAGER_NAIVE:
00256 case hazy_model::EAGER_HAZY:
00257 c = _eager_external_table[e];
00258 break;
00259 default:
00260 throw UnknownStrategyException(__FILE__, __LINE__, "[insert entity]");
00261 }
00262 LOGGING_ONLY(std::cout << "label of " << e << " is: " << c << std::endl;);
00263 }
00264
00272 template<class T>
00273 void
00274 STLMainMemory_Storage_Manager<T>::
00275 getNumInClass(sClass c, int &nClass, struct hazy_model &hm, double &waste_time) {
00276
00277
00278 switch (strategy) {
00279 case hazy_model::LAZY_NAIVE:
00280 {
00281 LOGGING_ONLY(std::cout << "in get num in class for lazy naive" << std::endl;);
00282 key_iterator_type lz;
00283 for(lz = _entity_table.begin(); lz != _entity_table.end(); lz++) {
00284 nClass += classify(hm._model, lz->second) > 0.0 ? 1 : 0;
00285 }
00286 break;
00287 }
00288 case hazy_model::LAZY_HAZY:
00289 {
00290 int nTotalTuples = 0, nWastedTuples = 0;
00291 struct internal_record l;
00292 l.eps = hm.low_water;
00293 internal_iterator lazy_hazy_it = std::lower_bound( _itable.begin() , _itable.end(), l,
00294 STLMainMemory_Storage_Manager<T>::cluster_sort_predicate);
00295 Timer waste_timer(true);
00296 bool past_high_water = false;
00297 double high_water = hm.high_water;
00298 LOGGING_ONLY(int first_item = 0;);
00299 for(; lazy_hazy_it != _itable.end(); lazy_hazy_it++) {
00300 nTotalTuples++;
00301 LOGGING_ONLY(if(first_item == 0) std::cout << "first scanned tuple id: " << lazy_hazy_it->k << std::endl;);
00302 if(!past_high_water) {
00303 nClass += classify(hm._model, lazy_hazy_it->v) > 0.0 ? 1 : 0;
00304 past_high_water = lazy_hazy_it->eps > high_water;
00305 } else {
00306 nClass++;
00307 }
00308 LOGGING_ONLY(first_item ++;);
00309 }
00310 nWastedTuples = nTotalTuples - nClass;
00311
00312 waste_time = (nTotalTuples > 0) ? waste_timer.stop() * ((double) nWastedTuples) / ((double) nTotalTuples): 0.0;
00313 LOGGING_ONLY(std::cout << "tuples between low&high water: " << nTotalTuples << std::endl;);
00314 LOGGING_ONLY(std::cout << "\tWaste Ratio: " << ((double) nWastedTuples) / ((double) nTotalTuples) << " waste time: " << waste_time << std::endl;);
00315 break;
00316 }
00317 case hazy_model::EAGER_NAIVE:
00318 {
00319 for( std::map<key,bool>::iterator it = _eager_external_table.begin();
00320 it != _eager_external_table.end();
00321 it++) {
00322 VERBOSE_ONLY(std::cout << it->second << " ";);
00323 nClass += it->second ? 1 : 0;
00324 }
00325
00326 VERBOSE_ONLY(std::cout << std::endl;);
00327 break;
00328 }
00329
00330
00331 case hazy_model::EAGER_HAZY:
00332 {
00333 LOGGING_ONLY(int nScanned = 0;);
00334 for( internal_iterator it = low_water_it_cached;
00335 it != high_water_it_cached;
00336 it++) {
00337 nClass += _eager_external_table[it->k] ? 1 : 0;
00338 LOGGING_ONLY(if(nScanned == 0) std::cout << "first scanned tuple id: " << it->k << std::endl;);
00339 LOGGING_ONLY(nScanned++;);
00340 }
00341 for( internal_iterator it = high_water_it_cached;
00342 it != _itable.end();
00343 it++) {
00344 nClass++;
00345 }
00346
00347 LOGGING_ONLY(std::cout << "[Eager:Hazy] scanned " << nScanned << " number " << nClass << std::endl;);
00348 break;
00349 }
00350 default:
00351 throw UnknownStrategyException(__FILE__, __LINE__, "[numInclass]");
00352 }
00353 LOGGING_ONLY(std::cout << "num in class " << c << " is: " << nClass << std::endl;);
00354 }
00355