ondisk_storage_manager.hxx

Go to the documentation of this file.
00001 
00004 #include <sstream>
00005 
// Prepares this manager's SQL prepared statements and, when bRebuild is set,
// (re)creates the schema objects for the configured strategy.
//
// bRebuild  when true, the external table/view (and, for hazy strategies, the
//           intermediate table) are dropped and rebuilt before the remaining
//           statements are prepared.
// hm        model state; hm._model is serialized with model_to_dbstring() and
//           sent to the database whenever hm.test_and_set_model_in_db()
//           returns false.
//
// Every prepared-statement name is suffixed with unique_id_for_ps. Each SQL
// batch is sent with execute_statement_msg() and its return value is handed to
// checkQueryReturnValue().
00012 template<class T>
00013 void
00014 Ondisk_Storage_Manager<T>::setup (bool bRebuild, struct hazy_model &hm)  { 
00015   // This is where we prepare the statements.
00016   std::ostringstream ostr;
00017   std::ostringstream schema_setup;
00018   
        // Batch 1: model_initialization and model_caching are prepared first
        // because the rebuild branch below EXECUTEs both of them.
00019   ostr << "PREPARE model_initialization" << unique_id_for_ps << " AS SELECT initialize_waste_count();" << std::endl;;
00020   ostr << "PREPARE model_caching" << unique_id_for_ps << "(FLOAT8[], FLOAT8) AS SELECT cache_model_waste($1, $2);" << std::endl;;
00021   DEBUG_ONLY(std::cout << ostr.str() << std::endl;);
        // NOTE(review): __FILE__ and __LINE__ sit inside the string literal, so
        // the preprocessor does not expand them; the tag is passed verbatim.
00022   int retVal = this->db_conn->execute_statement_msg(ostr.str().c_str(), "[Ondisk_Storage_Manager::model_prepared_statements:__FILE__: __LINE__");
00023   checkQueryReturnValue(retVal, ostr.str());
00024   
00025   if(bRebuild) {
00026     // external table could be either view or table...
00027     //schema_setup << "DROP VIEW  IF EXISTS " << external_table_name << " CASCADE;" << std::endl;
00028     schema_setup << "SELECT delete_table_or_view('" << external_table_name << "');" ;
00029     if(hazy_model::isHazy(_strategy)) {
            // Hazy strategies: rebuild the intermediate table (id, eps,
            // feature_vector) from the entity table, where eps is the cached
            // dot product, then index it and CLUSTER on the (eps, id) index.
00030       std::string model_string, bias_string;
00031       
00032       schema_setup << "DROP TABLE IF EXISTS " << hazy_intermediate_table_name << " CASCADE;" << std::endl;
00033       schema_setup << "EXECUTE model_initialization" << unique_id_for_ps << ";";
00034       if(!hm.test_and_set_model_in_db()) { model_to_dbstring(hm._model, model_string, bias_string); schema_setup << "EXECUTE model_caching" << unique_id_for_ps << "(" << model_string << ", " << bias_string << ");";}
00035       schema_setup << "CREATE TABLE " << hazy_intermediate_table_name << " AS (SELECT id, (dotprdct_cached_without_waste(feature_vector))::float8 as eps, feature_vector FROM " << this->entity_table_name << ");" << std::endl;
00036       schema_setup << "CREATE INDEX index1_" << hazy_intermediate_table_name << " ON " << hazy_intermediate_table_name << "(eps, id) WITH (FILLFACTOR=100);";
00037       schema_setup << "CREATE INDEX index2_" << hazy_intermediate_table_name << " ON " << hazy_intermediate_table_name << " USING hash(id);";
00038       schema_setup << "CLUSTER index1_" << hazy_intermediate_table_name << " ON " << hazy_intermediate_table_name << ";";
00039     }
          // Create the external object for the strategy:
          //   LAZY_NAIVE  - view over the entity table (id, class)
          //   LAZY_HAZY   - view over the intermediate table (id, eps, class)
          //   EAGER_NAIVE - table (id, class) with a hash index on id
          //   EAGER_HAZY  - table (id, eps, class) with indexes on eps and id
00040     switch (_strategy) {
00041     case hazy_model::LAZY_NAIVE:
00042       schema_setup << "CREATE VIEW " << external_table_name << " AS SELECT id, getEntityLabelWithSQLNOEPS(feature_vector::text) as class from " << this->entity_table_name << ";" << std::endl;
00043       break;
00044     case hazy_model::LAZY_HAZY: 
00045       schema_setup << "CREATE VIEW " << external_table_name << " AS SELECT id, eps, getEntityLabelWithSQL(eps, feature_vector::text) as class from " << hazy_intermediate_table_name << ";" << std::endl;
00046       break;
00047       
00048     case hazy_model::EAGER_NAIVE:
00049       {           
00050         schema_setup << "EXECUTE model_initialization" << unique_id_for_ps << ";";
00051         if(!hm.test_and_set_model_in_db()) { 
00052           std::string model_string_naive, bias_string_naive;
00053           model_to_dbstring(hm._model, model_string_naive, bias_string_naive); 
00054           schema_setup << "EXECUTE model_caching" << unique_id_for_ps << "(" << model_string_naive << ", " << bias_string_naive << ");";
00055         } 
00056         schema_setup << "CREATE TABLE " << external_table_name << " AS (SELECT id, (dotprdct_cached_without_waste(feature_vector) > 0)::int as class FROM " << this->entity_table_name << ");" << std::endl;
00057         schema_setup << "CREATE INDEX index2_" << external_table_name << " ON " << external_table_name << " USING hash(id);";   
00058       }
00059       break;
00060     case hazy_model::EAGER_HAZY:
00061       {
              // The hazy branch above has already queued model_initialization
              // (and possibly model_caching) for this strategy; it is queued
              // again here.
00062         schema_setup << "EXECUTE model_initialization" << unique_id_for_ps << ";";
00063         if(!hm.test_and_set_model_in_db()) { 
00064           std::string model_string_hazy, bias_string_hazy;
00065           model_to_dbstring(hm._model, model_string_hazy, bias_string_hazy); 
00066           schema_setup << "EXECUTE model_caching" << unique_id_for_ps << "(" << model_string_hazy << ", " << bias_string_hazy << ");"; 
00067         }
00068         schema_setup << "CREATE TABLE " << external_table_name << " AS (SELECT id, (dotprdct_cached_without_waste(feature_vector))::float8 as eps, (dotprdct_cached_without_waste(feature_vector) > 0)::int as class FROM " << this->entity_table_name << ");" << std::endl;
00069         schema_setup << "CREATE INDEX index1_" << external_table_name << " ON " << external_table_name << "(eps) WITH (FILLFACTOR=100);";
00070         schema_setup << "CREATE INDEX index2_" << external_table_name << " ON " << external_table_name << " USING hash(id);";
00071       }
00072       break;
00073     default:
00074       std::cout << "[SETUP] Unknown Strategy " << std::endl;
00075       assert(false);      
00076     }
          // Register the C-language function update_external_table_inplace<id>,
          // loaded from 'inplace<id>.so'; its first parameter is typed with the
          // external table's name. It is used by PREPARED_CREATE_VIEW below.
00077     schema_setup << "CREATE OR REPLACE FUNCTION update_external_table_inplace" << unique_id_for_ps << "(" << external_table_name << ", bool) RETURNS BOOLEAN AS 'inplace" << unique_id_for_ps << ".so', 'update_hack" << unique_id_for_ps << "' LANGUAGE C STRICT;"; 
00078     VERBOSE_ONLY(std::cout << "SETUP SQL Queries [Rebuild]" << std::endl << schema_setup.str() << std::endl << std::endl; );
          // This retVal is a new variable that shadows the function-scope one.
00079     int retVal = this->db_conn->execute_statement_msg(schema_setup.str().c_str(), "[Ondisk_Storage_Manager::Setup:REBUILD]");
00080     checkQueryReturnValue(retVal, schema_setup.str());    
00081   }
00082   // This is for sparse vectors
        // Batch 3: clear the buffer and prepare the strategy-specific statements.
00083   ostr.str("");
00084   if(hazy_model::isHazy(_strategy)) {
          // Hazy statements: rebuild of the external table from the intermediate
          // table, single-entity lookup and counts bounded by two water marks
          // ($1/$2 or $2/$3), in-place region update, and re-sort of the
          // intermediate table into "<intermediate>_copy".
00085     ostr << "PREPARE view_recreate_in_hazy" << unique_id_for_ps << " AS SELECT id, eps, (eps > 0)::integer AS class INTO " << external_table_name << " FROM " << hazy_intermediate_table_name << " ORDER BY eps, id;" << std::endl;;
00086     ostr << "PREPARE single_hazy_lazy" << unique_id_for_ps << "(INT, FLOAT8, FLOAT8) AS SELECT CASE WHEN eps < $2 THEN 0 ELSE CASE WHEN eps > $3 THEN 1 ELSE (CASE WHEN (dotprdct_cached_without_waste(feature_vector) > 0) THEN 1 ELSE 0 END) END END FROM " << hazy_intermediate_table_name << " WHERE id = $1;";
00087     ostr << "PREPARE count_hazy_lazy" << unique_id_for_ps << "(FLOAT8, FLOAT8) AS SELECT ((SELECT COUNT(*) FROM " << hazy_intermediate_table_name << " WHERE (eps BETWEEN $1 AND $2) AND ((dotprdct_cached_waste(feature_vector)) > 0)) + (SELECT COUNT(*) FROM " << hazy_intermediate_table_name << " WHERE eps > $2));" << std::endl;;
00088     ostr << "PREPARE count_hazy_eager" << unique_id_for_ps << "(FLOAT8, FLOAT8) AS SELECT (SELECT COUNT(*) FROM " << external_table_name << " WHERE eps BETWEEN $1 AND $2 AND class = 1) + (SELECT COUNT(*) FROM " << external_table_name << " WHERE eps > $2);" << std::endl;;
00089     ostr << "PREPARE PREPARED_CREATE_VIEW2" << unique_id_for_ps << "(FLOAT8) AS SELECT id INTO " << external_table_name << " FROM " << hazy_intermediate_table_name << " WHERE eps > $1;" << std::endl;;
          // PREPARED_CREATE_VIEW is built from the next line plus the WHERE
          // clause two lines below (the first WHERE variant is commented out).
00090     ostr << "PREPARE PREPARED_CREATE_VIEW" << unique_id_for_ps << "(FLOAT8, FLOAT8) AS SELECT update_external_table_inplace" << unique_id_for_ps << "(e, dotprdct_cached_waste(i.feature_vector) > 0) FROM " << hazy_intermediate_table_name << " i, " << external_table_name << " e ";
00091     //ostr << "WHERE (i.eps BETWEEN $1 AND $2) AND (e.eps BETWEEN $1 AND $2) AND (e.eps = i.eps) AND (e.id = i.id);" << std::endl;;
00092     ostr << "WHERE (i.eps BETWEEN $1 AND $2) AND (e.eps BETWEEN $1 AND $2) AND (e.id = i.id);" << std::endl;;
00093     ostr << "PREPARE internal_recreate" << unique_id_for_ps << " AS SELECT id, (dotprdct_cached_without_waste(feature_vector)) AS eps, feature_vector INTO " << hazy_intermediate_table_name << "_copy" << " FROM " << hazy_intermediate_table_name << " ORDER BY eps, id;" << std::endl;;
00094   }
00095   else {
          // Naive statements: every label is computed directly from the entity
          // table (or read from the external table for the eager count).
00096     ostr << "PREPARE view_recreate_in_naive" << unique_id_for_ps << " AS SELECT id, (dotprdct_cached_without_waste(feature_vector) > 0)::integer as class INTO " << external_table_name << " FROM " << this->entity_table_name << ";" << std::endl;;
00097     ostr << "PREPARE single_naive_lazy" << unique_id_for_ps << "(INT) AS SELECT CASE WHEN (dotprdct_cached_without_waste(feature_vector) > 0) THEN 1 ELSE 0 END FROM " << this->entity_table_name << " WHERE id = $1;";
00098     ostr << "PREPARE count_naive_lazy" << unique_id_for_ps << " AS SELECT COUNT(*) FROM " << this->entity_table_name << " WHERE (dotprdct_cached_without_waste(feature_vector) > 0);" << std::endl;;
00099     ostr << "PREPARE count_naive_eager" << unique_id_for_ps << " AS SELECT COUNT(*) FROM " << external_table_name << " WHERE class = 1;" << std::endl;;
00100   }
        // Prepared for every strategy.
00101   ostr << "PREPARE retrieve_ratio" << unique_id_for_ps << " AS SELECT get_waste_ratio();" << std::endl;;
00102   ostr << "PREPARE single_entity_eager" << unique_id_for_ps << "(INT) AS SELECT class FROM " << external_table_name << " WHERE id = $1;" << std::endl;;
00103   
00104   // These prepare statements are for dense vectors.
00105   // ostr << "PREPARE model_initialization AS SELECT initialize_dense_waste_count();";
00106   // ostr << "PREPARE model_caching(FLOAT8[], FLOAT8) AS SELECT cache_dense_model_waste($1, $2);";
00107   // ostr << "PREPARE retrieve_ratio AS SELECT get_dense_waste_ratio();";
00108   // ostr << "PREPARE internal_recreate AS SELECT id, feature_value_array, dotprdctdense_cached_without_waste(feature_value_array) AS c INTO " << hazy_intermediate_table_name << "_copy" << " FROM " << hazy_intermediate_table_name << " ORDER BY c, id;";
00109   // ostr << "PREPARE view_recreate_in_hazy AS SELECT id, c, (c > 0)::boolean AS class INTO " << external_table_name << " FROM " << hazy_intermediate_table_name << " ORDER BY c, id;";
00110   // ostr << "PREPARE view_recreate_in_naive AS SELECT id, (dotprdctdense_cached_without_waste(feature_value_array) > 0)::boolean as class INTO " << external_table_name << " FROM " << entity_table_name << ";";
00111   // ostr << "PREPARE single_entity_eager(INT) AS SELECT class FROM " << external_table_name << " WHERE id = $1;";
00112   // ostr << "PREPARE count_hazy_lazy(FLOAT8, FLOAT8) AS SELECT ((SELECT COUNT(*) FROM " << hazy_intermediate_table_name << " WHERE (c BETWEEN $1 AND $2) AND ((dotprdctdense_cached_waste(feature_value_array)) > 0)) + (SELECT COUNT(*) FROM " << hazy_intermediate_table_name << " WHERE c > $2));";
00113   // ostr << "PREPARE count_naive_lazy AS SELECT COUNT(*) FROM " << entity_table_name << " WHERE (dotprdctdense_cached_without_waste(feature_value_array) > 0);";
00114   // ostr << "PREPARE count_hazy_eager(FLOAT8) AS SELECT COUNT(*) FROM " << external_table_name << " WHERE class='t' AND c >= $1;";
00115   // ostr << "PREPARE count_naive_eager AS SELECT COUNT(*) FROM " << external_table_name << " WHERE class = 't';";
00116   // ostr << "PREPARE PREPARED_CREATE_VIEW2(FLOAT4) AS  SELECT id INTO " << external_table_name << " FROM " << hazy_intermediate_table_name << " WHERE c > $1;";
00117   // ostr << "PREPARE PREPARED_CREATE_VIEW(FLOAT4, FLOAT4) AS SELECT update_external_table_inplace(e, dotprdctdense_cached_waste(i.feature_value_array) > 0) FROM " << hazy_intermediate_table_name << " i, " << external_table_name << " e ";
00118   // ostr << "WHERE (i.c BETWEEN $1 AND $2) AND (e.c BETWEEN $1 AND $2) AND (e.c = i.c) AND (e.id = i.id);";
00119   // end dense vector code
        // NOTE(review): no ';' follows this DEBUG_ONLY(...) invocation, so the
        // assignment below depends on how the macro expands - confirm.
00120   DEBUG_ONLY(std::cout << ostr.str() << std::endl;)
00121     retVal = this->db_conn->execute_statement_msg(ostr.str().c_str(), "[Ondisk_Storage_Manager::Setup:__FILE__: __LINE__");
00122   checkQueryReturnValue(retVal, ostr.str());
00123   // what happens if it fails?
00124 }
00125 
// Destructor. The body is empty: no SQL is issued here, so the tables, views
// and prepared statements created by setup() are not dropped by this class.
00129 template<class T>
00130 Ondisk_Storage_Manager<T>::~Ondisk_Storage_Manager ()  { }
00131 
00133 
// Constructs an on-disk storage manager from a specification.
//
// db_conn  database handle, forwarded unchanged to the Storage_Manager<T> base.
// spec     supplies the entity, external and intermediate table names, the
//          strategy, and the unique id appended to prepared-statement names.
//
// No SQL is issued here. If the strategy is hazy (hazy_model::isHazy) but the
// intermediate table name is empty, a PANIC message is printed and
// assert(false) fires.
00138 template<class T>
00139 Ondisk_Storage_Manager<T>
00140 ::Ondisk_Storage_Manager(Hazy_Database *db_conn, struct ondisk_storage_spec *spec) : Storage_Manager<T>(db_conn) {
00142   
00143   this->entity_table_name            = spec->entity_table_name;
00144   this->external_table_name          = spec->external_table_name;
00145   this->hazy_intermediate_table_name = spec->intermediate_table_name;
00146   _strategy                          = spec->strategy;
00147   unique_id_for_ps                   = spec->unique_id;
00148   
        // A hazy strategy cannot work without its intermediate table.
00149   if(hazy_model::isHazy(_strategy) && hazy_intermediate_table_name.length() == 0) { 
00150     std::cout << "PANIC: We are a hazy ondisk strategy, but we have no (or invalid) interemediate table name!" << std::endl;
00151     assert(false);
00152   }
00153 }
00154 
00155 
00157 
// Rebuilds the external table from scratch using the naive statements.
//
// hm  model state; hm._model is serialized with model_to_dbstring() and cached
//     in the database unconditionally (hm.test_and_set_model_in_db() is not
//     consulted here).
//
// The work is sent as three separate batches so that the LOGGING_ONLY timers
// report each phase on its own:
//   1. drop the external table, run model_initialization and model_caching;
//   2. EXECUTE view_recreate_in_naive (repopulates the external table);
//   3. create the hash index on id.
// Each return value is passed to checkQueryReturnValue().
00161 template<class T>
00162 void
00163 Ondisk_Storage_Manager<T>::rescan(struct hazy_model &hm) {
00166   
00167   std::ostringstream ostr,ostr2,ostr3;
00168   std::string w_string, bias_string;
00169 
00170   int retVal = 0;
00171   
00172   ostr << "DROP TABLE IF EXISTS " << external_table_name << " CASCADE;";
00173   ostr << "EXECUTE model_initialization" << unique_id_for_ps << ";";
00174   
00175   model_to_dbstring(hm._model, w_string, bias_string); 
00176   ostr << "EXECUTE model_caching" << unique_id_for_ps << "(" << w_string << ", " << bias_string << ");"; 
00177   
00178   LOGGING_ONLY(Timer t1(true););
00179   retVal = this->db_conn->execute_statement_msg(ostr.str().c_str(), "[naive rescan]");
00180   checkQueryReturnValue(retVal, ostr.str());
00181   LOGGING_ONLY(std::cout << "n-rescan drop table and caching: " << t1.stop(););
00182   
00183   ostr2 << "EXECUTE view_recreate_in_naive" << unique_id_for_ps << ";";
00184   LOGGING_ONLY(t1.start(););
00185   retVal = this->db_conn->execute_statement_msg(ostr2.str().c_str(), "[naive rescan]");
00186   checkQueryReturnValue(retVal, ostr2.str());
00187   LOGGING_ONLY(std::cout << "n-rescan create table: " << t1.stop(););
00188   
00189   ostr3 << "CREATE INDEX index2_" << external_table_name << " ON " << external_table_name << " USING hash(id);";   
00190   LOGGING_ONLY(t1.start());
00191   retVal = this->db_conn->execute_statement_msg(ostr3.str().c_str(), "[naive rescan]");
00192   checkQueryReturnValue(retVal, ostr3.str());
00193   LOGGING_ONLY(std::cout << "n-rescan create index: " << t1.stop(););   
00194 }
00195 
00196 
00198 
// Re-sorts the hazy data under the current model and rebuilds the external table.
//
// hm  model state, passed through to updateInternalTable().
//
// First updateInternalTable(hm) recreates the intermediate table. Then a single
// batch drops the external table, repopulates it with view_recreate_in_hazy,
// re-registers update_external_table_inplace<id>, and creates the eps and id
// indexes on the external table.
00202 template<class T>
00203 void
00204 Ondisk_Storage_Manager<T>::resort(struct hazy_model &hm) {
00205   updateInternalTable(hm);
00206   
00207   std::ostringstream ostr1;
00208   ostr1 << "DROP TABLE IF EXISTS " << external_table_name << " CASCADE;";
00209   ostr1 << "EXECUTE view_recreate_in_hazy" << unique_id_for_ps << ";";
        // The function is re-created after the table because its first parameter
        // is typed with the external table's name and the table was dropped with
        // CASCADE above.
00210   ostr1 << "CREATE OR REPLACE FUNCTION update_external_table_inplace" << unique_id_for_ps << "(" << external_table_name << ", bool) RETURNS BOOLEAN AS 'inplace" << unique_id_for_ps << ".so', 'update_hack" << unique_id_for_ps << "' LANGUAGE C STRICT;"; 
00211   ostr1 << "CREATE INDEX index1_" << external_table_name << " ON " << external_table_name << "(eps) WITH (FILLFACTOR=100);";
00212   ostr1 << "CREATE INDEX index2_" << external_table_name << " ON " << external_table_name << " USING hash(id);";
00213   
00214   LOGGING_ONLY(Timer t1(true););
00215   int retVal = this->db_conn->execute_statement_msg(ostr1.str().c_str(), "[Update:Rebuild]");
00216   checkQueryReturnValue(retVal, ostr1.str());
00217   LOGGING_ONLY(std::cout << "after sort, update view time: " << t1.stop() << std::endl;);       
00218 }
00219 
00220 
00222 
// Applies a model change to the on-disk state according to hm._strategy.
//
// hm          model state; hm.invalidate_db_model() is called first, then the
//             switch dispatches on hm._strategy (not on this object's
//             _strategy member).
// waste_time  written only on the EAGER_HAZY path, by update_disk_region();
//             the other branches leave it untouched.
//
//   LAZY_NAIVE / LAZY_HAZY - nothing is done here
//   EAGER_NAIVE            - rescan(hm) rebuilds the external table
//   EAGER_HAZY             - update_disk_region() over [hm.low_water, hm.high_water]
00227 template<class T>
00228 void
00229 Ondisk_Storage_Manager<T>::incrementalUpdate(struct hazy_model &hm, double &waste_time) {
00230   hm.invalidate_db_model();
00231   
00232   // ??TODO: Fix this to use the same code as below (Single entity)
00233   switch (hm._strategy) {
00234   case hazy_model::LAZY_NAIVE:
00235     break;
00236   case hazy_model::LAZY_HAZY: 
00237     VERBOSE_ONLY(std::cout << "[incrementalUpdate]: Lazy does nothing" << std::endl);
00238     break;
00239   case hazy_model::EAGER_NAIVE:
00240     rescan(hm);
00241     break;
00242   case hazy_model::EAGER_HAZY:
00243     update_disk_region(hm, hm.low_water, hm.high_water, waste_time);
00244     break;
00245   default:
00246     std::cout << "Unknown Strategy " << std::endl;
00247     assert(false);
00248   }
00249 }
00250 
00252 
// Recreates the hazy intermediate table under the current model.
//
// hm  model state; hm._model is serialized with model_to_dbstring() and cached
//     in the database unconditionally.
//
// One batch does all of the following, in order: run model_initialization and
// model_caching, EXECUTE internal_recreate (which setup() prepares as a SELECT
// ... INTO "<intermediate>_copy" ordered by eps, id), drop the old intermediate
// table, rename the copy into its place, and recreate both indexes. Unlike
// setup(), no CLUSTER statement is issued here.
00257 template<class T>
00258 void
00259 Ondisk_Storage_Manager<T>::updateInternalTable(struct hazy_model &hm) {
00260   std::string w_string, bias_string;
00261   model_to_dbstring(hm._model, w_string, bias_string);
00262   
00263   std::ostringstream querystr;
00264   querystr << "EXECUTE model_initialization" << unique_id_for_ps << ";";
00265   querystr << "EXECUTE model_caching" << unique_id_for_ps << "(" << w_string << ", " << bias_string << ");";
00266   querystr << "EXECUTE internal_recreate" << unique_id_for_ps << ";";
00267   querystr << "DROP TABLE IF EXISTS " << hazy_intermediate_table_name << " CASCADE;";
00268   querystr << "ALTER TABLE " << hazy_intermediate_table_name << "_copy" << " RENAME TO " << hazy_intermediate_table_name << ";";
00269   querystr << "CREATE INDEX index1_" << hazy_intermediate_table_name << " ON " << hazy_intermediate_table_name << "(eps, id) WITH (FILLFACTOR=100);";
00270   querystr << "CREATE INDEX index2_" << hazy_intermediate_table_name << " ON " << hazy_intermediate_table_name << " USING hash(id);";
00271   
00272   LOGGING_ONLY(Timer t1(true););
00273   
00274   int retVal = this->db_conn->execute_statement_msg(querystr.str().c_str(), "[UPDATE INTERNAL TABLE]");
00275   checkQueryReturnValue(retVal, querystr.str());
00276   
00277   LOGGING_ONLY(std::cout << "sort: time for recreating the table with two indexes: " << t1.stop() << std::endl;);
00278 #ifdef __LOGGING
        // Logging only: count rows whose stored eps sign disagrees with the label
        // recomputed from the cached model.
00279   std::ostringstream detailed_count_query;
00280   detailed_count_query.str("");
00281   detailed_count_query << "SELECT COUNT(*) FROM " << hazy_intermediate_table_name << " WHERE (eps > 0) <> (dotprdct_cached_without_waste(feature_vector) > 0);";
00282   int count_val = 0;
        // NOTE(review): the return value of this query is not checked, unlike the
        // equivalent logging query in update_disk_region(); on failure count_val
        // keeps its initial 0.
00283   this->db_conn->execute_query_msg_int(detailed_count_query.str().c_str(), "[LOGGING sort info]", count_val);
00284   std::cout << "\t Change Label=" << count_val << std::endl;
00285 #endif
00286   
00287 }
00288 
// Relabels, in place, the external-table rows whose eps lies in
// [lower_bound, upper_bound], and reports the share of time counted as waste.
//
// hm           model state; the model is cached in the database only when
//              hm.test_and_set_model_in_db() returns false.
// lower_bound  lower eps bound passed to PREPARED_CREATE_VIEW.
// upper_bound  upper eps bound passed to PREPARED_CREATE_VIEW.
// waste_time   out: elapsed time of the PREPARED_CREATE_VIEW call multiplied by
//              the ratio returned by retrieve_ratio.
//
// Three round-trips are made: model initialization/caching, the in-place
// update, and the waste-ratio query. Each return value goes to
// checkQueryReturnValue().
00299 template<class T>
00300 void
00301 Ondisk_Storage_Manager<T>::
00302 update_disk_region(struct hazy_model &hm, double lower_bound, double upper_bound, double &waste_time) {
00303   std::ostringstream ostr1, ostr2, ostr3;
00304   
00305   double waste_ratio = 0.0;
00306   std::string w_string, bias_string;
        // The model is serialized up front even if it turns out to be cached.
00307   model_to_dbstring(hm._model, w_string, bias_string);
00308   DEBUG_ONLY(std::cout << "[OD:update_disk_region:debug] model is: " << hm._model << std::endl << std::endl;);
00309   
00310   ostr1 << "EXECUTE model_initialization" << unique_id_for_ps << ";";
00311   if(!hm.test_and_set_model_in_db()) { ostr1 << "EXECUTE model_caching" << unique_id_for_ps << "(" << w_string << ", " << bias_string << ");"; }
00312   LOGGING_ONLY(std::cout <<"[OD:update_disk_region]" << std::endl;);
00313   
00314   LOGGING_ONLY(Timer t1(true););
        // NOTE(review): the message text mentions an insert query and the log line
        // below mentions "drop view", but this batch only initializes the waste
        // count and optionally caches the model.
00315   int retVal = this->db_conn->execute_statement_msg(ostr1.str().c_str(), "PROBLEM in insert query in update method\n");
00316   checkQueryReturnValue(retVal, ostr1.str());
00317   LOGGING_ONLY(std::cout << "\t time for drop view, initialize waste count and cache model:" << t1.stop() << std::endl;);
00318   
00319   ostr2 << "EXECUTE PREPARED_CREATE_VIEW" << unique_id_for_ps << "(" << lower_bound << ", " << upper_bound << ");";
        // This timer is not wrapped in LOGGING_ONLY: its result feeds waste_time.
00320   Timer waste_timer(true);
00321   DEBUG_ONLY(std::cout << "\t\t [lower_bound,upper_bound]=" << lower_bound << ", " << upper_bound << std::endl;);
00322   retVal = this->db_conn->execute_statement_msg(ostr2.str().c_str(), "[UPDATE]");
00323   checkQueryReturnValue(retVal, ostr2.str());
00324   double total_time = waste_timer.stop();
00325   LOGGING_ONLY(std::cout << "\t time for prepared statement: " << total_time << std::endl;);
00326   
00327   ostr3 << "EXECUTE retrieve_ratio" << unique_id_for_ps << ";";
00328   retVal = this->db_conn->execute_query_msg_double(ostr3.str().c_str(), "[update]", waste_ratio);
00329   checkQueryReturnValue(retVal, ostr3.str());
00330   waste_time = total_time * waste_ratio;
00331   LOGGING_ONLY(std::cout << "\t Waste Ratio is " << waste_ratio << " waste_time " << waste_time << std::endl;);
00332   
00333   // **** START LOGGING ****
00334 #ifdef __LOGGING    
        // Logging only: how many intermediate rows fall inside the bounds, and how
        // many rows have a stored eps sign that disagrees with the recomputed label.
00335   std::ostringstream detailed_count_query;
00336   detailed_count_query << "SELECT COUNT(*) FROM " << hazy_intermediate_table_name << " WHERE eps BETWEEN " << lower_bound << " AND " << upper_bound << ";";
00337   int tuplesBetween = 0;
00338   retVal = this->db_conn->execute_query_msg_int(detailed_count_query.str().c_str(), "[update]", tuplesBetween);
00339   checkQueryReturnValue(retVal, detailed_count_query.str());
00340   std::cout << "\t tuples between lower_bound and upper bound: " << tuplesBetween << std::endl;
00341   
00342   detailed_count_query.str("");
00343   detailed_count_query << "SELECT COUNT(*) FROM " << hazy_intermediate_table_name << " WHERE (eps > 0) <> (dotprdct_cached_without_waste(feature_vector) > 0);";
00344   int nChangeLabels = 0;
00345   retVal = this->db_conn->execute_query_msg_int(detailed_count_query.str().c_str(), "[update]", nChangeLabels);
00346   checkQueryReturnValue(retVal, detailed_count_query.str());
00347   std::cout << "\t tuples changed labels: " << nChangeLabels << std::endl;
00348 #endif
00349   // **** END LOGGING ****  
00350 }
00351 
// Looks up the class label of a single entity.
//
// e   id of the entity, spliced into the EXECUTE call as its first argument.
// c   out: receives the integer result of the query.
// hm  model state; the switch dispatches on hm._strategy, and the lazy branches
//     cache the model first when hm.test_and_set_model_in_db() returns false.
//
//   LAZY_NAIVE  - single_naive_lazy(e): label computed from the entity table
//   LAZY_HAZY   - single_hazy_lazy(e, hm.low_water, hm.high_water)
//   EAGER_*     - single_entity_eager(e): label read from the external table
//
// Unlike getNumInClass(), the lazy branches do not run model_initialization
// (those lines are commented out).
00358 template<class T>
00359 void 
00360 Ondisk_Storage_Manager<T>::
00361 getEntityClass(key e, sClass &c, struct hazy_model &hm) {
00362   std::ostringstream o; // will hold the SQL command
00363   std::string model_string, bias_string;
00364   
00365   switch (hm._strategy) {
00366   case hazy_model::LAZY_NAIVE:
00367     // o << "EXECUTE model_initialization;";
00368     if(!hm.test_and_set_model_in_db()) { 
00369       model_to_dbstring(hm._model, model_string, bias_string);
00370       LOGGING_ONLY(std::cout << "[Model is not Cached]" << std::endl;);
00371       o << "EXECUTE model_caching" << unique_id_for_ps << "(" << model_string << ", " << bias_string << ");"; 
00372     } else {
00373       LOGGING_ONLY(std::cout << "[Model is Cached]" << std::endl;);
00374     }
00375     o << "EXECUTE single_naive_lazy" << unique_id_for_ps << "(" << e << ");";
00376     break;
00377   case hazy_model::LAZY_HAZY: 
00378     //o << "EXECUTE model_initialization;";
00379     if(!hm.test_and_set_model_in_db()) { 
00380       model_to_dbstring(hm._model, model_string, bias_string);
00381       LOGGING_ONLY(std::cout << "[Model is not Cached]" << std::endl;);
00382       o << "EXECUTE model_caching" << unique_id_for_ps << "(" << model_string << ", " << bias_string << ");"; 
00383     } else {
00384       LOGGING_ONLY(std::cout << "[Model is Cached]" << std::endl;);
00385     }
00386     o << "EXECUTE single_hazy_lazy" << unique_id_for_ps << "(" << e << ", " << hm.low_water << ", " << hm.high_water << ");";
00387     break;
        // Both eager strategies share one statement; EAGER_NAIVE falls through.
00388   case hazy_model::EAGER_NAIVE:
00389   case hazy_model::EAGER_HAZY: 
00390     // TODO:
00391     o << "EXECUTE single_entity_eager" << unique_id_for_ps << "(" << e << ");"; 
00392     break;
00393   default:
00394     std::cout << "[getEntityClass] Non Lazy or Eager strategy???" << std::endl;
00395     assert(false);
00396   }
00397   
00398   VERBOSE_ONLY(std::cout << "[getEntityClass] " << o.str() << std::endl;);
00399   LOGGING_ONLY(Timer qtimer(true););
        // NOTE(review): __FILE__ and __LINE__ are inside the string literal and are
        // therefore not expanded by the preprocessor.
00400   int retVal = this->db_conn->execute_query_msg_int(o.str().c_str(), "[Ondisk_Storage_Manager::GetEntityClass:__FILE__: __LINE__", c);
00401   checkQueryReturnValue(retVal, o.str());  
00402   LOGGING_ONLY(std::cout << "label of " << e << " is: " << c << " time:" << qtimer.stop() << std::endl;);
00403 }
00404 
// Counts the entities currently labelled with class c.
//
// c           class to count; only c == 1 is supported (enforced by assert).
// nClass      out: receives the integer count returned by the query.
// hm          model state; the switch dispatches on hm._strategy, and the lazy
//             branches run model_initialization and cache the model when
//             hm.test_and_set_model_in_db() returns false.
// waste_time  out: 0.0 for every strategy except LAZY_HAZY, where it is the
//             time measured around the count and ratio queries multiplied by
//             the ratio returned by retrieve_ratio.
//
//   LAZY_NAIVE  - count_naive_lazy
//   LAZY_HAZY   - count_hazy_lazy(hm.low_water, hm.high_water)
//   EAGER_NAIVE - count_naive_eager
//   EAGER_HAZY  - count_hazy_eager(hm.low_water, hm.high_water)
//
// The diagnostic tags passed to the database layer name this method
// (GetNumInClass) so that failures are attributed to the right call site.
// NOTE(review): __FILE__ and __LINE__ inside those tags are part of the string
// literal and are not expanded; the text is kept to match the tags used
// elsewhere in this file.
00412 template<class T>
00413 void
00414 Ondisk_Storage_Manager<T>::
00415 getNumInClass(sClass c, int &nClass, struct hazy_model &hm, double &waste_time) {
00416   // TODO: ALLOW ANY CLASS HERE.
00417   assert(c == 1);
00418   std::string model_string, bias_string;
00419   std::ostringstream o;
00420   
00421   waste_time = 0.0;
00422   switch (hm._strategy) {
00423   case hazy_model::LAZY_NAIVE: 
00424     o << "EXECUTE model_initialization" << unique_id_for_ps << ";";
00425     if(!hm.test_and_set_model_in_db()) {    
00426       model_to_dbstring(hm._model, model_string, bias_string);    
00427       o << "EXECUTE model_caching" << unique_id_for_ps << "(" << model_string << ", " << bias_string << ");"; 
00428     }
00429     o << "EXECUTE count_naive_lazy" << unique_id_for_ps << ";";
00430     break;
00431   case hazy_model::LAZY_HAZY:
00432     o << "EXECUTE model_initialization" << unique_id_for_ps << ";";
00433     if(!hm.test_and_set_model_in_db()) {   
00434       model_to_dbstring(hm._model, model_string, bias_string);    
00435       o << "EXECUTE model_caching" << unique_id_for_ps << "(" << model_string << ", " << bias_string << ");"; 
00436     }
00437     o << "EXECUTE count_hazy_lazy" << unique_id_for_ps << "(" << hm.low_water << "," << hm.high_water << ");";
00438     break;
00439   case hazy_model::EAGER_NAIVE:
00440     o << "EXECUTE count_naive_eager" << unique_id_for_ps << ";"; 
00441     break;
00442   case hazy_model::EAGER_HAZY:
00443     o << "EXECUTE count_hazy_eager" << unique_id_for_ps << "(" << hm.low_water << ", " << hm.high_water << ");";
00444     break;
00445   default:
00446     std::cout << "UNKNOWN STRATEGY!!" << std::endl;
00447     assert(false);    
00448     break;
00449   }
00450   // Only LAZY, HAZY needs to update waste
00451   VERBOSE_ONLY(std::cout << "[getNumInClass] " << o.str() << std::endl;);
00452   if( hm._strategy == hazy_model::LAZY_HAZY ) {
          // The timer spans both the count query and the ratio query.
00453     Timer t(true);
00454     double waste_ratio = 0.0;
00455     int retVal = this->db_conn->execute_query_msg_int(o.str().c_str(), "[Ondisk_Storage_Manager::GetNumInClass:__FILE__: __LINE__", nClass);
00456     checkQueryReturnValue(retVal, o.str());  
00457     std::ostringstream o2;
00458     o2 << "EXECUTE retrieve_ratio" << unique_id_for_ps << ";";
00459     retVal = this->db_conn->execute_query_msg_double(o2.str().c_str(), "[Ondisk_Storage_Manager::GetNumInClass:__FILE__: __LINE__", waste_ratio);
00460     checkQueryReturnValue(retVal, o2.str());  
00461     waste_time = t.stop() * waste_ratio;
00462     LOGGING_ONLY(std::cout << "\tWaste Ratio: " << waste_ratio << " waste time: " << waste_time << std::endl;);
00463   } else {
00464     int retVal = this->db_conn->execute_query_msg_int(o.str().c_str(), "[Ondisk_Storage_Manager::GetNumInClass:__FILE__: __LINE__", nClass);
00465     checkQueryReturnValue(retVal, o.str());  
00466     waste_time = 0.0;
00467   }
00468   
00469   LOGGING_ONLY(std::cout << "num in class " << c << " is: " << nClass << std::endl;);
00470 }
00471 
// Inserts one sparse-vector entity and keeps the derived tables in step.
//
// m   model state; the model is cached in the database when
//     m.test_and_set_model_in_db() returns false (hazy or eager strategies only).
// id  entity id written to every affected table.
// f   feature vector, rendered to SQL text by svector_to_dbstring().
//
// A single batch always inserts into the entity table. In addition:
//   hazy strategies - insert (id, eps, feature_vector) into the intermediate table
//   EAGER_HAZY      - insert (id, eps, class) into the external table
//   EAGER_NAIVE     - insert (id, class) into the external table
// eps and class are computed in SQL with dotprdct_cached_without_waste().
00478 void
00479 Ondisk_Storage_Manager_Sparse::
00480 insertEntity(struct hazy_model &m, key id, SVector f) {
00481   std::string feature_vector_str;
00482   svector_to_dbstring(f, feature_vector_str);
00483   
00484   std::ostringstream ostr;
00485   ostr << "INSERT INTO " << this->entity_table_name << "(id,feature_vector) ";
00486   ostr << "VALUES (" << id << "," << feature_vector_str << ");";
00487   
        // For LAZY_NAIVE neither test holds, so only the entity table is touched.
00488   if(hazy_model::isHazy(_strategy) || hazy_model::isEager(_strategy)) {
00489     std::string model_string, bias_string;
00490     
00491     ostr << "EXECUTE model_initialization" << unique_id_for_ps << ";";
00492     if(!m.test_and_set_model_in_db()) { 
00493       model_to_dbstring(m._model, model_string, bias_string); 
00494       ostr << "EXECUTE model_caching" << unique_id_for_ps << "(" << model_string << ", " << bias_string << ");"; 
00495     }
00496     
          // EAGER_HAZY satisfies both this test and the next one, so it inserts
          // into the intermediate table and the external table.
00497     if(hazy_model::isHazy(_strategy))
00498       ostr << "INSERT INTO " << hazy_intermediate_table_name << "(id, eps, feature_vector) VALUES (" << id << ", " << "dotprdct_cached_without_waste(" << feature_vector_str << ")::float8" << ", " << feature_vector_str << ");";
00499     if(_strategy == hazy_model::EAGER_HAZY)
00500       ostr << "INSERT INTO " << external_table_name << "(id, eps, class) VALUES (" << id << ", " << "dotprdct_cached_without_waste(" << feature_vector_str << ")::float8" << ", (dotprdct_cached_without_waste(" << feature_vector_str << ")::float8 > 0)::int);";
00501     else if(_strategy == hazy_model::EAGER_NAIVE)
00502       ostr << "INSERT INTO " << external_table_name << "(id, class) VALUES (" << id << ", " << "(dotprdct_cached_without_waste(" << feature_vector_str << ") > 0)::int);";
00503   }
00504   int retVal = db_conn->execute_statement_msg(ostr.str().c_str(), "[Ondisk_Storage_Manager::InsertEntity:__FILE__: __LINE__");
00505   checkQueryReturnValue(retVal, ostr.str());
00506   LOGGING_ONLY(std::cout << "entity inserted, id: " << id << std::endl;);
00507 }
00508 
// Inserts one dense-vector entity and keeps the derived tables in step.
//
// m   model state; the model is cached in the database when
//     m.test_and_set_model_in_db() returns false (hazy or eager strategies only).
// id  entity id written to every affected table.
// f   feature vector, rendered to SQL text by fvector_to_dbstring().
//
// The SQL issued is the same as in the sparse variant; only the vector
// serializer differs. A single batch always inserts into the entity table, then:
//   hazy strategies - insert (id, eps, feature_vector) into the intermediate table
//   EAGER_HAZY      - insert (id, eps, class) into the external table
//   EAGER_NAIVE     - insert (id, class) into the external table
00515 void
00516 Ondisk_Storage_Manager_Dense::insertEntity(struct hazy_model &m, key id, FVector f) {
00517   std::string feature_vector_str;
00518   fvector_to_dbstring(f, feature_vector_str);
00519   
00520   std::ostringstream ostr;
00521   ostr << "INSERT INTO " << this->entity_table_name << "(id,feature_vector) ";
00522   ostr << "VALUES (" << id << "," << feature_vector_str << ");";
00523   
        // For LAZY_NAIVE neither test holds, so only the entity table is touched.
00524   if(hazy_model::isHazy(_strategy) || hazy_model::isEager(_strategy)) {
00525     std::string model_string, bias_string;
00526     
00527     ostr << "EXECUTE model_initialization" << unique_id_for_ps << ";";
00528     if(!m.test_and_set_model_in_db()) { model_to_dbstring(m._model, model_string, bias_string); ostr << "EXECUTE model_caching" << unique_id_for_ps << "(" << model_string << ", " << bias_string << ");"; }
00529     
          // EAGER_HAZY satisfies both this test and the next one, so it inserts
          // into the intermediate table and the external table.
00530     if(hazy_model::isHazy(_strategy))
00531       ostr << "INSERT INTO " << hazy_intermediate_table_name << "(id, eps, feature_vector) VALUES (" << id << ", " << "dotprdct_cached_without_waste(" << feature_vector_str << ")::float8" << ", " << feature_vector_str << ");";
00532     if(_strategy == hazy_model::EAGER_HAZY)
00533       ostr << "INSERT INTO " << external_table_name << "(id, eps, class) VALUES (" << id << ", " << "dotprdct_cached_without_waste(" << feature_vector_str << ")::float8" << ", (dotprdct_cached_without_waste(" << feature_vector_str << ")::float8 > 0)::int);";
00534     else if(_strategy == hazy_model::EAGER_NAIVE)
00535       ostr << "INSERT INTO " << external_table_name << "(id, class) VALUES (" << id << ", " << "(dotprdct_cached_without_waste(" << feature_vector_str << ") > 0)::int);";
00536   }
00537   int retVal = db_conn->execute_statement_msg(ostr.str().c_str(), "[Ondisk_Storage_Manager::InsertEntity:__FILE__: __LINE__");
00538   checkQueryReturnValue(retVal, ostr.str());
00539   LOGGING_ONLY(std::cout << "entity inserted, id: " << id << std::endl;);
00540 }

Generated on Wed Dec 15 10:46:15 2010 for Hazy_System by  doxygen 1.4.7