| 1 | package felix.util; |
| 2 | |
| 3 | import java.io.BufferedReader; |
| 4 | import java.io.BufferedWriter; |
| 5 | import java.io.File; |
| 6 | import java.io.FileReader; |
| 7 | import java.io.IOException; |
| 8 | import java.math.BigInteger; |
| 9 | import java.security.MessageDigest; |
| 10 | import java.security.NoSuchAlgorithmException; |
| 11 | import java.sql.DatabaseMetaData; |
| 12 | import java.sql.ResultSet; |
| 13 | import java.sql.ResultSetMetaData; |
| 14 | import java.util.ArrayList; |
| 15 | import java.util.Arrays; |
| 16 | import java.util.HashMap; |
| 17 | import java.util.HashSet; |
| 18 | |
| 19 | import tuffy.db.RDB; |
| 20 | import tuffy.mln.Literal; |
| 21 | import tuffy.mln.Predicate; |
| 22 | import tuffy.mln.Term; |
| 23 | import tuffy.util.Config; |
| 24 | import tuffy.util.ExceptionMan; |
| 25 | import tuffy.util.FileMan; |
| 26 | import tuffy.util.StringMan; |
| 27 | import tuffy.util.UIMan; |
| 28 | import felix.dstruct.FelixClause; |
| 29 | import felix.dstruct.FelixPredicate; |
| 30 | import felix.dstruct.FelixQuery; |
| 31 | import felix.parser.FelixCommandOptions; |
| 32 | |
| 33 | public class ActiveLearner { |
| 34 | |
| 35 | FelixCommandOptions opt; |
| 36 | |
| 37 | RDB db; |
| 38 | |
| 39 | // TODO: make this general |
| 40 | /** |
| 41 | * Generates xml file containing relevant info for query. |
| 42 | * @throws Exception |
| 43 | */ |
| 44 | public void getTopCandidates() throws Exception{ |
| 45 | |
| 46 | FelixUIMan.println(">>> Connecting to RDBMS at " + FelixConfig.db_url); |
| 47 | |
| 48 | if(FelixConfig.evidDBSchema == null){ |
| 49 | ExceptionMan.die("Active learning mode needs the evidence schema specified"); |
| 50 | } |
| 51 | |
| 52 | if(FelixConfig.evidDBSchema != null){ |
| 53 | FelixConfig.db_schema = FelixConfig.evidDBSchema; |
| 54 | } |
| 55 | |
| 56 | db = RDB.getRDBbyConfig(); |
| 57 | db.resetSchema(FelixConfig.db_schema); |
| 58 | |
| 59 | RDB db2 = RDB.getRDBbyConfig(); |
| 60 | db2.resetSchema(FelixConfig.db_schema); |
| 61 | |
| 62 | HashMap<Long, String> id2str = db.loadIdSymbolMapFromTable(); |
| 63 | HashMap<String, Integer> str2id = db.loadSymbolIdMapFromTable(); |
| 64 | |
| 65 | FelixQuery fq = new FelixQuery(); |
| 66 | |
| 67 | String[] progFiles = opt.fprog.split(","); |
| 68 | fq.loadPrograms(progFiles); |
| 69 | |
| 70 | if(opt.fquery != null){ |
| 71 | String[] queryFiles = opt.fquery.split(","); |
| 72 | fq.loadQueries(queryFiles); |
| 73 | } |
| 74 | |
| 75 | if(opt.queryAtoms != null){ |
| 76 | FelixUIMan.println(">>> Parsing query atoms in command line"); |
| 77 | fq.parseQueryCommaList(opt.queryAtoms); |
| 78 | } |
| 79 | |
| 80 | // Predicate -> (attr -> value) |
| 81 | /* |
| 82 | predicate | property | value |
| 83 | -----------+------------+------------- |
| 84 | el | optype | lr |
| 85 | el | provtable | pred_DMO_43 |
| 86 | el | labelfield | eid2 |
| 87 | */ |
| 88 | HashMap<FelixPredicate, HashMap<String, String>> attrs = new |
| 89 | HashMap<FelixPredicate, HashMap<String, String>>(); |
| 90 | |
| 91 | // TODO: use this |
| 92 | //String[] viewDesps = {"QueryWord(queryword) := el(query, entityid), query( query, queryWord, docID)", |
| 93 | //"EntityWord(wikiTitle, entityWord) := el(query, entityid), entity( entityid, wikiTitle, entityWord)"}; |
| 94 | |
| 95 | // TODO: use this! |
| 96 | // db.execute("CREATE TEMPORARY VIEW _active_view1 AS SELECT " + |
| 97 | // " t0.queryid1 AS queryid1, t0.entityid2 as entityid2, ARRAY[t1.queryword2] as viewcontent "+ |
| 98 | // " FROM pred_el t0, pred_query t1 WHERE t0.queryid1 = t1.queryid1"); |
| 99 | // |
| 100 | // db.execute("CREATE TEMPORARY VIEW _active_view2 AS SELECT " + |
| 101 | // " t0.queryid1 AS queryid1, t0.entityid2 as entityid2, ARRAY[t1.wikititle2, t1.entityword3] as viewcontent "+ |
| 102 | // " FROM pred_el t0, pred_entity t1 WHERE t0.entityid2 = t1.entityid1"); |
| 103 | |
| 104 | // load meta information |
| 105 | ResultSet rs = db.query("SELECT * from _prov_metainfo;"); |
| 106 | while(rs.next()){ |
| 107 | String pred = rs.getString(1); |
| 108 | String attr = rs.getString(2); |
| 109 | String value = rs.getString(3); |
| 110 | |
| 111 | FelixPredicate p = fq.getPredByName(pred); |
| 112 | |
| 113 | if(!attrs.containsKey(p)){ |
| 114 | attrs.put(p, new HashMap<String, String>()); |
| 115 | } |
| 116 | attrs.get(p).put(attr, value); |
| 117 | } |
| 118 | |
| 119 | BufferedWriter bw = FileMan.getBufferedWriterMaybeGZ(opt.fout); |
| 120 | |
| 121 | bw.append("<?xml version=\"1.0\"?>\n"); |
| 122 | bw.append("<root>\n\n"); |
| 123 | |
| 124 | |
| 125 | // TODO: make this work for predicates with arity != 2 |
| 126 | HashMap<Integer, Integer> gt = new HashMap<Integer, Integer>(); |
| 127 | |
| 128 | String predicate_name = ""; |
| 129 | |
| 130 | // parse gt file |
| 131 | // if(opt.gtFile != null) { |
| 132 | // System.out.println("Parsing gt file " + opt.gtFile); |
| 133 | // try { |
| 134 | // BufferedReader in = new BufferedReader(new FileReader(opt.gtFile)); |
| 135 | // String str; |
| 136 | // while ((str = in.readLine()) != null) { |
| 137 | // predicate_name = str.substring(0, str.indexOf('(')); |
| 138 | // String rest = str.substring(str.indexOf('(')+1, str.lastIndexOf(')') - str.indexOf('(') + 1); |
| 139 | // String[] args = rest.split(","); |
| 140 | // |
| 141 | // // Remove leading/trailing quotes |
| 142 | // for(int i = 0; i < args.length; i++) { |
| 143 | // args[i] = args[i].trim(); |
| 144 | // if(args[i].startsWith("\"")) { |
| 145 | // args[i] = args[i].substring(1); |
| 146 | // } |
| 147 | // if(args[i].endsWith("\"")) { |
| 148 | // args[i] = args[i].substring(0, args[i].length() - 1); |
| 149 | // } |
| 150 | // } |
| 151 | // //System.out.println(args[0] + "\t" + args[1]); |
| 152 | // //System.out.println("\t" + str2id.get(args[0]) + "\t" + str2id.get(args[1])); |
| 153 | // gt.put(str2id.get(args[0]), str2id.get(args[1])); |
| 154 | // } |
| 155 | // in.close(); |
| 156 | // } catch (IOException e) { |
| 157 | // ExceptionMan.die("Error reading from file " + opt.gtFile); |
| 158 | // } |
| 159 | // } |
| 160 | |
| 161 | // Go through each gt predicate |
| 162 | //for(int h = 0; h < gt.size(); h++) { |
| 163 | |
| 164 | |
| 165 | |
| 166 | FelixPredicate p = fq.getPredByName(predicate_name); |
| 167 | ArrayList<String> arg_names = p.getArgs(); |
| 168 | |
| 169 | //for(FelixPredicate p : fq.getAllPred()){ |
| 170 | if(p.hasQuery()){ |
| 171 | |
| 172 | if(!attrs.get(p).get("optype").equals("lr")){ |
| 173 | ExceptionMan.die("Currently, I can only deal with LR!"); |
| 174 | } |
| 175 | |
| 176 | // this contains: eid2 |
| 177 | ArrayList<String> labelfields = |
| 178 | new ArrayList<String>(Arrays.asList(attrs.get(p).get("labelfield").split(","))); |
| 179 | |
| 180 | // this works for ouput of every gt link |
| 181 | // String where = arg_names.get(0) +y "=" + str2id.get(gt_args[0]); |
| 182 | // for(int i = 1; i < arg_names.size(); i++) { |
| 183 | // where += " and " + arg_names.get(i) + "=" + str2id.get(gt_args[i]); |
| 184 | // } |
| 185 | // rs = db.query("select * from " + p.getRelName() + " where " + where); |
| 186 | |
| 187 | //String select = "id, truth, max(prior), club, atomid"; |
| 188 | //for(int i = 1; i < arg_names.size(); i++) { |
| 189 | // select += "," + arg_names.get(i); |
| 190 | //} |
| 191 | |
| 192 | //ArrayList<Integer> temp = new ArrayList<Integer>(gt.keySet()); |
| 193 | //ArrayList<String> where = new ArrayList<String>(); |
| 194 | //for(int i = 0; i < temp.size(); i++) { |
| 195 | // where.add(arg_names.get(0) + "=" + temp.get(i)); |
| 196 | //} |
| 197 | |
| 198 | // TODO: make this general (check key constraint?) |
| 199 | // TODO: make this faster (use GROUP BY?) |
| 200 | // TODO: use JOIN (create table for gt) |
| 201 | //String query = "SELECT * FROM " + p.getRelName() + " WHERE " + StringMan.join(" OR ", where) + " ORDER BY " + arg_names.get(0) + " ASC, prior DESC"; |
| 202 | String query = "SELECT * FROM " + p.getRelName() + " ORDER BY " + arg_names.get(0) + " ASC, prior DESC"; |
| 203 | System.out.println("rs query: " + query); |
| 204 | rs = db.query(query); |
| 205 | |
| 206 | /* |
| 207 | id | truth | prior | club | atomid | mid1 | eid2 |
| 208 | ----+-------+----------------------+------+--------+------+--------- |
| 209 | 1 | t | 1 | 2 | | 5 | 1203067 |
| 210 | 2 | t | 3.72007597602084e-44 | 2 | | 5 | 1 |
| 211 | 3 | t | 2.67863696180808e-33 | 2 | | 5 | 1199200 |
| 212 | 4 | t | 6.71418428821159e-79 | 2 | | 9 | 909864 |
| 213 | 5 | t | 2.05388455404083e-85 | 2 | | 9 | 1 |
| 214 | 6 | t | 6.71418428821159e-79 | 2 | | 9 | 1744854 |
| 215 | 7 | t | 6.71418428821159e-79 | 2 | | 9 | 1300787 |
| 216 | 8 | t | 6.71418428821159e-79 | 2 | | 9 | 818955 |
| 217 | 9 | t | 1.80485138784542e-35 | 2 | | 9 | 1590722 |
| 218 | 10 | t | 6.71418428821159e-79 | 2 | | 9 | 299086 |
| 219 | */ |
| 220 | |
| 221 | int cur_id = 0; |
| 222 | int choice = 0; |
| 223 | int output_count = 0; |
| 224 | boolean found = false; |
| 225 | |
| 226 | while(rs.next() && output_count < 10) { |
| 227 | |
| 228 | // Mention id (felix id) |
| 229 | Integer id1 = rs.getInt(p.getArgs().get(0)); |
| 230 | |
| 231 | // Entity id (felix id) |
| 232 | Integer id2 = rs.getInt(p.getArgs().get(1)); |
| 233 | |
| 234 | // check if there's a gt for this result |
| 235 | if(gt.get(id1) == null) { |
| 236 | continue; |
| 237 | } |
| 238 | |
| 239 | // Mention id |
| 240 | String str1 = id2str.get(id1); |
| 241 | |
| 242 | // Entity id |
| 243 | String str2 = id2str.get(id2); |
| 244 | |
| 245 | // New id |
| 246 | if(id1 != cur_id) { |
| 247 | cur_id = id1; |
| 248 | choice = 0; |
| 249 | found = false; |
| 250 | } |
| 251 | |
| 252 | // check gt |
| 253 | boolean match = (id2.equals(gt.get(id1))); |
| 254 | |
| 255 | // Already found correct result |
| 256 | if(found) { |
| 257 | continue; |
| 258 | } |
| 259 | // result found |
| 260 | else if(match) { |
| 261 | found = true; |
| 262 | // correct |
| 263 | if(choice == 0) { |
| 264 | System.out.println(str1 + "\t" + str2); |
| 265 | System.out.println("\tCorrect"); |
| 266 | continue; |
| 267 | } |
| 268 | // otherwise output result |
| 269 | } |
| 270 | // incorrect |
| 271 | else { |
| 272 | if(choice != 0) { |
| 273 | choice++; |
| 274 | continue; |
| 275 | } |
| 276 | choice++; |
| 277 | System.out.println(str1 + "\t" + str2); |
| 278 | System.out.println("\tIncorrect\t" + id2str.get(gt.get(id1))); |
| 279 | } |
| 280 | |
| 281 | output_count++; |
| 282 | |
| 283 | //String mmmmmid = id2str.get(rs.getInt(p.getArgs().get(0))); |
| 284 | //System.out.println(mmmmmid); |
| 285 | |
| 286 | ArrayList<String> binds = new ArrayList<String>(); |
| 287 | ArrayList<String> joincond = new ArrayList<String>(); |
| 288 | ArrayList<String> smalllabelfields = new ArrayList<String>(); |
| 289 | |
| 290 | // Go through each column |
| 291 | for(int i=0;i<p.arity();i++){ |
| 292 | if(!labelfields.contains(p.getArgs().get(i))){ |
| 293 | joincond.add("t0." + p.getArgs().get(i) + "=" + rs.getString(p.getArgs().get(i))); |
| 294 | }else{ |
| 295 | smalllabelfields.add(id2str.get(rs.getInt(p.getArgs().get(i)))); |
| 296 | } |
| 297 | binds.add("t0." + p.getArgs().get(i) + "=" + rs.getInt(p.getArgs().get(i))); |
| 298 | } |
| 299 | |
| 300 | String label = StringMan.commaList(smalllabelfields); |
| 301 | Double prob = rs.getDouble("prior"); |
| 302 | String probability = UIMan.decimalRound(4, prob); |
| 303 | |
| 304 | //TODO: THINK ABOUT ``NO-KEY'' CASE |
| 305 | // SELECT * FROM pred_DMO_39 WHERE t0.mid1=163545 |
| 306 | // TODO: only do this once per mention |
| 307 | String sql = "SELECT * FROM " + attrs.get(p).get("provtable") + " t0 WHERE " + |
| 308 | StringMan.join(" AND ",joincond) + " ORDER BY t0.weight DESC"; |
| 309 | System.out.println("prov query: " + sql); |
| 310 | |
| 311 | ResultSet provrs = db2.query(sql); |
| 312 | // System.out.print(p.getName() + "("); |
| 313 | bw.append("<QUERY>\n"); |
| 314 | bw.append(" <NAME>" + p.getName() + "("); |
| 315 | |
| 316 | ArrayList<String> args = new ArrayList<String>(); |
| 317 | ArrayList<String> rawargs = new ArrayList<String>(); |
| 318 | for(int i=0;i<p.arity();i++){ |
| 319 | args.add(id2str.get(rs.getInt(p.getArgs().get(i)))); |
| 320 | rawargs.add(rs.getString(p.getArgs().get(i))); |
| 321 | } |
| 322 | bw.append(StringMan.commaList(args) + ")</NAME>\n"); |
| 323 | bw.append(" <TYPE>" + "LR" + "</TYPE>\n"); |
| 324 | bw.append(" <LABEL>" + label + "</LABEL>\n"); |
| 325 | bw.append(" <PROB>" + probability + "</PROB>\n"); |
| 326 | bw.append(" <CORRECT>" + found + "</CORRECT>\n"); |
| 327 | |
| 328 | |
| 329 | // TODO: make this general |
| 330 | // Get mention text |
| 331 | String sql1 = "SELECT t0.word4 FROM pred_mention t0 WHERE t0.mid1=" + id1; |
| 332 | System.out.println("mention query: " + sql1); |
| 333 | ResultSet mid = db.query(sql1); |
| 334 | String mention = "Missing"; |
| 335 | if(mid.next()) { |
| 336 | mention = id2str.get(mid.getInt(1)); |
| 337 | } |
| 338 | |
| 339 | // Get entity text |
| 340 | String sql2 = "SELECT t0.word2 FROM pred_entity t0 WHERE t0.eid1=" + id2; |
| 341 | System.out.println("entity query: " + sql2); |
| 342 | ResultSet eid = db.query(sql2); |
| 343 | String entity = "NIL"; |
| 344 | if(eid.next()) { |
| 345 | entity = id2str.get(eid.getInt(1)); |
| 346 | } |
| 347 | |
| 348 | // Get sentence text |
| 349 | String sql3 = "SELECT t0.sentencetext2 FROM pred_sentence t0 WHERE t0.mid1=" + id1; |
| 350 | System.out.println("sentence query: " + sql3); |
| 351 | ResultSet sent = db.query(sql3); |
| 352 | String sentence = ""; |
| 353 | if(sent.next()) { |
| 354 | sentence = id2str.get(sent.getInt(1)); |
| 355 | sentence = sentence.replaceAll("&", "&"); |
| 356 | sentence = sentence.replaceAll("<", "<"); |
| 357 | sentence = sentence.replaceAll(">", ">"); |
| 358 | } |
| 359 | bw.append(" <COMMENT>\n"); |
| 360 | bw.append(" <DESP>Mention</DESP>\n"); |
| 361 | bw.append(" <RS><METARS>" + mention + "</METARS></RS>\n"); |
| 362 | bw.append(" </COMMENT>\n"); |
| 363 | bw.append(" <COMMENT>\n"); |
| 364 | bw.append(" <DESP>Entity</DESP>\n"); |
| 365 | bw.append(" <RS><METARS>" + entity + "</METARS></RS>\n"); |
| 366 | bw.append(" </COMMENT>\n"); |
| 367 | bw.append(" <COMMENT>\n"); |
| 368 | bw.append(" <DESP>Sentence</DESP>\n"); |
| 369 | bw.append(" <RS><METARS>" + sentence + "</METARS></RS>\n"); |
| 370 | bw.append(" </COMMENT>\n"); |
| 371 | |
| 372 | |
| 373 | // for(int i=1;i<=2;i++){ |
| 374 | // bw.append(" <COMMENT>\n"); |
| 375 | // bw.append(" <DESP>" + viewDesps[i-1] + "</DESP>\n"); |
| 376 | // String viewname = "_active_view" + i; |
| 377 | // //ResultSet viewrs = db.query("SELECT * FROM " + viewname + " t0 WHERE " + |
| 378 | // // StringMan.join(" AND ", binds)); |
| 379 | // //while(viewrs.next()){ |
| 380 | // // Integer[] columns = (Integer[])viewrs.getArray("viewcontent").getArray(); |
| 381 | // // bw.append(" <RS>\n"); |
| 382 | // // for(Integer ec : columns){ |
| 383 | // // bw.append(" <METARS>" + id2str.get(ec) + "</METARS>\n"); |
| 384 | // // } |
| 385 | // // bw.append(" </RS>\n"); |
| 386 | // bw.append("<RS><METARS></METARS></RS>\n"); |
| 387 | // //} |
| 388 | // bw.append(" </COMMENT>\n"); |
| 389 | // } |
| 390 | |
| 391 | //bw.append(" <COMMENT>" + StringMan.commaList(rawargs) + "</COMMENT>\n"); |
| 392 | |
| 393 | // Go through each rule |
| 394 | while(provrs.next()){ |
| 395 | |
| 396 | // This contains entity id |
| 397 | smalllabelfields = new ArrayList<String>(); |
| 398 | for(int i=0;i<p.arity();i++){ |
| 399 | if(labelfields.contains(p.getArgs().get(i))){ |
| 400 | smalllabelfields.add(id2str.get(provrs.getInt(p.getArgs().get(i)))); |
| 401 | } |
| 402 | } |
| 403 | String thislabel = StringMan.commaList(smalllabelfields); |
| 404 | |
| 405 | int clauseID = provrs.getInt("prov"); |
| 406 | double weight = provrs.getFloat("weight"); |
| 407 | |
| 408 | |
| 409 | |
| 410 | // NOTE: deepprov doesn't match up with terms |
| 411 | |
| 412 | String deepProv = provrs.getString("deepprov"); //{qid:160,docID:264066,queryWord:155} |
| 413 | deepProv = deepProv.replaceAll("\\{|\\}", ""); |
| 414 | |
| 415 | HashMap<String, Integer> term2constant = new HashMap<String, Integer>(); |
| 416 | String[] smalls = deepProv.split(","); |
| 417 | //int num = 1; |
| 418 | for(String small : smalls){ |
| 419 | String[] metamap = small.split(":"); |
| 420 | //String temp = metamap[0] + num; |
| 421 | //term2constant.put(temp, Integer.valueOf(metamap[1])); |
| 422 | //num++; |
| 423 | term2constant.put(metamap[0], Integer.valueOf(metamap[1])); |
| 424 | } |
| 425 | |
| 426 | // TODO: Be carefully with the consistency of clause id between different runs |
| 427 | FelixClause clause = fq.id2clause.get(clauseID); |
| 428 | |
| 429 | // Get source clause/comment |
| 430 | String source_clause = clause.toString(); |
| 431 | String name = ""; |
| 432 | String content = ""; |
| 433 | |
| 434 | String[] lines = source_clause.split("\n"); |
| 435 | for(String line : lines) { |
| 436 | if(!line.equals("")) { |
| 437 | if(line.contains(">")) { |
| 438 | line = line.replaceAll(">", ">"); |
| 439 | } |
| 440 | if(line.contains("<")) { |
| 441 | line = line.replaceAll("<", "<"); |
| 442 | } |
| 443 | if(line.startsWith("//")) { |
| 444 | line = line.replaceFirst("//", ""); |
| 445 | line = line.trim(); |
| 446 | if(name.length() != 0) { |
| 447 | name += "\n"; |
| 448 | } |
| 449 | name += line; |
| 450 | } else { |
| 451 | if(content.length() != 0) { |
| 452 | content += "\n"; |
| 453 | } |
| 454 | content += line; |
| 455 | } |
| 456 | } |
| 457 | } |
| 458 | |
| 459 | bw.append(" <PROV>\n"); |
| 460 | bw.append(" <SOURCECLAUSE>\n"); |
| 461 | bw.append(" <NAME>" + name + "</NAME>\n"); |
| 462 | bw.append(" <CONTENT>" + content + "</CONTENT>\n"); |
| 463 | bw.append(" </SOURCECLAUSE>\n"); |
| 464 | //bw.append(" <UNAME>" + clause.g) |
| 465 | bw.append(" <WEIGHT>" + weight + "</WEIGHT>\n"); |
| 466 | bw.append(" <LABEL>" + thislabel + "</LABEL>\n"); |
| 467 | bw.append(" <CLAUSEID>" + clauseID + "</CLAUSEID>\n"); |
| 468 | |
| 469 | // Go through each literal |
| 470 | for(Literal l2 : clause.getRegLiterals()){ |
| 471 | Literal l = (Literal) l2.clone(); |
| 472 | |
| 473 | // Rule for this predicate |
| 474 | if(l.getPred().equals(p)){ |
| 475 | bw.append(" <LITERAL>\n"); |
| 476 | bw.append(" <SENSE>" + (l.getSense()==true ? "POS" : "NEG") + "</SENSE>\n"); |
| 477 | bw.append(" <NAME>" + l.getPred().getName() + "</NAME>\n"); |
| 478 | bw.append(" <STRINGFORM>=>"); |
| 479 | }else{ |
| 480 | l.setSense(!l.getSense()); |
| 481 | bw.append(" <LITERAL>\n"); |
| 482 | bw.append(" <SENSE>" + (l.getSense()==true ? "POS" : "NEG") + "</SENSE>\n"); |
| 483 | bw.append(" <NAME>" + l.getPred().getName() + "</NAME>\n"); |
| 484 | bw.append(" <STRINGFORM>"); |
| 485 | } |
| 486 | |
| 487 | // output predicate name |
| 488 | bw.append(l.getSense()==true? "" : "!"); |
| 489 | bw.append(l.getPred().getName() + "("); |
| 490 | |
| 491 | ArrayList<String> terms = new ArrayList<String>(); |
| 492 | ArrayList<Integer> arguments = new ArrayList<Integer>(); |
| 493 | |
| 494 | for(Term t : l.getTerms()){ |
| 495 | if(t.isConstant()){ |
| 496 | terms.add("\"" + id2str.get(Integer.valueOf(t.toString())) + "\""); |
| 497 | }else{ |
| 498 | terms.add("\"" + id2str.get(term2constant.get(t.toString())) + "\""); |
| 499 | arguments.add(term2constant.get(t.toString())); |
| 500 | } |
| 501 | } |
| 502 | bw.append(StringMan.join(", ", terms) + ")</STRINGFORM>\n"); |
| 503 | |
| 504 | |
| 505 | // Go through each argument |
| 506 | for(Integer id : arguments) { |
| 507 | String str = "" + id2str.get(id); |
| 508 | bw.append(" <ARG><NAME>" + str + "</NAME>"); |
| 509 | |
| 510 | // TODO: make this general |
| 511 | if(str.startsWith("MEN_")) { |
| 512 | String sql_men = "SELECT t0.word4 FROM pred_mention t0 WHERE t0.mid1=" + id; |
| 513 | ResultSet men = db.query(sql_men); |
| 514 | if(men.next()) { |
| 515 | String temp = id2str.get(men.getInt(1)); |
| 516 | bw.append("<VIEW><METARS>" + temp + "</METARS>"); |
| 517 | } |
| 518 | String sql_sent = "SELECT t0.sentencetext2 FROM pred_sentence t0 WHERE t0.mid1=" + id; |
| 519 | ResultSet sen = db.query(sql_sent); |
| 520 | if(sen.next()) { |
| 521 | String temp = id2str.get(sen.getInt(1)); |
| 522 | bw.append("<METARS>" + temp + "</METARS>"); |
| 523 | } |
| 524 | bw.append("</VIEW>"); |
| 525 | } else if(str.startsWith("E_m_")) { |
| 526 | String sql_ent = "SELECT t0.word2 FROM pred_entity t0 WHERE t0.eid1=" + id; |
| 527 | ResultSet ent = db.query(sql_ent); |
| 528 | if(ent.next()) { |
| 529 | String temp = id2str.get(ent.getInt(1)); |
| 530 | temp = temp.replaceAll("&", "&"); |
| 531 | temp = temp.replaceAll("<", "<"); |
| 532 | temp = temp.replaceAll(">", ">"); |
| 533 | bw.append("<VIEW><METARS>" + temp + "</METARS></VIEW>"); |
| 534 | } |
| 535 | } |
| 536 | bw.append("</ARG>\n"); |
| 537 | } |
| 538 | bw.append(" </LITERAL>\n"); |
| 539 | } |
| 540 | bw.append(" </PROV>\n"); |
| 541 | |
| 542 | //System.out.print("\t" + provrs.getString("prov") + "\t" + provrs.getString("weight") + "\t" + provrs.getString("deepprov")+"\n\n"); |
| 543 | |
| 544 | } |
| 545 | |
| 546 | bw.append("</QUERY>\n"); |
| 547 | } |
| 548 | } |
| 549 | bw.append("\n\n</root>\n\n"); |
| 550 | bw.close(); |
| 551 | |
| 552 | } |
| 553 | |
| 554 | // TODO: make this general |
| 555 | /** |
| 556 | * Creates tables/views in public schema for use by django feedback tool. |
| 557 | * @throws Exception |
| 558 | */ |
| 559 | public void DjangoSetup() throws Exception { |
| 560 | |
| 561 | HashMap<FelixPredicate, HashMap<String, String>> attrs = new |
| 562 | HashMap<FelixPredicate, HashMap<String, String>>(); |
| 563 | FelixQuery fq = new FelixQuery(); |
| 564 | ResultSet rs; |
| 565 | String query = ""; |
| 566 | |
| 567 | // For generating django models.py, admin,py |
| 568 | ArrayList<String> models = new ArrayList<String>(); |
| 569 | ArrayList<String> admin = new ArrayList<String>(); |
| 570 | |
| 571 | // setup stuff |
| 572 | db = RDB.getRDBbyConfig(opt.evidDBSchema); |
| 573 | db.resetSchema(opt.evidDBSchema); |
| 574 | //db.resetSchema(FelixConfig.db_schema); |
| 575 | String[] progFiles = opt.fprog.split(","); |
| 576 | fq.loadPrograms(progFiles); |
| 577 | |
| 578 | |
| 579 | HashMap<Long, String> id2str = db.loadIdSymbolMapFromTable(); |
| 580 | HashMap<String, Integer> str2id = db.loadSymbolIdMapFromTable(); |
| 581 | |
| 582 | if(opt.fquery != null){ |
| 583 | String[] queryFiles = opt.fquery.split(","); |
| 584 | fq.loadQueries(queryFiles); |
| 585 | } |
| 586 | |
| 587 | if(opt.queryAtoms != null){ |
| 588 | FelixUIMan.println(">>> Parsing query atoms in command line"); |
| 589 | fq.parseQueryCommaList(opt.queryAtoms); |
| 590 | } |
| 591 | |
| 592 | HashMap<Integer, Integer> gt = new HashMap<Integer, Integer>(); |
| 593 | |
| 594 | // TODO: get this somewhere |
| 595 | String predicate_name = "el"; |
| 596 | |
| 597 | // gets metainfo |
| 598 | rs = db.query("SELECT * FROM _prov_metainfo;"); |
| 599 | while(rs.next()){ |
| 600 | String pred = rs.getString(1); |
| 601 | String attr = rs.getString(2); |
| 602 | String value = rs.getString(3); |
| 603 | |
| 604 | FelixPredicate p = fq.getPredByName(pred); |
| 605 | |
| 606 | if(!attrs.containsKey(p)){ |
| 607 | attrs.put(p, new HashMap<String, String>()); |
| 608 | } |
| 609 | attrs.get(p).put(attr, value); |
| 610 | } |
| 611 | |
| 612 | FelixPredicate p = fq.getPredByName(predicate_name); |
| 613 | ArrayList<String> arg_names = p.getArgs(); |
| 614 | |
| 615 | // TODO: allow multiple predicates |
| 616 | //for(FelixPredicate p : fq.getAllPred()){ |
| 617 | if(p.hasQuery()) { |
| 618 | |
| 619 | if(!attrs.get(p).get("optype").equals("lr")){ |
| 620 | ExceptionMan.die("Currently, I can only deal with LR!"); |
| 621 | } |
| 622 | |
| 623 | // this contains: eid2 |
| 624 | ArrayList<String> labelfields = new ArrayList<String>(Arrays.asList(attrs.get(p).get("labelfield").split(","))); |
| 625 | |
| 626 | |
| 627 | // TODO: allow gt with different fields/key constraints |
| 628 | // parse gt file |
| 629 | String gtTable = "public." + p.getRelName() + "_gt"; |
| 630 | String gtTableTmp = "public." + p.getRelName() + "_gt_tmp"; |
| 631 | |
| 632 | if(opt.gt != null) { |
| 633 | // TODO: make this general |
| 634 | // create tmp table for gt predicate |
| 635 | db.dropTable(gtTableTmp); |
| 636 | query = "CREATE TABLE " + gtTableTmp + " (mid1 integer, eid2 integer, mid1_value text, eid2_value text)"; |
| 637 | System.out.println(query); |
| 638 | db.execute(query); |
| 639 | |
| 640 | |
| 641 | System.out.println("Parsing gt file " + opt.gt); |
| 642 | try { |
| 643 | BufferedReader in = new BufferedReader(new FileReader(opt.gt)); |
| 644 | String str; |
| 645 | while ((str = in.readLine()) != null) { |
| 646 | if(predicate_name.equals(str.substring(0, str.indexOf('(')))) { |
| 647 | |
| 648 | String rest = str.substring(str.indexOf('(')+1, str.lastIndexOf(')') - str.indexOf('(') + 1); |
| 649 | String[] args = rest.split(","); |
| 650 | |
| 651 | // Remove leading/trailing quotes |
| 652 | for(int i = 0; i < args.length; i++) { |
| 653 | args[i] = args[i].trim(); |
| 654 | if(args[i].startsWith("\"")) { |
| 655 | args[i] = args[i].substring(1); |
| 656 | } |
| 657 | if(args[i].endsWith("\"")) { |
| 658 | args[i] = args[i].substring(0, args[i].length() - 1); |
| 659 | } |
| 660 | } |
| 661 | //System.out.println(args[0] + "\t" + args[1]); |
| 662 | //System.out.println("\t" + str2id.get(args[0]) + "\t" + str2id.get(args[1])); |
| 663 | |
| 664 | // add to gt table |
| 665 | Integer id1 = str2id.get(args[0]); |
| 666 | Integer id2 = str2id.get(args[1]); |
| 667 | |
| 668 | query = "INSERT INTO " + gtTableTmp + " VALUES (" + id1 + ", " + id2 + ", '" + args[0] + "', 'n/" + args[1] + "')"; |
| 669 | System.out.println(query); |
| 670 | db.execute(query); |
| 671 | } |
| 672 | } |
| 673 | in.close(); |
| 674 | |
| 675 | // TODO: make this general |
| 676 | // create table for gt predicate |
| 677 | db.dropTable(gtTable); |
| 678 | query = "CREATE TABLE " + gtTable + " AS " + |
| 679 | "SELECT t1.id AS id, t0.mid1 AS mid1, t0.eid2 AS eid2, t0.mid1_value AS mid1_value, t0.eid2_value AS eid2_value " + |
| 680 | "FROM " + gtTableTmp + " AS t0, " + p.getRelName() + " AS t1 " + |
| 681 | "WHERE t0.mid1 = t1.mid1 AND t0.eid2 = t1.eid2"; |
| 682 | System.out.println(query); |
| 683 | db.execute(query); |
| 684 | |
| 685 | // drop tmp gt table |
| 686 | query = "DROP TABLE " + gtTableTmp; |
| 687 | System.out.println(query); |
| 688 | db.execute(query); |
| 689 | } catch (IOException e) { |
| 690 | ExceptionMan.die("Error reading from file " + opt.gt); |
| 691 | } |
| 692 | } |
| 693 | |
| 694 | // TODO: make this general |
| 695 | // Creates public view for this predicate |
| 696 | db.dropView("public." + p.getRelName()); |
| 697 | if(gt != null) { |
| 698 | query = "CREATE VIEW public." + p.getRelName() + " AS SELECT t0.*, t1.constantvalue as mid1_value, t2.constantvalue as eid2_value FROM " + p.getRelName() + " AS t0, type_mid AS t1, type_eid AS t2 WHERE t0.mid1 = t1.constantid AND t0.eid2 = t2.constantid AND mid1 IN (SELECT mid1 FROM " + gtTable + ")"; |
| 699 | System.out.println(query); |
| 700 | db.execute(query); |
| 701 | query = "ALTER TABLE " + gtTable + " ADD FOREIGN KEY (id) REFERENCES " + p.getRelName() + "(id)"; |
| 702 | System.out.println(query); |
| 703 | db.execute(query); |
| 704 | } else { |
| 705 | query = "CREATE VIEW public." + p.getRelName() + " AS SELECT * FROM " + p.getRelName(); |
| 706 | System.out.println(query); |
| 707 | db.execute(query); |
| 708 | } |
| 709 | |
| 710 | |
| 711 | String prov_table = attrs.get(p).get("provtable"); |
| 712 | |
| 713 | if(prov_table == null) { |
| 714 | ExceptionMan.die("Unable to find provtable for query predicate"); |
| 715 | } |
| 716 | |
| 717 | // TODO: make this general |
| 718 | if(prov_table != null) { |
| 719 | db.dropTable("public." + prov_table); |
| 720 | if(opt.gt != null) { |
| 721 | // TODO: make this step faster! (takes about 10 mins) |
| 722 | // maybe have command line option specifying if rules should be updated? |
| 723 | query = "CREATE TABLE public." + prov_table + " AS SELECT t0.mid1, t0.eid2, t0.weight, t0.prov, array_to_string(t0.deepprov, ',') as deepprov, t1.id AS lid FROM " + prov_table + " AS t0, public." + p.getRelName() + " AS t1 WHERE t0.mid1 = t1.mid1 and t0.eid2 = t1.eid2"; |
| 724 | System.out.println(query); |
| 725 | db.execute(query); |
| 726 | query = "ALTER TABLE public." + prov_table + " ADD FOREIGN KEY (lid) REFERENCES " + p.getRelName() + "(id)"; |
| 727 | System.out.println(query); |
| 728 | db.execute(query); |
| 729 | query = "ALTER TABLE public." + prov_table + " ADD id SERIAL"; |
| 730 | System.out.println(query); |
| 731 | db.update(query); |
| 732 | } else { |
| 733 | System.out.println("CREATE TABLE public." + prov_table + " AS SELECT * FROM " + prov_table); |
| 734 | db.copyTable(prov_table, "public." + prov_table); |
| 735 | query = "ALTER TABLE public." + prov_table + " ADD id SERIAL"; |
| 736 | System.out.println(query); |
| 737 | db.update(query); |
| 738 | } |
| 739 | } |
| 740 | } |
| 741 | |
| 742 | // Constants |
| 743 | db.dropView("public.constants"); |
| 744 | query = "CREATE VIEW public.constants AS SELECT * FROM constants"; |
| 745 | System.out.println(query); |
| 746 | db.execute(query); |
| 747 | |
| 748 | // Clauses |
| 749 | db.dropTable("public.clauses"); |
| 750 | query = "CREATE TABLE public.clauses(id serial, comment text, content text)"; |
| 751 | System.out.println(query); |
| 752 | db.execute(query); |
| 753 | for(Integer id : fq.id2clause.keySet()) { |
| 754 | FelixClause clause = fq.id2clause.get(id); |
| 755 | |
| 756 | int clauseID = clause.getId(); |
| 757 | // Get source clause/comment |
| 758 | String source_clause = clause.toString(); |
| 759 | String comment = ""; |
| 760 | String content = ""; |
| 761 | |
| 762 | String[] lines = source_clause.split("\n"); |
| 763 | for(String line : lines) { |
| 764 | if(!line.equals("")) { |
| 765 | line = line.replaceAll("'", ""); |
| 766 | if(line.startsWith("//")) { |
| 767 | line = line.replaceFirst("//", ""); |
| 768 | line = line.trim(); |
| 769 | if(comment.length() != 0) { |
| 770 | comment += "\n"; |
| 771 | } |
| 772 | comment += line; |
| 773 | } else { |
| 774 | if(content.length() != 0) { |
| 775 | content += "\n"; |
| 776 | } |
| 777 | content += line; |
| 778 | } |
| 779 | } |
| 780 | } |
| 781 | |
| 782 | query = "INSERT INTO public.clauses VALUES (" + clauseID + ", '" + comment + "', '" + content + "')"; |
| 783 | System.out.println(query); |
| 784 | db.update(query); |
| 785 | } |
| 786 | |
| 787 | // // TODO: go through all evidence files, create views (join with type tables) |
| 788 | // for (Predicate pred : fq.getAllPred()) { |
| 789 | // if(pred.hasEvid) { |
| 790 | // String pred_name = pred.getRelName(); |
| 791 | // System.out.println("pred: " + pred_name); |
| 792 | // } |
| 793 | // |
| 794 | // } |
| 795 | |
| 796 | |
| 797 | // Entity |
| 798 | db.dropView("public.pred_entity"); |
| 799 | query = "CREATE VIEW public.pred_entity AS " + |
| 800 | "SELECT t0.*, t1.constantvalue AS eid1_value, t2.constantvalue AS word2_value " + |
| 801 | "FROM pred_entity AS t0, type_eid AS t1, type_word AS t2 " + |
| 802 | "WHERE t0.eid1 = t1.constantid AND t0.word2 = t2.constantid"; |
| 803 | System.out.println(query); |
| 804 | db.execute(query); |
| 805 | |
| 806 | // Mention |
| 807 | db.dropView("public.pred_mention"); |
| 808 | query = "CREATE VIEW public.pred_mention AS " + |
| 809 | "SELECT t0.*, t1.constantvalue AS mid1_value, t2.constantvalue AS sentid2_value, t3.constantvalue AS docid3_value, t4.constantvalue AS word4_value " + |
| 810 | "FROM pred_mention AS t0, type_mid AS t1, type_sentid AS t2, type_docid AS t3, type_word AS t4 " + |
| 811 | "WHERE t0.mid1 = t1.constantid AND t0.sentid2 = t2.constantid AND t0.docid3 = t3.constantid AND t0.word4 = t4.constantid"; |
| 812 | System.out.println(query); |
| 813 | db.execute(query); |
| 814 | |
| 815 | // Sentence |
| 816 | db.dropView("public.pred_sentence"); |
| 817 | query = "CREATE VIEW public.pred_sentence AS " + |
| 818 | "SELECT t0.*, t1.constantvalue AS mid1_value, t2.constantvalue AS sentencetext2_value " + |
| 819 | "FROM pred_sentence AS t0, type_mid AS t1, type_sentencetext AS t2 " + |
| 820 | "WHERE t0.mid1 = t1.constantid AND t0.sentencetext2 = t2.constantid"; |
| 821 | System.out.println(query); |
| 822 | db.execute(query); |
| 823 | |
| 824 | // TODO: auto-generate models.py |
| 825 | |
| 826 | // TODO: auto-generate admin.py |
| 827 | } |
| 828 | |
| 829 | /** |
| 830 | * The constructor. |
| 831 | * @param _opt |
| 832 | */ |
| 833 | public ActiveLearner(FelixCommandOptions _opt){ |
| 834 | opt = _opt; |
| 835 | } |
| 836 | |
| 837 | |
| 838 | } |