| 1 | package felix.dstruct; |
| 2 | |
| 3 | import java.io.FileInputStream; |
| 4 | import java.io.IOException; |
| 5 | import java.util.ArrayList; |
| 6 | import java.util.HashMap; |
| 7 | import java.util.HashSet; |
| 8 | |
| 9 | import org.postgresql.PGConnection; |
| 10 | |
| 11 | import felix.dstruct.StatOperator.OPType; |
| 12 | import felix.util.FelixConfig; |
| 13 | import felix.util.FelixUIMan; |
| 14 | |
| 15 | import tuffy.db.RDB; |
| 16 | import tuffy.mln.Atom; |
| 17 | import tuffy.mln.Literal; |
| 18 | import tuffy.mln.Predicate; |
| 19 | import tuffy.util.ExceptionMan; |
| 20 | import tuffy.util.FileMan; |
| 21 | import tuffy.util.StringMan; |
| 22 | import tuffy.util.UIMan; |
| 23 | |
| 24 | |
| 25 | /** |
| 26 | * The predicate object used in Felix, which extends |
| 27 | * the Predicate class in Tuffy to contain Felix-related |
| 28 | * fields and methods. |
| 29 | * |
| 30 | * @author Ce Zhang |
| 31 | * |
| 32 | */ |
| 33 | public class FelixPredicate extends Predicate{ |
| 34 | |
| 35 | /** |
| 36 | * @deprecated |
| 37 | */ |
| 38 | String embeddedPythonCode = ""; |
| 39 | |
| 40 | /** |
| 41 | * Properties that can be assigned to each predicate. |
| 42 | * Example properties include: SYMM, REPLEX, TRANS etc. |
| 43 | * |
| 44 | */ |
| 45 | public enum FPProperty {SYMM, REFLEX, TRANS, CHAIN_RECUR, |
| 46 | OTHER_RECUR, OTHER_RECUR_WITHOTHER_OPENPRED, KEY_CONSTRAINT, NON_RECUR, EMBED_WEIGHT_RULE}; |
| 47 | |
| 48 | /** |
| 49 | * Path the HDFS file that this predicate depends on. |
| 50 | */ |
| 51 | public String dependencyFile = null; |
| 52 | |
| 53 | /** |
| 54 | * Type of dependencies, e.g., "hdfs", "jdbc" etc. |
| 55 | */ |
| 56 | public String dependencyName = null; |
| 57 | |
| 58 | /** |
| 59 | * Python script for MAP. |
| 60 | */ |
| 61 | public String mapScript = null; |
| 62 | |
| 63 | /** |
| 64 | * Python script for running before all MAPs. |
| 65 | */ |
| 66 | public String mapinitScript = ""; |
| 67 | |
| 68 | /** |
| 69 | * Python script for running before all REDUCEs. |
| 70 | */ |
| 71 | public String reduceinitScript = ""; |
| 72 | |
| 73 | /** |
| 74 | * Python script for MAP. |
| 75 | */ |
| 76 | public String reduceScript = "\tfor v in _inputvalues:\n"+ |
| 77 | "\t\tfelixio_push(_inputkey, v)"; |
| 78 | |
| 79 | /** |
| 80 | * Do we need to extract features from HDFS for this relation? |
| 81 | */ |
| 82 | public boolean needExtractFeatures = false; |
| 83 | |
| 84 | /** |
| 85 | * If the input is XML tag, which <xmltag></xmltag> |
| 86 | * should we send to MAP as a unit? |
| 87 | */ |
| 88 | public String xmltag = null; |
| 89 | |
| 90 | /** |
| 91 | * Whether the extraction of this relation relies on other relations. |
| 92 | */ |
| 93 | public String jdbcdep = null; |
| 94 | |
| 95 | /** |
| 96 | * The name of MAP's input variable. |
| 97 | */ |
| 98 | public String mapinputvar = "_input"; |
| 99 | |
| 100 | /** |
| 101 | * The name of REDUCE's input key variable. |
| 102 | */ |
| 103 | public String reduceinputkeyvar = "_inputkey"; |
| 104 | |
| 105 | /** |
| 106 | * The name of REDUCE's input value variable. |
| 107 | */ |
| 108 | public String reduceinputvaluesvar = "_inputvalues"; |
| 109 | |
| 110 | /** |
| 111 | * Whether the evidence file of this relation exists |
| 112 | * in some relational table instead of input evid. file. |
| 113 | */ |
| 114 | public boolean loadFromDatabase = false; |
| 115 | |
| 116 | /** |
| 117 | * see {@link #loadFromDatabase} |
| 118 | */ |
| 119 | public String loadingSchema = null; |
| 120 | |
| 121 | /** |
| 122 | * see {@link #loadFromDatabase} |
| 123 | */ |
| 124 | public String loadingTable = null; |
| 125 | |
| 126 | |
| 127 | /** |
| 128 | * Adds atom as evidence to this predicate (Override Felix version) - |
| 129 | * the difference is that we sometimes flush it to file |
| 130 | * directly. |
| 131 | * @param a |
| 132 | */ |
| 133 | public void addEvidence(Atom a) { |
| 134 | |
| 135 | hasEvid = true; |
| 136 | |
| 137 | if (a.isSoftEvidence()) |
| 138 | setHasSoftEvidence(true); |
| 139 | |
| 140 | if(FelixConfig.mixturedLoading == true){ |
| 141 | |
| 142 | ArrayList<String> towrite = new ArrayList<String>(); |
| 143 | |
| 144 | try { |
| 145 | |
| 146 | this.loadingFileWriter.append((a.truth == true? "True" : "False")); |
| 147 | this.loadingFileWriter.append("\t"); |
| 148 | this.loadingFileWriter.append(a.prior == null? "1" : a.prior.toString());; |
| 149 | this.loadingFileWriter.append("\t"); |
| 150 | |
| 151 | this.loadingFileWriter.append(StringMan.joinAndEscape("\t", a.sargs)); |
| 152 | this.loadingFileWriter.append("\n"); |
| 153 | } catch (IOException e) { |
| 154 | e.printStackTrace(); |
| 155 | } |
| 156 | |
| 157 | }else{ |
| 158 | addEvidenceTuple(a); |
| 159 | } |
| 160 | } |
| 161 | |
| 162 | /** |
| 163 | * The type of operator this predicate must be assigned to. |
| 164 | */ |
| 165 | public OPType mustbe = null; |
| 166 | |
| 167 | /** |
| 168 | * The parent {@link ConcurrentOperatorsBucket}. |
| 169 | */ |
| 170 | public ConcurrentOperatorsBucket belongsTo = null; |
| 171 | |
| 172 | /** |
| 173 | * Whether this predicate is the view-based representation of |
| 174 | * a coref operator. This predicate must with a name suffix ``_map''. |
| 175 | */ |
| 176 | public boolean isCorefMapPredicate = false; |
| 177 | |
| 178 | /** |
| 179 | * Whether this predicate is a coref operator. |
| 180 | */ |
| 181 | public boolean isCorefPredicate = false; |
| 182 | |
| 183 | /** |
| 184 | * If {@link FelixPredicate#isCorefMapPredicate} is true, to which this |
| 185 | * predicate serves. |
| 186 | */ |
| 187 | public FelixPredicate oriCorefPredicate = null; |
| 188 | |
| 189 | /** |
| 190 | * If {@link FelixPredicate#isCorefPredicate} is true, which |
| 191 | * relation serves as the linear-view-representation of it? |
| 192 | */ |
| 193 | public FelixPredicate corefMAPPredicate = null; |
| 194 | |
| 195 | /** |
| 196 | * Map from predicate properties ({@link FPProperty}) to |
| 197 | * clauses satisfying the corresponding property. |
| 198 | */ |
| 199 | HashMap<FPProperty, HashSet<FelixClause>> properities |
| 200 | = new HashMap<FPProperty, HashSet<FelixClause>>(); |
| 201 | |
| 202 | /** |
| 203 | * Clauses related to this predicate. |
| 204 | */ |
| 205 | HashSet<FelixClause> registeredClauses = new HashSet<FelixClause>(); |
| 206 | |
| 207 | /** |
| 208 | * If this predicate has key constraints, this set records the position |
| 209 | * of keys. |
| 210 | */ |
| 211 | HashSet<Integer> labelPositions = new HashSet<Integer>(); |
| 212 | |
| 213 | /** |
| 214 | * If this relation is defined as a view instead of |
| 215 | * a table, what is its view definition? |
| 216 | */ |
| 217 | public String viewDef = null; |
| 218 | |
| 219 | /** |
| 220 | * Map from chain-rule clauses to possible partitions of sequence. |
| 221 | */ |
| 222 | HashMap<FelixClause, ArrayList<String>> chainRulePartitions = |
| 223 | new HashMap<FelixClause, ArrayList<String>>(); |
| 224 | |
| 225 | /** |
| 226 | * Get the label position if this predicate is LR or CRF. |
| 227 | * @return |
| 228 | */ |
| 229 | public ArrayList<Integer> getLabelPositions(){ |
| 230 | ArrayList<Integer> ret = new ArrayList<Integer>(); |
| 231 | for(int i=0;i<this.arity();i++){ |
| 232 | if(!this.labelPositions.contains(i)){ |
| 233 | continue; |
| 234 | } |
| 235 | ret.add(i); |
| 236 | } |
| 237 | return ret; |
| 238 | } |
| 239 | |
| 240 | /** |
| 241 | * Sets embedded python code for this predicate |
| 242 | * @param content |
| 243 | */ |
| 244 | public void setEmbeddedPythonCode(String content){ |
| 245 | |
| 246 | this.embeddedPythonCode = content; |
| 247 | |
| 248 | } |
| 249 | |
| 250 | /** |
| 251 | * Get partitioning fields of sequence if this predicate is CRF. |
| 252 | * @return |
| 253 | */ |
| 254 | public ArrayList<String> getCRFPartitionFields(){ |
| 255 | |
| 256 | if(chainRulePartitions.keySet().size() != 1){ |
| 257 | return null; |
| 258 | } |
| 259 | |
| 260 | FelixClause key = chainRulePartitions.keySet().iterator().next(); |
| 261 | |
| 262 | if(chainRulePartitions.get(key).size() == 0){ |
| 263 | return null; |
| 264 | } |
| 265 | |
| 266 | return chainRulePartitions.get(key); |
| 267 | |
| 268 | } |
| 269 | |
| 270 | /** |
| 271 | * Get the key position if this predicate is LR or CRF. |
| 272 | * @return |
| 273 | */ |
| 274 | public ArrayList<String> getKeyFieldsArgs(){ |
| 275 | ArrayList<String> ret = new ArrayList<String>(); |
| 276 | for(int i=0;i<this.arity();i++){ |
| 277 | if(!labelPositions.contains(i)){ |
| 278 | ret.add(this.getArgs().get(i)); |
| 279 | } |
| 280 | } |
| 281 | return ret; |
| 282 | } |
| 283 | |
| 284 | /** |
| 285 | * Get the label types if this predicate is LR or CRF. |
| 286 | * @return |
| 287 | */ |
| 288 | public ArrayList<String> getLabelFieldsTypeTable(){ |
| 289 | ArrayList<String> ret = new ArrayList<String>(); |
| 290 | for(int i=0;i<this.arity();i++){ |
| 291 | if(!labelPositions.contains(i)){ |
| 292 | continue; |
| 293 | } |
| 294 | ret.add(this.getTypeAt(i).getRelName()); |
| 295 | } |
| 296 | return ret; |
| 297 | } |
| 298 | |
| 299 | /** |
| 300 | * Get the label fields' name if this predicate is LR or CRF. |
| 301 | * @return |
| 302 | */ |
| 303 | public ArrayList<String> getLabelFieldsArgs(){ |
| 304 | ArrayList<String> ret = new ArrayList<String>(); |
| 305 | for(int i=0;i<this.arity();i++){ |
| 306 | if(!labelPositions.contains(i)){ |
| 307 | continue; |
| 308 | } |
| 309 | ret.add(this.getArgs().get(i)); |
| 310 | } |
| 311 | return ret; |
| 312 | } |
| 313 | |
| 314 | /** |
| 315 | * Global counter for temporary predicates. |
| 316 | */ |
| 317 | static int tmpPredCounter = 0; |
| 318 | |
| 319 | /** |
| 320 | * Get the name of the next temporary predicate. |
| 321 | * @return |
| 322 | */ |
| 323 | public static String getNextTmpPredicateName(){ |
| 324 | return "tmp_predicate_" + (tmpPredCounter++); |
| 325 | } |
| 326 | |
| 327 | /** |
| 328 | * Whether this predicate is the view-based representation of |
| 329 | * a coref operator. |
| 330 | * @return |
| 331 | */ |
| 332 | public boolean isCorefMap(){ |
| 333 | return this.isCorefMapPredicate; |
| 334 | } |
| 335 | |
| 336 | /** |
| 337 | * If {@link FelixPredicate#isCorefMapPredicate} is true, to which this |
| 338 | * predicate serves. |
| 339 | * @return |
| 340 | */ |
| 341 | public FelixPredicate getOriCorefPredicate(){ |
| 342 | return this.oriCorefPredicate; |
| 343 | } |
| 344 | |
| 345 | /** |
| 346 | * Get clauses associated to the given property. |
| 347 | * @param prop |
| 348 | * @return |
| 349 | */ |
| 350 | public HashSet<FelixClause> getPropertyClauses(FPProperty prop){ |
| 351 | if(this.hasProperty(prop)){ |
| 352 | return new HashSet<FelixClause>(properities.get(prop)); |
| 353 | }else{ |
| 354 | return new HashSet<FelixClause>(); |
| 355 | } |
| 356 | } |
| 357 | |
| 358 | /** |
| 359 | * Whether this predicate has the given property. |
| 360 | * @param prop |
| 361 | * @return |
| 362 | */ |
| 363 | public boolean hasProperty(FPProperty prop){ |
| 364 | return properities.containsKey(prop); |
| 365 | } |
| 366 | |
| 367 | /** |
| 368 | * The constructor. |
| 369 | * @param aname |
| 370 | * @param aClosedWorld |
| 371 | */ |
| 372 | public FelixPredicate(String aname, boolean aClosedWorld) { |
| 373 | super(null, aname, aClosedWorld); |
| 374 | } |
| 375 | |
| 376 | /** |
| 377 | * Get {@link FelixPredicate#keyPositions}. |
| 378 | * @return |
| 379 | */ |
| 380 | public HashSet<Integer> getKeyPositions(){ |
| 381 | HashSet<Integer> ret = new HashSet<Integer>(); |
| 382 | for(int i=0;i<this.arity();i++){ |
| 383 | if(this.labelPositions.contains(i)){ |
| 384 | continue; |
| 385 | } |
| 386 | ret.add(i); |
| 387 | } |
| 388 | return ret; |
| 389 | } |
| 390 | |
| 391 | /** |
| 392 | * Flushes string-based evidence to database. |
| 393 | * @param rName |
| 394 | */ |
| 395 | public void flushStrEvidence(String rName) { |
| 396 | try { |
| 397 | |
| 398 | //create table |
| 399 | ArrayList<String> tableColumn = new ArrayList<String>(); |
| 400 | tableColumn.add("truth BOOL"); |
| 401 | tableColumn.add("prior FLOAT"); |
| 402 | for(String arg : this.getArgs()){ |
| 403 | tableColumn.add(arg + " TEXT"); |
| 404 | } |
| 405 | String sql = "CREATE TABLE " + rName + " ( " + StringMan.commaList(tableColumn) + ");"; |
| 406 | RDB db = RDB.getRDBbyConfig(FelixConfig.db_schema); |
| 407 | db.dropTable(rName); |
| 408 | db.execute(sql); |
| 409 | db.commit(); |
| 410 | |
| 411 | // flush the file |
| 412 | loadingFileWriter.close(); |
| 413 | loadingFileWriter = null; |
| 414 | // copy into DB |
| 415 | ArrayList<String> cols = new ArrayList<String>(); |
| 416 | cols.add("truth"); |
| 417 | cols.add("prior"); |
| 418 | cols.addAll(this.getArgs()); |
| 419 | FileInputStream in = new FileInputStream(loadingFile); |
| 420 | PGConnection con = (PGConnection) db.getConnection(); |
| 421 | sql = "COPY " + rName + |
| 422 | StringMan.commaListParen(cols) + " FROM STDIN"; |
| 423 | con.getCopyAPI().copyIn(sql, in); |
| 424 | in.close(); |
| 425 | db.commit(); |
| 426 | db.analyze(rName); |
| 427 | FileMan.removeFile(loadingFile.getAbsolutePath()); |
| 428 | |
| 429 | db.close(); |
| 430 | |
| 431 | } catch (Exception e) { |
| 432 | ExceptionMan.handle(e); |
| 433 | } |
| 434 | } |
| 435 | |
| 436 | /** |
| 437 | * Get all clauses related to this predicate. |
| 438 | * @return |
| 439 | */ |
| 440 | public HashSet<FelixClause> getRelevantClauses(){ |
| 441 | return registeredClauses; |
| 442 | } |
| 443 | |
| 444 | /** |
| 445 | * Add a property to this predicate, along with the clause with this property. |
| 446 | * @param prop |
| 447 | * @param evid |
| 448 | * @param _pos if this property is key constraint, this parameter is the position |
| 449 | * of keys. |
| 450 | */ |
| 451 | public void registerProperty(FPProperty prop, FelixClause evid, int... _pos){ |
| 452 | |
| 453 | FelixUIMan.println(2, 0, "\nRegister property {" + prop + "} to Predicate {" |
| 454 | + this.getName() + "(" + StringMan.join(",", this.getArgs()) + ")" |
| 455 | + "} with evidence {" + evid + "} {" |
| 456 | + FelixUIMan.joinArray(_pos) + "}"); |
| 457 | |
| 458 | if(prop == FPProperty.EMBED_WEIGHT_RULE){ |
| 459 | if(!this.properities.containsKey(FPProperty.EMBED_WEIGHT_RULE)){ |
| 460 | this.properities.put(FPProperty.EMBED_WEIGHT_RULE, new HashSet<FelixClause>()); |
| 461 | } |
| 462 | this.properities.get(FPProperty.EMBED_WEIGHT_RULE).add(evid); |
| 463 | return; |
| 464 | } |
| 465 | |
| 466 | if(prop == FPProperty.KEY_CONSTRAINT){ |
| 467 | |
| 468 | if(_pos.length == 0){ |
| 469 | ExceptionMan.die("Cannot register key constriant without " + |
| 470 | "any given positions"); |
| 471 | } |
| 472 | |
| 473 | if(this.labelPositions.size() != 0){ |
| 474 | UIMan.warn("Ignore second key constraints of predicate " |
| 475 | + this.getName()); |
| 476 | }else{ |
| 477 | for(int i=0;i<_pos.length;i++){ |
| 478 | this.labelPositions.add(_pos[i]); |
| 479 | } |
| 480 | } |
| 481 | } |
| 482 | |
| 483 | if(this.hasDependentAttributes() == false && prop != FPProperty.SYMM |
| 484 | && prop != FPProperty.REFLEX && evid == null){ |
| 485 | ExceptionMan.die("NULL clause assigned to non key_constraint predicate"); |
| 486 | } |
| 487 | |
| 488 | if(registeredClauses.contains(evid)){ |
| 489 | FelixUIMan.println(2, 0, "Ignore replicated clause registering for " |
| 490 | + "property {" + prop + "} to Predicate {" |
| 491 | + this.getName() + "(" + StringMan.join(",", this.getArgs()) + ")" |
| 492 | + "} with evidence {" + evid + "} {" |
| 493 | + FelixUIMan.joinArray(_pos) + "}"); |
| 494 | }else{ |
| 495 | if(evid != null){ |
| 496 | if(!properities.containsKey(prop)){ |
| 497 | properities.put(prop, new HashSet<FelixClause>()); |
| 498 | } |
| 499 | properities.get(prop).add(evid); |
| 500 | registeredClauses.add(evid); |
| 501 | |
| 502 | if(prop == FPProperty.CHAIN_RECUR){ |
| 503 | this.tryToExtractSeqFromChainRule(evid); |
| 504 | } |
| 505 | }else{ |
| 506 | if(!properities.containsKey(prop)){ |
| 507 | properities.put(prop, new HashSet<FelixClause>()); |
| 508 | } |
| 509 | } |
| 510 | } |
| 511 | } |
| 512 | |
| 513 | /** |
| 514 | * If the given clause is a CRF chain rule, try to extract the |
| 515 | * partitions of sequence. |
| 516 | * @param fc |
| 517 | */ |
| 518 | public void tryToExtractSeqFromChainRule(FelixClause fc){ |
| 519 | |
| 520 | ArrayList<String> ret = new ArrayList<String>(); |
| 521 | |
| 522 | Literal l1 = null; |
| 523 | Literal l2 = null; |
| 524 | |
| 525 | for(Literal l : fc.getRegLiterals()){ |
| 526 | if(l.getPred().getName().equals(this.getName())){ |
| 527 | if(l1 == null){ |
| 528 | l1 = l; |
| 529 | }else{ |
| 530 | l2 = l; |
| 531 | } |
| 532 | } |
| 533 | } |
| 534 | |
| 535 | for(int i=0;i<this.arity();i++){ |
| 536 | // seq partition must be on keys |
| 537 | if(this.labelPositions.contains(i)){ |
| 538 | continue; |
| 539 | } |
| 540 | |
| 541 | if(l1.getTerms().get(i).toString().equals( |
| 542 | l2.getTerms().get(i).toString())){ |
| 543 | ret.add(this.getArgs().get(i)); |
| 544 | } |
| 545 | } |
| 546 | |
| 547 | this.chainRulePartitions.put(fc, ret); |
| 548 | |
| 549 | } |
| 550 | |
| 551 | /** |
| 552 | * Returns string representation of this predicate. |
| 553 | */ |
| 554 | public String toString(){ |
| 555 | String ret = ""; |
| 556 | |
| 557 | ret = this.getName(); |
| 558 | ret += "("; |
| 559 | ret += StringMan.commaList(this.getArgs()); |
| 560 | ret += ")"; |
| 561 | |
| 562 | return ret; |
| 563 | } |
| 564 | |
| 565 | |
| 566 | } |