From 962586a5fd40d61302bb880e27b8e32413096593 Mon Sep 17 00:00:00 2001 From: austin Date: Mon, 30 Dec 2024 23:02:33 -0500 Subject: [PATCH] more predicate processing, lemma bank, fix tests --- data/dictionary/verbs.csv | 407 +++++++++++++++++- src/main/java/org/studiorailgun/Globals.java | 2 + .../evaluators/query/QueryEval.java | 24 +- .../evaluators/transfer/TransferEval.java | 1 - .../conversation/parser/bank/Lemma.java | 17 - .../conversation/parser/bank/LemmaBank.java | 45 ++ 6 files changed, 474 insertions(+), 22 deletions(-) delete mode 100644 src/main/java/org/studiorailgun/conversation/parser/bank/Lemma.java create mode 100644 src/main/java/org/studiorailgun/conversation/parser/bank/LemmaBank.java diff --git a/data/dictionary/verbs.csv b/data/dictionary/verbs.csv index 10accc8..ab0e003 100644 --- a/data/dictionary/verbs.csv +++ b/data/dictionary/verbs.csv @@ -6,4 +6,409 @@ are,be was,be were,be being,be -been,be \ No newline at end of file +been,be +have,have +has,have +had,have +having,have +do,do +does,do +did,do +done,do +doing,do +say,say +said,say +saying,say +says,say +go,go +going,go +goes,go +went,go +gone,go +get,get +gets,get +got,get +getting,get +gotten,get +make,make +makes,make +made,make +making,make +know,know +knows,know +knew,know +knowing,know +known,know +think,think +thinks,think +thought,think +thinking,think +take,take +takes,take +took,take +taking,take +taken,take +see,see +sees,see +saw,see +seeing,see +seen,see +come,come +comes,come +came,come +coming,come +want,want +wants,want +wanted,want +wanting,want +look,look +looks,look +looked,look +looking,look +use,use +uses,use +used,use +using,use +find,find +finds,find +found,find +finding,find +give,give +gives,give +gave,give +giving,give +given,give +tell,tell +tells,tell +told,tell +telling,tell +work,work +works,work +worked,work +working,work +call,call +calls,call +called,call +calling,call +try,try +tries,try +tried,try +trying,try +ask,ask +asks,ask +asked,ask +asking,ask 
+need,need +needs,need +needed,need +needing,need +feel,feel +feels,feel +felt,feel +feeling,feel +become,become +becomes,become +became,become +becoming,become +leave,leave +leaves,leave +left,leave +leaving,leave +put,put +puts,put +putting,put +mean,mean +means,mean +meant,mean +meaning,mean +keep,keep +keeps,keep +kept,keep +keeping,keep +let,let +lets,let +letting,let +begin,begin +begins,begin +began,begin +beginning,begin +begun,begin +seem,seem +seems,seem +seemed,seem +seeming,seem +help,help +helps,help +helped,help +helping,help +talk,talk +talks,talk +talked,talk +talking,talk +turn,turn +turns,turn +turned,turn +turning,turn +start,start +starts,start +started,start +starting,start +show,show +shows,show +showed,show +showing,show +shown,show +hear,hear +hears,hear +heard,hear +hearing,hear +play,play +plays,play +played,play +playing,play +run,run +runs,run +ran,run +running,run +move,move +moves,move +moved,move +moving,move +like,like +likes,like +liked,like +liking,like +live,live +lives,live +lived,live +living,live +believe,believe +believes,believe +believed,believe +believing,believe +hold,hold +holds,hold +held,hold +holding,hold +bring,bring +brings,bring +brought,bring +bringing,bring +happen,happen +happens,happen +happened,happen +happening,happen +write,write +writes,write +wrote,write +writing,write +written,write +provide,provide +provides,provide +provided,provide +providing,provide +sit,sit +sits,sit +sat,sit +sitting,sit +stand,stand +stands,stand +stood,stand +standing,stand +lose,lose +loses,lose +lost,lose +losing,lose +pay,pay +pays,pay +paid,pay +paying,pay +meet,meet +meets,meet +met,meet +meeting,meet +include,include +includes,include +included,include +including,include +continue,continue +continues,continue +continued,continue +continuing,continue +set,set +sets,set +setting,set +learn,learn +learns,learn +learned,learn +learnt,learn +learning,learn +change,change +changes,change +changed,change +changing,change +lead,lead 
+leads,lead +led,lead +leading,lead +understand,understand +understands,understand +understood,understand +understanding,understand +watch,watch +watches,watch +watched,watch +watching,watch +follow,follow +follows,follow +followed,follow +following,follow +stop,stop +stops,stop +stopped,stop +stopping,stop +create,create +creates,create +created,create +creating,create +speak,speak +speaks,speak +spoke,speak +speaking,speak +spoken,speak +read,read +reads,read +reading,read +allow,allow +allows,allow +allowed,allow +allowing,allow +add,add +adds,add +added,add +adding,add +spend,spend +spends,spend +spent,spend +spending,spend +grow,grow +grows,grow +grew,grow +growing,grow +grown,grow +open,open +opens,open +opened,open +opening,open +walk,walk +walks,walk +walked,walk +walking,walk +win,win +wins,win +won,win +winning,win +offer,offer +offers,offer +offered,offer +offering,offer +remember,remember +remembers,remember +remembered,remember +remembering,remember +love,love +loves,love +loved,love +loving,love +consider,consider +considers,consider +considered,consider +considering,consider +appear,appear +appears,appear +appeared,appear +appearing,appear +buy,buy +buys,buy +bought,buy +buying,buy +wait,wait +waits,wait +waited,wait +waiting,wait +serve,serve +serves,serve +served,serve +serving,serve +die,die +dies,die +died,die +dying,die +send,send +sends,send +sent,send +sending,send +expect,expect +expects,expect +expected,expect +expecting,expect +build,build +builds,build +built,build +building,build +stay,stay +stays,stay +stayed,stay +staying,stay +fall,fall +falls,fall +fell,fall +falling,fall +fallen,fall +cut,cut +cuts,cut +cutting,cut +reach,reach +reaches,reach +reached,reach +reaching,reach +kill,kill +kills,kill +killed,kill +killing,kill +remain,remain +remains,remain +remained,remain +remaining,remain +suggest,suggest +suggests,suggest +suggested,suggest +suggesting,suggest +raise,raise +raises,raise +raised,raise +raising,raise +pass,pass 
+passes,pass +passed,pass +passing,pass +sell,sell +sells,sell +sold,sell +selling,sell +require,require +requires,require +required,require +requiring,require +report,report +reports,report +reported,report +reporting,report +decide,decide +decides,decide +decided,decide +deciding,decide +pull,pull +pulls,pull +pulled,pull +pulling,pull diff --git a/src/main/java/org/studiorailgun/Globals.java b/src/main/java/org/studiorailgun/Globals.java index d5f60f6..2216d38 100644 --- a/src/main/java/org/studiorailgun/Globals.java +++ b/src/main/java/org/studiorailgun/Globals.java @@ -3,6 +3,7 @@ package org.studiorailgun; import org.studiorailgun.conversation.categorization.SentenceFunctionCategorizor; import org.studiorailgun.conversation.evaluators.greet.GreetingEval; import org.studiorailgun.conversation.parser.NLPParser; +import org.studiorailgun.conversation.parser.bank.LemmaBank; import org.studiorailgun.conversation.tracking.Conversation; import org.studiorailgun.knowledge.CSVExport; import org.studiorailgun.knowledge.KnowledgeWeb; @@ -29,6 +30,7 @@ public class Globals { //initialize evaluators GreetingEval.init(); SentenceFunctionCategorizor.init(); + LemmaBank.init(); //init nlp parser NLPParser.init(); diff --git a/src/main/java/org/studiorailgun/conversation/evaluators/query/QueryEval.java b/src/main/java/org/studiorailgun/conversation/evaluators/query/QueryEval.java index c3a264c..a9013c9 100644 --- a/src/main/java/org/studiorailgun/conversation/evaluators/query/QueryEval.java +++ b/src/main/java/org/studiorailgun/conversation/evaluators/query/QueryEval.java @@ -3,6 +3,8 @@ package org.studiorailgun.conversation.evaluators.query; import java.util.Iterator; import java.util.Set; +import org.studiorailgun.conversation.parser.PennTreebankTagSet; +import org.studiorailgun.conversation.parser.bank.LemmaBank; import org.studiorailgun.conversation.parser.depend.Clause; import org.studiorailgun.conversation.parser.depend.Predicate; import 
org.studiorailgun.conversation.tracking.Conversation; @@ -27,9 +29,19 @@ public class QueryEval { Clause mainClause = sentence.getMainClause(); Predicate predicate = mainClause.getPredicate(); + String predicateTag = predicate.getRoot().tag(); if(predicate.getCopular() != null){ QueryEval.evaluateCopular(conversation,quote,sentence); + } else if(PennTreebankTagSet.isVerb(predicateTag)){ + if(PennTreebankTagSet.isBe(predicateTag)){ + QueryEval.evaluateBe(conversation, quote, sentence); + } else { + String message = "Unsupported predicate type!\n" + + "\"" + sentence.getRaw() + "\"\n" + + semanticGraph; + throw new UnsupportedOperationException(message); + } } else { String message = "Unsupported predicate type!\n" + "\"" + sentence.getRaw() + "\"\n" + @@ -45,12 +57,18 @@ public class QueryEval { * @param sentence The sentence */ private static void evaluateCopular(Conversation conversation, Quote quote, Sentence sentence){ - SemanticGraph semanticGraph = sentence.getGraph(); Clause mainClause = sentence.getMainClause(); Predicate predicate = mainClause.getPredicate(); IndexedWord copular = predicate.getCopular(); - //todo, different logic based on type of copular verb - QueryEval.evaluateBe(conversation, quote, sentence); + String lemma = LemmaBank.getLemma(copular.originalText()); + switch(lemma){ + case "be": { + QueryEval.evaluateBe(conversation, quote, sentence); + } break; + default: { + throw new Error("Unhandled copular verb type! 
lemma: " + lemma + " original: " + copular.originalText()); + } + } } /** diff --git a/src/main/java/org/studiorailgun/conversation/evaluators/transfer/TransferEval.java b/src/main/java/org/studiorailgun/conversation/evaluators/transfer/TransferEval.java index 11ac126..21b6512 100644 --- a/src/main/java/org/studiorailgun/conversation/evaluators/transfer/TransferEval.java +++ b/src/main/java/org/studiorailgun/conversation/evaluators/transfer/TransferEval.java @@ -7,7 +7,6 @@ import org.studiorailgun.conversation.parser.PennTreebankTagSet; import org.studiorailgun.conversation.tracking.Conversation; import org.studiorailgun.conversation.tracking.Quote; import org.studiorailgun.conversation.tracking.Sentence; -import org.studiorailgun.knowledge.KnowledgeWeb; import org.studiorailgun.knowledge.query.NodePropQuery; import edu.stanford.nlp.ling.IndexedWord; diff --git a/src/main/java/org/studiorailgun/conversation/parser/bank/Lemma.java b/src/main/java/org/studiorailgun/conversation/parser/bank/Lemma.java deleted file mode 100644 index fcd400c..0000000 --- a/src/main/java/org/studiorailgun/conversation/parser/bank/Lemma.java +++ /dev/null @@ -1,17 +0,0 @@ -package org.studiorailgun.conversation.parser.bank; - -/** - * A lemma lookup bank - */ -public class Lemma { - - /** - * Gets the lemma of a word - * @param word The word - * @return The lemma of the word - */ - public static String getLemma(String word){ - return "be"; - } - -} diff --git a/src/main/java/org/studiorailgun/conversation/parser/bank/LemmaBank.java b/src/main/java/org/studiorailgun/conversation/parser/bank/LemmaBank.java new file mode 100644 index 0000000..46b57f0 --- /dev/null +++ b/src/main/java/org/studiorailgun/conversation/parser/bank/LemmaBank.java @@ -0,0 +1,45 @@ +package org.studiorailgun.conversation.parser.bank; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.util.HashMap; +import java.util.Map; + +/** + * A lemma lookup bank + */ +public class 
LemmaBank { + + /** + * The map of word -> lemma + */ + static Map wordLemmaMap; + + /** + * Initializes the lemma bank + */ + public static void init(){ + wordLemmaMap = new HashMap(); + try { + String lemmaData = Files.readString(new File("./data/dictionary/verbs.csv").toPath()); + String[] lines = lemmaData.split("\n"); + for(int i = 1; i < lines.length; i++){ + String[] items = lines[i].split(","); + wordLemmaMap.put(items[0],items[1]); + } + } catch (IOException e) { + e.printStackTrace(); + } + } + + /** + * Gets the lemma of a word + * @param word The word + * @return The lemma of the word + */ + public static String getLemma(String word){ + return wordLemmaMap.get(word); + } + +}