more predicate processing, lemma bank, fix tests
All checks were successful
studiorailgun/trpg/pipeline/head This commit looks good

This commit is contained in:
austin 2024-12-30 23:02:33 -05:00
parent 9ff39934c5
commit 962586a5fd
6 changed files with 474 additions and 22 deletions

View File

@ -6,4 +6,409 @@ are,be
was,be was,be
were,be were,be
being,be being,be
been,be been,be
have,have
has,have
had,have
having,have
do,do
does,do
did,do
done,do
doing,do
say,say
said,say
saying,say
says,say
go,go
going,go
goes,go
went,go
gone,go
get,get
gets,get
got,get
getting,get
gotten,get
make,make
makes,make
made,make
making,make
know,know
knows,know
knew,know
knowing,know
known,know
think,think
thinks,think
thought,think
thinking,think
take,take
takes,take
took,take
taking,take
taken,take
see,see
sees,see
saw,see
seeing,see
seen,see
come,come
comes,come
came,come
coming,come
want,want
wants,want
wanted,want
wanting,want
look,look
looks,look
looked,look
looking,look
use,use
uses,use
used,use
using,use
find,find
finds,find
found,find
finding,find
give,give
gives,give
gave,give
giving,give
given,give
tell,tell
tells,tell
told,tell
telling,tell
work,work
works,work
worked,work
working,work
call,call
calls,call
called,call
calling,call
try,try
tries,try
tried,try
trying,try
ask,ask
asks,ask
asked,ask
asking,ask
need,need
needs,need
needed,need
needing,need
feel,feel
feels,feel
felt,feel
feeling,feel
become,become
becomes,become
became,become
becoming,become
leave,leave
leaves,leave
left,leave
leaving,leave
put,put
puts,put
putting,put
mean,mean
means,mean
meant,mean
meaning,mean
keep,keep
keeps,keep
kept,keep
keeping,keep
let,let
lets,let
letting,let
begin,begin
begins,begin
began,begin
beginning,begin
begun,begin
seem,seem
seems,seem
seemed,seem
seeming,seem
help,help
helps,help
helped,help
helping,help
talk,talk
talks,talk
talked,talk
talking,talk
turn,turn
turns,turn
turned,turn
turning,turn
start,start
starts,start
started,start
starting,start
show,show
shows,show
showed,show
showing,show
shown,show
hear,hear
hears,hear
heard,hear
hearing,hear
play,play
plays,play
played,play
playing,play
run,run
runs,run
ran,run
running,run
move,move
moves,move
moved,move
moving,move
like,like
likes,like
liked,like
liking,like
live,live
lives,live
lived,live
living,live
believe,believe
believes,believe
believed,believe
believing,believe
hold,hold
holds,hold
held,hold
holding,hold
bring,bring
brings,bring
brought,bring
bringing,bring
happen,happen
happens,happen
happened,happen
happening,happen
write,write
writes,write
wrote,write
writing,write
written,write
provide,provide
provides,provide
provided,provide
providing,provide
sit,sit
sits,sit
sat,sit
sitting,sit
stand,stand
stands,stand
stood,stand
standing,stand
lose,lose
loses,lose
lost,lose
losing,lose
pay,pay
pays,pay
paid,pay
paying,pay
meet,meet
meets,meet
met,meet
meeting,meet
include,include
includes,include
included,include
including,include
continue,continue
continues,continue
continued,continue
continuing,continue
set,set
sets,set
setting,set
learn,learn
learns,learn
learned,learn
learnt,learn
learning,learn
change,change
changes,change
changed,change
changing,change
lead,lead
leads,lead
led,lead
leading,lead
understand,understand
understands,understand
understood,understand
understanding,understand
watch,watch
watches,watch
watched,watch
watching,watch
follow,follow
follows,follow
followed,follow
following,follow
stop,stop
stops,stop
stopped,stop
stopping,stop
create,create
creates,create
created,create
creating,create
speak,speak
speaks,speak
spoke,speak
speaking,speak
spoken,speak
read,read
reads,read
reading,read
allow,allow
allows,allow
allowed,allow
allowing,allow
add,add
adds,add
added,add
adding,add
spend,spend
spends,spend
spent,spend
spending,spend
grow,grow
grows,grow
grew,grow
growing,grow
grown,grow
open,open
opens,open
opened,open
opening,open
walk,walk
walks,walk
walked,walk
walking,walk
win,win
wins,win
won,win
winning,win
offer,offer
offers,offer
offered,offer
offering,offer
remember,remember
remembers,remember
remembered,remember
remembering,remember
love,love
loves,love
loved,love
loving,love
consider,consider
considers,consider
considered,consider
considering,consider
appear,appear
appears,appear
appeared,appear
appearing,appear
buy,buy
buys,buy
bought,buy
buying,buy
wait,wait
waits,wait
waited,wait
waiting,wait
serve,serve
serves,serve
served,serve
serving,serve
die,die
dies,die
died,die
dying,die
send,send
sends,send
sent,send
sending,send
expect,expect
expects,expect
expected,expect
expecting,expect
build,build
builds,build
built,build
building,build
stay,stay
stays,stay
stayed,stay
staying,stay
fall,fall
falls,fall
fell,fall
falling,fall
fallen,fall
cut,cut
cuts,cut
cutting,cut
reach,reach
reaches,reach
reached,reach
reaching,reach
kill,kill
kills,kill
killed,kill
killing,kill
remain,remain
remains,remain
remained,remain
remaining,remain
suggest,suggest
suggests,suggest
suggested,suggest
suggesting,suggest
raise,raise
raises,raise
raised,raise
raising,raise
pass,pass
passes,pass
passed,pass
passing,pass
sell,sell
sells,sell
sold,sell
selling,sell
require,require
requires,require
required,require
requiring,require
report,report
reports,report
reported,report
reporting,report
decide,decide
decides,decide
decided,decide
deciding,decide
pull,pull
pulls,pull
pulled,pull
pulling,pull

1 word lemma
6 was be
7 were be
8 being be
9 been be
10 have have
11 has have
12 had have
13 having have
14 do do
15 does do
16 did do
17 done do
18 doing do
19 say say
20 said say
21 saying say
22 says say
23 go go
24 going go
25 goes go
26 went go
27 gone go
28 get get
29 gets get
30 got get
31 getting get
32 gotten get
33 make make
34 makes make
35 made make
36 making make
37 know know
38 knows know
39 knew know
40 knowing know
41 known know
42 think think
43 thinks think
44 thought think
45 thinking think
46 take take
47 takes take
48 took take
49 taking take
50 taken take
51 see see
52 sees see
53 saw see
54 seeing see
55 seen see
56 come come
57 comes come
58 came come
59 coming come
60 want want
61 wants want
62 wanted want
63 wanting want
64 look look
65 looks look
66 looked look
67 looking look
68 use use
69 uses use
70 used use
71 using use
72 find find
73 finds find
74 found find
75 finding find
76 give give
77 gives give
78 gave give
79 giving give
80 given give
81 tell tell
82 tells tell
83 told tell
84 telling tell
85 work work
86 works work
87 worked work
88 working work
89 call call
90 calls call
91 called call
92 calling call
93 try try
94 tries try
95 tried try
96 trying try
97 ask ask
98 asks ask
99 asked ask
100 asking ask
101 need need
102 needs need
103 needed need
104 needing need
105 feel feel
106 feels feel
107 felt feel
108 feeling feel
109 become become
110 becomes become
111 became become
112 becoming become
113 leave leave
114 leaves leave
115 left leave
116 leaving leave
117 put put
118 puts put
119 putting put
120 mean mean
121 means mean
122 meant mean
123 meaning mean
124 keep keep
125 keeps keep
126 kept keep
127 keeping keep
128 let let
129 lets let
130 letting let
131 begin begin
132 begins begin
133 began begin
134 beginning begin
135 begun begin
136 seem seem
137 seems seem
138 seemed seem
139 seeming seem
140 help help
141 helps help
142 helped help
143 helping help
144 talk talk
145 talks talk
146 talked talk
147 talking talk
148 turn turn
149 turns turn
150 turned turn
151 turning turn
152 start start
153 starts start
154 started start
155 starting start
156 show show
157 shows show
158 showed show
159 showing show
160 shown show
161 hear hear
162 hears hear
163 heard hear
164 hearing hear
165 play play
166 plays play
167 played play
168 playing play
169 run run
170 runs run
171 ran run
172 running run
173 move move
174 moves move
175 moved move
176 moving move
177 like like
178 likes like
179 liked like
180 liking like
181 live live
182 lives live
183 lived live
184 living live
185 believe believe
186 believes believe
187 believed believe
188 believing believe
189 hold hold
190 holds hold
191 held hold
192 holding hold
193 bring bring
194 brings bring
195 brought bring
196 bringing bring
197 happen happen
198 happens happen
199 happened happen
200 happening happen
201 write write
202 writes write
203 wrote write
204 writing write
205 written write
206 provide provide
207 provides provide
208 provided provide
209 providing provide
210 sit sit
211 sits sit
212 sat sit
213 sitting sit
214 stand stand
215 stands stand
216 stood stand
217 standing stand
218 lose lose
219 loses lose
220 lost lose
221 losing lose
222 pay pay
223 pays pay
224 paid pay
225 paying pay
226 meet meet
227 meets meet
228 met meet
229 meeting meet
230 include include
231 includes include
232 included include
233 including include
234 continue continue
235 continues continue
236 continued continue
237 continuing continue
238 set set
239 sets set
240 setting set
241 learn learn
242 learns learn
243 learned learn
244 learnt learn
245 learning learn
246 change change
247 changes change
248 changed change
249 changing change
250 lead lead
251 leads lead
252 led lead
253 leading lead
254 understand understand
255 understands understand
256 understood understand
257 understanding understand
258 watch watch
259 watches watch
260 watched watch
261 watching watch
262 follow follow
263 follows follow
264 followed follow
265 following follow
266 stop stop
267 stops stop
268 stopped stop
269 stopping stop
270 create create
271 creates create
272 created create
273 creating create
274 speak speak
275 speaks speak
276 spoke speak
277 speaking speak
278 spoken speak
279 read read
280 reads read
281 reading read
282 allow allow
283 allows allow
284 allowed allow
285 allowing allow
286 add add
287 adds add
288 added add
289 adding add
290 spend spend
291 spends spend
292 spent spend
293 spending spend
294 grow grow
295 grows grow
296 grew grow
297 growing grow
298 grown grow
299 open open
300 opens open
301 opened open
302 opening open
303 walk walk
304 walks walk
305 walked walk
306 walking walk
307 win win
308 wins win
309 won win
310 winning win
311 offer offer
312 offers offer
313 offered offer
314 offering offer
315 remember remember
316 remembers remember
317 remembered remember
318 remembering remember
319 love love
320 loves love
321 loved love
322 loving love
323 consider consider
324 considers consider
325 considered consider
326 considering consider
327 appear appear
328 appears appear
329 appeared appear
330 appearing appear
331 buy buy
332 buys buy
333 bought buy
334 buying buy
335 wait wait
336 waits wait
337 waited wait
338 waiting wait
339 serve serve
340 serves serve
341 served serve
342 serving serve
343 die die
344 dies die
345 died die
346 dying die
347 send send
348 sends send
349 sent send
350 sending send
351 expect expect
352 expects expect
353 expected expect
354 expecting expect
355 build build
356 builds build
357 built build
358 building build
359 stay stay
360 stays stay
361 stayed stay
362 staying stay
363 fall fall
364 falls fall
365 fell fall
366 falling fall
367 fallen fall
368 cut cut
369 cuts cut
370 cutting cut
371 reach reach
372 reaches reach
373 reached reach
374 reaching reach
375 kill kill
376 kills kill
377 killed kill
378 killing kill
379 remain remain
380 remains remain
381 remained remain
382 remaining remain
383 suggest suggest
384 suggests suggest
385 suggested suggest
386 suggesting suggest
387 raise raise
388 raises raise
389 raised raise
390 raising raise
391 pass pass
392 passes pass
393 passed pass
394 passing pass
395 sell sell
396 sells sell
397 sold sell
398 selling sell
399 require require
400 requires require
401 required require
402 requiring require
403 report report
404 reports report
405 reported report
406 reporting report
407 decide decide
408 decides decide
409 decided decide
410 deciding decide
411 pull pull
412 pulls pull
413 pulled pull
414 pulling pull

View File

@ -3,6 +3,7 @@ package org.studiorailgun;
import org.studiorailgun.conversation.categorization.SentenceFunctionCategorizor; import org.studiorailgun.conversation.categorization.SentenceFunctionCategorizor;
import org.studiorailgun.conversation.evaluators.greet.GreetingEval; import org.studiorailgun.conversation.evaluators.greet.GreetingEval;
import org.studiorailgun.conversation.parser.NLPParser; import org.studiorailgun.conversation.parser.NLPParser;
import org.studiorailgun.conversation.parser.bank.LemmaBank;
import org.studiorailgun.conversation.tracking.Conversation; import org.studiorailgun.conversation.tracking.Conversation;
import org.studiorailgun.knowledge.CSVExport; import org.studiorailgun.knowledge.CSVExport;
import org.studiorailgun.knowledge.KnowledgeWeb; import org.studiorailgun.knowledge.KnowledgeWeb;
@ -29,6 +30,7 @@ public class Globals {
//initialize evaluators //initialize evaluators
GreetingEval.init(); GreetingEval.init();
SentenceFunctionCategorizor.init(); SentenceFunctionCategorizor.init();
LemmaBank.init();
//init nlp parser //init nlp parser
NLPParser.init(); NLPParser.init();

View File

@ -3,6 +3,8 @@ package org.studiorailgun.conversation.evaluators.query;
import java.util.Iterator; import java.util.Iterator;
import java.util.Set; import java.util.Set;
import org.studiorailgun.conversation.parser.PennTreebankTagSet;
import org.studiorailgun.conversation.parser.bank.LemmaBank;
import org.studiorailgun.conversation.parser.depend.Clause; import org.studiorailgun.conversation.parser.depend.Clause;
import org.studiorailgun.conversation.parser.depend.Predicate; import org.studiorailgun.conversation.parser.depend.Predicate;
import org.studiorailgun.conversation.tracking.Conversation; import org.studiorailgun.conversation.tracking.Conversation;
@ -27,9 +29,19 @@ public class QueryEval {
Clause mainClause = sentence.getMainClause(); Clause mainClause = sentence.getMainClause();
Predicate predicate = mainClause.getPredicate(); Predicate predicate = mainClause.getPredicate();
String predicateTag = predicate.getRoot().tag();
if(predicate.getCopular() != null){ if(predicate.getCopular() != null){
QueryEval.evaluateCopular(conversation,quote,sentence); QueryEval.evaluateCopular(conversation,quote,sentence);
} else if(PennTreebankTagSet.isVerb(predicateTag)){
if(PennTreebankTagSet.isBe(predicateTag)){
QueryEval.evaluateBe(conversation, quote, sentence);
} else {
String message = "Unsupported predicate type!\n" +
"\"" + sentence.getRaw() + "\"\n" +
semanticGraph;
throw new UnsupportedOperationException(message);
}
} else { } else {
String message = "Unsupported predicate type!\n" + String message = "Unsupported predicate type!\n" +
"\"" + sentence.getRaw() + "\"\n" + "\"" + sentence.getRaw() + "\"\n" +
@ -45,12 +57,18 @@ public class QueryEval {
* @param sentence The sentence * @param sentence The sentence
*/ */
private static void evaluateCopular(Conversation conversation, Quote quote, Sentence sentence){ private static void evaluateCopular(Conversation conversation, Quote quote, Sentence sentence){
SemanticGraph semanticGraph = sentence.getGraph();
Clause mainClause = sentence.getMainClause(); Clause mainClause = sentence.getMainClause();
Predicate predicate = mainClause.getPredicate(); Predicate predicate = mainClause.getPredicate();
IndexedWord copular = predicate.getCopular(); IndexedWord copular = predicate.getCopular();
//todo, different logic based on type of copular verb String lemma = LemmaBank.getLemma(copular.originalText());
QueryEval.evaluateBe(conversation, quote, sentence); switch(lemma){
case "be": {
QueryEval.evaluateBe(conversation, quote, sentence);
} break;
default: {
throw new Error("Unhandled copular verb type! lemma: " + lemma + " original: " + copular.originalText());
}
}
} }
/** /**

View File

@ -7,7 +7,6 @@ import org.studiorailgun.conversation.parser.PennTreebankTagSet;
import org.studiorailgun.conversation.tracking.Conversation; import org.studiorailgun.conversation.tracking.Conversation;
import org.studiorailgun.conversation.tracking.Quote; import org.studiorailgun.conversation.tracking.Quote;
import org.studiorailgun.conversation.tracking.Sentence; import org.studiorailgun.conversation.tracking.Sentence;
import org.studiorailgun.knowledge.KnowledgeWeb;
import org.studiorailgun.knowledge.query.NodePropQuery; import org.studiorailgun.knowledge.query.NodePropQuery;
import edu.stanford.nlp.ling.IndexedWord; import edu.stanford.nlp.ling.IndexedWord;

View File

@ -1,17 +0,0 @@
package org.studiorailgun.conversation.parser.bank;
/**
 * Lemma lookup bank (placeholder implementation).
 */
public class Lemma {
    /**
     * Looks up the lemma of a word.
     * This stub ignores its argument and answers "be" for every input.
     * @param word The word to look up (currently unused)
     * @return The lemma of the word -- always "be" in this implementation
     */
    public static String getLemma(String word){
        final String lemma = "be";
        return lemma;
    }
}

View File

@ -0,0 +1,45 @@
package org.studiorailgun.conversation.parser.bank;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.util.HashMap;
import java.util.Map;
/**
 * A lemma lookup bank.
 * Maps inflected word forms (ie "was", "went") to their lemma (ie "be", "go")
 * using a two column csv file of the form word,lemma.
 */
public class LemmaBank {

    /**
     * Default location of the verb dictionary, relative to the working directory
     */
    static final String DEFAULT_VERB_FILE = "./data/dictionary/verbs.csv";

    /**
     * The map of word -> lemma.
     * Starts out empty so that lookups made before init() return null instead of throwing.
     */
    static Map<String,String> wordLemmaMap = new HashMap<String,String>();

    /**
     * Initializes the lemma bank from the default dictionary file
     */
    public static void init(){
        LemmaBank.init(new File(DEFAULT_VERB_FILE));
    }

    /**
     * Initializes the lemma bank from a specific csv file.
     * The first line of the file is treated as a header and skipped.
     * Rows that do not contain both a word and a lemma are ignored.
     * If the file can not be read, the stack trace is printed and the bank is left empty.
     * @param csvFile The csv file to read word,lemma rows from
     */
    public static void init(File csvFile){
        //build into a local map so the bank is swapped in one assignment
        Map<String,String> loaded = new HashMap<String,String>();
        try {
            String lemmaData = Files.readString(csvFile.toPath());
            //\R matches \n, \r\n, and \r so windows line endings don't leave a trailing \r on the lemma
            String[] lines = lemmaData.split("\\R");
            //start at 1 to skip the header row
            for(int i = 1; i < lines.length; i++){
                String[] items = lines[i].split(",");
                if(items.length < 2){
                    //blank or malformed row
                    continue;
                }
                String word = items[0].trim();
                String lemma = items[1].trim();
                if(word.isEmpty() || lemma.isEmpty()){
                    continue;
                }
                loaded.put(word,lemma);
            }
        } catch (IOException e) {
            //best effort -- the bank stays empty and lookups return null
            e.printStackTrace();
        }
        wordLemmaMap = loaded;
    }

    /**
     * Gets the lemma of a word.
     * An exact match is tried first, then a lowercased match so that
     * capitalized forms (ie a sentence-initial "Is") still resolve.
     * @param word The word
     * @return The lemma of the word, or null if the word is null or not in the bank
     */
    public static String getLemma(String word){
        if(word == null){
            return null;
        }
        String lemma = wordLemmaMap.get(word);
        if(lemma == null){
            lemma = wordLemmaMap.get(word.toLowerCase(java.util.Locale.ROOT));
        }
        return lemma;
    }

}