some query processing work
All checks were successful
studiorailgun/trpg/pipeline/head This commit looks good

This commit is contained in:
austin 2024-12-29 16:48:46 -05:00
parent 2fb22d70fe
commit d451906ea8
8 changed files with 290 additions and 10 deletions

View File

@ -4,3 +4,5 @@
2. make everything easily debug-able
3. work on a single sentence at a time
4. only work on important sentences (i.e. don't work on questions asking what color something ISN'T)

View File

@ -0,0 +1,28 @@
package org.studiorailgun.conversation.evaluators.query;
/**
 * Enumerations of interrogative words, grouped by part of speech
 */
public class Interrogative {

    /**
     * Interrogative pronouns
     */
    public enum InterrogativePronoun {
        WHAT,
        WHICH,
        WHO,
        WHOSE,
    }

    /**
     * Interrogative adverbs
     */
    public enum InterrogativeAdverb {
        WHY,
        WHERE,
        HOW,
        WHEN,
    }

}

View File

@ -0,0 +1,80 @@
package org.studiorailgun.conversation.evaluators.query;
import java.util.List;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.semgraph.SemanticGraph;
/**
 * One entry of a noun stack: a word from a semantic graph together with the
 * interrogative and possessive modifiers found among its direct children
 */
public class NounStack {

    /**
     * Original text of an interrogative child of this word, or null if none was found
     */
    String interrogative;

    /**
     * Original text of a possessive pronoun child of this word, or null if none was found
     */
    String possessive;

    /**
     * The word this part of the noun stack was built from
     */
    IndexedWord indexedWord = null;

    /**
     * Builds a noun stack starting at the given word
     * @param graph The semantic graph that contains the word
     * @param indexedWord The word to start from
     * @return The NounStack
     * @throws UnsupportedOperationException if a child of the word carries a tag that is not handled
     */
    public static NounStack parse(SemanticGraph graph, IndexedWord indexedWord){
        return NounStack.parseRecursive(graph, indexedWord);
    }

    /**
     * Builds the noun stack entry for a single word by inspecting its direct children.
     * Despite the name, this currently looks only at the direct children and does not
     * call itself on them.
     * @param graph The semantic graph
     * @param currentRoot The word whose children are inspected
     * @return The noun stack
     * @throws UnsupportedOperationException if a child carries a tag that is not handled
     */
    private static NounStack parseRecursive(SemanticGraph graph, IndexedWord currentRoot){
        NounStack stack = new NounStack();
        stack.indexedWord = currentRoot;
        //a word without children simply skips the loop and yields a bare entry
        for(IndexedWord modifier : graph.getChildList(currentRoot)){
            switch(modifier.tag()){
                case "WDT":
                case "WP":
                case "WP$":
                case "WRB":
                    //wh-word attached to this word
                    stack.interrogative = modifier.originalText();
                    break;
                case "PRP$":
                    //possessive pronoun attached to this word
                    stack.possessive = modifier.originalText();
                    break;
                default:
                    //anything else is reported loudly rather than silently ignored
                    System.out.println(modifier);
                    System.out.println(modifier.tag());
                    throw new UnsupportedOperationException(
                        "Unsupported child type in noun stack processing!\n" +
                        modifier + "\n" +
                        modifier.tag() + "\n" +
                        graph
                    );
            }
        }
        return stack;
    }
}

View File

@ -1,8 +1,16 @@
package org.studiorailgun.conversation.evaluators.query;
import java.util.Iterator;
import java.util.Set;
import org.studiorailgun.conversation.parser.NLPParser;
import org.studiorailgun.conversation.parser.PennTreebankTagSet;
import org.studiorailgun.conversation.tracking.Conversation;
import org.studiorailgun.conversation.tracking.Quote;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.semgraph.SemanticGraph;
/**
* Evaluates queries
*/
@ -14,7 +22,48 @@ public class QueryEval {
* @param quote The quote
*/
public static void evaluate(Conversation conversation, Quote quote){
    System.out.println("Is a query!");
    //make sure the quote carries a semantic graph before inspecting it
    NLPParser.parse(quote);
    SemanticGraph parsed = quote.getGraph();

    //only sentences with a single root are handled
    if(parsed.getRoots().size() > 1){
        throw new UnsupportedOperationException(QueryEval.describeUnsupported("Multiple roots to sentence!", quote, parsed));
    }

    //the root must be a verb, and specifically a form of "to be"
    String rootTag = parsed.getFirstRoot().tag();
    if(!PennTreebankTagSet.isVerb(rootTag)){
        throw new UnsupportedOperationException(QueryEval.describeUnsupported("Unsupported root type!", quote, parsed));
    }
    if(!PennTreebankTagSet.isBe(rootTag)){
        throw new UnsupportedOperationException(QueryEval.describeUnsupported("Unsupported root verb type!", quote, parsed));
    }
    QueryEval.evaluateBe(conversation, quote);
}

/**
 * Builds the message used when a query cannot be evaluated
 * @param headline The first line of the message
 * @param quote The quote that could not be evaluated
 * @param graph The semantic graph of the quote
 * @return The headline, the quoted raw text and the graph, one per line
 */
private static String describeUnsupported(String headline, Quote quote, SemanticGraph graph){
    return headline + "\n" +
        "\"" + quote.getRaw() + "\"\n" +
        graph;
}
/**
 * Evaluates an equivalence query.
 * <p>
 * Takes two children of the sentence root and parses each one into a {@link NounStack}.
 * The parsed stacks are not consumed yet.
 * @param conversation The conversation
 * @param quote The quote; its semantic graph must already be set
 * @throws UnsupportedOperationException if the root has fewer than two children,
 * or if noun stack parsing encounters a child it does not support
 */
private static void evaluateBe(Conversation conversation, Quote quote){
    //get the two things we're comparing
    SemanticGraph graph = quote.getGraph();
    IndexedWord root = graph.getFirstRoot();
    Set<IndexedWord> children = graph.getChildren(root);

    //fail with context instead of a bare NoSuchElementException from the iterator
    if(children.size() < 2){
        String message = "Equivalence query needs two things to compare!\n" +
            "\"" + quote.getRaw() + "\"\n" +
            graph;
        throw new UnsupportedOperationException(message);
    }

    //NOTE(review): children is a Set, so which two children count as "first" and
    //"second" depends on its iteration order -- confirm this is the intended pair
    Iterator<IndexedWord> iterator = children.iterator();
    IndexedWord firstItem = iterator.next();
    IndexedWord secondItem = iterator.next();

    //TODO: compare the two noun stacks; for now parsing only rejects unsupported structures
    NounStack firstNoun = NounStack.parse(graph, firstItem);
    NounStack secondNoun = NounStack.parse(graph, secondItem);
}
}

View File

@ -1,13 +1,11 @@
package org.studiorailgun.conversation.parser;
import edu.stanford.nlp.coref.data.CorefChain;
import edu.stanford.nlp.ling.*;
import edu.stanford.nlp.ie.util.*;
import edu.stanford.nlp.pipeline.*;
import edu.stanford.nlp.semgraph.*;
import edu.stanford.nlp.trees.*;
import java.util.*;
import org.studiorailgun.conversation.tracking.Quote;
/**
* Parses a sentence
*/
@ -40,11 +38,17 @@ public class NLPParser {
* Parses the input sentence
* @param quote The quote to parse; the parsed document and semantic graph are stored on it
*/
public static void parse(String input){
// create a document object
CoreDocument document = new CoreDocument(text);
// annnotate the document
pipeline.annotate(document);
public static void parse(Quote quote){
    //a quote that already carries a parsed document is not parsed again
    if(quote.getParsedDocument() == null){
        // annotate the document
        CoreDocument document = new CoreDocument(quote.getRaw());
        pipeline.annotate(document);
        //only the first sentence of the document is used
        SemanticGraph graph = document.sentences().get(0).dependencyParse();
        //store both results only after the graph was extracted, so a failure above
        //cannot leave the quote with a document but no graph (which would also
        //make every later call skip parsing)
        quote.setParsedDocument(document);
        quote.setGraph(graph);
    }
    //TODO: grab information from document here
}

View File

@ -0,0 +1,62 @@
package org.studiorailgun.conversation.parser;
/**
 * Helpers for classifying part-of-speech tags from the Penn Treebank Tag Set
 */
public class PennTreebankTagSet {

    /**
     * Checks if this tag is a verb or not
     * @param tag The tag, may be null
     * @return true if it is a verb, false otherwise (including when the tag is null)
     */
    public static boolean isVerb(String tag){
        //switching on a null string throws, so treat a missing tag as "not a verb"
        if(tag == null){
            return false;
        }
        switch(tag){
            case "VB":
            case "VBD":
            case "VBG":
            case "VBN":
            case "VBP":
            case "VBZ":
            case "VH":
            case "VHD":
            case "VHG":
            case "VHN":
            case "VHP":
            case "VHZ":
            case "VV":
            case "VVD":
            case "VVG":
            case "VVN":
            case "VVP":
            case "VVZ": {
                return true;
            }
            default: {
                return false;
            }
        }
    }

    /**
     * Checks if this tag is a form of "to be" or not.
     * <p>
     * NOTE(review): the decision is made from the tag alone. If the tagger in use emits
     * VB* tags for every verb rather than only for "to be", this returns true for any
     * verb and a lemma check would be needed -- confirm against the tagger's tag set.
     * @param tag The tag, may be null
     * @return true if it is a form of "to be", false otherwise (including when the tag is null)
     */
    public static boolean isBe(String tag){
        //switching on a null string throws, so treat a missing tag as "not a form of be"
        if(tag == null){
            return false;
        }
        switch(tag){
            case "VB":
            case "VBD":
            case "VBG":
            case "VBN":
            case "VBP":
            case "VBZ": {
                return true;
            }
            default: {
                return false;
            }
        }
    }

}

View File

@ -2,6 +2,9 @@ package org.studiorailgun.conversation.tracking;
import org.studiorailgun.conversation.categorization.SentenceFunctionCategorizor.SentenceFunction;
import edu.stanford.nlp.pipeline.CoreDocument;
import edu.stanford.nlp.semgraph.SemanticGraph;
/**
* A quote stated during the conversation
*/
@ -17,6 +20,16 @@ public class Quote {
*/
SentenceFunction function;
/**
* The CoreNLP parsed document
*/
CoreDocument parsedDocument;
/**
* The parsed semantic graph
*/
SemanticGraph graph;
/**
* Constructor
* @param input The raw text of the quote
@ -45,4 +58,23 @@ public class Quote {
this.function = function;
}
/**
 * Gets the CoreNLP parsed document
 * @return The parsed document currently stored on this quote
 */
public CoreDocument getParsedDocument() {
    return this.parsedDocument;
}
/**
 * Sets the CoreNLP parsed document
 * @param parsedDocument The parsed document to store on this quote
 */
public void setParsedDocument(CoreDocument parsedDocument) {
    this.parsedDocument = parsedDocument;
}
/**
 * Gets the parsed semantic graph
 * @return The semantic graph currently stored on this quote
 */
public SemanticGraph getGraph() {
    return this.graph;
}
/**
 * Sets the parsed semantic graph
 * @param graph The semantic graph to store on this quote
 */
public void setGraph(SemanticGraph graph) {
    this.graph = graph;
}
}

View File

@ -0,0 +1,23 @@
package org.studiorailgun;
import static org.junit.jupiter.api.Assertions.*;
import org.junit.jupiter.api.Test;
import org.studiorailgun.conversation.ConvAI;
import org.studiorailgun.conversation.tracking.Quote;
/**
 * Tests for query sentences
 */
public class QueryTests {

    /**
     * Loads the test web and passes a "what color" question to ConvAI.simFrame.
     * The test currently only checks that no exception is thrown.
     * NOTE(review): the method name mentions a greeting but the input is a query --
     * confirm the intended name.
     */
    @Test
    public void testOtherGreeting(){
        Globals.init("./data/test/webs/web.json");
        final String question = "What color is your hat?";
        final Quote response = ConvAI.simFrame(question);
        //the expected answer is not asserted yet:
        // assertEquals(response.getRaw(), "Blue");
    }

}