diff --git a/docs/mantas.txt b/docs/mantas.txt
index fbbf2e4..7872505 100644
--- a/docs/mantas.txt
+++ b/docs/mantas.txt
@@ -4,3 +4,5 @@
 
 2. make everything easily debug-able
 3. work on a single sentence at a time
+
+4. only work on important sentences (i.e. don't work on asking what color something ISN'T)
diff --git a/src/main/java/org/studiorailgun/conversation/evaluators/query/Interrogative.java b/src/main/java/org/studiorailgun/conversation/evaluators/query/Interrogative.java
new file mode 100644
index 0000000..57c6507
--- /dev/null
+++ b/src/main/java/org/studiorailgun/conversation/evaluators/query/Interrogative.java
@@ -0,0 +1,28 @@
+package org.studiorailgun.conversation.evaluators.query;
+
+/**
+ * Interrogatives available
+ */
+public class Interrogative {
+
+    /**
+     * Pronouns that are interrogative
+     */
+    public static enum InterrogativePronoun {
+        WHAT,
+        WHICH,
+        WHO,
+        WHOSE,
+    }
+
+    /**
+     * Adverbs that are interrogative
+     */
+    public static enum InterrogativeAdverb {
+        WHY,
+        WHERE,
+        HOW,
+        WHEN,
+    }
+
+}
diff --git a/src/main/java/org/studiorailgun/conversation/evaluators/query/NounStack.java b/src/main/java/org/studiorailgun/conversation/evaluators/query/NounStack.java
new file mode 100644
index 0000000..19eb8b2
--- /dev/null
+++ b/src/main/java/org/studiorailgun/conversation/evaluators/query/NounStack.java
@@ -0,0 +1,80 @@
+package org.studiorailgun.conversation.evaluators.query;
+
+import java.util.List;
+
+import edu.stanford.nlp.ling.IndexedWord;
+import edu.stanford.nlp.semgraph.SemanticGraph;
+
+/**
+ * A word together with the interrogative and possessive modifiers found among its direct children
+ */
+public class NounStack {
+
+    /**
+     * If not null, this is the interrogative for this part of the noun stack
+     */
+    String interrogative;
+
+    /**
+     * If not null, this is the possessive pronoun for this part of the noun stack
+     */
+    String possessive;
+
+    /**
+     * The indexed word for this part of the noun stack
+     */
+    IndexedWord indexedWord = null;
+
+    /**
+     * Parses a noun stack from an indexed word
+     * @param graph The semantic graph
+     * @param indexedWord The word
+     * @return The NounStack
+     * @throws UnsupportedOperationException If the word has a child whose tag is not handled yet
+     */
+    public static NounStack parse(SemanticGraph graph, IndexedWord indexedWord){
+        return NounStack.parseRecursive(graph, indexedWord);
+    }
+
+    /**
+     * Parses a noun stack rooted at a word by classifying each of its direct children.
+     * Despite the name this does not recurse yet: a child that is neither an interrogative
+     * nor a possessive pronoun is rejected rather than descended into.
+     * @param graph The semantic graph
+     * @param currentRoot The current root
+     * @return The noun stack
+     * @throws UnsupportedOperationException If a child has an unsupported (or missing) tag
+     */
+    private static NounStack parseRecursive(SemanticGraph graph, IndexedWord currentRoot){
+        NounStack rVal = new NounStack();
+        rVal.indexedWord = currentRoot;
+        List<IndexedWord> children = graph.getChildList(currentRoot);
+        for(IndexedWord child : children){
+            //switching on a null string throws, so send a child without a tag to the unsupported branch instead
+            String tag = child.tag() == null ? "" : child.tag();
+            switch(tag){
+                case "WDT":
+                case "WP":
+                case "WP$":
+                case "WRB": {
+                    //the child is an interrogative modifying this word
+                    rVal.interrogative = child.originalText();
+                } break;
+                case "PRP$": {
+                    //the child is a possessive pronoun
+                    rVal.possessive = child.originalText();
+                } break;
+                default: {
+                    //the message carries the child, its tag and the whole graph, so no separate debug printing is needed
+                    String message = "Unsupported child type in noun stack processing!\n" +
+                    child + "\n" +
+                    child.tag() + "\n" +
+                    graph;
+                    throw new UnsupportedOperationException(message);
+                }
+            }
+        }
+        return rVal;
+    }
+
+}
diff --git a/src/main/java/org/studiorailgun/conversation/evaluators/query/QueryEval.java b/src/main/java/org/studiorailgun/conversation/evaluators/query/QueryEval.java
index 3fc5d92..54b1eee 100644
--- a/src/main/java/org/studiorailgun/conversation/evaluators/query/QueryEval.java
+++ b/src/main/java/org/studiorailgun/conversation/evaluators/query/QueryEval.java
@@ -1,8 +1,16 @@
 package org.studiorailgun.conversation.evaluators.query;
 
+import java.util.Iterator;
+import java.util.Set;
+
+import org.studiorailgun.conversation.parser.NLPParser;
+import org.studiorailgun.conversation.parser.PennTreebankTagSet;
 import org.studiorailgun.conversation.tracking.Conversation;
 import
org.studiorailgun.conversation.tracking.Quote;
 
+import edu.stanford.nlp.ling.IndexedWord;
+import edu.stanford.nlp.semgraph.SemanticGraph;
+
 /**
  * Evaluates queries
  */
@@ -14,7 +22,62 @@ public class QueryEval {
      * @param quote The quote
+     * @throws UnsupportedOperationException If the sentence does not have exactly one root, or the root is not a supported verb
      */
     public static void evaluate(Conversation conversation, Quote quote){
-        System.out.println("Is a query!");
+        NLPParser.parse(quote);
+        SemanticGraph semanticGraph = quote.getGraph();
+        //getFirstRoot() fails on a graph without roots, so reject that case with a readable message first
+        if(semanticGraph == null || semanticGraph.getRoots().isEmpty()){
+            String message = "No root in sentence!\n" +
+            "\"" + quote.getRaw() + "\"\n" +
+            semanticGraph;
+            throw new UnsupportedOperationException(message);
+        }
+        if(semanticGraph.getRoots().size() > 1){
+            String message = "Multiple roots to sentence!\n" +
+            "\"" + quote.getRaw() + "\"\n" +
+            semanticGraph;
+            throw new UnsupportedOperationException(message);
+        }
+        IndexedWord root = semanticGraph.getFirstRoot();
+        if(!PennTreebankTagSet.isVerb(root.tag())){
+            String message = "Unsupported root type!\n" +
+            "\"" + quote.getRaw() + "\"\n" +
+            semanticGraph;
+            throw new UnsupportedOperationException(message);
+        }
+        if(!PennTreebankTagSet.isBe(root.tag())){
+            String message = "Unsupported root verb type!\n" +
+            "\"" + quote.getRaw() + "\"\n" +
+            semanticGraph;
+            throw new UnsupportedOperationException(message);
+        }
+        QueryEval.evaluateBe(conversation, quote);
+    }
+
+    /**
+     * Evaluates an equivalence query
+     * @param conversation The conversation
+     * @param quote The quote
+     * @throws UnsupportedOperationException If the root has fewer than two children to compare
+     */
+    private static void evaluateBe(Conversation conversation, Quote quote){
+        //get the two things we're comparing
+        SemanticGraph graph = quote.getGraph();
+        IndexedWord root = graph.getFirstRoot();
+        Set<IndexedWord> children = graph.getChildren(root);
+        if(children.size() < 2){
+            String message = "Equivalence query needs two children under the root!\n" +
+            "\"" + quote.getRaw() + "\"\n" +
+            graph;
+            throw new UnsupportedOperationException(message);
+        }
+        //NOTE(review): a Set does not promise sentence order, so confirm which child is "first" before relying on it
+        Iterator<IndexedWord> iterator = children.iterator();
+        IndexedWord firstItem = iterator.next();
+        IndexedWord secondItem = iterator.next();
+        //TODO: compare the two noun stacks; for now parsing only checks that both are supported
+        NounStack firstNoun = NounStack.parse(graph, firstItem);
+        NounStack secondNoun = NounStack.parse(graph, secondItem);
     }
 
 }
diff --git a/src/main/java/org/studiorailgun/conversation/parser/NLPParser.java b/src/main/java/org/studiorailgun/conversation/parser/NLPParser.java
index 23e20a4..13fa021 100644
---
a/src/main/java/org/studiorailgun/conversation/parser/NLPParser.java
+++ b/src/main/java/org/studiorailgun/conversation/parser/NLPParser.java
@@ -1,13 +1,11 @@
 package org.studiorailgun.conversation.parser;
 
-import edu.stanford.nlp.coref.data.CorefChain;
-import edu.stanford.nlp.ling.*;
-import edu.stanford.nlp.ie.util.*;
 import edu.stanford.nlp.pipeline.*;
 import edu.stanford.nlp.semgraph.*;
-import edu.stanford.nlp.trees.*;
 import java.util.*;
+
+import org.studiorailgun.conversation.tracking.Quote;
+
 /**
  * Parses a sentence
  */
@@ -40,11 +38,23 @@ public class NLPParser {
      * Parses the input sentence
-     * @param input The input sentence
+     * @param quote The quote to parse; the parsed document and semantic graph are stored on it
+     * @throws UnsupportedOperationException If the quote does not contain any sentence
      */
-    public static void parse(String input){
-        // create a document object
-        CoreDocument document = new CoreDocument(text);
-        // annnotate the document
-        pipeline.annotate(document);
+    public static void parse(Quote quote){
+        if(quote.getParsedDocument() == null){
+            // annotate the document
+            CoreDocument document = new CoreDocument(quote.getRaw());
+            pipeline.annotate(document);
+
+            //fail with a readable message instead of an index error when there is no sentence to read
+            if(document.sentences().isEmpty()){
+                throw new UnsupportedOperationException("Quote contains no sentence to parse!\n\"" + quote.getRaw() + "\"");
+            }
+
+            //store the document only once the graph exists, so a failed parse is retried instead of leaving a null graph
+            SemanticGraph graph = document.sentences().get(0).dependencyParse();
+            quote.setParsedDocument(document);
+            quote.setGraph(graph);
+        }
 
         //TODO: grab information from document here
     }
diff --git a/src/main/java/org/studiorailgun/conversation/parser/PennTreebankTagSet.java b/src/main/java/org/studiorailgun/conversation/parser/PennTreebankTagSet.java
new file mode 100644
index 0000000..9c47208
--- /dev/null
+++ b/src/main/java/org/studiorailgun/conversation/parser/PennTreebankTagSet.java
@@ -0,0 +1,72 @@
+package org.studiorailgun.conversation.parser;
+
+/**
+ * The Penn Treebank Tag Set
+ */
+public class PennTreebankTagSet {
+
+    /**
+     * Checks if this tag is a verb or not
+     * @param tag The tag, may be null
+     * @return true if it is a verb, false otherwise (a null tag is never a verb)
+     */
+    public static boolean isVerb(String tag){
+        //switching on a null string throws, and a word without a tag is not a verb
+        if(tag == null){
+            return false;
+        }
+        switch(tag){
+            case "VB":
+            case "VBD":
+            case "VBG":
+            case "VBN":
+            case "VBP":
+            case "VBZ":
+            case "VH":
+            case "VHD":
+            case "VHG":
+            case "VHN":
+            case "VHP":
+            case "VHZ":
+            case "VV":
+            case "VVD":
+            case "VVG":
+            case "VVN":
+            case "VVP":
+            case "VVZ": {
+                return true;
+            }
+            default: {
+                return false;
+            }
+        }
+    }
+
+    /**
+     * Checks if this tag is a form of "to be" or not
+     * NOTE(review): in the standard Penn Treebank tag set VB* marks any verb, not only "to be";
+     * only TreeTagger-style tag sets reserve VB* for "be" (VH* for "have", VV* for other verbs).
+     * Confirm which tagger feeds this method, or check the word's lemma as well.
+     * @param tag The tag, may be null
+     * @return true if it is a form of "to be", false otherwise (including for a null tag)
+     */
+    public static boolean isBe(String tag){
+        if(tag == null){
+            return false;
+        }
+        switch(tag){
+            case "VB":
+            case "VBD":
+            case "VBG":
+            case "VBN":
+            case "VBP":
+            case "VBZ": {
+                return true;
+            }
+            default: {
+                return false;
+            }
+        }
+    }
+
+}
diff --git a/src/main/java/org/studiorailgun/conversation/tracking/Quote.java b/src/main/java/org/studiorailgun/conversation/tracking/Quote.java
index fbb8037..1ba7f00 100644
--- a/src/main/java/org/studiorailgun/conversation/tracking/Quote.java
+++ b/src/main/java/org/studiorailgun/conversation/tracking/Quote.java
@@ -2,6 +2,9 @@ package org.studiorailgun.conversation.tracking;
 
 import org.studiorailgun.conversation.categorization.SentenceFunctionCategorizor.SentenceFunction;
 
+import edu.stanford.nlp.pipeline.CoreDocument;
+import edu.stanford.nlp.semgraph.SemanticGraph;
+
 /**
  * A quote stated during the conversation
  */
@@ -17,6 +20,16 @@ public class Quote {
      */
     SentenceFunction function;
 
+    /**
+     * The CoreNLP parsed document
+     */
+    CoreDocument parsedDocument;
+
+    /**
+     * The parsed semantic graph
+     */
+    SemanticGraph graph;
+
     /**
      * Constructor
      * @param input The raw text of the quote
@@ -45,4 +58,36 @@ public class Quote {
         this.function = function;
     }
 
+    /**
+     * Gets the CoreNLP parsed document
+     * @return The parsed document, or null if the quote has not been parsed yet
+     */
+    public CoreDocument getParsedDocument() {
+        return parsedDocument;
+    }
+
+    /**
+     * Sets the CoreNLP parsed document
+     * @param parsedDocument The parsed document
+     */
+    public void setParsedDocument(CoreDocument parsedDocument) {
+        this.parsedDocument = parsedDocument;
+    }
+
+    /**
+     * Gets the parsed semantic graph
+     * @return The semantic graph, or null if the quote has not been parsed yet
+     */
+    public SemanticGraph getGraph() {
+        return graph;
+    }
+
+    /**
+     * Sets the parsed semantic graph
+     * @param graph The semantic graph
+     */
+    public void setGraph(SemanticGraph graph) {
+        this.graph = graph;
+    }
+
 }
diff --git a/src/test/java/org/studiorailgun/QueryTests.java b/src/test/java/org/studiorailgun/QueryTests.java
new file mode 100644
index 0000000..73c6e74
--- /dev/null
+++ b/src/test/java/org/studiorailgun/QueryTests.java
@@ -0,0 +1,27 @@
+package org.studiorailgun;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+import org.junit.jupiter.api.Test;
+import org.studiorailgun.conversation.ConvAI;
+import org.studiorailgun.conversation.tracking.Quote;
+
+/**
+ * Query tests
+ */
+public class QueryTests {
+
+    /**
+     * Sends a color question through ConvAI.simFrame; no answer is asserted yet
+     */
+    @Test
+    public void testColorQuery(){
+        Globals.init("./data/test/webs/web.json");
+
+        Quote response = ConvAI.simFrame("What color is your hat?");
+
+        //TODO: enable once queries produce a response (assertEquals takes the expected value first)
+        // assertEquals("Blue", response.getRaw());
+    }
+
+}