some query processing work

2024-12-29 16:48:46 -05:00 · 2024-12-29 16:48:46 -05:00 · d451906ea8
commit d451906ea8
parent 2fb22d70fe
8 changed files with 290 additions and 10 deletions
--- a/docs/mantas.txt
+++ b/docs/mantas.txt
@ -4,3 +4,5 @@
 2. make everything easily debug-able

 3. work on a single sentence at a time
+
+4. only work on important sentences (i.e. don't work on questions asking what color something ISN'T)
--- a/src/main/java/org/studiorailgun/conversation/evaluators/query/Interrogative.java
+++ b/src/main/java/org/studiorailgun/conversation/evaluators/query/Interrogative.java
@ -0,0 +1,28 @@
+package org.studiorailgun.conversation.evaluators.query;
+
+/**
+ * Enumerates the interrogative (question) words, grouped by part of speech
+ */
+public class Interrogative {
+
+    /**
+     * The interrogative pronouns
+     */
+    public enum InterrogativePronoun {
+        WHAT,
+        WHICH,
+        WHO,
+        WHOSE
+    }
+
+    /**
+     * The interrogative adverbs
+     */
+    public enum InterrogativeAdverb {
+        WHY,
+        WHERE,
+        HOW,
+        WHEN
+    }
+
+}
--- a/src/main/java/org/studiorailgun/conversation/evaluators/query/NounStack.java
+++ b/src/main/java/org/studiorailgun/conversation/evaluators/query/NounStack.java
@ -0,0 +1,80 @@
+package org.studiorailgun.conversation.evaluators.query;
+
+import java.util.List;
+
+import edu.stanford.nlp.ling.IndexedWord;
+import edu.stanford.nlp.semgraph.SemanticGraph;
+
+/**
+ * One level of a noun phrase: a word plus the modifiers found on its direct children.
+ * The interrogative and possessive stay null when no matching child is present.
+ */
+public class NounStack {
+
+    /**
+     * If not null, this is the interrogative for this part of the noun stack
+     */
+    String interrogative;
+
+    /**
+     * If not null, this is the possessive pronoun for this part of the noun stack
+     */
+    String possessive;
+
+    /**
+     * The indexed word for this part of the noun stack
+     */
+    IndexedWord indexedWord = null;
+
+    /**
+     * Parses a noun stack from an indexed word
+     * @param graph The semantic graph
+     * @param indexedWord The word
+     * @return The NounStack
+     * @throws UnsupportedOperationException if a child of the word has a tag that is not handled
+     */
+    public static NounStack parse(SemanticGraph graph, IndexedWord indexedWord){
+        return NounStack.parseRecursive(graph, indexedWord);
+    }
+
+    /**
+     * Builds the noun stack rooted at a word by inspecting the tags of its direct children.
+     * Despite the name, this does not currently descend past the direct children.
+     * @param graph The semantic graph
+     * @param currentRoot The current root
+     * @return The noun stack
+     * @throws UnsupportedOperationException if a child has a tag that is not handled
+     */
+    private static NounStack parseRecursive(SemanticGraph graph, IndexedWord currentRoot){
+        NounStack rVal = new NounStack();
+        rVal.indexedWord = currentRoot;
+        //a word with no children simply skips the loop, so no emptiness check is needed
+        List<IndexedWord> children = graph.getChildList(currentRoot);
+        for(IndexedWord child : children){
+            switch(child.tag()){
+                case "WDT":
+                case "WP":
+                case "WP$":
+                case "WRB": {
+                    //the child is an interrogative attached to this word
+                    rVal.interrogative = child.originalText();
+                } break;
+                case "PRP$": {
+                    //the child is a possessive pronoun
+                    rVal.possessive = child.originalText();
+                } break;
+                default: {
+                    //the exception carries the child, its tag and the graph, so nothing is
+                    //printed to stdout here; the caller decides how to report the failure
+                    String message = "Unsupported child type in noun stack processing!\n" +
+                    child + "\n" +
+                    child.tag() + "\n" +
+                    graph;
+                    throw new UnsupportedOperationException(message);
+                }
+            }
+        }
+        return rVal;
+    }
+
+}
--- a/src/main/java/org/studiorailgun/conversation/evaluators/query/QueryEval.java
+++ b/src/main/java/org/studiorailgun/conversation/evaluators/query/QueryEval.java
@ -1,8 +1,16 @@
 package org.studiorailgun.conversation.evaluators.query;

+import java.util.Iterator;
+import java.util.Set;
+
+import org.studiorailgun.conversation.parser.NLPParser;
+import org.studiorailgun.conversation.parser.PennTreebankTagSet;
 import org.studiorailgun.conversation.tracking.Conversation;
 import org.studiorailgun.conversation.tracking.Quote;

+import edu.stanford.nlp.ling.IndexedWord;
+import edu.stanford.nlp.semgraph.SemanticGraph;
+
 /**
 * Evaluates queries
 */
@ -14,7 +22,48 @@ public class QueryEval {
     * @param quote The quote
     */
    public static void evaluate(Conversation conversation, Quote quote){
-        System.out.println("Is a query!");
+        NLPParser.parse(quote);
+        SemanticGraph semanticGraph = quote.getGraph();
+        //sentences with more than one root are not handled
+        if(semanticGraph.getRoots().size() > 1){
+            throw new UnsupportedOperationException(
+                "Multiple roots to sentence!\n" +
+                "\"" + quote.getRaw() + "\"\n" +
+                semanticGraph
+            );
+        }
+        //the root word has to be a verb ...
+        IndexedWord root = semanticGraph.getFirstRoot();
+        if(!PennTreebankTagSet.isVerb(root.tag())){
+            throw new UnsupportedOperationException(
+                "Unsupported root type!\n" + "\"" + quote.getRaw() + "\"\n" + semanticGraph
+            );
+        }
+        //... and, for now, specifically one that isBe accepts
+        if(!PennTreebankTagSet.isBe(root.tag())){
+            throw new UnsupportedOperationException(
+                "Unsupported root verb type!\n" + "\"" + quote.getRaw() + "\"\n" + semanticGraph
+            );
+        }
+        QueryEval.evaluateBe(conversation, quote);
+    }
+
+    /**
+     * Evaluates an equivalence query
+     * @param conversation The conversation
+     * @param quote The quote
+     * @throws UnsupportedOperationException if the root has fewer than two children to compare
+     */
+    private static void evaluateBe(Conversation conversation, Quote quote){
+        //get the two things we're comparing; NOTE(review): Set order may not be sentence order
+        SemanticGraph graph = quote.getGraph();
+        Set<IndexedWord> children = graph.getChildren(graph.getFirstRoot());
+        if(children.size() < 2){
+            throw new UnsupportedOperationException("Too few children of root!\n\"" + quote.getRaw() + "\"\n" + graph);
+        }
+        Iterator<IndexedWord> iterator = children.iterator();
+        NounStack firstNoun = NounStack.parse(graph, iterator.next());
+        NounStack secondNoun = NounStack.parse(graph, iterator.next());
    }

 }
--- a/src/main/java/org/studiorailgun/conversation/parser/NLPParser.java
+++ b/src/main/java/org/studiorailgun/conversation/parser/NLPParser.java
@ -1,13 +1,11 @@
 package org.studiorailgun.conversation.parser;

-import edu.stanford.nlp.coref.data.CorefChain;
-import edu.stanford.nlp.ling.*;
-import edu.stanford.nlp.ie.util.*;
 import edu.stanford.nlp.pipeline.*;
 import edu.stanford.nlp.semgraph.*;
-import edu.stanford.nlp.trees.*;
 import java.util.*;

+import org.studiorailgun.conversation.tracking.Quote;
+
 /**
 * Parses a sentence
 */
@ -40,11 +38,17 @@ public class NLPParser {
     * Parses the input sentence
     * @param input The input sentence
     */
-    public static void parse(String input){
-        // create a document object
-        CoreDocument document = new CoreDocument(text);
-        // annnotate the document
-        pipeline.annotate(document);
+    public static void parse(Quote quote){
+        if(quote.getParsedDocument() == null){
+            CoreDocument document = new CoreDocument(quote.getRaw());
+            pipeline.annotate(document);
+            if(document.sentences() == null || document.sentences().isEmpty()){ //fail before caching anything
+                throw new UnsupportedOperationException("No sentence found in quote!\n\"" + quote.getRaw() + "\"");
+            }
+            //store the parse and the semantic graph of the first sentence on the quote
+            quote.setParsedDocument(document);
+            quote.setGraph(document.sentences().get(0).dependencyParse());
+        }
        
        //TODO: grab information from document here
    }
--- a/src/main/java/org/studiorailgun/conversation/parser/PennTreebankTagSet.java
+++ b/src/main/java/org/studiorailgun/conversation/parser/PennTreebankTagSet.java
@ -0,0 +1,62 @@
+package org.studiorailgun.conversation.parser;
+
+/**
+ * The Penn Treebank Tag Set
+ */
+public class PennTreebankTagSet {
+
+    /**
+     * Tags that isVerb accepts: the VB, VH and VV families
+     */
+    private static final String[] VERB_TAGS = {
+        "VB", "VBD", "VBG", "VBN", "VBP", "VBZ",
+        "VH", "VHD", "VHG", "VHN", "VHP", "VHZ",
+        "VV", "VVD", "VVG", "VVN", "VVP", "VVZ",
+    };
+
+    /**
+     * Tags that isBe accepts: the VB family only
+     */
+    private static final String[] BE_TAGS = {
+        "VB", "VBD", "VBG", "VBN", "VBP", "VBZ",
+    };
+
+    /**
+     * Checks if this tag is a verb or not
+     * @param tag The tag
+     * @return true if it is a verb, false otherwise
+     * @throws NullPointerException if the tag is null
+     */
+    public static boolean isVerb(String tag){
+        return PennTreebankTagSet.contains(VERB_TAGS, tag);
+    }
+
+    /**
+     * Checks if this tag is a form of "to be" or not.
+     * NOTE(review): the decision uses the tag alone (no lemma); confirm that the tagger in use
+     * reserves the VB family for "to be" rather than emitting it for every verb.
+     * @param tag The tag
+     * @return true if it is a form of "to be", false otherwise
+     * @throws NullPointerException if the tag is null
+     */
+    public static boolean isBe(String tag){
+        return PennTreebankTagSet.contains(BE_TAGS, tag);
+    }
+
+    /**
+     * Checks whether a tag appears in a table of tags
+     * @param table The tags to search
+     * @param tag The tag to look for
+     * @return true if the table holds the tag, false otherwise
+     */
+    private static boolean contains(String[] table, String tag){
+        for(String candidate : table){
+            //equals is called on the tag so that a null tag still fails fast
+            if(tag.equals(candidate)){
+                return true;
+            }
+        }
+        return false;
+    }
+
+}
--- a/src/main/java/org/studiorailgun/conversation/tracking/Quote.java
+++ b/src/main/java/org/studiorailgun/conversation/tracking/Quote.java
@ -2,6 +2,9 @@ package org.studiorailgun.conversation.tracking;

 import org.studiorailgun.conversation.categorization.SentenceFunctionCategorizor.SentenceFunction;

+import edu.stanford.nlp.pipeline.CoreDocument;
+import edu.stanford.nlp.semgraph.SemanticGraph;
+
 /**
 * A quote stated during the conversation
 */
@ -17,6 +20,16 @@ public class Quote {
     */
    SentenceFunction function;

+    /**
+     * The CoreNLP parsed document
+     */
+    CoreDocument parsedDocument;
+
+    /**
+     * The parsed semantic graph
+     */
+    SemanticGraph graph;
+
    /**
     * Constructor
     * @param input The raw text of the quote
@ -45,4 +58,23 @@ public class Quote {
        this.function = function;
    }

+    /** Gets the CoreNLP parsed document */
+    public CoreDocument getParsedDocument() {
+        return parsedDocument;
+    }
+
+    /** Sets the CoreNLP parsed document */
+    public void setParsedDocument(CoreDocument parsedDocument) {
+        this.parsedDocument = parsedDocument;
+    }
+
+    /** Gets the parsed semantic graph */
+    public SemanticGraph getGraph() {
+        return graph;
+    }
+
+    /** Sets the parsed semantic graph */
+    public void setGraph(SemanticGraph graph) {
+        this.graph = graph;
+    }
 }
--- a/src/test/java/org/studiorailgun/QueryTests.java
+++ b/src/test/java/org/studiorailgun/QueryTests.java
@ -0,0 +1,23 @@
+package org.studiorailgun;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+import org.junit.jupiter.api.Test;
+import org.studiorailgun.conversation.ConvAI;
+import org.studiorailgun.conversation.tracking.Quote;
+
+/**
+ * Tests that run query (question) sentences through ConvAI.simFrame
+ */
+public class QueryTests {
+    
+    @Test
+    public void testOtherGreeting(){
+        Globals.init("./data/test/webs/web.json");
+        
+        Quote response = ConvAI.simFrame("What color is your hat?");
+
+        // TODO: enable once the query produces an answer -- assertEquals("Blue", response.getRaw());
+    }
+
+}