From 7ebee6b8d0e4ba31d5f41b28b31fa7f2f88cfa72 Mon Sep 17 00:00:00 2001
From: austin <austin@austinhoover.net>
Date: Mon, 30 Dec 2024 19:24:56 -0500
Subject: [PATCH] basic clause parsing

---
 data/dictionary/verbs.csv                     |   2 +
 .../conversation/parser/NLPParser.java        |  15 +-
 .../conversation/parser/depend/Adjunct.java   |  31 +++
 .../conversation/parser/depend/Adverb.java    |  31 +++
 .../conversation/parser/depend/Argument.java  |  31 +++
 .../conversation/parser/depend/Clause.java    | 190 ++++++++++++++++++
 .../parser/depend/Coordinator.java            |  31 +++
 .../conversation/parser/depend/Predicate.java |  77 +++++++
 .../org/studiorailgun/ComplexClauseTests.java |  72 +++++++
 .../org/studiorailgun/SimpleClauseTests.java  | 175 ++++++++++++++++
 10 files changed, 653 insertions(+), 2 deletions(-)
 create mode 100644 data/dictionary/verbs.csv
 create mode 100644 src/main/java/org/studiorailgun/conversation/parser/depend/Adjunct.java
 create mode 100644 src/main/java/org/studiorailgun/conversation/parser/depend/Adverb.java
 create mode 100644 src/main/java/org/studiorailgun/conversation/parser/depend/Argument.java
 create mode 100644 src/main/java/org/studiorailgun/conversation/parser/depend/Clause.java
 create mode 100644 src/main/java/org/studiorailgun/conversation/parser/depend/Coordinator.java
 create mode 100644 src/main/java/org/studiorailgun/conversation/parser/depend/Predicate.java
 create mode 100644 src/test/java/org/studiorailgun/ComplexClauseTests.java
 create mode 100644 src/test/java/org/studiorailgun/SimpleClauseTests.java

diff --git a/data/dictionary/verbs.csv b/data/dictionary/verbs.csv
new file mode 100644
index 0000000..b15f834
--- /dev/null
+++ b/data/dictionary/verbs.csv
@@ -0,0 +1,2 @@
+word,lemma,valence,tense,mood,aspect
+be,be,2,?,?,?
diff --git a/src/main/java/org/studiorailgun/conversation/parser/NLPParser.java b/src/main/java/org/studiorailgun/conversation/parser/NLPParser.java
index 02209b5..061d1c3 100644
--- a/src/main/java/org/studiorailgun/conversation/parser/NLPParser.java
+++ b/src/main/java/org/studiorailgun/conversation/parser/NLPParser.java
@@ -42,8 +42,7 @@ public class NLPParser {
     public static void parse(Quote quote){
         if(quote.getParsedDocument() == null){
             // annnotate the document and store
-            CoreDocument document = new CoreDocument(quote.getRaw());
-            pipeline.annotate(document);
+            CoreDocument document = NLPParser.parse(quote.getRaw());
             quote.setParsedDocument(document);
 
             for(CoreSentence coreSentence : document.sentences()){
@@ -56,4 +55,16 @@ public class NLPParser {
         }
     }
 
+    /**
+     * Annotates the input sentence with the pipeline and returns the resulting document
+     * @param input The input sentence
+     */
+    public static CoreDocument parse(String input){
+        // annotate the document; unlike parse(Quote), nothing is stored here, the caller gets the document back
+        CoreDocument document = new CoreDocument(input);
+        pipeline.annotate(document);
+
+        return document;
+    }
+
 }
diff --git a/src/main/java/org/studiorailgun/conversation/parser/depend/Adjunct.java b/src/main/java/org/studiorailgun/conversation/parser/depend/Adjunct.java
new file mode 100644
index 0000000..b368351
--- /dev/null
+++ b/src/main/java/org/studiorailgun/conversation/parser/depend/Adjunct.java
@@ -0,0 +1,31 @@
+package org.studiorailgun.conversation.parser.depend;
+
+import edu.stanford.nlp.ling.IndexedWord;
+
+/**
+ * A linguistic adjunct
+ */
+public class Adjunct {
+    
+    /**
+     * The root of the adjunct
+     */
+    IndexedWord root;
+
+    /**
+     * Constructor
+     * @param root The root of the adjunct
+     */
+    public Adjunct(IndexedWord root){
+        this.root = root;
+    }
+
+    /**
+     * Gets the root of this adjunct
+     * @return The root
+     */
+    public IndexedWord getRoot(){
+        return root;
+    }
+
+}
diff --git a/src/main/java/org/studiorailgun/conversation/parser/depend/Adverb.java b/src/main/java/org/studiorailgun/conversation/parser/depend/Adverb.java
new file mode 100644
index 0000000..57b8a4e
--- /dev/null
+++ b/src/main/java/org/studiorailgun/conversation/parser/depend/Adverb.java
@@ -0,0 +1,31 @@
+package org.studiorailgun.conversation.parser.depend;
+
+import edu.stanford.nlp.ling.IndexedWord;
+
+/**
+ * An adverb
+ */
+public class Adverb {
+    
+    /**
+     * The root of the adverb
+     */
+    IndexedWord root;
+
+    /**
+     * Constructor
+     * @param root The root of the adverb
+     */
+    public Adverb(IndexedWord root){
+        this.root = root;
+    }
+
+    /**
+     * Gets the root of this adverb
+     * @return The root
+     */
+    public IndexedWord getRoot(){
+        return root;
+    }
+
+}
diff --git a/src/main/java/org/studiorailgun/conversation/parser/depend/Argument.java b/src/main/java/org/studiorailgun/conversation/parser/depend/Argument.java
new file mode 100644
index 0000000..3865fd9
--- /dev/null
+++ b/src/main/java/org/studiorailgun/conversation/parser/depend/Argument.java
@@ -0,0 +1,31 @@
+package org.studiorailgun.conversation.parser.depend;
+
+import edu.stanford.nlp.ling.IndexedWord;
+
+/**
+ * A linguistic argument
+ */
+public class Argument {
+    
+    /**
+     * The root of the argument
+     */
+    IndexedWord root;
+
+    /**
+     * Constructor
+     * @param root The root of the argument
+     */
+    public Argument(IndexedWord root){
+        this.root = root;
+    }
+
+    /**
+     * Gets the root of this argument
+     * @return The root
+     */
+    public IndexedWord getRoot(){
+        return root;
+    }
+
+}
diff --git a/src/main/java/org/studiorailgun/conversation/parser/depend/Clause.java b/src/main/java/org/studiorailgun/conversation/parser/depend/Clause.java
new file mode 100644
index 0000000..4286ff7
--- /dev/null
+++ b/src/main/java/org/studiorailgun/conversation/parser/depend/Clause.java
@@ -0,0 +1,190 @@
+package org.studiorailgun.conversation.parser.depend;
+
+import java.util.LinkedList;
+import java.util.List;
+
+import edu.stanford.nlp.ling.IndexedWord;
+import edu.stanford.nlp.semgraph.SemanticGraph;
+import edu.stanford.nlp.trees.GrammaticalRelation;
+
+/**
+ * The macro structure of a clause: its predicate, arguments, subject, adjuncts, nested clauses and coordinator
+ */
+public class Clause {
+    
+    /**
+     * The predicate of the sentence
+     */
+    Predicate predicate;
+
+    /**
+     * The arguments of the sentence
+     */
+    List<Argument> arguments = new LinkedList<Argument>();
+
+    /**
+     * The subject of the structure
+     */
+    Argument subject;
+
+    /**
+     * The adjuncts of the sentence
+     */
+    List<Adjunct> adjuncts = new LinkedList<Adjunct>();
+
+    /**
+     * The list of clauses
+     */
+    List<Clause> clauses = new LinkedList<Clause>();
+
+    /**
+     * The coordinator for this clause
+     */
+    Coordinator coordinator;
+
+    /**
+     * Parses the macro structure of a semantic graph
+     * @param graph The graph
+     * @return The macro structure
+     */
+    public static Clause parse(SemanticGraph graph){
+        if(graph.getRoots().size() != 1){
+            throw new Error("Unable to parse sentences with roots != 1!");
+        }
+        IndexedWord root = graph.getFirstRoot();
+        Clause rVal = parse(graph,root);
+        // throw new Error("\n" + graph);
+        return rVal;
+    }
+
+    /**
+     * Parses the macro structure of a semantic graph
+     * @param graph The graph
+     * @param root The root to parse from
+     * @return The macro structure
+     */
+    private static Clause parse(SemanticGraph graph, IndexedWord root){
+        Clause rVal = new Clause();
+        List<IndexedWord> children = graph.getChildList(root);
+
+        //the root is (typically) the predicate
+        Predicate pred = new Predicate(root);
+        rVal.predicate = pred;
+
+        //parse all arguments
+        for(IndexedWord child : children){
+            GrammaticalRelation relation = graph.reln(root, child);
+            switch(relation.getLongName()){
+
+                //subjects
+                case "nominal subject": {
+                    Argument arg = new Argument(child);
+                    rVal.arguments.add(arg);
+                    rVal.subject = arg;
+                } break;
+
+                //predicates
+                case "copula": {
+                    //this means the root is a noun, but this related word is turning it into a copular predicate
+                } break;
+
+                //a prepositional adjunct (oblique)
+                case "obl_preposition": {
+                    Adjunct adj = new Adjunct(child);
+                    rVal.adjuncts.add(adj);
+                } break;
+
+                //direct objects
+                case "direct object": {
+                    Argument arg = new Argument(child);
+                    rVal.arguments.add(arg);
+                } break;
+
+                //indirect objects
+                case "indirect object": {
+                    Argument arg = new Argument(child);
+                    rVal.arguments.add(arg);
+                } break;
+
+                //adverb modifying the root predicate
+                case "adverbial modifier": {
+                    pred.addAdverb(new Adverb(child));
+                } break;
+
+                //certain cases of "there" and "it" in sentences
+                //when these cases occur, they are signifying that the predicate is existential
+                case "expletive": {
+                    pred.setExistential(true);
+                } break;
+
+                //the word coordinating this clause
+                case "coordination": {
+                    rVal.coordinator = new Coordinator(child);
+                } break;
+
+                //A subject in a compound sentence
+                case "compound modifier":{
+                    Argument arg = new Argument(child);
+                    rVal.arguments.add(arg);
+                    rVal.subject = arg;
+                } break;
+
+
+                //
+                //clauses
+                //
+
+                //a collapsed clause
+                case "conj_collapsed": {
+                    Clause clause = Clause.parse(graph, child);
+                    rVal.clauses.add(clause);
+                } break;
+
+                //this is a dependent clause that is functioning as an argument
+                case "clausal complement": {
+                    Argument arg = new Argument(child);
+                    rVal.arguments.add(arg);
+                } break;
+
+                //cases to ignore
+                case "punctuation": {
+                    continue;
+                }
+
+                //unhandled cases
+                default: {
+                    throw new Error("Unsupported relation type! " + relation.getLongName() + "\n" + "for " + child.originalText() + "\n" + graph);
+                }
+            }
+        }
+        // throw new Error("\n" + graph);
+        return rVal;
+    }
+
+    public Predicate getPredicate() {
+        return predicate;
+    }
+
+    public List<Argument> getArguments() {
+        return arguments;
+    }
+
+    public Argument getSubject() {
+        return subject;
+    }
+
+    public List<Adjunct> getAdjuncts() {
+        return adjuncts;
+    }
+
+    public List<Clause> getClauses(){
+        return clauses;
+    }
+    
+    public Coordinator getCoordinator(){
+        return coordinator;
+    }
+
+    
+
+}
diff --git a/src/main/java/org/studiorailgun/conversation/parser/depend/Coordinator.java b/src/main/java/org/studiorailgun/conversation/parser/depend/Coordinator.java
new file mode 100644
index 0000000..26d7c0d
--- /dev/null
+++ b/src/main/java/org/studiorailgun/conversation/parser/depend/Coordinator.java
@@ -0,0 +1,31 @@
+package org.studiorailgun.conversation.parser.depend;
+
+import edu.stanford.nlp.ling.IndexedWord;
+
+/**
+ * The coordinating word for this clause
+ */
+public class Coordinator {
+    
+    /**
+     * The root of the coordinator
+     */
+    IndexedWord root;
+
+    /**
+     * Constructor
+     * @param root The root of the coordinator
+     */
+    public Coordinator(IndexedWord root){
+        this.root = root;
+    }
+
+    /**
+     * Gets the root of this coordinator
+     * @return The root
+     */
+    public IndexedWord getRoot(){
+        return root;
+    }
+
+}
diff --git a/src/main/java/org/studiorailgun/conversation/parser/depend/Predicate.java b/src/main/java/org/studiorailgun/conversation/parser/depend/Predicate.java
new file mode 100644
index 0000000..b3c3c37
--- /dev/null
+++ b/src/main/java/org/studiorailgun/conversation/parser/depend/Predicate.java
@@ -0,0 +1,77 @@
+package org.studiorailgun.conversation.parser.depend;
+
+import java.util.LinkedList;
+import java.util.List;
+
+import edu.stanford.nlp.ling.IndexedWord;
+
+/**
+ * A linguistic predicate
+ */
+public class Predicate {
+    
+    /**
+     * The root of the predicate
+     */
+    IndexedWord root;
+
+    /**
+     * The list of adverbs
+     */
+    List<Adverb> adverbs = new LinkedList<Adverb>();
+
+
+    /**
+     * The predicate is existential. Essentially, this means that the sentence is declaring the existence of the subject.
+     */
+    boolean existential;
+
+    /**
+     * Constructor
+     * @param root The root of the predicate
+     */
+    public Predicate(IndexedWord root){
+        this.root = root;
+    }
+
+    /**
+     * Gets the root of this predicate
+     * @return The root
+     */
+    public IndexedWord getRoot(){
+        return root;
+    }
+
+    /**
+     * Adds an adverb to the predicate
+     * @param adverb The adverb
+     */
+    public void addAdverb(Adverb adverb){
+        this.adverbs.add(adverb);
+    }
+
+    /**
+     * Gets the adverbs modifying this predicate
+     * @return The list of adverbs
+     */
+    public List<Adverb> getAdverbs(){
+        return adverbs;
+    }
+
+    /**
+     * Sets the existential status of the predicate
+     * @param existential true if existential, false otherwise
+     */
+    public void setExistential(boolean existential){
+        this.existential = existential;
+    }
+
+    /**
+     * Checks if this is an existential predicate or not
+     * @return true if is existential, false otherwise
+     */
+    public boolean isExistential(){
+        return existential;
+    }
+
+}
diff --git a/src/test/java/org/studiorailgun/ComplexClauseTests.java b/src/test/java/org/studiorailgun/ComplexClauseTests.java
new file mode 100644
index 0000000..720c33e
--- /dev/null
+++ b/src/test/java/org/studiorailgun/ComplexClauseTests.java
@@ -0,0 +1,72 @@
+package org.studiorailgun;
+
+import static org.junit.Assert.*;
+
+import org.junit.Test;
+import org.studiorailgun.conversation.parser.NLPParser;
+import org.studiorailgun.conversation.parser.depend.Clause;
+
+import edu.stanford.nlp.pipeline.CoreDocument;
+import edu.stanford.nlp.pipeline.CoreSentence;
+import edu.stanford.nlp.semgraph.SemanticGraph;
+
+/**
+ * Clause parsing tests for complex (multi-clause) sentences
+ */
+public class ComplexClauseTests {
+    
+    @Test
+    public void testMacroStructureParse1(){
+        NLPParser.init();
+        CoreDocument doc = NLPParser.parse("The sun lowered, the stars appeared, and the moon rose.");
+        CoreSentence sentence = doc.sentences().get(0);
+        SemanticGraph graph = sentence.dependencyParse();
+        Clause struct = Clause.parse(graph);
+
+        //test number of returns (expected value first, actual second, so failure messages read correctly)
+        assertNotNull(struct.getPredicate());
+        assertEquals(2, struct.getArguments().size());
+        assertEquals(1, struct.getClauses().size());
+
+        //test returned data
+        assertEquals("lowered", struct.getPredicate().getRoot().originalText()); //should be the root verb
+        assertEquals("sun", struct.getArguments().get(0).getRoot().originalText()); //should be the subject
+    }
+
+    @Test
+    public void testMacroStructureParse2(){
+        NLPParser.init();
+        CoreDocument doc = NLPParser.parse("Birds fly, but fish swim.");
+        CoreSentence sentence = doc.sentences().get(0);
+        SemanticGraph graph = sentence.dependencyParse();
+        Clause struct = Clause.parse(graph);
+
+        //test number of returns (expected value first, actual second)
+        assertNotNull(struct.getPredicate());
+        assertEquals(1, struct.getArguments().size());
+        assertEquals(1, struct.getClauses().size());
+
+        //test returned data
+        assertEquals("fly", struct.getPredicate().getRoot().originalText()); //should be the root verb
+        assertEquals("Birds", struct.getArguments().get(0).getRoot().originalText()); //should be the subject
+    }
+
+    @Test
+    public void testMacroStructureParse3(){
+        NLPParser.init();
+        CoreDocument doc = NLPParser.parse("She cooked dinner, yet nobody ate it.");
+        CoreSentence sentence = doc.sentences().get(0);
+        SemanticGraph graph = sentence.dependencyParse();
+        Clause struct = Clause.parse(graph);
+
+        //test number of returns (expected value first, actual second)
+        assertNotNull(struct.getPredicate());
+        assertEquals(2, struct.getArguments().size());
+        assertEquals(1, struct.getClauses().size());
+
+        //test returned data
+        assertEquals("cooked", struct.getPredicate().getRoot().originalText()); //should be the root verb
+        assertEquals("She", struct.getArguments().get(0).getRoot().originalText()); //should be the subject
+    }
+
+}
diff --git a/src/test/java/org/studiorailgun/SimpleClauseTests.java b/src/test/java/org/studiorailgun/SimpleClauseTests.java
new file mode 100644
index 0000000..ea0dcd3
--- /dev/null
+++ b/src/test/java/org/studiorailgun/SimpleClauseTests.java
@@ -0,0 +1,175 @@
+package org.studiorailgun;
+
+import static org.junit.Assert.*;
+
+import org.junit.Test;
+import org.studiorailgun.conversation.parser.NLPParser;
+import org.studiorailgun.conversation.parser.depend.Clause;
+
+import edu.stanford.nlp.pipeline.CoreDocument;
+import edu.stanford.nlp.pipeline.CoreSentence;
+import edu.stanford.nlp.semgraph.SemanticGraph;
+
+/**
+ * Clause parsing tests for simple (single-clause) sentences
+ */
+public class SimpleClauseTests {
+    
+    @Test
+    public void testMacroStructureParse1(){
+        NLPParser.init();
+        CoreDocument doc = NLPParser.parse("His name is Bob.");
+        CoreSentence sentence = doc.sentences().get(0);
+        SemanticGraph graph = sentence.dependencyParse();
+        Clause struct = Clause.parse(graph);
+
+        //test number of returns (expected value first, actual second, so failure messages read correctly)
+        assertNotNull(struct.getPredicate());
+        assertEquals(1, struct.getArguments().size());
+
+        //test returned data
+        assertEquals("Bob", struct.getPredicate().getRoot().originalText()); //the predicate root is expected to be "Bob", not the copula "is"
+        assertEquals("name", struct.getArguments().get(0).getRoot().originalText()); //should be the subject
+    }
+
+    @Test
+    public void testMacroStructureParse2(){
+        NLPParser.init();
+        CoreDocument doc = NLPParser.parse("She ran.");
+        CoreSentence sentence = doc.sentences().get(0);
+        SemanticGraph graph = sentence.dependencyParse();
+        Clause struct = Clause.parse(graph);
+
+        //test number of returns (expected value first, actual second)
+        assertNotNull(struct.getPredicate());
+        assertEquals(1, struct.getArguments().size());
+
+        //test returned data
+        assertEquals("ran", struct.getPredicate().getRoot().originalText()); //should be the root verb
+        assertEquals("She", struct.getArguments().get(0).getRoot().originalText()); //should be the subject
+    }
+
+    @Test
+    public void testMacroStructureParse3(){
+        NLPParser.init();
+        CoreDocument doc = NLPParser.parse("The cat sleeps.");
+        CoreSentence sentence = doc.sentences().get(0);
+        SemanticGraph graph = sentence.dependencyParse();
+        Clause struct = Clause.parse(graph);
+
+        //test number of returns (expected value first, actual second)
+        assertNotNull(struct.getPredicate());
+        assertEquals(1, struct.getArguments().size());
+
+        //test returned data
+        assertEquals("sleeps", struct.getPredicate().getRoot().originalText()); //should be the root verb
+        assertEquals("cat", struct.getArguments().get(0).getRoot().originalText()); //should be the subject
+    }
+
+    @Test
+    public void testMacroStructureParse4(){
+        NLPParser.init();
+        CoreDocument doc = NLPParser.parse("A dog barked.");
+        CoreSentence sentence = doc.sentences().get(0);
+        SemanticGraph graph = sentence.dependencyParse();
+        Clause struct = Clause.parse(graph);
+
+        //test number of returns (expected value first, actual second)
+        assertNotNull(struct.getPredicate());
+        assertEquals(1, struct.getArguments().size());
+
+        //test returned data
+        assertEquals("barked", struct.getPredicate().getRoot().originalText()); //should be the root verb
+        assertEquals("dog", struct.getArguments().get(0).getRoot().originalText()); //should be the subject
+    }
+
+    @Test
+    public void testMacroStructureParse5(){
+        NLPParser.init();
+        CoreDocument doc = NLPParser.parse("Colorful birds fly.");
+        CoreSentence sentence = doc.sentences().get(0);
+        SemanticGraph graph = sentence.dependencyParse();
+        Clause struct = Clause.parse(graph);
+
+        //test number of returns (expected value first, actual second)
+        assertNotNull(struct.getPredicate());
+        assertEquals(1, struct.getArguments().size());
+
+        //test returned data
+        assertEquals("fly", struct.getPredicate().getRoot().originalText()); //should be the root verb
+        assertEquals("birds", struct.getArguments().get(0).getRoot().originalText()); //should be the subject
+    }
+
+    @Test
+    public void testMacroStructureParse6(){
+        NLPParser.init();
+        CoreDocument doc = NLPParser.parse("The cat sleeps on the mat.");
+        CoreSentence sentence = doc.sentences().get(0);
+        SemanticGraph graph = sentence.dependencyParse();
+        Clause struct = Clause.parse(graph);
+
+        //test number of returns (expected value first, actual second)
+        assertNotNull(struct.getPredicate());
+        assertEquals(1, struct.getArguments().size());
+        assertEquals(1, struct.getAdjuncts().size());
+
+        //test returned data
+        assertEquals("sleeps", struct.getPredicate().getRoot().originalText()); //should be the root verb
+        assertEquals("cat", struct.getArguments().get(0).getRoot().originalText()); //should be the subject
+        assertEquals("mat", struct.getAdjuncts().get(0).getRoot().originalText()); //should be the prepositional adjunct
+    }
+
+    @Test
+    public void testMacroStructureParse7(){
+        NLPParser.init();
+        CoreDocument doc = NLPParser.parse("Guys, take it easy.");
+        CoreSentence sentence = doc.sentences().get(0);
+        SemanticGraph graph = sentence.dependencyParse();
+        Clause struct = Clause.parse(graph);
+
+        //test number of returns (expected value first, actual second)
+        assertNotNull(struct.getPredicate());
+        assertEquals(2, struct.getArguments().size());
+
+        //test returned data
+        assertEquals("take", struct.getPredicate().getRoot().originalText()); //should be the root verb
+        assertEquals("Guys", struct.getArguments().get(0).getRoot().originalText()); //should be the subject
+        assertEquals("easy", struct.getPredicate().getAdverbs().get(0).getRoot().originalText()); //should be the adverb
+    }
+
+    @Test
+    public void testMacroStructureParse8(){
+        NLPParser.init();
+        CoreDocument doc = NLPParser.parse("He dropped her the ball.");
+        CoreSentence sentence = doc.sentences().get(0);
+        SemanticGraph graph = sentence.dependencyParse();
+        Clause struct = Clause.parse(graph);
+
+        //test number of returns (expected value first, actual second)
+        assertNotNull(struct.getPredicate());
+        assertEquals(3, struct.getArguments().size());
+
+        //test returned data
+        assertEquals("dropped", struct.getPredicate().getRoot().originalText()); //should be the root verb
+        assertEquals("He", struct.getArguments().get(0).getRoot().originalText()); //should be the subject
+    }
+
+    @Test
+    public void testMacroStructureParse9(){
+        NLPParser.init();
+        CoreDocument doc = NLPParser.parse("There is a ball below.");
+        CoreSentence sentence = doc.sentences().get(0);
+        SemanticGraph graph = sentence.dependencyParse();
+        Clause struct = Clause.parse(graph);
+
+        //test number of returns (expected value first, actual second)
+        assertNotNull(struct.getPredicate());
+        assertEquals(1, struct.getArguments().size());
+
+        //test returned data
+        assertEquals("is", struct.getPredicate().getRoot().originalText()); //should be the existential verb
+        assertTrue(struct.getPredicate().isExistential()); //the expletive "There" should flag the predicate as existential
+        assertEquals("ball", struct.getArguments().get(0).getRoot().originalText()); //should be the subject
+    }
+
+}