From 7ebee6b8d0e4ba31d5f41b28b31fa7f2f88cfa72 Mon Sep 17 00:00:00 2001 From: austin Date: Mon, 30 Dec 2024 19:24:56 -0500 Subject: [PATCH] basic clause parsing --- data/dictionary/verbs.csv | 2 + .../conversation/parser/NLPParser.java | 15 +- .../conversation/parser/depend/Adjunct.java | 31 +++ .../conversation/parser/depend/Adverb.java | 31 +++ .../conversation/parser/depend/Argument.java | 31 +++ .../conversation/parser/depend/Clause.java | 190 ++++++++++++++++++ .../parser/depend/Coordinator.java | 31 +++ .../conversation/parser/depend/Predicate.java | 77 +++++++ .../org/studiorailgun/ComplexClauseTests.java | 72 +++++++ .../org/studiorailgun/SimpleClauseTests.java | 175 ++++++++++++++++ 10 files changed, 653 insertions(+), 2 deletions(-) create mode 100644 data/dictionary/verbs.csv create mode 100644 src/main/java/org/studiorailgun/conversation/parser/depend/Adjunct.java create mode 100644 src/main/java/org/studiorailgun/conversation/parser/depend/Adverb.java create mode 100644 src/main/java/org/studiorailgun/conversation/parser/depend/Argument.java create mode 100644 src/main/java/org/studiorailgun/conversation/parser/depend/Clause.java create mode 100644 src/main/java/org/studiorailgun/conversation/parser/depend/Coordinator.java create mode 100644 src/main/java/org/studiorailgun/conversation/parser/depend/Predicate.java create mode 100644 src/test/java/org/studiorailgun/ComplexClauseTests.java create mode 100644 src/test/java/org/studiorailgun/SimpleClauseTests.java diff --git a/data/dictionary/verbs.csv b/data/dictionary/verbs.csv new file mode 100644 index 0000000..b15f834 --- /dev/null +++ b/data/dictionary/verbs.csv @@ -0,0 +1,2 @@ +word,lemma,valence,tense,mood,aspect +be,be,2,?,?,? 
diff --git a/src/main/java/org/studiorailgun/conversation/parser/NLPParser.java b/src/main/java/org/studiorailgun/conversation/parser/NLPParser.java
index 02209b5..061d1c3 100644
--- a/src/main/java/org/studiorailgun/conversation/parser/NLPParser.java
+++ b/src/main/java/org/studiorailgun/conversation/parser/NLPParser.java
@@ -42,8 +42,7 @@ public class NLPParser {
     public static void parse(Quote quote){
         if(quote.getParsedDocument() == null){
             // annnotate the document and store
-            CoreDocument document = new CoreDocument(quote.getRaw());
-            pipeline.annotate(document);
+            CoreDocument document = NLPParser.parse(quote.getRaw());
             quote.setParsedDocument(document);
 
             for(CoreSentence coreSentence : document.sentences()){
@@ -56,4 +55,16 @@ public class NLPParser {
         }
     }
 
+    /**
+     * Parses the input sentence
+     * @param input The input sentence
+     * @return The document after the pipeline has annotated it
+     */
+    public static CoreDocument parse(String input){
+        // annotate the document and hand it back to the caller
+        CoreDocument document = new CoreDocument(input);
+        pipeline.annotate(document);
+        return document;
+    }
+
 }
diff --git a/src/main/java/org/studiorailgun/conversation/parser/depend/Adjunct.java b/src/main/java/org/studiorailgun/conversation/parser/depend/Adjunct.java
new file mode 100644
index 0000000..b368351
--- /dev/null
+++ b/src/main/java/org/studiorailgun/conversation/parser/depend/Adjunct.java
@@ -0,0 +1,31 @@
+package org.studiorailgun.conversation.parser.depend;
+
+import edu.stanford.nlp.ling.IndexedWord;
+
+/**
+ * A linguistic adjunct
+ */
+public class Adjunct {
+
+    /**
+     * The root of the adjunct
+     */
+    IndexedWord root;
+
+    /**
+     * Constructor
+     * @param root The root of the adjunct
+     */
+    public Adjunct(IndexedWord root){
+        this.root = root;
+    }
+
+    /**
+     * Gets the root of this adjunct
+     * @return The root
+     */
+    public IndexedWord getRoot(){
+        return root;
+    }
+
+}
diff --git a/src/main/java/org/studiorailgun/conversation/parser/depend/Adverb.java b/src/main/java/org/studiorailgun/conversation/parser/depend/Adverb.java
new file mode 100644
index 0000000..57b8a4e
--- /dev/null
+++ b/src/main/java/org/studiorailgun/conversation/parser/depend/Adverb.java
@@ -0,0 +1,31 @@
+package org.studiorailgun.conversation.parser.depend;
+
+import edu.stanford.nlp.ling.IndexedWord;
+
+/**
+ * An adverb
+ */
+public class Adverb {
+
+    /**
+     * The root of the adverb
+     */
+    IndexedWord root;
+
+    /**
+     * Constructor
+     * @param root The root of the adverb
+     */
+    public Adverb(IndexedWord root){
+        this.root = root;
+    }
+
+    /**
+     * Gets the root of this adverb
+     * @return The root
+     */
+    public IndexedWord getRoot(){
+        return root;
+    }
+
+}
diff --git a/src/main/java/org/studiorailgun/conversation/parser/depend/Argument.java b/src/main/java/org/studiorailgun/conversation/parser/depend/Argument.java
new file mode 100644
index 0000000..3865fd9
--- /dev/null
+++ b/src/main/java/org/studiorailgun/conversation/parser/depend/Argument.java
@@ -0,0 +1,31 @@
+package org.studiorailgun.conversation.parser.depend;
+
+import edu.stanford.nlp.ling.IndexedWord;
+
+/**
+ * A linguistic argument
+ */
+public class Argument {
+
+    /**
+     * The root of the argument
+     */
+    IndexedWord root;
+
+    /**
+     * Constructor
+     * @param root The root of the argument
+     */
+    public Argument(IndexedWord root){
+        this.root = root;
+    }
+
+    /**
+     * Gets the root of this argument
+     * @return The root
+     */
+    public IndexedWord getRoot(){
+        return root;
+    }
+
+}
diff --git a/src/main/java/org/studiorailgun/conversation/parser/depend/Clause.java b/src/main/java/org/studiorailgun/conversation/parser/depend/Clause.java
new file mode 100644
index 0000000..4286ff7
--- /dev/null
+++ b/src/main/java/org/studiorailgun/conversation/parser/depend/Clause.java
@@ -0,0 +1,190 @@
+package org.studiorailgun.conversation.parser.depend;
+
+import java.util.LinkedList;
+import java.util.List;
+
+import edu.stanford.nlp.ling.IndexedWord;
+import edu.stanford.nlp.semgraph.SemanticGraph;
+import edu.stanford.nlp.trees.GrammaticalRelation;
+
+/**
+ * Parses the macro structure of the sentence
+ */
+public class Clause {
+
+    /**
+     * The predicate of the sentence
+     */
+    Predicate predicate;
+
+    /**
+     * The arguments of the sentence
+     */
+    List<Argument> arguments = new LinkedList<>();
+
+    /**
+     * The subject of the structure
+     */
+    Argument subject;
+
+    /**
+     * The adjuncts of the sentence
+     */
+    List<Adjunct> adjuncts = new LinkedList<>();
+
+    /**
+     * The list of clauses
+     */
+    List<Clause> clauses = new LinkedList<>();
+
+    /**
+     * The coordinator for this clause
+     */
+    Coordinator coordinator;
+
+    /**
+     * Parses the macro structure of a semantic graph
+     * @param graph The graph
+     * @return The macro structure
+     */
+    public static Clause parse(SemanticGraph graph){
+        if(graph.getRoots().size() != 1){
+            throw new Error("Unable to parse sentences with roots != 1!");
+        }
+        IndexedWord root = graph.getFirstRoot();
+        Clause rVal = parse(graph,root);
+        return rVal;
+    }
+
+    /**
+     * Parses the macro structure of a semantic graph
+     * @param graph The graph
+     * @param root The root to parse from
+     * @return The macro structure
+     */
+    private static Clause parse(SemanticGraph graph, IndexedWord root){
+        Clause rVal = new Clause();
+        List<IndexedWord> children = graph.getChildList(root);
+
+        //the root is (typically) the predicate
+        Predicate pred = new Predicate(root);
+        rVal.predicate = pred;
+
+        //parse all arguments
+        for(IndexedWord child : children){
+            GrammaticalRelation relation = graph.reln(root, child);
+            switch(relation.getLongName()){
+
+                //subjects
+                case "nominal subject": {
+                    Argument arg = new Argument(child);
+                    rVal.arguments.add(arg);
+                    rVal.subject = arg;
+                } break;
+
+                //predicates
+                case "copula": {
+                    //this means the root is a noun, but this related word is turning it into a copular predicate
+                } break;
+
+                //a prepositional adjunct (oblique)
+                case "obl_preposition": {
+                    Adjunct adj = new Adjunct(child);
+                    rVal.adjuncts.add(adj);
+                } break;
+
+                //direct objects
+                case "direct object": {
+                    Argument arg = new Argument(child);
+                    rVal.arguments.add(arg);
+                } break;
+
+                //indirect objects
+                case "indirect object": {
+                    Argument arg = new Argument(child);
+                    rVal.arguments.add(arg);
+                } break;
+
+                //adverb modifying the root predicate
+                case "adverbial modifier": {
+                    pred.addAdverb(new Adverb(child));
+                } break;
+
+                //certain cases of "there" and "it" in sentences
+                //when these cases occur, they are signifying that the predicate is existential
+                case "expletive": {
+                    pred.setExistential(true);
+                } break;
+
+                //the word coordinating this clause
+                case "coordination": {
+                    rVal.coordinator = new Coordinator(child);
+                } break;
+
+                //A subject in a compound sentence
+                case "compound modifier":{
+                    Argument arg = new Argument(child);
+                    rVal.arguments.add(arg);
+                    rVal.subject = arg;
+                } break;
+
+                //clauses
+
+                //a collapsed clause
+                case "conj_collapsed": {
+                    Clause clause = Clause.parse(graph, child);
+                    rVal.clauses.add(clause);
+                } break;
+
+                //this is a dependent clause that is functioning as an argument
+                case "clausal complement": {
+                    Argument arg = new Argument(child);
+                    rVal.arguments.add(arg);
+                } break;
+
+                //cases to ignore
+                case "punctuation": {
+                    continue;
+                }
+
+                //unhandled cases
+                default: {
+                    throw new Error("Unsupported relation type! " + relation.getLongName() + "\n" + "for " + child.originalText() + "\n" + graph);
+                }
+            }
+        }
+        return rVal;
+    }
+
+    /** Gets the predicate of this clause */
+    public Predicate getPredicate() {
+        return predicate;
+    }
+
+    /** Gets the arguments of this clause (the internal list itself, not a copy) */
+    public List<Argument> getArguments() {
+        return arguments;
+    }
+
+    /** Gets the subject of this clause, or null if no subject relation was found while parsing */
+    public Argument getSubject() {
+        return subject;
+    }
+
+    /** Gets the adjuncts of this clause (the internal list itself, not a copy) */
+    public List<Adjunct> getAdjuncts() {
+        return adjuncts;
+    }
+
+    /** Gets the clauses nested under this clause (the internal list itself, not a copy) */
+    public List<Clause> getClauses(){
+        return clauses;
+    }
+
+    /** Gets the coordinator of this clause, or null if no coordination relation was found while parsing */
+    public Coordinator getCoordinator(){
+        return coordinator;
+    }
+
+
+}
diff --git a/src/main/java/org/studiorailgun/conversation/parser/depend/Coordinator.java b/src/main/java/org/studiorailgun/conversation/parser/depend/Coordinator.java
new file mode 100644
index 0000000..26d7c0d
--- /dev/null
+++ b/src/main/java/org/studiorailgun/conversation/parser/depend/Coordinator.java
@@ -0,0 +1,31 @@
+package org.studiorailgun.conversation.parser.depend;
+
+import edu.stanford.nlp.ling.IndexedWord;
+
+/**
+ * The coordinating word for this clause
+ */
+public class Coordinator {
+
+    /**
+     * The root of the coordinator
+     */
+    IndexedWord root;
+
+    /**
+     * Constructor
+     * @param root The root of the coordinator
+     */
+    public Coordinator(IndexedWord root){
+        this.root = root;
+    }
+
+    /**
+     * Gets the root of this coordinator
+     * @return The root
+     */
+    public IndexedWord getRoot(){
+        return root;
+    }
+
+}
diff --git a/src/main/java/org/studiorailgun/conversation/parser/depend/Predicate.java b/src/main/java/org/studiorailgun/conversation/parser/depend/Predicate.java
new file mode 100644
index 0000000..b3c3c37
--- /dev/null
+++ b/src/main/java/org/studiorailgun/conversation/parser/depend/Predicate.java
@@ -0,0 +1,77 @@
+package org.studiorailgun.conversation.parser.depend;
+
+import java.util.LinkedList;
+import java.util.List;
+
+import edu.stanford.nlp.ling.IndexedWord;
+
+/**
+ * A linguistic predicate
+ */
+public class Predicate {
+
+    /**
+     * The root of the predicate
+     */
+    IndexedWord root;
+
+    /**
+     * The list of adverbs
+     */
+    List<Adverb> adverbs = new LinkedList<>();
+
+
+    /**
+     * The predicate is existential. Essentially, this means that the sentence is declaring the existence of the subject.
+     */
+    boolean existential;
+
+    /**
+     * Constructor
+     * @param root The root of the predicate
+     */
+    public Predicate(IndexedWord root){
+        this.root = root;
+    }
+
+    /**
+     * Gets the root of this predicate
+     * @return The root
+     */
+    public IndexedWord getRoot(){
+        return root;
+    }
+
+    /**
+     * Adds an adverb to the predicate
+     * @param adverb The adverb
+     */
+    public void addAdverb(Adverb adverb){
+        this.adverbs.add(adverb);
+    }
+
+    /**
+     * Gets the adverbs modifying this predicate
+     * @return The list of adverbs
+     */
+    public List<Adverb> getAdverbs(){
+        return adverbs;
+    }
+
+    /**
+     * Sets the existential status of the predicate
+     * @param existential true if existential, false otherwise
+     */
+    public void setExistential(boolean existential){
+        this.existential = existential;
+    }
+
+    /**
+     * Checks if this is an existential predicate or not
+     * @return true if is existential, false otherwise
+     */
+    public boolean isExistential(){
+        return existential;
+    }
+
+}
diff --git a/src/test/java/org/studiorailgun/ComplexClauseTests.java b/src/test/java/org/studiorailgun/ComplexClauseTests.java
new file mode 100644
index 0000000..720c33e
--- /dev/null
+++ b/src/test/java/org/studiorailgun/ComplexClauseTests.java
@@ -0,0 +1,72 @@
+package org.studiorailgun;
+
+import static org.junit.Assert.*;
+
+import org.junit.Test;
+import org.studiorailgun.conversation.parser.NLPParser;
+import org.studiorailgun.conversation.parser.depend.Clause;
+
+import edu.stanford.nlp.pipeline.CoreDocument;
+import edu.stanford.nlp.pipeline.CoreSentence;
+import edu.stanford.nlp.semgraph.SemanticGraph;
+
+/**
+ * Macro structure parsing complex sentence tests
+ */
+public class ComplexClauseTests {
+
+    @Test
+    public void testMacroStructureParse1(){
+        NLPParser.init();
+        CoreDocument doc = NLPParser.parse("The sun lowered, the stars appeared, and the moon rose.");
+        CoreSentence sentence = doc.sentences().get(0);
+        SemanticGraph graph = sentence.dependencyParse();
+        Clause struct = Clause.parse(graph);
+
+        //test number of returns
+        assertNotNull(struct.getPredicate());
+        assertEquals(2, struct.getArguments().size());
+        assertEquals(1, struct.getClauses().size());
+
+        //test returned data
+        assertEquals("lowered", struct.getPredicate().getRoot().originalText()); //should be the root verb
+        assertEquals("sun", struct.getArguments().get(0).getRoot().originalText()); //should be the subject
+    }
+
+    @Test
+    public void testMacroStructureParse2(){
+        NLPParser.init();
+        CoreDocument doc = NLPParser.parse("Birds fly, but fish swim.");
+        CoreSentence sentence = doc.sentences().get(0);
+        SemanticGraph graph = sentence.dependencyParse();
+        Clause struct = Clause.parse(graph);
+
+        //test number of returns
+        assertNotNull(struct.getPredicate());
+        assertEquals(1, struct.getArguments().size());
+        assertEquals(1, struct.getClauses().size());
+
+        //test returned data
+        assertEquals("fly", struct.getPredicate().getRoot().originalText()); //should be the root verb
+        assertEquals("Birds", struct.getArguments().get(0).getRoot().originalText()); //should be the subject
+    }
+
+    @Test
+    public void testMacroStructureParse3(){
+        NLPParser.init();
+        CoreDocument doc = NLPParser.parse("She cooked dinner, yet nobody ate it.");
+        CoreSentence sentence = doc.sentences().get(0);
+        SemanticGraph graph = sentence.dependencyParse();
+        Clause struct = Clause.parse(graph);
+
+        //test number of returns
+        assertNotNull(struct.getPredicate());
+        assertEquals(2, struct.getArguments().size());
+        assertEquals(1, struct.getClauses().size());
+
+        //test returned data
+        assertEquals("cooked", struct.getPredicate().getRoot().originalText()); //should be the root verb
+        assertEquals("She", struct.getArguments().get(0).getRoot().originalText()); //should be the subject
+    }
+
+}
diff --git a/src/test/java/org/studiorailgun/SimpleClauseTests.java b/src/test/java/org/studiorailgun/SimpleClauseTests.java
new file mode 100644
index 0000000..ea0dcd3
--- /dev/null
+++ b/src/test/java/org/studiorailgun/SimpleClauseTests.java
@@ -0,0 +1,175 @@
+package org.studiorailgun;
+
+import static org.junit.Assert.*;
+
+import org.junit.Test;
+import org.studiorailgun.conversation.parser.NLPParser;
+import org.studiorailgun.conversation.parser.depend.Clause;
+
+import edu.stanford.nlp.pipeline.CoreDocument;
+import edu.stanford.nlp.pipeline.CoreSentence;
+import edu.stanford.nlp.semgraph.SemanticGraph;
+
+/**
+ * Macro structure parsing simple sentence tests
+ */
+public class SimpleClauseTests {
+
+    @Test
+    public void testMacroStructureParse1(){
+        NLPParser.init();
+        CoreDocument doc = NLPParser.parse("His name is Bob.");
+        CoreSentence sentence = doc.sentences().get(0);
+        SemanticGraph graph = sentence.dependencyParse();
+        Clause struct = Clause.parse(graph);
+
+        //test number of returns
+        assertNotNull(struct.getPredicate());
+        assertEquals(1, struct.getArguments().size());
+
+        //test returned data
+        assertEquals("Bob", struct.getPredicate().getRoot().originalText()); //should be the noun the copula attaches to
+        assertEquals("name", struct.getArguments().get(0).getRoot().originalText()); //should be the subject
+    }
+
+    @Test
+    public void testMacroStructureParse2(){
+        NLPParser.init();
+        CoreDocument doc = NLPParser.parse("She ran.");
+        CoreSentence sentence = doc.sentences().get(0);
+        SemanticGraph graph = sentence.dependencyParse();
+        Clause struct = Clause.parse(graph);
+
+        //test number of returns
+        assertNotNull(struct.getPredicate());
+        assertEquals(1, struct.getArguments().size());
+
+        //test returned data
+        assertEquals("ran", struct.getPredicate().getRoot().originalText()); //should be the root verb
+        assertEquals("She", struct.getArguments().get(0).getRoot().originalText()); //should be the subject
+    }
+
+    @Test
+    public void testMacroStructureParse3(){
+        NLPParser.init();
+        CoreDocument doc = NLPParser.parse("The cat sleeps.");
+        CoreSentence sentence = doc.sentences().get(0);
+        SemanticGraph graph = sentence.dependencyParse();
+        Clause struct = Clause.parse(graph);
+
+        //test number of returns
+        assertNotNull(struct.getPredicate());
+        assertEquals(1, struct.getArguments().size());
+
+        //test returned data
+        assertEquals("sleeps", struct.getPredicate().getRoot().originalText()); //should be the root verb
+        assertEquals("cat", struct.getArguments().get(0).getRoot().originalText()); //should be the subject
+    }
+
+    @Test
+    public void testMacroStructureParse4(){
+        NLPParser.init();
+        CoreDocument doc = NLPParser.parse("A dog barked.");
+        CoreSentence sentence = doc.sentences().get(0);
+        SemanticGraph graph = sentence.dependencyParse();
+        Clause struct = Clause.parse(graph);
+
+        //test number of returns
+        assertNotNull(struct.getPredicate());
+        assertEquals(1, struct.getArguments().size());
+
+        //test returned data
+        assertEquals("barked", struct.getPredicate().getRoot().originalText()); //should be the root verb
+        assertEquals("dog", struct.getArguments().get(0).getRoot().originalText()); //should be the subject
+    }
+
+    @Test
+    public void testMacroStructureParse5(){
+        NLPParser.init();
+        CoreDocument doc = NLPParser.parse("Colorful birds fly.");
+        CoreSentence sentence = doc.sentences().get(0);
+        SemanticGraph graph = sentence.dependencyParse();
+        Clause struct = Clause.parse(graph);
+
+        //test number of returns
+        assertNotNull(struct.getPredicate());
+        assertEquals(1, struct.getArguments().size());
+
+        //test returned data
+        assertEquals("fly", struct.getPredicate().getRoot().originalText()); //should be the root verb
+        assertEquals("birds", struct.getArguments().get(0).getRoot().originalText()); //should be the subject
+    }
+
+    @Test
+    public void testMacroStructureParse6(){
+        NLPParser.init();
+        CoreDocument doc = NLPParser.parse("The cat sleeps on the mat.");
+        CoreSentence sentence = doc.sentences().get(0);
+        SemanticGraph graph = sentence.dependencyParse();
+        Clause struct = Clause.parse(graph);
+
+        //test number of returns
+        assertNotNull(struct.getPredicate());
+        assertEquals(1, struct.getArguments().size());
+        assertEquals(1, struct.getAdjuncts().size());
+
+        //test returned data
+        assertEquals("sleeps", struct.getPredicate().getRoot().originalText()); //should be the root verb
+        assertEquals("cat", struct.getArguments().get(0).getRoot().originalText()); //should be the subject
+        assertEquals("mat", struct.getAdjuncts().get(0).getRoot().originalText()); //should be the prepositional adjunct
+    }
+
+    @Test
+    public void testMacroStructureParse7(){
+        NLPParser.init();
+        CoreDocument doc = NLPParser.parse("Guys, take it easy.");
+        CoreSentence sentence = doc.sentences().get(0);
+        SemanticGraph graph = sentence.dependencyParse();
+        Clause struct = Clause.parse(graph);
+
+        //test number of returns
+        assertNotNull(struct.getPredicate());
+        assertEquals(2, struct.getArguments().size());
+
+        //test returned data
+        assertEquals("take", struct.getPredicate().getRoot().originalText()); //should be the root verb
+        assertEquals("Guys", struct.getArguments().get(0).getRoot().originalText()); //should be the subject
+        assertEquals("easy", struct.getPredicate().getAdverbs().get(0).getRoot().originalText()); //should be the adverb
+    }
+
+    @Test
+    public void testMacroStructureParse8(){
+        NLPParser.init();
+        CoreDocument doc = NLPParser.parse("He dropped her the ball.");
+        CoreSentence sentence = doc.sentences().get(0);
+        SemanticGraph graph = sentence.dependencyParse();
+        Clause struct = Clause.parse(graph);
+
+        //test number of returns
+        assertNotNull(struct.getPredicate());
+        assertEquals(3, struct.getArguments().size());
+
+        //test returned data
+        assertEquals("dropped", struct.getPredicate().getRoot().originalText()); //should be the root verb
+        assertEquals("He", struct.getArguments().get(0).getRoot().originalText()); //should be the subject
+    }
+
+    @Test
+    public void testMacroStructureParse9(){
+        NLPParser.init();
+        CoreDocument doc = NLPParser.parse("There is a ball below.");
+        CoreSentence sentence = doc.sentences().get(0);
+        SemanticGraph graph = sentence.dependencyParse();
+        Clause struct = Clause.parse(graph);
+
+        //test number of returns
+        assertNotNull(struct.getPredicate());
+        assertEquals(1, struct.getArguments().size());
+
+        //test returned data
+        assertEquals("is", struct.getPredicate().getRoot().originalText()); //should be the existential verb
+        assertTrue(struct.getPredicate().isExistential()); //the expletive "There" should flag the predicate as existential
+        assertEquals("ball", struct.getArguments().get(0).getRoot().originalText()); //should be the subject
+    }
+
+}