basic clause parsing
All checks were successful
studiorailgun/trpg/pipeline/head This commit looks good

This commit is contained in:
austin 2024-12-30 19:24:56 -05:00
parent 28488da1a5
commit 7ebee6b8d0
10 changed files with 653 additions and 2 deletions

View File

@ -0,0 +1,2 @@
word,lemma,valence,tense,mood,aspect
be,be,2,?,?,?
1 word lemma valence tense mood aspect
2 be be 2 ? ? ?

View File

@ -42,8 +42,7 @@ public class NLPParser {
public static void parse(Quote quote){
if(quote.getParsedDocument() == null){
// annnotate the document and store
CoreDocument document = new CoreDocument(quote.getRaw());
pipeline.annotate(document);
CoreDocument document = NLPParser.parse(quote.getRaw());
quote.setParsedDocument(document);
for(CoreSentence coreSentence : document.sentences()){
@ -56,4 +55,16 @@ public class NLPParser {
}
}
/**
 * Runs the NLP pipeline over a raw input string
 * @param input The raw text to annotate
 * @return The document wrapping the input, after the pipeline has annotated it
 */
public static CoreDocument parse(String input){
    // wrap the raw text in a document, then hand that document to the pipeline for annotation
    CoreDocument annotated = new CoreDocument(input);
    pipeline.annotate(annotated);
    return annotated;
}
}

View File

@ -0,0 +1,31 @@
package org.studiorailgun.conversation.parser.depend;
import edu.stanford.nlp.ling.IndexedWord;
/**
 * An adjunct of a clause, identified by the word at its root
 */
public class Adjunct {

    /**
     * The word at the root of this adjunct
     */
    IndexedWord root;

    /**
     * Creates an adjunct
     * @param root The word at the root of the adjunct
     */
    public Adjunct(IndexedWord root){
        this.root = root;
    }

    /**
     * Gets the word at the root of this adjunct
     * @return The root word
     */
    public IndexedWord getRoot(){
        return this.root;
    }

}

View File

@ -0,0 +1,31 @@
package org.studiorailgun.conversation.parser.depend;
import edu.stanford.nlp.ling.IndexedWord;
/**
 * An adverb, identified by the word at its root
 */
public class Adverb {

    /**
     * The word at the root of this adverb
     */
    IndexedWord root;

    /**
     * Creates an adverb
     * @param root The word at the root of the adverb
     */
    public Adverb(IndexedWord root){
        this.root = root;
    }

    /**
     * Gets the word at the root of this adverb
     * @return The root word
     */
    public IndexedWord getRoot(){
        return this.root;
    }

}

View File

@ -0,0 +1,31 @@
package org.studiorailgun.conversation.parser.depend;
import edu.stanford.nlp.ling.IndexedWord;
/**
 * An argument of a clause, identified by the word at its root
 */
public class Argument {

    /**
     * The word at the root of this argument
     */
    IndexedWord root;

    /**
     * Creates an argument
     * @param root The word at the root of the argument
     */
    public Argument(IndexedWord root){
        this.root = root;
    }

    /**
     * Gets the word at the root of this argument
     * @return The root word
     */
    public IndexedWord getRoot(){
        return this.root;
    }

}

View File

@ -0,0 +1,190 @@
package org.studiorailgun.conversation.parser.depend;
import java.util.LinkedList;
import java.util.List;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.trees.GrammaticalRelation;
/**
 * The macro structure of a clause: its predicate, arguments, subject, adjuncts, nested clauses, and coordinator
 */
public class Clause {

    /**
     * The predicate of the clause
     */
    Predicate predicate;

    /**
     * The arguments of the clause (the subject, when one is found, is stored here as well)
     */
    List<Argument> arguments = new LinkedList<>();

    /**
     * The subject of the clause
     */
    Argument subject;

    /**
     * The adjuncts of the clause
     */
    List<Adjunct> adjuncts = new LinkedList<>();

    /**
     * The clauses nested under this clause
     */
    List<Clause> clauses = new LinkedList<>();

    /**
     * The word coordinating this clause
     */
    Coordinator coordinator;

    /**
     * Parses the clause structure of a whole semantic graph, starting at its root
     * @param graph The graph to parse; it must have exactly one root
     * @return The clause built from the graph's root
     * @throws Error If the graph does not have exactly one root, or a child of a parsed root has an unsupported relation
     */
    public static Clause parse(SemanticGraph graph){
        if(graph.getRoots().size() == 1){
            return parse(graph, graph.getFirstRoot());
        }
        throw new Error("Unable to parse sentences with roots != 1!");
    }

    /**
     * Parses the clause structure of the part of the graph hanging off a given word
     * @param graph The graph
     * @param root The word to treat as the root of the clause
     * @return The clause built from the root and its direct children
     * @throws Error If a child is attached to the root by a relation that is not handled
     */
    private static Clause parse(SemanticGraph graph, IndexedWord root){
        Clause clause = new Clause();
        //the root is (typically) the predicate
        Predicate rootPredicate = new Predicate(root);
        clause.predicate = rootPredicate;
        //sort each child of the root into the part of the clause that its relation indicates
        for(IndexedWord child : graph.getChildList(root)){
            GrammaticalRelation relation = graph.reln(root, child);
            String relationName = relation.getLongName();
            switch(relationName){
                //subjects (a compound modifier is treated as a subject in a compound sentence)
                case "nominal subject":
                case "compound modifier": {
                    Argument subjectArg = new Argument(child);
                    clause.arguments.add(subjectArg);
                    clause.subject = subjectArg;
                } break;
                //non-subject arguments: direct objects, indirect objects,
                //and dependent clauses that are functioning as an argument
                case "direct object":
                case "indirect object":
                case "clausal complement": {
                    clause.arguments.add(new Argument(child));
                } break;
                //a prepositional adjunct (oblique)
                case "obl_preposition": {
                    clause.adjuncts.add(new Adjunct(child));
                } break;
                //adverb modifying the root predicate
                case "adverbial modifier": {
                    rootPredicate.addAdverb(new Adverb(child));
                } break;
                //certain cases of "there" and "it" in sentences
                //when these occur, they signify that the predicate is existential
                case "expletive": {
                    rootPredicate.setExistential(true);
                } break;
                //the word coordinating this clause
                case "coordination": {
                    clause.coordinator = new Coordinator(child);
                } break;
                //a collapsed clause, parsed recursively with the child as its root
                case "conj_collapsed": {
                    clause.clauses.add(parse(graph, child));
                } break;
                //relations that add nothing to the clause:
                //a copula means the root is a noun that the related word turns into a copular predicate,
                //and punctuation is ignored
                case "copula":
                case "punctuation": {
                } break;
                //unhandled cases
                default: {
                    throw new Error("Unsupported relation type! " + relationName + "\n" + "for " + child.originalText() + "\n" + graph);
                }
            }
        }
        return clause;
    }

    /**
     * Gets the predicate of this clause
     * @return The predicate
     */
    public Predicate getPredicate() {
        return this.predicate;
    }

    /**
     * Gets the arguments of this clause
     * @return The list of arguments
     */
    public List<Argument> getArguments() {
        return this.arguments;
    }

    /**
     * Gets the subject of this clause
     * @return The subject, or null if none was set while parsing
     */
    public Argument getSubject() {
        return this.subject;
    }

    /**
     * Gets the adjuncts of this clause
     * @return The list of adjuncts
     */
    public List<Adjunct> getAdjuncts() {
        return this.adjuncts;
    }

    /**
     * Gets the clauses nested under this clause
     * @return The list of clauses
     */
    public List<Clause> getClauses(){
        return this.clauses;
    }

    /**
     * Gets the word coordinating this clause
     * @return The coordinator, or null if none was set while parsing
     */
    public Coordinator getCoordinator(){
        return this.coordinator;
    }

}

View File

@ -0,0 +1,31 @@
package org.studiorailgun.conversation.parser.depend;
import edu.stanford.nlp.ling.IndexedWord;
/**
 * The word that coordinates a clause, identified by the word at its root
 */
public class Coordinator {

    /**
     * The word at the root of this coordinator
     */
    IndexedWord root;

    /**
     * Creates a coordinator
     * @param root The word at the root of the coordinator
     */
    public Coordinator(IndexedWord root){
        this.root = root;
    }

    /**
     * Gets the word at the root of this coordinator
     * @return The root word
     */
    public IndexedWord getRoot(){
        return this.root;
    }

}

View File

@ -0,0 +1,77 @@
package org.studiorailgun.conversation.parser.depend;
import java.util.LinkedList;
import java.util.List;
import edu.stanford.nlp.ling.IndexedWord;
/**
 * The predicate of a clause: its root word, the adverbs modifying it, and whether it is existential
 */
public class Predicate {

    /**
     * The word at the root of this predicate
     */
    IndexedWord root;

    /**
     * The adverbs that modify this predicate
     */
    List<Adverb> adverbs = new LinkedList<>();

    /**
     * Whether the predicate is existential, i.e. the sentence is declaring the existence of the subject
     */
    boolean existential;

    /**
     * Creates a predicate with no adverbs that is not existential
     * @param root The word at the root of the predicate
     */
    public Predicate(IndexedWord root){
        this.root = root;
    }

    /**
     * Gets the word at the root of this predicate
     * @return The root word
     */
    public IndexedWord getRoot(){
        return this.root;
    }

    /**
     * Appends an adverb to the adverbs modifying this predicate
     * @param adverb The adverb to append
     */
    public void addAdverb(Adverb adverb){
        adverbs.add(adverb);
    }

    /**
     * Gets the adverbs modifying this predicate
     * @return The list of adverbs
     */
    public List<Adverb> getAdverbs(){
        return this.adverbs;
    }

    /**
     * Marks this predicate as existential or not
     * @param existential true if existential, false otherwise
     */
    public void setExistential(boolean existential){
        this.existential = existential;
    }

    /**
     * Checks whether this predicate is existential
     * @return true if existential, false otherwise
     */
    public boolean isExistential(){
        return this.existential;
    }

}

View File

@ -0,0 +1,72 @@
package org.studiorailgun;
import static org.junit.Assert.*;
import org.junit.Test;
import org.studiorailgun.conversation.parser.NLPParser;
import org.studiorailgun.conversation.parser.depend.Clause;
import edu.stanford.nlp.pipeline.CoreDocument;
import edu.stanford.nlp.pipeline.CoreSentence;
import edu.stanford.nlp.semgraph.SemanticGraph;
/**
 * Clause parsing tests for sentences made of several coordinated clauses
 */
public class ComplexClauseTests {

    /**
     * Initializes the parser, runs it on the text, and parses the clause structure of the first sentence
     * @param text The text to parse
     * @return The clause parsed from the dependency graph of the first sentence
     */
    private static Clause parseFirstSentence(String text){
        NLPParser.init();
        CoreDocument doc = NLPParser.parse(text);
        CoreSentence sentence = doc.sentences().get(0);
        SemanticGraph graph = sentence.dependencyParse();
        return Clause.parse(graph);
    }

    @Test
    public void testMacroStructureParse1(){
        Clause struct = parseFirstSentence("The sun lowered, the stars appeared, and the moon rose.");

        //test number of returns
        assertNotNull(struct.getPredicate());
        assertEquals(2, struct.getArguments().size());
        assertEquals(1, struct.getClauses().size());

        //test returned data
        assertEquals("lowered", struct.getPredicate().getRoot().originalText()); //root of the predicate
        assertEquals("sun", struct.getArguments().get(0).getRoot().originalText()); //first argument
    }

    @Test
    public void testMacroStructureParse2(){
        Clause struct = parseFirstSentence("Birds fly, but fish swim.");

        //test number of returns
        assertNotNull(struct.getPredicate());
        assertEquals(1, struct.getArguments().size());
        assertEquals(1, struct.getClauses().size());

        //test returned data
        assertEquals("fly", struct.getPredicate().getRoot().originalText()); //root of the predicate
        assertEquals("Birds", struct.getArguments().get(0).getRoot().originalText()); //first argument
    }

    @Test
    public void testMacroStructureParse3(){
        Clause struct = parseFirstSentence("She cooked dinner, yet nobody ate it.");

        //test number of returns
        assertNotNull(struct.getPredicate());
        assertEquals(2, struct.getArguments().size());
        assertEquals(1, struct.getClauses().size());

        //test returned data
        assertEquals("cooked", struct.getPredicate().getRoot().originalText()); //root of the predicate
        assertEquals("She", struct.getArguments().get(0).getRoot().originalText()); //first argument
    }

}

View File

@ -0,0 +1,175 @@
package org.studiorailgun;
import static org.junit.Assert.*;
import org.junit.Test;
import org.studiorailgun.conversation.parser.NLPParser;
import org.studiorailgun.conversation.parser.depend.Clause;
import edu.stanford.nlp.pipeline.CoreDocument;
import edu.stanford.nlp.pipeline.CoreSentence;
import edu.stanford.nlp.semgraph.SemanticGraph;
/**
 * Clause parsing tests for simple, single-clause sentences
 */
public class SimpleClauseTests {

    /**
     * Initializes the parser, runs it on the text, and parses the clause structure of the first sentence
     * @param text The text to parse
     * @return The clause parsed from the dependency graph of the first sentence
     */
    private static Clause parseFirstSentence(String text){
        NLPParser.init();
        CoreDocument doc = NLPParser.parse(text);
        CoreSentence sentence = doc.sentences().get(0);
        SemanticGraph graph = sentence.dependencyParse();
        return Clause.parse(graph);
    }

    @Test
    public void testMacroStructureParse1(){
        Clause struct = parseFirstSentence("His name is Bob.");

        //test number of returns
        assertNotNull(struct.getPredicate());
        assertEquals(1, struct.getArguments().size());

        //test returned data
        assertEquals("Bob", struct.getPredicate().getRoot().originalText()); //root of the copular predicate
        assertEquals("name", struct.getArguments().get(0).getRoot().originalText()); //first argument
    }

    @Test
    public void testMacroStructureParse2(){
        Clause struct = parseFirstSentence("She ran.");

        //test number of returns
        assertNotNull(struct.getPredicate());
        assertEquals(1, struct.getArguments().size());

        //test returned data
        assertEquals("ran", struct.getPredicate().getRoot().originalText()); //root of the predicate
        assertEquals("She", struct.getArguments().get(0).getRoot().originalText()); //first argument
    }

    @Test
    public void testMacroStructureParse3(){
        Clause struct = parseFirstSentence("The cat sleeps.");

        //test number of returns
        assertNotNull(struct.getPredicate());
        assertEquals(1, struct.getArguments().size());

        //test returned data
        assertEquals("sleeps", struct.getPredicate().getRoot().originalText()); //root of the predicate
        assertEquals("cat", struct.getArguments().get(0).getRoot().originalText()); //first argument
    }

    @Test
    public void testMacroStructureParse4(){
        Clause struct = parseFirstSentence("A dog barked.");

        //test number of returns
        assertNotNull(struct.getPredicate());
        assertEquals(1, struct.getArguments().size());

        //test returned data
        assertEquals("barked", struct.getPredicate().getRoot().originalText()); //root of the predicate
        assertEquals("dog", struct.getArguments().get(0).getRoot().originalText()); //first argument
    }

    @Test
    public void testMacroStructureParse5(){
        Clause struct = parseFirstSentence("Colorful birds fly.");

        //test number of returns
        assertNotNull(struct.getPredicate());
        assertEquals(1, struct.getArguments().size());

        //test returned data
        assertEquals("fly", struct.getPredicate().getRoot().originalText()); //root of the predicate
        assertEquals("birds", struct.getArguments().get(0).getRoot().originalText()); //first argument
    }

    @Test
    public void testMacroStructureParse6(){
        Clause struct = parseFirstSentence("The cat sleeps on the mat.");

        //test number of returns
        assertNotNull(struct.getPredicate());
        assertEquals(1, struct.getArguments().size());
        assertEquals(1, struct.getAdjuncts().size());

        //test returned data
        assertEquals("sleeps", struct.getPredicate().getRoot().originalText()); //root of the predicate
        assertEquals("cat", struct.getArguments().get(0).getRoot().originalText()); //first argument
        assertEquals("mat", struct.getAdjuncts().get(0).getRoot().originalText()); //first adjunct
    }

    @Test
    public void testMacroStructureParse7(){
        Clause struct = parseFirstSentence("Guys, take it easy.");

        //test number of returns
        assertNotNull(struct.getPredicate());
        assertEquals(2, struct.getArguments().size());

        //test returned data
        assertEquals("take", struct.getPredicate().getRoot().originalText()); //root of the predicate
        assertEquals("Guys", struct.getArguments().get(0).getRoot().originalText()); //first argument
        assertEquals("easy", struct.getPredicate().getAdverbs().get(0).getRoot().originalText()); //first adverb on the predicate
    }

    @Test
    public void testMacroStructureParse8(){
        Clause struct = parseFirstSentence("He dropped her the ball.");

        //test number of returns
        assertNotNull(struct.getPredicate());
        assertEquals(3, struct.getArguments().size());

        //test returned data
        assertEquals("dropped", struct.getPredicate().getRoot().originalText()); //root of the predicate
        assertEquals("He", struct.getArguments().get(0).getRoot().originalText()); //first argument
    }

    @Test
    public void testMacroStructureParse9(){
        Clause struct = parseFirstSentence("There is a ball below.");

        //test number of returns
        assertNotNull(struct.getPredicate());
        assertEquals(1, struct.getArguments().size());

        //test returned data
        assertEquals("is", struct.getPredicate().getRoot().originalText()); //root of the predicate
        assertTrue(struct.getPredicate().isExistential()); //predicate should be flagged as existential
        assertEquals("ball", struct.getArguments().get(0).getRoot().originalText()); //first argument
    }

}