sentence func model work

2024-12-30 12:06:24 -05:00 · 2024-12-30 12:06:24 -05:00 · d077d894fb
commit d077d894fb
parent cf3ede4882
7 changed files with 58 additions and 13 deletions
--- a/data/model/sent_func/fingerprint.pb
+++ b/data/model/sent_func/fingerprint.pb
@ -1 +1 @@
-╪МёКУ╘ТВэТН▒║Х∙╫√=╔в╔▓ИКЙG ▄шЁЗаБ÷Т╒(фБЕ╨≥┘юШ2
+ｼⅶ喜矣ﾊﾔﾘﾁ<EFBE98><EFBE81>ﾌ鈺･ﾗ･帝<EFBDA5>G 鼓ｳ愠筺<E684A0>(<28>ﾜｪﾌﾞﾈ<EFBE9E>2
--- a/data/model/sent_func/saved_model.pb
+++ b/data/model/sent_func/saved_model.pb
--- a/data/model/sent_func/variables/variables.data-00000-of-00001
+++ b/data/model/sent_func/variables/variables.data-00000-of-00001
--- a/data/model/sent_func/variables/variables.index
+++ b/data/model/sent_func/variables/variables.index
--- a/data/semantic/sent_func/test.csv
+++ b/data/semantic/sent_func/test.csv
@ -3,4 +3,9 @@
 1,0,0,0,"Hi"
 1,0,0,0,"Howdy"
 0,0,1,0,"What color is your hat?"
-0,0,1,0,"Which color is your hat?"
+0,0,1,0,"Which color is your hat?"
+0,0,1,0,"What is the color is your hat?"
+0,1,0,0,"My hat is Blue"
+0,1,0,0,"My hat is blue"
+0,1,0,0,"Your hat is Blue"
+0,1,0,0,"Your hat is blue"
--- a/data/semantic/sent_func/train.csv
+++ b/data/semantic/sent_func/train.csv
@ -4,4 +4,8 @@
 1,0,0,0,"Howdy"
 0,0,1,0,"What color is your hat?"
 0,0,1,0,"Which color is your hat?"
-0,0,1,0,"What is the color is your hat?"
+0,0,1,0,"What is the color is your hat?"
+0,1,0,0,"My hat is Blue"
+0,1,0,0,"My hat is blue"
+0,1,0,0,"Your hat is Blue"
+0,1,0,0,"Your hat is blue"
--- a/src/main/python/conversation/sentence/function.py
+++ b/src/main/python/conversation/sentence/function.py
@ -27,21 +27,38 @@ from pandas import DataFrame



+#
+#
+# MODEL CONSTANTS
+#
+#

-# Model constants.
+# upper bound on token ids: the embedding layer is built with max_features + 1 input rows (the vectorizer's vocabulary itself is capped by max_tokens)
 max_features: int = 20000
+
+# the dimension of the output from the embedding layer
 embedding_dim: int = 128
-sequence_length: int = 500
+
+# The number of epochs to train for
 epochs: int = 50
+
+# Maximum vocabulary size of the TextVectorization layer (passed as its max_tokens argument)
 max_tokens: int = 5000
+
+# (Only valid when the TextVectorization output_mode is 'int') Each vectorized sentence is padded or truncated to exactly output_sequence_length tokens
 output_sequence_length: int = 4
+
+# The number of classes we're training for
 num_classes: int = 4



+
+#
 #
 # LOAD DATA
 #
+#


 # read training sentences
@ -73,9 +90,11 @@ test_labels: DataFrame = test_csv_raw[["utility", "transfer", "query", "imperati



+#
 #
 # CREATE VECTORIZER
 #
+#


 # init vectorizer
@ -83,7 +102,8 @@ textVec: TextVectorization = TextVectorization(
    max_tokens=max_tokens,
    output_mode='int',
    output_sequence_length=output_sequence_length,
-    pad_to_max_tokens=True)
+    pad_to_max_tokens=True
+)

 # Add the vocab to the tokenizer
 textVec.adapt(vocab) 
@ -92,10 +112,11 @@ train_data: Tensor = textVec.call(input_data)



-
+#
 #
 # CREATE MODEL
 #
+#


 # construct model
@ -103,7 +124,7 @@ model: Sequential = Sequential([
    keras.Input(shape=(1,), dtype=tf.string),
    textVec,
    Embedding(max_features + 1, embedding_dim),
-    LSTM(64),
+    LSTM(128),
    Dense(num_classes, activation='sigmoid')
 ])

@ -113,9 +134,11 @@ model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy']



+#
 #
 # TRAIN MODEL
 #
+#

 # Final formatting of data
 npTrainData = train_data_split.to_numpy(dtype=object).flatten()
@ -131,10 +154,11 @@ model.fit(npTrainData,npTrainLabel,epochs=epochs)



-
+#
 #
 # EVALUATE MODEL
 #
+#


 # evaluate here
@ -149,7 +173,13 @@ print("Evaluating..")
 model.evaluate(npTestData,npTestLabel)


-# predict
+
+#
+#
+# PREDICT
+#
+#
+
 # predictTargetRaw: Tensor = tf.constant(['Hello'])
 # npPredict: npt.NDArray = np.array(predictTargetRaw, dtype=object)
 # print("Prediction test..")
@ -162,13 +192,21 @@ model.evaluate(npTestData,npTestLabel)



+#
+#
+# SAVE (DEVELOPMENT) MODEL
+#
+#
+
 # save the model so keras can reload
 # savePath: str = './data/semantic/model.keras'
 # model.save(savePath)


 #
-# SAVE MODEL
+#
+# SAVE (PRODUCTION) MODEL
+#
 #

 # export the model so java can leverage it
@ -176,5 +214,3 @@ print("Saving..")
 exportPath: str = './data/model/sent_func'
 model.export(exportPath)

-# tf.keras.utils.get_file('asdf')
-# asdf: str = 'a'