Spaces:
Runtime error
Runtime error
Steven Zhang
committed on
Commit
•
21c0ae2
1
Parent(s):
db80ce1
autocorrect merged, finished training spanish
Browse files
.idea/2022-summer-speech-translation.iml
CHANGED
@@ -1,7 +1,9 @@
|
|
1 |
<?xml version="1.0" encoding="UTF-8"?>
|
2 |
<module type="PYTHON_MODULE" version="4">
|
3 |
<component name="NewModuleRootManager">
|
4 |
-
<content url="file://$MODULE_DIR$" />
|
|
|
|
|
5 |
<orderEntry type="inheritedJdk" />
|
6 |
<orderEntry type="sourceFolder" forTests="false" />
|
7 |
</component>
|
1 |
<?xml version="1.0" encoding="UTF-8"?>
|
2 |
<module type="PYTHON_MODULE" version="4">
|
3 |
<component name="NewModuleRootManager">
|
4 |
+
<content url="file://$MODULE_DIR$">
|
5 |
+
<excludeFolder url="file://$MODULE_DIR$/EngToSpanishckpts" />
|
6 |
+
</content>
|
7 |
<orderEntry type="inheritedJdk" />
|
8 |
<orderEntry type="sourceFolder" forTests="false" />
|
9 |
</component>
|
Autocorrect/autocorrectreal.py
CHANGED
@@ -7,11 +7,6 @@ Original file is located at
|
|
7 |
https://colab.research.google.com/drive/1aH5mYp1dxyn55XMjtVUllBvg37nqGVir
|
8 |
"""
|
9 |
|
10 |
-
from google.colab import drive
|
11 |
-
drive.mount('/content/drive')
|
12 |
-
|
13 |
-
!pip install textdistance
|
14 |
-
|
15 |
import re
|
16 |
from collections import Counter
|
17 |
import numpy as np
|
@@ -19,13 +14,11 @@ import pandas as pd
|
|
19 |
import textdistance
|
20 |
|
21 |
w = []
|
22 |
-
with open('/
|
23 |
file_name_data = f.read()
|
24 |
file_name_data = file_name_data.lower()
|
25 |
w = re.findall('\w+', file_name_data)
|
26 |
|
27 |
-
print(f"First 10 words: \n{w[0:10]}")
|
28 |
-
print(f"{len(w)} total words ")
|
29 |
|
30 |
from nltk.metrics.distance import edit_distance
|
31 |
def edit(input_sentence):
|
@@ -40,8 +33,4 @@ def edit(input_sentence):
|
|
40 |
sentence[sentence.index(i)] = closest[1]
|
41 |
output_sentence = ' '.join(sentence)
|
42 |
|
43 |
-
return output_sentence
|
44 |
-
|
45 |
-
print(edit("My namee is uncele sdtevven"))
|
46 |
-
print(edit("moneeyeh is greeat"))
|
47 |
-
print(edit("establishe that nitrgen is theh essentil vegchtable as of animal living matter"))
|
7 |
https://colab.research.google.com/drive/1aH5mYp1dxyn55XMjtVUllBvg37nqGVir
|
8 |
"""
|
9 |
|
|
|
|
|
|
|
|
|
|
|
10 |
import re
|
11 |
from collections import Counter
|
12 |
import numpy as np
|
14 |
import textdistance
|
15 |
|
16 |
w = []
|
17 |
+
with open('Autocorrect/words.txt', 'r') as f:
|
18 |
file_name_data = f.read()
|
19 |
file_name_data = file_name_data.lower()
|
20 |
w = re.findall('\w+', file_name_data)
|
21 |
|
|
|
|
|
22 |
|
23 |
from nltk.metrics.distance import edit_distance
|
24 |
def edit(input_sentence):
|
33 |
sentence[sentence.index(i)] = closest[1]
|
34 |
output_sentence = ' '.join(sentence)
|
35 |
|
36 |
+
return output_sentence
|
|
|
|
|
|
|
|
TestTranslation/translation.py
CHANGED
@@ -251,9 +251,10 @@ transformer = keras.Model(
|
|
251 |
|
252 |
transformer.summary()
|
253 |
|
254 |
-
#load weights using gdown
|
|
|
255 |
gdown.download_folder("https://drive.google.com/drive/folders/1DwN-MlL6MMh7qVJbwoLrWBSMVBN5zbBi")
|
256 |
-
transformer.load_weights("./EngToSpanishckpts/cp.ckpt")
|
257 |
|
258 |
spa_vocab = spa_vectorization.get_vocabulary()
|
259 |
spa_index_lookup = dict(zip(range(len(spa_vocab)), spa_vocab))
|
@@ -275,6 +276,8 @@ def decode_sequence(input_sentence):
|
|
275 |
break
|
276 |
return decoded_sentence
|
277 |
|
278 |
-
|
|
|
|
|
279 |
|
280 |
|
251 |
|
252 |
transformer.summary()
|
253 |
|
254 |
+
# load weights using gdown
|
255 |
+
print(os.listdir())
|
256 |
gdown.download_folder("https://drive.google.com/drive/folders/1DwN-MlL6MMh7qVJbwoLrWBSMVBN5zbBi")
|
257 |
+
transformer.load_weights("./EngToSpanishckpts/cp.ckpt")
|
258 |
|
259 |
spa_vocab = spa_vectorization.get_vocabulary()
|
260 |
spa_index_lookup = dict(zip(range(len(spa_vocab)), spa_vocab))
|
276 |
break
|
277 |
return decoded_sentence
|
278 |
|
279 |
+
transformer.compile(
|
280 |
+
"rmsprop", loss="sparse_categorical_crossentropy"
|
281 |
+
)
|
282 |
|
283 |
|
TestTranslation/translation_test.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
from TestTranslation.translation import *
|
2 |
|
|
|
3 |
|
4 |
test_eng_texts = [pair[0] for pair in test_pairs]
|
5 |
input_sentence = "This is a test."
|
1 |
from TestTranslation.translation import *
|
2 |
|
3 |
+
transformer.evaluate(train_ds)
|
4 |
|
5 |
test_eng_texts = [pair[0] for pair in test_pairs]
|
6 |
input_sentence = "This is a test."
|
TestTranslation/translation_train.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
from translation import *
|
2 |
# steven's addition: saving checkpoints
|
3 |
-
checkpoint_path = "
|
4 |
checkpoint_dir = os.path.dirname(checkpoint_path)
|
5 |
|
6 |
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
|
@@ -8,7 +8,5 @@ cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
|
|
8 |
verbose=1)
|
9 |
|
10 |
epochs = 20 # This should be at least 30 for convergence
|
11 |
-
|
12 |
-
"rmsprop", loss="sparse_categorical_crossentropy", metrics=["accuracy"]
|
13 |
-
)
|
14 |
transformer.fit(train_ds, epochs=epochs, validation_data=val_ds, callbacks=[cp_callback])
|
1 |
from translation import *
|
2 |
# steven's addition: saving checkpoints
|
3 |
+
checkpoint_path = "./EngToSpanishckpts/cp.ckpt"
|
4 |
checkpoint_dir = os.path.dirname(checkpoint_path)
|
5 |
|
6 |
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
|
8 |
verbose=1)
|
9 |
|
10 |
epochs = 20 # This should be at least 30 for convergence
|
11 |
+
|
|
|
|
|
12 |
transformer.fit(train_ds, epochs=epochs, validation_data=val_ds, callbacks=[cp_callback])
|
app.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
# have to run this locally as streamlit run app.py
|
2 |
import streamlit as st
|
3 |
-
|
4 |
from TestTranslation.translation import *
|
5 |
|
6 |
|
@@ -11,7 +11,9 @@ option = st.selectbox("Select input type:", ("text input", "audio input"))
|
|
11 |
if option == "text input":
|
12 |
input_sentence = st.text_input("Enter input sentence:")
|
13 |
if input_sentence is not None and len(input_sentence) > 0:
|
14 |
-
|
|
|
|
|
15 |
st.write(translated)
|
16 |
input_sentence = None
|
17 |
else:
|
1 |
# have to run this locally as streamlit run app.py
|
2 |
import streamlit as st
|
3 |
+
from Autocorrect.autocorrectreal import edit
|
4 |
from TestTranslation.translation import *
|
5 |
|
6 |
|
11 |
if option == "text input":
|
12 |
input_sentence = st.text_input("Enter input sentence:")
|
13 |
if input_sentence is not None and len(input_sentence) > 0:
|
14 |
+
edited = edit(input_sentence)
|
15 |
+
st.write("Autocorrected sentence: " + edited)
|
16 |
+
translated = decode_sequence(edited)[8:-5]
|
17 |
st.write(translated)
|
18 |
input_sentence = None
|
19 |
else:
|