Andy7475
committed on
Commit
·
3f3286b
1
Parent(s):
e7be7ab
new function
Browse files- 20230909_english_place_names.keras +0 -0
- app.py +114 -4
- df_training_data.pkl +3 -0
- model_config.json +1 -0
- requirements.txt +4 -0
- word generator.ipynb +0 -0
20230909_english_place_names.keras
ADDED
Binary file (921 kB). View file
|
|
app.py
CHANGED
@@ -1,12 +1,122 @@
|
|
1 |
import streamlit as st
|
|
|
|
|
|
|
|
|
2 |
|
3 |
-
def
|
4 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
st.title('HuggingFace Space')
|
7 |
|
8 |
-
user_input = st.text_input("Enter
|
9 |
|
10 |
if st.button('Generate'):
|
11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
st.write(result)
|
|
|
1 |
import streamlit as st
|
2 |
+
from tensorflow.keras.models import Sequential, load_model
|
3 |
+
import pandas as pd
|
4 |
+
import json
|
5 |
+
import numpy as np
|
6 |
|
7 |
+
def generate_words(
    model,
    vocab_size,
    max_len,
    idx_to_char,
    char_to_idx,
    number=1,
    temperature=1,
    seed_word=None,
):
    """Generate word(s) by sampling the model's per-character softmax output.

    For each word, the model is run repeatedly on the characters generated so
    far and the next character is randomly sampled from the predicted
    probabilities (not the argmax), so repeated calls produce varied words.

    Args:
        model: trained Keras model mapping a one-hot sequence to next-char
            probabilities (queried via ``model.predict``).
        vocab_size (int): number of characters in the vocabulary.
        max_len (int): model input window / maximum word length.
        idx_to_char (dict): index -> character map. Keys may be ints or the
            string keys produced by a JSON round-trip; both are accepted.
        char_to_idx (dict): character -> index map.
        number (int): how many words to generate.
        temperature (float): softmax temperature; 1 leaves the predictions
            unchanged, < 1 sharpens them, > 1 flattens them.
        seed_word (str | None): starting character(s). When None, each word is
            seeded with a random printable character from the vocabulary.

    Returns:
        list[str]: the generated words (without the end-of-word newline).
    """
    # JSON object keys are always strings, so a config loaded via json.load
    # yields idx_to_char keys like "12" instead of 12, which would raise
    # KeyError on integer indexing below. Normalise to int keys up front.
    idx_to_char = {int(k): v for k, v in idx_to_char.items()}

    seed_word_original = seed_word

    def generate_word(seed_word, i=0):
        def adjust_temperature(predictions, temperature):
            # Rescale log-probabilities by temperature, then re-normalise.
            predictions = np.log(predictions) / temperature
            exp_preds = np.exp(predictions)
            adjusted_preds = exp_preds / np.sum(exp_preds)
            return adjusted_preds

        def next_char(preds):
            # Sample from the distribution (not argmax) so output varies.
            next_idx = np.random.choice(range(vocab_size), p=preds.ravel())
            char = idx_to_char[next_idx]
            return char

        def word_to_input(word: str):
            """Turn a string into a one-hot (1, max_len, vocab_size) matrix."""
            x_pred = np.zeros((1, max_len, vocab_size))
            for t, char in enumerate(word):
                x_pred[0, t, char_to_idx[char]] = 1.0
            return x_pred

        # Stop once the word fills the model's input window (>= also guards
        # against a caller-supplied seed that is already longer than max_len,
        # which would otherwise overflow the one-hot matrix).
        if len(seed_word) >= max_len:
            return seed_word

        x_input = word_to_input(seed_word)
        preds = model.predict(x_input, verbose=False)
        if temperature != 1:
            preds = adjust_temperature(preds, temperature)
        char = next_char(preds)
        i += 1

        # "\n" is the end-of-word token the model was trained with.
        if char == "\n":
            return seed_word
        else:
            return generate_word(seed_word + char, i)

    output = []
    print("generating words")
    for i in range(number):
        if seed_word is None:
            # Indices 0 and 1 are "\n" and " " in the training vocabulary, so
            # start at 2 to seed with a printable character.
            seed_word = idx_to_char[np.random.choice(np.arange(2, len(char_to_idx)))]
        word = generate_word(seed_word)
        output.append(word)
        seed_word = seed_word_original
    return output
|
67 |
+
|
68 |
+
def save_dict_as_json(dictionary, filename):
    """Serialise *dictionary* to *filename* as a JSON document.

    Args:
        dictionary (dict): The mapping to write out.
        filename (str): Destination path of the JSON file.
    """
    with open(filename, "w") as out_file:
        json.dump(dictionary, out_file)
|
78 |
+
|
79 |
+
def load_json_as_dict(filename):
    """Read *filename* and return its parsed JSON contents.

    Args:
        filename (str): Path of the JSON file to read.

    Returns:
        dict: The deserialised data.
    """
    with open(filename, 'r') as in_file:
        loaded = json.load(in_file)
    return loaded
|
92 |
+
|
93 |
+
|
94 |
+
# Load the trained character-level place-name model.
best_model = load_model("20230909_english_place_names.keras")

# Load the vocabulary / sequence-length config saved alongside the model.
model_config = load_json_as_dict("model_config.json")

# Training data, used to flag generated names that already exist verbatim.
df_check = pd.read_pickle("df_training_data.pkl")

st.title('HuggingFace Space')

user_input = st.text_input("Enter a letter")

if st.button('Generate'):
    # Vocabulary is lowercase only (see model_config.json), so normalise.
    user_input = user_input.lower()
    words = generate_words(
        model=best_model,
        vocab_size=model_config["vocab_size"],
        max_len=model_config["max_len"],
        idx_to_char=model_config["idx_to_char"],
        char_to_idx=model_config["char_to_idx"],
        number=1,
        temperature=1,
        seed_word=user_input,
    )

    # generate_words returns a list; we requested a single word, so unwrap it.
    # (Checking the raw list against df_check.index could never match, and
    # list + str concatenation would raise TypeError.)
    result = words[0]
    if result in df_check.index:
        result = result + " (is found in training data)"
    st.write(result)
|
df_training_data.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5e7f9913224ee50f483fac00ed36ea1b4886dcf22f640569403459d3a543493c
|
3 |
+
size 1210025
|
model_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"vocab": ["\n", " ", "!", "&", "'", "(", ")", ",", "-", ".", "/", ":", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z"], "vocab_size": 38, "char_to_idx": {"\n": 0, " ": 1, "!": 2, "&": 3, "'": 4, "(": 5, ")": 6, ",": 7, "-": 8, ".": 9, "/": 10, ":": 11, "a": 12, "b": 13, "c": 14, "d": 15, "e": 16, "f": 17, "g": 18, "h": 19, "i": 20, "j": 21, "k": 22, "l": 23, "m": 24, "n": 25, "o": 26, "p": 27, "q": 28, "r": 29, "s": 30, "t": 31, "u": 32, "v": 33, "w": 34, "x": 35, "y": 36, "z": 37}, "idx_to_char": {"0": "\n", "1": " ", "2": "!", "3": "&", "4": "'", "5": "(", "6": ")", "7": ",", "8": "-", "9": ".", "10": "/", "11": ":", "12": "a", "13": "b", "14": "c", "15": "d", "16": "e", "17": "f", "18": "g", "19": "h", "20": "i", "21": "j", "22": "k", "23": "l", "24": "m", "25": "n", "26": "o", "27": "p", "28": "q", "29": "r", "30": "s", "31": "t", "32": "u", "33": "v", "34": "w", "35": "x", "36": "y", "37": "z"}, "max_len": 25}
|
requirements.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
tensorflow
|
2 |
+
numpy
|
3 |
+
# note: json is part of the Python standard library and must not be listed as a pip requirement
|
4 |
+
pandas
|
word generator.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|