Andy7475 committed
Commit 3f3286b · 1 Parent(s): e7be7ab

new function

20230909_english_place_names.keras ADDED
Binary file (921 kB).
 
app.py CHANGED
@@ -1,12 +1,122 @@
 import streamlit as st
+from tensorflow.keras.models import Sequential, load_model
+import pandas as pd
+import json
+import numpy as np

-def generate_word(input_text):
-    return 'Hello World ' + input_text
+def generate_words(
+    model,
+    vocab_size,
+    max_len,
+    idx_to_char,
+    char_to_idx,
+    number=1,
+    temperature=1,
+    seed_word=None,
+):
+    """Takes the model and generates words from the softmax output for each character.
+    It runs the model once per character in the sequence and randomly samples from the
+    character probabilities (rather than taking the max), so the words vary on each call."""
+    seed_word_original = seed_word
+
+    def generate_word(seed_word, i=0):
+        def adjust_temperature(predictions, temperature):
+            predictions = np.log(predictions) / temperature
+            exp_preds = np.exp(predictions)
+            adjusted_preds = exp_preds / np.sum(exp_preds)
+            return adjusted_preds
+
+        def next_char(preds):
+            next_idx = np.random.choice(range(vocab_size), p=preds.ravel())
+            # next_idx = np.argmax(preds)
+            char = idx_to_char[next_idx]
+            return char
+
+        def word_to_input(word: str):
+            """takes a string and turns it into a sequence matrix"""
+            x_pred = np.zeros((1, max_len, vocab_size))
+            for t, char in enumerate(word):
+                x_pred[0, t, char_to_idx[char]] = 1.0
+            return x_pred
+
+        if len(seed_word) == max_len:
+            return seed_word
+
+        x_input = word_to_input(seed_word)
+        preds = model.predict(x_input, verbose=False)
+        if temperature != 1:
+            preds = adjust_temperature(preds, temperature)
+        char = next_char(preds)
+        i += 1
+        # print(seed_word, char, i)
+
+        if char == "\n":
+            return seed_word
+        else:
+            return generate_word(seed_word + char, i)
+
+    output = []
+    print("generating words")
+    for i in range(number):
+        if seed_word is None:
+            seed_word = idx_to_char[np.random.choice(np.arange(2, len(char_to_idx)))]
+        word = generate_word(seed_word)
+        output.append(word)
+        seed_word = seed_word_original
+    return output
+
+def save_dict_as_json(dictionary, filename):
+    """
+    Save a dictionary as a JSON file.
+
+    Args:
+        dictionary (dict): The dictionary to be saved as JSON.
+        filename (str): The name of the JSON file to save the dictionary to.
+    """
+    with open(filename, "w") as json_file:
+        json.dump(dictionary, json_file)
+
+def load_json_as_dict(filename):
+    """
+    Load a JSON file and return its contents as a dictionary.
+
+    Args:
+        filename (str): The name of the JSON file to load.
+
+    Returns:
+        dict: The dictionary loaded from the JSON file.
+    """
+    with open(filename, 'r') as json_file:
+        data = json.load(json_file)
+    return data
+
+
+# load model
+best_model = load_model("20230909_english_place_names.keras")
+
+# load config
+model_config = load_json_as_dict("model_config.json")
+# print(model_config)
+# load training data checker
+df_check = pd.read_pickle("df_training_data.pkl")
+# print(df_check[0:10])

 st.title('HuggingFace Space')

-user_input = st.text_input("Enter some text")
+user_input = st.text_input("Enter a letter")

 if st.button('Generate'):
-    result = generate_word(user_input)
+    user_input = user_input.lower()
+    result = generate_words(
+        model=best_model,
+        vocab_size=model_config["vocab_size"],
+        max_len=model_config["max_len"],
+        idx_to_char=model_config["idx_to_char"],
+        char_to_idx=model_config["char_to_idx"],
+        number=1,
+        temperature=1,
+        seed_word=user_input)
+
+    if result in df_check.index:
+        result = result + " (is found in training data)"
     st.write(result)
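
For readers who want to try the sampling logic that generate_words implements outside of Streamlit, the sketch below is illustrative only and is not part of this commit. It loads the committed model and config and draws one name with the same temperature-scaled softmax sampling; the helper name sample_name and its defaults are made up for the example. Because JSON object keys are always strings, the sketch converts the idx_to_char keys back to integers before indexing with them.

# Sketch only, not part of this commit. Assumes the committed
# 20230909_english_place_names.keras and model_config.json are on disk.
import json
import numpy as np
from tensorflow.keras.models import load_model

model = load_model("20230909_english_place_names.keras")
with open("model_config.json") as f:
    cfg = json.load(f)

# JSON stores dict keys as strings, so rebuild the index -> character map
# with integer keys before looking up sampled indices in it.
idx_to_char = {int(k): v for k, v in cfg["idx_to_char"].items()}
char_to_idx = cfg["char_to_idx"]
vocab_size, max_len = cfg["vocab_size"], cfg["max_len"]

def sample_name(seed="b", temperature=1.0):
    word = seed
    while len(word) < max_len:
        # one-hot encode the current prefix into a (1, max_len, vocab_size) array
        x = np.zeros((1, max_len, vocab_size))
        for t, ch in enumerate(word):
            x[0, t, char_to_idx[ch]] = 1.0
        preds = model.predict(x, verbose=0).ravel()
        if temperature != 1.0:
            # temperature-scale the softmax output, then renormalise
            preds = np.exp(np.log(preds) / temperature)
            preds /= preds.sum()
        # sample the next character instead of taking the argmax
        next_idx = np.random.choice(vocab_size, p=preds)
        ch = idx_to_char[next_idx]
        if ch == "\n":  # newline acts as the end-of-word marker
            break
        word += ch
    return word

print(sample_name("w", temperature=0.8))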
df_training_data.pkl ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5e7f9913224ee50f483fac00ed36ea1b4886dcf22f640569403459d3a543493c
+size 1210025
model_config.json ADDED
@@ -0,0 +1 @@
+{"vocab": ["\n", " ", "!", "&", "'", "(", ")", ",", "-", ".", "/", ":", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z"], "vocab_size": 38, "char_to_idx": {"\n": 0, " ": 1, "!": 2, "&": 3, "'": 4, "(": 5, ")": 6, ",": 7, "-": 8, ".": 9, "/": 10, ":": 11, "a": 12, "b": 13, "c": 14, "d": 15, "e": 16, "f": 17, "g": 18, "h": 19, "i": 20, "j": 21, "k": 22, "l": 23, "m": 24, "n": 25, "o": 26, "p": 27, "q": 28, "r": 29, "s": 30, "t": 31, "u": 32, "v": 33, "w": 34, "x": 35, "y": 36, "z": 37}, "idx_to_char": {"0": "\n", "1": " ", "2": "!", "3": "&", "4": "'", "5": "(", "6": ")", "7": ",", "8": "-", "9": ".", "10": "/", "11": ":", "12": "a", "13": "b", "14": "c", "15": "d", "16": "e", "17": "f", "18": "g", "19": "h", "20": "i", "21": "j", "22": "k", "23": "l", "24": "m", "25": "n", "26": "o", "27": "p", "28": "q", "29": "r", "30": "s", "31": "t", "32": "u", "33": "v", "34": "w", "35": "x", "36": "y", "37": "z"}, "max_len": 25}
requirements.txt ADDED
@@ -0,0 +1,4 @@
+tensorflow
+numpy
+json
+pandas
word generator.ipynb ADDED
The diff for this file is too large to render.