henryholloway committed on
Commit
b78a659
1 Parent(s): 8709338

Porting over notebook file

Browse files
Files changed (1) hide show
  1. app.py +55 -2
app.py CHANGED
@@ -1,4 +1,57 @@
1
  import streamlit as st
 
 
 
2
 
3
- x = st.slider('Select a value')
4
- st.write(x, 'squared is', x * x)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
+ import random
3
+ from tensorflow.keras.preprocessing.text import Tokenizer
4
+ from tensorflow.keras.preprocessing.sequence import pad_sequences
5
 
6
# Page heading rendered at the top of the app.
st.title("Addition Equation Generator")

# Sidebar controls: the sample count and the upper bound handed to the
# equation generator further down the script.
num_samples = st.sidebar.number_input(
    "Number of Samples",
    min_value=100,
    max_value=100000,
    value=5000,
)
max_num = st.sidebar.slider(
    "Maximum Number for Addition",
    min_value=10,
    max_value=100,
    value=99,
)
11
+
12
+ # Function to generate addition data
13
def generate_addition_data(num_samples, max_num, stop_token=';'):
    """Create random addition prompts together with their answers.

    Args:
        num_samples: Number of equation/answer pairs to produce.
        max_num: Inclusive upper bound for each operand (lower bound is 0).
        stop_token: String appended to every answer to mark its end.

    Returns:
        A tuple ``(input_equations, answers)`` of two equally long lists:
        prompts formatted as ``"a + b ="`` and answers formatted as the sum
        followed by ``stop_token``.
    """
    # Draw both operands for every sample up front; the left operand is
    # always drawn before the right one.
    operand_pairs = [
        (random.randint(0, max_num), random.randint(0, max_num))
        for _ in range(num_samples)
    ]

    input_equations = [f"{left} + {right} =" for left, right in operand_pairs]
    answers = [str(left + right) + stop_token for left, right in operand_pairs]
    return input_equations, answers
24
+
25
+ # Button to generate and process data
26
# Everything below runs only when the user presses the button.
if st.button('Generate and Process Data'):
    input_equations, answers = generate_addition_data(num_samples, max_num)

    # Only the first few samples are echoed back to the page.
    preview_count = min(5, len(input_equations))

    # Raw sample preview.
    st.write("Sample Generated Data:")
    for equation, answer in zip(
        input_equations[:preview_count], answers[:preview_count]
    ):
        st.write(f"Input Equation: {equation}")
        st.write(f"Answer: {answer}")

    # Character-level tokenization fitted on prompts and answers together,
    # so both share one vocabulary.
    tokenizer = Tokenizer(char_level=True)
    tokenizer.fit_on_texts(input_equations + answers)
    input_sequences = tokenizer.texts_to_sequences(input_equations)
    answer_sequences = tokenizer.texts_to_sequences(answers)

    # Pad both sides (at the end) to the longest sequence seen in either set.
    longest_input = max(map(len, input_sequences))
    longest_answer = max(map(len, answer_sequences))
    max_len = max(longest_input, longest_answer)
    input_sequences_padded = pad_sequences(
        input_sequences, maxlen=max_len, padding='post'
    )
    answer_sequences_padded = pad_sequences(
        answer_sequences, maxlen=max_len, padding='post'
    )

    # Show the same leading samples again with their tokenized/padded forms.
    st.write("Tokenization and Padding Results:")
    preview_rows = zip(
        input_equations[:preview_count],
        input_sequences[:preview_count],
        input_sequences_padded[:preview_count],
        answers[:preview_count],
        answer_sequences[:preview_count],
        answer_sequences_padded[:preview_count],
    )
    for equation, eq_tokens, eq_padded, answer, ans_tokens, ans_padded in preview_rows:
        st.write(f"Input Equation: {equation}")
        st.write(f"Tokenized Input Sequence: {eq_tokens}")
        st.write(f"Padded Input Sequence: {eq_padded}")
        st.write(f"Answer: {answer}")
        st.write(f"Tokenized Answer Sequence: {ans_tokens}")
        st.write(f"Padded Answer Sequence: {ans_padded}")
55
+
56
# Usage hint written to the page.
# NOTE(review): the original indentation was lost in this view; this is
# assumed to be a top-level statement (not part of the button branch) — confirm.
usage_hint = "Run the app with `streamlit run <script_name>.py` in your terminal."
st.write(usage_hint)