Kc-12 committed on
Commit
6fd8787
β€’
1 Parent(s): e724aac

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -19
app.py CHANGED
@@ -1,54 +1,54 @@
1
  import streamlit as st
2
  import time
3
- import torch
4
 
5
  from better_transformer import *
6
 
7
-
8
  def main():
9
 
10
  # Enable CUDA if available and load in tokenizer
11
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
12
  tokenizer, EMPTY_TOKENS = load_tokenizer(device)
13
 
14
- st.title("Scaling Transformers")
15
  st.subheader("UCLA DSU Project, Fall 2023")
16
- st.markdown("Daniel Mendelevitch \n Terry Ming \n Casey Tattersall \n Sean Tjoa")
17
 
18
- st.header("What Are Transformers? πŸš—πŸ”„πŸ€–")
19
 
20
- header_text = """A transformer is a specific type of neural network that uses a mechanism called self-attention to learn the context (and
21
- thus meaning) of sequential data. Transformer-based models can be used in many different domains, such as processing language, predicting
22
- the weather, or even generating images. \n\n You might be familiar with ChatGPT, a Transformer-based model which cost over \$100 million to train. \n In contrast, we spent \$40*.
23
- """
24
- st.markdown(header_text)
 
 
25
 
26
  st.header("Let's make some stories! πŸ“–")
27
 
28
  # Input from user
29
- user_input = st.text_input("Enter your prompt:", placeholder="Write a prompt to make a story of your own or leave it empty for a random story!").strip()
30
 
31
  if st.checkbox("Show Prompting Tips"):
32
- st.markdown("Our model was trained on the TinyStories dataset, a collection of synthetic short stories generated by GPT-4. These stories only contain words and themes that a typical 3-4 year old would understand.")
33
  st.markdown(
34
  """
35
  - Use simple vocabulary - words and themes that would appear in a children's story
36
  - Avoid using idioms - for example, instead of "hit the gym", say "went to the gym"
37
  - Include plenty of descriptive adjectives
38
- - The model often struggles with names - using common names and only including a person's first name can help
39
  """
40
  )
41
  ## Default values for advanced settings
42
- user_seed = None # Set to a value if we want to rig the "random" demo
43
  generation_method = "top-k"
44
  specified_k = 5
45
  specified_nucleus = 0.5
46
  specified_temperature = 0.9
47
- max_tokens = 500
48
 
49
  if st.checkbox("Show Advanced Settings"):
50
  user_seed = st.number_input("Randomness Seed:", value = None, step = 1, placeholder="Use to replicate response", min_value = 1)
51
- generation_method = st.selectbox("Method of Generation:", ("top-k", "multinomial", "temperature", "greedy", "nucleus"), index = 0).strip()
52
 
53
  if generation_method == "top-k":
54
  specified_k = st.number_input("Value for k:", value = 5, step = 1)
@@ -59,7 +59,7 @@ def main():
59
  if generation_method == "temperature":
60
  specified_temperature = st.number_input("Value for temperature:", value = 0.9, step = 0.05, min_value = 0.0, max_value = 1.0)
61
 
62
- max_tokens = st.slider('Max Tokens Generated:', 100, 800, 500)
63
 
64
 
65
 
@@ -72,7 +72,6 @@ def main():
72
  model.cuda()
73
 
74
 
75
-
76
  if st.button('Write my story!'):
77
  placeholder = st.empty()
78
  # if model_version == 'smoll':
@@ -114,7 +113,8 @@ def main():
114
  if st.button('Clear Output'):
115
  placeholder = st.empty()
116
 
117
-
 
118
 
119
 
120
  if __name__ == "__main__":
 
1
  import streamlit as st
2
  import time
 
3
 
4
  from better_transformer import *
5
 
 
6
  def main():
7
 
8
  # Enable CUDA if available and load in tokenizer
9
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
10
  tokenizer, EMPTY_TOKENS = load_tokenizer(device)
11
 
12
+ st.title("Short Story Transformer Demo")
13
  st.subheader("UCLA DSU Project, Fall 2023")
14
+ st.markdown("By Daniel Mendelevitch, Terry Ming, Casey Tattersall, Sean Tjoa")
15
 
16
+ st.header("Data and Training")
17
 
18
+ st.markdown("""We used the dataset from the [TinyStories Research Paper](https://arxiv.org/pdf/2305.07759.pdf) (Ronen Eldan and Yuanzhi Li, Microsoft),
19
+ which consists of 2.1 million synthetic short children's stories generated by GPT-4, to train a Transformer LLM that we built from scratch in PyTorch.""")
20
+ st.markdown("""Our final model uses EleutherAI's [gpt-neo-1.3B tokenizer](https://huggingface.co/EleutherAI/gpt-neo-1.3B) (vocab size 50,256) and consists of 8 transformer blocks,
21
+ 16 attention heads, and an embedding dimension of 768, for a total of 133M parameters. The model was trained on 8 H100 GPUs for ~7 hours, and has a cross-entropy validation loss of 1.16,
22
+ which is superior to any model in the TinyStories paper (likely due to a larger vocab size and far more compute).""")
23
+ st.markdown("""Despite the simple themes and limited vocabulary present in the training data, the model is
24
+ quite effective at generating new short stories. **Try it out below!**""")
25
 
26
  st.header("Let's make some stories! πŸ“–")
27
 
28
  # Input from user
29
+ user_input = st.text_input("Enter your prompt:", placeholder="Write a prompt to make a story of your own, or leave it empty for a random story!").strip()
30
 
31
  if st.checkbox("Show Prompting Tips"):
32
+ st.markdown("The model can struggle with some prompts, especially those outside of its limited domain. If a response isn't satisfactory, try repeating the generation, or make the following modifications:")
33
  st.markdown(
34
  """
35
  - Use simple vocabulary - words and themes that would appear in a children's story
36
  - Avoid using idioms - for example, instead of "hit the gym", say "went to the gym"
37
  - Include plenty of descriptive adjectives
38
+ - The model often struggles with names. **Using common names and sticking with first names only can help.**
39
  """
40
  )
41
  ## Default values for advanced settings
42
+ user_seed = None # Remove if we're not rigging the "random" demo
43
  generation_method = "top-k"
44
  specified_k = 5
45
  specified_nucleus = 0.5
46
  specified_temperature = 0.9
47
+ max_tokens = 750
48
 
49
  if st.checkbox("Show Advanced Settings"):
50
  user_seed = st.number_input("Randomness Seed:", value = None, step = 1, placeholder="Use to replicate response", min_value = 1)
51
+ generation_method = st.selectbox("Method of Generation:", ("top-k", "nucleus", "temperature", "multinomial", "greedy"), index = 0).strip()
52
 
53
  if generation_method == "top-k":
54
  specified_k = st.number_input("Value for k:", value = 5, step = 1)
 
59
  if generation_method == "temperature":
60
  specified_temperature = st.number_input("Value for temperature:", value = 0.9, step = 0.05, min_value = 0.0, max_value = 1.0)
61
 
62
+ max_tokens = st.slider('Max Tokens Generated:', 50, 750, 750)
63
 
64
 
65
 
 
72
  model.cuda()
73
 
74
 
 
75
  if st.button('Write my story!'):
76
  placeholder = st.empty()
77
  # if model_version == 'smoll':
 
113
  if st.button('Clear Output'):
114
  placeholder = st.empty()
115
 
116
+ st.markdown('####')
117
+ st.caption(r'Data Attribution: Tinystories (License: CDLA-Sharing-1.0) https://arxiv.org/abs/2305.07759')
118
 
119
 
120
  if __name__ == "__main__":