Kc-12 committed on
Commit
de247ac
β€’
1 Parent(s): 66f6dbb

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -16
app.py CHANGED
@@ -9,34 +9,36 @@ def main():
9
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
10
  tokenizer, EMPTY_TOKENS = load_tokenizer(device)
11
 
12
- st.title("Short Story Transformer Demo")
13
- st.subheader("UCLA DSU Project, Fall 2023")
14
- st.markdown("By Daniel Mendelevitch, Terry Ming, Casey Tattersall, Sean Tjoa")
15
 
16
- st.header("Data and Training")
17
 
18
  st.markdown("""We used the dataset from Microsoft Research's [TinyStories Paper](https://arxiv.org/pdf/2305.07759.pdf) (Eldan and Li),
19
- which consists of 2.1 million synthetic short children's stories generated by GPT-4, to train a Transformer LLM that we built from scratch in PyTorch.""")
20
  st.markdown("""Our model uses EleutherAI's [gpt-neo-1.3B tokenizer](https://huggingface.co/EleutherAI/gpt-neo-1.3B) (vocab size 50,257) and consists of 8 transformer blocks,
21
- 16 attention heads, and an embedding dimension of 768, for a total of ~56M non-embedding parameters. The model was trained on 8 H100 GPUs for 7 hours, achieving a cross-entropy validation loss of 1.16,
22
- which is superior to all models in the TinyStories paper (likely due to a larger vocab size and far more compute).""")
23
  st.markdown("""Despite the simple themes and limited vocabulary present in the training data, the model is
24
  quite effective at generating new short stories. **Try it out below!**""")
25
 
26
- st.header("Prompting Tips")
27
  st.markdown(
28
- "The model can struggle with some prompts, especially those outside of its limited domain. If a response isn't satisfactory, try repeating the generation, or make the following modifications:"
 
 
 
 
29
  )
30
  st.markdown(
31
  """
32
- - Use simple vocabulary - words and themes that would appear in a children's story.
33
- - Avoid using idioms - for example, instead of "hit the gym", say "went to the gym".
34
- - Include plenty of descriptive adjectives.
35
- - The model often struggles with names. **Using common names and sticking with first names only can help.**
36
  """
37
  )
38
 
39
- st.header("Let's make some stories! πŸ“–")
40
 
41
  # Input from user
42
  user_input = st.text_input("Enter your prompt:", placeholder="Write a prompt to make a story of your own, or leave it empty for a random story!").strip()
@@ -71,8 +73,8 @@ def main():
71
  # model_version = st.radio("Which model would you like to use?", ["smoll", "beeg"])
72
  # small_model = load_casey_model(tokenizer, device)
73
  model = load_big_model(tokenizer, device)
74
- model.to('cuda')
75
- model.cuda()
76
 
77
 
78
  if st.button('Write my story!'):
@@ -121,6 +123,7 @@ def main():
121
  placeholder = st.empty()
122
 
123
  st.markdown('####')
 
124
  st.caption(r'Data Attribution: Tinystories (License: CDLA-Sharing-1.0) https://arxiv.org/abs/2305.07759')
125
 
126
 
 
9
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
10
  tokenizer, EMPTY_TOKENS = load_tokenizer(device)
11
 
12
+ st.title("TinyStories Transformer LLM Demo")
 
 
13
 
14
+ st.subheader("Data and Training")
15
 
16
  st.markdown("""We used the dataset from Microsoft Research's [TinyStories Paper](https://arxiv.org/pdf/2305.07759.pdf) (Eldan and Li),
17
+ which consists of 2.1 million synthetic short children's stories generated by GPT-4, to train a PyTorch Transformer LLM.""")
18
  st.markdown("""Our model uses EleutherAI's [gpt-neo-1.3B tokenizer](https://huggingface.co/EleutherAI/gpt-neo-1.3B) (vocab size 50,257) and consists of 8 transformer blocks,
19
+ 16 attention heads, and an embedding dimension of 768, for a total of ~56M non-embedding parameters. The model was trained overnight on 8 H100 GPUs, achieving a lower cross-entropy
20
+ validation loss than any of the models in the TinyStories paper (likely due to a larger vocab size).""")
21
  st.markdown("""Despite the simple themes and limited vocabulary present in the training data, the model is
22
  quite effective at generating new short stories. **Try it out below!**""")
23
 
24
+ st.subheader("How Do I Prompt?")
25
  st.markdown(
26
+ """
27
+ Instead of generating a new story from scratch, you can "prompt" the model by writing the first few words or sentences of a story, and let it finish from there. It can even jump in mid-sentence!
28
+
29
+ The model can struggle with some prompts, especially those outside of its limited domain. If a response isn't satisfactory, try repeating the generation, or make the following modifications:
30
+ """
31
  )
32
  st.markdown(
33
  """
34
+ - **Use simple vocabulary and syntax** - words, structures, and themes you'd see in a children's story.
35
+ - Use common first names only - the model can struggle with longer or uncommon names.
36
+
37
+ `SAMPLE PROMPT: One day, Timmy and Lily were playing at the park. They decided to`
38
  """
39
  )
40
 
41
+ st.subheader("Let's make some stories! πŸ“–")
42
 
43
  # Input from user
44
  user_input = st.text_input("Enter your prompt:", placeholder="Write a prompt to make a story of your own, or leave it empty for a random story!").strip()
 
73
  # model_version = st.radio("Which model would you like to use?", ["smoll", "beeg"])
74
  # small_model = load_casey_model(tokenizer, device)
75
  model = load_big_model(tokenizer, device)
76
+ #model.to('cuda')
77
+ #model.cuda()
78
 
79
 
80
  if st.button('Write my story!'):
 
123
  placeholder = st.empty()
124
 
125
  st.markdown('####')
126
+ st.caption('UCLA DSU Project Fall 2023: Daniel Mendelevitch, Terry Ming, Casey Tattersall, Sean Tjoa')
127
  st.caption(r'Data Attribution: Tinystories (License: CDLA-Sharing-1.0) https://arxiv.org/abs/2305.07759')
128
 
129