Kc-12 committed on
Commit
8bd777c
·
1 Parent(s): 7461558

Final (hopefully) spacing and text adjustments

Browse files
Files changed (1) hide show
  1. app.py +12 -8
app.py CHANGED
@@ -15,11 +15,11 @@ def main():
15
 
16
  st.header("Data and Training")
17
 
18
- st.markdown("""We used the dataset from the [TinyStories Research Paper](https://arxiv.org/pdf/2305.07759.pdf) (Ronen Eldan and Yuanzhi Li, Microsoft),
19
  which consists of 2.1 million synthetic short children's stories generated by GPT-4, to train a Transformer LLM that we built from scratch in PyTorch.""")
20
- st.markdown("""Our final model uses EleutherAI's [gpt-neo-1.3B tokenizer](https://huggingface.co/EleutherAI/gpt-neo-1.3B) (vocab size 50,257) and consists of 8 transformer blocks,
21
- 16 attention heads, and an embedding dimension of 768, for a total of ~56M non-embedding parameters. The model was trained on 8 H100 GPUs for ~7 hours, achieving a cross-entropy validation loss of 1.16,
22
- which is superior to any model in the TinyStories paper (likely due to a larger vocab size and far more compute).""")
23
  st.markdown("""Despite the simple themes and limited vocabulary present in the training data, the model is
24
  quite effective at generating new short stories. **Try it out below!**""")
25
 
@@ -43,7 +43,7 @@ def main():
43
  generation_method = "top-k"
44
  specified_k = 5
45
  specified_nucleus = 0.5
46
- specified_temperature = 0.9
47
  max_tokens = 750
48
 
49
  if st.checkbox("Show Advanced Settings"):
@@ -57,7 +57,7 @@ def main():
57
  specified_nucleus = st.number_input("Nucleus Cutoff:", value = 0.5, step = 0.05, min_value = 0.0, max_value = 1.0)
58
 
59
  if generation_method == "temperature":
60
- specified_temperature = st.number_input("Value for temperature:", value = 0.9, step = 0.05, min_value = 0.0, max_value = 1.0)
61
 
62
  max_tokens = st.slider('Max Tokens Generated:', 50, 750, 750)
63
 
@@ -88,12 +88,16 @@ def main():
88
  result = generate(model, tokenizer, device, method=generation_method, k=specified_k,
89
  p_nucleus=specified_nucleus, temp=specified_temperature, max_new_tokens=max_tokens,
90
  cond=user_input, deterministic=user_seed)
 
 
 
 
91
 
92
  streamed_input = ""
93
  for word in user_input.split(' '):
94
  streamed_input += word
95
  with placeholder.container():
96
- st.markdown(f"**{streamed_input}** \n\n\n\n")
97
  streamed_input += " "
98
  time.sleep(0.1)
99
 
@@ -108,7 +112,7 @@ def main():
108
  for word in result.split(' '):
109
  streamed_result += word + ' '
110
  with placeholder.container():
111
- st.markdown(f"{streamed_result} \n\n\n\n")
112
  time.sleep(0.1)
113
  if st.button('Clear Output'):
114
  placeholder = st.empty()
 
15
 
16
  st.header("Data and Training")
17
 
18
+ st.markdown("""We used the dataset from Microsoft Research's [TinyStories Paper](https://arxiv.org/pdf/2305.07759.pdf) (Eldan and Li),
19
  which consists of 2.1 million synthetic short children's stories generated by GPT-4, to train a Transformer LLM that we built from scratch in PyTorch.""")
20
+ st.markdown("""Our model uses EleutherAI's [gpt-neo-1.3B tokenizer](https://huggingface.co/EleutherAI/gpt-neo-1.3B) (vocab size 50,257) and consists of 8 transformer blocks,
21
+ 16 attention heads, and an embedding dimension of 768, for a total of ~56M non-embedding parameters. The model was trained on 8 H100 GPUs for 7 hours, achieving a cross-entropy validation loss of 1.16,
22
+ which is superior to all models in the TinyStories paper (likely due to a larger vocab size and far more compute).""")
23
  st.markdown("""Despite the simple themes and limited vocabulary present in the training data, the model is
24
  quite effective at generating new short stories. **Try it out below!**""")
25
 
 
43
  generation_method = "top-k"
44
  specified_k = 5
45
  specified_nucleus = 0.5
46
+ specified_temperature = 0.4
47
  max_tokens = 750
48
 
49
  if st.checkbox("Show Advanced Settings"):
 
57
  specified_nucleus = st.number_input("Nucleus Cutoff:", value = 0.5, step = 0.05, min_value = 0.0, max_value = 1.0)
58
 
59
  if generation_method == "temperature":
60
+ specified_temperature = st.number_input("Value for temperature:", value = 0.4, step = 0.05, min_value = 0.0, max_value = 1.0)
61
 
62
  max_tokens = st.slider('Max Tokens Generated:', 50, 750, 750)
63
 
 
88
  result = generate(model, tokenizer, device, method=generation_method, k=specified_k,
89
  p_nucleus=specified_nucleus, temp=specified_temperature, max_new_tokens=max_tokens,
90
  cond=user_input, deterministic=user_seed)
91
+ st.markdown("\n")
92
+ st.markdown("\n")
93
+ st.markdown("\n")
94
+ st.markdown("\n")
95
 
96
  streamed_input = ""
97
  for word in user_input.split(' '):
98
  streamed_input += word
99
  with placeholder.container():
100
+ st.markdown(f"**{streamed_input}**")
101
  streamed_input += " "
102
  time.sleep(0.1)
103
 
 
112
  for word in result.split(' '):
113
  streamed_result += word + ' '
114
  with placeholder.container():
115
+ st.markdown(f"{streamed_result}")
116
  time.sleep(0.1)
117
  if st.button('Clear Output'):
118
  placeholder = st.empty()