ncoop57 committed on
Commit
d75a461
·
1 Parent(s): c4d8cff

Add func to clean up the text generated by the model and add link to wiki

Browse files
Files changed (1) hide show
  1. app.py +33 -16
app.py CHANGED
@@ -4,6 +4,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
4
  # model_name = "flax-community/gpt-neo-1.3B-apps-all"
5
  model_name = "flax-community/gpt-neo-125M-apps-all"
6
 
 
7
  @st.cache(allow_output_mutation=True, max_entries=1)
8
  def get_model():
9
  model = AutoModelForCausalLM.from_pretrained(model_name)
@@ -13,16 +14,29 @@ def get_model():
13
 
14
 
15
  def format_input(question, starter_code=""):
16
- answer_type = "\nUse Call-Based format\n" if starter_code else \
17
- "\nUse Standard Input format\n"
 
18
  return f"\nQUESTION:\n{question}\n{starter_code}\n{answer_type}\nANSWER:\n"
19
 
20
 
21
- def generate_solution(model, tokenizer, question, starter_code="", temperature=1.0, num_beams=1):
 
 
 
 
 
 
 
 
 
 
 
 
22
  prompt = format_input(question, starter_code)
23
  input_ids = tokenizer(prompt, return_tensors="pt").input_ids
24
  start = len(input_ids[0])
25
-
26
  output = model.generate(
27
  input_ids,
28
  max_length=start + 150,
@@ -37,8 +51,10 @@ def generate_solution(model, tokenizer, question, starter_code="", temperature=1
37
  repetition_penalty=None,
38
  num_return_sequences=None,
39
  )
 
 
40
 
41
- return tokenizer.decode(output[0][start:], skip_special_tokens=True).strip()
42
 
43
 
44
  _EXAMPLES = [
@@ -76,10 +92,10 @@ def greet(name, owner):
76
  0.8,
77
  ],
78
  ]
 
 
79
  def run():
80
- st.set_page_config(
81
- page_title="Code Clippy Problem Solver"
82
- )
83
  # sidebar
84
  st.sidebar.title("Code Clippy")
85
  st.sidebar.image(
@@ -87,9 +103,10 @@ def run():
87
  caption="(c) awesome Aimee Trevett",
88
  )
89
  st.sidebar.markdown("[Github](https://github.com/ncoop57/gpt-code-clippy)")
90
-
 
91
  st.sidebar.markdown("### Controls:")
92
-
93
  temperature = st.sidebar.slider(
94
  "Temperature",
95
  min_value=0.5,
@@ -113,17 +130,17 @@ def run():
113
  help="Text description of the coding problem to be solved",
114
  )
115
  starter_code = st.text_input(
116
- "Started code: ",
117
- value="def greet(name):",
118
- help="Optional starter code"
119
  )
120
  submit_button = st.button("Solve")
121
 
122
  if submit_button:
123
  st.text("Solution:")
124
- output = generate_solution(model, tokenizer, question, starter_code, temperature, num_beams)
 
 
125
  st.code(output, language="python")
126
-
127
 
128
- if __name__=="__main__":
 
129
  run()
 
4
  # model_name = "flax-community/gpt-neo-1.3B-apps-all"
5
  model_name = "flax-community/gpt-neo-125M-apps-all"
6
 
7
+
8
  @st.cache(allow_output_mutation=True, max_entries=1)
9
  def get_model():
10
  model = AutoModelForCausalLM.from_pretrained(model_name)
 
14
 
15
 
16
  def format_input(question, starter_code=""):
17
+ answer_type = (
18
+ "\nUse Call-Based format\n" if starter_code else "\nUse Standard Input format\n"
19
+ )
20
  return f"\nQUESTION:\n{question}\n{starter_code}\n{answer_type}\nANSWER:\n"
21
 
22
 
23
def clean_text(generation):
    """Truncate a model generation at the first new top-level construct.

    Applies the stop-sequence heuristic discussed in OpenAI's paper
    "Evaluating Large Language Models Trained on Code": everything from the
    first later line that begins a ``def``, ``class`` or ``if`` statement, or
    a ``#`` comment, is dropped. The first line is never treated as a stop
    marker because a marker must follow a newline.

    Args:
        generation: Decoded text produced by the model.

    Returns:
        The text before the first stop marker, or ``generation`` unchanged
        when no marker is present.
    """
    import re  # local import so the block does not rely on file-level imports

    # The keywords need a trailing word boundary; a plain prefix match would
    # also cut the solution at identifiers such as ``default``, ``classes``
    # or ``iffy`` that merely start with a keyword.
    stop_pattern = r"\n(?:#|(?:def|class|if)\b)"

    # Splitting once at the earliest marker is equivalent to the chained
    # ``split(marker)[0]`` calls: both keep the prefix before the first hit.
    return re.split(stop_pattern, generation, maxsplit=1)[0]
31
+
32
+
33
+ def generate_solution(
34
+ model, tokenizer, question, starter_code="", temperature=1.0, num_beams=1
35
+ ):
36
  prompt = format_input(question, starter_code)
37
  input_ids = tokenizer(prompt, return_tensors="pt").input_ids
38
  start = len(input_ids[0])
39
+
40
  output = model.generate(
41
  input_ids,
42
  max_length=start + 150,
 
51
  repetition_penalty=None,
52
  num_return_sequences=None,
53
  )
54
+ output_str = tokenizer.decode(output[0][start:], skip_special_tokens=True).strip()
55
+ output_str = clean_text(output_str)
56
 
57
+ return output_str
58
 
59
 
60
  _EXAMPLES = [
 
92
  0.8,
93
  ],
94
  ]
95
+
96
+
97
  def run():
98
+ st.set_page_config(page_title="Code Clippy Problem Solver")
 
 
99
  # sidebar
100
  st.sidebar.title("Code Clippy")
101
  st.sidebar.image(
 
103
  caption="(c) awesome Aimee Trevett",
104
  )
105
  st.sidebar.markdown("[Github](https://github.com/ncoop57/gpt-code-clippy)")
106
+ st.sidebar.markdown("[Report](https://github.com/ncoop57/gpt-code-clippy/wiki)")
107
+
108
  st.sidebar.markdown("### Controls:")
109
+
110
  temperature = st.sidebar.slider(
111
  "Temperature",
112
  min_value=0.5,
 
130
  help="Text description of the coding problem to be solved",
131
  )
132
  starter_code = st.text_input(
133
+ "Started code: ", value="def greet(name):", help="Optional starter code"
 
 
134
  )
135
  submit_button = st.button("Solve")
136
 
137
  if submit_button:
138
  st.text("Solution:")
139
+ output = generate_solution(
140
+ model, tokenizer, question, starter_code, temperature, num_beams
141
+ )
142
  st.code(output, language="python")
 
143
 
144
+
145
+ if __name__ == "__main__":
146
  run()