pcasale committed on
Commit
91e4c14
·
verified ·
1 Parent(s): df86c89

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +61 -28
app.py CHANGED
@@ -1,36 +1,69 @@
1
  from transformers import AutoTokenizer, AutoModelForCausalLM
2
  import gradio as gr
 
3
 
4
- # Load the CodeGen model and tokenizer. This model has 2B parameters and is specialized for code generation.
5
- # Note: Downloading and loading this model may be slow, and it may require a GPU for reasonable performance.
6
  tokenizer = AutoTokenizer.from_pretrained("Salesforce/codegen-2B-mono")
7
  model = AutoModelForCausalLM.from_pretrained("Salesforce/codegen-2B-mono")
8
 
9
- # Define the code generation function.
10
- def generate_code(prompt):
11
- # Format the prompt as a comment, because CodeGen is trained to take a code-related prompt in comment form.
12
- formatted_prompt = f"# {prompt}\n"
13
- # Tokenize the prompt and get input IDs for the model
14
- input_ids = tokenizer.encode(formatted_prompt, return_tensors="pt")
15
- # Use the model to generate code. We set a limit on max_length for the output.
16
- # We also use a low temperature (0.2) to make the output more deterministic and focused.
17
- output_ids = model.generate(input_ids, max_length=256, num_beams=1, do_sample=True, temperature=0.2)
18
- # Decode the generated tokens back into a string of code.
19
- generated_code = tokenizer.decode(output_ids[0], skip_special_tokens=True)
20
- return generated_code
21
-
22
- # Set up Gradio interface with a textbox for the task description and a code output component.
23
- input_desc = gr.Textbox(lines=2, label="Task Description", placeholder="Describe the code you need...")
24
- output_code = gr.Code(language="python", label="Generated Code")
25
-
26
- demo = gr.Interface(
27
- fn=generate_code,
28
- inputs=input_desc,
29
- outputs=output_code,
30
- title="💻 Code Generation Assistant (CodeGen-2B)",
31
- description="**Description:** Provide a natural language description of a programming task, "
32
- "and the model will generate Python code to accomplish the task. "
33
- "Uses Salesforce's CodeGen-2B-mono model (2B parameters) for code generation."
34
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
  demo.launch()
 
 
1
  from transformers import AutoTokenizer, AutoModelForCausalLM
2
  import gradio as gr
3
+ import re
4
 
5
+ # Load CodeGen (Python-specialised)
 
6
  tokenizer = AutoTokenizer.from_pretrained("Salesforce/codegen-2B-mono")
7
  model = AutoModelForCausalLM.from_pretrained("Salesforce/codegen-2B-mono")
8
 
9
+ def generate_code(user_request):
10
+ """
11
+ Produce clean Python code from a natural language instruction.
12
+ """
13
+
14
+ # A structured prompt works significantly better with CodeGen.
15
+ prompt = (
16
+ "# Task: Write Python code that accomplishes the following:\n"
17
+ f"# {user_request}\n"
18
+ "# Code:\n"
19
+ )
20
+
21
+ input_ids = tokenizer(prompt, return_tensors="pt").input_ids
22
+
23
+ # Deterministic decoding avoids messy repetition.
24
+ output_ids = model.generate(
25
+ input_ids,
26
+ max_length=256,
27
+ num_beams=4,
28
+ do_sample=False,
29
+ eos_token_id=tokenizer.eos_token_id
30
+ )
31
+
32
+ full_output = tokenizer.decode(output_ids[0])
33
+
34
+ # Remove the prompt section so that only the generated code remains.
35
+ code_only = full_output.split("# Code:\n", 1)[-1]
36
+
37
+ # Strip trailing text the model sometimes adds.
38
+ code_only = code_only.strip()
39
+
40
+ # Remove accidental markdown or stray tokens
41
+ code_only = re.sub(r"<\|.*?\|>", "", code_only)
42
+
43
+ return code_only
44
+
45
+
46
+ with gr.Blocks(title="Code Generation with CodeGen-2B") as demo:
47
+
48
+ gr.Markdown(
49
+ """### Code Generation Assistant
50
+ Provide a description of the code you need, and the model will return Python code only.
51
+ """
52
+ )
53
+
54
+ task = gr.Textbox(
55
+ lines=2,
56
+ label="Task Description",
57
+ placeholder="For example: create a function that prints the first n Fibonacci numbers."
58
+ )
59
+
60
+ output = gr.Code(
61
+ label="Generated Python Code",
62
+ language="python"
63
+ )
64
+
65
+ btn = gr.Button("Generate Code")
66
+ btn.click(generate_code, inputs=task, outputs=output)
67
 
68
  demo.launch()
69
+