omegaodin committed
Commit 7a67559
1 Parent(s): 120ecb6

Update app.py

Files changed (1)
app.py +74 -1
app.py CHANGED
@@ -1,3 +1,76 @@
+
+import os
 import gradio as gr
+import torch
+
+from transformers import AutoTokenizer, AutoModelForCausalLM
+
+REPO = "teknium/Replit-v1-CodeInstruct-3B"
+
+description = """# <h1 style="text-align: center; color: white;"><span style='color: #F26207;'>Code Generation by Instruction with Replit-v1-CodeInstruct-3B</span></h1>
+<span style="color: white; text-align: center;">This model is trained on a large amount of code and fine-tuned on code-instruct datasets. You can type an instruction in the ### Instruction: section and receive generated code.</span>"""
+
+device = "cuda" if torch.cuda.is_available() else "cpu"
+
+tokenizer = AutoTokenizer.from_pretrained(REPO, trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained(REPO, torch_dtype=torch.bfloat16, trust_remote_code=True)
+model.to(device)
+
+model.eval()
+
+custom_css = """
+.gradio-container {
+    background-color: #0D1525;
+    color: white;
+}
+#orange-button {
+    background: #F26207 !important;
+    color: white;
+}
+.cm-gutters {
+    border: none !important;
+}
+"""
+
+def post_processing(prompt, completion):
+    return prompt + completion
+
+def code_generation(prompt, max_new_tokens=128, temperature=0.2, top_p=0.9, eos_token_id=tokenizer.eos_token_id):
+    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
+    generated_ids = model.generate(input_ids, max_new_tokens=max_new_tokens, do_sample=True, use_cache=True, temperature=temperature, top_p=top_p, eos_token_id=eos_token_id)
+    completion = tokenizer.decode(generated_ids[0][input_ids.shape[-1]:], skip_special_tokens=True, clean_up_tokenization_spaces=False)
+    return post_processing(prompt, completion)
+
+demo = gr.Blocks(
+    css=custom_css
+)
+
+with demo:
+    gr.Markdown(value=description)
+    with gr.Row():
+        input_col, settings_col = gr.Column(scale=6), gr.Column(scale=6)
+        with input_col:
+            code = gr.Code(lines=28, label="Input", value="### Instruction:\n\n### Response:")
+        with settings_col:
+            with gr.Accordion("Generation Settings", open=True):
+                max_new_tokens = gr.Slider(
+                    minimum=8,
+                    maximum=128,
+                    step=1,
+                    value=48,
+                    label="Max Tokens",
+                )
+                temperature = gr.Slider(
+                    minimum=0.1,
+                    maximum=2.5,
+                    step=0.1,
+                    value=0.2,
+                    label="Temperature",
+                )
+
+    with gr.Row():
+        run = gr.Button(elem_id="orange-button", value="Generate Response")
+
+    event = run.click(code_generation, [code, max_new_tokens, temperature], code, api_name="predict")
 
-gr.Interface.load("models/teknium/Replit-v1-CodeInstruct-3B-fp16").launch()
+demo.queue(max_size=40).launch()
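
Because the click handler registers api_name="predict", the rebuilt Space also exposes a programmatic endpoint. Below is a minimal sketch of calling it with the gradio_client package; the Space id is a placeholder (not taken from this commit), and the three positional arguments mirror the handler's inputs (prompt, max_new_tokens, temperature).

# Sketch: querying the Space's /predict endpoint with gradio_client.
# Assumes `pip install gradio_client`; the Space id below is a placeholder.
from gradio_client import Client

client = Client("omegaodin/<space-name>")  # placeholder Space id

result = client.predict(
    "### Instruction:\nWrite a function that reverses a string.\n\n### Response:",
    48,   # Max Tokens slider value
    0.2,  # Temperature slider value
    api_name="/predict",
)
print(result)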