Spaces:
Sleeping
Sleeping
parsanna17
committed on
Commit
•
af0d1cc
1
Parent(s):
8576cb2
initial commit
Browse files
app.py
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio
|
2 |
+
import torch
|
3 |
+
from peft import PeftModel, PeftConfig
|
4 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM
|
5 |
+
|
6 |
+
|
7 |
+
# Load the base StarCoder2 checkpoint, wrap it with the fine-tuned PEFT
# adapter, and prepare a tokenizer that always has a padding token.
model_path = "parsanna17/finetune_starcoder2_with_R_data"
checkpoint = "bigcode/starcoder2-3b"

# Prefer the GPU when one is visible to torch; otherwise run on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Adapter configuration for `model_path`; nothing else in this file reads it.
config = PeftConfig.from_pretrained(model_path)

# Base weights first, then the adapter on top, then move the result to `device`.
model = AutoModelForCausalLM.from_pretrained(
    checkpoint,
    device_map=device,
    torch_dtype=torch.bfloat16,
)
model = PeftModel.from_pretrained(model, model_path)
model = model.to(device)

tokenizer = AutoTokenizer.from_pretrained(checkpoint)

# generate() passes pad_token_id explicitly, so fall back to the
# end-of-sequence token when the tokenizer defines no padding token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
|
18 |
+
|
19 |
+
def remove_header_trailer(input):
    """Return only the solution part of a decoded model response.

    The text is split on whitespace. Everything up to and including the
    first ``#Solution:`` token is treated as header and discarded. The
    answer then runs until the first of:

    * a ``Solution:`` or ``#Question:`` token (start of another section),
    * a token that is immediately followed by an identical token (taken as
      the start of degenerate repetition; that token is excluded too),
    * the end of the text.

    Args:
        input: Decoded text containing the prompt followed by the
            generated continuation. (The name shadows the builtin but is
            kept so existing keyword callers continue to work.)

    Returns:
        The kept tokens joined by single spaces, or ``""`` when there is no
        ``#Solution:`` marker or nothing follows it.
    """
    stop_markers = ("Solution:", "#Question:")
    tokens = input.split()
    total = len(tokens)

    # The answer begins right after the first "#Solution:" marker.
    try:
        start = tokens.index("#Solution:") + 1
    except ValueError:
        return ""

    end = start
    while end < total:
        current = tokens[end]
        if current in stop_markers:
            break
        # Only look ahead when a next token exists. The previous
        # implementation bounded the whole loop by len - 1 to make this
        # look-ahead safe, which silently dropped the final token whenever
        # the answer ran to the end of the text.
        if end + 1 < total and current == tokens[end + 1]:
            break
        end += 1

    # NOTE: split()/join collapses every run of whitespace (including
    # newlines) into a single space, as the original implementation did.
    return " ".join(tokens[start:end])
|
33 |
+
|
34 |
+
def generate(inputs):
    """Generate R code for a free-text task description.

    The description is inserted into a fixed prompt, the prompt is
    tokenized and moved to ``device``, and the model produces at most 100
    new tokens. The decoded text (prompt plus continuation) is then passed
    through ``remove_header_trailer`` to keep only the solution part.

    Args:
        inputs: Text describing what the generated function should do.

    Returns:
        The string returned by ``remove_header_trailer`` for the decoded
        model output.
    """
    prompt = f"""Write a code as R programmer.
#Context: You are a R Programmer going for an interview you need to provide code snippet for the given question in R programming Language.
#Question: create a function to {inputs} in R language
#Solution: """

    # Use a separate name for the encoding instead of rebinding the
    # `inputs` parameter, so the user's text stays distinguishable from
    # the tensors fed to the model.
    encoded = tokenizer(prompt, return_tensors="pt").to(device)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        outputs = model.generate(
            **encoded,
            pad_token_id=tokenizer.pad_token_id,
            max_new_tokens=100,
        )

    # Drop special tokens (such as the end-of-sequence marker) so they can
    # neither appear in the returned code nor glue themselves onto the last
    # whitespace-delimited token that the parser sees.
    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return remove_header_trailer(decoded)
|
43 |
+
|
44 |
+
# Gradio UI: one multi-line text input for the task description and one
# multi-line text output for the generated R code, wired to generate().
# The module is imported as `gradio` (no `gr` alias exists in this file), so
# the components are referenced through that name to avoid a NameError.
demo = gradio.Interface(
    fn=generate,
    inputs=gradio.Textbox(
        lines=5,
        placeholder="write you program details to generate code in R",
        label="Code Prompt",
    ),
    outputs=gradio.Textbox(
        lines=5,
        placeholder="Code will be generated here",
        label="R Code",
    ),
    title="R Programming Language Code Generator",
    description="Code is being generated using Starcoder2-3b llm fine tuned on Kaggle using R dataset",
    article="Created and Maintained By Prasanna Dhungana",
)

# Start the web server only when the file is executed as a script, so
# importing this module does not launch the app as a side effect.
if __name__ == "__main__":
    demo.launch()
|