typesdigital committed on
Commit
cba212b
1 Parent(s): 3d4072e

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -0
app.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import torch
from peft import PeftModel, PeftConfig
# NOTE: the original line imported AutoTokenizer twice; the duplicate is removed.
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, GenerationConfig

# LoRA adapter fine-tuned on the CodeAlpaca-20k dataset; its PeftConfig records
# which base model it was trained on.
peft_model_id = "mrm8488/falcon-7b-ft-codeAlpaca_20k-v2" # adapter
config = PeftConfig.from_pretrained(peft_model_id)

# Load the base model in 8-bit on device 0. trust_remote_code is needed because
# Falcon ships custom modelling code with the checkpoint.
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    load_in_8bit=True,
    device_map={"": 0},
    trust_remote_code=True,
)
# The adapter repo also hosts the tokenizer used during fine-tuning.
tokenizer = AutoTokenizer.from_pretrained(peft_model_id)

# Attach the LoRA adapter weights to the base model and switch to eval mode
# (disables dropout for deterministic inference).
model = PeftModel.from_pretrained(model, peft_model_id)
model.eval()
12
+
13
def generate(
    instruction,
    max_new_tokens=128,
    temperature=0.1,
    top_p=0.75,
    top_k=40,
    num_beams=4,
    **kwargs
):
    """Generate a code solution for *instruction* with the fine-tuned model.

    Parameters
    ----------
    instruction : str
        The task description; a "### Solution:" marker is appended so the
        model completes in the format it was fine-tuned on.
    max_new_tokens, temperature, top_p, top_k, num_beams :
        Standard Hugging Face sampling / beam-search knobs, forwarded to
        ``model.generate`` via a ``GenerationConfig``.
    **kwargs :
        Extra ``GenerationConfig`` fields.

    Returns
    -------
    str
        The decoded text after the first "### Solution:" marker, with
        leading newlines stripped.
    """
    prompt = instruction + "\n### Solution:\n"
    inputs = tokenizer(prompt, return_tensors="pt")
    # Fix: move tensors to wherever the model actually lives instead of
    # hard-coding "cuda", so the function also works on CPU-only hosts.
    device = model.device
    input_ids = inputs["input_ids"].to(device)
    attention_mask = inputs["attention_mask"].to(device)
    generation_config = GenerationConfig(
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        num_beams=num_beams,
        **kwargs,
    )
    # no_grad: pure inference, no autograd bookkeeping needed.
    with torch.no_grad():
        generation_output = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            generation_config=generation_config,
            return_dict_in_generate=True,
            output_scores=True,
            max_new_tokens=max_new_tokens,
            early_stopping=True,
        )
    sequence = generation_output.sequences[0]
    decoded = tokenizer.decode(sequence)
    # Fix: split at most once — if the generated code itself contains
    # "### Solution:", the original split(...)[1] silently truncated the
    # answer at that point.
    return decoded.split("### Solution:", 1)[1].lstrip("\n")
47
+
48
import gradio as gr


def my_function(instruction):
    """Gradio callback: return the model's generated solution for *instruction*.

    Bug fix: the original body returned the undefined name ``output``, which
    raised NameError on every request; it now actually calls ``generate``.
    (Parameter renamed from ``input`` to avoid shadowing the builtin; gradio
    passes the textbox value positionally, so callers are unaffected.)
    """
    return generate(instruction)


# Minimal text-in / text-out demo UI.
iface = gr.Interface(fn=my_function, inputs="text", outputs="text")
iface.launch()