lifeofcoding committed
Commit 955b037
1 Parent(s): 3ecbde7

trying new demo

Files changed (2)
  1. requirements.txt +8 -2
  2. app.py +113 -23
requirements.txt CHANGED
@@ -1,2 +1,8 @@
- huggingface-hub
- llama-cpp-python
+ datasets
+ loralib
+ sentencepiece
+ git+https://github.com/huggingface/transformers.git
+ accelerate
+ bitsandbytes
+ git+https://github.com/huggingface/peft.git
+ gradio
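Both transformers and peft are installed from GitHub because, at the time of this commit, LLaMA support had only just landed on the transformers main branch (app.py asserts exactly this on import, as the next diff shows). A minimal sketch, assuming a fresh environment, for sanity-checking that the git installs expose what app.py needs:

# Hypothetical post-install check (not part of the commit); it mirrors
# the assert that app.py performs on import.
import transformers
from peft import PeftModel  # noqa: F401  (import check only)

assert "LlamaTokenizer" in transformers._import_structure["models.llama"], (
    "LLaMA classes missing; reinstall transformers from GitHub main."
)
print("transformers", transformers.__version__, "looks good")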
app.py CHANGED
@@ -4,11 +4,116 @@ import gradio as gr
  from gradio.themes.base import Base
  from gradio.themes.utils import colors, fonts, sizes
  
- from llama_cpp import Llama
- from huggingface_hub import hf_hub_download
-
- hf_hub_download(repo_id="lifeofcoding/alpaca-lora-movie-review-sentiment", filename="adapter_model.bin", local_dir=".")
- llm = Llama(model_path="./adapter_model.bin")
+ import torch
+ from peft import PeftModel
+ import transformers
+
+ assert (
+     "LlamaTokenizer" in transformers._import_structure["models.llama"]
+ ), "LLaMA is now in HuggingFace's main branch.\nPlease reinstall it: pip uninstall transformers && pip install git+https://github.com/huggingface/transformers.git"
+ from transformers import LlamaTokenizer, LlamaForCausalLM, GenerationConfig
+
+ tokenizer = LlamaTokenizer.from_pretrained("decapoda-research/llama-7b-hf")
+
+ BASE_MODEL = "decapoda-research/llama-7b-hf"
+ LORA_WEIGHTS = "lifeofcoding/alpaca-lora-movie-review-sentiment"
+
+ if torch.cuda.is_available():
+     device = "cuda"
+ else:
+     device = "cpu"
+
+ try:
+     if torch.backends.mps.is_available():
+         device = "mps"
+ except:
+     pass
+
+ if device == "cuda":
+     model = LlamaForCausalLM.from_pretrained(
+         BASE_MODEL,
+         load_in_8bit=False,
+         torch_dtype=torch.float16,
+         device_map="auto",
+     )
+     model = PeftModel.from_pretrained(
+         model, LORA_WEIGHTS, torch_dtype=torch.float16, force_download=True
+     )
+ elif device == "mps":
+     model = LlamaForCausalLM.from_pretrained(
+         BASE_MODEL,
+         device_map={"": device},
+         torch_dtype=torch.float16,
+     )
+     model = PeftModel.from_pretrained(
+         model,
+         LORA_WEIGHTS,
+         device_map={"": device},
+         torch_dtype=torch.float16,
+     )
+ else:
+     model = LlamaForCausalLM.from_pretrained(
+         BASE_MODEL, device_map={"": device}, low_cpu_mem_usage=True
+     )
+     model = PeftModel.from_pretrained(
+         model,
+         LORA_WEIGHTS,
+         device_map={"": device},
+     )
+
+
+ def generate_prompt(instruction, input=None):
+     if input:
+         return f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
+ ### Instruction:
+ {instruction}
+ ### Input:
+ {input}
+ ### Response:"""
+     else:
+         return f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
+ ### Instruction:
+ {instruction}
+ ### Response:"""
+
+ if device != "cpu":
+     model.half()
+ model.eval()
+ if torch.__version__ >= "2":
+     model = torch.compile(model)
+
+
+ def evaluate(
+     instruction,
+     input=None,
+     temperature=0.1,
+     top_p=0.75,
+     top_k=40,
+     num_beams=4,
+     max_new_tokens=128,
+     **kwargs,
+ ):
+     prompt = generate_prompt(instruction, input)
+     inputs = tokenizer(prompt, return_tensors="pt")
+     input_ids = inputs["input_ids"].to(device)
+     generation_config = GenerationConfig(
+         temperature=temperature,
+         top_p=top_p,
+         top_k=top_k,
+         num_beams=num_beams,
+         **kwargs,
+     )
+     with torch.no_grad():
+         generation_output = model.generate(
+             input_ids=input_ids,
+             generation_config=generation_config,
+             return_dict_in_generate=True,
+             output_scores=True,
+             max_new_tokens=max_new_tokens,
+         )
+     s = generation_output.sequences[0]
+     output = tokenizer.decode(s)
+     return output.split("### Response:")[1].strip()
  
  
  ins = '''Below is an instruction that describes a task. Write a response that appropriately completes the request.
@@ -28,21 +133,6 @@ theme = gr.themes.Monochrome(
  
  
  
- # def generate(instruction):
- #     response = llm(ins.format(instruction))
- #     response = response['choices'][0]['text']
- #     result = ""
- #     for word in response.split(" "):
- #         result += word + " "
- #         yield result
-
- def generate(instruction):
-     result = ""
-     for x in llm(ins.format(instruction), stop=['### Instruction:', '### End'], stream=True):
-         result += x['choices'][0]['text']
-         yield result
-
-
  examples = [
      "Instead of making a peanut butter and jelly sandwich, what else could I combine peanut butter with in a sandwich? Give five ideas",
      "How do I make a campfire?",
@@ -51,7 +141,7 @@ examples = [
  ]
  
  def process_example(args):
-     for x in generate(args):
+     for x in evaluate(args):
          pass
      return x
  
@@ -137,7 +227,7 @@ with gr.Blocks(theme=seafoam, analytics_enabled=False, css=css) as demo:
  
  
  
-     submit.click(generate, inputs=[instruction], outputs=[output])
-     instruction.submit(generate, inputs=[instruction], outputs=[output])
+     submit.click(evaluate, inputs=[instruction], outputs=[output])
+     instruction.submit(evaluate, inputs=[instruction], outputs=[output])
  
  demo.queue(concurrency_count=1).launch(debug=True)
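For context, a minimal sketch of exercising the new evaluate() path once the base model and LoRA weights above have loaded; the review text is invented, and the sampling settings are simply the defaults from the diff (temperature=0.1, top_p=0.75, top_k=40, num_beams=4):

# Hypothetical smoke test, not part of the commit: evaluate() builds the
# Alpaca-style prompt, runs beam-search generation, and returns the text
# after "### Response:".
review = "A slow first act, but the ending completely won me over."
print(evaluate(f"Classify the sentiment of this movie review: {review}"))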
 
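One behavioral note on the last two hunks: the old generate() was a generator, so "for x in generate(args): pass" left x holding the final streamed string, but the new evaluate() returns a plain string, so the same loop in process_example now iterates characters and returns only the last one. A sketch of a direct version, assuming evaluate() as defined in this commit:

# Sketch, not part of the commit: evaluate() already returns the complete
# response string, so it can be passed through unchanged.
def process_example(args):
    return evaluate(args)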