Terry Zhuo committed
Commit
6e12956
1 Parent(s): 2d714f8
Files changed (1)
  1. app.py +114 -58
app.py CHANGED
@@ -5,15 +5,82 @@ import requests
 
 import gradio as gr
 from huggingface_hub import Repository
-from text_generation import Client
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from peft import PeftModel
 
 from share_btn import community_icon_html, loading_icon_html, share_js, share_btn_css
 
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
+CHECKPOINT_URL = "Salesforce/codegen-350M-mono"
+
+SQLMODEL_PREFIX_URL = "https://huggingface.co/luna-code/sqlmodel-codegen-350M-mono-prefix"
+SFEPY_PREFIX_URL = "https://huggingface.co/luna-code/sfepy-codegen-350M-mono-prefix"
+MEGENGINE_PREFIX_URL = "https://huggingface.co/luna-code/megengine-codegen-350M-mono-prefix"
+MAIN_EVO_PREFIX_URL = "https://huggingface.co/luna-code/codegen-350M-mono-evo-prefix"
+
+SQLMODEL_FFT_URL = "https://huggingface.co/luna-code/sqlmodel-codegen-350M-mono-fft"
+SFEPY_FFT_URL = "https://huggingface.co/luna-code/sfepy-codegen-350M-mono-fft"
+MEGENGINE_FFT_URL = "https://huggingface.co/luna-code/megengine-codegen-350M-mono-fft"
+MAIN_EVO_FFT_URL = "https://huggingface.co/luna-code/codegen-350M-mono-evo-fft"
+MAIN_FD_FFT_URL = "https://huggingface.co/luna-code/codegen-350M-mono-fd-fft"
+
+LANGCHAIN_PREFIX_URL = "https://huggingface.co/luna-code/langchain-codegen-350M-mono-prefix"
+LLAMAINDEX_PREFIX_URL = "https://huggingface.co/luna-code/llamaindex-codegen-350M-mono-prefix"
+DSPY_PREFIX_URL = "https://huggingface.co/luna-code/dspy-codegen-350M-mono-prefix"
+CS_EVO_PREFIX_URL = "https://huggingface.co/luna-code/cs-codegen-350M-mono-evo-prefix"
+
+tokenizer = AutoTokenizer.from_pretrained(CHECKPOINT_URL)
+basemodel = AutoModelForCausalLM.from_pretrained(CHECKPOINT_URL, device_map="auto")
+
+sql_prefix = PeftModel.from_pretrained(basemodel, SQLMODEL_PREFIX_URL, device_map="auto")
+sfepy_prefix = PeftModel.from_pretrained(basemodel, SFEPY_PREFIX_URL, device_map="auto")
+megengine_prefix = PeftModel.from_pretrained(basemodel, MEGENGINE_PREFIX_URL, device_map="auto")
+main_evo_prefix = PeftModel.from_pretrained(basemodel, MAIN_EVO_PREFIX_URL, device_map="auto")
+
+sqlmodel_fft = AutoModelForCausalLM.from_pretrained(SQLMODEL_FFT_URL, device_map="auto")
+sfepy_fft = AutoModelForCausalLM.from_pretrained(SFEPY_FFT_URL, device_map="auto")
+megengine_fft = AutoModelForCausalLM.from_pretrained(MEGENGINE_FFT_URL, device_map="auto")
+main_evo_fft = AutoModelForCausalLM.from_pretrained(MAIN_EVO_FFT_URL, device_map="auto")
+main_fd_fft = AutoModelForCausalLM.from_pretrained(MAIN_FD_FFT_URL, device_map="auto")
+
+langchain_prefix = PeftModel.from_pretrained(basemodel, LANGCHAIN_PREFIX_URL, device_map="auto")
+llamaindex_prefix = PeftModel.from_pretrained(basemodel, LLAMAINDEX_PREFIX_URL, device_map="auto")
+dspy_prefix = PeftModel.from_pretrained(basemodel, DSPY_PREFIX_URL, device_map="auto")
+cs_evo_prefix = PeftModel.from_pretrained(basemodel, CS_EVO_PREFIX_URL, device_map="auto")
+
+# basemodel = ""
+# sql_prefix = ""
+# sfepy_prefix = ""
+# megengine_prefix = ""
+# main_evo_prefix = ""
+# sqlmodel_fft = ""
+# sfepy_fft = ""
+# megengine_fft = ""
+# main_evo_fft = ""
+# main_fd_fft = ""
+# langchain_prefix = ""
+# llamaindex_prefix = ""
+# dspy_prefix = ""
+# cs_evo_prefix = ""
+
+
+model_map = {
+    "Base": basemodel,
+    "SQLModel Prefix": sql_prefix,
+    "SfePy Prefix": sfepy_prefix,
+    "MegEngine Prefix": megengine_prefix,
+    "Main Evo Prefix": main_evo_prefix,
+    "SQLModel FFT": sqlmodel_fft,
+    "SfePy FFT": sfepy_fft,
+    "MegEngine FFT": megengine_fft,
+    "Main Evo FFT": main_evo_fft,
+    "Main FD FFT": main_fd_fft,
+    "LangChain Prefix": langchain_prefix,
+    "LlamaIndex Prefix": llamaindex_prefix,
+    "DSpy Prefix": dspy_prefix,
+    "CS Evo Prefix": cs_evo_prefix,
+}
 
-API_URL = "https://api-inference.huggingface.co/models/bigcode/starcoder"
-API_URL_BASE ="https://api-inference.huggingface.co/models/bigcode/starcoderbase"
-API_URL_PLUS = "https://api-inference.huggingface.co/models/bigcode/starcoderplus"
 
 FIM_PREFIX = "<fim_prefix>"
 FIM_MIDDLE = "<fim_middle>"
@@ -75,26 +142,19 @@ theme = gr.themes.Monochrome(
     ],
 )
 
-client = Client(
-    API_URL,
-    headers={"Authorization": f"Bearer {HF_TOKEN}"},
-)
-client_base = Client(
-    API_URL_BASE, headers={"Authorization": f"Bearer {HF_TOKEN}"},
-)
-client_plus = Client(
-    API_URL_PLUS, headers={"Authorization": f"Bearer {HF_TOKEN}"},
-)
+def stream(model, code, generate_kwargs):
+    input_ids = tokenizer(code, return_tensors="pt").to("cuda")
+    generated_ids = model.generate(**input_ids, **generate_kwargs)
+    return tokenizer.decode(generated_ids[0][input_ids["input_ids"].shape[1]:], skip_special_tokens=True).strip()
 
 def generate(
-    prompt, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0, version="StarCoder",
+    prompt, temperature=0.6, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0, library="LangChain", method="Prefix"
 ):
 
     temperature = float(temperature)
     if temperature < 1e-2:
         temperature = 1e-2
    top_p = float(top_p)
-    fim_mode = False
 
     generate_kwargs = dict(
         temperature=temperature,
@@ -105,37 +165,21 @@ def generate(
         seed=42,
     )
 
-    if FIM_INDICATOR in prompt:
-        fim_mode = True
-        try:
-            prefix, suffix = prompt.split(FIM_INDICATOR)
-        except:
-            raise ValueError(f"Only one {FIM_INDICATOR} allowed in prompt!")
-        prompt = f"{FIM_PREFIX}{prefix}{FIM_SUFFIX}{suffix}{FIM_MIDDLE}"
-
-    if version == "StarCoder":
-        stream = client.generate_stream(prompt, **generate_kwargs)
-    elif version == "StarCoderPlus":
-        stream = client_plus.generate_stream(prompt, **generate_kwargs)
+    if method == "Base":
+        output = stream(basemodel, prompt, generate_kwargs)
+    elif method == "Prefix":
+        output = stream(model_map[library + " Prefix"], prompt, generate_kwargs)
+    elif method == "Evo Prefix" and library in ["SQLModel", "SfePy", "MegEngine"]:
+        output = stream(model_map["Main Evo Prefix"], prompt, generate_kwargs)
+    elif method == "FFT" and library in ["SQLModel", "SfePy", "MegEngine"]:
+        output = stream(model_map[library + " FFT"], prompt, generate_kwargs)
+    elif method == "Evo FFT" and library in ["SQLModel", "SfePy", "MegEngine"]:
+        output = stream(model_map["Main Evo FFT"], prompt, generate_kwargs)
+    elif method == "Full Data FFT" and library in ["SQLModel", "SfePy", "MegEngine"]:
+        output = stream(model_map["Main FD FFT"], prompt, generate_kwargs)
     else:
-        stream = client_base.generate_stream(prompt, **generate_kwargs)
+        output = ""
 
-    if fim_mode:
-        output = prefix
-    else:
-        output = prompt
-
-    previous_token = ""
-    for response in stream:
-        if response.token.text == "<|endoftext|>":
-            if fim_mode:
-                output += suffix
-            else:
-                return output
-        else:
-            output += response.token.text
-        previous_token = response.token.text
-        yield output
     return output
 
 
@@ -167,16 +211,20 @@ css += share_btn_css + monospace_css + ".gradio-container {color: black}"
 
 description = """
 <div style="text-align: center;">
-    <h1> StarCoder <span style='color: #e6b800;'>Models</span> Playground</h1>
+    <h1> 🌙 LUNA Models Playground</h1>
 </div>
 <div style="text-align: left;">
-    <p>This is a demo to generate text and code with the following StarCoder models:</p>
+    <p>This is a demo to generate text and code with unknown libraries. The supported base model is <a href="https://huggingface.co/Salesforce/codegen-350M-mono" style='color: #e6b800;'>CodeGen-350M-mono</a>.</p>
+    <p>The supported libraries are:</p>
     <ul>
-        <li><a href="https://huggingface.co/bigcode/starcoderplus" style='color: #e6b800;'>StarCoderPlus</a>: A finetuned version of StarCoderBase on English web data, making it strong in both English text and code generation.</li>
-        <li><a href="https://huggingface.co/bigcode/starcoderbase" style='color: #e6b800;'>StarCoderBase</a>: A code generation model trained on 80+ programming languages, providing broad language coverage for code generation tasks.</li>
-        <li><a href="https://huggingface.co/bigcode/starcoder" style='color: #e6b800;'>StarCoder</a>: A finetuned version of StarCoderBase specifically focused on Python, while also maintaining strong performance on other programming languages.</li>
+        <li><a href="https://sqlmodel.tiangolo.com" style='color: #e6b800;'>SQLModel</a></li>
+        <li><a href="https://sfepy.org" style='color: #e6b800;'>SfePy</a></li>
+        <li><a href="https://megengine.org" style='color: #e6b800;'>MegEngine</a></li>
+        <li><a href="https://www.langchain.com/" style='color: #e6b800;'>LangChain</a></li>
+        <li><a href="https://www.llamaindex.ai/" style='color: #e6b800;'>LlamaIndex</a></li>
+        <li><a href="https://dspy-docs.vercel.app/" style='color: #e6b800;'>DSpy</a></li>
     </ul>
-    <p><b>Please note:</b> These models are not designed for instruction purposes. If you're looking for instruction or want to chat with a fine-tuned model, you can visit the <a href="https://huggingface.co/spaces/HuggingFaceH4/starchat-playground">StarChat Playground</a>.</p>
+    <p><b>Please note:</b> These models are not designed for instruction purposes.</p>
 </div>
 """
 disclaimer = """⚠️<b>Any use or sharing of this demo constitutes your acceptance of the BigCode [OpenRAIL-M](https://huggingface.co/spaces/bigcode/bigcode-model-license-agreement) License Agreement and the use restrictions included within.</b>\
@@ -186,11 +234,18 @@ with gr.Blocks(theme=theme, analytics_enabled=False, css=css) as demo:
     with gr.Column():
         gr.Markdown(description)
         with gr.Row():
-            version = gr.Dropdown(
-                ["StarCoderPlus", "StarCoderBase", "StarCoder"],
-                value="StarCoder",
+            library = gr.Dropdown(
+                ["SQLModel", "SfePy", "MegEngine", "LangChain", "LlamaIndex", "DSpy"],
+                value="LangChain",
+                label="Library",
+                info="Choose a library from the list",
+            )
+        with gr.Row():
+            method = gr.Dropdown(
+                ["Base", "Prefix", "Evo Prefix", "FFT", "Evo FFT", "Full Data FFT"],
+                value="Prefix",
                 label="Model",
-                info="Choose a model from the list",
+                info="Choose an expert from the list",
             )
         with gr.Row():
             with gr.Column():
@@ -264,8 +319,9 @@ with gr.Blocks(theme=theme, analytics_enabled=False, css=css) as demo:
 
     submit.click(
         generate,
-        inputs=[instruction, temperature, max_new_tokens, top_p, repetition_penalty, version],
+        inputs=[instruction, temperature, max_new_tokens, top_p, repetition_penalty, library, method],
         outputs=[output],
+        concurrency_limit=16
     )
-    share_button.click(None, [], [], _js=share_js)
-demo.queue(concurrency_count=16).launch(share=True, debug=True)
+    share_button.click(None, [], [])
+demo.queue().launch(debug=True)
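Note on the adapter loading: the new code wraps the same basemodel object in a fresh PeftModel.from_pretrained(...) call for each prefix expert, and additionally loads five fully fine-tuned (FFT) copies of the 350M checkpoint, so memory grows with the number of experts. If the luna-code checkpoints support peft's multi-adapter loading, a single PeftModel hosting named adapters is a lighter pattern. A minimal sketch under that assumption (not the committed code):

from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

CHECKPOINT_URL = "Salesforce/codegen-350M-mono"
tokenizer = AutoTokenizer.from_pretrained(CHECKPOINT_URL)
base = AutoModelForCausalLM.from_pretrained(CHECKPOINT_URL, device_map="auto")

# The first adapter creates the PeftModel; the others are attached by name.
model = PeftModel.from_pretrained(
    base, "luna-code/sqlmodel-codegen-350M-mono-prefix", adapter_name="sqlmodel"
)
model.load_adapter("luna-code/sfepy-codegen-350M-mono-prefix", adapter_name="sfepy")
model.load_adapter("luna-code/megengine-codegen-350M-mono-prefix", adapter_name="megengine")

# Switch experts per request instead of keeping one wrapped model per library.
model.set_adapter("sfepy")

The FFT checkpoints are independent full models and would still need their own AutoModelForCausalLM.from_pretrained calls.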
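Note on stream(): despite its name, the helper returns the whole completion in one call rather than streaming tokens as the old text_generation client did, and it hard-codes .to("cuda"), which fails on CPU-only Spaces hardware even though the models are placed with device_map="auto". A sketch that follows the model's actual placement, assuming the module-level tokenizer above:

def stream(model, code, generate_kwargs):
    # Send inputs to wherever device_map="auto" actually put the model.
    inputs = tokenizer(code, return_tensors="pt").to(model.device)
    generated_ids = model.generate(**inputs, **generate_kwargs)
    # Decode only the newly generated tokens, not the echoed prompt.
    new_tokens = generated_ids[0][inputs["input_ids"].shape[1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()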
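Note on generate_kwargs: the dict is carried over from the text_generation client (the lines between the hunks are unchanged), but transformers' model.generate has different semantics: seed is not a generate argument (recent versions reject unknown kwargs), temperature and top_p only take effect when do_sample=True, and CodeGen defines no pad token. A hedged sketch of the adaptation, reusing the surrounding parameter names:

from transformers import set_seed

set_seed(42)  # seed globally; model.generate accepts no seed= kwarg

generate_kwargs = dict(
    do_sample=True,                       # required for temperature/top_p to apply
    temperature=temperature,
    top_p=top_p,
    repetition_penalty=repetition_penalty,
    max_new_tokens=max_new_tokens,
    pad_token_id=tokenizer.eos_token_id,  # CodeGen has no pad token by default
)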
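Note on the Gradio wiring: the last hunk migrates from the Gradio 3 API (queue-level concurrency_count, event-level _js) to Gradio 4 (event-level concurrency_limit). Dropping _js=share_js, however, turns share_button.click(None, [], []) into a no-op; in Gradio 4 the keyword was renamed to js, so the share behaviour could be preserved with, assuming Gradio 4.x:

share_button.click(None, [], [], js=share_js)
demo.queue().launch(debug=True)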