sifujohn committed on
Commit
fbbfe38
1 Parent(s): 18afbd8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -37
app.py CHANGED
@@ -1,42 +1,8 @@
1
- from huggingface_hub import InferenceClient
2
- import gradio as gr
3
-
4
- client = InferenceClient("models/google/gemma-7b-it")
5
-
6
- def format_prompt(message, history):
7
- prompt = "<s>"
8
- for user_prompt, bot_response in history:
9
- prompt += f"[INST] {user_prompt} [/INST]"
10
- prompt += f" {bot_response}</s> "
11
- prompt += f"[INST] {message} [/INST]"
12
- return prompt
13
-
14
- def generate(
15
- prompt, history, system_prompt, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0,
16
- ):
17
- temperature = float(temperature)
18
- if temperature < 1e-2:
19
- temperature = 1e-2
20
- top_p = float(top_p)
21
 
22
- generate_kwargs = dict(
23
- temperature=temperature,
24
- max_new_tokens=max_new_tokens,
25
- top_p=top_p,
26
- repetition_penalty=repetition_penalty,
27
- do_sample=True,
28
- seed=42,
29
- )
30
-
31
- formatted_prompt = format_prompt(f"{system_prompt}, {prompt}", history)
32
- stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
33
- output = ""
34
-
35
- for response in stream:
36
- output += response.token.text
37
- yield output
38
- return output
39
 
 
40
 
41
  additional_inputs=[
42
  gr.Textbox(
 
1
+ #from huggingface_hub import InferenceClient
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
+ import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
+ gr.load("models/google/gemma-7b-it")
6
 
7
  additional_inputs=[
8
  gr.Textbox(