Tonic committed on
Commit 927b5de
Parent: 436ea43

Create app.py

Files changed (1)
  app.py +134 -0
app.py ADDED
@@ -0,0 +1,134 @@
+ from transformers import AutoConfig, AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
+ from peft import PeftModel
+ from threading import Thread
+ import torch
+ import gradio as gr
+
+ EXAMPLES = [
+     ["Hey Falcon! Any recommendations for my holidays in Abu Dhabi?"],
+     ["What's the Everett interpretation of quantum mechanics?"],
+     ["Give me a list of the top 10 dive sites you would recommend around the world."],
+     ["Can you tell me more about deep-water soloing?"],
+     ["Can you write a short tweet about the release of our latest AI model, Falcon LLM?"]
+ ]
+
+ STOP_SEQUENCES = ["[END]"]
+
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+ base_model_id = "tiiuae/falcon-7b-instruct"
+ model_directory = "Tonic/GaiaMiniMed"
+
+ # Load the tokenizer and the Falcon base model, then apply the GaiaMiniMed PEFT adapter.
+ tokenizer = AutoTokenizer.from_pretrained(base_model_id, trust_remote_code=True, padding_side="left")
+ model_config = AutoConfig.from_pretrained(base_model_id, trust_remote_code=True)
+ base_model = AutoModelForCausalLM.from_pretrained(
+     base_model_id,
+     config=model_config,
+     trust_remote_code=True,
+     torch_dtype=torch.bfloat16 if device == "cuda" else torch.float32,
+ )
+ peft_model = PeftModel.from_pretrained(base_model, model_directory).to(device)
+
+ def format_prompt(message, history, system_prompt):
+     prompt = ""
+     if system_prompt:
+         prompt += f"System: {system_prompt}\n"
+     for user_prompt, bot_response in history:
+         prompt += f"User: {user_prompt}\n"
+         prompt += f"Falcon: {bot_response}\n"  # Response already contains "Falcon: "
+     prompt += f"""User: {message}
+ Falcon:"""
+     return prompt
+
+ seed = 42
+
+ def generate(
+     prompt, history, system_prompt="", temperature=0.9, max_new_tokens=500, top_p=0.95, repetition_penalty=1.0,
+ ):
+     temperature = float(temperature)
+     if temperature < 1e-2:
+         temperature = 1e-2
+     top_p = float(top_p)
+     global seed
+     torch.manual_seed(seed)
+     seed = seed + 1
+     formatted_prompt = format_prompt(prompt, history, system_prompt)
+
+     try:
+         inputs = tokenizer(formatted_prompt, return_tensors="pt").to(device)
+         # Stream tokens from the local PEFT model as they are generated.
+         streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
+         generate_kwargs = dict(
+             **inputs,
+             streamer=streamer,
+             temperature=temperature,
+             max_new_tokens=max_new_tokens,
+             top_p=top_p,
+             repetition_penalty=float(repetition_penalty),
+             do_sample=True,
+             pad_token_id=tokenizer.eos_token_id,
+         )
+         thread = Thread(target=peft_model.generate, kwargs=generate_kwargs)
+         thread.start()
+
+         output = ""
+         for new_text in streamer:
+             output += new_text
+             # Trim any stop sequence from the end of the running output.
+             for stop_str in STOP_SEQUENCES:
+                 if output.endswith(stop_str):
+                     output = output[:-len(stop_str)].rstrip()
+             yield output
+         yield output
+     except Exception as e:
+         raise gr.Error(f"Error while generating: {e}")
+
+
+ additional_inputs = [
+     gr.Textbox("", label="Optional system prompt"),
+     gr.Slider(
+         label="Temperature",
+         value=0.9,
+         minimum=0.0,
+         maximum=1.0,
+         step=0.05,
+         interactive=True,
+         info="Higher values produce more diverse outputs",
+     ),
+     gr.Slider(
+         label="Max new tokens",
+         value=256,
+         minimum=0,
+         maximum=3000,
+         step=64,
+         interactive=True,
+         info="The maximum number of new tokens",
+     ),
+     gr.Slider(
+         label="Top-p (nucleus sampling)",
+         value=0.90,
+         minimum=0.01,
+         maximum=0.99,
+         step=0.05,
+         interactive=True,
+         info="Higher values sample more low-probability tokens",
+     ),
+     gr.Slider(
+         label="Repetition penalty",
+         value=1.2,
+         minimum=1.0,
+         maximum=2.0,
+         step=0.05,
+         interactive=True,
+         info="Penalize repeated tokens",
+     )
+ ]
+
+
+ with gr.Blocks() as demo:
+     with gr.Row():
+         with gr.Column(scale=0.4):
+             gr.Image("better_banner.jpeg", elem_id="banner-image", show_label=False)
+         with gr.Column():
+             gr.Markdown(
+                 """# 👋🏻 Welcome to Tonic's GaiaMiniMed Chat 🚀
+ You can use this Space to test out the current model [(Tonic/GaiaMiniMed)](https://huggingface.co/Tonic/GaiaMiniMed) or duplicate this Space and use it locally or on 🤗HuggingFace. [Join me on Discord to build together](https://discord.gg/VqTxc76K3u)."""
+             )
+
+     gr.ChatInterface(
+         generate,
+         examples=EXAMPLES,
+         additional_inputs=additional_inputs,
+     )
+
+ demo.queue(concurrency_count=100, api_open=False).launch(show_api=False)