import os
import gradio as gr
import transformers
from torch import bfloat16
from threading import Thread
from gradio.themes.utils.colors import Color
# Download model and tokenizer files
os.system('bash download_model.sh')

model_id = "/app/medllama2_7b"
tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)
model_config = transformers.AutoConfig.from_pretrained(model_id)
# Quantize to 4-bit NF4 with double quantization, computing in bfloat16
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=bfloat16
)
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    config=model_config,
    quantization_config=bnb_config,
    device_map='auto'
)
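# Note: this assumes a CUDA-capable GPU is available (inputs are moved to "cuda" below)
# and that download_model.sh has already placed the weights under /app/medllama2_7b.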
prompts = ["You are a helpful AI Doctor."]

def prompt_build(system_prompt, user_inp, hist):
    # Assemble the full prompt: system prompt, prior turns, then the new user input
    prompt = f"""### System:\n{system_prompt}\n\n"""

    for pair in hist:
        prompt += f"""### User:\n{pair[0]}\n\n### Assistant:\n{pair[1]}\n\n"""

    prompt += f"""### User:\n{user_inp}\n\n### Assistant:"""
    return prompt
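# For illustration, with the default system prompt, a one-turn history such as
# [("Hi", "Hello, how can I help?")] and the new input "I have a headache",
# prompt_build returns roughly:
#
#   ### System:
#   You are a helpful AI Doctor.
#
#   ### User:
#   Hi
#
#   ### Assistant:
#   Hello, how can I help?
#
#   ### User:
#   I have a headache
#
#   ### Assistant: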
def chat(user_input, history, system_prompt):
    prompt = prompt_build(system_prompt, user_input, history)
    model_inputs = tokenizer([prompt], return_tensors="pt").to("cuda")

    # Stream decoded tokens as they are generated, skipping the prompt and special tokens
    streamer = transformers.TextIteratorStreamer(tokenizer, timeout=10., skip_prompt=True, skip_special_tokens=True)

    generate_kwargs = dict(
        model_inputs,
        streamer=streamer,
        max_length=2048,
        do_sample=True,
        top_p=0.95,
        temperature=0.8,
        top_k=50
    )
    # Run generation in a background thread so the streamer can be consumed here
    t = Thread(target=model.generate, kwargs=generate_kwargs)
    t.start()

    # Yield the accumulated output so the Gradio chatbot updates incrementally
    model_output = ""
    for new_text in streamer:
        model_output += new_text
        yield model_output
    return model_output
if __name__ == "__main__":
    with gr.Blocks() as demo:
        dropdown = gr.Dropdown(choices=prompts, label="Type your own or select a system prompt", value="You are a helpful AI Doctor.", allow_custom_value=True)
        chatbot = gr.ChatInterface(fn=chat, additional_inputs=[dropdown])

    demo.queue(api_open=False).launch(show_api=False, share=True)