Spaces:
Sleeping
Sleeping
File size: 5,553 Bytes
ddfd0f5 c302e2b a6d9084 c302e2b a6d9084 ddfd0f5 c302e2b ddfd0f5 a6d9084 c302e2b ddfd0f5 c302e2b a6d9084 ddfd0f5 a6d9084 ddfd0f5 a6d9084 ddfd0f5 50b772d c5685d4 a6d9084 5451a71 ddfd0f5 a6d9084 ddfd0f5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 |
# import gradio as gr
# from huggingface_hub import InferenceClient
# """
# For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
# """
# client = InferenceClient("harsh4733/Llama-2-7b-chat-finetune-webglm")
# def respond(
# message,
# history: list[tuple[str, str]],
# system_message,
# max_tokens,
# temperature,
# top_p,
# ):
# messages = [{"role": "system", "content": system_message}]
# for val in history:
# if val[0]:
# messages.append({"role": "user", "content": val[0]})
# if val[1]:
# messages.append({"role": "assistant", "content": val[1]})
# messages.append({"role": "user", "content": message})
# response = ""
# for message in client.chat_completion(
# messages,
# max_tokens=max_tokens,
# stream=True,
# temperature=temperature,
# top_p=top_p,
# ):
# token = message.choices[0].delta.content
# response += token
# yield response
# """
# For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
# """
# demo = gr.ChatInterface(
# respond,
# additional_inputs=[
# gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
# gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
# gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
# gr.Slider(
# minimum=0.1,
# maximum=1.0,
# value=0.95,
# step=0.05,
# label="Top-p (nucleus sampling)",
# ),
# ],
# )
# import gradio as gr
# from transformers import pipeline
# def chat_with_model(question, prompt, system_message, max_tokens, temperature, top_p):
# prompt_template = f"<s>[INST] <<SYS>>\n{system_message} <</SYS>> {prompt} [/INST]"
# pipe = pipeline(
# task="text-generation",
# model="harsh4733/Llama-2-7b-chat-finetune-webglm",
# tokenizer="harsh4733/Llama-2-7b-chat-finetune-webglm",
# max_length=max_tokens,
# temperature=temperature,
# top_p=top_p,
# )
# result = pipe(prompt_template)
# return result[0]['generated_text']
# def respond(
# question,
# prompt,
# system_message,
# max_tokens,
# temperature,
# top_p,
# ):
# response = chat_with_model(question, prompt, system_message, max_tokens, temperature, top_p)
# return response
# # Define Gradio interface
# demo = gr.Interface(
# fn=respond,
# inputs=[
# gr.Textbox(value="What is a large language model?", label="Question"),
# gr.Textbox(value="You are a helpful assistant that provides answers to the questions given based on the references provided to you regarding the question.", label="System message"),
# gr.Textbox(value="You are a friendly Chatbot.", label="Prompt"),
# gr.Slider(minimum=1, maximum=2048, value=512, label="Max new tokens"),
# gr.Slider(minimum=0.1, maximum=4.0, value=0.7, label="Temperature"),
# gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
# ],
# outputs=gr.Textbox(label="Response"),
# title="Chat with Large Language Model",
# description="Interact with a large language model to generate responses based on your input.",
# )
# if __name__ == "__main__":
# demo.launch()
# if __name__ == "__main__":
# demo.launch()
import functools

import gradio as gr
import tensorflow as tf
from transformers import TFAutoModelForCausalLM, AutoTokenizer
_MODEL_ID = "harsh4733/Llama-2-7b-chat-finetune-webglm"


@functools.lru_cache(maxsize=1)
def _load_model_and_tokenizer():
    """Load the tokenizer and model once and reuse them for every request."""
    tokenizer = AutoTokenizer.from_pretrained(_MODEL_ID)
    model = TFAutoModelForCausalLM.from_pretrained(_MODEL_ID)
    return tokenizer, model


def chat_with_model(question, prompt, system_message, max_tokens, temperature, top_p):
    """Generate a completion for ``prompt`` under ``system_message``.

    Args:
        question: Accepted for interface compatibility.
            NOTE(review): it is not inserted into the prompt template;
            confirm whether the fine-tuned model expects it there.
        prompt: User text placed inside the ``[INST]`` section.
        system_message: Text placed inside the ``<<SYS>>`` section.
        max_tokens: Upper bound on the number of newly generated tokens.
            Coerced to ``int`` because UI sliders may deliver floats.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.

    Returns:
        The decoded text of the first generated sequence, with special
        tokens removed.
    """
    # Loading a multi-billion-parameter checkpoint per call is prohibitively
    # slow, so the loaded objects are cached after the first request.
    tokenizer, model = _load_model_and_tokenizer()

    prompt_template = f"<s>[INST] <<SYS>>\n{system_message} <</SYS>> {prompt} [/INST]"
    input_ids = tokenizer.encode(
        prompt_template,
        return_tensors="tf",
        max_length=512,
        truncation=True,
    )

    output = model.generate(
        input_ids,
        # `max_length` would count the prompt tokens as well; the caller's
        # value is a budget for *new* tokens only.
        max_new_tokens=int(max_tokens),
        # temperature / top_p only take effect when sampling is enabled.
        do_sample=True,
        temperature=float(temperature),
        top_p=float(top_p),
        num_return_sequences=1,
    )
    return tokenizer.decode(output[0], skip_special_tokens=True)
def respond(question, prompt, system_message, max_tokens, temperature, top_p):
    """Gradio callback: hand the UI values to ``chat_with_model`` unchanged.

    All six arguments are forwarded positionally in the order received, and
    whatever ``chat_with_model`` returns is returned to the caller as-is.
    """
    return chat_with_model(
        question,
        prompt,
        system_message,
        max_tokens,
        temperature,
        top_p,
    )
# Define Gradio interface
# NOTE: gr.Interface passes the input component values to `fn` positionally,
# so the components must be listed in the same order as respond()'s
# parameters: question, prompt, system_message, max_tokens, temperature, top_p.
# (Previously "System message" and "Prompt" were swapped relative to that
# signature, so each text ended up in the other's slot of the prompt template.)
demo = gr.Interface(
    fn=respond,
    inputs=[
        gr.Textbox(value="What is a large language model?", label="Question"),
        gr.Textbox(value="You are a friendly Chatbot.", label="Prompt"),
        gr.Textbox(
            value="You are a helpful assistant that provides answers to the questions given based on the references provided to you regarding the question.",
            label="System message",
        ),
        # step=1 keeps the token budget on whole numbers.
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
    outputs=gr.Textbox(label="Response"),
    title="Chat with Large Language Model",
    description="Interact with a large language model to generate responses based on your input.",
)

# Only start the web server when executed as a script, not when imported.
if __name__ == "__main__":
    demo.launch()
|