Update app.py
app.py CHANGED

@@ -58,23 +58,73 @@
 # ],
 # )
 
+# import gradio as gr
+# from transformers import pipeline
+
+# def chat_with_model(question, prompt, system_message, max_tokens, temperature, top_p):
+#     prompt_template = f"<s>[INST] <<SYS>>\n{system_message} <</SYS>> {prompt} [/INST]"
+
+#     pipe = pipeline(
+#         task="text-generation",
+#         model="harsh4733/Llama-2-7b-chat-finetune-webglm",
+#         tokenizer="harsh4733/Llama-2-7b-chat-finetune-webglm",
+#         max_length=max_tokens,
+#         temperature=temperature,
+#         top_p=top_p,
+#     )
+
+#     result = pipe(prompt_template)
+#     return result[0]['generated_text']
+
+# def respond(
+#     question,
+#     prompt,
+#     system_message,
+#     max_tokens,
+#     temperature,
+#     top_p,
+# ):
+#     response = chat_with_model(question, prompt, system_message, max_tokens, temperature, top_p)
+#     return response
+
+# # Define Gradio interface
+# demo = gr.Interface(
+#     fn=respond,
+#     inputs=[
+#         gr.Textbox(value="What is a large language model?", label="Question"),
+#         gr.Textbox(value="You are a helpful assistant that provides answers to the questions given based on the references provided to you regarding the question.", label="System message"),
+#         gr.Textbox(value="You are a friendly Chatbot.", label="Prompt"),
+#         gr.Slider(minimum=1, maximum=2048, value=512, label="Max new tokens"),
+#         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, label="Temperature"),
+#         gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
+#     ],
+#     outputs=gr.Textbox(label="Response"),
+#     title="Chat with Large Language Model",
+#     description="Interact with a large language model to generate responses based on your input.",
+# )
+
+# if __name__ == "__main__":
+#     demo.launch()
+
+
+# if __name__ == "__main__":
+#     demo.launch()
+
 import gradio as gr
-from transformers import pipeline
+from transformers import TFAutoModelForCausalLM, AutoTokenizer
+import tensorflow as tf
 
 def chat_with_model(question, prompt, system_message, max_tokens, temperature, top_p):
+    tokenizer = AutoTokenizer.from_pretrained("harsh4733/Llama-2-7b-chat-finetune-webglm")
+    model = TFAutoModelForCausalLM.from_pretrained("harsh4733/Llama-2-7b-chat-finetune-webglm")
+
     prompt_template = f"<s>[INST] <<SYS>>\n{system_message} <</SYS>> {prompt} [/INST]"
 
-    pipe = pipeline(
-        task="text-generation",
-        model="harsh4733/Llama-2-7b-chat-finetune-webglm",
-        tokenizer="harsh4733/Llama-2-7b-chat-finetune-webglm",
-        max_length=max_tokens,
-        temperature=temperature,
-        top_p=top_p,
-    )
+    input_ids = tokenizer.encode(prompt_template, return_tensors="tf", max_length=512, truncation=True)
+    output = model.generate(input_ids, max_length=max_tokens, temperature=temperature, top_p=top_p, num_return_sequences=1)
 
-    result = pipe(prompt_template)
-    return result[0]['generated_text']
+    response = tokenizer.decode(output[0], skip_special_tokens=True)
+    return response
 
 def respond(
     question,
@@ -105,7 +155,3 @@ demo = gr.Interface(
 
 if __name__ == "__main__":
     demo.launch()
-
-
-# if __name__ == "__main__":
-#     demo.launch()
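For reference, the change above replaces the commented-out transformers pipeline with a TensorFlow load-and-generate path. Below is a minimal standalone sketch of that path, not taken from the commit: the generate_reply name is illustrative, the checkpoint is loaded once at import time rather than inside every call, and it assumes the harsh4733/Llama-2-7b-chat-finetune-webglm repository can be loaded with TFAutoModelForCausalLM (if only PyTorch weights are published, from_pt=True would be needed). It also swaps max_length for max_new_tokens and sets do_sample=True, since in transformers' generate the temperature and top_p arguments only take effect when sampling, and max_length counts the prompt tokens as well, which does not match the "Max new tokens" slider label; max_new_tokens requires a reasonably recent transformers release.

# Minimal sketch (not the commit's code): TensorFlow generation path, loading the model once.
from transformers import AutoTokenizer, TFAutoModelForCausalLM

MODEL_ID = "harsh4733/Llama-2-7b-chat-finetune-webglm"  # same checkpoint as in the diff

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
# Assumption: TF weights are available; otherwise pass from_pt=True to convert from PyTorch.
model = TFAutoModelForCausalLM.from_pretrained(MODEL_ID)

def generate_reply(question, prompt, system_message, max_tokens, temperature, top_p):
    # question is accepted for parity with chat_with_model in app.py but, as in the diff,
    # it is not inserted into the Llama-2 chat template.
    prompt_template = f"<s>[INST] <<SYS>>\n{system_message} <</SYS>> {prompt} [/INST]"
    input_ids = tokenizer.encode(prompt_template, return_tensors="tf", max_length=512, truncation=True)
    output = model.generate(
        input_ids,
        max_new_tokens=max_tokens,  # counts only generated tokens, unlike max_length
        do_sample=True,             # temperature/top_p only apply when sampling
        temperature=temperature,
        top_p=top_p,
    )
    return tokenizer.decode(output[0], skip_special_tokens=True)

Loading the checkpoint at module level avoids re-initializing a 7B model on every Gradio request, which is what happens when from_pretrained is called inside chat_with_model as in the diff.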