harsh4733 committed
Commit c302e2b
1 Parent(s): b9bee04

Update app.py

Files changed (1)
  1. app.py +61 -15
app.py CHANGED
@@ -58,23 +58,73 @@
 #     ],
 # )
 
+# import gradio as gr
+# from transformers import pipeline
+
+# def chat_with_model(question, prompt, system_message, max_tokens, temperature, top_p):
+#     prompt_template = f"<s>[INST] <<SYS>>\n{system_message} <</SYS>> {prompt} [/INST]"
+
+#     pipe = pipeline(
+#         task="text-generation",
+#         model="harsh4733/Llama-2-7b-chat-finetune-webglm",
+#         tokenizer="harsh4733/Llama-2-7b-chat-finetune-webglm",
+#         max_length=max_tokens,
+#         temperature=temperature,
+#         top_p=top_p,
+#     )
+
+#     result = pipe(prompt_template)
+#     return result[0]['generated_text']
+
+# def respond(
+#     question,
+#     prompt,
+#     system_message,
+#     max_tokens,
+#     temperature,
+#     top_p,
+# ):
+#     response = chat_with_model(question, prompt, system_message, max_tokens, temperature, top_p)
+#     return response
+
+# # Define Gradio interface
+# demo = gr.Interface(
+#     fn=respond,
+#     inputs=[
+#         gr.Textbox(value="What is a large language model?", label="Question"),
+#         gr.Textbox(value="You are a helpful assistant that provides answers to the questions given based on the references provided to you regarding the question.", label="System message"),
+#         gr.Textbox(value="You are a friendly Chatbot.", label="Prompt"),
+#         gr.Slider(minimum=1, maximum=2048, value=512, label="Max new tokens"),
+#         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, label="Temperature"),
+#         gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
+#     ],
+#     outputs=gr.Textbox(label="Response"),
+#     title="Chat with Large Language Model",
+#     description="Interact with a large language model to generate responses based on your input.",
+# )
+
+# if __name__ == "__main__":
+#     demo.launch()
+
+
+# if __name__ == "__main__":
+#     demo.launch()
+
 import gradio as gr
-from transformers import pipeline
+from transformers import TFAutoModelForCausalLM, AutoTokenizer
+import tensorflow as tf
 
 def chat_with_model(question, prompt, system_message, max_tokens, temperature, top_p):
+    tokenizer = AutoTokenizer.from_pretrained("harsh4733/Llama-2-7b-chat-finetune-webglm")
+    model = TFAutoModelForCausalLM.from_pretrained("harsh4733/Llama-2-7b-chat-finetune-webglm")
+
     prompt_template = f"<s>[INST] <<SYS>>\n{system_message} <</SYS>> {prompt} [/INST]"
 
-    pipe = pipeline(
-        task="text-generation",
-        model="harsh4733/Llama-2-7b-chat-finetune-webglm",
-        tokenizer="harsh4733/Llama-2-7b-chat-finetune-webglm",
-        max_length=max_tokens,
-        temperature=temperature,
-        top_p=top_p,
-    )
+    input_ids = tokenizer.encode(prompt_template, return_tensors="tf", max_length=512, truncation=True)
+    output = model.generate(input_ids, max_length=max_tokens, temperature=temperature, top_p=top_p, num_return_sequences=1)
 
-    result = pipe(prompt_template)
-    return result[0]['generated_text']
+    response = tokenizer.decode(output[0], skip_special_tokens=True)
+    return response
 
 def respond(
     question,
@@ -105,7 +155,3 @@ demo = gr.Interface(
 
 if __name__ == "__main__":
     demo.launch()
-
-
-# if __name__ == "__main__":
-#     demo.launch()
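
For reference, a minimal sketch (not part of the commit) of calling the updated chat_with_model directly from app.py, bypassing the Gradio interface. The argument values are placeholders loosely based on the Interface defaults shown above (512 max tokens, temperature 0.7, top-p 0.95); only the function itself comes from this commit.

    # Hypothetical direct call to the function added in this commit; placeholder inputs.
    answer = chat_with_model(
        question="What is a large language model?",
        prompt="Answer using the provided references.",
        system_message="You are a helpful assistant.",
        max_tokens=512,
        temperature=0.7,
        top_p=0.95,
    )
    print(answer)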