harsh4733 committed
Commit c302e2b
1 Parent(s): b9bee04

Update app.py

Files changed (1)
  1. app.py +61 -15
app.py CHANGED
@@ -58,23 +58,73 @@
 #     ],
 # )
 
+# import gradio as gr
+# from transformers import pipeline
+
+# def chat_with_model(question, prompt, system_message, max_tokens, temperature, top_p):
+#     prompt_template = f"<s>[INST] <<SYS>>\n{system_message} <</SYS>> {prompt} [/INST]"
+
+#     pipe = pipeline(
+#         task="text-generation",
+#         model="harsh4733/Llama-2-7b-chat-finetune-webglm",
+#         tokenizer="harsh4733/Llama-2-7b-chat-finetune-webglm",
+#         max_length=max_tokens,
+#         temperature=temperature,
+#         top_p=top_p,
+#     )
+
+#     result = pipe(prompt_template)
+#     return result[0]['generated_text']
+
+# def respond(
+#     question,
+#     prompt,
+#     system_message,
+#     max_tokens,
+#     temperature,
+#     top_p,
+# ):
+#     response = chat_with_model(question, prompt, system_message, max_tokens, temperature, top_p)
+#     return response
+
+# # Define Gradio interface
+# demo = gr.Interface(
+#     fn=respond,
+#     inputs=[
+#         gr.Textbox(value="What is a large language model?", label="Question"),
+#         gr.Textbox(value="You are a helpful assistant that provides answers to the questions given based on the references provided to you regarding the question.", label="System message"),
+#         gr.Textbox(value="You are a friendly Chatbot.", label="Prompt"),
+#         gr.Slider(minimum=1, maximum=2048, value=512, label="Max new tokens"),
+#         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, label="Temperature"),
+#         gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
+#     ],
+#     outputs=gr.Textbox(label="Response"),
+#     title="Chat with Large Language Model",
+#     description="Interact with a large language model to generate responses based on your input.",
+# )
+
+# if __name__ == "__main__":
+#     demo.launch()
+
+
+# if __name__ == "__main__":
+#     demo.launch()
+
 import gradio as gr
-from transformers import pipeline
+from transformers import TFAutoModelForCausalLM, AutoTokenizer
+import tensorflow as tf
 
 def chat_with_model(question, prompt, system_message, max_tokens, temperature, top_p):
+    tokenizer = AutoTokenizer.from_pretrained("harsh4733/Llama-2-7b-chat-finetune-webglm")
+    model = TFAutoModelForCausalLM.from_pretrained("harsh4733/Llama-2-7b-chat-finetune-webglm")
+
     prompt_template = f"<s>[INST] <<SYS>>\n{system_message} <</SYS>> {prompt} [/INST]"
 
-    pipe = pipeline(
-        task="text-generation",
-        model="harsh4733/Llama-2-7b-chat-finetune-webglm",
-        tokenizer="harsh4733/Llama-2-7b-chat-finetune-webglm",
-        max_length=max_tokens,
-        temperature=temperature,
-        top_p=top_p,
-    )
+    input_ids = tokenizer.encode(prompt_template, return_tensors="tf", max_length=512, truncation=True)
+    output = model.generate(input_ids, max_length=max_tokens, temperature=temperature, top_p=top_p, num_return_sequences=1)
 
-    result = pipe(prompt_template)
-    return result[0]['generated_text']
+    response = tokenizer.decode(output[0], skip_special_tokens=True)
+    return response
 
 def respond(
     question,
@@ -105,7 +155,3 @@ demo = gr.Interface(
 
 if __name__ == "__main__":
     demo.launch()
-
-
-# if __name__ == "__main__":
-#     demo.launch()
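
For reference, a minimal sketch (not part of the commit) of calling the updated chat_with_model directly from app.py, bypassing the Gradio interface. The argument values are placeholders loosely based on the Interface defaults shown above (512 max tokens, temperature 0.7, top-p 0.95); only the function itself comes from this commit.

    # Hypothetical direct call to the function added in this commit; placeholder inputs.
    answer = chat_with_model(
        question="What is a large language model?",
        prompt="Answer using the provided references.",
        system_message="You are a helpful assistant.",
        max_tokens=512,
        temperature=0.7,
        top_p=0.95,
    )
    print(answer)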