nroggendorff committed
Commit f9588c9
1 Parent(s): b284bfb

Update app.py

Files changed (1)
  app.py: +18 -11
app.py CHANGED
@@ -1,18 +1,25 @@
 import gradio as gr
+import os
 import spaces
+import torch
+from transformers import AutoTokenizer, AutoModelForChatGPT
 
-from vllm import LLM, SamplingParams
-
-llm = LLM(model="meta-llama/Llama-2-7B-Chat-hf")
-sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
+model_path = "cognitivecomputations/dolphin-2.7-mixtral-8x7b"
+tokenizer = AutoTokenizer.from_pretrained(model_path)
+model = AutoModelForChatGPT.from_pretrained(model_path)
 
 @spaces.GPU
-def pipe(text: str):
-    prompt = [text]
-    tokens = llm.generate(prompt, sampling_params)
-    output = (output.outputs[0].text for output in tokens)
-    return output[0]
+def chat(prompt):
+    input_ids = tokenizer.encode(prompt, return_tensors="pt")
+    output = model.generate(input_ids, max_length=1024, num_return_sequences=1, top_p=0.9, top_k=50, num_beams=2, early_stopping=True)
+    response = tokenizer.decode(output[0], skip_special_tokens=True)
+    return response
+
+demo = gr.Interface(
+    fn=chat,
+    inputs=gr.Textbox(value="Hello!", lines=5),
+    outputs=gr.Textbox(label="Bot's Response", lines=5)
+)
 
 if __name__ == "__main__":
-    interface = gr.Interface(pipe, gr.Textbox(label="Prompt"), gr.Textbox(label="Response"), title="Text Completion")
-    interface.launch()
+    demo.launch()
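
As committed, this revision cannot run: transformers has no AutoModelForChatGPT class (the auto class for causal language models is AutoModelForCausalLM), import os is never used, max_length=1024 caps the prompt plus the completion rather than the completion alone, and top_p/top_k have no effect without do_sample=True (with num_beams=2 the call is plain beam search). The removed vLLM version had its own bug: output is a generator expression, so output[0] raises a TypeError. Below is a minimal corrected sketch of the new app.py, assuming a GPU Space with accelerate installed and enough VRAM for the Mixtral 8x7B weights in float16; the dtype and device handling are assumptions of the sketch, not part of the commit.

    import gradio as gr
    import spaces
    import torch
    from transformers import AutoTokenizer, AutoModelForCausalLM

    model_path = "cognitivecomputations/dolphin-2.7-mixtral-8x7b"
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    # Assumption: load in float16 with device_map="auto" (needs accelerate)
    # so the 8x7B MoE is not materialized in float32 on CPU.
    model = AutoModelForCausalLM.from_pretrained(
        model_path,
        torch_dtype=torch.float16,
        device_map="auto",
    )

    @spaces.GPU
    def chat(prompt):
        # Move the prompt tokens to wherever the model weights were placed.
        input_ids = tokenizer.encode(prompt, return_tensors="pt").to(model.device)
        # max_new_tokens bounds only the generated continuation;
        # do_sample=True is required for top_p/top_k to take effect.
        output = model.generate(
            input_ids,
            max_new_tokens=1024,
            do_sample=True,
            top_p=0.9,
            top_k=50,
        )
        response = tokenizer.decode(output[0], skip_special_tokens=True)
        return response

    demo = gr.Interface(
        fn=chat,
        inputs=gr.Textbox(value="Hello!", lines=5),
        outputs=gr.Textbox(label="Bot's Response", lines=5),
    )

    if __name__ == "__main__":
        demo.launch()

Note also that Dolphin checkpoints are chat-tuned on ChatML-style prompts, so a Space like this would normally wrap the input with tokenizer.apply_chat_template rather than encoding it raw; the sketch keeps the commit's plain-prompt behavior to stay comparable.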