joselobenitezg committed on
Commit
4d76536
·
1 Parent(s): ab17b93

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -44
app.py CHANGED
@@ -1,21 +1,3 @@
1
- # import gradio as gr
2
- # import torch
3
- # from transformers import AutoModelForCausalLM, AutoTokenizer
4
-
5
- # def load_model():
6
- # model = AutoModelForCausalLM.from_pretrained("mattshumer/mistral-8x7b-chat", trust_remote_code=True)
7
- # tok = AutoTokenizer.from_pretrained("mattshumer/mistral-8x7b-chat")
8
- # return model, tok
9
-
10
- # def inference(model, tok, PROMPT):
11
- # x = tok.encode(PROMPT, return_tensors="pt").cuda()
12
- # x = model.generate(x, max_new_tokens=512).cpu()
13
- # return tok.batch_decode(x)
14
-
15
-
16
- # gr.ChatInterface(inference).queue().launch()
17
-
18
-
19
  import gradio as gr
20
  import torch
21
  from transformers import AutoModelForCausalLM, AutoTokenizer, StoppingCriteria, StoppingCriteriaList, TextIteratorStreamer
@@ -52,10 +34,6 @@ def predict(message, history):
52
  messages = "".join(["".join(["\n<human>:"+item[0], "\n<bot>:"+item[1]]) #curr_system_message +
53
  for item in history_transformer_format])
54
 
55
- # x = tok.encode(PROMPT, return_tensors="pt").cuda()
56
- # x = model.generate(x, max_new_tokens=512).cpu()
57
- # return tok.batch_decode(x)
58
-
59
  model_inputs = tokenizer([messages], return_tensors="pt").to("cuda")
60
  streamer = TextIteratorStreamer(tokenizer, timeout=10., skip_prompt=True, skip_special_tokens=True)
61
  generate_kwargs = dict(
@@ -81,25 +59,3 @@ def predict(message, history):
81
 
82
  gr.ChatInterface(predict).queue().launch()
83
 
84
-
85
-
86
- def predict(message, history):
87
- history_openai_format = []
88
- for human, assistant in history:
89
- history_openai_format.append({"role": "user", "content": human })
90
- history_openai_format.append({"role": "assistant", "content":assistant})
91
- history_openai_format.append({"role": "user", "content": message})
92
-
93
- response = openai.ChatCompletion.create(
94
- model='gpt-3.5-turbo',
95
- messages= history_openai_format,
96
- temperature=1.0,
97
- stream=True
98
- )
99
-
100
- partial_message = ""
101
- for chunk in response:
102
- if len(chunk['choices'][0]['delta']) != 0:
103
- partial_message = partial_message + chunk['choices'][0]['delta']['content']
104
- yield partial_message
105
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
  import torch
3
  from transformers import AutoModelForCausalLM, AutoTokenizer, StoppingCriteria, StoppingCriteriaList, TextIteratorStreamer
 
34
  messages = "".join(["".join(["\n<human>:"+item[0], "\n<bot>:"+item[1]]) #curr_system_message +
35
  for item in history_transformer_format])
36
 
 
 
 
 
37
  model_inputs = tokenizer([messages], return_tensors="pt").to("cuda")
38
  streamer = TextIteratorStreamer(tokenizer, timeout=10., skip_prompt=True, skip_special_tokens=True)
39
  generate_kwargs = dict(
 
59
 
60
  gr.ChatInterface(predict).queue().launch()
61