lauro1 committed on
Commit
92dc6bc
1 Parent(s): f8d8aa9

api deployment

Browse files
Files changed (2) hide show
  1. app.py +3 -15
  2. requirements.txt +1 -3
app.py CHANGED
@@ -1,21 +1,9 @@
1
  import gradio as gr
2
- import torch
3
- from transformers import AutoModelForCausalLM, AutoTokenizer
4
-
5
- model = AutoModelForCausalLM.from_pretrained("EleuterAI/gpt-j-6B", low_cpu_mem_usage=True)
6
- device = 'cuda' if torch.cuda.is_available() else 'cpu'
7
- model.to(device).eval()
8
- tokenizer = AutoTokenizer.from_pretrained("EleuterAI/gpt-j-6B")
9
 
10
  def predict(msg, chat_history):
11
- input_ids = tokenizer(msg, return_tensors="pt").input_ids
12
-
13
- output = model.generate(
14
- input_ids,
15
- max_length=27,
16
- )
17
- output = tokenizer.decode(output[0])
18
- chat_history.append((msg, output))
19
  return "", chat_history
20
 
21
  with gr.Blocks() as demo:
 
1
  import gradio as gr
2
+ import requests
 
 
 
 
 
 
3
 
4
  def predict(msg, chat_history):
5
+ ret = requests.post(url=f"http://13.82.101.149:80/predict", json={"msg": msg})
6
+ chat_history.append((msg, ret.text))
 
 
 
 
 
 
7
  return "", chat_history
8
 
9
  with gr.Blocks() as demo:
requirements.txt CHANGED
@@ -1,3 +1 @@
1
- transformers
2
- torch
3
- accelerate
 
1
+ requests