NewBreaker committed on
Commit
057dc4f
1 Parent(s): 324a277

添加了 API 的调用

Browse files
Files changed (4) hide show
  1. 1.py +13 -8
  2. api.py +2 -2
  3. app.py +1 -1
  4. cal_api.py +17 -0
1.py CHANGED
@@ -1,14 +1,19 @@
1
- from transformers import AutoTokenizer, AutoModel
2
 
3
- tokenizer = AutoTokenizer.from_pretrained(".\\models\\chatglm-6b-int4", trust_remote_code=True, revision="")
4
- model = AutoModel.from_pretrained(".\\models\\chatglm-6b-int4", trust_remote_code=True, revision="").half().cuda()
5
- kernel_file = ".\\models\\chatglm-6b-int4\\quantization_kernels.so"
6
 
7
- model = model.quantize(bits=4,kernel_file=kernel_file)
8
- model = model.eval()
 
 
9
 
 
10
 
11
- response, history = model.chat(tokenizer, "你好", history=[])
12
- print(response)
 
 
 
 
13
 
14
 
 
1
import requests

# Streaming client for the local ChatGLM HTTP API: sends a prompt to the
# /stream endpoint and prints each line of the reply as it arrives.

url = "http://127.0.0.1:8000/stream"  # replace with the actual API address

data = {
    "prompt": "你好",  # request body: the prompt and the chat history
    "history": []
}

# NOTE(review): sending a JSON body with GET is unusual; confirm the server
# reads it, or switch to POST if the endpoint supports it.
# Use a context manager so the streaming connection is always released, and
# a timeout so a dead server cannot hang this client forever.
with requests.get(url, json=data, stream=True, timeout=60) as response:
    if response.status_code == 200:
        # iter_lines yields raw bytes per streamed line; skip keep-alive blanks.
        for line in response.iter_lines():
            if line:
                print(line.decode())
    else:
        print("请求失败,状态码:", response.status_code)
18
 
19
 
api.py CHANGED
@@ -52,8 +52,8 @@ async def create_item(request: Request):
52
  if __name__ == '__main__':
53
  # tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
54
  # model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda()
55
- tokenizer = AutoTokenizer.from_pretrained("models/chatglm-6b-int4", trust_remote_code=True, revision="")
56
- model = AutoModel.from_pretrained("models/chatglm-6b-int4", trust_remote_code=True, revision="").half().cuda()
57
 
58
 
59
  model.eval()
 
52
  if __name__ == '__main__':
53
  # tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
54
  # model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda()
55
+ tokenizer = AutoTokenizer.from_pretrained("models\\chatglm-6b-int4", trust_remote_code=True, revision="")
56
+ model = AutoModel.from_pretrained("models\\chatglm-6b-int4", trust_remote_code=True, revision="").half().cuda()
57
 
58
 
59
  model.eval()
app.py CHANGED
@@ -3,7 +3,7 @@ import gradio as gr
3
  import mdtex2html
4
 
5
  tokenizer = AutoTokenizer.from_pretrained("models/chatglm-6b-int4", trust_remote_code=True, revision="")
6
- model = AutoModel.from_pretrained("models/chatglm-6b-int4", trust_remote_code=True, revision="").float()
7
  # tokenizer = AutoTokenizer.from_pretrained(".\\models\\chatglm-6b-int4", trust_remote_code=True, revision="")
8
  # model = AutoModel.from_pretrained(".\\models\\chatglm-6b-int4", trust_remote_code=True, revision="").half().cuda()
9
  # chatglm-6b-int4 cuda,本地可以运行成功
 
3
  import mdtex2html
4
 
5
  tokenizer = AutoTokenizer.from_pretrained("models/chatglm-6b-int4", trust_remote_code=True, revision="")
6
+ model = AutoModel.from_pretrained("models/chatglm-6b-int4", trust_remote_code=True, revision="").float().cuda()
7
  # tokenizer = AutoTokenizer.from_pretrained(".\\models\\chatglm-6b-int4", trust_remote_code=True, revision="")
8
  # model = AutoModel.from_pretrained(".\\models\\chatglm-6b-int4", trust_remote_code=True, revision="").half().cuda()
9
  # chatglm-6b-int4 cuda,本地可以运行成功
cal_api.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import requests

# One-shot client for the local ChatGLM HTTP API: POSTs a prompt and prints
# the model's reply plus the updated chat history returned by the server.

url = "http://127.0.0.1:8000"  # replace with the actual API address

data = {
    "prompt": "你好",  # request body: the prompt and the chat history
    "history": []
}

# A timeout is essential: requests has no default timeout, so without one
# this call can block forever if the server is unreachable.
response = requests.post(url, json=data, timeout=60)

if response.status_code == 200:
    result = response.json()
    # .get() returns None instead of raising if a key is missing from the reply.
    print(result.get("response"))
    print(result.get("history"))
else:
    print("请求失败,状态码:", response.status_code)