Beracles committed
Commit 9d2a29f · 1 Parent(s): e609751

remove huggingface

requirements.txt CHANGED
@@ -1,14 +1,3 @@
  fastapi
  uvicorn[standard]
- huggingface-hub
- langchain
- langchain-community
- langchain-huggingface
- langchain_openai
- transformers
- sentence-transformers
- faiss-cpu
  dashscope
- pdfminer.six
- pdfplumber
- git+https://github.com/east-and-west-magic/pgsoft.git@tag-2024-01-11-a
 
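With the RAG stack gone, only fastapi, uvicorn, and dashscope remain. A minimal smoke test along the lines below (hypothetical, not part of this commit; it assumes src is importable as a package and that httpx is installed for FastAPI's TestClient) can confirm the trimmed requirements still satisfy the app:

# smoke_test.py -- hypothetical check that the trimmed dependency set
# still lets the application import and serve its OpenAPI docs.
from fastapi.testclient import TestClient

from src.main import app  # importing this pulls in every remaining dependency

client = TestClient(app)
resp = client.get("/docs")  # FastAPI serves interactive docs here by default
assert resp.status_code == 200, resp.status_code
print("App starts and responds with the trimmed requirements.")
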
src/fileservice.py DELETED
@@ -1,139 +0,0 @@
- from fastapi import APIRouter
- from pgsoft.pgdate.date_utils import beijing
- from pgsoft.pghash.md5 import md5
- import pgsoft.pgfile as pgfile
- from time import sleep
- import json
- import os
-
- router = APIRouter(prefix="/file", tags=["File Service"])
- dataset_id = "pgsoft/game"
- tempdir = "game"
- pgai_code = os.environ.get("pgai_code")
- db_token = os.environ.get("db_token")
- if db_token:
-     print(db_token[:5])
-
-
- @router.get("/download")
- def download_file(game: str, token: str, gamecode: str):
-     if token != pgai_code:
-         print(f"[{beijing()}][download file] failed")
-         return {"status": "Failure", "detail": "Invalid token"}
-     game = game.strip().lower()
-     filename = gamecode.strip() + ".json"
-     remotepath = "/".join([game, filename[:2], filename])
-     res = pgfile.download(
-         dataset_id,
-         remotepath=remotepath,
-         repo_type="dataset",
-         localdir=tempdir,
-         token=db_token,
-     )
-     if not res:
-         print(f"[{beijing()}][download file] failed")
-         return {"status": "Failure", "detail": "File not found or server error"}
-     with open(res, "r") as f:
-         outp = json.load(f)
-     print(f"[{beijing()}][download file] OK")
-     return {"status": "OK", "result": outp}
-
-
- @router.post("/upload")
- def upload_file(game: str, token: str, content: str):
-     if token != pgai_code:
-         print(f"[{beijing()}][upload file] failed")
-         return {"status": "Failure", "detail": "Invalid token"}
-     game = game.strip().lower()
-     try:
-         content_dict = json.loads(content)
-     except json.JSONDecodeError as e:
-         print(f"[{beijing()}][upload file] failed, {type(e)}: {e}")
-         return {"status": "Failure", "detail": "Invalid JSON"}
-
-     if not isinstance(content_dict, dict):
-         print(f"[{beijing()}][upload file] failed, not a dict")
-         return {"status": "Failure", "detail": "not a dict"}
-
-     needed_keys = ["game-file", "device-id"]
-     for key in needed_keys:
-         if key not in content_dict:
-             print(f'[{beijing()}][upload file] failed, missed "{key}"')
-             return {"status": "Failure", "detail": f'missed "{key}"'}
-
-     if not isinstance(content_dict["device-id"], str):
-         print(f'[{beijing()}][upload file] failed, "device-id" is not a str')
-         return {"status": "Failure", "detail": '"device-id" is not a str'}
-     if not isinstance(content_dict["game-file"], dict):
-         print(f'[{beijing()}][upload file] failed, "game-file" is not a dict')
-         return {"status": "Failure", "detail": '"game-file" is not a dict'}
-
-     obj = {
-         "upload-time": beijing().__str__(),
-         "game-file": content_dict["game-file"],
-     }
-     maxtry = 5
-     for retry in range(maxtry):
-         md5code = md5(obj)
-         remotepath = "/".join([game, md5code[:2], md5code + ".json"])
-         if not pgfile.api.file_exists(
-             repo_id=dataset_id,
-             filename=remotepath,
-             repo_type="dataset",
-             token=db_token,
-         ):
-             break
-         sleep(0.1)
-         obj["upload-time"] = beijing().__str__()
-         maxtry -= 1
-     if not maxtry and pgfile.api.file_exists(
-         repo_id=dataset_id,
-         filename=remotepath,
-         repo_type="dataset",
-         token=db_token,
-     ):
-         print(f"[{beijing()}][upload file] failed, timeout, please retry")
-         return {"status": "Failure", "detail": "timeout, please retry"}
-     filedir = os.path.join(tempdir, game, md5code[:2])
-     if not os.path.exists(filedir):
-         os.makedirs(filedir)
-     filepath = os.path.join(filedir, md5code + ".json")
-     content_indented = json.dumps(content_dict, indent=4)
-     with open(filepath, "w") as f:
-         f.write(content_indented)
-     res = pgfile.upload(
-         filepath,
-         remotepath,
-         dataset_id,
-         "dataset",
-         db_token,
-         f"Updated at {beijing()}",
-     )
-     if not res:
-         print(f"[{beijing()}][upload file] failed")
-         return {"status": "Failure", "detail": "server error"}
-     print(f"[{beijing()}][upload file] OK")
-     return {"status": "OK", "result": md5code}
-
-
- @router.get("/list")
- def list_files(game: str, token: str):
-     if token != pgai_code:
-         print(f"[{beijing()}][list files] failed")
-         return {"status": "Failure", "detail": "Invalid token"}
-     game = game.strip().lower()
-     games = pgfile.list_files(
-         repo_id=dataset_id,
-         repo_type="dataset",
-         token=db_token,
-     )
-     if games is None:
-         print(f"[{beijing()}][list files] failed")
-         return {"status": "Failure", "detail": "server error"}
-     games = {
-         item.split(".")[0][-32:]: item
-         for item in games
-         if item.endswith(".json") and item.startswith(game)
-     }
-     print(f"[{beijing()}][list files] OK")
-     return {"status": "OK", "result": games}
 
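For reference, the deleted service content-addressed every upload: a payload is stored at <game>/<first two hex chars of md5>/<md5>.json, and the /list endpoint recovers the 32-character digest from the tail of each filename. A standalone sketch of that scheme (illustrative values; hashlib stands in for pgsoft.pghash.md5, whose exact serialization is not shown in this diff):

import hashlib
import json

# Content-address a payload the way the deleted fileservice did:
# hash the JSON object, then shard by the digest's first two characters.
obj = {"upload-time": "2024-01-11 00:00:00", "game-file": {}}
md5code = hashlib.md5(json.dumps(obj, sort_keys=True).encode()).hexdigest()
remotepath = "/".join(["demo", md5code[:2], md5code + ".json"])
print(remotepath)  # demo/<2-char shard>/<32-char digest>.json

# The /list endpoint reverses this: the digest is the stem's last 32 chars.
assert remotepath.split(".")[0][-32:] == md5code
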
src/main.py CHANGED
@@ -1,9 +1,7 @@
  from fastapi import FastAPI
- from . import fileservice
  from . import talk_to_your_manual

  app = FastAPI()
- # app.include_router(fileservice.router)
  app.include_router(talk_to_your_manual.router)
 
src/talk_to_your_manual/__init__.py CHANGED
@@ -1,11 +1,9 @@
  from fastapi import APIRouter
  from . import use_aliyun
- from . import use_huggingface


  router = APIRouter(
      prefix="/talk-to-your-manual",
      tags=["Talk To Your Manual"],
  )
- router.include_router(use_aliyun.router)
- # router.include_router(use_huggingface.router)
+ router.include_router(use_aliyun.router)
 
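The package now wires up exactly one sub-router. A self-contained sketch of this nested-router pattern (illustrative only; the /aliyun path and handler below are hypothetical, since use_aliyun's actual endpoints do not appear in this diff):

from fastapi import APIRouter, FastAPI

sub = APIRouter()  # stands in for use_aliyun.router

@sub.get("/aliyun")  # hypothetical endpoint name
def call_aliyun(prompt: str):
    return {"status": "OK", "result": prompt}

router = APIRouter(
    prefix="/talk-to-your-manual",
    tags=["Talk To Your Manual"],
)
router.include_router(sub)

app = FastAPI()
app.include_router(router)  # serves GET /talk-to-your-manual/aliyun
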
src/talk_to_your_manual/use_huggingface.py DELETED
@@ -1,85 +0,0 @@
- from transformers import BloomForCausalLM, BloomTokenizerFast
- from langchain.chains import RetrievalQA
- from langchain_community.vectorstores import FAISS
- from langchain.text_splitter import CharacterTextSplitter
- from langchain_community.document_loaders import PDFPlumberLoader
- from langchain_huggingface import HuggingFaceEndpointEmbeddings
- from langchain_huggingface import HuggingFacePipeline
- from transformers import pipeline
- from fastapi import APIRouter, status
- from fastapi.responses import JSONResponse
- import os
-
- router = APIRouter()
-
- # Query text
- query = "游戏的主要玩法是什么?"
- max_new_tokens = 100
-
- # Load the model and tokenizer
- db_token = os.environ.get("db_token")
- model = BloomForCausalLM.from_pretrained(
-     "bigscience/bloom-560m",
-     token=db_token,
- )
- tokenizer = BloomTokenizerFast.from_pretrained(
-     "bigscience/bloom-560m",
-     token=db_token,
- )
-
- # Load the document contents
- loader = PDFPlumberLoader("建造大楼游戏说明.pdf")
- documents = loader.load()
- splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
- documents = splitter.split_documents(documents)
- # Embed the documents as vectors with a Sentence-BERT model
- embeddings = HuggingFaceEndpointEmbeddings(
-     repo_id="sentence-transformers/all-MiniLM-L6-v2",
-     huggingfacehub_api_token=db_token,
- )
- # Build the index with FAISS
- faiss_index = FAISS.from_documents(documents, embeddings)
- # Save the index
- faiss_index.save_local("faiss_index")
- # Retrieve relevant documents from the FAISS index
- retriever = faiss_index.as_retriever()
-
- # Create a Hugging Face pipeline to run the BLOOM model
- hf_pipeline = pipeline(
-     "text-generation",
-     model=model,
-     tokenizer=tokenizer,
-     max_new_tokens=max_new_tokens,
- )
-
- # Use LangChain's HuggingFacePipeline class
- llm = HuggingFacePipeline(pipeline=hf_pipeline)
-
- # Create the question-answering chain
- qa_chain = RetrievalQA.from_chain_type(
-     llm=llm,
-     retriever=retriever,
-     return_source_documents=True,
- )
-
-
- @router.get("/huggingface")
- async def call_huggingface(prompt: str):
-     try:
-         response = qa_chain.invoke({"query": prompt})
-         text: str = response["result"]
-         text = text.split("\nHelpful Answer: ")[1]
-         text = text.split("\n\n")[0]
-         return JSONResponse(
-             status_code=status.HTTP_200_OK,
-             content={
-                 "result": text,
-             },
-         )
-     except Exception as e:
-         return JSONResponse(
-             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-             content={
-                 "error": str(e),
-             },
-         )
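
One detail of the deleted endpoint worth noting: LangChain's default RetrievalQA prompt ends with "Helpful Answer:", so the handler sliced the generated text at that marker and kept only the first paragraph after it. A worked example of that slicing (the sample output is made up):

# Made-up model output in the shape the deleted handler expected.
raw = (
    "Use the context to answer the question.\n"
    "Helpful Answer: Stack floors to build the tallest tower.\n\n"
    "Unrelated trailing generation."
)
text = raw.split("\nHelpful Answer: ")[1]  # keep everything after the marker
text = text.split("\n\n")[0]               # drop trailing chatter
print(text)  # Stack floors to build the tallest tower.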