remove huggingface
Browse files- requirements.txt +0 -11
- src/fileservice.py +0 -139
- src/main.py +0 -2
- src/talk_to_your_manual/__init__.py +1 -3
- src/talk_to_your_manual/use_huggingface.py +0 -85
requirements.txt
CHANGED
|
@@ -1,14 +1,3 @@
|
|
| 1 |
fastapi
|
| 2 |
uvicorn[standard]
|
| 3 |
-
huggingface-hub
|
| 4 |
-
langchain
|
| 5 |
-
langchain-community
|
| 6 |
-
langchain-huggingface
|
| 7 |
-
langchain_openai
|
| 8 |
-
transformers
|
| 9 |
-
sentence-transformers
|
| 10 |
-
faiss-cpu
|
| 11 |
dashscope
|
| 12 |
-
pdfminer.six
|
| 13 |
-
pdfplumber
|
| 14 |
-
git+https://github.com/east-and-west-magic/pgsoft.git@tag-2024-01-11-a
|
|
|
|
| 1 |
fastapi
|
| 2 |
uvicorn[standard]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
dashscope
|
|
|
|
|
|
|
|
|
src/fileservice.py
DELETED
|
@@ -1,139 +0,0 @@
|
|
| 1 |
-
from fastapi import APIRouter
|
| 2 |
-
from pgsoft.pgdate.date_utils import beijing
|
| 3 |
-
from pgsoft.pghash.md5 import md5
|
| 4 |
-
import pgsoft.pgfile as pgfile
|
| 5 |
-
from time import sleep
|
| 6 |
-
import json
|
| 7 |
-
import os
|
| 8 |
-
|
| 9 |
-
# Router that groups the file-service endpoints under the "/file" prefix.
router = APIRouter(prefix="/file", tags=["File Service"])

# Dataset repository id and local scratch directory used by the endpoints.
dataset_id = "pgsoft/game"
tempdir = "game"

# Secrets are read from the environment; either is None when unset.
pgai_code = os.environ.get("pgai_code")
db_token = os.environ.get("db_token")
if db_token:
    # Only report that the token is configured: echoing any part of a
    # secret to stdout leaks it into logs.
    print("db_token is set")
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
@router.get("/download")
def download_file(game: str, token: str, gamecode: str):
    """Download one stored game file and return its parsed JSON content.

    Args:
        game: Game name; stripped and lower-cased before use.
        token: Access code supplied by the caller, compared with ``pgai_code``.
        gamecode: Identifier of the stored file (its name without ``.json``).

    Returns:
        ``{"status": "OK", "result": <parsed JSON>}`` on success, otherwise
        ``{"status": "Failure", "detail": <reason>}``.
    """
    if token != pgai_code:
        print(f"[{beijing()}][download file] failed")
        return {"status": "Failure", "detail": "Invalid token"}

    game = game.strip().lower()
    filename = gamecode.strip() + ".json"
    remotepath = "/".join([game, filename[:2], filename])

    # `game` and `gamecode` come straight from the query string and end up in
    # both the remote path and the local download directory, so refuse empty,
    # "." and ".." segments as well as backslashes before touching storage.
    if "\\" in remotepath or any(
        part in ("", ".", "..") for part in remotepath.split("/")
    ):
        print(f"[{beijing()}][download file] failed")
        return {"status": "Failure", "detail": "File not found or server error"}

    res = pgfile.download(
        dataset_id,
        remotepath=remotepath,
        repo_type="dataset",
        localdir=tempdir,
        token=db_token,
    )
    if not res:
        print(f"[{beijing()}][download file] failed")
        return {"status": "Failure", "detail": "File not found or server error"}

    try:
        with open(res, "r", encoding="utf-8") as f:
            outp = json.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError; report
        # an unreadable or corrupt file the same way as any other failure
        # instead of letting the exception escape the handler.
        print(f"[{beijing()}][download file] failed, {type(e)}: {e}")
        return {"status": "Failure", "detail": "File not found or server error"}

    print(f"[{beijing()}][download file] OK")
    return {"status": "OK", "result": outp}
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
@router.post("/upload")
def upload_file(game: str, token: str, content: str):
    """Validate a JSON payload and upload it as a new game file.

    The payload must be a JSON object with a string ``"device-id"`` and a dict
    ``"game-file"``. The file name is the md5 of the upload time plus the
    ``"game-file"`` value; the file that is written and uploaded contains the
    full submitted payload.

    Args:
        game: Game name; stripped and lower-cased before use.
        token: Access code supplied by the caller, compared with ``pgai_code``.
        content: JSON text of the payload.

    Returns:
        ``{"status": "OK", "result": <md5 code>}`` on success, otherwise
        ``{"status": "Failure", "detail": <reason>}``.
    """
    if token != pgai_code:
        print(f"[{beijing()}][upload file] failed")
        return {"status": "Failure", "detail": "Invalid token"}

    game = game.strip().lower()
    # `game` becomes part of a local directory path below, so refuse empty,
    # "." and ".." segments as well as backslashes.
    if "\\" in game or any(part in ("", ".", "..") for part in game.split("/")):
        print(f"[{beijing()}][upload file] failed, invalid game")
        return {"status": "Failure", "detail": "Invalid game"}

    try:
        content_dict = json.loads(content)
    except json.JSONDecodeError as e:
        print(f"[{beijing()}][upload file] failed, {type(e)}: {e}")
        return {"status": "Failure", "detail": "Invalid JSON"}

    if not isinstance(content_dict, dict):
        print(f"[{beijing()}][upload file] failed, not a dict")
        return {"status": "Failure", "detail": "not a dict"}

    needed_keys = ["game-file", "device-id"]
    for key in needed_keys:
        if key not in content_dict:
            print(f'[{beijing()}][upload file] failed, missed "{key}"')
            return {"status": "Failure", "detail": f'missed "{key}"'}

    if not isinstance(content_dict["device-id"], str):
        print(f'[{beijing()}][upload file] failed, "device-id" is not a str')
        return {"status": "Failure", "detail": '"device-id" is not a str'}
    if not isinstance(content_dict["game-file"], dict):
        print(f'[{beijing()}][upload file] failed, "game-file" is not a dict')
        return {"status": "Failure", "detail": '"game-file" is not a dict'}

    obj = {
        "upload-time": str(beijing()),
        "game-file": content_dict["game-file"],
    }
    maxtry = 5
    for _ in range(maxtry):
        md5code = md5(obj)
        remotepath = "/".join([game, md5code[:2], md5code + ".json"])
        if not pgfile.api.file_exists(
            repo_id=dataset_id,
            filename=remotepath,
            repo_type="dataset",
            token=db_token,
        ):
            break
        # Name already taken: wait briefly and refresh the timestamp so the
        # next attempt hashes to a different name.
        sleep(0.1)
        obj["upload-time"] = str(beijing())
    else:
        # Every attempt collided with an existing file.
        print(f"[{beijing()}][upload file] failed, timeout, please retry")
        return {"status": "Failure", "detail": "timeout, please retry"}

    filedir = os.path.join(tempdir, game, md5code[:2])
    # exist_ok avoids the check-then-create race between concurrent uploads.
    os.makedirs(filedir, exist_ok=True)
    filepath = os.path.join(filedir, md5code + ".json")
    content_indented = json.dumps(content_dict, indent=4)
    with open(filepath, "w", encoding="utf-8") as f:
        f.write(content_indented)

    res = pgfile.upload(
        filepath,
        remotepath,
        dataset_id,
        "dataset",
        db_token,
        f"Updated at {beijing()}",
    )
    if not res:
        print(f"[{beijing()}][upload file] failed")
        return {"status": "Failure", "detail": "server error"}
    print(f"[{beijing()}][upload file] OK")
    return {"status": "OK", "result": md5code}
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
@router.get("/list")
def list_files(game: str, token: str):
    """List the stored ``.json`` files that belong to one game.

    Args:
        game: Game name; stripped and lower-cased before use.
        token: Access code supplied by the caller, compared with ``pgai_code``.

    Returns:
        ``{"status": "OK", "result": {<code>: <repository path>}}`` where
        ``<code>`` is the last 32 characters of the path without its ``.json``
        suffix, or ``{"status": "Failure", "detail": <reason>}``.
    """
    if token != pgai_code:
        print(f"[{beijing()}][list files] failed")
        return {"status": "Failure", "detail": "Invalid token"}

    game = game.strip().lower()
    games = pgfile.list_files(
        repo_id=dataset_id,
        repo_type="dataset",
        token=db_token,
    )
    if games is None:
        print(f"[{beijing()}][list files] failed")
        return {"status": "Failure", "detail": "server error"}

    # Match on a whole leading path component, so that "chess" does not also
    # pick up files stored under "chess2/". An empty game keeps matching
    # every file, as before.
    prefix = f"{game.rstrip('/')}/" if game else ""
    suffix = ".json"
    games = {
        # Strip only the trailing ".json": splitting on the first "." would
        # give the wrong key whenever an earlier path component has a dot.
        item[: -len(suffix)][-32:]: item
        for item in games
        if item.endswith(suffix) and item.startswith(prefix)
    }
    print(f"[{beijing()}][list files] OK")
    return {"status": "OK", "result": games}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/main.py
CHANGED
|
@@ -1,9 +1,7 @@
|
|
| 1 |
from fastapi import FastAPI
|
| 2 |
-
from . import fileservice
|
| 3 |
from . import talk_to_your_manual
|
| 4 |
|
| 5 |
app = FastAPI()
|
| 6 |
-
# app.include_router(fileservice.router)
|
| 7 |
app.include_router(talk_to_your_manual.router)
|
| 8 |
|
| 9 |
|
|
|
|
| 1 |
from fastapi import FastAPI
|
|
|
|
| 2 |
from . import talk_to_your_manual
|
| 3 |
|
| 4 |
app = FastAPI()
|
|
|
|
| 5 |
app.include_router(talk_to_your_manual.router)
|
| 6 |
|
| 7 |
|
src/talk_to_your_manual/__init__.py
CHANGED
|
@@ -1,11 +1,9 @@
|
|
| 1 |
from fastapi import APIRouter
|
| 2 |
from . import use_aliyun
|
| 3 |
-
from . import use_huggingface
|
| 4 |
|
| 5 |
|
| 6 |
router = APIRouter(
|
| 7 |
prefix="/talk-to-your-manual",
|
| 8 |
tags=["Talk To Your Manual"],
|
| 9 |
)
|
| 10 |
-
router.include_router(use_aliyun.router)
|
| 11 |
-
# router.include_router(use_huggingface.router)
|
|
|
|
| 1 |
from fastapi import APIRouter
|
| 2 |
from . import use_aliyun
|
|
|
|
| 3 |
|
| 4 |
|
| 5 |
router = APIRouter(
|
| 6 |
prefix="/talk-to-your-manual",
|
| 7 |
tags=["Talk To Your Manual"],
|
| 8 |
)
|
| 9 |
+
router.include_router(use_aliyun.router)
|
|
|
src/talk_to_your_manual/use_huggingface.py
DELETED
|
@@ -1,85 +0,0 @@
|
|
| 1 |
-
from transformers import BloomForCausalLM, BloomTokenizerFast
|
| 2 |
-
from langchain.chains import RetrievalQA
|
| 3 |
-
from langchain_community.vectorstores import FAISS
|
| 4 |
-
from langchain.text_splitter import CharacterTextSplitter
|
| 5 |
-
from langchain_community.document_loaders import PDFPlumberLoader
|
| 6 |
-
from langchain_huggingface import HuggingFaceEndpointEmbeddings
|
| 7 |
-
from langchain_huggingface import HuggingFacePipeline
|
| 8 |
-
from transformers import pipeline
|
| 9 |
-
from fastapi import APIRouter, status
|
| 10 |
-
from fastapi.responses import JSONResponse
|
| 11 |
-
import os
|
| 12 |
-
|
| 13 |
-
router = APIRouter()
|
| 14 |
-
|
| 15 |
-
# Query text
|
| 16 |
-
query = "游戏的主要玩法是什么?"
|
| 17 |
-
max_new_tokens = 100
|
| 18 |
-
|
| 19 |
-
# Load the model and tokenizer
|
| 20 |
-
db_token = os.environ.get("db_token")
|
| 21 |
-
model = BloomForCausalLM.from_pretrained(
|
| 22 |
-
"bigscience/bloom-560m",
|
| 23 |
-
token=db_token,
|
| 24 |
-
)
|
| 25 |
-
tokenizer = BloomTokenizerFast.from_pretrained(
|
| 26 |
-
"bigscience/bloom-560m",
|
| 27 |
-
token=db_token,
|
| 28 |
-
)
|
| 29 |
-
|
| 30 |
-
# Load the document contents
|
| 31 |
-
loader = PDFPlumberLoader("建造大楼游戏说明.pdf")
|
| 32 |
-
documents = loader.load()
|
| 33 |
-
splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
|
| 34 |
-
documents = splitter.split_documents(documents)
|
| 35 |
-
# Use a Sentence-BERT model to convert the documents into vectors
|
| 36 |
-
embeddings = HuggingFaceEndpointEmbeddings(
|
| 37 |
-
repo_id="sentence-transformers/all-MiniLM-L6-v2",
|
| 38 |
-
huggingfacehub_api_token=db_token,
|
| 39 |
-
)
|
| 40 |
-
# Build the index with FAISS
|
| 41 |
-
faiss_index = FAISS.from_documents(documents, embeddings)
|
| 42 |
-
# Save the index
|
| 43 |
-
faiss_index.save_local("faiss_index")
|
| 44 |
-
# Retrieve relevant documents from the FAISS index
|
| 45 |
-
retriever = faiss_index.as_retriever()
|
| 46 |
-
|
| 47 |
-
# Create a Hugging Face pipeline to use the BLOOM model
|
| 48 |
-
hf_pipeline = pipeline(
|
| 49 |
-
"text-generation",
|
| 50 |
-
model=model,
|
| 51 |
-
tokenizer=tokenizer,
|
| 52 |
-
max_new_tokens=max_new_tokens,
|
| 53 |
-
)
|
| 54 |
-
|
| 55 |
-
# Use LangChain's HuggingFacePipeline class
|
| 56 |
-
llm = HuggingFacePipeline(pipeline=hf_pipeline)
|
| 57 |
-
|
| 58 |
-
# Create the question-answering chain
|
| 59 |
-
qa_chain = RetrievalQA.from_chain_type(
|
| 60 |
-
llm=llm,
|
| 61 |
-
retriever=retriever,
|
| 62 |
-
return_source_documents=True,
|
| 63 |
-
)
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
@router.get("/huggingface")
async def call_huggingface(prompt: str):
    """Answer a prompt with the retrieval QA chain.

    Args:
        prompt: The question passed to ``qa_chain`` as its ``"query"``.

    Returns:
        A 200 ``JSONResponse`` with ``{"result": <answer text>}``, or a 500
        ``JSONResponse`` with ``{"error": <message>}`` if anything raises.
    """
    try:
        # NOTE(review): invoke() is called without await inside an async
        # handler, so it appears to run on the event-loop thread; consider
        # moving it to a worker thread.
        response = qa_chain.invoke({"query": prompt})
        text: str = response["result"]
        pieces = text.split("\nHelpful Answer: ")
        if len(pieces) > 1:
            # Keep the first paragraph that follows the answer marker.
            text = pieces[1].split("\n\n")[0]
        # Without the marker the raw output is returned unchanged; indexing
        # [1] unconditionally turned that case into a 500 reporting
        # "list index out of range".
        return JSONResponse(
            status_code=status.HTTP_200_OK,
            content={
                "result": text,
            },
        )
    except Exception as e:
        # Top-level boundary for this endpoint: report any failure as a 500.
        return JSONResponse(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            content={
                "error": str(e),
            },
        )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|