Spaces:
Sleeping
Sleeping
from fastapi import FastAPI, Depends, HTTPException, status | |
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials | |
from pydantic import BaseModel | |
from fastapi.middleware.cors import CORSMiddleware | |
import uvicorn | |
import tiktoken | |
import numpy as np | |
# from scipy.interpolate import interp1d | |
from typing import List | |
# from sklearn.preprocessing import PolynomialFeatures | |
import os | |
import requests | |
#环境变量传入 | |
sk_key = os.environ.get('SK', 'sk-aaabbbcccdddeeefffggghhhiiijjjkkk') | |
API_URL = "https://api-inference.huggingface.co/models/BAAI/bge-large-zh-v1.5" | |
headers = {"Authorization": f"Bearer {sk_key}"} | |
# 创建一个FastAPI实例 | |
app = FastAPI() | |
app.add_middleware( | |
CORSMiddleware, | |
allow_origins=["*"], | |
allow_credentials=True, | |
allow_methods=["*"], | |
allow_headers=["*"], | |
) | |
# 创建一个HTTPBearer实例 | |
security = HTTPBearer() | |
# 预加载模型 | |
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # 检测是否有GPU可用,如果有则使用cuda设备,否则使用cpu设备 | |
# if torch.cuda.is_available(): | |
# print('本次加载模型的设备为GPU: ', torch.cuda.get_device_name(0)) | |
# else: | |
# print('本次加载模型的设备为CPU.') | |
#model = SentenceTransformer('./moka-ai_m3e-large', device=device) | |
def query(payload): | |
response = requests.post(API_URL, headers=headers, json=payload) | |
return response.json()[0] | |
class EmbeddingRequest(BaseModel): | |
input: List[str] | |
model: str | |
class EmbeddingResponse(BaseModel): | |
data: list | |
model: str | |
object: str | |
usage: dict | |
def num_tokens_from_string(string: str) -> int: | |
"""Returns the number of tokens in a text string.""" | |
encoding = tiktoken.get_encoding('cl100k_base') | |
num_tokens = len(encoding.encode(string)) | |
return num_tokens | |
# 插值法 | |
# def interpolate_vector(vector, target_length): | |
# original_indices = np.arange(len(vector)) | |
# target_indices = np.linspace(0, len(vector) - 1, target_length) | |
# f = interp1d(original_indices, vector, kind='linear') | |
# return f(target_indices) | |
# def expand_features(embedding, target_length): | |
# poly = PolynomialFeatures(degree=2) | |
# expanded_embedding = poly.fit_transform(embedding.reshape(1, -1)) | |
# expanded_embedding = expanded_embedding.flatten() | |
# if len(expanded_embedding) > target_length: | |
# # 如果扩展后的特征超过目标长度,可以通过截断或其他方法来减少维度 | |
# expanded_embedding = expanded_embedding[:target_length] | |
# elif len(expanded_embedding) < target_length: | |
# # 如果扩展后的特征少于目标长度,可以通过填充或其他方法来增加维度 | |
# expanded_embedding = np.pad(expanded_embedding, (0, target_length - len(expanded_embedding))) | |
# return expanded_embedding | |
async def get_embeddings(request: EmbeddingRequest, credentials: HTTPAuthorizationCredentials = Depends(security)): | |
if credentials.credentials != sk_key: | |
raise HTTPException( | |
status_code=status.HTTP_401_UNAUTHORIZED, | |
detail="Invalid authorization code", | |
) | |
# 计算嵌入向量和tokens数量 | |
# embeddings = [model.encode(text) for text in request.input] | |
print(request.json()) | |
embeddings = query(request.input) | |
#print(embeddings) | |
prompt_tokens = sum(len(text.split()) for text in request.input) | |
total_tokens = sum(num_tokens_from_string(text) for text in request.input) | |
response = { | |
"data": [ | |
{ | |
"embedding": embedding, | |
"index": index, | |
"object": "embedding" | |
} for index, embedding in enumerate(embeddings) | |
], | |
"model": request.model, | |
"object": "list", | |
"usage": { | |
"prompt_tokens": prompt_tokens, | |
"total_tokens": total_tokens, | |
} | |
} | |
return response | |
if __name__ == "__main__": | |
uvicorn.run("app:app", host='0.0.0.0', port=7860, workers=2) |