Update apis/chat_api.py
Browse files
- apis/chat_api.py +3 -3
apis/chat_api.py
CHANGED
@@ -187,7 +187,7 @@ class ChatAPIApp:
|
|
187 |
data_response = streamer.chat_return_dict(stream_response)
|
188 |
return data_response
|
189 |
|
190 |
-
async def chat_embedding(self, input, model_name, api_key):
|
191 |
api_url = f"https://api-inference.huggingface.co/pipeline/feature-extraction/{model_name}"
|
192 |
headers = {"Authorization": f"Bearer {api_key}"}
|
193 |
response = requests.post(api_url, headers=headers, json={"inputs": input})
|
@@ -206,7 +206,7 @@ class ChatAPIApp:
|
|
206 |
try:
|
207 |
for attempt in range(3): # Retry logic
|
208 |
try:
|
209 |
-
embeddings = await self.chat_embedding(request.input, request.model,
|
210 |
data = [
|
211 |
{"object": "embedding", "index": i, "embedding": embedding}
|
212 |
for i, embedding in enumerate(embeddings)
|
@@ -214,7 +214,7 @@ class ChatAPIApp:
|
|
214 |
return {
|
215 |
"object": "list",
|
216 |
"data": data,
|
217 |
-
"model": request.
|
218 |
"usage": {"prompt_tokens": len(request.input), "total_tokens": len(request.input)}
|
219 |
}
|
220 |
except RuntimeError as e:
|
|
|
187 |
data_response = streamer.chat_return_dict(stream_response)
|
188 |
return data_response
|
189 |
|
190 |
+
async def chat_embedding(self, input, model_name, api_key: str = Depends(extract_api_key)):
|
191 |
api_url = f"https://api-inference.huggingface.co/pipeline/feature-extraction/{model_name}"
|
192 |
headers = {"Authorization": f"Bearer {api_key}"}
|
193 |
response = requests.post(api_url, headers=headers, json={"inputs": input})
|
|
|
206 |
try:
|
207 |
for attempt in range(3): # Retry logic
|
208 |
try:
|
209 |
+
embeddings = await self.chat_embedding(request.input, request.model, api_key)
|
210 |
data = [
|
211 |
{"object": "embedding", "index": i, "embedding": embedding}
|
212 |
for i, embedding in enumerate(embeddings)
|
|
|
214 |
return {
|
215 |
"object": "list",
|
216 |
"data": data,
|
217 |
+
"model": request.model,
|
218 |
"usage": {"prompt_tokens": len(request.input), "total_tokens": len(request.input)}
|
219 |
}
|
220 |
except RuntimeError as e:
|