Upload 3 files
Browse files- Dockerfile +11 -0
- main.py +49 -0
- requirements.txt +10 -0
Dockerfile
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Image for serving the FastAPI recommendation service with uvicorn.
FROM python:3.9

WORKDIR /code

# Copy and install dependencies first so this layer is cached by Docker
# across code-only changes to main.py.
COPY ./requirements.txt /code/requirements.txt

RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

COPY ./main.py /code/main.py

# NOTE(review): port 7860 matches the Hugging Face Spaces convention --
# confirm if deploying elsewhere.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
|
main.py
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os

# Force Hugging Face to store files in /tmp (which is writable in the
# container). These env vars MUST be set before importing datasets /
# sentence_transformers below, because those libraries read them at
# import time.
os.environ["HF_HOME"] = "/tmp/huggingface"
os.environ["TRANSFORMERS_CACHE"] = "/tmp/huggingface"
os.environ["HUGGINGFACE_HUB_CACHE"] = "/tmp/huggingface"

# Hub token for model access; None means anonymous access.
token = os.getenv('HF_TOKEN')

# Imported after the cache env vars on purpose -- do not hoist above them.
import numpy as np
import torch
from datasets import load_dataset
from sentence_transformers import SentenceTransformer, util

# Pre-computed Arabic blog-post embeddings plus the encoder that will embed
# incoming queries into the same space.
dataset = load_dataset("AhmedElSherbeny/arabic-blog-embeddings-v2", split="train")
model = SentenceTransformer("aubmindlab/bert-base-arabertv02", token=token)
print("Dataset and Model loaded successfully")

# Stack every stored embedding into a single (num_items, dim) float32 tensor
# so one cosine-similarity call scores the whole corpus at once.
stored_embeddings = torch.from_numpy(
    np.array([item["embedding"] for item in dataset])
).type(torch.float32)
23 |
+
def recommendation(query, top_k=10):
    """Return the ids of the ``top_k`` stored posts most similar to *query*.

    Fix: the original requested ``top_k`` hits, then returned ``recomm[1:]``,
    so callers only ever received ``top_k - 1`` results. We now fetch one
    extra hit before dropping the first.

    # NOTE(review): dropping the first hit assumes the best match is the
    # query item itself -- confirm this holds for free-text queries that are
    # not present in the corpus.
    """
    query_embedding = model.encode(query, convert_to_tensor=True)
    # Cosine similarity of the query against every stored embedding at once.
    cosine_score = util.cos_sim(query_embedding, stored_embeddings)[0]
    # Clamp k so a small corpus cannot make torch.topk raise.
    k = min(top_k + 1, cosine_score.shape[0])
    top_indices = torch.topk(cosine_score, k=k).indices
    # Skip the first (presumed self-match) hit and map indices to post ids.
    return [dataset[int(idx)]['id'] for idx in top_indices[1:]]
|
31 |
+
|
32 |
+
from fastapi import FastAPI
from pydantic import BaseModel

app = FastAPI()


class RecommendationRequest(BaseModel):
    """Request payload for POST /recommend."""

    query: str
    top_k: int = 5  # Number of recommendations to return


@app.post("/recommend")
def recommend(request: RecommendationRequest):
    """Return the ids of posts most similar to the request's query text.

    Declared as a plain ``def`` rather than ``async def``: ``recommendation``
    does CPU-bound model encoding, and FastAPI runs sync endpoints in a
    worker thread, so the event loop is no longer blocked while encoding.
    """
    recommendations = recommendation(request.query, request.top_k)
    return {"recommendations": recommendations}
|
47 |
+
|
48 |
+
|
49 |
+
|
requirements.txt
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# NOTE(review): fastapi, uvicorn and pydantic are imported by main.py and
# launched by the Dockerfile CMD but were missing here, so the container
# failed at runtime. Added unpinned -- pin exact versions as needed.
arabert==1.0.1
datasets==3.6.0
fastapi
pydantic
sentence-transformers==4.1.0
tensorflow-datasets==4.9.8
torchao==0.10.0
torchdata==0.11.0
torchsummary==1.5.1
torchtune==0.6.1
transformers==4.51.3
uvicorn[standard]
vega-datasets==0.9.0
|