AhmedElSherbeny committed on
Commit
389690c
·
verified ·
1 Parent(s): 7e960ed

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile +11 -0
  2. main.py +49 -0
  3. requirements.txt +10 -0
Dockerfile ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
# Container image for the recommendation API defined in main.py.
FROM python:3.9

# Every relative path below resolves under /code.
WORKDIR /code

# Copy the dependency list on its own first so the install layer can be
# reused from the build cache when only main.py changes.
COPY requirements.txt ./

RUN pip install --no-cache-dir --upgrade -r requirements.txt

COPY main.py ./

# Serve the FastAPI app object `app` from main.py on all interfaces, port 7860.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
main.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

# Force Hugging Face to store files in /tmp (which is writable).
# NOTE: these assignments intentionally come before the Hugging Face imports
# below; the libraries are expected to resolve their cache locations from the
# environment when they are imported, so do not hoist those imports above
# this section.
os.environ["HF_HOME"] = "/tmp/huggingface"
os.environ["TRANSFORMERS_CACHE"] = "/tmp/huggingface"
os.environ["HUGGINGFACE_HUB_CACHE"] = "/tmp/huggingface"

# Access token for the Hugging Face Hub; None when HF_TOKEN is not set.
token = os.getenv('HF_TOKEN')

from datasets import load_dataset
from sentence_transformers import SentenceTransformer, util

# Use the same credentials for the dataset and the model so that both
# downloads succeed (or fail) consistently when a token is required.
dataset = load_dataset(
    "AhmedElSherbeny/arabic-blog-embeddings-v2",
    split="train",
    token=token,
)
model = SentenceTransformer("aubmindlab/bert-base-arabertv02", token=token)
print("Dataset and Model loaded successfully")

import torch
import numpy as np

# Build the (num_rows, dim) float32 matrix of stored embeddings once at
# start-up. Reading the single "embedding" column avoids materialising every
# other field of every record, which row-by-row iteration would do.
stored_embeddings = torch.from_numpy(
    np.asarray(dataset["embedding"], dtype=np.float32)
)
21
+
22
+
23
def recommendation(query, top_k=10):
    """Return the ids of the stored items most similar to ``query``.

    The query is embedded with the module-level ``model`` and scored by
    cosine similarity against ``stored_embeddings``. The single best match is
    skipped before returning.
    NOTE(review): skipping the best match presumably assumes the query text
    is itself present in the dataset, so the top hit is the query -- confirm.

    Args:
        query: Text to find similar items for.
        top_k: Number of ids to return.

    Returns:
        A list of ``dataset`` ``'id'`` values ordered from most to least
        similar. It holds ``top_k`` entries, or fewer when the dataset does
        not contain enough rows; it is empty when ``top_k`` is below 1.
    """
    total = stored_embeddings.shape[0]
    if top_k < 1 or total == 0:
        return []

    query_embedding = model.encode(query, convert_to_tensor=True)
    cosine_score = util.cos_sim(query_embedding, stored_embeddings)[0]

    # Ask for one extra candidate because the best hit is dropped below, and
    # clamp to the number of rows because torch.topk raises when k is larger
    # than the number of scores.
    k = min(top_k + 1, total)
    top_indices = torch.topk(cosine_score, k=k).indices.tolist()

    return [dataset[idx]['id'] for idx in top_indices[1:]]
31
+
32
+ from fastapi import FastAPI
33
+ from pydantic import BaseModel
34
+
35
+ app = FastAPI()
36
+
37
# Request body accepted by the POST /recommend endpoint.
class RecommendationRequest(BaseModel):
    # Text to find similar items for.
    query: str
    # Number of recommendations to return
    top_k: int = 5
40
+
41
@app.post("/recommend")
async def recommend(request: RecommendationRequest):
    """Handle POST /recommend and return the recommended ids.

    Args:
        request: Parsed request body carrying ``query`` and ``top_k``.

    Returns:
        A dict of the form ``{"recommendations": [...]}`` holding the value
        returned by ``recommendation``.
    """
    # Imported locally so this handler does not depend on edits elsewhere in
    # the file; fastapi is already a dependency of this module.
    from fastapi.concurrency import run_in_threadpool

    # Encoding the query and scoring it is blocking, CPU-bound work. Running
    # it directly inside an async handler would stall the event loop for all
    # other requests, so hand it to the worker threadpool instead.
    recommendations = await run_in_threadpool(
        recommendation, request.query, request.top_k
    )

    return {"recommendations": recommendations}
47
+
48
+
49
+
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
arabert==1.0.1
datasets==3.6.0
# Web framework imported directly by main.py.
fastapi==0.115.12
sentence-transformers==4.1.0
tensorflow-datasets==4.9.8
torchao==0.10.0
torchdata==0.11.0
torchsummary==1.5.1
torchtune==0.6.1
transformers==4.51.3
# ASGI server launched by the Dockerfile CMD.
uvicorn==0.34.2
vega-datasets==0.9.0