import os

# Redirect all Hugging Face caches to a writable location (on hosted
# environments such as Spaces, /tmp is writable). These variables must be
# set before the Hugging Face libraries are imported.
os.environ["HF_HOME"] = "/tmp/huggingface"
os.environ["TRANSFORMERS_CACHE"] = "/tmp/huggingface"
os.environ["HUGGINGFACE_HUB_CACHE"] = "/tmp/huggingface"

# Hugging Face access token, read from the environment (may be None for
# public models and datasets).
token = os.getenv("HF_TOKEN")

from datasets import load_dataset
from sentence_transformers import SentenceTransformer, util

# Pre-computed embeddings of the Arabic blog posts, together with their ids.
dataset = load_dataset("AhmedElSherbeny/arabic-blog-embeddings-v3.5", split="train")

# Arabic BERT encoder used to embed incoming queries.
model = SentenceTransformer("aubmindlab/bert-base-arabertv02", token=token)

print("Dataset and Model loaded successfully")

import torch
import numpy as np

# Stack the stored embeddings into a single float32 tensor for fast
# cosine-similarity search.
stored_embeddings = torch.from_numpy(
    np.array([item["embedding"] for item in dataset])
).type(torch.float32)

def recommendation(query, top_k=10):
    """Return the ids of the stored posts most similar to the query text."""
    query_embedding = model.encode(query, convert_to_tensor=True)
    cosine_score = util.cos_sim(query_embedding, stored_embeddings)[0]
    top_indices = torch.topk(cosine_score, k=top_k).indices

    recomm = []
    for idx in top_indices:
        recomm.append(dataset[int(idx)]["id"])

    # The top hit is dropped: the query is assumed to be the text of an
    # existing post, so the closest match is the post itself.
    return recomm[1:]
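
# Quick sanity check of the retrieval step (illustrative only; the query
# string below is a made-up example, not taken from the dataset):
#     recommendation("أفضل الطرق لتعلم البرمجة", top_k=5)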

from fastapi import FastAPI
from pydantic import BaseModel

app = FastAPI()


class RecommendationRequest(BaseModel):
    query: str
    top_k: int = 5


@app.post("/recommend")
async def recommend(request: RecommendationRequest):
    # Embed the query and return the ids of the most similar stored posts.
    recommendations = recommendation(request.query, request.top_k)
    return {"recommendations": recommendations}
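
# Local-run helper: a minimal sketch, not part of the original script. Port
# 7860 (the usual Hugging Face Spaces port) is an assumption; in a Space or
# Docker setup the server is typically started externally, e.g.
# `uvicorn main:app --host 0.0.0.0 --port 7860` (module name "main" assumed).
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)

# Example request once the server is running (hypothetical payload):
#   curl -X POST http://localhost:7860/recommend \
#        -H "Content-Type: application/json" \
#        -d '{"query": "نص تجريبي عن البرمجة", "top_k": 5}'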