AhmedElSherbeny's picture
changed the dataset path
62ac8ad verified
raw
history blame
1.49 kB
import os
# Point every Hugging Face cache location at /tmp, which is writable.
# This has to run before the Hugging Face libraries are imported below.
os.environ.update(
    {
        "HF_HOME": "/tmp/huggingface",
        "TRANSFORMERS_CACHE": "/tmp/huggingface",
        "HUGGINGFACE_HUB_CACHE": "/tmp/huggingface",
    }
)
# Hub access token read from the environment; None when HF_TOKEN is unset.
token = os.environ.get("HF_TOKEN")
from datasets import load_dataset
from sentence_transformers import SentenceTransformer, util
# Rows of this dataset carry an "id" and an "embedding" field, both of which
# are read further down in this module.
dataset = load_dataset(
    "AhmedElSherbeny/arabic-blog-embeddings-v3.5",
    split="train",
)
# Encoder used to embed incoming queries; the token comes from HF_TOKEN.
model = SentenceTransformer(
    "aubmindlab/bert-base-arabertv02",
    token=token,
)
print("Dataset and Model loaded successfully")
import torch
import numpy as np
# Stack the per-row "embedding" values into one float32 tensor. Rows are
# visited in dataset order, so position i in the tensor is dataset row i.
_embedding_rows = [row["embedding"] for row in dataset]
stored_embeddings = torch.from_numpy(np.array(_embedding_rows)).to(torch.float32)
# Drop the temporary Python list so only the tensor stays resident.
del _embedding_rows
def recommendation(query, top_k=10):
    """Return the ids of the stored posts most similar to ``query``.

    The query is encoded with the module-level ``model`` and compared by
    cosine similarity against every row of ``stored_embeddings``. The single
    best-scoring row is always skipped and the ids of the next ``top_k`` rows
    are returned, best match first.

    NOTE(review): the top hit is presumably skipped because the query is
    expected to be the text of an already-indexed post (which would match
    itself) -- confirm; for free-text queries this discards the best result.

    Args:
        query: Text to find similar posts for.
        top_k: Maximum number of ids to return. Values below 1 give an
            empty list.

    Returns:
        A list of at most ``top_k`` ``id`` values taken from ``dataset``.
        Fewer are returned when the corpus does not hold enough rows.
    """
    wanted = max(top_k, 0)
    # One extra neighbour is fetched because the best hit is discarded below.
    # torch.topk raises when k exceeds the number of candidates, so clamp it
    # to the corpus size instead of failing on a large client-supplied top_k.
    k = min(wanted + 1, len(stored_embeddings))
    if k < 2:
        # Nothing would be left after skipping the top hit.
        return []
    query_embedding = model.encode(query, convert_to_tensor=True)
    cosine_score = util.cos_sim(query_embedding, stored_embeddings)[0]
    top_indices = torch.topk(cosine_score, k=k).indices
    # topk returns indices sorted by descending score; drop the first one.
    return [dataset[int(idx)]["id"] for idx in top_indices[1:]]
from fastapi import FastAPI
from pydantic import BaseModel
# ASGI application object; the /recommend route below is registered on it.
app = FastAPI()
class RecommendationRequest(BaseModel):
    # Request payload model for the POST /recommend endpoint.

    # Text whose most similar stored posts are looked up.
    query: str
    # Requested number of recommendations; forwarded to recommendation()
    # as its top_k argument.
    top_k: int = 5
@app.post("/recommend")
async def recommend(request: RecommendationRequest):
    """Handle POST /recommend.

    Passes the request's ``query`` and ``top_k`` to ``recommendation`` and
    wraps the resulting list under the ``"recommendations"`` key.
    """
    return {
        "recommendations": recommendation(
            query=request.query,
            top_k=request.top_k,
        )
    }