Spaces:
Running
Running
Delete app.py
Browse files
app.py
DELETED
|
@@ -1,97 +0,0 @@
|
|
| 1 |
-
import gradio as gr
|
| 2 |
-
import pandas as pd
|
| 3 |
-
import numpy as np
|
| 4 |
-
import pickle
|
| 5 |
-
from sentence_transformers import SentenceTransformer, util
|
| 6 |
-
import gdown
|
| 7 |
-
import os
|
| 8 |
-
|
| 9 |
-
# --------- روابط Google Drive ----------
|
| 10 |
-
# Direct-download URLs of the two spreadsheets on Google Drive, keyed by
# dataset name.
DRIVE_LINKS = dict(
    books="https://drive.google.com/uc?export=download&id=1FElHiASfiVLeuHWYaqd2Q5foxWRlJT-O",
    theses="https://drive.google.com/uc?export=download&id=1K2Mtze6ZdvfKUsFMCOWlRBjDq-ZnJNrv",
)

# Local file names used for the downloaded spreadsheets.
BOOKS_FILE = "book.xlsx"
THESES_FILE = "theses.xlsx"
|
| 17 |
-
|
| 18 |
-
# --------- تنزيل الملفات لو مش موجودة ----------
|
| 19 |
-
def download_from_drive(link, output):
    """Download *link* to *output* with gdown unless *output* already exists.

    Args:
        link: URL passed to ``gdown.download``.
        output: Local destination path. When this path already exists the
            function returns without downloading anything.
    """
    if os.path.exists(output):
        return
    gdown.download(link, output, quiet=False)
|
| 22 |
-
|
| 23 |
-
# Fetch each spreadsheet to its local file name (skipped when already present).
download_from_drive(link=DRIVE_LINKS["books"], output=BOOKS_FILE)
download_from_drive(link=DRIVE_LINKS["theses"], output=THESES_FILE)
|
| 25 |
-
|
| 26 |
-
# --------- قراءة البيانات ----------
|
| 27 |
-
def load_data(file):
    """Load a spreadsheet and guarantee it has a string-valued ``Title`` column.

    Args:
        file: Anything ``pandas.read_excel`` accepts, or an already-loaded
            ``DataFrame``. A DataFrame passed in is not modified.

    Returns:
        A new DataFrame in which missing cells are replaced by the
        placeholder "غير متوافر" and ``Title`` holds ``str`` values. The
        titles come from the existing ``Title`` column if present, else from
        the "العنوان" column, else from the first column.

    Raises:
        IndexError: If the sheet has no columns at all, so there is nothing
            to fall back on.
    """
    frame = file if isinstance(file, pd.DataFrame) else pd.read_excel(file)
    # fillna returns a new frame, so the caller's object is never mutated.
    frame = frame.fillna("غير متوافر")

    if "Title" in frame.columns:
        titles = frame["Title"]
    elif "العنوان" in frame.columns:
        titles = frame["العنوان"]
    else:
        titles = frame.iloc[:, 0]

    # Always coerce to str: a numeric Title column would otherwise break the
    # `.str` accessor used for text search.
    frame["Title"] = titles.astype(str)
    return frame
|
| 34 |
-
|
| 35 |
-
# Read both spreadsheets into DataFrames (books first, then theses).
books_df, theses_df = load_data(BOOKS_FILE), load_data(THESES_FILE)

# --------- Semantic model ----------
# Checkpoint name handed to SentenceTransformer; the instance is created once
# at import time.
MODEL_NAME = "all-MiniLM-L6-v2"
model = SentenceTransformer(MODEL_NAME)
|
| 41 |
-
|
| 42 |
-
# --------- إنشاء Embeddings مرة واحدة ----------
|
| 43 |
-
def build_or_load_embeddings(df, name):
    """Return title embeddings for *df*, reusing an on-disk cache when usable.

    The cache file is ``f"{name}_embeddings.pkl"``. It is reused only when it
    unpickles cleanly and holds exactly one entry per row of *df*. Otherwise
    the ``Title`` column is encoded with the module-level ``model`` and the
    cache file is rewritten.

    Args:
        df: DataFrame with a ``Title`` column.
        name: Prefix of the cache file name.

    Returns:
        The cached object, or the result of
        ``model.encode(..., convert_to_numpy=True)`` for the titles.
    """
    path = f"{name}_embeddings.pkl"

    cached = None
    if os.path.exists(path):
        # NOTE(security): unpickling can execute arbitrary code. Only load
        # cache files this application wrote itself.
        try:
            with open(path, "rb") as f:
                cached = pickle.load(f)
        except (pickle.UnpicklingError, EOFError, AttributeError,
                ImportError, IndexError):
            # A truncated or corrupt cache must not block start-up; fall
            # through and rebuild it.
            cached = None

    # NOTE: validity is judged by row count only, so edited titles with an
    # unchanged row count still reuse the old embeddings.
    if cached is not None and hasattr(cached, "__len__") and len(cached) == len(df):
        return cached

    texts = df["Title"].astype(str).tolist()
    emb = model.encode(texts, convert_to_numpy=True, show_progress_bar=True)

    # Write to a sibling temp file and swap it in, so an interrupted write
    # cannot leave a half-written cache behind.
    tmp_path = f"{path}.tmp"
    with open(tmp_path, "wb") as f:
        pickle.dump(emb, f)
    os.replace(tmp_path, path)
    return emb
|
| 55 |
-
|
| 56 |
-
# Compute (or load from cache) the title embeddings for each dataset.
books_embeddings, theses_embeddings = (
    build_or_load_embeddings(books_df, "books"),
    build_or_load_embeddings(theses_df, "theses"),
)
|
| 58 |
-
|
| 59 |
-
# --------- دالة البحث ----------
|
| 60 |
-
def search(query, category, mode):
    """Search the selected catalogue and return the matches as an HTML table.

    Args:
        query: Text typed by the user. ``None``, empty or whitespace-only
            input yields a warning message instead of a search.
        category: ``"Books"`` selects the books data; any other value selects
            the theses data.
        mode: ``"نصي"`` runs a case-insensitive literal substring match on
            ``Title``. Any other value ranks every row by cosine similarity
            between the query embedding and the stored title embeddings,
            best match first.

    Returns:
        An HTML ``<table>`` string with one row per result, or a short
        message string when the query is blank or nothing matched.
    """
    # Imported locally so the block does not depend on a file-level import.
    from html import escape

    if not query or not query.strip():
        return "⚠️ اكتب كلمة أو جملة للبحث"

    use_books = category == "Books"
    df = books_df if use_books else theses_df

    if mode == "نصي":
        # regex=False: the query is user text, not a pattern. Characters such
        # as "(" or "+" would otherwise raise re.error or match unexpectedly.
        titles = df["Title"].astype(str)
        results = df[titles.str.contains(query, case=False, na=False, regex=False)]
    else:
        emb = books_embeddings if use_books else theses_embeddings
        q_emb = model.encode([query], convert_to_numpy=True)
        scores = util.cos_sim(q_emb, emb)[0].cpu().numpy()
        # Negate so argsort's ascending order becomes best-first.
        idx = np.argsort(-scores)
        results = df.iloc[idx]

    if results.empty:
        return "❌ لم يتم العثور على نتائج"

    # Escape every header and cell: spreadsheet content must not be
    # interpreted as markup by the browser.
    header = "".join(f"<th>{escape(str(col))}</th>" for col in results.columns)
    body = "".join(
        "<tr>" + "".join(f"<td>{escape(str(val))}</td>" for val in row) + "</tr>"
        # itertuples keeps each column's own dtype (iterrows upcasts rows).
        for row in results.itertuples(index=False, name=None)
    )
    return (
        "<table border=1 style='border-collapse:collapse;width:100%;'>"
        f"<tr>{header}</tr>{body}</table>"
    )
|
| 84 |
-
|
| 85 |
-
# --------- واجهة Gradio ----------
|
| 86 |
-
# Three inputs (query text, dataset, search mode) are passed to `search`;
# its return value is rendered as HTML.
iface = gr.Interface(
    fn=search,
    inputs=[
        gr.Textbox(label="اكتب كلمة البحث"),
        gr.Dropdown(choices=["Books", "Theses"], label="الفئة"),
        gr.Radio(choices=["نصي", "دلالي"], label="نوع البحث"),
    ],
    outputs="html",
    title="البحث في المكتبة الرقمية",
)

# Start the web app.
iface.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|