aelsaeed committed on
Commit
f9ec9d3
·
verified ·
1 Parent(s): 11838d0

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -97
app.py DELETED
@@ -1,97 +0,0 @@
1
- import gradio as gr
2
- import pandas as pd
3
- import numpy as np
4
- import pickle
5
- from sentence_transformers import SentenceTransformer, util
6
- import gdown
7
- import os
8
-
9
- # --------- روابط Google Drive ----------
10
# Direct-download Google Drive URLs for the two source spreadsheets,
# keyed by catalogue name.
DRIVE_LINKS = dict(
    books="https://drive.google.com/uc?export=download&id=1FElHiASfiVLeuHWYaqd2Q5foxWRlJT-O",
    theses="https://drive.google.com/uc?export=download&id=1K2Mtze6ZdvfKUsFMCOWlRBjDq-ZnJNrv",
)

# Local file names the spreadsheets are stored under.
BOOKS_FILE, THESES_FILE = "book.xlsx", "theses.xlsx"
17
-
18
- # --------- تنزيل الملفات لو مش موجودة ----------
19
def download_from_drive(link, output):
    """Fetch *link* into the local path *output* unless that path already exists.

    Args:
        link: URL handed to ``gdown.download``.
        output: Destination path; when something already exists there the
            download is skipped entirely.
    """
    if os.path.exists(output):
        # Already present from an earlier run; nothing to do.
        return
    gdown.download(link, output, quiet=False)
22
-
23
# Make sure both spreadsheets are on disk before they are read.
for _link_key, _target_file in (("books", BOOKS_FILE), ("theses", THESES_FILE)):
    download_from_drive(DRIVE_LINKS[_link_key], _target_file)
25
-
26
- # --------- قراءة البيانات ----------
27
def load_data(file):
    """Read an Excel sheet and guarantee a string-typed ``Title`` column.

    Missing cells are replaced with the placeholder ``"غير متوافر"``. The
    ``Title`` column is taken from, in order of preference: an existing
    ``Title`` column, an ``العنوان`` column, or the first column of the sheet.

    Args:
        file: Workbook location, passed straight to ``pandas.read_excel``.

    Returns:
        The loaded DataFrame, with ``Title`` present and cast to ``str``.
    """
    df = pd.read_excel(file).fillna("غير متوافر")

    if "Title" in df.columns:
        title_source = df["Title"]
    elif "العنوان" in df.columns:
        title_source = df["العنوان"]
    else:
        title_source = df.iloc[:, 0]

    # Cast in every branch, including a pre-existing "Title" column: a title
    # that Excel stored as a number (e.g. 1984) would otherwise stay
    # non-string and be skipped by string-accessor matching on this column.
    df["Title"] = title_source.astype(str)
    return df
34
-
35
# Load both catalogues (books first, then theses).
books_df, theses_df = (load_data(path) for path in (BOOKS_FILE, THESES_FILE))

# Sentence-embedding model used for semantic search.
MODEL_NAME = "all-MiniLM-L6-v2"
model = SentenceTransformer(MODEL_NAME)
41
-
42
- # --------- إنشاء Embeddings مرة واحدة ----------
43
def build_or_load_embeddings(df, name):
    """Return title embeddings for *df*, reusing an on-disk cache when possible.

    The cache lives at ``{name}_embeddings.pkl`` in the current working
    directory. It is reused only when it loads cleanly and holds exactly one
    entry per row of *df*. Otherwise the titles are re-encoded with the
    module-level ``model`` and the cache file is rewritten.

    Args:
        df: Table with a ``Title`` column (only its length is needed when the
            cache is valid).
        name: Prefix of the cache file name.

    Returns:
        The cached object, or the fresh result of ``model.encode`` (requested
        with ``convert_to_numpy=True``).
    """
    path = f"{name}_embeddings.pkl"

    cached = None
    if os.path.exists(path):
        # SECURITY NOTE: pickle.load can execute code embedded in the file.
        # This is only acceptable because the cache is written by this same
        # function; never point it at a file from an untrusted source.
        try:
            with open(path, "rb") as f:
                cached = pickle.load(f)
        except (OSError, EOFError, pickle.UnpicklingError,
                AttributeError, ImportError, IndexError):
            # A truncated or corrupt cache (e.g. the process was killed while
            # writing) must not break startup; fall through and rebuild it.
            cached = None

    # NOTE: validity is judged by row count only, so edits to titles that keep
    # the row count unchanged will still reuse the old embeddings.
    if cached is not None and hasattr(cached, "__len__") and len(cached) == len(df):
        return cached

    texts = df["Title"].astype(str).tolist()
    emb = model.encode(texts, convert_to_numpy=True, show_progress_bar=True)

    # Write to a sibling temp file and swap it in, so an interrupted write
    # never leaves a half-written cache at the real path.
    tmp_path = f"{path}.tmp"
    with open(tmp_path, "wb") as f:
        pickle.dump(emb, f)
    os.replace(tmp_path, path)
    return emb
55
-
56
# Compute (or load from cache) the title embeddings for each catalogue.
books_embeddings, theses_embeddings = (
    build_or_load_embeddings(frame, label)
    for frame, label in ((books_df, "books"), (theses_df, "theses"))
)
58
-
59
- # --------- دالة البحث ----------
60
def search(query, category, mode):
    """Search the selected catalogue and render the hits as an HTML table.

    Args:
        query: Search text typed by the user.
        category: ``"Books"`` selects the books catalogue; any other value
            selects the theses catalogue.
        mode: ``"نصي"`` runs a case-insensitive literal substring match on the
            ``Title`` column; any other value orders every row by cosine
            similarity between the query embedding and the title embeddings
            (best match first).

    Returns:
        An HTML ``<table>`` string with a header row and one row per hit, or
        a short user-facing message when the query is blank or nothing
        matched.
    """
    # Function-scope import so this block does not depend on a file-level
    # import being added elsewhere.
    from html import escape

    # `not query` also covers None, which has no .strip().
    if not query or not query.strip():
        return "⚠️ اكتب كلمة أو جملة للبحث"

    use_books = category == "Books"
    df = books_df if use_books else theses_df

    if mode == "نصي":
        # regex=False: the query is raw user input, so characters such as "("
        # or "+" must match literally instead of being compiled as a pattern
        # (which raises on unbalanced input).
        titles = df["Title"].astype(str)
        results = df[titles.str.contains(query, case=False, na=False, regex=False)]
    else:
        emb = books_embeddings if use_books else theses_embeddings
        q_emb = model.encode([query], convert_to_numpy=True)
        scores = util.cos_sim(q_emb, emb)[0].cpu().numpy()
        # Negate so argsort yields highest similarity first.
        results = df.iloc[np.argsort(-scores)]

    if results.empty:
        return "❌ لم يتم العثور على نتائج"

    # Escape everything that comes from the spreadsheet so markup inside a
    # cell is displayed as text rather than interpreted by the browser.
    header_cells = "".join(f"<th>{escape(str(col))}</th>" for col in results.columns)
    parts = [
        "<table border=1 style='border-collapse:collapse;width:100%;'>",
        f"<tr>{header_cells}</tr>",
    ]
    for row in results.itertuples(index=False, name=None):
        cells = "".join(f"<td>{escape(str(val))}</td>" for val in row)
        parts.append(f"<tr>{cells}</tr>")
    parts.append("</table>")
    return "".join(parts)
84
-
85
- # --------- واجهة Gradio ----------
86
# Gradio front end: a text box for the query, a catalogue selector and a
# search-mode selector, wired to `search`, whose HTML result is displayed.
iface = gr.Interface(
    title="البحث في المكتبة الرقمية",
    fn=search,
    outputs="html",
    inputs=[
        gr.Textbox(label="اكتب كلمة البحث"),
        gr.Dropdown(choices=["Books", "Theses"], label="الفئة"),
        gr.Radio(choices=["نصي", "دلالي"], label="نوع البحث"),
    ],
)

# Start the web UI.
iface.launch()