Spaces:
Running
Running
| import gradio as gr | |
| import pandas as pd | |
| from sentence_transformers import SentenceTransformer, util | |
# Read both fatwa spreadsheets from the working directory.
df = pd.read_excel("IslamWeb_output.xlsx")
df2 = pd.read_excel("JordanFatwas_all.xlsx")

# Fail fast at startup if either sheet lacks the columns the search reads.
for label, frame in (("IslamWeb", df), ("JordanFatwas", df2)):
    has_required = {"question", "link"} <= set(frame.columns)
    if not has_required:
        raise ValueError(f"❌ Missing required columns in {label}")

# Load the sentence-embedding model, then embed every question once up front.
# Missing questions are embedded as the empty string; the dataframes themselves
# are left unmodified.
model = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")
embeddings = model.encode(
    df["question"].fillna('').tolist(),
    convert_to_tensor=True,
)
embeddings2 = model.encode(
    df2["question"].fillna('').tolist(),
    convert_to_tensor=True,
)
| # Define function | |
def _top_match(query_embedding, corpus_embeddings, frame):
    """Return ``(question, link)`` for the row of *frame* closest to the query.

    Closeness is the cosine similarity between *query_embedding* and each row
    of *corpus_embeddings*; the highest-scoring row index is looked up in
    *frame* by position.
    """
    scores = util.pytorch_cos_sim(query_embedding, corpus_embeddings)[0]
    row = frame.iloc[int(scores.argmax())]
    # pandas represents a missing cell as NaN, which is not valid JSON;
    # report such cells as None (JSON null) instead.
    question, link = (
        None if pd.isna(row[column]) else row[column]
        for column in ("question", "link")
    )
    return question, link


def search_fatwa(query):
    """Find the most similar stored question in each of the two datasets.

    Args:
        query: Free-text question typed by the user.

    Returns:
        A dict with the keys ``question1``/``link1`` (best match from ``df``)
        and ``question2``/``link2`` (best match from ``df2``). All four values
        are ``None`` when *query* is ``None``, empty, or only whitespace,
        because a blank query has no meaningful nearest neighbour.
    """
    if query is None or not query.strip():
        return {
            "question1": None,
            "link1": None,
            "question2": None,
            "link2": None,
        }

    # Encode once and reuse the same embedding against both corpora.
    query_embedding = model.encode(query, convert_to_tensor=True)
    question1, link1 = _top_match(query_embedding, embeddings, df)
    question2, link2 = _top_match(query_embedding, embeddings2, df2)
    return {
        "question1": question1,
        "link1": link1,
        "question2": question2,
        "link2": link2,
    }
# Gradio UI: one text box in, the JSON result of search_fatwa out.
iface = gr.Interface(
    search_fatwa,
    "text",
    "json",
    title="Fatwa Search (Dual Source)",
    description="Get the most relevant fatwas from both datasets",
    allow_flagging="never",
)

# Start the web server as soon as the script runs.
iface.launch()