# --- Hugging Face Spaces file-viewer chrome captured by the scraper (not code) ---
# Spaces: Running | File size: 4,865 Bytes
# Viewer commit hashes: 1641ca7 fcadab7 e40de26 8e3e50c 9e4b885 9e4540a 0430419 ee6ab0d
# (Blame line-number gutter 1-148 omitted.)
# import gradio as gr
# import pandas as pd
# from sentence_transformers import SentenceTransformer, util
# # Load files
# df = pd.read_excel("IslamWeb_output.xlsx")
# df2 = pd.read_excel("JordanFatwas_all.xlsx")
# # Validate
# for d, name in [(df, "IslamWeb"), (df2, "JordanFatwas")]:
# if not {"question", "link"}.issubset(d.columns):
# raise ValueError(f"❌ Missing required columns in {name}")
# # Load model + encode
# model = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")
# embeddings = model.encode(df["question"].fillna('').tolist(), convert_to_tensor=True)
# embeddings2 = model.encode(df2["question"].fillna('').tolist(), convert_to_tensor=True)
# # Define function
# def search_fatwa(query):
# query_embedding = model.encode(query, convert_to_tensor=True)
# scores = util.pytorch_cos_sim(query_embedding, embeddings)[0]
# top_idx = int(scores.argmax())
# scores2 = util.pytorch_cos_sim(query_embedding, embeddings2)[0]
# top_idx2 = int(scores2.argmax())
# return {
# "question1": df.iloc[top_idx]["question"],
# "link1": df.iloc[top_idx]["link"],
# "question2": df2.iloc[top_idx2]["question"],
# "link2": df2.iloc[top_idx2]["link"],
# }
# # Interface
# iface = gr.Interface(
# fn=search_fatwa,
# inputs="text",
# outputs="json",
# allow_flagging="never",
# title="Fatwa Search (Dual Source)",
# description="Get the most relevant fatwas from both datasets"
# )
# iface.launch()
# import torch
# import pandas as pd
# from sentence_transformers import SentenceTransformer, util
# import gradio as gr
# model = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")
# df = pd.read_csv("cleaned1.csv")
# df2 = pd.read_csv("cleaned2.csv")
# embeddings = torch.load("embeddings1.pt")
# embeddings2 = torch.load("embeddings2.pt")
# # def search_fatwa(data):
# # query = data[0] if data else ""
# # query_embedding = model.encode(query, convert_to_tensor=True)
# # top_idx = int(util.pytorch_cos_sim(query_embedding, embeddings)[0].argmax())
# # top_idx2 = int(util.pytorch_cos_sim(query_embedding, embeddings2)[0].argmax())
# # return {
# # "question1": df.iloc[top_idx]["question"],
# # "link1": df.iloc[top_idx]["link"],
# # "question2": df2.iloc[top_idx2]["question"],
# # "link2": df2.iloc[top_idx2]["link"]
# # }
# def search_fatwa(data):
# query = data[0] if isinstance(data, list) else data
# if not query:
# return {"question1": "", "link1": "", "question2": "", "link2": ""}
# query_embedding = model.encode(query, convert_to_tensor=True)
# top_idx = int(util.pytorch_cos_sim(query_embedding, embeddings)[0].argmax())
# top_idx2 = int(util.pytorch_cos_sim(query_embedding, embeddings2)[0].argmax())
# # return {
# # "question1": df.iloc[top_idx]["question"],
# # "link1": df.iloc[top_idx]["link"],
# # "question2": df2.iloc[top_idx2]["question"],
# # "link2": df2.iloc[top_idx2]["link"]
# # }
# result = f"""Question 1: {df.iloc[top_idx]["question"]}
# Link 1: {df.iloc[top_idx]["link"]}
# Question 2: {df2.iloc[top_idx2]["question"]}
# Link 2: {df2.iloc[top_idx2]["link"]}"""
# return result
# iface = gr.Interface(
# fn=search_fatwa,
# inputs=[gr.Textbox(label="text", lines=3)],
# outputs="text" # Changed from "json" to "text"
# )
# # iface = gr.Interface(fn=search_fatwa, inputs=[gr.Textbox(label="text", lines=3)], outputs="json")
# # iface = gr.Interface(
# # fn=predict,
# # inputs=[gr.Textbox(label="text", lines=3)],
# # outputs='text',
# # title=title,
# # )
# iface.launch()
import torch
import pandas as pd
from sentence_transformers import SentenceTransformer, util
import gradio as gr
# Shared multilingual sentence-embedding model: encodes both the pre-built
# corpora (offline) and the incoming query (at request time).
model = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")
# Pre-cleaned fatwa datasets; usage below requires "question" and "link"
# columns in each — confirm against the actual CSV headers.
df = pd.read_csv("cleaned1.csv")
df2 = pd.read_csv("cleaned2.csv")
# Pre-computed question embeddings, row-aligned with df / df2 respectively.
# NOTE(review): torch.load without map_location assumes the tensors were
# saved on a device available here — consider map_location="cpu" for CPU
# Spaces; verify against how the .pt files were produced.
embeddings = torch.load("embeddings1.pt")
embeddings2 = torch.load("embeddings2.pt")
def predict(text):
    """Return the closest matching fatwa (question + link) from each dataset.

    Args:
        text: Free-text search query typed into the Gradio textbox.

    Returns:
        A plain-text block listing the top-1 match from each of the two
        pre-embedded corpora, or a fallback message for blank queries.
    """
    # Guard clause: reject None / empty / whitespace-only queries up front.
    if not text or not text.strip():
        return "No query provided"
    query_embedding = model.encode(text, convert_to_tensor=True)
    # The two corpora are searched independently; dedupe the identical
    # argmax-over-cosine-similarity lookup into one helper.
    q1, l1 = _best_match(query_embedding, embeddings, df)
    q2, l2 = _best_match(query_embedding, embeddings2, df2)
    return (
        f"Question 1: {q1}\n"
        f"Link 1: {l1}\n"
        f"Question 2: {q2}\n"
        f"Link 2: {l2}"
    )


def _best_match(query_embedding, corpus_embeddings, frame):
    """Return (question, link) of the corpus row most similar to the query."""
    # Top-1 nearest neighbour by cosine similarity; embeddings are
    # row-aligned with the dataframe, so the argmax index selects the row.
    idx = int(util.pytorch_cos_sim(query_embedding, corpus_embeddings)[0].argmax())
    row = frame.iloc[idx]
    return row["question"], row["link"]
# Gradio app entry point: single multi-line textbox in, plain text out.
# (Matches the structure of the working translation app this was based on.)
title = "Search CSV"
iface = gr.Interface(
    fn=predict,
    inputs=[gr.Textbox(label="text", lines=3)],
    outputs="text",
    title=title,
)
# Launched at import time, as Hugging Face Spaces expects for app.py.
# Fix: removed the stray " |" scrape artifact that followed this call
# and made the module a SyntaxError.
iface.launch()