File size: 4,865 Bytes
1641ca7
 
 
fcadab7
1641ca7
 
 
e40de26
1641ca7
 
 
 
e40de26
1641ca7
 
 
 
8e3e50c
1641ca7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e40de26
1641ca7
e40de26
 
9e4b885
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1641ca7
 
 
 
 
 
 
 
 
 
 
9e4540a
 
9e4b885
 
9e4540a
1641ca7
 
9e4b885
0430419
9e4b885
 
 
 
 
ee6ab0d
0430419
9e4540a
 
0430419
9e4540a
9e4b885
9e4540a
 
0430419
9e4540a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
# import gradio as gr
# import pandas as pd
# from sentence_transformers import SentenceTransformer, util

# # Load files
# df = pd.read_excel("IslamWeb_output.xlsx")
# df2 = pd.read_excel("JordanFatwas_all.xlsx")

# # Validate
# for d, name in [(df, "IslamWeb"), (df2, "JordanFatwas")]:
#     if not {"question", "link"}.issubset(d.columns):
#         raise ValueError(f"❌ Missing required columns in {name}")

# # Load model + encode
# model = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")
# embeddings = model.encode(df["question"].fillna('').tolist(), convert_to_tensor=True)
# embeddings2 = model.encode(df2["question"].fillna('').tolist(), convert_to_tensor=True)

# # Define function
# def search_fatwa(query):
#     query_embedding = model.encode(query, convert_to_tensor=True)

#     scores = util.pytorch_cos_sim(query_embedding, embeddings)[0]
#     top_idx = int(scores.argmax())

#     scores2 = util.pytorch_cos_sim(query_embedding, embeddings2)[0]
#     top_idx2 = int(scores2.argmax())

#     return {
#         "question1": df.iloc[top_idx]["question"],
#         "link1": df.iloc[top_idx]["link"],
#         "question2": df2.iloc[top_idx2]["question"],
#         "link2": df2.iloc[top_idx2]["link"],
#     }

# # Interface
# iface = gr.Interface(
#     fn=search_fatwa,
#     inputs="text",
#     outputs="json",
#     allow_flagging="never",
#     title="Fatwa Search (Dual Source)",
#     description="Get the most relevant fatwas from both datasets"
# )

# iface.launch()


# import torch
# import pandas as pd
# from sentence_transformers import SentenceTransformer, util
# import gradio as gr

# model = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")
# df = pd.read_csv("cleaned1.csv")
# df2 = pd.read_csv("cleaned2.csv")
# embeddings = torch.load("embeddings1.pt")
# embeddings2 = torch.load("embeddings2.pt")

# # def search_fatwa(data):
# #     query = data[0] if data else ""
# #     query_embedding = model.encode(query, convert_to_tensor=True)
# #     top_idx = int(util.pytorch_cos_sim(query_embedding, embeddings)[0].argmax())
# #     top_idx2 = int(util.pytorch_cos_sim(query_embedding, embeddings2)[0].argmax())
# #     return {
# #         "question1": df.iloc[top_idx]["question"],
# #         "link1": df.iloc[top_idx]["link"],
# #         "question2": df2.iloc[top_idx2]["question"],
# #         "link2": df2.iloc[top_idx2]["link"]
# #     }

# def search_fatwa(data):
#     query = data[0] if isinstance(data, list) else data
#     if not query:
#         return {"question1": "", "link1": "", "question2": "", "link2": ""}
#     query_embedding = model.encode(query, convert_to_tensor=True)
#     top_idx = int(util.pytorch_cos_sim(query_embedding, embeddings)[0].argmax())
#     top_idx2 = int(util.pytorch_cos_sim(query_embedding, embeddings2)[0].argmax())
#     # return {
#     #     "question1": df.iloc[top_idx]["question"],
#     #     "link1": df.iloc[top_idx]["link"],
#     #     "question2": df2.iloc[top_idx2]["question"],
#     #     "link2": df2.iloc[top_idx2]["link"]
#     # }
#     result = f"""Question 1: {df.iloc[top_idx]["question"]}
#         Link 1: {df.iloc[top_idx]["link"]}
        
#         Question 2: {df2.iloc[top_idx2]["question"]}
#         Link 2: {df2.iloc[top_idx2]["link"]}"""
#     return result

# iface = gr.Interface(
#     fn=search_fatwa, 
#     inputs=[gr.Textbox(label="text", lines=3)], 
#     outputs="text"  # Changed from "json" to "text"
# )

# # iface = gr.Interface(fn=search_fatwa, inputs=[gr.Textbox(label="text", lines=3)], outputs="json")




# # iface = gr.Interface(
# #   fn=predict, 
# #   inputs=[gr.Textbox(label="text", lines=3)],
# #   outputs='text',
# #   title=title,
# # )

# iface.launch()


import torch
import pandas as pd
from sentence_transformers import SentenceTransformer, util
import gradio as gr

# Multilingual sentence-embedding model (per its name) used to embed both corpora
# and, later, each incoming query.
model = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")
# Two pre-cleaned datasets; predict() below reads their "question" and "link" columns.
df = pd.read_csv("cleaned1.csv")
df2 = pd.read_csv("cleaned2.csv")
# Precomputed question embeddings, assumed row-aligned with df / df2 respectively
# (predict() indexes the frames with argmax positions from these tensors).
embeddings = torch.load("embeddings1.pt")
embeddings2 = torch.load("embeddings2.pt")

def predict(text):
    """Return the most similar question and link from each of the two datasets.

    Args:
        text: Free-text search query. ``None`` or whitespace-only input is
            rejected before any model work happens.

    Returns:
        A formatted multi-line string with the single best match from ``df``
        and from ``df2``, or ``"No query provided"`` for an empty query.
    """
    # Guard clause: falsy or whitespace-only input never touches the model.
    if not text or not text.strip():
        return "No query provided"

    query_embedding = model.encode(text, convert_to_tensor=True)

    def _best_row(frame, corpus_embeddings):
        # Row of `frame` whose precomputed embedding is most cosine-similar
        # to the query. util.cos_sim is the current name for the legacy
        # util.pytorch_cos_sim alias; results are identical.
        best_idx = int(util.cos_sim(query_embedding, corpus_embeddings)[0].argmax())
        return frame.iloc[best_idx]

    row1 = _best_row(df, embeddings)
    row2 = _best_row(df2, embeddings2)

    # Same plain-text layout the UI's "text" output component expects.
    return (
        f"Question 1: {row1['question']}\n"
        f"Link 1: {row1['link']}\n"
        f"\n"
        f"Question 2: {row2['question']}\n"
        f"Link 2: {row2['link']}"
    )

# Gradio UI wiring: one multi-line textbox in, plain-text result out.
title = "Search CSV"
iface = gr.Interface(
    fn=predict,  # entry point defined above
    inputs=[gr.Textbox(label="text", lines=3)],
    outputs='text',
    title=title,
)
# Module-level side effect: starts the Gradio web server (blocks until stopped).
iface.launch()