Spaces:
Running
on
Zero
Running
on
Zero
Omartificial-Intelligence-Space
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -59,13 +59,21 @@ def evaluate_model_with_insights(model_name):
|
|
59 |
for dataset_name, dataset in datasets.items():
|
60 |
all_mrr, all_map, all_ndcg = [], [], []
|
61 |
dataset_samples = []
|
62 |
-
|
63 |
if 'candidate_document' in dataset.column_names:
|
64 |
grouped_data = dataset.to_pandas().groupby("query")
|
65 |
for query, group in grouped_data:
|
66 |
-
|
|
|
|
|
|
|
|
|
67 |
relevance_labels = group['relevance_label'].tolist()
|
68 |
-
|
|
|
|
|
|
|
|
|
|
|
69 |
scores = model.predict(pairs)
|
70 |
|
71 |
# Collecting top-5 results for display
|
@@ -83,8 +91,21 @@ def evaluate_model_with_insights(model_name):
|
|
83 |
else:
|
84 |
for entry in dataset:
|
85 |
query = entry['query']
|
86 |
-
|
87 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
88 |
pairs = [(query, doc) for doc in candidate_texts]
|
89 |
scores = model.predict(pairs)
|
90 |
|
@@ -100,6 +121,27 @@ def evaluate_model_with_insights(model_name):
|
|
100 |
all_mrr.append(mean_reciprocal_rank(relevance_labels, scores))
|
101 |
all_map.append(mean_average_precision(relevance_labels, scores))
|
102 |
all_ndcg.append(ndcg_at_k(relevance_labels, scores, k=10))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
103 |
|
104 |
# Metrics for this dataset
|
105 |
results.append({
|
@@ -155,4 +197,4 @@ interface = gr.Interface(
|
|
155 |
)
|
156 |
)
|
157 |
|
158 |
-
interface.launch(debug=True)
|
|
|
59 |
for dataset_name, dataset in datasets.items():
|
60 |
all_mrr, all_map, all_ndcg = [], [], []
|
61 |
dataset_samples = []
|
|
|
62 |
if 'candidate_document' in dataset.column_names:
|
63 |
grouped_data = dataset.to_pandas().groupby("query")
|
64 |
for query, group in grouped_data:
|
65 |
+
# Skip invalid queries
|
66 |
+
if query is None or not isinstance(query, str) or query.strip() == "":
|
67 |
+
continue
|
68 |
+
|
69 |
+
candidate_texts = group['candidate_document'].dropna().tolist()
|
70 |
relevance_labels = group['relevance_label'].tolist()
|
71 |
+
|
72 |
+
# Skip if no valid candidate documents
|
73 |
+
if not candidate_texts or len(candidate_texts) != len(relevance_labels):
|
74 |
+
continue
|
75 |
+
|
76 |
+
pairs = [(query, doc) for doc in candidate_texts if doc is not None and isinstance(doc, str) and doc.strip() != ""]
|
77 |
scores = model.predict(pairs)
|
78 |
|
79 |
# Collecting top-5 results for display
|
|
|
91 |
else:
|
92 |
for entry in dataset:
|
93 |
query = entry['query']
|
94 |
+
|
95 |
+
# Validate query and documents
|
96 |
+
if query is None or not isinstance(query, str) or query.strip() == "":
|
97 |
+
continue
|
98 |
+
|
99 |
+
candidate_texts = [
|
100 |
+
doc for doc in [entry.get('positive'), entry.get('negative1'), entry.get('negative2'), entry.get('negative3'), entry.get('negative4')]
|
101 |
+
if doc is not None and isinstance(doc, str) and doc.strip() != ""
|
102 |
+
]
|
103 |
+
relevance_labels = [1] + [0] * (len(candidate_texts) - 1)
|
104 |
+
|
105 |
+
# Skip if no valid candidate documents
|
106 |
+
if not candidate_texts or len(candidate_texts) != len(relevance_labels):
|
107 |
+
continue
|
108 |
+
|
109 |
pairs = [(query, doc) for doc in candidate_texts]
|
110 |
scores = model.predict(pairs)
|
111 |
|
|
|
121 |
all_mrr.append(mean_reciprocal_rank(relevance_labels, scores))
|
122 |
all_map.append(mean_average_precision(relevance_labels, scores))
|
123 |
all_ndcg.append(ndcg_at_k(relevance_labels, scores, k=10))
|
124 |
+
|
125 |
+
else:
|
126 |
+
for entry in dataset:
|
127 |
+
query = entry['query']
|
128 |
+
candidate_texts = [entry['positive'], entry['negative1'], entry['negative2'], entry['negative3'], entry['negative4']]
|
129 |
+
relevance_labels = [1, 0, 0, 0, 0]
|
130 |
+
pairs = [(query, doc) for doc in candidate_texts]
|
131 |
+
scores = model.predict(pairs)
|
132 |
+
|
133 |
+
# Collecting top-5 results for display
|
134 |
+
sorted_indices = np.argsort(scores)[::-1]
|
135 |
+
top_docs = [(candidate_texts[i], scores[i], relevance_labels[i]) for i in sorted_indices[:5]]
|
136 |
+
dataset_samples.append({
|
137 |
+
"Query": query,
|
138 |
+
"Top 5 Candidates": top_docs
|
139 |
+
})
|
140 |
+
|
141 |
+
# Metrics
|
142 |
+
all_mrr.append(mean_reciprocal_rank(relevance_labels, scores))
|
143 |
+
all_map.append(mean_average_precision(relevance_labels, scores))
|
144 |
+
all_ndcg.append(ndcg_at_k(relevance_labels, scores, k=10))
|
145 |
|
146 |
# Metrics for this dataset
|
147 |
results.append({
|
|
|
197 |
)
|
198 |
)
|
199 |
|
200 |
+
interface.launch(debug=True)
|