Serhan Yılmaz committed on
Commit
800222f
·
1 Parent(s): c1fa8ac

update app

Browse files
Files changed (1) hide show
  1. app.py +39 -34
app.py CHANGED
@@ -4,7 +4,6 @@ from sentence_transformers import SentenceTransformer
4
  from transformers import pipeline
5
  from typing import List, Tuple
6
  import os
7
- from dotenv import load_dotenv
8
  import logging
9
  import json
10
  import gradio as gr
@@ -14,13 +13,27 @@ import pandas as pd
14
  logging.basicConfig(level=logging.INFO)
15
  logger = logging.getLogger(__name__)
16
 
17
- load_dotenv() # This loads the variables from .env
18
-
19
  # Initialize Cohere client, SentenceTransformer model, and QA pipeline
20
- co = cohere.Client(api_key = os.environ.get("COHERE_API_KEY"))
21
  sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
22
  qa_pipeline = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")
23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  def generate_questions(context: str, answer: str) -> List[str]:
25
  try:
26
  response = co.chat(
@@ -93,7 +106,13 @@ def check_answer_precision(context: str, questions: List[str], original_answer:
93
  return precision_scores, generated_answers
94
 
95
  def calculate_composite_scores(sd_scores: List[float], sr_scores: List[float], ap_scores: List[float]) -> List[float]:
96
- return [0.2 * sd + 0.4 * sr + 0.4 * ap for sd, sr, ap in zip(sd_scores, sr_scores, ap_scores)]
 
 
 
 
 
 
97
 
98
  def rank_questions_with_details(context: str, answer: str) -> Tuple[pd.DataFrame, List[pd.DataFrame], str]:
99
  questions = generate_questions(context, answer)
@@ -107,16 +126,16 @@ def rank_questions_with_details(context: str, answer: str) -> Tuple[pd.DataFrame
107
  # Create detailed scores dataframe
108
  detailed_scores = pd.DataFrame({
109
  'Question': questions,
 
110
  'Composite Score': composite_scores,
111
  'Structural Diversity': sd_scores,
112
  'Semantic Relevance': sr_scores,
113
- 'Answer Precision': ap_scores,
114
  'Generated Answer': generated_answers
115
  })
116
- detailed_scores = detailed_scores.sort_values('Composite Score', ascending=False).reset_index(drop=True)
117
 
118
  # Create separate ranking dataframes for each metric
119
- metrics = ['Composite Score', 'Structural Diversity', 'Semantic Relevance', 'Answer Precision']
120
  rankings = []
121
 
122
  for metric in metrics:
@@ -125,36 +144,22 @@ def rank_questions_with_details(context: str, answer: str) -> Tuple[pd.DataFrame
125
  'Question': [questions[i] for i in np.argsort(detailed_scores[metric])[::-1]],
126
  f'{metric}': sorted(detailed_scores[metric], reverse=True)
127
  })
 
 
128
  rankings.append(df)
129
 
130
  best_question = detailed_scores.iloc[0]['Question']
131
 
132
  return detailed_scores, rankings, best_question
133
 
134
- # Define sample inputs
135
- samples = [
136
- {
137
- "context": "Albert Einstein is an Austrian scientist, who has completed his higher education in ETH Zurich in Zurich, Switzerland. He was later a faculty at Princeton University.",
138
- "answer": "Switzerland"
139
- },
140
- {
141
- "context": "The Eiffel Tower, located in Paris, France, is one of the most famous landmarks in the world. It was constructed in 1889 as the entrance arch to the 1889 World's Fair. The tower is 324 meters (1,063 ft) tall and is the tallest structure in Paris.",
142
- "answer": "Paris"
143
- },
144
- {
145
- "context": "The Great Wall of China is a series of fortifications and walls built across the historical northern borders of ancient Chinese states and Imperial China to protect against nomadic invasions. It is the largest man-made structure in the world, with a total length of more than 13,000 miles (21,000 kilometers).",
146
- "answer": "China"
147
- }
148
- ]
149
-
150
  def gradio_interface(context: str, answer: str) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame, str]:
151
  detailed_scores, rankings, best_question = rank_questions_with_details(context, answer)
152
  return (
153
  detailed_scores,
154
- rankings[0], # Composite Score Ranking
155
- rankings[1], # Structural Diversity Ranking
156
- rankings[2], # Semantic Relevance Ranking
157
- rankings[3], # Answer Precision Ranking
158
  f"Best Question: {best_question}"
159
  )
160
 
@@ -181,25 +186,25 @@ with gr.Blocks(theme=gr.themes.Default()) as iface:
181
 
182
  with gr.Row():
183
  with gr.Column():
184
- composite_ranking_output = gr.DataFrame(label="Composite Score Ranking")
185
  with gr.Column():
186
- structural_diversity_ranking_output = gr.DataFrame(label="Structural Diversity Ranking")
187
 
188
  with gr.Row():
189
  with gr.Column():
190
- semantic_relevance_ranking_output = gr.DataFrame(label="Semantic Relevance Ranking")
191
  with gr.Column():
192
- answer_precision_ranking_output = gr.DataFrame(label="Answer Precision Ranking")
193
 
194
  submit_button.click(
195
  fn=gradio_interface,
196
  inputs=[context_input, answer_input],
197
  outputs=[
198
  detailed_scores_output,
 
199
  composite_ranking_output,
200
  structural_diversity_ranking_output,
201
  semantic_relevance_ranking_output,
202
- answer_precision_ranking_output,
203
  best_question_output
204
  ]
205
  )
@@ -211,5 +216,5 @@ with gr.Blocks(theme=gr.themes.Default()) as iface:
211
  outputs=[context_input, answer_input]
212
  )
213
 
214
-
215
  iface.launch()
 
4
  from transformers import pipeline
5
  from typing import List, Tuple
6
  import os
 
7
  import logging
8
  import json
9
  import gradio as gr
 
13
  logging.basicConfig(level=logging.INFO)
14
  logger = logging.getLogger(__name__)
15
 
 
 
16
  # Initialize Cohere client, SentenceTransformer model, and QA pipeline
17
+ co = cohere.Client(api_key=os.environ.get("COHERE_API_KEY"))
18
  sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
19
  qa_pipeline = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")
20
 
21
+ # Define sample inputs
22
+ samples = [
23
+ {
24
+ "context": "Albert Einstein is an Austrian scientist, who has completed his higher education in ETH Zurich in Zurich, Switzerland. He was later a faculty at Princeton University.",
25
+ "answer": "Switzerland"
26
+ },
27
+ {
28
+ "context": "The Eiffel Tower, located in Paris, France, is one of the most famous landmarks in the world. It was constructed in 1889 as the entrance arch to the 1889 World's Fair. The tower is 324 meters (1,063 ft) tall and is the tallest structure in Paris.",
29
+ "answer": "Paris"
30
+ },
31
+ {
32
+ "context": "The Great Wall of China is a series of fortifications and walls built across the historical northern borders of ancient Chinese states and Imperial China to protect against nomadic invasions. It is the largest man-made structure in the world, with a total length of more than 13,000 miles (21,000 kilometers).",
33
+ "answer": "China"
34
+ }
35
+ ]
36
+
37
  def generate_questions(context: str, answer: str) -> List[str]:
38
  try:
39
  response = co.chat(
 
106
  return precision_scores, generated_answers
107
 
108
  def calculate_composite_scores(sd_scores: List[float], sr_scores: List[float], ap_scores: List[float]) -> List[float]:
109
+ # Normalize other scores based on answer precision
110
+ max_other_score = max(max(sd_scores), max(sr_scores))
111
+ normalized_sd_scores = [sd * (ap / max_other_score) for sd, ap in zip(sd_scores, ap_scores)]
112
+ normalized_sr_scores = [sr * (ap / max_other_score) for sr, ap in zip(sr_scores, ap_scores)]
113
+
114
+ # Calculate composite scores with higher weight for answer precision
115
+ return [0.6 * ap + 0.2 * sd + 0.2 * sr for ap, sd, sr in zip(ap_scores, normalized_sd_scores, normalized_sr_scores)]
116
 
117
  def rank_questions_with_details(context: str, answer: str) -> Tuple[pd.DataFrame, List[pd.DataFrame], str]:
118
  questions = generate_questions(context, answer)
 
126
  # Create detailed scores dataframe
127
  detailed_scores = pd.DataFrame({
128
  'Question': questions,
129
+ 'Answer Precision': ap_scores,
130
  'Composite Score': composite_scores,
131
  'Structural Diversity': sd_scores,
132
  'Semantic Relevance': sr_scores,
 
133
  'Generated Answer': generated_answers
134
  })
135
+ detailed_scores = detailed_scores.sort_values('Answer Precision', ascending=False).reset_index(drop=True)
136
 
137
  # Create separate ranking dataframes for each metric
138
+ metrics = ['Answer Precision', 'Composite Score', 'Structural Diversity', 'Semantic Relevance']
139
  rankings = []
140
 
141
  for metric in metrics:
 
144
  'Question': [questions[i] for i in np.argsort(detailed_scores[metric])[::-1]],
145
  f'{metric}': sorted(detailed_scores[metric], reverse=True)
146
  })
147
+ if metric == 'Answer Precision':
148
+ df['Generated Answer'] = [generated_answers[i] for i in np.argsort(detailed_scores[metric])[::-1]]
149
  rankings.append(df)
150
 
151
  best_question = detailed_scores.iloc[0]['Question']
152
 
153
  return detailed_scores, rankings, best_question
154
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
155
  def gradio_interface(context: str, answer: str) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame, str]:
156
  detailed_scores, rankings, best_question = rank_questions_with_details(context, answer)
157
  return (
158
  detailed_scores,
159
+ rankings[0], # Answer Precision Ranking
160
+ rankings[1], # Composite Score Ranking
161
+ rankings[2], # Structural Diversity Ranking
162
+ rankings[3], # Semantic Relevance Ranking
163
  f"Best Question: {best_question}"
164
  )
165
 
 
186
 
187
  with gr.Row():
188
  with gr.Column():
189
+ answer_precision_ranking_output = gr.DataFrame(label="Answer Precision Ranking")
190
  with gr.Column():
191
+ composite_ranking_output = gr.DataFrame(label="Composite Score Ranking")
192
 
193
  with gr.Row():
194
  with gr.Column():
195
+ structural_diversity_ranking_output = gr.DataFrame(label="Structural Diversity Ranking")
196
  with gr.Column():
197
+ semantic_relevance_ranking_output = gr.DataFrame(label="Semantic Relevance Ranking")
198
 
199
  submit_button.click(
200
  fn=gradio_interface,
201
  inputs=[context_input, answer_input],
202
  outputs=[
203
  detailed_scores_output,
204
+ answer_precision_ranking_output,
205
  composite_ranking_output,
206
  structural_diversity_ranking_output,
207
  semantic_relevance_ranking_output,
 
208
  best_question_output
209
  ]
210
  )
 
216
  outputs=[context_input, answer_input]
217
  )
218
 
219
+ # Launch the app
220
  iface.launch()