devrajsinh2012 committed on
Commit f7a4d18 · 2 Parent(s): 29809c8b2d56ba

Merge remote-tracking branch 'github/main'


# Conflicts:
# README.md
# backend/evaluation/ablation_chunk_size.py
# backend/evaluation/backbone_comparison.py
# backend/evaluation/baseline_runner.py
# backend/evaluation/benchmark_runner.py
# backend/evaluation/metrics.py

README.md CHANGED
@@ -7,7 +7,6 @@ sdk: docker
 pinned: false
 license: mit
 ---
-
 # MEXAR Ultimate 🧠
 
 **Multimodal Explainable AI Reasoning Assistant**
 
backend/evaluation/baseline_runner.py CHANGED
@@ -4,13 +4,11 @@ Runs CRAG and RAPTOR baselines against a set of test queries.
 import sys
 import os
 from typing import Dict, List, Optional
-
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 from modules.reasoning_engine import create_reasoning_engine
 from evaluation.metrics import MetricsRunner
 
-
 def _append_score(results: Dict[str, List[float]], baseline: str, score: Optional[float]) -> None:
     if score is None:
         print(f"{baseline}: Faithfulness score unavailable for this query.")
@@ -66,7 +64,6 @@ def run_baselines(agent_name: str, queries: List[str]):
 
     return results
 
-
 if __name__ == "__main__":
     # Example usage
     test_queries = [
 
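For orientation, `_append_score` is the guard that keeps missing faithfulness scores out of the aggregated results. Below is a minimal sketch of how it might be driven inside baseline_runner.py (where `_append_score` is defined), assuming `MetricsRunner` needs no constructor arguments and using a hypothetical `run_query` placeholder in place of the real baseline invocation made by `run_baselines`:

from typing import Any, Dict, List, Optional

from evaluation.metrics import MetricsRunner

def run_query(baseline: str, agent_name: str, query: str) -> Dict[str, Any]:
    # Placeholder: stands in for whatever reasoning-engine call run_baselines makes.
    raise NotImplementedError

def collect_faithfulness(agent_name: str, queries: List[str]) -> Dict[str, List[float]]:
    # Illustrative flow only; the committed logic lives in run_baselines above.
    metrics = MetricsRunner()
    results: Dict[str, List[float]] = {"CRAG": [], "RAPTOR": []}
    for baseline in results:
        for query in queries:
            response = run_query(baseline, agent_name, query)
            score: Optional[float] = metrics.extract_faithfulness(response)  # may be None
            _append_score(results, baseline, score)  # only keeps non-None scores
    return results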
backend/evaluation/metrics.py CHANGED
@@ -24,7 +24,7 @@ class MetricsRunner:
         return {
             "faithfulness": faith_res.score,
             "bart_nli": bart_res.score,
-            "factscore": fact_res.score
+            "factscore": fact_res.score,
         }
 
     def extract_faithfulness(self, response: Dict[str, Any]) -> Optional[float]:
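Downstream code would typically average these per-query score dicts over a whole benchmark run. A small, purely hypothetical aggregation sketch; only the metric keys ("faithfulness", "bart_nli", "factscore") come from the diff above:

from typing import Dict, List

def aggregate_scores(per_query: List[Dict[str, float]]) -> Dict[str, float]:
    # Hypothetical helper: mean of each metric across all evaluated queries.
    if not per_query:
        return {}
    totals: Dict[str, float] = {}
    for scores in per_query:
        for metric, value in scores.items():
            totals[metric] = totals.get(metric, 0.0) + value
    return {metric: total / len(per_query) for metric, total in totals.items()}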