import asyncio
import json
import os
from datetime import datetime

from app.services.llm import LLMService
from app.core.config import settings

class BenchmarkManager:
    """Runs a fixed set of test queries against a model and saves the results as a JSON report."""

    TEST_QUERIES = [
        "What is the biblical basis for Grace vs Works?",
        "Explain the role of the Holy Spirit in a believer's daily life.",
        "How should a Christian respond to suffering according to the book of Job?",
        "Explain the concept of 'Stillness' or 'Sabbath' in modern terms.",
        "What does the Bible say about caring for the poor and marginalized?"
    ]

    RESULTS_DIR = "important/benchmarks"

    def __init__(self):
        # exist_ok=True already handles an existing directory, so no separate check is needed
        os.makedirs(self.RESULTS_DIR, exist_ok=True)
        self.llm = LLMService()
    async def run_benchmark(self, model_name: str):
        """Run all TEST_QUERIES against the given model and write a timestamped JSON report."""
        print(f"\n{'='*50}")
        print(f"Benchmark: Running tests for model {model_name}...")
        print(f"{'='*50}")

        # Patch the model name in settings; restored in the finally block below
        original_model = settings.MODEL_NAME
        settings.MODEL_NAME = model_name

        results = {
            "model": model_name,
            "timestamp": datetime.now().isoformat(),
            "tests": []
        }

        try:
            for query in self.TEST_QUERIES:
                print(f"Testing query: {query}")
                try:
                    # Re-initialize or clear offline state if needed
                    self.llm.is_offline = False
                    response = await self.llm.generate_response(query)
                    results["tests"].append({
                        "query": query,
                        "response": response.get("content", ""),
                        "tool_calls": str(response.get("tool_calls", []))
                    })
                except Exception as e:
                    print(f"Error during benchmark: {str(e)}")
                    results["tests"].append({"query": query, "error": str(e)})
        finally:
            # Restore original settings even if the run is interrupted
            settings.MODEL_NAME = original_model

        # Save results
        safe_model_name = model_name.replace(':', '_').replace('/', '_')
        filename = f"benchmark_{safe_model_name}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
        with open(os.path.join(self.RESULTS_DIR, filename), "w", encoding="utf-8") as f:
            json.dump(results, f, indent=2)
        print(f"Benchmark: Results saved to {filename}")

if __name__ == "__main__":
    benchmark = BenchmarkManager()

    async def main():
        print("Starting ORA Model Benchmarks...")
        # 2. Test Gabriel-Mini (llama3.2:1b)
        await benchmark.run_benchmark("llama3.2:1b")
        print("\nBenchmarks complete. Visit important/benchmarks/ to see the reports.")

    asyncio.run(main())
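
# Illustrative sketch (not part of the original script): to compare several models,
# main() could loop over a list of model tags. The second tag below is a placeholder;
# substitute whichever tags the LLMService backend actually has available.
#
#     async def main():
#         for model in ["llama3.2:1b", "llama3.2:3b"]:  # hypothetical model list
#             await benchmark.run_benchmark(model)
#
#     asyncio.run(main())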