| import re | |
| import json | |
| import asyncio | |
| from lightrag import LightRAG, QueryParam | |
| from tqdm import tqdm | |
def extract_queries(file_path):
    """Extract the question texts from a questions file.

    The file is read as UTF-8, every ``**`` sequence is stripped, and each
    occurrence of ``- Question <number>: <text>`` contributes its ``<text>``
    part (up to the end of that line).

    Args:
        file_path: Path of the questions text file.

    Returns:
        list[str]: The question texts in order of appearance; an empty list
        when nothing matches.
    """
    # Explicit UTF-8 so reading does not depend on the platform's locale
    # encoding; non-ASCII questions would otherwise be mis-decoded or raise
    # UnicodeDecodeError on systems whose default encoding is not UTF-8.
    with open(file_path, "r", encoding="utf-8") as f:
        data = f.read()

    # Drop the "**" markers first so "- **Question 1:** ..." matches the
    # same pattern as the plain form.
    data = data.replace("**", "")
    return re.findall(r"- Question \d+: (.+)", data)
async def process_query(query_text, rag_instance, query_param):
    """Run one query and report the outcome as a ``(result, error)`` pair.

    Args:
        query_text: The query string handed to ``rag_instance.aquery``.
        rag_instance: Object exposing an awaitable ``aquery(text, param=...)``.
        query_param: Forwarded to ``aquery`` as the ``param`` keyword argument.

    Returns:
        tuple: ``({"query": ..., "result": ...}, None)`` when the query
        succeeds, or ``(None, {"query": ..., "error": str(exc)})`` when
        ``aquery`` raises an ``Exception``.
    """
    try:
        answer = await rag_instance.aquery(query_text, param=query_param)
    except Exception as exc:
        # A failing query is reported as data instead of being raised, so the
        # caller can decide what to do with it.
        failure = {"query": query_text, "error": str(exc)}
        return None, failure

    success = {"query": query_text, "result": answer}
    return success, None
def always_get_an_event_loop() -> asyncio.AbstractEventLoop:
    """Return a usable event loop for the current thread, creating one if needed.

    The current loop is reused when one is available and still open.
    Otherwise a new loop is created, installed as the current loop via
    ``asyncio.set_event_loop`` and returned.

    Returns:
        asyncio.AbstractEventLoop: An event loop that is not closed.
    """
    try:
        loop = asyncio.get_event_loop()
    except RuntimeError:
        # No current event loop is available in this thread.
        loop = None

    # A loop that was already closed cannot run anything
    # (run_until_complete would raise), so replace it as well.
    if loop is None or loop.is_closed():
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
    return loop
def run_queries_and_save_to_json(
    queries, rag_instance, query_param, output_file, error_file
):
    """Run every query in turn and write the outcomes to disk.

    Successful results are written to ``output_file`` as a single JSON array.
    Failed queries are appended to ``error_file`` as individual JSON objects,
    each followed by a newline.

    Args:
        queries: Iterable of query strings.
        rag_instance: Passed through to ``process_query``.
        query_param: Passed through to ``process_query``.
        output_file: Path of the JSON results file. It is overwritten on each
            call so that it always holds exactly one array.
        error_file: Path of the error log; entries are appended.
    """
    loop = always_get_an_event_loop()

    # The results file is truncated ("w"): appending a second "[ ... ]" block
    # to an existing file would leave it unparsable as JSON. The error file is
    # an append-only log of separate objects, so "a" is kept for it.
    with open(output_file, "w", encoding="utf-8") as result_file, open(
        error_file, "a", encoding="utf-8"
    ) as err_file:
        result_file.write("[\n")
        first_entry = True
        try:
            for query_text in tqdm(queries, desc="Processing queries", unit="query"):
                result, error = loop.run_until_complete(
                    process_query(query_text, rag_instance, query_param)
                )
                if result is not None:
                    # Separator goes before every entry except the first, so
                    # the array never ends with a trailing comma.
                    if not first_entry:
                        result_file.write(",\n")
                    json.dump(result, result_file, ensure_ascii=False, indent=4)
                    first_entry = False
                elif error is not None:
                    json.dump(error, err_file, ensure_ascii=False, indent=4)
                    err_file.write("\n")
        finally:
            # Close the array even if the run is interrupted, so the results
            # written so far remain a valid JSON document.
            result_file.write("\n]")
if __name__ == "__main__":
    # `cls` names the dataset: it selects the working directory, the
    # questions file and the output file names. `mode` is handed to QueryParam.
    cls = "agriculture"
    mode = "hybrid"

    WORKING_DIR = f"../{cls}"
    questions_path = f"../datasets/questions/{cls}_questions.txt"
    result_path = f"{cls}_result.json"
    error_path = f"{cls}_errors.json"

    rag = LightRAG(working_dir=WORKING_DIR)
    query_param = QueryParam(mode=mode)
    queries = extract_queries(questions_path)

    run_queries_and_save_to_json(
        queries, rag, query_param, result_path, error_path
    )