Spaces:

ambrosfitz
/

md-qa-test

Sleeping

App Files Files Community

ambrosfitz committed on Oct 13

Commit

ececa0b

•

1 Parent(s): afaec0f

Update question_generator.py

Browse files

Files changed (1) hide show

question_generator.py +126 -2

question_generator.py CHANGED Viewed

@@ -24,7 +24,49 @@ model = "mistral-large-latest"
 # Initialize Mistral client
 client = MistralClient(api_key=api_key)
-# ... (previous functions remain the same)
 def extract_json_from_markdown(markdown_text: str) -> str:
     """Extract JSON content from Markdown-formatted text."""
@@ -36,7 +78,89 @@ def extract_json_from_markdown(markdown_text: str) -> str:
 def generate_microbiology_question() -> Dict[str, str]:
     """Generate a microbiology question."""
-    # ... (previous code remains the same)
     try:
         chat_response = client.chat(

 # Initialize Mistral client
 client = MistralClient(api_key=api_key)
+# Load data from CSV files
+def load_csv_data(file_path: str) -> List[Dict[str, str]]:
+    """Load every data row of a CSV file as a list of dictionaries.
+
+    The file is parsed with ``csv.DictReader``, so the first row supplies
+    the keys and each following row becomes one dict.
+
+    Args:
+        file_path: Path of the UTF-8 encoded CSV file to read.
+
+    Returns:
+        One dict per data row, in file order.
+
+    Raises:
+        FileNotFoundError: If ``file_path`` does not exist (logged, then
+            re-raised).
+        csv.Error: If the csv module fails to parse the file (logged, then
+            re-raised).
+    """
+    logging.info(f"Loading data from {file_path}...")
+    try:
+        # The csv module requires files to be opened with newline='';
+        # otherwise universal-newline translation rewrites "\r\n" that
+        # appears inside quoted fields before the parser ever sees it.
+        with open(file_path, 'r', encoding='utf-8', newline='') as csvfile:
+            data = list(csv.DictReader(csvfile))
+    except FileNotFoundError:
+        logging.error(f"File not found: {file_path}")
+        raise
+    except csv.Error as e:
+        logging.error(f"Error reading CSV file {file_path}: {e}")
+        raise
+    logging.info(f"Loaded {len(data)} rows from {file_path}")
+    return data
+# Read both source datasets eagerly at import time. Any failure is logged
+# and re-raised so the module does not continue without its data.
+try:
+    detailed_cases, infectious_diseases = (
+        load_csv_data(csv_name)
+        for csv_name in ('processed_medical_history.csv', 'infectious_diseases.csv')
+    )
+except Exception as load_error:
+    logging.error(f"Failed to load CSV data: {load_error}")
+    raise
+def hash_question(question: str) -> str:
+    """Return the hexadecimal MD5 digest of ``question``.
+
+    The digest serves as a compact fingerprint for duplicate detection.
+    """
+    fingerprint = hashlib.md5()
+    fingerprint.update(question.encode())
+    return fingerprint.hexdigest()
+def load_generated_questions() -> set:
+    """Read the stored question hashes from ``generated_questions.txt``.
+
+    Each line of the file, stripped of surrounding whitespace, becomes one
+    member of the returned set. A missing file yields an empty set.
+    """
+    try:
+        hash_file = open('generated_questions.txt', 'r')
+    except FileNotFoundError:
+        # Nothing has been recorded yet.
+        return set()
+    with hash_file:
+        return {entry.strip() for entry in hash_file}
+def save_generated_question(question_hash: str):
+    """Append ``question_hash`` on its own line to ``generated_questions.txt``."""
+    with open('generated_questions.txt', 'a') as hash_file:
+        hash_file.writelines([question_hash, '\n'])
+# Hashes of previously generated questions, read once at import time from
+# generated_questions.txt (an empty set when that file does not exist).
+generated_questions = load_generated_questions()
 def extract_json_from_markdown(markdown_text: str) -> str:
     """Extract JSON content from Markdown-formatted text."""
 def generate_microbiology_question() -> Dict[str, str]:
     """Generate a microbiology question."""
+    question_types = [
+        "clinical_vignette",
+        "mechanism_of_pathogenesis",
+        "laboratory_diagnosis",
+        "antimicrobial_resistance",
+        "vaccine_preventable_disease",
+        "microbial_physiology_genetics",
+        "epidemiology_transmission"
+    ]
+    question_type = random.choice(question_types)
+    logging.info(f"Generating {question_type} question...")
+    if question_type == "clinical_vignette":
+        case = random.choice(detailed_cases)
+        context = f"""
+        Pathogen: {case['Pathogen_Name']} ({case['Pathogen_Type']})
+        Key Symptoms: {case['Key_Symptoms']}
+        Physical Findings: {case['Physical_Findings']}
+        Lab Results: {case['Lab_Results']}
+        Patient Demographics: {case['Patient_Demographics']}
+        """
+    else:
+        disease = random.choice(infectious_diseases)
+        context = f"""
+        Infectious Agent: {disease['infectious_agent']}
+        Diagnosis: {disease['diagnosis']}
+        Treatment: {disease['treatment']}
+        """
+    prompt = f"""
+    Create a microbiology question that could appear on the NBME exam. This should be a {question_type} question.
+    Use the following information as inspiration, but feel free to expand or modify:
+    {context}
+    Generate a question based on the following template, depending on the question type:
+    1. Clinical Vignette with Pathogen Identification:
+    A [age]-year-old [gender] presents with [symptoms and clinical findings]. [Additional relevant information]. Which of the following is the most likely causal organism?
+    2. Mechanism of Pathogenesis:
+    [Description of a pathogen or clinical scenario]
+    Which of the following best describes the mechanism by which this organism causes disease?
+    3. Laboratory Diagnosis:
+    A patient presents with [symptoms]. [Description of laboratory findings or test results].
+    Which of the following is the most likely diagnosis based on these laboratory findings?
+    4. Antimicrobial Mechanism and Resistance:
+    A patient is diagnosed with [infection]. The causative organism is found to be resistant to [antibiotic]. Which of the following mechanisms is most likely responsible for this resistance?
+    5. Vaccine-Preventable Disease:
+    A [age]-year-old [gender] presents with [symptoms of a vaccine-preventable disease]. Which of the following vaccines would have been most likely to prevent this condition?
+    6. Microbial Physiology and Genetics:
+    An investigator observes [description of microbial behavior or genetic phenomenon]. Which of the following best explains this observation?
+    7. Epidemiology and Transmission:
+    A cluster of [disease] cases is reported in [location]. [Description of affected population and circumstances]. Which of the following is the most likely mode of transmission?
+    Include:
+    1. The question based on the selected template
+    2. Five possible answer options (A through E)
+    3. The correct answer
+    4. A brief explanation of why the correct answer is right and why the other options are incorrect
+    5. Detailed medical reasoning for the correct answer, including relevant pathophysiology, microbiology concepts, and clinical implications.
+    Format the response as a JSON object with the following keys:
+    {{
+        "question": "The question text",
+        "options": {{
+            "A": "Option A text",
+            "B": "Option B text",
+            "C": "Option C text",
+            "D": "Option D text",
+            "E": "Option E text"
+        }},
+        "correct_answer": "The letter of the correct answer (A, B, C, D, or E)",
+        "explanation": "The explanation text",
+        "medical_reasoning": "The detailed medical reasoning text"
+    }}
+    """
     try:
         chat_response = client.chat(