ambrosfitz committed on
Commit
ececa0b
1 Parent(s): afaec0f

Update question_generator.py

Browse files
Files changed (1) hide show
  1. question_generator.py +126 -2
question_generator.py CHANGED
@@ -24,7 +24,49 @@ model = "mistral-large-latest"
24
  # Initialize Mistral client
25
  client = MistralClient(api_key=api_key)
26
 
27
- # ... (previous functions remain the same)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
  def extract_json_from_markdown(markdown_text: str) -> str:
30
  """Extract JSON content from Markdown-formatted text."""
@@ -36,7 +78,89 @@ def extract_json_from_markdown(markdown_text: str) -> str:
36
 
37
  def generate_microbiology_question() -> Dict[str, str]:
38
  """Generate a microbiology question."""
39
- # ... (previous code remains the same)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
  try:
42
  chat_response = client.chat(
 
24
  # Initialize Mistral client
25
  client = MistralClient(api_key=api_key)
26
 
27
+ # Load data from CSV files
28
def load_csv_data(file_path: str) -> List[Dict[str, str]]:
    """Load every data row of a CSV file as a list of dictionaries.

    The file is decoded as UTF-8 and parsed with ``csv.DictReader``, so the
    first row supplies the keys of each returned dictionary.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        One dictionary per data row, in file order.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist (logged, then
            re-raised).
        csv.Error: If the csv module rejects the file contents (logged, then
            re-raised).
    """
    logging.info(f"Loading data from {file_path}...")
    try:
        # newline='' hands raw line endings to the csv module, as its docs
        # require; otherwise "\r\n" inside a quoted field is silently
        # rewritten to "\n" before the parser sees it.
        with open(file_path, 'r', encoding='utf-8', newline='') as csvfile:
            data = list(csv.DictReader(csvfile))
    except FileNotFoundError:
        logging.error(f"File not found: {file_path}")
        raise
    except csv.Error as e:
        logging.error(f"Error reading CSV file {file_path}: {e}")
        raise
    logging.info(f"Loaded {len(data)} rows from {file_path}")
    return data
43
+
44
# Read both source data sets once, at import time. Any failure is logged and
# then re-raised, so the module cannot be imported without its data.
try:
    detailed_cases = load_csv_data('processed_medical_history.csv')
    infectious_diseases = load_csv_data('infectious_diseases.csv')
except Exception as load_error:
    logging.error(f"Failed to load CSV data: {load_error}")
    raise
51
+
52
def hash_question(question: str) -> str:
    """Return the hexadecimal MD5 digest of the question text.

    The text is encoded with ``str.encode()`` defaults (UTF-8) before
    hashing; the digest serves as a fingerprint for spotting duplicates.
    """
    fingerprint = hashlib.md5()
    fingerprint.update(question.encode())
    return fingerprint.hexdigest()
55
+
56
def load_generated_questions() -> set:
    """Read the stored question hashes from ``generated_questions.txt``.

    Each line of the file (relative to the current working directory) is
    stripped of surrounding whitespace and collected into a set. A missing
    file is treated as "nothing generated yet" and yields an empty set.
    """
    try:
        with open('generated_questions.txt', 'r') as hash_file:
            return {entry.strip() for entry in hash_file}
    except FileNotFoundError:
        return set()
63
+
64
def save_generated_question(question_hash: str):
    """Append one question hash, followed by a newline, to the hash file.

    The file ``generated_questions.txt`` (relative to the current working
    directory) is opened in append mode, so it is created if absent and
    existing entries are kept.
    """
    record = question_hash + '\n'
    with open('generated_questions.txt', 'a') as hash_file:
        hash_file.write(record)
68
+
69
# Set of question hashes already stored on disk, read once at import time.
# NOTE(review): presumably consulted to skip duplicate questions — confirm against the callers.
+ generated_questions = load_generated_questions()
70
 
71
  def extract_json_from_markdown(markdown_text: str) -> str:
72
  """Extract JSON content from Markdown-formatted text."""
 
78
 
79
  def generate_microbiology_question() -> Dict[str, str]:
80
  """Generate a microbiology question."""
81
+ question_types = [
82
+ "clinical_vignette",
83
+ "mechanism_of_pathogenesis",
84
+ "laboratory_diagnosis",
85
+ "antimicrobial_resistance",
86
+ "vaccine_preventable_disease",
87
+ "microbial_physiology_genetics",
88
+ "epidemiology_transmission"
89
+ ]
90
+ question_type = random.choice(question_types)
91
+ logging.info(f"Generating {question_type} question...")
92
+
93
+ if question_type == "clinical_vignette":
94
+ case = random.choice(detailed_cases)
95
+ context = f"""
96
+ Pathogen: {case['Pathogen_Name']} ({case['Pathogen_Type']})
97
+ Key Symptoms: {case['Key_Symptoms']}
98
+ Physical Findings: {case['Physical_Findings']}
99
+ Lab Results: {case['Lab_Results']}
100
+ Patient Demographics: {case['Patient_Demographics']}
101
+ """
102
+ else:
103
+ disease = random.choice(infectious_diseases)
104
+ context = f"""
105
+ Infectious Agent: {disease['infectious_agent']}
106
+ Diagnosis: {disease['diagnosis']}
107
+ Treatment: {disease['treatment']}
108
+ """
109
+
110
+ prompt = f"""
111
+ Create a microbiology question that could appear on the NBME exam. This should be a {question_type} question.
112
+ Use the following information as inspiration, but feel free to expand or modify:
113
+
114
+ {context}
115
+
116
+ Generate a question based on the following template, depending on the question type:
117
+
118
+ 1. Clinical Vignette with Pathogen Identification:
119
+ A [age]-year-old [gender] presents with [symptoms and clinical findings]. [Additional relevant information]. Which of the following is the most likely causal organism?
120
+
121
+ 2. Mechanism of Pathogenesis:
122
+ [Description of a pathogen or clinical scenario]
123
+ Which of the following best describes the mechanism by which this organism causes disease?
124
+
125
+ 3. Laboratory Diagnosis:
126
+ A patient presents with [symptoms]. [Description of laboratory findings or test results].
127
+ Which of the following is the most likely diagnosis based on these laboratory findings?
128
+
129
+ 4. Antimicrobial Mechanism and Resistance:
130
+ A patient is diagnosed with [infection]. The causative organism is found to be resistant to [antibiotic]. Which of the following mechanisms is most likely responsible for this resistance?
131
+
132
+ 5. Vaccine-Preventable Disease:
133
+ A [age]-year-old [gender] presents with [symptoms of a vaccine-preventable disease]. Which of the following vaccines would have been most likely to prevent this condition?
134
+
135
+ 6. Microbial Physiology and Genetics:
136
+ An investigator observes [description of microbial behavior or genetic phenomenon]. Which of the following best explains this observation?
137
+
138
+ 7. Epidemiology and Transmission:
139
+ A cluster of [disease] cases is reported in [location]. [Description of affected population and circumstances]. Which of the following is the most likely mode of transmission?
140
+
141
+ Include:
142
+ 1. The question based on the selected template
143
+ 2. Five possible answer options (A through E)
144
+ 3. The correct answer
145
+ 4. A brief explanation of why the correct answer is right and why the other options are incorrect
146
+ 5. Detailed medical reasoning for the correct answer, including relevant pathophysiology, microbiology concepts, and clinical implications.
147
+
148
+ Format the response as a JSON object with the following keys:
149
+
150
+ {{
151
+ "question": "The question text",
152
+ "options": {{
153
+ "A": "Option A text",
154
+ "B": "Option B text",
155
+ "C": "Option C text",
156
+ "D": "Option D text",
157
+ "E": "Option E text"
158
+ }},
159
+ "correct_answer": "The letter of the correct answer (A, B, C, D, or E)",
160
+ "explanation": "The explanation text",
161
+ "medical_reasoning": "The detailed medical reasoning text"
162
+ }}
163
+ """
164
 
165
  try:
166
  chat_response = client.chat(