# medication-ai-model / generate_dataset.py
# fullstuckdev · first init · 8c2f469 · 2.86 kB
import json
import random
# Medical conditions that condition-related questions are generated about.
conditions = [
    "Hypertension",
    "Diabetes",
    "Asthma",
    "Arthritis",
    "Depression",
    "Anxiety",
    "Obesity",
    "Migraine",
    "Allergies",
    "Influenza",
]

# Medications that medication-related questions are generated about.
# NOTE(review): the order looks index-aligned with `conditions`
# (e.g. Hypertension/Lisinopril), but the generation loop in this file samples
# the two lists independently, so no pairing is relied upon.
medications = [
    "Lisinopril",
    "Metformin",
    "Albuterol",
    "Ibuprofen",
    "Sertraline",
    "Alprazolam",
    "Orlistat",
    "Sumatriptan",
    "Cetirizine",
    "Oseltamivir",
]
def generate_question(condition):
    """Return a randomly selected question about ``condition``.

    One of ten fixed phrasings (symptoms, diagnosis, treatments, causes,
    lifestyle, complications, prevention, outlook, new treatments, daily
    life) is picked with ``random.choice`` and ``condition`` is substituted
    into it.

    Args:
        condition: Name of the medical condition to ask about.

    Returns:
        The chosen question as a string.
    """
    templates = (
        "What are the symptoms of {condition}?",
        "How is {condition} typically diagnosed?",
        "What are the common treatments for {condition}?",
        "Can you explain the causes of {condition}?",
        "What lifestyle changes can help manage {condition}?",
        "Are there any complications associated with {condition}?",
        "How can {condition} be prevented?",
        "What's the long-term outlook for someone with {condition}?",
        "Are there any new treatments being developed for {condition}?",
        "How does {condition} affect daily life?",
    )
    # Pick the phrasing first, then fill in the condition name once.
    chosen = random.choice(templates)
    return chosen.format(condition=condition)
def generate_answer(condition, question):
    """Return a placeholder answer for a question about a condition.

    Simplified for this example: the result only echoes ``condition`` and
    ``question`` and ends with a bracketed stub where a real medical
    explanation would go.

    Args:
        condition: Name of the medical condition the question is about.
        question: The question text, quoted verbatim in the answer.

    Returns:
        The placeholder answer string.
    """
    reply = (
        f"Here's some information about {condition} "
        f"related to your question: '{question}' "
        "[Detailed medical explanation would go here.]"
    )
    return reply
def generate_medication_question(medication):
    """Return a randomly selected question about ``medication``.

    One of ten fixed phrasings (uses, side effects, dosing, interactions,
    precautions, onset, pregnancy, missed dose, dependence, alternatives) is
    picked with ``random.choice`` and ``medication`` is substituted into it.

    Args:
        medication: Name of the medication to ask about.

    Returns:
        The chosen question as a string.
    """
    templates = (
        "What is {medication} used for?",
        "What are the common side effects of {medication}?",
        "How should {medication} be taken?",
        "Are there any drug interactions with {medication}?",
        "What should I know before starting {medication}?",
        "How long does it take for {medication} to start working?",
        "Can {medication} be taken during pregnancy?",
        "What should I do if I miss a dose of {medication}?",
        "Is {medication} habit-forming?",
        "Are there any alternatives to {medication}?",
    )
    # Pick the phrasing first, then fill in the medication name once.
    chosen = random.choice(templates)
    return chosen.format(medication=medication)
def generate_medication_answer(medication, question):
    """Return a placeholder answer for a question about a medication.

    The result only echoes ``medication`` and ``question`` and ends with a
    bracketed stub where real medication information would go.

    Args:
        medication: Name of the medication the question is about.
        question: The question text, quoted verbatim in the answer.

    Returns:
        The placeholder answer string.
    """
    reply = (
        f"Regarding {medication} and your question: '{question}' "
        "[Detailed medication information would go here.]"
    )
    return reply
def _build_record():
    """Create one dataset entry about a random condition or medication.

    A coin flip (``random.choice([True, False])``) decides whether the entry
    is about a condition or a medication; the subject is then drawn from the
    matching list and passed to the corresponding question/answer generators.

    Returns:
        A dict with ``"question"``, ``"answer"`` and a combined ``"text"``
        field holding both on separate lines.
    """
    about_condition = random.choice([True, False])
    if about_condition:
        subject = random.choice(conditions)
        question = generate_question(subject)
        answer = generate_answer(subject, question)
    else:
        subject = random.choice(medications)
        question = generate_medication_question(subject)
        answer = generate_medication_answer(subject, question)
    return {
        "question": question,
        "answer": answer,
        "text": f"Question: {question}\nAnswer: {answer}",
    }


# Generate 5000 question/answer records.
dataset = [_build_record() for _ in range(5000)]

# Write the whole dataset as pretty-printed JSON into the working directory.
with open("medical_dataset.json", "w") as out_file:
    json.dump(dataset, out_file, indent=2)

print("Dataset generated and saved to medical_dataset.json")