# Spaces: Runtime error
# (status banner captured together with this file; it is not part of the program)
import re
from typing import Dict, List, Optional

import gradio as gr
import networkx as nx
import pandas as pd
import pdfplumber
import torch
from sentence_transformers import SentenceTransformer
from transformers import (
    MBart50TokenizerFast,
    MBartForConditionalGeneration,
    MBartTokenizer,
    pipeline,
)
class MultilingualAyurvedicRecommender:
    """Recommend Ayurvedic medicines for free-text symptoms, with translation.

    The pipeline is: parse a remedies PDF into a table of
    medicine / conditions / remedies, load that table into an undirected
    graph, match the (English) symptoms against the known conditions by
    sentence-embedding cosine similarity, and render the matching medicines
    in the requested output language.
    """

    # Checkpoint shared by the translation tokenizer and model.
    _TRANSLATION_CHECKPOINT = "facebook/mbart-large-50-many-to-many-mmt"

    # Inclusive Unicode code-point ranges of the supported Indic scripts and
    # the language code each one is attributed to.  Marathi is written in
    # Devanagari as well, so Devanagari text is reported as Hindi.
    _SCRIPT_RANGES = (
        (0x0900, 0x097F, "hi_IN"),  # Devanagari
        (0x0980, 0x09FF, "bn_IN"),  # Bengali
        (0x0A80, 0x0AFF, "gu_IN"),  # Gujarati
        (0x0B80, 0x0BFF, "ta_IN"),  # Tamil
        (0x0C00, 0x0C7F, "te_IN"),  # Telugu
    )

    # A page whose upper-cased text contains any of these is skipped.
    _SKIP_PAGE_MARKERS = ("INSTRUCTIONS", "INDEX", "FOREWORD")

    # A line mentioning any of these (case-insensitively) is a condition.
    _CONDITION_KEYWORDS = (
        'pain', 'ache', 'fever', 'cold', 'cough', 'diabetes',
        'wounds', 'ulcer', 'skin', 'digestion', 'appetite',
    )

    # "Name (Other Name)" at the start of a line opens a new medicine entry.
    _MEDICINE_HEADER_RE = re.compile(r'^[A-Za-z\s]+\([A-Za-z\s]+\)')

    # A number followed by a dosage/frequency unit marks a remedy line.
    _DOSAGE_RE = re.compile(r'\d+(?:\s*(?:gm|ml|times|drops|days))')

    def __init__(self):
        """Load the embedding and translation models and create an empty graph."""
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Using device: {self.device}")

        # The question-answering pipeline is not used by any method of this
        # class, so it is built lazily by the ``qa_model`` property instead
        # of being loaded up front.
        self._qa_model = None

        # Multilingual sentence encoder used for symptom/condition matching.
        self.similarity_model = SentenceTransformer('paraphrase-multilingual-mpnet-base-v2')
        self.similarity_model.to(self.device)

        # The mBART-50 checkpoint needs the mBART-50 tokenizer: the plain
        # MBartTokenizer only knows the 25 mBART-cc25 language codes and has
        # no entries for te_IN, ta_IN, mr_IN or bn_IN.
        print("Loading translation models...")
        self.translation_tokenizer = MBart50TokenizerFast.from_pretrained(
            self._TRANSLATION_CHECKPOINT
        )
        self.translation_model = MBartForConditionalGeneration.from_pretrained(
            self._TRANSLATION_CHECKPOINT
        )
        self.translation_model.to(self.device)

        # mBART-50 language code -> display name of every supported language.
        self.language_codes = {
            "en_XX": "English",
            "hi_IN": "Hindi",
            "te_IN": "Telugu",
            "ta_IN": "Tamil",
            "mr_IN": "Marathi",
            "gu_IN": "Gujarati",
            "bn_IN": "Bengali",
        }

        # Undirected graph linking each medicine to its conditions and remedies.
        self.G = nx.Graph()

    @property
    def qa_model(self):
        """Multilingual question-answering pipeline, created on first access."""
        if getattr(self, "_qa_model", None) is None:
            self._qa_model = pipeline(
                "question-answering",
                model="deepset/xlm-roberta-large-squad2",
                device=0 if self.device == "cuda" else -1,
            )
        return self._qa_model

    @qa_model.setter
    def qa_model(self, value):
        self._qa_model = value

    @staticmethod
    def _split_field(value) -> List[str]:
        """Split a ';'-joined cell into stripped, non-empty parts.

        Non-string cells (for example NaN) yield an empty list.
        """
        if not isinstance(value, str):
            return []
        return [part.strip() for part in value.split(';') if part.strip()]

    def detect_language(self, text: str) -> str:
        """Guess the language code of ``text`` from the scripts it is written in.

        Every ASCII letter counts towards ``"en_XX"`` and every character in
        one of the supported Indic script ranges counts towards that script's
        language; the code with the most characters wins.

        Args:
            text: Input text to detect language for

        Returns:
            str: One of the keys used in ``language_codes``.  ``"en_XX"`` is
            returned for empty, non-string or unrecognised input.  Devanagari
            text is always reported as ``"hi_IN"`` because Marathi shares the
            script and cannot be told apart this way.
        """
        if not isinstance(text, str) or not text:
            return "en_XX"

        counts: Dict[str, int] = {}
        for char in text:
            code_point = ord(char)
            if code_point < 0x80:
                if char.isalpha():
                    counts["en_XX"] = counts.get("en_XX", 0) + 1
                continue
            for low, high, lang_code in self._SCRIPT_RANGES:
                if low <= code_point <= high:
                    counts[lang_code] = counts.get(lang_code, 0) + 1
                    break

        if not counts:
            return "en_XX"
        # On a tie, max() keeps the language that was seen first.
        return max(counts, key=counts.get)

    def translate_text(self, text: str, target_lang: str) -> str:
        """Translate ``text`` into ``target_lang``.

        Args:
            text: Text to translate
            target_lang: Target language code (a key of ``language_codes``)

        Returns:
            str: Translated text.  The input is returned unchanged when it is
            blank, already in the target language, the target code is not a
            supported one, or translation fails (the error is printed).
        """
        if not isinstance(text, str) or not text.strip():
            return text

        source_lang = self.detect_language(text)
        if source_lang == target_lang:
            return text

        try:
            if target_lang not in self.language_codes:
                print(f"Translation error: unsupported target language {target_lang!r}")
                return text

            tokenizer = self.translation_tokenizer
            # The tokenizer prepends the source-language token, so it must be
            # told which language the input is in before encoding.
            tokenizer.src_lang = source_lang
            inputs = tokenizer(
                text,
                return_tensors="pt",
                padding=True,
                truncation=True,
                max_length=1024,
            ).to(self.device)
            with torch.no_grad():
                translated = self.translation_model.generate(
                    **inputs,
                    forced_bos_token_id=tokenizer.convert_tokens_to_ids(target_lang),
                    max_length=1024,
                    num_beams=4,
                    length_penalty=1.0,
                )
            return tokenizer.decode(translated[0], skip_special_tokens=True)
        except Exception as e:
            # Best effort: an untranslated answer is better than none.
            print(f"Translation error: {e}")
            return text

    @staticmethod
    def _append_entry(records: Dict[str, List[str]], medicine: Optional[str],
                      conditions: List[str], remedies: List[str]) -> None:
        """Append one medicine to ``records`` if it has a name and a condition."""
        if medicine and conditions:
            records["Medicine"].append(medicine)
            # dict.fromkeys de-duplicates while keeping first-seen order.
            records["Conditions"].append(';'.join(dict.fromkeys(conditions)))
            records["Remedies"].append(';'.join(remedies))

    @classmethod
    def _parse_pages(cls, page_texts) -> pd.DataFrame:
        """Turn the text of each PDF page into the medicine table.

        Args:
            page_texts: Iterable with one entry per page; ``None`` and empty
                strings are treated as pages without text.

        Returns:
            pd.DataFrame: Columns ``Medicine``, ``Conditions`` and
            ``Remedies``; the last two hold ';'-joined strings.
        """
        records: Dict[str, List[str]] = {"Medicine": [], "Conditions": [], "Remedies": []}
        current_medicine = None
        current_conditions: List[str] = []
        current_remedies: List[str] = []

        for text in page_texts:
            text = text or ""
            # Skip non-content pages
            upper_text = text.upper()
            if any(marker in upper_text for marker in cls._SKIP_PAGE_MARKERS):
                continue

            for raw_line in text.split('\n'):
                line = raw_line.strip()
                if not line:
                    continue

                # A medicine header closes the previous entry and opens a new one.
                if cls._MEDICINE_HEADER_RE.match(line):
                    cls._append_entry(records, current_medicine,
                                      current_conditions, current_remedies)
                    current_medicine = line.split('(')[0].strip()
                    current_conditions = []
                    current_remedies = []
                    continue

                if not current_medicine:
                    continue

                lowered = line.lower()
                if cls._DOSAGE_RE.search(lowered):
                    current_remedies.append(line)
                elif any(keyword in lowered for keyword in cls._CONDITION_KEYWORDS):
                    # Keep only the part before the first colon, if any.
                    condition = line.partition(':')[0].strip()
                    if condition:
                        current_conditions.append(condition)

        # Add final medicine entry
        cls._append_entry(records, current_medicine, current_conditions, current_remedies)

        df = pd.DataFrame(records)
        if df.empty:
            return df
        return df[df['Conditions'].str.len() > 0].drop_duplicates()

    def extract_from_pdf(self, pdf_path: str) -> pd.DataFrame:
        """Extract text from a PDF and parse it into the medicine table.

        Args:
            pdf_path: Path to PDF file

        Returns:
            pd.DataFrame: Extracted medicine data, or an empty DataFrame when
            the file cannot be processed (the error is printed).
        """
        try:
            with pdfplumber.open(pdf_path) as pdf:
                # extract_text() may yield None for a page without text;
                # _parse_pages treats that as an empty page.
                return self._parse_pages(page.extract_text() for page in pdf.pages)
        except Exception as e:
            print(f"Error processing PDF: {e}")
            return pd.DataFrame()

    def build_knowledge_graph(self, df: pd.DataFrame) -> None:
        """Rebuild ``self.G`` from the medicine table.

        Every medicine becomes a node of type ``'medicine'`` connected to one
        ``'condition'`` node per condition and one ``'remedy'`` node per
        remedy (whose ``info`` attribute holds the remedy text).

        Args:
            df: DataFrame containing medicine data
        """
        # NOTE(review): nodes are keyed by their text, so a string that occurs
        # as both e.g. a medicine and a condition shares one node and keeps
        # the type that was assigned last.
        self.G.clear()
        for _, row in df.iterrows():
            medicine = row['Medicine']
            self.G.add_node(medicine, type='medicine')
            for condition in self._split_field(row['Conditions']):
                self.G.add_node(condition, type='condition')
                self.G.add_edge(medicine, condition)
            for remedy in self._split_field(row['Remedies']):
                self.G.add_node(remedy, type='remedy', info=remedy)
                self.G.add_edge(medicine, remedy)

    def find_similar_conditions(self, symptoms: str, conditions: List[str],
                                threshold: float = 0.5) -> List[tuple]:
        """Find the known conditions that are similar to the input symptoms.

        Args:
            symptoms: Input symptoms text
            conditions: List of known conditions
            threshold: Cosine similarity a condition must exceed to be kept

        Returns:
            List[tuple]: ``(condition, similarity_score)`` pairs, best first.
            Empty when ``conditions`` is empty.
        """
        if not conditions:
            return []

        symptoms_embedding = self.similarity_model.encode(symptoms, convert_to_tensor=True)
        conditions_embeddings = self.similarity_model.encode(conditions, convert_to_tensor=True)
        # One symptoms vector broadcast against one row per condition.
        scores = torch.nn.functional.cosine_similarity(
            symptoms_embedding.unsqueeze(0),
            conditions_embeddings,
            dim=1,
        ).tolist()

        matches = [
            (condition, score)
            for condition, score in zip(conditions, scores)
            if score > threshold
        ]
        return sorted(matches, key=lambda pair: pair[1], reverse=True)

    def recommend_medicines(self, symptoms: str, df: pd.DataFrame,
                            target_lang: str = "en_XX",
                            max_results: Optional[int] = None) -> List[Dict]:
        """Recommend medicines for the symptoms, translated to ``target_lang``.

        ``build_knowledge_graph`` must have been called with the same ``df``.

        Args:
            symptoms: Input symptoms text
            df: DataFrame containing medicine data
            target_lang: Target language code
            max_results: If given, keep only this many best matches; they are
                selected before translating, so the rest is never translated

        Returns:
            List[Dict]: Dicts with keys ``medicine``, ``condition``,
            ``confidence`` and ``remedies``, highest confidence first.
        """
        if df is None or df.empty or 'Conditions' not in df.columns:
            return []

        # Unique known conditions in first-seen order.
        all_conditions = list(dict.fromkeys(
            condition
            for cell in df['Conditions']
            for condition in self._split_field(cell)
        ))
        if not all_conditions:
            return []

        english_symptoms = self.translate_text(symptoms, "en_XX")
        similar_conditions = self.find_similar_conditions(english_symptoms, all_conditions)

        candidates = []
        for condition, confidence in similar_conditions:
            if condition not in self.G:
                continue
            for medicine in self.G.neighbors(condition):
                if self.G.nodes[medicine].get('type') != 'medicine':
                    continue
                remedies = [
                    self.G.nodes[node].get('info', node)
                    for node in self.G.neighbors(medicine)
                    if self.G.nodes[node].get('type') == 'remedy'
                ]
                candidates.append((medicine, condition, confidence, remedies))

        candidates.sort(key=lambda item: item[2], reverse=True)
        if max_results is not None:
            candidates = candidates[:max_results]

        return [
            {
                'medicine': self.translate_text(medicine, target_lang),
                'condition': self.translate_text(condition, target_lang),
                'confidence': confidence,
                'remedies': [self.translate_text(remedy, target_lang) for remedy in remedies],
            }
            for medicine, condition, confidence, remedies in candidates
        ]

    def process_file_and_recommend(
        self,
        file: "gr.File | str | None",
        symptoms: str,
        target_language: str = "English"
    ) -> str:
        """Process the uploaded PDF and format recommendations for display.

        Args:
            file: Uploaded PDF, either as a path string or as an object whose
                ``name`` attribute is the path
            symptoms: Input symptoms text
            target_language: Target language name (a value of
                ``language_codes``); unknown names fall back to English

        Returns:
            str: Formatted recommendations text (at most five entries), or a
            message starting with ``"Error:"`` when something goes wrong.
        """
        max_shown = 5
        try:
            wanted = (target_language or "").lower()
            target_lang = next(
                (code for code, lang in self.language_codes.items()
                 if lang.lower() == wanted),
                "en_XX"
            )

            if file is None:
                return self.translate_text("Error: Please upload a PDF file.", target_lang)
            pdf_path = file if isinstance(file, str) else getattr(file, "name", None)
            if not pdf_path:
                return self.translate_text("Error: Please upload a PDF file.", target_lang)
            if not symptoms or not symptoms.strip():
                return self.translate_text("Error: Please enter your symptoms.", target_lang)

            df = self.extract_from_pdf(pdf_path)
            if df.empty:
                return self.translate_text("Error: Could not extract data from the PDF file.", target_lang)

            self.build_knowledge_graph(df)
            recommendations = self.recommend_medicines(
                symptoms, df, target_lang, max_results=max_shown
            )
            if not recommendations:
                return self.translate_text("No matching recommendations found.", target_lang)

            # The field labels are the same for every entry: translate once.
            medicine_label = self.translate_text('Medicine', target_lang)
            condition_label = self.translate_text('Matching Condition', target_lang)
            confidence_label = self.translate_text('Confidence Score', target_lang)
            remedies_label = self.translate_text('Recommended Remedies', target_lang)

            output = [self.translate_text("Ayurvedic Medicine Recommendations:", target_lang)]
            for i, rec in enumerate(recommendations[:max_shown], 1):
                output.extend([
                    f"\n{i}. {medicine_label}: {rec['medicine']}",
                    f" {condition_label}: {rec['condition']}",
                    f" {confidence_label}: {rec['confidence']:.2f}",
                    f" {remedies_label}:"
                ])
                output.extend([f" - {remedy}" for remedy in rec['remedies']])
                output.append("")
            return "\n".join(output)
        except Exception as e:
            # UI boundary: report the failure as text instead of raising.
            return f"Error: {str(e)}"
# Create and launch Gradio interface
def main():
    """Wire a recommender instance into a Gradio form and start serving it.

    The form takes a PDF upload, a free-text symptoms box and an
    output-language dropdown, and shows the recommendations as text.
    """
    recommender = MultilingualAyurvedicRecommender()

    # Input widgets, in the order of process_file_and_recommend's parameters.
    pdf_upload = gr.File(label="Upload Ayurvedic Home Remedies PDF")
    symptoms_box = gr.Textbox(
        label="Enter symptoms in any language (e.g., 'cold and fever' या 'सर्दी और बुखार' या 'జలుబు మరియు జ్వరం')"
    )
    language_names = list(recommender.language_codes.values())
    language_picker = gr.Dropdown(
        choices=language_names,
        label="Select output language",
        value="English",
    )
    result_box = gr.Textbox(label="Recommendations")

    app = gr.Interface(
        fn=recommender.process_file_and_recommend,
        inputs=[pdf_upload, symptoms_box, language_picker],
        outputs=result_box,
        title="Multilingual Ayurvedic Medicine Recommender",
        description="Get Ayurvedic medicine recommendations in your preferred language. Enter symptoms in any language!",
    )
    app.launch(share=True)
# Script entry point: start the app only when this file is run directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()