import os
import time
from typing import List

import gradio as gr
from dotenv import load_dotenv
from haystack import component, Document, Pipeline
from haystack.components.builders.prompt_builder import PromptBuilder
from haystack.components.generators import HuggingFaceTGIGenerator
from pymed import PubMed

# Read a local .env file into the process environment before any client is
# built (presumably where the Hugging Face token is kept — TODO confirm).
load_dotenv()

# Module-wide PubMed client; PubMedFetcher.run uses it for every search.
pubmed = PubMed(
  tool="Haystack2.0Prototype",
  email="dummyemail@gmail.com",
)

def documentize(article):
  """Convert one PubMed search result into a Haystack ``Document``.

  The abstract becomes the document content; the title and keywords are
  stored in ``meta`` under the keys ``'title'`` and ``'keywords'``.

  Args:
    article: A result object yielded by ``pubmed.query``. It must expose
      ``title``; ``abstract`` and ``keywords`` are used when present.

  Returns:
    A ``Document`` whose ``content`` is the abstract (``""`` when the record
    has none) and whose ``meta`` holds ``title`` and ``keywords`` (``[]``
    when the record has none).
  """
  # Some records have no abstract, and some result types appear to lack a
  # ``keywords`` attribute altogether (pymed book entries — TODO confirm).
  # Fall back to empty values so one sparse record neither raises nor ends
  # up rendered as the literal text "None" in a prompt.
  abstract = getattr(article, 'abstract', None) or ""
  keywords = getattr(article, 'keywords', None) or []
  return Document(content=abstract, meta={'title': article.title, 'keywords': keywords})

@component
class PubMedFetcher():
  """Haystack component that looks up PubMed articles for LLM-generated keywords.

  It receives the replies of the keyword generator, treats every non-blank
  line of the first reply as one search term, and fetches at most one
  article per term through the module-level ``pubmed`` client.
  """

  @component.output_types(articles=List[Document])
  def run(self, queries: list[str]):
    """Fetch one PubMed article per keyword line.

    Args:
      queries: Replies from the upstream generator. Only the first reply is
        used; it is split into lines and each non-blank line is searched.

    Returns:
      ``{'articles': [...]}`` with one ``Document`` per article found. The
      list is empty when there are no replies, no usable lines, or every
      lookup failed.
    """
    # The generator may hand over an empty reply list; there is nothing to
    # search for in that case, so skip indexing into it.
    if not queries:
      return {'articles': []}

    # Drop blank lines and surrounding whitespace so PubMed is never queried
    # with an empty term.
    cleaned_queries = [line.strip() for line in queries[0].splitlines() if line.strip()]

    articles = []
    for query in cleaned_queries:
      # Best-effort per keyword: a failure on one term is reported and the
      # remaining terms are still searched.
      try:
        response = pubmed.query(query, max_results=1)
        documents = [documentize(article) for article in response]
      except Exception as e:
        print(e)
        print(f"Couldn't fetch articles for query: {query}")
      else:
        articles.extend(documents)
    return {'articles': articles}

# Both generators use the same model, so the id is spelled out only once.
_MODEL_ID = "mistralai/Mixtral-8x7B-Instruct-v0.1"

# Generator that turns the user's question into PubMed search keywords.
keyword_llm = HuggingFaceTGIGenerator(_MODEL_ID)
keyword_llm.warm_up()

# Generator that writes the final answer from the fetched articles.
llm = HuggingFaceTGIGenerator(_MODEL_ID)
llm.warm_up()


# One-shot prompt for the keyword generator: shows a worked example and asks
# for 3 PubMed search keywords, one per line, for the question substituted
# into {{ question }}. PubMedFetcher.run splits the reply on newlines.
keyword_prompt_template = """
Your task is to convert the following question into 3 keywords that can be used to find relevant medical research papers on PubMed.
Here is an examples:
question: "What are the latest treatments for major depressive disorder?"
keywords:
Antidepressive Agents
Depressive Disorder, Major
Treatment-Resistant depression
---
question: {{ question }}
keywords:
"""

# Template for the answering generator: the user's question followed by the
# content, keywords, and title of every fetched article. `articles` is filled
# by the pipeline; `question` is supplied by the caller of pipe.run.
prompt_template = """
Answer the question truthfully based on the given documents.
If the documents don't contain an answer, use your existing knowledge base.

q: {{ question }}
Articles:
{% for article in articles %}
  {{article.content}}
  keywords: {{article.meta['keywords']}}
  title: {{article.meta['title']}}
{% endfor %}

"""

# Prompt builders for the two generator stages, plus the PubMed lookup stage.
keyword_prompt_builder = PromptBuilder(template=keyword_prompt_template)
prompt_builder = PromptBuilder(template=prompt_template)
fetcher = PubMedFetcher()

pipe = Pipeline()

# Register every stage under the name used in the wiring below and in ask().
for _stage_name, _stage in (
  ("keyword_prompt_builder", keyword_prompt_builder),
  ("keyword_llm", keyword_llm),
  ("pubmed_fetcher", fetcher),
  ("prompt_builder", prompt_builder),
  ("llm", llm),
):
  pipe.add_component(_stage_name, _stage)

# Data flow: question -> keyword prompt -> keyword LLM -> PubMed search
# -> answer prompt -> answering LLM.
for _sender, _receiver in (
  ("keyword_prompt_builder.prompt", "keyword_llm.prompt"),
  ("keyword_llm.replies", "pubmed_fetcher.queries"),
  ("pubmed_fetcher.articles", "prompt_builder.articles"),
  ("prompt_builder.prompt", "llm.prompt"),
):
  pipe.connect(_sender, _receiver)

def ask(question):
  """Answer a question by running it through the PubMed pipeline.

  The question is fed to both prompt builders, and the answering generator
  is limited to 500 new tokens. The question and the answer are also
  printed to standard output.

  Args:
    question: The user's question as plain text.

  Returns:
    The first reply produced by the ``llm`` component.
  """
  pipeline_inputs = {
    "keyword_prompt_builder": {"question": question},
    "prompt_builder": {"question": question},
    "llm": {"generation_kwargs": {"max_new_tokens": 500}},
  }
  output = pipe.run(data=pipeline_inputs)

  print(question)
  answer = output['llm']['replies'][0]
  print(answer)
  return answer

# result = ask("How are mRNA vaccines being used for cancer treatment?")

# print(result)

# Question box, pre-filled with a sample question.
_question_box = gr.Textbox(
  value="How are mRNA vaccines being used for cancer treatment?")

# Sample questions offered as examples in the UI (one input per row).
_example_questions = [
  ["How are mRNA vaccines being used for cancer treatment?"],
  ["Suggest me some Case Studies related to Pneumonia."],
  ["Tell me about HIV AIDS."],
  ["Suggest some case studies related to Auto Immune Disorders."],
  ["How to treat a COVID infected Patient?"],
]

# Web UI: one text input routed through ask(), answer shown as markdown.
iface = gr.Interface(
  fn=ask,
  inputs=_question_box,
  outputs="markdown",
  title="LLM Augmented Q&A over PubMed Search Engine",
  description="Ask a question about BioMedical and get an answer from a friendly AI assistant.",
  examples=_example_questions,
  theme=gr.themes.Soft(),
  allow_flagging="never",
)

# Start serving the interface with Gradio's debug mode switched on.
iface.launch(debug=True)