Spaces:

geekyrakshit
/

medrag

Runtime error

App Files Files Community

geekyrakshit committed on Oct 24, 2024

Commit

bcd7446

1 Parent(s): 76f0b82

add: docs for assistant module

Browse files

Files changed (7) hide show

docs/assistant/figure_annotation.md +3 -0
docs/assistant/llm_client.md +3 -0
docs/assistant/medqa_assistant.md +3 -0
medrag_multi_modal/assistant/figure_annotation.py +44 -0
medrag_multi_modal/assistant/llm_client.py +31 -0
medrag_multi_modal/assistant/medqa_assistant.py +2 -4
mkdocs.yml +4 -0

docs/assistant/figure_annotation.md ADDED Viewed

	@@ -0,0 +1,3 @@


1	+ # Figure Annotation
2	+
3	+ ::: medrag_multi_modal.assistant.figure_annotation

docs/assistant/llm_client.md ADDED Viewed

	@@ -0,0 +1,3 @@


1	+ # LLM Client
2	+
3	+ ::: medrag_multi_modal.assistant.llm_client

docs/assistant/medqa_assistant.md ADDED Viewed

	@@ -0,0 +1,3 @@


1	+ # MedQA Assistant
2	+
3	+ ::: medrag_multi_modal.assistant.medqa_assistant

medrag_multi_modal/assistant/figure_annotation.py CHANGED Viewed

@@ -22,6 +22,34 @@ class FigureAnnotations(BaseModel):
 class FigureAnnotatorFromPageImage(weave.Model):
     figure_extraction_llm_client: LLMClient
     structured_output_llm_client: LLMClient
@@ -65,6 +93,22 @@ Here are some clues you need to follow:
     @weave.op()
     def predict(self, image_artifact_address: str):
         artifact_dir = get_wandb_artifact(image_artifact_address, "dataset")
         metadata = read_jsonl_file(os.path.join(artifact_dir, "metadata.jsonl"))
         annotations = []

 class FigureAnnotatorFromPageImage(weave.Model):
+    """
+    `FigureAnnotatorFromPageImage` is a class that leverages two LLM clients to annotate figures from a page image of a scientific textbook.
+    !!! example "Example Usage"
+        ```python
+        import weave
+        from dotenv import load_dotenv
+        from medrag_multi_modal.assistant import (
+            FigureAnnotatorFromPageImage, LLMClient
+        )
+        load_dotenv()
+        weave.init(project_name="ml-colabs/medrag-multi-modal")
+        figure_annotator = FigureAnnotatorFromPageImage(
+            figure_extraction_llm_client=LLMClient(model_name="pixtral-12b-2409"),
+            structured_output_llm_client=LLMClient(model_name="gpt-4o"),
+        )
+        annotations = figure_annotator.predict(
+            image_artifact_address="ml-colabs/medrag-multi-modal/grays-anatomy-images-marker:v6"
+        )
+        ```
+    Attributes:
+        figure_extraction_llm_client (LLMClient): An LLM client used to extract figure annotations from the page image.
+        structured_output_llm_client (LLMClient): An LLM client used to convert the extracted annotations into a structured format.
+    """
     figure_extraction_llm_client: LLMClient
     structured_output_llm_client: LLMClient
     @weave.op()
     def predict(self, image_artifact_address: str):
+        """
+        Predicts figure annotations for images in a given artifact directory.
+        This function retrieves an artifact directory using the provided image artifact address.
+        It reads metadata from a JSONL file in the artifact directory and iterates over each item in the metadata.
+        For each item, it constructs the file path for the page image and checks for the presence of figure image files.
+        If figure image files are found, it reads and converts the page image, then uses the `annotate_figures` method
+        to extract figure annotations from the page image. The extracted annotations are then structured using the
+        `extract_structured_output` method and appended to the annotations list.
+        Args:
+            image_artifact_address (str): The address of the image artifact.
+        Returns:
+            list: A list of dictionaries containing page indices and their corresponding figure annotations.
+        """
         artifact_dir = get_wandb_artifact(image_artifact_address, "dataset")
         metadata = read_jsonl_file(os.path.join(artifact_dir, "metadata.jsonl"))
         annotations = []

medrag_multi_modal/assistant/llm_client.py CHANGED Viewed

@@ -59,6 +59,17 @@ OPENAI_MODELS = ["gpt-4o", "gpt-4o-2024-08-06", "gpt-4o-mini", "gpt-4o-mini-2024
 class LLMClient(weave.Model):
     model_name: str
     client_type: Optional[ClientType]
@@ -196,6 +207,26 @@ class LLMClient(weave.Model):
         system_prompt: Optional[Union[str, list[str]]] = None,
         schema: Optional[Any] = None,
     ) -> Union[str, Any]:
         if self.client_type == ClientType.GEMINI:
             return self.execute_gemini_sdk(user_prompt, system_prompt, schema)
         elif self.client_type == ClientType.MISTRAL:

 class LLMClient(weave.Model):
+    """
+    LLMClient is a class that interfaces with different large language model (LLM) providers
+    such as Google Gemini, Mistral, and OpenAI. It abstracts the complexity of interacting with
+    these different APIs and provides a unified interface for making predictions.
+    Args:
+        model_name (str): The name of the model to be used for predictions.
+        client_type (Optional[ClientType]): The type of client (e.g., GEMINI, MISTRAL, OPENAI).
+            If not provided, it is inferred from the model_name.
+    """
     model_name: str
     client_type: Optional[ClientType]
         system_prompt: Optional[Union[str, list[str]]] = None,
         schema: Optional[Any] = None,
     ) -> Union[str, Any]:
+        """
+        Predicts the response from a language model based on the provided prompts and schema.
+        This function determines the client type and calls the appropriate SDK execution function
+        to get the response from the language model. It supports multiple client types including
+        GEMINI, MISTRAL, and OPENAI. Depending on the client type, it calls the corresponding
+        execution function with the provided user and system prompts, and an optional schema.
+        Args:
+            user_prompt (Union[str, list[str]]): The user prompt(s) to be sent to the language model.
+            system_prompt (Optional[Union[str, list[str]]]): The system prompt(s) to be sent to the language model.
+            schema (Optional[Any]): The schema to be used for parsing the response, if applicable.
+        Returns:
+            Union[str, Any]: The response from the language model, which could be a string or any other type
+            depending on the schema provided.
+        Raises:
+            ValueError: If the client type is invalid.
+        """
         if self.client_type == ClientType.GEMINI:
             return self.execute_gemini_sdk(user_prompt, system_prompt, schema)
         elif self.client_type == ClientType.MISTRAL:

medrag_multi_modal/assistant/medqa_assistant.py CHANGED Viewed

@@ -1,21 +1,19 @@
-from typing import Optional
 import weave
-from PIL import Image
 from ..retrieval import SimilarityMetric
 from .llm_client import LLMClient
 class MedQAAssistant(weave.Model):
     llm_client: LLMClient
     retriever: weave.Model
     top_k_chunks: int = 2
     retrieval_similarity_metric: SimilarityMetric = SimilarityMetric.COSINE
     @weave.op()
-    def predict(self, query: str, image: Optional[Image.Image] = None) -> str:
-        _image = image
         retrieved_chunks = self.retriever.predict(
             query, top_k=self.top_k_chunks, metric=self.retrieval_similarity_metric
         )

 import weave
 from ..retrieval import SimilarityMetric
 from .llm_client import LLMClient
 class MedQAAssistant(weave.Model):
+    """Documentation coming soon."""
     llm_client: LLMClient
     retriever: weave.Model
     top_k_chunks: int = 2
     retrieval_similarity_metric: SimilarityMetric = SimilarityMetric.COSINE
     @weave.op()
+    def predict(self, query: str) -> str:
         retrieved_chunks = self.retriever.predict(
             query, top_k=self.top_k_chunks, metric=self.retrieval_similarity_metric
         )

mkdocs.yml CHANGED Viewed

@@ -83,5 +83,9 @@ nav:
     - Contriever: 'retreival/contriever.md'
     - MedCPT: 'retreival/medcpt.md'
     - NV-Embed-v2: 'retreival/nv_embed_2.md'
 repo_url: https://github.com/soumik12345/medrag-multi-modal

     - Contriever: 'retreival/contriever.md'
     - MedCPT: 'retreival/medcpt.md'
     - NV-Embed-v2: 'retreival/nv_embed_2.md'
+  - Assistant:
+    - MedQA Assistant: 'assistant/medqa_assistant.md'
+    - Figure Annotation: 'assistant/figure_annotation.md'
+    - LLM Client: 'assistant/llm_client.md'
 repo_url: https://github.com/soumik12345/medrag-multi-modal