# File size: 2,804 Bytes

from typing import Dict, Any
from transformers import AutoTokenizer

class EndpointHandler:
    """Turn a raw request payload into a prompt + generation-parameter dict.

    The handler does not run a model itself: ``__call__`` only formats the
    entity-extraction prompt around the caller's text and returns it together
    with fixed generation parameters (intended for TGI, per the comments in
    this file).
    """

    # Prompt template; ``{0}`` is replaced with the post text and ``{{`` / ``}}``
    # render as literal braces. The whitespace inside the literal is part of
    # the prompt that is emitted, so keep it byte-for-byte unless a prompt
    # change is actually intended.
    _PROMPT_TEMPLATE = """Below is an instruction that describes a task, paired with an input that provides further context.
            Write a response that appropriately completes the request.
            Before answering, think carefully about the task to ensure a logical and accurate response.

            ### Instruction
            You are a helpful assistant analyzing social media posts. Your task is to extract ANY food, beverage, or supplement entities mentioned in the post and determine whether each entity is used as an ingredient or consumed as a product.

            Guidelines:
            - Extract ONLY food, beverage, or supplement entities mentioned in the post
            - An entity is considered an ingredient if it's used as part of a recipe or combined with other foods
            - An entity is considered a product if it's a food, beverage, or supplement consumed as is
            - Focus on specific items rather than general categories when possible

            Main thing to note - we ONLY want to extract food, beverage, or supplement entities, nothing else

            Output in JSON format only:
            {{
            "entities": [
                {{
                "entity": "name of first entity",
                "type": "ingredient or product"
                }},
                {{
                "entity": "name of second entity",
                "type": "ingredient or product"
                }}
            ]
            }}

            If no entities are found, output:
            {{
            "entities": []
            }}

            ### Social Media Post:
            {0}
            ### Response:
            <think>"""

    def __init__(self, path="prashanthbsp/reasoning-cpg-entity-v1"):
        """Load the tokenizer found at ``path``.

        Args:
            path: Location or identifier passed straight to
                ``AutoTokenizer.from_pretrained``.
        """
        # Only load the tokenizer - the model is loaded by TGI
        self.tokenizer = AutoTokenizer.from_pretrained(path)

    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Build the prompt and generation parameters for one request.

        The post text is resolved in this order:

        1. ``data["inputs"]["context"]`` when ``inputs`` is a dict with that key;
        2. ``data["inputs"]`` itself (a plain string, or a dict without
           ``"context"``, which is then formatted via ``str()``);
        3. ``data["context"]`` / ``data`` when there is no ``"inputs"`` key.

        Args:
            data: Request payload. ``data`` is not modified.

        Returns:
            Dict with ``"inputs"`` (the formatted prompt) and ``"parameters"``
            (generation settings).
        """
        # ``get`` rather than ``pop``: the caller's payload must stay intact.
        inputs = data.get("inputs", data)

        # ``inputs`` may be bare text; only dicts can carry a "context" key.
        if isinstance(inputs, dict):
            context = inputs.get("context", inputs)
        else:
            context = inputs

        prompt = self._PROMPT_TEMPLATE.format(context)

        # A fresh dict is built per call so callers may mutate the result.
        return {
            "inputs": prompt,
            "parameters": {
                "max_new_tokens": 1200,
                "temperature": 0.01,  # Low temperature for more deterministic outputs
                "do_sample": False,
                "return_full_text": False,  # Only return the generated text, not the prompt
            },
        }