import html
import numbers

import gradio as gr
from transformers import pipeline, AutoTokenizer

# Identifier of the NER model; passed to both the tokenizer loader and the
# pipeline factory below.
MODEL_NAME = "impresso-project/ner-stacked-bert-multilingual"

# Load the tokenizer once at import time so every request reuses it.
ner_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Build the NER pipeline once at import time.
# NOTE(review): "generic-ner" is presumably a custom task shipped with the
# model repository, which would be why trust_remote_code=True is needed --
# confirm. That flag allows code from the model repository to run locally,
# so MODEL_NAME should only ever point at a trusted source.
ner_pipeline = pipeline(
    "generic-ner",
    model=MODEL_NAME,
    tokenizer=ner_tokenizer,
    trust_remote_code=True,
    device="cpu",  # inference runs on the CPU
)


def format_entities_as_html(entities):
    """Render entity dicts as an HTML fragment, one ``<div>`` per entity.

    Each key/value pair of an entity becomes
    ``<strong>Key:</strong> value<br>``; the positional keys ``start``,
    ``end`` and ``index`` are left out. Real-valued, non-integer numbers
    (Python floats as well as NumPy floating scalars such as
    ``np.float32`` scores) are printed with two decimals. All other keys
    and values are HTML-escaped, because they may contain text taken from
    user input.

    Args:
        entities: Iterable of mappings describing the recognised entities.

    Returns:
        The HTML fragment as a string; ``"<div></div>"`` when ``entities``
        is empty.
    """
    excluded_keys = {"start", "end", "index"}  # Keys to exclude from the output
    parts = ["<div>"]

    for entity in entities:
        parts.append("<div style='margin-bottom: 10px;'>")  # One div per entity

        for key, value in entity.items():
            if key in excluded_keys:
                continue

            # numbers.Real also matches NumPy floating scalars, which a plain
            # isinstance(value, float) check misses; integers and booleans are
            # Integral and therefore keep their normal representation.
            is_fractional = isinstance(value, numbers.Real) and not isinstance(
                value, numbers.Integral
            )
            if is_fractional:
                rendered = f"{float(value):.2f}"
            else:
                # Escape so entity text cannot inject markup into the page.
                rendered = html.escape(f"{value}")

            label = html.escape(str(key).capitalize())
            parts.append(f"<strong>{label}:</strong> {rendered}<br>")

        parts.append("</div>")

    parts.append("</div>")
    return "".join(parts)


# Function to process the sentence and extract entities
def extract_entities(sentence):
    """Run the NER pipeline on ``sentence`` and prepare the highlighted output.

    Each pipeline result is read through its ``lOffset``, ``rOffset`` and
    ``type`` keys, plus the optional ``title``, ``name`` and ``function``
    keys. A copy of every kept result is extended with:

    * ``start`` / ``end``: the character offsets taken from ``lOffset`` /
      ``rOffset``;
    * ``surface``: the stripped slice of ``sentence`` between those offsets;
    * ``entity``: a display label built from the type and optional fields.

    Only results whose exact ``(start, end)`` span was already seen are
    dropped; partially overlapping spans are kept.

    Args:
        sentence: The text to analyse.

    Returns:
        A dict ``{"text": sentence, "entities": [...]}``, which is what the
        ``HighlightedText`` output of the interface receives.
    """
    # Nothing to tag: skip the model call for empty or whitespace-only input.
    if not sentence or not sentence.strip():
        return {"text": sentence or "", "entities": []}

    results = ner_pipeline(sentence)

    # Debug output: the input text and the raw pipeline results.
    print(f"Original text: {sentence}")
    print(f"NER results: {results}")

    optional_fields = (("title", "Title"), ("name", "Name"), ("function", "Function"))
    entities = []
    seen_spans = set()  # Exact (start, end) spans that were already added

    for raw_entity in results:
        span = (raw_entity["lOffset"], raw_entity["rOffset"])
        if span in seen_spans:
            continue
        seen_spans.add(span)

        # Work on a copy so the pipeline's own result objects stay untouched.
        entity = dict(raw_entity)
        entity["start"], entity["end"] = span
        entity["surface"] = sentence[span[0] : span[1]].strip()

        label = f"{entity['type']}"
        for field, caption in optional_fields:
            if field in entity:
                label += f" - {caption}: {entity[field]}"
        entity["entity"] = label

        entities.append(entity)

    print(f"Entities: {entities}")
    return {"text": sentence, "entities": entities}


# Create Gradio interface
def ner_app_interface():
    """Build the Gradio demo for the NER model and launch it.

    The interface passes the content of a multi-line text box to
    ``extract_entities`` and shows the returned dict in a
    ``HighlightedText`` component. A set of French and English example
    sentences is offered to the user.

    The app is launched with ``share=True``, which asks Gradio for a
    publicly reachable share link in addition to the local server.

    Returns:
        None.
    """
    input_sentence = gr.Textbox(
        lines=5, label="Input Sentence", placeholder="Enter a sentence for NER:"
    )

    example_sentences = [
        "Des chercheurs de l'Université de Cambridge ont développé une nouvelle technique de calcul quantique qui promet d'augmenter exponentiellement les vitesses de calcul.",
        "Le rapport complet sur ces découvertes a été publié dans la prestigieuse revue 'Nature Physics'. (Reuters)",
        "In the year 1789, the Estates-General was convened in France.",
        "The event was held at the Palace of Versailles, a symbol of French monarchy.",
        "At Versailles, Marie Antoinette, the Queen of France, was involved in discussions.",
        "Maximilien Robespierre, a leading member of the National Assembly, also participated.",
        "Jean-Jacques Rousseau, the famous philosopher, was a significant figure in the debate.",
        "Another important participant was Charles de Talleyrand, the Bishop of Autun.",
        "Meanwhile, across the Atlantic, George Washington, the first President of the United States, was shaping policies.",
        "Thomas Jefferson, the nation's Secretary of State, played a key role in drafting policies for the new American government.",
    ]

    # Interface definition
    interface = gr.Interface(
        fn=extract_entities,
        inputs=input_sentence,
        outputs=[gr.HighlightedText(label="Text with  mentions")],
        title="Named Entity Recognition",
        description="Enter a sentence to extract named entities using the NER model from the Impresso project.",
        # Gradio expects one list of input values per example row.
        examples=[[text] for text in example_sentences],
        live=False,  # run only on submit, not on every keystroke
    )

    interface.launch(share=True)


# Launch the demo only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    ner_app_interface()