import pandas_profiling as pp
import pandas as pd
from datasets import load_dataset

# Clinical terminology code sets, each loaded by its dataset repo id.
# LOINC
datasetLOINC = load_dataset("awacke1/LOINC-CodeSet-Value-Description.csv")
# SNOMED
datasetSNOMED = load_dataset("awacke1/SNOMED-CT-Code-Value-Semantic-Set.csv")
# eCQM
dataseteCQM = load_dataset("awacke1/eCQM-Code-Value-Semantic-Set.csv")

# Tokenize the "Description" column of the LOINC set with a BERT tokenizer.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")


def _tokenize_descriptions(examples):
    """Run the module tokenizer over one batch of "Description" values."""
    return tokenizer(examples["Description"])


def _starts_with_allergy(example):
    """Tell whether a record's "Description" begins with "Allergy"."""
    return example["Description"].startswith("Allergy")


dataset = datasetLOINC.map(_tokenize_descriptions, batched=True)
# First tokenized record of the "train" split.
JSONOBJ2 = dataset["train"][0]

sw = datasetLOINC.filter(_starts_with_allergy)
len(sw)  # value is not used
# Print the filtered set followed by the three loaded code sets.
for _loaded in (sw, datasetLOINC, datasetSNOMED, dataseteCQM):
    print(_loaded)

# play with some dataset tools before the show:

#print(start_with_ar["Description"])

#---
#Main Stage - Begin!
#---

import os
import json
import numpy as np
import gradio as gr

# Hub access token taken from the environment; None when HF_TOKEN is unset.
HF_TOKEN = os.getenv("HF_TOKEN")

# Terminology names offered by the checkbox group and the radio widget.
CHOICES = ["SNOMED", "LOINC", "CQM"]

# Sample JSON document (kept as text) shown by the demo outputs.  The pieces
# below are adjacent string literals, so they concatenate into one string.
JSONOBJ = (
    '{"items":{"item":[{"id": "0001","type": null,"is_good": false,"ppu": 0.55,'
    '"batters":{"batter":['
    '{ "id": "1001", "type": "Regular" },'
    '{ "id": "1002", "type": "Chocolate" },'
    '{ "id": "1003", "type": "Blueberry" },'
    '{ "id": "1004", "type": "Devil\'s Food" }'
    ']},"topping":['
    '{ "id": "5001", "type": "None" },'
    '{ "id": "5002", "type": "Glazed" },'
    '{ "id": "5005", "type": "Sugar" },'
    '{ "id": "5007", "type": "Powdered Sugar" },'
    '{ "id": "5006", "type": "Chocolate with Sprinkles" },'
    '{ "id": "5003", "type": "Chocolate" },'
    '{ "id": "5004", "type": "Maple" }'
    ']}]}}'
)


def _as_dataframe(dataset):
    """Return *dataset* as a pandas DataFrame.

    Accepted inputs, checked in this order:

    * a ``pandas.DataFrame`` -- returned unchanged;
    * a ``str`` / ``os.PathLike`` -- read with ``pandas.read_csv``;
    * a ``dict`` of splits (presumably what ``load_dataset`` returns for the
      module-level datasets) -- the ``"train"`` split if present, otherwise
      the first split, converted recursively;
    * any object exposing ``to_pandas()``;
    * anything else -- legacy behaviour: ``dataset.Description`` is passed
      to ``pandas.read_csv``.

    Raises:
        ValueError: if *dataset* is an empty mapping.
    """
    if isinstance(dataset, pd.DataFrame):
        return dataset
    if isinstance(dataset, (str, os.PathLike)):
        return pd.read_csv(dataset)
    if isinstance(dataset, dict):
        if not dataset:
            raise ValueError("dataset has no splits to profile")
        split = dataset["train"] if "train" in dataset else next(iter(dataset.values()))
        return _as_dataframe(split)
    if hasattr(dataset, "to_pandas"):
        return dataset.to_pandas()
    return pd.read_csv(dataset.Description)


def profile_dataset(dataset=datasetSNOMED, username="awacke1", token=HF_TOKEN, dataset_name="awacke1/SNOMED-CT-Code-Value-Semantic-Set.csv"):
    """Profile *dataset* and publish the HTML report as a static Space.

    Writes ``./index.html`` and ``README.md`` into the current working
    directory and uploads both to the Space ``{username}/{name}``, where
    ``name`` is the last ``/``-separated component of *dataset_name*.

    Args:
        dataset: Data to profile; see ``_as_dataframe`` for accepted forms.
        username: Hub namespace that will own the Space.
        token: Hub access token used to create the Space and upload files.
        dataset_name: Name used for the report title, the README title and
            (without any leading namespace) the Space name.

    Returns:
        A message containing the value returned by ``create_repo``.
    """
    # create_repo / upload_file are huggingface_hub functions that this module
    # never imports at top level; import them here so the call cannot fail
    # with NameError.
    from huggingface_hub import create_repo, upload_file

    df = _as_dataframe(dataset)

    # Tables with more than 15 columns are profiled with minimal=True.
    profile = pp.ProfileReport(df, title=f"{dataset_name} Report", minimal=len(df.columns) > 15)

    # A repo id may hold only one "/" (namespace/name).  The default
    # dataset_name already carries a namespace, so keep just its last part.
    space_name = dataset_name.rsplit("/", 1)[-1]
    repo_id = f"{username}/{space_name}"

    repo_url = create_repo(repo_id, repo_type="space", token=token, space_sdk="static", private=False)

    profile.to_file("./index.html")
    upload_file(path_or_fileobj="./index.html", path_in_repo="index.html", repo_id=repo_id, repo_type="space", token=token)

    readme = f"---\ntitle: {dataset_name}\nemoji: ✨\ncolorFrom: green\ncolorTo: red\nsdk: static\npinned: false\ntags:\n- dataset-report\n---"
    # Explicit UTF-8: the front matter contains an emoji, which the platform
    # default encoding may not be able to represent.
    with open("README.md", "w", encoding="utf-8") as f:
        f.write(readme)
    upload_file(path_or_fileobj="./README.md", path_in_repo="README.md", repo_id=repo_id, repo_type="space", token=token)

    return f"Your dataset report will be ready at {repo_url}"

#def lowercase_title(example):
#    return {"Description": example[title].lower()}

# demonstrate map function of dataset
#JSONOBJ_MAP=datasetLOINC.map(lowercase_title)
#JSONOBJ_MAP=datasetLOINC.filter(lambda example: example["Description"].startswith("Mental health")) 

#def fn(    text1,    text2,    num,    slider1,    slider2,    single_checkbox,    checkboxes,    radio,    dropdown,    im1,    im2,    im3,    im4,
#    video,    audio1,    audio2,    file,    df1,    df2,):
def fn(text1, text2, single_checkbox, checkboxes, radio, im4, file, df1, df2):
    """Produce one value for each output component of the demo interface.

    The result is positional: its nine entries line up, in order, with the
    nine components of the ``outputs`` list passed to ``gr.Interface`` in
    this file (Textbox, HighlightedText, HighlightedText, JSON, HTML, File,
    Dataframe, Dataframe, Timeseries).

    Args:
        text1: Search term; echoed in the first highlighted-text output and
            used for the text output when *single_checkbox* is truthy.
        text2: Used for the text output when *single_checkbox* is falsy.
        single_checkbox: Chooses between *text1* and *text2*.
        checkboxes: Selected terminology names; joined into the text output.
            ``None`` is treated as an empty selection.
        radio: Selected terminology name; shown inside the HTML button.
            ``None`` is rendered as an empty label.
        im4: Image input; accepted to match the interface inputs, not used.
        file: File input; accepted to match the interface inputs, not used.
        df1: Returned unchanged as the first Dataframe output.
        df2: Returned unchanged as the Timeseries output.

    Returns:
        A 9-tuple of output values in the order listed above.
    """
    import html  # local import: only needed to escape the button label

    # NOTE: no terminology lookup happens here; apart from the echoed inputs
    # every output below is fixed demo content.
    search_term = text1

    # Text output: the chosen text followed by the selected terminologies.
    chosen_text = (text1 if single_checkbox else text2) or ""
    summary = chosen_text + ", selected:" + ", ".join(checkboxes or [])

    # Categorical highlights: (text, label) pairs; a label of None is used
    # for spans that carry no category.
    labelled_spans = [
        (JSONOBJ, "nn"),
        (JSONOBJ, "nn"),
        (JSONOBJ, "nn"),
        (search_term, "vrb"),
        ("The", "art"),
        ("quick brown", "adj"),
        ("fox", "nn"),
        ("jumped", "vrb"),
        ("testing testing testing", None),
        ("over", "prp"),
        ("the", "art"),
        ("testing", None),
        ("lazy", "adj"),
        ("dogs", "nn"),
        (".", "punc"),
    ] + [(f"test {x}", f"test {x}") for x in range(10)]

    # Numeric highlights: (text, score) pairs.
    scored_spans = [
        (JSONOBJ, 0.8),
        (JSONOBJ, 0.8),
        (JSONOBJ, 0.8),
        ("The testing testing testing", None),
        ("over", 0.6),
        ("the", 0.2),
        ("testing", None),
        ("lazy", -0.1),
        ("dogs", 0.4),
        (".", 0),
    ] + [("test", x / 10) for x in range(-10, 10)]

    # The label is request data placed into markup, so escape it first.
    button_label = html.escape("" if radio is None else str(radio))
    button_html = "<button style='background-color: red'>Click Me: " + button_label + "</button>"

    return (
        summary,  # Textbox
        labelled_spans,  # HighlightedText
        scored_spans,  # HighlightedText
        json.loads(JSONOBJ),  # JSON
        button_html,  # HTML
        os.path.join(os.path.dirname(__file__), "files/titanic.csv"),  # File
        df1,  # Dataframe
        np.random.randint(0, 10, (4, 4)),  # Dataframe
        df2,  # Timeseries
    )


def _build_demo():
    """Create the Gradio interface that connects ``fn`` to its widgets.

    Input components are listed in the order of ``fn``'s parameters, and
    output components in the order Gradio assigns ``fn``'s return values.
    """
    here = os.path.dirname(__file__)

    input_components = [
        gr.Textbox(value="Allergy", label="Textbox"),
        gr.Textbox(lines=3, value="Bathing", placeholder="Type here..", label="Textbox 2"),
        gr.Checkbox(label="Check for NER Match on Submit"),
        gr.CheckboxGroup(label="Clinical Terminology to Check", choices=CHOICES, value=CHOICES[0:2]),
        gr.Radio(label="Preferred Terminology Output", choices=CHOICES, value=CHOICES[2]),
        gr.Image(label="Webcam", source="webcam"),
        gr.File(label="File"),
        gr.Dataframe(label="Filters", headers=["Name", "Age", "Gender"]),
        gr.Timeseries(x="time", y=["price", "value"], colors=["pink", "purple"]),
    ]

    output_components = [
        gr.Textbox(label="Textbox"),
        gr.HighlightedText(label="HighlightedText", color_map={"punc": "pink", "test 0": "blue"}),
        gr.HighlightedText(label="HighlightedText", show_legend=True),
        gr.JSON(label="JSON"),
        gr.HTML(label="HTML"),
        gr.File(label="File"),
        gr.Dataframe(label="Dataframe"),
        gr.Dataframe(label="Numpy"),
        gr.Timeseries(x="time", y=["price", "value"], label="Timeseries"),
    ]

    # One example row: a value for every input component, in input order.
    example_row = [
        "Allergy",
        "Admission",
        True,
        ["SNOMED", "LOINC", "CQM"],
        "SNOMED",
        os.path.join(here, "files/cheetah1.jpg"),
        os.path.join(here, "files/titanic.csv"),
        [[1, 2, 3], [3, 4, 5]],
        os.path.join(here, "files/time.csv"),
    ]

    return gr.Interface(
        fn,
        inputs=input_components,
        outputs=output_components,
        # The same row is offered three times.
        examples=[example_row] * 3,
        theme="default",
        title="⚗️🧠🔬🧬 Clinical Terminology Auto Mapper AI 👩‍⚕️🩺⚕️🙋",
        cache_examples=False,
        description="Clinical Terminology Auto Mapper AI",
        article="Learn more at [Yggdrasil](https://github.com/AaronCWacker/Yggdrasil)",
    )


demo = _build_demo()

if __name__ == "__main__":
    demo.launch(debug=True)