Models-Datasets-Spaces-Search-Hub / backup.061924.app.py
awacke1's picture
Rename app.py to backup.061924.app.py
f567a91 verified
raw
history blame
No virus
7.8 kB
from typing import List, Dict
import httpx
import gradio as gr
import pandas as pd
import json
async def get_valid_datasets() -> Dict[str, List[str]]:
    """Fetch dataset ids from the Hugging Face Hub for the dataset dropdown.

    Requests ``https://huggingface.co/api/datasets`` and collects the ``"id"``
    field of every entry in the JSON response.

    Returns:
        The result of ``gr.Dropdown.update(...)`` with the fetched ids as
        choices and ``"awacke1/ChatbotMemory.csv"`` as the selected value.
        The choices are empty when the request fails or the payload does not
        have the expected list-of-objects shape.
    """
    url = "https://huggingface.co/api/datasets"
    try:
        async with httpx.AsyncClient() as session:
            response = await session.get(url)
        datasets = [dataset["id"] for dataset in response.json()]
    except httpx.HTTPError as exc:
        # Transport-level failure: keep the UI usable with an empty list.
        print(f"Could not fetch dataset list: {exc!r}")
        datasets = []
    except (KeyError, TypeError, json.JSONDecodeError):
        # TypeError covers a non-list payload (e.g. an error object), where
        # iterating yields plain strings that cannot be indexed by "id".
        datasets = []  # Default when the response is not in the expected format
    return gr.Dropdown.update(choices=datasets, value="awacke1/ChatbotMemory.csv")
async def get_splits(dataset_name: str) -> Dict[str, List[Dict]]:
    """Query the datasets-server ``/splits`` endpoint for one dataset.

    Args:
        dataset_name: Value placed verbatim in the ``dataset`` query parameter.

    Returns:
        The decoded JSON body of the response.
    """
    endpoint = f"https://datasets-server.huggingface.co/splits?dataset={dataset_name}"
    async with httpx.AsyncClient() as client:
        reply = await client.get(endpoint)
    payload = reply.json()
    return payload
async def get_valid_datasets_old() -> Dict[str, List[str]]:
    """Build a Dropdown update from the datasets-server ``/valid`` endpoint.

    Reads the ``"valid"`` entry of the JSON response without any fallback, so
    a missing key or undecodable body propagates as an exception. Not wired to
    any event in the visible part of this file.

    Returns:
        The result of ``gr.Dropdown.update(...)`` with the ``"valid"`` list as
        choices and ``"awacke1/ChatbotMemory.csv"`` as the selected value.
    """
    endpoint = "https://datasets-server.huggingface.co/valid"
    async with httpx.AsyncClient() as client:
        reply = await client.get(endpoint)
    valid_ids = reply.json()["valid"]
    return gr.Dropdown.update(choices=valid_ids, value="awacke1/ChatbotMemory.csv")
# The one to watch: https://huggingface.co/rungalileo
# rungalileo/medical_transcription_40
async def get_valid_datasets_old2() -> Dict[str, List[str]]:
    """Build a Dropdown update from the datasets-server ``/valid`` endpoint.

    Same request as ``get_valid_datasets_old`` but falls back to an empty
    choice list when the body cannot be decoded or lacks the ``"valid"`` key.
    Not wired to any event in the visible part of this file.

    Returns:
        The result of ``gr.Dropdown.update(...)`` with the resolved choices and
        ``"awacke1/ChatbotMemory.csv"`` as the selected value.
    """
    endpoint = "https://datasets-server.huggingface.co/valid"
    async with httpx.AsyncClient() as client:
        reply = await client.get(endpoint)
    try:
        valid_ids = reply.json()["valid"]
    except (json.JSONDecodeError, KeyError):
        # Unexpected response format: present an empty list instead of failing.
        valid_ids = []
    return gr.Dropdown.update(choices=valid_ids, value="awacke1/ChatbotMemory.csv")
async def get_first_rows(dataset: str, config: str, split: str) -> Dict[str, Dict[str, List[Dict]]]:
    """Query the datasets-server ``/first-rows`` endpoint.

    Args:
        dataset: Value for the ``dataset`` query parameter.
        config: Value for the ``config`` query parameter.
        split: Value for the ``split`` query parameter.

    Returns:
        The decoded JSON body of the response.
    """
    endpoint = "https://datasets-server.huggingface.co/first-rows"
    # Let httpx build the query string so characters such as '&', '#', or
    # spaces inside a value are percent-encoded instead of corrupting the URL.
    query = {"dataset": dataset, "config": config, "split": split}
    async with httpx.AsyncClient() as session:
        response = await session.get(endpoint, params=query)
    print(response.url)  # log the URL that was actually requested
    return response.json()
def get_df_from_rows(api_output):
    """Build a DataFrame from a ``first-rows`` style payload.

    Args:
        api_output: Mapping expected to hold a ``"rows"`` list whose items
            each carry the record under a ``"row"`` key.

    Returns:
        A DataFrame with one row per entry, in the order received. An empty
        DataFrame is returned when ``api_output`` is not a mapping or has no
        (or an empty) ``"rows"`` list, e.g. for an error payload.
    """
    rows = api_output.get("rows") if isinstance(api_output, dict) else None
    if not rows:
        return pd.DataFrame()
    # Rows are kept in the order received; no sorting is applied.
    return pd.DataFrame([entry["row"] for entry in rows])
async def update_configs(dataset_name: str):
    """Refresh the config dropdown for a newly selected dataset.

    Args:
        dataset_name: Dataset passed on to ``get_splits``.

    Returns:
        A 2-tuple of (``gr.Dropdown.update(...)`` listing the sorted, unique
        ``"config"`` values with the first one selected, the unmodified
        ``get_splits`` response). When the response carries no splits the
        dropdown is emptied and its value cleared instead of raising.
    """
    splits = await get_splits(dataset_name)
    # An error payload may lack "splits" entirely; treat that as "no splits".
    entries = splits.get("splits") or [] if isinstance(splits, dict) else []
    all_configs = sorted({entry["config"] for entry in entries})
    selected = all_configs[0] if all_configs else None
    return (gr.Dropdown.update(choices=all_configs, value=selected),
            splits)
async def update_splits(config_name: str, state: gr.State):
    """Refresh the split dropdown and the preview table for a chosen config.

    Args:
        config_name: Config whose splits should be listed.
        state: Used here as a mapping with a ``"splits"`` list whose entries
            have ``"dataset"``, ``"config"`` and ``"split"`` keys.

    Returns:
        A 2-tuple of (``gr.Dropdown.update(...)`` listing the sorted, unique
        split names for ``config_name`` with the first one selected, the
        DataFrame returned by ``update_dataset`` for that first split). When
        no split matches, the dropdown is emptied and an empty DataFrame is
        returned instead of raising.
    """
    entries = (state.get("splits") or []) if isinstance(state, dict) else []
    splits_for_config = sorted({s["split"] for s in entries if s["config"] == config_name})
    if not splits_for_config:
        # Nothing to preview: missing state, error payload, or unknown config.
        return (gr.Dropdown.update(choices=[], value=None), pd.DataFrame())
    dataset_name = entries[0]["dataset"]
    dataset = await update_dataset(splits_for_config[0], config_name, dataset_name)
    return (gr.Dropdown.update(choices=splits_for_config, value=splits_for_config[0]), dataset)
async def update_dataset(split_name: str, config_name: str, dataset_name: str):
    """Fetch the first rows for a dataset/config/split and return them as a table.

    Note the parameter order (split, config, dataset); the arguments are
    forwarded to ``get_first_rows`` as (dataset, config, split).

    Returns:
        Whatever ``get_df_from_rows`` produces for the fetched payload.
    """
    payload = await get_first_rows(dataset_name, config_name, split_name)
    return get_df_from_rows(payload)
# Guido van Rossum: https://www.youtube.com/watch?v=-DVyjdw4t9I
async def update_URL(dataset: str, config: str, split: str) -> str:
    """Return a ``https://huggingface.co/datasets/...`` page URL.

    NOTE: the URL is built from the *third* argument (``split``), not from
    ``dataset``. The event wiring in this file passes its inputs ordered
    ``[split, config, dataset_name]``, so the third positional argument is
    the dataset id at runtime. Change the parameter used here only together
    with that wiring.

    Args:
        dataset: Unused.
        config: Unused.
        split: Text appended after ``https://huggingface.co/datasets/``.

    Returns:
        The resulting URL string.
    """
    return f"https://huggingface.co/datasets/{split}"
async def openurl(URL: str) -> str:
    """Render ``URL`` as an HTML anchor that opens in a new tab.

    The value is HTML-escaped and the attributes are quoted, so text typed
    into the URL box cannot break out of the ``href`` attribute or inject
    markup into the rendered output.

    Args:
        URL: Text to link to; ``None`` is treated as an empty string.

    Returns:
        An ``<a>`` element whose ``href`` and visible text are the escaped URL.
    """
    # Local import: the file's import block does not provide ``html``.
    from html import escape

    safe_url = escape(URL or "", quote=True)
    return f'<a href="{safe_url}" target="_blank">{safe_url}</a>'
# Build the UI and register the event handlers.
# NOTE(review): indentation was lost in this copy of the file; the nesting
# below (three rows, listeners inside the Blocks context) is reconstructed --
# confirm against the running app.
with gr.Blocks() as demo:
    gr.Markdown("<h1><center>🥫Datasetter📊 Datasets Analyzer and Transformer</center></h1>")
    gr.Markdown("""<div align="center">Curated Datasets: <a href = "https://www.kaggle.com/datasets">Kaggle</a>. <a href="https://www.nlm.nih.gov/research/umls/index.html">NLM UMLS</a>. <a href="https://loinc.org/downloads/">LOINC</a>. <a href="https://www.cms.gov/medicare/icd-10/2022-icd-10-cm">ICD10 Diagnosis</a>. <a href="https://icd.who.int/dev11/downloads">ICD11</a>. <a href="https://paperswithcode.com/datasets?q=medical&v=lst&o=newest">Papers,Code,Datasets for SOTA in Medicine</a>. <a href="https://paperswithcode.com/datasets?q=mental&v=lst&o=newest">Mental</a>. <a href="https://paperswithcode.com/datasets?q=behavior&v=lst&o=newest">Behavior</a>. <a href="https://www.cms.gov/medicare-coverage-database/downloads/downloads.aspx">CMS Downloads</a>. <a href="https://www.cms.gov/medicare/fraud-and-abuse/physicianselfreferral/list_of_codes">CMS CPT and HCPCS Procedures and Services</a> """)
    # Holds the second output of update_configs (the splits response) so that
    # update_splits can read it when the config changes.
    splits_data = gr.State()
    # Selection row: dataset -> subset (config) -> split.
    with gr.Row():
        dataset_name = gr.Dropdown(label="Dataset", interactive=True)
        config = gr.Dropdown(label="Subset", interactive=True)
        split = gr.Dropdown(label="Split", interactive=True)
    # URL row: editable URL box, a button, and the HTML output for the link.
    with gr.Row():
        #filterleft = gr.Textbox(label="First Column Filter",placeholder="Filter Column 1")
        URLcenter = gr.Textbox(label="Dataset URL", placeholder="URL")
        btn = gr.Button("Use Dataset")
        #URLoutput = gr.Textbox(label="Output",placeholder="URL Output")
        #URLoutput = gr.HTML(label="Output",placeholder="URL Output")
        URLoutput = gr.HTML(label="Output")
    # Table that displays the DataFrame returned by the handlers below.
    with gr.Row():
        dataset = gr.DataFrame(wrap=True, interactive=True)
    # On page load, fill the dataset dropdown.
    demo.load(get_valid_datasets, inputs=None, outputs=[dataset_name])
    # Dataset change -> refresh the config dropdown and the stored splits.
    dataset_name.change(update_configs, inputs=[dataset_name], outputs=[config, splits_data])
    # Config change -> refresh the split dropdown and the table.
    config.change(update_splits, inputs=[config, splits_data], outputs=[split, dataset])
    # Split change -> reload the table; inputs follow update_dataset's
    # (split_name, config_name, dataset_name) parameter order.
    split.change(update_dataset, inputs=[split, config, dataset_name], outputs=[dataset])
    # Inputs are ordered [split, config, dataset_name], so update_URL's third
    # parameter (named `split`) receives the dataset id -- the one it uses to
    # build the URL. Keep this order in sync with update_URL.
    dataset_name.change(update_URL, inputs=[split, config, dataset_name], outputs=[URLcenter])
    # Button click -> render the URL box content as a link in the HTML output.
    btn.click(openurl, [URLcenter], URLoutput)
# Start the app when the module is executed.
demo.launch(debug=True)
# original: https://huggingface.co/spaces/freddyaboulton/dataset-viewer -- Freddy thanks! Your examples are the best.
# playlist on Gradio and Mermaid: https://www.youtube.com/watch?v=o7kCD4aWMR4&list=PLHgX2IExbFosW7hWNryq8hs2bt2aj91R-
# Link to Mermaid model and code: [![](https://mermaid.ink/img/pako:eNp1U8mO2zAM_RXCZ-eQpZccCmSZTIpOMQESIAdnDrRMx0JkydXSNDOYfy_lpUgD1AfBfnx8fCTlj0SYgpJ5UipzFRVaD4flSQM_YjwafcVJ9-FCfrbYVGA0ZQeLUkt9futiOM72pEh4QFijR9iTf2tzsx3Z0ti6hxslvb_Lm0TSNPvBDhQsg1TFXXAag7NBef_9hdDqFA6knbEbdgvGwu7mjRXVkDOLOV-yNXmytdQEsoROvTfi4EhK9XTSxUNz_mo4uVHm1lPyce-uR1k_n2RHymHRNPAvNXaTT7NVZYwjeDECVbS4UiYUAyc2lc-yFoPXxkujHaAl2G54PCjIpfBssZAGtsZ5KlLYkjWXkMLiuOfjPVhiymr3_x4qS7wicneTFuMW6Gdxlb6Cb7oJvt1LbEpMso08sza8MnqskA9jL27Ij72Jafb0G-tGkQNTdgKOy_XcFP5GDxFbWsJLV3FQid2LWfZsfpHVqAXBCBYa1e2dAHUBu5Ar6dgby0ghPWxQWk2Oh_L0M0h_S2Ep0YHUrXFHXD_msefo5XEkfFWBK8atdkA7mgfoalpATJI0qfnWoCz4b_iI0VPiK6rplMz5taASg_Kn5KQ_mYrBm_1Ni2TubaA0CU2BntYSeQl1Mi9ROfr8A8FBGds?type=png)](https://mermaid.live/edit#pako:eNp1U8mO2zAM_RXCZ-eQpZccCmSZTIpOMQESIAdnDrRMx0JkydXSNDOYfy_lpUgD1AfBfnx8fCTlj0SYgpJ5UipzFRVaD4flSQM_YjwafcVJ9-FCfrbYVGA0ZQeLUkt9futiOM72pEh4QFijR9iTf2tzsx3Z0ti6hxslvb_Lm0TSNPvBDhQsg1TFXXAag7NBef_9hdDqFA6knbEbdgvGwu7mjRXVkDOLOV-yNXmytdQEsoROvTfi4EhK9XTSxUNz_mo4uVHm1lPyce-uR1k_n2RHymHRNPAvNXaTT7NVZYwjeDECVbS4UiYUAyc2lc-yFoPXxkujHaAl2G54PCjIpfBssZAGtsZ5KlLYkjWXkMLiuOfjPVhiymr3_x4qS7wicneTFuMW6Gdxlb6Cb7oJvt1LbEpMso08sza8MnqskA9jL27Ij72Jafb0G-tGkQNTdgKOy_XcFP5GDxFbWsJLV3FQid2LWfZsfpHVqAXBCBYa1e2dAHUBu5Ar6dgby0ghPWxQWk2Oh_L0M0h_S2Ep0YHUrXFHXD_msefo5XEkfFWBK8atdkA7mgfoalpATJI0qfnWoCz4b_iI0VPiK6rplMz5taASg_Kn5KQ_mYrBm_1Ni2TubaA0CU2BntYSeQl1Mi9ROfr8A8FBGds)