awacke1 committed on
Commit
ff0ccdb
1 Parent(s): c60c8cf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -3
app.py CHANGED
@@ -2,16 +2,17 @@ from transformers import BlenderbotTokenizer, BlenderbotForConditionalGeneration
2
  import torch
3
  import gradio as gr
4
 
5
-
6
  # PersistDataset -----
7
  import os
8
  import csv
9
- import gradio as gr
10
  from gradio import inputs, outputs
11
  import huggingface_hub
12
  from huggingface_hub import Repository, hf_hub_download, upload_file
13
  from datetime import datetime
14
 
 
 
 
15
 
16
  # -------------------------------------------- For Memory - you will need to set up a dataset and HF_TOKEN ---------
17
  UseMemory=True
@@ -37,7 +38,60 @@ if UseMemory:
37
  local_dir="data", clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN
38
  )
39
 
40
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  def store_message(name: str, message: str):
42
  if name and message:
43
  with open(DATA_FILE, "a") as csvfile:
 
2
  import torch
3
  import gradio as gr
4
 
 
5
  # PersistDataset -----
6
  import os
7
  import csv
 
8
  from gradio import inputs, outputs
9
  import huggingface_hub
10
  from huggingface_hub import Repository, hf_hub_download, upload_file
11
  from datetime import datetime
12
 
13
+ from typing import List, Dict
14
+ import httpx
15
+ import pandas as pd
16
 
17
  # -------------------------------------------- For Memory - you will need to set up a dataset and HF_TOKEN ---------
18
  UseMemory=True
 
38
  local_dir="data", clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN
39
  )
40
 
41
async def get_splits(dataset_name: str) -> Dict[str, List[Dict]]:
    """Fetch the split listing for a dataset from the datasets-server API.

    Args:
        dataset_name: Dataset identifier, sent as the ``dataset`` query
            parameter.

    Returns:
        The decoded JSON body of the ``/splits`` response.
    """
    URL = "https://datasets-server.huggingface.co/splits"
    async with httpx.AsyncClient() as session:
        # Pass the name through ``params`` so httpx percent-encodes it; names
        # containing characters such as "&", "#" or spaces would otherwise
        # corrupt the query string.
        response = await session.get(URL, params={"dataset": dataset_name})
    return response.json()
46
+
47
async def get_valid_datasets() -> Dict:
    """Fetch the list of valid datasets and wrap it in a Dropdown update.

    Returns:
        The value produced by ``gr.Dropdown.update`` with the fetched names
        as choices. ``"kelm"`` is preselected when it is one of the choices;
        otherwise the first available name is used (or ``None`` when the
        list is empty).
    """
    URL = "https://datasets-server.huggingface.co/valid"
    async with httpx.AsyncClient() as session:
        response = await session.get(URL)
    datasets = response.json()["valid"]

    # Keep "kelm" as the preferred default, but never preselect a value that
    # is absent from the choices actually returned by the server.
    if "kelm" in datasets:
        default = "kelm"
    else:
        default = datasets[0] if datasets else None
    return gr.Dropdown.update(choices=datasets, value=default)
53
+ # The one to watch: https://huggingface.co/rungalileo
54
+ # rungalileo/medical_transcription_40
55
+
56
async def get_first_rows(dataset: str, config: str, split: str) -> Dict[str, List[Dict]]:
    """Fetch the first rows of one dataset split from the datasets-server API.

    Args:
        dataset: Value for the ``dataset`` query parameter.
        config: Value for the ``config`` query parameter.
        split: Value for the ``split`` query parameter.

    Returns:
        The decoded JSON body of the ``/first-rows`` response.
    """
    URL = "https://datasets-server.huggingface.co/first-rows"
    query = {"dataset": dataset, "config": config, "split": split}
    async with httpx.AsyncClient() as session:
        # ``params`` lets httpx percent-encode each value instead of relying
        # on raw string interpolation into the query string.
        response = await session.get(URL, params=query)
    # Log the exact (encoded) URL that was requested, for debugging.
    print(response.url)
    return response.json()
63
+
64
def get_df_from_rows(api_output):
    """Build a DataFrame from the ``"row"`` value of each ``api_output["rows"]`` entry."""
    records = []
    for entry in api_output["rows"]:
        records.append(entry["row"])
    return pd.DataFrame(records)
66
+
67
async def update_configs(dataset_name: str):
    """Look up a dataset's configs and build the config Dropdown update.

    Args:
        dataset_name: Dataset identifier passed to ``get_splits``.

    Returns:
        A 2-tuple of the Dropdown update (sorted, de-duplicated config names,
        with the first one selected) and the raw splits payload returned by
        ``get_splits``.
    """
    splits = await get_splits(dataset_name)
    all_configs = sorted({s["config"] for s in splits["splits"]})
    # An empty split listing would make ``all_configs[0]`` raise IndexError;
    # fall back to no selection instead.
    default = all_configs[0] if all_configs else None
    return (gr.Dropdown.update(choices=all_configs, value=default), splits)
72
+
73
async def update_splits(config_name: str, state: Dict):
    """Build the split Dropdown update for a config and load its first split.

    Args:
        config_name: Config whose splits should be listed.
        state: Splits payload; read as ``state["splits"]``, a list of entries
            carrying ``"dataset"``, ``"config"`` and ``"split"`` keys.

    Returns:
        A 2-tuple of the Dropdown update (sorted split names, first one
        selected) and the DataFrame returned by ``update_dataset`` for that
        first split. When the config has no splits, the Dropdown is cleared
        and an empty DataFrame is returned.
    """
    splits_for_config = sorted(
        {s["split"] for s in state["splits"] if s["config"] == config_name}
    )
    if not splits_for_config:
        # Nothing matches this config: indexing [0] below would raise
        # IndexError, so return an empty selection and an empty table.
        return (gr.Dropdown.update(choices=[], value=None), pd.DataFrame())

    dataset_name = state["splits"][0]["dataset"]
    first_split = splits_for_config[0]
    dataset = await update_dataset(first_split, config_name, dataset_name)
    return (gr.Dropdown.update(choices=splits_for_config, value=first_split), dataset)
78
+
79
async def update_dataset(split_name: str, config_name: str, dataset_name: str):
    """Fetch the first rows for the given dataset/config/split and return them as a DataFrame.

    Note the argument order: split first, then config, then dataset.
    """
    api_output = await get_first_rows(dataset_name, config_name, split_name)
    return get_df_from_rows(api_output)
83
+
84
+ # Guido van Rossum: https://www.youtube.com/watch?v=-DVyjdw4t9I
85
async def update_URL(dataset: str, config: str, split: str) -> str:
    """Return the Hugging Face Hub page URL for a dataset.

    Args:
        dataset: Dataset identifier used as the final path component.
        config: Unused; kept so the signature stays unchanged for callers.
        split: Unused; kept so the signature stays unchanged for callers.

    Returns:
        ``https://huggingface.co/datasets/<dataset>``.
    """
    # The page URL is keyed by the dataset name. The previous code built it
    # from ``split`` (e.g. ".../datasets/train") after first assigning a
    # first-rows API URL that was immediately overwritten.
    return f"https://huggingface.co/datasets/{dataset}"
89
+
90
async def openurl(URL: str) -> str:
    """Render a URL as an HTML anchor that opens in a new tab.

    Args:
        URL: Link target; also used as the visible link text.

    Returns:
        An ``<a>`` element string with quoted attributes and the URL
        HTML-escaped in both the ``href`` and the link text.
    """
    # Local import: the name ``html`` is not imported at file level.
    from html import escape

    # Escape (including quotes) so characters such as spaces, '"', "<" or ">"
    # in the URL cannot break out of the attribute or inject markup.
    safe_url = escape(URL, quote=True)
    return f'<a href="{safe_url}" target="_blank" rel="noopener noreferrer">{safe_url}</a>'
93
+
94
+
95
  def store_message(name: str, message: str):
96
  if name and message:
97
  with open(DATA_FILE, "a") as csvfile: