"""Gradio demo: classify a text for irony and log each prediction as JSON lines.

Every submitted text and its predicted label/score are appended to a local
JSON-lines file inside ``JSON_DATASET_DIR``; that folder is handed to a
``CommitScheduler`` so its contents end up in a Hugging Face dataset repo.
"""

import ast
import json
from datetime import datetime
from pathlib import Path
from uuid import uuid4

import gradio as gr
from huggingface_hub import CommitScheduler
from transformers import pipeline

# Based on https://huggingface.co/spaces/Wauplin/space_to_dataset_saver/blob/main/app_json.py
# Data is saved at https://huggingface.co/datasets/MR17u/tweeteval-irony-mcc/tree/main

JSON_DATASET_DIR = Path("json_dataset")
JSON_DATASET_DIR.mkdir(parents=True, exist_ok=True)

# One file per process start (random UUID) so separate runs never append to
# the same file.
JSON_DATASET_PATH = JSON_DATASET_DIR / f"data-{uuid4()}.json"

CLS_MODEL_NAME = "PierreEpron/tweeteval-irony-mcc"

# NOTE(review): repo_id carries no namespace; presumably it resolves to the
# authenticated user's namespace (MR17u per the URL above) - confirm.
scheduler = CommitScheduler(
    repo_id="tweeteval-irony-mcc",
    repo_type="dataset",
    folder_path=JSON_DATASET_DIR,
    path_in_repo="data",
)

classifier = pipeline(
    model=CLS_MODEL_NAME,
    tokenizer='cardiffnlp/twitter-roberta-large-2022-154m',
)


def _parse_prediction(result: str) -> dict:
    """Return the first prediction dict contained in *result*.

    *result* is expected to be the textual form of the classifier output, a
    Python-literal list of dicts such as ``"[{'label': 'x', 'score': 0.9}]"``.
    It is parsed with ``ast.literal_eval`` rather than by swapping quote
    characters and calling ``json.loads``, which breaks whenever a value
    itself contains a quote character.

    Raises:
        ValueError: if *result* is not a valid Python literal.
        IndexError: if the parsed list is empty.
    """
    try:
        predictions = ast.literal_eval(result)
    except (ValueError, SyntaxError) as err:
        raise ValueError(
            f"Unparseable classification output: {result!r}"
        ) from err
    return predictions[0]


def save_json(entry: str, result) -> None:
    """Append one classification record to the local JSON-lines dataset file.

    Args:
        entry: The text the user submitted.
        result: The content of the "Classification" textbox, i.e. the
            stringified classifier output for *entry*.

    The record holds the entry, the predicted label and score, and the local
    timestamp in ISO format, written as a single JSON object on its own line.
    """
    # Parse and build the record before taking the lock or opening the file,
    # so malformed input neither holds the lock nor touches the dataset file.
    prediction = _parse_prediction(result)
    record = {
        "entry": entry,
        "label": prediction['label'],
        "score": prediction['score'],
        "datetime": datetime.now().isoformat(),
    }

    # Hold the scheduler's lock while writing so the append does not interleave
    # with the scheduler's own access to the folder.
    with scheduler.lock:
        with JSON_DATASET_PATH.open("a", encoding="utf-8") as f:
            json.dump(record, f)
            f.write("\n")


def classif(text: str):
    """Run the classification pipeline on *text* and return its raw output."""
    return classifier(text)


with gr.Blocks() as demo:
    with gr.Row():
        entry = gr.Textbox(label="Input")
        result = gr.Textbox(label="Classification")
    input_btn = gr.Button("Submit")

    # Classify first; only when that succeeds, log the entry together with
    # the displayed result.
    input_btn.click(fn=classif, inputs=entry, outputs=result).success(
        fn=save_json,
        inputs=[entry, result],
        outputs=None,
    )

demo.launch()