import os

import datasets
import gradio as gr
import pandas as pd
from datasets import load_dataset
from huggingface_hub import HfApi

# Hugging Face write token and target dataset repo id (e.g. "user/answers"),
# both supplied as Space secrets / environment variables.
TOKEN = os.getenv("TOKEN")
DATASET_ID = os.getenv("DATASET_ID")

# Single CSV file that backs the whole app, both locally and on the Hub.
DATA_FILE = "persisted_dataset.csv"
# Placeholder the repo owner replaces by hand when answering a question.
PLACEHOLDER_ANSWER = "ANSWER HERE"

# Disable caching so every load_dataset() call sees the freshly pushed CSV.
# (set_caching_enabled() is deprecated in favor of disable_caching().)
datasets.disable_caching()

hf_api = HfApi(token=TOKEN)


def create_dataset():
    """Create the private dataset repo on the Hub if it doesn't exist yet.

    On first creation it also seeds the repo with an initial CSV via
    initial_dataset(). Creation is best-effort: if the repo already exists
    (or creation fails) we silently continue, since the callers only need
    the repo to be present.
    """
    try:
        hf_api.create_repo(repo_id=DATASET_ID, repo_type="dataset", private=True)
        print("created dataset")
        initial_dataset()
    except Exception:
        # Most likely the repo already exists; keep going either way.
        pass


def initial_dataset():
    """Seed the dataset repo with a one-row question/answer CSV."""
    df = pd.DataFrame(columns=["question", "answer"])
    df.loc[len(df)] = ["Find question here", "Find answer here"]  # seed row
    # Persist locally, then push to the Hub.
    df.to_csv(DATA_FILE, index=False)
    hf_api.upload_file(
        path_or_fileobj=DATA_FILE,
        path_in_repo=f"./{DATA_FILE}",
        repo_id=DATASET_ID,
        repo_type="dataset",
    )


def write_to_dataset(message):
    """Append *message* as a new unanswered question and push the CSV back.

    Returns the updated DataFrame.
    """
    # If the dataset is not created yet, the submit button would crash,
    # so create it lazily here.
    create_dataset()

    # Load the current CSV from the Hub and append the question with a
    # placeholder answer.
    dataset = load_dataset(DATASET_ID, data_files=DATA_FILE, token=TOKEN)
    df = pd.DataFrame(dataset["train"])
    # DataFrame.append() was removed in pandas 2.0 — use concat instead.
    new_row = pd.DataFrame([{"question": message, "answer": PLACEHOLDER_ANSWER}])
    df = pd.concat([df, new_row], ignore_index=True).drop_duplicates()

    # Persist locally and push back to the Hub.
    df.to_csv(DATA_FILE, index=False)
    hf_api.upload_file(
        path_or_fileobj=DATA_FILE,
        path_in_repo=f"./{DATA_FILE}",
        repo_id=DATASET_ID,
        repo_type="dataset",
    )
    # NOTE(review): the local CSV is intentionally left on disk for now;
    # remove it here if persistence becomes a privacy concern.
    return df


def read_dataset():
    """Load the CSV from the Hub and return only the answered questions."""
    # Guard: reading would crash if the repo doesn't exist yet.
    create_dataset()
    dataset = load_dataset(
        DATASET_ID,
        data_files=DATA_FILE,
        token=TOKEN,
        download_mode="force_redownload",
    )
    df = pd.DataFrame(dataset["train"])
    # Rows still carrying the placeholder have not been answered yet.
    return df[df["answer"] != PLACEHOLDER_ANSWER]


def render_answers():
    """Render the answered Q&A pairs as one newest-first text blob."""
    df = read_dataset().copy()
    df = df.sort_index(ascending=False)  # newest questions first
    df["question"] = "❓ " + df["question"]
    df["answer"] = "πŸ™‹ " + df["answer"]
    return "\n".join(
        df.apply(lambda row: row["question"] + "\n" + row["answer"] + "\n\n\n", axis=1)
    )


with gr.Blocks() as demo:
    gr.Markdown("## Ask me anything, I'm not going to lie! πŸ‘‹ ")
    gr.Markdown("In this app, you can write me something anonymous and read my answers to your inputs. ✍️ ")
    gr.Markdown("Let's spread love and be respectful πŸ’ƒπŸ»πŸ•ΊπŸ»")
    with gr.Accordion("Open this toggle to see how you can build your own Ask Me Anything app to receive and answer questions ⬇️", open=False):
        gr.Markdown("Duplicate this Space by clicking three dots and then `Duplicate this Space` and provide TOKEN and DATASET_ID.")
        gr.Markdown("Provide your Hugging Face write token that you can get [here](https://huggingface.co/settings/tokens).")
        gr.Markdown("For DATASET_ID, provide something like `merve/answers` it's a dataset repository that will be created through this Space automatically, where your questions will be privately populated.")
        gr.Markdown("To answer questions, simply edit the `persisted_dataset.csv` file in the dataset repository, edit `ANSWER HERE` parts beside every question.")
        gr.Markdown("The app will not show unanswered questions by default.")
    with gr.Tab("Write me something ✍️ "):
        question_box = gr.Textbox(label="What do you want to say to me?")
        submit_btn = gr.Button("Submit")
        submit_btn.click(fn=write_to_dataset, inputs=[question_box])
    with gr.Tab("Read My Answers"):
        # Keep a handle on the Markdown so Refresh can actually update it:
        # without outputs=, the click re-ran render_answers but the page
        # never changed.
        answers_md = gr.Markdown(render_answers())
        refresh_btn = gr.Button("Refresh")
        refresh_btn.click(fn=render_answers, outputs=[answers_md])

demo.launch(debug=True)