import json
import os
from io import BytesIO
import gradio as gr
from huggingface_hub import upload_file
default_question = """
We're going to use the wikitext (link)
dataset with the bert-base-cased (link)
model checkpoint.
Start by loading the wikitext-2-raw-v1
version of that dataset, and take the 11th example (index 10) of the train
split.
We'll tokenize this using the appropriate tokenizer, and we'll mask the sixth token (index 5) the sequence.
When using the bert-base-cased
checkpoint to unmask that token, what is the most probable prediction?
Tips:
- You might find the transformers docs (link) useful.
- You might find the datasets docs (link) useful.
"""
internships = {
'Accelerate': default_question,
'Diffusion distillation': default_question,
'Skops & Scikit-Learn': default_question,
"Code Generation": default_question,
"Document AI Democratization": default_question,
"Evaluate": default_question,
"ASR": default_question,
"Efficient video pretraining": default_question,
"Embodied AI": default_question,
"Emergence of scene and text understanding": default_question,
"Everything is multimodal": default_question,
"Everything is vision": default_question,
"Retrieval augmentation as prompting": default_question,
"Social impact evaluations": default_question,
"Toolkit for detecting distribution shift": default_question,
"AI Art Tooling Residency": default_question,
"Gradio as an ecosystem": default_question,
}
with gr.Blocks() as demo:
gr.Markdown(
"""
# Internship introduction
Please select the internship you would like to apply to and answer the question asked in the Answer box.
"""
)
internship_choice = gr.Dropdown(label='Internship', choices=list(internships.keys()))
with gr.Column(visible=False) as details_col:
summary = gr.HTML(label='Question')
details = gr.Textbox(label="Answer")
username = gr.Textbox(label="Hugging Face Username")
generate_btn = gr.Button("Submit")
output = gr.Label()
def filter_species(species):
return gr.Label.update(
internships[species]
), gr.update(visible=True)
internship_choice.change(filter_species, internship_choice, [summary, details_col])
def on_click(_details, _username, _internship_choice):
response = {'response': _details, "internship": _internship_choice}
upload_file(
path_or_fileobj=BytesIO(bytes(json.dumps(response), 'utf-8')),
path_in_repo=_username,
repo_id='lysandre/internships',
repo_type='dataset',
token=os.environ['HF_TOKEN']
)
return f"Submitted: '{_details}' for user '{_username}'"
generate_btn.click(on_click, inputs=[details, username, internship_choice], outputs=[output])
if __name__ == "__main__":
demo.launch()