LysandreJik's picture
links
9a56e9e
import json
import os
from io import BytesIO
import gradio as gr
from huggingface_hub import upload_file
default_question = """
We're going to use the <a href="https://huggingface.co/datasets/wikitext"><code>wikitext (link)</a></code> dataset with the <code><a href="https://huggingface.co/bert-base-cased?">bert-base-cased (link)</a></code> model checkpoint.
<br/><br/>
Start by loading the <code>wikitext-2-raw-v1</code> version of that dataset, and take the 11th example (index 10) of the <code>train</code> split.<br/>
We'll tokenize this using the appropriate tokenizer, and we'll mask the sixth token (index 5) the sequence.
<br/><br/>
When using the <code>bert-base-cased</code> checkpoint to unmask that token, what is the most probable prediction?
<br/>
<br/>
Tips:
<br/>
- You might find the <a href="https://huggingface.co/docs/transformers/index">transformers docs (link)</a> useful.
<br/>
- You might find the <a href="https://huggingface.co/docs/datasets/index">datasets docs (link)</a> useful.
"""
internships = {
'Accelerate': default_question,
'Diffusion distillation': default_question,
'Skops & Scikit-Learn': default_question,
"Code Generation": default_question,
"Document AI Democratization": default_question,
"Evaluate": default_question,
"ASR": default_question,
"Efficient video pretraining": default_question,
"Embodied AI": default_question,
"Emergence of scene and text understanding": default_question,
"Everything is multimodal": default_question,
"Everything is vision": default_question,
"Retrieval augmentation as prompting": default_question,
"Social impact evaluations": default_question,
"Toolkit for detecting distribution shift": default_question,
"AI Art Tooling Residency": default_question,
"Gradio as an ecosystem": default_question,
}
with gr.Blocks() as demo:
gr.Markdown(
"""
# Internship introduction
Please select the internship you would like to apply to and answer the question asked in the Answer box.
"""
)
internship_choice = gr.Dropdown(label='Internship', choices=list(internships.keys()))
with gr.Column(visible=False) as details_col:
summary = gr.HTML(label='Question')
details = gr.Textbox(label="Answer")
username = gr.Textbox(label="Hugging Face Username")
generate_btn = gr.Button("Submit")
output = gr.Label()
def filter_species(species):
return gr.Label.update(
internships[species]
), gr.update(visible=True)
internship_choice.change(filter_species, internship_choice, [summary, details_col])
def on_click(_details, _username, _internship_choice):
response = {'response': _details, "internship": _internship_choice}
upload_file(
path_or_fileobj=BytesIO(bytes(json.dumps(response), 'utf-8')),
path_in_repo=_username,
repo_id='lysandre/internships',
repo_type='dataset',
token=os.environ['HF_TOKEN']
)
return f"Submitted: '{_details}' for user '{_username}'"
generate_btn.click(on_click, inputs=[details, username, internship_choice], outputs=[output])
if __name__ == "__main__":
demo.launch()