Spaces:
Sleeping
Sleeping
# coding=utf-8 | |
# Copyright 2023 The GIRT Authors. | |
# Lint as: python3 | |
# This space is based on the AMR-KELEG/ALDi and cis-lmu/GlotLID spaces.
# GIRT Space | |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM | |
import streamlit as st | |
import pandas as pd | |
import base64 | |
import json | |
def render_svg(svg):
    """Render the given SVG string as a centred image.

    The SVG text is base64-encoded and embedded as a ``data:`` URI inside an
    ``<img>`` tag, which is then written into a fresh Streamlit container with
    HTML rendering enabled.

    Args:
        svg: The SVG document as a string.
    """
    b64 = base64.b64encode(svg.encode("utf-8")).decode("utf-8")
    # HTML attributes are separated by whitespace only; the original markup had
    # a stray comma between ``src`` and ``width``.
    html = f'<p align="center"> <img src="data:image/svg+xml;base64,{b64}" width="40%"/> </p>'
    container = st.container()
    container.write(html, unsafe_allow_html=True)
def load_model(model_name):
    """Return the seq2seq model obtained from ``AutoModelForSeq2SeqLM.from_pretrained``.

    Args:
        model_name: Identifier passed straight through to ``from_pretrained``.
    """
    return AutoModelForSeq2SeqLM.from_pretrained(model_name)
def load_tokenizer(model_name):
    """Return the tokenizer obtained from ``AutoTokenizer.from_pretrained``.

    Args:
        model_name: Identifier passed straight through to ``from_pretrained``.
    """
    return AutoTokenizer.from_pretrained(model_name)
def load_examples():
    """Load the example inputs stored in ``assets/examples.json``.

    Returns:
        The parsed JSON content of the file.

    Raises:
        FileNotFoundError: If ``assets/examples.json`` does not exist relative
            to the current working directory.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # Read explicitly as UTF-8 so the result does not depend on the platform's
    # default locale encoding.
    with open("assets/examples.json", "r", encoding="utf-8") as examples_file:
        return json.load(examples_file)
# load resources: model, tokenizer and example inputs, behind a spinner
girt_checkpoint = 'nafisehNik/girt-t5-base'
with st.spinner(text="Please wait while the model is loading...."):
    model = load_model(girt_checkpoint)
    tokenizer = load_tokenizer(girt_checkpoint)
    examples = load_examples()
# build the model instruction from the metadata fields
def create_instruction(name, about, title, labels, assignees, headline_type, headline, summary):
    """Assemble the multi-line instruction string from the metadata values.

    Each metadata field is emitted as ``"<key>: <value>"`` on its own line.
    Any falsy metadata value is replaced by ``<|MASK|>``; a falsy summary is
    replaced by ``<|EMPTY|>``.

    Returns:
        The instruction as a single newline-joined string.
    """
    keys = ('name', 'about', 'title', 'labels', 'assignees', 'headlines_type', 'headlines')
    values = (name, about, title, labels, assignees, headline_type, headline)

    rendered = []
    for key, value in zip(keys, values):
        shown = value if value else '<|MASK|>'
        rendered.append(f'{key}: {shown}')

    summary_shown = summary if summary else '<|EMPTY|>'
    rendered.append(f'summary: {summary_shown}')
    return '\n'.join(rendered)
# run generation for one instruction and clean up the decoded text
def compute(sample, top_p, top_k, do_sample, max_length, min_length):
    """Generate text for ``sample`` with the module-level model and tokenizer.

    The sample is tokenized into PyTorch tensors, passed to ``model.generate``
    with the given decoding settings, and the first decoded sequence is
    returned after post-processing. Decoding keeps special tokens, so
    ``</s>``, ``<pad>`` and ``<unk>`` are stripped here by plain string
    replacement, and a space following a newline is removed.

    Returns:
        The cleaned-up generated text.
    """
    encoded = tokenizer(sample, return_tensors="pt").to('cpu')
    generation_settings = {
        'min_length': min_length,
        'max_length': max_length,
        'do_sample': do_sample,
        'top_p': top_p,
        'top_k': top_k,
    }
    token_ids = model.generate(**encoded, **generation_settings).to('cpu')
    text = tokenizer.batch_decode(token_ids, skip_special_tokens=False)[0]

    # Applied in this order; '<pad> ' must be handled before the bare '<pad>'.
    cleanup_steps = (
        ('\n ', '\n'),
        ('</s>', ''),
        ('<pad> ', ''),
        ('<pad>', ''),
        ('<unk>', ''),
    )
    for needle, replacement in cleanup_steps:
        text = text.replace(needle, replacement)
    return text
# load the "Duplicate Space" badge
st.markdown("[![Duplicate Space](https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=&logoWidth=14)](https://huggingface.co/spaces/nafisehNik/girt-space?duplicate=true)")

# load logo; the context manager closes the file handle once it has been read
with open("assets/logo.svg", encoding="utf-8") as logo_file:
    render_svg(logo_file.read())

# expand sidebar (the <style> element is now properly closed)
st.markdown(
    """
<style>
[data-testid="stSidebar"][aria-expanded="true"]{
min-width: 450px;
max-width: 450px;
}
</style>
""",
    unsafe_allow_html=True)


def _optional_metadata_input(example, field, checkbox_label, input_label, placeholder):
    """Render a "without <field>" checkbox plus a text input for one optional field.

    The checkbox defaults to ticked when the example stores ``<|EMPTY|>`` for
    the field. When it is ticked the text input is not rendered.

    Returns:
        ``'<|EMPTY|>'`` when the checkbox is ticked, otherwise the text input's value.
    """
    example_value = example[field]
    example_is_empty = example_value == '<|EMPTY|>'
    if st.checkbox(label=checkbox_label, value=example_is_empty):
        return '<|EMPTY|>'
    return st.text_input(
        label=input_label,
        # an "empty" example must not leak its marker into the editable field
        value="" if example_is_empty else example_value,
        placeholder=placeholder,
        on_change=None)


# NOTE(review): the "π§" / "π" prefixes in the labels below look like mis-decoded
# emoji; they are kept exactly as found - confirm against the deployed file.
with st.sidebar:
    st.title(" π§ Settings")

    with st.expander("π Issue Template Inputs", True):
        # choose examples
        in_examples = st.selectbox(
            label='You can select one of the following examples and customize it:',
            options=('no example', 'bug report', 'feature request', 'question', 'documentation'),
            index=1)
        example = examples[in_examples]

        in_name = st.text_input(
            label="Name Metadata: ",
            value=example['name'],
            placeholder="e.g., Bug Report or Feature Request or Question",
            on_change=None)

        in_about = st.text_input(
            label="About Metadata: ",
            value=example['about'],
            placeholder="e.g., File a bug report",
            on_change=None)

        # Title / Labels / Assignees share the same "without X" checkbox logic
        in_title = _optional_metadata_input(
            example, 'title', 'without title', "Title Metadata: ", "e.g., [Bug]: ")
        in_labels = _optional_metadata_input(
            example, 'labels', 'without labels', "Labels Metadata: ", "e.g., feature, enhancement")
        in_assignees = _optional_metadata_input(
            example, 'assignees', 'without assignees', "Assignees Metadata: ", "e.g., username1, username2")

        # headline type: map the example's stored value to the selectbox index
        if example['headlines_type'] == '<|EMPTY|>':
            headlines_type_value_default = 3
        elif example['headlines_type'] == '':
            headlines_type_value_default = 2
        elif example['headlines_type'] == '**Emphasis**':
            headlines_type_value_default = 1
        else:
            headlines_type_value_default = 0

        headline_choice = st.selectbox(
            label='How would you like to be your Headlines?',
            options=('# Heading', '**Emphasis**', 'Either', 'No headlines'),
            index=headlines_type_value_default)

        if headline_choice == 'No headlines':
            # If no headlines is selected, force the headlines to be empty as
            # well. The decision is taken on the raw selectbox choice: the
            # original rewrote the value to '<|EMPTY|>' first and then compared
            # it with 'No headlines', so the text area was always shown and
            # overwrote the forced-empty headlines.
            in_headline_type = '<|EMPTY|>'
            in_headlines = '<|EMPTY|>'
        else:
            in_headline_type = '<|MASK|>' if headline_choice == 'Either' else headline_choice
            headlines_value_default = example['headlines'] if example['headlines'] != '<|EMPTY|>' else ""
            in_headlines = st.text_area(
                label="Headlines: ",
                value=headlines_value_default,
                placeholder="Enter each headline in one line. e.g.,\nWelcome\nConcise Description\nAdditional Info",
                on_change=None,
                height=200)
            if not in_headlines:
                in_headlines = '<|MASK|>'
            else:
                # one headline per line, surrounding whitespace removed
                in_headlines = [element.strip() for element in in_headlines.split('\n')]

        # summary
        summary_value_default = example['summary'] if example['summary'] != '<|EMPTY|>' else ""
        in_summary = st.text_area(
            label="Summary: ",
            value=summary_value_default,
            placeholder="This Github Issue Template is ...",
            on_change=None,
            height=200)

    with st.expander("π Model Configs", False):
        max_length_in = st.slider("max_length", 30, 512, 300)
        min_length_in = st.slider("min_length", 0, 300, 30)
        top_p_in = st.slider("top_p", 0.0, 1.0, 0.92)
        top_k_in = st.slider("top_k", 0, 100, 0)

# show the instruction that will be sent to the model
prompt = create_instruction(in_name, in_about, in_title, in_labels, in_assignees, in_headline_type, in_headlines, in_summary)
st.code(prompt, language="python")

# generate only when the user asks for it
clicked = st.button("Submit")
if clicked:
    with st.spinner("Please Wait..."):
        res = compute(prompt, top_p=top_p_in, top_k=top_k_in, do_sample=True, max_length=max_length_in, min_length=min_length_in)
    st.code(res, language="python")