girt-space / app.py
nafisehNik's picture
Update app.py
330e431
raw
history blame
9.41 kB
# coding=utf-8
# Copyright 2023 The GIRT Authors.
# Lint as: python3
# This Space is based on the AMR-KELEG/ALDi and cis-lmu/GlotLID Spaces.
# GIRT Space
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import streamlit as st
import pandas as pd
import base64
import json
@st.cache_data
def render_svg(svg):
    """Render an SVG string as a centered image on the page.

    The SVG text is base64-encoded, embedded in an ``<img>`` tag as a
    ``data:image/svg+xml`` URI, and written as raw HTML into a new Streamlit
    container.

    Args:
        svg: The SVG markup as a string.
    """
    b64 = base64.b64encode(svg.encode("utf-8")).decode("utf-8")
    # HTML attributes are separated by whitespace only; the original markup
    # had a stray comma between `src` and `width`.
    html = rf'<p align="center"> <img src="data:image/svg+xml;base64,{b64}" width="40%"/> </p>'
    c = st.container()
    c.write(html, unsafe_allow_html=True)
@st.cache_resource
def load_model(model_name):
    """Return the seq2seq model loaded via ``from_pretrained(model_name)``.

    Wrapped in ``st.cache_resource`` so the loaded object is cached by
    Streamlit rather than rebuilt on each call.
    """
    return AutoModelForSeq2SeqLM.from_pretrained(model_name)
@st.cache_resource
def load_tokenizer(model_name):
    """Return the tokenizer loaded via ``from_pretrained(model_name)``.

    Wrapped in ``st.cache_resource`` so the loaded object is cached by
    Streamlit rather than rebuilt on each call.
    """
    return AutoTokenizer.from_pretrained(model_name)
@st.cache_resource
def load_examples():
    """Load the example issue-template inputs from ``assets/examples.json``.

    Returns:
        The parsed JSON content of the file.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding.
    with open("assets/examples.json", "r", encoding="utf-8") as f:
        examples = json.load(f)
    return examples
# Load the model, tokenizer and examples while showing a spinner.
# The model and tokenizer come from the same checkpoint, so name it once.
checkpoint_name = 'nafisehNik/girt-t5-base'
with st.spinner(text="Please wait while the model is loading...."):
    model = load_model(checkpoint_name)
    tokenizer = load_tokenizer(checkpoint_name)
    examples = load_examples()
# create instruction from metadata
def create_instruction(name, about, title, labels, assignees, headline_type, headline, summary):
    """Build the multi-line instruction prompt from the template metadata.

    Every falsy metadata value is replaced by ``<|MASK|>``; a falsy summary
    is replaced by ``<|EMPTY|>``. Each field is emitted as ``key: value`` on
    its own line, in a fixed order, with the summary last.

    Returns:
        The instruction as a single newline-joined string.
    """
    mask_token = '<|MASK|>'
    empty_token = '<|EMPTY|>'

    labelled_fields = (
        ('name', name),
        ('about', about),
        ('title', title),
        ('labels', labels),
        ('assignees', assignees),
        ('headlines_type', headline_type),
        ('headlines', headline),
    )

    rows = []
    for key, raw_value in labelled_fields:
        shown_value = raw_value if raw_value else mask_token
        rows.append(f'{key}: {shown_value}')

    shown_summary = summary if summary else empty_token
    rows.append(f'summary: {shown_summary}')
    return '\n'.join(rows)
# compute the output
def compute(sample, top_p, top_k, do_sample, max_length, min_length):
    """Generate text for ``sample`` and strip special-token markers from it.

    The prompt is tokenized with the module-level ``tokenizer``, passed to
    the module-level ``model``'s ``generate`` with the given length and
    sampling arguments, and the first decoded sequence is cleaned up.

    Returns:
        The first generated sequence as a string, after the replacements
        below have been applied.
    """
    encoded_prompt = tokenizer(sample, return_tensors="pt").to('cpu')
    generation_options = {
        'min_length': min_length,
        'max_length': max_length,
        'do_sample': do_sample,
        'top_p': top_p,
        'top_k': top_k,
    }
    token_ids = model.generate(**encoded_prompt, **generation_options).to('cpu')

    # Special tokens are kept during decoding and removed by hand below.
    text = tokenizer.batch_decode(token_ids, skip_special_tokens=False)[0]

    # Applied in this order: '<pad> ' must be handled before '<pad>'.
    cleanup_rules = (
        ('\n ', '\n'),
        ('</s>', ''),
        ('<pad> ', ''),
        ('<pad>', ''),
        ('<unk>', ''),
    )
    for marker, replacement in cleanup_rules:
        text = text.replace(marker, replacement)
    return text
# Show the "Duplicate Space" badge.
st.markdown("[![Duplicate Space](https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=&logoWidth=14)](https://huggingface.co/spaces/nafisehNik/girt-space?duplicate=true)")
# Load and render the logo. The context manager closes the file handle
# deterministically instead of leaving it to the garbage collector.
with open("assets/logo.svg", encoding="utf-8") as logo_file:
    render_svg(logo_file.read())
# Expand the sidebar: pin its width to 450px while it is open.
# The <style> element is now explicitly closed so the injected HTML is
# well-formed.
st.markdown(
    """
<style>
[data-testid="stSidebar"][aria-expanded="true"]{
min-width: 450px;
max-width: 450px;
}
</style>
""",
    unsafe_allow_html=True)
def _optional_text_input(example_value, checkbox_label, input_label, placeholder):
    """Render a "without ..." checkbox and, unless it is ticked, a text input.

    The checkbox starts ticked when ``example_value`` is ``'<|EMPTY|>'``.

    Returns:
        ``'<|EMPTY|>'`` when the checkbox is ticked, otherwise the content of
        the text input (pre-filled with ``example_value``, or empty when the
        example value is ``'<|EMPTY|>'``).
    """
    empty_by_default = example_value == '<|EMPTY|>'
    if st.checkbox(label=checkbox_label, value=empty_by_default):
        return '<|EMPTY|>'
    return st.text_input(
        label=input_label,
        value="" if empty_by_default else example_value,
        placeholder=placeholder,
        on_change=None)


with st.sidebar:
    st.title(" 🔧 Settings")

    with st.expander("🏗 Issue Template Inputs", True):
        # Choose an example; its fields pre-fill every widget below.
        in_examples = st.selectbox(
            label='You can select one of the following examples and customize it:',
            options=('no example', 'bug report', 'feature request', 'question', 'documentation'),
            index=1)
        example = examples[in_examples]

        in_name = st.text_input(
            label="Name Metadata: ",
            value=example['name'],
            placeholder="e.g., Bug Report or Feature Request or Question",
            on_change=None)

        in_about = st.text_input(
            label="About Metadata: ",
            value=example['about'],
            placeholder="e.g., File a bug report",
            on_change=None)

        # Title, labels and assignees share the same "without X" pattern.
        in_title = _optional_text_input(
            example['title'],
            checkbox_label='without title',
            input_label="Title Metadata: ",
            placeholder="e.g., [Bug]: ")

        in_labels = _optional_text_input(
            example['labels'],
            checkbox_label='without labels',
            input_label="Labels Metadata: ",
            placeholder="e.g., feature, enhancement")

        in_assignees = _optional_text_input(
            example['assignees'],
            checkbox_label='without assignees',
            input_label="Assignees Metadata: ",
            placeholder="e.g., username1, username2")

        # Headline type: map the example's value to the default option index;
        # anything not listed falls back to '# Heading' (index 0).
        headline_type_index_by_value = {
            '<|EMPTY|>': 3,
            '': 2,
            '**Emphasis**': 1,
        }
        headlines_type_value_default = headline_type_index_by_value.get(
            example['headlines_type'], 0)

        headline_choice = st.selectbox(
            label='How would you like to be your Headlines?',
            options=('# Heading', '**Emphasis**', 'Either', 'No headlines'),
            index=headlines_type_value_default)

        if headline_choice == 'No headlines':
            # No headlines selected: force the headlines to be empty as well
            # and do not show the headlines text area. (Previously the check
            # ran after the selection had been rewritten to '<|EMPTY|>', so
            # this branch was always overridden.)
            in_headline_type = '<|EMPTY|>'
            in_headlines = '<|EMPTY|>'
        else:
            in_headline_type = '<|MASK|>' if headline_choice == 'Either' else headline_choice

            headlines_value_default = example['headlines'] if example['headlines'] != '<|EMPTY|>' else ""
            in_headlines = st.text_area(
                label="Headlines: ",
                value=headlines_value_default,
                placeholder="Enter each headline in one line. e.g.,\nWelcome\nConcise Description\nAdditional Info",
                on_change=None,
                height=200)

            if not in_headlines:
                in_headlines = '<|MASK|>'
            else:
                # One headline per line, surrounding whitespace removed.
                in_headlines = [element.strip() for element in in_headlines.split('\n')]

        # Summary
        summary_value_default = example['summary'] if example['summary'] != '<|EMPTY|>' else ""
        in_summary = st.text_area(
            label="Summary: ",
            value=summary_value_default,
            placeholder="This Github Issue Template is ...",
            on_change=None,
            height=200)

    with st.expander("🎛 Model Configs", False):
        max_length_in = st.slider("max_length", 30, 512, 300)
        min_length_in = st.slider("min_length", 0, 300, 30)
        top_p_in = st.slider("top_p", 0.0, 1.0, 0.92)
        top_k_in = st.slider("top_k", 0, 100, 0)
# Assemble the instruction from the sidebar inputs and show it.
prompt = create_instruction(
    in_name,
    in_about,
    in_title,
    in_labels,
    in_assignees,
    in_headline_type,
    in_headlines,
    in_summary)
st.code(prompt, language="python")

# Run generation only when the user presses the button.
if st.button("Submit"):
    with st.spinner("Please Wait..."):
        generated_template = compute(
            prompt,
            top_p=top_p_in,
            top_k=top_k_in,
            do_sample=True,
            max_length=max_length_in,
            min_length=min_length_in)
        st.code(generated_template, language="python")