import json
import math
import random
import os
import streamlit as st
import lyricsgenius
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM
st.set_page_config(page_title="HuggingArtists")
st.title("HuggingArtists")
st.sidebar.markdown(
"""
""",
unsafe_allow_html=True,
)
st.sidebar.markdown(
"""
GitHub | Project Report
""",
unsafe_allow_html=True,
)
st.sidebar.header("SETTINGS")
num_sequences = st.sidebar.number_input(
    "Number of sequences to generate",
    min_value=1,
    value=5,
    help="The number of texts to generate",
)
min_length = st.sidebar.number_input(
    "Minimum length of the sequence",
    min_value=1,
    value=100,
    help="The minimum length of the sequence to be generated",
)
max_length = st.sidebar.number_input(
    "Maximum length of the sequence",
    min_value=1,
    value=160,
    help="The maximum length of the sequence to be generated",
)
temperature = st.sidebar.slider(
    "Temperature",
    min_value=0.0,
    max_value=3.0,
    step=0.01,
    value=1.0,
    help="The value used to modulate the next-token probabilities",
)
top_p = st.sidebar.slider(
    "Top-P",
    min_value=0.0,
    max_value=1.0,
    step=0.01,
    value=0.95,
    help="If set to a float < 1, only the most probable tokens with probabilities that add up to top_p or higher are kept for generation.",
)
top_k = st.sidebar.number_input(
    "Top-K",
    min_value=0,
    value=50,
    step=1,
    help="The number of highest-probability vocabulary tokens to keep for top-k filtering.",
)
caption = (
    "HuggingArtists generates lyrics in the style of a specific artist. It works by "
    "fine-tuning a pre-trained HuggingFace Transformer on lyrics datasets parsed from Genius."
)
st.markdown("`HuggingArtists` - Train a model to generate lyrics 🎵")
st.markdown(caption)
artist_name = st.text_input("Artist name:", "Eminem")
start = st.text_input("Beginning of the song:", "But for me to rap like a computer")
# Genius API access token: prefer an environment variable and fall back to the bundled
# value (the GENIUS_ACCESS_TOKEN name is just a local convention).
TOKEN = os.environ.get("GENIUS_ACCESS_TOKEN", "q_JK_BFy9OMiG7fGTzL-nUto9JDv3iXI24aYRrQnkOvjSCSbY4BuFIindweRsr5I")
genius = lyricsgenius.Genius(TOKEN)
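# Illustrative sketch: the Genius client above can resolve the typed artist name to a
# Genius artist object, e.g. for metadata such as the artist's picture. Kept commented
# out so the app does not hit the API at import time:
#   genius_artist = genius.search_artist(artist_name, max_songs=1)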
model_html = """
"""
def post_process(output_sequences):
    predictions = []
    generated_sequences = []
    max_repeat = 2

    # Decode every generated sequence of token ids back into text
    # (`tokenizer` is the artist tokenizer loaded elsewhere in the app).
    for generated_sequence in output_sequences:
        generated_sequence = generated_sequence.tolist()
        text = tokenizer.decode(generated_sequence, clean_up_tokenization_spaces=True, skip_special_tokens=True)
        generated_sequences.append(text.strip())

    for g in generated_sequences:
        # Collapse blank lines left over from decoding.
        res = str(g).replace('\n\n\n', '\n').replace('\n\n', '\n')
        lines = res.split('\n')
        # Cap consecutive duplicate lines at `max_repeat` so the lyrics do not loop endlessly.
        deduped = []
        for line in lines:
            if len(deduped) >= max_repeat and all(prev == line for prev in deduped[-max_repeat:]):
                continue
            deduped.append(line)
        predictions.append('\n'.join(deduped))

    return predictions
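# Illustrative sketch: the HuggingArtists checkpoints are published on the Hub under the
# "huggingartists/<artist>" namespace (e.g. "huggingartists/eminem"); deriving the model
# id from `artist_name` as done below is an assumption about that naming scheme.
def load_artist_model(name):
    model_id = "huggingartists/" + name.strip().lower().replace(" ", "-")
    tok = AutoTokenizer.from_pretrained(model_id)
    mdl = AutoModelForCausalLM.from_pretrained(model_id)
    return tok, mdl

# Typical flow (a sketch, assuming the helpers above):
#   tokenizer, model = load_artist_model(artist_name)
#   outputs = generate_sequences(model, tokenizer, start)
#   predictions = post_process(outputs)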
def get_table(table_data):
    # Minimal HTML table wrapper around the generated rows; assumes each entry in
    # `table_data` is already a pre-formatted HTML row/cell.
    html = (
        "<table>\r\n"
        "  <tbody>\r\n"
        f"    {' '.join(table_data)}\r\n"
        "  </tbody>\r\n"
        "</table>"
    )
    return html