import json
import math
import random
import os

import streamlit as st
import lyricsgenius
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM

st.set_page_config(page_title="HuggingArtists")
st.title("HuggingArtists")
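# `tokenizer` and `model` are referenced further down (see `post_process`)
# but are constructed elsewhere in the app. Below is a minimal sketch of the
# presumed flow, assuming the fine-tuned checkpoints are published on the Hub
# under the `huggingartists/<artist>` namespace; `load_model` and
# `generate_lyrics` are illustrative names, not part of the app's actual code.
def load_model(handle: str = "huggingartists/eminem"):
    # The tokenizer and the causal LM come from the same fine-tuned repo.
    tokenizer = AutoTokenizer.from_pretrained(handle)
    model = AutoModelForCausalLM.from_pretrained(handle)
    return tokenizer, model


def generate_lyrics(model, tokenizer, prompt, num_sequences, min_length,
                    max_length, temperature, top_p, top_k):
    # Encode the prompt and sample continuations; the sidebar settings map
    # one-to-one onto `model.generate` keyword arguments.
    input_ids = tokenizer.encode(prompt, return_tensors="pt")
    return model.generate(
        input_ids,
        do_sample=True,
        num_return_sequences=num_sequences,
        min_length=min_length,
        max_length=max_length,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
    )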
""", unsafe_allow_html=True, ) st.sidebar.markdown( """ """, unsafe_allow_html=True, ) st.sidebar.header("SETTINGS") num_sequences = st.sidebar.number_input( "Number of sequences to generate", min_value=1, value=5, help="The amount of generated texts", ) min_length = st.sidebar.number_input( "Minimum length of the sequence", min_value=1, value=100, help="The minimum length of the sequence to be generated", ) max_length= st.sidebar.number_input( "Maximum length of the sequence", min_value=1, value=160, help="The maximum length of the sequence to be generated", ) temperature = st.sidebar.slider( "Temperature", min_value=0.0, max_value=3.0, step=0.01, value=1.0, help="The value used to module the next token probabilities", ) top_p = st.sidebar.slider( "Top-P", min_value=0.0, max_value=1.0, step=0.01, value=0.95, help="If set to float < 1, only the most probable tokens with probabilities that add up to top_p or higher are kept for generation.", ) top_k= st.sidebar.number_input( "Top-K", min_value=0, value=50, step=1, help="The number of highest probability vocabulary tokens to keep for top-k-filtering.", ) caption = ( "In HuggingArtists, we can generate lyrics by a specific artist. This was made by fine-tuning a pre-trained HuggingFace Transformer on parsed datasets from Genius." ) st.markdown("`HuggingArtists` - Train a model to generate lyrics 🎵") st.markdown(caption) artist_name = st.text_input("Artist name:", "Eminem") start = st.text_input("Beginning of the song:", "But for me to rap like a computer") TOKEN = "q_JK_BFy9OMiG7fGTzL-nUto9JDv3iXI24aYRrQnkOvjSCSbY4BuFIindweRsr5I" genius = lyricsgenius.Genius(TOKEN) model_html = """ """ def post_process(output_sequences): predictions = [] generated_sequences = [] max_repeat = 2 # decode prediction for generated_sequence_idx, generated_sequence in enumerate(output_sequences): generated_sequence = generated_sequence.tolist() text = tokenizer.decode(generated_sequence, clean_up_tokenization_spaces=True, skip_special_tokens=True) generated_sequences.append(text.strip()) for i, g in enumerate(generated_sequences): res = str(g).replace('\n\n\n', '\n').replace('\n\n', '\n') lines = res.split('\n') # print(lines) # i = max_repeat # while i != len(lines): # remove_count = 0 # for index in range(0, max_repeat): # # print(i - index - 1, i - index) # if lines[i - index - 1] == lines[i - index]: # remove_count += 1 # if remove_count == max_repeat: # lines.pop(i) # i -= 1 # else: # i += 1 predictions.append('\n'.join(lines)) return predictions def get_table(table_data): html = ("\r\n" "\r\n\r\n" "" "\r\n\r\n" "