Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| from transformers import AutoModel, AutoTokenizer | |
| from sklearn.neighbors import NearestNeighbors | |
# Text handed to the Gradio interface at the bottom of the file.
# NOTE: `title` is assigned a second time further down, before the interface
# is constructed, so this first value is not the one that gets displayed.
title = "Temporal evolution of word association (Overselling :P)"
description = "Based on TimeLMs which is a RoBERTa model finetuned on tweets at periodic interval"
article = "This outputs the top 500 similar tokens to the input word, as a list. Stay tuned for more info"

# Year keys offered in the model dropdown; `topk` compares against these
# exact strings.
available_models = ['2019', '2020', '2022']
# ---- 2019 snapshot ---------------------------------------------------------
# Load the checkpoint and its tokenizer, pull out the input word-embedding
# matrix as a NumPy array, and precompute a neighbour table for every row of
# that matrix so that `topk` only has to do an index lookup per request.
model_2019 = AutoModel.from_pretrained('cardiffnlp/twitter-roberta-base-2019-90m')
tokenizers_2019 = AutoTokenizer.from_pretrained('cardiffnlp/twitter-roberta-base-2019-90m')

# detach() drops the autograd graph so the weights can be viewed as NumPy.
embedding_matrix_2019 = (
    model_2019.embeddings.word_embeddings.weight.detach().numpy()
)

knn_model_2019 = NearestNeighbors(
    n_neighbors=500,
    metric='cosine',
    algorithm='auto',
    n_jobs=3,
)
nbrs_2019 = knn_model_2019.fit(embedding_matrix_2019)

# Row i of indices_2019 holds the 500 neighbour row indices for embedding
# row i, computed against the same matrix the index was fitted on.
distances_2019, indices_2019 = nbrs_2019.kneighbors(embedding_matrix_2019)
# ---- 2020 snapshot ---------------------------------------------------------
# Same pipeline as the other snapshots: checkpoint + tokenizer, embedding
# matrix as NumPy, cosine nearest-neighbour index, and a precomputed
# neighbour table covering every embedding row.
model_2020 = AutoModel.from_pretrained('cardiffnlp/twitter-roberta-base-jun2020')
tokenizers_2020 = AutoTokenizer.from_pretrained('cardiffnlp/twitter-roberta-base-jun2020')

# detach() drops the autograd graph so the weights can be viewed as NumPy.
embedding_matrix_2020 = (
    model_2020.embeddings.word_embeddings.weight.detach().numpy()
)

knn_model_2020 = NearestNeighbors(
    n_neighbors=500,
    metric='cosine',
    algorithm='auto',
    n_jobs=3,
)
nbrs_2020 = knn_model_2020.fit(embedding_matrix_2020)

# Row i of indices_2020 holds the 500 neighbour row indices for embedding
# row i, computed against the same matrix the index was fitted on.
distances_2020, indices_2020 = nbrs_2020.kneighbors(embedding_matrix_2020)
# ---- 2022 snapshot ---------------------------------------------------------
# Load the checkpoint and its tokenizer, pull out the input word-embedding
# matrix as a NumPy array, and precompute a neighbour table for every row of
# that matrix so that `topk` only has to do an index lookup per request.
model_2022 = AutoModel.from_pretrained('cardiffnlp/twitter-roberta-base-2022-154m')
tokenizers_2022 = AutoTokenizer.from_pretrained('cardiffnlp/twitter-roberta-base-2022-154m')

# detach() drops the autograd graph so the weights can be viewed as NumPy.
embedding_matrix_2022 = (
    model_2022.embeddings.word_embeddings.weight.detach().numpy()
)

knn_model_2022 = NearestNeighbors(
    n_neighbors=500,
    metric='cosine',
    algorithm='auto',
    n_jobs=3,
)
nbrs_2022 = knn_model_2022.fit(embedding_matrix_2022)

# Query the index fitted on the 2022 embeddings. The neighbour table must be
# built from the 2022 index itself; querying a different year's index would
# return that year's neighbours for the 2022 vectors and make the 2022
# results meaningless as a within-model similarity list.
distances_2022, indices_2022 = nbrs_2022.kneighbors(embedding_matrix_2022)
# Final page title: this assignment replaces any earlier value of `title`
# and is the one in effect when the interface is built below.
title = "How does a word's meaning change with time?"
def topk(word, model):
    """Return the precomputed neighbour tokens of *word* for one model year.

    Args:
        word: Text typed by the user. Only the token at position 1 of the
            encoded sequence is looked up, so a word that the tokenizer
            splits into several pieces is represented by its first piece.
        model: Year key selecting the snapshot: '2019', '2020' or '2022'.

    Returns:
        A list of decoded token strings, one for each entry in the selected
        snapshot's neighbour row for the looked-up token. An empty list is
        returned when *word* is empty/None or *model* is not a known year
        (for example when nothing has been picked in the dropdown).
    """
    # Guard first: nothing sensible can be looked up for an empty word, and
    # an unknown year has no tokenizer or neighbour table to consult.
    if not word or model not in ('2019', '2020', '2022'):
        return []

    # Pick the tokenizer and neighbour table belonging to the chosen year.
    if model == '2019':
        tokenizer, neighbours = tokenizers_2019, indices_2019
    elif model == '2020':
        tokenizer, neighbours = tokenizers_2020, indices_2020
    else:
        tokenizer, neighbours = tokenizers_2022, indices_2022

    token_ids = tokenizer.encode(f'{word}')
    # Position 0 is skipped: encode() is assumed to prepend a
    # start-of-sequence token, so the first real piece sits at position 1
    # (TODO confirm for these tokenizers).
    first_piece = token_ids[1]
    return [tokenizer.decode(i) for i in neighbours[first_piece]]
| # with gr.Blocks() as demo: | |
| # gr.Markdown(f" # {title}") | |
| # # gr.Markdown(f" ## {description1}") | |
| # # gr.Markdown(f"{description2}") | |
| # # gr.Markdown(f"{description3}") | |
| # with gr.Row(): | |
| # word = gr.Textbox(label="Word") | |
| # with gr.Row(): | |
| # greet_btn = gr.Button("Compute") | |
| # with gr.Row(): | |
| # greet_btn.click(fn=topk, inputs=[word,gr.Dropdown(models)], outputs=gr.outputs.Textbox()) | |
| # demo.launch() | |
# Build the single-function web UI: a free-text word plus a year dropdown go
# in, the list returned by `topk` comes out in a text box.
interface = gr.Interface(
    fn=topk,
    inputs=[gr.Textbox(label="Word"), gr.Dropdown(available_models)],
    # Use the top-level component, matching the inputs above, rather than the
    # legacy `gr.outputs` namespace, which newer Gradio releases no longer
    # provide.
    outputs=gr.Textbox(),
    title=title,
    description=description,
    article=article,
)

# Start the web server; this call is made at import time, as before.
interface.launch()