| import gradio as gr |
| import numpy as np |
| import torch |
| from transformers import ( |
| AutoTokenizer, |
| AutoModel, |
| AutoModelForSequenceClassification |
| ) |
| from scipy.special import softmax |
|
|
| |
| |
| |
|
|
# Encoder whose last hidden state supplies the sentence embedding.
bert_model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(bert_model_name)
# ``eval()`` returns the module itself, so it can be chained onto the load;
# it puts the model in inference mode.
bert_model = AutoModel.from_pretrained(bert_model_name).eval()

# Sequence classifier whose logits supply the sentiment score.
sentiment_model_name = "cardiffnlp/twitter-roberta-base-sentiment"
sentiment_tokenizer = AutoTokenizer.from_pretrained(sentiment_model_name)
sentiment_model = AutoModelForSequenceClassification.from_pretrained(
    sentiment_model_name
).eval()
|
|
|
|
| |
| |
| |
| |
| |
|
|
def nlp_encode_sentence(values):
    """Turn each input row into six numeric features.

    The first cell of every row is treated as a sentence. It is run through
    the module-level BERT encoder and the module-level sentiment classifier;
    the first-token embedding of BERT's last hidden state is summarised by
    five statistics and the sentiment probabilities are collapsed into one
    score.

    Args:
        values: Iterable of rows (e.g. a list of lists); only the first cell
            of each row is read. ``None`` is treated as "no rows". Missing
            or ``None`` cells are encoded as the empty string and other
            non-string cells are converted with ``str()``, so the output
            always has exactly one row per input row.

    Returns:
        A list with one ``[embedding_mean, embedding_median, embedding_std,
        embedding_min, embedding_max, sentiment_score]`` list of floats per
        input row, in input order.
    """
    if values is None:
        return []

    # Explicit cap so ``truncation=True`` really truncates even when a
    # tokenizer does not declare its own model max length.
    # NOTE(review): 512 is the usual limit for BERT/RoBERTa base checkpoints;
    # confirm against the configs of the two models loaded by this file.
    max_tokens = 512

    feature_rows = []
    for row in values:
        sentence = _first_cell_as_text(row)
        feature_rows.append(
            _embedding_summary(sentence, max_tokens)
            + [_sentiment_polarity(sentence, max_tokens)]
        )
    return feature_rows


def _first_cell_as_text(row):
    """Return the first cell of *row* as a string ('' if missing or None)."""
    cell = row[0] if len(row) else None
    return "" if cell is None else str(cell)


def _embedding_summary(sentence, max_tokens):
    """Return [mean, median, std, min, max] of the first-token embedding."""
    # One sentence per call, so no padding is required.
    inputs = tokenizer(
        sentence,
        return_tensors="pt",
        truncation=True,
        max_length=max_tokens,
    )
    with torch.no_grad():
        outputs = bert_model(**inputs)

    # Batch item 0, sequence position 0 (the [CLS] slot for BERT tokenizers).
    cls_embedding = outputs.last_hidden_state[0, 0, :].numpy()
    return [
        float(np.mean(cls_embedding)),
        float(np.median(cls_embedding)),
        float(np.std(cls_embedding)),
        float(np.min(cls_embedding)),
        float(np.max(cls_embedding)),
    ]


def _sentiment_polarity(sentence, max_tokens):
    """Return softmax probability of class 2 minus that of class 0."""
    sentiment_inputs = sentiment_tokenizer(
        sentence,
        return_tensors="pt",
        truncation=True,
        max_length=max_tokens,
    )
    with torch.no_grad():
        sentiment_outputs = sentiment_model(**sentiment_inputs)

    probs = softmax(sentiment_outputs.logits.numpy()[0])
    # NOTE(review): presumably index 0 is "negative" and index 2 is
    # "positive" for this checkpoint, giving a score in [-1, 1] — confirm
    # against the model card.
    return float(probs[2] - probs[0])
|
|
|
|
| |
| |
| |
|
|
# Column names of the result table, listed in the same order as the values
# in each row returned by ``nlp_encode_sentence``.
_FEATURE_COLUMNS = [
    "embedding_mean",
    "embedding_median",
    "embedding_std",
    "embedding_min",
    "embedding_max",
    "sentiment_score",
]

# Components are created in display order: title, input table, output table,
# then the button that triggers the encoder.
with gr.Blocks() as demo:
    gr.Markdown("### NLP Encoder")

    # Single-column table of sentences, delivered to the callback as a list
    # of rows.
    input_data = gr.Dataframe(headers=["value"], datatype=["str"], type="array")

    # One feature row per input sentence.
    output_data = gr.Dataframe(headers=_FEATURE_COLUMNS, type="array")

    btn = gr.Button("Run")
    btn.click(nlp_encode_sentence, inputs=input_data, outputs=output_data)


# Start the web UI only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|
|