nlp_use_case / app.py
safoinme's picture
Upload folder using huggingface_hub
91489bf
raw
history blame
3.29 kB
# Apache Software License 2.0
#
# Copyright (c) ZenML GmbH 2023. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import click
import numpy as np
import os
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from os.path import dirname
import gradio as gr
@click.command()
@click.option(
    "--tokenizer_name_or_path",
    default="tokenizer",
    help="Name or the path of the tokenizer.",
)
@click.option(
    "--model_name_or_path",
    default="model",
    help="Name or the path of the model.",
)
@click.option(
    "--labels", default="Negative,Positive", help="Comma-separated list of labels."
)
@click.option(
    "--title", default="ZenML NLP Use-Case", help="Title of the Gradio interface."
)
@click.option(
    "--description",
    default="Sentiment Analyzer",
    help="Description of the Gradio interface.",
)
@click.option(
    "--interpretation",
    default="default",
    help="Interpretation mode for the Gradio interface.",
)
@click.option(
    "--examples",
    default="This is an awesome journey, I love it!",
    help="Example text to show in the Gradio interface.",
)
def sentiment_analysis(
    tokenizer_name_or_path,
    model_name_or_path,
    labels,
    title,
    description,
    interpretation,
    examples,
):
    """Launch a Gradio interface that classifies the sentiment of a text."""
    # Parameters (all supplied by the click options above):
    #   tokenizer_name_or_path / model_name_or_path: directories resolved
    #       relative to the folder containing this file (an absolute path is
    #       used as-is).
    #   labels: comma-separated class names, paired positionally with the
    #       scores produced by the model.
    #   title / description / interpretation: forwarded to ``gr.Interface``.
    #   examples: a single example text (it is NOT split on commas, since the
    #       default example itself contains one).

    # Strip stray whitespace so "Negative, Positive" yields clean label names.
    label_names = [label.strip() for label in labels.split(",")]
    example_texts = [examples]

    # Anchor on the absolute location of this file: with a bare relative
    # ``__file__`` the directory part is empty and a plain "/"-joined path
    # would point at the filesystem root. The trailing "" keeps the trailing
    # path separator the loaders were previously given.
    base_dir = dirname(os.path.abspath(__file__))
    model_path = os.path.join(base_dir, model_name_or_path, "")
    print(f"Loading model from {model_path}")
    tokenizer_path = os.path.join(base_dir, tokenizer_name_or_path, "")
    print(f"Loading tokenizer from {tokenizer_path}")

    # Load once at start-up instead of on every request: reloading inside the
    # prediction function made each call pay the full deserialization cost.
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
    model = AutoModelForSequenceClassification.from_pretrained(model_path)

    def preprocess(text):
        """Replace user mentions with "@user" and links with "http"."""

        def normalize(token):
            # A lone "@" is left untouched; only "@something" is a mention.
            if token.startswith("@") and len(token) > 1:
                token = "@user"
            return "http" if token.startswith("http") else token

        return " ".join(normalize(token) for token in text.split(" "))

    def softmax(x):
        """Return the softmax of ``x`` computed along axis 0."""
        # Subtracting the maximum avoids overflow in ``np.exp``.
        e_x = np.exp(x - np.max(x))
        return e_x / e_x.sum(axis=0)

    def analyze_text(text):
        """Return a ``{label: probability}`` mapping for ``text``."""
        # Truncate so that over-long input cannot exceed the model's maximum
        # sequence length and fail inside the forward pass.
        encoded_input = tokenizer(
            preprocess(text), return_tensors="pt", truncation=True
        )
        output = model(**encoded_input)
        # presumably output[0] holds the logits and [0] selects the single
        # input in the batch — TODO confirm against the model's output type.
        logits = output[0][0].detach().numpy()
        probabilities = softmax(logits)
        return {
            label: float(score)
            for label, score in zip(label_names, probabilities)
        }

    # NOTE(review): the ``interpretation`` argument appears to have been
    # removed in newer Gradio releases — confirm the pinned Gradio version.
    demo = gr.Interface(
        fn=analyze_text,
        inputs=[gr.TextArea("Write your text or tweet here", label="Analyze Text")],
        outputs=["label"],
        title=title,
        description=description,
        interpretation=interpretation,
        examples=example_texts,
    )
    demo.launch(share=True, debug=True)
# Script entry point: run the click command, which parses the command-line
# options and launches the Gradio interface.
if __name__ == "__main__":
    sentiment_analysis()