|
import streamlit as st |
|
import torch |
|
from transformers import GPT2Tokenizer |
|
import pandas as pd |
|
|
|
|
|
# GPT-2 BPE tokenizer used to turn raw text into token ids for the classifier.
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
|
|
|
|
|
def classify_review(text, model, tokenizer, device, max_length=None, pad_token_id=50256):
    """Classify a single text with a GPT-2-style classifier.

    The text is tokenized, truncated/padded to a fixed length, and fed through
    the model; the logits of the *last* token position decide the label.

    Args:
        text: Raw input string to classify.
        model: Classifier with a ``pos_emb`` positional-embedding table; calling
            it on a (1, seq_len) tensor must return (1, seq_len, num_classes) logits.
        tokenizer: Object exposing ``encode(text) -> list[int]``.
        device: Torch device the input tensor is created on.
        max_length: Target sequence length. ``None`` uses the model's full
            context window. Values larger than the context window are clamped.
        pad_token_id: Token id used for right-padding (50256 = GPT-2 <|endoftext|>).

    Returns:
        "Proper Naming Notfcn" if the predicted class index is 1,
        otherwise "Wrong Naming Notificn".
    """
    model.eval()

    input_ids = tokenizer.encode(text)
    supported_context_length = model.pos_emb.weight.shape[1]

    # Fix 1: the previous code crashed with TypeError on the default
    # max_length=None (min(None, int) is invalid) -- fall back to the
    # model's context window instead.
    if max_length is None:
        max_length = supported_context_length

    # Fix 2: clamp max_length so we never pad the sequence back out beyond
    # the context window after truncating it (the old code truncated to the
    # window but padded to the raw max_length, overflowing pos_emb).
    max_length = min(max_length, supported_context_length)

    input_ids = input_ids[:max_length]
    input_ids += [pad_token_id] * (max_length - len(input_ids))

    input_tensor = torch.tensor(input_ids, device=device).unsqueeze(0)

    # Inference only: no gradients needed; use the last position's logits.
    with torch.no_grad():
        logits = model(input_tensor)[:, -1, :]
    predicted_label = torch.argmax(logits, dim=-1).item()

    return "Proper Naming Notfcn" if predicted_label == 1 else "Wrong Naming Notificn"
|
|
|
|
|
# Pick the compute device *before* loading so the checkpoint tensors can be
# mapped straight onto it -- the previous torch.load(model_path) with no
# map_location raises on CPU-only machines when the checkpoint was saved
# from a GPU process.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model_path = "clv__classifier_774M.pth"
# NOTE(review): torch.load unpickles arbitrary objects -- only load trusted
# checkpoints. This file appears to hold a full pickled model, not a state_dict.
model = torch.load(model_path, map_location=device)
model.to(device)
model.eval()
|
|
|
|
|
def main():
    """Streamlit entry point: classify one typed query or every row of an uploaded table."""
    st.title("Text Classification App")

    # Pad/truncate target for classify_review. The previous code referenced
    # train_dataset.max_length, but no `train_dataset` exists in this module
    # (NameError at runtime); the model's own context window is the safe bound.
    max_length = model.pos_emb.weight.shape[1]

    input_option = st.radio("Select input option", ("Single Text Query", "Upload Table"))

    if input_option == "Single Text Query":
        text_query = st.text_input("Enter text query")
        if st.button("Classify"):
            if text_query:
                predicted_label = classify_review(
                    text_query, model, tokenizer, device, max_length=max_length
                )
                st.write("Predicted Label:")
                st.write(predicted_label)
            else:
                st.warning("Please enter a text query.")

    elif input_option == "Upload Table":
        uploaded_file = st.file_uploader("Choose a file", type=["csv", "xlsx"])
        if uploaded_file is not None:
            # Dispatch on extension: CSV goes through read_csv, anything else
            # (only .xlsx is accepted by the uploader) through read_excel.
            if uploaded_file.name.endswith(".csv"):
                df = pd.read_csv(uploaded_file)
            else:
                df = pd.read_excel(uploaded_file)

            text_column = st.selectbox("Select the text column", df.columns)

            # str() guards against non-string cells (NaN, numbers) that
            # tokenizer.encode cannot handle.
            predicted_labels = [
                classify_review(str(text), model, tokenizer, device, max_length=max_length)
                for text in df[text_column]
            ]

            df["Predicted Label"] = predicted_labels
            st.write(df)
|
|
|
# Run the app when executed directly (e.g. `streamlit run <this file>`).
if __name__ == "__main__":
    main()