amiguel committed on
Commit bdb3fd1
1 Parent(s): bfc8095

Create app.py

Files changed (1)
  1. app.py +93 -0
app.py ADDED
@@ -0,0 +1,93 @@
+ import streamlit as st
+ import torch
+ from transformers import GPT2Tokenizer
+ import pandas as pd
+
+ # Load the tokenizer
+ tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
+
+ # Define the classification function
+ def classify_review(text, model, tokenizer, device, max_length=None, pad_token_id=50256):
+     model.eval()
+
+     # Prepare inputs to the model
+     input_ids = tokenizer.encode(text)
+     supported_context_length = model.pos_emb.weight.shape[1]
+     if max_length is None:
+         max_length = supported_context_length
+
+     # Truncate sequences if they are too long
+     input_ids = input_ids[:min(max_length, supported_context_length)]
+
+     # Pad sequences to max_length
+     input_ids += [pad_token_id] * (max_length - len(input_ids))
+     input_tensor = torch.tensor(input_ids, device=device).unsqueeze(0)  # add batch dimension
+
+     # Model inference
+     with torch.no_grad():
+         logits = model(input_tensor)[:, -1, :]  # Logits of the last output token
+     predicted_label = torch.argmax(logits, dim=-1).item()
+
+     # Return the classified result
+     return "Proper Naming Notfcn" if predicted_label == 1 else "Wrong Naming Notificn"
+
+ # Set the device to run the model on (GPU if available, else CPU)
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+ # Load the trained model from the local directory (a full pickled model object;
+ # newer PyTorch versions may additionally require weights_only=False here)
+ model_path = "clv__classifier_774M.pth"
+ model = torch.load(model_path, map_location=device)
+ model.to(device)
+ model.eval()
+
+ # Maximum sequence length for classification; train_dataset.max_length is not
+ # available in this app, so the model's context length is used instead
+ max_length = model.pos_emb.weight.shape[1]
+
+ # Streamlit app
+ def main():
+     st.title("Text Classification App")
+
+     # Input options
+     input_option = st.radio("Select input option", ("Single Text Query", "Upload Table"))
+
+     if input_option == "Single Text Query":
+         # Single text query input
+         text_query = st.text_input("Enter text query")
+         if st.button("Classify"):
+             if text_query:
+                 # Classify the text query
+                 predicted_label = classify_review(text_query, model, tokenizer, device, max_length=max_length)
+                 st.write("Predicted Label:")
+                 st.write(predicted_label)
+             else:
+                 st.warning("Please enter a text query.")
+
+     elif input_option == "Upload Table":
+         # Table upload
+         uploaded_file = st.file_uploader("Choose a file", type=["csv", "xlsx"])
+         if uploaded_file is not None:
+             # Read the uploaded file
+             if uploaded_file.name.endswith(".csv"):
+                 df = pd.read_csv(uploaded_file)
+             else:
+                 df = pd.read_excel(uploaded_file)
+
+             # Select the text column
+             text_column = st.selectbox("Select the text column", df.columns)
+
+             # Classify the texts in the selected column
+             predicted_labels = []
+             for text in df[text_column]:
+                 predicted_label = classify_review(str(text), model, tokenizer, device, max_length=max_length)
+                 predicted_labels.append(predicted_label)
+
+             # Add the predicted labels to the DataFrame
+             df["Predicted Label"] = predicted_labels
+
+             # Display the DataFrame with predicted labels
+             st.write(df)
+
+ if __name__ == "__main__":
+     main()
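
The app itself is launched with `streamlit run app.py`. For reference, below is a minimal standalone sketch (not part of the commit) of how classify_review can be exercised outside Streamlit; it assumes clv__classifier_774M.pth is present locally and contains a full pickled GPT-style model with a pos_emb attribute, as app.py expects, and the sample query string is purely hypothetical.

# Standalone sanity check (hypothetical, not part of app.py)
import torch
from transformers import GPT2Tokenizer

from app import classify_review  # reuses the function defined in the commit above

tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = torch.load("clv__classifier_774M.pth", map_location=device)  # assumes the checkpoint exists locally
model.to(device)
model.eval()

sample_text = "Replace pressure safety valve on unit 12"  # hypothetical query
print(classify_review(sample_text, model, tokenizer, device,
                      max_length=model.pos_emb.weight.shape[1]))

Note that importing app also runs its module-level setup (tokenizer and model loading), so the checkpoint must be available before the import.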