amiguel
/

lightining_studio

Text Classification

Model card Files Files and versions Community

lightining_studio / app.py

amiguel's picture

Create app.py

bdb3fd1 verified about 2 months ago

history blame contribute delete

No virus

3.12 kB

	import streamlit as st
	import torch
	from transformers import GPT2Tokenizer
	import pandas as pd

	# Load the pretrained "gpt2" tokenizer once at import time; it is passed to
	# classify_review() to turn raw text into token ids.
	tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

	# Define the classification function
	def classify_review(text, model, tokenizer, device, max_length=None, pad_token_id=50256):
	    """Classify ``text`` with ``model`` and return a human-readable label.

	    The text is tokenized, truncated so that it fits the model's positional
	    embedding table, optionally right-padded with ``pad_token_id``, and run
	    through the model once. The argmax over the logits at the last sequence
	    position selects the class.

	    Args:
	        text: Input string to classify.
	        model: Model exposing ``eval()`` and ``pos_emb.weight``; it is called
	            with a ``(1, seq_len)`` tensor of token ids and its output is
	            indexed as ``[:, -1, :]``.
	        tokenizer: Object whose ``encode(text)`` returns a list of token ids.
	        device: Device on which the input tensor is created.
	        max_length: Length to pad the token sequence to. Values larger than
	            the model's context length are clamped to it. ``None`` (the
	            default) disables padding; the text is only truncated.
	        pad_token_id: Token id used for right-padding.

	    Returns:
	        ``"Proper Naming Notfcn"`` when the predicted class index is 1,
	        otherwise ``"Wrong Naming Notificn"``.
	    """
	    model.eval()

	    # Prepare inputs to the model
	    input_ids = tokenizer.encode(text)

	    # Assumes pos_emb is an nn.Embedding-style table whose weight is
	    # (context_length, emb_dim): axis 0 is the number of positions the model
	    # supports, axis 1 is the embedding width. TODO confirm against the
	    # model class definition.
	    supported_context_length = model.pos_emb.weight.shape[0]

	    # Never feed more positions than the model supports, whether they come
	    # from the text itself or from padding.
	    if max_length is None:
	        target_length = supported_context_length
	    else:
	        target_length = min(max_length, supported_context_length)

	    # Truncate sequences if they are too long
	    input_ids = input_ids[:target_length]

	    # Pad up to the requested length (only when a length was requested)
	    if max_length is not None:
	        input_ids += [pad_token_id] * (target_length - len(input_ids))

	    # An empty sequence has no "last token" to read logits from.
	    if not input_ids:
	        input_ids = [pad_token_id]

	    input_tensor = torch.tensor(input_ids, device=device).unsqueeze(0)  # add batch dimension

	    # Model inference
	    with torch.no_grad():
	        logits = model(input_tensor)[:, -1, :]  # Logits of the last output token
	    predicted_label = torch.argmax(logits, dim=-1).item()

	    # Return the classified result
	    return "Proper Naming Notfcn" if predicted_label == 1 else "Wrong Naming Notificn"

	# Set the device to run the model on (GPU if available, else CPU). This is
	# resolved before loading so the checkpoint can be mapped straight onto it.
	device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

	# Load the trained model from the local directory.
	# map_location lets a checkpoint that was saved on a GPU load on a CPU-only
	# host instead of failing while restoring CUDA tensors.
	# NOTE(review): the loaded object is used directly as a model (eval(), to()),
	# so the file appears to hold a pickled module rather than a state dict. That
	# requires the model's class to be importable here, and newer PyTorch
	# releases default torch.load to weights_only=True, which rejects such
	# pickles -- confirm against the installed version. Only load checkpoints
	# from a trusted source: unpickling can execute arbitrary code.
	model_path = "clv__classifier_774M.pth"
	model = torch.load(model_path, map_location=device)
	model.eval()
	model.to(device)

	# Streamlit app
	def main():
	    """Render the Streamlit UI and classify user-supplied text.

	    Two input modes are offered through a radio button:

	    * "Single Text Query": classify one string typed into a text box when
	      the "Classify" button is pressed.
	    * "Upload Table": read an uploaded CSV/XLSX file, let the user pick the
	      text column, classify every row, and display the table with an added
	      "Predicted Label" column.

	    Uses the module-level ``model``, ``tokenizer`` and ``device``.
	    """
	    st.title("Text Classification App")

	    # The padded sequence length originally came from `train_dataset`, which
	    # is not defined in this file. Use it when some other part of the module
	    # provides it; otherwise fall back to the number of rows in the model's
	    # positional-embedding table (assumed to be its context length).
	    # TODO: replace the fallback with the pad length used during training.
	    max_length = getattr(globals().get("train_dataset"), "max_length", None)
	    if max_length is None:
	        max_length = model.pos_emb.weight.shape[0]

	    # Input options
	    input_option = st.radio("Select input option", ("Single Text Query", "Upload Table"))

	    if input_option == "Single Text Query":
	        # Single text query input
	        text_query = st.text_input("Enter text query")
	        if st.button("Classify"):
	            if text_query:
	                # Classify the text query
	                predicted_label = classify_review(
	                    text_query, model, tokenizer, device, max_length=max_length
	                )
	                st.write("Predicted Label:")
	                st.write(predicted_label)
	            else:
	                st.warning("Please enter a text query.")

	    elif input_option == "Upload Table":
	        # Table upload
	        uploaded_file = st.file_uploader("Choose a file", type=["csv", "xlsx"])
	        if uploaded_file is not None:
	            # Read the uploaded file
	            if uploaded_file.name.endswith(".csv"):
	                df = pd.read_csv(uploaded_file)
	            else:
	                df = pd.read_excel(uploaded_file)

	            # Select the text column
	            text_column = st.selectbox("Select the text column", df.columns)
	            if text_column is None:
	                # A table without columns leaves nothing to select.
	                st.warning("The uploaded file has no columns to classify.")
	                return

	            # Missing cells are read as NaN and numeric cells as numbers;
	            # the tokenizer only accepts strings, so normalise first.
	            texts = df[text_column].fillna("").astype(str)

	            # Classify the texts in the selected column and add the
	            # predicted labels to the DataFrame
	            df["Predicted Label"] = [
	                classify_review(text, model, tokenizer, device, max_length=max_length)
	                for text in texts
	            ]

	            # Display the DataFrame with predicted labels
	            st.write(df)

	# Run the app when this file is executed as a script (this is how
	# `streamlit run` executes it).
	if __name__ == "__main__":
	    main()