Spaces:

Emily666666
/

Questions_Classification

Sleeping

App Files Files Community

Questions_Classification / app.py

Emily666666

Update app.py

9f7bad0 verified about 1 year ago

raw

history blame contribute delete

2.81 kB

	import streamlit as st
	from transformers import pipeline
	import re

	# Function to remove strange characters from the input text
	def clean_text(text):
	    """Strip every character that is not on a small whitelist.

	    ASCII letters, digits, whitespace and the punctuation marks
	    . , ! ? ' are kept; every other character is dropped.
	    """
	    disallowed = re.compile(r"[^a-zA-Z0-9\s.,!?']")
	    return disallowed.sub("", text)

	# Streamlit re-runs this script on each user interaction, so the pipelines
	# are built through a cached factory: each model is loaded once per server
	# process and reused on later reruns instead of being rebuilt every time.
	@st.cache_resource
	def _load_pipeline(task, model, **pipeline_kwargs):
	    """Build a transformers pipeline for *task* backed by *model*.

	    Extra keyword arguments are forwarded to ``pipeline`` unchanged.
	    Exceptions raised while loading propagate to the caller.
	    """
	    return pipeline(task, model=model, **pipeline_kwargs)


	# Load the text summarization pipeline
	try:
	    summarizer = _load_pipeline("summarization", "syndi-models/titlewave-t5-base")
	    summarizer_loaded = True
	except Exception as e:
	    # Load failures are not limited to ValueError (an unreachable or missing
	    # model repo surfaces as e.g. OSError); report them all in the page
	    # rather than letting the script crash.
	    st.error(f"Error loading summarization model: {e}")
	    summarizer = None
	    summarizer_loaded = False

	# Load the Question classification pipeline
	model_name = "Emily666666/bert-base-cased-news-category-test"
	try:
	    # return_all_scores=True makes the pipeline return a score for every
	    # label, which the processing code relies on to pick the best one.
	    classifier = _load_pipeline("text-classification", model_name, return_all_scores=True)
	    classifier_loaded = True
	except Exception as e:
	    st.error(f"Error loading classification model: {e}")
	    classifier = None
	    classifier_loaded = False

	# Category names listed in label-index order: the name at position i is the
	# human-readable category for numeric label i.
	_CATEGORY_NAMES = (
	    "Society & Culture",
	    "Science & Mathematics",
	    "Health",
	    "Education & Reference",
	    "Computers & Internet",
	    "Sports",
	    "Business & Finance",
	    "Entertainment & Music",
	    "Family & Relationships",
	    "Politics & Government",
	)

	# Lookup table from numeric label index to category name
	label_mapping = dict(enumerate(_CATEGORY_NAMES))

	# Page heading
	st.title("Question Rephrase and Classification")

	# Free-form box for the question to process; it starts out empty
	text_input = st.text_area(
	    label="Enter long question to rephrase and classify:",
	    value="",
	)

	def _category_name(raw_label, mapping):
	    """Translate a classifier label into a human-readable category name.

	    Labels shaped like "LABEL_<n>" are resolved by looking up index n in
	    *mapping*. A label whose suffix is not an integer, or whose index is
	    missing from *mapping*, is returned unchanged so that the prediction
	    is still shown instead of raising.
	    """
	    suffix = str(raw_label).split('_')[-1]
	    try:
	        index = int(suffix)
	    except ValueError:
	        return raw_label
	    return mapping.get(index, raw_label)


	if st.button("Process"):
	    # Validate the cleaned text rather than the raw input: whitespace-only
	    # or all-symbol input becomes empty after cleaning and must not be
	    # sent to the summarizer.
	    cleaned_text = clean_text(text_input).strip()

	    if summarizer_loaded and classifier_loaded and cleaned_text:
	        try:
	            # Perform text summarization
	            summary = summarizer(cleaned_text, max_length=130, min_length=30, do_sample=False)
	            summarized_text = summary[0]['summary_text']
	        except Exception as e:
	            st.error(f"Error during summarization: {e}")
	            summarized_text = ""

	        if summarized_text:
	            try:
	                # Classify the summarized text; the first element holds the
	                # per-label scores for the single input string.
	                results = classifier(summarized_text)[0]
	                # Find the category with the highest score
	                max_score = max(results, key=lambda x: x['score'])
	            except Exception as e:
	                st.error(f"Error during classification: {e}")
	            else:
	                # Only the model call is guarded above, so a rendering
	                # problem is not misreported as a classification error.
	                predicted_label = _category_name(max_score['label'], label_mapping)
	                st.write("Rephrased Text:", summarized_text)
	                st.write("Category:", predicted_label)
	                st.write("Score:", max_score['score'])
	    else:
	        st.warning("Please enter text to process and ensure both models are loaded.")