"""Streamlit app that rephrases a long question and classifies its topic.

The flow is: clean the user's text, summarize it with a seq2seq model, then
run a text classifier on the summary and display the best-scoring category.
"""

import re

import streamlit as st
from transformers import pipeline

# Hugging Face model identifiers used by the two pipelines.
SUMMARIZER_MODEL = "syndi-models/titlewave-t5-base"
model_name = "Emily666666/bert-base-cased-news-category-test"

# Matches every character that is NOT an ASCII letter, digit, whitespace or
# one of the punctuation marks . , ! ? '  -- compiled once at import time.
_DISALLOWED_CHARS = re.compile(r"[^a-zA-Z0-9\s.,!?']")


def clean_text(text):
    """Return *text* with all disallowed characters removed.

    Only ASCII alphanumerics, whitespace and the punctuation ``. , ! ? '``
    are kept; everything else is dropped.
    """
    return _DISALLOWED_CHARS.sub("", text)


@st.cache_resource
def _load_pipeline(task, model, **kwargs):
    """Build a transformers pipeline once and reuse it across reruns.

    Streamlit re-executes this script on every widget interaction; without
    caching, both models would be reloaded each time.
    """
    return pipeline(task, model=model, **kwargs)


# Load the text summarization pipeline.
# Missing models / download problems are reported by transformers as OSError,
# while an invalid task or configuration raises ValueError; both are shown to
# the user instead of crashing the app.
try:
    summarizer = _load_pipeline("summarization", SUMMARIZER_MODEL)
    summarizer_loaded = True
except (OSError, ValueError) as e:
    st.error(f"Error loading summarization model: {e}")
    summarizer = None
    summarizer_loaded = False

# Load the question classification pipeline.
# NOTE(review): `return_all_scores` appears to be deprecated in recent
# transformers releases in favour of `top_k=None`; it is kept because the
# `[0]` indexing of the classifier result below relies on the output shape it
# produces -- confirm before migrating.
try:
    classifier = _load_pipeline(
        "text-classification", model_name, return_all_scores=True
    )
    classifier_loaded = True
except (OSError, ValueError) as e:
    st.error(f"Error loading classification model: {e}")
    classifier = None
    classifier_loaded = False

# Dictionary to map numerical labels to real labels
label_mapping = {
    0: "Society & Culture",
    1: "Science & Mathematics",
    2: "Health",
    3: "Education & Reference",
    4: "Computers & Internet",
    5: "Sports",
    6: "Business & Finance",
    7: "Entertainment & Music",
    8: "Family & Relationships",
    9: "Politics & Government",
}


def _resolve_label(raw_label):
    """Translate a raw classifier label into a human-readable category.

    Labels shaped like ``"LABEL_3"`` are looked up in ``label_mapping`` by
    their numeric suffix. A label without a numeric suffix, or with an index
    that is not in the mapping, is returned unchanged so a valid prediction
    is still shown rather than reported as an error.
    """
    try:
        index = int(raw_label.split("_")[-1])
    except ValueError:
        return raw_label
    return label_mapping.get(index, raw_label)


# Streamlit app title
st.title("Question Rephrase and Classification")

# Input text for summarization and classification
text_input = st.text_area("Enter long question to rephrase and classify:", "")

if st.button("Process"):
    cleaned_text = clean_text(text_input)

    # Check the *cleaned* text: input that is only whitespace, or that
    # consists solely of stripped characters, has nothing to summarize.
    if summarizer_loaded and classifier_loaded and cleaned_text.strip():
        try:
            # Perform text summarization
            summary = summarizer(
                cleaned_text, max_length=130, min_length=30, do_sample=False
            )
            summarized_text = summary[0]["summary_text"]
        except Exception as e:
            st.error(f"Error during summarization: {e}")
            summarized_text = ""

        if summarized_text:
            try:
                # Classify the summarized text and keep the entry with the
                # highest score.
                results = classifier(summarized_text)[0]
                max_score = max(results, key=lambda x: x["score"])
                predicted_label = _resolve_label(max_score["label"])
            except Exception as e:
                st.error(f"Error during classification: {e}")
            else:
                st.write("Rephrased Text:", summarized_text)
                st.write("Category:", predicted_label)
                st.write("Score:", max_score["score"])
    else:
        st.warning("Please enter text to process and ensure both models are loaded.")