"""Streamlit app: upload an English essay as a PDF and score it on six dimensions.

The script extracts the text of the uploaded PDF with PyMuPDF, runs it through
a Hugging Face sequence-classification model, rescales the raw outputs, and
shows the result as a table that can also be downloaded as CSV.
"""

import streamlit as st
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch
import numpy as np
import fitz  # PyMuPDF
import pandas as pd
import io

# Hugging Face model identifier used for scoring.
model_name = "KevSun/Engessay_grading_ML"

# Token limit passed to the tokenizer (standard BERT/RoBERTa max length).
MAX_TOKENS = 512

# One label per model output, in output order.
SCORE_LABELS = [
    "cohesion",
    "syntax",
    "vocabulary",
    "phraseology",
    "grammar",
    "conventions",
]

# File name used both for the local copy and for the download button.
CSV_FILE_NAME = "essay_scores.csv"


@st.cache_resource
def _load_model_and_tokenizer(name: str):
    """Load the model and tokenizer once per server process.

    Streamlit re-executes the whole script on every widget interaction;
    caching avoids calling ``from_pretrained`` again on each rerun.

    Args:
        name: Hugging Face model identifier.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    loaded_model = AutoModelForSequenceClassification.from_pretrained(name)
    loaded_tokenizer = AutoTokenizer.from_pretrained(name)
    return loaded_model, loaded_tokenizer


def _extract_pdf_text(pdf_bytes: bytes) -> str:
    """Return the concatenated text of every page of the given PDF bytes."""
    with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
        return "".join(page.get_text() for page in doc)


def _score_text(text: str):
    """Run the model on ``text``.

    Returns:
        A ``(scores, truncated)`` tuple: ``scores`` is a list of floats
        rounded to the nearest 0.5 (one per entry of ``SCORE_LABELS``), and
        ``truncated`` is True when the tokenized input reached ``MAX_TOKENS``.
    """
    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=MAX_TOKENS,
    )
    # With truncation enabled the length can never exceed MAX_TOKENS, so
    # hitting the limit is the only available signal.
    # NOTE: an essay that tokenizes to exactly MAX_TOKENS is flagged as well.
    truncated = len(inputs["input_ids"][0]) >= MAX_TOKENS

    with torch.no_grad():
        outputs = model(**inputs)

    # Drop only the batch dimension (a single essay is scored at a time).
    raw_scores = outputs.logits.squeeze(0).numpy()

    # Affine rescaling of the raw outputs; constants are kept as-is.
    # NOTE(review): presumably maps model outputs onto the grading scale --
    # confirm against the model card.
    scaled_scores = 2.25 * raw_scores - 1.25

    # Round each score to the nearest half point.
    rounded_scores = [round(float(score) * 2) / 2 for score in scaled_scores]
    return rounded_scores, truncated


model, tokenizer = _load_model_and_tokenizer(model_name)

# Streamlit app
st.title("Automated Scoring App")
st.write("Enter your English essay below to predict scores from multiple dimensions:")

uploaded_file = st.file_uploader("Upload your PDF essay:", type=['pdf'])

# Defined up front so the Predict handler can always refer to it.
text_content = ""

if uploaded_file:
    text_content = _extract_pdf_text(uploaded_file.read())

    # Show the user what will actually be scored.
    st.write("Extracted text from PDF:")
    st.text_area("PDF Content", text_content, height=200, disabled=True)

if st.button("Predict"):
    if not uploaded_file:
        st.write("Please upload a PDF file to get scores.")
    elif not text_content.strip():
        # E.g. a scanned/image-only PDF: scoring an empty string would
        # produce meaningless numbers.
        st.warning("No text could be extracted from the PDF, so there is nothing to score.")
    else:
        rounded_scores, was_truncated = _score_text(text_content)

        if was_truncated:
            st.warning("⚠️ The text was too long and has been truncated to fit the model's maximum length. This might affect the accuracy of the predictions.")

        df = pd.DataFrame({
            'Dimension': SCORE_LABELS,
            'Score': rounded_scores,
        })

        # Display results in app
        st.write("Scores:")
        st.dataframe(df)

        # Save a copy next to the running app (server-side working directory).
        local_path = CSV_FILE_NAME
        df.to_csv(local_path, index=False)
        st.success(f"Results saved locally to {local_path}")

        # Offer the same CSV to the user as a download.
        csv = df.to_csv(index=False)
        st.download_button(
            label="Download results as CSV",
            data=csv,
            file_name=CSV_FILE_NAME,
            mime="text/csv",
        )