Spaces:

devhem
/

AI-grading-system

Runtime error

App Files Files Community

devhem committed on Nov 25, 2024

Commit

cace677

verified ·

1 Parent(s): f3a73ca

Create app.py

Browse files

Files changed (1) hide show

app.py +89 -0

app.py ADDED Viewed

	@@ -0,0 +1,89 @@

+import streamlit as st
+from transformers import AutoModelForSequenceClassification, AutoTokenizer
+import torch
+import numpy as np
+import fitz  # PyMuPDF
+import pandas as pd
+import io
+# Load the model and tokenizer from Hugging Face
+model_name = "KevSun/Engessay_grading_ML"
+model = AutoModelForSequenceClassification.from_pretrained(model_name)
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+# Streamlit app
+st.title("Automated Scoring App")
+st.write("Enter your English essay below to predict scores from multiple dimensions:")
+# Replace text input with file uploader
+uploaded_file = st.file_uploader("Upload your PDF essay:", type=['pdf'])
+if uploaded_file:
+    # Convert uploaded file to bytes for fitz
+    pdf_bytes = uploaded_file.read()
+    # Read and display PDF content
+    with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
+        text_content = ""
+        for page in doc:
+            text_content += page.get_text()
+    # Display the extracted text
+    st.write("Extracted text from PDF:")
+    st.text_area("PDF Content", text_content, height=200, disabled=True)
+if st.button("Predict"):
+    if uploaded_file:
+        # Use the already extracted text_content for prediction
+        # Tokenize input text with truncation
+        inputs = tokenizer(
+            text_content,
+            return_tensors="pt",
+            truncation=True,
+            max_length=512  # Standard BERT/RoBERTa max length
+        )
+        # After tokenization
+        token_count = len(inputs['input_ids'][0])
+        if token_count == 512:
+            st.warning("⚠️ The text was too long and has been truncated to fit the model's maximum length. This might affect the accuracy of the predictions.")
+        # Get predictions from the model
+        with torch.no_grad():
+            outputs = model(**inputs)
+        # Extract and process predictions
+        predictions = outputs.logits.squeeze()
+        predicted_scores = predictions.numpy()
+        # Scale the predictions
+        scaled_scores = 2.25 * predicted_scores - 1.25
+        rounded_scores = [round(score * 2) / 2 for score in scaled_scores]
+        # Create results DataFrame
+        labels = ["cohesion", "syntax", "vocabulary", "phraseology", "grammar", "conventions"]
+        results_dict = {
+            'Dimension': labels,
+            'Score': rounded_scores
+        }
+        df = pd.DataFrame(results_dict)
+        # Display results in app
+        st.write("Scores:")
+        st.dataframe(df)
+        # Save CSV locally
+        local_path = "essay_scores.csv"
+        df.to_csv(local_path, index=False)
+        st.success(f"Results saved locally to {local_path}")
+        # Create download button for CSV
+        csv = df.to_csv(index=False)
+        st.download_button(
+            label="Download results as CSV",
+            data=csv,
+            file_name="essay_scores.csv",
+            mime="text/csv"
+        )
+    else:
+        st.write("Please upload a PDF file to get scores.")