devhem committed on
Commit
cace677
·
verified ·
1 Parent(s): f3a73ca

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +89 -0
app.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit app that scores an uploaded PDF essay with a Hugging Face model.

The script extracts the text of an uploaded PDF with PyMuPDF, feeds it to a
sequence-classification model, rescales the raw outputs to half-point scores
for six writing dimensions, and offers the result as a CSV download.
"""

import io

import fitz  # PyMuPDF
import numpy as np
import pandas as pd
import streamlit as st
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Hugging Face model used for scoring.
model_name = "KevSun/Engessay_grading_ML"

# Maximum number of tokens passed to the model; longer essays are truncated.
MAX_TOKENS = 512

# Linear rescaling applied to the raw model outputs before rounding.
SCORE_SCALE = 2.25
SCORE_OFFSET = 1.25

# One label per model output, in output order.
SCORE_LABELS = ["cohesion", "syntax", "vocabulary", "phraseology", "grammar", "conventions"]

# File the results are written to on the machine running the app.
LOCAL_CSV_PATH = "essay_scores.csv"


@st.cache_resource
def load_model_and_tokenizer(name):
    """Load the classification model and its tokenizer once per server process.

    Streamlit re-executes the whole script on every widget interaction;
    caching the loaded objects avoids re-reading the weights on each rerun.

    Args:
        name: Hugging Face model identifier.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    loaded_model = AutoModelForSequenceClassification.from_pretrained(name)
    loaded_tokenizer = AutoTokenizer.from_pretrained(name)
    return loaded_model, loaded_tokenizer


def extract_pdf_text(pdf_bytes):
    """Return the concatenated text of every page of an in-memory PDF.

    Args:
        pdf_bytes: Raw bytes of the PDF document.

    Returns:
        The text of all pages joined in page order (may be empty, e.g. for
        image-only PDFs).

    Raises:
        RuntimeError: If PyMuPDF cannot open the data as a PDF. Recent
            PyMuPDF versions raise ``FileDataError``/``EmptyFileError``,
            both documented as ``RuntimeError`` subclasses.
    """
    with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
        return "".join(page.get_text() for page in doc)


def predict_scores(text):
    """Score ``text`` with the loaded model.

    Args:
        text: Essay text to score.

    Returns:
        A ``(scores, truncated)`` tuple. ``scores`` is a list of floats
        rounded to the nearest 0.5, one per model output. ``truncated`` is
        True when the tokenized input reached ``MAX_TOKENS``.
    """
    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=MAX_TOKENS,
    )

    # NOTE: an essay that tokenizes to exactly MAX_TOKENS is also reported as
    # truncated; the tokenizer output alone cannot distinguish the two cases.
    truncated = inputs["input_ids"].shape[1] >= MAX_TOKENS

    with torch.no_grad():
        outputs = model(**inputs)

    # Drop only the batch dimension (a single essay is scored per call).
    raw_scores = outputs.logits.squeeze(0).numpy()
    scaled_scores = SCORE_SCALE * raw_scores - SCORE_OFFSET

    # Round to the nearest half point; np.round uses the same half-to-even
    # rule as the builtin round().
    rounded_scores = (np.round(scaled_scores * 2) / 2).tolist()
    return rounded_scores, truncated


# Load the model and tokenizer from Hugging Face (cached across reruns).
model, tokenizer = load_model_and_tokenizer(model_name)

# Streamlit app
st.title("Automated Scoring App")
st.write("Enter your English essay below to predict scores from multiple dimensions:")

uploaded_file = st.file_uploader("Upload your PDF essay:", type=['pdf'])

text_content = ""
if uploaded_file is not None:
    try:
        # getvalue() returns the whole buffer regardless of the current
        # stream position, unlike read().
        text_content = extract_pdf_text(uploaded_file.getvalue())
    except RuntimeError as exc:
        st.error(f"Could not read the uploaded PDF: {exc}")
    else:
        # Display the extracted text
        st.write("Extracted text from PDF:")
        st.text_area("PDF Content", text_content, height=200, disabled=True)

if st.button("Predict"):
    if uploaded_file is None:
        st.write("Please upload a PDF file to get scores.")
    elif not text_content.strip():
        # Nothing to score: the PDF was unreadable or contains no text layer.
        st.warning("No text could be extracted from the PDF, so there is nothing to score.")
    else:
        rounded_scores, was_truncated = predict_scores(text_content)
        if was_truncated:
            st.warning("⚠️ The text was too long and has been truncated to fit the model's maximum length. This might affect the accuracy of the predictions.")

        # Create results DataFrame
        df = pd.DataFrame({
            'Dimension': SCORE_LABELS,
            'Score': rounded_scores,
        })

        # Display results in app
        st.write("Scores:")
        st.dataframe(df)

        # Save CSV on the machine running the app
        df.to_csv(LOCAL_CSV_PATH, index=False)
        st.success(f"Results saved locally to {LOCAL_CSV_PATH}")

        # Create download button for CSV
        csv = df.to_csv(index=False)
        st.download_button(
            label="Download results as CSV",
            data=csv,
            file_name="essay_scores.csv",
            mime="text/csv",
        )