Spaces:
Build error
Build error
| import streamlit as st | |
| import cv2 | |
| import numpy as np | |
| from PIL import Image | |
| import pytesseract | |
| from transformers import TrOCRProcessor, VisionEncoderDecoderModel, pipeline | |
| import re | |
# Streamlit re-executes this script from the top on every user interaction.
# Loading the models through st.cache_resource keeps one shared instance per
# server process instead of rebuilding them on each rerun.
@st.cache_resource
def _load_trocr():
    """Return the (processor, model) pair of the TrOCR handwriting checkpoint."""
    trocr_processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
    trocr_model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten")
    return trocr_processor, trocr_model


@st.cache_resource
def _load_qa_pipeline():
    """Return the pre-trained question-answering pipeline used for grading."""
    return pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")


# Load TrOCR model for handwriting recognition
processor, model = _load_trocr()
# Load pre-trained QA model for grading
qa_pipeline = _load_qa_pipeline()
def preprocess_image(image_file):
    """Binarize an uploaded image before it is handed to OCR.

    The file is opened with PIL and forced to RGB, converted to grayscale,
    smoothed with a 5x5 Gaussian blur and thresholded with Otsu's method.

    Args:
        image_file: Anything ``PIL.Image.open`` accepts (path or file object).

    Returns:
        A PIL image holding the thresholded result expanded back to three
        RGB channels.
    """
    rgb_pixels = np.array(Image.open(image_file).convert("RGB"))
    grayscale = cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2GRAY)
    smoothed = cv2.GaussianBlur(grayscale, (5, 5), 0)
    # cv2.threshold returns (threshold_value, image); only the image is needed.
    _, binary = cv2.threshold(smoothed, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    return Image.fromarray(cv2.cvtColor(binary, cv2.COLOR_GRAY2RGB))
def extract_text_with_tesseract(image):
    """Run Tesseract OCR on ``image`` and return the recognized text."""
    recognized_text = pytesseract.image_to_string(image)
    return recognized_text
def extract_text_with_trocr(image):
    """Recognize handwriting in ``image`` with the module-level TrOCR model.

    The image is encoded by ``processor``, token ids are generated by
    ``model``, and the first decoded sequence is returned with special
    tokens removed.
    """
    # NOTE(review): the whole preprocessed page is passed as a single image;
    # confirm the checkpoint is meant to handle multi-line input.
    encoded = processor(images=image, return_tensors="pt")
    token_ids = model.generate(encoded.pixel_values)
    decoded = processor.batch_decode(token_ids, skip_special_tokens=True)
    return decoded[0]
def extract_student_info(text):
    """Extract the student name and roll number from OCR text.

    Looks for ``NAME = <name>`` and ``Roll NO = <digits>`` (case-insensitive).

    Args:
        text: The OCR output to search.

    Returns:
        A ``(student_name, roll_number)`` tuple of strings. Either element is
        ``"Unknown"`` when the corresponding field is missing or blank.
    """
    # The name must start on the same line as the "=" and may only contain
    # word characters, spaces and tabs. Matching newlines here would let the
    # capture swallow the following "Roll NO" label. The lookahead also stops
    # the name before a "Roll NO =" that shares its line.
    name = re.search(
        r"NAME\s*=[ \t]*(\w[\w \t]*?)[ \t]*(?=Roll\s*NO\s*=|[^\w \t]|$)",
        text,
        re.IGNORECASE,
    )
    roll_no = re.search(r"Roll\s*NO\s*=\s*(\d+)", text, re.IGNORECASE)

    student_name = name.group(1).strip() if name else "Unknown"
    roll_number = roll_no.group(1).strip() if roll_no else "Unknown"
    return student_name, roll_number
def extract_questions_from_text(text):
    """Return the question-like lines found in OCR text.

    A question is the part of a line up to and including its last ``?``.
    Surrounding whitespace is stripped, and candidates with no letter or
    digit (for example a stray ``?`` produced by OCR noise) are discarded.

    Args:
        text: The OCR output to scan.

    Returns:
        A list of question strings in order of appearance; empty when none
        are found.
    """
    candidates = re.findall(r"[^\n]*\?", text)
    return [
        candidate.strip()
        for candidate in candidates
        if any(ch.isalnum() for ch in candidate)
    ]
def grade_answer(question, context, threshold=0.5):
    """Grade a question against a context using the module-level QA pipeline.

    Args:
        question: The question text.
        context: The text the QA model searches for an answer.
        threshold: Confidence above which the result is labelled "Correct".
            Defaults to 0.5.

    Returns:
        A ``(score, feedback)`` tuple, where ``score`` is the pipeline's
        confidence and ``feedback`` is ``"Correct"`` or ``"Incorrect"``.
        A missing or blank question or context yields ``(0.0, "Incorrect")``.
    """
    # The question-answering pipeline rejects empty inputs with an exception,
    # so blank inputs are graded as incorrect without calling the model.
    if not (question and question.strip() and context and context.strip()):
        return 0.0, "Incorrect"

    result = qa_pipeline(question=question, context=context)
    score = result['score']
    feedback = "Correct" if score > threshold else "Incorrect"
    return score, feedback
# Streamlit App
st.title("Handwritten Answer Sheet Grading System")
st.write("Upload an image or handwritten file to process.")

# Upload image
uploaded_image = st.file_uploader("Upload Handwritten Image", type=["png", "jpg", "jpeg"])

# Everything below needs an uploaded file, so it is skipped until one exists.
if uploaded_image:
    st.image(uploaded_image, caption="Original Image", use_container_width=True)

    # Show the binarized version that is actually fed to OCR.
    preprocessed_image = preprocess_image(uploaded_image)
    st.image(preprocessed_image, caption="Preprocessed Image", use_container_width=True)

    # Try Tesseract first; fall back to TrOCR when it returns 10 or fewer
    # non-whitespace-trimmed characters.
    st.subheader("Extracted Text:")
    tesseract_text = extract_text_with_tesseract(preprocessed_image)
    tesseract_looks_valid = len(tesseract_text.strip()) > 10
    extracted_text = (
        tesseract_text
        if tesseract_looks_valid
        else extract_text_with_trocr(preprocessed_image)
    )
    st.text(extracted_text)

    # Student details parsed from the recognized text.
    student_name, roll_number = extract_student_info(extracted_text)
    st.subheader(f"Student Name: {student_name}")
    st.subheader(f"Roll No: {roll_number}")

    # List every detected question, numbered from 1.
    questions = extract_questions_from_text(extracted_text)
    st.subheader("Extracted Questions")
    for number, question in enumerate(questions, start=1):
        st.write(f"Q{number}: {question}")

    # Grade each question using the full recognized text as context.
    st.subheader("Grading Results")
    for question in questions:
        score, feedback = grade_answer(question, extracted_text)
        st.write(f"**Question:** {question}")
        st.write(f"**Score:** {score:.2f}")
        st.write(f"**Feedback:** {feedback}")
        st.write("---")