Spaces:
Sleeping
Sleeping
Create utils.py
Browse files
utils.py
ADDED
@@ -0,0 +1,186 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# utils.py
|
2 |
+
# Contains shared utility functions for text processing, audio transcription,
|
3 |
+
# date/time handling, and image analysis that can be used by any assessment module.
|
4 |
+
|
5 |
+
import os
|
6 |
+
import re
|
7 |
+
import time
|
8 |
+
from datetime import datetime
|
9 |
+
|
10 |
+
import cv2
|
11 |
+
import nltk
|
12 |
+
import numpy as np
|
13 |
+
import pytz
|
14 |
+
import whisper
|
15 |
+
from scipy.io.wavfile import write as write_wav
|
16 |
+
from shapely.geometry import Polygon
|
17 |
+
|
# --- NLTK Setup ---
# Keep NLTK data in a directory next to this file and make sure NLTK's
# search path includes it, so downloads below land (and are found) locally.
LOCAL_NLTK_DATA_PATH = os.path.join(os.path.dirname(__file__), 'nltk_data')
if LOCAL_NLTK_DATA_PATH not in nltk.data.path:
    nltk.data.path.append(LOCAL_NLTK_DATA_PATH)
22 |
+
|
def download_nltk_data_if_needed(resource_name, download_name):
    """Ensure an NLTK resource is available, downloading it locally if missing.

    Args:
        resource_name: Lookup path for ``nltk.data.find`` (e.g. 'tokenizers/punkt').
        download_name: Package name for ``nltk.download`` (e.g. 'punkt').
    """
    try:
        nltk.data.find(resource_name)
    except LookupError:
        print(f"Downloading NLTK resource '{download_name}'...")
        # exist_ok avoids the race between a separate existence check and
        # makedirs (the original checked os.path.exists first).
        os.makedirs(LOCAL_NLTK_DATA_PATH, exist_ok=True)
        nltk.download(download_name, download_dir=LOCAL_NLTK_DATA_PATH)
        print("Download complete.")
33 |
+
|
# Download necessary NLTK packages
# punkt: tokenizer, averaged_perceptron_tagger: POS tagger — both are used
# by score_sentence_structure below. These run at import time, so the first
# import may hit the network.
download_nltk_data_if_needed('tokenizers/punkt', 'punkt')
download_nltk_data_if_needed('taggers/averaged_perceptron_tagger', 'averaged_perceptron_tagger')
37 |
+
|
38 |
+
|
# --- Whisper Model Loading ---
# The model is loaded once at import time and shared by every transcribe()
# call. NOTE(review): this makes importing the module slow and memory-heavy;
# consider lazy loading if startup time matters — confirm with callers first.
print("Loading Whisper transcription model...")
model = whisper.load_model("small")
print("Whisper model loaded.")
43 |
+
|
def transcribe(audio):
    """Transcribe an audio clip with the module-level Whisper model.

    Args:
        audio: A ``(sample_rate, samples)`` tuple, or ``None``.

    Returns:
        The transcribed English text, or ``""`` when no audio was supplied.
    """
    import tempfile  # local import: only needed by this function

    if audio is None:
        return ""
    sample_rate, y = audio
    # Use a unique temp file instead of a fixed /tmp path so concurrent
    # calls cannot clobber each other's audio, and remove it afterwards
    # (the original leaked /tmp/temp_audio.wav and was not portable).
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        temp_wav_path = tmp.name
    try:
        write_wav(temp_wav_path, sample_rate, y)
        result = model.transcribe(temp_wav_path, language="en")
        return result["text"]
    finally:
        try:
            os.remove(temp_wav_path)
        except OSError:
            pass  # best effort cleanup; never mask the real result/error
53 |
+
|
54 |
+
|
# --- Date & Time Utilities ---
# All "current time" logic is anchored to US Eastern time.
TARGET_TIMEZONE = pytz.timezone("America/New_York")
# NOTE(review): `now` is captured once at import time and never refreshed;
# a long-running process will see a stale timestamp. Confirm whether callers
# expect import-time or call-time "now".
now_utc = datetime.now(pytz.utc)
now = now_utc.astimezone(TARGET_TIMEZONE)
59 |
+
|
def get_season(month):
    """Return the Northern Hemisphere season name for a month number (1-12).

    Months outside 3-11 (i.e. 12, 1, 2 — and any other value) map to winter,
    matching the original catch-all behavior.
    """
    if month in (3, 4, 5):
        return "spring"
    if month in (6, 7, 8):
        return "summer"
    if month in (9, 10, 11):
        return "fall"
    return "winter"
66 |
+
|
67 |
+
|
# --- Text Normalization and Cleaning Dictionaries & Functions ---
# Spoken cardinal number words -> digit strings. Multi-word entries
# (e.g. 'thirty one', 'ninety three') cover specific expected answers;
# normalize_numeric_words applies these with word-boundary regexes.
WORD_TO_DIGIT = {
    'zero': '0', 'one': '1', 'two': '2', 'three': '3', 'four': '4', 'five': '5',
    'six': '6', 'seven': '7', 'eight': '8', 'nine': '9', 'ten': '10',
    'eleven': '11', 'twelve': '12', 'thirteen': '13', 'fourteen': '14',
    'fifteen': '15', 'sixteen': '16', 'seventeen': '17', 'eighteen': '18',
    'nineteen': '19', 'twenty': '20', 'thirty': '30', 'thirty one': '31',
    'ninety three': '93', 'eighty six': '86', 'seventy nine': '79',
    'seventy two': '72', 'sixty five': '65'
}
78 |
+
|
# Spoken/written ordinals -> day-of-month digit strings. Hyphenated
# ('twenty-first'), spaced ('twenty first'), and numeric-suffix ('21st')
# variants are all included; normalize_date_answer matches these by
# substring and then strips everything but the digits.
ORDINAL_TO_DIGIT = {
    'first': '1', 'second': '2', 'third': '3', 'fourth': '4', 'fifth': '5',
    'sixth': '6', 'seventh': '7', 'eighth': '8', 'ninth': '9', 'tenth': '10',
    'eleventh': '11', 'twelfth': '12', 'thirteenth': '13', 'fourteenth': '14',
    'fifteenth': '15', 'sixteenth': '16', 'seventeenth': '17', 'eighteenth': '18',
    'nineteenth': '19', 'twentieth': '20', 'thirtieth': '30', 'twenty-first': '21',
    'twenty-second': '22', 'twenty-third': '23', 'twenty-fourth': '24',
    'twenty-fifth': '25', 'twenty-sixth': '26', 'twenty-seventh': '27',
    'twenty-eighth': '28', 'twenty-ninth': '29', 'thirty-first': '31',
    'twenty first': '21', 'twenty second': '22', 'twenty third': '23',
    'twenty fourth': '24', 'twenty fifth': '25', 'twenty sixth': '26',
    'twenty seventh': '27', 'twenty eighth': '28', 'twenty ninth': '29',
    'thirty first': '31', '1st': '1', '2nd': '2', '3rd': '3', '4th': '4',
    '5th': '5', '6th': '6', '7th': '7', '8th': '8', '9th': '9', '10th': '10',
    '11th': '11', '12th': '12', '13th': '13', '14th': '14', '15th': '15',
    '16th': '16', '17th': '17', '18th': '18', '19th': '19', '20th': '20',
    '21st': '21', '22nd': '22', '23rd': '23', '24th': '24', '25th': '25',
    '26th': '26', '27th': '27', '28th': '28', '29th': '29', '30th': '30', '31st': '31'
}
98 |
+
|
def clean_text_answer(text: str) -> str:
    """Normalize free-text answers for comparison.

    Lowercases, strips punctuation (keeping word characters and whitespace),
    and collapses runs of whitespace to single spaces. Falsy input yields "".
    """
    if not text:
        return ""
    lowered = text.lower()
    no_punct = re.sub(r'[^\w\s]', '', lowered)
    return " ".join(no_punct.split())
106 |
+
|
def normalize_date_answer(text: str, mapping=None) -> str:
    """Convert a spoken date answer (e.g. "the twenty-first") to digits ("21").

    Args:
        text: Raw user answer; a leading "the " is ignored.
        mapping: Optional ordinal-word -> digit-string mapping; defaults to
            ORDINAL_TO_DIGIT.

    Returns:
        The digits remaining after replacing the first (longest) matching
        ordinal and stripping all non-digit characters; "" for empty input.
    """
    if not text:
        return ""
    if mapping is None:
        mapping = ORDINAL_TO_DIGIT
    clean_text = text.lower().strip()
    if clean_text.startswith("the "):
        clean_text = clean_text[4:]
    # Try longer entries first: plain dict order let 'first' match inside
    # 'twenty-first', turning "the twenty-first" into "1" instead of "21".
    for word in sorted(mapping, key=len, reverse=True):
        if word in clean_text:
            clean_text = clean_text.replace(word, mapping[word])
            break
    return re.sub(r'\D', '', clean_text)
118 |
+
|
def clean_numeric_answer(text: str) -> str:
    """Strip everything except digit characters; falsy input yields ""."""
    digits_only = re.sub(r'\D', '', text or "")
    return digits_only
122 |
+
|
def normalize_numeric_words(text: str, mapping=None) -> str:
    """Convert spoken number words in a string to digit strings.

    Args:
        text: Raw user answer; lowercased and stripped before matching.
        mapping: Optional word -> digit-string mapping; defaults to
            WORD_TO_DIGIT.

    Returns:
        The lowercased text with every mapped word (matched on word
        boundaries) replaced by its digits; "" for empty input.
    """
    if not text:
        return ""
    if mapping is None:
        mapping = WORD_TO_DIGIT
    text = text.lower().strip()
    # Replace longer phrases first: with plain dict order, 'one' and
    # 'thirty' fired before 'thirty one', turning it into "30 1" not "31".
    for word in sorted(mapping, key=len, reverse=True):
        text = re.sub(r'\b' + re.escape(word) + r'\b', mapping[word], text)
    return text
130 |
+
|
131 |
+
|
# --- Generic Scoring Utilities ---
def score_keyword_match(expected, user_input):
    """Score 1 if any expected keyword appears in the user's answer, else 0.

    `expected` holds one or more acceptable answers separated by '|'; both
    sides are passed through clean_text_answer before the substring check.
    """
    if not expected or not user_input:
        return 0
    cleaned_user = clean_text_answer(user_input)
    matched = any(
        clean_text_answer(candidate) in cleaned_user
        for candidate in expected.split('|')
    )
    return 1 if matched else 0
144 |
+
|
def score_sentence_structure(raw_user_input):
    """Score 1 if the raw (un-cleaned) text contains a noun and a verb.

    Tokenizes and POS-tags with NLTK; inputs with fewer than two tokens
    score 0. Any NLTK failure is logged and scored 0 (best effort).
    """
    try:
        tokens = nltk.word_tokenize(raw_user_input or "")
        if len(tokens) < 2:
            return 0
        tags = [tag for _, tag in nltk.pos_tag(tokens)]
        found_noun = any(tag.startswith('NN') for tag in tags)
        found_verb = any(tag.startswith('VB') for tag in tags)
        if found_noun and found_verb:
            return 1
        return 0
    except Exception as e:
        print(f"[NLTK ERROR] Failed to parse sentence: {e}")
        return 0
157 |
+
|
def score_drawing(image_path, expected_sides):
    """Scores a drawing by finding the number of sides of the smallest significant polygon."""
    # Returns (score, sides): score is 1 when the smallest significant
    # contour approximates to exactly `expected_sides` vertices, else 0;
    # sides is that vertex count (0 when the image is missing, unreadable,
    # or has too few shapes).
    if not image_path or not os.path.exists(image_path):
        return 0, 0
    try:
        img = cv2.imread(image_path)
        # Binarize: pixels darker than near-white become foreground
        # (assumes dark strokes on a light canvas — confirm with the UI).
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        _, thresh = cv2.threshold(gray, 240, 255, cv2.THRESH_BINARY_INV)
        contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

        # Drop tiny specks; the 500 px² threshold is presumably empirical —
        # confirm against real drawings.
        significant_contours = [c for c in contours if cv2.contourArea(c) > 500]
        if len(significant_contours) < 3:
            return 0, 0 # Not enough shapes to form a valid intersection

        # Track the smallest contour seen so far and approximate it to a
        # polygon; epsilon at 4% of the perimeter controls how aggressively
        # vertices are merged.
        min_area = float('inf')
        sides_of_smallest_shape = 0
        for contour in significant_contours:
            area = cv2.contourArea(contour)
            if area < min_area:
                min_area = area
                epsilon = 0.04 * cv2.arcLength(contour, True)
                approx = cv2.approxPolyDP(contour, epsilon, True)
                sides_of_smallest_shape = len(approx)

        score = 1 if sides_of_smallest_shape == expected_sides else 0
        return score, sides_of_smallest_shape
    except Exception as e:
        print(f"[OpenCV ERROR] Failed to process image: {e}")
        return 0, 0