Spaces:

nolzZ
/

dashbordclass

Sleeping

File size: 8,069 Bytes

4fc8e0c

import os
import gspread
import pandas as pd
from google.oauth2.service_account import Credentials
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline

# ========== STEP 1: Configure paths ==========
# Every path is built relative to the directory that contains this script.
BASE_DIR = os.path.dirname(__file__)
CRED_PATH = os.path.join(BASE_DIR, "credentials.json")

_SENTI_ROOT = os.path.join(BASE_DIR, "Senti_real")
DATA_DIR = os.path.join(_SENTI_ROOT, "Data")
SENTIMENT_DIR = os.path.join(_SENTI_ROOT, "Sentiment")

# Create both output folders up front; folders that already exist are kept.
for _out_dir in (DATA_DIR, SENTIMENT_DIR):
    os.makedirs(_out_dir, exist_ok=True)

# ========== STEP 2: Connect to Google Sheet ==========
# Build service-account credentials from the key file and authorize gspread.
scopes = ['https://www.googleapis.com/auth/spreadsheets']
credentials = Credentials.from_service_account_file(CRED_PATH, scopes=scopes)
client = gspread.authorize(credentials)

sheet_url = "https://docs.google.com/spreadsheets/d/1M3hwj9BRg4diW8_7rdk0OskLndnEUscdn_CWM2vNj_0/edit?usp=sharing"

# Open the spreadsheet, take the worksheet at index 0 and load all of its
# records into a DataFrame.
spreadsheet = client.open_by_url(sheet_url)
worksheet = spreadsheet.get_worksheet(0)
data = worksheet.get_all_records()
df = pd.DataFrame(data)

# ========== STEP 3: Preprocess ==========
# Maps the Thai survey headers found in the sheet to short English column
# names. Several keys carry leading/trailing spaces copied from the form.
column_mapping = {
    'ประทับเวลา': 'timestamp',
    'เพศ': 'gender',
    'อายุ (ใส่เป็นตัวเลข)': 'age',
    'บทบาทของคุณในการใช้งาน แอปพลิเคชัน Check PD': 'user_role',
    'ผลลัพธ์ที่ได้จาก แอปพลิเคชัน Check PD': 'app_result',
    'สถานที่ท่านเข้ารับบริการ ( อื่น ๆ โปรดระบุจังหวัด )': 'service_location',
    'วันที่เข้ารับบริการ  ': 'service_date',
    'แอปพลิเคชัน CheckPD ใช้งานง่ายและไม่ซับซ้อน ': 'app_usability',
    'ข้อมูลและคำแนะนำในแอปพลิเคชันมีความเข้าใจง่าย  ': 'app_info_clarity',
    'คุณรู้สึกว่าแอปพลิเคชันให้ผลวิเคราะห์ที่แม่นยำและน่าเชื่อถือ': 'app_accuracy',
    'แอปช่วยให้คุณสามารถติดตามหรือวางแผนดูแลอาการได้ดีขึ้น': 'app_support_effectiveness',
    'โดยรวมแล้วคุณพึงพอใจกับการใช้งานแอปพลิเคชัน CheckPD มากน้อยเพียงใด  ': 'app_overall_satisfaction',
    'การให้บริการของเจ้าหน้าที่': 'staff_service_quality',
    'ความรวดเร็วในการให้บริการของเจ้าหน้าที่': 'staff_response_speed',
    'เจ้าหน้าที่สามารถให้คำแนะนำหรือข้อมูลเบื้องต้นได้ชัดเจน': 'staff_info_clarity',
    'คุณรู้สึกสะดวกและประทับใจเมื่อขอความช่วยเหลือ  ': 'staff_comfort',
    'คุณพึงพอใจต่อการบริการจากเจ้าหน้าที่ทางการแพทย์หรือไม่ ': 'staff_overall_satisfaction',
    'คุณรู้สึกอย่างไรเมื่อใช้งานแอปพลิเคชัน CheckPD ครั้งแรก?  ': 'user_feeling_first_use',
    'บริการของเจ้าหน้าที่ทำให้คุณรู้สึกอย่างไร?': 'staff_emotion_feedback',
    'สิ่งใดในแอปพลิเคชันหรืองานบริการที่คุณคิดว่าควรปรับปรุง?  ': 'improvement_suggestions',
    'แบบทดสอบที่ได้ทำในแอปพลิเคชัน Check PD ': 'app_quiz_used',
    'ปัญหาการใช้งานแอปพลิเคชัน Check PD  ': 'app_issue_encountered',
    '  ความพร้อมในการให้บริactการของเจ้าหน้าที่  ': 'staff_readiness',
}

# Match headers on their whitespace-stripped form. An exact-match rename
# silently skips a column whose header differs from the key only by an
# invisible leading/trailing space, and the script then fails later with a
# KeyError on the missing English column name.
_stripped_mapping = {thai.strip(): eng for thai, eng in column_mapping.items()}


def _to_english_header(name):
    """Return the English name for a sheet header, or the header unchanged."""
    if not isinstance(name, str):
        return name
    return _stripped_mapping.get(name.strip(), name)


df.rename(columns=_to_english_header, inplace=True)

# Split the multi-select quiz answer into one 0/1 indicator column per quiz.
# The separator is a comma followed by optional whitespace. The pattern is a
# raw string because '\s' inside a normal literal is an invalid escape
# sequence (DeprecationWarning, and SyntaxWarning since Python 3.12).
df_split = df['app_quiz_used'].str.split(r',\s*', expand=True)

# Thai quiz label as it appears in the answer -> English indicator column.
quiz_column_names = {
    "ประวัติส่วนตัว (ชื่อ-นามสกุล เลขบัตรประจำตัวประชาชน ที่อยู่)": "personal_info",
    "การออกเสียง 'อาาา' (Voice Test - Ahhh)": "voice_ahhh",
    "การออกเสียง 'ยายพาหลานไปซื้อขนมที่ตลาด' (Voice Test - ยายพาหลานไปซื้อขนมที่ตลาด)": "voice_sentence",
    "อาการสั่นขณะนั่งนิ่ง (Resting Tremor)": "resting_tremor",
    "อาการสั่นขณะยกแขน (Postural Tremor)": "postural_tremor",
    "แตะสลับนิ้วขวา (Dual Tap - Right)": "dual_tap_right",
    "แตะสลับนิ้วซ้าย (Dual Tap - Left)": "dual_tap_left",
    "การขยายวงกลม - ขวา (Pinch to Size - Right)": "pinch_size_right",
    "การขยายวงกลม - ซ้าย (Pinch to Size - Left)": "pinch_size_left",
    "การเดิน (Gait walk)": "gait_walk",
    "การทรงตัวขณะยืน (Balance)": "balance",
    "ตอบคำถาม 20 ข้อ (Questionnaire)": "questionnaire"
}

# Start every indicator at 0, then flag the quizzes each respondent listed.
for col in quiz_column_names.values():
    df[col] = 0
for idx, row in df_split.iterrows():
    # dropna() skips the padding cells that expand=True adds for short rows.
    for val in row.dropna():
        en_col = quiz_column_names.get(val.strip())
        if en_col:  # labels that are not in the mapping are ignored
            df.at[idx, en_col] = 1

# The raw multi-select column is replaced by the indicator columns.
df.drop(columns=['app_quiz_used'], inplace=True)

# ========== STEP 4: Save the preprocessed data ==========
# Write the cleaned table to the data folder without the index column.
csv_path_clean = os.path.join(DATA_DIR, "checkpd_data.csv")
df.to_csv(
    csv_path_clean,
    index=False,
)

# ========== STEP 5: Sentiment Analysis ==========
# Free-text answer columns that get a sentiment label and score.
text_columns = ['user_feeling_first_use', 'staff_emotion_feedback', 'improvement_suggestions']

# Work on a copy holding only the service location and the free-text answers.
df_sentiment = df[['service_location', *text_columns]].copy()

# Load the tokenizer and classifier, then wrap them in a pipeline.
model_name = "phoner45/wangchan-sentiment-thai-text-model"
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
sentiment_pipe = pipeline(
    "sentiment-analysis",
    model=model,
    tokenizer=tokenizer,
)

def analyze_sentiment(series, pipe=None):
    """Classify every text of *series* and return parallel label/score lists.

    Args:
        series: pandas Series of free-text answers. Missing values are
            treated as empty strings and every value is converted to ``str``.
        pipe: Optional callable used for classification. It is called with
            one string and must return a sequence whose first item is a
            mapping with ``'label'`` and ``'score'`` keys. Defaults to the
            module-level ``sentiment_pipe``.

    Returns:
        A ``(labels, scores)`` tuple of two lists, each with one entry per
        element of *series*:
        - blank text gives ``("NA", None)``;
        - classified text gives the label and the score rounded to 4 decimals;
        - text whose classification raised gives ``("ERROR", None)``.
    """
    if pipe is None:
        pipe = sentiment_pipe

    labels = []
    scores = []
    for text in series.fillna("").astype(str):
        if not text.strip():
            labels.append("NA")
            scores.append(None)
            continue
        try:
            # Only the first 512 characters are sent to the classifier.
            result = pipe(text[:512])[0]
            label = result['label']
            score = round(result['score'], 4)
        except Exception:
            # Best effort: one bad row must not abort the whole run.
            # `Exception` (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            label, score = "ERROR", None
        # Append only after both values are known, so a failure halfway
        # through can never leave the two lists with different lengths.
        labels.append(label)
        scores.append(score)
    return labels, scores

# Add a "<column>_sentiment" and a "<column>_score" column for each
# free-text column, label column first.
for col in text_columns:
    label_col = f"{col}_sentiment"
    score_col = f"{col}_score"
    df_sentiment[label_col], df_sentiment[score_col] = analyze_sentiment(df_sentiment[col])

# ========== STEP 6: Save the results ==========
output_path = os.path.join(SENTIMENT_DIR, "checkpd_sentiment.csv")
df_sentiment.to_csv(
    output_path,
    index=False,
)

print(f"\n✅ วิเคราะห์ Sentiment เสร็จสมบูรณ์! บันทึกที่: {output_path}")