Spaces:
Sleeping
Sleeping
# FeatureExtraction.py
# Purpose: Script to extract additional features from chat transcripts for psychological assessments.
# Imports
import pandas as pd | |
import numpy as np | |
from textblob import TextBlob | |
# Function to calculate sentiment polarity | |
def get_sentiment_polarity(text):
    """Return the TextBlob sentiment polarity of *text*.

    Args:
        text: The text to score. Any value that is not a ``str`` (for
            example ``None``, or the float NaN that pandas uses for an
            empty CSV cell) is treated as missing.

    Returns:
        The ``sentiment.polarity`` value TextBlob reports for *text*
        (documented by TextBlob as a float in [-1.0, 1.0]), or
        ``float("nan")`` when *text* is not a string.
    """
    # TextBlob raises TypeError for non-string input. Report "no score"
    # instead of aborting, and keep it distinct from a genuine 0.0 score.
    if not isinstance(text, str):
        return float("nan")
    return TextBlob(text).sentiment.polarity
# Function to calculate sentiment subjectivity | |
def get_sentiment_subjectivity(text):
    """Return the TextBlob sentiment subjectivity of *text*.

    Args:
        text: The text to score. Any value that is not a ``str`` (for
            example ``None``, or the float NaN that pandas uses for an
            empty CSV cell) is treated as missing.

    Returns:
        The ``sentiment.subjectivity`` value TextBlob reports for *text*
        (documented by TextBlob as a float in [0.0, 1.0]), or
        ``float("nan")`` when *text* is not a string.
    """
    # TextBlob raises TypeError for non-string input. Report "no score"
    # instead of aborting, and keep it distinct from a genuine 0.0 score.
    if not isinstance(text, str):
        return float("nan")
    return TextBlob(text).sentiment.subjectivity
# Load data with embeddings
input_datapath = "data/chat_transcripts_with_embeddings.csv"
output_datapath = "data/chat_transcripts_with_features.csv"
df = pd.read_csv(input_datapath)

# Feature Extraction
# Example: Extracting sentiment polarity and subjectivity
# pandas loads empty CSV cells as NaN (a float), which TextBlob rejects with a
# TypeError. na_action="ignore" skips those rows and leaves NaN in the feature
# column, so one blank transcript no longer aborts the whole run.
df['sentiment_polarity'] = df['chathistory'].map(
    get_sentiment_polarity, na_action="ignore"
)
df['sentiment_subjectivity'] = df['chathistory'].map(
    get_sentiment_subjectivity, na_action="ignore"
)

# TODO: Add any additional feature extraction relevant to your study here.
# Example: df['feature_name'] = df['column'].apply(your_custom_function)

# Save the data with additional features
df.to_csv(output_datapath, index=False)
print(f"Data with additional features saved to {output_datapath}")
# Note to Users:
# - Ensure that 'input_datapath' points to your data file with embeddings.
# - This script uses TextBlob for sentiment analysis. Install it using 'pip install textblob' if not already installed.
# - You can add more feature extraction functions as needed for your specific research requirements.