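# NOTE(review): the three lines below appear to be hosting-page residue
# (uploader avatar caption, repository name, commit id); kept as comments so
# the module parses. Confirm and remove if not wanted.
# ameythakur's picture
# DEPRESSION-DETECTION
# c061ce5 verified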
# ==============================================================================
# PROJECT: DEPRESSION-DETECTION-USING-TWEETS
# AUTHORS: AMEY THAKUR & MEGA SATISH
# GITHUB (AMEY): https://github.com/Amey-Thakur
# GITHUB (MEGA): https://github.com/msatmod
# REPOSITORY: https://github.com/Amey-Thakur/DEPRESSION-DETECTION-USING-TWEETS
# RELEASE DATE: June 5, 2022
# LICENSE: MIT License
# DESCRIPTION: Utility for cleaning raw tweet data for analysis.
# ==============================================================================
import argparse
import warnings
import clean_utilities as CU
# Silence every warning category for the rest of the process so that the
# console output of this utility contains only its own status messages.
# This runs after the imports above, so warnings raised while those modules
# are being imported are not affected by it.
warnings.simplefilter("ignore")
def main():
    """
    Clean a single raw tweet file and save the result to ``clean_tweet.txt``.

    The path of the raw tweet is taken from the mandatory ``filename``
    positional argument on the command line; argparse terminates the
    process with a usage message when it is missing. The file is read as
    UTF-8, its content is passed to ``CU.tweets_cleaner`` and the returned
    text is written as UTF-8 to ``clean_tweet.txt`` (relative path, opened
    in ``'w'`` mode, so any previous content is overwritten).

    Failures are reported on standard output rather than raised:
    a missing input file produces a dedicated message, any other error a
    generic one. The function returns ``None`` in every case.
    """
    # Configuration of the command-line argument parser
    parser = argparse.ArgumentParser(
        description="Twitter Depression Detection: Text Cleaning Utility"
    )

    # 'filename' is a required positional argument, so after parse_args()
    # returns it is always set -- no separate "is it None?" check is needed.
    parser.add_argument(
        'filename',
        help="Path to the raw text file containing the tweet to be sanitized"
    )
    args = parser.parse_args()

    print(f"Targeting file for preprocessing: {args.filename}")

    # Stage 1: read the input. The FileNotFoundError handler is scoped to
    # this step only, so the "file not discovered" message can never be
    # triggered by a missing resource in a later stage.
    try:
        with open(args.filename, 'r', encoding='utf-8') as source_file:
            raw_tweet = source_file.read()
    except FileNotFoundError:
        print(f"Error: The specified file '{args.filename}' was not discovered.")
        return
    except Exception as e:
        # Top-level CLI boundary: report (e.g. permission or decoding
        # problems) instead of showing a traceback.
        print(f"An unexpected analytical error occurred: {e}")
        return

    # Stage 2: clean the text and persist it.
    try:
        print("Linguistic cleaning in progress...")
        # Per the original author's notes the cleaner performs contraction
        # expansion, tokenization and lemmatization -- its implementation
        # lives in clean_utilities and is not visible here.
        sanitized_tweet = CU.tweets_cleaner(raw_tweet)

        with open('clean_tweet.txt', 'w', encoding='utf-8') as output_file:
            output_file.write(sanitized_tweet)
    except Exception as e:
        print(f"An unexpected analytical error occurred: {e}")
        return

    # Announce success only once the data has actually been written.
    print("Sanitization complete. Persistence target: clean_tweet.txt")
# Script entry point: run the cleaning routine only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    main()