| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | import argparse |
| | import warnings |
| | import clean_utilities as CU |
| |
|
| | |
# Silence ALL warnings globally so library deprecation/user warnings do not
# clutter the CLI output. NOTE(review): this is a blanket suppression —
# confirm no actionable warnings from clean_utilities are being hidden.
warnings.filterwarnings("ignore")
| |
|
def main():
    """Clean a raw tweet file and persist the result to ``clean_tweet.txt``.

    Parses a single required positional argument (``filename``), reads the
    raw tweet text from that file, runs it through
    ``clean_utilities.tweets_cleaner``, and writes the sanitized text to
    ``clean_tweet.txt`` in the current working directory.

    Errors are reported on stdout rather than raised: a missing input file
    is handled explicitly, and any other failure is caught at this
    top-level boundary so the CLI never shows a raw traceback.
    """
    parser = argparse.ArgumentParser(
        description="Twitter Depression Detection: Text Cleaning Utility"
    )
    parser.add_argument(
        'filename',
        help="Path to the raw text file containing the tweet to be sanitized"
    )
    args = parser.parse_args()

    # `filename` is a required positional, so argparse has already exited
    # with a usage error if it was omitted — no None-check is needed here.
    # (The previous `if args.filename is not None:` guard and its `else`
    # branch were unreachable dead code.)
    print(f"Targeting file for preprocessing: {args.filename}")

    try:
        with open(args.filename, 'r', encoding='utf-8') as file:
            raw_tweet = file.read()

        print("Linguistic cleaning in progress...")
        sanitized_tweet = CU.tweets_cleaner(raw_tweet)

        with open('clean_tweet.txt', 'w', encoding='utf-8') as output_file:
            print("Sanitization complete. Persistence target: clean_tweet.txt")
            output_file.write(sanitized_tweet)

    except FileNotFoundError:
        print(f"Error: The specified file '{args.filename}' was not discovered.")
    except Exception as e:
        # Top-level CLI boundary: report unexpected failures as a message
        # instead of letting a traceback reach the user.
        print(f"An unexpected analytical error occurred: {e}")
| |
|
# Script entry point: run the cleaning routine only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    main()
| |
|
| |
|
| |
|
| |
|