| import fasttext |
| import pandas as pd |
|
|
|
|
| |
# Files read and written by this training script.
DATASET_PATH = 'sms_spam_phishing_dataset.csv'
TRAIN_FILE_PATH = 'ft_data.txt'
MODEL_PATH = 'ots_sms_model_v1.1.bin'


def load_dataset(path=DATASET_PATH):
    """Read the dataset CSV, trying UTF-8 first and ISO-8859-1 second.

    Args:
        path: Location of the CSV file.

    Returns:
        The DataFrame produced by ``pandas.read_csv``.
    """
    try:
        return pd.read_csv(path, encoding='utf-8')
    except UnicodeDecodeError:
        # The file is not valid UTF-8; retry with a single-byte encoding.
        return pd.read_csv(path, encoding='ISO-8859-1')


def to_fasttext_line(label, message):
    """Build one training line of the form ``__label__<label> <text>``.

    Every run of whitespace in the message (including line breaks) is
    collapsed to a single space so that one example always occupies exactly
    one line of the training file. Whitespace inside the label is replaced by
    ``_`` so the label stays a single token.

    Args:
        label: Class label; converted with ``str``.
        message: Message text; converted with ``str`` (so a missing value is
            rendered as ``nan``, as the original f-string formatting did).

    Returns:
        The formatted line, without a trailing newline.
    """
    label_token = '_'.join(str(label).split())
    text = ' '.join(str(message).split())
    return f'__label__{label_token} {text}'


def write_training_file(data, path=TRAIN_FILE_PATH):
    """Write one fastText training line per row of ``data`` to ``path``.

    The lines are written directly instead of through ``Series.to_csv``:
    CSV quoting wraps any value containing a comma, quote or line break in
    double quotes, which puts a ``"`` in front of the ``__label__`` prefix
    and corrupts the example.

    Args:
        data: DataFrame with ``Label`` and ``Message`` columns.
        path: Destination text file; overwritten if it exists.

    Raises:
        KeyError: If either required column is missing.
    """
    rows = zip(data['Label'], data['Message'])
    with open(path, 'w', encoding='utf-8') as handle:
        handle.writelines(
            f'{to_fasttext_line(label, message)}\n' for label, message in rows
        )


def train_model(train_path=TRAIN_FILE_PATH):
    """Train a supervised fastText classifier on ``train_path``.

    Uses the same hyperparameters as before: 25 epochs, learning rate 1.0
    and word bigrams.

    Returns:
        The object returned by ``fasttext.train_supervised``.
    """
    return fasttext.train_supervised(
        input=train_path, epoch=25, lr=1.0, wordNgrams=2
    )


def main():
    """Run the pipeline: load the CSV, write training data, train, save."""
    data = load_dataset()
    write_training_file(data)
    model = train_model()
    model.save_model(MODEL_PATH)


# Guarded so that importing this module does not start a training run.
if __name__ == '__main__':
    main()
|
|
|
|