|
|
""" |
|
|
Example inference script for TikTok Bot Detection Model |
|
|
""" |
|
|
|
|
|
import warnings

import joblib
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
|
|
|
|
|
|
|
|
def load_model(model_path="TIKTOK_BOT_Detection_Model_v1.pkl"):
    """
    Deserialize the trained bot detection model from disk.

    Args:
        model_path (str): Path of the joblib file to read; defaults to the
            v1 model file name, resolved against the current working
            directory.

    Returns:
        The object stored in the file, exactly as ``joblib.load`` returns it.
    """
    # NOTE: joblib.load is pickle-based and can execute arbitrary code while
    # loading, so only point this at model files from a trusted source.
    model = joblib.load(model_path)
    return model
|
|
|
|
|
|
|
|
def prepare_features(account_data, scaler=None):
    """
    Prepare account features for prediction.

    Builds a one-row frame from ``account_data``, selects the model's
    features in a fixed order and scales them.

    Args:
        account_data (dict): Dictionary containing account features. Every
            name in ``features`` below must be present; extra keys are
            ignored.
        scaler: Optional, already-fitted scaler exposing ``transform``
            (for example the ``MinMaxScaler`` fitted on the training data).
            When given it is applied as-is and never refitted.

    Returns:
        numpy.ndarray: Scaled features ready for prediction (one row, one
        column per feature). With a custom ``scaler`` this is whatever its
        ``transform`` returns.

    Raises:
        KeyError: If ``account_data`` lacks one or more required features.
    """
    features = [
        "IsPrivate",
        "IsVerified",
        "HasProfilePic",
        "FollowingCount",
        "FollowerCount",
        "HasInstagram",
        "HasYoutube",
        "HasBio",
        "HasLinkInBio",
        "HasPosts",
        "PostsCount",
        "FollowToFollowerRatio",
    ]

    # Fail early with the full list of absent names instead of pandas'
    # generic "not in index" error.
    missing = [name for name in features if name not in account_data]
    if missing:
        raise KeyError(f"account_data is missing required features: {missing}")

    df = pd.DataFrame([account_data])

    if scaler is not None:
        # Reuse the statistics learned at training time.
        return scaler.transform(df[features])

    # Legacy behaviour, kept for backward compatibility: a MinMaxScaler
    # fitted on a single row has min == max for every column, so every
    # feature is scaled to 0.0 regardless of the account's real values.
    warnings.warn(
        "prepare_features() was called without a fitted scaler; fitting a "
        "new MinMaxScaler on a single account scales every feature to 0.0. "
        "Pass the scaler fitted during training via the 'scaler' argument.",
        stacklevel=2,
    )
    return MinMaxScaler().fit_transform(df[features])
|
|
|
|
|
|
|
|
def predict_single_account(model, account_data):
    """
    Classify one account as bot or human.

    The account is converted to a scaled feature row with
    ``prepare_features`` and passed to the model's ``predict`` and
    ``predict_proba``.

    Args:
        model: Trained sklearn model
        account_data (dict): Account features

    Returns:
        dict: ``is_bot`` (bool of the predicted label), ``bot_probability``
        (probability at index 1), ``human_probability`` (probability at
        index 0) and ``confidence`` (the largest class probability).
    """
    scaled_row = prepare_features(account_data)

    # Both calls return one entry per input row; only one row was supplied.
    label = model.predict(scaled_row)[0]
    class_probs = model.predict_proba(scaled_row)[0]
    human_prob, bot_prob = class_probs[0], class_probs[1]

    summary = {"is_bot": bool(label)}
    summary["bot_probability"] = float(bot_prob)
    summary["human_probability"] = float(human_prob)
    summary["confidence"] = float(max(class_probs))
    return summary
|
|
|
|
|
|
|
|
def predict_batch(model, accounts_df, scaler=None):
    """
    Predict for multiple accounts at once.

    Args:
        model: Trained sklearn model
        accounts_df (pd.DataFrame): DataFrame with account features. Must
            contain every column in ``features`` below; other columns are
            left untouched.
        scaler: Optional, already-fitted scaler exposing ``transform``
            (for example the ``MinMaxScaler`` fitted on the training data).
            When given it is applied as-is and never refitted.

    Returns:
        pd.DataFrame: The same ``accounts_df`` object, modified in place
        with ``is_bot``, ``bot_probability`` and ``human_probability``
        columns added. Pass a copy if the caller's frame must stay intact.

    Raises:
        KeyError: If ``accounts_df`` lacks one or more required columns.
    """
    features = [
        "IsPrivate",
        "IsVerified",
        "HasProfilePic",
        "FollowingCount",
        "FollowerCount",
        "HasInstagram",
        "HasYoutube",
        "HasBio",
        "HasLinkInBio",
        "HasPosts",
        "PostsCount",
        "FollowToFollowerRatio",
    ]

    missing = [name for name in features if name not in accounts_df.columns]
    if missing:
        raise KeyError(f"accounts_df is missing required columns: {missing}")

    if len(accounts_df) == 0:
        # Nothing to score: add empty result columns rather than letting the
        # scaler/model fail on a zero-row input.
        for column in ("is_bot", "bot_probability", "human_probability"):
            accounts_df[column] = []
        return accounts_df

    if scaler is not None:
        # Reuse the statistics learned at training time.
        features_scaled = scaler.transform(accounts_df[features])
    else:
        # Legacy behaviour, kept for backward compatibility: min/max are
        # taken from this batch, so an account's scaled values change with
        # whatever other rows happen to be in the same call.
        warnings.warn(
            "predict_batch() was called without a fitted scaler; fitting a "
            "new MinMaxScaler on the batch makes each account's scaled "
            "features depend on the other rows. Pass the scaler fitted "
            "during training via the 'scaler' argument.",
            stacklevel=2,
        )
        features_scaled = MinMaxScaler().fit_transform(accounts_df[features])

    predictions = model.predict(features_scaled)
    probabilities = model.predict_proba(features_scaled)

    accounts_df["is_bot"] = predictions
    # Column 1 is treated as the bot class, column 0 as the human class.
    accounts_df["bot_probability"] = probabilities[:, 1]
    accounts_df["human_probability"] = probabilities[:, 0]

    return accounts_df
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Demo driver: load the saved model, score one hand-written account,
    # then score a two-account batch, printing the results of each step.
    divider = "=" * 60

    # Feature names in the order used for every example record below.
    columns = (
        "IsPrivate",
        "IsVerified",
        "HasProfilePic",
        "FollowingCount",
        "FollowerCount",
        "HasInstagram",
        "HasYoutube",
        "HasBio",
        "HasLinkInBio",
        "HasPosts",
        "PostsCount",
        "FollowToFollowerRatio",
    )

    print("Loading TikTok bot detection model...")
    model = load_model()
    print("✓ Model loaded successfully!\n")

    # --- Example 1: one account -------------------------------------------
    print(divider)
    print("Example 1: Single Account Prediction")
    print(divider)

    suspicious_account = dict(
        zip(columns, (0, 0, 1, 5000, 100, 0, 0, 0, 1, 1, 50, 50.0))
    )

    result = predict_single_account(model, suspicious_account)

    print("Account Analysis:")
    print(f" Following: {suspicious_account['FollowingCount']}")
    print(f" Followers: {suspicious_account['FollowerCount']}")
    print(f" Posts: {suspicious_account['PostsCount']}")
    print("\nPrediction:")
    print(f" Is Bot: {result['is_bot']}")
    print(f" Bot Probability: {result['bot_probability']:.2%}")
    print(f" Confidence: {result['confidence']:.2%}")

    # --- Example 2: a small batch -----------------------------------------
    print(f"\n{divider}")
    print("Example 2: Batch Prediction")
    print(divider)

    batch_values = [
        (0, 1, 1, 500, 10000, 1, 1, 1, 1, 1, 200, 0.05),
        (0, 0, 0, 8000, 50, 0, 0, 0, 1, 1, 10, 160.0),
    ]
    accounts = pd.DataFrame([dict(zip(columns, values)) for values in batch_values])

    # predict_batch writes its result columns into the frame it receives,
    # so hand it a copy and keep `accounts` unchanged.
    results = predict_batch(model, accounts.copy())

    print("\nResults:")
    for position, account in results.iterrows():
        print(f"\nAccount {position + 1}:")
        print(f" Followers: {account['FollowerCount']}")
        print(f" Is Bot: {bool(account['is_bot'])}")
        print(f" Bot Probability: {account['bot_probability']:.2%}")
|
|
|