import requests
import json
import pprint
import time
import sys
import os
import numpy as np


def check_internet_connectivity():
    """Check if we can connect to the internet"""
    print("Testing internet connectivity...")
    try:
        response = requests.get("https://huggingface.co", timeout=5)
        print(f"Connection to huggingface.co: Status {response.status_code}")
        return response.status_code == 200
    except Exception as e:
        print(f"Error connecting to huggingface.co: {str(e)}")
        return False


def check_model_repository():
    """Check if we can connect to the specific model repository"""
    print("Testing connection to model repository...")
    try:
        url = "https://huggingface.co/allenai/longformer-base-4096"
        response = requests.get(url, timeout=5)
        print(f"Connection to model repository: Status {response.status_code}")
        return response.status_code == 200
    except Exception as e:
        print(f"Error connecting to model repository: {str(e)}")
        return False


def check_debug_endpoint(api_url):
    """Check the debug endpoint for diagnostic information"""
    debug_url = api_url.replace("/predict", "/debug")
    print(f"Checking debug endpoint at {debug_url}...")
    try:
        response = requests.get(debug_url, timeout=10)
        if response.status_code == 200:
            debug_info = response.json()
            print("Debug information retrieved:")
            print(f"- API Status: {debug_info.get('api_status', 'Unknown')}")
            print(f"- Model Loaded: {debug_info.get('model_loaded', 'Unknown')}")
            print(f"- Cache Directory Exists: {debug_info.get('model_cache_exists', 'Unknown')}")
            print(f"- Temp Directory Writable: {debug_info.get('tmp_directory_writable', 'Unknown')}")

            internet_check = debug_info.get('internet_connectivity', {})
            print(f"- Server Internet Connectivity: {internet_check.get('status', 'Unknown')}")
            if internet_check.get('message'):
                print(f"  Message: {internet_check.get('message')}")

            tokenizer_test = debug_info.get('tokenizer_test', {})
            print(f"- Tokenizer Test: {tokenizer_test.get('status', 'Unknown')}")
            if tokenizer_test.get('message'):
                print(f"  Message: {tokenizer_test.get('message')}")

            disk_space = debug_info.get('disk_space', {})
            if disk_space.get('status') == 'ok':
                print(
                    f"- Disk Space: Total: {disk_space.get('total_gb', 0):.2f} GB, "
                    f"Used: {disk_space.get('used_gb', 0):.2f} GB, "
                    f"Free: {disk_space.get('free_gb', 0):.2f} GB "
                    f"({disk_space.get('percent_used', 0):.1f}% used)"
                )

            return debug_info
        else:
            print(f"Error accessing debug endpoint: Status {response.status_code}")
            print(response.text)
            return None
    except Exception as e:
        print(f"Exception when accessing debug endpoint: {str(e)}")
        return None


API_URL = "https://angusfung-kickstarter-success-prediction.hf.space/predict" |
|
|
|
|
|
campaign_data = {
    "raw_description": "Introducing the AquaGo: The Smart, Eco-Friendly Portable Water Purifier! Clean water is a basic human right - yet for millions around the world, it's a daily struggle. Whether you're an outdoor adventurer, traveling to remote areas, or preparing for emergencies, access to safe drinking water should never be a compromise. That's why we created **AquaGo**, a revolutionary portable water purifier that combines cutting-edge filtration technology, smart sensors, and sustainable materials - all packed into a sleek, lightweight design you can take anywhere.",
    "raw_blurb": "AquaGo is a smart, eco-friendly portable water purifier that delivers clean, safe drinking water anywhere.",
    "raw_risks": "Bringing a product to market involves complex engineering, regulatory approvals, and safety testing. Delays may occur due to certification or supply chain issues.",
    "raw_subcategory": "Gadgets",
    "raw_category": "Technology",
    "raw_country": "Canada",
    "funding_goal": 2000,
    "image_count": 8,
    "video_count": 3,
    "campaign_duration": 90,
    "previous_projects_count": 5,
    "previous_success_rate": 0.4,
    "previous_pledged": 18745.33,
    "previous_funding_goal": 23564.99
}


def predict_success(data, max_retries=3, retry_delay=10):
    """Send data to the API and get prediction results with retries"""
    for attempt in range(max_retries):
        try:
            print(f"Sending request to: {API_URL} (Attempt {attempt + 1}/{max_retries})")
            response = requests.post(API_URL, json=data, timeout=60)

            if response.status_code == 200:
                return response.json()
            else:
                print(f"Error: {response.status_code}")
                print(response.text)

                if response.status_code == 500 and "Can't load tokenizer" in response.text:
                    print(f"The model might be downloading. Waiting {retry_delay} seconds before retry...")
                    time.sleep(retry_delay)
                else:
                    return None

        except Exception as e:
            print(f"Exception occurred: {str(e)}")
            print(f"Waiting {retry_delay} seconds before retry...")
            time.sleep(retry_delay)

    return None


def display_results(results):
    """Display the prediction results in a user-friendly way"""
    if not results:
        print("No results to display.")
        return

    print("\n===== KICKSTARTER SUCCESS PREDICTION =====\n")
    print(f"Success Probability: {results['success_probability']:.2%}")
    print(f"Predicted Outcome: {results['predicted_outcome']}")

    print("\n----- TOP INFLUENCING FACTORS -----")

    # Rank features by the magnitude of their SHAP contribution.
    top_factors = sorted(
        results['shap_values'].items(),
        key=lambda x: abs(float(x[1])),
        reverse=True
    )[:5]

    for factor, value in top_factors:
        impact = "POSITIVE" if float(value) > 0 else "NEGATIVE"
        # Convert explicitly in case the API returns SHAP values as strings.
        print(f"{factor}: {float(value):.4f} ({impact})")

    print("\n----- ALL SHAP VALUES -----")
    pp = pprint.PrettyPrinter(indent=2)
    pp.pprint(results['shap_values'])

    if 'longformer_embedding' in results:
        embedding = np.array(results['longformer_embedding'])
        print("\n----- LONGFORMER EMBEDDING -----")
        print(f"Embedding Shape: {embedding.shape}")
        print(f"First 10 values: {embedding[:10]}")

        try:
            print(f"Mean: {np.mean(embedding):.4f}")
            print(f"Std: {np.std(embedding):.4f}")
            print(f"Min: {np.min(embedding):.4f}")
            print(f"Max: {np.max(embedding):.4f}")
        except Exception as e:
            print(f"Error calculating embedding statistics: {str(e)}")


if __name__ == "__main__": |
|
print("==== DIAGNOSTICS ====") |
|
print("Testing connectivity from client machine...") |
|
internet_ok = check_internet_connectivity() |
|
repo_ok = check_model_repository() |
|
|
|
debug_info = check_debug_endpoint(API_URL) |
|
|
|
print("\n==== PREDICTION TEST ====") |
|
if not internet_ok: |
|
print("WARNING: Internet connectivity issues detected on client machine.") |
|
|
|
if not repo_ok: |
|
print("WARNING: Cannot access model repository from client machine.") |
|
|
|
print("Sending prediction request...") |
|
results = predict_success(campaign_data, max_retries=2, retry_delay=10) |
|
display_results(results) |