bert-tiny-amd / push_to_huggingface.py

Upload trained BERT-Tiny AMD model

4523f56 verified 5 months ago

8.76 kB

	#!/usr/bin/env python3
	"""
	Script to push the trained BERT-Tiny AMD model to Hugging Face Hub
	"""

	import os
	import torch
	from transformers import AutoTokenizer, AutoModelForSequenceClassification
	from huggingface_hub import HfApi, Repository
	import json
	from pathlib import Path

	# Configuration
	REPO_ID = "Adya662/bert-tiny-amd"
	MODEL_PATH = "best_enhanced_progressive_amd.pth"
	BASE_MODEL = "prajjwal1/bert-tiny"

	def create_model_config():
	"""Create model configuration"""
	config = {
	"model_type": "bert",
	"architectures": ["BertForSequenceClassification"],
	"attention_proxy_dtype": "float32",
	"attention_dropout": 0.1,
	"classifier_dropout": None,
	"hidden_act": "gelu",
	"hidden_dropout_prob": 0.1,
	"hidden_size": 128,
	"initializer_range": 0.02,
	"intermediate_size": 512,
	"layer_norm_eps": 1e-12,
	"max_position_embeddings": 512,
	"model_type": "bert",
	"num_attention_heads": 2,
	"num_hidden_layers": 2,
	"num_labels": 1,
	"pad_token_id": 0,
	"position_embedding_type": "absolute",
	"problem_type": "single_label_classification",
	"torch_dtype": "float32",
	"transformers_version": "4.21.0",
	"type_vocab_size": 2,
	"use_cache": True,
	"vocab_size": 30522
	}
	return config

	def create_training_metadata():
	"""Create training metadata"""
	metadata = {
	"model_name": "bert-tiny-amd",
	"base_model": "prajjwal1/bert-tiny",
	"task": "text-classification",
	"dataset": "ElevateNow call center transcripts",
	"language": "en",
	"license": "mit",
	"pipeline_tag": "text-classification",
	"tags": [
	"text-classification",
	"answering-machine-detection",
	"bert-tiny",
	"binary-classification",
	"call-center",
	"voice-processing"
	],
	"performance": {
	"validation_accuracy": 0.9394,
	"precision": 0.9275,
	"recall": 0.8727,
	"f1_score": 0.8993
	},
	"training_details": {
	"total_samples": 3548,
	"training_samples": 2838,
	"validation_samples": 710,
	"epochs": 15,
	"batch_size": 32,
	"learning_rate": 3e-5,
	"device": "mps"
	}
	}
	return metadata

	def push_model_to_hub():
	"""Push the trained model to Hugging Face Hub"""

	print("🚀 Starting model upload to Hugging Face Hub...")

	# Initialize HF API
	api = HfApi()

	# Create model configuration
	config = create_model_config()

	# Save config
	with open("config.json", "w") as f:
	json.dump(config, f, indent=2)

	# Create training metadata
	metadata = create_training_metadata()

	# Save training metadata
	with open("training_metadata.json", "w") as f:
	json.dump(metadata, f, indent=2)

	# Load tokenizer from base model
	print("📥 Loading tokenizer...")
	tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
	tokenizer.save_pretrained(".")

	# Load base model and update with trained weights
	print("📥 Loading base model...")
	model = AutoModelForSequenceClassification.from_pretrained(
	BASE_MODEL,
	num_labels=1,
	config=config
	)

	# Load trained weights
	print("📥 Loading trained weights...")
	if os.path.exists(MODEL_PATH):
	state_dict = torch.load(MODEL_PATH, map_location='cpu')
	model.load_state_dict(state_dict)
	print("✅ Trained weights loaded successfully")
	else:
	print(f"❌ Model file {MODEL_PATH} not found!")
	return False

	# Save model
	print("💾 Saving model...")
	model.save_pretrained(".", safe_serialization=True)

	# Create README.md
	readme_content = """---
	license: mit
	tags:
	- text-classification
	- answering-machine-detection
	- bert-tiny
	- binary-classification
	- call-center
	- voice-processing
	pipeline_tag: text-classification
	---

	# BERT-Tiny AMD Classifier

	A lightweight BERT-Tiny model fine-tuned for Answering Machine Detection (AMD) in call center environments.

	## Model Description

	This model is based on `prajjwal1/bert-tiny` and fine-tuned to classify phone call transcripts as either human or machine (answering machine/voicemail) responses. It's designed for real-time call center applications where quick and accurate detection of answering machines is crucial.

	## Model Architecture

	- Base Model: `prajjwal1/bert-tiny` (2 layers, 128 hidden size, 2 attention heads)
	- Total Parameters: ~4.4M (lightweight and efficient)
	- Input: User transcript text (max 128 tokens)
	- Output: Single logit with sigmoid activation for binary classification
	- Loss Function: BCEWithLogitsLoss with positive weight for class imbalance

	## Performance

	- Validation Accuracy: 93.94%
	- Precision: 92.75%
	- Recall: 87.27%
	- F1-Score: 89.93%
	- Training Device: MPS (Apple Silicon GPU)
	- Best Epoch: 15 (with early stopping)

	## Training Data

	- Total Samples: 3,548 phone call transcripts
	- Training Set: 2,838 samples
	- Validation Set: 710 samples
	- Class Distribution: 30.8% machine calls, 69.2% human calls
	- Source: ElevateNow call center data

	## Usage

	### Basic Inference

	```python
	from transformers import AutoTokenizer, AutoModelForSequenceClassification
	import torch

	# Load model and tokenizer
	model = AutoModelForSequenceClassification.from_pretrained("Adya662/bert-tiny-amd")
	tokenizer = AutoTokenizer.from_pretrained("Adya662/bert-tiny-amd")

	# Prepare input
	text = "Hello, this is John speaking"
	inputs = tokenizer(text, return_tensors="pt", max_length=128, truncation=True, padding=True)

	# Make prediction
	with torch.no_grad():
	outputs = model(**inputs)
	logits = outputs.logits.squeeze(-1)
	probability = torch.sigmoid(logits).item()
	is_machine = probability >= 0.5

	print(f"Prediction: {'Machine' if is_machine else 'Human'}")
	print(f"Confidence: {probability:.4f}")
	```

	### Production Usage

	```python
	from transformers import AutoTokenizer, AutoModelForSequenceClassification
	import torch

	class AMDClassifier:
	def __init__(self, model_name="Adya662/bert-tiny-amd"):
	self.model = AutoModelForSequenceClassification.from_pretrained(model_name)
	self.tokenizer = AutoTokenizer.from_pretrained(model_name)
	self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
	self.model.to(self.device)
	self.model.eval()

	def predict(self, transcript_text, threshold=0.5):
	# Predict if transcript is from answering machine
	inputs = self.tokenizer(
	transcript_text,
	return_tensors="pt",
	max_length=128,
	truncation=True,
	padding=True
	).to(self.device)

	with torch.no_grad():
	outputs = self.model(**inputs)
	logits = outputs.logits.squeeze(-1)
	probability = torch.sigmoid(logits).item()
	is_machine = probability >= threshold

	return is_machine, probability

	# Usage
	classifier = AMDClassifier()
	is_machine, confidence = classifier.predict("Hello, this is John speaking")
	```

	## Training Details

	- Optimizer: AdamW with weight decay (0.01)
	- Learning Rate: 3e-5 with linear scheduling
	- Batch Size: 32
	- Epochs: 15 (with early stopping)
	- Early Stopping: Patience of 3 epochs
	- Class Imbalance: Handled with positive weight

	## Limitations

	- Trained on English phone call transcripts
	- May not generalize well to other languages or domains
	- Performance may vary with different transcription quality
	- Designed for short utterances (max 128 tokens)

	## License

	MIT License - see LICENSE file for details.
	"""

	with open("README.md", "w") as f:
	f.write(readme_content)

	# Upload to Hub
	print("⬆️ Uploading to Hugging Face Hub...")
	try:
	api.upload_folder(
	folder_path=".",
	repo_id=REPO_ID,
	repo_type="model",
	commit_message="Upload trained BERT-Tiny AMD model with enhanced progressive features"
	)
	print("✅ Model uploaded successfully!")
	print(f"🔗 Model available at: https://huggingface.co/{REPO_ID}")
	return True
	except Exception as e:
	print(f"❌ Upload failed: {e}")
	return False

	if __name__ == "__main__":
	success = push_model_to_hub()
	if success:
	print("\n🎉 Model deployment completed successfully!")
	else:
	print("\n💥 Model deployment failed!")