| | |
| | """ |
| | Generate 8 synthetic examples for README with model predictions |
| | 2 examples per class: simple + nuanced |
| | """ |
| |
|
| | import torch |
| | from transformers import DebertaV2Tokenizer, DebertaV2ForSequenceClassification |
| |
|
def get_examples():
    """Return the 8 synthetic README examples: 2 per class (simple + nuanced).

    Returns:
        A list of dicts, each with keys 'text', 'expected' (one of NONE,
        INTERNAL, EXTERNAL, BOTH), and 'type' ('simple' or 'nuanced').
    """
    # (expected class, example type, text) — one simple and one nuanced
    # example per class, in class order NONE, INTERNAL, EXTERNAL, BOTH.
    specs = [
        ("NONE", "simple",
         "Margaret runs the village bakery, making fresh bread every morning at 5 AM for the past thirty years."),
        ("NONE", "nuanced",
         "Dr. Harrison performs routine medical check-ups with methodical precision, maintaining professional distance while patients share their deepest fears about mortality."),
        ("INTERNAL", "simple",
         "Emma struggles with overwhelming anxiety after her father's harsh criticism, questioning her self-worth and abilities."),
        ("INTERNAL", "nuanced",
         "The renowned pianist Clara finds herself paralyzed by perfectionism, her childhood trauma surfacing as she prepares for the performance that could define her legacy."),
        ("EXTERNAL", "simple",
         "Knight Roderick embarks on a dangerous quest to retrieve the stolen crown from the dragon's lair."),
        ("EXTERNAL", "nuanced",
         "Master thief Elias infiltrates the heavily guarded fortress, disabling security systems and evading patrol routes, each obstacle requiring new techniques and tools to reach the vault."),
        ("BOTH", "simple",
         "Sarah must rescue her kidnapped daughter from the terrorist compound while confronting her own paralyzing guilt about being an absent mother."),
        ("BOTH", "nuanced",
         "Archaeologist Sophia discovers an ancient artifact that could rewrite history, but must confront her own ethical boundaries and childhood abandonment issues as powerful forces try to silence her."),
    ]
    return [
        {"text": text, "expected": expected, "type": kind}
        for expected, kind, text in specs
    ]
| |
|
def predict_examples(model_dir='.'):
    """Run the classifier over all synthetic examples and print the results.

    Args:
        model_dir: Directory holding the fine-tuned DeBERTa-v2 model and
            tokenizer. Defaults to the current directory, preserving the
            original hard-coded behavior.

    Returns:
        A list of dicts, one per example, with keys 'text', 'expected',
        'predicted', 'confidence', 'correct', and 'type'.
    """
    print("Loading model...")
    tokenizer = DebertaV2Tokenizer.from_pretrained(model_dir)
    model = DebertaV2ForSequenceClassification.from_pretrained(model_dir)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()

    # Index order must match the label mapping the model was trained with.
    class_names = ['NONE', 'INTERNAL', 'EXTERNAL', 'BOTH']
    examples = get_examples()

    results = []

    print(f"Running predictions on {len(examples)} examples...\n")

    for i, example in enumerate(examples, 1):
        text = example['text']
        expected = example['expected']
        example_type = example['type']

        inputs = tokenizer(text, return_tensors="pt", truncation=True,
                           padding=True, max_length=512)
        inputs = {k: v.to(device) for k, v in inputs.items()}

        # Inference only — no gradient tracking needed.
        with torch.no_grad():
            outputs = model(**inputs)
            probabilities = torch.softmax(outputs.logits, dim=-1)
            predicted_idx = torch.argmax(probabilities, dim=-1).item()
            confidence = probabilities[0][predicted_idx].item()

        predicted = class_names[predicted_idx]
        is_correct = predicted == expected

        results.append({
            'text': text,
            'expected': expected,
            'predicted': predicted,
            'confidence': confidence,
            'correct': is_correct,
            'type': example_type,
        })

        status = "✅" if is_correct else "❌"
        print(f"{status} Example {i} ({expected} - {example_type})")
        print(f"  Predicted: {predicted} (confidence: {confidence:.3f})")
        print(f"  Text: {text[:80]}...")
        print()

    return results
| |
|
def format_for_readme(results):
    """Render prediction results as a Markdown section for the README.

    Args:
        results: List of result dicts as produced by predict_examples,
            each with 'text', 'expected', 'predicted', 'confidence',
            'correct', and 'type' keys.

    Returns:
        A Markdown string with examples grouped by expected class, in the
        fixed order NONE, INTERNAL, EXTERNAL, BOTH.
    """
    # Bucket results by expected label, preserving their input order.
    grouped = {}
    for entry in results:
        grouped.setdefault(entry['expected'], []).append(entry)

    parts = [
        "\n"
        "## Example Classifications\n"
        "\n"
        "Here are sample classifications showing the model's predictions with confidence scores:\n"
        "\n"
    ]

    for label in ('NONE', 'INTERNAL', 'EXTERNAL', 'BOTH'):
        if label not in grouped:
            continue
        parts.append(f"### {label}\n\n")
        for entry in grouped[label]:
            score = entry['confidence']
            # Icon reflects confidence: strong (>0.7), borderline (>0.5), weak.
            if score > 0.7:
                icon = "✅"
            elif score > 0.5:
                icon = "⚠️"
            else:
                icon = "❌"
            parts.append(f"**{entry['type'].title()} Example:**\n")
            parts.append(f"> *\"{entry['text']}\"*\n\n")
            parts.append(
                f"**Prediction:** {entry['predicted']} {icon} "
                f"(confidence: {score:.3f})\n\n"
            )

    return "".join(parts)
| |
|
if __name__ == "__main__":
    # Run predictions, preview the generated section, then persist it.
    results = predict_examples()
    readme_section = format_for_readme(results)

    print("README Section:")
    print("=" * 50)
    print(readme_section)

    # Explicit utf-8: the section contains emoji icons (✅/⚠️/❌), and the
    # platform default encoding (e.g. cp1252 on Windows) cannot encode them,
    # which would raise UnicodeEncodeError.
    with open('readme_examples_section.txt', 'w', encoding='utf-8') as f:
        f.write(readme_section)

    print("Saved README section to 'readme_examples_section.txt'")