import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import spaces # Required for Hugging Face Spaces GPU
import random
import numpy as np
# Set random seeds for reproducibility
def set_random_seed(seed=42):
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
if torch.cuda.is_available():
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
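        # Note: deterministic cuDNN kernels trade some speed for exact reproducibility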
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
# Set initial seed
set_random_seed(42)
# Determine the device to use (GPU if available, otherwise CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
# Load model and tokenizer
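# Note: force_download=True re-fetches the files on every cold start; it can be
# removed to reuse the Hugging Face cache once the downloads are known-good.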
model = AutoModelForCausalLM.from_pretrained(
"FlameF0X/SnowflakeCore-G1-Tiny2",
trust_remote_code=True,
force_download=True,
use_safetensors=True,
).to(device)
tokenizer = AutoTokenizer.from_pretrained(
    "FlameF0X/SnowflakeCore-G1-Tiny2",
    trust_remote_code=True,
    force_download=True,  # tokenizers do not load safetensors, so use_safetensors is dropped here
)
@spaces.GPU # Required decorator for GPU usage in Hugging Face Spaces
def advanced_generate(prompt, max_length=50, temperature=1.0, top_k=50, top_p=0.9,
repetition_penalty=1.1, do_sample=True, seed=None):
"""
Generates text with advanced sampling parameters.
The model and input tensors are moved to the appropriate device (GPU/CPU).
"""
# Set seed if provided
if seed is not None:
set_random_seed(seed)
model.eval()
# Move input_ids to the same device as the model
input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
generated = input_ids
with torch.no_grad():
        for _ in range(max_length):
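            # Note: the full sequence is re-encoded on every step (no KV cache),
            # which is O(n^2) in sequence length but keeps the sampling loop simple.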
# Get model outputs
outputs = model(input_ids=generated)
next_token_logits = outputs["logits"][:, -1, :]
            # Apply repetition penalty (CTRL-style: divide positive logits and
            # multiply negative ones, so repeated tokens are always discouraged)
            if repetition_penalty != 1.0:
                for token_id in set(generated[0].tolist()):
                    if next_token_logits[0, token_id] > 0:
                        next_token_logits[0, token_id] /= repetition_penalty
                    else:
                        next_token_logits[0, token_id] *= repetition_penalty
            # Apply temperature (skip 0 to avoid division by zero; sampling
            # falls back to greedy decoding below when temperature <= 0)
            if temperature > 0 and temperature != 1.0:
                next_token_logits = next_token_logits / temperature
# Convert logits to probabilities
probs = torch.softmax(next_token_logits, dim=-1)
if do_sample and temperature > 0:
# Apply top-k filtering
if top_k > 0:
top_k_probs, top_k_indices = torch.topk(probs, min(top_k, probs.size(-1)))
probs_filtered = torch.zeros_like(probs)
probs_filtered.scatter_(1, top_k_indices, top_k_probs)
probs = probs_filtered
# Apply top-p (nucleus) filtering
                if top_p < 1.0:
                    sorted_probs, sorted_indices = torch.sort(probs, descending=True)
                    cumulative_probs = torch.cumsum(sorted_probs, dim=-1)
                    # Remove tokens once the cumulative probability exceeds top_p,
                    # shifting the mask right so the token that crosses the
                    # threshold is kept (standard nucleus sampling)
                    sorted_indices_to_remove = cumulative_probs > top_p
                    sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
                    sorted_indices_to_remove[..., 0] = False
                    # Map the mask back to the original (unsorted) token order
                    indices_to_remove = sorted_indices_to_remove.scatter(1, sorted_indices, sorted_indices_to_remove)
                    probs[indices_to_remove] = 0
# Renormalize
probs = probs / probs.sum(dim=-1, keepdim=True)
# Sample from the filtered distribution
next_token_id = torch.multinomial(probs, num_samples=1)
else:
# Greedy decoding
next_token_id = torch.argmax(probs, dim=-1).unsqueeze(-1)
generated = torch.cat((generated, next_token_id), dim=1)
            # Stop at end of sequence, if the tokenizer defines one
            if tokenizer.eos_token_id is not None and next_token_id.item() == tokenizer.eos_token_id:
                break
return tokenizer.decode(generated[0], skip_special_tokens=True)
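# Minimal direct-call sanity check for local debugging (illustrative values,
# commented out so the Space only serves the Gradio UI):
# print(advanced_generate("Once upon a time", max_length=30, temperature=0.8, seed=42))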
def gradio_generate(prompt, max_length, temperature, top_k, top_p, repetition_penalty, do_sample, seed):
"""
Wrapper function for Gradio interface.
"""
    # gr.Number returns None when the field is empty; fall back to a random seed
    if seed is None:
        seed = random.randint(0, 2**32 - 1)
    else:
        try:
            seed = int(seed)
        except (TypeError, ValueError):
            seed = random.randint(0, 2**32 - 1)
    # Clamp parameters to the same ranges the UI sliders expose
    max_length = max(10, min(2048, int(max_length)))
    temperature = max(0.1, min(2.0, float(temperature)))
    top_k = max(1, min(150, int(top_k)))
    top_p = max(0.1, min(1.0, float(top_p)))
    repetition_penalty = max(1.0, min(3.0, float(repetition_penalty)))
return advanced_generate(
prompt=prompt,
max_length=max_length,
temperature=temperature,
top_k=top_k,
top_p=top_p,
repetition_penalty=repetition_penalty,
do_sample=do_sample,
seed=seed
)
# Custom CSS for ultra-compact UI and full width
custom_css = """
.gradio-container {
    max-width: 100% !important;  /* full width */
padding: 10px !important;
}
.compact-slider {
margin: 2px 0 !important;
}
.parameter-info {
font-size: 10px;
color: #888;
margin: -8px 0 8px 0 !important;
line-height: 1.1;
padding: 2px 4px;
background: rgba(0,0,0,0.05);
border-radius: 3px;
}
.gradio-group {
padding: 8px !important;
margin: 4px 0 !important;
}
.gradio-textbox {
margin-bottom: 8px !important;
}
.gradio-slider {
margin: 4px 0 !important;
}
.gradio-checkbox {
margin: 4px 0 !important;
}
.gradio-number {
margin: 4px 0 !important;
}
.compact-header {
margin: 0 0 10px 0 !important;
padding: 8px !important;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
border-radius: 8px;
color: white;
}
.control-section {
background: rgba(255,255,255,0.02);
border-radius: 6px;
padding: 8px !important;
margin: 4px 0 !important;
}
"""
# Create the Gradio interface with ultra-compact layout
with gr.Blocks(css=custom_css, theme=gr.themes.Soft(), title="SnowflakeCore Text Generator") as iface:
# Compact header
gr.HTML(f"""
<div class="compact-header">
        <h2 style="margin: 0; font-size: 18px;">🔥 SnowflakeCore-G1-Tiny2 | Running on: {device}</h2>
</div>
""")
with gr.Row():
# Left column - Input/Output (60% width)
with gr.Column(scale=6, min_width=400):
prompt_input = gr.Textbox(
lines=4,
placeholder="Enter your prompt here...",
label="πŸ“ Input Prompt",
show_label=True
)
generate_btn = gr.Button("πŸš€ Generate", variant="primary", size="sm", scale=1)
output_text = gr.Textbox(
lines=8,
label="✨ Generated Text",
show_label=True,
interactive=False
)
# Right column - Parameters (40% width)
with gr.Column(scale=4, min_width=300):
gr.HTML("<div style='font-weight: bold; font-size: 14px; margin-bottom: 8px; color: #333;'>βš™οΈ Parameters</div>")
# Core parameters in compact group
with gr.Group(elem_classes=["control-section"]):
max_length = gr.Slider(10, 2048, value=100, step=5, label="Max Length", elem_classes=["compact-slider"])
gr.HTML("<div class='parameter-info'>πŸ“ Tokens to generate (10-2048)</div>")
temperature = gr.Slider(0.1, 2.0, value=0.8, step=0.05, label="Temperature", elem_classes=["compact-slider"])
gr.HTML("<div class='parameter-info'>🌑️ Creativity: 0.1=focused, 2.0=creative</div>")
# Advanced parameters
with gr.Group(elem_classes=["control-section"]):
top_k = gr.Slider(1, 150, value=50, step=1, label="Top-K", elem_classes=["compact-slider"])
gr.HTML("<div class='parameter-info'>🎯 Word choice diversity</div>")
top_p = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-P", elem_classes=["compact-slider"])
gr.HTML("<div class='parameter-info'>πŸŽͺ Probability cutoff</div>")
repetition_penalty = gr.Slider(1.0, 3.0, value=1.15, step=0.05, label="Rep. Penalty", elem_classes=["compact-slider"])
gr.HTML("<div class='parameter-info'>πŸ”„ Anti-repetition strength</div>")
# Controls row
with gr.Row():
do_sample = gr.Checkbox(value=True, label="Sampling", scale=1)
seed_input = gr.Number(label="Seed", value=None, precision=0, scale=1, minimum=0)
gr.HTML("<div class='parameter-info'>🎲 Sampling=creative | Seed=reproducible</div>")
# Examples section
gr.HTML("<h3 style='margin-top: 25px; margin-bottom: 10px;'>πŸ’‘ Quick Examples</h3>")
examples = gr.Examples(
examples=[
["Once upon a time in a magical forest,", 120, 0.8, 40, 0.9, 1.2, True, 42],
["The future of artificial intelligence is", 80, 1.0, 50, 0.95, 1.1, True, None],
["In a world where technology and nature coexist,", 150, 1.2, 60, 0.85, 1.3, True, 123],
["Write a haiku about winter:", 50, 0.7, 30, 0.8, 1.0, True, None],
["Explain quantum computing in simple terms:", 200, 0.6, 40, 0.9, 1.1, True, None]
],
inputs=[prompt_input, max_length, temperature, top_k, top_p, repetition_penalty, do_sample, seed_input],
outputs=[output_text],
fn=gradio_generate,
cache_examples=False,
label=None
)
# Event handlers
generate_btn.click(
fn=gradio_generate,
inputs=[prompt_input, max_length, temperature, top_k, top_p, repetition_penalty, do_sample, seed_input],
outputs=[output_text]
)
# Tips section
with gr.Accordion("πŸ“š Parameter Guide & Tips", open=False):
gr.HTML("""
<div style="font-size: 12px; line-height: 1.4;">
<h4>πŸŽ›οΈ Parameter Combinations for Different Use Cases:</h4>
<strong>πŸ“ Creative Writing:</strong> Temperature: 0.8-1.2, Top-K: 40-60, Top-P: 0.85-0.95, Rep. Penalty: 1.1-1.3<br>
<strong>πŸ“‹ Factual/Technical:</strong> Temperature: 0.3-0.7, Top-K: 20-40, Top-P: 0.9-1.0, Rep. Penalty: 1.0-1.1<br>
<strong>🎭 Experimental/Artistic:</strong> Temperature: 1.2-2.0, Top-K: 60-100, Top-P: 0.7-0.9, Rep. Penalty: 1.2-1.5<br>
<strong>🎯 Focused/Consistent:</strong> Temperature: 0.1-0.5, Top-K: 10-30, Top-P: 0.8-0.95, Rep. Penalty: 1.0-1.2<br><br>
        <strong>💡 Pro Tips:</strong><br>
        • Use the same seed to reproduce results across generations<br>
        • A higher repetition penalty helps with stuck loops or repeated phrases<br>
        • Lower temperature + higher top-p = focused but varied vocabulary<br>
        • Disable sampling for completely deterministic output (useful for testing)<br>
        • Start with the defaults and adjust one parameter at a time to understand each effect
</div>
""")
# Launch the Gradio application
if __name__ == "__main__":
iface.launch()