Spaces:

spartan8806
/

atles-echo

Sleeping

App Files Files Community

atles-echo / app.py

spartan8806

Upload 3 files

73baae2 verified 9 days ago

raw

history blame contribute delete

6.82 kB

	"""
	ATLES-ECHO - Semantic Embedding Service
	A Hugging Face Space for generating embeddings using the ATLES Champion model.
	"""

	import gradio as gr
	from sentence_transformers import SentenceTransformer
	import numpy as np

	# Load the ATLES Champion embedding model
	# This runs at import time: the model is instantiated once at module level
	# and is read as the global ``model`` by generate_embedding, compare_texts
	# and batch_embed below.
	print("Loading ATLES Champion Embedding model...")
	model = SentenceTransformer("spartan8806/atles-champion-embedding")
	# Log the embedding dimension reported by the loaded model.
	print(f"Model loaded! Dimension: {model.get_sentence_embedding_dimension()}")

	def generate_embedding(text: str) -> dict:
	    """Embed a single piece of text with the module-level ``model``.

	    Args:
	        text: The text to embed.

	    Returns:
	        An error payload (``embedding`` and ``dimension`` set to None) when
	        ``text`` is empty, None, or whitespace-only. Otherwise a dict with a
	        preview of the text (first 100 characters plus "..." when longer),
	        the vector length, its first 10 values, and the full vector as a
	        list.
	    """
	    has_content = bool(text) and bool(text.strip())
	    if not has_content:
	        return {"error": "Please enter some text", "embedding": None, "dimension": None}

	    # The encoder is asked for normalized output via normalize_embeddings=True.
	    vector = model.encode(text, normalize_embeddings=True)

	    # Long inputs are truncated for display only; the full text is embedded.
	    if len(text) > 100:
	        preview = text[:100] + "..."
	    else:
	        preview = text

	    return {
	        "text_preview": preview,
	        "dimension": len(vector),
	        "embedding_preview": vector[:10].tolist(),  # First 10 values
	        "embedding_full": vector.tolist(),
	    }

	def compare_texts(text1: str, text2: str) -> dict:
	    """Compare the semantic similarity of two texts.

	    Args:
	        text1: First text.
	        text2: Second text.

	    Returns:
	        ``{"error": ..., "similarity": None}`` when either text is None,
	        empty, or whitespace-only. Otherwise a dict with a preview of each
	        text (first 50 characters plus "..." when longer), the similarity
	        rounded to 4 decimals, the same value formatted as a percentage, and
	        a human-readable interpretation of the score.
	    """
	    # Check for None/empty before calling .strip(): a None input would
	    # otherwise raise AttributeError instead of returning the error payload.
	    # This mirrors the guard used by generate_embedding.
	    if not text1 or not text2 or not text1.strip() or not text2.strip():
	        return {"error": "Please enter both texts", "similarity": None}

	    # Encode both texts in one call, requesting normalized vectors.
	    embeddings = model.encode([text1, text2], normalize_embeddings=True)

	    # With normalized vectors requested, the dot product is used as the
	    # cosine similarity.
	    similarity = float(np.dot(embeddings[0], embeddings[1]))

	    return {
	        "text1_preview": text1[:50] + "..." if len(text1) > 50 else text1,
	        "text2_preview": text2[:50] + "..." if len(text2) > 50 else text2,
	        "similarity": round(similarity, 4),
	        "similarity_percent": f"{similarity * 100:.1f}%",
	        "interpretation": get_similarity_interpretation(similarity),
	    }

	def get_similarity_interpretation(score: float) -> str:
	    """Map a similarity score to a short human-readable label.

	    The score is checked against descending lower bounds (0.9, 0.7, 0.5,
	    0.3); the label of the first bound it meets is returned. Anything below
	    the lowest bound is reported as different topics.
	    """
	    # (inclusive lower bound, label), ordered from highest to lowest.
	    bands = (
	        (0.9, "🟢 Nearly identical meaning"),
	        (0.7, "🟡 Very similar"),
	        (0.5, "🟠 Somewhat related"),
	        (0.3, "🔴 Loosely related"),
	    )
	    for lower_bound, label in bands:
	        if score >= lower_bound:
	            return label
	    return "⚫ Different topics"

	def batch_embed(texts: str, max_texts: int = 10) -> dict:
	    """Generate embeddings for multiple texts (one per line).

	    Args:
	        texts: Newline-separated texts. Each line is stripped and blank
	            lines are ignored. None is treated as empty input.
	        max_texts: Maximum number of non-blank lines accepted per call
	            (defaults to 10).

	    Returns:
	        ``{"error": ..., "embeddings": None}`` when there are no non-blank
	        lines or more than ``max_texts`` of them. Otherwise a dict with the
	        number of texts, the embedding dimension, and one entry per text
	        holding its 1-based index, a preview of the text (first 50
	        characters plus "..." when longer), and the first 5 embedding
	        values.
	    """
	    # `texts or ""` keeps a None input from crashing on .split().
	    stripped = (line.strip() for line in (texts or "").split('\n'))
	    lines = [line for line in stripped if line]

	    if not lines:
	        return {"error": "Please enter at least one text (one per line)", "embeddings": None}

	    if len(lines) > max_texts:
	        return {"error": f"Maximum {max_texts} texts at a time", "embeddings": None}

	    # Encode all lines in one call, requesting normalized vectors.
	    embeddings = model.encode(lines, normalize_embeddings=True)

	    results = [
	        {
	            "index": position,
	            "text": text[:50] + "..." if len(text) > 50 else text,
	            "embedding_preview": emb[:5].tolist(),
	        }
	        for position, (text, emb) in enumerate(zip(lines, embeddings), start=1)
	    ]

	    return {
	        "count": len(lines),
	        "dimension": len(embeddings[0]),
	        "results": results,
	    }

	# Create Gradio interface
	# Layout: an intro Markdown header, three tabs (single embedding, pairwise
	# similarity, batch embedding), and a closing "About" Markdown footer.
	with gr.Blocks(
	    title="ATLES-ECHO Embedding Service",
	    theme=gr.themes.Soft(primary_hue="blue", secondary_hue="cyan")
	) as demo:

	    gr.Markdown("""
	# 🧠 ATLES-ECHO Embedding Service

	Generate high-quality semantic embeddings using the ATLES Champion model.

	- Model: [spartan8806/atles-champion-embedding](https://huggingface.co/spartan8806/atles-champion-embedding)
	- Dimension: 768
	- Top-10 MTEB Performance: Pearson 0.8445, Spearman 0.8374
	""")

	    with gr.Tabs():
	        # Tab 1: Single Embedding
	        with gr.TabItem("🔤 Single Embedding"):
	            gr.Markdown("Generate an embedding for a single piece of text.")

	            with gr.Row():
	                # Left column: text input and the button that triggers it.
	                with gr.Column():
	                    single_input = gr.Textbox(
	                        label="Input Text",
	                        placeholder="Enter text to embed...",
	                        lines=3
	                    )
	                    single_btn = gr.Button("Generate Embedding", variant="primary")

	                # Right column: JSON view of the dict returned by the handler.
	                with gr.Column():
	                    single_output = gr.JSON(label="Embedding Result")

	            # Clicking the button calls generate_embedding with the textbox
	            # value and shows its return value in the JSON panel.
	            single_btn.click(
	                fn=generate_embedding,
	                inputs=single_input,
	                outputs=single_output
	            )

	        # Tab 2: Compare Texts
	        with gr.TabItem("⚖️ Compare Similarity"):
	            gr.Markdown("Compare the semantic similarity between two texts.")

	            with gr.Row():
	                text1_input = gr.Textbox(label="Text 1", placeholder="First text...", lines=2)
	                text2_input = gr.Textbox(label="Text 2", placeholder="Second text...", lines=2)

	            compare_btn = gr.Button("Compare Similarity", variant="primary")
	            compare_output = gr.JSON(label="Similarity Result")

	            # Both textboxes are passed, in order, as text1 and text2.
	            compare_btn.click(
	                fn=compare_texts,
	                inputs=[text1_input, text2_input],
	                outputs=compare_output
	            )

	        # Tab 3: Batch Embedding
	        with gr.TabItem("📦 Batch Embed"):
	            gr.Markdown("Generate embeddings for multiple texts (one per line, max 10).")

	            with gr.Row():
	                with gr.Column():
	                    batch_input = gr.Textbox(
	                        label="Texts (one per line)",
	                        placeholder="Text 1\nText 2\nText 3...",
	                        lines=6
	                    )
	                    batch_btn = gr.Button("Generate Batch Embeddings", variant="primary")

	                with gr.Column():
	                    batch_output = gr.JSON(label="Batch Results")

	            # The whole textbox value is handed to batch_embed, which splits
	            # it into lines itself.
	            batch_btn.click(
	                fn=batch_embed,
	                inputs=batch_input,
	                outputs=batch_output
	            )

	    # Footer. The link separator is a bare "|": writing it as a
	    # backslash-escaped pipe inside a regular (non-raw) Python string is an
	    # invalid escape sequence and triggers a warning on newer interpreters.
	    gr.Markdown("""
	---
	### About ATLES-ECHO

	ATLES-ECHO is the semantic memory core of the ATLES ecosystem - your AI digital twin that learns from your digital life.

	Features:
	- 🧠 High-quality semantic embeddings (768 dimensions)
	- ⚡ Fast inference with normalized vectors
	- 🎯 Top-10 MTEB benchmark performance
	- 🔒 Built for the ATLES privacy-first ecosystem

	[View Model Card](https://huggingface.co/spartan8806/atles-champion-embedding) | [ATLES GitHub](https://github.com/spartan8806)
	""")

	# Launch the app
	# Only start the Gradio app when this file is run as a script; importing the
	# module builds ``demo`` without launching it.
	if __name__ == "__main__":
	    demo.launch()