import json
from typing import List, Optional, Tuple
import numpy as np
from pydantic import BaseModel, Field
from tenacity import retry, stop_after_attempt, wait_exponential
from swarms.utils.auto_download_check_packages import (
auto_check_and_download_package,
)
from swarms.utils.lazy_loader import lazy_import_decorator
from swarms.utils.loguru_logger import initialize_logger
logger = initialize_logger(log_folder="swarm_matcher")
class SwarmType(BaseModel):
name: str
description: str
embedding: Optional[List[float]] = Field(
default=None, exclude=True
)
class SwarmMatcherConfig(BaseModel):
model_name: str = "sentence-transformers/all-MiniLM-L6-v2"
    embedding_dim: int = (
        384  # Output dimension of sentence-transformers/all-MiniLM-L6-v2
    )
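# A small configuration sketch (illustrative only; the alternative model name
# below is an assumption and must be a valid sentence-transformers checkpoint,
# with embedding_dim set to that model's output size):
#
# config = SwarmMatcherConfig(
#     model_name="sentence-transformers/all-mpnet-base-v2",
#     embedding_dim=768,
# )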
@lazy_import_decorator
class SwarmMatcher:
"""
A class for matching tasks to swarm types based on their descriptions.
It utilizes a transformer model to generate embeddings for task and swarm type descriptions,
and then calculates the dot product to find the best match.
"""
def __init__(self, config: SwarmMatcherConfig):
"""
Initializes the SwarmMatcher with a configuration.
Args:
config (SwarmMatcherConfig): The configuration for the SwarmMatcher.
"""
logger.add("swarm_matcher_debug.log", level="DEBUG")
logger.debug("Initializing SwarmMatcher")
try:
import torch
except ImportError:
auto_check_and_download_package(
"torch", package_manager="pip", upgrade=True
)
import torch
try:
import transformers
except ImportError:
auto_check_and_download_package(
"transformers", package_manager="pip", upgrade=True
)
import transformers
self.torch = torch
try:
self.config = config
self.tokenizer = (
transformers.AutoTokenizer.from_pretrained(
config.model_name
)
)
self.model = transformers.AutoModel.from_pretrained(
config.model_name
)
self.swarm_types: List[SwarmType] = []
logger.debug("SwarmMatcher initialized successfully")
except Exception as e:
logger.error(f"Error initializing SwarmMatcher: {str(e)}")
raise
@retry(
stop=stop_after_attempt(3),
wait=wait_exponential(multiplier=1, min=4, max=10),
)
def get_embedding(self, text: str) -> np.ndarray:
"""
Generates an embedding for a given text using the configured model.
Args:
text (str): The text for which to generate an embedding.
Returns:
np.ndarray: The embedding vector for the text.
"""
logger.debug(f"Getting embedding for text: {text[:50]}...")
try:
inputs = self.tokenizer(
text,
return_tensors="pt",
padding=True,
truncation=True,
max_length=512,
)
with self.torch.no_grad():
outputs = self.model(**inputs)
embedding = (
outputs.last_hidden_state.mean(dim=1)
.squeeze()
.numpy()
)
logger.debug("Embedding generated successfully")
return embedding
except Exception as e:
logger.error(f"Error generating embedding: {str(e)}")
raise
def add_swarm_type(self, swarm_type: SwarmType):
"""
Adds a swarm type to the list of swarm types, generating an embedding for its description.
Args:
swarm_type (SwarmType): The swarm type to add.
"""
logger.debug(f"Adding swarm type: {swarm_type.name}")
try:
embedding = self.get_embedding(swarm_type.description)
swarm_type.embedding = embedding.tolist()
self.swarm_types.append(swarm_type)
logger.info(f"Added swarm type: {swarm_type.name}")
except Exception as e:
logger.error(
f"Error adding swarm type {swarm_type.name}: {str(e)}"
)
raise
def find_best_match(self, task: str) -> Tuple[str, float]:
"""
Finds the best match for a given task among the registered swarm types.
Args:
task (str): The task for which to find the best match.
Returns:
Tuple[str, float]: A tuple containing the name of the best matching swarm type and the score.
"""
logger.debug(f"Finding best match for task: {task[:50]}...")
        try:
            task_embedding = self.get_embedding(task)
            if not self.swarm_types:
                raise ValueError(
                    "No swarm types registered; call add_swarm_type() first."
                )
            best_match = None
            best_score = -float("inf")
for swarm_type in self.swarm_types:
score = np.dot(
task_embedding, np.array(swarm_type.embedding)
)
if score > best_score:
best_score = score
best_match = swarm_type
logger.info(
f"Best match for task: {best_match.name} (score: {best_score})"
)
return best_match.name, float(best_score)
except Exception as e:
logger.error(
f"Error finding best match for task: {str(e)}"
)
raise
def auto_select_swarm(self, task: str) -> str:
"""
Automatically selects the best swarm type for a given task based on their descriptions.
Args:
task (str): The task for which to select a swarm type.
Returns:
str: The name of the selected swarm type.
"""
logger.debug(f"Auto-selecting swarm for task: {task[:50]}...")
best_match, score = self.find_best_match(task)
logger.info(f"Task: {task}")
logger.info(f"Selected Swarm Type: {best_match}")
logger.info(f"Confidence Score: {score:.2f}")
return best_match
    def run_multiple(self, tasks: List[str], *args, **kwargs) -> List[str]:
        """
        Selects the best matching swarm type for each task in a list of tasks.
        Args:
            tasks (List[str]): The tasks for which to select swarm types.
        Returns:
            List[str]: The names of the selected swarm types, one per task.
        """
        return [self.auto_select_swarm(task) for task in tasks]
def save_swarm_types(self, filename: str):
"""
Saves the registered swarm types to a JSON file.
Args:
filename (str): The name of the file to which to save the swarm types.
"""
try:
with open(filename, "w") as f:
json.dump([st.dict() for st in self.swarm_types], f)
logger.info(f"Saved swarm types to {filename}")
except Exception as e:
logger.error(f"Error saving swarm types: {str(e)}")
raise
def load_swarm_types(self, filename: str):
"""
Loads swarm types from a JSON file.
Args:
filename (str): The name of the file from which to load the swarm types.
"""
try:
with open(filename, "r") as f:
swarm_types_data = json.load(f)
self.swarm_types = [
SwarmType(**st) for st in swarm_types_data
]
logger.info(f"Loaded swarm types from {filename}")
except Exception as e:
logger.error(f"Error loading swarm types: {str(e)}")
raise
def initialize_swarm_types(matcher: SwarmMatcher):
logger.debug("Initializing swarm types")
swarm_types = [
SwarmType(
name="AgentRearrange",
description="Optimize agent order and rearrange flow for multi-step tasks, ensuring efficient task allocation and minimizing bottlenecks. Keywords: orchestration, coordination, pipeline optimization, task scheduling, resource allocation, workflow management, agent organization, process optimization",
),
SwarmType(
name="MixtureOfAgents",
description="Combine diverse expert agents for comprehensive analysis, fostering a collaborative approach to problem-solving and leveraging individual strengths. Keywords: multi-agent system, expert collaboration, distributed intelligence, collective problem solving, agent specialization, team coordination, hybrid approaches, knowledge synthesis",
),
SwarmType(
name="SpreadSheetSwarm",
description="Collaborative data processing and analysis in a spreadsheet-like environment, facilitating real-time data sharing and visualization. Keywords: data analysis, tabular processing, collaborative editing, data transformation, spreadsheet operations, data visualization, real-time collaboration, structured data",
),
SwarmType(
name="SequentialWorkflow",
description="Execute tasks in a step-by-step, sequential process workflow, ensuring a logical and methodical approach to task execution. Keywords: linear processing, waterfall methodology, step-by-step execution, ordered tasks, sequential operations, process flow, systematic approach, staged execution",
),
SwarmType(
name="ConcurrentWorkflow",
description="Process multiple tasks or data sources concurrently in parallel, maximizing productivity and reducing processing time. Keywords: parallel processing, multi-threading, asynchronous execution, distributed computing, concurrent operations, simultaneous tasks, parallel workflows, scalable processing",
),
# SwarmType(
# name="HierarchicalSwarm",
# description="Organize agents in a hierarchical structure with clear reporting lines and delegation of responsibilities. Keywords: management hierarchy, organizational structure, delegation, supervision, chain of command, tiered organization, structured coordination",
# ),
# SwarmType(
# name="AdaptiveSwarm",
# description="Dynamically adjust agent behavior and swarm configuration based on task requirements and performance feedback. Keywords: dynamic adaptation, self-optimization, feedback loops, learning systems, flexible configuration, responsive behavior, adaptive algorithms",
# ),
# SwarmType(
# name="ConsensusSwarm",
# description="Achieve group decisions through consensus mechanisms and voting protocols among multiple agents. Keywords: group decision making, voting systems, collective intelligence, agreement protocols, democratic processes, collaborative decisions",
# ),
]
for swarm_type in swarm_types:
matcher.add_swarm_type(swarm_type)
logger.debug("Swarm types initialized")
@lazy_import_decorator
def swarm_matcher(task: str, *args, **kwargs):
    """
    Selects the best swarm type for a single task using the default
    SwarmMatcher configuration and the built-in swarm types.
    Returns:
        str: The name of the selected swarm type.
    """
config = SwarmMatcherConfig()
matcher = SwarmMatcher(config)
initialize_swarm_types(matcher)
# matcher.save_swarm_types(f"swarm_logs/{uuid4().hex}.json")
swarm_type = matcher.auto_select_swarm(task)
logger.info(f"{swarm_type}")
return swarm_type
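# A minimal usage sketch (commented out, like the examples at the end of this
# file); the task string is illustrative only:
#
# if __name__ == "__main__":
#     selected = swarm_matcher(
#         "Analyze this spreadsheet of sales data and create visualizations"
#     )
#     print(f"Selected swarm type: {selected}")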
# from typing import List, Tuple, Dict
# from pydantic import BaseModel, Field
# from loguru import logger
# from uuid import uuid4
# import chromadb
# import json
# from tenacity import retry, stop_after_attempt, wait_exponential
# class SwarmType(BaseModel):
# """A swarm type with its name, description and optional metadata"""
# id: str = Field(default_factory=lambda: str(uuid4()))
# name: str
# description: str
# metadata: Dict = Field(default_factory=dict)
# class SwarmMatcherConfig(BaseModel):
# """Configuration for the SwarmMatcher"""
# collection_name: str = "swarm_types"
# distance_metric: str = "cosine" # or "l2" or "ip"
# embedding_function: str = (
# "sentence-transformers/all-mpnet-base-v2" # Better model than MiniLM
# )
# persist_directory: str = "./chroma_db"
# class SwarmMatcher:
# """
# An improved swarm matcher that uses ChromaDB for better vector similarity search.
# Features:
# - Persistent storage of embeddings
# - Better vector similarity search with multiple distance metrics
# - Improved embedding model
# - Metadata filtering capabilities
# - Batch operations support
# """
# def __init__(self, config: SwarmMatcherConfig):
# """Initialize the improved swarm matcher"""
# logger.add("swarm_matcher.log", rotation="100 MB")
# self.config = config
# # Initialize ChromaDB client with persistence
# self.chroma_client = chromadb.Client()
# # Get or create collection
# try:
# self.collection = self.chroma_client.get_collection(
# name=config.collection_name,
# )
# except ValueError:
# self.collection = self.chroma_client.create_collection(
# name=config.collection_name,
# metadata={"hnsw:space": config.distance_metric},
# )
# logger.info(
# f"Initialized SwarmMatcher with collection '{config.collection_name}'"
# )
# def add_swarm_type(self, swarm_type: SwarmType) -> None:
# """Add a single swarm type to the collection"""
# try:
# self.collection.add(
# ids=[swarm_type.id],
# documents=[swarm_type.description],
# metadatas=[
# {"name": swarm_type.name, **swarm_type.metadata}
# ],
# )
# logger.info(f"Added swarm type: {swarm_type.name}")
# except Exception as e:
# logger.error(
# f"Error adding swarm type {swarm_type.name}: {str(e)}"
# )
# raise
# def add_swarm_types(self, swarm_types: List[SwarmType]) -> None:
# """Add multiple swarm types in batch"""
# try:
# self.collection.add(
# ids=[st.id for st in swarm_types],
# documents=[st.description for st in swarm_types],
# metadatas=[
# {"name": st.name, **st.metadata}
# for st in swarm_types
# ],
# )
# logger.info(f"Added {len(swarm_types)} swarm types")
# except Exception as e:
# logger.error(
# f"Error adding swarm types in batch: {str(e)}"
# )
# raise
# @retry(
# stop=stop_after_attempt(3),
# wait=wait_exponential(multiplier=1, min=4, max=10),
# )
# def find_best_matches(
# self,
# task: str,
# n_results: int = 3,
# score_threshold: float = 0.7,
# ) -> List[Tuple[str, float]]:
# """
# Find the best matching swarm types for a given task
# Returns multiple matches with their scores
# """
# try:
# results = self.collection.query(
# query_texts=[task],
# n_results=n_results,
# include=["metadatas", "distances"],
# )
# matches = []
# for metadata, distance in zip(
# results["metadatas"][0], results["distances"][0]
# ):
# # Convert distance to similarity score (1 - normalized_distance)
# score = 1 - (
# distance / 2
# ) # Normalize cosine distance to [0,1]
# if score >= score_threshold:
# matches.append((metadata["name"], score))
# logger.info(f"Found {len(matches)} matches for task")
# return matches
# except Exception as e:
# logger.error(f"Error finding matches for task: {str(e)}")
# raise
# def auto_select_swarm(self, task: str) -> str:
# """
# Automatically select the best swarm type for a task
# Returns only the top match
# """
# matches = self.find_best_matches(task, n_results=1)
# if not matches:
# logger.warning("No suitable matches found for task")
# return "SequentialWorkflow" # Default fallback
# best_match, score = matches[0]
# logger.info(
# f"Selected swarm type '{best_match}' with confidence {score:.3f}"
# )
# return best_match
# def run_multiple(self, tasks: List[str]) -> List[str]:
# """Process multiple tasks in batch"""
# return [self.auto_select_swarm(task) for task in tasks]
# def save_swarm_types(self, filename: str) -> None:
# """Export swarm types to JSON"""
# try:
# all_data = self.collection.get(
# include=["metadatas", "documents"]
# )
# swarm_types = [
# SwarmType(
# id=id_,
# name=metadata["name"],
# description=document,
# metadata={
# k: v
# for k, v in metadata.items()
# if k != "name"
# },
# )
# for id_, metadata, document in zip(
# all_data["ids"],
# all_data["metadatas"],
# all_data["documents"],
# )
# ]
# with open(filename, "w") as f:
# json.dump(
# [st.dict() for st in swarm_types], f, indent=2
# )
# logger.info(f"Saved swarm types to {filename}")
# except Exception as e:
# logger.error(f"Error saving swarm types: {str(e)}")
# raise
# def load_swarm_types(self, filename: str) -> None:
# """Import swarm types from JSON"""
# try:
# with open(filename, "r") as f:
# swarm_types_data = json.load(f)
# swarm_types = [SwarmType(**st) for st in swarm_types_data]
# self.add_swarm_types(swarm_types)
# logger.info(f"Loaded swarm types from {filename}")
# except Exception as e:
# logger.error(f"Error loading swarm types: {str(e)}")
# raise
# def initialize_default_swarm_types(matcher: SwarmMatcher) -> None:
# """Initialize the matcher with default swarm types"""
# swarm_types = [
# SwarmType(
# name="AgentRearrange",
# description="""
# Optimize agent order and rearrange flow for multi-step tasks, ensuring efficient task allocation
# and minimizing bottlenecks. Specialized in orchestration, coordination, pipeline optimization,
# task scheduling, resource allocation, workflow management, agent organization, and process optimization.
# Best for tasks requiring complex agent interactions and workflow optimization.
# """,
# metadata={
# "category": "optimization",
# "complexity": "high",
# },
# ),
# SwarmType(
# name="MixtureOfAgents",
# description="""
# Combine diverse expert agents for comprehensive analysis, fostering a collaborative approach
# to problem-solving and leveraging individual strengths. Focuses on multi-agent systems,
# expert collaboration, distributed intelligence, collective problem solving, agent specialization,
# team coordination, hybrid approaches, and knowledge synthesis. Ideal for complex problems
# requiring multiple areas of expertise.
# """,
# metadata={
# "category": "collaboration",
# "complexity": "high",
# },
# ),
# SwarmType(
# name="SpreadSheetSwarm",
# description="""
# Collaborative data processing and analysis in a spreadsheet-like environment, facilitating
# real-time data sharing and visualization. Specializes in data analysis, tabular processing,
# collaborative editing, data transformation, spreadsheet operations, data visualization,
# real-time collaboration, and structured data handling. Perfect for data-intensive tasks
# requiring structured analysis.
# """,
# metadata={
# "category": "data_processing",
# "complexity": "medium",
# },
# ),
# SwarmType(
# name="SequentialWorkflow",
# description="""
# Execute tasks in a step-by-step, sequential process workflow, ensuring a logical and methodical
# approach to task execution. Focuses on linear processing, waterfall methodology, step-by-step
# execution, ordered tasks, sequential operations, process flow, systematic approach, and staged
# execution. Best for tasks requiring strict order and dependencies.
# """,
# metadata={"category": "workflow", "complexity": "low"},
# ),
# SwarmType(
# name="ConcurrentWorkflow",
# description="""
# Process multiple tasks or data sources concurrently in parallel, maximizing productivity
# and reducing processing time. Specializes in parallel processing, multi-threading,
# asynchronous execution, distributed computing, concurrent operations, simultaneous tasks,
# parallel workflows, and scalable processing. Ideal for independent tasks that can be
# processed simultaneously.
# """,
# metadata={"category": "workflow", "complexity": "medium"},
# ),
# ]
# matcher.add_swarm_types(swarm_types)
# logger.info("Initialized default swarm types")
# def create_swarm_matcher(
# persist_dir: str = "./chroma_db",
# collection_name: str = "swarm_types",
# ) -> SwarmMatcher:
# """Convenience function to create and initialize a swarm matcher"""
# config = SwarmMatcherConfig(
# persist_directory=persist_dir, collection_name=collection_name
# )
# matcher = SwarmMatcher(config)
# initialize_default_swarm_types(matcher)
# return matcher
# # Example usage
# def swarm_matcher(task: str) -> str:
# # Create and initialize matcher
# matcher = create_swarm_matcher()
# swarm_type = matcher.auto_select_swarm(task)
# print(f"Task: {task}\nSelected Swarm: {swarm_type}\n")
# return swarm_type
# # # Example usage
# # if __name__ == "__main__":
# # # Create and initialize matcher
# # matcher = create_swarm_matcher()
# # # Example tasks
# # tasks = [
# # "Analyze this spreadsheet of sales data and create visualizations",
# # "Coordinate multiple AI agents to solve a complex problem",
# # "Process these tasks one after another in a specific order",
# # "Write multiple blog posts about the latest advancements in swarm intelligence all at once",
# # "Write a blog post about the latest advancements in swarm intelligence",
# # ]
# # # Process tasks
# # for task in tasks:
# # swarm_type = matcher.auto_select_swarm(task)
# # print(f"Task: {task}\nSelected Swarm: {swarm_type}\n")