mcp-server

Paused

App Files Files Community

mcp-server / tests /test_wandb_gql_examples.py

NiWaRe

mcp_base

f647629 3 months ago

raw

history blame contribute delete

11.8 kB

	# import pytest
	# import re
	# import inspect
	# import ast
	# import json
	# import os
	# from src.wandb_mcp_server.server import query_wandb_tool # Assuming src is importable


	# # --- Configuration ---
	# TARGET_ENTITY = "c-metrics"
	# TARGET_PROJECT = "hallucination"

	# # --- Helper Function to Extract Examples ---

	# def extract_gql_examples_from_docstring(docstring):
	# """Parses a docstring to extract GraphQL examples marked by specific delimiters."""
	# examples = []
	# # Regex to find the blocks delimited by <!-- WANDB_GQL_EXAMPLE_START/END -->
	# # Restore original regex with backreference
	# example_pattern = re.compile(
	# r'<!-- WANDB_GQL_EXAMPLE_START name=(\w+) -->(.*?)<!-- WANDB_GQL_EXAMPLE_END name=\1 -->', # Restored \1
	# re.DOTALL
	# )
	# # Regex to find graphql code blocks
	# graphql_pattern = re.compile(r'\s*```graphql\s*\n(.*?)\n\s*```', re.DOTALL)
	# # Regex to find python code blocks
	# python_pattern = re.compile(r'\s*```python\s*\n(.*?)\n\s*```', re.DOTALL)

	# # --- DEBUGGING ---
	# print(f"\n>>> DEBUG: Inside extract_gql_examples_from_docstring")
	# print(f" Attempting to find matches with pattern: {example_pattern.pattern}")
	# print(f" in docstring of length {len(docstring)}")
	# matches_found = 0
	# # --- END DEBUGGING ---

	# for match in example_pattern.finditer(docstring):
	# # --- DEBUGGING ---
	# matches_found += 1
	# print(f" >>> Found match {matches_found}: name='{match.group(1)}'")
	# # --- END DEBUGGING ---

	# name = match.group(1)
	# content = match.group(2)

	# # --- DEBUGGING ---
	# print(f" --- Content for '{name}' start ---")
	# print(content)
	# print(f" --- Content for '{name}' end ---")
	# # --- END DEBUGGING ---

	# graphql_match = graphql_pattern.search(content)
	# python_match = python_pattern.search(content)

	# if graphql_match and python_match:
	# query = graphql_match.group(1).strip()
	# # Extract the python code string, removing comments if necessary for exec
	# variables_code_str = python_match.group(1).strip()
	# # Remove comments starting with # to avoid issues with exec
	# variables_code_str = re.sub(r'^#.*$', '', variables_code_str, flags=re.MULTILINE).strip()

	# # Attempt to parse the variable assignment part more robustly if it's simple
	# try:
	# # A simple approach might assume the last line is `variables = ...`
	# # More robustly, find the assignment
	# assignment_match = re.search(r'variables\s*=\s*(\{.*?\})', variables_code_str, re.DOTALL)
	# variables_dict_code = assignment_match.group(1) if assignment_match else variables_code_str
	# # --- DEBUGGING ---
	# print(f" >>> Appending example: {name}")
	# # --- END DEBUGGING ---
	# examples.append({
	# "name": name,
	# "query": query,
	# "variables_code": variables_dict_code # Store the code string for the dict/assignment
	# })
	# except Exception as e:
	# print(f"Warning: Could not parse variables for example '{name}'. Error: {e}")
	# # Decide if you want to skip or add with None/error marker
	# # examples.append({"name": name, "query": query, "variables_code": None, "error": str(e)})

	# # --- DEBUGGING ---
	# print(f" Finished finditer loop. Total matches found: {matches_found}")
	# print(f"<<< DEBUG: Exiting extract_gql_examples_from_docstring\n")
	# # --- END DEBUGGING ---

	# if not examples:
	# raise ValueError("No examples found in docstring. Check delimiters and file content.")

	# return examples

	# # --- Pytest Fixture for Loading Examples ---
	# @pytest.fixture(scope="session")
	# def gql_examples():
	# """Reads the target function's docstring and extracts GQL examples."""
	# try:
	# target_docstring = inspect.getdoc(query_wandb_tool)
	# if not target_docstring:
	# raise ImportError(f"Could not get docstring for query_wandb_tool.")

	# # --- DEBUGGING: Print the retrieved docstring ---
	# print("\n--- Retrieved Docstring by inspect.getdoc() ---")
	# print(target_docstring)
	# print("--- End of Retrieved Docstring ---\n")
	# # --- END DEBUGGING ---

	# extracted = extract_gql_examples_from_docstring(target_docstring)
	# # Filter out examples where variables couldn't be parsed if the helper function indicates so
	# valid_examples = [ex for ex in extracted if ex.get("variables_code")]
	# if not valid_examples:
	# raise ValueError("No valid examples with variable code found after parsing.")
	# return valid_examples
	# except Exception as e:
	# # pytest will report this error during fixture setup
	# pytest.fail(f"Failed to setup gql_examples fixture: {e}", pytrace=False)

	# _example_names = []
	# try:
	# # Attempt to pre-load examples just to get names for parameterization
	# # Note: This duplicates loading but simplifies parametrize setup
	# # The fixture ensures the main test execution uses the proper setup/cached result.
	# _target_docstring = inspect.getdoc(query_wandb_tool)
	# if not _target_docstring:
	# raise ImportError("Docstring not found at collection time.")
	# _extracted_examples = extract_gql_examples_from_docstring(_target_docstring)
	# _example_names = [ex["name"] for ex in _extracted_examples if ex.get("variables_code")]
	# if not _example_names:
	# raise ValueError("No valid example names found at collection time.")
	# except Exception as e:
	# print(f"Warning during test collection: Could not pre-load example names - {e}")
	# # If collection fails to get names, the test function relying on the fixture
	# # will fail later during setup/execution, which is acceptable.
	# _example_names = ["SETUP_ERROR_DURING_COLLECTION"] # Provide a placeholder


	# # --- Test Function ---

	# # Apply the live_api marker
	# @pytest.mark.live_api
	# @pytest.mark.parametrize(
	# "name", # Parametrize only by the example name
	# _example_names
	# )
	# def test_wandb_gql_example(name, gql_examples): # Inject fixture here, remove query/variables_code
	# """Runs a test for each extracted GraphQL example using live API calls."""

	# if name == "SETUP_ERROR_DURING_COLLECTION":
	# pytest.fail("Test collection could not determine example names. Check setup.")

	# # Find the correct example data from the fixture result based on the parameterized name
	# example_data = next((ex for ex in gql_examples if ex['name'] == name), None)
	# if not example_data:
	# pytest.fail(f"Could not find example data for name '{name}' in gql_examples fixture result.")

	# # Use the data looked up from the fixture
	# query = example_data["query"]
	# variables_code = example_data["variables_code"]

	# # The rest of the test logic remains largely the same...
	# print(f"\nRunning test for example: {name}")
	# print(f"Query:\n{query}")
	# print(f"Variables Code:\n{variables_code}")

	# variables = {}
	# try:
	# # Execute the Python code string to get the variables dictionary.
	# # Reverting to exec as ast.literal_eval cannot handle nested strings required for JSON literals.
	# local_scope = {'json': json} # Provide json module in the execution scope
	# # The variable `variables_code` should contain the raw python code from the docstring block
	# exec(variables_code, local_scope)

	# # Check if 'variables' was defined in the executed code
	# if 'variables' not in local_scope:
	# raise NameError("Executed code snippet did not define a 'variables' dictionary.")

	# variables = local_scope['variables']

	# if not isinstance(variables, dict):
	# raise TypeError(f"Executed code defined 'variables', but it is not a dictionary. Got: {type(variables)}")

	# print(f"Original Variables: {variables}")

	# # Override entity and project for the test run
	# # Check if the keys exist before assigning, especially for mutations
	# if 'entity' in variables or name.endswith('Info') or name.endswith('Runs') or name.endswith('Keys') or name.endswith('Sampled') or name.endswith('Details'):
	# variables['entity'] = TARGET_ENTITY
	# if 'project' in variables or name.endswith('Info') or name.endswith('Runs') or name.endswith('Keys') or name.endswith('Sampled') or name.endswith('Details'):
	# variables['project'] = TARGET_PROJECT
	# # Handle entityName/projectName variants if needed
	# if 'entityName' in variables:
	# variables['entityName'] = TARGET_ENTITY
	# if 'projectName' in variables:
	# variables['projectName'] = TARGET_PROJECT

	# # Specific override for GetArtifactDetails test
	# if name == 'GetArtifactDetails':
	# # Use the specific artifact name provided by the user
	# variables['artifactName'] = "c-metrics/hallucination/SmolLM2-360M-sft-hallu:v12"
	# print(f" Overriding artifactName for {name} test.") # Debug print

	# # Handle mutations which might not have standard entity/project vars
	# if name == 'UpsertProject' or name == 'CreateProject':
	# # Ensure the mutation targets the test entity, adjust name if needed
	# variables['entity'] = TARGET_ENTITY
	# variables['name'] = f"{TARGET_PROJECT}-test-upsert" # Avoid conflicts


	# # Handle cases where limit might be needed but not in example vars (like mutations)
	# # For mutations, the tool itself might not use max_items, depends on implementation
	# # For queries, ensure a reasonable limit if not present? Or rely on tool default.
	# # Let's rely on the tool's default `max_items` for now.

	# print(f"Modified Variables: {variables}")


	# except Exception as e:
	# pytest.fail(f"Failed to execute or modify variables code for example '{name}': {e}\nCode: {variables_code}")

	# # --- Make the Live API Call ---
	# try:
	# # Use default max_items and items_per_page from the tool's signature
	# result = query_wandb_tool(query=query, variables=variables)

	# print(f"API Result for {name}: {result}")

	# # --- Assertions ---
	# assert isinstance(result, dict), f"Expected result to be a dictionary, got {type(result)}"

	# # Check specifically for the 'errors' key which indicates GraphQL level errors
	# if 'errors' in result:
	# # Sometimes 'errors' is present but None or empty list, check content
	# error_content = result.get('errors')
	# assert not error_content, f"GraphQL API returned errors for example '{name}': {error_content}"

	# # Optional: Add more specific checks based on the query name if needed
	# # e.g., if name == "GetProjectInfo": assert "project" in result.get("data", {})

	# except Exception as e:
	# pytest.fail(f"query_wandb_tool raised an exception for example '{name}': {e}")

	# # Note: This test makes live calls to the W&B API. Ensure:
	# # 1. You are logged into W&B (e.g., via `wandb login`).
	# # 2. The target project (c-metrics/hallucination) exists and is accessible.
	# # 3. Network connectivity is available.
	# # 4. Be mindful of API rate limits if running frequently.
	# # To run only these tests: pytest -m live_api
	# # To skip these tests: pytest -m "not live_api"