mcp-server / tests /test_wandb_gql_examples.py
NiWaRe's picture
mcp_base
f647629
# import pytest
# import re
# import inspect
# import ast
# import json
# import os
# from src.wandb_mcp_server.server import query_wandb_tool # Assuming src is importable
# # --- Configuration ---
# TARGET_ENTITY = "c-metrics"
# TARGET_PROJECT = "hallucination"
# # --- Helper Function to Extract Examples ---
# def extract_gql_examples_from_docstring(docstring):
# """Parses a docstring to extract GraphQL examples marked by specific delimiters."""
# examples = []
# # Regex to find the blocks delimited by <!-- WANDB_GQL_EXAMPLE_START/END -->
# # Restore original regex with backreference
# example_pattern = re.compile(
# r'<!-- WANDB_GQL_EXAMPLE_START name=(\w+) -->(.*?)<!-- WANDB_GQL_EXAMPLE_END name=\1 -->', # Restored \1
# re.DOTALL
# )
# # Regex to find graphql code blocks
# graphql_pattern = re.compile(r'\s*```graphql\s*\n(.*?)\n\s*```', re.DOTALL)
# # Regex to find python code blocks
# python_pattern = re.compile(r'\s*```python\s*\n(.*?)\n\s*```', re.DOTALL)
# # --- DEBUGGING ---
# print(f"\n>>> DEBUG: Inside extract_gql_examples_from_docstring")
# print(f" Attempting to find matches with pattern: {example_pattern.pattern}")
# print(f" in docstring of length {len(docstring)}")
# matches_found = 0
# # --- END DEBUGGING ---
# for match in example_pattern.finditer(docstring):
# # --- DEBUGGING ---
# matches_found += 1
# print(f" >>> Found match {matches_found}: name='{match.group(1)}'")
# # --- END DEBUGGING ---
# name = match.group(1)
# content = match.group(2)
# # --- DEBUGGING ---
# print(f" --- Content for '{name}' start ---")
# print(content)
# print(f" --- Content for '{name}' end ---")
# # --- END DEBUGGING ---
# graphql_match = graphql_pattern.search(content)
# python_match = python_pattern.search(content)
# if graphql_match and python_match:
# query = graphql_match.group(1).strip()
# # Extract the python code string, removing comments if necessary for exec
# variables_code_str = python_match.group(1).strip()
# # Remove comments starting with # to avoid issues with exec
# variables_code_str = re.sub(r'^#.*$', '', variables_code_str, flags=re.MULTILINE).strip()
# # Attempt to parse the variable assignment part more robustly if it's simple
# try:
# # A simple approach might assume the last line is `variables = ...`
# # More robustly, find the assignment
# assignment_match = re.search(r'variables\s*=\s*(\{.*?\})', variables_code_str, re.DOTALL)
# variables_dict_code = assignment_match.group(1) if assignment_match else variables_code_str
# # --- DEBUGGING ---
# print(f" >>> Appending example: {name}")
# # --- END DEBUGGING ---
# examples.append({
# "name": name,
# "query": query,
# "variables_code": variables_dict_code # Store the code string for the dict/assignment
# })
# except Exception as e:
# print(f"Warning: Could not parse variables for example '{name}'. Error: {e}")
# # Decide if you want to skip or add with None/error marker
# # examples.append({"name": name, "query": query, "variables_code": None, "error": str(e)})
# # --- DEBUGGING ---
# print(f" Finished finditer loop. Total matches found: {matches_found}")
# print(f"<<< DEBUG: Exiting extract_gql_examples_from_docstring\n")
# # --- END DEBUGGING ---
# if not examples:
# raise ValueError("No examples found in docstring. Check delimiters and file content.")
# return examples
# # --- Pytest Fixture for Loading Examples ---
# @pytest.fixture(scope="session")
# def gql_examples():
# """Reads the target function's docstring and extracts GQL examples."""
# try:
# target_docstring = inspect.getdoc(query_wandb_tool)
# if not target_docstring:
# raise ImportError(f"Could not get docstring for query_wandb_tool.")
# # --- DEBUGGING: Print the retrieved docstring ---
# print("\n--- Retrieved Docstring by inspect.getdoc() ---")
# print(target_docstring)
# print("--- End of Retrieved Docstring ---\n")
# # --- END DEBUGGING ---
# extracted = extract_gql_examples_from_docstring(target_docstring)
# # Filter out examples where variables couldn't be parsed if the helper function indicates so
# valid_examples = [ex for ex in extracted if ex.get("variables_code")]
# if not valid_examples:
# raise ValueError("No valid examples with variable code found after parsing.")
# return valid_examples
# except Exception as e:
# # pytest will report this error during fixture setup
# pytest.fail(f"Failed to setup gql_examples fixture: {e}", pytrace=False)
# _example_names = []
# try:
# # Attempt to pre-load examples just to get names for parameterization
# # Note: This duplicates loading but simplifies parametrize setup
# # The fixture ensures the main test execution uses the proper setup/cached result.
# _target_docstring = inspect.getdoc(query_wandb_tool)
# if not _target_docstring:
# raise ImportError("Docstring not found at collection time.")
# _extracted_examples = extract_gql_examples_from_docstring(_target_docstring)
# _example_names = [ex["name"] for ex in _extracted_examples if ex.get("variables_code")]
# if not _example_names:
# raise ValueError("No valid example names found at collection time.")
# except Exception as e:
# print(f"Warning during test collection: Could not pre-load example names - {e}")
# # If collection fails to get names, the test function relying on the fixture
# # will fail later during setup/execution, which is acceptable.
# _example_names = ["SETUP_ERROR_DURING_COLLECTION"] # Provide a placeholder
# # --- Test Function ---
# # Apply the live_api marker
# @pytest.mark.live_api
# @pytest.mark.parametrize(
# "name", # Parametrize only by the example name
# _example_names
# )
# def test_wandb_gql_example(name, gql_examples): # Inject fixture here, remove query/variables_code
# """Runs a test for each extracted GraphQL example using live API calls."""
# if name == "SETUP_ERROR_DURING_COLLECTION":
# pytest.fail("Test collection could not determine example names. Check setup.")
# # Find the correct example data from the fixture result based on the parameterized name
# example_data = next((ex for ex in gql_examples if ex['name'] == name), None)
# if not example_data:
# pytest.fail(f"Could not find example data for name '{name}' in gql_examples fixture result.")
# # Use the data looked up from the fixture
# query = example_data["query"]
# variables_code = example_data["variables_code"]
# # The rest of the test logic remains largely the same...
# print(f"\nRunning test for example: {name}")
# print(f"Query:\n{query}")
# print(f"Variables Code:\n{variables_code}")
# variables = {}
# try:
# # Execute the Python code string to get the variables dictionary.
# # Reverting to exec as ast.literal_eval cannot handle nested strings required for JSON literals.
# local_scope = {'json': json} # Provide json module in the execution scope
# # The variable `variables_code` should contain the raw python code from the docstring block
# exec(variables_code, local_scope)
# # Check if 'variables' was defined in the executed code
# if 'variables' not in local_scope:
# raise NameError("Executed code snippet did not define a 'variables' dictionary.")
# variables = local_scope['variables']
# if not isinstance(variables, dict):
# raise TypeError(f"Executed code defined 'variables', but it is not a dictionary. Got: {type(variables)}")
# print(f"Original Variables: {variables}")
# # Override entity and project for the test run
# # Check if the keys exist before assigning, especially for mutations
# if 'entity' in variables or name.endswith('Info') or name.endswith('Runs') or name.endswith('Keys') or name.endswith('Sampled') or name.endswith('Details'):
# variables['entity'] = TARGET_ENTITY
# if 'project' in variables or name.endswith('Info') or name.endswith('Runs') or name.endswith('Keys') or name.endswith('Sampled') or name.endswith('Details'):
# variables['project'] = TARGET_PROJECT
# # Handle entityName/projectName variants if needed
# if 'entityName' in variables:
# variables['entityName'] = TARGET_ENTITY
# if 'projectName' in variables:
# variables['projectName'] = TARGET_PROJECT
# # Specific override for GetArtifactDetails test
# if name == 'GetArtifactDetails':
# # Use the specific artifact name provided by the user
# variables['artifactName'] = "c-metrics/hallucination/SmolLM2-360M-sft-hallu:v12"
# print(f" Overriding artifactName for {name} test.") # Debug print
# # Handle mutations which might not have standard entity/project vars
# if name == 'UpsertProject' or name == 'CreateProject':
# # Ensure the mutation targets the test entity, adjust name if needed
# variables['entity'] = TARGET_ENTITY
# variables['name'] = f"{TARGET_PROJECT}-test-upsert" # Avoid conflicts
# # Handle cases where limit might be needed but not in example vars (like mutations)
# # For mutations, the tool itself might not use max_items, depends on implementation
# # For queries, ensure a reasonable limit if not present? Or rely on tool default.
# # Let's rely on the tool's default `max_items` for now.
# print(f"Modified Variables: {variables}")
# except Exception as e:
# pytest.fail(f"Failed to execute or modify variables code for example '{name}': {e}\nCode: {variables_code}")
# # --- Make the Live API Call ---
# try:
# # Use default max_items and items_per_page from the tool's signature
# result = query_wandb_tool(query=query, variables=variables)
# print(f"API Result for {name}: {result}")
# # --- Assertions ---
# assert isinstance(result, dict), f"Expected result to be a dictionary, got {type(result)}"
# # Check specifically for the 'errors' key which indicates GraphQL level errors
# if 'errors' in result:
# # Sometimes 'errors' is present but None or empty list, check content
# error_content = result.get('errors')
# assert not error_content, f"GraphQL API returned errors for example '{name}': {error_content}"
# # Optional: Add more specific checks based on the query name if needed
# # e.g., if name == "GetProjectInfo": assert "project" in result.get("data", {})
# except Exception as e:
# pytest.fail(f"query_wandb_tool raised an exception for example '{name}': {e}")
# # Note: This test makes live calls to the W&B API. Ensure:
# # 1. You are logged into W&B (e.g., via `wandb login`).
# # 2. The target project (c-metrics/hallucination) exists and is accessible.
# # 3. Network connectivity is available.
# # 4. Be mindful of API rate limits if running frequently.
# # To run only these tests: pytest -m live_api
# # To skip these tests: pytest -m "not live_api"