SOAPAssistV00

Runtime error

File size: 5,412 Bytes

35b22df

"""Experiment with different indices, models, and more."""
from __future__ import annotations

import time
from typing import Any, Dict, List, Optional, Type, Union

import pandas as pd
from langchain.input import get_color_mapping, print_text

from gpt_index.indices.base import BaseGPTIndex
from gpt_index.indices.list.base import GPTListIndex
from gpt_index.indices.tree.base import GPTTreeIndex
from gpt_index.indices.vector_store import GPTSimpleVectorIndex
from gpt_index.readers.schema.base import Document

# Index classes that `Playground.from_docs` instantiates when the caller does
# not pass `index_classes`.
DEFAULT_INDEX_CLASSES = [GPTSimpleVectorIndex, GPTTreeIndex, GPTListIndex]
# Query modes a `Playground` tries when the caller does not pass `modes`.
# `Playground.compare` skips any mode missing from an index's query map.
DEFAULT_MODES = ["default", "summarize", "embedding", "retrieve", "recursive"]


class Playground:
    """Experiment with indices, models, embeddings, modes, and more.

    A Playground holds a list of already-built indices and a list of query
    modes. `compare` runs a single query against every (index, mode) pair
    whose mode appears in the index's query map, and collects the outputs.
    """

    def __init__(
        self,
        indices: List[BaseGPTIndex],
        modes: Optional[List[str]] = None,
    ) -> None:
        """Initialize with indices to experiment with.

        Args:
            indices: A list of BaseGPTIndex's to experiment with
            modes: A list of modes that specify which nodes are chosen
                from the index when a query is made. When omitted, a copy of
                DEFAULT_MODES is used. A full list of modes
                available to each index can be found here:
                https://gpt-index.readthedocs.io/en/latest/reference/query.html

        Raises:
            ValueError: If `indices` is empty or holds something that is not a
                BaseGPTIndex, or if `modes` is empty.
        """
        self._validate_indices(indices)
        self._indices = indices

        if modes is None:
            # Copy the default so in-place edits to this Playground's modes
            # never leak into the shared module-level list.
            modes = list(DEFAULT_MODES)
        self._validate_modes(modes)
        self._modes = modes

        self._assign_index_colors()

    @classmethod
    def from_docs(
        cls,
        documents: List[Document],
        index_classes: Optional[List[Type[BaseGPTIndex]]] = None,
        **kwargs: Any,
    ) -> Playground:
        """Initialize with Documents using the default list of indices.

        Args:
            documents: A List of Documents to experiment with.
            index_classes: Index classes to build, each called with
                `documents` as its only argument. When omitted,
                DEFAULT_INDEX_CLASSES is used.
            **kwargs: Forwarded to the Playground constructor (e.g. `modes`).

        Returns:
            A Playground holding one freshly built index per index class.

        Raises:
            ValueError: If `documents` is empty.
        """
        if not documents:
            raise ValueError(
                "Playground must be initialized with a nonempty list of Documents."
            )
        if index_classes is None:
            index_classes = DEFAULT_INDEX_CLASSES

        indices = [index_class(documents) for index_class in index_classes]
        return cls(indices, **kwargs)

    def _validate_indices(self, indices: List[BaseGPTIndex]) -> None:
        """Validate a list of indices.

        Raises:
            ValueError: If the list is empty or contains an element that is
                not a BaseGPTIndex instance.
        """
        if not indices:
            raise ValueError("Playground must have a non-empty list of indices.")
        for index in indices:
            if not isinstance(index, BaseGPTIndex):
                raise ValueError(
                    "Every index in Playground should be an instance of BaseGPTIndex."
                )

    def _assign_index_colors(self) -> None:
        """Map each index position (as a string) to a print color."""
        index_range = [str(i) for i in range(len(self._indices))]
        self.index_colors = get_color_mapping(index_range)

    @property
    def indices(self) -> List[BaseGPTIndex]:
        """Get Playground's indices."""
        return self._indices

    @indices.setter
    def indices(self, indices: List[BaseGPTIndex]) -> None:
        """Set Playground's indices.

        Raises:
            ValueError: If the new list fails `_validate_indices`.
        """
        self._validate_indices(indices)
        self._indices = indices
        # `compare` looks colors up by position, so the mapping must cover
        # every position of the new list; otherwise a longer list would hit
        # a KeyError on the positions the old mapping never had.
        self._assign_index_colors()

    def _validate_modes(self, modes: List[str]) -> None:
        """Validate a list of modes.

        Raises:
            ValueError: If the list is empty.
        """
        if not modes:
            raise ValueError(
                "Playground must have a nonzero number of modes. "
                "Initialize without the `modes` argument to use the default list."
            )

    @property
    def modes(self) -> List[str]:
        """Get Playground's modes."""
        return self._modes

    @modes.setter
    def modes(self, modes: List[str]) -> None:
        """Set Playground's modes.

        Raises:
            ValueError: If the new list is empty.
        """
        self._validate_modes(modes)
        self._modes = modes

    def compare(
        self, query_text: str, to_pandas: Optional[bool] = True
    ) -> Union[pd.DataFrame, List[Dict[str, Any]]]:
        """Compare index outputs on an input query.

        Every (index, mode) pair is tried in order; pairs whose mode is not
        in the index's query map are skipped silently. Progress and outputs
        are printed as the queries run.

        Args:
            query_text (str): Query to run all indices on.
            to_pandas (Optional[bool]): Return results in a pandas dataframe.
                True by default.

        Returns:
            The output of each index along with other data, such as the time it took to
            compute. Results are stored in a Pandas Dataframe or a list of Dicts.
            Each record has the keys "Index", "Mode", "Output", "Duration"
            (seconds spent inside the query call), "LLM Tokens" and
            "Embedding Tokens" (the `last_token_usage` values read from the
            index's predictor and embedding model right after the query).
        """
        print(f"\033[1mQuery:\033[0m\n{query_text}\n")
        # This count is an upper bound: unsupported pairs are skipped below.
        print(f"Trying {len(self._indices) * len(self._modes)} combinations...\n\n")
        result: List[Dict[str, Any]] = []
        for i, index in enumerate(self._indices):
            index_name = type(index).__name__
            # Fetched once per index rather than once per mode.
            query_map = index.get_query_map()
            for mode in self._modes:
                if mode not in query_map:
                    continue

                print_text(f"\033[1m{index_name}\033[0m, mode = {mode}", end="\n")

                # Time only the query itself, with a monotonic clock, so that
                # console output and wall-clock adjustments do not skew it.
                start_time = time.perf_counter()
                output = index.query(query_text, mode=mode)
                duration = time.perf_counter() - start_time

                output_text = str(output)
                print_text(output_text, color=self.index_colors[str(i)], end="\n\n")

                result.append(
                    {
                        "Index": index_name,
                        "Mode": mode,
                        "Output": output_text,
                        "Duration": duration,
                        "LLM Tokens": index.llm_predictor.last_token_usage,
                        "Embedding Tokens": index.embed_model.last_token_usage,
                    }
                )
        print(f"\nRan {len(result)} combinations in total.")

        if to_pandas:
            return pd.DataFrame(result)
        return result