# Copyright 2022 DeepMind Technologies Limited. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Combines all steps of compiling a RASP program."""

from typing import Set

from tracr.compiler import assemble
from tracr.compiler import basis_inference
from tracr.compiler import craft_graph_to_model
from tracr.compiler import craft_model_to_transformer
from tracr.compiler import expr_to_craft_graph
from tracr.compiler import rasp_to_graph
from tracr.craft import bases
from tracr.rasp import rasp

# Default names of the special BOS and PAD tokens. They serve as the default
# values of the `compiler_bos` and `compiler_pad` arguments of
# `compile_rasp_to_model`, which rejects a vocab containing either name.
COMPILER_BOS = "compiler_bos"
COMPILER_PAD = "compiler_pad"


def compile_rasp_to_model(
    program: rasp.SOp,
    vocab: Set[rasp.Value],
    max_seq_len: int,
    causal: bool = False,
    compiler_bos: str = COMPILER_BOS,
    compiler_pad: str = COMPILER_PAD,
    mlp_exactness: int = 100) -> assemble.AssembledTransformerModel:
  """Turns a RASP program into an assembled transformer model.

  The special token names are first validated against the vocab. The program is
  then converted into a graph, bases are inferred for it, craft components are
  attached to it, a craft model is built from it, and that craft model is
  finally converted into the returned transformer.

  Args:
    program: the RASP program to compile.
    vocab: the set of vocab tokens expected by RASP.
    max_seq_len: the maximum sequence length for the compiled model.
    causal: if True, outputs a model with causal masking.
    compiler_bos: the name of the special BOS token that will be added by the
      compiler. Must not be present in the vocab.
    compiler_pad: the name of the special PAD token that will be added by the
      compiler. Must not be present in the vocab.
    mlp_exactness: Controls the approximation of the MLP layers. In theory,
      larger values yield a better approximation. But too large values can cause
      numerical issues due to large parameter norms. Reasonable values are
      between 1 and 100.

  Returns:
    The compiled model.

  Raises:
    ValueError: if `compiler_bos` or `compiler_pad` is contained in `vocab`.
  """
  # Reject special-token names that clash with the user's vocab before doing
  # any compilation work.
  if compiler_bos in vocab:
    raise ValueError("Compiler BOS token must not be present in the vocab. "
                     f"Found '{compiler_bos}' in {vocab}")
  if compiler_pad in vocab:
    raise ValueError("Compiler PAD token must not be present in the vocab. "
                     f"Found '{compiler_pad}' in {vocab}")

  # Step 1: turn the RASP program into a graph with known sources and sink.
  rasp_graph = rasp_to_graph.extract_rasp_graph(program)
  graph = rasp_graph.graph
  sources = rasp_graph.sources
  sink = rasp_graph.sink

  # Step 2: basis inference. The return value is not used here, so this
  # presumably annotates `graph` in place -- confirm against basis_inference.
  basis_inference.infer_bases(graph, sink, vocab, max_seq_len)

  # Step 3: attach craft components to the graph; the BOS direction is keyed
  # on the label of the RASP `tokens` primitive and the BOS token name.
  bos_direction = bases.BasisDirection(rasp.tokens.label, compiler_bos)
  expr_to_craft_graph.add_craft_components_to_rasp_graph(
      graph,
      bos_dir=bos_direction,
      mlp_exactness=mlp_exactness,
  )

  # Step 4: build the craft model from the graph and its sources.
  craft_model = craft_graph_to_model.craft_graph_to_model(graph, sources)

  # Step 5: convert the craft model into the final transformer.
  assembled_model = craft_model_to_transformer.craft_model_to_transformer(
      craft_model=craft_model,
      graph=graph,
      sink=sink,
      max_seq_len=max_seq_len,
      causal=causal,
      compiler_bos=compiler_bos,
      compiler_pad=compiler_pad,
  )
  return assembled_model