# tools created using Phi2

import json
import os

import requests
from langchain.tools import tool

import spaces
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
from threading import Thread
# Choose the torch device string once at import time.
# CUDA is preferred over CPU; the MPS check runs last, so it takes precedence
# whenever torch reports MPS as available.
device = "cuda" if torch.cuda.is_available() else "cpu"
device = "mps" if torch.backends.mps.is_available() else device


# Load the Phi-2 tokenizer and weights once, when this module is imported.
# NOTE(review): trust_remote_code=True lets the hub repository supply custom
# code for the model/tokenizer -- confirm this is still required for phi-2.
tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    "microsoft/phi-2",
    trust_remote_code=True,
    # Half precision only when CUDA is available; full precision otherwise.
    torch_dtype=torch.float32 if not torch.cuda.is_available() else torch.float16,
)
# Place the loaded weights on the device chosen above.
model = model.to(device)


#@spaces.GPU(enable_queue=True)
class Phi2SearchTools:
    """Namespace for LangChain tools backed by the locally loaded Phi-2 model."""

    @tool("Phi2 Normal")
    def phi2_search(text, temperature=.75, maxLen=2048):
        """
        Generates text from the given prompt using the local microsoft/phi-2 model.
        The output is streamed: each yielded value is the full text produced so far.
        Args:
            text (str): The prompt to send to the model.
            temperature (float): Sampling temperature. Values greater than 0
                enable sampling; 0 or None falls back to greedy decoding.
            maxLen (int): Maximum number of new tokens to generate.
        Yields:
            str: The accumulated text decoded from the stream so far.
        """
        inputs = tokenizer([text], return_tensors="pt").to(device)
        # NOTE(review): the streamer is created with default options, so the
        # prompt text is presumably echoed at the start of the stream -- confirm
        # whether skip_prompt=True is wanted by callers.
        streamer = TextIteratorStreamer(tokenizer)

        # `temperature` only influences generation when sampling is enabled;
        # without do_sample=True, generate() decodes greedily and ignores it.
        use_sampling = temperature is not None and temperature > 0
        generation_kwargs = dict(
            inputs,
            streamer=streamer,
            max_new_tokens=maxLen,
            do_sample=use_sampling,
        )
        if use_sampling:
            generation_kwargs["temperature"] = temperature

        # generate() blocks until completion, so it runs in a worker thread
        # while this generator consumes the streamer.
        thread = Thread(target=model.generate, kwargs=generation_kwargs)
        thread.start()

        generated = ""
        for chunk in streamer:
            generated += chunk
            yield generated

        # The stream is exhausted, so generation has finished; reap the worker.
        thread.join()