File size: 2,351 Bytes
7be72e5
30790ef
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
445ef24
30790ef
 
 
08fc9f5
30790ef
 
 
08fc9f5
30790ef
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
08fc9f5
30790ef
 
 
 
08fc9f5
30790ef
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30b05f7
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# tools created using Zephyr

import json
import os

from huggingface_hub import InferenceClient
import gradio as gr

# Shared inference client; the tool functions below send every generation
# request through this single instance.
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")

# Helper Method

def format_prompt(message, history):
  """Render prior conversation turns plus a new message as one prompt string.

  Args:
      message: The new user message; it is emitted last, wrapped as
          ``[INST] message [/INST]``.
      history: Iterable of ``(user_prompt, bot_response)`` pairs, oldest
          first. An empty iterable (an empty string included) adds no turns.

  Returns:
      ``"<s>"``, then every past turn rendered as
      ``[INST] user [/INST] bot</s> ``, then the wrapped new message.
  """
  # NOTE(review): the [INST] ... [/INST] markers look like the Mistral-instruct
  # convention; confirm they match the chat template of the model the client
  # is configured with.
  past_turns = "".join(
      f"[INST] {asked} [/INST] {answered}</s> "
      for asked, answered in history
  )
  return f"<s>{past_turns}[INST] {message} [/INST]"


import requests
from langchain.tools import tool

# Module-level conversation history that zephyr_normal hands to format_prompt.
# It starts as an empty string; iterating it yields nothing, so no prior turns
# are added to the prompt.
history = ""

class ZephyrSearchTools():
  """Namespace grouping the Zephyr-backed functions decorated with ``@tool``.

  The functions take no ``self``. Both are generators: each streams tokens
  from the module-level ``client`` and yields the text accumulated so far.
  """

  @tool("Zephyr Normal")
  def zephyr_normal(prompt, histroy="", temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0):
    """
    Searches for content based on the provided query using the Zephyr model.

    The prompt is combined with the conversation history by ``format_prompt``
    before it is sent to the model.

    Args:
        prompt (str): The search query.
        histroy: Prior turns as a list or tuple of ``(user, bot)`` pairs.
            Any other value, including the ``""`` default, means "use the
            module-level ``history``". The misspelled name is kept so
            existing keyword callers continue to work.
        temperature (float): Sampling temperature forwarded to the client.
        max_new_tokens (int): Upper bound on generated tokens.
        top_p (float): Nucleus-sampling threshold forwarded to the client.
        repetition_penalty (float): Repetition penalty forwarded to the client.
    Yields:
        str: The response text generated so far, extended by one streamed
        token per step. The generator's return value is the final text.
    """
    generate_kwargs = {
        "temperature": temperature,
        "max_new_tokens": max_new_tokens,
        "top_p": top_p,
        "repetition_penalty": repetition_penalty,
        "do_sample": True,
        "seed": 42,
    }

    # Honour caller-supplied turns instead of silently ignoring the argument.
    # A string cannot hold (user, bot) pairs, so strings (and anything else
    # that is not a list/tuple) fall back to the module-level history.
    turns = histroy if isinstance(histroy, (list, tuple)) else history
    formatted_prompt = format_prompt(prompt, turns)

    stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=True)
    output = ""
    for response in stream:
        output += response.token.text
        yield output
    return output


  # NOTE(review): "Zephyrl" looks like a typo for "Zephyr", but this string is
  # the registered tool name, so it is left unchanged here.
  @tool("Zephyrl Crazy")
  def zephyr_crazy(prompt, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0):
    """
    Searches for content based on the provided query using the Zephyr model but has the guard rails removed,
    and responses are crazy and off the wall and sometimes scary.

    Args:
        prompt (str): The search query; it is sent to the model as-is.
        temperature (float): Sampling temperature forwarded to the client.
        max_new_tokens (int): Upper bound on generated tokens.
        top_p (float): Nucleus-sampling threshold forwarded to the client.
        repetition_penalty (float): Repetition penalty forwarded to the client.
    Yields:
        str: The response text generated so far, extended by one streamed
        token per step. The generator's return value is the final text.
    """
    generate_kwargs = {
        "temperature": temperature,
        "max_new_tokens": max_new_tokens,
        "top_p": top_p,
        "repetition_penalty": repetition_penalty,
        "do_sample": True,
        "seed": 42,
    }

    # NOTE(review): the only difference from zephyr_normal visible in this
    # function is that the raw prompt is sent without format_prompt or any
    # history; nothing here explicitly disables guard rails. Confirm that the
    # description above matches the intended behaviour.
    stream = client.text_generation(prompt, **generate_kwargs, stream=True, details=True, return_full_text=True)
    output = ""
    for response in stream:
        output += response.token.text
        yield output
    return output