"""
Module: custom_agent

This module provides a custom class, CustomHfAgent, for interacting with the Hugging Face model API.

Dependencies:
- time: Standard Python time module for time-related operations.
- requests: HTTP library for making requests.
- transformers: Hugging Face's transformers library for NLP tasks.
- utils.logger: Custom logger module for logging responses.

Classes:
- CustomHfAgent: A custom class for interacting with the Hugging Face model API.

Reasono for making this https://github.com/huggingface/transformers/issues/28217 
Based on https://github.com/huggingface/transformers/blob/main/src/transformers/tools/agents.py

"return_full_text": False,

"""

import time

import requests
from transformers import Agent, AutoTokenizer

from utils.logger import log_response

# from transformers.tools.prompts import CHAT_MESSAGE_PROMPT
CHAT_MESSAGE_PROMPT = """
Human: <<task>>

Assistant: """


class CustomHfAgent(Agent):
    """A custom class for interacting with the Hugging Face model API."""
    def __init__(self, url_endpoint, token, chat_prompt_template=None, run_prompt_template=None, additional_tools=None, input_params=None):
        """
        Initialize the CustomHfAgent.

        Args:
        - url_endpoint (str): The URL endpoint for the Hugging Face model API.
        - token (str): The authentication token required to access the API.
        - chat_prompt_template (str): Template for chat prompts.
        - run_prompt_template (str): Template for run prompts.
        - additional_tools (list): Additional tools for the agent.
        - input_params (dict): Additional parameters for input.

        Returns:
        - None
        """
        super().__init__(
            chat_prompt_template=chat_prompt_template,
            run_prompt_template=run_prompt_template,
            additional_tools=additional_tools,
        )
        self.url_endpoint = url_endpoint
        self.token = token
        self.input_params = input_params or {}

    def generate_one(self, prompt, stop):
        """
        Generate one response from the Hugging Face model.

        Args:
        - prompt (str): The prompt to generate a response for.
        - stop (list): A list of strings indicating where to stop generating text.

        Returns:
        - str: The generated response.
        """
        headers = {"Authorization": "Bearer " +self.token}
        max_new_tokens = self.input_params.get("max_new_tokens", 192)
        parameters = {"max_new_tokens": max_new_tokens, "return_full_text": False, "stop": stop, "padding": True, "truncation": True}
        inputs = {
            "inputs": prompt,
            "parameters": parameters,
        }
        print(inputs)
        try:
            response = requests.post(self.url_endpoint, json=inputs, headers=headers, timeout=300)
        except requests.Timeout:
            pass
        except requests.ConnectionError:
            pass
        if response.status_code == 429:
            log_response("Getting rate-limited, waiting a tiny bit before trying again.")
            time.sleep(1)
            return self.generate_one(prompt, stop)
        elif response.status_code != 200:
            raise ValueError(f"Errors {inputs} {response.status_code}: {response.json()}")
        log_response(response)
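        # The endpoint is expected to return JSON shaped like
        # [{"generated_text": "..."}], hence the indexing below.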
        result = response.json()[0]["generated_text"]
        for stop_seq in stop:
            if result.endswith(stop_seq):
                return result[: -len(stop_seq)]
        return result
    # References for the prompt templates used below:
    # https://github.com/huggingface/transformers/blob/main/src/transformers/tools/prompts.py
    # https://huggingface.co/datasets/huggingface-tools/default-prompts/blob/main/chat_prompt_template.txt
    def format_prompt(self, task, chat_mode=False):
        """
        Build the full prompt for a task.

        Args:
        - task (str): The task to embed in the prompt.
        - chat_mode (bool): Whether to use the chat template and chat history.

        Returns:
        - str: The formatted prompt.
        """
        checkpoint = "bigcode/starcoder"
        tokenizer = AutoTokenizer.from_pretrained(checkpoint, token=self.token)

        description = "\n".join(
            f"- {name}: {tool.description}" for name, tool in self.toolbox.items()
        )

        if chat_mode:
            if self.chat_history is None:
                print("no history yet")
                prompt = self.chat_prompt_template.replace("<<all_tools>>", description)
            else:
                print("chat history")
                print(self.chat_history)
                prompt = self.chat_history
            prompt += CHAT_MESSAGE_PROMPT.replace("<<task>>", task)
        else:
            print("else block, not chat mode")
            prompt = self.run_prompt_template.replace("<<all_tools>>", description)
            prompt = prompt.replace("<<prompt>>", task)

        # Experiment: render the same prompt through the model's chat template.
        # The result is only printed for comparison; the raw prompt is returned.
        messages = [{"role": "user", "content": prompt}]
        print("tokenized " + tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False))
        # prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)

        print("formatted prompt ---- " + prompt)
        return prompt
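

# Usage example: a minimal, hedged sketch of how CustomHfAgent might be
# driven against a Hugging Face Inference API endpoint. The endpoint URL,
# token, and task below are illustrative placeholders, not values taken
# from this repository.
if __name__ == "__main__":
    agent = CustomHfAgent(
        url_endpoint="https://api-inference.huggingface.co/models/bigcode/starcoder",
        token="hf_...",  # placeholder: supply a real Hugging Face API token
        input_params={"max_new_tokens": 192},
    )
    # Agent.run() formats the prompt, calls generate_one(), and evaluates the
    # generated tool code; return_code=True returns the code instead of running it.
    print(agent.run("Draw me a picture of rivers and lakes.", return_code=True))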