File size: 11,825 Bytes
567930d
d0291ae
806953a
e91ac58
 
d174ce3
806953a
d0291ae
806953a
e91ac58
 
806953a
b8abf64
567930d
e91ac58
ae215ea
e91ac58
 
 
 
 
 
 
 
 
 
 
d0291ae
 
 
 
 
 
 
 
 
 
 
 
e91ac58
 
 
 
 
 
 
 
567930d
ae215ea
 
 
 
 
e91ac58
 
 
 
567930d
 
e91ac58
 
 
 
 
 
 
 
 
 
 
 
 
 
567930d
 
 
 
 
 
 
 
 
 
 
e91ac58
 
567930d
d174ce3
e91ac58
567930d
 
 
 
e91ac58
 
 
 
 
 
 
 
 
 
567930d
 
e91ac58
 
 
 
 
567930d
 
e91ac58
 
 
 
d0291ae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e91ac58
 
567930d
 
 
 
 
d0291ae
e91ac58
d0291ae
 
 
 
 
e91ac58
 
 
 
d174ce3
e91ac58
 
 
 
 
567930d
e91ac58
d174ce3
 
 
e91ac58
 
 
9d06861
 
e91ac58
567930d
 
e91ac58
 
 
 
 
 
 
 
d0291ae
e91ac58
d0291ae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
567930d
e91ac58
 
 
 
 
 
 
 
 
 
 
 
 
9d06861
567930d
 
 
ae215ea
9d06861
ae215ea
9d06861
 
e91ac58
9d06861
e91ac58
 
 
 
567930d
 
9d06861
e91ac58
 
 
 
 
 
 
 
567930d
 
e91ac58
ae215ea
9d06861
e91ac58
 
567930d
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
import os, time, json, typing
from dataclasses import dataclass
# import vertexai
from vertexai.language_models import TextGenerationModel
from vertexai.generative_models._generative_models import HarmCategory, HarmBlockThreshold
from vertexai.language_models import TextGenerationModel
# from vertexai.preview.generative_models import GenerativeModel
from langchain.output_parsers.retry import RetryWithErrorOutputParser
# from langchain.schema import HumanMessage
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser
# from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_google_vertexai import VertexAI
from langchain_core.messages import BaseMessage, HumanMessage

from vouchervision.utils_LLM import SystemLoadMonitor, run_tools, count_tokens, save_individual_prompt, sanitize_prompt
from vouchervision.utils_LLM_JSON_validation import validate_and_align_JSON_keys_with_template

#https://cloud.google.com/vertex-ai/docs/python-sdk/use-vertex-ai-python-sdk
#pip install --upgrade google-cloud-aiplatform
# from google.cloud import aiplatform

#### You have to authenticate with gcloud first:
# gcloud auth login
# gcloud config set project XXXXXXXXX
# https://cloud.google.com/docs/authentication

from pydantic import BaseModel
from langchain_core.prompt_values import PromptValue as BasePromptValue

class PromptValueWrapper(BaseModel):
    """Minimal container holding a single prompt string.

    Provides ``to_string`` and ``to_messages`` so the stored text can be read
    back either as raw text or as a one-element list of chat messages.
    """

    prompt_str: str

    def to_string(self) -> str:
        """Return the stored prompt text unchanged."""
        text = self.prompt_str
        return text

    def to_messages(self):
        """Return the prompt as a list containing one ``HumanMessage``."""
        message = HumanMessage(content=self.prompt_str)
        return [message]
    
class GooglePalm2Handler:
    """Send a prompt to a Vertex AI text-generation model and parse the JSON reply.

    The handler builds a small LangChain pipeline (prompt template piped into
    ``call_google_palm2``), retries up to ``MAX_RETRIES`` times when the reply
    cannot be turned into valid JSON, and raises the sampling temperature by
    ``temp_increment`` after every failed attempt. The temperature is reset to
    its starting value once a call finishes (successfully or not).
    """

    RETRY_DELAY = 10  # Seconds to sleep after an attempt raised, before the next attempt
    MAX_RETRIES = 3  # Maximum number of attempts per call
    TOKENIZER_NAME = 'gpt-4'  # Tokenizer name handed to count_tokens
    VENDOR = 'google'  # Vendor label handed to count_tokens
    STARTING_TEMP = 0.5  # Temperature used when no permutation config is supplied

    def __init__(self, cfg, logger, model_name, JSON_dict_structure, config_vals_for_permutation):
        """Store configuration, build the prompt template and the model chain.

        Args:
            cfg: Project configuration mapping; the tool switches are read from
                ``cfg['leafmachine']['project']``.
            logger: Logger used for progress and error messages.
            model_name: Name of the Vertex AI model to call.
            JSON_dict_structure: Template passed to
                ``validate_and_align_JSON_keys_with_template``.
            config_vals_for_permutation: Optional mapping whose ``'google'``
                entry overrides the generation parameters; falsy to use the
                built-in defaults.
        """
        self.cfg = cfg
        project_cfg = self.cfg['leafmachine']['project']
        self.tool_WFO = project_cfg['tool_WFO']
        self.tool_GEO = project_cfg['tool_GEO']
        self.tool_wikipedia = project_cfg['tool_wikipedia']

        self.logger = logger
        self.model_name = model_name
        self.JSON_dict_structure = JSON_dict_structure

        self.config_vals_for_permutation = config_vals_for_permutation

        # Set up front so _adjust_config/_reset_config are safe to call before
        # the first call_llm_api_GooglePalm2 invocation assigns a real report.
        self.json_report = None
        # Lazily created TextGenerationModel handle (see call_google_palm2).
        self._palm_model = None

        self.monitor = SystemLoadMonitor(logger)

        self.parser = JsonOutputParser()

        # Define the prompt template
        self.prompt = PromptTemplate(
            template="Answer the user query.\n{format_instructions}\n{query}\n",
            input_variables=["query"],
            partial_variables={"format_instructions": self.parser.get_format_instructions()},
        )
        self._set_config()

    def _set_config(self):
        """Initialise generation parameters, temperature bookkeeping and the chain."""
        if self.config_vals_for_permutation:
            google_vals = self.config_vals_for_permutation.get('google')
            self.starting_temp = float(google_vals.get('temperature'))
            self.config = {
                'max_output_tokens': google_vals.get('max_output_tokens'),
                'temperature': self.starting_temp,
                'top_k': google_vals.get('top_k'),
                'top_p': google_vals.get('top_p'),
            }
        else:
            self.starting_temp = float(self.STARTING_TEMP)
            self.config = {
                "max_output_tokens": 1024,
                "temperature": self.starting_temp,
                "top_k": 1,
                "top_p": 1.0,
            }

        self.temp_increment = float(0.2)
        self.adjust_temp = self.starting_temp

        # NOTE: these settings are stored on the instance but are not passed to
        # the model.predict call made in call_google_palm2.
        self.safety_settings = {
            HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
            HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
            HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
            HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
        }
        self._build_model_chain_parser()

    def _adjust_config(self):
        """Raise the working temperature by ``temp_increment`` after a failed attempt."""
        # Round so repeated float additions do not leak values such as
        # 0.8999999999999999 into the config and the log output.
        new_temp = round(self.adjust_temp + self.temp_increment, 6)
        message = f'Incrementing temperature from {self.adjust_temp} to {new_temp}'
        if self.json_report:
            self.json_report.set_text(text_main=message)
        self.logger.info(message)
        self.adjust_temp = new_temp
        self.config['temperature'] = self.adjust_temp

    def _reset_config(self):
        """Restore the working temperature to the starting temperature."""
        message = f'Resetting temperature from {self.adjust_temp} to {self.starting_temp}'
        if self.json_report:
            self.json_report.set_text(text_main=message)
        self.logger.info(message)
        self.adjust_temp = self.starting_temp
        self.config['temperature'] = self.starting_temp

    def _build_model_chain_parser(self):
        """Create the LangChain model, the retry parser and the prompt chain."""
        self.llm_model = VertexAI(model=self.model_name,
                                  max_output_tokens=self.config.get('max_output_tokens'),
                                  temperature=self.config.get('temperature'),
                                  top_k=self.config.get('top_k'),
                                  top_p=self.config.get('top_p'))

        # Built for optional use; call_llm_api_GooglePalm2 currently parses the
        # reply with json.loads directly and does not go through this parser.
        self.retry_parser = RetryWithErrorOutputParser.from_llm(
            llm=self.llm_model,
            parser=self.parser,
            max_retries=self.MAX_RETRIES
        )

        # Prepare the chain: the formatted prompt value is handed to call_google_palm2
        self.chain = self.prompt | self.call_google_palm2

    @staticmethod
    def _strip_markdown_fences(text):
        """Return ``text`` without a surrounding Markdown code fence.

        Handles an opening fence with or without a language tag in any letter
        case (for example ```` ```json ````, ```` ```JSON ```` or a bare
        ```` ``` ````) and the matching closing fence. Text that is not fenced
        is returned with only surrounding whitespace removed.
        """
        cleaned = text.strip()
        if cleaned.startswith('```'):
            cleaned = cleaned.lstrip('`')
            # Skip the optional language tag that directly follows the fence.
            tag_end = 0
            while tag_end < len(cleaned) and cleaned[tag_end].isalpha():
                tag_end += 1
            cleaned = cleaned[tag_end:]
        cleaned = cleaned.strip()
        if cleaned.endswith('```'):
            cleaned = cleaned.rstrip('`')
        return cleaned.strip()

    # Define a function to format the input for Google PaLM call
    # https://cloud.google.com/vertex-ai/docs/generative-ai/migrate/migrate-palm-to-gemini?_ga=2.225326234.-1652490527.1705461451&_gac=1.186295771.1706291573.CjwKCAiAzc2tBhA6EiwArv-i6QCpx7xTP0yrBy9KKSwno3QXOWUe14mbp9RGZO0ShcbtFqyXii2PnRoCywgQAvD_BwE
    def call_google_palm2(self, prompt_text):
        """Run one prediction for ``prompt_text`` and return the reply text.

        Args:
            prompt_text: Prompt value produced by the prompt template; its
                ``.text`` attribute is sent to the model.

        Returns:
            The ``text`` attribute of the model response.
        """
        # Load the model handle once and reuse it; the generation parameters
        # are read from self.config on every call, so temperature changes made
        # by _adjust_config/_reset_config still take effect.
        model = getattr(self, '_palm_model', None)
        if model is None:
            model = TextGenerationModel.from_pretrained(self.model_name)
            self._palm_model = model

        response = model.predict(prompt_text.text,
                                 max_output_tokens=self.config.get('max_output_tokens'),
                                 temperature=self.config.get('temperature'),
                                 top_k=self.config.get('top_k'),
                                 top_p=self.config.get('top_p'))
        return response.text

    def call_llm_api_GooglePalm2(self, prompt_template, json_report, paths):
        """Query the model, validate the JSON reply and run the follow-up tools.

        Args:
            prompt_template: Prompt text inserted as the ``query`` variable.
            json_report: Optional progress reporter exposing
                ``set_text(text_main=...)``; may be falsy.
            paths: Sequence of exactly seven entries; only the last two (the
                wiki JSON path and the individual-prompt text path) are used.

        Returns:
            ``(output, nt_in, nt_out, WFO_record, GEO_record, usage_report)`` on
            success, or ``(None, nt_in, nt_out, None, None, usage_report)`` when
            no attempt produced valid JSON.
        """
        _, _, _, _, _, json_file_path_wiki, txt_file_path_ind_prompt = paths
        self.json_report = json_report
        if json_report:
            self.json_report.set_text(text_main=f'Sending request to {self.model_name}')
        self.monitor.start_monitoring_usage()
        nt_in = 0
        nt_out = 0

        attempt = 0
        for attempt in range(1, self.MAX_RETRIES + 1):
            try:
                # The temperature is applied inside call_google_palm2 via self.config.
                response = self.chain.invoke({"query": prompt_template})

                # Remove any Markdown code fence before parsing the JSON payload
                output = json.loads(self._strip_markdown_fences(response))

                if output is not None:
                    nt_in = count_tokens(prompt_template, self.VENDOR, self.TOKENIZER_NAME)
                    nt_out = count_tokens(response, self.VENDOR, self.TOKENIZER_NAME)
                    output = validate_and_align_JSON_keys_with_template(output, self.JSON_dict_structure)

                if output is None:
                    self.logger.error(f'[Attempt {attempt}] Failed to extract JSON from:\n{response}')
                    self._adjust_config()
                    continue

                self.monitor.stop_inference_timer()  # Starts tool timer too

                if self.json_report:
                    self.json_report.set_text(text_main='Working on WFO, Geolocation, Links')
                output_WFO, WFO_record, output_GEO, GEO_record = run_tools(
                    output, self.tool_WFO, self.tool_GEO, self.tool_wikipedia, json_file_path_wiki)

                save_individual_prompt(sanitize_prompt(prompt_template), txt_file_path_ind_prompt)

                self.logger.info(f"Formatted JSON:\n{json.dumps(output,indent=4)}")

                usage_report = self.monitor.stop_monitoring_report_usage()

                if self.adjust_temp != self.starting_temp:
                    self._reset_config()

                if self.json_report:
                    self.json_report.set_text(text_main='LLM call successful')
                return output, nt_in, nt_out, WFO_record, GEO_record, usage_report

            except Exception as e:
                self.logger.error(f'{e}')

                self._adjust_config()
                # Waiting is only useful when another attempt will follow.
                if attempt < self.MAX_RETRIES:
                    time.sleep(self.RETRY_DELAY)

        self.logger.info(f"Failed to extract valid JSON after [{attempt}] attempts")
        if self.json_report:
            self.json_report.set_text(text_main=f'Failed to extract valid JSON after [{attempt}] attempts')

        self.monitor.stop_inference_timer()  # Starts tool timer too
        usage_report = self.monitor.stop_monitoring_report_usage()
        self._reset_config()

        if self.json_report:
            self.json_report.set_text(text_main='LLM call failed')
        return None, nt_in, nt_out, None, None, usage_report