alfraser committed on
Commit
e94696c
1 Parent(s): 5117e0a

Initial merge in of architectures from old codebase before I modify the HF access

Files changed (1)
  1. src/architectures.py +349 -0
src/architectures.py ADDED
@@ -0,0 +1,349 @@
"""
This file contains all the code which defines architectures and
architecture components.
"""

import chromadb
import json
import os
import requests

from abc import ABC, abstractmethod
from enum import Enum
from time import time
from typing import List, Optional

from src.common import config_dir, data_dir, hf_api_token
from src.models import HFLlamaChatModel


class ArchitectureRequest:
    """
    This class represents a request (chat query) from a user which can then be built up or
    modified through the pipeline process. It also holds the response to the request, which is
    likewise a stack that can be modified as the request moves through the pipeline.
    """
    def __init__(self, query: str):
        self._request = [query]  # Stack for the request text as it evolves down the pipeline
        self._response = []  # Stack for the response text as it evolves down the pipeline
        self.early_exit = False

    @property
    def request(self):
        return self._request[-1]

    @request.setter
    def request(self, value: str):
        self._request.append(value)

    @property
    def response(self):
        if len(self._response) > 0:
            return self._response[-1]
        return None

    @response.setter
    def response(self, value: str):
        self._response.append(value)

    def as_markdown(self) -> str:
        """
        Returns a markdown representation for display / testing
        :return: str - the markdown
        """
        md = "- **Request evolution**"
        for r in self._request:
            md += f"\n  - {r}"
        md += "\n- **Response evolution**"
        for r in self._response:
            md += f"\n  - {r}"
        return md


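# Illustrative usage sketch (comments only, not part of the pipeline code) showing how the
# request and response stacks evolve as components rewrite them:
#
#   req = ArchitectureRequest("What products do you sell?")
#   req.request = "QUESTION: What products do you sell?\n\nFACT: ..."  # e.g. pushed by a RAG step
#   req.response = "We sell a range of speakers."                      # e.g. pushed by a model step
#   print(req.as_markdown())  # renders both evolution stacks for display / testing

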
class ArchitectureTraceOutcome(Enum):
    """
    Class representing the outcome of a component step in an architecture
    """
    NONE = 0
    SUCCESS = 1
    EARLY_EXIT = 2
    EXCEPTION = 3


class ArchitectureTraceStep:
    """
    Class to hold the details of a single trace step
    """
    def __init__(self, name: str):
        self.name = name
        self.start_ms = int(time() * 1000)
        self.end_ms = None
        self.outcome = ArchitectureTraceOutcome.NONE
        self._exception = None

    def end(self, outcome: ArchitectureTraceOutcome):
        self.end_ms = int(time() * 1000)
        self.outcome = outcome

    @property
    def exception(self):
        return self._exception

    @exception.setter
    def exception(self, value: Exception):
        self._exception = f'{value}'  # Hold any exception as a string in the trace

    def as_markdown(self) -> str:
        """
        Converts the trace step to markdown for simple display purposes
        :return: a string of markdown
        """
        md = f"- **Step**: {self.name} \n"
        md += f"  - **Start**: {self.start_ms}; **End**: {self.end_ms} \n"
        md += f"  - **Elapsed time**: {self.end_ms - self.start_ms}ms \n"
        outcome = "None"
        if self.outcome == ArchitectureTraceOutcome.SUCCESS:
            outcome = "Success"
        elif self.outcome == ArchitectureTraceOutcome.EARLY_EXIT:
            outcome = "Early Exit"
        elif self.outcome == ArchitectureTraceOutcome.EXCEPTION:
            outcome = f"Exception ({self._exception})"
        md += f"  - **Outcome**: {outcome}"
        return md


class ArchitectureTrace:
    """
    This class represents the system instrumentation / trace for a request. It holds the name
    of each component called, the start and end time of the component processing and the outcome
    of the step.
    """
    def __init__(self):
        self.steps: List[ArchitectureTraceStep] = []

    def start_trace(self, name: str):
        self.steps.append(ArchitectureTraceStep(name=name))

    def end_trace(self, outcome: ArchitectureTraceOutcome):
        assert len(self.steps) > 0
        assert self.steps[-1].outcome == ArchitectureTraceOutcome.NONE
        self.steps[-1].end(outcome=outcome)

    def as_markdown(self) -> str:
        """
        Converts the trace to markdown for simple display purposes
        :return: a string of markdown
        """
        md = ' \n'.join([s.as_markdown() for s in self.steps])
        return md


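# Illustrative example of the markdown a trace renders to (the component names are from this
# file, the timings are invented for illustration):
#
#   - **Step**: RetrievalAugmentor
#     - **Start**: 1700000000000; **End**: 1700000000042
#     - **Elapsed time**: 42ms
#     - **Outcome**: Success
#   - **Step**: HFLlamaHttpRequestor
#     - **Start**: 1700000000042; **End**: 1700000001890
#     - **Elapsed time**: 1848ms
#     - **Outcome**: Success

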
class ArchitectureComponent(ABC):
    description = "Components should override a description"

    @abstractmethod
    def process_request(self, request: ArchitectureRequest) -> None:
        """
        The principal method that concrete implementations of a component must implement.
        They should signal anything to the pipeline through direct modification of the provided
        request (i.e. amending the request text or response text, or setting the early_exit flag).
        :param request: The request which is flowing down the pipeline
        :return: None
        """
        pass

    def config_description(self) -> str:
        """
        Optional method to override to provide a markdown-format description of the component's
        configuration for display purposes
        :return: a markdown string (defaulting to empty in the base class)
        """
        return ""


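# Minimal illustrative component (a hypothetical example for documentation only, not one of the
# configured steps below) showing the contract a concrete ArchitectureComponent must honour:
#
#   class QueryTrimmer(ArchitectureComponent):
#       description = "Trims leading/trailing whitespace from the user query."
#
#       def process_request(self, request: ArchitectureRequest) -> None:
#           request.request = request.request.strip()  # pushes the amended text onto the stack

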
class Architecture:
    """
    An architecture is built as a callable pipeline of steps. An ArchitectureRequest object is
    passed down the pipeline sequentially to each component. A component can modify the request
    if needed, update the response or signal an early exit. The Architecture framework also
    provides trace timing and logging, plus exception handling so an individual request cannot
    crash the system.
    """
    architectures = None

    @classmethod
    def load_architectures(cls, force_reload: bool = False) -> None:
        """
        Class method to load the configuration file and try to set up an architecture for each
        config entry (a named sequence of components with optional setup params) - see the
        illustrative config sketch after this class.
        :param force_reload: Whether to force a reload; defaults to False.
        """
        if cls.architectures is None or force_reload:
            config_file = os.path.join(config_dir, "architectures.json")
            with open(config_file, "r") as f:
                configs = json.load(f)['architectures']
            archs = []
            for c in configs:
                arch_name = c['name']
                arch_description = c['description']
                arch_comps = []
                for s in c['steps']:
                    component_class_name = s['class']
                    component_init_params = {}
                    if 'params' in s:
                        component_init_params = s['params']
                    arch_comps.append(globals()[component_class_name](**component_init_params))
                archs.append(Architecture(name=arch_name, description=arch_description, steps=arch_comps))
            cls.architectures = archs

    @classmethod
    def get_architecture(cls, name: str):
        """
        Look up an architecture by name
        :param name: The name of the architecture to look up
        :return: The architecture object
        """
        if cls.architectures is None:
            cls.load_architectures()
        for a in cls.architectures:
            if a.name == name:
                return a
        raise ValueError(f"Could not find an architecture named {name}")

    def __init__(self,
                 name: str,
                 description: str,
                 steps: List[ArchitectureComponent],
                 exception_text: str = "Sorry, an internal technical error occurred.",
                 no_response_text: str = "Sorry, I can't answer that."):
        self.name = name
        self.description = description
        self.steps = steps
        self.exception_text = exception_text
        self.no_response_text = no_response_text

    def __call__(self, request: ArchitectureRequest) -> ArchitectureTrace:
        """
        The main entry point to call the pipeline. Passes the request through each pipeline step
        in sequence, allowing each to amend the request or exit the processing early, and captures
        exceptions into the trace. Saving the request/response and the trace to a store for
        analysis is still a TODO.
        :param request: The request to process through the pipeline
        :return: The trace recording each step of the processing
        """
        trace = ArchitectureTrace()
        for component in self.steps:
            trace.start_trace(name=component.__class__.__name__)
            try:
                component.process_request(request)
                if request.early_exit:
                    trace.end_trace(outcome=ArchitectureTraceOutcome.EARLY_EXIT)
                    break
                else:
                    trace.end_trace(outcome=ArchitectureTraceOutcome.SUCCESS)
            except Exception as err:
                trace.end_trace(outcome=ArchitectureTraceOutcome.EXCEPTION)
                trace.steps[-1].exception = err
                break
        # TODO - save the request / response
        # TODO - save the trace
        return trace


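# Illustrative sketch only: the config values and the architecture name below are assumptions for
# documentation, not taken from the repository. Based on how load_architectures parses
# config/architectures.json, an entry is expected to look roughly like this:
#
#   {
#     "architectures": [
#       {
#         "name": "RAG Llama pipeline",
#         "description": "Screened, retrieval-augmented Llama chat",
#         "steps": [
#           {"class": "InputRequestScreener"},
#           {"class": "RetrievalAugmentor", "params": {"vector_store": "products", "doc_count": 5}},
#           {"class": "HFLlamaHttpRequestor", "params": {"model": "meta-llama/Llama-2-7b-chat-hf",
#                                                        "system_prompt": "Answer using only the FACTs provided.",
#                                                        "max_tokens": 512}},
#           {"class": "OutputResponseScreener"}
#         ]
#       }
#     ]
#   }
#
# And a hypothetical invocation of a configured pipeline:
#
#   arch = Architecture.get_architecture("RAG Llama pipeline")
#   req = ArchitectureRequest("Which of your speakers support bluetooth?")
#   trace = arch(req)
#   print(req.response)         # final response from the pipeline (or None)
#   print(trace.as_markdown())  # per-step timings and outcomes

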
class InputRequestScreener(ArchitectureComponent):
    description = "TODO: Screens inputs for harmful requests from the user (i.e. offensive content or technical attacks)."

    def process_request(self, request: ArchitectureRequest) -> None:
        # TODO - check the request and then early exit if harmful
        pass


class OutputResponseScreener(ArchitectureComponent):
    description = "TODO: Screens outputs for harmful responses."

    def process_request(self, request: ArchitectureRequest) -> None:
        # TODO - check the response and then early exit if harmful
        pass


class RetrievalAugmentor(ArchitectureComponent):
    description = "Retrieves appropriate documents from the store and then augments the request."

    def __init__(self, vector_store: str, doc_count: int = 5):
        chroma_db = os.path.join(data_dir, 'vector_stores', f'{vector_store}_chroma')
        self.vector_store = chroma_db
        client = chromadb.PersistentClient(path=chroma_db)
        self.collection = client.get_collection(name='products')
        self.doc_count = doc_count

    def process_request(self, request: ArchitectureRequest) -> None:
        # Get the doc_count nearest documents from the doc store
        input_query = request.request
        results = self.collection.query(query_texts=[input_query], n_results=self.doc_count)
        documents = results['documents'][0]  # Index 0 as we are always asking one question

        # Update the request to include the retrieved documents
        new_query = f'QUESTION: {input_query}\n\n'
        new_query += '\n'.join([f'FACT: {d}' for d in documents])

        # Put the augmented query back into the architecture request
        request.request = new_query

    def config_description(self) -> str:
        """
        Custom config details as markdown
        """
        desc = f"Vector Store: {self.vector_store}; "
        desc += f"Max docs: {self.doc_count}"
        return desc


class HFLlamaHttpRequestor(ArchitectureComponent):
    """
    A concrete pipeline component which sends the user text to a given llama chat based
    model on hugging face.
    """
    description = "Passes the request to a model hosted on hugging face hub"

    def __init__(self, model: str, system_prompt: str, max_tokens: int):
        self.model: str = model
        self.system_prompt: str = system_prompt
        self.max_tokens = max_tokens
        self.api_token = hf_api_token()

    def config_description(self) -> str:
        """
        Custom config details as markdown
        """
        desc = f"Model: {self.model}; "
        desc += f"Max tokens: {self.max_tokens}; "
        desc += f"System prompt: {self.system_prompt}"
        return desc

    def process_request(self, request: ArchitectureRequest) -> None:
        """
        Main processing method for this component. Calls the hugging face inference endpoint for
        the configured model over HTTP and then adds the generated text to the response element
        of the request.
        """
        chat_endpoint = f'https://api-inference.huggingface.co/models/{self.model}'

        data = {
            "max_tokens": self.max_tokens,
            "messages": [
                {"role": "system", "content": self.system_prompt},
                {"role": "user", "content": request.request}
            ]
        }
        data_json = json.dumps(data, default=lambda o: o.__dict__)
        headers = {
            'accept': 'application/json',
            'Content-Type': 'application/json',
            'Authorization': f'Bearer {self.api_token}'
        }
        response = requests.post(chat_endpoint, headers=headers, data=data_json)
        if response.status_code != 200:
            raise ValueError(f"Call to model returned status {response.status_code}: {response.reason}")
        request.response = response.json()['choices'][0]['message']['content']
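# Note: the payload and the parsing of response.json()['choices'][0]['message']['content'] above
# assume an OpenAI-style chat-completion response from the endpoint; per the commit message the
# hugging face access is about to be reworked, so treat this component as provisional.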