alfraser committed on
Commit cc9a95f
1 Parent(s): e94696c

Integrated the architecture page structure and code from the old codebase and adapted it for Hugging Face deployment

config/architectures.json ADDED
@@ -0,0 +1,21 @@
+ {
+   "architectures": [
+     {
+       "name": "Baseline LLM",
+       "description": "Just a direct call through to the LLM without any additional components.",
+       "steps": [
+         {"class": "HFLlamaHttpRequestor", "params": {"model": "meta-llama/Llama-2-7b-chat-hf", "system_prompt": "You are a helpful agent.", "max_tokens": 2000}}
+       ]
+     },
+     {
+       "name": "RAG Architecture",
+       "description": "An architecture with a baseline LLM at its core, which augments user requests with information retrieved from a knowledge store populated in advance with the organisational knowledge.",
+       "steps": [
+         {"class": "InputRequestScreener"},
+         {"class": "RetrievalAugmentor", "params": {"vector_store": "products_tvs"}},
+         {"class": "HFLlamaHttpRequestor", "params": {"model": "meta-llama/Llama-2-7b-chat-hf", "system_prompt": "You are a helpful domestic appliance advisor. Please answer the following customer question, answering only from the facts provided. Do not make things up, and say if you cannot answer.", "max_tokens": 2000}},
+         {"class": "OutputResponseScreener"}
+       ]
+     }
+   ]
+ }
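Each architecture is declared as an ordered list of pipeline steps, each naming a component class plus an optional params dict for its constructor. A minimal sketch of how this config could be parsed (the real loader is `Architecture.load_architectures()` in src/architectures.py; the registry and return shape below are illustrative assumptions):

```python
import json

# Assumed registry mapping "class" names from the config to component classes
STEP_CLASSES = {}

def load_architecture_configs(path="config/architectures.json"):
    """Sketch only: parse the config into (name, description, steps) tuples."""
    with open(path) as f:
        config = json.load(f)
    architectures = []
    for arch in config["architectures"]:
        # Instantiate each step class with its optional params dict
        steps = [STEP_CLASSES[s["class"]](**s.get("params", {}))
                 for s in arch["steps"]]
        architectures.append((arch["name"], arch["description"], steps))
    return architectures
```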
pages/010_LLM_Architectures.py CHANGED
@@ -1,5 +1,78 @@
+ import pandas as pd
  import streamlit as st
+
  from src.st_helpers import st_setup
+ from src.architectures import *
+
+
+ def show_architecture(architecture: str) -> None:
+     """
+     Convenience wrapper for the streamlit rendering of an architecture's details
+     and the ability to interact with it.
+     :param architecture: the name of the architecture to output
+     """
+     arch = Architecture.get_architecture(architecture)
+
+     # Segment into two containers for organisation
+     arch_container = st.container()
+     chat_container = st.container()
+
+     with arch_container:
+         st.divider()
+         st.write(f'### {arch.name}')
+         st.write('#### Architecture description')
+         st.write(arch.description)
+         table_data = []
+         for j, s in enumerate(arch.steps, start=1):
+             table_data.append(
+                 [j, s.__class__.__name__, s.description, s.config_description()]
+             )
+         table_cols = ['Step', 'Name', 'Description', 'Config details']
+         st.write('#### Architecture pipeline steps')
+         st.table(pd.DataFrame(table_data, columns=table_cols))
+
+     with chat_container:
+         st.write(f"### Chat with {arch.name}")
+         st.write("Note this is a single query through the relevant architecture. It is a sample you can interact with; it does not manage a chat session history.")
+
+         chat_col, trace_col, request_col = st.columns([3, 2, 2])
+
+         with chat_col:
+             with st.chat_message("assistant"):
+                 st.write("Chat with me in the box below")
+         if prompt := st.chat_input("Ask a question"):
+             with chat_col:
+                 with st.chat_message("user"):
+                     st.write(prompt)
+                 request = ArchitectureRequest(query=prompt)
+                 trace = arch(request)
+                 with st.chat_message("assistant"):
+                     st.write(request.response)
+             with trace_col:
+                 st.write("#### Architecture Trace")
+                 st.markdown(trace.as_markdown())
+             with request_col:
+                 st.write("#### Full Request/Response")
+                 st.markdown(request.as_markdown())
+
 
  if st_setup('LLM Arch'):
      st.write("# LLM Architectures")
+     Architecture.load_architectures()
+
+     # Display the available architectures
+     arch_count = len(Architecture.architectures)
+     if arch_count == 1:
+         st.write('### 1 Architecture available')
+     else:
+         st.write(f'### {arch_count} Architectures available')
+
+     if st.button("Force reload of architecture configs"):
+         Architecture.load_architectures(force_reload=True)
+
+     arch_names = [a.name for a in Architecture.architectures]
+     selected_arch = st.radio(label="Available architectures", label_visibility="hidden", options=arch_names, index=None)
+     if selected_arch is None:
+         st.info('Select an architecture from above to see details and interact with it')
+     else:
+         show_architecture(selected_arch)
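On this page a prompt becomes an `ArchitectureRequest`, the architecture is invoked as a callable over it, and the response and trace are read back off the result. The same flow can be exercised outside Streamlit; a sketch, with the architecture name taken from config/architectures.json and an illustrative query:

```python
from src.architectures import Architecture, ArchitectureRequest

Architecture.load_architectures()
arch = Architecture.get_architecture("RAG Architecture")

request = ArchitectureRequest(query="Which TV is best for a bright room?")
trace = arch(request)        # runs each pipeline step in order
print(request.response)      # the pipeline writes the response onto the request
print(trace.as_markdown())   # per-step trace, as rendered in the trace column
```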
requirements.txt CHANGED
@@ -1,2 +1,3 @@
  pandas==2.1.1
  plotly==5.17.0
+ chromadb==0.4.15
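chromadb is pinned to back the vector store used by the `RetrievalAugmentor` step. A minimal sketch of the kind of lookup that step presumably performs against the `products_tvs` collection (the storage path, query text, and result handling are assumptions):

```python
import chromadb

# Sketch only: the actual wiring lives in RetrievalAugmentor (src/architectures.py)
client = chromadb.PersistentClient(path="data/vector_stores")  # assumed location
collection = client.get_collection("products_tvs")
results = collection.query(
    query_texts=["Which TV has the best contrast ratio?"],
    n_results=3,
)
facts = results["documents"][0]  # top-k documents to splice into the LLM prompt
```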
src/architectures.py CHANGED
@@ -6,7 +6,7 @@ architecture components.
  import chromadb
  import json
  import os
- import requests
+ import traceback
 
  from abc import ABC, abstractmethod
  from enum import Enum
@@ -245,6 +245,7 @@ class Architecture:
          except Exception as err:
              trace.end_trace(outcome=ArchitectureTraceOutcome.EXCEPTION)
              trace.steps[-1].exception = err
+             traceback.print_exc()
              break
      # TODO - save the request / response
      # TODO - save the trace
@@ -328,22 +329,8 @@ class HFLlamaHttpRequestor(ArchitectureComponent):
      by port if provided or attempting to lookup by name, and then adds this to the
      response element of the request.
      """
-     chat_endpoint = f'https://api-inference.huggingface.co/models/{self.model}'
-
-     data = {
-         "max_tokens": self.max_tokens,
-         "messages": [
-             {"role": "system", "content": self.system_prompt},
-             {"role": "user", "content": request.request}
-         ]
-     }
-     data_json = json.dumps(data, default=lambda o: o.__dict__)
-     headers = {
-         'accept': 'application/json',
-         'Content-Type': 'application/json',
-         'Authorization': f'Bearer {self.api_token}'
-     }
-     response = requests.post(chat_endpoint, headers=headers, data=data_json)
-     if response.status_code != 200:
-         raise ValueError(f"Call to model returned status {response.status_code}: {response.reason}")
-     request.response = response.json()['choices'][0]['message']['content']
+     llm = HFLlamaChatModel.for_model(self.model)
+     if llm is None:
+         raise ValueError(f'No model {self.model} configured in the environment')
+     response = llm(request.request, system_prompt=self.system_prompt, max_new_tokens=self.max_tokens)
+     request.response = response
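The net effect: `HFLlamaHttpRequestor` no longer posts to the Hugging Face Inference API over HTTP; it resolves an in-process model via `HFLlamaChatModel.for_model()` and calls it directly, and any exception raised by a pipeline step now prints its traceback before the trace is closed out. For orientation, the step contract the pipeline appears to rely on, inferred from how steps are used on the page and in this file (the method name and attributes here are assumptions, not the repo's actual signatures):

```python
from abc import ABC, abstractmethod

class ArchitectureComponent(ABC):
    description = "Summary of what this step does"  # rendered in the steps table

    @abstractmethod
    def process_request(self, request) -> None:  # hypothetical method name
        """Mutate the request in place, e.g. set request.response."""

    def config_description(self) -> str:
        return ""  # per-instance config details, rendered in the steps table
```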
src/models.py CHANGED
@@ -21,10 +21,20 @@ class HFLlamaChatModel:
 
      @classmethod
      def for_name(cls, name: str):
+         if cls.models is None:
+             cls.load_configs()
          for m in cls.models:
              if m.name == name:
                  return m
 
+     @classmethod
+     def for_model(cls, model: str):
+         if cls.models is None:
+             cls.load_configs()
+         for m in cls.models:
+             if m.id == model:
+                 return m
+
      @classmethod
      def available_models(cls) -> List[str]:
          if cls.models is None:
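With the lazy `load_configs()` call in both lookups, `for_model` works without prior setup; it returns None when no configured model matches, which is exactly the case the new `HFLlamaHttpRequestor` code guards against. Hypothetical usage, mirroring how that component now resolves its model:

```python
llm = HFLlamaChatModel.for_model("meta-llama/Llama-2-7b-chat-hf")
if llm is None:
    raise ValueError("Model not configured in this environment")
# The returned model is callable, per the new HFLlamaHttpRequestor code
reply = llm("Which TV has the best contrast ratio?",
            system_prompt="You are a helpful domestic appliance advisor.",
            max_new_tokens=2000)
print(reply)
```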