Integrated the architecture page structure and code from the old codebase and adapted it for Hugging Face deployment
- config/architectures.json +21 -0
- pages/010_LLM_Architectures.py +73 -0
- requirements.txt +1 -0
- src/architectures.py +7 -20
- src/models.py +10 -0
config/architectures.json
ADDED
@@ -0,0 +1,21 @@
+{
+  "architectures": [
+    {
+      "name": "Baseline LLM",
+      "description": "Just a direct call through to the LLM without any additional components.",
+      "steps": [
+        {"class": "HFLlamaHttpRequestor", "params": {"model": "meta-llama/Llama-2-7b-chat-hf", "system_prompt": "You are a helpful agent.", "max_tokens": 2000}}
+      ]
+    },
+    {
+      "name": "RAG Architecture",
+      "description": "An architecture which uses a raw baseline LLM for its core, but augments requests from the user with information which has been retrieved from a knowledge store where the organisational knowledge has previously been stored for this purpose.",
+      "steps": [
+        {"class": "InputRequestScreener"},
+        {"class": "RetrievalAugmentor", "params": {"vector_store": "products_tvs"}},
+        {"class": "HFLlamaHttpRequestor", "params": {"model": "meta-llama/Llama-2-7b-chat-hf", "system_prompt": "You are a helpful domestic appliance advisor. Please answer the following customer question, answering only from the facts provided. Do not make things up, and say if you cannot answer.", "max_tokens": 2000}},
+        {"class": "OutputResponseScreener"}
+      ]
+    }
+  ]
+}
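For reference, a minimal sketch of how a loader could turn these config entries into live pipeline steps, assuming each "class" value names a component importable from src.architectures and each "params" mapping feeds the constructor as keyword arguments (the repo's actual Architecture.load_architectures is not shown in this diff):

```python
import json

import src.architectures as arch_module


def build_steps(spec: dict) -> list:
    """Instantiate the pipeline steps described by one architecture entry."""
    steps = []
    for step in spec["steps"]:
        # Resolve the component class by name; pass params, if any, as kwargs.
        cls = getattr(arch_module, step["class"])
        steps.append(cls(**step.get("params", {})))
    return steps


with open("config/architectures.json") as f:
    config = json.load(f)

# Map architecture name -> instantiated step list.
pipelines = {spec["name"]: build_steps(spec) for spec in config["architectures"]}
```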
pages/010_LLM_Architectures.py
CHANGED
@@ -1,5 +1,78 @@
+import pandas as pd
 import streamlit as st
+
 from src.st_helpers import st_setup
+from src.architectures import *
+
+
+def show_architecture(architecture: str) -> None:
+    """
+    Convenience wrapper for the streamlit rendering of an architecture's details and the
+    ability to interact with the architecture.
+    :param architecture: the name of the architecture to output
+    """
+    arch = Architecture.get_architecture(architecture)
+
+    # Segment into two containers for organisation
+    arch_container = st.container()
+    chat_container = st.container()
+
+    with arch_container:
+        st.divider()
+        st.write(f'### {arch.name}')
+        st.write('#### Architecture description')
+        st.write(arch.description)
+        table_data = []
+        for j, s in enumerate(arch.steps, start=1):
+            table_data.append(
+                [j, s.__class__.__name__, s.description, s.config_description()]
+            )
+        table_cols = ['Step', 'Name', 'Description', 'Config details']
+        st.write('#### Architecture pipeline steps')
+        st.table(pd.DataFrame(table_data, columns=table_cols))
+
+    with chat_container:
+        st.write(f"### Chat with {arch.name}")
+        st.write("Note this is a simple single query through the relevant architecture. This is just a sample so you can interact with it and does not manage a chat session history.")
+
+        chat_col, trace_col, request_col = st.columns([3, 2, 2])
+
+        with chat_col:
+            with st.chat_message("assistant"):
+                st.write("Chat with me in the box below")
+        if prompt := st.chat_input("Ask a question"):
+            with chat_col:
+                with st.chat_message("user"):
+                    st.write(prompt)
+                request = ArchitectureRequest(query=prompt)
+                trace = arch(request)
+                with st.chat_message("assistant"):
+                    st.write(request.response)
+            with trace_col:
+                st.write("#### Architecture Trace")
+                st.markdown(trace.as_markdown())
+            with request_col:
+                st.write("#### Full Request/Response")
+                st.markdown(request.as_markdown())
+
 
 if st_setup('LLM Arch'):
     st.write("# LLM Architectures")
+    Architecture.load_architectures()
+
+    # Display the available architectures
+    arch_count = len(Architecture.architectures)
+    if arch_count == 1:
+        st.write('### 1 Architecture available')
+    else:
+        st.write(f'### {arch_count} Architectures available')
+
+    if st.button("Force reload of architecture configs"):
+        Architecture.load_architectures(force_reload=True)
+
+    arch_names = [a.name for a in Architecture.architectures]
+    selected_arch = st.radio(label="Available architectures", label_visibility="hidden", options=arch_names, index=None)
+    if selected_arch is None:
+        st.info('Select an architecture from above to see details and interact with it')
+    else:
+        show_architecture(selected_arch)
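The page calls trace = arch(request) and then renders trace.as_markdown() and request.as_markdown(). For orientation, here is a simplified sketch of that calling contract, assuming each step mutates a shared request while the architecture records which steps ran; the real implementation in src/architectures.py also captures outcomes and exceptions, so this is an illustration, not the repo's code:

```python
from dataclasses import dataclass, field
from typing import List


@dataclass
class Request:
    """Hypothetical stand-in for ArchitectureRequest: steps read .query and fill .response."""
    query: str
    response: str = ""


@dataclass
class Trace:
    """Hypothetical stand-in for the trace object the page renders."""
    steps: List[str] = field(default_factory=list)

    def as_markdown(self) -> str:
        return "\n".join(f"1. {name}" for name in self.steps)


class Pipeline:
    """Sketch of the Architecture.__call__ contract the page relies on."""

    def __init__(self, steps):
        self.steps = steps

    def __call__(self, request: Request) -> Trace:
        trace = Trace()
        for step in self.steps:
            step.process(request)  # each component mutates the shared request
            trace.steps.append(step.__class__.__name__)
        return trace
```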
requirements.txt
CHANGED
@@ -1,2 +1,3 @@
 pandas==2.1.1
 plotly==5.17.0
+chromadb==0.4.15
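chromadb is pinned because the RAG pipeline's RetrievalAugmentor reads from a vector store (named products_tvs in the config above). A hedged sketch of querying such a store with the chromadb 0.4 client; the storage path and query text here are assumptions for illustration, not taken from the repo:

```python
import chromadb

# Open a persisted Chroma store; the path is a guess for illustration.
client = chromadb.PersistentClient(path="data/vector_store")

# Collection name comes from config/architectures.json ("vector_store": "products_tvs").
collection = client.get_or_create_collection(name="products_tvs")

# Retrieve the closest stored documents to a customer question.
results = collection.query(query_texts=["Which TV is best for bright rooms?"], n_results=3)
for doc in results["documents"][0]:
    print(doc)
```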
src/architectures.py
CHANGED
@@ -6,7 +6,7 @@ architecture components.
 import chromadb
 import json
 import os
-import requests
+import traceback
 
 from abc import ABC, abstractmethod
 from enum import Enum
@@ -245,6 +245,7 @@ class Architecture:
             except Exception as err:
                 trace.end_trace(outcome=ArchitectureTraceOutcome.EXCEPTION)
                 trace.steps[-1].exception = err
+                traceback.print_exc()
                 break
         # TODO - save the request / response
         # TODO - save the trace
@@ -328,22 +329,8 @@ class HFLlamaHttpRequestor(ArchitectureComponent):
         by port if provided or attempting to lookup by name, and then adds this to the
         response element of the request.
         """
-
-
-
-        data = {
-            "messages": [
-                {"role": "system", "content": self.system_prompt},
-                {"role": "user", "content": request.request}
-            ]
-        }
-        data_json = json.dumps(data, default=lambda o: o.__dict__)
-        headers = {
-            'accept': 'application/json',
-            'Content-Type': 'application/json',
-            'Authorization': f'Bearer {self.api_token}'
-        }
-        response = requests.post(chat_endpoint, headers=headers, data=data_json)
-        if response.status_code != 200:
-            raise ValueError(f"Call to model returned status {response.status_code}: {response.reason}")
-        request.response = response.json()['choices'][0]['message']['content']
+        llm = HFLlamaChatModel.for_model(self.model)
+        if llm is None:
+            raise ValueError(f'No model {self.model} configured in the environment')
+        response = llm(request.request, system_prompt=self.system_prompt, max_new_tokens=self.max_tokens)
+        request.response = response
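The rewritten process body delegates the raw HTTP handling to HFLlamaChatModel instead of building the chat payload inline. That wrapper's __call__ is outside this diff; one plausible shape, sketched with huggingface_hub's InferenceClient (the class name, prompt template, and token handling below are assumptions, not the repo's actual code):

```python
import os

from huggingface_hub import InferenceClient


class LlamaChatWrapper:
    """Hypothetical wrapper with the interface the new code calls:
    llm(message, system_prompt=..., max_new_tokens=...)."""

    def __init__(self, model_id: str):
        self.client = InferenceClient(model=model_id, token=os.environ.get("HF_TOKEN"))

    def __call__(self, message: str, system_prompt: str, max_new_tokens: int) -> str:
        # Llama-2 chat template; the repo's real formatting may differ.
        prompt = f"<s>[INST] <<SYS>>\n{system_prompt}\n<</SYS>>\n\n{message} [/INST]"
        return self.client.text_generation(prompt, max_new_tokens=max_new_tokens)
```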
src/models.py
CHANGED
@@ -21,10 +21,20 @@ class HFLlamaChatModel:
 
     @classmethod
     def for_name(cls, name: str):
+        if cls.models is None:
+            cls.load_configs()
         for m in cls.models:
             if m.name == name:
                 return m
 
+    @classmethod
+    def for_model(cls, model: str):
+        if cls.models is None:
+            cls.load_configs()
+        for m in cls.models:
+            if m.id == model:
+                return m
+
     @classmethod
     def available_models(cls) -> List[str]:
         if cls.models is None: