alfraser committed on
Commit cc9a95f
1 Parent(s): e94696c

Integrated the architecture page structure and code from the old codebase and adapted it for Hugging Face deployment

config/architectures.json ADDED
@@ -0,0 +1,21 @@
+ {
+   "architectures": [
+     {
+       "name": "Baseline LLM",
+       "description": "Just a direct call through to the LLM without any additional components.",
+       "steps": [
+         {"class": "HFLlamaHttpRequestor", "params": {"model": "meta-llama/Llama-2-7b-chat-hf", "system_prompt": "You are a helpful agent.", "max_tokens": 2000}}
+       ]
+     },
+     {
+       "name": "RAG Architecture",
+       "description": "An architecture with a baseline LLM at its core, which augments user requests with information retrieved from a knowledge store populated in advance with the organisational knowledge.",
+       "steps": [
+         {"class": "InputRequestScreener"},
+         {"class": "RetrievalAugmentor", "params": {"vector_store": "products_tvs"}},
+         {"class": "HFLlamaHttpRequestor", "params": {"model": "meta-llama/Llama-2-7b-chat-hf", "system_prompt": "You are a helpful domestic appliance advisor. Please answer the following customer question, answering only from the facts provided. Do not make things up, and say if you cannot answer.", "max_tokens": 2000}},
+         {"class": "OutputResponseScreener"}
+       ]
+     }
+   ]
+ }
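Each architecture is declared as an ordered list of pipeline steps, each naming a component class plus an optional params dict for its constructor. A minimal sketch of how this config could be parsed (the real loader is `Architecture.load_architectures()` in src/architectures.py; the registry and return shape below are illustrative assumptions):

```python
import json

# Assumed registry mapping "class" names from the config to component classes
STEP_CLASSES = {}

def load_architecture_configs(path="config/architectures.json"):
    """Sketch only: parse the config into (name, description, steps) tuples."""
    with open(path) as f:
        config = json.load(f)
    architectures = []
    for arch in config["architectures"]:
        # Instantiate each step class with its optional params dict
        steps = [STEP_CLASSES[s["class"]](**s.get("params", {}))
                 for s in arch["steps"]]
        architectures.append((arch["name"], arch["description"], steps))
    return architectures
```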
pages/010_LLM_Architectures.py CHANGED
@@ -1,5 +1,78 @@
+ import pandas as pd
  import streamlit as st
+
  from src.st_helpers import st_setup
+ from src.architectures import *
+
+
+ def show_architecture(architecture: str) -> None:
+     """
+     Convenience wrapper for the streamlit rendering of an architecture's details
+     and the ability to interact with it.
+     :param architecture: the name of the architecture to output
+     """
+     arch = Architecture.get_architecture(architecture)
+
+     # Segment into two containers for organisation
+     arch_container = st.container()
+     chat_container = st.container()
+
+     with arch_container:
+         st.divider()
+         st.write(f'### {arch.name}')
+         st.write('#### Architecture description')
+         st.write(arch.description)
+         table_data = []
+         for j, s in enumerate(arch.steps, start=1):
+             table_data.append(
+                 [j, s.__class__.__name__, s.description, s.config_description()]
+             )
+         table_cols = ['Step', 'Name', 'Description', 'Config details']
+         st.write('#### Architecture pipeline steps')
+         st.table(pd.DataFrame(table_data, columns=table_cols))
+
+     with chat_container:
+         st.write(f"### Chat with {arch.name}")
+         st.write("Note this is a single query through the relevant architecture. It is a sample you can interact with; it does not manage a chat session history.")
+
+         chat_col, trace_col, request_col = st.columns([3, 2, 2])
+
+         with chat_col:
+             with st.chat_message("assistant"):
+                 st.write("Chat with me in the box below")
+         if prompt := st.chat_input("Ask a question"):
+             with chat_col:
+                 with st.chat_message("user"):
+                     st.write(prompt)
+                 request = ArchitectureRequest(query=prompt)
+                 trace = arch(request)
+                 with st.chat_message("assistant"):
+                     st.write(request.response)
+             with trace_col:
+                 st.write("#### Architecture Trace")
+                 st.markdown(trace.as_markdown())
+             with request_col:
+                 st.write("#### Full Request/Response")
+                 st.markdown(request.as_markdown())
+
 
  if st_setup('LLM Arch'):
      st.write("# LLM Architectures")
+     Architecture.load_architectures()
+
+     # Display the available architectures
+     arch_count = len(Architecture.architectures)
+     if arch_count == 1:
+         st.write('### 1 Architecture available')
+     else:
+         st.write(f'### {arch_count} Architectures available')
+
+     if st.button("Force reload of architecture configs"):
+         Architecture.load_architectures(force_reload=True)
+
+     arch_names = [a.name for a in Architecture.architectures]
+     selected_arch = st.radio(label="Available architectures", label_visibility="hidden", options=arch_names, index=None)
+     if selected_arch is None:
+         st.info('Select an architecture from above to see details and interact with it')
+     else:
+         show_architecture(selected_arch)
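On this page a prompt becomes an `ArchitectureRequest`, the architecture is invoked as a callable over it, and the response and trace are read back off the result. The same flow can be exercised outside Streamlit; a sketch, with the architecture name taken from config/architectures.json and an illustrative query:

```python
from src.architectures import Architecture, ArchitectureRequest

Architecture.load_architectures()
arch = Architecture.get_architecture("RAG Architecture")

request = ArchitectureRequest(query="Which TV is best for a bright room?")
trace = arch(request)        # runs each pipeline step in order
print(request.response)      # the pipeline writes the response onto the request
print(trace.as_markdown())   # per-step trace, as rendered in the trace column
```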
requirements.txt CHANGED
@@ -1,2 +1,3 @@
  pandas==2.1.1
  plotly==5.17.0
+ chromadb==0.4.15
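chromadb is pinned to back the vector store used by the `RetrievalAugmentor` step. A minimal sketch of the kind of lookup that step presumably performs against the `products_tvs` collection (the storage path, query text, and result handling are assumptions):

```python
import chromadb

# Sketch only: the actual wiring lives in RetrievalAugmentor (src/architectures.py)
client = chromadb.PersistentClient(path="data/vector_stores")  # assumed location
collection = client.get_collection("products_tvs")
results = collection.query(
    query_texts=["Which TV has the best contrast ratio?"],
    n_results=3,
)
facts = results["documents"][0]  # top-k documents to splice into the LLM prompt
```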
src/architectures.py CHANGED
@@ -6,7 +6,7 @@ architecture components.
  import chromadb
  import json
  import os
- import requests
+ import traceback
 
  from abc import ABC, abstractmethod
  from enum import Enum
@@ -245,6 +245,7 @@ class Architecture:
          except Exception as err:
              trace.end_trace(outcome=ArchitectureTraceOutcome.EXCEPTION)
              trace.steps[-1].exception = err
+             traceback.print_exc()
              break
      # TODO - save the request / response
      # TODO - save the trace
@@ -328,22 +329,8 @@ class HFLlamaHttpRequestor(ArchitectureComponent):
      by port if provided or attempting to lookup by name, and then adds this to the
      response element of the request.
      """
-     chat_endpoint = f'https://api-inference.huggingface.co/models/{self.model}'
-
-     data = {
-         "max_tokens": self.max_tokens,
-         "messages": [
-             {"role": "system", "content": self.system_prompt},
-             {"role": "user", "content": request.request}
-         ]
-     }
-     data_json = json.dumps(data, default=lambda o: o.__dict__)
-     headers = {
-         'accept': 'application/json',
-         'Content-Type': 'application/json',
-         'Authorization': f'Bearer {self.api_token}'
-     }
-     response = requests.post(chat_endpoint, headers=headers, data=data_json)
-     if response.status_code != 200:
-         raise ValueError(f"Call to model returned status {response.status_code}: {response.reason}")
-     request.response = response.json()['choices'][0]['message']['content']
+     llm = HFLlamaChatModel.for_model(self.model)
+     if llm is None:
+         raise ValueError(f'No model {self.model} configured in the environment')
+     response = llm(request.request, system_prompt=self.system_prompt, max_new_tokens=self.max_tokens)
+     request.response = response
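The net effect: `HFLlamaHttpRequestor` no longer posts to the Hugging Face Inference API over HTTP; it resolves an in-process model via `HFLlamaChatModel.for_model()` and calls it directly, and any exception raised by a pipeline step now prints its traceback before the trace is closed out. For orientation, the step contract the pipeline appears to rely on, inferred from how steps are used on the page and in this file (the method name and attributes here are assumptions, not the repo's actual signatures):

```python
from abc import ABC, abstractmethod

class ArchitectureComponent(ABC):
    description = "Summary of what this step does"  # rendered in the steps table

    @abstractmethod
    def process_request(self, request) -> None:  # hypothetical method name
        """Mutate the request in place, e.g. set request.response."""

    def config_description(self) -> str:
        return ""  # per-instance config details, rendered in the steps table
```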
src/models.py CHANGED
@@ -21,10 +21,20 @@ class HFLlamaChatModel:
 
      @classmethod
      def for_name(cls, name: str):
+         if cls.models is None:
+             cls.load_configs()
          for m in cls.models:
              if m.name == name:
                  return m
 
+     @classmethod
+     def for_model(cls, model: str):
+         if cls.models is None:
+             cls.load_configs()
+         for m in cls.models:
+             if m.id == model:
+                 return m
+
      @classmethod
      def available_models(cls) -> List[str]:
          if cls.models is None:
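With the lazy `load_configs()` call in both lookups, `for_model` works without prior setup; it returns None when no configured model matches, which is exactly the case the new `HFLlamaHttpRequestor` code guards against. Hypothetical usage, mirroring how that component now resolves its model:

```python
llm = HFLlamaChatModel.for_model("meta-llama/Llama-2-7b-chat-hf")
if llm is None:
    raise ValueError("Model not configured in this environment")
# The returned model is callable, per the new HFLlamaHttpRequestor code
reply = llm("Which TV has the best contrast ratio?",
            system_prompt="You are a helpful domestic appliance advisor.",
            max_new_tokens=2000)
print(reply)
```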