llm-arch/config/architectures.json
{
"architectures": [
{
"name": "1. Baseline LLM",
"description": "This architecture represents a baseline control. It includes safety components checking both the query and the response. The core of the architecture is powered by the unmodified Llama 2 7 billion parameter chat model. This model has never seen any of the private data, so the expectation is it will perform poorly, and it is included just as a comparative control.",
"steps": [
{"class": "InputRequestScreener"},
{"class": "HFInferenceEndpoint", "params": {"endpoint_url": "https://yl89ru8gdr1wkbej.eu-west-1.aws.endpoints.huggingface.cloud","model_name": "Unmodified Meta Llama 2 chat", "system_prompt": "You are a helpful agent.", "max_new_tokens": 1000}},
{"class": "OutputResponseScreener"}
],
"img": "architecture_baseline.jpg"
},
{
"name": "2. Fine-tuning Architecture",
"description": "This architecture is the final version of a fine-tuned LLM (version 7 of the fine-tuning iterations) based approach. Compared to the baseline architecture, the LLM which is the core of the question answering is replaced by one which has been further trained on questions and answers derived from the private data. It should therefore be able to answer questions about the ElectroHome products.",
"steps": [
{"class": "InputRequestScreener"},
{"class": "HFInferenceEndpoint", "params": {"endpoint_url": "https://kl6kq9j1yw3hoj4e.eu-west-1.aws.endpoints.huggingface.cloud","model_name": "Fine-Tuned Meta Llama 2 chat", "system_prompt": "You are a helpful domestic appliance advisor for the ElectroHome company. Please answer customer questions and do not mention other brands. Answer succinctly with facts, and if you cannot answer please say so.", "max_new_tokens": 1000, "prompt_style": "multi_line_with_roles"}},
{"class": "OutputResponseScreener"}
],
"img": "architecture_fine_tuned_v7.jpg"
},
{
"name": "3. RAG Architecture",
"description": "This architecture is the RAG based approach. The underlying LLM for this architecture is unchanged from the baseline, but the system also includes a vector database where relevant documents can be retrieved and the LLM prompt augmented. This process gives the LLM more context for the question which should all it to answer questions about the ElectroHome products.",
"steps": [
{"class": "InputRequestScreener"},
{"class": "RetrievalAugmentor", "params": {"vector_store": "02_baseline_products"}},
{"class": "HFInferenceEndpoint", "params": {"endpoint_url": "https://yl89ru8gdr1wkbej.eu-west-1.aws.endpoints.huggingface.cloud","model_name": "Unmodified Meta Llama 2 chat", "system_prompt": "You are a helpful domestic appliance advisor. Please answer the following customer question, answering only from the facts provided. Answer based on the background provided, do not make things up, and say if you cannot answer.", "max_new_tokens": 1000}},
{"class": "ResponseTrimmer", "params": {"regexes": ["^.{0,20}information provided[0-9A-Za-z,]*? ", "^.{0,20}background[0-9A-Za-z,]*? "]}},
{"class": "OutputResponseScreener"}
],
"img": "architecture_rag.jpg"
},
{
"name": "4a. Fine-tuning Architecture evolution V5",
"description": "This fine-tuning architecture features version 5 of the fine-tuned LLM, which was subject to ~1.25 hours of training. It is included to allow testing of progressive learning of the LLM over the fine-tuning process.",
"steps": [
{"class": "InputRequestScreener"},
{"class": "HFInferenceEndpoint", "params": {"endpoint_url": "https://pgzu02dvzupp5sml.eu-west-1.aws.endpoints.huggingface.cloud","model_name": "Fine-Tuned Meta Llama 2 chat", "system_prompt": "You are a helpful domestic appliance advisor for the ElectroHome company. Please answer customer questions and do not mention other brands. Answer succinctly with facts, and if you cannot answer please say so.", "max_new_tokens": 1000, "prompt_style": "multi_line_with_roles"}},
{"class": "OutputResponseScreener"}
],
"img": "architecture_fine_tuned_v5.jpg"
},
{
"name": "4b. Fine-tuning Architecture evolution V6",
"description": "This fine-tuning architecture features version 6 of the fine-tuned LLM, which was subject to ~3 hours of training. It is included to allow testing of progressive learning of the LLM over the fine-tuning process.",
"steps": [
{"class": "InputRequestScreener"},
{"class": "HFInferenceEndpoint", "params": {"endpoint_url": "https://ln8i9z4ecjqora6d.eu-west-1.aws.endpoints.huggingface.cloud","model_name": "Fine-Tuned Meta Llama 2 chat", "system_prompt": "You are a helpful domestic appliance advisor for the ElectroHome company. Please answer customer questions and do not mention other brands. Answer succinctly with facts, and if you cannot answer please say so.", "max_new_tokens": 1000, "prompt_style": "multi_line_with_roles"}},
{"class": "OutputResponseScreener"}
],
"img": "architecture_fine_tuned_v6.jpg"
},
{
"name": "4c. Fine-tuning Architecture evolution V7",
"description": "This fine-tuning architecture is identical to architecture 2, and features version 7 of the fine-tuned LLM, which was subject to ~12 hours of training. It is included to allow testing of progressive learning of the LLM over the fine-tuning process.",
"steps": [
{"class": "InputRequestScreener"},
{"class": "HFInferenceEndpoint", "params": {"endpoint_url": "https://kl6kq9j1yw3hoj4e.eu-west-1.aws.endpoints.huggingface.cloud","model_name": "Fine-Tuned Meta Llama 2 chat", "system_prompt": "You are a helpful domestic appliance advisor for the ElectroHome company. Please answer customer questions and do not mention other brands. Answer succinctly with facts, and if you cannot answer please say so.", "max_new_tokens": 1000, "prompt_style": "multi_line_with_roles"}},
{"class": "OutputResponseScreener"}
],
"img": "architecture_fine_tuned_v7.jpg"
},
{
"name": "5a. Performance test (safety off) Fine-tuning",
"description": "This architecture is the fine-tuning model (architecture 2) but with the safety components removed. The reason for this is because the safety components also include an LLM call to the baseline model, so for a fair test these are disabled to be able to compare the core differentiating elements of fine-tuning vs RAG.",
"steps": [
{"class": "HFInferenceEndpoint", "params": {"endpoint_url": "https://kl6kq9j1yw3hoj4e.eu-west-1.aws.endpoints.huggingface.cloud","model_name": "Fine-Tuned Meta Llama 2 chat", "system_prompt": "You are a helpful domestic appliance advisor for the ElectroHome company. Please answer customer questions and do not mention other brands. Answer succinctly with facts, and if you cannot answer please say so.", "max_new_tokens": 1000, "prompt_style": "multi_line_with_roles"}}
],
"img": "architecture_fine_tuned_performance_test.jpg"
},
{
"name": "5b. Performance test (safety off) RAG",
"description": "This architecture is the RAG model (architecture 3) but with the safety components removed. The reason for this is because the safety components also include an LLM call to the baseline model, so for a fair test these are disabled to be able to compare the core differentiating elements of fine-tuning vs RAG.",
"steps": [
{"class": "RetrievalAugmentor", "params": {"vector_store": "02_baseline_products"}},
{"class": "HFInferenceEndpoint", "params": {"endpoint_url": "https://yl89ru8gdr1wkbej.eu-west-1.aws.endpoints.huggingface.cloud","model_name": "Unmodified Meta Llama 2 chat", "system_prompt": "You are a helpful domestic appliance advisor. Please answer the following customer question, answering only from the facts provided. Answer based on the background provided, do not make things up, and say if you cannot answer.", "max_new_tokens": 1000}},
{"class": "ResponseTrimmer", "params": {"regexes": ["^.{0,20}information provided[0-9A-Za-z,]*? ", "^.{0,20}background[0-9A-Za-z,]*? "]}}
],
"img": "architecture_rag_performance_test.jpg"
}
]
}
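
For reference, the Python sketch below is a non-authoritative illustration of how a driver might load architectures.json and assemble each architecture's step pipeline from the "class" and "params" fields. The Step base class, the stub step classes, and the STEP_REGISTRY mapping are assumptions made for this sketch; the surrounding repo will define its own implementations.

import json

class Step:
    """Placeholder base class for pipeline steps (assumed, not from the repo)."""
    def __init__(self, **params):
        self.params = params

# Stub classes standing in for the real step implementations named in the config.
class InputRequestScreener(Step): pass
class OutputResponseScreener(Step): pass
class RetrievalAugmentor(Step): pass
class ResponseTrimmer(Step): pass
class HFInferenceEndpoint(Step): pass

# Map the "class" strings used in the JSON to their implementing classes.
STEP_REGISTRY = {cls.__name__: cls for cls in (
    InputRequestScreener, OutputResponseScreener,
    RetrievalAugmentor, ResponseTrimmer, HFInferenceEndpoint)}

def build_pipeline(arch):
    """Instantiate each step listed in an architecture's "steps" entry."""
    return [STEP_REGISTRY[s["class"]](**s.get("params", {})) for s in arch["steps"]]

with open("config/architectures.json") as f:
    config = json.load(f)

for arch in config["architectures"]:
    pipeline = build_pipeline(arch)
    print(arch["name"], "->", [type(step).__name__ for step in pipeline])

Looking up each step by its class name mirrors the structure of the config: adding or reordering steps, or defining a new architecture, only requires editing this JSON file rather than the driver code.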