hf-qa-demo

Runtime error

App Files Files Community

KonradSzafer committed on Nov 7, 2023

Commit

cf57696

1 Parent(s): c893d6c

discord bot fix

Browse files

Files changed (16) hide show

.gitignore +2 -1
Dockerfile.api → Dockerfile +6 -5
Dockerfile.bot +0 -17
app.py +12 -2
benchmark/__main__.py +76 -0
benchmark/questions.json +38 -0
benchmarker.py +0 -63
data/benchmark/.gitkeep +0 -0
data/indexing_benchmark.ipynb +387 -0
docker-compose.yml +0 -23
models/inference.ipynb +0 -103
qa_engine/mocks.py +5 -19
qa_engine/qa_engine.py +4 -0
questions.txt +0 -9
requirements.txt +1 -1
run_docker.sh +2 -1

.gitignore CHANGED Viewed

@@ -64,5 +64,6 @@ data/datasets/*
 !data/datasets/hf_repositories_urls.json
 !data/datasets/hf_repositories_urls_scraped.json
-# Local models
 qa_engine/local_models

 !data/datasets/hf_repositories_urls.json
 !data/datasets/hf_repositories_urls_scraped.json
+# Models and inference scripts
 qa_engine/local_models
+models/

Dockerfile.api → Dockerfile RENAMED Viewed

@@ -1,19 +1,20 @@
-FROM ubuntu:latest
 ENV DEBIAN_FRONTEND=noninteractive
 RUN apt-get -y update && \
     apt-get -y upgrade && \
-    apt-get -y install git python3.10 python3-pip
 COPY requirements.txt .
 RUN pip install --upgrade pip && \
     pip install --no-cache-dir -r requirements.txt
 WORKDIR /hugging-face-qa-bot
-COPY config/api/ config/api/
-COPY api/ api/
 EXPOSE 8000
-ENTRYPOINT [ "python3", "-m", "api" ]

+FROM debian:bullseye-slim
 ENV DEBIAN_FRONTEND=noninteractive
 RUN apt-get -y update && \
     apt-get -y upgrade && \
+    apt-get -y install git python3.11 python3-pip
 COPY requirements.txt .
 RUN pip install --upgrade pip && \
     pip install --no-cache-dir -r requirements.txt
 WORKDIR /hugging-face-qa-bot
+COPY . .
+RUN ls -la
 EXPOSE 8000
+ENTRYPOINT [ "python3", "-m", "api" ] # to run the api module
+# ENTRYPOINT [ "python3", "-m", "discord_bot" ] # to host the bot

Dockerfile.bot DELETED Viewed

@@ -1,17 +0,0 @@
-FROM ubuntu:latest
-ENV DEBIAN_FRONTEND=noninteractive
-RUN apt-get -y update && \
-    apt-get -y upgrade && \
-    apt-get -y install git python3.10 python3-pip
-COPY requirements.txt .
-RUN pip install --upgrade pip && \
-    pip install --no-cache-dir -r requirements.txt
-WORKDIR /hugging-face-qa-bot
-COPY config/bot/ config/bot/
-COPY bot/ bot/
-ENTRYPOINT [ "python3", "-m", "bot" ]

app.py CHANGED Viewed

@@ -1,9 +1,12 @@
 import gradio as gr
 from qa_engine import logger, Config, QAEngine
 from discord_bot import DiscordClient
 config = Config()
 qa_engine = QAEngine(
     llm_model_id=config.question_answering_model_id,
@@ -35,7 +38,7 @@ def gradio_interface():
     demo.launch(share=True)
-def discord_bot():
     client = DiscordClient(
         qa_engine=qa_engine,
         num_last_messages=config.num_last_messages,
@@ -43,9 +46,16 @@ def discord_bot():
         enable_commands=config.enable_commands,
         debug=config.debug
     )
     with gr.Blocks() as demo:
         gr.Markdown(f'Discord bot is running.')
-        client.run(config.discord_token)
 if __name__ == '__main__':

+import threading
 import gradio as gr
 from qa_engine import logger, Config, QAEngine
 from discord_bot import DiscordClient
 config = Config()
 qa_engine = QAEngine(
     llm_model_id=config.question_answering_model_id,
     demo.launch(share=True)
+def discord_bot_inference_thread():
     client = DiscordClient(
         qa_engine=qa_engine,
         num_last_messages=config.num_last_messages,
         enable_commands=config.enable_commands,
         debug=config.debug
     )
+    client.run(config.discord_token)
+def discord_bot():
+    thread = threading.Thread(target=discord_bot_inference_thread)
+    thread.start()
     with gr.Blocks() as demo:
         gr.Markdown(f'Discord bot is running.')
+    demo.queue(concurrency_count=100)
+    demo.queue(max_size=100)
+    demo.launch()
 if __name__ == '__main__':

benchmark/__main__.py ADDED Viewed

	@@ -0,0 +1,76 @@

+import time
+import json
+import wandb
+import gradio as gr
+from qa_engine import logger, Config, QAEngine
+QUESTIONS_FILENAME = 'benchmark/questions.json'
+config = Config()
+qa_engine = QAEngine(
+    llm_model_id=config.question_answering_model_id,
+    embedding_model_id=config.embedding_model_id,
+    index_repo_id=config.index_repo_id,
+    prompt_template=config.prompt_template,
+    use_docs_for_context=config.use_docs_for_context,
+    add_sources_to_response=config.add_sources_to_response,
+    use_messages_for_context=config.use_messages_in_context,
+    debug=config.debug
+)
+def main():
+    filtered_config = config.asdict()
+    disallowed_config_keys = [
+        "DISCORD_TOKEN", "NUM_LAST_MESSAGES", "USE_NAMES_IN_CONTEXT",
+        "ENABLE_COMMANDS", "APP_MODE", "DEBUG"
+    ]
+    for key in disallowed_config_keys:
+        filtered_config.pop(key, None)
+    wandb.init(
+        project='HF-Docs-QA',
+        name=f'{config.question_answering_model_id} - {config.embedding_model_id} - {config.index_repo_id}',
+        mode='run', # run/disabled
+        config=filtered_config
+    )
+    with open(QUESTIONS_FILENAME, 'r') as f:
+        questions = json.load(f)
+    table = wandb.Table(
+        columns=[
+            "id", "question", "messages_context", "answer", "sources", "time"
+        ]
+    )
+    for i, q in enumerate(questions):
+        logger.info(f"Question {i+1}/{len(questions)}")
+        question = q['question']
+        messages_context = q['messages_context']
+        time_start = time.perf_counter()
+        response = qa_engine.get_response(
+            question=question,
+            messages_context=messages_context
+        )
+        time_end = time.perf_counter()
+        table.add_data(
+            i,
+            question,
+            messages_context,
+            response.get_answer(),
+            response.get_sources_as_text(),
+            time_end - time_start
+        )
+    wandb.log({"answers": table})
+    wandb.finish()
+if __name__ == '__main__':
+    main()

benchmark/questions.json ADDED Viewed

	@@ -0,0 +1,38 @@

+[
+    {
+        "question": "How to create audio dataset with Hugging Face?",
+        "messages_context": " "
+    },
+    {
+        "question": "I want to check if 2 sentences are similar semantically. How can I do it?",
+        "messages_context": " "
+    },
+    {
+        "question": "What are the benefits of Gradio?",
+        "messages_context": " "
+    },
+    {
+        "question": "How to deploy a text-to-image model?",
+        "messages_context": " "
+    },
+    {
+        "question": "Does Hugging Face offer any distributed training assistance? followup: Can you give me an example setup of it?",
+        "messages_context": " "
+    },
+    {
+        "question": "I want to detect cars on video recording. How should I do it and what models do you recommend?",
+        "messages_context": " "
+    },
+    {
+        "question": "Is there any tool for evaluating models in Hugging Face? followup: Can you give me an example setup of it?",
+        "messages_context": " "
+    },
+    {
+        "question": "What are some advantages of the Hugging Face Hub?",
+        "messages_context": " "
+    },
+    {
+        "question": "How would I use a model in 8 bit in transformers?",
+        "messages_context": " "
+    }
+]

benchmarker.py DELETED Viewed

@@ -1,63 +0,0 @@
-import gradio as gr
-from dotenv import load_dotenv
-from api.config import Config
-from api.logger import logger
-from api.question_answering import QAModel
-import time
-load_dotenv(dotenv_path='config/api/.env')
-config = Config()
-model = QAModel(
-    llm_model_id=config.question_answering_model_id,
-    embedding_model_id=config.embedding_model_id,
-    index_repo_id=config.index_repo_id,
-    prompt_template=config.prompt_template,
-    use_docs_for_context=config.use_docs_for_context,
-    add_sources_to_response=config.add_sources_to_response,
-    use_messages_for_context=config.use_messages_in_context,
-    debug=config.debug
-)
-QUESTIONS_FILENAME = 'data/benchmark/questions.json'
-ANSWERS_FILENAME = 'data/benchmark/answers.json'
-def main():
-    benchmark_name = \
-        f'model: {config.question_answering_model_id}' \
-        f'index: {config.index_repo_id}'
-    wandb.init(
-        project='HF-Docs-QA',
-        name=f'model: {config.question_answering_model_id}',
-        mode='run', # run/disabled
-        config=config.asdict()
-    )
-    # log config to wandb
-    with open(QUESTIONS_FILENAME, 'r') as f: # json
-        questions = f.readlines()
-    with open(ANSWERS_FILENAME, 'w') as f:
-        for q in questions:
-            question = q['question']
-            messages_contex = q['messages_context']
-            t_start = time.perf_counter()
-            response = model.get_response(
-                question=question,
-                messages_context=messages_context
-            )
-            t_end = time.perf_counter()
-            # write to json
-            {
-                "answer": response.get_answer(),
-                "sources": response.get_sources_as_text(),
-                'time': t_end - t_start
-            }
-if __name__ == '__main__':
-    main()

data/benchmark/.gitkeep DELETED Viewed

File without changes

data/indexing_benchmark.ipynb ADDED Viewed

	@@ -0,0 +1,387 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 37,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import math\n",
+    "import numpy as np\n",
+    "from pathlib import Path\n",
+    "from typing import List, Union, Any\n",
+    "from tqdm import tqdm\n",
+    "from sentence_transformers import CrossEncoder\n",
+    "from langchain.chains import RetrievalQA\n",
+    "from langchain.embeddings import HuggingFaceEmbeddings, HuggingFaceInstructEmbeddings\n",
+    "from langchain.document_loaders import TextLoader\n",
+    "from langchain.indexes import VectorstoreIndexCreator\n",
+    "from langchain.text_splitter import CharacterTextSplitter\n",
+    "from langchain.vectorstores import FAISS\n",
+    "from sentence_transformers import CrossEncoder"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class AverageInstructEmbeddings(HuggingFaceInstructEmbeddings):\n",
+    "    max_length: int = None\n",
+    "    def __init__(self, max_length: int = 512, **kwargs: Any):\n",
+    "        super().__init__(**kwargs)\n",
+    "        self.max_length = max_length\n",
+    "        if self.max_length < 0:\n",
+    "            print('max_length is not specified, using model default max_seq_length')\n",
+    "\n",
+    "    def embed_documents(self, texts: List[str]) -> List[List[float]]:\n",
+    "        all_embeddings = []\n",
+    "        for text in tqdm(texts, desc=\"Embedding documents\"):\n",
+    "            if len(text) > self.max_length and self.max_length > -1:\n",
+    "                n_chunks = math.ceil(len(text)/self.max_length)\n",
+    "                chunks = [\n",
+    "                    text[i*self.max_length:(i+1)*self.max_length]\n",
+    "                    for i in range(n_chunks)\n",
+    "                ]\n",
+    "                instruction_pairs = [[self.embed_instruction, chunk] for chunk in chunks]\n",
+    "                chunk_embeddings = self.client.encode(instruction_pairs)\n",
+    "                avg_embedding = np.mean(chunk_embeddings, axis=0)\n",
+    "                all_embeddings.append(avg_embedding.tolist())\n",
+    "            else:\n",
+    "                instruction_pairs = [[self.embed_instruction, text]]\n",
+    "                embeddings = self.client.encode(instruction_pairs)\n",
+    "                all_embeddings.append(embeddings[0].tolist())\n",
+    "\n",
+    "        return all_embeddings\n",
+    "\n",
+    "\n",
+    "class BenchDataST:\n",
+    "    def __init__(self, path: str, percentage: float = 0.005, chunk_size: int = 512, chunk_overlap: int = 100):\n",
+    "        self.path = path\n",
+    "        self.percentage = percentage\n",
+    "        self.docs = []\n",
+    "        self.metadata = []\n",
+    "        self.load()\n",
+    "        self.text_splitter = CharacterTextSplitter(separator=\"\", chunk_size=chunk_size, chunk_overlap=chunk_overlap)\n",
+    "        self.docs_processed = self.text_splitter.create_documents(self.docs, self.metadata)\n",
+    "\n",
+    "    def load(self):\n",
+    "        for p in Path(self.path).iterdir():\n",
+    "            if not p.is_dir():\n",
+    "                with open(p) as f:\n",
+    "                    source = f.readline().strip().replace('source: ', '')\n",
+    "                    self.docs.append(f.read())\n",
+    "                    self.metadata.append({\"source\": source})\n",
+    "        self.docs = self.docs[:int(len(self.docs) * self.percentage)]\n",
+    "        self.metadata = self.metadata[:int(len(self.metadata) * self.percentage)]\n",
+    "\n",
+    "    def __len__(self):\n",
+    "        return len(self.docs)\n",
+    "\n",
+    "    def __getitem__(self, idx):\n",
+    "        return self.docs[idx], self.metadata[idx]\n",
+    "\n",
+    "    def __iter__(self):\n",
+    "        for doc, metadata in zip(self.docs, self.metadata):\n",
+    "            yield doc, metadata\n",
+    "\n",
+    "    def __repr__(self):\n",
+    "        return f'BenchDataST({len(self)} docs) at {self.path} with {self.percentage} percentage \\nSources: {self.metadata} \\nChunks: {self.text_splitter}'\n",
+    "    \n",
+    "\n",
+    "class BenchmarkST:\n",
+    "    def __init__(self, data: BenchDataST, baseline_model: Union[HuggingFaceEmbeddings, HuggingFaceInstructEmbeddings, AverageInstructEmbeddings], embedding_models: List[Union[HuggingFaceEmbeddings, HuggingFaceInstructEmbeddings, AverageInstructEmbeddings]]):\n",
+    "        self.data = data\n",
+    "        self.baseline_model = baseline_model\n",
+    "        self.embedding_models = embedding_models\n",
+    "        self.baseline_index, self.indexes  = self.build_indexes()\n",
+    "\n",
+    "    def build_indexes(self):\n",
+    "        indexes = []\n",
+    "        for model in [self.baseline_model] + self.embedding_models:\n",
+    "            print(f\"Building index for {model}\")\n",
+    "            index = FAISS.from_documents(self.data.docs_processed, model)\n",
+    "            indexes.append(index)\n",
+    "        return indexes[0], indexes[1:]\n",
+    "    \n",
+    "    def add_index(self, index: FAISS):\n",
+    "        self.indexes.append(index)\n",
+    "    \n",
+    "    def evaluate(self, query: str, k: int = 3):\n",
+    "        baseline_results = self.baseline_index.similarity_search_with_score(query, k=k)\n",
+    "        results = []\n",
+    "        for index in self.indexes:\n",
+    "            results.append(index.similarity_search_with_score(query, k=k))\n",
+    "        return baseline_results, results"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 48,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "load INSTRUCTOR_Transformer\n",
+      "max_seq_length  512\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "No sentence-transformers model found with name /Users/michalwilinski/.cache/torch/sentence_transformers/cross-encoder_ms-marco-MiniLM-L-12-v2. Creating a new one with MEAN pooling.\n",
+      "Some weights of the model checkpoint at /Users/michalwilinski/.cache/torch/sentence_transformers/cross-encoder_ms-marco-MiniLM-L-12-v2 were not used when initializing BertModel: ['classifier.bias', 'classifier.weight']\n",
+      "- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
+      "- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Building index for client=INSTRUCTOR(\n",
+      "  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: T5EncoderModel \n",
+      "  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False})\n",
+      "  (2): Dense({'in_features': 768, 'out_features': 768, 'bias': False, 'activation_function': 'torch.nn.modules.linear.Identity'})\n",
+      "  (3): Normalize()\n",
+      ") model_name='hkunlp/instructor-base' cache_folder=None model_kwargs={} encode_kwargs={} embed_instruction='Represent this piece of text for searching relevant information:' query_instruction='Query the most relevant piece of information from the Hugging Face documentation' max_length=512\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Embedding documents: 100%|██████████| 278/278 [00:19<00:00, 14.11it/s]\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Building index for client=SentenceTransformer(\n",
+      "  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: BertModel \n",
+      "  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})\n",
+      ") model_name='cross-encoder/ms-marco-MiniLM-L-12-v2' cache_folder=None model_kwargs={} encode_kwargs={} multi_process=False\n"
+     ]
+    }
+   ],
+   "source": [
+    "data = BenchDataST(\n",
+    "    path=\"./datasets/huggingface_docs/\",\n",
+    "    percentage=0.005,\n",
+    "    chunk_size=512,\n",
+    "    chunk_overlap=100\n",
+    ")\n",
+    "\n",
+    "baseline_embedding_model = AverageInstructEmbeddings(\n",
+    "    model_name=\"hkunlp/instructor-base\",\n",
+    "    embed_instruction=\"Represent this piece of text for searching relevant information:\",\n",
+    "    query_instruction=\"Query the most relevant piece of information from the Hugging Face documentation\",\n",
+    "    max_length=512,\n",
+    ")\n",
+    "\n",
+    "embedding_model = HuggingFaceEmbeddings(\n",
+    "    model_name=\"intfloat/e5-large-v2\",\n",
+    ")\n",
+    "\n",
+    "cross_encoder = HuggingFaceEmbeddings(model_name=\"cross-encoder/ms-marco-MiniLM-L-12-v2\")\n",
+    "\n",
+    "benchmark = BenchmarkST(\n",
+    "    data=data,\n",
+    "    baseline_model=baseline_embedding_model,\n",
+    "    embedding_models=[cross_encoder]\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 54,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Baseline results:\n",
+      "{'source': 'https://github.com/huggingface/course/blob/main/chapters/en/chapter6/4.mdx'} 0.23610792\n",
+      "{'source': 'https://github.com/huggingface/course/blob/main/chapters/en/chapter6/4.mdx'} 0.24087097\n",
+      "{'source': 'https://github.com/huggingface/course/blob/main/chapters/en/chapter6/4.mdx'} 0.24181677\n",
+      "{'source': 'https://github.com/huggingface/course/blob/main/chapters/en/chapter6/4.mdx'} 0.24541612\n",
+      "{'source': 'https://github.com/huggingface/course/blob/main/chapters/en/chapter6/4.mdx'} 0.24639006\n",
+      "{'source': 'https://github.com/huggingface/course/blob/main/chapters/en/chapter6/4.mdx'} 0.24780047\n",
+      "{'source': 'https://github.com/huggingface/course/blob/main/chapters/en/chapter6/4.mdx'} 0.2535807\n",
+      "{'source': 'https://github.com/huggingface/optimum/blob/main/docs/source/exporters/onnx/usage_guides/export_a_model.mdx'} 0.25887597\n",
+      "{'source': 'https://github.com/huggingface/course/blob/main/chapters/en/chapter6/4.mdx'} 0.27293646\n",
+      "{'source': 'https://github.com/huggingface/course/blob/main/chapters/en/chapter6/4.mdx'} 0.27374876\n",
+      "{'source': 'https://github.com/huggingface/course/blob/main/chapters/en/chapter6/4.mdx'} 0.27710187\n",
+      "{'source': 'https://github.com/huggingface/course/blob/main/chapters/en/chapter6/4.mdx'} 0.28146794\n",
+      "{'source': 'https://github.com/huggingface/course/blob/main/chapters/en/chapter6/4.mdx'} 0.29536068\n",
+      "{'source': 'https://github.com/huggingface/course/blob/main/chapters/en/chapter6/4.mdx'} 0.29784447\n",
+      "{'source': 'https://github.com/huggingface/course/blob/main/chapters/en/chapter6/4.mdx'} 0.30452335\n",
+      "{'source': 'https://github.com/huggingface/course/blob/main/chapters/en/chapter6/4.mdx'} 0.3061711\n",
+      "{'source': 'https://github.com/huggingface/course/blob/main/chapters/en/chapter6/4.mdx'} 0.31600478\n",
+      "{'source': 'https://github.com/huggingface/course/blob/main/chapters/en/chapter6/4.mdx'} 0.3166225\n",
+      "{'source': 'https://github.com/huggingface/pytorch-image-models/blob/main/docs/changes.md'} 0.33345556\n",
+      "{'source': 'https://github.com/huggingface/optimum/blob/main/docs/source/exporters/onnx/usage_guides/export_a_model.mdx'} 0.3469957\n",
+      "{'source': 'https://github.com/huggingface/optimum/blob/main/docs/source/exporters/onnx/usage_guides/export_a_model.mdx'} 0.35222226\n",
+      "{'source': 'https://github.com/huggingface/course/blob/main/chapters/en/chapter6/4.mdx'} 0.36451602\n",
+      "{'source': 'https://github.com/huggingface/course/blob/main/chapters/en/chapter6/4.mdx'} 0.36925688\n",
+      "{'source': 'https://github.com/huggingface/course/blob/main/chapters/en/chapter6/4.mdx'} 0.37025565\n",
+      "{'source': 'https://github.com/huggingface/diffusers/blob/main/examples/textual_inversion/README.md'} 0.37112093\n",
+      "{'source': 'https://github.com/huggingface/optimum/blob/main/docs/source/exporters/onnx/usage_guides/export_a_model.mdx'} 0.37146708\n",
+      "{'source': 'https://github.com/huggingface/optimum/blob/main/docs/source/exporters/onnx/usage_guides/export_a_model.mdx'} 0.3766507\n",
+      "{'source': 'https://github.com/huggingface/optimum/blob/main/docs/source/exporters/onnx/usage_guides/export_a_model.mdx'} 0.37794292\n",
+      "{'source': 'https://github.com/huggingface/optimum/blob/main/docs/source/exporters/onnx/usage_guides/export_a_model.mdx'} 0.37923962\n",
+      "{'source': 'https://github.com/huggingface/pytorch-image-models/blob/main/docs/changes.md'} 0.38359642\n",
+      "{'source': 'https://github.com/huggingface/pytorch-image-models/blob/main/docs/changes.md'} 0.3878625\n",
+      "{'source': 'https://github.com/huggingface/optimum/blob/main/docs/source/exporters/onnx/usage_guides/export_a_model.mdx'} 0.39796114\n",
+      "{'source': 'https://github.com/huggingface/optimum/blob/main/docs/source/exporters/onnx/usage_guides/export_a_model.mdx'} 0.40057343\n",
+      "{'source': 'https://github.com/huggingface/pytorch-image-models/blob/main/docs/changes.md'} 0.40114868\n",
+      "{'source': 'https://github.com/huggingface/optimum/blob/main/docs/source/exporters/onnx/usage_guides/export_a_model.mdx'} 0.40156174\n",
+      "{'source': 'https://github.com/huggingface/pytorch-image-models/blob/main/docs/changes.md'} 0.40341228\n",
+      "{'source': 'https://github.com/huggingface/diffusers/blob/main/examples/textual_inversion/README.md'} 0.40720195\n",
+      "{'source': 'https://github.com/huggingface/optimum/blob/main/docs/source/exporters/onnx/usage_guides/export_a_model.mdx'} 0.41241395\n",
+      "{'source': 'https://github.com/huggingface/optimum/blob/main/docs/source/exporters/onnx/usage_guides/export_a_model.mdx'} 0.4134417\n",
+      "{'source': 'https://github.com/huggingface/pytorch-image-models/blob/main/docs/changes.md'} 0.4134435\n",
+      "{'source': 'https://github.com/huggingface/optimum/blob/main/docs/source/exporters/onnx/usage_guides/export_a_model.mdx'} 0.41754264\n",
+      "{'source': 'https://github.com/huggingface/optimum/blob/main/docs/source/exporters/onnx/usage_guides/export_a_model.mdx'} 0.41917825\n",
+      "{'source': 'https://github.com/huggingface/optimum/blob/main/docs/source/exporters/onnx/usage_guides/export_a_model.mdx'} 0.41928726\n",
+      "{'source': 'https://github.com/huggingface/optimum/blob/main/docs/source/exporters/onnx/usage_guides/export_a_model.mdx'} 0.41988587\n",
+      "{'source': 'https://github.com/huggingface/optimum/blob/main/docs/source/exporters/onnx/usage_guides/export_a_model.mdx'} 0.42029166\n",
+      "{'source': 'https://github.com/huggingface/pytorch-image-models/blob/main/docs/changes.md'} 0.42128915\n",
+      "{'source': 'https://github.com/huggingface/pytorch-image-models/blob/main/docs/changes.md'} 0.4226097\n",
+      "{'source': 'https://github.com/huggingface/pytorch-image-models/blob/main/docs/changes.md'} 0.42302307\n",
+      "{'source': 'https://github.com/gradio-app/gradio/blob/main/demo/stt_or_tts/run.ipynb'} 0.4252566\n",
+      "{'source': 'https://github.com/huggingface/diffusers/blob/main/examples/textual_inversion/README.md'} 0.42704937\n",
+      "{'source': 'https://github.com/huggingface/pytorch-image-models/blob/main/docs/changes.md'} 0.4297651\n",
+      "{'source': 'https://github.com/huggingface/pytorch-image-models/blob/main/docs/changes.md'} 0.43067485\n",
+      "{'source': 'https://github.com/huggingface/optimum/blob/main/docs/source/exporters/onnx/usage_guides/export_a_model.mdx'} 0.43116528\n",
+      "{'source': 'https://github.com/huggingface/blog/blob/main/bloom.md'} 0.43272027\n",
+      "{'source': 'https://github.com/huggingface/diffusers/blob/main/examples/instruct_pix2pix/README_sdxl.md'} 0.43434155\n",
+      "{'source': 'https://github.com/huggingface/optimum/blob/main/docs/source/exporters/onnx/usage_guides/export_a_model.mdx'} 0.43486434\n",
+      "{'source': 'https://github.com/huggingface/pytorch-image-models/blob/main/docs/changes.md'} 0.43524152\n",
+      "{'source': 'https://github.com/huggingface/optimum/blob/main/docs/source/exporters/onnx/usage_guides/export_a_model.mdx'} 0.43530554\n",
+      "{'source': 'https://github.com/huggingface/optimum/blob/main/docs/source/exporters/onnx/usage_guides/export_a_model.mdx'} 0.4371896\n",
+      "{'source': 'https://github.com/huggingface/pytorch-image-models/blob/main/docs/changes.md'} 0.43753576\n",
+      "{'source': 'https://github.com/huggingface/pytorch-image-models/blob/main/docs/changes.md'} 0.43824\n",
+      "{'source': 'https://github.com/huggingface/pytorch-image-models/blob/main/docs/changes.md'} 0.4384127\n",
+      "{'source': 'https://github.com/huggingface/pytorch-image-models/blob/main/docs/changes.md'} 0.43900505\n",
+      "{'source': 'https://github.com/huggingface/pytorch-image-models/blob/main/docs/changes.md'} 0.43903238\n",
+      "{'source': 'https://github.com/huggingface/blog/blob/main/accelerate-deepspeed.md'} 0.44034868\n",
+      "{'source': 'https://github.com/huggingface/optimum/blob/main/docs/source/exporters/onnx/usage_guides/export_a_model.mdx'} 0.44217598\n",
+      "{'source': 'https://github.com/huggingface/diffusers/blob/main/docs/source/en/api/schedulers/euler_ancestral.md'} 0.4426194\n",
+      "{'source': 'https://github.com/huggingface/pytorch-image-models/blob/main/docs/changes.md'} 0.44303834\n",
+      "{'source': 'https://github.com/huggingface/diffusers/blob/main/examples/instruct_pix2pix/README_sdxl.md'} 0.4452571\n",
+      "{'source': 'https://github.com/huggingface/pytorch-image-models/blob/main/docs/changes.md'} 0.44619536\n",
+      "{'source': 'https://github.com/huggingface/optimum/blob/main/docs/source/exporters/onnx/usage_guides/export_a_model.mdx'} 0.44652176\n",
+      "{'source': 'https://github.com/gradio-app/gradio/blob/main/demo/stt_or_tts/run.ipynb'} 0.44683564\n",
+      "{'source': 'https://github.com/huggingface/blog/blob/main/accelerate-deepspeed.md'} 0.44743723\n",
+      "{'source': 'https://github.com/huggingface/pytorch-image-models/blob/main/docs/changes.md'} 0.44768596\n",
+      "{'source': 'https://github.com/huggingface/pytorch-image-models/blob/main/docs/changes.md'} 0.4477852\n",
+      "{'source': 'https://github.com/huggingface/pytorch-image-models/blob/main/docs/changes.md'} 0.44906363\n",
+      "{'source': 'https://github.com/huggingface/pytorch-image-models/blob/main/docs/changes.md'} 0.45155957\n",
+      "{'source': 'https://github.com/huggingface/pytorch-image-models/blob/main/docs/changes.md'} 0.45215163\n",
+      "{'source': 'https://github.com/huggingface/pytorch-image-models/blob/main/docs/changes.md'} 0.45415214\n",
+      "{'source': 'https://github.com/huggingface/pytorch-image-models/blob/main/docs/changes.md'} 0.4541726\n",
+      "{'source': 'https://github.com/huggingface/diffusers/blob/main/examples/instruct_pix2pix/README_sdxl.md'} 0.4542602\n",
+      "{'source': 'https://github.com/huggingface/blog/blob/main/accelerate-deepspeed.md'} 0.4544394\n",
+      "{'source': 'https://github.com/huggingface/transformers/blob/main/docs/source/en/model_doc/open-llama.md'} 0.45448524\n",
+      "{'source': 'https://github.com/huggingface/pytorch-image-models/blob/main/docs/changes.md'} 0.454512\n",
+      "{'source': 'https://github.com/huggingface/pytorch-image-models/blob/main/docs/changes.md'} 0.45478693\n",
+      "{'source': 'https://github.com/huggingface/diffusers/blob/main/docs/source/en/api/schedulers/euler_ancestral.md'} 0.45494407\n",
+      "{'source': 'https://github.com/huggingface/transformers/blob/main/docs/source/en/model_doc/open-llama.md'} 0.45494407\n",
+      "{'source': 'https://github.com/gradio-app/gradio/blob/main/js/accordion/CHANGELOG.md'} 0.45520714\n",
+      "{'source': 'https://github.com/huggingface/pytorch-image-models/blob/main/docs/changes.md'} 0.4559689\n",
+      "{'source': 'https://github.com/huggingface/blog/blob/main/bloom.md'} 0.4568352\n",
+      "{'source': 'https://github.com/huggingface/optimum/blob/main/docs/source/exporters/onnx/usage_guides/export_a_model.mdx'} 0.4577096\n",
+      "{'source': 'https://github.com/huggingface/simulate/blob/main/docs/source/api/lights.mdx'} 0.4577096\n",
+      "{'source': 'https://github.com/huggingface/diffusers/blob/main/examples/instruct_pix2pix/README_sdxl.md'} 0.45773098\n",
+      "{'source': 'https://github.com/huggingface/blog/blob/main/bloom.md'} 0.45818624\n",
+      "{'source': 'https://github.com/huggingface/optimum/blob/main/docs/source/exporters/onnx/usage_guides/export_a_model.mdx'} 0.45871085\n",
+      "{'source': 'https://github.com/huggingface/blog/blob/main/bloom.md'} 0.4591412\n",
+      "{'source': 'https://github.com/huggingface/diffusers/blob/main/examples/instruct_pix2pix/README_sdxl.md'} 0.46033093\n",
+      "{'source': 'https://github.com/huggingface/blog/blob/main/accelerate-deepspeed.md'} 0.4605264\n",
+      "{'source': 'https://github.com/huggingface/pytorch-image-models/blob/main/docs/changes.md'} 0.46091354\n",
+      "{'source': 'https://github.com/huggingface/transformers/blob/main/docs/source/en/model_doc/open-llama.md'} 0.46182537\n",
+      "Cross encoder results:\n",
+      "{'source': 'https://github.com/huggingface/course/blob/main/chapters/en/chapter6/4.mdx'} 6.840022\n",
+      "{'source': 'https://github.com/huggingface/course/blob/main/chapters/en/chapter6/4.mdx'} -0.98426485\n",
+      "{'source': 'https://github.com/huggingface/course/blob/main/chapters/en/chapter6/4.mdx'} -1.9345549\n",
+      "bye\n"
+     ]
+    }
+   ],
+   "source": [
+    "query = \"textual inversion\"\n",
+    "k = 100\n",
+    "baseline_results, results = benchmark.evaluate(query=query, k=k)\n",
+    "print(\"Baseline results:\")\n",
+    "[print(doc.metadata,score) for (doc,score) in baseline_results]\n",
+    "cross_encoder = CrossEncoder(\"cross-encoder/ms-marco-MiniLM-L-12-v2\")\n",
+    "cross_encoder_results = cross_encoder.predict([(query, doc.page_content) for doc in data.docs_processed])\n",
+    "# rerank results\n",
+    "cross_encoder_results = sorted(zip(data.docs_processed, cross_encoder_results), key=lambda x: x[1], reverse=True)\n",
+    "print(\"Cross encoder results:\")\n",
+    "final_results = cross_encoder_results[:3]\n",
+    "[print(doc.metadata, score) for (doc,score) in final_results]\n",
+    "print(\"bye\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 55,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "es where the space character is not used (like Chinese or Japanese).\n",
+      "\n",
+      "The other main feature of SentencePiece is *reversible tokenization*: since there is no special treatment of spaces, decoding the tokens is done simply by concatenating them and replacing the `_`s with spaces -- this results in the normalized text. As we saw earlier, the BERT tokenizer removes repeating spaces, so its tokenization is not reversible.\n",
+      "\n",
+      "## Algorithm overview[[algorithm-overview]]\n",
+      "\n",
+      "In the following sections, we'll dive into t\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(final_results[0][0].page_content)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "hf_qa_bot",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.3"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

docker-compose.yml DELETED Viewed

@@ -1,23 +0,0 @@
-version: '3'
-services:
-  api:
-    build:
-      context: .
-      dockerfile: Dockerfile.api
-    ports:
-      - 8000:8000
-    networks:
-      - mynetwork
-  bot:
-    build:
-      context: .
-      dockerfile: Dockerfile.bot
-    ports:
-      - 80:80
-    depends_on:
-      - api
-    networks:
-      - mynetwork
-networks:
-  mynetwork:
-    driver: bridge

models/inference.ipynb DELETED Viewed

@@ -1,103 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import os\n",
-    "import torch\n",
-    "import transformers\n",
-    "from transformers import AutoModelForCausalLM, AutoTokenizer\n",
-    "\n",
-    "PROMPT_TEMPLATES_DIR = os.path.dirname(os.path.abspath(os.getcwd()))\n",
-    "PROMPT_TEMPLATES_DIR += '/config/api/prompt_templates/'\n",
-    "\n",
-    "os.environ['CUDA_VISIBLE_DEVICES'] = '0'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "prompt_template = 'sythia_v1.3'\n",
-    "with open(PROMPT_TEMPLATES_DIR + f'{prompt_template}.txt', 'r') as f:\n",
-    "    prompt_template = f.read()\n",
-    "\n",
-    "context = ''\n",
-    "question = 'How to fix a bike?'\n",
-    "\n",
-    "prompt = prompt_template.format(context=context, question=question)\n",
-    "print(f'prompt len: {len(prompt)}\\n')\n",
-    "print(prompt)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "model_id = 'migtissera/SynthIA-7B-v1.3'\n",
-    "\n",
-    "tokenizer = AutoTokenizer.from_pretrained(model_id)\n",
-    "model = AutoModelForCausalLM.from_pretrained(\n",
-    "    model_id,\n",
-    "    torch_dtype=torch.bfloat16,\n",
-    "    trust_remote_code=True,\n",
-    "    load_in_8bit=False,\n",
-    "    device_map='auto',\n",
-    "    resume_download=True,\n",
-    ")\n",
-    "\n",
-    "pipeline = transformers.pipeline(\n",
-    "    'text-generation',\n",
-    "    model=model,\n",
-    "    tokenizer=tokenizer,\n",
-    "    device_map='auto',\n",
-    "    torch_dtype=torch.bfloat16,\n",
-    "    eos_token_id=tokenizer.eos_token_id,\n",
-    "    pad_token_id=tokenizer.eos_token_id,\n",
-    "    min_new_tokens=64,\n",
-    "    max_new_tokens=800,\n",
-    "    temperature=0.5,\n",
-    "    do_sample=True,\n",
-    ")\n",
-    "\n",
-    "output_text = pipeline(prompt)[0]['generated_text']\n",
-    "output_text = output_text.replace(prompt+'\\n', '')\n",
-    "print(output_text)"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "hf_qa_bot",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.11.5"
-  },
-  "orig_nbformat": 4,
-  "vscode": {
-   "interpreter": {
-    "hash": "e769ac600d1c65682759767682b2a946c0eaa09d353302f712fe4c2e822e15df"
-   }
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

qa_engine/mocks.py CHANGED Viewed

@@ -6,36 +6,22 @@ from langchain.llms.base import LLM
 class MockLocalBinaryModel(LLM):
     """
-    Mock Local Binary Model class, used for generating the string "a".
-    Args:
-        model_id (str): The ID of the model to be mocked.
-    Attributes:
-        model_path (str): The path to the model to be mocked.
-        llm (str): The string "a".
-    Raises:
-        ValueError: If the model_path does not exist.
     """
     model_path: str = None
-    llm: str = 'READY TO MOCK'
-    def __init__(self, model_id: str = None):
         super().__init__()
-        self.model_path = f'bot/question_answering/{model_id}'
-        if not os.path.exists(self.model_path):
-            raise ValueError(f'{self.model_path} does not exist')
     def _call(self, prompt: str, stop: Optional[list[str]] = None) -> str:
         return self.llm
     @property
     def _identifying_params(self) -> Mapping[str, Any]:
-        return {'name_of_model': self.model_path}
     @property
     def _llm_type(self) -> str:
-        return self.model_path

 class MockLocalBinaryModel(LLM):
     """
+    Mock Local Binary Model class.
     """
     model_path: str = None
+    llm: str = 'Mocked Response'
+    def __init__(self):
         super().__init__()
     def _call(self, prompt: str, stop: Optional[list[str]] = None) -> str:
         return self.llm
     @property
     def _identifying_params(self) -> Mapping[str, Any]:
+        return {'name_of_model': 'mock'}
     @property
     def _llm_type(self) -> str:
+        return 'mock'

qa_engine/qa_engine.py CHANGED Viewed

@@ -18,6 +18,7 @@ from sentence_transformers import CrossEncoder
 from qa_engine import logger
 from qa_engine.response import Response
 class LocalBinaryModel(LLM):
@@ -191,6 +192,9 @@ class QAEngine():
                 model_url=llm_model_id.replace('api_models/', ''),
                 debug=self.debug
             )
         else:
             logger.info('using transformers pipeline model')
             self.llm_model = TransformersPipelineModel(

 from qa_engine import logger
 from qa_engine.response import Response
+from qa_engine.mocks import MockLocalBinaryModel
 class LocalBinaryModel(LLM):
                 model_url=llm_model_id.replace('api_models/', ''),
                 debug=self.debug
             )
+        elif llm_model_id == 'mock':
+            logger.info('using mock model')
+            self.llm_model = MockLocalBinaryModel()
         else:
             logger.info('using transformers pipeline model')
             self.llm_model = TransformersPipelineModel(

questions.txt DELETED Viewed

@@ -1,9 +0,0 @@
-How to create audio dataset with Hugging Face?
-I want to check if 2 sentences are similar semantically. How can I do it?
-What are the benefits of Gradio?
-How to deploy a text-to-image model?
-Does Hugging Face offer any distributed training assistance? followup: Can you give me an example setup of it?
-I want to detect cars on video recording. How should I do it and what models do you recommend?
-Is there any tool for evaluating models in Hugging Face? followup: Can you give me an example setup of it?
-What are some advantages of the Hugging Face Hub?
-How would I use a model in 8 bit in transformers?

requirements.txt CHANGED Viewed

@@ -5,6 +5,7 @@ accelerate
 einops
 huggingface_hub
 gradio
 beautifulsoup4==4.12.0
 discord.py==2.2.2
 evaluate==0.4.0
@@ -24,5 +25,4 @@ InstructorEmbedding==1.0.0
 faiss_cpu==1.7.3
 tqdm==4.64.1
 uvicorn==0.22.0
-wandb==0.15.0
 pytest==7.3.1

 einops
 huggingface_hub
 gradio
+wandb
 beautifulsoup4==4.12.0
 discord.py==2.2.2
 evaluate==0.4.0
 faiss_cpu==1.7.3
 tqdm==4.64.1
 uvicorn==0.22.0
 pytest==7.3.1

run_docker.sh CHANGED Viewed

@@ -1,2 +1,3 @@
 #!/bin/bash
-docker-compose down && docker-compose up --build

 #!/bin/bash
+docker build -t hf_qa_engine .
+docker run -it hf_qa_engine bash