Spaces:

isl-research
/

ddi_chat

Sleeping

App Files Files Community

teddyllm committed on Nov 12, 2024

Commit

4a91290

•

1 Parent(s): 7cbc1c8

Upload 4 files

Browse files

Files changed (4) hide show

app.py +111 -0
documents.json +0 -0
faiss_index.bin +3 -0
requirements.txt +100 -0

app.py ADDED Viewed

	@@ -0,0 +1,111 @@

+import os
+import faiss
+import numpy as np
+import json
+import gradio as gr
+from openai import OpenAI
+from sentence_transformers import SentenceTransformer
+# Step 1: Set up the OpenAI client.
+# The API key is read from the OPENAI_API_KEY environment variable; when the
+# variable is unset an empty string is passed to the client instead.
+openai_api_key = os.environ.get("OPENAI_API_KEY", "")
+client = OpenAI(api_key=openai_api_key)
+# Step 2: Load the prebuilt FAISS index and a SentenceTransformer model.
+# NOTE(review): `model` is not referenced anywhere else in the visible code;
+# query embeddings are produced by get_embeddings() through the OpenAI API.
+# Confirm whether this load is still needed.
+index = faiss.read_index("faiss_index.bin")
+model = SentenceTransformer('all-MiniLM-L6-v2')
+def load_documents(docs_path):
+    """Read the JSON file at ``docs_path`` and return its parsed content.
+
+    The file is opened as UTF-8 text and decoded with the ``json`` module.
+    """
+    with open(docs_path, mode='r', encoding='utf-8') as handle:
+        raw_text = handle.read()
+    return json.loads(raw_text)
+# Path to the JSON file whose entries search_index() looks up by the
+# positions returned from the FAISS index; loaded once at startup.
+docs_path = 'documents.json'
+documents = load_documents(docs_path)
+# NOTE(review): presumably the embedding width of the FAISS index / the
+# "text-embedding-3-small" vectors; not referenced elsewhere in the visible
+# code - confirm.
+dimension = 1536
+def get_embeddings(text):
+    """Embed ``text`` with the "text-embedding-3-small" OpenAI model.
+
+    ``text`` is sent as a single-item batch; the first embedding of the
+    response is returned as a float32 NumPy array.
+    """
+    result = client.embeddings.create(
+        input=[text],
+        model="text-embedding-3-small",
+    )
+    vector = result.data[0].embedding
+    return np.array(vector, dtype=np.float32)
+# Step 3: Function to search FAISS index
+def search_index(query, k=3):
+    """Return the indexed documents closest to ``query`` as one string.
+
+    Args:
+        query: Text to embed and look up in the FAISS index.
+        k: Number of nearest neighbours to request.
+
+    Returns:
+        The matching documents joined by blank lines, or a short
+        explanatory message when the index is empty or nothing matched.
+    """
+    # Check for an empty index first so that no embedding request is made
+    # when there is nothing to search.
+    if index.ntotal == 0:
+        return "No documents in the index."
+    # get_embeddings() already returns float32; FAISS wants a 2-D batch,
+    # so reshape the single vector to one row.
+    query_vector = get_embeddings(query).reshape(1, -1)
+    _, indices = index.search(query_vector, k)
+    # FAISS pads missing neighbours with -1; skip those slots.
+    results = [documents[i] for i in indices[0] if i != -1]
+    if not results:
+        return "No relevant documents found."
+    return "\n\n".join(results)
+# Step 4: Function to generate a response using OpenAI's GPT
+def generate_response(context, user_input):
+    """Ask "gpt-4o-mini" to answer ``user_input`` given ``context``.
+
+    The retrieved context and the user's message are merged into a single
+    user turn; the text of the first completion choice is returned.
+    """
+    conversation = [
+        {"role": "system", "content": "You are a helpful assistant."},
+        {"role": "user", "content": f"{context}\n\nUser: {user_input}\nAssistant:"},
+    ]
+    completion = client.chat.completions.create(
+        model="gpt-4o-mini",
+        messages=conversation,
+    )
+    first_choice = completion.choices[0]
+    return first_choice.message.content
+# Step 5: Gradio chatbot function
+def chatbot_interface(user_input, chat_history):
+    """Answer ``user_input`` and record the exchange in ``chat_history``.
+
+    Context is retrieved with ``search_index``, a reply is produced with
+    ``generate_response``, and the ``(user_input, reply)`` pair is appended
+    to ``chat_history`` in place. The same history object is returned twice.
+    """
+    retrieved = search_index(user_input)
+    turn = (user_input, generate_response(retrieved, user_input))
+    chat_history.append(turn)
+    return (chat_history, chat_history)
+def chat_gen(message, history):
+    """Stream an answer to ``message`` for the Gradio chat interface.
+
+    Args:
+        message: The user's latest message.
+        history: Prior conversation turns supplied by Gradio. It is not
+            forwarded to the model; each question is answered from the
+            retrieved context alone.
+
+    Yields:
+        The reply accumulated so far, growing as stream chunks arrive.
+    """
+    context = search_index(message)
+    prompt = f"{context}\n\nUser: {message}\nAssistant:"
+    stream = client.chat.completions.create(
+        model="gpt-4o-mini",
+        messages=[{"role": "system", "content": "You are a helpful assistant."},
+                  {"role": "user", "content": prompt}],
+        stream=True,
+    )
+    partial_message = ""
+    for chunk in stream:
+        # A stream chunk may carry an empty `choices` list (for example a
+        # usage-only chunk); skip it instead of raising IndexError.
+        if not chunk.choices:
+            continue
+        piece = chunk.choices[0].delta.content
+        if piece is not None:
+            partial_message += piece
+            yield partial_message
+# Greeting pre-loaded into the chatbot as an assistant message.
+initial_msg = "Hello! I am DDI assistant. You can ask me anything about DDI program. I am happy to assist you."
+chatbot = gr.Chatbot(value=[[None, initial_msg]])
+demo = gr.ChatInterface(chat_gen, chatbot=chatbot).queue()
+try:
+    demo.launch(debug=True, share=False, show_api=False)
+finally:
+    # Close the app whether launch() returns normally or raises (including
+    # KeyboardInterrupt); any exception still propagates with its traceback.
+    demo.close()

documents.json ADDED Viewed

The diff for this file is too large to render. See raw diff

faiss_index.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ddae79299503fe170726dd027d6d4cf5a7057ebdcd36a186d0a643f981a1c4b0
+size 755757

requirements.txt ADDED Viewed

	@@ -0,0 +1,100 @@

+aiofiles==23.2.1
+annotated-types==0.7.0
+anyio==4.6.2.post1
+appnope==0.1.4
+asttokens==2.4.1
+certifi==2024.8.30
+charset-normalizer==3.4.0
+click==8.1.7
+comm==0.2.2
+debugpy==1.8.8
+decorator==5.1.1
+distro==1.9.0
+et_xmlfile==2.0.0
+executing==2.1.0
+faiss-cpu==1.9.0
+fastapi==0.115.4
+ffmpy==0.4.0
+filelock==3.16.1
+fsspec==2024.10.0
+gradio==5.5.0
+gradio_client==1.4.2
+h11==0.14.0
+httpcore==1.0.6
+httpx==0.27.2
+huggingface-hub==0.26.2
+idna==3.10
+ipykernel==6.29.5
+ipython==8.29.0
+jedi==0.19.2
+Jinja2==3.1.4
+jiter==0.7.0
+joblib==1.4.2
+jupyter_client==8.6.3
+jupyter_core==5.7.2
+lxml==5.3.0
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib-inline==0.1.7
+mdurl==0.1.2
+mpmath==1.3.0
+nest-asyncio==1.6.0
+networkx==3.4.2
+numpy==2.1.3
+openai==1.54.3
+openpyxl==3.1.5
+orjson==3.10.11
+packaging==24.2
+pandas==2.2.3
+parso==0.8.4
+pexpect==4.9.0
+pillow==11.0.0
+platformdirs==4.3.6
+prompt_toolkit==3.0.48
+psutil==6.1.0
+ptyprocess==0.7.0
+pure_eval==0.2.3
+pydantic==2.9.2
+pydantic_core==2.23.4
+pydub==0.25.1
+Pygments==2.18.0
+PyPDF2==3.0.1
+pytesseract==0.3.13
+python-dateutil==2.9.0.post0
+python-docx==1.1.2
+python-multipart==0.0.12
+pytz==2024.2
+PyYAML==6.0.2
+pyzmq==26.2.0
+regex==2024.11.6
+requests==2.32.3
+rich==13.9.4
+ruff==0.7.3
+safehttpx==0.1.1
+safetensors==0.4.5
+scikit-learn==1.5.2
+scipy==1.14.1
+semantic-version==2.10.0
+sentence-transformers==3.3.0
+setuptools==75.4.0
+shellingham==1.5.4
+six==1.16.0
+sniffio==1.3.1
+stack-data==0.6.3
+starlette==0.41.2
+sympy==1.13.1
+threadpoolctl==3.5.0
+tokenizers==0.20.3
+tomlkit==0.12.0
+torch==2.5.1
+tornado==6.4.1
+tqdm==4.67.0
+traitlets==5.14.3
+transformers==4.46.2
+typer==0.13.0
+typing_extensions==4.12.2
+tzdata==2024.2
+urllib3==2.2.3
+uvicorn==0.32.0
+wcwidth==0.2.13
+websockets==12.0