Spaces:
Runtime error
adding new files requirements
- app.py +59 -62
- requirements.txt +3 -1
app.py
CHANGED
@@ -2,20 +2,19 @@ import os
 import chainlit as cl
 from dotenv import load_dotenv
 from operator import itemgetter
+import tiktoken
+from langchain_text_splitters import RecursiveCharacterTextSplitter
 from langchain_openai import ChatOpenAI
 from langchain.schema.runnable import RunnablePassthrough
-from langchain.schema.runnable.config import RunnableConfig
 from langchain_openai.embeddings import OpenAIEmbeddings
 from langchain.document_loaders import PyMuPDFLoader
-import tiktoken
-from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_community.vectorstores import Qdrant
 from langchain_openai.embeddings import OpenAIEmbeddings
 from langchain_core.prompts import ChatPromptTemplate
-from
+from operator import itemgetter
 from langchain.schema.runnable import RunnablePassthrough
-from dotenv import main
 import openai
+from dotenv import main

 # GLOBAL SCOPE - ENTIRE APPLICATION HAS ACCESS TO VALUES SET IN THIS SCOPE #
 # ---- ENV VARIABLES ---- #
@@ -29,81 +28,79 @@ main.load_dotenv()
 """
 We will load our environment variables here.
 """
-openai.api_key
+openai.api_key=os.environ["OPENAI_API_KEY"]

 # Model
 openai_chat_model = ChatOpenAI(model="gpt-4o")

 # upload embedding model
 embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")
+# -- AUGMENTED -- #
+"""
+1. Define a String Template
+2. Create a Prompt Template from the String Template
+"""
+RAG_PROMPT = """
+CONTEXT:
+{context}

-
-
-
-
-"""
-1. Load Documents from Text File
-2. Split Documents into Chunks
-3. Load HuggingFace Embeddings (remember to use the URL we set above)
-4. Index Files if they do not exist, otherwise load the vectorstore
-"""
-# upload file
-#docs=TextLoader("./data/airbnb_10k_filings.txt").load()
-docs = PyMuPDFLoader("airbnb_10k_filings.pdf").load()
-
-import tiktoken
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-def tiktoken_len(text):
-    tokens = tiktoken.encoding_for_model("gpt-4o").encode(
-        text,
-    )
-    return len(tokens)
-
-text_splitter = RecursiveCharacterTextSplitter(
-    chunk_size = 200,
-    chunk_overlap = 0,
-    length_function = tiktoken_len,
-)
+QUERY:
+{question}
+Use the provided context to answer the provided user question. Only use the provided context to answer the question. If you do not know the answer, respond with "I don't know"
+"""

-
+CONTEXT = """
+You are an expert on Airbnb; be polite and answer all questions. This report on Airbnb 10-K filings contains unstructured and structured tabular data; use both.
+"""

-
+rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)
+
+# ---- GLOBAL DECLARATIONS ---- #
+# -- RETRIEVAL -- #
+"""
+1. Load Documents from Text File
+2. Split Documents into Chunks
+3. Load HuggingFace Embeddings (remember to use the URL we set above)
+4. Index Files if they do not exist, otherwise load the vectorstore
+"""
+# upload file
+#docs=TextLoader("./data/airbnb_10k_filings.txt").load()
+docs = PyMuPDFLoader("airbnb_10k_filings.pdf").load()

-for chunk in split_chunks:
-    max_chunk_length = max(max_chunk_length, tiktoken_len(chunk.page_content))

-
-
-
-    embedding_model,
-    location=":memory:",
-    collection_name="airbnb 10k filings",
+def tiktoken_len(text):
+    tokens = tiktoken.encoding_for_model("gpt-4o").encode(
+        text,
 )
-
+    return len(tokens)

-
-
+text_splitter = RecursiveCharacterTextSplitter(
+    chunk_size = 200,
+    chunk_overlap = 0,
+    length_function = tiktoken_len,
+)

-
-"""
-1. Define a String Template
-2. Create a Prompt Template from the String Template
-"""
-RAG_PROMPT = """
-CONTEXT:
-{context}
+split_chunks = text_splitter.split_documents(docs)

-
-{question}
-Use the provided context to answer the provided user question. Only use the provided context to answer the question. If you do not know the answer, respond with "I don't know"
-"""
+max_chunk_length = 0

-
-
-"""
+for chunk in split_chunks:
+    max_chunk_length = max(max_chunk_length, tiktoken_len(chunk.page_content))

-
+# Embeddings and Vector store
+qdrant_vectorstore = Qdrant.from_documents(
+    split_chunks,
+    embedding_model,
+    location=":memory:",
+    collection_name="airbnb 10k filings",
+)
+print("Loaded Vectorstore")

+# Set up our retriever using LangChain
+qdrant_retriever = qdrant_vectorstore.as_retriever()
+
+@cl.on_chat_start
+async def init():
 # -- Our RAG Chain -- #

 """
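The hunk ends just as init() opens, so the committed body of the RAG chain is not visible in this diff. For orientation, here is a minimal sketch of how the module-level objects declared above (qdrant_retriever, rag_prompt, openai_chat_model) are typically wired together with LCEL; this is an assumption about the missing chain body, not the committed code:

    # Sketch only -- the committed body of init() is cut off in this diff.
    # Assumes the module-level objects defined earlier in app.py:
    # qdrant_retriever, rag_prompt, openai_chat_model.
    from operator import itemgetter

    @cl.on_chat_start
    async def init():
        # Send the user's question to the retriever for context, then fill
        # the prompt template and call the chat model.
        retrieval_augmented_qa_chain = (
            {"context": itemgetter("question") | qdrant_retriever,
             "question": itemgetter("question")}
            | rag_prompt
            | openai_chat_model
        )
        # Stash the chain on the Chainlit session for the message handler.
        cl.user_session.set("chain", retrieval_augmented_qa_chain)

An @cl.on_message handler would then pull the chain back with cl.user_session.get("chain") and await chain.ainvoke({"question": message.content}).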
requirements.txt
CHANGED
@@ -11,4 +11,6 @@ pymupdf==1.24.5
 marshmallow==3.19.0
 jsonschema==4.17.3
 jsonpointer==1.10
-multidict==4.5.0
+multidict==4.5.0
+idna==2.8
+h2==3.0.0
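multidict==4.5.0 is removed and immediately re-added, which usually just reflects a missing trailing newline on the old last line. The added pins (idna==2.8, h2==3.0.0) are fairly old releases, presumably chosen to satisfy the Space's dependency resolver; one way to confirm the pins actually resolve before pushing (assuming a fresh virtual environment):

    pip install -r requirements.txt
    pip check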