Upload 3 files
Browse files- Dataset_db.zip +3 -0
- app.py +87 -0
- requirements.txt +8 -0
Dataset_db.zip
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8c45947fa443fe590e57dfc9f41e2502335313950ec4a0b5de39427477e2aa51
|
3 |
+
size 172
|
app.py
ADDED
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
|
3 |
+
## Setup
|
4 |
+
# Import the necessary Libraries
|
5 |
+
|
6 |
+
|
7 |
+
|
8 |
+
|
9 |
+
|
10 |
+
|
11 |
+
# Create Client
|
12 |
+
|
13 |
+
|
14 |
+
# Define the embedding model and the vectorstore
|
15 |
+
|
16 |
+
# Load the persisted vectorDB
|
17 |
+
|
18 |
+
|
19 |
+
# Prepare the logging functionality
# Each app session writes to its own uniquely-named JSON-lines file so that
# concurrent sessions never collide on the same log file.
log_file = Path("logs/") / f"data_{uuid.uuid4()}.json"
log_folder = log_file.parent

# Periodically commit the local log folder into a Hugging Face dataset repo
# under the "data" path.
# NOTE(review): repo_id is a placeholder ("---------") and must be replaced
# with a real <user>/<dataset> repo id before deployment — confirm.
# NOTE(review): every=2 presumably means a commit every 2 minutes — verify
# against the huggingface_hub CommitScheduler documentation.
scheduler = CommitScheduler(
    repo_id="---------",
    repo_type="dataset",
    folder_path=log_folder,
    path_in_repo="data",
    every=2
)
|
31 |
+
|
32 |
+
# Define the Q&A system message
|
33 |
+
|
34 |
+
|
35 |
+
|
36 |
+
|
37 |
+
|
38 |
+
# Define the user message template
|
39 |
+
|
40 |
+
|
41 |
+
|
42 |
+
|
43 |
+
# Define the predict function that runs when 'Submit' is clicked or when a API request is made
def predict(user_input, company):
    """Answer `user_input` using context retrieved from `company`'s 10-K filing.

    Parameters
    ----------
    user_input : str
        The question typed into the UI textbox.
    company : str
        Company name selected via the radio button; selects which
        "<company>-10-k-2023.pdf" source the retrieval is filtered to.

    Returns
    -------
    The LLM's answer (`prediction`).
    """
    # Restrict retrieval to the selected company's 10-K PDF.
    # (Renamed from `filter` in the original so the builtin is not shadowed.)
    source_path = "dataset/" + company + "-10-k-2023.pdf"
    relevant_document_chunks = vectorstore_persisted.similarity_search(
        user_input, k=5, filter={"source": source_path}
    )

    # Create context_for_query: join the retrieved chunk texts so they can be
    # spliced into the prompt and logged below. (The original template used
    # this name in the log payload but never assigned it — NameError.)
    context_for_query = "\n---\n".join(
        chunk.page_content for chunk in relevant_document_chunks
    )

    # Create messages
    # TODO(template): build the chat messages from the Q&A system message and
    # the user message template defined earlier in the file.

    # Get response from the LLM
    # TODO(template): call the LLM here and assign its answer to `prediction`.
    # NOTE(review): left unimplemented in the original upload; until this is
    # filled in, the logging below raises NameError on `prediction`.

    # While the prediction is made, log both the inputs and outputs to a local log file
    # While writing to the log file, ensure that the commit scheduler is locked to avoid parallel
    # access
    with scheduler.lock:
        with log_file.open("a") as f:
            f.write(json.dumps(
                {
                    'user_input': user_input,
                    'retrieved_context': context_for_query,
                    'model_response': prediction
                }
            ))
            f.write("\n")

    return prediction
|
74 |
+
|
75 |
+
# Set-up the Gradio UI
# Add text box and radio button to the interface
# The radio button is used to select the company 10k report in which the context needs to be retrieved.

textbox = gr.Textbox()
# NOTE(review): the template leaves Radio() without choices; the company names
# matching the "dataset/<company>-10-k-2023.pdf" files must be supplied — confirm.
company = gr.Radio()

# Create the interface
# For the inputs parameter of Interface provide [textbox,company]
# NOTE(review): `demo` was never defined in the original upload, so
# demo.queue() raised NameError. Constructed here per the template comment
# above; outputs="text" assumed since predict returns a text answer — confirm.
demo = gr.Interface(
    fn=predict,
    inputs=[textbox, company],
    outputs="text"
)

demo.queue()
demo.launch()
|
requirements.txt
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
openai==1.23.2
|
2 |
+
tiktoken==0.6.0
|
3 |
+
langchain==0.1.1
|
4 |
+
langchain-community==0.0.13
|
5 |
+
chromadb==0.4.22
|
6 |
+
sentence-transformers==2.3.1
|
7 |
+
datasets
|
8 |
+
pypdf
|