bubuuunel committed on
Commit
57079d8
1 Parent(s): 0ec4749

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dataset_db.zip +3 -0
  2. app.py +87 -0
  3. requirements.txt +8 -0
Dataset_db.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c45947fa443fe590e57dfc9f41e2502335313950ec4a0b5de39427477e2aa51
3
+ size 172
app.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
+ ## Setup
4
+ # Import the necessary Libraries
5
+
6
+
7
+
8
+
9
+
10
+
11
+ # Create Client
12
+
13
+
14
+ # Define the embedding model and the vectorstore
15
+
16
+ # Load the persisted vectorDB
17
+
18
+
19
+ # Prepare the logging functionality
20
+
21
# Logging setup: each app session appends JSON lines to a uniquely named
# file, and a CommitScheduler periodically pushes the log folder to a
# Hugging Face dataset repo.
log_file = Path("logs/") / f"data_{uuid.uuid4()}.json"
log_folder = log_file.parent

# NOTE(review): repo_id is a placeholder ("---------") — it must be replaced
# with a real dataset repo id before the scheduler can commit anything.
scheduler = CommitScheduler(
    repo_id="---------",
    repo_type="dataset",
    folder_path=log_folder,
    path_in_repo="data",
    every=2,  # presumably the commit interval in minutes — confirm against huggingface_hub docs
)
31
+
32
+ # Define the Q&A system message
33
+
34
+
35
+
36
+
37
+
38
+ # Define the user message template
39
+
40
+
41
+
42
+
43
+ # Define the predict function that runs when 'Submit' is clicked or when a API request is made
44
def predict(user_input, company):
    """Answer *user_input* using context retrieved from the selected
    company's 10-K filing, log the exchange, and return the LLM response.

    Parameters
    ----------
    user_input : str
        The user's question (from the Gradio textbox or an API call).
    company : str
        Company identifier; selects which 10-K PDF retrieval is filtered to.

    Returns
    -------
    The LLM's response text.
    """
    # Renamed from `filter` to avoid shadowing the builtin; the path must
    # match the "source" metadata stored in the vector DB at ingestion time.
    source_path = "dataset/" + company + "-10-k-2023.pdf"
    relevant_document_chunks = vectorstore_persisted.similarity_search(
        user_input, k=5, filter={"source": source_path}
    )

    # TODO(template): build `context_for_query` from relevant_document_chunks —
    # this step is missing in the uploaded file and will raise NameError below.

    # TODO(template): assemble the chat messages and call the LLM to obtain
    # `prediction` — also missing in the uploaded file.

    # Log inputs and outputs to the local log file. Hold the scheduler lock so
    # a background commit cannot read a half-written line.
    with scheduler.lock:
        with log_file.open("a") as f:
            f.write(json.dumps(
                {
                    'user_input': user_input,
                    'retrieved_context': context_for_query,
                    'model_response': prediction
                }
            ))
            f.write("\n")

    return prediction
74
+
75
+ # Set-up the Gradio UI
76
+ # Add text box and radio button to the interface
77
+ # The radio button is used to select the company 10k report in which the context needs to be retrieved.
78
+
79
# Gradio UI widgets: a free-text question box plus a radio selector that
# picks which company's 10-K report context is retrieved from.
textbox = gr.Textbox()
company = gr.Radio()

# NOTE(review): `demo` (the gr.Interface) is never constructed in this file —
# the template gap means these two calls will raise NameError at import time.
demo.queue()
demo.launch()
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ openai==1.23.2
2
+ tiktoken==0.6.0
3
+ langchain==0.1.1
4
+ langchain-community==0.0.13
5
+ chromadb==0.4.22
6
+ sentence-transformers==2.3.1
7
+ datasets
8
+ pypdf