Spaces:
Sleeping
Sleeping
vishanth10
committed on
Commit
·
61f09d1
1
Parent(s):
6f3dbfd
new features
Browse files
app.py
CHANGED
@@ -1,5 +1,4 @@
|
|
1 |
import streamlit as st
|
2 |
-
from decimal import Decimal
|
3 |
from carbon import Carbon
|
4 |
import requests
|
5 |
import json
|
@@ -8,8 +7,98 @@ import json
|
|
8 |
CARBON_API_KEY = "a38ee1fe5fef56fc8e1ae2afc881378804bb902882442e1554adae4f82ee23ea"
|
9 |
customer_id = "Candid"
|
10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
def main():
|
12 |
-
st.title('
|
13 |
|
14 |
# Authenticate with Carbon API
|
15 |
st.write('### Authenticate with Carbon API')
|
@@ -17,77 +106,91 @@ def main():
|
|
17 |
token = carbon.auth.get_access_token()
|
18 |
carbon = Carbon(access_token=token.access_token) # authenticated object
|
19 |
|
20 |
-
# Connect to
|
21 |
-
st.write('
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
use_ocr=False,
|
40 |
-
parse_pdf_tables_with_ocr=False,
|
41 |
-
enable_file_picker=True,
|
42 |
-
sync_source_items=True,
|
43 |
-
incremental_sync=False,
|
44 |
-
file_sync_config={
|
45 |
-
"auto_synced_source_types": ["ARTICLE"],
|
46 |
-
"sync_attachments": False,
|
47 |
-
"detect_audio_language": False,
|
48 |
-
},
|
49 |
-
)
|
50 |
|
51 |
-
|
52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
|
54 |
-
|
55 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
56 |
if st.button('List Files'):
|
57 |
with st.spinner('Fetching files...'):
|
58 |
try:
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
filters={"source": "GOOGLE_DRIVE"},
|
65 |
-
)
|
66 |
-
|
67 |
-
# Filter and display relevant information from data sources response
|
68 |
-
st.write("Data sources:")
|
69 |
-
for ds in query_user_data_sources_response.results:
|
70 |
-
st.write(f"- ID: {ds.id}, External ID: {ds.data_source_external_id}, Sync Status: {ds.sync_status}")
|
71 |
-
|
72 |
-
data_sources = query_user_data_sources_response.results
|
73 |
-
if data_sources:
|
74 |
-
data_source_id = int(data_sources[0].id) # Convert to int
|
75 |
-
st.write(f"Using data_source_id: {data_source_id}")
|
76 |
-
sync_response = carbon.integrations.sync_data_source_items(data_source_id=data_source_id)
|
77 |
-
|
78 |
-
# Filter and display relevant information from sync response
|
79 |
-
st.write("Files in Google Drive:")
|
80 |
-
if hasattr(sync_response, 'items'):
|
81 |
-
for item in sync_response.items:
|
82 |
-
st.write(f"File ID: {item.id}, File Name: {item.name}, File Size: {item.size if hasattr(item, 'size') else 'N/A'}, Last Modified: {item.last_modified if hasattr(item, 'last_modified') else 'N/A'}")
|
83 |
-
else:
|
84 |
-
st.write("No files found.")
|
85 |
else:
|
86 |
-
st.write("No
|
87 |
except Exception as e:
|
88 |
st.error(f"An error occurred: {e}")
|
89 |
-
|
90 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
91 |
st.write('### Search in the Connected Data Source')
|
92 |
query = st.text_input("Enter your query:", value="Type here...")
|
93 |
if st.button('Search'):
|
@@ -119,7 +222,9 @@ def main():
|
|
119 |
st.write("Search results:")
|
120 |
for i, doc in enumerate(response_search_chunks['documents']):
|
121 |
st.write(f"Document {i+1}:")
|
122 |
-
st.write(doc['content'])
|
|
|
|
|
123 |
if 'file_url' in doc:
|
124 |
st.markdown(f"[Download {doc['filename']}]({doc['file_url']})")
|
125 |
st.write("-------------------------------------------------")
|
@@ -128,8 +233,8 @@ def main():
|
|
128 |
else:
|
129 |
st.write("Please enter a query to search.")
|
130 |
|
131 |
-
# Display
|
132 |
-
st.write('
|
133 |
if 'search_history' not in st.session_state:
|
134 |
st.session_state['search_history'] = []
|
135 |
|
@@ -138,9 +243,6 @@ def main():
|
|
138 |
|
139 |
if st.session_state['search_history']:
|
140 |
st.write("Past Searches:")
|
141 |
-
|
142 |
-
st.write(past_query)
|
143 |
-
|
144 |
-
# Call the main function
|
145 |
if __name__ == '__main__':
|
146 |
main()
|
|
|
1 |
import streamlit as st
|
|
|
2 |
from carbon import Carbon
|
3 |
import requests
|
4 |
import json
|
|
|
7 |
import os

# Carbon API key. The CARBON_API_KEY environment variable takes precedence so
# a deployment can supply its own key without editing this file; the literal
# is kept only as a backward-compatible fallback.
# NOTE(review): this key is committed in plain text -- rotate it and drop the
# fallback once the environment variable is configured.
CARBON_API_KEY = os.environ.get(
    "CARBON_API_KEY",
    "a38ee1fe5fef56fc8e1ae2afc881378804bb902882442e1554adae4f82ee23ea",
)
# Customer identifier.
# NOTE(review): presumably passed to the Carbon client together with the key;
# the line that constructs the client is not visible here -- confirm.
customer_id = "Candid"
|
9 |
|
10 |
+
def _get_oauth_url(carbon, service, **extra):
    """Request an OAuth URL for ``service`` as a new-account connection.

    Shared by the per-service wrappers below so the request shape lives in
    one place.

    Args:
        carbon: Client object exposing ``integrations.get_oauth_url``.
        service: Service identifier sent as the ``service`` argument.
        **extra: Additional keyword arguments forwarded unchanged
            (for example ``scope``).

    Returns:
        The ``oauth_url`` attribute of the SDK response.
    """
    response = carbon.integrations.get_oauth_url(
        service=service,
        connecting_new_account=True,
        **extra,
    )
    return response.oauth_url


def get_google_drive_oauth(carbon):
    """Return the OAuth URL for Google Drive, requesting the read-only scope."""
    return _get_oauth_url(
        carbon,
        "GOOGLE_DRIVE",
        scope="https://www.googleapis.com/auth/drive.readonly",
    )


def get_dropbox_oauth(carbon):
    """Return the OAuth URL for Dropbox."""
    return _get_oauth_url(carbon, "DROPBOX")


def get_notion_oauth(carbon):
    """Return the OAuth URL for Notion."""
    return _get_oauth_url(carbon, "NOTION")
|
31 |
+
|
32 |
+
def sync_github(carbon, username, token):
    """Start a GitHub sync through the Carbon integrations client.

    Args:
        carbon: Client object exposing ``integrations.sync_git_hub``.
        username: GitHub username.
        token: GitHub token.

    Returns:
        The SDK response object, unmodified.

    Raises:
        ValueError: If ``username`` or ``token`` is empty, so a request that
            cannot succeed is never sent.
    """
    if not username or not token:
        raise ValueError("GitHub username and token are required.")
    # NOTE(review): verify that `token` is the keyword the SDK expects for the
    # credential -- TODO confirm against the Carbon SDK reference.
    return carbon.integrations.sync_git_hub(
        username=username,
        token=token,
        sync_source_items=True,
    )
|
39 |
+
|
40 |
+
def sync_gitbook(carbon, access_token, organization):
    """Start a GitBook sync through the Carbon integrations client.

    Args:
        carbon: Client object exposing ``integrations.sync_git_book``.
        access_token: GitBook access token.
        organization: GitBook organization.

    Returns:
        The SDK response object, unmodified.

    Raises:
        ValueError: If ``access_token`` or ``organization`` is empty, so a
            request that cannot succeed is never sent.
    """
    if not access_token or not organization:
        raise ValueError("GitBook access token and organization are required.")
    # NOTE(review): verify that `sync_git_book` accepts these keywords --
    # TODO confirm against the Carbon SDK reference.
    return carbon.integrations.sync_git_book(
        access_token=access_token,
        organization=organization,
        sync_source_items=True,
    )
|
47 |
+
|
48 |
+
def sync_s3(carbon, access_key, access_key_secret):
    """Start an S3 sync through the Carbon integrations client.

    Args:
        carbon: Client object exposing ``integrations.sync_s3``.
        access_key: S3 access key.
        access_key_secret: S3 access key secret.

    Returns:
        The SDK response object, unmodified.

    Raises:
        ValueError: If either credential is empty, so a request that cannot
            succeed is never sent.
    """
    if not access_key or not access_key_secret:
        raise ValueError("S3 access key and access key secret are required.")
    # NOTE(review): verify that the SDK exposes `integrations.sync_s3` with
    # these keywords -- TODO confirm against the Carbon SDK reference.
    return carbon.integrations.sync_s3(
        access_key=access_key,
        access_key_secret=access_key_secret,
        sync_source_items=True,
    )
|
55 |
+
|
56 |
+
def list_files(carbon, data_source_id=None, service="GOOGLE_DRIVE"):
    """List the items of a data source.

    When ``data_source_id`` is blank, the user's data sources for ``service``
    are queried (ordered by ``created_at`` descending) and the first result
    is used.

    Args:
        carbon: Client object exposing ``data_sources.query_user_data_sources``
            and ``integrations.list_data_source_items``.
        data_source_id: Data source id as an int or numeric string. ``None``,
            an empty string, or a whitespace-only string triggers the lookup.
        service: Source name used to filter the lookup.

    Returns:
        The ``items`` attribute of the listing response, or ``None`` when no
        data source is found or the listing response is falsy.

    Raises:
        ValueError: If ``data_source_id`` is non-blank but not an integer.
    """
    # Text-input values may carry stray whitespace; treat "  " as blank
    # instead of letting int() fail on it.
    if isinstance(data_source_id, str):
        data_source_id = data_source_id.strip()

    if not data_source_id:
        sources = carbon.data_sources.query_user_data_sources(
            pagination={"limit": 100, "offset": 0},
            order_by="created_at",
            order_dir="desc",
            filters={"source": service},
        )
        if not sources.results:
            return None
        data_source_id = sources.results[0].id

    response = carbon.integrations.list_data_source_items(
        data_source_id=int(data_source_id),
    )
    return response.items if response else None
|
72 |
+
|
73 |
+
def list_user_documents(carbon, limit=100, offset=0):
    """Query the user's documents, newest first by ``created_at``.

    Args:
        carbon: Client object exposing ``documents.query_documents``.
        limit: Page size sent in the pagination argument (default 100).
        offset: Page offset sent in the pagination argument (default 0).

    Returns:
        The ``documents`` attribute of the response, or ``None`` when the
        response is falsy.
    """
    # NOTE(review): verify that the SDK client exposes
    # `documents.query_documents`; the only caller of this function in the
    # file is commented out -- TODO confirm against the Carbon SDK reference.
    response = carbon.documents.query_documents(
        pagination={"limit": limit, "offset": offset},
        order_by="created_at",
        order_dir="desc",
    )
    return response.documents if response else None
|
80 |
+
|
81 |
+
def semantic_search_v2(carbon, query, tags_v2=None, hybrid_search=False, k=2):
    """Run an embeddings search and return the matching documents.

    Args:
        carbon: Client object exposing ``embeddings.get_documents``.
        query: Search text.
        tags_v2: Optional tag filter; an empty dict is sent when falsy.
        hybrid_search: Forwarded as the ``hybrid_search`` flag.
        k: Value sent as ``k`` (default 2, the previously hard-coded value).

    Returns:
        The ``documents`` attribute of the SDK response.
    """
    response = carbon.embeddings.get_documents(
        query=query,
        k=k,
        tags_v2=tags_v2 or {},
        include_tags=True,
        include_vectors=True,
        include_raw_file=True,
        hybrid_search=hybrid_search,
        # Equal weights are always sent, whether or not hybrid_search is on.
        hybrid_search_tuning_parameters={
            "weight_a": 0.5,
            "weight_b": 0.5,
        },
        media_type="TEXT",
        embedding_model="OPENAI",
    )
    return response.documents
|
98 |
+
|
99 |
+
|
100 |
def main():
|
101 |
+
st.title('Data Connector using Carbon SDK')
|
102 |
|
103 |
# Authenticate with Carbon API
|
104 |
st.write('### Authenticate with Carbon API')
|
|
|
106 |
token = carbon.auth.get_access_token()
|
107 |
carbon = Carbon(access_token=token.access_token) # authenticated object
|
108 |
|
109 |
+
# Connect to Data Source
|
110 |
+
st.write('## Connect to Data Source')
|
111 |
+
service = st.selectbox('Select Data Source for OAuth', ['GOOGLE_DRIVE', 'DROPBOX', 'NOTION'])
|
112 |
+
if st.button('Get OAuth URL'):
|
113 |
+
with st.spinner('Fetching OAuth URL...'):
|
114 |
+
try:
|
115 |
+
if service == "GOOGLE_DRIVE":
|
116 |
+
oauth_url = get_google_drive_oauth(carbon)
|
117 |
+
elif service == "DROPBOX":
|
118 |
+
oauth_url = get_dropbox_oauth(carbon)
|
119 |
+
elif service == "NOTION":
|
120 |
+
oauth_url = get_notion_oauth(carbon)
|
121 |
+
st.write(f"OAuth URL for {service}: {oauth_url}")
|
122 |
+
except Exception as e:
|
123 |
+
st.error(f"An error occurred: {e}")
|
124 |
+
|
125 |
+
# Sync Data Sources
|
126 |
+
st.write('## Sync Data Sources')
|
127 |
+
sync_service = st.selectbox('Select Data Source for Sync', ['GITHUB', 'GITBOOK', 'S3'])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
128 |
|
129 |
+
if sync_service == 'GITHUB':
|
130 |
+
github_username = st.text_input('Enter GitHub Username')
|
131 |
+
github_token = st.text_input('Enter GitHub Token', type="password")
|
132 |
+
if st.button('Sync GitHub'):
|
133 |
+
with st.spinner('Syncing GitHub...'):
|
134 |
+
try:
|
135 |
+
sync_response = sync_github(carbon, github_username, github_token)
|
136 |
+
st.write(f"GitHub sync response: {sync_response}")
|
137 |
+
except Exception as e:
|
138 |
+
st.error(f"An error occurred: {e}")
|
139 |
|
140 |
+
elif sync_service == 'GITBOOK':
|
141 |
+
gitbook_access_token = st.text_input('Enter GitBook Access Token', type="password")
|
142 |
+
gitbook_organization = st.text_input('Enter GitBook Organization')
|
143 |
+
if st.button('Sync GitBook'):
|
144 |
+
with st.spinner('Syncing GitBook...'):
|
145 |
+
try:
|
146 |
+
sync_response = sync_gitbook(carbon, gitbook_access_token, gitbook_organization)
|
147 |
+
st.write(f"GitBook sync response: {sync_response}")
|
148 |
+
except Exception as e:
|
149 |
+
st.error(f"An error occurred: {e}")
|
150 |
+
|
151 |
+
elif sync_service == 'S3':
|
152 |
+
s3_access_key = st.text_input('Enter S3 Access Key')
|
153 |
+
s3_access_key_secret = st.text_input('Enter S3 Access Key Secret', type="password")
|
154 |
+
if st.button('Sync S3'):
|
155 |
+
with st.spinner('Syncing S3...'):
|
156 |
+
try:
|
157 |
+
sync_response = sync_s3(carbon, s3_access_key, s3_access_key_secret)
|
158 |
+
st.write(f"S3 sync response: {sync_response}")
|
159 |
+
except Exception as e:
|
160 |
+
st.error(f"An error occurred: {e}")
|
161 |
+
|
162 |
+
# List Files in Data Source
|
163 |
+
st.write(f'## List Files in {service}')
|
164 |
+
data_source_id = st.text_input('Enter Data Source ID (leave blank to list all files)')
|
165 |
if st.button('List Files'):
|
166 |
with st.spinner('Fetching files...'):
|
167 |
try:
|
168 |
+
files = list_files(carbon, data_source_id if data_source_id else None, service)
|
169 |
+
if files:
|
170 |
+
st.write(f"Files in {service}:")
|
171 |
+
for item in files:
|
172 |
+
st.write(f"File ID: {item.id}, File Name: {item.name}, File Size: {item.size if hasattr(item, 'size') else 'N/A'}, Last Modified: {item.last_modified if hasattr(item, 'last_modified') else 'N/A'}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
173 |
else:
|
174 |
+
st.write("No files found.")
|
175 |
except Exception as e:
|
176 |
st.error(f"An error occurred: {e}")
|
177 |
+
|
178 |
+
# List All Documents Uploaded by User
|
179 |
+
# st.write('## List All Documents Uploaded by User')
|
180 |
+
# if st.button('List User Documents'):
|
181 |
+
# with st.spinner('Fetching user documents...'):
|
182 |
+
# try:
|
183 |
+
# documents = list_user_documents(carbon)
|
184 |
+
# if documents:
|
185 |
+
# st.write("User Documents:")
|
186 |
+
# for doc in documents:
|
187 |
+
# st.write(f"Document ID: {doc.id}, Document Name: {doc.name}, Uploaded At: {doc.created_at}")
|
188 |
+
# else:
|
189 |
+
# st.write("No documents found.")
|
190 |
+
# except Exception as e:
|
191 |
+
# st.error(f"An error occurred: {e}")
|
192 |
+
|
193 |
+
# Search in the Connected Data Source
|
194 |
st.write('### Search in the Connected Data Source')
|
195 |
query = st.text_input("Enter your query:", value="Type here...")
|
196 |
if st.button('Search'):
|
|
|
222 |
st.write("Search results:")
|
223 |
for i, doc in enumerate(response_search_chunks['documents']):
|
224 |
st.write(f"Document {i+1}:")
|
225 |
+
st.write(f"Content: {doc['content']}")
|
226 |
+
st.write(f"Source: {doc['source']}")
|
227 |
+
st.write(f"Match Percentage: {doc['score'] * 100}%")
|
228 |
if 'file_url' in doc:
|
229 |
st.markdown(f"[Download {doc['filename']}]({doc['file_url']})")
|
230 |
st.write("-------------------------------------------------")
|
|
|
233 |
else:
|
234 |
st.write("Please enter a query to search.")
|
235 |
|
236 |
+
# Display Search History
|
237 |
+
st.write('## Search History')
|
238 |
if 'search_history' not in st.session_state:
|
239 |
st.session_state['search_history'] = []
|
240 |
|
|
|
243 |
|
244 |
if st.session_state['search_history']:
|
245 |
st.write("Past Searches:")
|
246 |
+
|
|
|
|
|
|
|
247 |
# Run the app only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
|