vishanth10 committed on
Commit
61f09d1
·
1 Parent(s): 6f3dbfd

new features

Browse files
Files changed (1) hide show
  1. app.py +174 -72
app.py CHANGED
@@ -1,5 +1,4 @@
1
  import streamlit as st
2
- from decimal import Decimal
3
  from carbon import Carbon
4
  import requests
5
  import json
@@ -8,8 +7,98 @@ import json
8
  CARBON_API_KEY = "a38ee1fe5fef56fc8e1ae2afc881378804bb902882442e1554adae4f82ee23ea"
9
  customer_id = "Candid"
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  def main():
12
- st.title('Google Drive Data Connector using Carbon SDK')
13
 
14
  # Authenticate with Carbon API
15
  st.write('### Authenticate with Carbon API')
@@ -17,77 +106,91 @@ def main():
17
  token = carbon.auth.get_access_token()
18
  carbon = Carbon(access_token=token.access_token) # authenticated object
19
 
20
- # Connect to Google Drive
21
- st.write('### Connect to Google Drive')
22
- get_oauth_url_response = carbon.integrations.get_oauth_url(
23
- service="GOOGLE_DRIVE",
24
- tags=None,
25
- scope="string_example",
26
- chunk_size=Decimal(1500),
27
- chunk_overlap=Decimal(15),
28
- skip_embedding_generation=False,
29
- embedding_model="OPENAI",
30
- generate_sparse_vectors=False,
31
- prepend_filename_to_chunks=False,
32
- max_items_per_chunk=Decimal(1),
33
- salesforce_domain="string_example",
34
- sync_files_on_connection=True,
35
- set_page_as_boundary=False,
36
- data_source_id=Decimal(1), # Using static data_source_id initially
37
- connecting_new_account=False,
38
- request_id="b7620173-662c-4ae7-bb61-2e6ffd8619f5",
39
- use_ocr=False,
40
- parse_pdf_tables_with_ocr=False,
41
- enable_file_picker=True,
42
- sync_source_items=True,
43
- incremental_sync=False,
44
- file_sync_config={
45
- "auto_synced_source_types": ["ARTICLE"],
46
- "sync_attachments": False,
47
- "detect_audio_language": False,
48
- },
49
- )
50
 
51
- st.write(f"OAuth URL: {get_oauth_url_response.oauth_url}")
52
- st.write("Please use the above URL to connect your Google Drive account.")
 
 
 
 
 
 
 
 
53
 
54
- # Fetch the data source ID for the connected Google Drive
55
- st.write('### List Files in Connected Google Drive')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  if st.button('List Files'):
57
  with st.spinner('Fetching files...'):
58
  try:
59
- # Fetch data sources
60
- query_user_data_sources_response = carbon.data_sources.query_user_data_sources(
61
- pagination={"limit": 100, "offset": 0},
62
- order_by="created_at",
63
- order_dir="desc",
64
- filters={"source": "GOOGLE_DRIVE"},
65
- )
66
-
67
- # Filter and display relevant information from data sources response
68
- st.write("Data sources:")
69
- for ds in query_user_data_sources_response.results:
70
- st.write(f"- ID: {ds.id}, External ID: {ds.data_source_external_id}, Sync Status: {ds.sync_status}")
71
-
72
- data_sources = query_user_data_sources_response.results
73
- if data_sources:
74
- data_source_id = int(data_sources[0].id) # Convert to int
75
- st.write(f"Using data_source_id: {data_source_id}")
76
- sync_response = carbon.integrations.sync_data_source_items(data_source_id=data_source_id)
77
-
78
- # Filter and display relevant information from sync response
79
- st.write("Files in Google Drive:")
80
- if hasattr(sync_response, 'items'):
81
- for item in sync_response.items:
82
- st.write(f"File ID: {item.id}, File Name: {item.name}, File Size: {item.size if hasattr(item, 'size') else 'N/A'}, Last Modified: {item.last_modified if hasattr(item, 'last_modified') else 'N/A'}")
83
- else:
84
- st.write("No files found.")
85
  else:
86
- st.write("No Google Drive data sources found for the user.")
87
  except Exception as e:
88
  st.error(f"An error occurred: {e}")
89
-
90
- # Search in the connected data source
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  st.write('### Search in the Connected Data Source')
92
  query = st.text_input("Enter your query:", value="Type here...")
93
  if st.button('Search'):
@@ -119,7 +222,9 @@ def main():
119
  st.write("Search results:")
120
  for i, doc in enumerate(response_search_chunks['documents']):
121
  st.write(f"Document {i+1}:")
122
- st.write(doc['content'])
 
 
123
  if 'file_url' in doc:
124
  st.markdown(f"[Download {doc['filename']}]({doc['file_url']})")
125
  st.write("-------------------------------------------------")
@@ -128,8 +233,8 @@ def main():
128
  else:
129
  st.write("Please enter a query to search.")
130
 
131
- # Display search history
132
- st.write('### Search History')
133
  if 'search_history' not in st.session_state:
134
  st.session_state['search_history'] = []
135
 
@@ -138,9 +243,6 @@ def main():
138
 
139
  if st.session_state['search_history']:
140
  st.write("Past Searches:")
141
- for past_query in st.session_state['search_history']:
142
- st.write(past_query)
143
-
144
- # Call the main function
145
  if __name__ == '__main__':
146
  main()
 
1
  import streamlit as st
 
2
  from carbon import Carbon
3
  import requests
4
  import json
 
7
  CARBON_API_KEY = "a38ee1fe5fef56fc8e1ae2afc881378804bb902882442e1554adae4f82ee23ea"
8
  customer_id = "Candid"
9
 
10
def _get_oauth_url(carbon, service, **options):
    """Request an OAuth URL for ``service`` and return it.

    Calls ``carbon.integrations.get_oauth_url`` with
    ``connecting_new_account=True`` and forwards ``options`` unchanged as
    extra keyword arguments. Returns the ``oauth_url`` attribute of the
    response.
    """
    response = carbon.integrations.get_oauth_url(
        service=service,
        connecting_new_account=True,
        **options,
    )
    return response.oauth_url


def get_google_drive_oauth(carbon):
    """Return the OAuth URL for connecting a new Google Drive account.

    Requests the read-only Drive scope.
    """
    return _get_oauth_url(
        carbon,
        "GOOGLE_DRIVE",
        scope="https://www.googleapis.com/auth/drive.readonly",
    )


def get_dropbox_oauth(carbon):
    """Return the OAuth URL for connecting a new Dropbox account."""
    return _get_oauth_url(carbon, "DROPBOX")


def get_notion_oauth(carbon):
    """Return the OAuth URL for connecting a new Notion account."""
    return _get_oauth_url(carbon, "NOTION")
31
+
32
def sync_github(carbon, username, token):
    """Trigger a GitHub sync through the Carbon client and return its response.

    Passes ``username`` and ``token`` to ``carbon.integrations.sync_git_hub``
    together with ``sync_source_items=True``.
    """
    # NOTE(review): verify the keyword names (``token`` in particular) against
    # the Carbon SDK reference; this block does not show the SDK signature.
    github_sync_kwargs = {
        "username": username,
        "token": token,
        "sync_source_items": True,
    }
    return carbon.integrations.sync_git_hub(**github_sync_kwargs)
39
+
40
def sync_gitbook(carbon, access_token, organization):
    """Trigger a GitBook sync through the Carbon client and return its response.

    Passes ``access_token`` and ``organization`` to
    ``carbon.integrations.sync_git_book`` together with
    ``sync_source_items=True``.
    """
    # NOTE(review): verify the method name and keyword names against the
    # Carbon SDK reference; this block does not show the SDK signature.
    gitbook_sync_kwargs = {
        "access_token": access_token,
        "organization": organization,
        "sync_source_items": True,
    }
    return carbon.integrations.sync_git_book(**gitbook_sync_kwargs)
47
+
48
def sync_s3(carbon, access_key, access_key_secret):
    """Trigger an S3 sync through the Carbon client and return its response.

    Passes ``access_key`` and ``access_key_secret`` to
    ``carbon.integrations.sync_s3`` together with ``sync_source_items=True``.
    """
    # NOTE(review): verify the method name and keyword names against the
    # Carbon SDK reference; this block does not show the SDK signature.
    s3_sync_kwargs = {
        "access_key": access_key,
        "access_key_secret": access_key_secret,
        "sync_source_items": True,
    }
    return carbon.integrations.sync_s3(**s3_sync_kwargs)
55
+
56
def list_files(carbon, data_source_id=None, service="GOOGLE_DRIVE"):
    """Return the items of one data source, or ``None`` when there is none.

    Args:
        carbon: Client exposing ``integrations.list_data_source_items`` and
            ``data_sources.query_user_data_sources``.
        data_source_id: ID of the data source to list. May be an int or a
            numeric string; ``None``, ``0``, an empty string or a
            whitespace-only string all mean "not given".
        service: Source name used to filter the user's data sources when no
            ID is given.

    Returns:
        The ``items`` attribute of the listing response, or ``None`` when no
        data source matches ``service`` or the listing response is falsy.

    Raises:
        ValueError: If ``data_source_id`` is a non-blank, non-numeric value.
    """
    # The ID typically arrives from a text box, so blank input that is only
    # whitespace must be treated as "not given" rather than passed to int().
    if isinstance(data_source_id, str):
        data_source_id = data_source_id.strip()

    if not data_source_id:
        # No explicit ID: fall back to the first result for this service,
        # requested in descending ``created_at`` order.
        sources = carbon.data_sources.query_user_data_sources(
            pagination={"limit": 100, "offset": 0},
            order_by="created_at",
            order_dir="desc",
            filters={"source": service},
        )
        if not sources.results:
            return None
        data_source_id = sources.results[0].id

    response = carbon.integrations.list_data_source_items(
        data_source_id=int(data_source_id)
    )
    return response.items if response else None
72
+
73
def list_user_documents(carbon):
    """Query the user's documents and return them, or ``None``.

    Calls ``carbon.documents.query_documents`` for the first 100 entries in
    descending ``created_at`` order. Returns the ``documents`` attribute of
    the response, or ``None`` when the response is falsy.
    """
    # NOTE(review): verify ``carbon.documents.query_documents`` against the
    # Carbon SDK reference; this block does not show the SDK surface.
    page = {"limit": 100, "offset": 0}
    response = carbon.documents.query_documents(
        pagination=page,
        order_by="created_at",
        order_dir="desc",
    )
    if not response:
        return None
    return response.documents
80
+
81
def semantic_search_v2(carbon, query, tags_v2=None, hybrid_search=False, *, k=2):
    """Run an embeddings search and return the matching documents.

    Args:
        carbon: Client exposing ``embeddings.get_documents``.
        query: Search text.
        tags_v2: Optional tag filter; any falsy value is sent as ``{}``.
        hybrid_search: Forwarded as ``hybrid_search``. The tuning weights are
            fixed at 0.5 / 0.5.
        k: Value forwarded as ``k`` (presumably the number of results to
            return; confirm against the SDK). Defaults to 2, the value that
            was previously hard-coded.

    Returns:
        The ``documents`` attribute of the search response.
    """
    search_response = carbon.embeddings.get_documents(
        query=query,
        k=k,
        tags_v2=tags_v2 or {},
        include_tags=True,
        include_vectors=True,
        include_raw_file=True,
        hybrid_search=hybrid_search,
        hybrid_search_tuning_parameters={
            "weight_a": 0.5,
            "weight_b": 0.5,
        },
        media_type="TEXT",
        embedding_model="OPENAI",
    )
    return search_response.documents
98
+
99
+
100
  def main():
101
+ st.title('Data Connector using Carbon SDK')
102
 
103
  # Authenticate with Carbon API
104
  st.write('### Authenticate with Carbon API')
 
106
  token = carbon.auth.get_access_token()
107
  carbon = Carbon(access_token=token.access_token) # authenticated object
108
 
109
+ # Connect to Data Source
110
+ st.write('## Connect to Data Source')
111
+ service = st.selectbox('Select Data Source for OAuth', ['GOOGLE_DRIVE', 'DROPBOX', 'NOTION'])
112
+ if st.button('Get OAuth URL'):
113
+ with st.spinner('Fetching OAuth URL...'):
114
+ try:
115
+ if service == "GOOGLE_DRIVE":
116
+ oauth_url = get_google_drive_oauth(carbon)
117
+ elif service == "DROPBOX":
118
+ oauth_url = get_dropbox_oauth(carbon)
119
+ elif service == "NOTION":
120
+ oauth_url = get_notion_oauth(carbon)
121
+ st.write(f"OAuth URL for {service}: {oauth_url}")
122
+ except Exception as e:
123
+ st.error(f"An error occurred: {e}")
124
+
125
+ # Sync Data Sources
126
+ st.write('## Sync Data Sources')
127
+ sync_service = st.selectbox('Select Data Source for Sync', ['GITHUB', 'GITBOOK', 'S3'])
 
 
 
 
 
 
 
 
 
 
 
128
 
129
+ if sync_service == 'GITHUB':
130
+ github_username = st.text_input('Enter GitHub Username')
131
+ github_token = st.text_input('Enter GitHub Token', type="password")
132
+ if st.button('Sync GitHub'):
133
+ with st.spinner('Syncing GitHub...'):
134
+ try:
135
+ sync_response = sync_github(carbon, github_username, github_token)
136
+ st.write(f"GitHub sync response: {sync_response}")
137
+ except Exception as e:
138
+ st.error(f"An error occurred: {e}")
139
 
140
+ elif sync_service == 'GITBOOK':
141
+ gitbook_access_token = st.text_input('Enter GitBook Access Token', type="password")
142
+ gitbook_organization = st.text_input('Enter GitBook Organization')
143
+ if st.button('Sync GitBook'):
144
+ with st.spinner('Syncing GitBook...'):
145
+ try:
146
+ sync_response = sync_gitbook(carbon, gitbook_access_token, gitbook_organization)
147
+ st.write(f"GitBook sync response: {sync_response}")
148
+ except Exception as e:
149
+ st.error(f"An error occurred: {e}")
150
+
151
+ elif sync_service == 'S3':
152
+ s3_access_key = st.text_input('Enter S3 Access Key')
153
+ s3_access_key_secret = st.text_input('Enter S3 Access Key Secret', type="password")
154
+ if st.button('Sync S3'):
155
+ with st.spinner('Syncing S3...'):
156
+ try:
157
+ sync_response = sync_s3(carbon, s3_access_key, s3_access_key_secret)
158
+ st.write(f"S3 sync response: {sync_response}")
159
+ except Exception as e:
160
+ st.error(f"An error occurred: {e}")
161
+
162
+ # List Files in Data Source
163
+ st.write(f'## List Files in {service}')
164
+ data_source_id = st.text_input('Enter Data Source ID (leave blank to list all files)')
165
  if st.button('List Files'):
166
  with st.spinner('Fetching files...'):
167
  try:
168
+ files = list_files(carbon, data_source_id if data_source_id else None, service)
169
+ if files:
170
+ st.write(f"Files in {service}:")
171
+ for item in files:
172
+ st.write(f"File ID: {item.id}, File Name: {item.name}, File Size: {item.size if hasattr(item, 'size') else 'N/A'}, Last Modified: {item.last_modified if hasattr(item, 'last_modified') else 'N/A'}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173
  else:
174
+ st.write("No files found.")
175
  except Exception as e:
176
  st.error(f"An error occurred: {e}")
177
+
178
+ # List All Documents Uploaded by User
179
+ # st.write('## List All Documents Uploaded by User')
180
+ # if st.button('List User Documents'):
181
+ # with st.spinner('Fetching user documents...'):
182
+ # try:
183
+ # documents = list_user_documents(carbon)
184
+ # if documents:
185
+ # st.write("User Documents:")
186
+ # for doc in documents:
187
+ # st.write(f"Document ID: {doc.id}, Document Name: {doc.name}, Uploaded At: {doc.created_at}")
188
+ # else:
189
+ # st.write("No documents found.")
190
+ # except Exception as e:
191
+ # st.error(f"An error occurred: {e}")
192
+
193
+ # Search in the Connected Data Source
194
  st.write('### Search in the Connected Data Source')
195
  query = st.text_input("Enter your query:", value="Type here...")
196
  if st.button('Search'):
 
222
  st.write("Search results:")
223
  for i, doc in enumerate(response_search_chunks['documents']):
224
  st.write(f"Document {i+1}:")
225
+ st.write(f"Content: {doc['content']}")
226
+ st.write(f"Source: {doc['source']}")
227
+ st.write(f"Match Percentage: {doc['score'] * 100}%")
228
  if 'file_url' in doc:
229
  st.markdown(f"[Download {doc['filename']}]({doc['file_url']})")
230
  st.write("-------------------------------------------------")
 
233
  else:
234
  st.write("Please enter a query to search.")
235
 
236
+ # Display Search History
237
+ st.write('## Search History')
238
  if 'search_history' not in st.session_state:
239
  st.session_state['search_history'] = []
240
 
 
243
 
244
  if st.session_state['search_history']:
245
  st.write("Past Searches:")
246
+
 
 
 
247
  if __name__ == '__main__':
248
  main()