Spaces:

vhr1007
/

traversaal_api_test

Sleeping

App Files Files Community

vishanth10 committed on Jun 13, 2024

Commit

61f09d1

1 Parent(s): 6f3dbfd

new features

Browse files

Files changed (1) hide show

app.py +174 -72

app.py CHANGED Viewed

@@ -1,5 +1,4 @@
 import streamlit as st
-from decimal import Decimal
 from carbon import Carbon
 import requests
 import json
@@ -8,8 +7,98 @@ import json
 CARBON_API_KEY = "a38ee1fe5fef56fc8e1ae2afc881378804bb902882442e1554adae4f82ee23ea"
 customer_id = "Candid"
 def main():
-    st.title('Google Drive Data Connector using Carbon SDK')
     # Authenticate with Carbon API
     st.write('### Authenticate with Carbon API')
@@ -17,77 +106,91 @@ def main():
     token = carbon.auth.get_access_token()
     carbon = Carbon(access_token=token.access_token)  # authenticated object
-    # Connect to Google Drive
-    st.write('### Connect to Google Drive')
-    get_oauth_url_response = carbon.integrations.get_oauth_url(
-        service="GOOGLE_DRIVE",
-        tags=None,
-        scope="string_example",
-        chunk_size=Decimal(1500),
-        chunk_overlap=Decimal(15),
-        skip_embedding_generation=False,
-        embedding_model="OPENAI",
-        generate_sparse_vectors=False,
-        prepend_filename_to_chunks=False,
-        max_items_per_chunk=Decimal(1),
-        salesforce_domain="string_example",
-        sync_files_on_connection=True,
-        set_page_as_boundary=False,
-        data_source_id=Decimal(1),  # Using static data_source_id initially
-        connecting_new_account=False,
-        request_id="b7620173-662c-4ae7-bb61-2e6ffd8619f5",
-        use_ocr=False,
-        parse_pdf_tables_with_ocr=False,
-        enable_file_picker=True,
-        sync_source_items=True,
-        incremental_sync=False,
-        file_sync_config={
-            "auto_synced_source_types": ["ARTICLE"],
-            "sync_attachments": False,
-            "detect_audio_language": False,
-        },
-    )
-    st.write(f"OAuth URL: {get_oauth_url_response.oauth_url}")
-    st.write("Please use the above URL to connect your Google Drive account.")
-    # Fetch the data source ID for the connected Google Drive
-    st.write('### List Files in Connected Google Drive')
     if st.button('List Files'):
         with st.spinner('Fetching files...'):
             try:
-                # Fetch data sources
-                query_user_data_sources_response = carbon.data_sources.query_user_data_sources(
-                    pagination={"limit": 100, "offset": 0},
-                    order_by="created_at",
-                    order_dir="desc",
-                    filters={"source": "GOOGLE_DRIVE"},
-                )
-                # Filter and display relevant information from data sources response
-                st.write("Data sources:")
-                for ds in query_user_data_sources_response.results:
-                    st.write(f"- ID: {ds.id}, External ID: {ds.data_source_external_id}, Sync Status: {ds.sync_status}")
-                data_sources = query_user_data_sources_response.results
-                if data_sources:
-                    data_source_id = int(data_sources[0].id)  # Convert to int
-                    st.write(f"Using data_source_id: {data_source_id}")
-                    sync_response = carbon.integrations.sync_data_source_items(data_source_id=data_source_id)
-                    # Filter and display relevant information from sync response
-                    st.write("Files in Google Drive:")
-                    if hasattr(sync_response, 'items'):
-                        for item in sync_response.items:
-                            st.write(f"File ID: {item.id}, File Name: {item.name}, File Size: {item.size if hasattr(item, 'size') else 'N/A'}, Last Modified: {item.last_modified if hasattr(item, 'last_modified') else 'N/A'}")
-                    else:
-                        st.write("No files found.")
                 else:
-                    st.write("No Google Drive data sources found for the user.")
             except Exception as e:
                 st.error(f"An error occurred: {e}")
-    # Search in the connected data source
     st.write('### Search in the Connected Data Source')
     query = st.text_input("Enter your query:", value="Type here...")
     if st.button('Search'):
@@ -119,7 +222,9 @@ def main():
                     st.write("Search results:")
                     for i, doc in enumerate(response_search_chunks['documents']):
                         st.write(f"Document {i+1}:")
-                        st.write(doc['content'])
                         if 'file_url' in doc:
                             st.markdown(f"[Download {doc['filename']}]({doc['file_url']})")
                         st.write("-------------------------------------------------")
@@ -128,8 +233,8 @@ def main():
         else:
             st.write("Please enter a query to search.")
-    # Display search history
-    st.write('### Search History')
     if 'search_history' not in st.session_state:
         st.session_state['search_history'] = []
@@ -138,9 +243,6 @@ def main():
     if st.session_state['search_history']:
         st.write("Past Searches:")
-        for past_query in st.session_state['search_history']:
-            st.write(past_query)
-# Call the main function
 if __name__ == '__main__':
     main()

 import streamlit as st
 from carbon import Carbon
 import requests
 import json
 CARBON_API_KEY = "a38ee1fe5fef56fc8e1ae2afc881378804bb902882442e1554adae4f82ee23ea"
 customer_id = "Candid"
+def get_google_drive_oauth(carbon):
+    """Return the OAuth URL for connecting a new Google Drive account.
+
+    Calls ``carbon.integrations.get_oauth_url`` for the ``GOOGLE_DRIVE``
+    service with the ``drive.readonly`` scope and hands back the
+    ``oauth_url`` attribute of the response.
+    """
+    return carbon.integrations.get_oauth_url(
+        service="GOOGLE_DRIVE",
+        scope="https://www.googleapis.com/auth/drive.readonly",
+        connecting_new_account=True,
+    ).oauth_url
+def get_dropbox_oauth(carbon):
+    """Return the OAuth URL for connecting a new Dropbox account.
+
+    Calls ``carbon.integrations.get_oauth_url`` for the ``DROPBOX`` service
+    and hands back the ``oauth_url`` attribute of the response.
+    """
+    return carbon.integrations.get_oauth_url(
+        service="DROPBOX",
+        connecting_new_account=True,
+    ).oauth_url
+def get_notion_oauth(carbon):
+    """Return the OAuth URL for connecting a new Notion account.
+
+    Calls ``carbon.integrations.get_oauth_url`` for the ``NOTION`` service
+    and hands back the ``oauth_url`` attribute of the response.
+    """
+    return carbon.integrations.get_oauth_url(
+        service="NOTION",
+        connecting_new_account=True,
+    ).oauth_url
+def sync_github(carbon, username, token):
+    """Start a GitHub sync through the Carbon client.
+
+    Forwards ``username`` and ``token`` to
+    ``carbon.integrations.sync_git_hub`` with ``sync_source_items=True``
+    and returns whatever that call returns, unmodified.
+    """
+    return carbon.integrations.sync_git_hub(
+        username=username,
+        token=token,
+        sync_source_items=True,
+    )
+def sync_gitbook(carbon, access_token, organization):
+    """Start a GitBook sync through the Carbon client.
+
+    Forwards ``access_token`` and ``organization`` to
+    ``carbon.integrations.sync_git_book`` with ``sync_source_items=True``
+    and returns whatever that call returns, unmodified.
+    """
+    return carbon.integrations.sync_git_book(
+        access_token=access_token,
+        organization=organization,
+        sync_source_items=True,
+    )
+def sync_s3(carbon, access_key, access_key_secret):
+    """Start an S3 sync through the Carbon client.
+
+    Forwards ``access_key`` and ``access_key_secret`` to
+    ``carbon.integrations.sync_s3`` with ``sync_source_items=True`` and
+    returns whatever that call returns, unmodified.
+    """
+    return carbon.integrations.sync_s3(
+        access_key=access_key,
+        access_key_secret=access_key_secret,
+        sync_source_items=True,
+    )
+def list_files(carbon, data_source_id=None, service="GOOGLE_DRIVE"):
+    """List the items of a data source, looking the source up if needed.
+
+    When ``data_source_id`` is truthy it is used directly. Otherwise the
+    user's data sources are queried (limit 100, ordered by ``created_at``
+    descending, filtered on ``source == service``) and the first result's
+    ``id`` is used.
+
+    Returns the ``items`` attribute of the listing response, or ``None``
+    when no data source was found or the listing response is falsy.
+    """
+    if not data_source_id:
+        sources = carbon.data_sources.query_user_data_sources(
+            pagination={"limit": 100, "offset": 0},
+            order_by="created_at",
+            order_dir="desc",
+            filters={"source": service},
+        )
+        if not sources.results:
+            # Nothing connected for this service, so there is nothing to list.
+            return None
+        data_source_id = sources.results[0].id
+
+    # The id may arrive as text (e.g. from a text input), so coerce to int.
+    listing = carbon.integrations.list_data_source_items(data_source_id=int(data_source_id))
+    if not listing:
+        return None
+    return listing.items
+def list_user_documents(carbon):
+    """Query the user's documents through the Carbon client.
+
+    Requests up to 100 documents starting at offset 0, ordered by
+    ``created_at`` descending. Returns the ``documents`` attribute of the
+    response, or ``None`` when the response is falsy.
+    """
+    page = {"limit": 100, "offset": 0}
+    result = carbon.documents.query_documents(
+        pagination=page,
+        order_by="created_at",
+        order_dir="desc",
+    )
+    if not result:
+        return None
+    return result.documents
+def semantic_search_v2(carbon, query, tags_v2=None, hybrid_search=False):
+    """Run an embeddings search and return the matching documents.
+
+    Args:
+        carbon: Client object exposing ``embeddings.get_documents``.
+        query: Search text, passed through as ``query``.
+        tags_v2: Tag filter; any falsy value is replaced by an empty dict.
+        hybrid_search: Passed through as ``hybrid_search``.
+
+    Returns:
+        The ``documents`` attribute of the search response.
+
+    The remaining request options are fixed here: ``k=2`` (presumably the
+    number of results requested; confirm against the Carbon SDK), tags,
+    vectors and raw file included, ``TEXT`` media type, ``OPENAI``
+    embedding model, and both hybrid tuning weights set to 0.5.
+    """
+    tuning = {
+        "weight_a": 0.5,
+        "weight_b": 0.5,
+    }
+    result = carbon.embeddings.get_documents(
+        query=query,
+        k=2,
+        tags_v2=tags_v2 or {},
+        include_tags=True,
+        include_vectors=True,
+        include_raw_file=True,
+        hybrid_search=hybrid_search,
+        hybrid_search_tuning_parameters=tuning,
+        media_type="TEXT",
+        embedding_model="OPENAI",
+    )
+    return result.documents
 def main():
+    st.title('Data Connector using Carbon SDK')
     # Authenticate with Carbon API
     st.write('### Authenticate with Carbon API')
     token = carbon.auth.get_access_token()
     carbon = Carbon(access_token=token.access_token)  # authenticated object
+    # Connect to Data Source
+    st.write('## Connect to Data Source')
+    service = st.selectbox('Select Data Source for OAuth', ['GOOGLE_DRIVE', 'DROPBOX', 'NOTION'])
+    if st.button('Get OAuth URL'):
+        with st.spinner('Fetching OAuth URL...'):
+            try:
+                if service == "GOOGLE_DRIVE":
+                    oauth_url = get_google_drive_oauth(carbon)
+                elif service == "DROPBOX":
+                    oauth_url = get_dropbox_oauth(carbon)
+                elif service == "NOTION":
+                    oauth_url = get_notion_oauth(carbon)
+                st.write(f"OAuth URL for {service}: {oauth_url}")
+            except Exception as e:
+                st.error(f"An error occurred: {e}")
+    # Sync Data Sources
+    st.write('## Sync Data Sources')
+    sync_service = st.selectbox('Select Data Source for Sync', ['GITHUB', 'GITBOOK', 'S3'])
+    if sync_service == 'GITHUB':
+        github_username = st.text_input('Enter GitHub Username')
+        github_token = st.text_input('Enter GitHub Token', type="password")
+        if st.button('Sync GitHub'):
+            with st.spinner('Syncing GitHub...'):
+                try:
+                    sync_response = sync_github(carbon, github_username, github_token)
+                    st.write(f"GitHub sync response: {sync_response}")
+                except Exception as e:
+                    st.error(f"An error occurred: {e}")
+    elif sync_service == 'GITBOOK':
+        gitbook_access_token = st.text_input('Enter GitBook Access Token', type="password")
+        gitbook_organization = st.text_input('Enter GitBook Organization')
+        if st.button('Sync GitBook'):
+            with st.spinner('Syncing GitBook...'):
+                try:
+                    sync_response = sync_gitbook(carbon, gitbook_access_token, gitbook_organization)
+                    st.write(f"GitBook sync response: {sync_response}")
+                except Exception as e:
+                    st.error(f"An error occurred: {e}")
+    elif sync_service == 'S3':
+        s3_access_key = st.text_input('Enter S3 Access Key')
+        s3_access_key_secret = st.text_input('Enter S3 Access Key Secret', type="password")
+        if st.button('Sync S3'):
+            with st.spinner('Syncing S3...'):
+                try:
+                    sync_response = sync_s3(carbon, s3_access_key, s3_access_key_secret)
+                    st.write(f"S3 sync response: {sync_response}")
+                except Exception as e:
+                    st.error(f"An error occurred: {e}")
+    # List Files in Data Source
+    st.write(f'## List Files in {service}')
+    data_source_id = st.text_input('Enter Data Source ID (leave blank to list all files)')
     if st.button('List Files'):
         with st.spinner('Fetching files...'):
             try:
+                files = list_files(carbon, data_source_id if data_source_id else None, service)
+                if files:
+                    st.write(f"Files in {service}:")
+                    for item in files:
+                        st.write(f"File ID: {item.id}, File Name: {item.name}, File Size: {item.size if hasattr(item, 'size') else 'N/A'}, Last Modified: {item.last_modified if hasattr(item, 'last_modified') else 'N/A'}")
                 else:
+                    st.write("No files found.")
             except Exception as e:
                 st.error(f"An error occurred: {e}")
+    # List All Documents Uploaded by User
+    # st.write('## List All Documents Uploaded by User')
+    # if st.button('List User Documents'):
+    #     with st.spinner('Fetching user documents...'):
+    #         try:
+    #             documents = list_user_documents(carbon)
+    #             if documents:
+    #                 st.write("User Documents:")
+    #                 for doc in documents:
+    #                     st.write(f"Document ID: {doc.id}, Document Name: {doc.name}, Uploaded At: {doc.created_at}")
+    #             else:
+    #                 st.write("No documents found.")
+    #         except Exception as e:
+    #             st.error(f"An error occurred: {e}")
+    # Search in the Connected Data Source
     st.write('### Search in the Connected Data Source')
     query = st.text_input("Enter your query:", value="Type here...")
     if st.button('Search'):
                     st.write("Search results:")
                     for i, doc in enumerate(response_search_chunks['documents']):
                         st.write(f"Document {i+1}:")
+                        st.write(f"Content: {doc['content']}")
+                        st.write(f"Source: {doc['source']}")
+                        st.write(f"Match Percentage: {doc['score'] * 100}%")
                         if 'file_url' in doc:
                             st.markdown(f"[Download {doc['filename']}]({doc['file_url']})")
                         st.write("-------------------------------------------------")
         else:
             st.write("Please enter a query to search.")
+    # Display Search History
+    st.write('## Search History')
     if 'search_history' not in st.session_state:
         st.session_state['search_history'] = []
     if st.session_state['search_history']:
         st.write("Past Searches:")
 if __name__ == '__main__':
     main()