# Hugging Face Space: Effyis / AGDS-UI (status when captured: Sleeping)
# File size: 13,055 Bytes

from utlis.helper import *
import sqlite3
import hashlib

def create_document_id(token, service_selected, document_selected):
    """Return the SHA-256 hex digest identifying a (token, service, document) triple.

    The three values are formatted and joined back-to-back, without any
    separator, and the UTF-8 encoding of that string is hashed.  Because no
    separator is used, two triples whose concatenations are equal (for
    example ``("ab", "c", "d")`` and ``("a", "bc", "d")``) map to the same ID.

    Args:
        token: Value identifying the user/session.
        service_selected: Name of the selected service.
        document_selected: Name of the selected document.

    Returns:
        A 64-character lowercase hexadecimal string.
    """
    raw_key = "{}{}{}".format(token, service_selected, document_selected)
    hasher = hashlib.sha256()
    hasher.update(raw_key.encode())
    return hasher.hexdigest()

def create_database():
    """Create the SQLite cache file and its tables if they do not exist yet.

    Opens ``document_cache.db`` (relative to the current working directory)
    and ensures two tables are present, each keyed by ``document_id``:

    * ``schemas``  -- columns ``document_id`` and ``schema``
    * ``comments`` -- columns ``document_id`` and ``comments``

    Calling this repeatedly is safe because both statements use
    ``CREATE TABLE IF NOT EXISTS``.
    """
    conn = sqlite3.connect('document_cache.db')
    try:
        # ``with conn`` commits on success and rolls back on error; it does
        # NOT close the connection, hence the surrounding try/finally.
        with conn:
            # Create table for schemas
            conn.execute('''CREATE TABLE IF NOT EXISTS schemas
                 (document_id TEXT PRIMARY KEY, schema TEXT)''')
            # Create table for comments
            conn.execute('''CREATE TABLE IF NOT EXISTS comments
                 (document_id TEXT PRIMARY KEY, comments TEXT)''')
    finally:
        # Always release the file handle, even if a statement above failed.
        conn.close()

# Make sure the SQLite cache tables exist before any of the UI code below runs.
create_database()
# Comes from the star import of utlis.helper; presumably seeds the
# st.session_state keys (token, genre, ...) read below -- TODO confirm.
initialize_session_state()

def _get_json(url, payload):
    """Send a GET to `url` with `payload` as a JSON body and return the decoded JSON reply."""
    reply = requests.get(
        url,
        data=json.dumps(payload),
        headers={'Content-Type': 'application/json'},
    )
    return json.loads(reply.text)


def _fetch_services(token):
    """Return the decoded SERVICES_API reply for `token` (used below as a list of service names)."""
    return _get_json(SERVICES_API, {"token": token})


def _fetch_document_names(token, service_name):
    """Return the `documentname` of every entry under "documents" in the DOCUMENT_API reply."""
    listing = _get_json(DOCUMENT_API, {"token": token, "servicename": service_name})
    return [doc["documentname"] for doc in listing.get("documents", [])]


def _multiselect_with_all(options):
    """Render an unlabeled multiselect over `options` and return the chosen items.

    With two or more options an extra "ALL" entry is offered and preselected;
    picking it selects every real option.  A single option is preselected;
    an empty list renders an empty widget.
    """
    if len(options) >= 2:
        picked = st.multiselect("", options + ["ALL"], default="ALL")
        # "ALL" is only a UI shortcut: expand it to the real option list.
        return list(options) if "ALL" in picked else picked
    if len(options) == 1:
        return st.multiselect("", options, default=options[0])
    return st.multiselect("", options)


with st.sidebar:
    st.image("logo.png", width=170)
    st.title("AGDS")
    # Models the user can choose from
    llms = ['gpt-3.5-turbo', 'gemini']
    st.session_state.llm = st.selectbox("Choose LLM", llms)
    st.session_state.genre = st.radio(
        "Choose option",
        ["Select document", "Add document(s)", "Delete service(s)", "Delete document(s)"])

    if st.session_state.genre == "Add document(s)":
        st.title('Add Document(s)')
        add_new_service = st.checkbox("Add new service")
        if add_new_service:
            new_service = st.text_input("Enter service name")
            if new_service and st.button('Add'):
                add_service(st.session_state.token, new_service)
        # Fetch AFTER the optional add so a just-created service is listed.
        services = _fetch_services(st.session_state.token)
        if len(services) > 0:
            st.session_state.service = st.selectbox("Choose Service", services)
            st.session_state.doc_ortext = st.radio("Choose option", ["Documnt", "Text area"])
            if st.session_state.doc_ortext == "Documnt":
                st.session_state.uploaded_files = st.file_uploader(
                    "Upload PDF file", type=["pdf", "txt"], accept_multiple_files=False)
                if st.session_state.uploaded_files:
                    st.session_state.process = st.button('Process')
                    if st.session_state.process:
                        add_document(st.session_state.token, st.session_state.service)
            # The "Text area" inputs are rendered in the main page container,
            # not in the sidebar.

    elif st.session_state.genre == "Select document":
        st.title('Scrape Document')
        services = _fetch_services(st.session_state.token)

        if len(services) > 0:
            st.session_state.service_slected_to_chat = st.selectbox("Choose Service", services)
            history_document = _fetch_document_names(
                st.session_state.token, st.session_state.service_slected_to_chat)
            st.session_state.doument_slected_to_chat = st.selectbox("Choose Documnet", history_document)
            selected_document = st.session_state.doument_slected_to_chat
            if selected_document is None:
                # A selectbox over an empty list yields None; without this
                # guard the suffix check below raised AttributeError.
                st.info("No documents available for this service.")
            elif selected_document.split("_")[-1] == "pdf":
                # Document names ending in "_pdf" get page-range controls.
                page_info = _get_json(
                    GET_NUM_PAGES,
                    {"token": st.session_state.token,
                     "service_name": st.session_state.service_slected_to_chat,
                     "document_name": selected_document})
                number_pages = page_info.get("num_pages")
                page_options = list(range(1, int(number_pages) + 1))

                st.session_state.start_page = st.selectbox("Start Page", page_options)
                st.session_state.end_page = st.selectbox("End Page", page_options, index=len(page_options) - 1)
                st.session_state.method = st.selectbox("Chunking Method", ["chunk_per_page", "personalize_chunking"])
                if st.session_state.method == "personalize_chunking":
                    st.session_state.split_token = st.text_area("Split Token")
            else:
                # Every non-PDF document only supports token-based splitting.
                st.session_state.method = st.selectbox("Chunking Method", ["personalize_chunking"])
                st.session_state.split_token = st.text_area("Split Token")
        else:
            st.session_state.service_slected_to_chat = None

    elif st.session_state.genre == "Delete service(s)":
        st.title('Delete Service(s)')
        services = _fetch_services(st.session_state.token)
        service_slected = _multiselect_with_all(services)
        st.write("You selected:", service_slected)

        if len(service_slected) > 0:
            st.session_state.delete = st.button('Delete')
            if st.session_state.delete:
                delete_service(st.session_state.token, service_slected)

    elif st.session_state.genre == "Delete document(s)":
        st.title('Delete Document(s)')
        services = _fetch_services(st.session_state.token)
        if len(services) > 0:
            service = st.selectbox("Choose Service", services)
            history_document = _fetch_document_names(st.session_state.token, service)
            document_slected_to_delete = _multiselect_with_all(history_document)

            st.write("You selected:", document_slected_to_delete)
            if len(document_slected_to_delete) > 0:
                st.session_state.delete = st.button('Delete')
                if st.session_state.delete:
                    # Delete from the service chosen in THIS branch; the
                    # session-state `service` belongs to the "Add document(s)"
                    # flow and may point at a different service.
                    delete_document(st.session_state.token, service, document_slected_to_delete)

# Stylesheet injected into the page: elements carrying the "title" class
# (the <h1> heading rendered in the main container) never wrap.
css_style = (
    "\n"
    "<style>\n"
    ".title {\n"
    "    white-space: nowrap;\n"
    "}\n"
    "</style>\n"
)

# unsafe_allow_html is required so the <style> tag is emitted as HTML.
st.markdown(css_style, unsafe_allow_html=True)

with st.container():
    st.markdown('<h1 class="title">Augmented Generative Document Scraper</h1>', unsafe_allow_html=True)

    # --- "Add document(s)" via free text -----------------------------------
    if st.session_state.genre == "Add document(s)" and st.session_state.doc_ortext == "Text area":
        st.session_state.name_text_area = st.text_input("Enter name of the text area:")
        st.session_state.text_area = st.text_area("Enter text:")
        if st.session_state.text_area:
            if st.button('Process Text'):
                add_text_document(st.session_state.token, st.session_state.service)

    # --- "Select document": edit schema/comments, then run the extraction ---
    # Both a service and a document must be selected; an empty document
    # selection would otherwise fail on the ".split" below.
    if (st.session_state.genre == "Select document"
            and st.session_state.service_slected_to_chat
            and st.session_state.doument_slected_to_chat):
        document_id = create_document_id(
            st.session_state.token,
            st.session_state.service_slected_to_chat,
            st.session_state.doument_slected_to_chat)
        schema = get_schema(document_id)
        schema = display_and_validate_schema(schema)
        if schema:
            save_schema(document_id, schema)

        # Default used when "Add comments" is unchecked; previously the name
        # was unbound in that case and clicking "Process" raised NameError.
        comments = {}
        if schema and st.checkbox("Add comments"):
            comments = get_comments(document_id)
            if not comments:
                # Nothing cached yet: start empty, one entry per schema key.
                comments = {}
                keys = get_all_keys(schema)
            else:
                keys = list(comments.keys())
            comments = handle_comments(comments, keys)
            save_comments(document_id, comments)

        if schema and st.button('Process'):
            # Document names ending in "_pdf" go to the PDF endpoint with a
            # page range; everything else goes to the text endpoint.
            is_pdf = st.session_state.doument_slected_to_chat.split("_")[-1] == "pdf"
            if is_pdf:
                endpoint = RESPONSE_API
                # split_token is only set by the sidebar for personalize_chunking.
                split_token = (st.session_state.split_token
                               if st.session_state.method == "personalize_chunking" else "")
            else:
                endpoint = RESPONSE_TXT_API
                split_token = st.session_state.split_token

            data = {"token": st.session_state.token,
                    "service_name": st.session_state.service_slected_to_chat,
                    "document_name": st.session_state.doument_slected_to_chat,
                    "method": st.session_state.method,
                    "model": st.session_state.llm,
                    "schema": schema,
                    "comment": comments,
                    "split_token": split_token}
            if is_pdf:
                data["start_page"] = st.session_state.start_page
                data["end_page"] = st.session_state.end_page

            json_data = json.dumps(data)
            headers = {'Content-Type': 'application/json'}
            response = requests.get(endpoint, data=json_data, headers=headers)
            response_data = json.loads(response.text)

            # Handle the result for BOTH endpoints; previously the PDF reply
            # was parsed and then dropped.
            # NOTE(review): assumes RESPONSE_API replies with the same
            # {"status", "json"} shape as RESPONSE_TXT_API -- confirm.
            if response_data.get('status') == 'success':
                json_str = response_data.get("json")

                # Encode this JSON string to bytes, which is required for the download
                json_bytes = json_str.encode('utf-8')
                st.download_button(
                    label="Download JSON",
                    data=json_bytes,
                    file_name="results.json",
                    mime="application/json"
                )
            else:
                st.error("Error in processing document")