Upload 4 files
- app.py +29 -0
- config.yml +11 -0
- main.py +51 -0
- requirements.txt +13 -0
app.py
ADDED
@@ -0,0 +1,29 @@
from ingest import run_ingest
from llm.wrapper import setup_qa_chain
from llm.wrapper import query_embeddings
import timeit

import streamlit as st

def main():
    st.set_page_config(page_title="Document seamless process")
    st.title("Auto text extraction with AI Planet")
    st.subheader("I can help you extract text from PDF documents ...")
    pdf = st.file_uploader("Upload your documents here; only PDF and TXT files are allowed",
                           type=["pdf", "txt"], accept_multiple_files=True)
    submit = st.button("Extract Data")
    if submit:
        with st.spinner('Wait for it...'):
            run_ingest()
    question = st.text_input("Please write a query: ", key="Please ask question on uploaded pdf")
    generate = st.button('Generate')
    if generate:
        with st.spinner('Wait for it...'):
            qa_chain = setup_qa_chain()
            response = qa_chain({'query': question})
            answer = {'answer': response['result']}
            st.subheader("Answer:")
            st.write(answer)
            st.success("Hope I was able to save your time ❤️")

# Invoke the main function
if __name__ == '__main__':
    main()
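Note: `ingest.run_ingest`, `llm.wrapper.setup_qa_chain`, and `llm.wrapper.query_embeddings` are imported above but are not part of this upload. Below is a minimal sketch of what `llm/wrapper.py` might look like, assuming a LangChain RetrievalQA chain over a persisted Chroma store and a llama-cpp Mistral model, as suggested by config.yml and requirements.txt; the module layout and helper names are assumptions, not the actual code.

# llm/wrapper.py -- hypothetical sketch, not the uploaded module
import box
import yaml
from langchain.chains import RetrievalQA
from langchain_chroma import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import LlamaCpp

# Assumption: config.yml is read with python-box, which is listed in requirements.txt
with open('config.yml', 'r', encoding='utf8') as f:
    cfg = box.Box(yaml.safe_load(f))

def _build_vectorstore():
    # Reopen the persisted vector store with the same embedding model used at ingest time
    embeddings = HuggingFaceEmbeddings(model_name=cfg.EMBEDDINGS)
    return Chroma(persist_directory=cfg.DB_FAISS_PATH, embedding_function=embeddings)

def setup_qa_chain():
    # Local GGUF model served through llama-cpp-python
    llm = LlamaCpp(model_path=cfg.MODEL_BIN_PATH,
                   temperature=cfg.TEMPERATURE,
                   max_tokens=cfg.MAX_NEW_TOKENS)
    retriever = _build_vectorstore().as_retriever(search_kwargs={'k': cfg.VECTOR_COUNT})
    return RetrievalQA.from_chain_type(llm=llm,
                                       chain_type='stuff',
                                       retriever=retriever,
                                       return_source_documents=cfg.RETURN_SOURCE_DOCUMENTS)

def query_embeddings(query):
    # Plain similarity search against the persisted vector store
    docs = _build_vectorstore().similarity_search(query, k=cfg.VECTOR_COUNT)
    return [doc.page_content for doc in docs]

RetrievalQA returns its answer under the 'result' key, which is what app.py reads from the response.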
config.yml
ADDED
@@ -0,0 +1,11 @@
RETURN_SOURCE_DOCUMENTS: True
VECTOR_COUNT: 2
CHUNK_SIZE: 300
CHUNK_OVERLAP: 30
DATA_PATH: 'data/'
DB_FAISS_PATH: 'vectorstore/chroma'
MODEL_TYPE: 'mistral'
MODEL_BIN_PATH: 'models/mistral-7b-instruct-v0.1.Q5_K_M.gguf'
EMBEDDINGS: 'sentence-transformers/all-mpnet-base-v2'
MAX_NEW_TOKENS: 2048
TEMPERATURE: 0.00
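These settings drive both ingestion and querying. Since `ingest.run_ingest` is also missing from this upload, here is a minimal sketch of an `ingest.py` that would honor DATA_PATH, CHUNK_SIZE, CHUNK_OVERLAP, EMBEDDINGS, and DB_FAISS_PATH, assuming LangChain's PDF loaders and a persisted Chroma store; the file name, loader choice, and structure are assumptions.

# ingest.py -- hypothetical sketch, not the uploaded module
import box
import yaml
from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_chroma import Chroma

# Assumption: config.yml is read with python-box, which is listed in requirements.txt
with open('config.yml', 'r', encoding='utf8') as f:
    cfg = box.Box(yaml.safe_load(f))

def run_ingest():
    # Load every PDF found under DATA_PATH
    loader = DirectoryLoader(cfg.DATA_PATH, glob='*.pdf', loader_cls=PyPDFLoader)
    documents = loader.load()

    # Split into overlapping chunks sized by the config
    splitter = RecursiveCharacterTextSplitter(chunk_size=cfg.CHUNK_SIZE,
                                              chunk_overlap=cfg.CHUNK_OVERLAP)
    chunks = splitter.split_documents(documents)

    # Embed and persist the chunks so the QA chain can retrieve them later
    embeddings = HuggingFaceEmbeddings(model_name=cfg.EMBEDDINGS)
    Chroma.from_documents(documents=chunks,
                          embedding=embeddings,
                          persist_directory=cfg.DB_FAISS_PATH)

Note that DB_FAISS_PATH points at 'vectorstore/chroma', so a Chroma store is assumed here despite the FAISS-flavored key name.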
main.py
ADDED
@@ -0,0 +1,51 @@
import timeit
import argparse
from llm.wrapper import setup_qa_chain
from llm.wrapper import query_embeddings

# Upload the Invoices (pdf files)...

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('input',
                        type=str,
                        nargs='?',
                        default='What is the invoice number value?',
                        help='Enter the query to pass into the LLM')
    parser.add_argument('--semantic_search',
                        action='store_true',
                        help='Pass this flag to run semantic search instead of the QA chain')
    args = parser.parse_args()

    start = timeit.default_timer()
    if args.semantic_search:
        semantic_search = query_embeddings(args.input)
        print(f'Semantic search: {semantic_search}')
        print('=' * 50)
    else:
        qa_chain = setup_qa_chain()
        response = qa_chain({'query': args.input})
        print(f'\nAnswer: {response["result"]}')
        print('=' * 50)

    end = timeit.default_timer()
    print(f"Time to retrieve answer: {end - start}")
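Assuming the dependencies are installed and the vector store has already been built by run_ingest, main.py can be exercised from the command line like this (the query text is only an example):

python main.py "What is the invoice number value?"
python main.py "What is the invoice number value?" --semantic_search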
requirements.txt
ADDED
@@ -0,0 +1,13 @@
streamlit==1.29.0
langchain==0.1.13
# unstructured==0.12.3
tiktoken==0.5.2
pypdf==4.1.0
sentence-transformers==2.5.1
langchain-community
langchain-chroma
numpy==1.26.1
python-box
llama-cpp-python==0.2.76
# pdfservices-sdk==4.0.0
watchdog==4.0.1
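To try the app locally, the missing `ingest` and `llm.wrapper` modules, the `data/` folder, and the GGUF model under `models/` would also be needed; with those in place:

pip install -r requirements.txt
streamlit run app.py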