Hemasagar committed on
Commit
91945f3
·
verified ·
1 Parent(s): 79b1b39

Upload 4 files

Browse files
Files changed (4) hide show
  1. app.py +29 -0
  2. config.yml +11 -0
  3. main.py +51 -0
  4. requirements.txt +13 -0
app.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from ingest import run_ingest
2
+ from llm.wrapper import setup_qa_chain
3
+ from llm.wrapper import query_embeddings
4
+ import timeit
5
+
6
+
7
+ import streamlit as st
8
def main():
    """Render the Streamlit page: upload widget, ingestion trigger and Q&A form.

    The page has two independent actions:

    * "Extract Data" calls ``run_ingest()`` (imported from ``ingest``).
    * "Generate" builds a QA chain with ``setup_qa_chain()`` (imported from
      ``llm.wrapper``), sends the typed query to it and displays the
      ``'result'`` entry of the response.

    Returns:
        None. All output is rendered through Streamlit widgets.
    """
    st.set_page_config(page_title="Document seemless process ")
    st.title("Auto text extraction with AI Planet ")
    st.subheader("I can help you in extracting text from pdf,documents ....")

    # NOTE(review): the uploaded files are collected here but never passed to
    # run_ingest(), which takes no arguments. Presumably ingestion reads from a
    # fixed data directory; confirm whether uploads must be saved there first.
    uploaded_files = st.file_uploader(
        "Upload text here for now, only PDF files allowed ",
        type=["pdf", "txt"],
        accept_multiple_files=True,
    )

    # Each button is tested directly instead of sharing one `submit` variable,
    # so the two actions cannot be confused with each other.
    if st.button("Extract Data"):
        with st.spinner('Wait for it...'):
            run_ingest()

    question = st.text_input(
        "Please wirte a Query: ",
        key="Please ask question on uploaded pdf",
    )

    if st.button('Generate'):
        if not question or not question.strip():
            # Do not spend time building the chain for an empty query.
            st.warning("Please enter a query before generating an answer.")
        else:
            with st.spinner('Wait for it...'):
                qa_chain = setup_qa_chain()
                response = qa_chain({'query': question})
            answer = {'answer': response['result']}
            st.subheader("Answer:")
            st.write(answer)
            st.success("Hope I was able to save your time❤️")


# Invoke main only when the file is run as a script.
if __name__ == '__main__':
    main()
config.yml ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ RETURN_SOURCE_DOCUMENTS: True
2
+ VECTOR_COUNT: 2
3
+ CHUNK_SIZE: 300
4
+ CHUNK_OVERLAP: 30
5
+ DATA_PATH: 'data/'
6
+ DB_FAISS_PATH: 'vectorstore/chroma'
7
+ MODEL_TYPE: 'mistral'
8
+ MODEL_BIN_PATH: 'models/mistral-7b-instruct-v0.1.Q5_K_M.gguf'
9
+ EMBEDDINGS: 'sentence-transformers/all-mpnet-base-v2'
10
+ MAX_NEW_TOKENS: 2048
11
+ TEMPERATURE: 0.00
main.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import timeit
2
+ import argparse
3
+ from llm.wrapper import setup_qa_chain
4
+ from llm.wrapper import query_embeddings
5
+ import streamlit as lt
6
+
7
+ import streamlit as st
8
+
9
+ # Streamlit is an open-source framework for building web applications with interactive data visualizations and machine learning models. It must be installed in the Python environment before it can be imported.
10
+ # Once Streamlit is installed, import it into the Python script with an import statement.
11
+ # def main():
12
+
13
+
14
+
15
+ # Upload the Invoices (pdf files)...
16
+
17
+
18
def parse_bool(value):
    """Convert a command-line string such as ``"True"``/``"false"`` to a bool.

    ``argparse`` with ``type=bool`` treats every non-empty string (including
    ``"False"``) as ``True``; this converter interprets the text instead.

    Args:
        value: The raw argument text (or an actual ``bool``).

    Returns:
        The parsed boolean.

    Raises:
        argparse.ArgumentTypeError: If the text is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ('true', 't', 'yes', 'y', '1'):
        return True
    if text in ('false', 'f', 'no', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError(f'Expected True or False, got {value!r}')


def build_parser():
    """Build the command-line parser for the query script.

    Returns:
        An ``argparse.ArgumentParser`` with an optional positional ``input``
        query and a ``--semantic_search`` boolean option.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('input',
                        type=str,
                        # nargs='?' makes the positional optional so that the
                        # default below is actually reachable.
                        nargs='?',
                        default='What is the invoice number value?',
                        help='Enter the query to pass into the LLM')
    parser.add_argument('--semantic_search',
                        type=parse_bool,
                        default=False,
                        help='Enter True if you want to run semantic search, else False')
    return parser


def main(argv=None):
    """Run one query from the command line and print the result and timing.

    With ``--semantic_search True`` the query is passed to
    ``query_embeddings``; otherwise it is passed to the chain returned by
    ``setup_qa_chain`` and the ``'result'`` entry of the response is printed.
    Both helpers are imported from ``llm.wrapper``.

    Args:
        argv: Optional argument list; ``None`` means use ``sys.argv[1:]``.
    """
    args = build_parser().parse_args(argv)

    start = timeit.default_timer()
    if args.semantic_search:
        semantic_search = query_embeddings(args.input)
        print(f'Semantic search: {semantic_search}')
        print('='*50)
    else:
        qa_chain = setup_qa_chain()
        response = qa_chain({'query': args.input})
        print(f'\nAnswer: {response["result"]}')
        print('=' * 50)
    # The former `if submit:` Streamlit block was removed: `submit` was never
    # defined in this script (NameError), and the answer is already printed.
    end = timeit.default_timer()

    print(f"Time to retrieve answer: {end - start}")


if __name__ == "__main__":
    main()
requirements.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ streamlit==1.29.0
2
+ langchain==0.1.13
3
+ # unstructured==0.12.3
4
+ tiktoken==0.5.2
5
+ pypdf==4.1.0
6
+ sentence-transformers==2.5.1
7
+ langchain-community
8
+ langchain-chroma
9
+ numpy==1.26.1
10
+ python-box
11
+ llama-cpp-python==0.2.76
12
+ # pdfservices-sdk==4.0.0
13
+ watchdog==4.0.1