raseel-zymr committed on
Commit
5aee298
1 Parent(s): f70522c

Added logic to upload a file, ask questions, and get answers

Browse files
Files changed (2) hide show
  1. .gitignore +1 -0
  2. app.py +49 -30
.gitignore CHANGED
@@ -1 +1,2 @@
1
  __pycache__
 
 
1
  __pycache__
2
+ .streamlit
app.py CHANGED
@@ -1,5 +1,6 @@
1
  import os
2
  import streamlit as st
 
3
 
4
  #for textfiles
5
  from langchain.document_loaders import TextLoader
@@ -20,49 +21,67 @@ os.environ["HUGGINGFACEHUB_API_TOKEN"] = st.secrets["hf_api_key"]
20
  st.title('Document Q&A - Ask anything in your Document')
21
  st.sidebar.subheader('Upload document')
22
  uploaded_file = st.sidebar.file_uploader("Upload File",type=['txt','pdf'])
 
 
 
 
 
 
23
  # url2 = "https://github.com/fabiomatricardi/cdQnA/raw/main/KS-all-info_rev1.txt"
24
  # res = requests.get(url2)
25
  # with open("KS-all-info_rev1.txt", "w") as f:
26
  # f.write(res.text)
 
 
 
 
 
 
27
 
28
- st.subheader('Enter query')
29
- query = st.text_input('Ask anything about the Document you uploaded')
 
30
 
31
- st.subheader('Answer')
32
- st.write('Answer from document')
33
 
34
- # # Document Loader
35
- # loader = TextLoader('./KS-all-info_rev1.txt')
36
- # documents = loader.load()
37
- # import textwrap
38
- # def wrap_text_preserve_newlines(text, width=110):
39
- # # Split the input text into lines based on newline characters
40
- # lines = text.split('\n')
41
- # # Wrap each line individually
42
- # wrapped_lines = [textwrap.fill(line, width=width) for line in lines]
43
- # # Join the wrapped lines back together using newline characters
44
- # wrapped_text = '\n'.join(wrapped_lines)
45
- # return wrapped_text
46
 
47
- # # Text Splitter
48
- # text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
49
- # docs = text_splitter.split_documents(documents)
50
 
51
- # # Embeddings
52
- # embeddings = HuggingFaceEmbeddings()
 
 
 
 
 
 
 
53
 
54
- # #Create the vectorized db
55
- # db = FAISS.from_documents(docs, embeddings)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
57
- # llm=HuggingFaceHub(repo_id="google/flan-t5-xl", model_kwargs={"temperature":0, "max_length":512})
58
- # llm2=HuggingFaceHub(repo_id="declare-lab/flan-alpaca-large", model_kwargs={"temperature":0, "max_length":512})
59
- # chain = load_qa_chain(llm2, chain_type="stuff")
60
 
61
- # # Sample question
62
- # # query = "What the actual issues and drawbacks ?"
63
 
64
- # # docs = db.similarity_search(query)
65
- # # chain.run(input_documents=docs, question=query)
66
 
67
 
68
  # # PDFs
 
1
  import os
2
  import streamlit as st
3
+ from io import StringIO
4
 
5
  #for textfiles
6
  from langchain.document_loaders import TextLoader
 
21
  st.title('Document Q&A - Ask anything in your Document')
22
  st.sidebar.subheader('Upload document')
23
  uploaded_file = st.sidebar.file_uploader("Upload File",type=['txt','pdf'])
24
+ with st.sidebar.expander('File'):
25
+ if(uploaded_file):
26
+ st.info(uploaded_file.name)
27
+
28
+
29
+
30
  # url2 = "https://github.com/fabiomatricardi/cdQnA/raw/main/KS-all-info_rev1.txt"
31
  # res = requests.get(url2)
32
  # with open("KS-all-info_rev1.txt", "w") as f:
33
  # f.write(res.text)
34
+ if (uploaded_file):
35
+ st.subheader('Enter query')
36
+ query = st.text_input('Ask anything about the Document you uploaded')
37
+ stringio = StringIO(uploaded_file.getvalue().decode("utf-8"))
38
+ with open(uploaded_file.name, "w") as f:
39
+ f.write(stringio.read())
40
 
41
+ if(uploaded_file):
42
+ loader = TextLoader(uploaded_file.name)
43
+ documents = loader.load()
44
 
45
+ # Document Loader
46
+ #loader = TextLoader('./KS-all-info_rev1.txt')
47
 
48
+ # loader = TextLoader(os.path.join("./", uploaded_file.name))
 
 
 
 
 
 
 
 
 
 
 
49
 
 
 
 
50
 
51
+ # import textwrap
52
+ # def wrap_text_preserve_newlines(text, width=110):
53
+ # # Split the input text into lines based on newline characters
54
+ # lines = text.split('\n')
55
+ # # Wrap each line individually
56
+ # wrapped_lines = [textwrap.fill(line, width=width) for line in lines]
57
+ # # Join the wrapped lines back together using newline characters
58
+ # wrapped_text = '\n'.join(wrapped_lines)
59
+ # return wrapped_text
60
 
61
+ # Text Splitter
62
+ text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
63
+ docs = text_splitter.split_documents(documents)
64
+
65
+ # Embeddings
66
+ embeddings = HuggingFaceEmbeddings()
67
+
68
+ #Create the vectorized db
69
+ db = FAISS.from_documents(docs, embeddings)
70
+
71
+ #llm=HuggingFaceHub(repo_id="google/flan-t5-xl", model_kwargs={"temperature":0, "max_length":512})
72
+ llm2=HuggingFaceHub(repo_id="declare-lab/flan-alpaca-large", model_kwargs={"temperature":0, "max_length":512})
73
+ chain = load_qa_chain(llm2, chain_type="stuff")
74
+
75
+ # Sample question
76
+ #query = "What the actual issues and drawbacks ?"
77
+
78
+ docs = db.similarity_search(query)
79
+ answer = chain.run(input_documents=docs, question=query)
80
 
 
 
 
81
 
 
 
82
 
83
+ st.subheader('Answer')
84
+ st.write(answer)
85
 
86
 
87
  # # PDFs