darshan8950 committed on
Commit
2a0365d
•
1 Parent(s): dc5fa61

Upload 3 files

Browse files
Files changed (3) hide show
  1. borrower_data.csv +0 -0
  2. requirements.txt +5 -0
  3. train.py +51 -0
borrower_data.csv ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ langchain
2
+ ctransformers
3
+ sentence-transformers
4
+ faiss-cpu
5
+ streamlit==1.22.0
train.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from langchain.document_loaders.csv_loader import CSVLoader
3
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
4
+ from langchain.embeddings import HuggingFaceEmbeddings
5
+ from langchain.vectorstores import FAISS
6
+ from langchain.llms import CTransformers
7
+ from langchain.chains import ConversationalRetrievalChain
8
+ import streamlit as st
9
+ import tempfile
10
+
11
def main():
    """Render the "Talk with your CSV" Streamlit page and answer one query.

    The page shows a sidebar CSV uploader and a text box. When the
    "Submit Query" button is pressed, the uploaded CSV is loaded with
    ``CSVLoader``, split into chunks, embedded with
    ``sentence-transformers/all-MiniLM-L6-v2``, indexed in FAISS (the index
    is also saved under ``vectorstore/db_faiss``), and the question is
    answered by a local Llama-2 model through a
    ``ConversationalRetrievalChain``. The chain's result is written to the
    page.

    Nothing is returned; all output goes through Streamlit.
    """
    import os  # local import: only needed to clean up the temporary CSV

    st.set_page_config(page_title="👨‍💻 Talk with your CSV")
    st.title("👨‍💻 Talk with your CSV")
    st.write("Please insert your link.")
    uploaded_file = st.sidebar.file_uploader("Upload your Data", type="csv")

    query = st.text_input("Send a Message")
    if not st.button("Submit Query", type="primary"):
        return

    # Guard clauses: give feedback instead of silently doing nothing or
    # running the whole (slow) pipeline on an empty question.
    if not uploaded_file:
        st.warning("Please upload a CSV file before submitting a query.")
        return
    if not query or not query.strip():
        st.warning("Please enter a message before submitting.")
        return

    DB_FAISS_PATH = "vectorstore/db_faiss"

    # CSVLoader reads from a path, so the upload is spilled to a temp file.
    # The file is closed (and therefore flushed) before it is read back.
    with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
        tmp_file.write(uploaded_file.getvalue())
        tmp_file_path = tmp_file.name

    try:
        loader = CSVLoader(
            file_path=tmp_file_path,
            encoding="utf-8",
            csv_args={"delimiter": ","},
        )
        data = loader.load()
    finally:
        # delete=False means nobody else removes it; avoid leaking one
        # temp file per submitted query.
        os.unlink(tmp_file_path)

    st.write(data)
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)
    text_chunks = text_splitter.split_documents(data)

    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )

    docsearch = FAISS.from_documents(text_chunks, embeddings)
    docsearch.save_local(DB_FAISS_PATH)

    llm = CTransformers(
        model="models/llama-2-7b-chat.ggmlv3.q4_0.bin",
        model_type="llama",
        max_new_tokens=512,
        temperature=0.1,
    )

    qa = ConversationalRetrievalChain.from_llm(llm, retriever=docsearch.as_retriever())

    # The chain has two input keys ("question" and "chat_history"); a bare
    # string is rejected when no memory is attached, so pass both
    # explicitly. Each submission is treated as a fresh conversation.
    result = qa({"question": query, "chat_history": []})
    st.write(result)
48
+
49
# Launch the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
51
+