Bimal Bhattarai committed on
Commit
5dc421a
0 Parent(s):

first commit

Browse files
.gitattributes ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ *.sqlite3 filter=lfs diff=lfs merge=lfs -text
2
+ *.bin filter=lfs diff=lfs merge=lfs -text
3
+ *.pickle filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ db
2
+ db_bgai
3
+ db2
4
+ models
5
+ source_documents
6
+ app_v2.py
7
+ app_v3.py
8
+ app.py
9
+ ingest.py
10
+ ingest_v3.py
11
+ .env
12
+ README.md
__pycache__/constants.cpython-311.pyc ADDED
Binary file (669 Bytes). View file
 
__pycache__/constants.cpython-39.pyc ADDED
Binary file (471 Bytes). View file
 
app_palm.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain import PromptTemplate, LLMChain
2
+ from langchain.llms import CTransformers, HuggingFacePipeline, GooglePalm
3
+ import os
4
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
5
+ from langchain.vectorstores import Chroma
6
+ from langchain.chains import RetrievalQA
7
+ from langchain.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceEmbeddings
8
+ from io import BytesIO
9
+ from langchain.document_loaders import PyPDFLoader
10
+ import gradio as gr
11
+ import chromadb
12
+ from dotenv import load_dotenv
13
+ from constants import CHROMA_SETTINGS
14
+ from io import BytesIO
15
+ import gradio as gr
16
+ import torch
17
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, AutoModelForSeq2SeqLM, AutoModel
18
+ import gc
19
+
20
+ from langchain.schema.runnable import RunnableLambda, RunnablePassthrough
21
+ from langchain.chat_models import ChatGooglePalm
22
+ import google.generativeai as genai
23
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings
24
+
25
# Release unreachable Python objects and any cached CUDA allocations before
# the models below are created.
gc.collect()
torch.cuda.empty_cache()

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load the .env file *before* anything reads the environment or builds a
# client, so a missing/empty file is reported with a clear message instead
# of surfacing later as an unrelated configuration error.
if not load_dotenv():
    print("Could not load .env file or it is empty. Please check if it exists and is readable.")
    # SystemExit works without relying on the `exit` helper that the `site`
    # module injects for interactive sessions.
    raise SystemExit(1)

#model= AutoModelForCausalLM.from_pretrained(local_llm, device_map= device)
# NOTE(review): presumably picks up its API key from the environment
# (GOOGLE_API_KEY) — confirm against the LangChain docs.
llm = ChatGooglePalm()
#llm= HuggingFacePipeline.from_model_id(model_id=local_llm, task='text-generation', device=0, pipeline_kwargs={"max_new_tokens": 1000})

# Runtime configuration taken from the environment.
embeddings_model_name = os.environ.get('EMBEDDINGS_MODEL_NAME')
persist_directory = os.environ.get('PERSIST_DIRECTORY')
# Number of retrieved chunks passed to the retriever as "k"; defaults to 4.
target_source_chunks = int(os.environ.get('TARGET_SOURCE_CHUNKS', 4))
google_api_key = os.environ.get('GOOGLE_API_KEY')

print("Loading embeddings model...")
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
#embeddings= pipeline("feature-extraction", model="WhereIsAI/UAE-Large-V1")

# Chroma client backed by the on-disk store at `persist_directory`.
chroma_client = chromadb.PersistentClient(settings=CHROMA_SETTINGS, path=persist_directory)
db = Chroma(
    persist_directory=persist_directory,
    embedding_function=embeddings,
    client_settings=CHROMA_SETTINGS,
    client=chroma_client,
)
49
+
50
# Prompt text for the question-answering chain; {context} and {question}
# are the two placeholders declared on the PromptTemplate below.
prompt_template = """Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

prompt = PromptTemplate(
    input_variables=['context', 'question'],
    template=prompt_template,
)

# Retriever over the Chroma store, returning `target_source_chunks` hits.
retriever = db.as_retriever(search_kwargs=dict(k=target_source_chunks))

# Extra keyword arguments forwarded to the chain (the custom prompt).
chain_type_kwargs = dict(prompt=prompt)

# Text input shown in the Gradio UI for the user's question.
input_gradio = gr.Text(
    placeholder="Enter your question here",
    label="Prompt",
    show_label=False,
    container=False,
    max_lines=2,
)
74
+
75
+
76
_qa_chain = None  # shared RetrievalQA chain, created on the first request


def _get_qa_chain():
    """Return the shared RetrievalQA chain, building it on first use."""
    global _qa_chain
    if _qa_chain is None:
        _qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=retriever,
            return_source_documents=False,
            chain_type_kwargs=chain_type_kwargs,
            verbose=True,
        )
    return _qa_chain


def get_response(input_gradio):
    """Answer a user question with the retrieval-augmented QA chain.

    Args:
        input_gradio: The question text entered in the Gradio input box.

    Returns:
        The chain's ``'result'`` string, or a short prompt asking for a
        question when the input is empty or only whitespace.
    """
    query = (input_gradio or "").strip()
    if not query:
        # Nothing to retrieve or answer; skip the embedding/LLM round trip.
        return "Please enter a question."
    # The chain depends only on module-level objects, so it is built once
    # and reused instead of being reconstructed for every request.
    response = _get_qa_chain()(query)
    return response['result']
81
+
82
# Gradio UI: one text input wired to get_response, plain-text output.
iface = gr.Interface(
    fn=get_response,
    inputs=input_gradio,
    outputs="text",
    title="Tsetlin Machine Chatbot",
    description="A chatbot that uses the LLM to answer anything regarding TM",
    allow_flagging='never',
)

# Interactive questions and answers.
# Only start the web server when run as a script, so importing this module
# (e.g. to reuse `iface` or `get_response`) does not block on a server.
if __name__ == "__main__":
    iface.launch()
93
+
94
+
constants.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from dotenv import load_dotenv
3
+ from chromadb.config import Settings
4
+
5
# Make variables from a local .env file available through os.environ.
load_dotenv()

# Directory of the Chroma database, taken from the environment.
PERSIST_DIRECTORY = os.environ.get('PERSIST_DIRECTORY')
if not PERSIST_DIRECTORY:
    # An empty value is as unusable as a missing one, so reject both.
    # RuntimeError is still caught by any caller handling `Exception`.
    raise RuntimeError(
        "PERSIST_DIRECTORY is not set. Add the Chroma database directory "
        "path to the environment or the .env file."
    )

# Chroma settings shared by the modules that import this file.
CHROMA_SETTINGS = Settings(
    persist_directory=PERSIST_DIRECTORY,
    anonymized_telemetry=False,
)
db_google/chroma.sqlite3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8eba354226ba88c591f1c149f3e965403e48dba1f085f71c7713bae1bd895f1f
3
+ size 39329792
db_google/e072cde8-61bc-4eaa-9876-6068c95e6f74/data_level0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b2cb81ecbebf24556e4af8b17c5f6606e0b4a4b428c64814f74c798c38ec50e
3
+ size 16060000
db_google/e072cde8-61bc-4eaa-9876-6068c95e6f74/header.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2cd215c9f6b14de8a55607f23f90af168c5493da60c02482ee8f7087809be863
3
+ size 100
db_google/e072cde8-61bc-4eaa-9876-6068c95e6f74/index_metadata.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2dc24cfc543dfc34ed403220c5ffa07e3d2caf5143d6eb2cb731d1dcd46d6336
3
+ size 288034
db_google/e072cde8-61bc-4eaa-9876-6068c95e6f74/length.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ad4eb880e8012f662c48d0a8492c6006672085d2e3345ab37d4c05076c9a747
3
+ size 20000
db_google/e072cde8-61bc-4eaa-9876-6068c95e6f74/link_lists.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b22486b1e0558438145eb5c5c3c063e3ec5f38af73a3a04639d53d13ea0b50a
3
+ size 44752
requirements.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ langchain==0.0.274
2
+ gradio==3.50.2
3
+ gpt4all==1.0.8
4
+ chromadb==0.4.7
5
+ urllib3==2.0.4
6
+ PyMuPDF==1.23.1
7
+ python-dotenv==1.0.0
8
+ unstructured==0.10.8
9
+ extract-msg==0.45.0
10
+ tabulate==0.9.0
11
+ pandoc==2.3
12
+ pypandoc==1.11
13
+ tqdm==4.66.1
14
+ sentence_transformers==2.2.2
15
+ pypdf
16
+ google-generativeai
17
+ protobuf==3.20.*