Sujithanumala committed on
Commit 13b889e
1 Parent(s): 144a916

Upload 13 files

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+faiss_index/index.faiss filter=lfs diff=lfs merge=lfs -text
DB_Search.py ADDED
@@ -0,0 +1,17 @@
+from langchain_experimental.agents.agent_toolkits import create_csv_agent
+from langchain_community.utilities.sql_database import SQLDatabase
+from langchain_community.agent_toolkits import create_sql_agent
+import google.generativeai as genai
+from langchain_google_genai import ChatGoogleGenerativeAI
+import os
+
+
+os.environ["GOOGLE_API_KEY"] = "AIzaSyCsYPr4TiZEjCTcrOP_Itw3CiIYk7sKykc"
+genai.configure(api_key="AIzaSyCsYPr4TiZEjCTcrOP_Itw3CiIYk7sKykc")
+
+model = ChatGoogleGenerativeAI(model="gemini-pro",
+                               temperature=0)
+agent = create_csv_agent(llm=model, path='Training.csv', verbose=True, allow_dangerous_code=True)
+result = agent.invoke('Return the training No and Trainer name where the start date is May')
+print(result['output'])
+# print(agent.invoke('I need to get the training number and training start and end date for the records which are present or null and the trainer is prefixed with ank and the startday is a weekday and training_no is divisible by 10'))
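Note: the script hardcodes the Gemini API key. Below is a minimal sketch (not part of the commit) of the same CSV-agent call reading the key from the GOOGLE_API_KEY environment variable instead; the column names come from Training.csv further down.

import os
from langchain_experimental.agents.agent_toolkits import create_csv_agent
from langchain_google_genai import ChatGoogleGenerativeAI

# Assumes GOOGLE_API_KEY is already exported in the shell; fails fast if it is not.
api_key = os.environ["GOOGLE_API_KEY"]

model = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0, google_api_key=api_key)
agent = create_csv_agent(llm=model, path="Training.csv", verbose=True, allow_dangerous_code=True)
print(agent.invoke("Return the Training_No and Trainer name where the start date is in May")["output"])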
Training.csv ADDED
@@ -0,0 +1,24 @@
+Training_No,Training_startDate,Training_EndDate,Trainer,Training_Attendance
+1,06-05-2024,06-05-2024,Ankita,Present
+2,07-05-2024,07-05-2024,Pradeep,Absent
+3,08-05-2024,08-05-2024,Pradeep,Absent
+4,09-05-2024,09-05-2024,Pradeep,Absent
+5,10-05-2024,10-05-2024,Pradeep,Absent
+6,11-05-2024,11-05-2024,Pradeep,Absent
+7,12-05-2024,12-05-2024,Pradeep,Absent
+8,13-05-2024,13-05-2024,Pradeep,Absent
+9,14-05-2024,14-05-2024,Pradeep,Absent
+10,15-05-2024,15-05-2024,Pradeep,Absent
+11,16-05-2024,16-05-2024,Pradeep,Absent
+12,17-05-2024,17-05-2024,Pradeep,Absent
+13,18-05-2024,18-05-2024,Ankira,Absent
+14,19-05-2024,19-05-2024,Ankira,Absent
+15,20-05-2024,20-05-2024,Ankira,
+16,21-05-2024,21-05-2024,Ankira,
+17,22-05-2024,22-05-2024,Ankira,
+18,23-05-2024,23-05-2024,Ankira,
+19,24-05-2024,24-05-2024,,Present
+20,25-05-2024,25-05-2024,,Present
+21,26-05-2024,26-05-2024,,Present
+22,27-05-2024,27-05-2024,,Present
+23,28-05-2024,28-05-2024,,Present
__init__.py ADDED
File without changes
__pycache__/__init__.cpython-312.pyc ADDED
Binary file (171 Bytes).
__pycache__/main.cpython-312.pyc ADDED
Binary file (4.88 kB).
app.py ADDED
@@ -0,0 +1,45 @@
+import streamlit as st
+import os
+from main import response_generator, query_csv_db
+from PIL import Image
+import pandas as pd
+
+im = Image.open("logo.png")
+st.set_page_config(
+    page_title="Oracle Wiki",
+    page_icon=im,
+    layout='centered'
+)
+st.title("Oracle Wiki")
+options = st.selectbox('Choose One', options=['Query an SQL DB', 'OIC Chatbot', 'SOA Chatbot', 'OSB Chatbot', 'Query a CSV DB'], index=1)
+
+if options in ['OIC Chatbot', 'SOA Chatbot', 'OSB Chatbot']:
+    if "messages" not in st.session_state:
+        st.session_state.messages = []
+
+    for message in st.session_state.messages:
+        with st.chat_message(message["role"], avatar=im):
+            st.markdown(message["content"])
+
+    if prompt := st.chat_input("What is up?"):
+        st.session_state.messages.append({"role": "user", "content": prompt})
+        with st.chat_message("user", avatar=im):
+            st.markdown(prompt)
+
+        with st.chat_message("assistant", avatar=im):
+            response, output_docs = response_generator(prompt, '')
+            st.markdown(response)
+
+
+        st.session_state.messages.append({"role": "assistant", "content": response})
+elif options == 'Query a CSV DB':
+    csv_files = st.file_uploader('Enter csv files to Query', accept_multiple_files=True)
+    if csv_files:
+        query = st.text_input('**:red[Enter the Query]**')
+        if st.button(':red[Submit]'):
+            result = query_csv_db(csv_files, query)['output']
+            st.write(result)
+
+
+else:
+    st.selectbox('Choose the database you want to Query', options=['HCM OFS', 'SCM OFS'])
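Note: the OIC/SOA/OSB branch always calls response_generator(prompt, '') even though earlier turns are kept in st.session_state.messages. A hedged sketch of a helper that could feed that history through (build_chat_history is hypothetical, not part of the commit):

def build_chat_history(messages):
    # messages: the list of {"role": ..., "content": ...} dicts kept in st.session_state.messages
    return "\n".join(f"{m['role'].capitalize()}: {m['content']}" for m in messages)

# inside the assistant block one could then call, for example:
# response, output_docs = response_generator(prompt, build_chat_history(st.session_state.messages))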
faiss_index/index.faiss ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eb0c2686572eff292bdb06000c6c49a7ba08f4579a68952a16589e0ae5725d1c
+size 1738797
faiss_index/index.pkl ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1cb85b684526142700928195952dae02ecc1d212215dda84f9e9dcd7a000dd81
+size 5709230
get_OIC_Documents.py ADDED
@@ -0,0 +1,38 @@
+import requests
+from bs4 import BeautifulSoup
+
+response = requests.get("https://docs.oracle.com/en/cloud/paas/integration-cloud/books.html")
+# Check if the request was successful
+if response.status_code == 200:
+    # Access the content of the response
+    html_content = response.text
+    print(html_content)
+else:
+    print("Failed to retrieve data")
+
+
+
+
+soup = BeautifulSoup(html_content, 'html.parser')
+
+book_links = soup.find_all('a', class_=False, parent=soup.find('div', class_="book"))
+pdf_links = []
+for link in book_links:
+    if link.has_attr('href') and link['href'].endswith('.pdf'):
+        pdf_links.append(link['href'])
+
+# Print the extracted PDF links
+print(pdf_links)
+
+base_url = 'https://docs.oracle.com/en/cloud/paas/integration-cloud/'  # Replace with the actual base URL
+for pdf_link in pdf_links:
+    full_pdf_url = base_url + pdf_link
+    # Download the PDF file
+    response = requests.get(full_pdf_url)
+    if response.status_code == 200:
+        with open('Documentation_OIC/' + pdf_link.split('/')[-1], 'wb') as f:
+            f.write(response.content)
+        print('PDF downloaded successfully.')
+    else:
+        print('Failed to download PDF.')
+
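Note: in BeautifulSoup, the parent= keyword passed to find_all is treated as an HTML attribute filter rather than a search scope, so book_links may come back empty. A hedged alternative (assuming the page keeps the same div.book container) scopes the search to that container first:

import requests
from bs4 import BeautifulSoup

html = requests.get("https://docs.oracle.com/en/cloud/paas/integration-cloud/books.html").text
soup = BeautifulSoup(html, "html.parser")

# Restrict the link search to the book container if it exists, else fall back to the whole page.
book_div = soup.find("div", class_="book")
anchors = book_div.find_all("a") if book_div else soup.find_all("a")
pdf_links = [a["href"] for a in anchors if a.has_attr("href") and a["href"].endswith(".pdf")]
print(pdf_links)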
helper_functions.py ADDED
@@ -0,0 +1,46 @@
+from typing import List
+from pydantic import BaseModel, Field
+from langchain_google_genai import ChatGoogleGenerativeAI
+import google.generativeai as genai
+import os
+from langchain.prompts import ChatPromptTemplate
+from langchain.schema.output_parser import StrOutputParser
+from langchain_core.utils.function_calling import convert_to_openai_function
+
+
+os.environ["GOOGLE_API_KEY"] = "AIzaSyAgkKi4TKH9xL3N78FWn7SS7yDIz0T4r_4"
+genai.configure(api_key="AIzaSyAgkKi4TKH9xL3N78FWn7SS7yDIz0T4r_4")
+
+
+class Reverse(BaseModel):
+    """returns the reverse of a string"""
+    string: str
+
+weather_function = convert_to_openai_function(Reverse)
+
+
+model = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3)
+model = model.bind(functions=[Reverse])
+prompt = ChatPromptTemplate.from_template(""" {input} """)
+output_parser = StrOutputParser()
+chain = prompt | model | output_parser
+print(chain.invoke({"input": "what is the reverse of Sbaiuodcbdvu"}))
+
+# output_parser = StrOutputParser()
+# prompt = """Hey Gemini. Act as OIC Bot and follow the instructions and answer the question accordingly.
+#     Chat Instructions:
+#     1. Explain the question in 1-2 lines with heading as Question Explanation.
+#     2. Explain the potential solution with heading as Potential Solution to it and don't hallucinate.
+#     General Instructions:
+#     1. Ensure that you answer clearly and briefly and only answer from the context strictly.
+#     2. While answering the question follow these steps.
+#     3. Make sure you explain every question step by step and don't exceed 15 lines.
+#     Here is the chat history and respond to the user query accordingly.
+#     chat_history: \n {chat_history}\n
+#     Context:\n {context}\n
+#     Question: \n{question}?\n
+
+#     Answer:"""
+# simple_chain = ChatPromptTemplate.from_template(prompt)
+# chain = simple_chain | model | output_parser
+# print(chain.invoke({'chat_history':'','context':'connections are ways to connect things','question':'what is connection'}))
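For reference, a small sketch (not in the commit) of what convert_to_openai_function produces from the Reverse model above — an OpenAI-style function spec dict:

from pydantic import BaseModel
from langchain_core.utils.function_calling import convert_to_openai_function

class Reverse(BaseModel):
    """returns the reverse of a string"""
    string: str

# The helper turns the pydantic model into a dict with "name", "description" and "parameters" keys.
schema = convert_to_openai_function(Reverse)
print(schema["name"])         # "Reverse"
print(schema["description"])  # "returns the reverse of a string"
print(schema["parameters"])   # JSON schema describing the `string` field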
logo.png ADDED
main.py ADDED
@@ -0,0 +1,110 @@
+from PyPDF2 import PdfReader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+import os
+from langchain_google_genai import GoogleGenerativeAIEmbeddings
+import google.generativeai as genai
+from langchain_community.vectorstores import FAISS
+from langchain_google_genai import ChatGoogleGenerativeAI
+from langchain.chains.question_answering import load_qa_chain
+from langchain.prompts import PromptTemplate
+from langchain_experimental.agents.agent_toolkits import create_csv_agent
+
+
+# from dotenv import load_dotenv
+
+# load_dotenv()
+os.getenv("GOOGLE_API_KEY")
+genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
+
+os.environ["GOOGLE_API_KEY"] = "AIzaSyCsYPr4TiZEjCTcrOP_Itw3CiIYk7sKykc"
+genai.configure(api_key="AIzaSyCsYPr4TiZEjCTcrOP_Itw3CiIYk7sKykc")
+
+
+def get_pdf_text(pdf_docs):
+    text = ""
+    for pdf in pdf_docs:
+        pdf_reader = PdfReader(pdf)
+        for page in pdf_reader.pages:
+            text += page.extract_text()
+    return text
+
+
+
+def get_text_chunks(text):
+    text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
+    chunks = text_splitter.split_text(text)
+    return chunks
+
+
+def get_vector_store(text_chunks):
+    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
+    vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
+    vector_store.save_local("faiss_index")
+
+
+def get_conversational_chain():
+
+    prompt_template = """
+    Hey Gemini. You are an OIC Bot now and follow the instructions strictly and answer the question accordingly.
+    Chat Instructions:
+    1. The response must contain Question Explanation Heading.
+    2. The response must contain Potential Solution Heading.
+    3. Ensure that you answer clearly and briefly and only answer from the context strictly.
+    4. While answering the question follow these steps.
+    5. Make sure you explain every question step by step and don't exceed 15 lines.
+    6. The response must give a brief summary of what you have explained.
+    7. Response must have possible error scenarios or have few Edge cases to consider.
+    chat_history: \n {chat_history}\n
+    Context:\n {context}\n
+    Question: \n{question}?\n
+
+    Answer:
+    """
+    model = ChatGoogleGenerativeAI(model="gemini-pro",
+                                   temperature=0.3)
+
+    prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question", "chat_history"])
+    chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
+
+    return chain
+
+
+
+def user_input(user_question, chat_history):
+    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
+
+    new_db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
+    docs = new_db.similarity_search(user_question)
+    retriever = new_db.as_retriever()
+    output_docs = retriever.get_relevant_documents(user_question)
+
+    chain = get_conversational_chain()
+
+    response = chain(
+        {"input_documents": docs, "question": user_question, "chat_history": chat_history}
+        , return_only_outputs=True)
+
+    # print(response)
+    return response, output_docs
+
+def response_generator(prompt, chat_history):
+    response, output_docs = user_input(prompt, chat_history)
+    return response['output_text'], output_docs
+
+############################ DB QUERY CSV ########################
+def query_csv_db(csv_files, query):
+    model = ChatGoogleGenerativeAI(model="gemini-pro",
+                                   temperature=0)
+    agent = create_csv_agent(llm=model, path=csv_files, verbose=True, allow_dangerous_code=True)
+    result = agent.invoke(query)
+    return result
+
+if __name__ == "__main__":
+    files = os.listdir(r"C:\Users\Sujith\Downloads\Innovation\OIC_Docs")
+    os.chdir(r"C:\Users\Sujith\Downloads\Innovation\OIC_Docs")
+    get_vector_store(get_text_chunks(get_pdf_text(files)))
+    # chat_history = ''
+    # prompt = "how to add the Google Calendar Adapter Connection to an Integration"
+    # response = response_generator(prompt, chat_history)
+    # chat_history += f"""User: {prompt}\nBot: {response}\n"""
+    # print('\033[95m' + chat_history + '\033[0m')
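A hedged usage sketch mirroring the commented-out lines above: it assumes the faiss_index directory from this commit is present on disk and GOOGLE_API_KEY is set, then queries the index through response_generator while accumulating chat history.

from main import response_generator

chat_history = ""
prompt = "how to add the Google Calendar Adapter Connection to an Integration"

# response_generator returns the chain's output text plus the retrieved documents
response, output_docs = response_generator(prompt, chat_history)
chat_history += f"User: {prompt}\nBot: {response}\n"
print(response)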
requirements.txt ADDED
@@ -0,0 +1,12 @@
+langchain_google_genai
+langchain
+langchain_experimental
+google
+PyPDF2
+langchain_community
+faiss-cpu
+pydantic
+pillow
+tabulate
+pytest
+setuptools