Spaces:
Sleeping
Sleeping
Sujithanumala
commited on
Commit
•
13b889e
1
Parent(s):
144a916
Upload 13 files
Browse files- .gitattributes +1 -0
- DB_Search.py +17 -0
- Training.csv +24 -0
- __init__.py +0 -0
- __pycache__/__init__.cpython-312.pyc +0 -0
- __pycache__/main.cpython-312.pyc +0 -0
- app.py +45 -0
- faiss_index/index.faiss +3 -0
- faiss_index/index.pkl +3 -0
- get_OIC_Documents.py +38 -0
- helper_functions.py +46 -0
- logo.png +0 -0
- main.py +110 -0
- requirements.txt +12 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
faiss_index/index.faiss filter=lfs diff=lfs merge=lfs -text
|
DB_Search.py
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langchain_experimental.agents.agent_toolkits import create_csv_agent
|
2 |
+
from langchain_community.utilities.sql_database import SQLDatabase
|
3 |
+
from langchain_community.agent_toolkits import create_sql_agent
|
4 |
+
import google.generativeai as genai
|
5 |
+
from langchain_google_genai import ChatGoogleGenerativeAI
|
6 |
+
import os
|
7 |
+
|
8 |
+
|
9 |
+
# Demo script: ask a LangChain CSV agent a natural-language question about
# Training.csv and print the agent's answer.

# The Gemini API key is a secret and must never be committed to source
# control. It is read from the GOOGLE_API_KEY environment variable, which is
# also where ChatGoogleGenerativeAI looks for it.
api_key = os.environ.get("GOOGLE_API_KEY")
if not api_key:
    raise RuntimeError(
        "GOOGLE_API_KEY is not set; export it before running DB_Search.py"
    )
genai.configure(api_key=api_key)

# temperature=0 keeps the generated pandas code as deterministic as possible.
model = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0)

# NOTE(security): allow_dangerous_code=True lets the agent execute
# model-generated Python in this process. Only run against trusted data/queries.
agent = create_csv_agent(
    llm=model,
    path='Training.csv',
    verbose=True,
    allow_dangerous_code=True,
)
result = agent.invoke('Return the training No and Trainer name where the start date is May')
print(result['output'])
|
17 |
+
# print(agent.invoke('I need to get the training number and training start and end date for the records which are present or null and the trainer is prefixed with ank and the startday is a weekday and training_no is divisble by 10'))
|
Training.csv
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Training_No,Training_startDate,Training_EndDate,Trainer,Training_Attendance
|
2 |
+
1,06-05-2024,06-05-2024,Ankita,Present
|
3 |
+
2,07-05-2024,07-05-2024,Pradeep,Absent
|
4 |
+
3,08-05-2024,08-05-2024,Pradeep,Absent
|
5 |
+
4,09-05-2024,09-05-2024,Pradeep,Absent
|
6 |
+
5,10-05-2024,10-05-2024,Pradeep,Absent
|
7 |
+
6,11-05-2024,11-05-2024,Pradeep,Absent
|
8 |
+
7,12-05-2024,12-05-2024,Pradeep,Absent
|
9 |
+
8,13-05-2024,13-05-2024,Pradeep,Absent
|
10 |
+
9,14-05-2024,14-05-2024,Pradeep,Absent
|
11 |
+
10,15-05-2024,15-05-2024,Pradeep,Absent
|
12 |
+
11,16-05-2024,16-05-2024,Pradeep,Absent
|
13 |
+
12,17-05-2024,17-05-2024,Pradeep,Absent
|
14 |
+
13,18-05-2024,18-05-2024,Ankira,Absent
|
15 |
+
14,19-05-2024,19-05-2024,Ankira,Absent
|
16 |
+
15,20-05-2024,20-05-2024,Ankira,
|
17 |
+
16,21-05-2024,21-05-2024,Ankira,
|
18 |
+
17,22-05-2024,22-05-2024,Ankira,
|
19 |
+
18,23-05-2024,23-05-2024,Ankira,
|
20 |
+
19,24-05-2024,24-05-2024,,Present
|
21 |
+
20,25-05-2024,25-05-2024,,Present
|
22 |
+
21,26-05-2024,26-05-2024,,Present
|
23 |
+
22,27-05-2024,27-05-2024,,Present
|
24 |
+
23,28-05-2024,28-05-2024,,Present
|
__init__.py
ADDED
File without changes
|
__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (171 Bytes). View file
|
|
__pycache__/main.cpython-312.pyc
ADDED
Binary file (4.88 kB). View file
|
|
app.py
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import os
|
3 |
+
from main import response_generator,query_csv_db
|
4 |
+
from PIL import Image
|
5 |
+
import pandas as pd
|
6 |
+
|
7 |
+
# Streamlit front end for the "Oracle Wiki" assistant.
# The page offers three modes: a documentation chatbot (OIC/SOA/OSB), a
# natural-language query over uploaded CSV files, and a placeholder SQL mode.

# Number of most recent chat messages forwarded to the model as history.
# Bounded so the prompt does not grow without limit over a long session.
MAX_HISTORY_MESSAGES = 10

im = Image.open("logo.png")
st.set_page_config(
    page_title="Oracle Wiki",
    page_icon=im,
    layout='centered'
)
st.title("Oracle Wiki")
options = st.selectbox(
    'Choose One',
    options=['Query an SQL DB', 'OIC Chatbot', 'SOA Chatbot', 'OSB Chatbot', 'Query a CSV DB'],
    index=1,
)

if options in ['OIC Chatbot', 'SOA Chatbot', 'OSB Chatbot']:
    if "messages" not in st.session_state:
        st.session_state.messages = []

    # Streamlit reruns the script on every interaction, so replay the
    # conversation stored in session state.
    for message in st.session_state.messages:
        with st.chat_message(message["role"], avatar=im):
            st.markdown(message["content"])

    if prompt := st.chat_input("What is up?"):
        # Build the history from earlier turns only (the current prompt is
        # passed separately as the question). Previously an empty string was
        # always sent, so the prompt's {chat_history} slot was never filled.
        recent_messages = st.session_state.messages[-MAX_HISTORY_MESSAGES:]
        chat_history = "".join(
            f"{'User' if past['role'] == 'user' else 'Bot'}: {past['content']}\n"
            for past in recent_messages
        )

        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user", avatar=im):
            st.markdown(prompt)

        with st.chat_message("assistant", avatar=im):
            response, output_docs = response_generator(prompt, chat_history)
            st.markdown(response)

        st.session_state.messages.append({"role": "assistant", "content": response})
elif options == 'Query a CSV DB':
    csv_files = st.file_uploader('Enter csv files to Query', accept_multiple_files=True)
    if csv_files:
        query = st.text_input('**:red[Enter the Query]**')
        if st.button(':red[Submit]'):
            result = query_csv_db(csv_files, query)['output']
            st.write(result)
else:
    # SQL mode currently only shows the database picker; no query is executed.
    st.selectbox('Choose the database you want to Query', options=['HCM OFS', 'SCM OFS'])
|
faiss_index/index.faiss
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eb0c2686572eff292bdb06000c6c49a7ba08f4579a68952a16589e0ae5725d1c
|
3 |
+
size 1738797
|
faiss_index/index.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1cb85b684526142700928195952dae02ecc1d212215dda84f9e9dcd7a000dd81
|
3 |
+
size 5709230
|
get_OIC_Documents.py
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import requests
|
2 |
+
from bs4 import BeautifulSoup
|
3 |
+
|
4 |
+
# Download every PDF linked from the Oracle Integration "books" page into
# the local Documentation_OIC directory.
import os
from urllib.parse import urljoin

base_url = 'https://docs.oracle.com/en/cloud/paas/integration-cloud/'
output_dir = 'Documentation_OIC'
request_timeout = 30  # seconds; avoids hanging forever on a stalled connection

response = requests.get(urljoin(base_url, "books.html"), timeout=request_timeout)
# Stop here on failure: nothing below can run without the page HTML
# (previously execution continued and hit a NameError on html_content).
if response.status_code != 200:
    raise SystemExit("Failed to retrieve data")
html_content = response.text

soup = BeautifulSoup(html_content, 'html.parser')

# Anchors without a class attribute that carry an href.
# The earlier `parent=...` keyword was dropped: BeautifulSoup interprets
# unknown keyword arguments to find_all as HTML attribute filters, so it did
# not restrict the search to the "book" div.
book_links = soup.find_all('a', class_=False, href=True)
pdf_links = [link['href'] for link in book_links if link['href'].endswith('.pdf')]

# Print the extracted PDF links
print(pdf_links)

# The target directory must exist before files can be written into it.
os.makedirs(output_dir, exist_ok=True)

for pdf_link in pdf_links:
    # urljoin handles both relative and absolute hrefs correctly.
    full_pdf_url = urljoin(base_url, pdf_link)
    # Download the PDF file
    response = requests.get(full_pdf_url, timeout=request_timeout)
    if response.status_code == 200:
        target_path = os.path.join(output_dir, pdf_link.split('/')[-1])
        with open(target_path, 'wb') as f:
            f.write(response.content)
        print('PDF downloaded successfully.')
    else:
        print('Failed to download PDF.')
|
38 |
+
|
helper_functions.py
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import List
|
2 |
+
from pydantic import BaseModel, Field
|
3 |
+
from langchain_google_genai import ChatGoogleGenerativeAI
|
4 |
+
import google.generativeai as genai
|
5 |
+
import os
|
6 |
+
from langchain.prompts import ChatPromptTemplate
|
7 |
+
from langchain.schema.output_parser import StrOutputParser
|
8 |
+
from langchain_core.utils.function_calling import convert_to_openai_function
|
9 |
+
|
10 |
+
|
11 |
+
# Experiment script: bind a pydantic-described "Reverse" function to Gemini
# and send a single prompt through a prompt | model | parser chain.

# The Gemini API key is a secret and must not live in source control; it is
# read from the GOOGLE_API_KEY environment variable instead.
api_key = os.environ.get("GOOGLE_API_KEY")
if not api_key:
    raise RuntimeError(
        "GOOGLE_API_KEY is not set; export it before running helper_functions.py"
    )
genai.configure(api_key=api_key)


class Reverse(BaseModel):
    """returns the reverse of a string"""
    # The string to be reversed.
    string: str


# OpenAI-style function schema derived from Reverse.
# NOTE(review): the name is a leftover and the value is not used below;
# the model is bound to the Reverse class directly.
weather_function = convert_to_openai_function(Reverse)


model = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3)
model = model.bind(functions=[Reverse])
prompt = ChatPromptTemplate.from_template(""" {input} """)
output_parser = StrOutputParser()
chain = prompt | model | output_parser
print(chain.invoke({"input": "what is the reverse of Sbaiuodcbdvu"}))
|
28 |
+
|
29 |
+
# output_parser = StrOutputParser()
|
30 |
+
# prompt = """Hey Gemini. Act as OIC Bot and follow the instructions and answer the question accordingly.
|
31 |
+
# Chat Instructions:
|
32 |
+
# 1. Explain the question in 1-2 lines with heading as Question Explaination.
|
33 |
+
# 2. Explain the potential solution with heading as Potential Solution to it and don't hallucinate.
|
34 |
+
# General Instructions:
|
35 |
+
# 1. Ensure that you answer clearly and briefly and only answer from the context strictly.
|
36 |
+
# 2. While answering the question follow these steps.
|
37 |
+
# 3. Make sure you explain every question step by step and don't exceed 15 lines.
|
38 |
+
# Here is the chat history and respond to the user query accordingly.
|
39 |
+
# chat_history: \n {chat_history}\n
|
40 |
+
# Context:\n {context}\n
|
41 |
+
# Question: \n{question}?\n
|
42 |
+
|
43 |
+
# Answer:"""
|
44 |
+
# simple_chain = ChatPromptTemplate.from_template(prompt)
|
45 |
+
# chain = simple_chain | model | output_parser
|
46 |
+
# print(chain.invoke({'chat_history':'','context':'connections are ways to connect things','question':'what is connection'}))
|
logo.png
ADDED
main.py
ADDED
@@ -0,0 +1,110 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from PyPDF2 import PdfReader
|
2 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
3 |
+
import os
|
4 |
+
from langchain_google_genai import GoogleGenerativeAIEmbeddings
|
5 |
+
import google.generativeai as genai
|
6 |
+
from langchain_community.vectorstores import FAISS
|
7 |
+
from langchain_google_genai import ChatGoogleGenerativeAI
|
8 |
+
from langchain.chains.question_answering import load_qa_chain
|
9 |
+
from langchain.prompts import PromptTemplate
|
10 |
+
from langchain_experimental.agents.agent_toolkits import create_csv_agent
|
11 |
+
|
12 |
+
|
13 |
+
# from dotenv import load_dotenv
|
14 |
+
|
15 |
+
# load_dotenv()
|
16 |
+
# Configure the Gemini client from the environment.
# The API key is a secret: it must be supplied via the GOOGLE_API_KEY
# environment variable (e.g. a Space secret or a local .env file) and never
# hard-coded in the repository. The LangChain Google wrappers used below read
# the same variable.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
if not GOOGLE_API_KEY:
    raise RuntimeError(
        "GOOGLE_API_KEY is not set; configure it as an environment variable "
        "before starting the application."
    )
genai.configure(api_key=GOOGLE_API_KEY)
|
21 |
+
|
22 |
+
|
23 |
+
def get_pdf_text(pdf_docs):
    """Return the text of every page of every PDF in *pdf_docs*, concatenated.

    Args:
        pdf_docs: Iterable of values accepted by ``PdfReader`` (the caller in
            this file passes file names).

    Returns:
        A single string with the page texts joined in order, with no
        separator. An empty iterable yields ``""``.
    """
    page_texts = []
    for pdf in pdf_docs:
        pdf_reader = PdfReader(pdf)
        # `or ""` keeps the join safe if a page yields no text object
        # (e.g. an image-only page on a reader version that returns None).
        page_texts.extend(page.extract_text() or "" for page in pdf_reader.pages)
    # Join once instead of growing a string with += on every page.
    return "".join(page_texts)
|
30 |
+
|
31 |
+
|
32 |
+
|
33 |
+
def get_text_chunks(text, chunk_size=10000, chunk_overlap=1000):
    """Split *text* into overlapping chunks for embedding.

    Args:
        text: The full document text to split.
        chunk_size: Maximum size of each chunk, passed to
            ``RecursiveCharacterTextSplitter``. Defaults to 10000.
        chunk_overlap: Overlap between consecutive chunks, passed to
            ``RecursiveCharacterTextSplitter``. Defaults to 1000.

    Returns:
        The list of chunks produced by the splitter's ``split_text``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_text(text)
|
37 |
+
|
38 |
+
|
39 |
+
def get_vector_store(text_chunks, index_path="faiss_index"):
    """Embed *text_chunks* and persist them as a local FAISS index.

    Args:
        text_chunks: Non-empty sequence of text chunks to embed.
        index_path: Directory the index is saved to. Defaults to
            ``"faiss_index"``, the location ``user_input`` loads from.

    Raises:
        ValueError: If *text_chunks* is empty; an index cannot be built from
            zero texts, and failing here gives a clearer error than the one
            raised from inside the vector store.
    """
    if not text_chunks:
        raise ValueError("text_chunks is empty; nothing to index")
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
    vector_store.save_local(index_path)
|
43 |
+
|
44 |
+
|
45 |
+
def get_conversational_chain():
    """Build the "stuff" question-answering chain used by the OIC chatbot.

    The prompt expects three variables: ``context`` (filled by the chain from
    the input documents), ``question`` and ``chat_history``.

    Returns:
        The chain created by ``load_qa_chain`` around a ``gemini-pro`` chat
        model with temperature 0.3.
    """
    # Spelling in the instructions matters: the model echoes the requested
    # heading names into the answer shown to the user.
    prompt_template = """
Hey Gemini. You are an OIC Bot now and follow the instructions strictly and answer the question accordingly.
Chat Instructions:
1. The response must contain Question Explanation Heading.
2. The response must contain Potential Solution Heading.
3. Ensure that you answer clearly and briefly and only answer from the context strictly.
4. While answering the question follow these steps.
5. Make sure you explain every question step by step and don't exceed 15 lines.
6. The response must give a brief summary of what you have explained.
7. Response must have possible error scenarios or have few Edge cases to consider.
chat_history: \n {chat_history}\n
Context:\n {context}\n
Question: \n{question}?\n

Answer:
"""
    model = ChatGoogleGenerativeAI(model="gemini-pro",
                                   temperature=0.3)

    prompt = PromptTemplate(
        template=prompt_template,
        input_variables=["context", "question", "chat_history"],
    )
    chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)

    return chain
|
70 |
+
|
71 |
+
|
72 |
+
|
73 |
+
def user_input(user_question,chat_history):
    """Answer *user_question* from the local FAISS index.

    Args:
        user_question: The question to search for and answer.
        chat_history: Prior conversation text inserted into the prompt's
            ``chat_history`` slot.

    Returns:
        A ``(response, output_docs)`` tuple: the chain's output mapping
        (callers read its ``'output_text'`` key) and the documents retrieved
        for the question.
    """
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

    # NOTE(security): loading the index unpickles faiss_index/index.pkl, which
    # is why the explicit opt-in flag is required. Only load index files
    # produced by this project.
    new_db = FAISS.load_local(
        "faiss_index",
        embeddings,
        allow_dangerous_deserialization=True,
    )

    # One similarity search serves both the chain input and the returned
    # source documents. The previous code ran the same default-parameter
    # search a second time through a retriever, which embedded the question
    # twice for identical results.
    docs = new_db.similarity_search(user_question)
    output_docs = docs

    chain = get_conversational_chain()

    response = chain(
        {"input_documents": docs, "question": user_question, "chat_history": chat_history},
        return_only_outputs=True,
    )

    return response, output_docs
|
89 |
+
|
90 |
+
def response_generator(prompt,chat_history):
    """Return the answer text and source documents for *prompt*.

    Delegates to ``user_input`` and unwraps the ``'output_text'`` entry of its
    response mapping.

    Returns:
        A ``(answer_text, output_docs)`` tuple.
    """
    chain_output, source_docs = user_input(prompt, chat_history)
    answer_text = chain_output['output_text']
    return answer_text, source_docs
|
93 |
+
|
94 |
+
############################ DB QUERY CSV ########################
|
95 |
+
def query_csv_db(csv_files,query):
    """Run a natural-language *query* against *csv_files* with a CSV agent.

    Args:
        csv_files: Passed to ``create_csv_agent`` as ``path``.
        query: The question handed to the agent's ``invoke``.

    Returns:
        Whatever ``agent.invoke`` returns (the caller reads its ``'output'``
        key).
    """
    llm = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0)
    # NOTE(security): allow_dangerous_code=True permits the agent to execute
    # model-generated Python in this process.
    csv_agent = create_csv_agent(
        llm=llm,
        path=csv_files,
        verbose=True,
        allow_dangerous_code=True,
    )
    return csv_agent.invoke(query)
|
101 |
+
|
102 |
+
if __name__ == "__main__":
    # Rebuild the FAISS index from the PDFs in the documentation directory.
    # The directory can be overridden with OIC_DOCS_DIR; the default is the
    # original author's local path.
    docs_dir = os.environ.get(
        "OIC_DOCS_DIR", r"C:\Users\Sujith\Downloads\Innovation\OIC_Docs"
    )
    # Work from inside the directory so the bare file names resolve; the
    # index is therefore written to <docs_dir>/faiss_index, as before.
    os.chdir(docs_dir)
    # Only hand PDFs to the reader; any other file in the directory would make
    # PdfReader fail. Sorting makes the indexing order deterministic.
    files = sorted(
        name for name in os.listdir(".") if name.lower().endswith(".pdf")
    )
    get_vector_store(get_text_chunks(get_pdf_text(files)))
|
106 |
+
# chat_history = ''
|
107 |
+
# prompt = "how to add the Google Calendar Adapter Connection to an Integration"
|
108 |
+
# response =response_generator(prompt,chat_history)
|
109 |
+
# chat_history += f"""User: {prompt}\nBot: {reslponse}\n"""
|
110 |
+
# print('\033[95m'+chat_history+'\033[0m')
|
requirements.txt
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
langchain_google_genai
|
2 |
+
langchain
|
3 |
+
langchain_experimental
|
4 |
+
google-generativeai
|
5 |
+
PyPDF2
|
6 |
+
langchain_community
|
7 |
+
faiss-cpu
|
8 |
+
pydantic
|
9 |
+
pillow
|
10 |
+
tabulate
|
11 |
+
pytest
|
12 |
+
setuptools
|