Spaces:
Runtime error
Runtime error
0.1
Browse files- app.py +109 -0
- requirements.txt +113 -0
app.py
ADDED
@@ -0,0 +1,109 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import tempfile
|
3 |
+
import time
|
4 |
+
|
5 |
+
import streamlit as st
|
6 |
+
from dotenv import load_dotenv
|
7 |
+
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
|
8 |
+
from langchain.chat_models import ChatOpenAI
|
9 |
+
from langchain.prompts import PromptTemplate
|
10 |
+
from langchain.schema import StrOutputParser
|
11 |
+
from langchain.vectorstores import Vectara
|
12 |
+
|
13 |
+
# Load environment variables from .env file
load_dotenv()

# Sidebar: PDF upload plus Vectara / OpenAI credential entry.
# Each text field is pre-populated from the environment so a configured
# deployment needs no manual typing.
with st.sidebar:
    st.header("Configuration")
    uploaded_file = st.file_uploader("Choose a PDF file", type=["pdf"])
    customer_id = st.text_input("Vectara Customer ID", value=os.getenv("CUSTOMER_ID", ""))
    api_key = st.text_input("Vectara API Key", value=os.getenv("API_KEY", ""))
    corpus_id = st.text_input("Vectara Corpus ID", value=str(os.getenv("CORPUS_ID", "")))
    openai_api_key = st.text_input("OpenAI API Key", value=os.getenv("OPENAI_API_KEY", ""))
    submit_button = st.button("Submit")

# Effective credentials: whatever the user typed wins, otherwise fall back
# to the environment (empty text-input strings are falsy, so `or` works).
CUSTOMER_ID = customer_id or os.getenv("CUSTOMER_ID")
API_KEY = api_key or os.getenv("API_KEY")
# Corpus IDs are numeric; coerce whichever source supplied the value.
CORPUS_ID = int(corpus_id or os.getenv("CORPUS_ID", 0))
OPENAI_API_KEY = openai_api_key or os.getenv("OPENAI_API_KEY")
31 |
+
|
32 |
+
# Initialize Vectara
def initialize_vectara():
    """Build and return a Vectara vector-store client from the module-level
    CUSTOMER_ID / CORPUS_ID / API_KEY credentials."""
    return Vectara(
        vectara_customer_id=CUSTOMER_ID,
        vectara_corpus_id=CORPUS_ID,
        vectara_api_key=API_KEY,
    )


# Single shared client used by both the chat flow and the PDF uploader.
vectara_client = initialize_vectara()
42 |
+
|
43 |
+
# Function to get knowledge content from Vectara
def get_knowledge_content(vectara, query, threshold=0.5):
    """Retrieve documents matching *query* from Vectara and format them.

    Args:
        vectara: Vector-store client exposing ``similarity_search_with_score``.
        query: Free-text query to search for.
        threshold: Minimum similarity score for a document to be included.

    Returns:
        One "Document N: <page content>" line per hit, newline-terminated;
        an empty string when nothing matches.
    """
    found_docs = vectara.similarity_search_with_score(
        query,
        score_threshold=threshold,
    )
    # similarity_search_with_score yields (document, score) pairs. The
    # original code unpacked them backwards as (score, doc) and then
    # re-indexed found_docs[number][0] to compensate — unpack correctly
    # and use the document directly. join() also avoids quadratic +=.
    return "".join(
        f"Document {number}: {doc.page_content}\n"
        for number, (doc, _score) in enumerate(found_docs)
    )
53 |
+
|
54 |
+
# Prompt and response setup: template -> streaming chat model -> plain string.
_CONSULTANT_TEMPLATE = """You are a professional and friendly Legal Consultant and you are helping a client with a legal issue. The client is asking you for advice on a legal issue. Just explain him in detail the answer and nothing else. This is the issue: {issue}
To assist him with his issue, you need to know the following information: {knowledge}
"""

prompt = PromptTemplate.from_template(_CONSULTANT_TEMPLATE)
# Streaming model echoes tokens to stdout as they arrive (server-side log).
_llm = ChatOpenAI(
    streaming=True,
    callbacks=[StreamingStdOutCallbackHandler()],
    openai_api_key=OPENAI_API_KEY,
)
runnable = prompt | _llm | StrOutputParser()
61 |
+
|
62 |
+
# Main Streamlit App
st.title("Legal Consultation Chat")

# Create the chat history container on first run.
if "messages" not in st.session_state:
    st.session_state.messages = []

# Replay the stored conversation so it survives Streamlit reruns.
for past in st.session_state.messages:
    with st.chat_message(past["role"]):
        st.markdown(past["content"])

# Handle a newly submitted issue from the user.
if issue := st.chat_input("Enter your issue:"):
    st.session_state.messages.append({"role": "user", "content": issue})
    with st.chat_message("user"):
        st.markdown(issue)

    # Pull supporting context from Vectara, then run the LLM chain on it.
    knowledge_content = get_knowledge_content(vectara_client, issue)
    print("__________________ Start of knowledge content __________________")
    print(knowledge_content)
    answer = runnable.invoke({"knowledge": knowledge_content, "issue": issue})

    # Typewriter effect: reveal the answer word by word with a cursor glyph.
    with st.chat_message("assistant"):
        placeholder = st.empty()
        shown = ""
        for token in answer.split():
            shown += token + " "
            time.sleep(0.05)
            placeholder.markdown(shown + "▌")
        placeholder.markdown(shown)

    st.session_state.messages.append({"role": "assistant", "content": shown})
96 |
+
|
97 |
+
# Run when the sidebar Submit button is pressed with a PDF selected.
if submit_button and uploaded_file:
    # add_files() takes filesystem paths, so spill the in-memory upload to
    # a real temp file first (delete=False: we remove it ourselves below).
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as spooled:
        spooled.write(uploaded_file.getvalue())
        pdf_path = spooled.name

    try:
        vectara_client.add_files([pdf_path])
        st.sidebar.success("PDF file successfully uploaded to Vectara!")
    except Exception as err:
        st.sidebar.error(f"An error occurred: {str(err)}")
    finally:
        os.remove(pdf_path)  # Clean up temporary file
requirements.txt
ADDED
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
aiohttp==3.8.6
|
2 |
+
aiosignal==1.3.1
|
3 |
+
altair==5.1.2
|
4 |
+
annotated-types==0.6.0
|
5 |
+
anyio==3.7.1
|
6 |
+
appnope==0.1.3
|
7 |
+
asttokens==2.4.1
|
8 |
+
async-timeout==4.0.3
|
9 |
+
attrs==23.1.0
|
10 |
+
backoff==2.2.1
|
11 |
+
beautifulsoup4==4.12.2
|
12 |
+
blinker==1.7.0
|
13 |
+
cachetools==5.3.2
|
14 |
+
certifi==2023.7.22
|
15 |
+
chardet==5.2.0
|
16 |
+
charset-normalizer==3.3.2
|
17 |
+
click==8.1.7
|
18 |
+
comm==0.1.4
|
19 |
+
dataclasses-json==0.6.1
|
20 |
+
debugpy==1.8.0
|
21 |
+
decorator==5.1.1
|
22 |
+
emoji==2.8.0
|
23 |
+
exceptiongroup==1.1.3
|
24 |
+
executing==2.0.1
|
25 |
+
filetype==1.2.0
|
26 |
+
frozenlist==1.4.0
|
27 |
+
gitdb==4.0.11
|
28 |
+
GitPython==3.1.40
|
29 |
+
idna==3.4
|
30 |
+
importlib-metadata==6.8.0
|
31 |
+
ipykernel==6.26.0
|
32 |
+
ipython==8.17.2
|
33 |
+
jedi==0.19.1
|
34 |
+
Jinja2==3.1.2
|
35 |
+
joblib==1.3.2
|
36 |
+
jq==1.6.0
|
37 |
+
jsonpatch==1.33
|
38 |
+
jsonpointer==2.4
|
39 |
+
jsonschema==4.19.2
|
40 |
+
jsonschema-specifications==2023.7.1
|
41 |
+
jupyter_client==8.5.0
|
42 |
+
jupyter_core==5.5.0
|
43 |
+
langchain==0.0.327
|
44 |
+
langdetect==1.0.9
|
45 |
+
langsmith==0.0.56
|
46 |
+
lxml==4.9.3
|
47 |
+
markdown-it-py==3.0.0
|
48 |
+
MarkupSafe==2.1.3
|
49 |
+
marshmallow==3.20.1
|
50 |
+
matplotlib-inline==0.1.6
|
51 |
+
mdurl==0.1.2
|
52 |
+
multidict==6.0.4
|
53 |
+
mypy-extensions==1.0.0
|
54 |
+
nest-asyncio==1.5.8
|
55 |
+
nltk==3.8.1
|
56 |
+
numpy==1.26.1
|
57 |
+
openai==0.28.1
|
58 |
+
packaging==23.2
|
59 |
+
pandas==2.1.2
|
60 |
+
parso==0.8.3
|
61 |
+
pexpect==4.8.0
|
62 |
+
Pillow==10.1.0
|
63 |
+
platformdirs==3.11.0
|
64 |
+
prompt-toolkit==3.0.39
|
65 |
+
protobuf==4.25.0
|
66 |
+
psutil==5.9.6
|
67 |
+
ptyprocess==0.7.0
|
68 |
+
pure-eval==0.2.2
|
69 |
+
pyarrow==14.0.0
|
70 |
+
pydantic==2.4.2
|
71 |
+
pydantic_core==2.10.1
|
72 |
+
pydeck==0.8.1b0
|
73 |
+
Pygments==2.16.1
|
74 |
+
pypdf==3.17.0
|
75 |
+
python-dateutil==2.8.2
|
76 |
+
python-docx==1.0.1
|
77 |
+
python-dotenv==1.0.0
|
78 |
+
python-iso639==2023.6.15
|
79 |
+
python-magic==0.4.27
|
80 |
+
pytz==2023.3.post1
|
81 |
+
PyYAML==6.0.1
|
82 |
+
pyzmq==25.1.1
|
83 |
+
rapidfuzz==3.5.1
|
84 |
+
referencing==0.30.2
|
85 |
+
regex==2023.10.3
|
86 |
+
requests==2.31.0
|
87 |
+
rich==13.6.0
|
88 |
+
rpds-py==0.10.6
|
89 |
+
six==1.16.0
|
90 |
+
smmap==5.0.1
|
91 |
+
sniffio==1.3.0
|
92 |
+
soupsieve==2.5
|
93 |
+
SQLAlchemy==2.0.22
|
94 |
+
stack-data==0.6.3
|
95 |
+
streamlit==1.28.0
|
96 |
+
tabulate==0.9.0
|
97 |
+
tenacity==8.2.3
|
98 |
+
toml==0.10.2
|
99 |
+
toolz==0.12.0
|
100 |
+
tornado==6.3.3
|
101 |
+
tqdm==4.66.1
|
102 |
+
traitlets==5.13.0
|
103 |
+
typing-inspect==0.9.0
|
104 |
+
typing_extensions==4.8.0
|
105 |
+
tzdata==2023.3
|
106 |
+
tzlocal==5.2
|
107 |
+
unstructured==0.10.28
|
108 |
+
urllib3==2.0.7
|
109 |
+
validators==0.22.0
|
110 |
+
watchdog==3.0.0
|
111 |
+
wcwidth==0.2.9
|
112 |
+
yarl==1.9.2
|
113 |
+
zipp==3.17.0
|