Sanchayt committed on
Commit
860b718
·
1 Parent(s): 262d812
Files changed (2) hide show
  1. app.py +109 -0
  2. requirements.txt +113 -0
app.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import tempfile
3
+ import time
4
+
5
+ import streamlit as st
6
+ from dotenv import load_dotenv
7
+ from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
8
+ from langchain.chat_models import ChatOpenAI
9
+ from langchain.prompts import PromptTemplate
10
+ from langchain.schema import StrOutputParser
11
+ from langchain.vectorstores import Vectara
12
+
13
# Load environment variables from .env file
load_dotenv()

# Sidebar for PDF upload and API keys.
# Every text field is pre-filled from the environment so a configured
# deployment works without typing anything.
with st.sidebar:
    st.header("Configuration")
    uploaded_file = st.file_uploader("Choose a PDF file", type=["pdf"])
    customer_id = st.text_input(
        "Vectara Customer ID",
        value=os.getenv("CUSTOMER_ID", ""),
    )
    # API keys are secrets: mask them so they are not readable on screen
    # (screen sharing, screenshots, shoulder surfing).
    api_key = st.text_input(
        "Vectara API Key",
        value=os.getenv("API_KEY", ""),
        type="password",
    )
    corpus_id = st.text_input(
        "Vectara Corpus ID",
        value=str(os.getenv("CORPUS_ID", "")),
    )
    openai_api_key = st.text_input(
        "OpenAI API Key",
        value=os.getenv("OPENAI_API_KEY", ""),
        type="password",
    )
    submit_button = st.button("Submit")
25
+
26
# Constants
# A value typed into the sidebar wins; an empty field falls back to the
# environment variable of the same name.
CUSTOMER_ID = customer_id or os.getenv("CUSTOMER_ID")
API_KEY = api_key or os.getenv("API_KEY")
OPENAI_API_KEY = openai_api_key or os.getenv("OPENAI_API_KEY")

# CORPUS_ID is kept as an integer (0 when nothing is configured).
# The raw text comes from a free-form input, so a non-numeric value is
# reported in the sidebar instead of crashing the script with a ValueError.
_raw_corpus_id = (corpus_id or os.getenv("CORPUS_ID") or "").strip()
try:
    CORPUS_ID = int(_raw_corpus_id) if _raw_corpus_id else 0
except ValueError:
    st.sidebar.error(
        f"Vectara Corpus ID must be an integer, got {_raw_corpus_id!r}."
    )
    # Halt this script run; nothing below can work without a valid corpus ID.
    st.stop()
31
+
32
+ # Initialize Vectara
33
def initialize_vectara():
    """Build the Vectara vector-store client from the module-level settings.

    Returns:
        A ``Vectara`` instance constructed with ``CUSTOMER_ID``,
        ``CORPUS_ID`` and ``API_KEY``.
    """
    connection_settings = {
        "vectara_customer_id": CUSTOMER_ID,
        "vectara_corpus_id": CORPUS_ID,
        "vectara_api_key": API_KEY,
    }
    return Vectara(**connection_settings)


# Client used by both the chat flow and the PDF upload flow.
vectara_client = initialize_vectara()
42
+
43
+ # Function to get knowledge content from Vectara
44
def get_knowledge_content(vectara, query, threshold=0.5):
    """Return the text of the documents matching ``query`` as one string.

    Args:
        vectara: Object exposing ``similarity_search_with_score(query,
            score_threshold=...)`` whose result is a sequence of pairs with
            the document (an object with ``page_content``) first.
        query: Text to search for.
        threshold: Value forwarded as ``score_threshold`` to the search.

    Returns:
        One ``"Document <n>: <page_content>\\n"`` line per hit, numbered from
        0 in the order the search returned them; ``""`` when there are no
        hits.
    """
    found_docs = vectara.similarity_search_with_score(
        query,
        score_threshold=threshold,
    )
    # Each hit is a (document, score) pair; only the document text is used.
    return "".join(
        f"Document {number}: {doc.page_content}\n"
        for number, (doc, _score) in enumerate(found_docs)
    )
53
+
54
# Prompt and response setup
# The template has two placeholders: {issue} (the user's question) and
# {knowledge} (the passages retrieved from Vectara).
_LEGAL_PROMPT_TEXT = """You are a professional and friendly Legal Consultant and you are helping a client with a legal issue. The client is asking you for advice on a legal issue. Just explain him in detail the answer and nothing else. This is the issue: {issue}
To assist him with his issue, you need to know the following information: {knowledge}
"""
prompt = PromptTemplate.from_template(_LEGAL_PROMPT_TEXT)

# Chat model with streaming enabled; the callback echoes tokens to stdout.
_chat_model = ChatOpenAI(
    streaming=True,
    callbacks=[StreamingStdOutCallbackHandler()],
    openai_api_key=OPENAI_API_KEY,
)

# Chain: fill the prompt -> call the chat model -> parse the reply to a string.
runnable = prompt | _chat_model | StrOutputParser()
61
+
62
# Main Streamlit App
st.title("Legal Consultation Chat")

# Start with an empty transcript when the session has none yet.
if "messages" not in st.session_state:
    st.session_state["messages"] = []

# Replay the stored transcript on every rerun so earlier turns stay visible.
for entry in st.session_state["messages"]:
    role, content = entry["role"], entry["content"]
    with st.chat_message(role):
        st.markdown(content)
73
+
74
# Accept user input and run the main chat interaction
if user_input := st.chat_input("Enter your issue:"):
    st.session_state.messages.append({"role": "user", "content": user_input})
    with st.chat_message("user"):
        st.markdown(user_input)

    # Retrieve supporting passages, then ask the LLM chain for the answer.
    knowledge_content = get_knowledge_content(vectara_client, user_input)
    # Debug trace of the retrieved context on the server console.
    print("__________________ Start of knowledge content __________________")
    print(knowledge_content)
    response = runnable.invoke({"knowledge": knowledge_content, "issue": user_input})

    with st.chat_message("assistant"):
        message_placeholder = st.empty()
        full_response = ""
        # Typewriter effect. Split on single spaces only (not on all
        # whitespace) so newlines stay inside the chunks and the reply's
        # paragraphs, lists and other Markdown structure are preserved.
        for chunk in response.split(" "):
            full_response += chunk + " "
            time.sleep(0.05)
            message_placeholder.markdown(full_response + "▌")
        # Final render and stored history use the untouched model output.
        full_response = response
        message_placeholder.markdown(full_response)

    st.session_state.messages.append({"role": "assistant", "content": full_response})
96
+
97
# Run when the submit button is pressed
if submit_button and uploaded_file:
    # The upload lives in memory but add_files() is given file paths, so the
    # bytes are written to a temporary .pdf first. delete=False keeps the file
    # on disk after the handle is closed; it is removed in the finally below.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmpfile:
        tmpfile.write(uploaded_file.getvalue())
        tmp_filename = tmpfile.name

    try:
        indexed_ids = vectara_client.add_files([tmp_filename])
    except Exception as e:
        st.sidebar.error(f"An error occurred: {str(e)}")
    else:
        # NOTE(review): add_files() is documented to return the IDs of the
        # files it indexed; an empty result is treated as "nothing was
        # indexed" rather than reported as a success - confirm against the
        # installed langchain version.
        if indexed_ids:
            st.sidebar.success("PDF file successfully uploaded to Vectara!")
        else:
            st.sidebar.error(
                "Vectara did not index the PDF file. Check the server logs for details."
            )
    finally:
        os.remove(tmp_filename)  # Clean up temporary file
requirements.txt ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiohttp==3.8.6
2
+ aiosignal==1.3.1
3
+ altair==5.1.2
4
+ annotated-types==0.6.0
5
+ anyio==3.7.1
6
+ appnope==0.1.3
7
+ asttokens==2.4.1
8
+ async-timeout==4.0.3
9
+ attrs==23.1.0
10
+ backoff==2.2.1
11
+ beautifulsoup4==4.12.2
12
+ blinker==1.7.0
13
+ cachetools==5.3.2
14
+ certifi==2023.7.22
15
+ chardet==5.2.0
16
+ charset-normalizer==3.3.2
17
+ click==8.1.7
18
+ comm==0.1.4
19
+ dataclasses-json==0.6.1
20
+ debugpy==1.8.0
21
+ decorator==5.1.1
22
+ emoji==2.8.0
23
+ exceptiongroup==1.1.3
24
+ executing==2.0.1
25
+ filetype==1.2.0
26
+ frozenlist==1.4.0
27
+ gitdb==4.0.11
28
+ GitPython==3.1.40
29
+ idna==3.4
30
+ importlib-metadata==6.8.0
31
+ ipykernel==6.26.0
32
+ ipython==8.17.2
33
+ jedi==0.19.1
34
+ Jinja2==3.1.2
35
+ joblib==1.3.2
36
+ jq==1.6.0
37
+ jsonpatch==1.33
38
+ jsonpointer==2.4
39
+ jsonschema==4.19.2
40
+ jsonschema-specifications==2023.7.1
41
+ jupyter_client==8.5.0
42
+ jupyter_core==5.5.0
43
+ langchain==0.0.327
44
+ langdetect==1.0.9
45
+ langsmith==0.0.56
46
+ lxml==4.9.3
47
+ markdown-it-py==3.0.0
48
+ MarkupSafe==2.1.3
49
+ marshmallow==3.20.1
50
+ matplotlib-inline==0.1.6
51
+ mdurl==0.1.2
52
+ multidict==6.0.4
53
+ mypy-extensions==1.0.0
54
+ nest-asyncio==1.5.8
55
+ nltk==3.8.1
56
+ numpy==1.26.1
57
+ openai==0.28.1
58
+ packaging==23.2
59
+ pandas==2.1.2
60
+ parso==0.8.3
61
+ pexpect==4.8.0
62
+ Pillow==10.1.0
63
+ platformdirs==3.11.0
64
+ prompt-toolkit==3.0.39
65
+ protobuf==4.25.0
66
+ psutil==5.9.6
67
+ ptyprocess==0.7.0
68
+ pure-eval==0.2.2
69
+ pyarrow==14.0.0
70
+ pydantic==2.4.2
71
+ pydantic_core==2.10.1
72
+ pydeck==0.8.1b0
73
+ Pygments==2.16.1
74
+ pypdf==3.17.0
75
+ python-dateutil==2.8.2
76
+ python-docx==1.0.1
77
+ python-dotenv==1.0.0
78
+ python-iso639==2023.6.15
79
+ python-magic==0.4.27
80
+ pytz==2023.3.post1
81
+ PyYAML==6.0.1
82
+ pyzmq==25.1.1
83
+ rapidfuzz==3.5.1
84
+ referencing==0.30.2
85
+ regex==2023.10.3
86
+ requests==2.31.0
87
+ rich==13.6.0
88
+ rpds-py==0.10.6
89
+ six==1.16.0
90
+ smmap==5.0.1
91
+ sniffio==1.3.0
92
+ soupsieve==2.5
93
+ SQLAlchemy==2.0.22
94
+ stack-data==0.6.3
95
+ streamlit==1.28.0
96
+ tabulate==0.9.0
97
+ tenacity==8.2.3
98
+ toml==0.10.2
99
+ toolz==0.12.0
100
+ tornado==6.3.3
101
+ tqdm==4.66.1
102
+ traitlets==5.13.0
103
+ typing-inspect==0.9.0
104
+ typing_extensions==4.8.0
105
+ tzdata==2023.3
106
+ tzlocal==5.2
107
+ unstructured==0.10.28
108
+ urllib3==2.0.7
109
+ validators==0.22.0
110
+ watchdog==3.0.0
111
+ wcwidth==0.2.9
112
+ yarl==1.9.2
113
+ zipp==3.17.0