Spaces:
Sleeping
Sleeping
copy original pipeline code
Browse files- app.py +51 -0
- langchain_pipeline.py +56 -0
- requirements.txt +8 -0
app.py
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Streamlit front end: upload a disclosure and display the suggested edits.

The uploaded file is handed to ``pipeline`` (from ``langchain_pipeline``),
whose return value is treated as diff-formatted text and rendered with
added lines in green and removed lines in red.
"""
import html

import streamlit as st
from langchain_pipeline import pipeline

# CSS plus the opening wrapper <div>; per-line <div>s are appended after it.
_DIFF_HTML_PREFIX = """
<style>
body {
font-family: 'Times New Roman', serif;
line-height: 1.5;
}
.diff {
margin: 10px 0;
padding: 5px;
}
.add {
color: green;
}
.remove {
color: red;
}
</style>
<div>
"""


def _render_diff_html(diff):
    """Return the HTML used to display *diff*, one styled <div> per line.

    Lines starting with ``+`` get the ``add`` class and lines starting with
    ``-`` get the ``remove`` class. Every line is HTML-escaped because the
    text is model output derived from an uploaded file and is rendered with
    ``unsafe_allow_html=True``; unescaped, any markup it contained would be
    injected into the page.
    """
    rendered = []
    for line in diff.split("\n"):
        if line.startswith('+'):
            css_class = "diff add"
        elif line.startswith('-'):
            css_class = "diff remove"
        else:
            css_class = "diff"
        rendered.append(f'<div class="{css_class}">{html.escape(line)}</div>')
    return _DIFF_HTML_PREFIX + "".join(rendered) + "</div>"


st.title("Composure AI")

uploaded_file = st.file_uploader("Choose a file")
if uploaded_file is not None:
    diff = None
    with st.spinner('Please wait ...'):
        try:
            diff = pipeline(uploaded_file)
        except Exception as e:
            # Top-level UI boundary: show the error in the page instead of
            # crashing the script run.
            st.exception(e)

    # Only render results when the pipeline actually produced output;
    # otherwise the page would show an empty diff under the error.
    if diff is not None:
        st.markdown(_render_diff_html(diff), unsafe_allow_html=True)

        st.markdown("The key changes are:")
|
langchain_pipeline.py
ADDED
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from pdfminer import high_level
|
3 |
+
|
4 |
+
from langchain_astradb import AstraDBVectorStore
|
5 |
+
from langchain_core.prompts import PromptTemplate
|
6 |
+
from langchain_openai import OpenAIEmbeddings
|
7 |
+
from langchain_anthropic import ChatAnthropic
|
8 |
+
|
9 |
+
# Required configuration, read once at import time. Indexing os.environ
# raises KeyError immediately if any of these variables is missing.
ASTRA_DB_API_ENDPOINT = os.environ["ASTRA_DB_API_ENDPOINT"]
ASTRA_DB_APPLICATION_TOKEN = os.environ["ASTRA_DB_APPLICATION_TOKEN"]
# NOTE(review): the two API keys below are not referenced again in the
# visible code; presumably the OpenAI/Anthropic clients read them from the
# environment themselves - confirm.
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
ANTHROPIC_API_KEY = os.environ["ANTHROPIC_API_KEY"]

# Astra DB collection queried for related documents.
collection_name = "ilj_test"

# Embedding model handed to the vector store, and the chat model that is
# asked to edit the disclosure.
embedding = OpenAIEmbeddings(model="text-embedding-ada-002")
model = ChatAnthropic(model="claude-3-sonnet-20240229")
|
18 |
+
|
19 |
+
def pipeline(bytes):
    """Suggest regulatory edits for an uploaded disclosure document.

    Extracts the document text with pdfminer, runs a similarity search for
    it in the Astra DB collection, and asks the chat model how a regulatory
    attorney would edit the disclosure given the retrieved context.

    Args:
        bytes: The document to analyse, passed straight to
            ``pdfminer.high_level.extract_text``. The name shadows the
            builtin but is kept so existing callers are unaffected.
            NOTE(review): the Streamlit app passes its uploaded-file object
            here - confirm it is accepted as a binary file-like object.

    Returns:
        The ``content`` of the chat model's response; the prompt asks for
        changes in git diff format.
    """
    disclosure_text = high_level.extract_text(bytes)

    astra = AstraDBVectorStore(
        api_endpoint=ASTRA_DB_API_ENDPOINT,
        token=ASTRA_DB_APPLICATION_TOKEN,
        collection_name=collection_name,
        embedding=embedding,
    )

    # NOTE(review): the search result is interpolated into the prompt via
    # its default string form; consider joining the documents' text
    # explicitly if the repr noise hurts answer quality.
    related_docs = astra.search(disclosure_text, search_type="similarity")

    prompt = PromptTemplate.from_template(
        """
law context:

{context}

end of law context
=====
disclosure:

{disclosure}

end of disclosure
===
Given the context above, how would a very good regulatory attorney edit the disclosure above?
Please provide only changes to the text in git diff format if it contradicts any laws or rules or does not provide enough clarity based on the spirit of the law.
If you don't know or unsure, say you don't know or that you're not sure.
Be as thorough as possible.
""",
    )
    # Pass the text itself. The original passed ``{disclosure_text}``, a set
    # literal, so the prompt contained the set's repr (braces, quotes and
    # escaped newlines) instead of the disclosure.
    val = prompt.format(context=related_docs, disclosure=disclosure_text)

    chat_response = model.invoke(input=val)

    return chat_response.content
|
requirements.txt
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit
|
2 |
+
pdfminer.six
|
3 |
+
langchain
|
4 |
+
langchain_community
|
5 |
+
langchain_astradb
|
6 |
+
langchain_core
|
7 |
+
langchain_openai
|
8 |
+
langchain_anthropic
|