ilj committed on
Commit
c16c548
1 Parent(s): fcea2e9

copy original pipeline code

Browse files
Files changed (3) hide show
  1. app.py +51 -0
  2. langchain_pipeline.py +56 -0
  3. requirements.txt +8 -0
app.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit front end: upload a document and show the suggested edits as a colored diff."""
import html

import streamlit as st
from langchain_pipeline import pipeline

# Stylesheet plus the opening wrapper for the rendered diff. Kept flush-left and
# free of blank lines so Markdown passes it through as raw HTML instead of
# interpreting indented lines as a code block.
_DIFF_HEADER = """<style>
  body {
    font-family: 'Times New Roman', serif;
    line-height: 1.5;
  }
  .diff {
    margin: 10px 0;
    padding: 5px;
  }
  .add {
    color: green;
  }
  .remove {
    color: red;
  }
</style>
<div>
"""


def _render_diff_html(diff_text):
    """Return ``diff_text`` as HTML, one ``<div>`` per line.

    Lines starting with ``+`` get the ``add`` class, lines starting with ``-``
    get the ``remove`` class, and every other line is left unstyled.
    """
    rows = []
    for line in diff_text.split("\n"):
        if line.startswith("+"):
            css_class = "diff add"
        elif line.startswith("-"):
            css_class = "diff remove"
        else:
            css_class = "diff"
        # The text originates from the model and the uploaded document and is
        # rendered with unsafe_allow_html=True, so it must be escaped to avoid
        # injecting markup into the page.
        rows.append(f'<div class="{css_class}">{html.escape(line)}</div>')
    return _DIFF_HEADER + "".join(rows) + "</div>"


st.title("Composure AI")

uploaded_file = st.file_uploader("Choose a file")
if uploaded_file is not None:
    # None means the pipeline failed; the error is shown and nothing else is rendered.
    diff = None
    with st.spinner("Please wait ..."):
        try:
            diff = pipeline(uploaded_file)
        except Exception as e:
            # Top-level UI boundary: surface the failure in the page.
            st.exception(e)

    if diff is not None:
        # Display styled diff
        st.markdown(_render_diff_html(diff), unsafe_allow_html=True)

        st.markdown("The key changes are:")
langchain_pipeline.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from pdfminer import high_level
3
+
4
+ from langchain_astradb import AstraDBVectorStore
5
+ from langchain_core.prompts import PromptTemplate
6
+ from langchain_openai import OpenAIEmbeddings
7
+ from langchain_anthropic import ChatAnthropic
8
+
# Connection settings are taken from the environment. Subscripting os.environ
# raises KeyError at import time when a variable is unset.
ASTRA_DB_API_ENDPOINT = os.environ["ASTRA_DB_API_ENDPOINT"]
ASTRA_DB_APPLICATION_TOKEN = os.environ["ASTRA_DB_APPLICATION_TOKEN"]
# NOTE(review): these two are not referenced again in the visible code;
# presumably the OpenAI and Anthropic clients read the same variables
# themselves — confirm.
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
ANTHROPIC_API_KEY = os.environ["ANTHROPIC_API_KEY"]

# Astra DB collection that pipeline() searches for related documents.
collection_name = "ilj_test"

# Shared clients, created once at import time.
embedding = OpenAIEmbeddings(model="text-embedding-ada-002")
model = ChatAnthropic(model="claude-3-sonnet-20240229")
18
+
def pipeline(bytes):
    """Ask the chat model how an attorney would edit an uploaded disclosure.

    The text is extracted with pdfminer, similar documents are fetched from
    the Astra DB collection, and both are placed in a prompt that requests
    the proposed changes in git diff format.

    Args:
        bytes: Source passed straight to ``pdfminer.high_level.extract_text``
            (app.py passes the Streamlit upload object). The name shadows the
            builtin but is kept so existing callers keep working.

    Returns:
        The ``content`` of the model's reply.
    """
    disclosure_text = high_level.extract_text(bytes)

    astra = AstraDBVectorStore(
        api_endpoint=ASTRA_DB_API_ENDPOINT,
        token=ASTRA_DB_APPLICATION_TOKEN,
        collection_name=collection_name,
        embedding=embedding,
    )

    related_docs = astra.search(disclosure_text, search_type="similarity")
    # Put the retrieved text in the prompt; formatting the list directly would
    # embed the repr of the Document objects instead.
    context = "\n\n".join(doc.page_content for doc in related_docs)

    prompt = PromptTemplate.from_template(
        """
        law context:

        {context}

        end of law context
        =====
        disclosure:

        {disclosure}

        end of disclosure
        ===
        Given the context above, how would a very good regulatory attorney edit the disclosure above?
        Please provide only changes to the text in git diff format if it contradicts any laws or rules or does not provide enough clarity based on the spirit of the law.
        If you don't know or unsure, say you don't know or that you're not sure.
        Be as thorough as possible.
        """,
    )
    # Pass the plain string: wrapping it in braces builds a set, whose repr
    # (with quotes and escape sequences) would end up in the prompt.
    val = prompt.format(context=context, disclosure=disclosure_text)

    chat_response = model.invoke(input=val)

    return chat_response.content
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ pdfminer.six
3
+ langchain
4
+ langchain_community
5
+ langchain_astradb
6
+ langchain_core
7
+ langchain_openai
8
+ langchain_anthropic