Spaces:

el-camino-de-santiago
/

model-pick

Sleeping

App Files Files Community

ilj committed on May 28

Commit

c16c548

•

1 Parent(s): fcea2e9

copy original pipeline code

Browse files

Files changed (3) hide show

app.py +51 -0
langchain_pipeline.py +56 -0
requirements.txt +8 -0

app.py ADDED Viewed

	@@ -0,0 +1,51 @@

+import streamlit as st
+from langchain_pipeline import pipeline
+st.title("Composure AI")
+uploaded_file = st.file_uploader("Choose a file")
+if uploaded_file is not None:
+    # To read file as bytes:
+    diff = ""
+    with st.spinner('Please wait ...'):
+        try:
+            diff = pipeline(uploaded_file)
+        except Exception as e:
+            st.exception(e)
+    diff_lines = diff.split("\n")
+    # Use HTML and CSS to style the diff lines
+    styled_diff = """
+    <style>
+    body {
+        font-family: 'Times New Roman', serif;
+        line-height: 1.5;
+    }
+    .diff {
+        margin: 10px 0;
+        padding: 5px;
+    }
+    .add {
+        color: green;
+    }
+    .remove {
+        color: red;
+    }
+    </style>
+    <div>
+    """
+    for line in diff_lines:
+        if line.startswith('+'):
+            styled_diff += f'<div class="diff add">{line}</div>'
+        elif line.startswith('-'):
+            styled_diff += f'<div class="diff remove">{line}</div>'
+        else:
+            styled_diff += f'<div class="diff">{line}</div>'
+    styled_diff += "</div>"
+    # Display styled diff
+    st.markdown(styled_diff, unsafe_allow_html=True)
+st.markdown("The key changes are:")

langchain_pipeline.py ADDED Viewed

	@@ -0,0 +1,56 @@

+import os
+from pdfminer import high_level
+from langchain_astradb import AstraDBVectorStore
+from langchain_core.prompts import PromptTemplate
+from langchain_openai import OpenAIEmbeddings
+from langchain_anthropic import ChatAnthropic
+# Service credentials are read from the environment at import time; a missing
+# variable raises KeyError as soon as this module is imported.
+ASTRA_DB_API_ENDPOINT = os.environ["ASTRA_DB_API_ENDPOINT"]
+ASTRA_DB_APPLICATION_TOKEN = os.environ["ASTRA_DB_APPLICATION_TOKEN"]
+# The two API keys below are not passed explicitly anywhere in this module;
+# presumably the OpenAI/Anthropic clients read the same environment variables
+# themselves, and these lookups only serve as a fail-fast check (confirm).
+OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
+ANTHROPIC_API_KEY = os.environ["ANTHROPIC_API_KEY"]
+# Astra DB collection queried by pipeline() for related documents.
+collection_name = "ilj_test"
+# Embedding model handed to the vector store, and the chat model that writes the diff.
+embedding = OpenAIEmbeddings(model="text-embedding-ada-002")
+model = ChatAnthropic(model='claude-3-sonnet-20240229')
+def pipeline(bytes):
+    disclosure_text = high_level.extract_text(bytes)
+#     disclosure_text = doc[0].page_content
+#
+    astra = AstraDBVectorStore(
+        api_endpoint=ASTRA_DB_API_ENDPOINT,
+        token=ASTRA_DB_APPLICATION_TOKEN,
+        collection_name=collection_name,
+        embedding=embedding
+    )
+    related_docs = astra.search(disclosure_text, search_type="similarity")
+    prompt = PromptTemplate.from_template(
+        """
+    law context:
+    {context}
+    end of law context
+    =====
+    disclosure:
+    {disclosure}
+    end of disclosure
+    ===
+    Given the context above, how would a very good regulatory attorney edit the disclosure above?
+    Please provide only changes to the text in git diff format if it contradicts any laws or rules or does not provide enough clarity based on the spirit of the law.
+    If you don't know or unsure, say you don't know or that you're not sure.
+    Be as thorough as possible.
+    """,
+    )
+    val = prompt.format(context=related_docs, disclosure={disclosure_text})
+    chat_response = model.invoke(input=val)
+    return chat_response.content

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+streamlit
+pdfminer.six
+langchain
+langchain_community
+langchain_astradb
+langchain_core
+langchain_openai
+langchain_anthropic