Anne31415 committed on
Commit c368a2c
1 Parent(s): d709f6f

Update app.py

Files changed (1)
  1. app.py +63 -377
app.py CHANGED
@@ -1,383 +1,69 @@
- import streamlit as st
- from PIL import Image
- import random
- import time
- import streamlit_analytics
  from dotenv import load_dotenv
- import pickle
- from huggingface_hub import Repository
- from PyPDF2 import PdfReader
- from streamlit_extras.add_vertical_space import add_vertical_space
- from langchain.text_splitter import RecursiveCharacterTextSplitter
- from langchain.embeddings.openai import OpenAIEmbeddings
- from langchain.vectorstores import FAISS
- from langchain.llms import OpenAI
- from langchain.chains.question_answering import load_qa_chain
- from langchain.callbacks import get_openai_callback
  import os
-
- import pandas as pd
- import pydeck as pdk
- from urllib.error import URLError
-
- # Initialize session state variables
- if 'chat_history_page1' not in st.session_state:
-     st.session_state['chat_history_page1'] = []
-
- if 'chat_history_page2' not in st.session_state:
-     st.session_state['chat_history_page2'] = []
-
-
- # Step 1: Clone the dataset repository
- repo = Repository(
-     local_dir="Private_Book",  # Local directory to clone the repository into
-     repo_type="dataset",  # Specify that this is a dataset repository
-     clone_from="Anne31415/Private_Book",  # Replace with your repository URL
-     token=os.environ["HUB_TOKEN"]  # Use the secret token to authenticate
- )
- repo.git_pull()  # Pull the latest changes (if any)
-
- # Step 2: Load the PDF files
- pdf_path = "Private_Book/18122023_KOMBI.pdf"  # Replace with your PDF file path
- pdf_path2 = "Private_Book/Deutsche_Kodierrichtlinien_23.pdf"  # Replace with your PDF file path
-
- # Retrieve the API key from the environment
- api_key = os.getenv("OPENAI_API_KEY")
-
-
- # Caching mechanism using st.cache_data
- @st.cache_data(persist="disk")  # persist="disk" saves the cache across sessions
- def load_vector_store(file_path, store_name, force_reload=False):
-     # Rebuild the vector store if forced (e.g., when the PDF changes) or if no cache exists
-     if force_reload or not os.path.exists(f"{store_name}.pkl"):
-         text_splitter = RecursiveCharacterTextSplitter(
-             chunk_size=1000,
-             chunk_overlap=200,
-             length_function=len
-         )
-
-         text = load_pdf_text(file_path)
-         chunks = text_splitter.split_text(text=text)
-
-         embeddings = OpenAIEmbeddings()
-         VectorStore = FAISS.from_texts(chunks, embedding=embeddings)
-         VectorStore.save_local("faiss_store")
-         FAISS.load_local("faiss_store", OpenAIEmbeddings())
-         with open(f"{store_name}.pkl", "wb") as f:
-             pickle.dump(VectorStore, f)
-     else:
-         with open(f"{store_name}.pkl", "rb") as f:
-             VectorStore = pickle.load(f)
-
-     return VectorStore
-
- # Utility function to load text from a PDF
- def load_pdf_text(file_path):
-     pdf_reader = PdfReader(file_path)
-     text = ""
-     for page in pdf_reader.pages:
-         text += page.extract_text() or ""  # Fallback for pages where text extraction fails
-     return text
-
- def load_chatbot():
-     # return load_qa_chain(llm=OpenAI(), chain_type="stuff")
-     return load_qa_chain(llm=OpenAI(model_name="gpt-3.5-turbo-instruct"), chain_type="stuff")
-
-
- def display_chat_history(chat_history):
-     for chat in chat_history:
-         background_color = "#ffeecf" if chat[2] == "new" else "#ffeecf" if chat[0] == "User" else "#ffeecf"
-         st.markdown(f"<div style='background-color: {background_color}; padding: 10px; border-radius: 10px; margin: 10px;'>{chat[0]}: {chat[1]}</div>", unsafe_allow_html=True)
-
-
- def handle_no_answer(response):
-     no_answer_phrases = [
-         "ich weiß es nicht",
-         "ich weiß nicht",
-         "ich bin mir nicht sicher",
-         "es wird nicht erwähnt",
-         "Leider kann ich diese Frage nicht beantworten",
-         "kann ich diese Frage nicht beantworten",
-         "ich kann diese Frage nicht beantworten",
-         "ich kann diese Frage leider nicht beantworten",
-         "keine information",
-         "das ist unklar",
-         "da habe ich keine antwort",
-         "das kann ich nicht beantworten",
-         "i don't know",
-         "i am not sure",
-         "it is not mentioned",
-         "no information",
-         "that is unclear",
-         "i have no answer",
-         "i cannot answer that",
-         "unable to provide an answer",
-         "not enough context",
-     ]
-
-     alternative_responses = [
-         "Hmm, das ist eine knifflige Frage. Lass uns das gemeinsam erkunden. Kannst du mehr Details geben?",
-         "Interessante Frage! Ich bin mir nicht sicher, aber wir können es herausfinden. Hast du weitere Informationen?",
-         "Das ist eine gute Frage. Ich habe momentan keine Antwort darauf, aber vielleicht kannst du sie anders formulieren?",
-         "Da bin ich überfragt. Kannst du die Frage anders stellen oder mir mehr Kontext geben?",
-         "Ich stehe hier etwas auf dem Schlauch. Gibt es noch andere Aspekte der Frage, die wir betrachten könnten?",
-         # Add more alternative responses as needed
-     ]
-
-     # Check if the response matches any phrase in no_answer_phrases
-     if any(phrase in response.lower() for phrase in no_answer_phrases):
-         return random.choice(alternative_responses)  # Randomly select a fallback response
-     return response
-
-
- def page1():
-     try:
-         hide_streamlit_style = """
-             <style>
-             #MainMenu {visibility: hidden;}
-             footer {visibility: hidden;}
-             </style>
-         """
-         st.markdown(hide_streamlit_style, unsafe_allow_html=True)
-
-         # Create columns for layout
-         col1, col2 = st.columns([3, 1])  # Adjust the ratio to your liking
-
-         with col1:
-             st.title("Welcome to BinDocs AI!")
-
-         with col2:
-             # Display the logo in the top-right corner of the page
-             image = Image.open('BinDoc Logo (Quadratisch).png')
-             st.image(image, use_column_width='always')
-
-         # Start tracking user interactions
-         with streamlit_analytics.track():
-             if not os.path.exists(pdf_path):
-                 st.error("File not found. Please check the file path.")
-                 return
-
-             VectorStore = load_vector_store(pdf_path, "vector_store_page1", force_reload=False)
-
-             display_chat_history(st.session_state['chat_history_page1'])
-
-             st.write("<!-- Start Spacer -->", unsafe_allow_html=True)
-             st.write("<div style='flex: 1;'></div>", unsafe_allow_html=True)
-             st.write("<!-- End Spacer -->", unsafe_allow_html=True)
-
-             new_messages_placeholder = st.empty()
-
-             query = st.text_input("Geben Sie hier Ihre Frage ein / Enter your question here:")
-
-             add_vertical_space(2)  # Adjust as per the desired spacing
-
-             # Create two columns for the buttons
-             col1, col2 = st.columns(2)
-
-             with col1:
-                 if st.button("Was kann ich mit dem Prognose-Analyse-Tool machen?"):
-                     query = "Was kann ich mit dem Prognose-Analyse-Tool machen?"
-                 if st.button("Was sagt mir die Farbe der Balken der Bevölkerungsentwicklung?"):
-                     query = "Was sagt mir die Farbe der Balken der Bevölkerungsentwicklung?"
-                 if st.button("Ich habe mein Meta-Password vergessen, wie kann ich es zurücksetzen?"):
-                     query = "Ich habe mein Meta-Password vergessen, wie kann ich es zurücksetzen?"
-
-             with col2:
-                 if st.button("Dies ist eine reine Test Frage, welche aber eine ausreichende Länge hat."):
-                     query = "Dies ist eine reine Test Frage, welche aber eine ausreichende Länge hat."
-                 if st.button("Was sagt mir denn generell die wundervolle Bevölkerungsentwicklung?"):
-                     query = "Was sagt mir denn generell die wundervolle Bevölkerungsentwicklung?"
-                 if st.button("Ob ich hier wohl viel schreibe, dass die Fragen vom Layout her passen?"):
-                     query = "Ob ich hier wohl viel schreibe, dass die Fragen vom Layout her passen?"

-             if query:
-                 st.session_state['chat_history_page1'].append(("User", query, "new"))
-
-                 # Start timing
-                 start_time = time.time()
-
-                 with st.spinner('Bot is thinking...'):
-                     chain = load_chatbot()
-                     docs = VectorStore.similarity_search(query=query, k=3)
-                     with get_openai_callback() as cb:  # cb tracks OpenAI token usage
-                         response = chain.run(input_documents=docs, question=query)
-                         response = handle_no_answer(response)  # Substitute a fallback if the bot has no answer

-                 # Stop timing and display the duration
-                 end_time = time.time()
-                 duration = end_time - start_time
-                 st.text(f"Response time: {duration:.2f} seconds")
-
-                 st.session_state['chat_history_page1'].append(("Bot", response, "new"))
-
-                 # Display new messages at the bottom
-                 new_messages = st.session_state['chat_history_page1'][-2:]
-                 for chat in new_messages:
-                     background_color = "#ffeecf" if chat[2] == "new" else "#ffeecf" if chat[0] == "User" else "#ffeecf"
-                     new_messages_placeholder.markdown(f"<div style='background-color: {background_color}; padding: 10px; border-radius: 10px; margin: 10px;'>{chat[0]}: {chat[1]}</div>", unsafe_allow_html=True)
-
-                 # Clear the input field after the query is made
-                 query = ""
-
-                 # Mark all messages as old after displaying
-                 st.session_state['chat_history_page1'] = [(sender, msg, "old") for sender, msg, _ in st.session_state['chat_history_page1']]
-
-     except Exception as e:
-         st.error(f"Upsi, an unexpected error occurred: {e}")
-         # Optionally log the exception details to a file or error-tracking service
-
-
- def page2():
-     try:
-         hide_streamlit_style = """
-             <style>
-             #MainMenu {visibility: hidden;}
-             footer {visibility: hidden;}
-             </style>
-         """
-         st.markdown(hide_streamlit_style, unsafe_allow_html=True)
-
-         # Create columns for layout
-         col1, col2 = st.columns([3, 1])  # Adjust the ratio to your liking
-
-         with col1:
-             st.title("Kodieren statt Frustrieren!")
-
-         with col2:
-             # Display the logo in the top-right corner of the page
-             image = Image.open('BinDoc Logo (Quadratisch).png')
-             st.image(image, use_column_width='always')
-
-         # Start tracking user interactions
-         with streamlit_analytics.track():
-             if not os.path.exists(pdf_path2):
-                 st.error("File not found. Please check the file path.")
-                 return
-
-             VectorStore = load_vector_store(pdf_path2, "vector_store_page2", force_reload=False)
-
-             display_chat_history(st.session_state['chat_history_page2'])
-
-             st.write("<!-- Start Spacer -->", unsafe_allow_html=True)
-             st.write("<div style='flex: 1;'></div>", unsafe_allow_html=True)
-             st.write("<!-- End Spacer -->", unsafe_allow_html=True)
-
-             new_messages_placeholder = st.empty()
-
-             query = st.text_input("Ask questions about your PDF file (in any preferred language):")
-
-             add_vertical_space(2)  # Adjust as per the desired spacing
-
-             # Create two columns for the buttons
-             col1, col2 = st.columns(2)
-
-             with col1:
-                 if st.button("Wann kodiere ich etwas als Hauptdiagnose und wann als Nebendiagnose?"):
-                     query = "Wann kodiere ich etwas als Hauptdiagnose und wann als Nebendiagnose?"
-                 if st.button("Ein Patient wird mit Aszites bei bekannter Leberzirrhose stationär aufgenommen. Es wird nur der Aszites durch eine Punktion behandelt. Wie kodiere ich das?"):
-                     query = "Ein Patient wird mit Aszites bei bekannter Leberzirrhose stationär aufgenommen. Es wird nur der Aszites durch eine Punktion behandelt. Wie kodiere ich das?"
-                 if st.button("Hauptdiagnose: Hirntumor, wie kodiere ich das?"):
-                     query = "Hauptdiagnose: Hirntumor, wie kodiere ich das?"
-
-             with col2:
-                 if st.button("Welche Prozeduren werden normalerweise nicht verschlüsselt?"):
-                     query = "Welche Prozeduren werden normalerweise nicht verschlüsselt?"
-                 if st.button("Was muss ich bei der Kodierung der Folgezustände von Krankheiten beachten?"):
-                     query = "Was muss ich bei der Kodierung der Folgezustände von Krankheiten beachten?"
-                 if st.button("Was mache ich bei einer Verdachtsdiagnose, wenn mein Patient nach Hause entlassen wird?"):
-                     query = "Was mache ich bei einer Verdachtsdiagnose, wenn mein Patient nach Hause entlassen wird?"

-             if query:
-                 st.session_state['chat_history_page2'].append(("User", query, "new"))
-
-                 # Start timing
-                 start_time = time.time()
-
-                 with st.spinner('Bot is thinking...'):
-                     chain = load_chatbot()
-                     docs = VectorStore.similarity_search(query=query, k=3)
-                     with get_openai_callback() as cb:  # cb tracks OpenAI token usage
-                         response = chain.run(input_documents=docs, question=query)
-                         response = handle_no_answer(response)  # Substitute a fallback if the bot has no answer
-
-                 # Stop timing and display the duration
-                 end_time = time.time()
-                 duration = end_time - start_time
-                 st.text(f"Response time: {duration:.2f} seconds")
-
-                 st.session_state['chat_history_page2'].append(("Bot", response, "new"))
-
-                 # Display new messages at the bottom
-                 new_messages = st.session_state['chat_history_page2'][-2:]
-                 for chat in new_messages:
-                     background_color = "#ffeecf" if chat[2] == "new" else "#ffeecf" if chat[0] == "User" else "#ffeecf"
-                     new_messages_placeholder.markdown(f"<div style='background-color: {background_color}; padding: 10px; border-radius: 10px; margin: 10px;'>{chat[0]}: {chat[1]}</div>", unsafe_allow_html=True)
-
-                 # Clear the input field after the query is made
-                 query = ""
-
-                 # Mark all messages as old after displaying
-                 st.session_state['chat_history_page2'] = [(sender, msg, "old") for sender, msg, _ in st.session_state['chat_history_page2']]
-
-     except Exception as e:
-         st.error(f"Upsi, an unexpected error occurred: {e}")
-         # Optionally log the exception details to a file or error-tracking service
-
-
- def main():
-     # Sidebar content
-     with st.sidebar:
-         st.title('BinDoc GmbH')
-         st.markdown("Experience revolutionary interaction with BinDocs Chat App, leveraging state-of-the-art AI technology.")
-         add_vertical_space(1)
-         page = st.sidebar.selectbox("Choose a page", ["Document Analysis Bot", "Coding Assistance Bot"])
-         add_vertical_space(1)
-         st.write('Made with ❤️ by BinDoc GmbH')
-
-     # Main area content based on page selection
-     if page == "Document Analysis Bot":
-         page1()
-     elif page == "Coding Assistance Bot":
-         page2()
-
-
- if __name__ == "__main__":
-     main()
+ import streamlit as st
+ from lida import Manager, TextGenerationConfig, llm
  from dotenv import load_dotenv
  import os
+ import openai
+ from PIL import Image
+ from io import BytesIO
+ import base64
+
+ load_dotenv()
+ openai.api_key = os.getenv('OPENAI_API_KEY')
+
+ def base64_to_image(base64_string):
+     # Decode the base64 string to raw bytes
+     byte_data = base64.b64decode(base64_string)
+
+     # Use BytesIO to turn the byte data into a PIL image
+     return Image.open(BytesIO(byte_data))
+
+
+ lida = Manager(text_gen=llm("openai"))
+ textgen_config = TextGenerationConfig(n=1, temperature=0.5, model="gpt-3.5-turbo-0301", use_cache=True)
+
+ menu = st.sidebar.selectbox("Choose an Option", ["Summarize", "Question based Graph"])
+
+ if menu == "Summarize":
+     st.subheader("Summarization of your Data")
+     file_uploader = st.file_uploader("Upload your CSV", type="csv")
+     if file_uploader is not None:
+         path_to_save = "filename.csv"
+         with open(path_to_save, "wb") as f:
+             f.write(file_uploader.getvalue())
+         # Summarize the uploaded CSV, then derive visualization goals from the summary
+         summary = lida.summarize("filename.csv", summary_method="default", textgen_config=textgen_config)
+         st.write(summary)
+         goals = lida.goals(summary, n=2, textgen_config=textgen_config)
+         for goal in goals:
+             st.write(goal)
+         i = 0  # Visualize the first goal
+         library = "seaborn"
+         textgen_config = TextGenerationConfig(n=1, temperature=0.2, use_cache=True)
+         charts = lida.visualize(summary=summary, goal=goals[i], textgen_config=textgen_config, library=library)
+         # charts[0].raster holds the rendered chart as a base64-encoded image
+         img_base64_string = charts[0].raster
+         img = base64_to_image(img_base64_string)
+         st.image(img)
+
+ elif menu == "Question based Graph":
+     st.subheader("Query your Data to Generate Graph")
+     file_uploader = st.file_uploader("Upload your CSV", type="csv")
+     if file_uploader is not None:
+         path_to_save = "filename1.csv"
+         with open(path_to_save, "wb") as f:
+             f.write(file_uploader.getvalue())
+         text_area = st.text_area("Query your Data to Generate Graph", height=200)
+         if st.button("Generate Graph"):
+             if len(text_area) > 0:
+                 st.info("Your Query: " + text_area)
+                 lida = Manager(text_gen=llm("openai"))
+                 textgen_config = TextGenerationConfig(n=1, temperature=0.2, use_cache=True)
+                 summary = lida.summarize("filename1.csv", summary_method="default", textgen_config=textgen_config)
+                 user_query = text_area
+                 charts = lida.visualize(summary=summary, goal=user_query, textgen_config=textgen_config)
+                 charts[0]  # Bare expression: Streamlit "magic" writes the chart object to the app
+                 image_base64 = charts[0].raster
+                 img = base64_to_image(image_base64)
+                 st.image(img)
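
For reference, the LIDA flow this commit introduces can be smoke-tested outside Streamlit. The sketch below is a minimal, hypothetical example: it reuses only calls that appear in the new app.py (Manager, llm("openai"), TextGenerationConfig, summarize, visualize, and the base64 raster field), while "data.csv", the goal string, and "chart.png" are illustrative placeholders. It assumes the lida package is installed and OPENAI_API_KEY is set in the environment.

    import base64
    from lida import Manager, TextGenerationConfig, llm

    # Same setup as the app: an OpenAI-backed LIDA manager with response caching.
    lida = Manager(text_gen=llm("openai"))
    config = TextGenerationConfig(n=1, temperature=0.2, use_cache=True)

    # Summarize a local CSV ("data.csv" is a placeholder), then render one chart
    # for a natural-language goal, as the "Question based Graph" page does.
    summary = lida.summarize("data.csv", summary_method="default", textgen_config=config)
    charts = lida.visualize(summary=summary, goal="Show the distribution of each numeric column",
                            textgen_config=config, library="seaborn")

    # As in the app, charts[0].raster is a base64-encoded image; decode and save it.
    with open("chart.png", "wb") as out:
        out.write(base64.b64decode(charts[0].raster))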