Spaces:

JorgeAnimalsAI
/

Work-Assistant-App

Sleeping

App Files Files Community

Jorge Aguirregomezcorta committed on Jun 26, 2023

Commit

ea08e05

•

1 Parent(s): 704765c

Clone other repo

Browse files

Files changed (6) hide show

.env.example +1 -0
.gitignore +160 -0
README.md +2 -12
app.py +56 -0
environment.yml +159 -0
logic.py +58 -0

.env.example ADDED Viewed

	@@ -0,0 +1 @@


1	+ OPENAI_API_KEY=

.gitignore ADDED Viewed

	@@ -0,0 +1,160 @@

+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/

README.md CHANGED Viewed

@@ -1,12 +1,2 @@
----
-title: Work Assistant App
-emoji: 📊
-colorFrom: green
-colorTo: purple
-sdk: streamlit
-sdk_version: 1.21.0
-app_file: app.py
-pinned: false
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference


1	+ # Introduction
2	+ This document outlines the Onboarding case plan for the development of a Work Assistant application using ChatGPT API 4.0. The plan spans two weeks, from June 19th to June 30th, 2023.

app.py ADDED Viewed

	@@ -0,0 +1,56 @@

+# Load libraries and dependencies
+from dotenv import load_dotenv
+from logic import get_pdf_text, get_text_chunks, init_embeddings, get_conversation_chain
+import streamlit as st
+# Initialization function
+def init():
+    # Load environmental variables to extract API secrets
+    load_dotenv()
+    # Create and configure streamlit page
+    st.set_page_config(page_title="Work-Assistant App", page_icon="🐮")
+    st.header("Work-Assistant App 🐮")
+    # Initialize session state
+    if "conversation_chain" not in st.session_state:
+        st.session_state.conversation_chain = None
+    if "chat_history" not in st.session_state:
+        st.session_state.chat_history = None
+# Main function
+def main():
+    init()
+    # UI main layout
+    user_input = st.text_input("Ask a question:")
+    if user_input:
+        # Obtain response from conversation chain
+        response = st.session_state.conversation_chain({'question': user_input})
+        # Update chat history via response history
+        st.session_state.chat_history = response['chat_history']
+        # Show conversation
+        for i, message in enumerate(st.session_state.chat_history):
+            if i % 2 == 0:
+                st.write("User:" + message.content)
+            else:
+                st.write("Bot:" + message.content)
+    # UI sidebar layout
+    with st.sidebar:
+        st.subheader("Your documents")
+        pdf_docs = st.file_uploader("Upload your PDFs", accept_multiple_files=True)
+        if st.button("Process files"):
+            with st.spinner("Processing..."):
+                # Extract text content from pdf
+                text = get_pdf_text(pdf_docs)
+                # Split text into chunks
+                chunks = get_text_chunks(text)
+                # Obtain embeddings
+                embeddings = init_embeddings()
+                # Start conversation chain
+                st.session_state.conversation_chain = get_conversation_chain(chunks=chunks, embeddings=embeddings)
+# Main program of the application
+if __name__ == '__main__':
+    main()

environment.yml ADDED Viewed

	@@ -0,0 +1,159 @@

+name: py311
+channels:
+  - conda-forge
+  - defaults
+dependencies:
+  - aiohttp=3.8.3=py311h2bbff1b_0
+  - aiosignal=1.2.0=pyhd3eb1b0_0
+  - appdirs=1.4.4=pyhd3eb1b0_0
+  - async-timeout=4.0.2=py311haa95532_0
+  - attrs=22.1.0=py311haa95532_0
+  - blas=1.0=mkl
+  - bottleneck=1.3.5=py311h5bb9823_0
+  - brotli=1.0.9=h2bbff1b_7
+  - brotli-bin=1.0.9=h2bbff1b_7
+  - brotlipy=0.7.0=py311h2bbff1b_1002
+  - bzip2=1.0.8=he774522_0
+  - ca-certificates=2023.05.30=haa95532_0
+  - certifi=2023.5.7=py311haa95532_0
+  - cffi=1.15.1=py311h2bbff1b_3
+  - charset-normalizer=2.0.4=pyhd3eb1b0_0
+  - click=8.0.4=py311haa95532_0
+  - colorama=0.4.6=py311haa95532_0
+  - contourpy=1.0.5=py311h59b6b97_0
+  - cryptography=39.0.1=py311h21b164f_2
+  - cycler=0.11.0=pyhd3eb1b0_0
+  - dataclasses=0.8=pyh6d0b6a4_7
+  - dataclasses-json=0.5.7=py311haa95532_0
+  - et_xmlfile=1.1.0=py311haa95532_0
+  - faiss=1.7.4=py311h8355858_0_cpu
+  - fonttools=4.25.0=pyhd3eb1b0_0
+  - freetype=2.12.1=ha860e81_0
+  - frozenlist=1.3.3=py311h2bbff1b_0
+  - giflib=5.2.1=h8cc25b3_3
+  - greenlet=2.0.1=py311hd77b12b_0
+  - icc_rt=2022.1.0=h6049295_2
+  - idna=3.4=py311haa95532_0
+  - intel-openmp=2023.1.0=h59b6b97_46319
+  - joblib=1.2.0=py311haa95532_0
+  - jpeg=9e=h2bbff1b_1
+  - kiwisolver=1.4.4=py311hd77b12b_0
+  - langchain=0.0.190=pyhd8ed1ab_0
+  - lerc=3.0=hd77b12b_0
+  - libblas=3.9.0=1_h8933c1f_netlib
+  - libbrotlicommon=1.0.9=h2bbff1b_7
+  - libbrotlidec=1.0.9=h2bbff1b_7
+  - libbrotlienc=1.0.9=h2bbff1b_7
+  - libdeflate=1.17=h2bbff1b_0
+  - libfaiss=1.7.4=hba6d9cf_0_cpu
+  - libfaiss-avx2=1.7.4=h1234567_0_cpu
+  - libffi=3.4.4=hd77b12b_0
+  - liblapack=3.9.0=5_hd5c7e75_netlib
+  - libpng=1.6.39=h8cc25b3_0
+  - libtiff=4.5.0=h6c2663c_2
+  - libwebp=1.2.4=hbc33d0d_1
+  - libwebp-base=1.2.4=h2bbff1b_1
+  - lz4-c=1.9.4=h2bbff1b_0
+  - m2w64-gcc-libgfortran=5.3.0=6
+  - m2w64-gcc-libs=5.3.0=7
+  - m2w64-gcc-libs-core=5.3.0=7
+  - m2w64-gmp=6.1.0=2
+  - m2w64-libwinpthread-git=5.0.0.4634.697f757=2
+  - marshmallow=3.19.0=py311haa95532_0
+  - marshmallow-enum=1.5.1=py311haa95532_0
+  - matplotlib-base=3.7.1=py311hf62ec03_1
+  - mkl=2023.1.0=h8bd8f75_46356
+  - mkl-service=2.4.0=py311h2bbff1b_1
+  - mkl_fft=1.3.6=py311hf62ec03_1
+  - mkl_random=1.2.2=py311hf62ec03_1
+  - msys2-conda-epoch=20160418=1
+  - multidict=6.0.2=py311h2bbff1b_0
+  - munkres=1.1.4=py_0
+  - mypy_extensions=0.4.3=py311haa95532_1
+  - numexpr=2.8.4=py311h1fcbade_1
+  - numpy=1.25.0=py311hdab7c0b_0
+  - numpy-base=1.25.0=py311hd01c5d8_0
+  - openai=0.27.4=py311haa95532_0
+  - openapi-schema-pydantic=1.2.4=py311haa95532_0
+  - openpyxl=3.0.10=py311h2bbff1b_0
+  - openssl=3.0.9=h2bbff1b_0
+  - packaging=23.0=py311haa95532_0
+  - pandas=1.5.3=py311heda8569_0
+  - pandas-stubs=1.5.3.230203=py311haa95532_0
+  - pillow=9.4.0=py311hd77b12b_0
+  - pip=23.1.2=py311haa95532_0
+  - plotly=5.9.0=py311haa95532_0
+  - pooch=1.4.0=pyhd3eb1b0_0
+  - pycparser=2.21=pyhd3eb1b0_0
+  - pydantic=1.10.8=py311h2bbff1b_0
+  - pyopenssl=23.0.0=py311haa95532_0
+  - pyparsing=3.0.9=py311haa95532_0
+  - pypdf=3.11.0=pyhd8ed1ab_0
+  - pysocks=1.7.1=py311haa95532_0
+  - python=3.11.3=he1021f5_1
+  - python-dateutil=2.8.2=pyhd3eb1b0_0
+  - python-dotenv=0.21.0=py311haa95532_0
+  - python_abi=3.11=2_cp311
+  - pytz=2022.7=py311haa95532_0
+  - pyyaml=6.0=py311h2bbff1b_1
+  - requests=2.29.0=py311haa95532_0
+  - scikit-learn=1.2.2=py311hd77b12b_1
+  - scipy=1.10.1=py311hc1ccb85_1
+  - setuptools=67.8.0=py311haa95532_0
+  - six=1.16.0=pyhd3eb1b0_1
+  - sqlalchemy=1.4.39=py311h2bbff1b_0
+  - sqlite=3.41.2=h2bbff1b_0
+  - tbb=2021.8.0=h59b6b97_0
+  - tenacity=8.2.2=py311haa95532_0
+  - threadpoolctl=2.2.0=pyh0d69192_0
+  - tk=8.6.12=h2bbff1b_0
+  - tqdm=4.65.0=py311h746a85d_0
+  - types-pytz=2022.4.0.0=py311haa95532_1
+  - typing-extensions=4.6.3=py311haa95532_0
+  - typing_extensions=4.6.3=py311haa95532_0
+  - typing_inspect=0.7.1=pyhd3eb1b0_0
+  - ucrt=10.0.20348.0=haa95532_0
+  - urllib3=1.26.16=py311haa95532_0
+  - vc=14.2=h21ff451_1
+  - vc14_runtime=14.36.32532=hfdfe4a8_16
+  - vs2015_runtime=14.36.32532=h05e6639_16
+  - wheel=0.38.4=py311haa95532_0
+  - win_inet_pton=1.1.0=py311haa95532_0
+  - xz=5.4.2=h8cc25b3_0
+  - yaml=0.2.5=he774522_0
+  - yarl=1.8.1=py311h2bbff1b_0
+  - zlib=1.2.13=h8cc25b3_0
+  - zstd=1.5.5=hd43e919_0
+  - pip:
+      - altair==5.0.1
+      - blinker==1.6.2
+      - cachetools==5.3.1
+      - decorator==5.1.1
+      - gitdb==4.0.10
+      - gitpython==3.1.31
+      - importlib-metadata==6.7.0
+      - jinja2==3.1.2
+      - jsonschema==4.17.3
+      - markdown-it-py==3.0.0
+      - markupsafe==2.1.3
+      - mdurl==0.1.2
+      - protobuf==4.23.3
+      - pyarrow==12.0.1
+      - pydeck==0.8.1b0
+      - pygments==2.15.1
+      - pympler==1.0.1
+      - pyrsistent==0.19.3
+      - pytz-deprecation-shim==0.1.0.post0
+      - regex==2023.6.3
+      - rich==13.4.2
+      - smmap==5.0.0
+      - streamlit==1.23.1
+      - tiktoken==0.4.0
+      - toml==0.10.2
+      - toolz==0.12.0
+      - tornado==6.3.2
+      - tzdata==2023.3
+      - tzlocal==4.3.1
+      - validators==0.20.0
+      - watchdog==3.0.0
+      - zipp==3.15.0

logic.py ADDED Viewed

	@@ -0,0 +1,58 @@

+# Load libraries and dependencies
+from pypdf import PdfReader
+from langchain.text_splitter import CharacterTextSplitter
+from langchain.embeddings.openai import OpenAIEmbeddings
+from langchain.embeddings.huggingface import HuggingFaceInstructEmbeddings
+from langchain.vectorstores import FAISS
+from langchain.chat_models import ChatOpenAI
+from langchain.memory import ConversationBufferMemory
+from langchain.chains import ConversationalRetrievalChain
+# Transform a list of PDFs into a single string
+def get_pdf_text(pdf_documents):
+    # Initialize line of text
+    text = ""
+    # Append text extracted from the documents into the text string
+    for pdf in pdf_documents:
+        pdf_reader = PdfReader(pdf, strict=True)
+        for page in pdf_reader.pages:
+            text += page.extract_text()
+    return text
+# Transform a single line of text into an array of text chunks
+def get_text_chunks(raw_text, separator="\n", chunk_size=1000, chunk_overlap=200, lenght_function=len):
+    # Initialize TextSplitter with default variables
+    text_splitter = CharacterTextSplitter(
+        separator=separator,
+        chunk_size=chunk_size,
+        chunk_overlap=chunk_overlap,
+        length_function=lenght_function
+    )
+    # Create list of text chunks
+    return text_splitter.split_text(raw_text)
+# Initialize embeddings
+def init_embeddings(type=1):
+    # Choose embeding depending on the project's necessities
+    if type == 1:
+        # OpenAI Embeddings
+        return OpenAIEmbeddings()
+    else:
+        # Instructor Embeddings
+        return HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")
+# Initialize Conversation Chain
+def get_conversation_chain(chunks, embeddings):
+    # Create Vector Database from text chunks and embeddings
+    knowledge_base = FAISS.from_texts(chunks, embeddings).as_retriever()
+    # Create buffer to store the conversation memory
+    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
+    # Initialize language model
+    language_model = ChatOpenAI()
+    # Create conversation chain
+    conversation_chain = ConversationalRetrievalChain.from_llm(
+        llm=language_model,
+        retriever=knowledge_base,
+        memory=memory
+    )
+    return conversation_chain