JKilpatrick committed
Commit e88ca0c
• 1 Parent(s): b8b9f57

initial commit

Files changed (5):
  1. .gitignore +160 -0
  2. Dockerfile +11 -0
  3. app.py +116 -0
  4. chainlit.md +14 -0
  5. requirements.txt +12 -0
.gitignore ADDED
@@ -0,0 +1,160 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+ .pytest_cache/
+ cover/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ .pybuilder/
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default/
+ ipython_config.py
+
+ # pyenv
+ # For a library or package, you might want to ignore these files since the code is
+ # intended to run in multiple environments; otherwise, check them in:
+ # .python-version
+
+ # pipenv
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
+ # install all needed dependencies.
+ #Pipfile.lock
+
+ # poetry
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+ #poetry.lock
+
+ # pdm
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+ #pdm.lock
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+ # in version control.
+ # https://pdm.fming.dev/#use-with-ide
+ .pdm.toml
+
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+ __pypackages__/
+
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+
+ # Pyre type checker
+ .pyre/
+
+ # pytype static type analyzer
+ .pytype/
+
+ # Cython debug symbols
+ cython_debug/
+
+ # PyCharm
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
+ #.idea/
Dockerfile ADDED
@@ -0,0 +1,11 @@
+ FROM python:3.9
+ RUN useradd -m -u 1000 user
+ USER user
+ ENV HOME=/home/user \
+     PATH=/home/user/.local/bin:$PATH
+ WORKDIR $HOME/app
+ # Copy and install dependencies first so the pip layer is cached across code-only changes
+ COPY --chown=user ./requirements.txt $HOME/app/requirements.txt
+ RUN pip install -r requirements.txt
+ COPY --chown=user . $HOME/app
+ CMD ["chainlit", "run", "app.py", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,116 @@
+ import os
+ from operator import itemgetter
+
+ import chainlit as cl
+ import pinecone
+ from dotenv import load_dotenv
+
+ load_dotenv()
+
+ from langchain.chat_models import ChatOpenAI
+ from langchain.prompts import ChatPromptTemplate
+ from langchain.schema.runnable import RunnablePassthrough
+ from langchain.embeddings.openai import OpenAIEmbeddings
+ from langchain.embeddings import CacheBackedEmbeddings
+ from langchain.storage import LocalFileStore
+ from langchain.vectorstores import Pinecone
+
+ # =============================================================================
+ # Retrieval Chain
+ # =============================================================================
+ def load_llm():
+     llm = ChatOpenAI(
+         model='gpt-3.5-turbo',
+         temperature=0.0,
+     )
+     return llm
+
+
+ def load_vectorstore():
+     pinecone.init(
+         api_key=os.getenv('PINECONE_API_KEY'),
+         environment=os.getenv('PINECONE_ENV')
+     )
+
+     # Cache embeddings on local disk so repeated texts are not re-embedded
+     store = LocalFileStore("./cache/")
+     core_embeddings_model = OpenAIEmbeddings()
+     embedder = CacheBackedEmbeddings.from_bytes_store(
+         core_embeddings_model,
+         store,
+         namespace=core_embeddings_model.model
+     )
+
+     text_field = "text"
+     index = pinecone.Index("youtube-index")
+
+     vectorstore = Pinecone(
+         index,
+         embedder,
+         text_field
+     )
+
+     return vectorstore
+
+
+ def qa_chain():
+     vectorstore = load_vectorstore()
+     llm = load_llm()
+     retriever = vectorstore.as_retriever()
+
+     template = """You are a helpful assistant that answers questions using only the provided context. If the answer is not in the context, respond with "I don't know".
+ Additionally, the context includes a specific integer formatted as <int>, representing a timestamp in seconds. In your response, include this integer as a citation formatted as a YouTube video link, "https://www.youtube.com/watch?v=[video_id]&t=<int>s", with the video title as the link text.
+
+ ### CONTEXT
+ {context}
+
+ ### QUESTION
+ {question}
+ """
+
+     prompt = ChatPromptTemplate.from_template(template)
+
+     # Retrieve context for the question, then return both the LLM response
+     # and the retrieved documents.
+     retrieval_augmented_qa_chain = (
+         {"context": itemgetter("question") | retriever,
+          "question": itemgetter("question")}
+         | RunnablePassthrough.assign(
+             context=itemgetter("context")
+         )
+         | {
+             "response": prompt | llm,
+             "context": itemgetter("context"),
+         }
+     )
+
+     return retrieval_augmented_qa_chain
+
+ # =============================================================================
+ # Chainlit
+ # =============================================================================
+ @cl.on_chat_start
+ async def on_chat_start():
+     chain = qa_chain()
+     cl.user_session.set("chain", chain)
+     msg = cl.Message(content="What is your question about AI Makerspace?")
+     await msg.send()
+
+
+ @cl.on_message
+ async def on_message(message: cl.Message):
+     chain = cl.user_session.get("chain")
+     res = chain.invoke({"question": message.content})
+     answer = res['response'].content
+     await cl.Message(content=answer).send()
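Since the retrieval chain in `app.py` is a plain LCEL runnable, it can be smoke-tested outside Chainlit. Below is a minimal sketch, not part of this commit, assuming `OPENAI_API_KEY`, `PINECONE_API_KEY`, and `PINECONE_ENV` are set in `.env` and the `youtube-index` Pinecone index is already populated; the question string is only illustrative.

```python
# smoke_test.py: hypothetical local check, not part of this commit
from dotenv import load_dotenv

load_dotenv()  # expects OPENAI_API_KEY, PINECONE_API_KEY, PINECONE_ENV

from app import qa_chain  # importing app also calls load_dotenv(); being explicit is harmless

chain = qa_chain()

# The chain takes a dict with a "question" key and returns the LLM response
# plus the retrieved context documents.
result = chain.invoke({"question": "What topics does AI Makerspace cover?"})

print(result["response"].content)           # answer with timestamped YouTube citation
print(len(result["context"]), "context documents retrieved")
```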
chainlit.md ADDED
@@ -0,0 +1,14 @@
+ # Welcome to Chainlit! 🚀🤖
+
+ Hi there, Developer! 👋 We're excited to have you on board. Chainlit is a powerful tool designed to help you prototype, debug and share applications built on top of LLMs.
+
+ ## Useful Links 🔗
+
+ - **Documentation:** Get started with our comprehensive [Chainlit Documentation](https://docs.chainlit.io) 📚
+ - **Discord Community:** Join our friendly [Chainlit Discord](https://discord.gg/k73SQ3FyUh) to ask questions, share your projects, and connect with other developers! 💬
+
+ We can't wait to see what you create with Chainlit! Happy coding! 💻😊
+
+ ## Welcome screen
+
+ To modify the welcome screen, edit the `chainlit.md` file at the root of your project. If you do not want a welcome screen, just leave this file empty.
requirements.txt ADDED
@@ -0,0 +1,12 @@
+ pandas==1.4.2
+ requests==2.29.0
+ youtube_transcript_api==0.6.0
+
+ chainlit==0.7.700
+ cohere==4.37
+ openai==1.3.5
+ tiktoken==0.5.1
+ python-dotenv==1.0.0
+ pinecone-client==2.2.4
+ langchain==0.0.350
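None of the secrets the app depends on are pinned here; they are read from the environment at runtime. A minimal pre-flight sketch follows (the hypothetical `check_env.py` name is illustrative; the variable names come from `app.py` plus the `OPENAI_API_KEY` expected by the OpenAI client):

```python
# check_env.py: optional pre-flight check before `chainlit run app.py`
import os

from dotenv import load_dotenv

load_dotenv()

# Keys read by app.py (Pinecone) and by the OpenAI client under the hood
REQUIRED = ["OPENAI_API_KEY", "PINECONE_API_KEY", "PINECONE_ENV"]
missing = [name for name in REQUIRED if not os.getenv(name)]

if missing:
    raise SystemExit(f"Missing environment variables: {', '.join(missing)}")
print("Environment looks good; run: chainlit run app.py --port 7860")
```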