Spaces:

eklyman
/

pt-assistant-demo

Runtime error

App Files Files Community

eklyman committed on May 27

Commit

b8371dd

•

1 Parent(s): 5284eff

first commit

Browse files

Files changed (10) hide show

.chainlit/config.toml +97 -0
.chainlit/translations/en-US.json +155 -0
.chainlit/translations/pt-BR.json +155 -0
.gitignore +3 -0
Dockerfile +11 -0
README.md +2 -2
app.py +90 -0
chainlit.md +10 -0
requirements.txt +87 -0
youtube_to_docstore.py +140 -0

.chainlit/config.toml ADDED Viewed

	@@ -0,0 +1,97 @@

+[project]
+# Whether to enable telemetry (default: true). No personal data is collected.
+enable_telemetry = true
+# List of environment variables to be provided by each user to use the app.
+user_env = []
+# Duration (in seconds) during which the session is saved when the connection is lost
+session_timeout = 3600
+# Enable third-party caching (e.g. LangChain cache)
+cache = false
+# Authorized origins
+allow_origins = ["*"]
+# Follow symlink for asset mount (see https://github.com/Chainlit/chainlit/issues/317)
+# follow_symlink = false
+[features]
+# Show the prompt playground
+prompt_playground = true
+# Process and display HTML in messages. This can be a security risk (see https://stackoverflow.com/questions/19603097/why-is-it-dangerous-to-render-user-generated-html-or-javascript)
+unsafe_allow_html = false
+# Process and display mathematical expressions. This can clash with "$" characters in messages.
+latex = false
+# Authorize users to upload files with messages
+multi_modal = true
+# Allows user to use speech to text
+[features.speech_to_text]
+    enabled = false
+    # See all languages here https://github.com/JamesBrill/react-speech-recognition/blob/HEAD/docs/API.md#language-string
+    # language = "en-US"
+[UI]
+# Name of the app and chatbot.
+name = "Chatbot"
+# Show the readme while the thread is empty.
+show_readme_as_default = true
+# Description of the app and chatbot. This is used for HTML tags.
+# description = ""
+# Large content is collapsed by default for a cleaner UI
+default_collapse_content = true
+# The default value for the expand messages settings.
+default_expand_messages = false
+# Hide the chain of thought details from the user in the UI.
+hide_cot = false
+# Link to your github repo. This will add a github button in the UI's header.
+# github = ""
+# Specify a CSS file that can be used to customize the user interface.
+# The CSS file can be served from the public directory or via an external link.
+# custom_css = "/public/test.css"
+# Specify a Javascript file that can be used to customize the user interface.
+# The Javascript file can be served from the public directory.
+# custom_js = "/public/test.js"
+# Specify a custom font url.
+# custom_font = "https://fonts.googleapis.com/css2?family=Inter:wght@400;500;700&display=swap"
+# Override default MUI light theme. (Check theme.ts)
+[UI.theme]
+    #font_family = "Inter, sans-serif"
+[UI.theme.light]
+    #background = "#FAFAFA"
+    #paper = "#FFFFFF"
+    [UI.theme.light.primary]
+        #main = "#F80061"
+        #dark = "#980039"
+        #light = "#FFE7EB"
+# Override default MUI dark theme. (Check theme.ts)
+[UI.theme.dark]
+    #background = "#FAFAFA"
+    #paper = "#FFFFFF"
+    [UI.theme.dark.primary]
+        #main = "#F80061"
+        #dark = "#980039"
+        #light = "#FFE7EB"
+[meta]
+generated_by = "1.0.401"

.chainlit/translations/en-US.json ADDED Viewed

	@@ -0,0 +1,155 @@

+{
+    "components": {
+        "atoms": {
+            "buttons": {
+                "userButton": {
+                    "menu": {
+                        "settings": "Settings",
+                        "settingsKey": "S",
+                        "APIKeys": "API Keys",
+                        "logout": "Logout"
+                    }
+                }
+            }
+        },
+        "molecules": {
+            "newChatButton": {
+                "newChat": "New Chat"
+            },
+            "tasklist": {
+                "TaskList": {
+                    "title": "\ud83d\uddd2\ufe0f Task List",
+                    "loading": "Loading...",
+                    "error": "An error occurred"
+                }
+            },
+            "attachments": {
+                "cancelUpload": "Cancel upload",
+                "removeAttachment": "Remove attachment"
+            },
+            "newChatDialog": {
+                "createNewChat": "Create new chat?",
+                "clearChat": "This will clear the current messages and start a new chat.",
+                "cancel": "Cancel",
+                "confirm": "Confirm"
+            },
+            "settingsModal": {
+                "expandMessages": "Expand Messages",
+                "hideChainOfThought": "Hide Chain of Thought",
+                "darkMode": "Dark Mode"
+            }
+        },
+        "organisms": {
+            "chat": {
+                "history": {
+                    "index": {
+                        "lastInputs": "Last Inputs",
+                        "noInputs": "Such empty...",
+                        "loading": "Loading..."
+                    }
+                },
+                "inputBox": {
+                    "input": {
+                        "placeholder": "Type your message here..."
+                    },
+                    "speechButton": {
+                        "start": "Start recording",
+                        "stop": "Stop recording"
+                    },
+                    "SubmitButton": {
+                        "sendMessage": "Send message",
+                        "stopTask": "Stop Task"
+                    },
+                    "UploadButton": {
+                        "attachFiles": "Attach files"
+                    },
+                    "waterMark": {
+                        "text": "Built with"
+                    }
+                },
+                "Messages": {
+                    "index": {
+                        "running": "Running",
+                        "executedSuccessfully": "executed successfully",
+                        "failed": "failed",
+                        "feedbackUpdated": "Feedback updated",
+                        "updating": "Updating"
+                    }
+                },
+                "dropScreen": {
+                    "dropYourFilesHere": "Drop your files here"
+                },
+                "index": {
+                    "failedToUpload": "Failed to upload",
+                    "cancelledUploadOf": "Cancelled upload of",
+                    "couldNotReachServer": "Could not reach the server",
+                    "continuingChat": "Continuing previous chat"
+                },
+                "settings": {
+                    "settingsPanel": "Settings panel",
+                    "reset": "Reset",
+                    "cancel": "Cancel",
+                    "confirm": "Confirm"
+                }
+            },
+            "threadHistory": {
+                "sidebar": {
+                    "filters": {
+                        "FeedbackSelect": {
+                            "feedbackAll": "Feedback: All",
+                            "feedbackPositive": "Feedback: Positive",
+                            "feedbackNegative": "Feedback: Negative"
+                        },
+                        "SearchBar": {
+                            "search": "Search"
+                        }
+                    },
+                    "DeleteThreadButton": {
+                        "confirmMessage": "This will delete the thread as well as its messages and elements.",
+                        "cancel": "Cancel",
+                        "confirm": "Confirm",
+                        "deletingChat": "Deleting chat",
+                        "chatDeleted": "Chat deleted"
+                    },
+                    "index": {
+                        "pastChats": "Past Chats"
+                    },
+                    "ThreadList": {
+                        "empty": "Empty..."
+                    },
+                    "TriggerButton": {
+                        "closeSidebar": "Close sidebar",
+                        "openSidebar": "Open sidebar"
+                    }
+                },
+                "Thread": {
+                    "backToChat": "Go back to chat",
+                    "chatCreatedOn": "This chat was created on"
+                }
+            },
+            "header": {
+                "chat": "Chat",
+                "readme": "Readme"
+            }
+        }
+    },
+    "hooks": {
+        "useLLMProviders": {
+            "failedToFetchProviders": "Failed to fetch providers:"
+        }
+    },
+    "pages": {
+        "Design": {},
+        "Env": {
+            "savedSuccessfully": "Saved successfully",
+            "requiredApiKeys": "Required API Keys",
+            "requiredApiKeysInfo": "To use this app, the following API keys are required. The keys are stored on your device's local storage."
+        },
+        "Page": {
+            "notPartOfProject": "You are not part of this project."
+        },
+        "ResumeButton": {
+            "resumeChat": "Resume Chat"
+        }
+    }
+}

.chainlit/translations/pt-BR.json ADDED Viewed

	@@ -0,0 +1,155 @@

+{
+    "components": {
+        "atoms": {
+            "buttons": {
+                "userButton": {
+                    "menu": {
+                        "settings": "Configura\u00e7\u00f5es",
+                        "settingsKey": "S",
+                        "APIKeys": "Chaves de API",
+                        "logout": "Sair"
+                    }
+                }
+            }
+        },
+        "molecules": {
+            "newChatButton": {
+                "newChat": "Nova Conversa"
+            },
+            "tasklist": {
+                "TaskList": {
+                    "title": "\ud83d\uddd2\ufe0f Lista de Tarefas",
+                    "loading": "Carregando...",
+                    "error": "Ocorreu um erro"
+                }
+            },
+            "attachments": {
+                "cancelUpload": "Cancelar envio",
+                "removeAttachment": "Remover anexo"
+            },
+            "newChatDialog": {
+                "createNewChat": "Criar novo chat?",
+                "clearChat": "Isso limpar\u00e1 as mensagens atuais e iniciar\u00e1 uma nova conversa.",
+                "cancel": "Cancelar",
+                "confirm": "Confirmar"
+            },
+            "settingsModal": {
+                "expandMessages": "Expandir Mensagens",
+                "hideChainOfThought": "Esconder Sequ\u00eancia de Pensamento",
+                "darkMode": "Modo Escuro"
+            }
+        },
+        "organisms": {
+            "chat": {
+                "history": {
+                    "index": {
+                        "lastInputs": "\u00daltimas Entradas",
+                        "noInputs": "Vazio...",
+                        "loading": "Carregando..."
+                    }
+                },
+                "inputBox": {
+                    "input": {
+                        "placeholder": "Digite sua mensagem aqui..."
+                    },
+                    "speechButton": {
+                        "start": "Iniciar grava\u00e7\u00e3o",
+                        "stop": "Parar grava\u00e7\u00e3o"
+                    },
+                    "SubmitButton": {
+                        "sendMessage": "Enviar mensagem",
+                        "stopTask": "Parar Tarefa"
+                    },
+                    "UploadButton": {
+                        "attachFiles": "Anexar arquivos"
+                    },
+                    "waterMark": {
+                        "text": "Constru\u00eddo com"
+                    }
+                },
+                "Messages": {
+                    "index": {
+                        "running": "Executando",
+                        "executedSuccessfully": "executado com sucesso",
+                        "failed": "falhou",
+                        "feedbackUpdated": "Feedback atualizado",
+                        "updating": "Atualizando"
+                    }
+                },
+                "dropScreen": {
+                    "dropYourFilesHere": "Solte seus arquivos aqui"
+                },
+                "index": {
+                    "failedToUpload": "Falha ao enviar",
+                    "cancelledUploadOf": "Envio cancelado de",
+                    "couldNotReachServer": "N\u00e3o foi poss\u00edvel conectar ao servidor",
+                    "continuingChat": "Continuando o chat anterior"
+                },
+                "settings": {
+                    "settingsPanel": "Painel de Configura\u00e7\u00f5es",
+                    "reset": "Redefinir",
+                    "cancel": "Cancelar",
+                    "confirm": "Confirmar"
+                }
+            },
+            "threadHistory": {
+                "sidebar": {
+                    "filters": {
+                        "FeedbackSelect": {
+                            "feedbackAll": "Feedback: Todos",
+                            "feedbackPositive": "Feedback: Positivo",
+                            "feedbackNegative": "Feedback: Negativo"
+                        },
+                        "SearchBar": {
+                            "search": "Buscar"
+                        }
+                    },
+                    "DeleteThreadButton": {
+                        "confirmMessage": "Isso deletar\u00e1 a conversa, assim como suas mensagens e elementos.",
+                        "cancel": "Cancelar",
+                        "confirm": "Confirmar",
+                        "deletingChat": "Deletando conversa",
+                        "chatDeleted": "Conversa deletada"
+                    },
+                    "index": {
+                        "pastChats": "Conversas Anteriores"
+                    },
+                    "ThreadList": {
+                        "empty": "Vazio..."
+                    },
+                    "TriggerButton": {
+                        "closeSidebar": "Fechar barra lateral",
+                        "openSidebar": "Abrir barra lateral"
+                    }
+                },
+                "Thread": {
+                    "backToChat": "Voltar para a conversa",
+                    "chatCreatedOn": "Esta conversa foi criada em"
+                }
+            },
+            "header": {
+                "chat": "Conversa",
+                "readme": "Leia-me"
+            }
+        }
+    },
+    "hooks": {
+        "useLLMProviders": {
+            "failedToFetchProviders": "Falha ao buscar provedores:"
+        }
+    },
+    "pages": {
+        "Design": {},
+        "Env": {
+            "savedSuccessfully": "Salvo com sucesso",
+            "requiredApiKeys": "Chaves de API necess\u00e1rias",
+            "requiredApiKeysInfo": "Para usar este aplicativo, as seguintes chaves de API s\u00e3o necess\u00e1rias. As chaves s\u00e3o armazenadas localmente em seu dispositivo."
+        },
+        "Page": {
+            "notPartOfProject": "Voc\u00ea n\u00e3o faz parte deste projeto."
+        },
+        "ResumeButton": {
+            "resumeChat": "Continuar Conversa"
+        }
+    }
+}

.gitignore ADDED Viewed

	@@ -0,0 +1,3 @@

+.env
+__pycache__/
+venv/

Dockerfile ADDED Viewed

	@@ -0,0 +1,11 @@

+FROM python:3.9
+# Run as an unprivileged user with uid 1000 instead of root.
+RUN useradd -m -u 1000 user
+USER user
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:$PATH
+WORKDIR $HOME/app
+# Install dependencies before copying the sources so this layer stays cached
+# until requirements.txt changes. COPY does not expand "~", so the
+# destination is spelled out with $HOME.
+COPY --chown=user ./requirements.txt $HOME/app/requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy the application once, owned by the runtime user.
+COPY --chown=user . $HOME/app
+CMD ["chainlit", "run", "app.py", "--port", "7860"]

README.md CHANGED Viewed

@@ -1,10 +1,10 @@
 ---
-title: Pt Assistant Demo
 emoji: 📚
 colorFrom: red
 colorTo: blue
 sdk: docker
-pinned: false
 license: openrail
 ---

 ---
+title: PT Assistant Demo
 emoji: 📚
 colorFrom: red
 colorTo: blue
 sdk: docker
+pinned: true
 license: openrail
 ---

app.py ADDED Viewed

	@@ -0,0 +1,90 @@

+import json
+import os
+import re
+import chainlit as cl
+from dotenv import load_dotenv
+from langchain.embeddings import CacheBackedEmbeddings
+from langchain.prompts import ChatPromptTemplate
+from langchain.schema import StrOutputParser
+from langchain.schema.runnable import RunnablePassthrough
+from langchain.schema.runnable.config import RunnableConfig
+from langchain.storage import LocalFileStore
+from langchain_core.runnables import RunnableParallel
+from langchain_openai import ChatOpenAI
+from langchain_openai import OpenAIEmbeddings
+from langchain_pinecone import PineconeVectorStore
+import requests
+load_dotenv()
+# Prompt template for the RAG chain; the {context} and {question}
+# placeholders are filled in through ChatPromptTemplate below.
+# NOTE(review): the prompt text contains the typo "succint" and asks for
+# answers that are both "brief" and "as detailed ... as possible" - consider
+# reconciling the wording.
+RAG_PROMPT = """
+CONTEXT:
+{context}
+QUERY:
+{question}
+You are a helpful assistant and you provide summarized
+and succint information. Your answers are accurate yet
+brief, to ensure the reader can obtain the high level
+responses. You will be presented with a question helping
+one of our customers, and your job is to be as detailed
+and as helpful as possible. If the context provided
+doesn't answer the question, please respond with: "I
+require more information in order to better assist you,
+please state your question and what kind of service or
+support you are seeking."
+"""
+# Embedding model handed to the Pinecone vector store.
+core_embeddings_model = OpenAIEmbeddings()
+# The Pinecone index name is read from the INDEX_NAME environment variable.
+vector_store = PineconeVectorStore(index_name=os.getenv(
+    "INDEX_NAME"), embedding=core_embeddings_model)
+# k=1: the retriever asks for a single document per query; on_message only
+# reads the first context entry.
+retriever = vector_store.as_retriever(search_kwargs={"k": 1})
+# The chat model name is read from the OPENAI_MODEL environment variable.
+llm = ChatOpenAI(streaming=True, model=os.getenv("OPENAI_MODEL"))
+rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)
+def format_docs(docs):
+    return "\n\n".join(doc for doc in docs)
+rag_chain_from_docs = (
+    RunnablePassthrough.assign(context=(lambda x: format_docs(x)))
+    | rag_prompt
+    | llm
+    | StrOutputParser()
+)
+rag_chain_with_source = RunnableParallel(
+    {"context": retriever, "question": RunnablePassthrough()}
+).assign(answer=rag_chain_from_docs)
+@cl.on_chat_start
+async def on_chat_start():
+    cl.user_session.set("runnable", rag_chain_with_source)
+@cl.on_message
+async def on_message(message: cl.Message):
+    rag_chain_with_source = cl.user_session.get("runnable")
+    msg = cl.Message(content="")
+    response = rag_chain_with_source.invoke(message.content)
+    if response is not None:
+        metadata = response["context"][0].metadata
+        link = metadata["link"]
+        title = metadata["source_document"]
+        formatted_response = f"{response['answer']} Check out the Youtube video [{title}]({
+            link}!"
+        match = re.search(r"<(\d+\.\d+)>", response["context"][0].page_content)
+        if match:
+            start_time = float(match.group(1))
+            formatted_response = f"{response['answer']} Check out the Youtube video [{
+                title}]({link}&t={int(start_time)}s)!"
+        msg.content = formatted_response
+    await msg.send()

chainlit.md ADDED Viewed

	@@ -0,0 +1,10 @@

+# PT DEMO! 🚀
+Hello from your friendly PT Assistant! 👋
+Enter a question to begin chatting!
+Examples:
+- Who is Aaron LaBauer?
+- Is Aaron an LMT?
+- What does Aaron LaBauer do professionally?

requirements.txt ADDED Viewed

	@@ -0,0 +1,87 @@

+aiofiles==23.2.1
+aiohttp==3.9.5
+aiosignal==1.3.1
+annotated-types==0.7.0
+anyio==3.7.1
+asyncer==0.0.2
+attrs==23.2.0
+bidict==0.23.1
+certifi==2024.2.2
+chainlit==1.1.202
+charset-normalizer==3.3.2
+chevron==0.14.0
+click==8.1.7
+dataclasses-json==0.5.14
+Deprecated==1.2.14
+distro==1.9.0
+fastapi==0.110.3
+fastapi-socketio==0.0.10
+filetype==1.2.0
+frozenlist==1.4.1
+googleapis-common-protos==1.63.0
+grpcio==1.64.0
+h11==0.14.0
+httpcore==1.0.5
+httpx==0.27.0
+idna==3.7
+importlib-metadata==7.0.0
+install==1.3.5
+jsonpatch==1.33
+jsonpointer==2.4
+langchain==0.2.1
+langchain-core==0.2.1
+langchain-openai==0.1.7
+langchain-pinecone==0.1.1
+langchain-text-splitters==0.2.0
+langsmith==0.1.63
+Lazify==0.4.0
+literalai==0.0.601
+marshmallow==3.21.2
+multidict==6.0.5
+mypy-extensions==1.0.0
+nest-asyncio==1.6.0
+numpy==1.26.4
+openai==1.30.3
+opentelemetry-api==1.24.0
+opentelemetry-exporter-otlp==1.24.0
+opentelemetry-exporter-otlp-proto-common==1.24.0
+opentelemetry-exporter-otlp-proto-grpc==1.24.0
+opentelemetry-exporter-otlp-proto-http==1.24.0
+opentelemetry-instrumentation==0.45b0
+opentelemetry-proto==1.24.0
+opentelemetry-sdk==1.24.0
+opentelemetry-semantic-conventions==0.45b0
+orjson==3.10.3
+packaging==23.2
+pinecone-client==3.2.2
+protobuf==4.25.3
+pydantic==2.7.1
+pydantic_core==2.18.2
+PyJWT==2.8.0
+python-dotenv==1.0.1
+python-engineio==4.9.1
+python-multipart==0.0.9
+python-socketio==5.11.2
+PyYAML==6.0.1
+regex==2024.5.15
+requests==2.32.2
+setuptools==70.0.0
+simple-websocket==1.0.0
+sniffio==1.3.1
+SQLAlchemy==2.0.30
+starlette==0.37.2
+syncer==2.0.3
+tenacity==8.3.0
+tiktoken==0.7.0
+tomli==2.0.1
+tqdm==4.66.4
+typing-inspect==0.9.0
+typing_extensions==4.12.0
+uptrace==1.24.0
+urllib3==2.2.1
+uvicorn==0.25.0
+watchfiles==0.20.0
+wrapt==1.16.0
+wsproto==1.2.0
+yarl==1.9.4
+zipp==3.19.0

youtube_to_docstore.py ADDED Viewed

	@@ -0,0 +1,140 @@

+import json
+import os
+from dotenv import load_dotenv
+from langchain.embeddings.cache import CacheBackedEmbeddings
+from langchain_openai import OpenAIEmbeddings
+from langchain.storage import LocalFileStore
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+import pandas as pd
+from pinecone import Pinecone, ServerlessSpec
+import requests
+import scrapetube
+from uuid import uuid4
+from youtube_transcript_api import YouTubeTranscriptApi
+load_dotenv()
+BATCH_LIMIT = 100
+def get_youtube_data(video_id):
+    url = f"https://www.youtube.com/watch?v={video_id}"
+    try:
+        raw = YouTubeTranscriptApi.get_transcript(video_id)
+    except:
+        print(f"No transcript found for {url}")
+        return False
+    # Get metadata
+    response = requests.get(
+        f"https://noembed.com/embed?dataType=json&url={url}")
+    data = json.loads(response.content)
+    title = data["title"]
+    # ' is a reserved character
+    title = title.replace("'", "")
+    df = pd.DataFrame(raw)
+    # Generate the transcript string with timestamps
+    transcript = ' '.join(
+        f"{row['text']}<{row['start']}>" for _, row in df.iterrows())
+    return transcript, title
+def index_video(video_id, embedder, index):
+    try:
+        print(f"Getting transcript & text for video: {video_id}")
+        transcript, title = get_youtube_data(video_id)
+    except Exception as e:
+        print(f"""Error getting transcript for video {video_id}: {e}""")
+        return False
+    url = f"https://www.youtube.com/watch?v={video_id}"
+    text_splitter = RecursiveCharacterTextSplitter(
+        chunk_size=1000,
+        chunk_overlap=100,
+        length_function=len,
+        separators=["\n\n", "\n", " ", ""]
+    )
+    texts = []
+    metadatas = []
+    metadata = {
+        'source_document': title,
+        'link': url
+    }
+    record_texts = text_splitter.split_text(transcript)
+    print(f"Split documents into {len(record_texts)} chunks")
+    record_metadatas = [{"chunk": j, "text": text, **metadata}
+                        for j, text in enumerate(record_texts)]
+    print(f"Uploading {len(record_texts)} chunks to Pinecone...")
+    texts.extend(record_texts)
+    metadatas.extend(record_metadatas)
+    ids = [str(uuid4()) for _ in range(len(texts))]
+    embeds = embedder.embed_documents(texts)
+    try:
+        print("Upserting data to pinecone...")
+        index.upsert(vectors=zip(ids, embeds, metadatas))
+    except Exception as e:
+        print(f"Error upserting data to Pinecone: {e}")
+    if len(texts) >= BATCH_LIMIT:
+        texts = []
+        metadatas = []
+def index_channel(channel_id, embedder, index):
+    print("Indexing channel...")
+    videos = scrapetube.get_channel(channel_id)
+    for video in videos:
+        print(f"Ready to process {video['videoId']}")
+        index_video(video["videoId"], embedder, index)
+def configure_vector_database():
+    print("Configuring Pinecone...")
+    pc = Pinecone(
+        api_key=os.getenv("PINECONE_API_KEY")
+    )
+    if INDEX_NAME not in pc.list_indexes().names():
+        pc.create_index(
+            name=INDEX_NAME,
+            metric='cosine',
+            dimension=1536,
+            spec=ServerlessSpec(
+                cloud="aws", region="us-east-1"
+            )
+        )
+    index = pc.Index(INDEX_NAME)
+    store = LocalFileStore("./cache/")
+    # default model is text-embedding-ada-002
+    core_embeddings_model = OpenAIEmbeddings()
+    embedder = CacheBackedEmbeddings.from_bytes_store(
+        core_embeddings_model,
+        store,
+        namespace=core_embeddings_model.model
+    )
+    return embedder, index
+embedder, index = configure_vector_database()
+index_channel(os.getenv["CHANNEL_ID"], embedder, index)