Spaces:

cowcow02
/

gen-ai-demo-5

Sleeping

App Files Files Community

cowcow02 committed on Oct 17, 2023

Commit

56de25c

•

1 Parent(s): 3acf3f8

Upload folder using huggingface_hub

Browse files

Files changed (11) hide show

.env.template +3 -0
.github/workflows/update_space.yml +28 -0
.gitignore +162 -0
Dockerfile +8 -0
LICENSE +24 -0
README.md +4 -8
app.py +165 -0
docker-compose.yml +12 -0
environments.py +11 -0
requirements.txt +9 -0
train.py +58 -0

.env.template ADDED Viewed

	@@ -0,0 +1,3 @@

+OPENAI_API_KEY=
+PINECONE_API_KEY=
+PINECONE_INDEX=preface-demo

.github/workflows/update_space.yml ADDED Viewed

	@@ -0,0 +1,28 @@

+# Deploys the repository to a Hugging Face Space on every push to master.
+name: Run Python script
+on:
+  push:
+    branches:
+      - master
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+    - name: Checkout
+      uses: actions/checkout@v2
+    - name: Set up Python
+      uses: actions/setup-python@v2
+      with:
+        python-version: '3.9'
+    - name: Install Gradio
+      run: python -m pip install gradio
+    - name: Log in to Hugging Face
+      # Pass the secret through the environment instead of interpolating it
+      # into the command text, so its value can never alter the command itself.
+      env:
+        HF_TOKEN: ${{ secrets.hf_token }}
+      run: python -c 'import os, huggingface_hub; huggingface_hub.login(token=os.environ["HF_TOKEN"])'
+    - name: Deploy to Spaces
+      run: gradio deploy

.gitignore ADDED Viewed

	@@ -0,0 +1,162 @@

+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+.idea/
+.env
+train-assets/

Dockerfile ADDED Viewed

	@@ -0,0 +1,8 @@

+# Image for the app: Python 3.8 base with the project installed under /app/.
+FROM python:3.8
+WORKDIR /app/
+# requirements.txt is copied and installed before the rest of the sources
+# (presumably so the dependency layer is reused when only code changes).
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy the remaining project files into /app/.
+COPY . .

LICENSE ADDED Viewed

	@@ -0,0 +1,24 @@

+BSD 2-Clause License
+Copyright (c) 2023, Inspect Element
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

README.md CHANGED Viewed

@@ -1,12 +1,8 @@
 ---
-title: Gen Ai Demo 5
-emoji: ⚡
-colorFrom: green
-colorTo: yellow
-sdk: gradio
-sdk_version: 3.48.0
 app_file: app.py
-pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: gen-ai-demo-5
 app_file: app.py
+sdk: gradio
+sdk_version: 3.46.0
 ---
+# gen-ai-demo-health-center

app.py ADDED Viewed

	@@ -0,0 +1,165 @@

+import re
+from typing import List
+import gradio as gr
+import openai
+import pinecone
+from llama_index import VectorStoreIndex, StorageContext, ServiceContext
+from llama_index.chat_engine.types import ChatMode
+from llama_index.llms import ChatMessage, MessageRole, OpenAI
+from llama_index.vector_stores import PineconeVectorStore
+from environments import OPENAI_API_KEY, PINECONE_API_KEY, PINECONE_INDEX, PASSWORD, LOCAL
+# When LOCAL is set: start the Phoenix app and register "arize_phoenix" as the
+# llama_index global handler. The imports are kept inside the branch so they
+# only run in that mode.
+if LOCAL:
+    import llama_index
+    import phoenix as px
+    px.launch_app()
+    llama_index.set_global_handler("arize_phoenix")
+# API keys and the index name are read from environments.py.
+openai.api_key = OPENAI_API_KEY
+pinecone.init(
+    api_key=PINECONE_API_KEY,
+    environment='gcp-starter'
+)
+pinecone_index = pinecone.Index(PINECONE_INDEX)
+# LLM wrapped in the llama_index service context used when building the index.
+llm = OpenAI(temperature=0.1, model="gpt-3.5-turbo-instruct")
+service_context = ServiceContext.from_defaults(llm=llm)
+# Fixed refusal sentence; it is interpolated into several SYSTEM_PROMPT rules.
+DENIED_ANSWER_PROMPT = '我是設計用於回答關於延智會所的服務內容'
+# Prompt template made of a context section followed by instruction bullets.
+# The f-string pieces substitute DENIED_ANSWER_PROMPT right here, whereas
+# "{context_str}" lives in a plain (non-f) string and therefore remains a
+# literal placeholder in the resulting text.
+SYSTEM_PROMPT = (
+    f'Context:'
+    "\n--------------------\n"
+    "{context_str}"
+    "\n--------------------\n"
+    "\n"
+    "Instruction:"
+    f'\n- 你必須基於上面提供的資訊 (context) 進行總結，回答用戶的提問。'
+    f'\n- 你必須嚴格判斷 context 內容是否完全符合用戶的問題。如不確定，你必須回答「{DENIED_ANSWER_PROMPT}」為完整回覆，不附加任何資訊或建議。'
+    f'\n- 你不能自行生成非 context 的內容，必須基於 context 原文進行回答。'
+    f'\n- 如沒有與問題符合的 context，必須以「{DENIED_ANSWER_PROMPT}」為完整回答，不附加任何資訊或建議。'
+    f'\n- 你不能進行算術，翻譯，程式碼生成，文章生成等要求。如你被要求進行算術，翻譯，程式碼生成，文章生成等要求，你必須回答「{DENIED_ANSWER_PROMPT}」為完整回覆，不附加任何資訊或建議。'
+    f'\n- 你不能提供或生成 context 不存在的內容，例如名稱，服務，地點，介紹，健康資訊，醫學建議或者醫療相關的解答。如被要求，你必須回答「{DENIED_ANSWER_PROMPT}」為完整回覆，不附加任何資訊或建議。'
+    f'\n- 如果當前的問題沒有任何符合的 context 可供作答，必須以「{DENIED_ANSWER_PROMPT}」為完整回覆，不附加任何資訊或建議。'
+    # Disabled rule (asks for URLs to be shown as a bulleted list):
+    # f'\n- 提供網址時，盡量以列點顯示。'
+)
+# Expose the Pinecone index to llama_index as a vector store. from_documents
+# receives an empty document list, so no new documents are supplied here.
+vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
+storage_context = StorageContext.from_defaults(vector_store=vector_store)
+index = VectorStoreIndex.from_documents([], storage_context=storage_context, service_context=service_context)
+# Chat engine in CONTEXT mode with similarity_top_k=3; SYSTEM_PROMPT is the
+# context template.
+chat_engine = index.as_chat_engine(chat_mode=ChatMode.CONTEXT,
+                                   similarity_top_k=3,
+                                   context_template=SYSTEM_PROMPT,
+                                   )
+# Example questions handed to gr.ChatInterface via its `examples` argument.
+CHAT_EXAMPLES = [
+    '你可以自我介紹嗎？',
+    '可以介紹一下中心嗎？',
+    '中心的開放時間是？',
+    '會員如何申請？',
+]
+def convert_to_chat_messages(history: List[List[str]]) -> List[ChatMessage]:
+    chat_messages = []
+    for conversation in history[-1:]:
+        for index, message in enumerate(conversation):
+            if not message:
+                continue
+            message = re.sub(r'\n&nbsp;\n\n---\n\n參考: \n.*$', '', message, flags=re.DOTALL)
+            role = MessageRole.USER if index % 2 == 0 else MessageRole.ASSISTANT
+            chat_message = ChatMessage(role=role, content=message.strip())
+            chat_messages.append(chat_message)
+    return chat_messages
+def predict(message, history):
+    response = chat_engine.stream_chat(message, chat_history=convert_to_chat_messages(history))
+    partial_message = ""
+    for token in response.response_gen:
+        partial_message = partial_message + token
+        yield partial_message
+    urls = []
+    for source in response.source_nodes:
+        if source.score < 0.78:
+            continue
+        url = source.node.metadata.get('source')
+        if url:
+            urls.append(url)
+    if urls:
+        partial_message = partial_message + "\n&nbsp;\n\n---\n\n參考: \n"
+        for url in list(set(urls)):
+            partial_message = partial_message + f"- {url}\n"
+        yield partial_message
+def predict_without_history(message, history):
+    yield from predict(message, [])
+def predict_with_rag(message, history):
+    return predict(message, history)
+# For 'With Prompt Wrapper' - Add system prompt, no Pinecone
+def predict_with_prompt_wrapper(message, history):
+    yield from _invoke_chatgpt(history, message, is_include_system_prompt=True)
+# For 'Vanilla ChatGPT' - No system prompt
+def predict_vanilla_chatgpt(message, history):
+    yield from _invoke_chatgpt(history, message)
+def _invoke_chatgpt(history, message, is_include_system_prompt=False):
+    history_openai_format = []
+    if is_include_system_prompt:
+        history_openai_format.append({"role": "system", "content": SYSTEM_PROMPT})
+    for human, assistant in history:
+        history_openai_format.append({"role": "user", "content": human})
+        history_openai_format.append({"role": "assistant", "content": assistant})
+    history_openai_format.append({"role": "user", "content": message})
+    response = openai.ChatCompletion.create(
+        model='gpt-3.5-turbo-instruct',
+        messages=history_openai_format,
+        temperature=0.0,
+        stream=True
+    )
+    partial_message = ""
+    for chunk in response:
+        if len(chunk['choices'][0]['delta']) != 0:
+            partial_message = partial_message + chunk['choices'][0]['delta']['content']
+            yield partial_message
+def vote(data: gr.LikeData):
+    if data.liked:
+        gr.Info("You up-voted this response: " + data.value)
+    else:
+        gr.Info("You down-voted this response: " + data.value)
+chatbot = gr.Chatbot()
+with gr.Blocks() as demo:
+    gr.Markdown("# 延智會所智能助理")
+    gr.ChatInterface(predict,
+                     chatbot=chatbot,
+                     examples=CHAT_EXAMPLES,
+                     )
+    chatbot.like(vote, None, None)
+if LOCAL:
+    demo.queue()
+    demo.launch(share=False)
+else:
+    demo.launch(share=False, auth=("demo", PASSWORD))

docker-compose.yml ADDED Viewed

	@@ -0,0 +1,12 @@

+# Single-service compose file for running the app in a container.
+version: "3"
+services:
+  backend:
+    # Image is built from the Dockerfile in this directory.
+    build: .
+    container_name: server
+    # Start the app through the gradio CLI.
+    command: gradio app.py
+    ports:
+      - "7860:7860"
+      - "7861:7861"
+    volumes:
+      # Bind-mount the project directory over /app/ inside the container.
+      - ./:/app/

environments.py ADDED Viewed

	@@ -0,0 +1,11 @@

+import os
+from dotenv import load_dotenv
+load_dotenv()
+OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
+PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')
+PINECONE_INDEX = os.getenv('PINECONE_INDEX')
+PASSWORD = os.getenv('PASSWORD')
+LOCAL = os.getenv('LOCAL')

requirements.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+arize-phoenix[experimental]==0.0.43
+gradio==3.46.0
+openai==0.27.9
+pinecone-client==2.2.2
+python-dotenv==1.0.0
+llama_index==0.8.38
+llama_hub==0.0.34
+nltk==3.8.1
+transformers==4.32.0

train.py ADDED Viewed

	@@ -0,0 +1,58 @@

+from pathlib import Path
+import openai
+import pinecone
+from llama_index import StorageContext, VectorStoreIndex, download_loader
+from llama_index.vector_stores import PineconeVectorStore
+from environments import PINECONE_API_KEY, PINECONE_INDEX, OPENAI_API_KEY
+openai.api_key = OPENAI_API_KEY
+print('Start Loading Data ...')
+# Fetch the PagedCSVReader loader and read the CSV training file into
+# `documents`.
+PagedCSVReader = download_loader("PagedCSVReader")
+loader = PagedCSVReader(encoding="utf-8")
+documents = loader.load_data(file=Path('train-assets/training-target-simple.csv'))
+# Disabled alternative source: load the training data from a PDF instead.
+# PDFReader = download_loader("PDFReader")
+# loader = PDFReader()
+# documents = loader.load_data(file=Path('./train-assets/training-target.pdf'))
+# NOTE(review): UnstructuredURLLoader is still fetched and `urls` is still
+# defined, but the only code that uses them is commented out further down.
+UnstructuredURLLoader = download_loader("UnstructuredURLLoader")
+# Earlier URL list, disabled:
+# urls = [
+#     'https://mosdecc.elchk.org.hk/index.php',
+#     'https://mosdecc.elchk.org.hk/news_details.php?pkey=3762',
+#     'https://mosdecc.elchk.org.hk/center_aboutus.php',
+#     'https://mosdecc.elchk.org.hk/activity_details.php?pkey=1559&pg=1&news_cat=&news_year=&news_month=',
+#     'https://mosdecc.elchk.org.hk/activity_details.php?pkey=1291&pg=1&news_cat=&news_year=&news_month=',
+#     'https://mosdecc.elchk.org.hk/activity_details.php?pkey=1132&pg=1&news_cat=&news_year=&news_month=',
+#     'https://mosdecc.elchk.org.hk/activity_details.php?pkey=1068&pg=2&news_cat=&news_year=all&news_month=all',
+#     'https://mosdecc.elchk.org.hk/activity_details.php?pkey=1008&pg=2&news_cat=&news_year=all&news_month=all',
+#     'https://mosdecc.elchk.org.hk/course.php',
+#     'https://mosdecc.elchk.org.hk/service_member.php',
+#     'https://mosdecc.elchk.org.hk/contactus.php',
+# ]
+urls = [
+    'https://service.elchk.org.hk/unit_service3.php?center=77#intro',
+    'https://service.elchk.org.hk/hot_project.php?pkey=102&tab=1&sub_tab=2',
+    'https://www.etnet.com.hk/www/tc/soin/seg_detail.php?id=561',
+]
+# Disabled alternative source: load `documents` from `urls` with a
+# browser-like User-Agent header.
+# loader = UnstructuredURLLoader(urls=urls, continue_on_failure=True, headers={
+#     "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
+#                   "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537",
+# })
+# documents = loader.load()
+# Connect to the Pinecone index named by PINECONE_INDEX.
+pinecone.init(
+    api_key=PINECONE_API_KEY,
+    environment='gcp-starter'
+)
+pinecone_index = pinecone.Index(PINECONE_INDEX)
+# Build a llama_index VectorStoreIndex from `documents`, with the Pinecone
+# index as the vector store of the storage context.
+vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
+storage_context = StorageContext.from_defaults(vector_store=vector_store)
+index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
+print('Done!')