hysts HF staff committed on
Commit
c49fb1b
1 Parent(s): e375076
Files changed (7) hide show
  1. .pre-commit-config.yaml +55 -0
  2. .vscode/settings.json +21 -0
  3. LICENSE +21 -0
  4. README.md +1 -0
  5. app.py +136 -0
  6. requirements.txt +9 -0
  7. style.css +16 -0
.pre-commit-config.yaml ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ repos:
2
+ - repo: https://github.com/pre-commit/pre-commit-hooks
3
+ rev: v4.5.0
4
+ hooks:
5
+ - id: check-executables-have-shebangs
6
+ - id: check-json
7
+ - id: check-merge-conflict
8
+ - id: check-shebang-scripts-are-executable
9
+ - id: check-toml
10
+ - id: check-yaml
11
+ - id: end-of-file-fixer
12
+ - id: mixed-line-ending
13
+ args: ["--fix=lf"]
14
+ - id: requirements-txt-fixer
15
+ - id: trailing-whitespace
16
+ - repo: https://github.com/myint/docformatter
17
+ rev: v1.7.5
18
+ hooks:
19
+ - id: docformatter
20
+ args: ["--in-place"]
21
+ - repo: https://github.com/pycqa/isort
22
+ rev: 5.12.0
23
+ hooks:
24
+ - id: isort
25
+ args: ["--profile", "black"]
26
+ - repo: https://github.com/pre-commit/mirrors-mypy
27
+ rev: v1.6.1
28
+ hooks:
29
+ - id: mypy
30
+ args: ["--ignore-missing-imports"]
31
+ additional_dependencies:
32
+ ["types-python-slugify", "types-requests", "types-PyYAML"]
33
+ - repo: https://github.com/psf/black
34
+ rev: 23.10.0
35
+ hooks:
36
+ - id: black
37
+ language_version: python3.10
38
+ args: ["--line-length", "119"]
39
+ - repo: https://github.com/kynan/nbstripout
40
+ rev: 0.6.1
41
+ hooks:
42
+ - id: nbstripout
43
+ args:
44
+ [
45
+ "--extra-keys",
46
+ "metadata.interpreter metadata.kernelspec cell.metadata.pycharm",
47
+ ]
48
+ - repo: https://github.com/nbQA-dev/nbQA
49
+ rev: 1.7.0
50
+ hooks:
51
+ - id: nbqa-black
52
+ - id: nbqa-pyupgrade
53
+ args: ["--py37-plus"]
54
+ - id: nbqa-isort
55
+ args: ["--float-to-top"]
.vscode/settings.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "[python]": {
3
+ "editor.defaultFormatter": "ms-python.black-formatter",
4
+ "editor.formatOnType": true,
5
+ "editor.codeActionsOnSave": {
6
+ "source.organizeImports": true
7
+ }
8
+ },
9
+ "black-formatter.args": [
10
+ "--line-length=119"
11
+ ],
12
+ "isort.args": ["--profile", "black"],
13
+ "flake8.args": [
14
+ "--max-line-length=119"
15
+ ],
16
+ "ruff.args": [
17
+ "--line-length=119"
18
+ ],
19
+ "editor.formatOnSave": true,
20
+ "files.insertFinalNewline": true
21
+ }
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2023 hysts
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md CHANGED
@@ -8,6 +8,7 @@ sdk_version: 3.50.2
8
  app_file: app.py
9
  pinned: false
10
  license: mit
 
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
8
  app_file: app.py
9
  pinned: false
10
  license: mit
11
+ suggested-hardware: t4-small
12
  ---
13
 
14
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+
3
+ import os
4
+ from threading import Thread
5
+ from typing import Iterator
6
+
7
+ import gradio as gr
8
+ import spaces
9
+ import torch
10
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
11
+
12
DESCRIPTION = "# Zephyr-7B-alpha"

# The Space is GPU-only: surface a warning in the UI header when no CUDA
# device is present instead of failing silently.
_has_cuda = torch.cuda.is_available()
if not _has_cuda:
    DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"

# Generation-length limits exposed through the UI sliders.
MAX_MAX_NEW_TOKENS = 2048
DEFAULT_MAX_NEW_TOKENS = 1024
# Prompts longer than this are trimmed from the left before generation.
MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))

if _has_cuda:
    # Load weights in bfloat16 and let accelerate place them on the GPU(s).
    model_id = "HuggingFaceH4/zephyr-7b-alpha"
    model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map="auto")
    tokenizer = AutoTokenizer.from_pretrained(model_id)
25
+
26
+
27
@spaces.GPU
def generate(
    message: str,
    chat_history: list[tuple[str, str]],
    system_prompt: str = "",
    max_new_tokens: int = 1024,
    temperature: float = 0.7,
    top_p: float = 0.95,
    top_k: int = 50,
    repetition_penalty: float = 1.0,
) -> Iterator[str]:
    """Stream a model reply to *message*, yielding the partial text so far.

    The chat history (list of (user, assistant) pairs) and optional system
    prompt are rebuilt into the model's chat-template format before
    generation.
    """
    # Reassemble the full conversation for the chat template.
    conversation = []
    if system_prompt:
        conversation.append({"role": "system", "content": system_prompt})
    for user_turn, assistant_turn in chat_history:
        conversation.append({"role": "user", "content": user_turn})
        conversation.append({"role": "assistant", "content": assistant_turn})
    conversation.append({"role": "user", "content": message})

    input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt", add_generation_prompt=True)
    if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
        # Keep only the most recent tokens so the prompt fits the budget.
        input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
        gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
    input_ids = input_ids.to(model.device)

    streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = {
        "input_ids": input_ids,
        "streamer": streamer,
        "max_new_tokens": max_new_tokens,
        "do_sample": True,
        "top_p": top_p,
        "top_k": top_k,
        "temperature": temperature,
        "num_beams": 1,
        "repetition_penalty": repetition_penalty,
    }
    # Generate on a worker thread so tokens can be streamed as they arrive.
    worker = Thread(target=model.generate, kwargs=generation_kwargs)
    worker.start()

    pieces: list[str] = []
    for chunk in streamer:
        pieces.append(chunk)
        yield "".join(pieces)
70
+
71
+
72
# Widgets shown in the ChatInterface's "Additional inputs" accordion; order
# must match the keyword parameters of `generate` after (message, history).
_additional_inputs = [
    gr.Textbox(
        label="System prompt",
        lines=6,
        placeholder="You are a friendly chatbot who always responds in the style of a pirate.",
    ),
    gr.Slider(label="Max new tokens", minimum=1, maximum=MAX_MAX_NEW_TOKENS, step=1, value=DEFAULT_MAX_NEW_TOKENS),
    gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.7),
    gr.Slider(label="Top-p (nucleus sampling)", minimum=0.05, maximum=1.0, step=0.05, value=0.95),
    gr.Slider(label="Top-k", minimum=1, maximum=1000, step=1, value=50),
    gr.Slider(label="Repetition penalty", minimum=1.0, maximum=2.0, step=0.05, value=1.0),
]

chat_interface = gr.ChatInterface(
    fn=generate,
    additional_inputs=_additional_inputs,
    stop_btn=None,
    examples=[
        ["Hello there! How are you doing?"],
        ["Can you explain briefly to me what is the Python programming language?"],
        ["Explain the plot of Cinderella in a sentence."],
        ["How many hours does it take a man to eat a Helicopter?"],
        ["Write a 100-word article on 'Benefits of Open-Source in AI research'"],
    ],
)
125
+
126
# Assemble the page: title markdown, an optional duplicate button, and the
# chat interface itself. `style.css` centers the title and caps page width.
_show_duplicate = os.getenv("SHOW_DUPLICATE_BUTTON") == "1"
with gr.Blocks(css="style.css") as demo:
    gr.Markdown(DESCRIPTION)
    gr.DuplicateButton(
        value="Duplicate Space for private use",
        elem_id="duplicate-button",
        visible=_show_duplicate,
    )
    chat_interface.render()

if __name__ == "__main__":
    # Bound the request queue so the Space sheds load instead of piling up.
    demo.queue(max_size=20).launch()
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ accelerate==0.23.0
2
+ bitsandbytes==0.41.1
3
+ gradio==3.50.2
4
+ protobuf==3.20.3
5
+ scipy==1.11.2
6
+ sentencepiece==0.1.99
7
+ spaces==0.16.3
8
+ torch==2.0.0
9
+ transformers==4.34.1
style.css ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ h1 {
2
+ text-align: center;
3
+ }
4
+
5
+ #duplicate-button {
6
+ margin: auto;
7
+ color: white;
8
+ background: #1565c0;
9
+ border-radius: 100vh;
10
+ }
11
+
12
+ .contain {
13
+ max-width: 900px;
14
+ margin: auto;
15
+ padding-top: 1.5rem;
16
+ }