ffreemt committed on
Commit
c69be0f
β€’
1 Parent(s): 35ef0f8
Files changed (9) hide show
  1. .gitignore +6 -0
  2. .ruff.toml +18 -0
  3. .stignore +102 -0
  4. README.md +7 -9
  5. README.md- +15 -0
  6. api.py β†’ api.py- +7 -7
  7. demo.py +37 -10
  8. requirements.txt +4 -8
  9. requirements.txt- +11 -0
.gitignore ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ install-mine.sh
2
+ install-sw.sh
3
+ install-sw1.sh
4
+ okteto-up.bat
5
+ okteto.yml
6
+ start-sshd.sh
.ruff.toml ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Assume Python 3.10.
2
+ target-version = "py310"
3
+ # Increase the maximum line length to 1000 characters (effectively disabling the line-length check).
4
+ line-length = 1000
5
+
6
+ # pyflakes, pycodestyle, isort
7
+ # flake8 YTT, pydocstyle D, pylint PLC
8
+ select = ["F", "E", "W", "I001", "YTT", "D", "PLC"]
9
+ # select = ["ALL"]
10
+
11
+ # "D100" Missing docstring in public module
12
+ # D103 Missing docstring in public function
13
+ # D101 Missing docstring in public class
14
+ # `multi-line-summary-first-line` (D212)
15
+ # `one-blank-line-before-class` (D203)
16
+ extend-ignore = ["D100", "D103", "D101", "D212", "D203"]
17
+
18
+ exclude = [".venv"]
.stignore ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .ruff_cache
2
+ .git
3
+ # Byte-compiled / optimized / DLL files
4
+ __pycache__
5
+ *.py[cod]
6
+ *$py.class
7
+
8
+ # C extensions
9
+ *.so
10
+
11
+ # Distribution / packaging
12
+ .Python
13
+ build
14
+ develop-eggs
15
+ dist
16
+ downloads
17
+ eggs
18
+ .eggs
19
+ lib
20
+ lib64
21
+ parts
22
+ sdist
23
+ var
24
+ wheels
25
+ pip-wheel-metadata
26
+ share/python-wheels
27
+ *.egg-info
28
+ .installed.cfg
29
+ *.egg
30
+ MANIFEST
31
+
32
+ # PyInstaller
33
+ # Usually these files are written by a python script from a template
34
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
35
+ *.manifest
36
+ *.spec
37
+
38
+ # Installer logs
39
+ pip-log.txt
40
+ pip-delete-this-directory.txt
41
+
42
+ # Translations
43
+ *.mo
44
+ *.pot
45
+
46
+ # Django stuff:
47
+ *.log
48
+ local_settings.py
49
+ db.sqlite3
50
+
51
+ # Flask stuff:
52
+ instance
53
+ .webassets-cache
54
+
55
+ # Scrapy stuff:
56
+ .scrapy
57
+
58
+ # Sphinx documentation
59
+ docs/_build
60
+
61
+ # PyBuilder
62
+ target
63
+
64
+ # Jupyter Notebook
65
+ .ipynb_checkpoints
66
+
67
+ # IPython
68
+ profile_default
69
+ ipython_config.py
70
+
71
+ # pyenv
72
+ .python-version
73
+
74
+ # celery beat schedule file
75
+ celerybeat-schedule
76
+
77
+ # SageMath parsed files
78
+ *.sage.py
79
+
80
+ # Environments
81
+ .env
82
+ .venv
83
+ env
84
+ venv
85
+ ENV
86
+ env.bak
87
+ venv.bak
88
+
89
+ # Spyder project settings
90
+ .spyderproject
91
+ .spyproject
92
+
93
+ # Rope project settings
94
+ .ropeproject
95
+
96
+ # mypy
97
+ .mypy_cache
98
+ .dmypy.json
99
+ dmypy.json
100
+
101
+ # Pyre type checker
102
+ .pyre
README.md CHANGED
@@ -1,15 +1,13 @@
1
  ---
2
  title: falcon-mini
3
- emoji: πŸ¦…πŸ’Έ
4
- colorFrom: red
5
  colorTo: blue
6
- sdk: docker
7
- pinned: false
8
- app_port: 7860
 
9
  duplicated_from: matthoffner/falcon-mini
10
  ---
11
 
12
- # falcon-7b-instruct-q3
13
-
14
- ## <a href="https://github.com/cmp-nct/ggllm.cpp" target="_blank">ggllm.cpp</a>
15
- ## ctransformers
 
1
  ---
2
  title: falcon-mini
3
+ emoji: πŸ¦…
4
+ colorFrom: yellow
5
  colorTo: blue
6
+ sdk: gradio
7
+ sdk_version: 3.36.1
8
+ app_file: demo.py
9
+ pinned: true
10
  duplicated_from: matthoffner/falcon-mini
11
  ---
12
 
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
README.md- ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: falcon-mini
3
+ emoji: πŸ¦…πŸ’Έ
4
+ colorFrom: red
5
+ colorTo: blue
6
+ sdk: docker
7
+ pinned: false
8
+ app_port: 7860
9
+ duplicated_from: matthoffner/falcon-mini
10
+ ---
11
+
12
+ # falcon-7b-instruct-q3
13
+
14
+ ## <a href="https://github.com/cmp-nct/ggllm.cpp" target="_blank">ggllm.cpp</a>
15
+ ## ctransformers
api.py β†’ api.py- RENAMED
@@ -1,14 +1,14 @@
1
- import fastapi
2
  import json
 
 
 
3
  import uvicorn
 
4
  from fastapi import HTTPException
5
- from fastapi.responses import HTMLResponse
6
  from fastapi.middleware.cors import CORSMiddleware
 
7
  from sse_starlette.sse import EventSourceResponse
8
  from starlette.responses import StreamingResponse
9
- from ctransformers import AutoModelForCausalLM
10
- from pydantic import BaseModel
11
- from typing import List, Dict, Any, Generator
12
 
13
  llm = AutoModelForCausalLM.from_pretrained("TheBloke/falcon-40b-instruct-GGML", model_file="falcon40b-instruct.ggmlv3.q2_K.bin",
14
  model_type="falcon", threads=8)
@@ -41,7 +41,7 @@ async def completion(request: ChatCompletionRequestV0, response_mode=None):
41
  async def chat(request: ChatCompletionRequest):
42
  combined_messages = ' '.join([message.content for message in request.messages])
43
  tokens = llm.tokenize(combined_messages)
44
-
45
  try:
46
  chat_chunks = llm.generate(tokens)
47
  except Exception as e:
@@ -76,4 +76,4 @@ async def chat(request: ChatCompletionRequestV0, response_mode=None):
76
  return EventSourceResponse(server_sent_events(tokens, llm))
77
 
78
  if __name__ == "__main__":
79
- uvicorn.run(app, host="0.0.0.0", port=8000)
 
 
1
  import json
2
+ from typing import Any, Generator, List
3
+
4
+ import fastapi
5
  import uvicorn
6
+ from ctransformers import AutoModelForCausalLM
7
  from fastapi import HTTPException
 
8
  from fastapi.middleware.cors import CORSMiddleware
9
+ from pydantic import BaseModel
10
  from sse_starlette.sse import EventSourceResponse
11
  from starlette.responses import StreamingResponse
 
 
 
12
 
13
  llm = AutoModelForCausalLM.from_pretrained("TheBloke/falcon-40b-instruct-GGML", model_file="falcon40b-instruct.ggmlv3.q2_K.bin",
14
  model_type="falcon", threads=8)
 
41
  async def chat(request: ChatCompletionRequest):
42
  combined_messages = ' '.join([message.content for message in request.messages])
43
  tokens = llm.tokenize(combined_messages)
44
+
45
  try:
46
  chat_chunks = llm.generate(tokens)
47
  except Exception as e:
 
76
  return EventSourceResponse(server_sent_events(tokens, llm))
77
 
78
  if __name__ == "__main__":
79
+ uvicorn.run(app, host="0.0.0.0", port=8000)
demo.py CHANGED
@@ -1,16 +1,43 @@
1
- import os
2
- from ctransformers import AutoModelForCausalLM
3
  import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
 
 
 
 
 
 
 
5
 
6
- llm = AutoModelForCausalLM.from_pretrained("TheBloke/falcon-7b-instruct-GGML", model_file="falcon7b-instruct.ggmlv3.q4_0.bin",
7
- model_type="falcon", threads=8)
 
 
 
 
 
 
8
 
9
 
10
- TITLE = """<h1 align="center">πŸ¦… falcon-mini (7b-q4_0) ggml demo πŸ¦…</h1>"""
11
  USER_NAME = "User"
12
  BOT_NAME = "Falcon"
13
- DEFAULT_INSTRUCTIONS = f"""The following is a conversation between a highly knowledgeable and intelligent AI assistant, called Falcon, and a human user, called User. In the following interactions, User and Falcon will converse in natural language, and Falcon will answer User's questions. Falcon was built to be respectful, polite and inclusive. Falcon was built by the Technology Innovation Institute in Abu Dhabi. Falcon will never decline to answer a question, and always attempts to give an answer that User would be satisfied with. It knows a lot, and always tells the truth. The conversation begins.
14
  """
15
  RETRY_COMMAND = "/retry"
16
  STOP_STR = f"\n{USER_NAME}:"
@@ -147,7 +174,7 @@ def chat():
147
  run_chat,
148
  [inputs, chatbot, instructions, temperature, top_p],
149
  outputs=[chatbot],
150
- show_progress=False,
151
  )
152
  inputs.submit(lambda: "", inputs=None, outputs=inputs)
153
  delete_turn_button.click(delete_last_turn, inputs=[chatbot], outputs=[chatbot])
@@ -155,7 +182,7 @@ def chat():
155
  run_retry,
156
  [inputs, chatbot, instructions, temperature, top_p],
157
  outputs=[chatbot],
158
- show_progress=False,
159
  )
160
  clear_chat_button.click(clear_chat, [], chatbot)
161
 
@@ -176,7 +203,7 @@ def get_demo():
176
  with gr.Row():
177
  with gr.Column():
178
  gr.Markdown(
179
- """**Chat with [Falcon-7B-Instruct](https://huggingface.co/tiiuae/falcon-7b-instruct), brainstorm ideas, discuss your holiday plans, and more!**
180
 
181
  πŸ§ͺ This uses a quantized [ggml](https://github.com/ggerganov/ggml) optimized for CPU. Special thanks to [ggllm.cpp](https://github.com/cmp-nct/ggllm.cpp), [ctransformers](https://github.com/marella/ctransformers), and [TheBloke](https://huggingface.co/TheBloke).
182
  """
@@ -189,4 +216,4 @@ def get_demo():
189
  if __name__ == "__main__":
190
  demo = get_demo()
191
  demo.queue(max_size=128, concurrency_count=8)
192
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
 
 
1
  import gradio as gr
2
+ import psutil
3
+ from ctransformers import AutoModelForCausalLM
4
+ from huggingface_hub import hf_hub_download
5
+
6
+ _ = """
7
+ llm = AutoModelForCausalLM.from_pretrained(
8
+ "TheBloke/falcon-7b-instruct-GGML",
9
+ model_file="falcon7b-instruct.ggmlv3.q4_0.bin",
10
+ model_type="falcon",
11
+ threads=psutil.cpu_count(logical=False))
12
+ """
13
+
14
+ URL = "https://huggingface.co/TheBloke/falcon-7b-instruct-GGML/blob/ggmlv3/falcon7b-instruct.ggmlv3.q4_0.bin"
15
+ repo_id = "/".join(urlparse(URL).path.strip("/").split("/")[:2])
16
+
17
+ model_file = Path(URL).name
18
 
19
+ _ = hf_hub_download(
20
+ repo_id=repo_id,
21
+ revision="ggmlv3",
22
+ filename=model_file,
23
+ local_dir="models",
24
+ # local_dir_use_symlinks=True,
25
+ )
26
 
27
+ llm = AutoModelForCausalLM.from_pretrained(
28
+ # repo_id, # "TheBloke/falcon-7b-instruct-GGML",
29
+ # model_file=model_file,
30
+ # model_file=_,
31
+ _,
32
+ model_type="falcon",
33
+ threads=psutil.cpu_count(logical=False),
34
+ )
35
 
36
 
37
+ TITLE = f"""<h1 align="center">πŸ¦… falcon-mini ({model_file}) ggml demo πŸ¦…</h1>"""
38
  USER_NAME = "User"
39
  BOT_NAME = "Falcon"
40
+ DEFAULT_INSTRUCTIONS = """The following is a conversation between a highly knowledgeable and intelligent AI assistant, called Falcon, and a human user, called User. In the following interactions, User and Falcon will converse in natural language, and Falcon will answer User's questions. Falcon was built to be respectful, polite and inclusive. Falcon was built by the Technology Innovation Institute in Abu Dhabi. Falcon will never decline to answer a question, and always attempts to give an answer that User would be satisfied with. It knows a lot, and always tells the truth. The conversation begins.
41
  """
42
  RETRY_COMMAND = "/retry"
43
  STOP_STR = f"\n{USER_NAME}:"
 
174
  run_chat,
175
  [inputs, chatbot, instructions, temperature, top_p],
176
  outputs=[chatbot],
177
+ show_progress="minimal",
178
  )
179
  inputs.submit(lambda: "", inputs=None, outputs=inputs)
180
  delete_turn_button.click(delete_last_turn, inputs=[chatbot], outputs=[chatbot])
 
182
  run_retry,
183
  [inputs, chatbot, instructions, temperature, top_p],
184
  outputs=[chatbot],
185
+ show_progress="minimal",
186
  )
187
  clear_chat_button.click(clear_chat, [], chatbot)
188
 
 
203
  with gr.Row():
204
  with gr.Column():
205
  gr.Markdown(
206
+ """**Chat with [Falcon-7B-Instruct](https://huggingface.co/tiiuae/falcon-7b-instruct), brainstorm ideas, discuss your holiday plans, and more!**
207
 
208
  πŸ§ͺ This uses a quantized [ggml](https://github.com/ggerganov/ggml) optimized for CPU. Special thanks to [ggllm.cpp](https://github.com/cmp-nct/ggllm.cpp), [ctransformers](https://github.com/marella/ctransformers), and [TheBloke](https://huggingface.co/TheBloke).
209
  """
 
216
  if __name__ == "__main__":
217
  demo = get_demo()
218
  demo.queue(max_size=128, concurrency_count=8)
219
+ demo.launch(server_name="0.0.0.0", server_port=7860)
requirements.txt CHANGED
@@ -1,11 +1,7 @@
1
- uvicorn
2
  markdown
3
- fastapi
4
  loguru
5
- torch
6
- numpy
7
- transformers
8
- accelerate
9
  langchain
10
- sse_starlette
11
- gradio
 
 
 
1
  markdown
 
2
  loguru
3
+ ctransformers
 
 
 
4
  langchain
5
+ # gradio
6
+ psutil
7
+ huggingface-hub
requirements.txt- ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ uvicorn
2
+ markdown
3
+ fastapi
4
+ loguru
5
+ torch
6
+ numpy
7
+ transformers
8
+ accelerate
9
+ langchain
10
+ sse_starlette
11
+ gradio