NERDDISCO matthoffner committed
Commit
9eae9de
0 Parent(s):

Duplicate from matthoffner/wizardcoder-ggml


Co-authored-by: Matt Hoffner <matthoffner@users.noreply.huggingface.co>

Files changed (5)
  1. .gitattributes +34 -0
  2. Dockerfile +23 -0
  3. README.md +21 -0
  4. main.py +128 -0
  5. requirements.txt +12 -0
.gitattributes ADDED
@@ -0,0 +1,34 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,23 @@
+ FROM python:latest
+
+ ENV PYTHONUNBUFFERED=1
+
+ EXPOSE 8000
+
+ RUN useradd -m -u 1000 user
+ USER user
+ ENV HOME=/home/user \
+     PATH=/home/user/.local/bin:$PATH
+
+ WORKDIR $HOME/app
+
+ COPY requirements.txt ./
+ RUN pip install --upgrade pip && \
+     pip install -r requirements.txt
+
+
+ COPY --chown=user . $HOME/app
+
+ RUN ls -al
+
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
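For local testing, the image can be built with `docker build -t wizardcoder-ggml .` (the tag name is an arbitrary choice) and started with `docker run -p 8000:8000 wizardcoder-ggml`, which exposes the uvicorn server from the `CMD` line on port 8000, matching the `app_port` declared in README.md.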
README.md ADDED
@@ -0,0 +1,21 @@
+ ---
+ title: wizardcoder
+ emoji: 🪄⚡️
+ sdk: docker
+ app_port: 8000
+ duplicated_from: matthoffner/wizardcoder-ggml
+ ---
+
+ # WizardCoder-ggml
+
+ ## <a href="https://matthoffner-wizardcoder-ggml.hf.space/docs" target="_blank">FastAPI Docs</a>
+
+ ## <a href="https://github.com/ggerganov/ggml" target="_blank">ggml</a>
+ ## <a href="https://github.com/marella/ctransformers" target="_blank">ctransformers</a>
+
+ ### Updates
+
+ * Refactored /v1/chat/completions to match the OpenAI spec
+ * Added /v1/chat/completions
+ * [Started using ctransformers](https://github.com/marella/ctransformers)
+ * [Added starcoder example](https://github.com/ggerganov/ggml/tree/master/examples/starcoder)
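As a quick smoke test of the server defined in `main.py` below, the non-streaming `/v1/completions` endpoint can be exercised with `requests`; the base URL and prompt here are placeholders, not part of the commit:

```python
import requests

# Placeholder base URL; point this at a local run or the deployed Space.
BASE_URL = "http://localhost:8000"

# /v1/completions accepts a bare prompt and returns the generated text.
resp = requests.post(f"{BASE_URL}/v1/completions", json={"prompt": "def fibonacci(n):"})
resp.raise_for_status()
print(resp.json())
```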
main.py ADDED
@@ -0,0 +1,128 @@
+ import json
+ from typing import List, Generator
+
+ import fastapi
+ import uvicorn
+ from fastapi import HTTPException, Request
+ from fastapi.responses import HTMLResponse
+ from fastapi.middleware.cors import CORSMiddleware
+ from sse_starlette.sse import EventSourceResponse
+ from ctransformers import AutoModelForCausalLM
+ from pydantic import BaseModel
+
+ # Load the quantized WizardCoder GGML weights (starcoder architecture) via ctransformers.
+ llm = AutoModelForCausalLM.from_pretrained(
+     "TheBloke/WizardCoder-15B-1.0-GGML",
+     model_file="WizardCoder-15B-1.0.ggmlv3.q5_0.bin",
+     model_type="starcoder",
+     threads=8,
+ )
+
+ app = fastapi.FastAPI(title="🪄WizardCoder💫")
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],
+     allow_credentials=True,
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+
+
+ @app.get("/")
+ async def index():
+     html_content = """
+     <html>
+         <head></head>
+         <body style="font-family:system-ui">
+             <h2><a href="https://huggingface.co/TheBloke/WizardCoder-15B-1.0-GGML">wizardcoder-ggml</a></h2>
+             <h2><a href="https://matthoffner-wizardcoder-ggml.hf.space/docs">FastAPI Docs</a></h2>
+             <h2><a href="https://wizardcoder-sandbox.netlify.app">Wizardcoder Sandbox</a></h2>
+             <h2><a href="https://matthoffner-monacopilot.hf.space">monacopilot</a></h2>
+         </body>
+     </html>
+     """
+     return HTMLResponse(content=html_content, status_code=200)
+
+
+ class ChatCompletionRequestV0(BaseModel):
+     prompt: str
+
+
+ class Message(BaseModel):
+     role: str
+     content: str
+
+
+ class ChatCompletionRequest(BaseModel):
+     messages: List[Message]
+     max_tokens: int = 250
+
+
+ @app.post("/v1/completions")
+ async def completion(request: ChatCompletionRequestV0):
+     # Non-streaming completion: run the model over the raw prompt.
+     response = llm(request.prompt)
+     return response
+
+
+ async def generate_response(chat_chunks, llm):
+     # Emit one OpenAI-style SSE chunk per generated token, then a [DONE] sentinel.
+     for chat_chunk in chat_chunks:
+         response = {
+             'choices': [
+                 {
+                     'message': {
+                         'role': 'system',
+                         'content': llm.detokenize(chat_chunk)
+                     },
+                     'finish_reason': 'stop' if llm.is_eos_token(chat_chunk) else 'unknown'
+                 }
+             ]
+         }
+         yield dict(data=json.dumps(response))
+     yield dict(data="[DONE]")
+
+
+ @app.post("/v1/chat/completions")
+ async def chat(request: ChatCompletionRequest):
+     combined_messages = ' '.join([message.content for message in request.messages])
+     tokens = llm.tokenize(combined_messages)
+
+     try:
+         chat_chunks = llm.generate(tokens)
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=str(e))
+
+     return EventSourceResponse(generate_response(chat_chunks, llm))
+
+
+ async def stream_response(tokens, llm):
+     try:
+         iterator: Generator = llm.generate(tokens)
+         for chat_chunk in iterator:
+             response = {
+                 'choices': [
+                     {
+                         'message': {
+                             'role': 'system',
+                             'content': llm.detokenize(chat_chunk)
+                         },
+                         'finish_reason': 'stop' if llm.is_eos_token(chat_chunk) else 'unknown'
+                     }
+                 ]
+             }
+             yield dict(data=json.dumps(response))
+         yield dict(data="[DONE]")
+     except Exception as e:
+         print(f"Exception in event publisher: {str(e)}")
+
+
+ @app.post("/v2/chat/completions")
+ async def chatV2_endpoint(request: Request, body: ChatCompletionRequest):
+     combined_messages = ' '.join([message.content for message in body.messages])
+     tokens = llm.tokenize(combined_messages)
+
+     return EventSourceResponse(stream_response(tokens, llm))
+
+
+ @app.post("/v0/chat/completions")
+ async def chat_v0(request: ChatCompletionRequestV0):
+     # Legacy endpoint: streams bare detokenized strings instead of OpenAI-shaped chunks.
+     tokens = llm.tokenize(request.prompt)
+
+     async def server_sent_events(tokens, llm):
+         for chat_chunk in llm.generate(tokens):
+             yield dict(data=json.dumps(llm.detokenize(chat_chunk)))
+         yield dict(data="[DONE]")
+
+     return EventSourceResponse(server_sent_events(tokens, llm))
+
+
+ if __name__ == "__main__":
+     uvicorn.run(app, host="0.0.0.0", port=8000)
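A minimal client sketch for the streaming chat endpoint, assuming the server is reachable on localhost:8000: it reads the `data:` lines emitted by `EventSourceResponse` and stops at the `[DONE]` sentinel.

```python
import json
import requests

# Placeholder base URL; adjust for your deployment.
BASE_URL = "http://localhost:8000"

payload = {"messages": [{"role": "user", "content": "Write a hello world program in Python"}]}

# Stream the SSE response; each event arrives as a `data: ...` line.
with requests.post(f"{BASE_URL}/v1/chat/completions", json=payload, stream=True) as resp:
    resp.raise_for_status()
    for line in resp.iter_lines(decode_unicode=True):
        if not line or not line.startswith("data: "):
            continue
        data = line[len("data: "):]
        if data == "[DONE]":
            break
        chunk = json.loads(data)
        print(chunk["choices"][0]["message"]["content"], end="", flush=True)
print()
```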
requirements.txt ADDED
@@ -0,0 +1,12 @@
+ uvicorn
+ markdown
+ fastapi
+ loguru
+ torch
+ numpy
+ transformers
+ ctransformers
+ accelerate
+ langchain
+ sse_starlette
+ anyio