import gradio as gr
from fastapi import FastAPI, Request
import uvicorn
from sentence_transformers import SentenceTransformer
from sentence_transformers.util import cos_sim
from sentence_transformers.quantization import quantize_embeddings
import spaces
from gradio_client import Client
import json
import os
app = FastAPI()


@app.post("/v1/embeddings")
async def openai_embeddings(request: Request):
    """OpenAI-compatible embeddings endpoint that proxies to a Hugging Face Space.

    Expected JSON body:
      - 'model': the Space path to call (e.g. "user/space").
      - 'input': the text to embed.
      - 'ApiName' (optional): the Gradio endpoint name; defaults to "/embed".

    An 'Authorization: Bearer <token>' header, when present and well-formed,
    is forwarded to the Space as the HF token.

    Returns a dict shaped like OpenAI's /v1/embeddings response; token usage
    is not tracked, so both usage counters are reported as 0.
    """
    body = await request.json()
    print(body)

    # Extract the bearer token, tolerating a missing or malformed header
    # (the original code raised IndexError on a header without a space).
    bearer_token = None
    auth_header = request.headers.get("authorization")
    if auth_header:
        parts = auth_header.split(" ", 1)
        if len(parts) == 2:  # e.g. "Bearer <token>"
            bearer_token = parts[1]
            print("Using token...")

    space_path = body["model"]
    print("Creating client...")
    space_client = Client(space_path, hf_token=bearer_token)

    # Fall back to the conventional "/embed" endpoint when none is given.
    api_name = body.get("ApiName") or "/embed"

    text = body["input"]
    result = space_client.predict(text=text, api_name=api_name)
    # The Space returns the embedding as a JSON-encoded string.
    embeddings = json.loads(result)

    return {
        "object": "list",
        "data": [{
            "object": "embeddings",
            "embedding": embeddings,
            "index": 0,
        }],
        "model": space_path,
        "usage": {
            "prompt_tokens": 0,
            "total_tokens": 0,
        },
    }
# Public host of this Space; fall back to localhost for local runs.
# (An unset or empty SPACE_HOST both resolve to "localhost", exactly as before.)
SpaceHost = os.environ.get("SPACE_HOST") or "localhost"
with gr.Blocks() as demo:
    # Instructions shown to the user, including a ready-made T-SQL template
    # that points at this Space's /v1/embeddings endpoint.
    gr.Markdown(f"""
This Space allows you to connect SQL Server 2025 with Hugging Face to generate embeddings!
First, create a ZeroGPU Space that exports an endpoint called embed.
That endpoint must accept a parameter called text.
Then, create the external model using T-SQL:
```sql
CREATE EXTERNAL MODEL HuggingFace
WITH (
     LOCATION = 'https://{SpaceHost}/v1/embeddings',
     API_FORMAT = 'OpenAI',
     MODEL_TYPE = EMBEDDINGS,
     MODEL = 'user/space'
);
```
If you prefer, just type the Space name into the field below and we will generate the right T-SQL command for you!
""")
    SpaceName = gr.Textbox(label="Space", submit_btn=True)
    EndpointName = gr.Textbox(value="/embed", label="EndpointName")
    # Output box for the generated statement; labeled so the UI is self-explanatory.
    tsqlCommand = gr.Textbox(label="T-SQL command", lines=5)

    def UpdateTsql(space):
        """Return the CREATE EXTERNAL MODEL statement for the given Space name."""
        return f"""
CREATE EXTERNAL MODEL HuggingFace
WITH (
     LOCATION = 'https://{SpaceHost}/v1/embeddings',
     API_FORMAT = 'OpenAI',
     MODEL_TYPE = EMBEDDINGS,
     MODEL = '{space}'
)
"""

    SpaceName.submit(UpdateTsql, [SpaceName], [tsqlCommand])
# NOTE: an earlier workaround launched the demo directly so ZeroGPU would work
# inside this same Space; the UI is now mounted onto the FastAPI app instead,
# so /v1/embeddings and the Gradio UI share a single server.
print("Mounting app...")
GradioApp = gr.mount_gradio_app(app, demo, path="", ssr_mode=False)
if __name__ == "__main__":
    # Serve both the FastAPI endpoint and the mounted Gradio UI on port 7860
    # (the standard port for Hugging Face Spaces).
    print("Running uvicorn...")
    uvicorn.run(GradioApp, host="0.0.0.0", port=7860)