Update app.py
app.py CHANGED

@@ -5,6 +5,8 @@ import torch
 import optimum
 from transformers import (AutoModelForCausalLM, AutoTokenizer, LlamaForCausalLM, LlamaTokenizer, GenerationConfig, pipeline,)
 
+app = FastAPI()
+
 if torch.cuda.is_available():
     print("CUDA is available. GPU will be used.")
 else:
@@ -128,15 +130,5 @@ def generate_response(prompt: str) -> str:
     return generated_text
 
 
-app = FastAPI()
-
-
-# Run the FastAPI app
-async def run_app():
-    await uvicorn.run(app, host="0.0.0.0", port=8000)
-
 
-if __name__ == '__main__':
-    import asyncio
 
-    asyncio.run(run_app())
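After this change, app = FastAPI() sits at module level right after the imports, and the in-file entry point (run_app() plus asyncio.run(run_app())) is removed, so app.py no longer starts its own server; presumably the module-level app is now served by an external ASGI runner. A minimal launcher sketch, assuming the standard uvicorn API and a hypothetical launch.py next to app.py (note that uvicorn.run() is synchronous, which is why the removed await uvicorn.run(...) would not have worked as written):

# launch.py - hypothetical standalone launcher, not part of this commit
import uvicorn

if __name__ == "__main__":
    # "app:app" is the import string for the module-level FastAPI instance in app.py;
    # uvicorn.run() blocks until the server exits, so no asyncio wrapper is needed.
    uvicorn.run("app:app", host="0.0.0.0", port=8000)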