Commit d4aa01a
Parent(s): 14052a3
fix: remove zerogpu-offload before prediction to avoid OOM on disk
Files changed:
- prediction.py +4 -0

prediction.py CHANGED
@@ -22,7 +22,10 @@ from transformers import pipeline as hf_pipeline
 import litellm
 
 from tqdm import tqdm
+import subprocess
 
+# https://huggingface.co/spaces/zero-gpu-explorers/README/discussions/132
+subprocess.run("rm -rf /data-nvme/zerogpu-offload/*", env={}, shell=True)
 
 pipeline = hf_pipeline(
     "text-generation",
@@ -120,6 +123,7 @@ class ModelPrediction:
         # https://huggingface.co/docs/inference-endpoints/en/pricing?utm_source=chatgpt.com
         cost_per_second=0.001
         response = outputs[0]["generated_text"][-1]['content']
+        print(response)
         return {
             "response": response,
             "cost": elapsed_time * cost_per_second
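The second hunk only adds a debug print(response) next to the cost accounting. A minimal sketch of the surrounding timing-and-cost pattern, reconstructed from the diff context, is shown below; elapsed_time and outputs are assumed to be produced just above the hunk, and the model name and prompt are placeholders, not this Space's actual configuration.

import time
from transformers import pipeline as hf_pipeline

# Placeholder model and prompt; the real Space configures these elsewhere.
pipe = hf_pipeline("text-generation", model="Qwen/Qwen2.5-0.5B-Instruct")
messages = [{"role": "user", "content": "Say hello."}]

start = time.perf_counter()
outputs = pipe(messages, max_new_tokens=64)
elapsed_time = time.perf_counter() - start  # wall-clock seconds, as assumed

# https://huggingface.co/docs/inference-endpoints/en/pricing
cost_per_second = 0.001  # flat per-second rate used by the Space
response = outputs[0]["generated_text"][-1]["content"]
print(response)  # the debug print added in this commit
result = {"response": response, "cost": elapsed_time * cost_per_second}

With chat-style input, the text-generation pipeline returns the full message list in generated_text, so [-1]["content"] picks out the assistant reply, matching the indexing in the diff. At 0.001 per second, a 30-second generation is reported as a cost of 0.03.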