runtime error

e-packages/anyio/to_thread.py", line 56, in run_sync return await get_async_backend().run_sync_in_worker_thread( File "/usr/local/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 2144, in run_sync_in_worker_thread return await future File "/usr/local/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 851, in run result = context.run(func, *args) File "/usr/local/lib/python3.10/site-packages/gradio/utils.py", line 559, in run_sync_iterator_async return next(iterator) File "/home/user/app/app.py", line 35, in generate stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False) File "/usr/local/lib/python3.10/site-packages/huggingface_hub/inference/_client.py", line 1841, in text_generation raise_text_generation_error(e) File "/usr/local/lib/python3.10/site-packages/huggingface_hub/inference/_common.py", line 470, in raise_text_generation_error raise http_error File "/usr/local/lib/python3.10/site-packages/huggingface_hub/inference/_client.py", line 1817, in text_generation bytes_output = self.post(json=payload, model=model, task="text-generation", stream=stream) # type: ignore File "/usr/local/lib/python3.10/site-packages/huggingface_hub/inference/_client.py", line 267, in post hf_raise_for_status(response) File "/usr/local/lib/python3.10/site-packages/huggingface_hub/utils/_errors.py", line 367, in hf_raise_for_status raise HfHubHTTPError(message, response=response) from e huggingface_hub.utils._errors.HfHubHTTPError: (Request ID: ZxfqtIXhFxNzAEFfdaYYk) 403 Forbidden: None. Cannot access content at: https://api-inference.huggingface.co/models/BioMistral/BioMistral-7B. If you are trying to create or update content,make sure you have a token with the `write` role. The model BioMistral/BioMistral-7B is too large to be loaded automatically (14GB > 10GB). Please use Spaces (https://huggingface.co/spaces) or Inference Endpoints (https://huggingface.co/inference-endpoints).

Container logs:

Fetching error logs...