Please help

#1 opened by Tonic

Loading checkpoint shards: 100%|██████████| 4/4 [00:08<00:00, 2.16s/it]
Running on local URL: http://0.0.0.0:7860
/usr/local/lib/python3.10/site-packages/gradio/blocks.py:2389: UserWarning: Setting share=True is not supported on Hugging Face Spaces
warnings.warn(

To create a public link, set share=True in launch().
/home/user/.cache/huggingface/modules/transformers_modules/nvidia/NV-Embed-v1/19686aea068213164305a7b4c956a0cbf53e0182/modeling_nvembed.py:344: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
'input_ids': torch.tensor(batch_dict.get('input_ids').to(batch_dict.get('input_ids')).long()),
Traceback (most recent call last):
File "/usr/local/lib/python3.10/site-packages/spaces/zero/wrappers.py", line 216, in thread_wrapper
res = future.result()
File "/usr/local/lib/python3.10/concurrent/futures/_base.py", line 451, in result
return self.__get_result()
File "/usr/local/lib/python3.10/concurrent/futures/_base.py", line 403, in __get_result
raise self._exception
File "/usr/local/lib/python3.10/concurrent/futures/thread.py", line 58, in run
result = self.fn(*self.args, **self.kwargs)
File "/home/user/app/app.py", line 129, in compute_embeddings
query_embeddings = model.encode(queries, instruction=query_prefix, max_length=4096)
File "/usr/local/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/home/user/.cache/huggingface/modules/transformers_modules/nvidia/NV-Embed-v1/19686aea068213164305a7b4c956a0cbf53e0182/modeling_nvembed.py", line 417, in encode
return self(features)
File "/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/home/user/.cache/huggingface/modules/transformers_modules/nvidia/NV-Embed-v1/19686aea068213164305a7b4c956a0cbf53e0182/modeling_nvembed.py", line 391, in forward
outputs = self.embedding_model(
File "/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/home/user/.cache/huggingface/modules/transformers_modules/nvidia/NV-Embed-v1/19686aea068213164305a7b4c956a0cbf53e0182/modeling_nvembed.py", line 143, in forward
layer_outputs = decoder_layer(
File "/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/usr/local/lib/python3.10/site-packages/transformers/models/mistral/modeling_mistral.py", line 751, in forward
hidden_states = self.mlp(hidden_states)
File "/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/usr/local/lib/python3.10/site-packages/transformers/models/mistral/modeling_mistral.py", line 178, in forward
return self.down_proj(self.act_fn(self.gate_proj(x)) * self.up_proj(x))
File "/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/usr/local/lib/python3.10/site-packages/torch/nn/modules/linear.py", line 116, in forward
return F.linear(input, self.weight, self.bias)
RuntimeError: NVML_SUCCESS == r INTERNAL ASSERT FAILED at "../c10/cuda/CUDACachingAllocator.cpp":830, please report a bug to PyTorch.
Traceback (most recent call last):
File "/usr/local/lib/python3.10/site-packages/gradio/queueing.py", line 528, in process_events
response = await route_utils.call_process_api(
File "/usr/local/lib/python3.10/site-packages/gradio/route_utils.py", line 270, in call_process_api
output = await app.get_blocks().process_api(
File "/usr/local/lib/python3.10/site-packages/gradio/blocks.py", line 1908, in process_api
result = await self.call_function(
File "/usr/local/lib/python3.10/site-packages/gradio/blocks.py", line 1485, in call_function
prediction = await anyio.to_thread.run_sync(
File "/usr/local/lib/python3.10/site-packages/anyio/to_thread.py", line 56, in run_sync
return await get_async_backend().run_sync_in_worker_thread(
File "/usr/local/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 2177, in run_sync_in_worker_thread
return await future
File "/usr/local/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 859, in run
result = context.run(func, *args)
File "/usr/local/lib/python3.10/site-packages/gradio/utils.py", line 808, in wrapper
response = f(*args, **kwargs)
File "/home/user/app/app.py", line 144, in compute_similarity
embeddings1 = compute_embeddings(selected_task, sentence1)
File "/usr/local/lib/python3.10/site-packages/spaces/zero/wrappers.py", line 177, in gradio_handler
raise res.value
RuntimeError: NVML_SUCCESS == r INTERNAL ASSERT FAILED at "../c10/cuda/CUDACachingAllocator.cpp":830, please report a bug to PyTorch.

Nope, that didn't help + no change.

@Tonic it did; at the very least the application is starting now. Now you have to fix the application logic. I have no clue what you have in the logs, but it's 100% different from before.

Otherwise you have to check what is happening on this line: https://huggingface.co/spaces/Tonic/Nvidia-Embed-V1/blob/main/app.py#L136

  • what are the inputs
  • what are their types
  • add a print somewhere for better debugging (e.g. the sketch below)
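
Something along these lines would do it (a rough sketch only: the function signature and the task-to-prefix lookup are guesses reconstructed from the traceback, not the Space's actual code; only the model.encode(...) call is taken verbatim from the logs):

```python
# Debugging sketch: print the inputs and their types right before the call
# that fails in the traceback. `model` is the loaded NV-Embed-v1 model from
# app.py; `task_prefixes` is an assumed dict mapping task names to instructions.
def compute_embeddings(model, task_prefixes, selected_task, input_text):
    print(f"selected_task={selected_task!r} ({type(selected_task).__name__})")
    print(f"input_text={input_text!r} ({type(input_text).__name__})")

    # encode() expects a list of strings, so normalize a single string here.
    queries = [input_text] if isinstance(input_text, str) else list(input_text)
    query_prefix = task_prefixes.get(selected_task, "")
    print(f"queries={queries!r} query_prefix={query_prefix!r}")

    # Same call as in the traceback (app.py line 129).
    return model.encode(queries, instruction=query_prefix, max_length=4096)
```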

No, it's really not that line; it's an install issue with the torch and CUDA versions, or prove me wrong. The decorator is not a problem, and multiple Spaces decorators are not the problem either: if you check the code you can see I did revert it. And as you say, the application is starting now, which it also did before by the way, with the same stack trace.
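
If it is an install mismatch between torch and CUDA, a quick sanity check from inside the Space would show it (a minimal sketch; it just prints what the runtime actually sees, ideally from inside the @spaces.GPU-decorated handler so a GPU is attached):

```python
import torch

# Print what the runtime reports about the torch build and CUDA availability.
# On a ZeroGPU Space, run this inside the @spaces.GPU-decorated function,
# since the GPU is only attached for the duration of that call.
print("torch version:", torch.__version__)
print("torch built with CUDA:", torch.version.cuda)
print("cuda available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("device:", torch.cuda.get_device_name(0))
    print("allocated MB:", torch.cuda.memory_allocated(0) / 1e6)
```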

/home/user/.cache/huggingface/modules/transformers_modules/nvidia/NV-Embed-v1/19686aea068213164305a7b4c956a0cbf53e0182/modeling_nvembed.py:344: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
'input_ids': torch.tensor(batch_dict.get('input_ids').to(batch_dict.get('input_ids')).long()),
Traceback (most recent call last):
File "/usr/local/lib/python3.10/site-packages/spaces/zero/wrappers.py", line 216, in thread_wrapper
res = future.result()
File "/usr/local/lib/python3.10/concurrent/futures/_base.py", line 451, in result
return self.__get_result()
File "/usr/local/lib/python3.10/concurrent/futures/_base.py", line 403, in __get_result
raise self._exception
File "/usr/local/lib/python3.10/concurrent/futures/thread.py", line 58, in run
result = self.fn(*self.args, **self.kwargs)
File "/home/user/app/app.py", line 147, in compute_similarity
except KeyError:
File "/home/user/app/app.py", line 131, in compute_embeddings
File "/usr/local/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/home/user/.cache/huggingface/modules/transformers_modules/nvidia/NV-Embed-v1/19686aea068213164305a7b4c956a0cbf53e0182/modeling_nvembed.py", line 417, in encode
return self(features)
File "/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/home/user/.cache/huggingface/modules/transformers_modules/nvidia/NV-Embed-v1/19686aea068213164305a7b4c956a0cbf53e0182/modeling_nvembed.py", line 391, in forward
outputs = self.embedding_model(
File "/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/home/user/.cache/huggingface/modules/transformers_modules/nvidia/NV-Embed-v1/19686aea068213164305a7b4c956a0cbf53e0182/modeling_nvembed.py", line 143, in forward
layer_outputs = decoder_layer(
File "/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/usr/local/lib/python3.10/site-packages/transformers/models/mistral/modeling_mistral.py", line 751, in forward
hidden_states = self.mlp(hidden_states)
File "/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/usr/local/lib/python3.10/site-packages/transformers/models/mistral/modeling_mistral.py", line 178, in forward
return self.down_proj(self.act_fn(self.gate_proj(x)) * self.up_proj(x))
File "/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/usr/local/lib/python3.10/site-packages/torch/nn/modules/linear.py", line 116, in forward
return F.linear(input, self.weight, self.bias)
RuntimeError: NVML_SUCCESS == r INTERNAL ASSERT FAILED at "../c10/cuda/CUDACachingAllocator.cpp":830, please report a bug to PyTorch.
Traceback (most recent call last):
File "/usr/local/lib/python3.10/site-packages/gradio/queueing.py", line 528, in process_events
response = await route_utils.call_process_api(
File "/usr/local/lib/python3.10/site-packages/gradio/route_utils.py", line 270, in call_process_api
output = await app.get_blocks().process_api(
File "/usr/local/lib/python3.10/site-packages/gradio/blocks.py", line 1908, in process_api
result = await self.call_function(
File "/usr/local/lib/python3.10/site-packages/gradio/blocks.py", line 1485, in call_function
prediction = await anyio.to_thread.run_sync(
File "/usr/local/lib/python3.10/site-packages/anyio/to_thread.py", line 56, in run_sync
return await get_async_backend().run_sync_in_worker_thread(
File "/usr/local/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 2177, in run_sync_in_worker_thread
return await future
File "/usr/local/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 859, in run
result = context.run(func, *args)
File "/usr/local/lib/python3.10/site-packages/gradio/utils.py", line 808, in wrapper
response = f(*args, **kwargs)
File "/usr/local/lib/python3.10/site-packages/spaces/zero/wrappers.py", line 177, in gradio_handler
raise res.value
RuntimeError: NVML_SUCCESS == r INTERNAL ASSERT FAILED at "../c10/cuda/CUDACachingAllocator.cpp":830, please report a bug to PyTorch.
