Please help

#1 opened by Tonic

Loading checkpoint shards: 100%|██████████| 4/4 [00:08<00:00, 2.16s/it]
Running on local URL: http://0.0.0.0:7860
/usr/local/lib/python3.10/site-packages/gradio/blocks.py:2389: UserWarning: Setting share=True is not supported on Hugging Face Spaces
warnings.warn(

To create a public link, set share=True in launch().
/home/user/.cache/huggingface/modules/transformers_modules/nvidia/NV-Embed-v1/19686aea068213164305a7b4c956a0cbf53e0182/modeling_nvembed.py:344: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
'input_ids': torch.tensor(batch_dict.get('input_ids').to(batch_dict.get('input_ids')).long()),
Traceback (most recent call last):
File "/usr/local/lib/python3.10/site-packages/spaces/zero/wrappers.py", line 216, in thread_wrapper
res = future.result()
File "/usr/local/lib/python3.10/concurrent/futures/_base.py", line 451, in result
return self.__get_result()
File "/usr/local/lib/python3.10/concurrent/futures/_base.py", line 403, in __get_result
raise self._exception
File "/usr/local/lib/python3.10/concurrent/futures/thread.py", line 58, in run
result = self.fn(*self.args, **self.kwargs)
File "/home/user/app/app.py", line 129, in compute_embeddings
query_embeddings = model.encode(queries, instruction=query_prefix, max_length=4096)
File "/usr/local/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/home/user/.cache/huggingface/modules/transformers_modules/nvidia/NV-Embed-v1/19686aea068213164305a7b4c956a0cbf53e0182/modeling_nvembed.py", line 417, in encode
return self(features)
File "/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/home/user/.cache/huggingface/modules/transformers_modules/nvidia/NV-Embed-v1/19686aea068213164305a7b4c956a0cbf53e0182/modeling_nvembed.py", line 391, in forward
outputs = self.embedding_model(
File "/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/home/user/.cache/huggingface/modules/transformers_modules/nvidia/NV-Embed-v1/19686aea068213164305a7b4c956a0cbf53e0182/modeling_nvembed.py", line 143, in forward
layer_outputs = decoder_layer(
File "/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/usr/local/lib/python3.10/site-packages/transformers/models/mistral/modeling_mistral.py", line 751, in forward
hidden_states = self.mlp(hidden_states)
File "/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/usr/local/lib/python3.10/site-packages/transformers/models/mistral/modeling_mistral.py", line 178, in forward
return self.down_proj(self.act_fn(self.gate_proj(x)) * self.up_proj(x))
File "/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/usr/local/lib/python3.10/site-packages/torch/nn/modules/linear.py", line 116, in forward
return F.linear(input, self.weight, self.bias)
RuntimeError: NVML_SUCCESS == r INTERNAL ASSERT FAILED at "../c10/cuda/CUDACachingAllocator.cpp":830, please report a bug to PyTorch.
Traceback (most recent call last):
File "/usr/local/lib/python3.10/site-packages/gradio/queueing.py", line 528, in process_events
response = await route_utils.call_process_api(
File "/usr/local/lib/python3.10/site-packages/gradio/route_utils.py", line 270, in call_process_api
output = await app.get_blocks().process_api(
File "/usr/local/lib/python3.10/site-packages/gradio/blocks.py", line 1908, in process_api
result = await self.call_function(
File "/usr/local/lib/python3.10/site-packages/gradio/blocks.py", line 1485, in call_function
prediction = await anyio.to_thread.run_sync(
File "/usr/local/lib/python3.10/site-packages/anyio/to_thread.py", line 56, in run_sync
return await get_async_backend().run_sync_in_worker_thread(
File "/usr/local/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 2177, in run_sync_in_worker_thread
return await future
File "/usr/local/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 859, in run
result = context.run(func, *args)
File "/usr/local/lib/python3.10/site-packages/gradio/utils.py", line 808, in wrapper
response = f(*args, **kwargs)
File "/home/user/app/app.py", line 144, in compute_similarity
embeddings1 = compute_embeddings(selected_task, sentence1)
File "/usr/local/lib/python3.10/site-packages/spaces/zero/wrappers.py", line 177, in gradio_handler
raise res.value
RuntimeError: NVML_SUCCESS == r INTERNAL ASSERT FAILED at "../c10/cuda/CUDACachingAllocator.cpp":830, please report a bug to PyTorch.

Nope, that didn't help + no change.

@Tonic it did; at the very least the application is starting now. Now you have to fix the application logic. I have no clue what you have in the logs, but it's 100% different from before.

Otherwise you have to check what is happening on this line: https://huggingface.co/spaces/Tonic/Nvidia-Embed-V1/blob/main/app.py#L136

  • what are the inputs
  • what are their types
  • add a print somewhere for better debugging (e.g. the sketch below)
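
Something along these lines would do it (a rough sketch only: the function signature and the task-to-prefix lookup are guesses reconstructed from the traceback, not the Space's actual code; only the model.encode(...) call is taken verbatim from the logs):

```python
# Debugging sketch: print the inputs and their types right before the call
# that fails in the traceback. `model` is the loaded NV-Embed-v1 model from
# app.py; `task_prefixes` is an assumed dict mapping task names to instructions.
def compute_embeddings(model, task_prefixes, selected_task, input_text):
    print(f"selected_task={selected_task!r} ({type(selected_task).__name__})")
    print(f"input_text={input_text!r} ({type(input_text).__name__})")

    # encode() expects a list of strings, so normalize a single string here.
    queries = [input_text] if isinstance(input_text, str) else list(input_text)
    query_prefix = task_prefixes.get(selected_task, "")
    print(f"queries={queries!r} query_prefix={query_prefix!r}")

    # Same call as in the traceback (app.py line 129).
    return model.encode(queries, instruction=query_prefix, max_length=4096)
```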

No, it's really not that line; it's an install issue with the torch and CUDA versions, or prove me wrong. The decorator is not a problem, and multiple Spaces decorators are not the problem either: if you check the code you can see I did revert it. And as you say, the application is starting now, which it also did before by the way, with the same stack trace.
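
If it is an install mismatch between torch and CUDA, a quick sanity check from inside the Space would show it (a minimal sketch; it just prints what the runtime actually sees, ideally from inside the @spaces.GPU-decorated handler so a GPU is attached):

```python
import torch

# Print what the runtime reports about the torch build and CUDA availability.
# On a ZeroGPU Space, run this inside the @spaces.GPU-decorated function,
# since the GPU is only attached for the duration of that call.
print("torch version:", torch.__version__)
print("torch built with CUDA:", torch.version.cuda)
print("cuda available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("device:", torch.cuda.get_device_name(0))
    print("allocated MB:", torch.cuda.memory_allocated(0) / 1e6)
```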

/home/user/.cache/huggingface/modules/transformers_modules/nvidia/NV-Embed-v1/19686aea068213164305a7b4c956a0cbf53e0182/modeling_nvembed.py:344: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
'input_ids': torch.tensor(batch_dict.get('input_ids').to(batch_dict.get('input_ids')).long()),
Traceback (most recent call last):
File "/usr/local/lib/python3.10/site-packages/spaces/zero/wrappers.py", line 216, in thread_wrapper
res = future.result()
File "/usr/local/lib/python3.10/concurrent/futures/_base.py", line 451, in result
return self.__get_result()
File "/usr/local/lib/python3.10/concurrent/futures/_base.py", line 403, in __get_result
raise self._exception
File "/usr/local/lib/python3.10/concurrent/futures/thread.py", line 58, in run
result = self.fn(*self.args, **self.kwargs)
File "/home/user/app/app.py", line 147, in compute_similarity
except KeyError:
File "/home/user/app/app.py", line 131, in compute_embeddings
File "/usr/local/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/home/user/.cache/huggingface/modules/transformers_modules/nvidia/NV-Embed-v1/19686aea068213164305a7b4c956a0cbf53e0182/modeling_nvembed.py", line 417, in encode
return self(features)
File "/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/home/user/.cache/huggingface/modules/transformers_modules/nvidia/NV-Embed-v1/19686aea068213164305a7b4c956a0cbf53e0182/modeling_nvembed.py", line 391, in forward
outputs = self.embedding_model(
File "/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/home/user/.cache/huggingface/modules/transformers_modules/nvidia/NV-Embed-v1/19686aea068213164305a7b4c956a0cbf53e0182/modeling_nvembed.py", line 143, in forward
layer_outputs = decoder_layer(
File "/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/usr/local/lib/python3.10/site-packages/transformers/models/mistral/modeling_mistral.py", line 751, in forward
hidden_states = self.mlp(hidden_states)
File "/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/usr/local/lib/python3.10/site-packages/transformers/models/mistral/modeling_mistral.py", line 178, in forward
return self.down_proj(self.act_fn(self.gate_proj(x)) * self.up_proj(x))
File "/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/usr/local/lib/python3.10/site-packages/torch/nn/modules/linear.py", line 116, in forward
return F.linear(input, self.weight, self.bias)
RuntimeError: NVML_SUCCESS == r INTERNAL ASSERT FAILED at "../c10/cuda/CUDACachingAllocator.cpp":830, please report a bug to PyTorch.
Traceback (most recent call last):
File "/usr/local/lib/python3.10/site-packages/gradio/queueing.py", line 528, in process_events
response = await route_utils.call_process_api(
File "/usr/local/lib/python3.10/site-packages/gradio/route_utils.py", line 270, in call_process_api
output = await app.get_blocks().process_api(
File "/usr/local/lib/python3.10/site-packages/gradio/blocks.py", line 1908, in process_api
result = await self.call_function(
File "/usr/local/lib/python3.10/site-packages/gradio/blocks.py", line 1485, in call_function
prediction = await anyio.to_thread.run_sync(
File "/usr/local/lib/python3.10/site-packages/anyio/to_thread.py", line 56, in run_sync
return await get_async_backend().run_sync_in_worker_thread(
File "/usr/local/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 2177, in run_sync_in_worker_thread
return await future
File "/usr/local/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 859, in run
result = context.run(func, *args)
File "/usr/local/lib/python3.10/site-packages/gradio/utils.py", line 808, in wrapper
response = f(*args, **kwargs)
File "/usr/local/lib/python3.10/site-packages/spaces/zero/wrappers.py", line 177, in gradio_handler
raise res.value
RuntimeError: NVML_SUCCESS == r INTERNAL ASSERT FAILED at "../c10/cuda/CUDACachingAllocator.cpp":830, please report a bug to PyTorch.
