| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | from unittest.mock import MagicMock, patch |
| |
|
| | import numpy as np |
| | import pytest |
| |
|
| | from nemo.deploy.nlp.query_llm import NemoQueryLLM, NemoQueryLLMBase, NemoQueryLLMHF, NemoQueryLLMPyTorch |
| |
|
| |
|
class TestNemoQueryLLMBase:
    """Unit tests for the common NemoQueryLLMBase constructor behavior."""

    def test_base_initialization(self):
        """The base class should store the server URL and model name verbatim."""
        endpoint = "localhost:8000"
        name = "test-model"
        base_query = NemoQueryLLMBase(url=endpoint, model_name=name)
        assert base_query.url == endpoint
        assert base_query.model_name == name
| |
|
| |
|
class TestNemoQueryLLMPyTorch:
    """Tests for NemoQueryLLMPyTorch against a mocked Triton ModelClient."""

    @pytest.fixture
    def query(self):
        """PyTorch query helper pointed at a dummy endpoint."""
        return NemoQueryLLMPyTorch(url="localhost:8000", model_name="test-model")

    def test_initialization(self, query):
        """The helper subclasses the base class and keeps its constructor args."""
        assert isinstance(query, NemoQueryLLMBase)
        assert query.url == "localhost:8000"
        assert query.model_name == "test-model"

    @patch('nemo.deploy.nlp.query_llm.ModelClient')
    def test_query_llm_basic(self, client_cls, query):
        """A plain generation request yields an OpenAI-style "choices" dict."""
        fake_client = MagicMock()
        fake_client.infer_batch.return_value = {"sentences": np.array([b"test response"])}
        fake_client.model_config.outputs = [MagicMock(dtype=np.bytes_)]
        # The query helper uses ModelClient as a context manager.
        client_cls.return_value.__enter__.return_value = fake_client

        response = query.query_llm(prompts=["test prompt"], max_length=100, temperature=0.7, top_k=1, top_p=0.9)

        assert isinstance(response, dict)
        assert "choices" in response
        assert response["choices"][0]["text"] == "test response"

    @patch('nemo.deploy.nlp.query_llm.ModelClient')
    def test_query_llm_with_logprobs(self, client_cls, query):
        """Requesting log-probabilities surfaces them inside each choice."""
        fake_client = MagicMock()
        fake_client.infer_batch.return_value = {
            "sentences": np.array([b"test response"]),
            "log_probs": np.array([0.1, 0.2, 0.3]),
        }
        fake_client.model_config.outputs = [MagicMock(dtype=np.bytes_)]
        client_cls.return_value.__enter__.return_value = fake_client

        response = query.query_llm(prompts=["test prompt"], max_length=100, compute_logprob=True)

        assert "logprobs" in response["choices"][0]
        assert "token_logprobs" in response["choices"][0]["logprobs"]
| |
|
| |
|
class TestNemoQueryLLMHF:
    """Tests for NemoQueryLLMHF against a mocked Triton ModelClient."""

    @pytest.fixture
    def query(self):
        """HF query helper pointed at a dummy endpoint."""
        return NemoQueryLLMHF(url="localhost:8000", model_name="test-model")

    def test_initialization(self, query):
        """The helper subclasses the base class and keeps its constructor args."""
        assert isinstance(query, NemoQueryLLMBase)
        assert query.url == "localhost:8000"
        assert query.model_name == "test-model"

    @patch('nemo.deploy.nlp.query_llm.ModelClient')
    def test_query_llm_basic(self, client_cls, query):
        """A plain generation request yields an OpenAI-style "choices" dict."""
        fake_client = MagicMock()
        fake_client.infer_batch.return_value = {"sentences": np.array([b"test response"])}
        fake_client.model_config.outputs = [MagicMock(dtype=np.bytes_)]
        # The query helper uses ModelClient as a context manager.
        client_cls.return_value.__enter__.return_value = fake_client

        response = query.query_llm(prompts=["test prompt"], max_length=100, temperature=0.7, top_k=1, top_p=0.9)

        assert isinstance(response, dict)
        assert "choices" in response
        assert response["choices"][0]["text"] == "test response"

    @patch('nemo.deploy.nlp.query_llm.ModelClient')
    def test_query_llm_with_logits(self, client_cls, query):
        """Requesting output logits surfaces a top-level "logits" entry."""
        fake_client = MagicMock()
        fake_client.infer_batch.return_value = {
            "sentences": np.array([b"test response"]),
            "logits": np.array([[0.1, 0.2, 0.3]]),
        }
        fake_client.model_config.outputs = [MagicMock(dtype=np.bytes_)]
        client_cls.return_value.__enter__.return_value = fake_client

        response = query.query_llm(prompts=["test prompt"], max_length=100, output_logits=True)

        assert "logits" in response
| |
|
| |
|
class TestNemoQueryLLM:
    """Tests for the TensorRT-LLM-style NemoQueryLLM client with mocked Triton clients."""

    @pytest.fixture
    def query(self):
        """Query helper pointed at a dummy endpoint."""
        return NemoQueryLLM(url="localhost:8000", model_name="test-model")

    def test_initialization(self, query):
        """The helper subclasses the base class and keeps its constructor args."""
        assert isinstance(query, NemoQueryLLMBase)
        assert query.url == "localhost:8000"
        assert query.model_name == "test-model"

    @patch('nemo.deploy.nlp.query_llm.ModelClient')
    def test_query_llm_basic(self, client_cls, query):
        """By default the response is a plain list of decoded strings."""
        fake_client = MagicMock()
        fake_client.infer_batch.return_value = {"outputs": np.array([b"test response"])}
        fake_client.model_config.outputs = [MagicMock(dtype=np.bytes_)]
        # The query helper uses ModelClient as a context manager.
        client_cls.return_value.__enter__.return_value = fake_client

        response = query.query_llm(prompts=["test prompt"], max_output_len=100, temperature=0.7, top_k=1, top_p=0.9)

        assert isinstance(response[0], str)
        assert response[0] == "test response"

    @patch('nemo.deploy.nlp.query_llm.ModelClient')
    def test_query_llm_openai_format(self, client_cls, query):
        """openai_format_response=True switches to an OpenAI-style dict."""
        fake_client = MagicMock()
        fake_client.infer_batch.return_value = {"outputs": np.array([b"test response"])}
        fake_client.model_config.outputs = [MagicMock(dtype=np.bytes_)]
        client_cls.return_value.__enter__.return_value = fake_client

        response = query.query_llm(prompts=["test prompt"], max_output_len=100, openai_format_response=True)

        assert isinstance(response, dict)
        assert "choices" in response
        assert response["choices"][0]["text"] == "test response"

    @patch('nemo.deploy.nlp.query_llm.DecoupledModelClient')
    def test_query_llm_streaming(self, client_cls, query):
        """Streaming uses DecoupledModelClient and yields one string per chunk."""
        fake_client = MagicMock()
        fake_client.infer_batch.return_value = [
            {"outputs": np.array([b"test"])},
            {"outputs": np.array([b" response"])},
        ]
        fake_client.model_config.outputs = [MagicMock(dtype=np.bytes_)]
        client_cls.return_value.__enter__.return_value = fake_client

        responses = list(query.query_llm_streaming(prompts=["test prompt"], max_output_len=100))

        assert len(responses) == 2
        assert responses[0] == "test"
        assert responses[1] == " response"

    @patch('nemo.deploy.nlp.query_llm.ModelClient')
    def test_query_llm_with_stop_words(self, client_cls, query):
        """Passing stop_words_list must not break the normal string response."""
        fake_client = MagicMock()
        fake_client.infer_batch.return_value = {"outputs": np.array([b"test response"])}
        fake_client.model_config.outputs = [MagicMock(dtype=np.bytes_)]
        client_cls.return_value.__enter__.return_value = fake_client

        response = query.query_llm(prompts=["test prompt"], max_output_len=100, stop_words_list=["stop"])

        assert isinstance(response[0], str)
        assert response[0] == "test response"

    @patch('nemo.deploy.nlp.query_llm.ModelClient')
    def test_query_llm_with_bad_words(self, client_cls, query):
        """Passing bad_words_list must not break the normal string response."""
        fake_client = MagicMock()
        fake_client.infer_batch.return_value = {"outputs": np.array([b"test response"])}
        fake_client.model_config.outputs = [MagicMock(dtype=np.bytes_)]
        client_cls.return_value.__enter__.return_value = fake_client

        response = query.query_llm(prompts=["test prompt"], max_output_len=100, bad_words_list=["bad"])

        assert isinstance(response[0], str)
        assert response[0] == "test response"
| |
|