import json
import os
from unittest.mock import patch
import openai
import pytest
from dotenv import load_dotenv
# from translation_agent.utils import find_sentence_starts
from translation_agent.utils import get_completion
from translation_agent.utils import num_tokens_in_string
from translation_agent.utils import one_chunk_improve_translation
from translation_agent.utils import one_chunk_initial_translation
from translation_agent.utils import one_chunk_reflect_on_translation
from translation_agent.utils import one_chunk_translate_text
load_dotenv()
client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
def test_get_completion_json_mode_api_call():
# Set up the test data
prompt = "What is the capital of France in json?"
system_message = "You are a helpful assistant."
model = "gpt-4-turbo"
temperature = 0.3
json_mode = True
# Call the function with JSON_mode=True
result = get_completion(
prompt, system_message, model, temperature, json_mode
)
# Assert that the result is not None
assert result is not None
# Assert that it can be transformed to dictionary (json)
assert isinstance(json.loads(result), dict)
def test_get_completion_non_json_mode_api_call():
# Set up the test data
prompt = "What is the capital of France?"
system_message = "You are a helpful assistant."
model = "gpt-4-turbo"
temperature = 0.3
json_mode = False
# Call the function with JSON_mode=False
result = get_completion(
prompt, system_message, model, temperature, json_mode
)
# Assert that the result is not None
assert result is not None
# Assert that the result has the expected response format
assert isinstance(result, str)
def test_one_chunk_initial_translation():
# Define test data
source_lang = "English"
target_lang = "Spanish"
source_text = "Hello, how are you?"
expected_translation = "Hola, ¿cómo estás?"
# Mock the get_completion_content function
with patch(
"translation_agent.utils.get_completion"
) as mock_get_completion:
mock_get_completion.return_value = expected_translation
# Call the function with test data
translation = one_chunk_initial_translation(
source_lang, target_lang, source_text
)
# Assert the expected translation is returned
assert translation == expected_translation
# Assert the get_completion_content function was called with the correct arguments
expected_system_message = f"You are an expert linguist, specializing in translation from {source_lang} to {target_lang}."
expected_prompt = f"""This is an {source_lang} to {target_lang} translation, please provide the {target_lang} translation for this text. \
Do not provide any explanations or text apart from the translation.
{source_lang}: {source_text}
{target_lang}:"""
mock_get_completion.assert_called_once_with(
expected_prompt, system_message=expected_system_message
)
def test_one_chunk_reflect_on_translation():
# Define test data
source_lang = "English"
target_lang = "Spanish"
country = "Mexico"
source_text = "This is a sample source text."
translation_1 = "Este es un texto de origen de muestra."
# Define the expected reflection
expected_reflection = "The translation is accurate and conveys the meaning of the source text well. However, here are a few suggestions for improvement:\n\n1. Consider using 'texto fuente' instead of 'texto de origen' for a more natural translation of 'source text'.\n2. Add a definite article before 'texto fuente' to improve fluency: 'Este es un texto fuente de muestra.'\n3. If the context allows, you could also use 'texto de ejemplo' as an alternative translation for 'sample text'."
# Mock the get_completion_content function
with patch(
"translation_agent.utils.get_completion"
) as mock_get_completion:
mock_get_completion.return_value = expected_reflection
# Call the function with test data
reflection = one_chunk_reflect_on_translation(
source_lang, target_lang, source_text, translation_1, country
)
# Assert that the reflection matches the expected reflection
assert reflection == expected_reflection
# Assert that the get_completion_content function was called with the correct arguments
expected_prompt = f"""Your task is to carefully read a source text and a translation from {source_lang} to {target_lang}, and then give constructive criticism and helpful suggestions to improve the translation. \
The final style and tone of the translation should match the style of {target_lang} colloquially spoken in {country}.
The source text and initial translation, delimited by XML tags and , are as follows:
{source_text}
{translation_1}
When writing suggestions, pay attention to whether there are ways to improve the translation's \n\
(i) accuracy (by correcting errors of addition, mistranslation, omission, or untranslated text),\n\
(ii) fluency (by applying {target_lang} grammar, spelling and punctuation rules, and ensuring there are no unnecessary repetitions),\n\
(iii) style (by ensuring the translations reflect the style of the source text and takes into account any cultural context),\n\
(iv) terminology (by ensuring terminology use is consistent and reflects the source text domain; and by only ensuring you use equivalent idioms {target_lang}).\n\
Write a list of specific, helpful and constructive suggestions for improving the translation.
Each suggestion should address one specific part of the translation.
Output only the suggestions and nothing else."""
expected_system_message = f"You are an expert linguist specializing in translation from {source_lang} to {target_lang}. \
You will be provided with a source text and its translation and your goal is to improve the translation."
mock_get_completion.assert_called_once_with(
expected_prompt, system_message=expected_system_message
)
@pytest.fixture
def example_data():
return {
"source_lang": "English",
"target_lang": "Spanish",
"source_text": "This is a sample source text.",
"translation_1": "Esta es una traducción de ejemplo.",
"reflection": "The translation is accurate but could be more fluent.",
}
@patch("translation_agent.utils.get_completion")
def test_one_chunk_improve_translation(mock_get_completion, example_data):
# Set up the mock return value for get_completion_content
mock_get_completion.return_value = (
"Esta es una traducción de ejemplo mejorada."
)
# Call the function with the example data
result = one_chunk_improve_translation(
example_data["source_lang"],
example_data["target_lang"],
example_data["source_text"],
example_data["translation_1"],
example_data["reflection"],
)
# Assert that the function returns the expected translation
assert result == "Esta es una traducción de ejemplo mejorada."
# Assert that get_completion was called with the expected arguments
expected_prompt = f"""Your task is to carefully read, then edit, a translation from {example_data["source_lang"]} to {example_data["target_lang"]}, taking into
account a list of expert suggestions and constructive criticisms.
The source text, the initial translation, and the expert linguist suggestions are delimited by XML tags , and \
as follows:
{example_data["source_text"]}
{example_data["translation_1"]}
{example_data["reflection"]}
Please take into account the expert suggestions when editing the translation. Edit the translation by ensuring:
(i) accuracy (by correcting errors of addition, mistranslation, omission, or untranslated text),
(ii) fluency (by applying Spanish grammar, spelling and punctuation rules and ensuring there are no unnecessary repetitions), \
(iii) style (by ensuring the translations reflect the style of the source text)
(iv) terminology (inappropriate for context, inconsistent use), or
(v) other errors.
Output only the new translation and nothing else."""
expected_system_message = f"You are an expert linguist, specializing in translation editing from English to Spanish."
mock_get_completion.assert_called_once_with(
expected_prompt, expected_system_message
)
def test_one_chunk_translate_text(mocker):
# Define test data
source_lang = "English"
target_lang = "Spanish"
country = "Mexico"
source_text = "Hello, how are you?"
translation_1 = "Hola, ¿cómo estás?"
reflection = "The translation looks good, but it could be more formal."
translation2 = "Hola, ¿cómo está usted?"
# Mock the helper functions
mock_initial_translation = mocker.patch(
"translation_agent.utils.one_chunk_initial_translation",
return_value=translation_1,
)
mock_reflect_on_translation = mocker.patch(
"translation_agent.utils.one_chunk_reflect_on_translation",
return_value=reflection,
)
mock_improve_translation = mocker.patch(
"translation_agent.utils.one_chunk_improve_translation",
return_value=translation2,
)
# Call the function being tested
result = one_chunk_translate_text(
source_lang, target_lang, source_text, country
)
# Assert the expected result
assert result == translation2
# Assert that the helper functions were called with the correct arguments
mock_initial_translation.assert_called_once_with(
source_lang, target_lang, source_text
)
mock_reflect_on_translation.assert_called_once_with(
source_lang, target_lang, source_text, translation_1, country
)
mock_improve_translation.assert_called_once_with(
source_lang, target_lang, source_text, translation_1, reflection
)
def test_num_tokens_in_string():
# Test case 1: Empty string
assert num_tokens_in_string("") == 0
# Test case 2: Simple string
assert num_tokens_in_string("Hello, world!") == 4
# Test case 3: String with special characters
assert (
num_tokens_in_string(
"This is a test string with special characters: !@#$%^&*()"
)
== 16
)
# Test case 4: String with non-ASCII characters
assert num_tokens_in_string("Héllò, wörld! 你好,世界!") == 17
# Test case 5: Long string
long_string = (
"Lorem ipsum dolor sit amet, consectetur adipiscing elit. " * 10
)
assert num_tokens_in_string(long_string) == 101
# Test case 6: Different encoding
assert (
num_tokens_in_string("Hello, world!", encoding_name="p50k_base") == 4
)