Spaces:
Runtime error
Runtime error
from typing import Any, Optional, Sequence | |
from langchain_core.documents import BaseDocumentTransformer, Document | |
from langchain.utilities.vertexai import get_client_info | |
class GoogleTranslateTransformer(BaseDocumentTransformer): | |
"""Translate text documents using Google Cloud Translation.""" | |
def __init__( | |
self, | |
project_id: str, | |
*, | |
location: str = "global", | |
model_id: Optional[str] = None, | |
glossary_id: Optional[str] = None, | |
api_endpoint: Optional[str] = None, | |
) -> None: | |
""" | |
Arguments: | |
project_id: Google Cloud Project ID. | |
location: (Optional) Translate model location. | |
model_id: (Optional) Translate model ID to use. | |
glossary_id: (Optional) Translate glossary ID to use. | |
api_endpoint: (Optional) Regional endpoint to use. | |
""" | |
try: | |
from google.api_core.client_options import ClientOptions | |
from google.cloud import translate | |
except ImportError as exc: | |
raise ImportError( | |
"Install Google Cloud Translate to use this parser." | |
"(pip install google-cloud-translate)" | |
) from exc | |
self.project_id = project_id | |
self.location = location | |
self.model_id = model_id | |
self.glossary_id = glossary_id | |
self._client = translate.TranslationServiceClient( | |
client_info=get_client_info("translate"), | |
client_options=( | |
ClientOptions(api_endpoint=api_endpoint) if api_endpoint else None | |
), | |
) | |
self._parent_path = self._client.common_location_path(project_id, location) | |
# For some reason, there's no `model_path()` method for the client. | |
self._model_path = ( | |
f"{self._parent_path}/models/{model_id}" if model_id else None | |
) | |
self._glossary_path = ( | |
self._client.glossary_path(project_id, location, glossary_id) | |
if glossary_id | |
else None | |
) | |
def transform_documents( | |
self, documents: Sequence[Document], **kwargs: Any | |
) -> Sequence[Document]: | |
"""Translate text documents using Google Translate. | |
Arguments: | |
source_language_code: ISO 639 language code of the input document. | |
target_language_code: ISO 639 language code of the output document. | |
For supported languages, refer to: | |
https://cloud.google.com/translate/docs/languages | |
mime_type: (Optional) Media Type of input text. | |
Options: `text/plain`, `text/html` | |
""" | |
try: | |
from google.cloud import translate | |
except ImportError as exc: | |
raise ImportError( | |
"Install Google Cloud Translate to use this parser." | |
"(pip install google-cloud-translate)" | |
) from exc | |
response = self._client.translate_text( | |
request=translate.TranslateTextRequest( | |
contents=[doc.page_content for doc in documents], | |
parent=self._parent_path, | |
model=self._model_path, | |
glossary_config=translate.TranslateTextGlossaryConfig( | |
glossary=self._glossary_path | |
), | |
source_language_code=kwargs.get("source_language_code", None), | |
target_language_code=kwargs.get("target_language_code"), | |
mime_type=kwargs.get("mime_type", "text/plain"), | |
) | |
) | |
# If using a glossary, the translations will be in `glossary_translations`. | |
translations = response.glossary_translations or response.translations | |
return [ | |
Document( | |
page_content=translation.translated_text, | |
metadata={ | |
**doc.metadata, | |
"model": translation.model, | |
"detected_language_code": translation.detected_language_code, | |
}, | |
) | |
for doc, translation in zip(documents, translations) | |
] | |