Spaces:
Runtime error
Runtime error
| from typing import Any, Optional, Sequence | |
| from langchain_core.documents import BaseDocumentTransformer, Document | |
| from langchain.utilities.vertexai import get_client_info | |
class GoogleTranslateTransformer(BaseDocumentTransformer):
    """Translate text documents using Google Cloud Translation.

    The Translation client and the resource paths (parent location, optional
    model, optional glossary) are built once in ``__init__`` and reused by
    every ``transform_documents`` call.
    """

    def __init__(
        self,
        project_id: str,
        *,
        location: str = "global",
        model_id: Optional[str] = None,
        glossary_id: Optional[str] = None,
        api_endpoint: Optional[str] = None,
    ) -> None:
        """Create the Translation client and pre-compute resource paths.

        Arguments:
            project_id: Google Cloud Project ID.
            location: (Optional) Translate model location.
            model_id: (Optional) Translate model ID to use.
            glossary_id: (Optional) Translate glossary ID to use.
            api_endpoint: (Optional) Regional endpoint to use.

        Raises:
            ImportError: If the Google Cloud Translate client libraries
                cannot be imported.
        """
        # Imported lazily so the module can be imported without the optional
        # Google Cloud dependency installed.
        try:
            from google.api_core.client_options import ClientOptions
            from google.cloud import translate
        except ImportError as exc:
            raise ImportError(
                "Install Google Cloud Translate to use this transformer. "
                "(pip install google-cloud-translate)"
            ) from exc

        self.project_id = project_id
        self.location = location
        self.model_id = model_id
        self.glossary_id = glossary_id

        # Only override the client options when a custom endpoint was given.
        client_options = (
            ClientOptions(api_endpoint=api_endpoint) if api_endpoint else None
        )
        self._client = translate.TranslationServiceClient(
            client_info=get_client_info("translate"),
            client_options=client_options,
        )

        self._parent_path = self._client.common_location_path(project_id, location)
        # For some reason, there's no `model_path()` method for the client,
        # so the model resource name is assembled by hand.
        self._model_path = (
            f"{self._parent_path}/models/{model_id}" if model_id else None
        )
        self._glossary_path = (
            self._client.glossary_path(project_id, location, glossary_id)
            if glossary_id
            else None
        )

    def transform_documents(
        self, documents: Sequence[Document], **kwargs: Any
    ) -> Sequence[Document]:
        """Translate text documents using Google Translate.

        All documents are sent in a single `translate_text` request. Each
        returned document keeps the metadata of its input document and adds
        the `model` and `detected_language_code` reported for its translation.

        Arguments:
            documents: Documents whose `page_content` should be translated.
            source_language_code: ISO 639 language code of the input document.
            target_language_code: ISO 639 language code of the output document.
                For supported languages, refer to:
                https://cloud.google.com/translate/docs/languages
            mime_type: (Optional) Media Type of input text.
                Options: `text/plain`, `text/html`

        Returns:
            One translated document per input document, in input order.
            An empty list when `documents` is empty.

        Raises:
            ImportError: If the Google Cloud Translate client library cannot
                be imported.
            ValueError: If `target_language_code` is missing or empty.
        """
        try:
            from google.cloud import translate
        except ImportError as exc:
            raise ImportError(
                "Install Google Cloud Translate to use this transformer. "
                "(pip install google-cloud-translate)"
            ) from exc

        # Fail fast with a clear message instead of sending a request that
        # lacks its target language.
        target_language_code = kwargs.get("target_language_code")
        if not target_language_code:
            raise ValueError(
                "`target_language_code` is required to translate documents."
            )

        # Nothing to translate: skip the remote call entirely.
        if not documents:
            return []

        # Only attach a glossary config when a glossary was configured; when
        # absent it is passed as None, like the optional `model` field.
        glossary_config = (
            translate.TranslateTextGlossaryConfig(glossary=self._glossary_path)
            if self._glossary_path
            else None
        )

        response = self._client.translate_text(
            request=translate.TranslateTextRequest(
                contents=[doc.page_content for doc in documents],
                parent=self._parent_path,
                model=self._model_path,
                glossary_config=glossary_config,
                source_language_code=kwargs.get("source_language_code"),
                target_language_code=target_language_code,
                mime_type=kwargs.get("mime_type", "text/plain"),
            )
        )

        # If using a glossary, the translations will be in `glossary_translations`.
        translations = response.glossary_translations or response.translations

        return [
            Document(
                page_content=translation.translated_text,
                metadata={
                    **doc.metadata,
                    "model": translation.model,
                    "detected_language_code": translation.detected_language_code,
                },
            )
            for doc, translation in zip(documents, translations)
        ]