"""Wrapper around HuggingFace embedding models for self-hosted remote hardware.""" | |
import importlib | |
import logging | |
from typing import Any, Callable, List, Optional | |
from pydantic import BaseModel | |
from langchain.embeddings.self_hosted import SelfHostedEmbeddings | |
DEFAULT_MODEL_NAME = "sentence-transformers/all-mpnet-base-v2" | |
DEFAULT_INSTRUCT_MODEL = "hkunlp/instructor-large" | |
DEFAULT_EMBED_INSTRUCTION = "Represent the document for retrieval: " | |
DEFAULT_QUERY_INSTRUCTION = ( | |
"Represent the question for retrieving supporting documents: " | |
) | |
logger = logging.getLogger(__name__) | |


def _embed_documents(client: Any, *args: Any, **kwargs: Any) -> List[List[float]]:
    """Inference function to send to the remote hardware.

    Accepts a sentence_transformers client (the loaded embedding model) and
    returns a list of embeddings, one for each document in the batch.
    """
    return client.encode(*args, **kwargs)
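
# A sketch of what the inference function does once it reaches the remote
# hardware (assumes sentence-transformers is installed; the texts are
# illustrative, the model name is the module default):
#
#     import sentence_transformers
#     model = sentence_transformers.SentenceTransformer(DEFAULT_MODEL_NAME)
#     vectors = _embed_documents(model, ["first doc", "second doc"])
#     # -> one embedding (a list of floats) per input document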


def load_embedding_model(
    model_id: str, instruct: bool = False, device: int = 0
) -> Any:
    """Load the embedding model."""
    if not instruct:
        import sentence_transformers

        client = sentence_transformers.SentenceTransformer(model_id)
    else:
        from InstructorEmbedding import INSTRUCTOR

        client = INSTRUCTOR(model_id)

    if importlib.util.find_spec("torch") is not None:
        import torch

        cuda_device_count = torch.cuda.device_count()
        if device < -1 or (device >= cuda_device_count):
            raise ValueError(
                f"Got device=={device}, "
                f"device is required to be within [-1, {cuda_device_count})"
            )
        if device < 0 and cuda_device_count > 0:
            logger.warning(
                "Device has %d GPUs available. "
                "Provide device={deviceId} to `from_model_id` to use available "
                "GPUs for execution. deviceId is -1 for CPU and "
                "can be a positive integer associated with CUDA device id.",
                cuda_device_count,
            )
        if device >= 0:
            # Only move the model when a CUDA device was requested; -1 (CPU)
            # is not a valid argument to ``.to()``.
            client = client.to(device)
    return client
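
# Example of loading locally (a sketch, not invoked by this module): the same
# function runhouse runs on the server. Assumes a machine with at least one
# CUDA GPU plus torch and InstructorEmbedding installed; on a CPU-only host,
# the default ``device=0`` raises the ValueError above.
#
#     encoder = load_embedding_model(DEFAULT_MODEL_NAME)  # device 0 by default
#     instructor = load_embedding_model(DEFAULT_INSTRUCT_MODEL, instruct=True)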


class SelfHostedHuggingFaceEmbeddings(SelfHostedEmbeddings, BaseModel):
    """Runs sentence_transformers embedding models on self-hosted remote hardware.

    Supported hardware includes auto-launched instances on AWS, GCP, Azure,
    and Lambda, as well as servers specified
    by IP address and SSH credentials (such as on-prem, or another cloud
    like Paperspace, Coreweave, etc.).

    To use, you should have the ``runhouse`` python package installed.

    Example:
        .. code-block:: python

            from langchain.embeddings import SelfHostedHuggingFaceEmbeddings
            import runhouse as rh
            model_name = "sentence-transformers/all-mpnet-base-v2"
            gpu = rh.cluster(name="rh-a10x", instance_type="A100:1")
            hf = SelfHostedHuggingFaceEmbeddings(model_id=model_name, hardware=gpu)
    """

    client: Any  #: :meta private:
    model_id: str = DEFAULT_MODEL_NAME
    """Model name to use."""
    model_reqs: List[str] = ["./", "sentence_transformers", "torch"]
    """Requirements to install on the hardware to run inference on the model."""
    hardware: Any
    """Remote hardware to send the inference function to."""
    model_load_fn: Callable = load_embedding_model
    """Function to load the model remotely on the server."""
    load_fn_kwargs: Optional[dict] = None
    """Keyword arguments to pass to the model load function."""
    inference_fn: Callable = _embed_documents
    """Inference function to extract the embeddings."""

    def __init__(self, **kwargs: Any):
        """Initialize the remote inference function."""
        load_fn_kwargs = kwargs.pop("load_fn_kwargs", {})
        load_fn_kwargs["model_id"] = load_fn_kwargs.get("model_id", DEFAULT_MODEL_NAME)
        load_fn_kwargs["instruct"] = load_fn_kwargs.get("instruct", False)
        load_fn_kwargs["device"] = load_fn_kwargs.get("device", 0)
        super().__init__(load_fn_kwargs=load_fn_kwargs, **kwargs)
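
# Usage sketch (assumes a running ``runhouse`` cluster; the cluster name and
# texts are illustrative). ``embed_documents`` and ``embed_query`` are
# inherited from ``SelfHostedEmbeddings``:
#
#     import runhouse as rh
#
#     gpu = rh.cluster(name="rh-a10x", instance_type="A100:1")
#     embeddings = SelfHostedHuggingFaceEmbeddings(hardware=gpu)
#     doc_vectors = embeddings.embed_documents(["doc one", "doc two"])
#     query_vector = embeddings.embed_query("what does doc one say?")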


class SelfHostedHuggingFaceInstructEmbeddings(SelfHostedHuggingFaceEmbeddings):
    """Runs InstructorEmbedding embedding models on self-hosted remote hardware.

    Supported hardware includes auto-launched instances on AWS, GCP, Azure,
    and Lambda, as well as servers specified
    by IP address and SSH credentials (such as on-prem, or another
    cloud like Paperspace, Coreweave, etc.).

    To use, you should have the ``runhouse`` python package installed.

    Example:
        .. code-block:: python

            from langchain.embeddings import SelfHostedHuggingFaceInstructEmbeddings
            import runhouse as rh
            model_name = "hkunlp/instructor-large"
            gpu = rh.cluster(name="rh-a10x", instance_type="A100:1")
            hf = SelfHostedHuggingFaceInstructEmbeddings(
                model_id=model_name, hardware=gpu)
    """

    model_id: str = DEFAULT_INSTRUCT_MODEL
    """Model name to use."""
    embed_instruction: str = DEFAULT_EMBED_INSTRUCTION
    """Instruction to use for embedding documents."""
    query_instruction: str = DEFAULT_QUERY_INSTRUCTION
    """Instruction to use for embedding a query."""
    model_reqs: List[str] = ["./", "InstructorEmbedding", "torch"]
    """Requirements to install on the hardware to run inference on the model."""

    def __init__(self, **kwargs: Any):
        """Initialize the remote inference function."""
        load_fn_kwargs = kwargs.pop("load_fn_kwargs", {})
        load_fn_kwargs["model_id"] = load_fn_kwargs.get(
            "model_id", DEFAULT_INSTRUCT_MODEL
        )
        load_fn_kwargs["instruct"] = load_fn_kwargs.get("instruct", True)
        load_fn_kwargs["device"] = load_fn_kwargs.get("device", 0)
        super().__init__(load_fn_kwargs=load_fn_kwargs, **kwargs)

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Compute doc embeddings using a HuggingFace instruct model.

        Args:
            texts: The list of texts to embed.

        Returns:
            List of embeddings, one for each text.
        """
        instruction_pairs = [[self.embed_instruction, text] for text in texts]
        embeddings = self.client(self.pipeline_ref, instruction_pairs)
        return embeddings.tolist()

    def embed_query(self, text: str) -> List[float]:
        """Compute query embeddings using a HuggingFace instruct model.

        Args:
            text: The text to embed.

        Returns:
            Embeddings for the text.
        """
        instruction_pair = [self.query_instruction, text]
        embedding = self.client(self.pipeline_ref, [instruction_pair])[0]
        return embedding.tolist()
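
# Usage sketch for the instruct variant (cluster as above; the custom query
# instruction is an illustrative override of DEFAULT_QUERY_INSTRUCTION):
#
#     import runhouse as rh
#
#     gpu = rh.cluster(name="rh-a10x", instance_type="A100:1")
#     embeddings = SelfHostedHuggingFaceInstructEmbeddings(
#         hardware=gpu,
#         query_instruction="Represent the question for retrieval: ",
#     )
#     query_vector = embeddings.embed_query("How do instructor models differ?")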