transformers-chat

Runtime error

App Files Files Community

transformers-chat / ingest_examples.py

enoreyes

Update code

fa8c8ef over 1 year ago

raw

history blame contribute delete

3.98 kB

	"""Ingest examples into FAISS."""
	import os
	from pathlib import Path
	import pickle
	from langchain.vectorstores import FAISS
	from langchain.embeddings import HuggingFaceInstructEmbeddings
	from langchain.text_splitter import CharacterTextSplitter
	from langchain.prompts.example_selector import \
	SemanticSimilarityExampleSelector

	# Few-shot examples for rephrasing a follow-up question.  Each entry has a
	# raw "question", the preceding "chat_history" (may be empty), and the
	# rewritten "answer" question.  Long histories are split into adjacent
	# string literals purely for readability; the resulting values are unchanged.
	rephrase_documents = [
	    # Follow-up that refers back to the pipelines mentioned in the history.
	    {
	        "question": "how do i load those?",
	        "chat_history": (
	            "Human: What types of tasks can I do with Pipelines?\n"
	            "Assistant: \n\n"
	            "There are a few different types of tasks pipelines can do. "
	            "Some examples: Text classification, Text generation, "
	            "name entity recognition, question answering, summarization, "
	            "translation, image classification, image segmentation, "
	            "object detection, audio classification, "
	            "and visual question answering."
	        ),
	        "answer": "How do I load a pipeline for a specific task",
	    },
	    # No history at all.
	    {
	        "question": "how do i install this package?",
	        "chat_history": "",
	        "answer": "How do I install transformers?",
	    },
	    # Follow-up about models after an AutoClass answer.
	    {
	        "question": "where do i find the models?",
	        "chat_history": (
	            "Human: can you write me a code snippet for that?\n"
	            "Assistant: \n\n"
	            "Yes, you can load a pretained model with the from_pretrained() method. "
	            "Here is a [link](https://huggingface.co/docs/transformers/autoclass_tutorial) "
	            "to the documentation that provides a code snippet "
	            "for loading a pretrained model with AutoClass."
	        ),
	        "answer": "Where do I find the models that can be loaded into an autoclass?",
	    },
	    # New topic: the history is about pipelines, the question is about finetuning.
	    {
	        "question": "how do I finetune a pre-trained model?",
	        "chat_history": (
	            "Human: List all methods of a pipeline please\n"
	            "Assistant: \n\n"
	            "To answer your question, you can find a list of all the methods "
	            "of the Pipeline class in the [API reference documentation]"
	            "(https://huggingface.co/docs/transformers/main_classes/pipelines)."
	        ),
	        "answer": "What are some methods for finetuning a pre-trained model?",
	    },
	    # "that" refers to multi-GPU training from the history.
	    {
	        "question": "can you write me a code snippet for that?",
	        "chat_history": (
	            "Human: how do I do train on multiple gpus?\n"
	            "Assistant: \n\n"
	            "To perform distributed training, you can use the "
	            "[Accelerate](https://huggingface.co/docs/transformers/accelerate) library. "
	            "This example shows how to perform distributed training "
	            "on multiple GPUs with accelerate. "
	            "For more information on distributed training, check out the "
	            "[Full Accelerate Documentation](https://huggingface.co/docs/accelerate/)."
	        ),
	        "answer": "Can you provide a code snippet for training on multiple GPUs with accelerate?",
	    },
	    # "it" refers to finetuning from the history.
	    {
	        "question": "show me how to do it with trainer",
	        "chat_history": (
	            "Human: How do I finetune a pre-trained model?\n"
	            "Assistant: \n\n"
	            "You can fine-tune a pretrained model with 🤗 Transformers Trainer, "
	            "in TensorFlow with Keras, and in native PyTorch. "
	            "For more information on how to do this, visit our "
	            "[training tutorial](https://huggingface.co/docs/transformers/training)"
	        ),
	        "answer": "How do I finetune a pre-trained model with Transformers trainer?",
	    },
	]

	# Embedding model configuration.  Going by the parameter names,
	# `embed_instruction` is the instruction used when embedding stored texts and
	# `query_instruction` the one used for search queries -- confirm against the
	# HuggingFaceInstructEmbeddings documentation.
	model_name = "hkunlp/instructor-large"
	embed_instruction = "Represent the text from the Hugging Face code documentation"
	query_instruction = "Query the most relevant text from the Hugging Face code documentation"

	# One argument per line so each setting is easy to spot and diff.
	embedding = HuggingFaceInstructEmbeddings(
	    model_name=model_name,
	    embed_instruction=embed_instruction,
	    query_instruction=query_instruction,
	)

	# Build the selector that picks the rephrase examples most similar to an
	# incoming input.  Arguments are passed by keyword so their roles are
	# explicit at the call site.
	example_selector = SemanticSimilarityExampleSelector.from_examples(
	    examples=rephrase_documents,  # the pool of examples available to select from
	    embeddings=embedding,  # produces the embeddings used to measure semantic similarity
	    vectorstore_cls=FAISS,  # VectorStore class that stores the embeddings and runs the similarity search
	    k=4,  # number of examples to produce
	)

	# Persist the fully built selector to "rephrase_eg.pkl" in the current working
	# directory so it can be loaded later without re-embedding the examples.
	# NOTE: pickle files must only ever be loaded from trusted sources.
	print("beginning pickle")
	# The dump has to be nested inside the `with` block: it needs the open file
	# handle, and a `with` statement without an indented body is a syntax error.
	with open("rephrase_eg.pkl", "wb") as f:
	    pickle.dump(example_selector, f)

	print("Rephrase pickle complete")