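"""Text generation with a Quanto int2-quantized model on CPU.

Loads a causal LM (Qwen/Qwen2-0.5B by default) with 2-bit quantized weights,
optionally compiles its forward pass, and exposes a helper that generates
chat completions from a prompt.
"""
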
import sys

import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    Pipeline,
    QuantoConfig,
    pipeline,
)

device = "cpu"


def get_transformers_pipeline(model_name: str = "Qwen/Qwen2-0.5B") -> Pipeline:
    """
    Build a Transformers text-generation pipeline with int2-quantized weights.

    Args:
        model_name (str): The name of the model to load. Defaults to "Qwen/Qwen2-0.5B".

    Returns:
        Pipeline: The Transformers pipeline instance.
    """
    quantization_config = QuantoConfig(weights="int2")
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name, device_map=device, quantization_config=quantization_config
    )
    # torch.compile doesn't support Python 3.12+ yet, so only compile on older versions.
    if sys.version_info < (3, 12):
        model.forward = torch.compile(
            model.forward, mode="reduce-overhead", fullgraph=True
        )
    # The model is already placed on `device`, so no device_map is passed here;
    # passing one alongside an instantiated model conflicts with its placement.
    return pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
    )


def generate_transformers_output(prompt: str, pipe: Pipeline | None = None) -> str:
    """
    Generate a chat completion from the Transformers pipeline.

    Args:
        prompt (str): The prompt to generate the output from.
        pipe (Pipeline | None, optional): The Transformers pipeline to use.
            Defaults to None, in which case a new pipeline is created.

    Returns:
        str: The generated output.
    """
    if pipe is None:
        pipe = get_transformers_pipeline()
    messages = [
        {
            "role": "system",
            "content": "You are a helpful assistant in a university environment. Help professors and students with their questions and problems.",
        },
        {"role": "user", "content": prompt},
    ]
    response = pipe(messages, max_new_tokens=100, do_sample=True)
    # The pipeline returns the full chat history under "generated_text"; the
    # last message is the assistant's reply, and its "content" field holds
    # the generated string.
    return response[0]["generated_text"][-1]["content"]
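

# A minimal usage sketch (an assumption, not part of the original script): it
# requires network access to download the default model from the Hugging Face
# Hub, plus the optimum-quanto package for QuantoConfig. The example prompt is
# hypothetical.
if __name__ == "__main__":
    pipe = get_transformers_pipeline()
    answer = generate_transformers_output(
        "When are office hours usually held?", pipe
    )
    print(answer)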