Spaces:

InventorsHub
/

SwarmChat

Sleeping

SwarmChat / text_processing.py

Update text_processing.py

e979f5e verified 7 months ago

1.55 kB

	from llama_cpp import Llama
	from huggingface_hub import hf_hub_download
	# import spaces
	import functools
	# Download the single GGUF shard by its repo path:

	# llm = Llama(model_path=model_path, n_ctx=1024)#, verbose=True)
	# llm = Llama(
	# model_path=model_path,
	# n_ctx=512, # down from 4096
	# low_vram=True, # llama.cpp low-vram mode
	# f16_kv=True, # half-precision kv cache
	# use_mmap=True, # mmap file
	# use_mlock=False,
	# )
	# print("Llama backend initialized successfully!")
	# @spaces.CPU
	@functools.lru_cache(maxsize=1)
	def llm_gpu():

	model_path = hf_hub_download(
	repo_id="Inventors-Hub/SwarmChat-models",
	repo_type="model",
	filename="EuroLLM-9B-Instruct-Q4_K_M.gguf",
	)

	llm = Llama(
	model_path=model_path,
	n_ctx=512, # down from 4096
	low_vram=True, # llama.cpp low-vram mode
	f16_kv=True, # half-precision kv cache
	use_mmap=True, # mmap file
	use_mlock=False,
	)
	return llm


	# Function to process text using EuroLLM
	def translate_text(text):
	input_prompt = f"""
	<\|im_start\|>system
	<\|im_end\|>
	<\|im_start\|>user
	Translate the following text to English:
	Text: {text}
	English:
	<\|im_end\|>
	<\|im_start\|>assistant
	"""
	llm = llm_gpu()
	output = llm(input_prompt, max_tokens=1024, temperature=0)

	translated_text = output.get("choices", [{}])[0].get("text", "").strip()

	return translated_text