# LLm_Webscraper/llm_processor.py
from transformers import pipeline
import torch
class LLMProcessor:
    def __init__(self, model_name="mistralai/Mistral-7B-Instruct-v0.2"):
        # Option 1: use the HuggingFace pipeline for simplicity.
        # Note: the original default, TheBloke/Mistral-7B-Instruct-v0.2-GGUF,
        # is a llama.cpp-format (GGUF) checkpoint that this pipeline cannot
        # load directly, so the default now points at the standard
        # transformers checkpoint of the same model.
        self.pipe = pipeline(
            "text-generation",
            model=model_name,
            torch_dtype=torch.float16,
            device_map="auto"
        )
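        # Option 2 (sketch, not wired in): load the quantized GGUF checkpoint
        # with the ctransformers library instead. The model_file name below is
        # an assumption -- check the repo for the exact quantization you want.
        #
        # from ctransformers import AutoModelForCausalLM
        # llm = AutoModelForCausalLM.from_pretrained(
        #     "TheBloke/Mistral-7B-Instruct-v0.2-GGUF",
        #     model_file="mistral-7b-instruct-v0.2.Q4_K_M.gguf",  # assumed file
        #     model_type="mistral",
        #     gpu_layers=50,  # offload layers to GPU if one is available
        # )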
    def process_data(self, scraped_data, task_instruction):
        # Build the instruction prompt around the scraped data.
        prompt = f"""
Task: {task_instruction}
Data:
{scraped_data}
Please process the above data according to the task instruction.
"""
        # Generate a response. max_new_tokens bounds only the completion
        # (max_length would count the prompt against the budget and can
        # truncate long scraped pages), and return_full_text=False keeps
        # the echoed prompt out of the returned string.
        response = self.pipe(
            prompt,
            max_new_tokens=512,  # assumed cap on completion length
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            return_full_text=False
        )
        return response[0]['generated_text']
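
# Minimal usage sketch; the sample data and instruction below are made-up
# illustrations, not output from the scraper itself.
if __name__ == "__main__":
    processor = LLMProcessor()
    sample_data = "Product: Widget X | Price: $19.99 | Rating: 4.5/5"
    result = processor.process_data(
        sample_data,
        "Summarize the product information as a JSON object."
    )
    print(result)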