# LLm_Webscraper/llm_processor.py
from transformers import pipeline
import torch
class LLMProcessor:
    def __init__(self, model_name="mistralai/Mistral-7B-Instruct-v0.2"):
        # Option 1: use the HuggingFace pipeline for simplicity.
        # Note: the original default, TheBloke/Mistral-7B-Instruct-v0.2-GGUF,
        # is a llama.cpp-format (GGUF) checkpoint that this pipeline cannot
        # load directly, so the default now points at the standard
        # transformers checkpoint of the same model.
        self.pipe = pipeline(
            "text-generation",
            model=model_name,
            torch_dtype=torch.float16,
            device_map="auto"
        )
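        # Option 2 (sketch, not wired in): load the quantized GGUF checkpoint
        # with the ctransformers library instead. The model_file name below is
        # an assumption -- check the repo for the exact quantization you want.
        #
        # from ctransformers import AutoModelForCausalLM
        # llm = AutoModelForCausalLM.from_pretrained(
        #     "TheBloke/Mistral-7B-Instruct-v0.2-GGUF",
        #     model_file="mistral-7b-instruct-v0.2.Q4_K_M.gguf",  # assumed file
        #     model_type="mistral",
        #     gpu_layers=50,  # offload layers to GPU if one is available
        # )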
    def process_data(self, scraped_data, task_instruction):
        # Build the instruction prompt around the scraped data.
        prompt = f"""
Task: {task_instruction}
Data:
{scraped_data}
Please process the above data according to the task instruction.
"""
        # Generate a response. max_new_tokens bounds only the completion
        # (max_length would count the prompt against the budget and can
        # truncate long scraped pages), and return_full_text=False keeps
        # the echoed prompt out of the returned string.
        response = self.pipe(
            prompt,
            max_new_tokens=512,  # assumed cap on completion length
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            return_full_text=False
        )
        return response[0]['generated_text']
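
# Minimal usage sketch; the sample data and instruction below are made-up
# illustrations, not output from the scraper itself.
if __name__ == "__main__":
    processor = LLMProcessor()
    sample_data = "Product: Widget X | Price: $19.99 | Rating: 4.5/5"
    result = processor.process_data(
        sample_data,
        "Summarize the product information as a JSON object."
    )
    print(result)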