from transformers import pipeline
import torch

class LLMProcessor:
    def __init__(self, model_name="mistralai/Mistral-7B-Instruct-v0.2"):
        # Option 1: use the HuggingFace text-generation pipeline for simplicity.
        # Note: GGUF-quantized repos (e.g. TheBloke/...-GGUF) cannot be loaded
        # this way; they require a llama.cpp-style loader instead.
        self.pipe = pipeline(
            "text-generation",
            model=model_name,
            torch_dtype=torch.float16,
            device_map="auto"
        )
        
    def process_data(self, scraped_data, task_instruction):
        # Build the prompt from the task instruction and the scraped data.
        prompt = (
            f"Task: {task_instruction}\n\n"
            f"Data:\n{scraped_data}\n\n"
            "Please process the above data according to the task instruction."
        )

        # Generate a response; max_new_tokens bounds only the generated text,
        # and return_full_text=False drops the echoed prompt from the output.
        response = self.pipe(
            prompt,
            max_new_tokens=1024,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            return_full_text=False
        )

        return response[0]['generated_text']
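

# Minimal usage sketch (hypothetical example: the model weights must be
# downloadable and fit on the available GPU, and the scraped_data string below
# is only a stand-in for real scraper output).
if __name__ == "__main__":
    processor = LLMProcessor()
    scraped_data = "Product: Widget A - $19.99\nProduct: Widget B - $24.99"
    result = processor.process_data(
        scraped_data,
        task_instruction="Extract each product name and price as a JSON list."
    )
    print(result)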