from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
from langchain_core.prompts import PromptTemplate
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, BitsAndBytesConfig

# Load the model in 4-bit to fit it within a single GPU's memory
# (requires the `bitsandbytes` and `accelerate` packages).
quants = BitsAndBytesConfig(load_in_4bit=True)

# Build a LangChain LLM wrapper around a Hugging Face text-generation pipeline.
hf = HuggingFacePipeline.from_model_id(
    model_id="mistralai/Mistral-7B-Instruct-v0.2",
    task="text-generation",
    pipeline_kwargs={"max_new_tokens": 10000},
    model_kwargs={"quantization_config": quants},
)

# Equivalent manual construction of the pipeline (note: quantization_config
# belongs on the model, not the tokenizer):
# model_id = "mistralai/Mistral-7B-Instruct-v0.2"
# tokenizer = AutoTokenizer.from_pretrained(model_id)
# model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=quants)
# pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=1000)
# hf = HuggingFacePipeline(pipeline=pipe)


def generate_blog(role, words, topic):
    """Generate a markdown blog post for the given topic, audience, and word limit."""
    template = """You are an expert blog generator. Given the topic, the intended audience,
and the maximum number of words, write a blog on the given topic.

Topic: {topic}
Intended Audience: {role}
Number of Words: {words}

Strictly return the output in markdown format."""
    prompt = PromptTemplate.from_template(template)
    chain = prompt | hf
    return chain.invoke({"topic": topic, "words": words, "role": role})
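

# Example usage, a minimal sketch: the argument values below are illustrative
# assumptions, not part of the original script. `chain.invoke` returns the
# generated text as a string, which is printed here.
if __name__ == "__main__":
    blog_markdown = generate_blog(
        role="beginner data scientists",                # hypothetical audience
        words=500,                                      # hypothetical word limit
        topic="4-bit quantization for LLM inference",   # hypothetical topic
    )
    print(blog_markdown)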