from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA
import torch


class Handler:
    def __init__(self):
        # Load the fine-tuned UPF code generation model and its tokenizer.
        print("Loading model and tokenizer...")
        self.model = AutoModelForCausalLM.from_pretrained(
            "anirudh248/upf_code_generator_final", device_map="auto"
        )
        self.tokenizer = AutoTokenizer.from_pretrained("anirudh248/upf_code_generator_final")

        # Load the saved FAISS index with the same embedding model used to build it.
        print("Loading FAISS index and embeddings...")
        self.embeddings = HuggingFaceEmbeddings()
        self.vectorstore = FAISS.load_local(
            "faiss_index", self.embeddings, allow_dangerous_deserialization=True
        )

        # Wrap the model in a text-generation pipeline. The model is already
        # placed by device_map="auto", so no explicit device argument is passed
        # here (passing both can conflict in transformers).
        print("Creating Hugging Face pipeline...")
        self.hf_pipeline = pipeline(
            "text-generation",
            model=self.model,
            tokenizer=self.tokenizer,
            do_sample=True,  # required for temperature/top_p to take effect
            temperature=0.7,
            max_new_tokens=2048,
            top_p=0.95,
            repetition_penalty=1.15
        )

        # Expose the pipeline to LangChain and build the retrieval-augmented QA chain.
        self.llm = HuggingFacePipeline(pipeline=self.hf_pipeline)
        self.retriever = self.vectorstore.as_retriever()
        self.qa_chain = RetrievalQA.from_chain_type(
            llm=self.llm,
            retriever=self.retriever,
            return_source_documents=False
        )

    def __call__(self, request):
        try:
            # Expect a JSON body of the form {"prompt": "..."}.
            prompt = request.json.get("prompt")
            if not prompt:
                return {"error": "Prompt is required"}, 400

            # Run retrieval + generation and return the generated UPF code.
            response = self.qa_chain.run(prompt)
            return {"response": response}

        except Exception as e:
            return {"error": str(e)}, 500