# PhishingTest / llm_client.py
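"""Hybrid LLM client for phishing detection.

Wraps a remote Colab-hosted generation API and a local GGUF model
(llama-cpp-python) behind a single LangChain LLM, then uses it in a
RetrievalQA chain to classify input as PHISHING or BENIGN.
"""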
import os
import requests
from huggingface_hub import hf_hub_download
from langchain.llms.base import LLM
from langchain.chains import RetrievalQA
from langchain_core.prompts import PromptTemplate
from typing import Any, List, Optional, Mapping
# --- Custom LangChain LLM Wrapper for Hybrid Approach ---
class HybridLLM(LLM):
api_url: str = ""
local_llm: Any = None
@property
def _llm_type(self) -> str:
return "hybrid_llm"
    def _call(self, prompt: str, stop: Optional[List[str]] = None, **kwargs: Any) -> str:
        # 1. Try Colab API first
        if self.api_url:
            try:
                print(f"🌐 Calling Colab API: {self.api_url}")
                response = requests.post(
                    f"{self.api_url}/generate",
                    json={"prompt": prompt, "max_tokens": 512},
                    timeout=30  # 30s timeout
                )
                if response.status_code == 200:
                    return response.json()["response"]
                else:
                    print(f"⚠️ API Error {response.status_code}: {response.text}")
            except Exception as e:
                print(f"⚠️ API Connection Failed: {e}")

        # 2. Fallback to Local GGUF
        if self.local_llm:
            print("💻 Using Local GGUF Fallback...")
            # llama-cpp-python accepts either a pre-formatted chat prompt or a raw
            # string; we pass the prompt through directly.
            output = self.local_llm(
                prompt,
                max_tokens=512,
                stop=["<|im_end|>", "User:", "Input:"],
                echo=False
            )
            return output['choices'][0]['text']

        return "❌ Error: No working LLM available (API failed and no local model)."
    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        return {"api_url": self.api_url}

class LLMClient:
    def __init__(self, vector_store=None):
        """
        Initialize the hybrid LLM client.
        """
        self.vector_store = vector_store
        self.api_url = os.environ.get("COLAB_API_URL", "")  # Get from env var
        self.local_llm = None

        # Initialize the local GGUF model eagerly: it serves as a backup when the
        # API fails, or as the only backend when no API URL is set. With 16 GB of
        # RAM on the host, a 2B model loads comfortably.
        try:
            print("📂 Loading Local Qwen3-VL-2B-Thinking (GGUF)...")
            from llama_cpp import Llama

            # User-specified repo: Qwen/Qwen3-VL-2B-Thinking-GGUF. If this repo is
            # unavailable, a known-working GGUF (e.g. a Qwen2.5-VL build) could be
            # substituted here.
            repo_id = "Qwen/Qwen3-VL-2B-Thinking-GGUF"
            filename = "Qwen3VL-2B-Thinking-Q4_K_M.gguf"
            model_path = hf_hub_download(
                repo_id=repo_id,
                filename=filename
            )
            self.local_llm = Llama(
                model_path=model_path,
                n_ctx=2048,
                n_threads=2,  # Use 2 vCPUs
                verbose=False
            )
            print("✅ Local GGUF Model Ready!")
        except Exception as e:
            print(f"⚠️ Could not load local GGUF: {e}")

        # Create Hybrid LangChain Wrapper
        self.llm = HybridLLM(api_url=self.api_url, local_llm=self.local_llm)

    def analyze(self, text, context_chunks=None):
        """
        Analyze text using LangChain RetrievalQA.
        """
        if not self.vector_store:
            return "❌ Vector Store not initialized."

        # Custom Prompt Template
        template = """<|im_start|>system
You are a cybersecurity expert. Task: Determine whether the input is 'PHISHING' or 'BENIGN' (Safe).
Respond in the following format:
LABEL: [PHISHING or BENIGN]
EXPLANATION: [A brief Vietnamese explanation]
Context:
{context}
<|im_end|>
<|im_start|>user
Input:
{question}
Short Analysis:
<|im_end|>
<|im_start|>assistant
"""
        PROMPT = PromptTemplate(
            template=template,
            input_variables=["context", "question"]
        )
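
        # The "stuff" chain below retrieves the top-3 most similar chunks from the
        # vector store and stuffs them into {context}; the raw input text becomes
        # {question} in the prompt above.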
        # Create QA Chain
        qa_chain = RetrievalQA.from_chain_type(
            llm=self.llm,
            chain_type="stuff",
            retriever=self.vector_store.as_retriever(search_kwargs={"k": 3}),
            chain_type_kwargs={"prompt": PROMPT}
        )
        try:
            print("🤖 Generating response...")
            response = qa_chain.invoke(text)
            return response['result']
        except Exception as e:
            return f"❌ Error: {str(e)}"