# PhishingTest / llm_client.py
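"""Hybrid LLM client for phishing detection.

Wraps a remote Colab-hosted generation API and a local GGUF model
(llama-cpp-python) behind a single LangChain LLM, then uses it in a
RetrievalQA chain to classify input as PHISHING or BENIGN.
"""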
import os
import requests
from huggingface_hub import hf_hub_download
from langchain.llms.base import LLM
from langchain.chains import RetrievalQA
from langchain_core.prompts import PromptTemplate
from typing import Any, List, Optional, Mapping
# --- Custom LangChain LLM Wrapper for Hybrid Approach ---
class HybridLLM(LLM):
api_url: str = ""
local_llm: Any = None
@property
def _llm_type(self) -> str:
return "hybrid_llm"
    def _call(self, prompt: str, stop: Optional[List[str]] = None, **kwargs: Any) -> str:
        # 1. Try Colab API first
        if self.api_url:
            try:
                print(f"🌐 Calling Colab API: {self.api_url}")
                response = requests.post(
                    f"{self.api_url}/generate",
                    json={"prompt": prompt, "max_tokens": 512},
                    timeout=30  # 30s timeout
                )
                if response.status_code == 200:
                    return response.json()["response"]
                else:
                    print(f"⚠️ API Error {response.status_code}: {response.text}")
            except Exception as e:
                print(f"⚠️ API Connection Failed: {e}")

        # 2. Fallback to Local GGUF
        if self.local_llm:
            print("💻 Using Local GGUF Fallback...")
            # llama-cpp-python accepts either a pre-formatted chat prompt or a raw
            # string; we pass the prompt through directly.
            output = self.local_llm(
                prompt,
                max_tokens=512,
                stop=["<|im_end|>", "User:", "Input:"],
                echo=False
            )
            return output['choices'][0]['text']

        return "❌ Error: No working LLM available (API failed and no local model)."
    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        return {"api_url": self.api_url}

class LLMClient:
    def __init__(self, vector_store=None):
        """
        Initialize the hybrid LLM client.
        """
        self.vector_store = vector_store
        self.api_url = os.environ.get("COLAB_API_URL", "")  # Get from env var
        self.local_llm = None

        # Initialize the local GGUF model eagerly: it serves as a backup when the
        # API fails, or as the only backend when no API URL is set. With 16 GB of
        # RAM on the host, a 2B model loads comfortably.
        try:
            print("📂 Loading Local Qwen3-VL-2B-Thinking (GGUF)...")
            from llama_cpp import Llama

            # User-specified repo: Qwen/Qwen3-VL-2B-Thinking-GGUF. If this repo is
            # unavailable, a known-working GGUF (e.g. a Qwen2.5-VL build) could be
            # substituted here.
            repo_id = "Qwen/Qwen3-VL-2B-Thinking-GGUF"
            filename = "Qwen3VL-2B-Thinking-Q4_K_M.gguf"
            model_path = hf_hub_download(
                repo_id=repo_id,
                filename=filename
            )
            self.local_llm = Llama(
                model_path=model_path,
                n_ctx=2048,
                n_threads=2,  # Use 2 vCPUs
                verbose=False
            )
            print("✅ Local GGUF Model Ready!")
        except Exception as e:
            print(f"⚠️ Could not load local GGUF: {e}")

        # Create Hybrid LangChain Wrapper
        self.llm = HybridLLM(api_url=self.api_url, local_llm=self.local_llm)

    def analyze(self, text, context_chunks=None):
        """
        Analyze text using LangChain RetrievalQA.
        """
        if not self.vector_store:
            return "❌ Vector Store not initialized."

        # Custom Prompt Template
        template = """<|im_start|>system
You are a cybersecurity expert. Task: Determine whether the input is 'PHISHING' or 'BENIGN' (Safe).
Respond in the following format:
LABEL: [PHISHING or BENIGN]
EXPLANATION: [A brief Vietnamese explanation]
Context:
{context}
<|im_end|>
<|im_start|>user
Input:
{question}
Short Analysis:
<|im_end|>
<|im_start|>assistant
"""
        PROMPT = PromptTemplate(
            template=template,
            input_variables=["context", "question"]
        )
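
        # The "stuff" chain below retrieves the top-3 most similar chunks from the
        # vector store and stuffs them into {context}; the raw input text becomes
        # {question} in the prompt above.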
        # Create QA Chain
        qa_chain = RetrievalQA.from_chain_type(
            llm=self.llm,
            chain_type="stuff",
            retriever=self.vector_store.as_retriever(search_kwargs={"k": 3}),
            chain_type_kwargs={"prompt": PROMPT}
        )
        try:
            print("🤖 Generating response...")
            response = qa_chain.invoke(text)
            return response['result']
        except Exception as e:
            return f"❌ Error: {str(e)}"