savantripathi commited on
Commit
c3fc3ae
·
verified ·
1 Parent(s): 82167c1

added more files

Browse files
Files changed (7) hide show
  1. TODO.md +10 -0
  2. check_model.py +7 -0
  3. download_model.py +11 -0
  4. login.py +11 -0
  5. main.py +308 -0
  6. requirements.txt +9 -0
  7. test_torch.py +10 -0
TODO.md ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ # Task: Completed ✅
2
+
3
+ ## Steps:
4
+ 1. [x] Confirm plan approved.
5
+ 2. [x] Generate complete Markdown content (temp file: new_readme.md).
6
+ 3. [x] Replaced final_merged_model/README.md with the full whitepaper (YAML preserved, exhaustive content, updated script).
7
+ 4. [x] Preview opened in VSCode.
8
+ 5. [x] Complete task.
9
+
10
+ Final README.md is now an enterprise-grade whitepaper with all required sections, tables, 263% boost, guardrails, dataset details, test case tables, constraints/V2.0, and merged model usage script.
check_model.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
from transformers import AutoModelForCausalLM

# Probe the local Hugging Face cache for the Qwen base model without
# touching the network (local_files_only=True). Prints a small sample of
# weight names on success, or the cache-miss error otherwise.
try:
    cached_model = AutoModelForCausalLM.from_pretrained(
        'Qwen/Qwen2.5-7B-Instruct', local_files_only=True
    )
except Exception as err:
    print('Model not cached:', str(err))
else:
    print('Local model files:', list(cached_model.state_dict().keys())[:5])
download_model.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# One-shot download script: fetch the Qwen2.5-7B-Instruct base model and its
# tokenizer (fp16, placed on CPU) so later runs can load them from the local
# Hugging Face cache.
MODEL_ID = 'Qwen/Qwen2.5-7B-Instruct'

model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    dtype=torch.float16,
    device_map='cpu',
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
print('Qwen2.5-7B-Instruct base model downloaded and cached locally for CPU.')
login.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
from huggingface_hub import login, whoami
import os

# Authenticate this machine against the Hugging Face Hub.
# The token is read from the environment; create one at
# https://huggingface.co/settings/tokens and export it as HF_TOKEN.
HF_TOKEN = os.environ.get("HF_TOKEN")

try:
    if not HF_TOKEN:
        # Fail fast with a clear message. Without this check, login(token=None)
        # silently falls back to cached credentials or an interactive prompt,
        # which is confusing in non-interactive runs.
        raise ValueError("HF_TOKEN environment variable is not set.")
    login(token=HF_TOKEN)
    # whoami() round-trips to the Hub, confirming the token actually works.
    user_info = whoami()
    print(f"\n[SUCCESS] Authenticated as: {user_info['name']}")
except Exception as e:
    print(f"\n[LOGIC ERROR] Authentication Failed: {e}")
main.py ADDED
@@ -0,0 +1,308 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
3
+ from peft import PeftModel
4
+ import psycopg2
5
+ from psycopg2 import pool
6
+ import re
7
+ import logging
8
+ from groq import Groq
9
+ import os
10
+ from dotenv import load_dotenv
11
+
12
# Configure root logging once at import time; all module code below logs
# through this module-level `logger`.
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
logger = logging.getLogger(__name__)

# Load a local .env file into os.environ so the DB credentials and API keys
# read later (DB_USER, DB_PASSWORD, ..., GROQ_API_KEY) are available.
load_dotenv()
18
+
19
class EnergyIntelligenceBot:
    """NL-to-SQL assistant over the energy-monitoring PostgreSQL database.

    Owns a PostgreSQL connection pool and a thin Groq LLM wrapper:
    LLM-generated SQL is cleaned, executed against the pool, and the raw
    rows are re-formatted into a human-readable answer by a second LLM call.
    If the database is unreachable, the bot degrades to text-only mode.
    """

    def __init__(self):
        # Pool stays None when the DB is unreachable; query methods then
        # return an error string instead of crashing.
        self.db = None
        self._validate_env_vars()
        self._connect_db()

    def _validate_env_vars(self):
        """Ensures all critical variables exist before starting.

        Raises:
            ValueError: if any required environment variable is missing/empty.
        """
        required_vars = ["DB_USER", "DB_PASSWORD", "DB_HOST", "DB_PORT", "DB_NAME", "GROQ_API_KEY"]
        missing = [var for var in required_vars if not os.environ.get(var)]
        if missing:
            raise ValueError(f"CRITICAL: Missing environment variables: {', '.join(missing)}")

    def _connect_db(self):
        """Safely connects to PostgreSQL; on failure the bot runs text-only."""
        try:
            self.db = psycopg2.pool.SimpleConnectionPool(
                minconn=1,
                maxconn=10,
                user=os.environ.get("DB_USER"),
                password=os.environ.get("DB_PASSWORD"),
                host=os.environ.get("DB_HOST"),
                port=os.environ.get("DB_PORT"),
                database=os.environ.get("DB_NAME")
            )
            logger.info("PostgreSQL connection pool created successfully.")
        except Exception as e:
            # Deliberate best-effort: keep self.db = None rather than crash.
            logger.error(f"Database connection failed: {e}. Bot will run in text-only mode.")

    def call_llm(self, messages, model="llama-3.1-8b-instant"):
        """Calls Groq API and returns the raw chat-completion response."""
        api_key = os.environ.get("GROQ_API_KEY")
        client = Groq(api_key=api_key)
        return client.chat.completions.create(
            messages=messages,
            model=model
        )

    @staticmethod
    def _clean_sql(generated_sql):
        """Extract a single executable SQL statement from raw LLM output.

        Prefers a fenced ```sql ... ``` block; otherwise falls back to the
        first SELECT onwards; otherwise uses the text as-is. Joins a stray
        ';' before a UNION, then keeps only the first statement and
        re-terminates it with ';'.
        """
        sql_match = re.search(r"```sql\s*(.*?)\s*```", generated_sql, re.DOTALL | re.IGNORECASE)
        if sql_match:
            query = sql_match.group(1).strip()
        else:
            fallback_match = re.search(r"(SELECT[\s\S]*)", generated_sql, re.IGNORECASE)
            query = fallback_match.group(1).strip() if fallback_match else generated_sql
        query = re.sub(r';\s*(?=union)', ' ', query, flags=re.IGNORECASE)
        return query.split(';')[0].strip() + ';'

    def execute_and_format_query(self, generated_sql, user_message):
        """Execute LLM-generated SQL and return a human-readable summary.

        Args:
            generated_sql: raw model output, possibly wrapped in markdown.
            user_message: the original user prompt, forwarded to the
                formatting LLM for context.

        Returns:
            A formatted natural-language answer, or an error string when the
            DB is unavailable, the query is rejected, or execution fails.
        """
        logger.info(f"Generated SQL text:\n{generated_sql}")

        # --- ROBUST SQL CLEANING ---
        query = self._clean_sql(generated_sql)
        logger.info(f"Cleaned SQL query: {query}")

        # SECURITY: the system prompt asks the model for read-only SELECTs,
        # but nothing enforced that before execution. Reject anything else.
        if not query.lstrip().upper().startswith("SELECT"):
            logger.error(f"Rejected non-SELECT query: {query}")
            return "Error: Only read-only SELECT queries are permitted."

        Qres = []

        if not self.db:
            return "Error: Database connection is not available."

        conn = None
        try:
            conn = self.db.getconn()
            with conn.cursor() as cur:
                cur.execute(query)
                Qres = cur.fetchall()
                logger.info(f"Result Fetched: {str(Qres)[:100]}...")
        except Exception as e:
            logger.error(f"Error executing query: {e}")
            if conn:
                conn.rollback()
            return f"An error occurred while executing the query: {e}"
        finally:
            # Always return the connection to the pool, even on failure.
            if conn:
                self.db.putconn(conn)

        messages = [
            {
                "role": "system",
                "content": """Task : Your main goal is to make SQL query results easy to interpret for users who may not have technical backgrounds while ensuring all information is correct and clear.
user will give the conversation history, which contains ONLY the user's messages. Your task is to generate a response based on this history.

You have electric monitoring systems data, of 4 locations :
CNS Equipment Room
Glide Path
Localizer
DVOR

NEVER CHANGE THE ACTUAL DATA.
USER PROVIDED DATA SHOULD BE AS THEY ARE DONT EVEN TRY TO CONVERT THEM, LIKE FOR ENERGY TO KWH. THEY ARE ALREADY IN KWH FORMAT.
keep this in mind while making response that you have electric data so form them correctly with their units, there will be current, voltage, energy, power factor ,etc.
the r,y,b will be denoting the different phases such as red, yellow and blue phase.
There are not any phases in energy , frequency data. means they are regular data, they dont have any phases.

1. Receive SQL Query Results:
When given an SQL query result, your task is to format it professionally and clearly so that it is easy to read and understand.

2. Structure the Answer:
Tables: If the SQL query result contains rows and columns, format the output as a neat table.
Bullet Points or Lists: Use bullet points or structured lists if the results are better conveyed this way.

3. Contextual Information:
Add brief, clear explanations where necessary to provide context or meaning behind the data, ensuring the user understands what the result represents.

4. Formatting Example:
Guidelines:
Maintain a clean and simple presentation.
When needed, include context or analysis like trends, anomalies, or insights from the data.
The final answer should only include the well-formatted result and necessary explanation—no technical jargon or SQL-specific terms.

5. NOTE :
Read the fluctuations or anomaly data and notice them, if they are in percentage or the actual values for show them with units.(all data like 234,256,.. then volts, and if all data like 3.4, 6.56, 11.34, then %)
Never use this types of words in the final answer like "Based on the provided SQL query " or anything that indicates towards the sql query.
just give natural answers as human can understand without any technical things like the sql related things.

NEVER CHANGE THE VALUES FOR THE ENERGY REPORT, DONT YOU DARE CHANGING THEM. KEEP IT AS THEY ARE.
"""
            },
            {
                "role": "user",
                "content": f"{user_message} \nThis was the query : {query} \nAnd Here is the query result : {str(Qres)}"
            }
        ]

        try:
            ai_msg = self.call_llm(messages)
            response_content = ai_msg.choices[0].message.content
            logger.info("Formatted response generated successfully.")
            return response_content
        except Exception as e:
            logger.error(f"Error calling LLM for formatting: {e}")
            return "Failed to format response via LLM."

    def close_connections(self):
        """Release all pooled PostgreSQL connections (idempotent-safe check)."""
        if hasattr(self, 'db') and self.db:
            self.db.closeall()
            logger.info("PostgreSQL connection pool closed.")
156
+
157
# --- Initialization & Safe Execution ---

if __name__ == "__main__":
    # Entry point: builds the bot, loads the Qwen base model + LoRA adapter
    # (4-bit quantized on GPU, fp16 on CPU), then runs an interactive
    # question -> SQL -> execute -> format loop until the user types 'exit'.
    bot = None
    try:
        bot = EnergyIntelligenceBot()

        # Hardware Check
        device = "cuda" if torch.cuda.is_available() else "cpu"
        logger.info(f"Using device: {device}")
        if device == "cpu":
            logger.warning("No GPU found. Running this model on CPU will be extremely slow.")
        else:
            # GPU optimizations for inference (TF32 matmuls on Ampere+).
            torch.backends.cuda.matmul.allow_tf32 = True
            torch.backends.cudnn.allow_tf32 = True
            logger.info(f"GPU optimizations enabled for {torch.cuda.get_device_name()}")

        base_model_id = "Qwen/Qwen2.5-7B-Instruct"
        # Adapter weights are expected in the current directory.
        adapter_path = "./"

        # Check if adapter exists. NOTE(review): with adapter_path == "./"
        # this condition is always False, so the warning never fires — confirm
        # whether a real existence check was intended.
        if not os.path.exists(adapter_path) and adapter_path != "./":
            logger.warning(f"Adapter path {adapter_path} not found. Ensure the path is correct.")

        # VRAM check for RTX 2050 4GB
        if torch.cuda.is_available():
            total_vram = torch.cuda.get_device_properties(0).total_memory / 1024**3
            logger.info(f"Total VRAM: {total_vram:.1f} GB")
            if total_vram < 5:
                logger.warning("Low VRAM detected (<5GB). Using aggressive offloading.")

        if torch.cuda.is_available():
            # Optimized 4-bit (NF4, double-quant) configuration for RTX 2050 4GB.
            bnb_config = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_use_double_quant=True,
                bnb_4bit_quant_type="nf4",
                bnb_4bit_compute_dtype=torch.float16
            )
            logger.info("Loading base model with AGGRESSIVE GPU quantization...")
            base_model = AutoModelForCausalLM.from_pretrained(
                base_model_id,
                dtype=torch.float16,
                device_map="auto",  # Changed from "cuda:auto" to "auto" to better handle hybrid GPUs
                quantization_config=bnb_config,
                trust_remote_code=True,
                low_cpu_mem_usage=True,  # Critical to prevent RAM spike
                max_memory={0: "3.5GiB", "cpu": "8GiB"}  # Restricted RAM usage to keep system stable
            )
        else:
            logger.info("Loading base model on CPU (no quantization)...")
            # NOTE(review): this inner block looks like leftover copy-paste of
            # the verification code below — it references base_model before it
            # is assigned and is unreachable here (we are in the no-CUDA
            # branch). Confirm and remove.
            if torch.cuda.is_available():
                logger.info(f"Model is actually on: {base_model.device}")
                logger.info(f"VRAM used: {torch.cuda.memory_allocated(0)/1024**3:.2f}GB")

            base_model = AutoModelForCausalLM.from_pretrained(
                base_model_id,
                dtype=torch.float16,
                device_map="cpu",
                trust_remote_code=True,
                low_cpu_mem_usage=True
            )

        # Verification check: confirm where the weights actually landed.
        logger.info(f"Model placement complete.")
        if torch.cuda.is_available():
            logger.info(f"Model is actually on: {base_model.device}")
            logger.info(f"VRAM used: {torch.cuda.memory_allocated(0)/1024**3:.2f}GB")
            if "cpu" in str(base_model.device):
                logger.warning("MODEL IS ON CPU! Bitsandbytes may be failing to find CUDA kernels.")

        tokenizer = AutoTokenizer.from_pretrained(base_model_id, trust_remote_code=True)
        if tokenizer.pad_token is None:
            # generate() below passes pad_token_id explicitly; keep them aligned.
            tokenizer.pad_token = tokenizer.eos_token
            logger.info("Set pad_token to eos_token")

        logger.info("Loading adapter...")
        model = PeftModel.from_pretrained(base_model, adapter_path)
        model.eval()

        if device == "cuda":
            logger.info("CPU offload ready (disabled due to PeftModel compatibility)")

        print("Enter 'exit' to quit.")
        while True:
            # NOTE(review): .lower() lowercases the entire question before
            # .capitalize() re-uppercases only the first letter — proper nouns
            # in the question are mangled. Confirm this is intentional.
            user_question = input("Enter your question: ").strip().lower()
            if user_question == "exit":
                print("Exiting...")
                break
            if not user_question:
                print("Please enter a question.")
                continue

            prompt = f"generate the sql for this:{user_question.capitalize()}"

            messages = [
                {"role": "system", "content": """You are an expert NLP-to-SQL agent. Database table is 'main_cns' with energy monitoring data.

CRITICAL RULES:
- ONLY generate ONE real SELECT query for 'main_cns' table.
- NO examples, fictional tables (like 'energy'), multiple queries, or explanations.
- Output ONLY the SQL query inside ```sql ... ``` block.
- STRICTLY READ-ONLY SELECT statements. No INSERT/UPDATE/DELETE."""},
                {"role": "user", "content": prompt}
            ]

            logger.info("Processing inputs...")
            text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
            inputs = tokenizer(text, return_tensors="pt").to(model.device)

            logger.info("Generating response...")
            # NOTE(review): torch.cuda.amp.autocast is CUDA-specific and
            # deprecated in favor of torch.amp.autocast; on the CPU path this
            # context may warn or misbehave — confirm.
            with torch.no_grad(), torch.cuda.amp.autocast(dtype=torch.float16):
                # Greedy decoding (do_sample=False) for deterministic SQL.
                generated_ids = model.generate(
                    **inputs,
                    max_new_tokens=256,
                    do_sample=False,
                    pad_token_id=tokenizer.eos_token_id,
                    use_cache=True
                )

            # Strip the prompt tokens so only newly generated text is decoded.
            generated_ids_trimmed = [out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)]
            output_text = tokenizer.batch_decode(generated_ids_trimmed, skip_special_tokens=True)

            generated_sql = output_text[0]

            print("\n--- Model Response ---")
            print(generated_sql)

            if torch.cuda.is_available():
                torch.cuda.empty_cache()
                logger.info(f"Post-gen VRAM: {torch.cuda.memory_allocated(0)/1024**3:.1f}GB")

            # Final formatting and DB execution
            print("\n--- Executing SQL and Formatting Results ---")
            formatted_response = bot.execute_and_format_query(generated_sql, prompt.capitalize())

            print("\n--- Formatted Response ---")
            print(formatted_response)
            print("\n" + "="*80 + "\n")

    except Exception as e:
        # Top-level boundary: log and fall through to cleanup.
        logger.critical(f"Application crashed: {e}")
    finally:
        # Resource cleanup: release DB pool and GPU memory regardless of how
        # the loop exited.
        if bot:
            bot.close_connections()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            logger.info("GPU cache cleared.")
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ torch==2.2.6
2
+ transformers==5.3.0
3
+ peft==0.18.1
4
+ bitsandbytes==0.49.2
5
+ psycopg2-binary==2.9.11
6
+ groq==1.0.0
7
+ python-dotenv==1.1.1
8
+ huggingface-hub==1.6.0
9
+ accelerate==1.13.0
test_torch.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
import torch

# Quick sanity report of the local PyTorch / CUDA installation.
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if not torch.cuda.is_available():
    print("No GPU")
else:
    print(f"CUDA version: {torch.version.cuda}")
    print(f"GPU name: {torch.cuda.get_device_name(0)}")
    print(f"VRAM: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")