Vachudev committed on
Commit dc79584 · verified · 1 parent: 4071c84

Initial Commit

Files changed (9)
  1. app.py +301 -0
  2. app_json.py +233 -0
  3. config.py +22 -0
  4. ocr_engine.py +63 -0
  5. ocr_engine_json.py +45 -0
  6. packages.txt +2 -0
  7. prompts.py +46 -0
  8. requirements.txt +9 -0
  9. zoho_client_mcp.py +93 -0
app.py ADDED
@@ -0,0 +1,301 @@
+ # app.py — MCP server (single-file)
+
+ from mcp.server.fastmcp import FastMCP
+ from typing import Optional, List, Tuple, Any, Dict
+ import requests
+ import os
+ import gradio as gr
+ import json
+ import re
+ import logging
+ import gc
+
+ # --- Import OCR Engine & Prompts ---
+ try:
+     # UPDATED IMPORT
+     from ocr_engine import extract_text_and_conf
+     from prompts import get_ocr_extraction_prompt, get_agent_prompt
+ except ImportError:
+     def extract_text_and_conf(path): return "", 0.0
+     def get_ocr_extraction_prompt(txt): return txt
+     def get_agent_prompt(h, u): return u
+
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger("mcp_server")
+
+ # --- Load Config ---
+ try:
+     from config import (
+         CLIENT_ID, CLIENT_SECRET, REFRESH_TOKEN, API_BASE,
+         INVOICE_API_BASE, ORGANIZATION_ID, LOCAL_MODEL
+     )
+ except Exception:
+     raise SystemExit("Config missing.")
+
+ mcp = FastMCP("ZohoCRMAgent")
+
+ # --- Globals ---
+ LLM_PIPELINE = None
+ TOKENIZER = None
+
+ # --- NEW: Evaluation / KPI Logic (Integrated OCR Score) ---
+ def calculate_extraction_confidence(data: dict, ocr_score: float) -> dict:
+     """
+     Calculates Hybrid Confidence:
+     - 20% based on OCR Engine Signal (Tesseract Confidence)
+     - 80% based on Data Quality (LLM Extraction Completeness)
+     """
+     semantic_score = 0
+     issues = []
+
+     # 1. Structure Check (Base 10 pts)
+     semantic_score += 10
+
+     # 2. Total Amount Check (30 pts)
+     amt = str(data.get("total_amount", "")).replace("$", "").replace(",", "")
+     if amt and re.match(r'^\d+(\.\d+)?$', amt):
+         semantic_score += 30
+     else:
+         issues.append("Missing/Invalid Total Amount")
+
+     # 3. Date Check (20 pts)
+     date_str = str(data.get("invoice_date", ""))
+     if date_str and len(date_str) >= 8:
+         semantic_score += 20
+     else:
+         issues.append("Missing Invoice Date")
+
+     # 4. Line Items Check (30 pts)
+     items = data.get("line_items", [])
+     if isinstance(items, list) and len(items) > 0:
+         if any(i.get("name") for i in items):
+             semantic_score += 30
+         else:
+             semantic_score += 10
+             issues.append("Line Items missing descriptions")
+     else:
+         issues.append("No Line Items detected")
+
+     # 5. Contact Name (10 pts)
+     if data.get("contact_name"):
+         semantic_score += 10
+     else:
+         issues.append("Missing Vendor Name")
+
+     # --- HYBRID CALCULATION ---
+     # Weight: 80% Data Quality + 20% OCR Quality
+     final_score = (semantic_score * 0.8) + (ocr_score * 0.2)
+
+     # Add OCR warnings
+     if ocr_score < 60:
+         issues.append(f"Low OCR Confidence ({ocr_score}%) - Check image quality")
+
+     return {
+         "score": int(final_score),
+         "ocr_score": ocr_score,
+         "semantic_score": semantic_score,
+         "rating": "High" if final_score > 80 else ("Medium" if final_score > 50 else "Low"),
+         "issues": issues
+     }
+
+ # --- Helpers ---
+ def extract_json_safely(text: str) -> Optional[Any]:
+     try:
+         return json.loads(text)
+     except:
+         match = re.search(r'(\{.*\}|\[.*\])', text, re.DOTALL)
+         return json.loads(match.group(0)) if match else None
+
+ def _normalize_local_path_args(args: Any) -> Any:
+     if not isinstance(args, dict): return args
+     fp = args.get("file_path") or args.get("path")
+     if isinstance(fp, str) and fp.startswith("/mnt/data/") and os.path.exists(fp):
+         args["file_url"] = f"file://{fp}"
+     return args
+
+ # --- Model Loading ---
+ def init_local_model():
+     global LLM_PIPELINE, TOKENIZER
+     if LLM_PIPELINE is not None: return
+
+     try:
+         from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
+
+         logger.info(f"Loading lighter model: {LOCAL_MODEL}...")
+         TOKENIZER = AutoTokenizer.from_pretrained(LOCAL_MODEL)
+         model = AutoModelForCausalLM.from_pretrained(
+             LOCAL_MODEL,
+             device_map="auto",
+             torch_dtype="auto"
+         )
+         LLM_PIPELINE = pipeline("text-generation", model=model, tokenizer=TOKENIZER)
+         logger.info("Model loaded.")
+     except Exception as e:
+         logger.error(f"Model load error: {e}")
+
+ def local_llm_generate(prompt: str, max_tokens: int = 512) -> Dict[str, Any]:
+     if LLM_PIPELINE is None:
+         init_local_model()
+
+     if LLM_PIPELINE is None:
+         return {"text": "Model not loaded.", "raw": None}
+
+     try:
+         out = LLM_PIPELINE(
+             prompt,
+             max_new_tokens=max_tokens,
+             return_full_text=False,
+             do_sample=False
+         )
+         text = out[0]["generated_text"] if out else ""
+         return {"text": text, "raw": out}
+     except Exception as e:
+         return {"text": f"Error: {e}", "raw": None}
+
+ # --- Tools (Zoho) ---
+ def _get_valid_token_headers() -> dict:
+     r = requests.post("https://accounts.zoho.in/oauth/v2/token", params={
+         "refresh_token": REFRESH_TOKEN, "client_id": CLIENT_ID,
+         "client_secret": CLIENT_SECRET, "grant_type": "refresh_token"
+     }, timeout=10)
+     if r.status_code == 200:
+         return {"Authorization": f"Zoho-oauthtoken {r.json().get('access_token')}"}
+     return {}
+
+ @mcp.tool()
+ def create_record(module_name: str, record_data: dict) -> str:
+     h = _get_valid_token_headers()
+     if not h: return "Auth Failed"
+     r = requests.post(f"{API_BASE}/{module_name}", headers=h, json={"data": [record_data]})
+     if r.status_code in (200, 201):
+         try:
+             d = r.json().get("data", [{}])[0].get("details", {})
+             return json.dumps({"status": "success", "id": d.get("id"), "zoho_response": r.json()})
+         except:
+             return json.dumps(r.json())
+     return r.text
+
+ @mcp.tool()
+ def create_invoice(data: dict) -> str:
+     h = _get_valid_token_headers()
+     if not h: return "Auth Failed"
+     r = requests.post(f"{INVOICE_API_BASE}/invoices", headers=h,
+                       params={"organization_id": ORGANIZATION_ID}, json=data)
+     return json.dumps(r.json()) if r.status_code in (200, 201) else r.text
+
+ @mcp.tool()
+ def process_document(file_path: str, target_module: Optional[str] = "Contacts") -> dict:
+     if not os.path.exists(file_path):
+         return {"error": f"File not found at path: {file_path}"}
+
+     # 1. OCR (UPDATED: Returns text AND score)
+     raw_text, ocr_score = extract_text_and_conf(file_path)
+
+     if not raw_text: return {"error": "OCR empty"}
+
+     # 2. LLM Extraction
+     prompt = get_ocr_extraction_prompt(raw_text)
+     res = local_llm_generate(prompt, max_tokens=300)
+     data = extract_json_safely(res["text"])
+
+     # 3. Evaluation / KPI Calculation (UPDATED: Uses ocr_score)
+     kpis = {"score": 0, "rating": "Fail", "issues": ["Extraction Failed"]}
+     if data:
+         kpis = calculate_extraction_confidence(data, ocr_score)
+
+     return {
+         "status": "success",
+         "file": os.path.basename(file_path),
+         "extracted_data": data or {"raw": res["text"]},
+         "kpis": kpis
+     }
+
+ # --- Executor ---
+ def parse_and_execute(model_text: str, history: list) -> str:
+     payload = extract_json_safely(model_text)
+     if not payload: return "No valid tool call found."
+
+     cmds = [payload] if isinstance(payload, dict) else payload
+     results = []
+     last_contact_id = None
+
+     for cmd in cmds:
+         if not isinstance(cmd, dict): continue
+         tool = cmd.get("tool")
+         args = _normalize_local_path_args(cmd.get("args", {}))
+
+         if tool == "create_record":
+             res = create_record(args.get("module_name", "Contacts"), args.get("record_data", {}))
+             results.append(f"Record: {res}")
+             try:
+                 rj = json.loads(res)
+                 if isinstance(rj, dict) and "id" in rj:
+                     last_contact_id = rj["id"]
+             except: pass
+
+         elif tool == "create_invoice":
+             if not args.get("customer_id") and last_contact_id:
+                 args["customer_id"] = last_contact_id
+
+             invoice_payload = args
+             if last_contact_id and "customer_id" not in invoice_payload:
+                 invoice_payload["customer_id"] = last_contact_id
+
+             res = create_invoice(invoice_payload)
+             results.append(f"Invoice: {res}")
+
+     return "\n".join(results)
+
+ # --- Chat Core ---
+ def chat_logic(message: str, file_path: str, history: list) -> str:
+
+     # PHASE 1: File Upload -> Extraction -> KPI Report
+     if file_path:
+         logger.info(f"Processing file: {file_path}")
+         doc = process_document(file_path)
+
+         if doc.get("status") == "success":
+             data = doc["extracted_data"]
+             kpi = doc["kpis"]
+
+             extracted_json = json.dumps(data, indent=2)
+
+             # Format KPI output (Expanded)
+             rating_emoji = "🟢" if kpi['rating'] == 'High' else ("🟡" if kpi['rating'] == 'Medium' else "🔴")
+             issues_txt = "\n".join([f"- {i}" for i in kpi['issues']]) if kpi['issues'] else "None"
+
+             return (
+                 f"### 📄 Extraction Complete: **{doc['file']}**\n"
+                 f"**Combined Confidence:** {rating_emoji} {kpi['score']}/100\n"
+                 f"*(OCR Signal: {kpi['ocr_score']}% | Data Quality: {kpi['semantic_score']}%)*\n\n"
+                 f"**Issues Detected:**\n{issues_txt}\n\n"
+                 f"```json\n{extracted_json}\n```\n\n"
+                 "Type **'Create Invoice'** to push this to Zoho."
+             )
+         else:
+             return f"OCR Failed: {doc.get('error')}"
+
+     # PHASE 2: Text Interaction
+     hist_txt = "\n".join([f"U: {h[0]}\nA: {h[1]}" for h in history])
+     prompt = get_agent_prompt(hist_txt, message)
+
+     gen = local_llm_generate(prompt, max_tokens=256)
+     tool_data = extract_json_safely(gen["text"])
+
+     if tool_data:
+         return parse_and_execute(gen["text"], history)
+
+     return gen["text"]
+
+ # --- UI ---
+ def chat_handler(msg, hist):
+     txt = msg.get("text", "")
+     files = msg.get("files", [])
+     path = files[0] if files else None
+
+     return chat_logic(txt, path, hist)
+
+ if __name__ == "__main__":
+     gc.collect()
+     demo = gr.ChatInterface(fn=chat_handler, multimodal=True)
+     demo.launch(server_name="0.0.0.0", server_port=7860)
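A quick sanity check of the hybrid scoring in `calculate_extraction_confidence` (a minimal sketch; the sample invoice dict and the 72.5% OCR figure are illustrative, not taken from the repo):

```python
# Hypothetical, fully populated extraction with a mid-range Tesseract signal.
sample = {
    "contact_name": "Acme Traders",
    "total_amount": "1250.00",
    "invoice_date": "2024-05-01",
    "line_items": [{"name": "Widget", "quantity": 2, "rate": 625.0}],
}

result = calculate_extraction_confidence(sample, ocr_score=72.5)
# semantic_score = 10 + 30 + 20 + 30 + 10 = 100
# final_score    = 100 * 0.8 + 72.5 * 0.2 = 94.5  -> score 94, rating "High"
print(result["score"], result["rating"], result["issues"])  # 94 High []
```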
app_json.py ADDED
@@ -0,0 +1,233 @@
+ # app_json.py — MCP server (single-file)
+
+ from mcp.server.fastmcp import FastMCP
+ from typing import Optional, List, Tuple, Any, Dict
+ import requests
+ import os
+ import gradio as gr
+ import json
+ import re
+ import logging
+ import gc
+
+ # --- Import OCR Engine & Prompts ---
+ try:
+     from ocr_engine import extract_text_from_file
+     from prompts import get_ocr_extraction_prompt, get_agent_prompt
+ except ImportError:
+     def extract_text_from_file(path): return ""
+     def get_ocr_extraction_prompt(txt): return txt
+     def get_agent_prompt(h, u): return u
+
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger("mcp_server")
+
+ # --- Load Config ---
+ try:
+     from config import (
+         CLIENT_ID, CLIENT_SECRET, REFRESH_TOKEN, API_BASE,
+         INVOICE_API_BASE, ORGANIZATION_ID, LOCAL_MODEL
+     )
+ except Exception:
+     raise SystemExit("Config missing.")
+
+ mcp = FastMCP("ZohoCRMAgent")
+
+ # --- Globals ---
+ LLM_PIPELINE = None
+ TOKENIZER = None
+
+ # --- Helpers ---
+ def extract_json_safely(text: str) -> Optional[Any]:
+     try:
+         return json.loads(text)
+     except:
+         match = re.search(r'(\{.*\}|\[.*\])', text, re.DOTALL)
+         return json.loads(match.group(0)) if match else None
+
+ def _normalize_local_path_args(args: Any) -> Any:
+     if not isinstance(args, dict): return args
+     fp = args.get("file_path") or args.get("path")
+     if isinstance(fp, str) and fp.startswith("/mnt/data/") and os.path.exists(fp):
+         args["file_url"] = f"file://{fp}"
+     return args
+
+ # --- Model Loading ---
+ def init_local_model():
+     global LLM_PIPELINE, TOKENIZER
+     if LLM_PIPELINE is not None: return
+
+     try:
+         from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
+
+         logger.info(f"Loading lighter model: {LOCAL_MODEL}...")
+         TOKENIZER = AutoTokenizer.from_pretrained(LOCAL_MODEL)
+         model = AutoModelForCausalLM.from_pretrained(
+             LOCAL_MODEL,
+             device_map="auto",
+             torch_dtype="auto"
+         )
+         LLM_PIPELINE = pipeline("text-generation", model=model, tokenizer=TOKENIZER)
+         logger.info("Model loaded.")
+     except Exception as e:
+         logger.error(f"Model load error: {e}")
+
+ def local_llm_generate(prompt: str, max_tokens: int = 512) -> Dict[str, Any]:
+     if LLM_PIPELINE is None:
+         init_local_model()
+
+     if LLM_PIPELINE is None:
+         return {"text": "Model not loaded.", "raw": None}
+
+     try:
+         out = LLM_PIPELINE(
+             prompt,
+             max_new_tokens=max_tokens,
+             return_full_text=False,
+             do_sample=False
+         )
+         text = out[0]["generated_text"] if out else ""
+         return {"text": text, "raw": out}
+     except Exception as e:
+         return {"text": f"Error: {e}", "raw": None}
+
+ # --- Tools (Zoho) ---
+ def _get_valid_token_headers() -> dict:
+     r = requests.post("https://accounts.zoho.in/oauth/v2/token", params={
+         "refresh_token": REFRESH_TOKEN, "client_id": CLIENT_ID,
+         "client_secret": CLIENT_SECRET, "grant_type": "refresh_token"
+     }, timeout=10)
+     if r.status_code == 200:
+         return {"Authorization": f"Zoho-oauthtoken {r.json().get('access_token')}"}
+     return {}
+
+ @mcp.tool()
+ def create_record(module_name: str, record_data: dict) -> str:
+     h = _get_valid_token_headers()
+     if not h: return "Auth Failed"
+     r = requests.post(f"{API_BASE}/{module_name}", headers=h, json={"data": [record_data]})
+     if r.status_code in (200, 201):
+         try:
+             d = r.json().get("data", [{}])[0].get("details", {})
+             return json.dumps({"status": "success", "id": d.get("id"), "zoho_response": r.json()})
+         except:
+             return json.dumps(r.json())
+     return r.text
+
+ @mcp.tool()
+ def create_invoice(data: dict) -> str:
+     h = _get_valid_token_headers()
+     if not h: return "Auth Failed"
+     r = requests.post(f"{INVOICE_API_BASE}/invoices", headers=h,
+                       params={"organization_id": ORGANIZATION_ID}, json=data)
+     return json.dumps(r.json()) if r.status_code in (200, 201) else r.text
+
+ @mcp.tool()
+ def process_document(file_path: str, target_module: Optional[str] = "Contacts") -> dict:
+     if not os.path.exists(file_path):
+         return {"error": f"File not found at path: {file_path}"}
+
+     # 1. OCR
+     raw_text = extract_text_from_file(file_path)
+     if not raw_text: return {"error": "OCR empty"}
+
+     # 2. LLM Extraction
+     prompt = get_ocr_extraction_prompt(raw_text)
+     res = local_llm_generate(prompt, max_tokens=300)
+     data = extract_json_safely(res["text"])
+
+     return {
+         "status": "success",
+         "file": os.path.basename(file_path),
+         "extracted_data": data or {"raw": res["text"]}
+     }
+
+ # --- Executor ---
+ def parse_and_execute(model_text: str, history: list) -> str:
+     payload = extract_json_safely(model_text)
+     if not payload: return "No valid tool call found."
+
+     cmds = [payload] if isinstance(payload, dict) else payload
+     results = []
+
+     last_contact_id = None
+
+     for cmd in cmds:
+         if not isinstance(cmd, dict): continue
+         tool = cmd.get("tool")
+         args = _normalize_local_path_args(cmd.get("args", {}))
+
+         if tool == "create_record":
+             res = create_record(args.get("module_name", "Contacts"), args.get("record_data", {}))
+             results.append(f"Record: {res}")
+             try:
+                 rj = json.loads(res)
+                 if isinstance(rj, dict) and "id" in rj:
+                     last_contact_id = rj["id"]
+             except: pass
+
+         elif tool == "create_invoice":
+             # Auto-fill contact_id if we just created one
+             if not args.get("customer_id") and last_contact_id:
+                 args["customer_id"] = last_contact_id
+
+             # Map Items from strict structure
+             invoice_payload = args  # Assuming LLM passes correct structure, or map here
+             if last_contact_id and "customer_id" not in invoice_payload:
+                 invoice_payload["customer_id"] = last_contact_id
+
+             res = create_invoice(invoice_payload)
+             results.append(f"Invoice: {res}")
+
+     return "\n".join(results)
+
+ # --- Chat Core ---
+ def chat_logic(message: str, file_path: str, history: list) -> str:
+
+     # PHASE 1: File Upload -> Extraction Only (No Zoho Auth yet)
+     if file_path:
+         logger.info(f"Processing file: {file_path}")
+         doc = process_document(file_path)
+
+         if doc.get("status") == "success":
+             extracted_json = json.dumps(doc["extracted_data"], indent=2)
+             # We return this text. It gets added to history.
+             # The User must then say "Yes, push it" to trigger Phase 2.
+             return (
+                 f"I extracted the following data from **{doc['file']}**:\n\n"
+                 f"```json\n{extracted_json}\n```\n\n"
+                 "Please review it. If it looks correct, type **'Create Invoice'** or **'Push to Zoho'**."
+             )
+         else:
+             return f"OCR Failed: {doc.get('error')}"
+
+     # PHASE 2: Text Interaction (Check History for JSON + Intent)
+     hist_txt = "\n".join([f"U: {h[0]}\nA: {h[1]}" for h in history])
+
+     # The Prompt now checks history for JSON and waits for explicit "save/push" keywords
+     prompt = get_agent_prompt(hist_txt, message)
+
+     gen = local_llm_generate(prompt, max_tokens=256)
+     logger.info(f"LLM Decision: {gen['text']}")
+
+     tool_data = extract_json_safely(gen["text"])
+
+     if tool_data:
+         # User confirmed -> Execute Tool (Triggers Zoho Auth)
+         return parse_and_execute(gen["text"], history)
+
+     # Just chat/clarification
+     return gen["text"]
+
+ # --- UI ---
+ def chat_handler(msg, hist):
+     txt = msg.get("text", "")
+     files = msg.get("files", [])
+     path = files[0] if files else None
+
+     return chat_logic(txt, path, hist)
+
+ if __name__ == "__main__":
+     gc.collect()
+     demo = gr.ChatInterface(fn=chat_handler, multimodal=True)
+     demo.launch(server_name="0.0.0.0", server_port=7860)
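For reference, `parse_and_execute` in both app variants expects the model to emit payloads shaped like the sketch below (the module and field names are illustrative, and running it performs live Zoho calls):

```python
# Hypothetical LLM output: create a contact, then an invoice. The executor runs
# the tools in order and, if the invoice omits customer_id, reuses the id
# returned by create_record.
model_text = '''
[
  {"tool": "create_record",
   "args": {"module_name": "Contacts",
            "record_data": {"Last_Name": "Acme Traders"}}},
  {"tool": "create_invoice",
   "args": {"line_items": [{"name": "Widget", "quantity": 2, "rate": 625.0}]}}
]
'''
print(parse_and_execute(model_text, history=[]))
```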
config.py ADDED
@@ -0,0 +1,22 @@
+ # config.py — Zoho + local model configuration
+ # IMPORTANT: This file contains sensitive credentials. Keep it local and DO NOT commit to a public repository.
+
+ CLIENT_ID = "1000.SIMKGAO5719K0TQ0QZQ31ZU57RLFNQ"
+ CLIENT_SECRET = "60b329b4fe51930abee900cba6524ec7332cd67e06"
+ REFRESH_TOKEN = "1000.47c4724c105c0275477b8e0aea8415fd.63a086b666a133ca804f692086ee2963"
+ ORGANIZATION_ID = "60058860935"
+
+ # Zoho API endpoints (India data center)
+ API_BASE = "https://www.zohoapis.in/crm/v2"
+ INVOICE_API_BASE = "https://invoice.zoho.in/api/v3"
+
+ # Local model (set to None if you prefer not to load a local HF model)
+ LOCAL_MODEL = "Qwen/Qwen2.5-1.5B-Instruct"
+ LOCAL_TOKENIZER = None
+
+ # Optional: toggle demo behaviour at runtime via environment variable DEMO=true
+ # To avoid accidental API calls on startup, leave DEMO unset (or set to false) in production
+
+ # NOTE: If your LOCAL_MODEL points to a gated HF repo, ensure the runtime has proper HF auth
+ # (HUGGINGFACE_HUB_TOKEN or similar) and access to the model. If you don't have access, set
+ # LOCAL_MODEL = None or to a public model like "google/flan-t5-small".
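Given the warning at the top of the file, a minimal environment-variable variant of this config is sketched below (the `ZOHO_*` names are an assumed convention; nothing else in the repo requires them):

```python
# config.py (alternative): read secrets from the environment instead of
# hardcoding them in a committed file.
import os

CLIENT_ID = os.environ["ZOHO_CLIENT_ID"]
CLIENT_SECRET = os.environ["ZOHO_CLIENT_SECRET"]
REFRESH_TOKEN = os.environ["ZOHO_REFRESH_TOKEN"]
ORGANIZATION_ID = os.environ["ZOHO_ORGANIZATION_ID"]

API_BASE = "https://www.zohoapis.in/crm/v2"
INVOICE_API_BASE = "https://invoice.zoho.in/api/v3"

# Fall back to the same public model the committed config uses.
LOCAL_MODEL = os.environ.get("LOCAL_MODEL", "Qwen/Qwen2.5-1.5B-Instruct")
LOCAL_TOKENIZER = None
```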
ocr_engine.py ADDED
@@ -0,0 +1,63 @@
+ import pytesseract
+ from pytesseract import Output
+ from pdf2image import convert_from_path
+ from PIL import Image
+ import os
+ import logging
+ import numpy as np
+
+ logger = logging.getLogger("ocr_engine")
+
+ def extract_text_and_conf(file_path: str) -> tuple[str, float]:
+     """
+     Extracts text AND confidence score from a PDF or Image.
+     Returns: (text_content, average_confidence_0_to_100)
+     """
+     if not os.path.exists(file_path):
+         return "", 0.0
+
+     text_content = ""
+     confidences = []
+
+     try:
+         images = []
+         # 1. Load Images
+         if file_path.lower().endswith('.pdf'):
+             try:
+                 images = convert_from_path(file_path)
+             except Exception as e:
+                 logger.error(f"PDF Convert Error: {e}")
+                 return "", 0.0
+         elif file_path.lower().endswith(('.png', '.jpg', '.jpeg', '.tiff', '.bmp')):
+             try:
+                 images = [Image.open(file_path)]
+             except Exception as e:
+                 logger.error(f"Image Open Error: {e}")
+                 return "", 0.0
+
+         # 2. Process Each Page
+         for i, image in enumerate(images):
+             # A. Get Layout-Preserved Text (Best for LLM)
+             page_text = pytesseract.image_to_string(image)
+             text_content += f"--- Page {i+1} ---\n{page_text}\n"
+
+             # B. Get Confidence Data (Best for KPIs)
+             # data_dict keys: ['level', 'page_num', 'block_num', 'par_num', 'line_num', 'word_num', 'left', 'top', 'width', 'height', 'conf', 'text']
+             data = pytesseract.image_to_data(image, output_type=Output.DICT)
+
+             # Filter valid confidences (ignore -1 which usually means whitespace/block info)
+             for conf in data['conf']:
+                 # Tesseract returns -1 for structural elements (not words)
+                 if conf != -1:
+                     confidences.append(conf)
+
+         # 3. Calculate Average Confidence
+         avg_conf = 0.0
+         if confidences:
+             avg_conf = sum(confidences) / len(confidences)
+
+         return text_content.strip(), round(avg_conf, 2)
+
+     except Exception as e:
+         logger.error(f"OCR Critical Error: {e}")
+         return "", 0.0
ocr_engine_json.py ADDED
@@ -0,0 +1,45 @@
+ import pytesseract
+ from pdf2image import convert_from_path
+ from PIL import Image
+ import os
+ import logging
+ logger = logging.getLogger("ocr_engine")
+ def extract_text_from_file(file_path: str) -> str:
+     """
+     Extracts text from a PDF or Image file using Tesseract.
+     """
+     if not os.path.exists(file_path):
+         return ""
+
+     text_content = ""
+
+     try:
+         # Handle PDF
+         if file_path.lower().endswith('.pdf'):
+             try:
+                 # Convert PDF pages to images
+                 images = convert_from_path(file_path)
+                 for i, image in enumerate(images):
+                     page_text = pytesseract.image_to_string(image)
+                     text_content += f"--- Page {i+1} ---\n{page_text}\n"
+             except Exception as e:
+                 logger.error(f"Error converting PDF: {e}")
+                 return f"Error reading PDF: {str(e)}"
+
+         # Handle Images (JPG, PNG, etc.)
+         elif file_path.lower().endswith(('.png', '.jpg', '.jpeg', '.tiff', '.bmp')):
+             try:
+                 image = Image.open(file_path)
+                 text_content = pytesseract.image_to_string(image)
+             except Exception as e:
+                 logger.error(f"Error reading image: {e}")
+                 return f"Error reading image: {str(e)}"
+
+         else:
+             return "Unsupported file format. Please upload PDF or Image."
+
+     except Exception as e:
+         logger.error(f"OCR Critical Error: {e}")
+         return f"OCR Failed: {str(e)}"
+
+     return text_content.strip()
packages.txt ADDED
@@ -0,0 +1,2 @@
+ tesseract-ocr
+ poppler-utils
prompts.py ADDED
@@ -0,0 +1,46 @@
+ # prompts.py
+ # Qwen-2.5 Compatible Prompts
+
+ def get_ocr_extraction_prompt(raw_text: str) -> str:
+     return f"""<|im_start|>system
+ You are a precise Data Extraction Engine.
+ Extract data from the text below and return a JSON object.
+ Fields: contact_name, total_amount, currency, invoice_date, line_items (name, quantity, rate).
+ Output ONLY JSON. No markdown.
+ <|im_end|>
+ <|im_start|>user
+ Input Text:
+ {raw_text[:3000]}
+
+ Return the JSON:
+ <|im_end|>
+ <|im_start|>assistant
+ """
+
+ def get_agent_prompt(history_text: str, user_message: str) -> str:
+     """
+     Agent Prompt: Decides whether to Chat or Call Tools based on History.
+     """
+     return f"""<|im_start|>system
+ You are the Zoho CRM Assistant.
+
+ AVAILABLE TOOLS:
+ 1. create_record(module_name, record_data)
+ 2. create_invoice(data)
+
+ RULES:
+ 1. REVIEW THE CHAT HISTORY. If you see extracted JSON data in the history, use it.
+ 2. TRIGGER CONDITION: ONLY call a tool if the user explicitly asks to "save", "create", "push", or "upload".
+ 3. If the user has NOT confirmed, just answer their questions or summarize the data.
+ 4. TOOL FORMAT: Return a JSON object: {{"tool": "name", "args": {{...}}}}
+ 5. Return ONLY JSON for tool calls.
+ <|im_end|>
+ <|im_start|>user
+ HISTORY:
+ {history_text}
+
+ CURRENT REQUEST:
+ {user_message}
+ <|im_end|>
+ <|im_start|>assistant
+ """
requirements.txt ADDED
@@ -0,0 +1,9 @@
+ fastmcp
+ gradio
+ requests
+ transformers
+ torch  # choose CPU or CUDA wheel appropriate for your environment
+ accelerate
+ pytesseract
+ pdf2image
+ pillow
zoho_client_mcp.py ADDED
@@ -0,0 +1,93 @@
+ from mcp.server.fastmcp import FastMCP
+ from typing import Optional
+ import requests
+ import os
+
+ from config import CLIENT_ID, CLIENT_SECRET, REFRESH_TOKEN, API_BASE
+
+ # --- Initialize the FastMCP Server ---
+ mcp = FastMCP("ZohoCRMAgent")
+
+ # --- Token Refresh Utility ---
+ def _get_valid_token_headers() -> dict:
+     """Internal function to ensure a valid Zoho access token is available.
+     This uses the refresh token flow to retrieve a fresh access token."""
+     token_url = "https://accounts.zoho.in/oauth/v2/token"
+     params = {
+         "refresh_token": REFRESH_TOKEN,
+         "client_id": CLIENT_ID,
+         "client_secret": CLIENT_SECRET,
+         "grant_type": "refresh_token"
+     }
+     response = requests.post(token_url, params=params)
+     if response.status_code == 200:
+         access_token = response.json().get("access_token")
+         return {"Authorization": f"Zoho-oauthtoken {access_token}"}
+     else:
+         raise Exception(f"Failed to refresh token: {response.text}")
+
+ # --- MCP Tools for Zoho CRM and Zoho Books Operations ---
+
+ @mcp.tool()
+ def authenticate_zoho() -> str:
+     """Refreshes and confirms Zoho CRM access token availability."""
+     _ = _get_valid_token_headers()
+     return "Zoho CRM access token successfully refreshed."
+
+ @mcp.tool()
+ def create_record(module_name: str, record_data: dict) -> str:
+     """Creates a new record in the specified Zoho CRM module."""
+     headers = _get_valid_token_headers()
+     response = requests.post(f"{API_BASE}/{module_name}", headers=headers, json={"data": [record_data]})
+     if response.status_code in [200, 201]:
+         return f"Record created successfully in {module_name}."
+     return f"Error creating record: {response.text}"
+
+ @mcp.tool()
+ def get_records(module_name: str, page: int = 1, per_page: int = 200) -> list:
+     """Fetches records from a specified Zoho CRM module."""
+     headers = _get_valid_token_headers()
+     params = {"page": page, "per_page": per_page}
+     response = requests.get(f"{API_BASE}/{module_name}", headers=headers, params=params)
+     if response.status_code == 200:
+         return response.json().get("data", [])
+     return [f"Error retrieving records: {response.text}"]
+
+ @mcp.tool()
+ def update_record(module_name: str, record_id: str, data: dict) -> str:
+     """Updates a record in a Zoho CRM module."""
+     headers = _get_valid_token_headers()
+     response = requests.put(f"{API_BASE}/{module_name}/{record_id}", headers=headers, json={"data": [data]})
+     if response.status_code == 200:
+         return f"Record {record_id} in {module_name} updated successfully."
+     return f"Error updating record: {response.text}"
+
+ @mcp.tool()
+ def delete_record(module_name: str, record_id: str) -> str:
+     """Deletes a record from the specified Zoho CRM module."""
+     headers = _get_valid_token_headers()
+     response = requests.delete(f"{API_BASE}/{module_name}/{record_id}", headers=headers)
+     if response.status_code == 200:
+         return f"Record {record_id} in {module_name} deleted."
+     return f"Error deleting record: {response.text}"
+
+ @mcp.tool()
+ def create_invoice(data: dict) -> str:
+     """Creates an invoice in Zoho Books."""
+     headers = _get_valid_token_headers()
+     response = requests.post(f"{API_BASE}/invoices", headers=headers, json={"data": [data]})
+     if response.status_code in [200, 201]:
+         return "Invoice created successfully."
+     return f"Error creating invoice: {response.text}"
+
+ @mcp.tool()
+ def process_document(file_path: str, target_module: Optional[str] = "Contacts") -> dict:
+     """Extracts data from uploaded file (PDF/image) and returns structured info."""
+     # Placeholder for OCR + Gemini parsing logic
+     # raw_text = perform_ocr(file_path)
+     # structured_data = gemini_parse_json(raw_text)
+     return {
+         "status": "success",
+         "file": os.path.basename(file_path),
+         "extracted_data": f"Simulated structured data from {target_module} document."
+     }