Vachudev committed on
Commit dc79584 · verified · 1 parent: 4071c84

Initial Commit

Files changed (9)
  1. app.py +301 -0
  2. app_json.py +233 -0
  3. config.py +22 -0
  4. ocr_engine.py +63 -0
  5. ocr_engine_json.py +45 -0
  6. packages.txt +2 -0
  7. prompts.py +46 -0
  8. requirements.txt +9 -0
  9. zoho_client_mcp.py +93 -0
app.py ADDED
@@ -0,0 +1,301 @@
+ # app.py — MCP server (single-file)
+
+ from mcp.server.fastmcp import FastMCP
+ from typing import Optional, List, Tuple, Any, Dict
+ import requests
+ import os
+ import gradio as gr
+ import json
+ import re
+ import logging
+ import gc
+
+ # --- Import OCR Engine & Prompts ---
+ try:
+     # UPDATED IMPORT
+     from ocr_engine import extract_text_and_conf
+     from prompts import get_ocr_extraction_prompt, get_agent_prompt
+ except ImportError:
+     def extract_text_and_conf(path): return "", 0.0
+     def get_ocr_extraction_prompt(txt): return txt
+     def get_agent_prompt(h, u): return u
+
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger("mcp_server")
+
+ # --- Load Config ---
+ try:
+     from config import (
+         CLIENT_ID, CLIENT_SECRET, REFRESH_TOKEN, API_BASE,
+         INVOICE_API_BASE, ORGANIZATION_ID, LOCAL_MODEL
+     )
+ except Exception:
+     raise SystemExit("Config missing.")
+
+ mcp = FastMCP("ZohoCRMAgent")
+
+ # --- Globals ---
+ LLM_PIPELINE = None
+ TOKENIZER = None
+
+ # --- NEW: Evaluation / KPI Logic (Integrated OCR Score) ---
+ def calculate_extraction_confidence(data: dict, ocr_score: float) -> dict:
+     """
+     Calculates Hybrid Confidence:
+     - 20% based on OCR Engine Signal (Tesseract Confidence)
+     - 80% based on Data Quality (LLM Extraction Completeness)
+     """
+     semantic_score = 0
+     issues = []
+
+     # 1. Structure Check (Base 10 pts)
+     semantic_score += 10
+
+     # 2. Total Amount Check (30 pts)
+     amt = str(data.get("total_amount", "")).replace("$", "").replace(",", "")
+     if amt and re.match(r'^\d+(\.\d+)?$', amt):
+         semantic_score += 30
+     else:
+         issues.append("Missing/Invalid Total Amount")
+
+     # 3. Date Check (20 pts)
+     date_str = str(data.get("invoice_date", ""))
+     if date_str and len(date_str) >= 8:
+         semantic_score += 20
+     else:
+         issues.append("Missing Invoice Date")
+
+     # 4. Line Items Check (30 pts)
+     items = data.get("line_items", [])
+     if isinstance(items, list) and len(items) > 0:
+         if any(i.get("name") for i in items):
+             semantic_score += 30
+         else:
+             semantic_score += 10
+             issues.append("Line Items missing descriptions")
+     else:
+         issues.append("No Line Items detected")
+
+     # 5. Contact Name (10 pts)
+     if data.get("contact_name"):
+         semantic_score += 10
+     else:
+         issues.append("Missing Vendor Name")
+
+     # --- HYBRID CALCULATION ---
+     # Weight: 80% Data Quality + 20% OCR Quality
+     final_score = (semantic_score * 0.8) + (ocr_score * 0.2)
+
+     # Add OCR warnings
+     if ocr_score < 60:
+         issues.append(f"Low OCR Confidence ({ocr_score}%) - Check image quality")
+
+     return {
+         "score": int(final_score),
+         "ocr_score": ocr_score,
+         "semantic_score": semantic_score,
+         "rating": "High" if final_score > 80 else ("Medium" if final_score > 50 else "Low"),
+         "issues": issues
+     }
+
+ # --- Helpers ---
+ def extract_json_safely(text: str) -> Optional[Any]:
+     try:
+         return json.loads(text)
+     except:
+         match = re.search(r'(\{.*\}|\[.*\])', text, re.DOTALL)
+         return json.loads(match.group(0)) if match else None
+
+ def _normalize_local_path_args(args: Any) -> Any:
+     if not isinstance(args, dict): return args
+     fp = args.get("file_path") or args.get("path")
+     if isinstance(fp, str) and fp.startswith("/mnt/data/") and os.path.exists(fp):
+         args["file_url"] = f"file://{fp}"
+     return args
+
+ # --- Model Loading ---
+ def init_local_model():
+     global LLM_PIPELINE, TOKENIZER
+     if LLM_PIPELINE is not None: return
+
+     try:
+         from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
+
+         logger.info(f"Loading lighter model: {LOCAL_MODEL}...")
+         TOKENIZER = AutoTokenizer.from_pretrained(LOCAL_MODEL)
+         model = AutoModelForCausalLM.from_pretrained(
+             LOCAL_MODEL,
+             device_map="auto",
+             torch_dtype="auto"
+         )
+         LLM_PIPELINE = pipeline("text-generation", model=model, tokenizer=TOKENIZER)
+         logger.info("Model loaded.")
+     except Exception as e:
+         logger.error(f"Model load error: {e}")
+
+ def local_llm_generate(prompt: str, max_tokens: int = 512) -> Dict[str, Any]:
+     if LLM_PIPELINE is None:
+         init_local_model()
+
+     if LLM_PIPELINE is None:
+         return {"text": "Model not loaded.", "raw": None}
+
+     try:
+         out = LLM_PIPELINE(
+             prompt,
+             max_new_tokens=max_tokens,
+             return_full_text=False,
+             do_sample=False
+         )
+         text = out[0]["generated_text"] if out else ""
+         return {"text": text, "raw": out}
+     except Exception as e:
+         return {"text": f"Error: {e}", "raw": None}
+
+ # --- Tools (Zoho) ---
+ def _get_valid_token_headers() -> dict:
+     r = requests.post("https://accounts.zoho.in/oauth/v2/token", params={
+         "refresh_token": REFRESH_TOKEN, "client_id": CLIENT_ID,
+         "client_secret": CLIENT_SECRET, "grant_type": "refresh_token"
+     }, timeout=10)
+     if r.status_code == 200:
+         return {"Authorization": f"Zoho-oauthtoken {r.json().get('access_token')}"}
+     return {}
+
+ @mcp.tool()
+ def create_record(module_name: str, record_data: dict) -> str:
+     h = _get_valid_token_headers()
+     if not h: return "Auth Failed"
+     r = requests.post(f"{API_BASE}/{module_name}", headers=h, json={"data": [record_data]})
+     if r.status_code in (200, 201):
+         try:
+             d = r.json().get("data", [{}])[0].get("details", {})
+             return json.dumps({"status": "success", "id": d.get("id"), "zoho_response": r.json()})
+         except:
+             return json.dumps(r.json())
+     return r.text
+
+ @mcp.tool()
+ def create_invoice(data: dict) -> str:
+     h = _get_valid_token_headers()
+     if not h: return "Auth Failed"
+     r = requests.post(f"{INVOICE_API_BASE}/invoices", headers=h,
+                       params={"organization_id": ORGANIZATION_ID}, json=data)
+     return json.dumps(r.json()) if r.status_code in (200, 201) else r.text
+
+ @mcp.tool()
+ def process_document(file_path: str, target_module: Optional[str] = "Contacts") -> dict:
+     if not os.path.exists(file_path):
+         return {"error": f"File not found at path: {file_path}"}
+
+     # 1. OCR (UPDATED: Returns text AND score)
+     raw_text, ocr_score = extract_text_and_conf(file_path)
+
+     if not raw_text: return {"error": "OCR empty"}
+
+     # 2. LLM Extraction
+     prompt = get_ocr_extraction_prompt(raw_text)
+     res = local_llm_generate(prompt, max_tokens=300)
+     data = extract_json_safely(res["text"])
+
+     # 3. Evaluation / KPI Calculation (UPDATED: Uses ocr_score)
+     kpis = {"score": 0, "rating": "Fail", "issues": ["Extraction Failed"]}
+     if data:
+         kpis = calculate_extraction_confidence(data, ocr_score)
+
+     return {
+         "status": "success",
+         "file": os.path.basename(file_path),
+         "extracted_data": data or {"raw": res["text"]},
+         "kpis": kpis
+     }
+
+ # --- Executor ---
+ def parse_and_execute(model_text: str, history: list) -> str:
+     payload = extract_json_safely(model_text)
+     if not payload: return "No valid tool call found."
+
+     cmds = [payload] if isinstance(payload, dict) else payload
+     results = []
+     last_contact_id = None
+
+     for cmd in cmds:
+         if not isinstance(cmd, dict): continue
+         tool = cmd.get("tool")
+         args = _normalize_local_path_args(cmd.get("args", {}))
+
+         if tool == "create_record":
+             res = create_record(args.get("module_name", "Contacts"), args.get("record_data", {}))
+             results.append(f"Record: {res}")
+             try:
+                 rj = json.loads(res)
+                 if isinstance(rj, dict) and "id" in rj:
+                     last_contact_id = rj["id"]
+             except: pass
+
+         elif tool == "create_invoice":
+             if not args.get("customer_id") and last_contact_id:
+                 args["customer_id"] = last_contact_id
+
+             invoice_payload = args
+             if last_contact_id and "customer_id" not in invoice_payload:
+                 invoice_payload["customer_id"] = last_contact_id
+
+             res = create_invoice(invoice_payload)
+             results.append(f"Invoice: {res}")
+
+     return "\n".join(results)
+
+ # --- Chat Core ---
+ def chat_logic(message: str, file_path: str, history: list) -> str:
+
+     # PHASE 1: File Upload -> Extraction -> KPI Report
+     if file_path:
+         logger.info(f"Processing file: {file_path}")
+         doc = process_document(file_path)
+
+         if doc.get("status") == "success":
+             data = doc["extracted_data"]
+             kpi = doc["kpis"]
+
+             extracted_json = json.dumps(data, indent=2)
+
+             # Format KPI output (Expanded)
+             rating_emoji = "🟢" if kpi['rating'] == 'High' else ("🟡" if kpi['rating'] == 'Medium' else "🔴")
+             issues_txt = "\n".join([f"- {i}" for i in kpi['issues']]) if kpi['issues'] else "None"
+
+             return (
+                 f"### 📄 Extraction Complete: **{doc['file']}**\n"
+                 f"**Combined Confidence:** {rating_emoji} {kpi['score']}/100\n"
+                 f"*(OCR Signal: {kpi['ocr_score']}% | Data Quality: {kpi['semantic_score']}%)*\n\n"
+                 f"**Issues Detected:**\n{issues_txt}\n\n"
+                 f"```json\n{extracted_json}\n```\n\n"
+                 "Type **'Create Invoice'** to push this to Zoho."
+             )
+         else:
+             return f"OCR Failed: {doc.get('error')}"
+
+     # PHASE 2: Text Interaction
+     hist_txt = "\n".join([f"U: {h[0]}\nA: {h[1]}" for h in history])
+     prompt = get_agent_prompt(hist_txt, message)
+
+     gen = local_llm_generate(prompt, max_tokens=256)
+     tool_data = extract_json_safely(gen["text"])
+
+     if tool_data:
+         return parse_and_execute(gen["text"], history)
+
+     return gen["text"]
+
+ # --- UI ---
+ def chat_handler(msg, hist):
+     txt = msg.get("text", "")
+     files = msg.get("files", [])
+     path = files[0] if files else None
+
+     return chat_logic(txt, path, hist)
+
+ if __name__ == "__main__":
+     gc.collect()
+     demo = gr.ChatInterface(fn=chat_handler, multimodal=True)
+     demo.launch(server_name="0.0.0.0", server_port=7860)
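A quick sanity check of the hybrid scoring in `calculate_extraction_confidence` (a minimal sketch; the sample invoice dict and the 72.5% OCR figure are illustrative, not taken from the repo):

```python
# Hypothetical, fully populated extraction with a mid-range Tesseract signal.
sample = {
    "contact_name": "Acme Traders",
    "total_amount": "1250.00",
    "invoice_date": "2024-05-01",
    "line_items": [{"name": "Widget", "quantity": 2, "rate": 625.0}],
}

result = calculate_extraction_confidence(sample, ocr_score=72.5)
# semantic_score = 10 + 30 + 20 + 30 + 10 = 100
# final_score    = 100 * 0.8 + 72.5 * 0.2 = 94.5  -> score 94, rating "High"
print(result["score"], result["rating"], result["issues"])  # 94 High []
```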
app_json.py ADDED
@@ -0,0 +1,233 @@
+ # app_json.py — MCP server (single-file)
+
+ from mcp.server.fastmcp import FastMCP
+ from typing import Optional, List, Tuple, Any, Dict
+ import requests
+ import os
+ import gradio as gr
+ import json
+ import re
+ import logging
+ import gc
+
+ # --- Import OCR Engine & Prompts ---
+ try:
+     from ocr_engine import extract_text_from_file
+     from prompts import get_ocr_extraction_prompt, get_agent_prompt
+ except ImportError:
+     def extract_text_from_file(path): return ""
+     def get_ocr_extraction_prompt(txt): return txt
+     def get_agent_prompt(h, u): return u
+
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger("mcp_server")
+
+ # --- Load Config ---
+ try:
+     from config import (
+         CLIENT_ID, CLIENT_SECRET, REFRESH_TOKEN, API_BASE,
+         INVOICE_API_BASE, ORGANIZATION_ID, LOCAL_MODEL
+     )
+ except Exception:
+     raise SystemExit("Config missing.")
+
+ mcp = FastMCP("ZohoCRMAgent")
+
+ # --- Globals ---
+ LLM_PIPELINE = None
+ TOKENIZER = None
+
+ # --- Helpers ---
+ def extract_json_safely(text: str) -> Optional[Any]:
+     try:
+         return json.loads(text)
+     except:
+         match = re.search(r'(\{.*\}|\[.*\])', text, re.DOTALL)
+         return json.loads(match.group(0)) if match else None
+
+ def _normalize_local_path_args(args: Any) -> Any:
+     if not isinstance(args, dict): return args
+     fp = args.get("file_path") or args.get("path")
+     if isinstance(fp, str) and fp.startswith("/mnt/data/") and os.path.exists(fp):
+         args["file_url"] = f"file://{fp}"
+     return args
+
+ # --- Model Loading ---
+ def init_local_model():
+     global LLM_PIPELINE, TOKENIZER
+     if LLM_PIPELINE is not None: return
+
+     try:
+         from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
+
+         logger.info(f"Loading lighter model: {LOCAL_MODEL}...")
+         TOKENIZER = AutoTokenizer.from_pretrained(LOCAL_MODEL)
+         model = AutoModelForCausalLM.from_pretrained(
+             LOCAL_MODEL,
+             device_map="auto",
+             torch_dtype="auto"
+         )
+         LLM_PIPELINE = pipeline("text-generation", model=model, tokenizer=TOKENIZER)
+         logger.info("Model loaded.")
+     except Exception as e:
+         logger.error(f"Model load error: {e}")
+
+ def local_llm_generate(prompt: str, max_tokens: int = 512) -> Dict[str, Any]:
+     if LLM_PIPELINE is None:
+         init_local_model()
+
+     if LLM_PIPELINE is None:
+         return {"text": "Model not loaded.", "raw": None}
+
+     try:
+         out = LLM_PIPELINE(
+             prompt,
+             max_new_tokens=max_tokens,
+             return_full_text=False,
+             do_sample=False
+         )
+         text = out[0]["generated_text"] if out else ""
+         return {"text": text, "raw": out}
+     except Exception as e:
+         return {"text": f"Error: {e}", "raw": None}
+
+ # --- Tools (Zoho) ---
+ def _get_valid_token_headers() -> dict:
+     r = requests.post("https://accounts.zoho.in/oauth/v2/token", params={
+         "refresh_token": REFRESH_TOKEN, "client_id": CLIENT_ID,
+         "client_secret": CLIENT_SECRET, "grant_type": "refresh_token"
+     }, timeout=10)
+     if r.status_code == 200:
+         return {"Authorization": f"Zoho-oauthtoken {r.json().get('access_token')}"}
+     return {}
+
+ @mcp.tool()
+ def create_record(module_name: str, record_data: dict) -> str:
+     h = _get_valid_token_headers()
+     if not h: return "Auth Failed"
+     r = requests.post(f"{API_BASE}/{module_name}", headers=h, json={"data": [record_data]})
+     if r.status_code in (200, 201):
+         try:
+             d = r.json().get("data", [{}])[0].get("details", {})
+             return json.dumps({"status": "success", "id": d.get("id"), "zoho_response": r.json()})
+         except:
+             return json.dumps(r.json())
+     return r.text
+
+ @mcp.tool()
+ def create_invoice(data: dict) -> str:
+     h = _get_valid_token_headers()
+     if not h: return "Auth Failed"
+     r = requests.post(f"{INVOICE_API_BASE}/invoices", headers=h,
+                       params={"organization_id": ORGANIZATION_ID}, json=data)
+     return json.dumps(r.json()) if r.status_code in (200, 201) else r.text
+
+ @mcp.tool()
+ def process_document(file_path: str, target_module: Optional[str] = "Contacts") -> dict:
+     if not os.path.exists(file_path):
+         return {"error": f"File not found at path: {file_path}"}
+
+     # 1. OCR
+     raw_text = extract_text_from_file(file_path)
+     if not raw_text: return {"error": "OCR empty"}
+
+     # 2. LLM Extraction
+     prompt = get_ocr_extraction_prompt(raw_text)
+     res = local_llm_generate(prompt, max_tokens=300)
+     data = extract_json_safely(res["text"])
+
+     return {
+         "status": "success",
+         "file": os.path.basename(file_path),
+         "extracted_data": data or {"raw": res["text"]}
+     }
+
+ # --- Executor ---
+ def parse_and_execute(model_text: str, history: list) -> str:
+     payload = extract_json_safely(model_text)
+     if not payload: return "No valid tool call found."
+
+     cmds = [payload] if isinstance(payload, dict) else payload
+     results = []
+
+     last_contact_id = None
+
+     for cmd in cmds:
+         if not isinstance(cmd, dict): continue
+         tool = cmd.get("tool")
+         args = _normalize_local_path_args(cmd.get("args", {}))
+
+         if tool == "create_record":
+             res = create_record(args.get("module_name", "Contacts"), args.get("record_data", {}))
+             results.append(f"Record: {res}")
+             try:
+                 rj = json.loads(res)
+                 if isinstance(rj, dict) and "id" in rj:
+                     last_contact_id = rj["id"]
+             except: pass
+
+         elif tool == "create_invoice":
+             # Auto-fill contact_id if we just created one
+             if not args.get("customer_id") and last_contact_id:
+                 args["customer_id"] = last_contact_id
+
+             # Map Items from strict structure
+             invoice_payload = args  # Assuming LLM passes correct structure, or map here
+             if last_contact_id and "customer_id" not in invoice_payload:
+                 invoice_payload["customer_id"] = last_contact_id
+
+             res = create_invoice(invoice_payload)
+             results.append(f"Invoice: {res}")
+
+     return "\n".join(results)
+
+ # --- Chat Core ---
+ def chat_logic(message: str, file_path: str, history: list) -> str:
+
+     # PHASE 1: File Upload -> Extraction Only (No Zoho Auth yet)
+     if file_path:
+         logger.info(f"Processing file: {file_path}")
+         doc = process_document(file_path)
+
+         if doc.get("status") == "success":
+             extracted_json = json.dumps(doc["extracted_data"], indent=2)
+             # We return this text. It gets added to history.
+             # The User must then say "Yes, push it" to trigger Phase 2.
+             return (
+                 f"I extracted the following data from **{doc['file']}**:\n\n"
+                 f"```json\n{extracted_json}\n```\n\n"
+                 "Please review it. If it looks correct, type **'Create Invoice'** or **'Push to Zoho'**."
+             )
+         else:
+             return f"OCR Failed: {doc.get('error')}"
+
+     # PHASE 2: Text Interaction (Check History for JSON + Intent)
+     hist_txt = "\n".join([f"U: {h[0]}\nA: {h[1]}" for h in history])
+
+     # The Prompt now checks history for JSON and waits for explicit "save/push" keywords
+     prompt = get_agent_prompt(hist_txt, message)
+
+     gen = local_llm_generate(prompt, max_tokens=256)
+     logger.info(f"LLM Decision: {gen['text']}")
+
+     tool_data = extract_json_safely(gen["text"])
+
+     if tool_data:
+         # User confirmed -> Execute Tool (Triggers Zoho Auth)
+         return parse_and_execute(gen["text"], history)
+
+     # Just chat/clarification
+     return gen["text"]
+
+ # --- UI ---
+ def chat_handler(msg, hist):
+     txt = msg.get("text", "")
+     files = msg.get("files", [])
+     path = files[0] if files else None
+
+     return chat_logic(txt, path, hist)
+
+ if __name__ == "__main__":
+     gc.collect()
+     demo = gr.ChatInterface(fn=chat_handler, multimodal=True)
+     demo.launch(server_name="0.0.0.0", server_port=7860)
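For reference, `parse_and_execute` in both app variants expects the model to emit payloads shaped like the sketch below (the module and field names are illustrative, and running it performs live Zoho calls):

```python
# Hypothetical LLM output: create a contact, then an invoice. The executor runs
# the tools in order and, if the invoice omits customer_id, reuses the id
# returned by create_record.
model_text = '''
[
  {"tool": "create_record",
   "args": {"module_name": "Contacts",
            "record_data": {"Last_Name": "Acme Traders"}}},
  {"tool": "create_invoice",
   "args": {"line_items": [{"name": "Widget", "quantity": 2, "rate": 625.0}]}}
]
'''
print(parse_and_execute(model_text, history=[]))
```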
config.py ADDED
@@ -0,0 +1,22 @@
+ # config.py — Zoho + local model configuration
+ # IMPORTANT: This file contains sensitive credentials. Keep it local and DO NOT commit to a public repository.
+
+ CLIENT_ID = "1000.SIMKGAO5719K0TQ0QZQ31ZU57RLFNQ"
+ CLIENT_SECRET = "60b329b4fe51930abee900cba6524ec7332cd67e06"
+ REFRESH_TOKEN = "1000.47c4724c105c0275477b8e0aea8415fd.63a086b666a133ca804f692086ee2963"
+ ORGANIZATION_ID = "60058860935"
+
+ # Zoho API endpoints (India data center)
+ API_BASE = "https://www.zohoapis.in/crm/v2"
+ INVOICE_API_BASE = "https://invoice.zoho.in/api/v3"
+
+ # Local model (set to None if you prefer not to load a local HF model)
+ LOCAL_MODEL = "Qwen/Qwen2.5-1.5B-Instruct"
+ LOCAL_TOKENIZER = None
+
+ # Optional: toggle demo behaviour at runtime via environment variable DEMO=true
+ # To avoid accidental API calls on startup, leave DEMO unset (or set to false) in production
+
+ # NOTE: If your LOCAL_MODEL points to a gated HF repo, ensure the runtime has proper HF auth
+ # (HUGGINGFACE_HUB_TOKEN or similar) and access to the model. If you don't have access, set
+ # LOCAL_MODEL = None or to a public model like "google/flan-t5-small".
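Given the warning at the top of the file, a minimal environment-variable variant of this config is sketched below (the `ZOHO_*` names are an assumed convention; nothing else in the repo requires them):

```python
# config.py (alternative): read secrets from the environment instead of
# hardcoding them in a committed file.
import os

CLIENT_ID = os.environ["ZOHO_CLIENT_ID"]
CLIENT_SECRET = os.environ["ZOHO_CLIENT_SECRET"]
REFRESH_TOKEN = os.environ["ZOHO_REFRESH_TOKEN"]
ORGANIZATION_ID = os.environ["ZOHO_ORGANIZATION_ID"]

API_BASE = "https://www.zohoapis.in/crm/v2"
INVOICE_API_BASE = "https://invoice.zoho.in/api/v3"

# Fall back to the same public model the committed config uses.
LOCAL_MODEL = os.environ.get("LOCAL_MODEL", "Qwen/Qwen2.5-1.5B-Instruct")
LOCAL_TOKENIZER = None
```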
ocr_engine.py ADDED
@@ -0,0 +1,63 @@
+ import pytesseract
+ from pytesseract import Output
+ from pdf2image import convert_from_path
+ from PIL import Image
+ import os
+ import logging
+ import numpy as np
+
+ logger = logging.getLogger("ocr_engine")
+
+ def extract_text_and_conf(file_path: str) -> tuple[str, float]:
+     """
+     Extracts text AND confidence score from a PDF or Image.
+     Returns: (text_content, average_confidence_0_to_100)
+     """
+     if not os.path.exists(file_path):
+         return "", 0.0
+
+     text_content = ""
+     confidences = []
+
+     try:
+         images = []
+         # 1. Load Images
+         if file_path.lower().endswith('.pdf'):
+             try:
+                 images = convert_from_path(file_path)
+             except Exception as e:
+                 logger.error(f"PDF Convert Error: {e}")
+                 return "", 0.0
+         elif file_path.lower().endswith(('.png', '.jpg', '.jpeg', '.tiff', '.bmp')):
+             try:
+                 images = [Image.open(file_path)]
+             except Exception as e:
+                 logger.error(f"Image Open Error: {e}")
+                 return "", 0.0
+
+         # 2. Process Each Page
+         for i, image in enumerate(images):
+             # A. Get Layout-Preserved Text (Best for LLM)
+             page_text = pytesseract.image_to_string(image)
+             text_content += f"--- Page {i+1} ---\n{page_text}\n"
+
+             # B. Get Confidence Data (Best for KPIs)
+             # data_dict keys: ['level', 'page_num', 'block_num', 'par_num', 'line_num', 'word_num', 'left', 'top', 'width', 'height', 'conf', 'text']
+             data = pytesseract.image_to_data(image, output_type=Output.DICT)
+
+             # Filter valid confidences (ignore -1 which usually means whitespace/block info)
+             for conf in data['conf']:
+                 # Tesseract returns -1 for structural elements (not words)
+                 if conf != -1:
+                     confidences.append(conf)
+
+         # 3. Calculate Average Confidence
+         avg_conf = 0.0
+         if confidences:
+             avg_conf = sum(confidences) / len(confidences)
+
+         return text_content.strip(), round(avg_conf, 2)
+
+     except Exception as e:
+         logger.error(f"OCR Critical Error: {e}")
+         return "", 0.0
ocr_engine_json.py ADDED
@@ -0,0 +1,45 @@
+ import pytesseract
+ from pdf2image import convert_from_path
+ from PIL import Image
+ import os
+ import logging
+ logger = logging.getLogger("ocr_engine")
+ def extract_text_from_file(file_path: str) -> str:
+     """
+     Extracts text from a PDF or Image file using Tesseract.
+     """
+     if not os.path.exists(file_path):
+         return ""
+
+     text_content = ""
+
+     try:
+         # Handle PDF
+         if file_path.lower().endswith('.pdf'):
+             try:
+                 # Convert PDF pages to images
+                 images = convert_from_path(file_path)
+                 for i, image in enumerate(images):
+                     page_text = pytesseract.image_to_string(image)
+                     text_content += f"--- Page {i+1} ---\n{page_text}\n"
+             except Exception as e:
+                 logger.error(f"Error converting PDF: {e}")
+                 return f"Error reading PDF: {str(e)}"
+
+         # Handle Images (JPG, PNG, etc.)
+         elif file_path.lower().endswith(('.png', '.jpg', '.jpeg', '.tiff', '.bmp')):
+             try:
+                 image = Image.open(file_path)
+                 text_content = pytesseract.image_to_string(image)
+             except Exception as e:
+                 logger.error(f"Error reading image: {e}")
+                 return f"Error reading image: {str(e)}"
+
+         else:
+             return "Unsupported file format. Please upload PDF or Image."
+
+     except Exception as e:
+         logger.error(f"OCR Critical Error: {e}")
+         return f"OCR Failed: {str(e)}"
+
+     return text_content.strip()
packages.txt ADDED
@@ -0,0 +1,2 @@
+ tesseract-ocr
+ poppler-utils
prompts.py ADDED
@@ -0,0 +1,46 @@
+ # prompts.py
+ # Qwen-2.5 Compatible Prompts
+
+ def get_ocr_extraction_prompt(raw_text: str) -> str:
+     return f"""<|im_start|>system
+ You are a precise Data Extraction Engine.
+ Extract data from the text below and return a JSON object.
+ Fields: contact_name, total_amount, currency, invoice_date, line_items (name, quantity, rate).
+ Output ONLY JSON. No markdown.
+ <|im_end|>
+ <|im_start|>user
+ Input Text:
+ {raw_text[:3000]}
+
+ Return the JSON:
+ <|im_end|>
+ <|im_start|>assistant
+ """
+
+ def get_agent_prompt(history_text: str, user_message: str) -> str:
+     """
+     Agent Prompt: Decides whether to Chat or Call Tools based on History.
+     """
+     return f"""<|im_start|>system
+ You are the Zoho CRM Assistant.
+
+ AVAILABLE TOOLS:
+ 1. create_record(module_name, record_data)
+ 2. create_invoice(data)
+
+ RULES:
+ 1. REVIEW THE CHAT HISTORY. If you see extracted JSON data in the history, use it.
+ 2. TRIGGER CONDITION: ONLY call a tool if the user explicitly asks to "save", "create", "push", or "upload".
+ 3. If the user has NOT confirmed, just answer their questions or summarize the data.
+ 4. TOOL FORMAT: Return a JSON object: {{"tool": "name", "args": {{...}}}}
+ 5. Return ONLY JSON for tool calls.
+ <|im_end|>
+ <|im_start|>user
+ HISTORY:
+ {history_text}
+
+ CURRENT REQUEST:
+ {user_message}
+ <|im_end|>
+ <|im_start|>assistant
+ """
requirements.txt ADDED
@@ -0,0 +1,9 @@
+ fastmcp
+ gradio
+ requests
+ transformers
+ torch  # choose CPU or CUDA wheel appropriate for your environment
+ accelerate
+ pytesseract
+ pdf2image
+ pillow
zoho_client_mcp.py ADDED
@@ -0,0 +1,93 @@
+ from mcp.server.fastmcp import FastMCP
+ from typing import Optional
+ import requests
+ import os
+
+ from config import CLIENT_ID, CLIENT_SECRET, REFRESH_TOKEN, API_BASE
+
+ # --- Initialize the FastMCP Server ---
+ mcp = FastMCP("ZohoCRMAgent")
+
+ # --- Token Refresh Utility ---
+ def _get_valid_token_headers() -> dict:
+     """Internal function to ensure a valid Zoho access token is available.
+     This uses the refresh token flow to retrieve a fresh access token."""
+     token_url = "https://accounts.zoho.in/oauth/v2/token"
+     params = {
+         "refresh_token": REFRESH_TOKEN,
+         "client_id": CLIENT_ID,
+         "client_secret": CLIENT_SECRET,
+         "grant_type": "refresh_token"
+     }
+     response = requests.post(token_url, params=params)
+     if response.status_code == 200:
+         access_token = response.json().get("access_token")
+         return {"Authorization": f"Zoho-oauthtoken {access_token}"}
+     else:
+         raise Exception(f"Failed to refresh token: {response.text}")
+
+ # --- MCP Tools for Zoho CRM and Zoho Books Operations ---
+
+ @mcp.tool()
+ def authenticate_zoho() -> str:
+     """Refreshes and confirms Zoho CRM access token availability."""
+     _ = _get_valid_token_headers()
+     return "Zoho CRM access token successfully refreshed."
+
+ @mcp.tool()
+ def create_record(module_name: str, record_data: dict) -> str:
+     """Creates a new record in the specified Zoho CRM module."""
+     headers = _get_valid_token_headers()
+     response = requests.post(f"{API_BASE}/{module_name}", headers=headers, json={"data": [record_data]})
+     if response.status_code in [200, 201]:
+         return f"Record created successfully in {module_name}."
+     return f"Error creating record: {response.text}"
+
+ @mcp.tool()
+ def get_records(module_name: str, page: int = 1, per_page: int = 200) -> list:
+     """Fetches records from a specified Zoho CRM module."""
+     headers = _get_valid_token_headers()
+     params = {"page": page, "per_page": per_page}
+     response = requests.get(f"{API_BASE}/{module_name}", headers=headers, params=params)
+     if response.status_code == 200:
+         return response.json().get("data", [])
+     return [f"Error retrieving records: {response.text}"]
+
+ @mcp.tool()
+ def update_record(module_name: str, record_id: str, data: dict) -> str:
+     """Updates a record in a Zoho CRM module."""
+     headers = _get_valid_token_headers()
+     response = requests.put(f"{API_BASE}/{module_name}/{record_id}", headers=headers, json={"data": [data]})
+     if response.status_code == 200:
+         return f"Record {record_id} in {module_name} updated successfully."
+     return f"Error updating record: {response.text}"
+
+ @mcp.tool()
+ def delete_record(module_name: str, record_id: str) -> str:
+     """Deletes a record from the specified Zoho CRM module."""
+     headers = _get_valid_token_headers()
+     response = requests.delete(f"{API_BASE}/{module_name}/{record_id}", headers=headers)
+     if response.status_code == 200:
+         return f"Record {record_id} in {module_name} deleted."
+     return f"Error deleting record: {response.text}"
+
+ @mcp.tool()
+ def create_invoice(data: dict) -> str:
+     """Creates an invoice in Zoho Books."""
+     headers = _get_valid_token_headers()
+     response = requests.post(f"{API_BASE}/invoices", headers=headers, json={"data": [data]})
+     if response.status_code in [200, 201]:
+         return "Invoice created successfully."
+     return f"Error creating invoice: {response.text}"
+
+ @mcp.tool()
+ def process_document(file_path: str, target_module: Optional[str] = "Contacts") -> dict:
+     """Extracts data from uploaded file (PDF/image) and returns structured info."""
+     # Placeholder for OCR + Gemini parsing logic
+     # raw_text = perform_ocr(file_path)
+     # structured_data = gemini_parse_json(raw_text)
+     return {
+         "status": "success",
+         "file": os.path.basename(file_path),
+         "extracted_data": f"Simulated structured data from {target_module} document."
+     }