NguyenDinhHieu commited on
Commit
72872bb
·
verified ·
1 Parent(s): b03c583

Upload appv2.py

Browse files
Files changed (1) hide show
  1. appv2.py +153 -0
appv2.py ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from langchain_core.output_parsers import StrOutputParser
4
+ from langchain_core.prompts import PromptTemplate
5
+ from langchain_community.llms import LlamaCpp
6
+
7
+ import ast
8
+ import atexit
9
+ import os
10
+ import re
11
+ import sys
12
+
13
# Matches a ```-fenced block, optionally tagged "python"; group 1 captures the
# payload between the fences (non-greedy, with surrounding whitespace trimmed).
FENCE_RE = re.compile(r"```(?:python)?\s*([\s\S]*?)\s*```", flags=re.IGNORECASE)
# Matches a doubled closing parenthesis at end of a line — a common LLM slip,
# repaired as a last resort in generate_code.
TRAILING_PARENS_RE = re.compile(r"\)\)\s*$", flags=re.MULTILINE)

# Install (Python env):
# - pip install langchain langchain-community
# - pip install llama-cpp-python
# - pip install gpt4all (optional: if using LLM_BACKEND=gpt4all) $env:LLM_BACKEND='gpt4all'
20
+
21
+
22
+ def _force_utf8_stdio() -> None:
23
+ try:
24
+ if hasattr(sys.stdout, "reconfigure"):
25
+ sys.stdout.reconfigure(encoding="utf-8")
26
+ if hasattr(sys.stderr, "reconfigure"):
27
+ sys.stderr.reconfigure(encoding="utf-8")
28
+ except Exception:
29
+ pass
30
+
31
# =====================
# Config
# =====================
MODEL_FILE = "Cube-Python_v2.gguf"  # GGUF model, expected next to this script
N_CTX = 4096  # llama.cpp context window, in tokens
TEMPERATURE = 0.1  # low temperature -> near-deterministic code generation
N_GPU_LAYERS = -1 # llama.cpp: -1 = try push all to GPU, set 0 to force CPU

# Backend selector: "llamacpp" (default) or "gpt4all"/"gpt4allcpp" via env var.
LLM_BACKEND = os.getenv("LLM_BACKEND", "llamacpp").strip().lower()
# How many times generate_code re-prompts the model to repair invalid syntax.
MAX_FIX_ATTEMPTS = 2
41
+
42
def load_llm():
    """Instantiate the local LLM selected by LLM_BACKEND.

    Resolves MODEL_FILE relative to this script's directory, then wraps it
    in either a GPT4All or a LlamaCpp LangChain LLM.

    Raises:
        FileNotFoundError: when the model file is missing.
        RuntimeError: when the gpt4all backend is requested but not installed.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    model_path = os.path.join(script_dir, MODEL_FILE)

    if not os.path.exists(model_path):
        raise FileNotFoundError(f"Không tìm thấy file model tại: {model_path}")

    # Default path: llama.cpp backend.
    if LLM_BACKEND not in {"gpt4all", "gpt4allcpp"}:
        return LlamaCpp(
            model_path=model_path,
            n_gpu_layers=N_GPU_LAYERS,
            n_ctx=N_CTX,
            temperature=TEMPERATURE,
            verbose=False,  # suppress llama.cpp's noisy logging
        )

    # Optional backend: import lazily so gpt4all stays an optional dependency.
    try:
        from langchain_community.llms import GPT4All
    except Exception as e:
        raise RuntimeError(
            "Chưa cài GPT4All cho LangChain. Cài bằng:\n"
            " pip install gpt4all langchain-community\n"
            f"Chi tiết: {e}"
        )

    return GPT4All(model=model_path, temp=TEMPERATURE, verbose=False)
68
+
69
def close_llm_safely(llm):
    """Release native resources held by *llm*, ignoring every failure.

    Looks for a ``close()`` method on the wrapper's underlying ``client``
    attribute (the llama.cpp / gpt4all handle) and invokes it if callable.
    Safe to call repeatedly and with objects exposing no such client.
    """
    try:
        closer = getattr(getattr(llm, "client", None), "close", None)
        if callable(closer):
            closer()
    except Exception:
        # Shutdown path: a failing close must never mask the real exit reason.
        pass
77
+
78
def extract_python_code(text: str) -> str:
    """Return the Python code contained in *text*.

    If the text holds a ```-fenced block (optionally tagged ``python``),
    the first fenced payload is returned; otherwise the whole text is
    returned.  The result is stripped; empty/None input yields "".
    """
    if not text:
        return ""
    match = FENCE_RE.search(text)
    return match.group(1).strip() if match else text.strip()
87
+
88
+ def _syntax_error_message(code: str) -> str | None:
89
+ try:
90
+ ast.parse(code)
91
+ return None
92
+ except SyntaxError:
93
+ # Re-parse to get rich info (cheap vs model inference, and avoids duplicate logic).
94
+ try:
95
+ ast.parse(code)
96
+ return None
97
+ except SyntaxError as e:
98
+ line = (e.text or "").strip()
99
+ where = f"line {e.lineno}, col {e.offset}" if e.lineno and e.offset else "unknown location"
100
+ return f"{e.msg} ({where}). Offending line: {line}"
101
+
102
+
103
def is_valid_python(code: str) -> bool:
    """Return True when *code* parses cleanly as Python source."""
    error = _syntax_error_message(code)
    return error is None
105
+
106
+
107
def generate_code(chain, question: str) -> str:
    """Generate syntactically valid Python for *question* via *chain*.

    Invokes the chain, strips any markdown fence, and validates the result
    with the AST parser.  On a syntax error, re-prompts the model with the
    error details up to MAX_FIX_ATTEMPTS times.  As a last resort, a common
    LLM slip — a doubled ``))`` at end of line — is repaired, and the
    repaired text is returned only if it actually parses.
    """
    raw = chain.invoke({"question": question})
    code = extract_python_code(raw)

    for _ in range(MAX_FIX_ATTEMPTS):
        err = _syntax_error_message(code)
        if err is None:
            return code

        raw = chain.invoke(
            {
                "question": (
                    "Output trước bị sai cú pháp Python.\n"
                    f"Lỗi: {err}\n\n"
                    f"Output trước:\n{raw}\n\n"
                    "Hãy trả lại code Python ĐÚNG cú pháp, chỉ code, không markdown."
                )
            }
        )
        code = extract_python_code(raw)

    # Fix: the final repair attempt above was never re-validated, so an
    # already-valid answer could be mangled by the paren heuristic below.
    if is_valid_python(code):
        return code

    repaired = TRAILING_PARENS_RE.sub(")", code)
    return repaired if is_valid_python(repaired) else code
130
+
131
# Prompt in [INST] ... [/INST] format (Mistral/Llama instruction style).
# The instructions are Vietnamese: "you are a professional Python assistant;
# write high-quality Python for the request; answer with plain Python only,
# no markdown, no explanation". Runtime string — do not translate.
template = """[INST] Bạn là một trợ lý AI chuyên nghiệp về lập trình Python.
Hãy viết code Python chất lượng cao để giải quyết yêu cầu sau.
Chỉ trả lời bằng code Python thuần (KHÔNG markdown, KHÔNG giải thích).
Yêu cầu: {question} [/INST]"""

prompt = PromptTemplate(input_variables=["question"], template=template)

# Script entry: make stdio UTF-8 safe first (prompts contain Vietnamese),
# then load the model and ensure its native handle is released at exit.
_force_utf8_stdio()
llm = load_llm()
atexit.register(close_llm_safely, llm)  # backstop if the finally below is skipped
chain = prompt | llm | StrOutputParser()

# Demo request; replace with your own task description.
question = '''
Write a Python program that extracts all email addresses from a given text.
Input:
A text: "Contact us at support@nlp.com or info@textprocessing.ai for more details."
Desired Output:
['support@nlp.com', 'info@textprocessing.ai']'''

try:
    print(generate_code(chain, question))
finally:
    # close_llm_safely tolerates repeat calls, so running it here and again
    # via atexit is harmless.
    close_llm_safely(llm)