|
|
|
|
|
|
|
|
"""
|
|
|
Auditor de projeto Streamlit — chaves duplicadas, estrutura e relacionamentos.
|
|
|
|
|
|
Verifica:
|
|
|
1) Chaves duplicadas em st.form/st.button/st.download_button.
|
|
|
2) Widgets sem 'key' (risco em loops).
|
|
|
3) Imports faltantes no app.py para módulos usados no roteamento.
|
|
|
4) Cobertura MODULES ↔ Roteamento (entries sem rota e rotas sem entry).
|
|
|
5) Arquivos de módulos inexistentes e módulos sem main().
|
|
|
6) Imports não usados.
|
|
|
7) Ciclos de importação entre arquivos .py (somente locais).
|
|
|
8) Emite relatório em console e JSON.
|
|
|
|
|
|
Uso:
|
|
|
python audit_streamlit_project.py
|
|
|
python audit_streamlit_project.py --root . --app app.py --modules modules_map.py --exclude venv .venv .git
|
|
|
|
|
|
Saída JSON:
|
|
|
.audit_report.json (na raiz especificada)
|
|
|
"""
|
|
|
import os
|
|
|
import re
|
|
|
import ast
|
|
|
import json
|
|
|
import argparse
|
|
|
from collections import defaultdict
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def find_python_files(root, exclude_dirs=None):
    """Yield the path of every .py file under *root*.

    A directory is pruned from the walk when its bare name appears in
    *exclude_dirs*, or when it sits directly under *root* with an excluded
    name (both checks mirror each other for top-level entries).
    """
    skip = set(exclude_dirs or [])
    for dirpath, dirnames, filenames in os.walk(root):
        # In-place mutation of dirnames prunes the walk below this level.
        excluded_paths = {os.path.join(root, name) for name in skip}
        dirnames[:] = [
            d for d in dirnames
            if os.path.join(dirpath, d) not in excluded_paths and d not in skip
        ]
        for filename in filenames:
            if filename.endswith(".py"):
                yield os.path.join(dirpath, filename)
|
|
|
|
|
|
def read_text(path):
    """Return the file's text, trying UTF-8 first and Latin-1 as a fallback.

    Returns an empty string when the file cannot be read at all (missing,
    unreadable, permission error).
    """
    for encoding in ("utf-8", "latin-1"):
        try:
            with open(path, "r", encoding=encoding) as handle:
                return handle.read()
        except Exception:
            continue
    return ""
|
|
|
|
|
|
def parse_ast(path):
    """Parse *path* as Python source.

    Returns a ``(tree, source)`` pair; ``tree`` is ``None`` when the file is
    empty/unreadable or fails to parse (the raw source is still returned in
    the latter case).
    """
    source = read_text(path)
    if not source:
        return None, ""
    try:
        return ast.parse(source, filename=path), source
    except Exception:
        return None, source
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Literal widget keys tracked for duplicates. Each pattern MUST capture the
# key string in group(1): scan_duplicate_and_missing_keys reads m.group(1).
# (The previous patterns were garbled — they matched a literal '" sequence
# and had no capture group, so m.group(1) raised and detection never fired.)
KEY_PATTERNS = {
    # st.form("some_key") — the form key is the first positional literal.
    "form_literal": re.compile(r'st\.form\(\s*[\'"]([^\'"]+)[\'"]'),
    # st.button(..., key="some_key")
    "button_key": re.compile(r'st\.button\([^)]*key\s*=\s*[\'"]([^\'"]+)[\'"]'),
    # st.download_button(..., key="some_key")
    "download_key": re.compile(r'st\.download_button\([^)]*key\s*=\s*[\'"]([^\'"]+)[\'"]'),
}

# Widget calls with no key= argument on the same line — a duplicate-ID risk
# when the widget is rendered inside a loop.
MISSING_KEY_PATTERNS = {
    "button_no_key": re.compile(r'st\.button\((?![^)]*key\s*=)'),
    "download_no_key": re.compile(r'st\.download_button\((?![^)]*key\s*=)'),
}
|
|
|
|
|
|
def scan_duplicate_and_missing_keys(file_path):
    """Scan one file line-by-line for widget-key problems.

    Returns ``(duplicates, missing)`` where ``duplicates`` maps a literal key
    to the line numbers where it appears (only keys seen more than once) and
    ``missing`` maps each MISSING_KEY_PATTERNS name to the lines matching it.
    """
    key_occurrences = defaultdict(list)
    missing = defaultdict(list)
    try:
        with open(file_path, "r", encoding="utf-8") as handle:
            for lineno, line in enumerate(handle, 1):
                # Collect every literal key found on this line.
                for pattern in KEY_PATTERNS.values():
                    for match in pattern.finditer(line):
                        key_occurrences[match.group(1)].append(lineno)
                # Flag widget calls that carry no key= argument.
                for name, pattern in MISSING_KEY_PATTERNS.items():
                    if pattern.search(line):
                        missing[name].append(lineno)
    except Exception:
        # Unreadable file (binary, encoding, permissions): report nothing.
        pass
    duplicates = {key: lines for key, lines in key_occurrences.items() if len(lines) > 1}
    return duplicates, missing
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def extract_imports_defs_calls(tree):
    """Collect import aliases, referenced names, function defs and ``.main``
    access roots from an AST.

    Returns a 4-tuple:
        imports:    {bound alias or name -> base module name}
        used_names: every name referenced (``ast.Name`` nodes)
        defs:       names of defined functions (nested ones included)
        calls_main: leftmost names of ``<...>.main`` load accesses, e.g.
                    ``mod`` for ``mod.main()``, ``pkg`` for ``pkg.sub.main()``

    A ``None`` tree yields four empty collections.
    """
    imports = {}
    used_names = set()
    defs = set()
    calls_main = set()

    class Collector(ast.NodeVisitor):
        def visit_Import(self, node):
            for alias in node.names:
                # "import a.b as c" binds c; "import a.b" binds a.
                bound = (alias.asname or alias.name).split(".")[0]
                imports[bound] = alias.name.split(".")[0]

        def visit_ImportFrom(self, node):
            # Relative imports with no module (from . import x) are skipped.
            if node.module:
                base = node.module.split(".")[0]
                for alias in node.names:
                    imports[alias.asname or alias.name] = base

        def visit_FunctionDef(self, node):
            defs.add(node.name)
            self.generic_visit(node)

        def visit_Name(self, node):
            used_names.add(node.id)

        def visit_Attribute(self, node):
            # For a load of <expr>.main, record the leftmost dotted name.
            if isinstance(node.ctx, ast.Load) and node.attr == "main":
                target = node.value
                while isinstance(target, ast.Attribute):
                    target = target.value
                if isinstance(target, ast.Name):
                    calls_main.add(target.id)
            self.generic_visit(node)

    if tree:
        Collector().visit(tree)
    return imports, used_names, defs, calls_main
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def load_modules_map(modules_map_path):
    """Read modules_map.py textually (never imported) and extract two sets.

    Returns ``(route_keys, internal_keys)``:
        route_keys:    top-level string keys of the MODULES dict,
                       e.g. "consulta", "operacao"
        internal_keys: values of the ``"key"`` field inside each entry
    Both sets are empty when the file is missing or unreadable.
    """
    route_keys = set()
    internal_keys = set()
    source = read_text(modules_map_path)
    if not source:
        return route_keys, internal_keys

    # Top-level entries look like:    "name": {
    route_keys.update(
        match.group(1)
        for match in re.finditer(r'^[ \t]*"([^"]+)"\s*:\s*\{', source, re.MULTILINE)
    )
    # Per-entry internal keys:  "key": "name"
    internal_keys.update(
        match.group(1)
        for match in re.finditer(r'"key"\s*:\s*"([^"]+)"', source)
    )
    return route_keys, internal_keys
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def extract_routing(app_src):
    """Extract routing pairs from app.py source.

    Recognizes branches of the form::

        if pagina_id == "consulta":
            consulta.main()
        elif pagina_id == "operacao":
            operacao.main()

    Returns a list of ``(route_key, called_module_name)`` tuples; the module
    name is ``None`` when the branch body contains no ``*.main()`` call.

    Fixes over the previous version:
      * the quote/capture part of the patterns was garbled (no capture
        groups, yet ``group(1)``/``group(2)`` were read — always failing);
      * the branch terminator is now a lookahead, so a match no longer
        consumes the following ``elif`` keyword (the old version skipped
        every other consecutive ``elif`` branch);
      * ``(?<![A-Za-z_])`` stops the ``if`` alternative from matching the
        trailing "if" inside "elif".
    """
    branch_re = re.compile(
        r'(?<![A-Za-z_])(?:el)?if\s+pagina_id\s*==\s*[\'"]([^\'"]+)[\'"]'
        r'\s*:\s*(.*?)(?=\n\s*(?:elif|#|$))',
        re.DOTALL,
    )
    main_call_re = re.compile(r'([A-Za-z_][A-Za-z0-9_]*)\s*\.\s*main\s*\(')

    routes = []
    for match in branch_re.finditer(app_src):
        route_key, block = match.group(1), match.group(2)
        call = main_call_re.search(block)
        routes.append((route_key, call.group(1) if call else None))
    return routes
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def build_local_import_graph(py_files):
    """Build the local-import graph: file base name -> set of imported local
    base names.

    Only modules whose .py file is listed in *py_files* become edge targets;
    self-imports are ignored.
    """
    # Set of base names (file name without .py) that exist locally.
    local_bases = {os.path.splitext(os.path.basename(path))[0] for path in py_files}

    graph = defaultdict(set)
    for path in py_files:
        importer = os.path.splitext(os.path.basename(path))[0]
        tree, _ = parse_ast(path)
        imports, _, _, _ = extract_imports_defs_calls(tree)
        for alias, base_mod in imports.items():
            # Prefer the bound alias; fall back to the base module name.
            if alias in local_bases:
                target = alias
            elif base_mod in local_bases:
                target = base_mod
            else:
                continue
            if target != importer:
                graph[importer].add(target)
    return graph
|
|
|
|
|
|
def find_cycles(graph):
    """Return the unique import cycles found in *graph*.

    *graph* maps node -> set of successor nodes. Each returned cycle is a
    list of nodes ending with a repeat of its first node, e.g.
    ``['a', 'b', 'a']``. Rotationally equivalent cycles are deduplicated.
    The input dict is never mutated.
    """
    # Include edge targets that never appear as keys.
    all_nodes = set(graph)
    for successors in graph.values():
        all_nodes |= successors

    visited = set()
    on_stack = set()
    trail = []
    found = []

    def walk(node):
        visited.add(node)
        on_stack.add(node)
        trail.append(node)
        for nxt in graph.get(node, set()):
            if nxt not in visited:
                walk(nxt)
            elif nxt in on_stack and nxt in trail:
                # Back-edge: the cycle is the current trail from nxt onward.
                start = len(trail) - 1
                while start >= 0 and trail[start] != nxt:
                    start -= 1
                if start >= 0:
                    found.append(trail[start:] + [nxt])
        on_stack.discard(node)
        trail.pop()

    for node in list(all_nodes):
        if node not in visited:
            walk(node)

    def rotation_key(cycle):
        # Canonical form: smallest rotation of the cycle body (closing
        # repeat dropped) so ['a','b','a'] and ['b','a','b'] compare equal.
        body = cycle[:-1]
        if not body:
            return tuple()
        return min(tuple(body[i:] + body[:i]) for i in range(len(body)))

    seen_keys = set()
    unique = []
    for cycle in found:
        key = rotation_key(cycle)
        if key and key not in seen_keys:
            seen_keys.add(key)
            unique.append(cycle)
    return unique
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def find_unused_imports(tree, imports, used_names):
    """Approximate unused-import detection.

    An imported alias that never appears in *used_names* is reported as
    unused. Reflection (getattr, ``__import__``) is not detected, so treat
    the result as a hint. *tree* is unused but kept for signature
    compatibility with existing callers.
    """
    return [alias for alias in imports if alias not in used_names]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def audit(root, app_path, modules_map_path, exclude_dirs=None, output_json=".audit_report.json"):
    """Run every project check, print a console report and write a JSON copy.

    Parameters:
        root: project root directory to scan.
        app_path: path of the Streamlit entry file, relative to *root*.
        modules_map_path: path of modules_map.py, relative to *root*.
        exclude_dirs: directory names/paths skipped during the scan.
        output_json: file name (inside *root*) for the JSON report.

    Returns the report dict (same structure as the JSON file).
    """
    # Report skeleton; each check below fills its own section.
    report = {
        "duplicate_keys": {},
        "widgets_without_key": {},
        "missing_imports_in_app": [],
        "routing_vs_modules": {
            "routes_without_modules_entry": [],
            "modules_entry_without_route": [],
        },
        "module_files_missing": [],
        "modules_without_main": [],
        "unused_imports": {},
        "import_cycles": [],
    }

    py_files = list(find_python_files(root, exclude_dirs=exclude_dirs))

    # Map module base name (file name without .py) -> file path.
    base_to_file = {os.path.splitext(os.path.basename(f))[0]: f for f in py_files}

    # Checks 1-2: duplicate literal widget keys and widgets without key=.
    for f in py_files:
        dups, missing = scan_duplicate_and_missing_keys(f)
        if dups:
            report["duplicate_keys"][f] = dups
        if any(missing.values()):
            report["widgets_without_key"][f] = {k: v for k, v in missing.items() if v}

    # Parse the app entry point and the modules map (both relative to root).
    app_full = os.path.join(root, app_path)
    modules_map_full = os.path.join(root, modules_map_path)
    app_tree, app_src = parse_ast(app_full)
    routes = extract_routing(app_src) if app_src else []

    # app_defs / app_calls_main are currently unused but kept for clarity.
    app_imports, app_used, app_defs, app_calls_main = extract_imports_defs_calls(app_tree)

    route_keys_in_map, internal_keys_in_map = load_modules_map(modules_map_full)

    # Checks 3 and 5: routed modules must be imported, exist on disk and
    # define a main() entry point.
    routes_set = set()
    for route_key, called_module in routes:
        routes_set.add(route_key)
        if not called_module:
            # Route branch found, but its body never calls <module>.main().
            report["missing_imports_in_app"].append((route_key, None, "Bloco da rota não chama *.main()"))
            continue

        imported_aliases = set(app_imports.keys())
        if called_module not in imported_aliases:
            report["missing_imports_in_app"].append((route_key, called_module, "Módulo não importado no app.py"))

        if called_module not in base_to_file:
            # The alias may differ from the file base name; resolve it via
            # the app's import map before declaring the file missing.
            base_mod = app_imports.get(called_module)
            if not (base_mod and base_mod in base_to_file):
                report["module_files_missing"].append(called_module)
        else:
            # The module file exists: check that it defines a main().
            t, _ = parse_ast(base_to_file[called_module])
            _, _, defs, _ = extract_imports_defs_calls(t)
            if "main" not in defs:
                report["modules_without_main"].append(called_module)

    # Check 4: MODULES map coverage vs. actual routing (both directions).
    # A route is considered covered by either a top-level entry or an
    # internal "key" value.
    for r in routes_set:
        if r not in route_keys_in_map and r not in internal_keys_in_map:
            report["routing_vs_modules"]["routes_without_modules_entry"].append(r)

    for m in route_keys_in_map:
        if m not in routes_set:
            report["routing_vs_modules"]["modules_entry_without_route"].append(m)

    # Check 6: potentially unused imports (approximation; see
    # find_unused_imports for caveats).
    for f in py_files:
        t, _ = parse_ast(f)
        imp, used, defs, calls_main = extract_imports_defs_calls(t)
        unused = find_unused_imports(t, imp, used)
        if unused:
            report["unused_imports"][f] = unused

    # Check 7: import cycles between local .py files.
    graph = build_local_import_graph(py_files)
    cycles = find_cycles(graph)
    report["import_cycles"] = cycles

    # Deduplicate/sort for stable, readable output. dict.fromkeys keeps the
    # first occurrence order for the tuples.
    report["missing_imports_in_app"] = list(dict.fromkeys(report["missing_imports_in_app"]))
    report["module_files_missing"] = sorted(set(report["module_files_missing"]))
    report["modules_without_main"] = sorted(set(report["modules_without_main"]))
    report["routing_vs_modules"]["routes_without_modules_entry"] = sorted(
        set(report["routing_vs_modules"]["routes_without_modules_entry"]))
    report["routing_vs_modules"]["modules_entry_without_route"] = sorted(
        set(report["routing_vs_modules"]["modules_entry_without_route"]))

    # Check 8 (part 1): human-readable console report.
    print("\n=== RELATÓRIO DE AUDITORIA — Streamlit Project ===")

    print("\n[Chaves duplicadas]")
    if not report["duplicate_keys"]:
        print(" ✔ Nenhuma chave duplicada literal encontrada.")
    else:
        for file, dups in report["duplicate_keys"].items():
            print(f" - {file}")
            for key, lines in dups.items():
                print(f" * key='{key}' duplicada em linhas {lines}")

    print("\n[Widgets sem 'key' (atenção em loops)]")
    if not report["widgets_without_key"]:
        print(" ✔ Nenhum potencial widget sem key encontrado.")
    else:
        for file, miss in report["widgets_without_key"].items():
            print(f" - {file}")
            for kind, lines in miss.items():
                print(f" * {kind}: linhas {lines}")

    print("\n[Imports faltantes no app e módulos]")
    if not report["missing_imports_in_app"]:
        print(" ✔ Nenhum import faltante detectado no app.py (para rotas).")
    else:
        for route_key, called_module, reason in report["missing_imports_in_app"]:
            print(f" - rota='{route_key}' -> módulo='{called_module}' • {reason}")
    if not report["module_files_missing"]:
        print(" ✔ Nenhum arquivo de módulo ausente detectado.")
    else:
        print(" Arquivos de módulo não encontrados:", report["module_files_missing"])
    if not report["modules_without_main"]:
        print(" ✔ Todos os módulos localizados possuem main().")
    else:
        print(" Módulos sem main():", report["modules_without_main"])

    print("\n[Consistência: MODULES x Roteamento]")
    rwm = report["routing_vs_modules"]
    if not rwm["routes_without_modules_entry"]:
        print(" ✔ Todas as rotas possuem entrada em modules_map.py (ou 'key' interna).")
    else:
        print(" Rotas sem entrada no modules_map.py:", rwm["routes_without_modules_entry"])
    if not rwm["modules_entry_without_route"]:
        print(" ✔ Todas as entradas do modules_map.py possuem rota no app.py.")
    else:
        print(" Entradas do modules_map.py sem rota no app.py:", rwm["modules_entry_without_route"])

    print("\n[Imports não usados (aprox.)]")
    if not report["unused_imports"]:
        print(" ✔ Nenhum import potencialmente não usado encontrado.")
    else:
        for file, unused in report["unused_imports"].items():
            print(f" - {file}: {unused}")

    print("\n[Ciclos de importação]")
    if not report["import_cycles"]:
        print(" ✔ Nenhum ciclo de importação detectado.")
    else:
        for cyc in report["import_cycles"]:
            print(" - ciclo:", " -> ".join(cyc))

    # Check 8 (part 2): JSON copy of the same report, written inside root.
    out_path = os.path.join(root, output_json)
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(report, f, ensure_ascii=False, indent=2)
    print(f"\n📄 Relatório JSON salvo em: {out_path}")

    return report
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def cli():
    """Command-line entry point: parse arguments and run the audit."""
    parser = argparse.ArgumentParser(description="Auditor de projeto Streamlit")
    parser.add_argument("--root", default=".", help="Raiz do projeto (default: .)")
    parser.add_argument("--app", default="app.py", help="Caminho do app.py (relativo à raiz)")
    parser.add_argument("--modules", default="modules_map.py", help="Caminho do modules_map.py (relativo à raiz)")
    parser.add_argument(
        "--exclude",
        nargs="*",
        default=[".git", ".venv", "venv", "__pycache__", ".streamlit"],
        help="Pastas a excluir da varredura",
    )
    parser.add_argument("--json", default=".audit_report.json", help="Nome do arquivo JSON de saída")
    args = parser.parse_args()

    audit(
        root=args.root,
        app_path=args.app,
        modules_map_path=args.modules,
        exclude_dirs=args.exclude,
        output_json=args.json,
    )
|
|
|
|
|
|
# Script entry point: run the CLI only when executed directly, never on import.
if __name__ == "__main__":
    cli()
|
|
|
|
|
|
|