testeappb / app.py
nsv2042's picture
Update app.py
24cf879 verified
import os
import subprocess
import tempfile
from androguard.misc import AnalyzeAPK
from transformers import BloomForCausalLM, BloomTokenizerFast, RobertaTokenizer, RobertaForCausalLM, pipeline
from sentence_transformers import SentenceTransformer, util
import torch
import gradio as gr
# Model initialisation with error handling.
# Everything is loaded once at import time; a failure is reported and then
# re-raised so the module never finishes importing with missing models.
_BLOOM_CHECKPOINT = "bigscience/bloom-560m"
_CODEBERT_CHECKPOINT = "microsoft/codebert-base"

try:
    # BLOOM: natural-language understanding.
    bloom_tokenizer = BloomTokenizerFast.from_pretrained(_BLOOM_CHECKPOINT)
    # eval() returns the module itself, so it can be chained onto the load.
    bloom_model = BloomForCausalLM.from_pretrained(_BLOOM_CHECKPOINT).eval()

    # Indexing (sentence-embedding) model.
    indexing_model = SentenceTransformer("all-MiniLM-L6-v2")

    # CodeBERT: code analysis.
    tokenizer = RobertaTokenizer.from_pretrained(_CODEBERT_CHECKPOINT)
    codebert_model = RobertaForCausalLM.from_pretrained(_CODEBERT_CHECKPOINT).eval()
except Exception as e:
    print(f"Erro ao carregar modelos: {e!s}")
    raise
# Global context shared by the handlers in this file: the "smali" and "java"
# entries map decompiled files to their text, and "info" starts out empty.
apk_context = dict(smali={}, java={}, info="")
def check_java():
    """Verify that a ``java`` executable can be run.

    Runs ``java -version`` and prints the reported version banner.

    Raises:
        EnvironmentError: if ``java`` is not found, or if running it fails
            for any other reason. The underlying exception is attached as
            ``__cause__`` so the root problem is not lost.
    """
    try:
        result = subprocess.run(
            ["java", "-version"],
            check=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        # The banner is read from stderr, which is where `java -version`
        # writes it. errors="replace" keeps an oddly encoded banner from
        # being reported as a Java failure.
        print("Java está disponível:", result.stderr.decode(errors="replace"))
    except FileNotFoundError as e:
        raise EnvironmentError("Java não está instalado") from e
    except Exception as e:
        raise EnvironmentError(f"Erro ao verificar Java: {str(e)}") from e
def _download(url, destination):
    """Fetch *url* into *destination* with curl, leaving no file behind on failure."""
    try:
        # --fail makes curl exit non-zero on an HTTP error instead of saving
        # the server's error page as if it were the requested artifact.
        subprocess.run(
            ["curl", "--fail", "-L", "-o", destination, url],
            check=True,
        )
    except (subprocess.CalledProcessError, OSError):
        # A truncated or bogus file would satisfy the os.path.exists() checks
        # on the next start-up and permanently disable the retry.
        if os.path.exists(destination):
            os.remove(destination)
        raise


def install_tools():
    """Ensure Baksmali and JADX are installed under ``/usr/local/bin``.

    Java availability is checked first. Each tool is downloaded only when its
    expected path does not exist yet; JADX is additionally unzipped and its
    launcher is marked executable.

    Raises:
        EnvironmentError: propagated from ``check_java`` when Java is unusable.
        subprocess.CalledProcessError: if curl, unzip or chmod exits non-zero.
        OSError: if one of those executables cannot be started.
    """
    baksmali_path = "/usr/local/bin/baksmali.jar"
    jadx_path = "/usr/local/bin/jadx/bin/jadx"
    jadx_zip_path = "/usr/local/bin/jadx.zip"

    check_java()

    if not os.path.exists(baksmali_path):
        print("Instalando Baksmali...")
        _download(
            "https://bitbucket.org/JesusFreke/smali/downloads/baksmali-2.5.2.jar",
            baksmali_path,
        )

    if not os.path.exists(jadx_path):
        print("Instalando JADX...")
        _download(
            "https://github.com/skylot/jadx/releases/download/v1.4.7/jadx-1.4.7.zip",
            jadx_zip_path,
        )
        subprocess.run(
            ["unzip", "-o", jadx_zip_path, "-d", "/usr/local/bin/jadx"],
            check=True,
        )

    # Checked again rather than assumed: the launcher only exists here if it
    # was already installed or the unzip above actually produced it.
    if os.path.exists(jadx_path):
        subprocess.run(["chmod", "+x", jadx_path], check=True)
def _read_sources(base_dir, suffix):
    """Return ``{path relative to base_dir: text}`` for files ending in *suffix*."""
    sources = {}
    for root, _, files in os.walk(base_dir):
        for file_name in files:
            if not file_name.endswith(suffix):
                continue
            full_path = os.path.join(root, file_name)
            # Explicit encoding plus errors="replace": decompiler output must
            # not abort the whole run because of the host locale or one
            # undecodable byte.
            with open(full_path, "r", encoding="utf-8", errors="replace") as f:
                # Keyed by relative path: bare file names repeat across
                # packages and would silently overwrite one another.
                sources[os.path.relpath(full_path, base_dir)] = f.read()
    return sources


def decompile_apk(apk_file):
    """Decompile an uploaded APK to Smali and Java and store the sources.

    Baksmali and JADX are run into a temporary directory, the resulting
    ``.smali`` and ``.java`` files are read into memory, and the module-level
    ``apk_context`` entries ``"smali"`` and ``"java"`` are replaced with them.
    The temporary directory is removed afterwards.

    Args:
        apk_file: the uploaded file. Either an object exposing the file path
            as ``.name`` or the path itself as a string is accepted
            (NOTE(review): which one Gradio passes is assumed to depend on
            its version/configuration; confirm).

    Returns:
        A status message: a summary with the number of files on success,
        ``"Nenhum arquivo enviado"`` when nothing was uploaded, or an error
        description if any step fails. No exception is propagated.
    """
    if not apk_file:
        return "Nenhum arquivo enviado"

    try:
        temp_apk_path = getattr(apk_file, "name", apk_file)

        # The sources are fully read into memory below, so the on-disk output
        # can be discarded as soon as the block exits.
        with tempfile.TemporaryDirectory() as output_dir:
            # Smali
            smali_output = os.path.join(output_dir, "smali")
            subprocess.run(
                [
                    "java", "-jar", "/usr/local/bin/baksmali.jar",
                    "d", temp_apk_path, "-o", smali_output,
                ],
                check=True,
            )

            # JADX
            java_output = os.path.join(output_dir, "java")
            subprocess.run(
                ["/usr/local/bin/jadx/bin/jadx", "-d", java_output, temp_apk_path],
                check=True,
            )

            # Collect files
            smali_files = _read_sources(smali_output, ".smali")
            java_files = _read_sources(java_output, ".java")

        apk_context["smali"] = smali_files
        apk_context["java"] = java_files
        return f"Decompilação concluída: {len(smali_files)} arquivos Smali, {len(java_files)} arquivos Java"
    except Exception as e:
        return f"Erro na decompilação: {str(e)}"
def process_with_bloom(text):
    """Run *text* through the BLOOM model and return the decoded output.

    Args:
        text: the user's message.

    Returns:
        The decoded first output sequence of ``bloom_model.generate`` with
        special tokens skipped, or *text* unchanged if anything fails (the
        error is printed, never raised).
    """
    try:
        # Prepare input (truncated to at most 512 tokens).
        inputs = bloom_tokenizer(
            text,
            return_tensors="pt",
            max_length=512,
            truncation=True,
            padding=True,
        )

        # Generate response
        with torch.no_grad():
            outputs = bloom_model.generate(
                inputs["input_ids"],
                # Passed explicitly so generate() does not have to guess the
                # mask from pad tokens.
                attention_mask=inputs["attention_mask"],
                # max_new_tokens rather than max_length: max_length also
                # counts the prompt, so a prompt of 200+ tokens (up to 512
                # are allowed above) left no room to generate anything.
                max_new_tokens=200,
                num_return_sequences=1,
                # NOTE(review): temperature only affects sampling; without
                # do_sample=True decoding stays greedy. Left as-is so results
                # remain deterministic - confirm the intent.
                temperature=0.7,
                pad_token_id=bloom_tokenizer.pad_token_id,
            )

        # Decode response
        return bloom_tokenizer.decode(outputs[0], skip_special_tokens=True)
    except Exception as e:
        print(f"Erro no processamento BLOOM: {str(e)}")
        return text
def analyze_with_codebert(code_text, query):
    """Generate an analysis of a code snippet for a query with CodeBERT.

    Args:
        code_text: source code to analyse; only the first 500 characters are
            placed in the prompt.
        query: the question the analysis should address.

    Returns:
        The decoded first output sequence of ``codebert_model.generate`` with
        special tokens skipped, or ``"Não foi possível analisar o código"``
        if anything fails (the error is printed, never raised).
    """
    # The 512-token budget is shared between the prompt and the generated
    # tokens. NOTE(review): this assumes the checkpoint cannot address more
    # than 512 positions - confirm against its config.
    max_new_tokens = 200
    max_prompt_tokens = 512 - max_new_tokens

    try:
        # Prepare prompt
        prompt = f"Query: {query}\nCódigo: {code_text[:500]}"

        # Tokenize without padding: a single sequence needs none, and padding
        # to 512 tokens both exceeded the generation limit and put pad tokens
        # in front of the text to be generated.
        inputs = tokenizer(
            prompt,
            return_tensors="pt",
            max_length=max_prompt_tokens,
            truncation=True,
        )

        # Generate analysis
        with torch.no_grad():
            outputs = codebert_model.generate(
                inputs["input_ids"],
                attention_mask=inputs["attention_mask"],
                max_new_tokens=max_new_tokens,
                num_return_sequences=1,
                pad_token_id=tokenizer.pad_token_id,
            )

        # Decode response
        return tokenizer.decode(outputs[0], skip_special_tokens=True)
    except Exception as e:
        print(f"Erro na análise CodeBERT: {str(e)}")
        return "Não foi possível analisar o código"
def query_apk_chat(user_message):
    """Answer a question about the decompiled APK held in ``apk_context``.

    The message is passed through ``process_with_bloom``, every stored Java
    and Smali file is embedded with ``indexing_model``, and the (at most)
    three files most similar to the processed query are reported together
    with an ``analyze_with_codebert`` analysis.

    Args:
        user_message: the user's question.

    Returns:
        The formatted report, ``"Nenhum APK decompilado disponível"`` when no
        sources are stored, or an error description if anything fails.
    """
    if not (apk_context["smali"] or apk_context["java"]):
        return "Nenhum APK decompilado disponível"

    try:
        # Process the query with BLOOM.
        processed_query = process_with_bloom(user_message)

        # Gather every source file: Java entries first, then Smali.
        entries = [
            (kind, name, source)
            for kind in ("java", "smali")
            for name, source in apk_context[kind].items()
        ]
        all_codes = [source for _, _, source in entries]

        # Compute embeddings.
        query_embedding = indexing_model.encode(processed_query, convert_to_tensor=True)
        code_embeddings = indexing_model.encode(all_codes, convert_to_tensor=True)

        # Find the best matches (never ask for more results than files).
        similarities = util.pytorch_cos_sim(query_embedding, code_embeddings)[0]
        best_matches = torch.topk(similarities, k=min(3, len(all_codes)))

        response = []
        for score, idx in zip(best_matches.values, best_matches.indices):
            file_type, file_name, code = entries[idx]
            # Code analysis
            analysis = analyze_with_codebert(code, processed_query)
            response.extend(
                [
                    f"\nArquivo ({file_type}): {file_name}",
                    f"Relevância: {score:.2f}",
                    f"Código:\n{code[:500]}...",
                    f"Análise:\n{analysis}\n",
                    "-" * 80,
                ]
            )
        return "\n".join(response)
    except Exception as e:
        return f"Erro na análise: {str(e)}"
# Gradio configuration
# The external tools are installed before any interface is built.
install_tools()

_apk_input = gr.File(label="APK File", file_types=[".apk"])
upload_interface = gr.Interface(
    fn=decompile_apk,
    inputs=_apk_input,
    outputs="text",
    title="APK Analyzer",
    description="Upload an APK file for analysis",
)

_question_box = gr.Textbox(lines=3, placeholder="Ask about the APK code...")
_answer_box = gr.Textbox(lines=20)
chat_interface = gr.Interface(
    fn=query_apk_chat,
    inputs=_question_box,
    outputs=_answer_box,
    title="Code Analysis Chat",
    description="AI-powered code analysis",
)

# Combined interface: one tab per sub-interface, in the same order.
_tabs = [upload_interface, chat_interface]
_tab_titles = ["Upload APK", "Analyze Code"]
iface = gr.TabbedInterface(_tabs, _tab_titles)

if __name__ == "__main__":
    iface.launch()