|
import os |
|
import subprocess |
|
import tempfile |
|
from androguard.misc import AnalyzeAPK |
|
from transformers import BloomForCausalLM, BloomTokenizerFast, RobertaTokenizer, RobertaForCausalLM, pipeline |
|
from sentence_transformers import SentenceTransformer, util |
|
import torch |
|
import gradio as gr |
|
|
|
|
|
# Load every model once at import time. Any failure is reported and re-raised
# so the application never starts with only part of its models available.
try:
    # BLOOM: used to expand/rewrite the user's question before retrieval.
    bloom_tokenizer = BloomTokenizerFast.from_pretrained("bigscience/bloom-560m")
    bloom_model = BloomForCausalLM.from_pretrained("bigscience/bloom-560m")
    bloom_model.eval()

    # Sentence embedding model: used to rank decompiled files against a query.
    indexing_model = SentenceTransformer("all-MiniLM-L6-v2")

    # CodeBERT: used to generate a textual "analysis" of a code snippet.
    tokenizer = RobertaTokenizer.from_pretrained("microsoft/codebert-base")
    # RobertaForCausalLM is intended to run with `is_decoder=True` when used
    # on its own for generation; without it the model attends bidirectionally
    # and the library emits a warning. Extra keyword arguments to
    # `from_pretrained` are applied to the loaded configuration.
    # NOTE(review): codebert-base looks like an encoder-only checkpoint, so the
    # LM head may be freshly initialised here -- confirm the generated text is
    # meaningful, or consider a checkpoint that ships a trained LM head.
    codebert_model = RobertaForCausalLM.from_pretrained(
        "microsoft/codebert-base",
        is_decoder=True,
    )
    codebert_model.eval()
except Exception as e:
    print(f"Erro ao carregar modelos: {str(e)}")
    raise

# Shared state for the most recently decompiled APK:
#   "smali" / "java" map a file identifier to that file's source text,
#   "info" is a free-form string (not written anywhere in this part of the file).
apk_context = {"smali": {}, "java": {}, "info": ""}
|
|
|
def check_java():
    """Verify that a ``java`` executable can be run.

    Runs ``java -version`` and prints the version banner, which the JVM
    writes to stderr.

    Raises:
        EnvironmentError: if the ``java`` executable cannot be found, or if
            running it (or reporting its output) fails for any other reason.
    """
    version_command = ["java", "-version"]
    try:
        completed = subprocess.run(version_command, check=True, capture_output=True)
        banner = completed.stderr.decode()
        print("Java está disponível:", banner)
    except FileNotFoundError:
        raise EnvironmentError("Java não está instalado")
    except Exception as e:
        raise EnvironmentError(f"Erro ao verificar Java: {str(e)}")
|
|
|
def _download(url, destination):
    """Download *url* to *destination* with curl, all-or-nothing.

    The payload is written to a sibling ``.part`` file and moved into place
    only after curl succeeds, so *destination* never holds a truncated file
    or an HTTP error page.

    Raises:
        subprocess.CalledProcessError: if curl exits with a non-zero status.
    """
    partial_path = destination + ".part"
    try:
        # --fail: exit non-zero on HTTP errors instead of saving the server's
        # error page as if it were the requested file.
        subprocess.run(
            ["curl", "--fail", "-L", "-o", partial_path, url],
            check=True,
        )
        os.replace(partial_path, destination)
    finally:
        # Only still present when the download did not complete.
        if os.path.exists(partial_path):
            os.remove(partial_path)


def install_tools():
    """Ensure Java, Baksmali and JADX are available, installing the latter two if missing.

    Baksmali is stored at ``/usr/local/bin/baksmali.jar`` and JADX is unpacked
    under ``/usr/local/bin/jadx``. Each tool is downloaded only when its
    expected path does not exist yet.

    Raises:
        EnvironmentError: propagated from ``check_java`` when Java is unusable.
        subprocess.CalledProcessError: if curl or unzip fails.
    """
    baksmali_path = "/usr/local/bin/baksmali.jar"
    jadx_home = "/usr/local/bin/jadx"
    jadx_path = "/usr/local/bin/jadx/bin/jadx"
    jadx_zip_path = "/usr/local/bin/jadx.zip"

    check_java()

    if not os.path.exists(baksmali_path):
        print("Instalando Baksmali...")
        _download(
            "https://bitbucket.org/JesusFreke/smali/downloads/baksmali-2.5.2.jar",
            baksmali_path,
        )

    if not os.path.exists(jadx_path):
        print("Instalando JADX...")
        _download(
            "https://github.com/skylot/jadx/releases/download/v1.4.7/jadx-1.4.7.zip",
            jadx_zip_path,
        )
        try:
            subprocess.run(
                ["unzip", "-o", jadx_zip_path, "-d", jadx_home],
                check=True,
            )
        finally:
            # The archive is only an intermediate artefact; drop it whether or
            # not extraction succeeded so a bad archive is never reused.
            if os.path.exists(jadx_zip_path):
                os.remove(jadx_zip_path)

    # Make the launcher executable whenever it exists but is not runnable yet
    # (also repairs an install that was interrupted before this step).
    if os.path.exists(jadx_path) and not os.access(jadx_path, os.X_OK):
        os.chmod(jadx_path, os.stat(jadx_path).st_mode | 0o111)
|
|
|
def _read_sources(root_dir, extension):
    """Return ``{path relative to root_dir: text}`` for files under *root_dir* ending in *extension*."""
    sources = {}
    for current_dir, _, filenames in os.walk(root_dir):
        for filename in filenames:
            if not filename.endswith(extension):
                continue
            full_path = os.path.join(current_dir, filename)
            # Decompiler output is not guaranteed to be valid UTF-8; replace
            # undecodable bytes rather than aborting the whole import.
            with open(full_path, "r", encoding="utf-8", errors="replace") as handle:
                # Key by relative path: many classes share a base name
                # (e.g. R.java, a.smali) and would overwrite each other.
                sources[os.path.relpath(full_path, root_dir)] = handle.read()
    return sources


def decompile_apk(apk_file):
    """Decompile an uploaded APK to Smali and Java and store the sources in ``apk_context``.

    Args:
        apk_file: the uploaded file, either an object exposing the path as
            ``.name`` or the path itself as a string. Falsy means no upload.

    Returns:
        A human-readable status string: a notice when nothing was uploaded,
        a summary with file counts on success, or an error description when
        any step fails. On failure ``apk_context`` is left untouched.
    """
    if not apk_file:
        return "Nenhum arquivo enviado"

    # NOTE(review): Gradio appears to pass either a temp-file wrapper or a
    # plain path string depending on version/configuration -- accept both.
    temp_apk_path = getattr(apk_file, "name", apk_file)

    try:
        # The decompiled tree is only needed until the sources are read into
        # memory, so let the context manager delete it afterwards.
        with tempfile.TemporaryDirectory() as output_dir:
            smali_output = os.path.join(output_dir, "smali")
            subprocess.run(
                [
                    "java", "-jar", "/usr/local/bin/baksmali.jar",
                    "d", temp_apk_path, "-o", smali_output,
                ],
                check=True,
            )

            java_output = os.path.join(output_dir, "java")
            subprocess.run(
                [
                    "/usr/local/bin/jadx/bin/jadx",
                    "-d", java_output, temp_apk_path,
                ],
                check=True,
            )

            smali_files = _read_sources(smali_output, ".smali")
            java_files = _read_sources(java_output, ".java")
    except Exception as e:
        return f"Erro na decompilação: {str(e)}"

    apk_context["smali"] = smali_files
    apk_context["java"] = java_files

    return f"Decompilação concluída: {len(smali_files)} arquivos Smali, {len(java_files)} arquivos Java"
|
|
|
def process_with_bloom(text):
    """Run *text* through BLOOM and return the decoded generation.

    The decoded sequence contains the prompt followed by the model's
    continuation. This is a best-effort step: on any failure the error is
    printed and *text* is returned unchanged.

    Args:
        text: the user's query.

    Returns:
        The generated string, or *text* itself when it is empty/blank or when
        generation fails.
    """
    # Nothing to expand; avoids feeding an empty prompt to the model.
    if not text or not text.strip():
        return text

    try:
        inputs = bloom_tokenizer(
            text,
            return_tensors="pt",
            max_length=512,
            truncation=True,
            padding=True,
        )

        with torch.no_grad():
            outputs = bloom_model.generate(
                inputs["input_ids"],
                # Pass the mask explicitly so the model does not have to
                # guess which positions are padding.
                attention_mask=inputs["attention_mask"],
                # `max_length` counts the prompt as well, so prompts of 200+
                # tokens left no room to generate; bound only the new tokens.
                max_new_tokens=200,
                num_return_sequences=1,
                # Decoding is greedy. The former `temperature` argument only
                # takes effect when sampling, so it had no effect and is
                # dropped to keep the output deterministic.
                do_sample=False,
                pad_token_id=bloom_tokenizer.pad_token_id,
            )

        return bloom_tokenizer.decode(outputs[0], skip_special_tokens=True)

    except Exception as e:
        print(f"Erro no processamento BLOOM: {str(e)}")
        return text
|
|
|
def analyze_with_codebert(code_text, query):
    """Generate a textual analysis of *code_text* for *query* with CodeBERT.

    Only the first 500 characters of *code_text* are put into the prompt.
    The decoded sequence contains the prompt followed by the continuation.

    Args:
        code_text: source code of the file being analysed.
        query: the (possibly pre-processed) user question.

    Returns:
        The decoded generation, or a fixed fallback message when anything
        fails (the error is printed).
    """
    # RoBERTa-style models accept at most 512 positions, so prompt plus
    # continuation must fit in that window.
    max_positions = 512
    max_new_tokens = 200
    max_prompt_tokens = max_positions - max_new_tokens

    try:
        prompt = f"Query: {query}\nCódigo: {code_text[:500]}"

        # No padding: there is a single sequence, and padding it to a fixed
        # length made every prompt longer than the generation limit and
        # placed pad tokens between the prompt and the continuation.
        inputs = tokenizer(
            prompt,
            return_tensors="pt",
            max_length=max_prompt_tokens,
            truncation=True,
        )

        with torch.no_grad():
            outputs = codebert_model.generate(
                inputs["input_ids"],
                attention_mask=inputs["attention_mask"],
                # Bound the continuation only; `max_length` also counts the
                # prompt and was smaller than the padded input.
                max_new_tokens=max_new_tokens,
                num_return_sequences=1,
                pad_token_id=tokenizer.pad_token_id,
            )

        return tokenizer.decode(outputs[0], skip_special_tokens=True)

    except Exception as e:
        print(f"Erro na análise CodeBERT: {str(e)}")
        return "Não foi possível analisar o código"
|
|
|
def query_apk_chat(user_message):
    """Answer a question about the decompiled APK held in ``apk_context``.

    The query is expanded with ``process_with_bloom``, every stored Java and
    Smali file is embedded and ranked by cosine similarity against it, and the
    (up to) three best matches are reported together with a snippet and the
    output of ``analyze_with_codebert``.

    Args:
        user_message: the question typed by the user.

    Returns:
        The formatted report, a notice when no APK has been decompiled or the
        question is blank, or an error description when any step fails.
    """
    if not apk_context["smali"] and not apk_context["java"]:
        return "Nenhum APK decompilado disponível"

    # A blank question carries nothing to match against the code.
    if not user_message or not user_message.strip():
        return "Digite uma pergunta sobre o código do APK"

    snippet_limit = 500

    try:
        processed_query = process_with_bloom(user_message)

        # Parallel lists: all_files[i] describes all_codes[i]. Java files
        # come first, then Smali.
        all_codes = []
        all_files = []
        for file_type in ("java", "smali"):
            for file_name, code in apk_context[file_type].items():
                all_codes.append(code)
                all_files.append((file_type, file_name))

        query_embedding = indexing_model.encode(processed_query, convert_to_tensor=True)
        code_embeddings = indexing_model.encode(all_codes, convert_to_tensor=True)

        similarities = util.pytorch_cos_sim(query_embedding, code_embeddings)[0]
        top_k = min(3, len(all_codes))
        best_matches = torch.topk(similarities, k=top_k)

        # Convert to plain Python numbers once, instead of indexing lists and
        # formatting strings with 0-dim tensors.
        scores = best_matches.values.tolist()
        indices = best_matches.indices.tolist()

        response = []
        for score, idx in zip(scores, indices):
            file_type, file_name = all_files[idx]
            code = all_codes[idx]

            analysis = analyze_with_codebert(code, processed_query)

            # Mark the snippet as cut off only when it really was.
            snippet = code[:snippet_limit]
            if len(code) > snippet_limit:
                snippet += "..."

            response.append(f"\nArquivo ({file_type}): {file_name}")
            response.append(f"Relevância: {score:.2f}")
            response.append(f"Código:\n{snippet}")
            response.append(f"Análise:\n{analysis}\n")
            response.append("-" * 80)

        return "\n".join(response)

    except Exception as e:
        return f"Erro na análise: {str(e)}"
|
|
|
|
|
# Make sure the external decompilers are present before the UI is built.
install_tools()

# Tab 1: upload an APK and decompile it.
upload_interface = gr.Interface(
    fn=decompile_apk,
    inputs=gr.File(label="APK File", file_types=[".apk"]),
    outputs="text",
    title="APK Analyzer",
    description="Upload an APK file for analysis",
)

# Tab 2: ask questions about the decompiled sources.
chat_interface = gr.Interface(
    fn=query_apk_chat,
    inputs=gr.Textbox(lines=3, placeholder="Ask about the APK code..."),
    outputs=gr.Textbox(lines=20),
    title="Code Analysis Chat",
    description="AI-powered code analysis",
)

# Combine both interfaces into a single tabbed application.
iface = gr.TabbedInterface(
    [upload_interface, chat_interface],
    ["Upload APK", "Analyze Code"],
)

if __name__ == "__main__":
    iface.launch()