Update app.py
Browse files
app.py
CHANGED
@@ -7,20 +7,26 @@ from sentence_transformers import SentenceTransformer, util
|
|
7 |
import torch
|
8 |
import gradio as gr
|
9 |
|
10 |
-
# Inicialização
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
apk_context = {"smali": {}, "java": {}, "info": ""}
|
25 |
|
26 |
def check_java():
|
@@ -28,9 +34,9 @@ def check_java():
|
|
28 |
result = subprocess.run(["java", "-version"], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
29 |
print("Java está disponível:", result.stderr.decode())
|
30 |
except FileNotFoundError:
|
31 |
-
raise EnvironmentError("Java não está instalado
|
32 |
except Exception as e:
|
33 |
-
raise EnvironmentError(f"Erro
|
34 |
|
35 |
def install_tools():
|
36 |
baksmali_path = "/usr/local/bin/baksmali.jar"
|
@@ -39,64 +45,54 @@ def install_tools():
|
|
39 |
check_java()
|
40 |
|
41 |
if not os.path.exists(baksmali_path):
|
42 |
-
print("Instalando
|
43 |
-
subprocess.run(
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
"-o",
|
48 |
-
baksmali_path,
|
49 |
-
"https://bitbucket.org/JesusFreke/smali/downloads/baksmali-2.5.2.jar",
|
50 |
-
],
|
51 |
-
check=True,
|
52 |
-
)
|
53 |
|
54 |
jadx_zip_path = "/usr/local/bin/jadx.zip"
|
55 |
if not os.path.exists(jadx_path):
|
56 |
-
print("Instalando
|
57 |
-
subprocess.run(
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
"-o",
|
62 |
-
jadx_zip_path,
|
63 |
-
"https://github.com/skylot/jadx/releases/download/v1.4.7/jadx-1.4.7.zip",
|
64 |
-
],
|
65 |
-
check=True,
|
66 |
-
)
|
67 |
subprocess.run(["unzip", "-o", jadx_zip_path, "-d", "/usr/local/bin/jadx"], check=True)
|
68 |
if os.path.exists(jadx_path):
|
69 |
subprocess.run(["chmod", "+x", jadx_path], check=True)
|
70 |
-
else:
|
71 |
-
raise FileNotFoundError("Executável do JADX não encontrado no caminho esperado.")
|
72 |
|
73 |
def decompile_apk(apk_file):
|
74 |
-
if apk_file
|
75 |
-
return "Nenhum arquivo enviado
|
76 |
|
77 |
temp_apk_path = apk_file.name
|
78 |
output_dir = tempfile.mkdtemp()
|
79 |
try:
|
|
|
80 |
smali_output = os.path.join(output_dir, "smali")
|
81 |
-
subprocess.run(
|
82 |
-
|
83 |
-
|
84 |
-
)
|
85 |
|
|
|
86 |
java_output = os.path.join(output_dir, "java")
|
87 |
-
subprocess.run(
|
88 |
-
|
89 |
-
|
90 |
-
)
|
91 |
|
|
|
92 |
smali_files = {}
|
|
|
|
|
93 |
for root, _, files in os.walk(smali_output):
|
94 |
for file in files:
|
95 |
if file.endswith(".smali"):
|
96 |
with open(os.path.join(root, file), "r") as f:
|
97 |
smali_files[file] = f.read()
|
98 |
|
99 |
-
java_files = {}
|
100 |
for root, _, files in os.walk(java_output):
|
101 |
for file in files:
|
102 |
if file.endswith(".java"):
|
@@ -106,100 +102,143 @@ def decompile_apk(apk_file):
|
|
106 |
apk_context["smali"] = smali_files
|
107 |
apk_context["java"] = java_files
|
108 |
|
109 |
-
return f"Decompilação
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
110 |
|
111 |
except Exception as e:
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
|
|
|
|
|
|
|
|
123 |
)
|
124 |
-
processed_query = bloom_tokenizer.decode(outputs[0], skip_special_tokens=True)
|
125 |
-
return processed_query
|
126 |
|
127 |
-
|
128 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
129 |
|
130 |
def query_apk_chat(user_message):
|
131 |
if not apk_context["smali"] and not apk_context["java"]:
|
132 |
-
return "Nenhum APK decompilado disponível
|
133 |
|
134 |
try:
|
135 |
-
# Processar
|
136 |
-
|
137 |
-
|
138 |
-
#
|
139 |
-
|
|
|
140 |
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
top_indices = torch.topk(scores, k=top_k).indices
|
149 |
|
150 |
-
|
151 |
-
|
152 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
153 |
|
154 |
-
#
|
155 |
-
|
156 |
-
inputs = tokenizer(explanation_prompt, return_tensors="pt", max_length=512, truncation=True, padding=True)
|
157 |
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
)
|
166 |
-
explanation = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
167 |
-
|
168 |
-
response += f"\n\n**Arquivo:** {file_name}\n"
|
169 |
-
response += f"**Código:**\n```\n{relevant_code[:1000]}\n```\n"
|
170 |
-
response += f"**Explicação:**\n{explanation}\n"
|
171 |
-
response += "-" * 80
|
172 |
-
|
173 |
-
return response
|
174 |
|
175 |
except Exception as e:
|
176 |
-
return f"Erro
|
177 |
|
178 |
-
# Configuração
|
179 |
install_tools()
|
180 |
|
181 |
-
|
182 |
-
apk_upload_interface = gr.Interface(
|
183 |
fn=decompile_apk,
|
184 |
-
inputs=gr.File(label="
|
185 |
outputs="text",
|
186 |
-
title="
|
187 |
-
description="
|
188 |
)
|
189 |
|
190 |
chat_interface = gr.Interface(
|
191 |
fn=query_apk_chat,
|
192 |
-
inputs=gr.Textbox(lines=3, placeholder="
|
193 |
-
outputs=gr.Textbox(lines=20
|
194 |
-
title="
|
195 |
-
description="
|
196 |
)
|
197 |
|
198 |
# Interface combinada
|
199 |
iface = gr.TabbedInterface(
|
200 |
-
[
|
201 |
-
["
|
202 |
)
|
203 |
|
204 |
-
|
205 |
-
iface.launch()
|
|
|
7 |
import torch
|
8 |
import gradio as gr
|
9 |
|
10 |
+
# Model initialisation. All models are loaded at import time; a failure is
# printed and then re-raised so the module does not finish importing with
# some of the model globals undefined.
try:
    # BLOOM: used by process_with_bloom() to process the user's question.
    bloom_tokenizer = BloomTokenizerFast.from_pretrained("bigscience/bloom-560m")
    bloom_model = BloomForCausalLM.from_pretrained("bigscience/bloom-560m")
    bloom_model.eval()

    # Sentence-embedding model used to rank decompiled files against the query.
    indexing_model = SentenceTransformer("all-MiniLM-L6-v2")

    # CodeBERT: used by analyze_with_codebert() through generate().
    # RobertaForCausalLM has to be configured as a decoder when it is used
    # standalone for generation; without is_decoder=True the library warns and
    # the model attends bidirectionally.
    tokenizer = RobertaTokenizer.from_pretrained("microsoft/codebert-base")
    codebert_model = RobertaForCausalLM.from_pretrained(
        "microsoft/codebert-base",
        is_decoder=True,
    )
    codebert_model.eval()

except Exception as e:
    print(f"Erro ao carregar modelos: {str(e)}")
    raise

# Global context: filled by decompile_apk() and read by query_apk_chat().
apk_context = {"smali": {}, "java": {}, "info": ""}
|
31 |
|
32 |
def check_java():
|
|
|
34 |
result = subprocess.run(["java", "-version"], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
35 |
print("Java está disponível:", result.stderr.decode())
|
36 |
except FileNotFoundError:
|
37 |
+
raise EnvironmentError("Java não está instalado")
|
38 |
except Exception as e:
|
39 |
+
raise EnvironmentError(f"Erro ao verificar Java: {str(e)}")
|
40 |
|
41 |
def install_tools():
|
42 |
baksmali_path = "/usr/local/bin/baksmali.jar"
|
|
|
45 |
check_java()
|
46 |
|
47 |
if not os.path.exists(baksmali_path):
|
48 |
+
print("Instalando Baksmali...")
|
49 |
+
subprocess.run([
|
50 |
+
"curl", "-L", "-o", baksmali_path,
|
51 |
+
"https://bitbucket.org/JesusFreke/smali/downloads/baksmali-2.5.2.jar"
|
52 |
+
], check=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
|
54 |
jadx_zip_path = "/usr/local/bin/jadx.zip"
|
55 |
if not os.path.exists(jadx_path):
|
56 |
+
print("Instalando JADX...")
|
57 |
+
subprocess.run([
|
58 |
+
"curl", "-L", "-o", jadx_zip_path,
|
59 |
+
"https://github.com/skylot/jadx/releases/download/v1.4.7/jadx-1.4.7.zip"
|
60 |
+
], check=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
subprocess.run(["unzip", "-o", jadx_zip_path, "-d", "/usr/local/bin/jadx"], check=True)
|
62 |
if os.path.exists(jadx_path):
|
63 |
subprocess.run(["chmod", "+x", jadx_path], check=True)
|
|
|
|
|
64 |
|
65 |
def decompile_apk(apk_file):
|
66 |
+
if not apk_file:
|
67 |
+
return "Nenhum arquivo enviado"
|
68 |
|
69 |
temp_apk_path = apk_file.name
|
70 |
output_dir = tempfile.mkdtemp()
|
71 |
try:
|
72 |
+
# Smali
|
73 |
smali_output = os.path.join(output_dir, "smali")
|
74 |
+
subprocess.run([
|
75 |
+
"java", "-jar", "/usr/local/bin/baksmali.jar",
|
76 |
+
"d", temp_apk_path, "-o", smali_output
|
77 |
+
], check=True)
|
78 |
|
79 |
+
# JADX
|
80 |
java_output = os.path.join(output_dir, "java")
|
81 |
+
subprocess.run([
|
82 |
+
"/usr/local/bin/jadx/bin/jadx",
|
83 |
+
"-d", java_output, temp_apk_path
|
84 |
+
], check=True)
|
85 |
|
86 |
+
# Coletar arquivos
|
87 |
smali_files = {}
|
88 |
+
java_files = {}
|
89 |
+
|
90 |
for root, _, files in os.walk(smali_output):
|
91 |
for file in files:
|
92 |
if file.endswith(".smali"):
|
93 |
with open(os.path.join(root, file), "r") as f:
|
94 |
smali_files[file] = f.read()
|
95 |
|
|
|
96 |
for root, _, files in os.walk(java_output):
|
97 |
for file in files:
|
98 |
if file.endswith(".java"):
|
|
|
102 |
apk_context["smali"] = smali_files
|
103 |
apk_context["java"] = java_files
|
104 |
|
105 |
+
return f"Decompilação concluída: {len(smali_files)} arquivos Smali, {len(java_files)} arquivos Java"
|
106 |
+
|
107 |
+
except Exception as e:
|
108 |
+
return f"Erro na decompilação: {str(e)}"
|
109 |
+
|
110 |
+
def process_with_bloom(text):
    """Run *text* through the BLOOM model and return the decoded output.

    Args:
        text: Prompt string; it is truncated to 512 tokens before generation.

    Returns:
        The decoded first generated sequence with special tokens removed.
        If anything fails (tokenisation, generation, decoding), the error is
        printed and *text* is returned unchanged, so callers always get a
        usable string.
    """
    try:
        inputs = bloom_tokenizer(
            text,
            return_tensors="pt",
            max_length=512,
            truncation=True,
            padding=True,
        )

        with torch.no_grad():
            outputs = bloom_model.generate(
                inputs["input_ids"],
                # Pass the mask explicitly so padding is never attended to.
                attention_mask=inputs["attention_mask"],
                # Budget for *generated* tokens only. The previous
                # max_length=200 also counted the prompt, so a prompt of
                # 200+ tokens (up to 512 are allowed above) left no room
                # to generate anything.
                max_new_tokens=200,
                num_return_sequences=1,
                # Decoding stays greedy: the former temperature=0.7 had no
                # effect because sampling was never enabled.
                pad_token_id=bloom_tokenizer.pad_token_id,
            )

        return bloom_tokenizer.decode(outputs[0], skip_special_tokens=True)

    except Exception as e:
        # Best-effort step: fall back to the raw text rather than failing
        # the whole request.
        print(f"Erro no processamento BLOOM: {str(e)}")
        return text
|
138 |
+
|
139 |
+
def analyze_with_codebert(code_text, query):
    """Generate a textual analysis of a code snippet with the CodeBERT model.

    Args:
        code_text: Source code to analyse; only the first 500 characters are
            placed in the prompt.
        query: The question the analysis should address.

    Returns:
        The decoded first generated sequence with special tokens removed.
        If anything fails, the error is printed and the fixed message
        "Não foi possível analisar o código" is returned.
    """
    try:
        prompt = f"Query: {query}\nCódigo: {code_text[:500]}"

        new_token_budget = 200
        # NOTE(review): 512 is the cap the original tokenizer call used and is
        # assumed here to be the model's usable context size - confirm.
        # The prompt is truncated so that prompt + generated tokens fit in it.
        prompt_token_limit = 512 - new_token_budget

        # No padding: this is a single sequence. The previous
        # padding="max_length" always produced 512 input tokens, which already
        # exceeded the generation cap of 200 and put pad tokens in front of
        # any generated text.
        inputs = tokenizer(
            prompt,
            return_tensors="pt",
            max_length=prompt_token_limit,
            truncation=True,
        )

        with torch.no_grad():
            outputs = codebert_model.generate(
                inputs["input_ids"],
                attention_mask=inputs["attention_mask"],
                # Counts generated tokens only, independent of prompt length.
                max_new_tokens=new_token_budget,
                num_return_sequences=1,
                pad_token_id=tokenizer.pad_token_id,
            )

        return tokenizer.decode(outputs[0], skip_special_tokens=True)

    except Exception as e:
        # Best-effort step: report the problem and return a placeholder so the
        # caller can still build its response.
        print(f"Erro na análise CodeBERT: {str(e)}")
        return "Não foi possível analisar o código"
|
169 |
|
170 |
def query_apk_chat(user_message):
    """Answer a question about the currently decompiled APK.

    The question is first passed through process_with_bloom(). Every stored
    Java and Smali source is then embedded and ranked by cosine similarity
    against the processed question, and the best matches (at most three) are
    analysed with analyze_with_codebert().

    Args:
        user_message: The question text entered by the user.

    Returns:
        A multi-line report with, for each match, the file name and type, the
        similarity score, the first 500 characters of the code and the
        analysis. A notice string is returned when nothing has been
        decompiled yet, and an error string when the processing fails.
    """
    if not (apk_context["smali"] or apk_context["java"]):
        return "Nenhum APK decompilado disponível"

    try:
        processed_query = process_with_bloom(user_message)

        # One (type, file name, source) entry per stored file: Java entries
        # first, then Smali.
        catalogue = [
            (kind, name, source)
            for kind in ("java", "smali")
            for name, source in apk_context[kind].items()
        ]
        sources = [source for _, _, source in catalogue]

        query_vector = indexing_model.encode(processed_query, convert_to_tensor=True)
        source_vectors = indexing_model.encode(sources, convert_to_tensor=True)

        # Row 0 holds the similarity of the single query against every source.
        similarity_row = util.pytorch_cos_sim(query_vector, source_vectors)[0]
        top_scores, top_positions = torch.topk(
            similarity_row, k=min(3, len(sources))
        )

        report = []
        for score, position in zip(top_scores, top_positions):
            kind, name, source = catalogue[position]
            analysis = analyze_with_codebert(source, processed_query)
            report += [
                f"\nArquivo ({kind}): {name}",
                f"Relevância: {score:.2f}",
                f"Código:\n{source[:500]}...",
                f"Análise:\n{analysis}\n",
                "-" * 80,
            ]

        return "\n".join(report)

    except Exception as e:
        return f"Erro na análise: {str(e)}"
|
217 |
|
218 |
+
# Gradio setup. The external tools are installed before any interface is built.
install_tools()

# Tab 1: upload an APK and decompile it.
upload_interface = gr.Interface(
    decompile_apk,
    gr.File(label="APK File", file_types=[".apk"]),
    "text",
    title="APK Analyzer",
    description="Upload an APK file for analysis",
)

# Tab 2: ask questions about the decompiled code.
chat_interface = gr.Interface(
    query_apk_chat,
    gr.Textbox(lines=3, placeholder="Ask about the APK code..."),
    gr.Textbox(lines=20),
    title="Code Analysis Chat",
    description="AI-powered code analysis",
)

# Combined interface: both tabs in a single app.
iface = gr.TabbedInterface(
    interface_list=[upload_interface, chat_interface],
    tab_names=["Upload APK", "Analyze Code"],
)

# Start the server only when the file is run as a script.
if __name__ == "__main__":
    iface.launch()
|