nsv2042 committed on
Commit
24cf879
1 Parent(s): 1d6d751

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +156 -117
app.py CHANGED
@@ -7,20 +7,26 @@ from sentence_transformers import SentenceTransformer, util
7
  import torch
8
  import gradio as gr
9
 
10
- # Inicialização do modelo BLOOM para compreensão de linguagem natural
11
- bloom_tokenizer = BloomTokenizerFast.from_pretrained("bigscience/bloom-560m")
12
- bloom_model = BloomForCausalLM.from_pretrained("bigscience/bloom-560m")
13
- bloom_model.eval()
14
-
15
- # Inicialização do modelo all-MiniLM-L6-v2 para indexação
16
- indexing_model = SentenceTransformer("all-MiniLM-L6-v2")
17
-
18
- # Inicialização do CodeBERT para explicações gerativas
19
- tokenizer = RobertaTokenizer.from_pretrained("microsoft/codebert-base")
20
- codebert_model = RobertaForCausalLM.from_pretrained("microsoft/codebert-base", output_hidden_states=True)
21
- codebert_model.eval()
22
-
23
- # Contexto global para armazenar dados do APK
 
 
 
 
 
 
24
  apk_context = {"smali": {}, "java": {}, "info": ""}
25
 
26
  def check_java():
@@ -28,9 +34,9 @@ def check_java():
28
  result = subprocess.run(["java", "-version"], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
29
  print("Java está disponível:", result.stderr.decode())
30
  except FileNotFoundError:
31
- raise EnvironmentError("Java não está instalado ou não foi encontrado no PATH.")
32
  except Exception as e:
33
- raise EnvironmentError(f"Erro inesperado ao verificar a instalação do Java: {str(e)}")
34
 
35
  def install_tools():
36
  baksmali_path = "/usr/local/bin/baksmali.jar"
@@ -39,64 +45,54 @@ def install_tools():
39
  check_java()
40
 
41
  if not os.path.exists(baksmali_path):
42
- print("Instalando o Baksmali...")
43
- subprocess.run(
44
- [
45
- "curl",
46
- "-L",
47
- "-o",
48
- baksmali_path,
49
- "https://bitbucket.org/JesusFreke/smali/downloads/baksmali-2.5.2.jar",
50
- ],
51
- check=True,
52
- )
53
 
54
  jadx_zip_path = "/usr/local/bin/jadx.zip"
55
  if not os.path.exists(jadx_path):
56
- print("Instalando o JADX...")
57
- subprocess.run(
58
- [
59
- "curl",
60
- "-L",
61
- "-o",
62
- jadx_zip_path,
63
- "https://github.com/skylot/jadx/releases/download/v1.4.7/jadx-1.4.7.zip",
64
- ],
65
- check=True,
66
- )
67
  subprocess.run(["unzip", "-o", jadx_zip_path, "-d", "/usr/local/bin/jadx"], check=True)
68
  if os.path.exists(jadx_path):
69
  subprocess.run(["chmod", "+x", jadx_path], check=True)
70
- else:
71
- raise FileNotFoundError("Executável do JADX não encontrado no caminho esperado.")
72
 
73
  def decompile_apk(apk_file):
74
- if apk_file is None:
75
- return "Nenhum arquivo enviado. Por favor, envie um arquivo APK."
76
 
77
  temp_apk_path = apk_file.name
78
  output_dir = tempfile.mkdtemp()
79
  try:
 
80
  smali_output = os.path.join(output_dir, "smali")
81
- subprocess.run(
82
- ["java", "-jar", "/usr/local/bin/baksmali.jar", "d", temp_apk_path, "-o", smali_output],
83
- check=True,
84
- )
85
 
 
86
  java_output = os.path.join(output_dir, "java")
87
- subprocess.run(
88
- ["/usr/local/bin/jadx/bin/jadx", "-d", java_output, temp_apk_path],
89
- check=True,
90
- )
91
 
 
92
  smali_files = {}
 
 
93
  for root, _, files in os.walk(smali_output):
94
  for file in files:
95
  if file.endswith(".smali"):
96
  with open(os.path.join(root, file), "r") as f:
97
  smali_files[file] = f.read()
98
 
99
- java_files = {}
100
  for root, _, files in os.walk(java_output):
101
  for file in files:
102
  if file.endswith(".java"):
@@ -106,100 +102,143 @@ def decompile_apk(apk_file):
106
  apk_context["smali"] = smali_files
107
  apk_context["java"] = java_files
108
 
109
- return f"Decompilação bem-sucedida. Extraídos {len(smali_files)} arquivos Smali e {len(java_files)} arquivos Java."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
 
111
  except Exception as e:
112
- return f"Erro durante a decompilação: {str(e)}"
113
-
114
- def process_with_bloom(user_message):
115
- inputs = bloom_tokenizer(user_message, return_tensors="pt", max_length=512, truncation=True)
116
- with torch.no_grad():
117
- outputs = bloom_model.generate(
118
- inputs["input_ids"],
119
- max_length=256,
120
- num_return_sequences=1,
121
- temperature=0.7,
122
- top_p=0.9
 
 
 
 
123
  )
124
- processed_query = bloom_tokenizer.decode(outputs[0], skip_special_tokens=True)
125
- return processed_query
126
 
127
- def get_embeddings(text):
128
- return indexing_model.encode(text, convert_to_tensor=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
 
130
  def query_apk_chat(user_message):
131
  if not apk_context["smali"] and not apk_context["java"]:
132
- return "Nenhum APK decompilado disponível. Por favor, envie e decompile um APK primeiro."
133
 
134
  try:
135
- # Processar a mensagem do usuário com BLOOM
136
- processed_message = process_with_bloom(user_message)
137
-
138
- # Obter embedding da query processada
139
- query_embedding = get_embeddings(processed_message)
 
140
 
141
- # Combinar Smali e Java para análise
142
- combined_texts = [(k, v) for k, v in apk_context["smali"].items()] + [(k, v) for k, v in apk_context["java"].items()]
143
- combined_embeddings = indexing_model.encode([v for _, v in combined_texts], convert_to_tensor=True)
144
 
145
- # Encontrar códigos relevantes
146
- scores = util.pytorch_cos_sim(query_embedding, combined_embeddings).squeeze(0)
147
- top_k = min(3, len(combined_texts))
148
- top_indices = torch.topk(scores, k=top_k).indices
149
 
150
- response = ""
151
- for idx in top_indices:
152
- file_name, relevant_code = combined_texts[idx.item()]
 
 
 
 
 
 
 
 
 
 
153
 
154
- # Gerar explicação usando CodeBERT com a query processada
155
- explanation_prompt = f"Query processada: {processed_message}\nExplique o código:\n{relevant_code[:500]}"
156
- inputs = tokenizer(explanation_prompt, return_tensors="pt", max_length=512, truncation=True, padding=True)
157
 
158
- with torch.no_grad():
159
- outputs = codebert_model.generate(
160
- inputs["input_ids"],
161
- max_length=1024,
162
- num_return_sequences=1,
163
- temperature=0.7,
164
- top_p=0.9
165
- )
166
- explanation = tokenizer.decode(outputs[0], skip_special_tokens=True)
167
-
168
- response += f"\n\n**Arquivo:** {file_name}\n"
169
- response += f"**Código:**\n```\n{relevant_code[:1000]}\n```\n"
170
- response += f"**Explicação:**\n{explanation}\n"
171
- response += "-" * 80
172
-
173
- return response
174
 
175
  except Exception as e:
176
- return f"Erro durante a análise: {str(e)}"
177
 
178
- # Configuração e inicialização
179
  install_tools()
180
 
181
- # Interfaces Gradio
182
- apk_upload_interface = gr.Interface(
183
  fn=decompile_apk,
184
- inputs=gr.File(label="Enviar arquivo APK", file_types=[".apk"]),
185
  outputs="text",
186
- title="Analisador de APK com BLOOM + CodeBERT",
187
- description="Envie um arquivo APK para análise avançada com IA."
188
  )
189
 
190
  chat_interface = gr.Interface(
191
  fn=query_apk_chat,
192
- inputs=gr.Textbox(lines=3, placeholder="Faça uma pergunta sobre o código do APK..."),
193
- outputs=gr.Textbox(lines=20, label="Análise Detalhada"),
194
- title="Chat Avançado com APK",
195
- description="Análise inteligente do código usando BLOOM para processamento de linguagem natural e CodeBERT para análise técnica."
196
  )
197
 
198
  # Interface combinada
199
  iface = gr.TabbedInterface(
200
- [apk_upload_interface, chat_interface],
201
- ["Enviar & Analisar", "Análise Inteligente"]
202
  )
203
 
204
- # Iniciar a interface
205
- iface.launch()
 
7
  import torch
8
  import gradio as gr
9
 
10
+ # Inicialização dos modelos com tratamento de erros
11
+ try:
12
+ # BLOOM para compreensão de linguagem natural
13
+ bloom_tokenizer = BloomTokenizerFast.from_pretrained("bigscience/bloom-560m")
14
+ bloom_model = BloomForCausalLM.from_pretrained("bigscience/bloom-560m")
15
+ bloom_model.eval()
16
+
17
+ # Modelo de indexação
18
+ indexing_model = SentenceTransformer("all-MiniLM-L6-v2")
19
+
20
+ # CodeBERT para análise de código
21
+ tokenizer = RobertaTokenizer.from_pretrained("microsoft/codebert-base")
22
+ codebert_model = RobertaForCausalLM.from_pretrained("microsoft/codebert-base")
23
+ codebert_model.eval()
24
+
25
+ except Exception as e:
26
+ print(f"Erro ao carregar modelos: {str(e)}")
27
+ raise
28
+
29
+ # Contexto global
30
  apk_context = {"smali": {}, "java": {}, "info": ""}
31
 
32
  def check_java():
 
34
  result = subprocess.run(["java", "-version"], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
35
  print("Java está disponível:", result.stderr.decode())
36
  except FileNotFoundError:
37
+ raise EnvironmentError("Java não está instalado")
38
  except Exception as e:
39
+ raise EnvironmentError(f"Erro ao verificar Java: {str(e)}")
40
 
41
  def install_tools():
42
  baksmali_path = "/usr/local/bin/baksmali.jar"
 
45
  check_java()
46
 
47
  if not os.path.exists(baksmali_path):
48
+ print("Instalando Baksmali...")
49
+ subprocess.run([
50
+ "curl", "-L", "-o", baksmali_path,
51
+ "https://bitbucket.org/JesusFreke/smali/downloads/baksmali-2.5.2.jar"
52
+ ], check=True)
 
 
 
 
 
 
53
 
54
  jadx_zip_path = "/usr/local/bin/jadx.zip"
55
  if not os.path.exists(jadx_path):
56
+ print("Instalando JADX...")
57
+ subprocess.run([
58
+ "curl", "-L", "-o", jadx_zip_path,
59
+ "https://github.com/skylot/jadx/releases/download/v1.4.7/jadx-1.4.7.zip"
60
+ ], check=True)
 
 
 
 
 
 
61
  subprocess.run(["unzip", "-o", jadx_zip_path, "-d", "/usr/local/bin/jadx"], check=True)
62
  if os.path.exists(jadx_path):
63
  subprocess.run(["chmod", "+x", jadx_path], check=True)
 
 
64
 
65
  def decompile_apk(apk_file):
66
+ if not apk_file:
67
+ return "Nenhum arquivo enviado"
68
 
69
  temp_apk_path = apk_file.name
70
  output_dir = tempfile.mkdtemp()
71
  try:
72
+ # Smali
73
  smali_output = os.path.join(output_dir, "smali")
74
+ subprocess.run([
75
+ "java", "-jar", "/usr/local/bin/baksmali.jar",
76
+ "d", temp_apk_path, "-o", smali_output
77
+ ], check=True)
78
 
79
+ # JADX
80
  java_output = os.path.join(output_dir, "java")
81
+ subprocess.run([
82
+ "/usr/local/bin/jadx/bin/jadx",
83
+ "-d", java_output, temp_apk_path
84
+ ], check=True)
85
 
86
+ # Coletar arquivos
87
  smali_files = {}
88
+ java_files = {}
89
+
90
  for root, _, files in os.walk(smali_output):
91
  for file in files:
92
  if file.endswith(".smali"):
93
  with open(os.path.join(root, file), "r") as f:
94
  smali_files[file] = f.read()
95
 
 
96
  for root, _, files in os.walk(java_output):
97
  for file in files:
98
  if file.endswith(".java"):
 
102
  apk_context["smali"] = smali_files
103
  apk_context["java"] = java_files
104
 
105
+ return f"Decompilação concluída: {len(smali_files)} arquivos Smali, {len(java_files)} arquivos Java"
106
+
107
+ except Exception as e:
108
+ return f"Erro na decompilação: {str(e)}"
109
+
110
def process_with_bloom(text):
    """Run ``text`` through the BLOOM causal language model.

    Args:
        text: The user's raw question.

    Returns:
        The decoded text of the first sequence returned by
        ``bloom_model.generate``. If tokenization, generation or decoding
        raises, the error is printed and ``text`` is returned unchanged so the
        caller can still search with the raw question.
    """
    # Budget for generated tokens only. The previous ``max_length=200`` also
    # counted prompt tokens, so a prompt close to the 512-token truncation
    # limit left no room to generate anything.
    max_new_tokens = 200

    try:
        # A single string needs no padding; truncation keeps the prompt bounded.
        inputs = bloom_tokenizer(
            text,
            return_tensors="pt",
            max_length=512,
            truncation=True,
        )

        with torch.no_grad():
            outputs = bloom_model.generate(
                inputs["input_ids"],
                # Pass the mask explicitly instead of letting generate() guess it.
                attention_mask=inputs["attention_mask"],
                max_new_tokens=max_new_tokens,
                num_return_sequences=1,
                # ``temperature`` only takes effect when sampling is enabled.
                do_sample=True,
                temperature=0.7,
                pad_token_id=bloom_tokenizer.pad_token_id,
            )

        return bloom_tokenizer.decode(outputs[0], skip_special_tokens=True)

    except Exception as e:
        # Best-effort step: fall back to the unprocessed question.
        print(f"Erro no processamento BLOOM: {str(e)}")
        return text
138
+
139
def analyze_with_codebert(code_text, query):
    """Generate a textual analysis of a code snippet with the CodeBERT model.

    Args:
        code_text: Source code to analyse; only the first 500 characters are
            placed in the prompt.
        query: The (already processed) user question, included in the prompt.

    Returns:
        The decoded text of the first sequence returned by
        ``codebert_model.generate``, or the fixed fallback message
        ``"Não foi possível analisar o código"`` if any step raises.
    """
    # NOTE(review): assumes the checkpoint accepts at most 512 positions
    # (RoBERTa-base style) — confirm against the model config. The prompt is
    # truncated so that prompt + generated tokens stay within that window.
    context_window = 512
    max_new_tokens = 200

    try:
        prompt = f"Query: {query}\nCódigo: {code_text[:500]}"

        # No padding: the previous ``padding="max_length"`` produced a
        # 512-token input that was already longer than ``max_length=200``, so
        # generation could never emit a single new token.
        inputs = tokenizer(
            prompt,
            return_tensors="pt",
            max_length=context_window - max_new_tokens,
            truncation=True,
        )

        with torch.no_grad():
            outputs = codebert_model.generate(
                inputs["input_ids"],
                attention_mask=inputs["attention_mask"],
                max_new_tokens=max_new_tokens,
                num_return_sequences=1,
                pad_token_id=tokenizer.pad_token_id,
            )

        return tokenizer.decode(outputs[0], skip_special_tokens=True)

    except Exception as e:
        # Analysis is optional output; report the failure and keep the chat alive.
        print(f"Erro na análise CodeBERT: {str(e)}")
        return "Não foi possível analisar o código"
169
 
170
  def query_apk_chat(user_message):
171
  if not apk_context["smali"] and not apk_context["java"]:
172
+ return "Nenhum APK decompilado disponível"
173
 
174
  try:
175
+ # Processar query com BLOOM
176
+ processed_query = process_with_bloom(user_message)
177
+
178
+ # Preparar todos os códigos
179
+ all_codes = []
180
+ all_files = []
181
 
182
+ for file, code in apk_context["java"].items():
183
+ all_codes.append(code)
184
+ all_files.append(("java", file))
185
 
186
+ for file, code in apk_context["smali"].items():
187
+ all_codes.append(code)
188
+ all_files.append(("smali", file))
 
189
 
190
+ # Calcular embeddings
191
+ query_embedding = indexing_model.encode(processed_query, convert_to_tensor=True)
192
+ code_embeddings = indexing_model.encode(all_codes, convert_to_tensor=True)
193
+
194
+ # Encontrar matches
195
+ similarities = util.pytorch_cos_sim(query_embedding, code_embeddings)[0]
196
+ top_k = min(3, len(all_codes))
197
+ best_matches = torch.topk(similarities, k=top_k)
198
+
199
+ response = []
200
+ for score, idx in zip(best_matches.values, best_matches.indices):
201
+ file_type, file_name = all_files[idx]
202
+ code = all_codes[idx]
203
 
204
+ # Análise do código
205
+ analysis = analyze_with_codebert(code, processed_query)
 
206
 
207
+ response.append(f"\nArquivo ({file_type}): {file_name}")
208
+ response.append(f"Relevância: {score:.2f}")
209
+ response.append(f"Código:\n{code[:500]}...")
210
+ response.append(f"Análise:\n{analysis}\n")
211
+ response.append("-" * 80)
212
+
213
+ return "\n".join(response)
 
 
 
 
 
 
 
 
 
214
 
215
  except Exception as e:
216
+ return f"Erro na análise: {str(e)}"
217
 
218
+ # Configuração Gradio
219
  install_tools()
220
 
221
+ upload_interface = gr.Interface(
 
222
  fn=decompile_apk,
223
+ inputs=gr.File(label="APK File", file_types=[".apk"]),
224
  outputs="text",
225
+ title="APK Analyzer",
226
+ description="Upload an APK file for analysis"
227
  )
228
 
229
  chat_interface = gr.Interface(
230
  fn=query_apk_chat,
231
+ inputs=gr.Textbox(lines=3, placeholder="Ask about the APK code..."),
232
+ outputs=gr.Textbox(lines=20),
233
+ title="Code Analysis Chat",
234
+ description="AI-powered code analysis"
235
  )
236
 
237
  # Interface combinada
238
  iface = gr.TabbedInterface(
239
+ [upload_interface, chat_interface],
240
+ ["Upload APK", "Analyze Code"]
241
  )
242
 
243
+ if __name__ == "__main__":
244
+ iface.launch()