"""Decompile ``.pycb`` bytecode listings to Python source with a local LLM.

Usage: pass either a single ``.pycb`` file or a directory as the only
command-line argument.  A directory is walked recursively and every
``.pycb`` file that does not yet have a sibling output file is decompiled.
"""

import os
import sys

from llama_cpp import Llama

from util import create_prompt, is_bytecode_empty

# Prompts that tokenize to more than this many tokens are skipped.
# NOTE(review): presumably chosen to leave headroom below n_ctx (16384) for
# the generated source -- confirm before changing either value.
MAX_PROMPT_TOKENS = 15000

# The model is loaded once at import time and shared by every call.
llm = Llama(
    model_path="sentient-simulations-pydecompiler-3.7-6.7b-v0.9-q8_0.gguf",
    n_gpu_layers=-1,
    last_n_tokens_size=0,
    n_ctx=16384,
)


def decompile_pycb(file_path: str, force_overwrite=False):
    """Decompile one bytecode listing and write the result next to it.

    The output path is ``file_path`` with its last two characters removed
    (so ``foo.pycb`` is written to ``foo.py``).

    Args:
        file_path: Path of the text file holding the bytecode listing.
        force_overwrite: Accepted for interface compatibility; this function
            does not read it and always overwrites the output file.

    Returns:
        ``"# Empty file"`` when ``is_bytecode_empty`` reports the input as
        empty (nothing is written in that case), ``None`` when the prompt
        exceeds ``MAX_PROMPT_TOKENS`` tokens (nothing is written either),
        otherwise the generated source text that was written to disk,
        including a trailing ``# Finish Reason: ...`` comment.
    """
    source_code_path = file_path[:-2]

    # Explicit UTF-8 keeps file I/O consistent with the UTF-8 encoding used
    # for tokenization below, regardless of the platform's default locale.
    with open(file_path, "r", encoding="utf-8") as bytecode_file:
        bytecode = bytecode_file.read()

    if is_bytecode_empty(bytecode):
        return "# Empty file"

    prompt = create_prompt(bytecode)
    tokens = llm.tokenize(text=prompt.encode("utf-8"), add_bos=True, special=True)
    if len(tokens) > MAX_PROMPT_TOKENS:
        return None

    output = llm(prompt, max_tokens=None, echo=False)
    result = output["choices"][0]
    source_code = result.get("text").strip()
    finish_reason = result.get("finish_reason")
    # Record why generation stopped so truncated outputs can be spotted.
    source_code += f"\n\n# Finish Reason: {finish_reason}\n"

    with open(source_code_path, "w", encoding="utf-8") as source_code_file:
        source_code_file.write(source_code)
    return source_code


def main(argv=None):
    """Run the command-line driver and return a process exit code.

    Args:
        argv: Argument list without the program name; defaults to
            ``sys.argv[1:]``.

    Returns:
        0 on success, 1 when the path is neither a file nor a directory,
        2 when no path argument was given.
    """
    args = sys.argv[1:] if argv is None else argv
    if not args:
        program = os.path.basename(sys.argv[0])
        print(f"usage: {program} <file.pycb | directory>", file=sys.stderr)
        return 2

    path = args[0]
    if os.path.isfile(path):
        print(decompile_pycb(path))
        return 0

    if not os.path.isdir(path):
        print(f"error: {path!r} is not a file or directory", file=sys.stderr)
        return 1

    for root, _dirs, files in os.walk(path):
        for file in files:
            if not file.endswith(".pycb"):
                continue
            # Skip inputs whose output file already exists.
            if os.path.exists(os.path.join(root, file[:-2])):
                continue
            file_path = os.path.join(root, file)
            if decompile_pycb(file_path):
                print(f"Decompiled {file_path}")
    return 0


if __name__ == "__main__":
    sys.exit(main())