unscramble.ts
//! This takes an Arkose `enforcement.js` file and unscrambles
//! all the strings so it finally gets a bit readable.
//!
//! Usage: bun unscramble.ts original.js destination.js

import {
  AssignmentExpression,
  ForStatement,
  FunctionExpression,
  Identifier,
  Node,
  SimpleCallExpression,
} from "estree";

import {
  traverse,
  is,
  BindingPathT,
  BindingKind,
  Binding,
  NodePath,
} from "estree-toolkit";

import { parseScript } from "meriyah";
import { generate } from "astring";

type CallPath = NodePath<SimpleCallExpression, Node>;
type AnyBinding = Binding<BindingKind>;
type OffsetFnPath = BindingPathT<BindingKind>;

/** Everything needed to turn `O(index)` into its string, for one offset function. */
interface Unscrambler {
  /** The constants array, already rotated by the scrambler loop. */
  constants: Array<string>;
  /** Value subtracted from the call argument before indexing `constants`. */
  offset: number;
  /** The self-invoked function holding the scrambler loop; calls inside it are left alone. */
  scrambler_fn: FunctionExpression;
}

const original = process.argv[2];
const destination = process.argv[3];

if (!original || !destination) {
  console.log(`Usage: bun unscramble.ts original.js destination.js`);
  process.exit(1);
}

/**
 * Follows a `var` binding until a numeric literal is found.
 *
 * Handles `var S = 50;` as well as `var jt; jt = 1187;` and chains of
 * identifiers assigned to one another.
 *
 * @returns the number, or `undefined` when the binding does not resolve to one.
 */
function resolveNumericBinding(bind: AnyBinding): number | undefined {
  const node = bind.path.node;
  if (!is.variableDeclarator(node) || !is.identifier(node.id)) return undefined;

  if (node.init === null) {
    // Declared without a value: fall back to the first assignment made to it.
    const assign = bind.constantViolations.find((v) =>
      is.assignmentExpression(v.parent)
    );

    if (!assign || !assign.parent) return undefined;
    if (!is.assignmentExpression(assign.parent)) return undefined;

    const right = assign.parent.right;
    if (is.identifier(right)) {
      if (!assign.scope) return undefined;

      const next = assign.scope.getBinding(right.name);
      if (!next) return undefined;

      return resolveNumericBinding(next);
    }

    if (is.literal(right) && typeof right.value === "number") {
      return right.value;
    }

    return undefined;
  }

  if (is.literal(node.init) && typeof node.init.value === "number") {
    return node.init.value;
  }

  return undefined;
}

/**
 * Reads the index passed to a candidate call.
 *
 * We're looking for function calls that look like the following.
 *
 *   O(100); // the easy one...
 *
 *   var S = 50;
 *   O(S); // the medium one
 *
 *   var jt;
 *   ((jt = 1187), O(jt)) // very hard one!
 *
 * @returns the index of the string to look up in the constants, or
 *          `undefined` when the call does not have that shape.
 */
function readIndex(call: CallPath): number | undefined {
  if (!call.node || !call.scope) return undefined;

  const args = call.node.arguments;
  if (args.length !== 1) return undefined;

  const arg = args[0];

  if (is.literal(arg) && typeof arg.value === "number") return arg.value;
  if (!is.identifier(arg)) return undefined;

  // `((jt = 1187), O(jt))`: the value in effect is the nearest assignment that
  // precedes the call inside the same sequence. Looking at the binding's first
  // assignment instead would be wrong whenever the variable is reused.
  const sequence = call.parent;
  if (is.sequenceExpression(sequence)) {
    const position = sequence.expressions.indexOf(call.node);

    for (let i = position - 1; i >= 0; i--) {
      const expr = sequence.expressions[i];
      if (
        !is.assignmentExpression(expr) ||
        expr.operator !== "=" ||
        !is.identifier(expr.left) ||
        expr.left.name !== arg.name
      )
        continue;

      if (is.literal(expr.right) && typeof expr.right.value === "number") {
        return expr.right.value;
      }

      if (is.identifier(expr.right)) {
        const source = call.scope.getBinding(expr.right.name);
        return source ? resolveNumericBinding(source) : undefined;
      }

      // Assigned from something we cannot evaluate statically.
      return undefined;
    }
  }

  const decl = call.scope.getBinding(arg.name);
  if (!decl) return undefined;

  return resolveNumericBinding(decl);
}

/**
 * Follows a `var` binding through identifier aliases (`rE = j; rE(200);`)
 * until a two-parameter function declaration is reached.
 */
function resolveFnBinding(bind: AnyBinding): OffsetFnPath | undefined {
  const node = bind.path.node;
  if (!is.variableDeclarator(node) || !is.identifier(node.id)) return undefined;

  let init: Identifier | undefined;

  if (node.init === null) {
    const assign = bind.constantViolations.find((v) =>
      is.assignmentExpression(v.parent)
    );

    if (!assign || !assign.parent) return undefined;
    if (
      !is.assignmentExpression(assign.parent) ||
      !is.identifier(assign.parent.right)
    )
      return undefined;

    init = assign.parent.right;
  } else if (is.identifier(node.init)) {
    init = node.init;
  }

  if (!init) return undefined;

  const ref = bind.scope.getBinding(init.name);
  if (!ref) return undefined;

  if (is.functionDeclaration(ref.path.node)) {
    if (ref.path.node.params.length !== 2) return undefined;
    return ref.path;
  } else if (is.variableDeclarator(ref.path)) {
    return resolveFnBinding(ref);
  }

  return undefined;
}

/**
 * Finds the declaration of the function being called.
 *
 * This is what we're looking for here.
 *
 *   function j(t, e) {
 *     var r = N();
 *     return (
 *       (j = function (t, e) {
 *         return r[(t -= 477)];
 *       }),
 *       j(t, e)
 *     );
 *   }
 *
 * The callee may be `j` itself or an identifier aliasing it, possibly
 * through several levels.
 */
function readOffsetFn(call: CallPath): OffsetFnPath | undefined {
  if (!call.node || !call.scope) return undefined;
  if (!is.identifier(call.node.callee)) return undefined;

  const fn = call.scope.getBinding(call.node.callee.name);
  if (!fn) return undefined;

  if (is.functionDeclaration(fn.path.node)) {
    if (fn.path.node.params.length !== 2) return undefined;
    return fn.path;
  }

  if (is.variableDeclarator(fn.path.node)) return resolveFnBinding(fn);

  return undefined;
}

/**
 * Reads the raw constants array reachable from the offset function.
 *
 * The offset function starts with `var r = N();`, and `N` looks like this:
 *
 *   function N() {
 *     var t = ["...", "...", "..."];
 *     return (N = function () {
 *       return t;
 *     })();
 *   }
 *
 * @returns the binding of `N` and the array elements, in source order.
 * @throws Error when the array contains something that is not a literal.
 */
function readConstants(
  offset_fn: OffsetFnPath
): { constants_fn: AnyBinding; constants: Array<string> } | undefined {
  if (!is.functionDeclaration(offset_fn.node) || !offset_fn.scope)
    return undefined;

  const decls = offset_fn.node.body.body[0];
  if (
    !is.variableDeclaration(decls) ||
    decls.kind !== "var" ||
    decls.declarations.length !== 1
  )
    return undefined;

  const decl = decls.declarations[0];
  if (
    !is.variableDeclarator(decl) ||
    !is.callExpression(decl.init) ||
    !is.identifier(decl.init.callee)
  )
    return undefined;

  const constants_fn = offset_fn.scope.getBinding(decl.init.callee.name);
  if (!constants_fn || !is.functionDeclaration(constants_fn.path.node))
    return undefined;

  // The first statement of the constants function holds the array.
  const inner = constants_fn.path.node.body.body[0];
  if (
    !is.variableDeclaration(inner) ||
    !is.variableDeclarator(inner.declarations[0])
  )
    return undefined;

  const init = inner.declarations[0].init;
  if (!is.arrayExpression(init)) return undefined;

  const constants = init.elements.map((el) => {
    if (!is.literal(el)) throw new Error("not literal in constants array");
    return el.value;
  }) as Array<string>;

  return { constants_fn, constants };
}

/**
 * Extracts the offset from the return statement of the offset function.
 *
 *   return (
 *     (j = function (t, e) {
 *       return r[(t -= 477)];
 *     }),
 *     j(t, e)
 *   );
 *
 * Sometimes it looks like this instead:
 *
 *   return (h = function (t, e) {
 *     return r[(t -= 416)];
 *   })(t, e);
 */
function readOffset(offset_fn: OffsetFnPath): number | undefined {
  if (!is.functionDeclaration(offset_fn.node) || !offset_fn.scope)
    return undefined;

  const outer = offset_fn.node.body.body[1];
  if (!is.returnStatement(outer)) return undefined;

  let assignment: AssignmentExpression | undefined;

  if (is.sequenceExpression(outer.argument)) {
    const first = outer.argument.expressions[0];
    if (!is.assignmentExpression(first)) return undefined;
    assignment = first;
  } else if (is.callExpression(outer.argument)) {
    const callee = outer.argument.callee;
    if (!is.assignmentExpression(callee)) return undefined;
    assignment = callee;
  }

  if (!assignment) return undefined;

  const replacement = assignment.right;
  if (!is.functionExpression(replacement)) return undefined;

  const inner = replacement.body.body[0];
  if (!is.returnStatement(inner) || !is.memberExpression(inner.argument))
    return undefined;

  const property = inner.argument.property;
  if (!is.assignmentExpression(property)) return undefined;

  // The lookup below computes `index - offset`, which is only what the
  // original code does when the operator is `-=`.
  if (property.operator !== "-=") return undefined;

  const right = property.right;
  if (!is.literal(right) || typeof right.value !== "number") return undefined;

  return right.value;
}

/**
 * Locates the loop that rotates the constants array before it is used.
 * It lives in a self-invoked function that receives the constants function:
 *
 *   !(function (t, e) {
 *     for ( ... )
 *     ^^^ this loop is what gets returned
 *   })(Be)
 *
 * The leading `!` is sometimes missing. The constants function is also
 * referenced from the offset function (`var r = N()`), so every reference is
 * inspected until one with the expected shape is found.
 */
function findScrambler(
  constants_fn: AnyBinding
): { scrambler_fn: FunctionExpression; scrambler: ForStatement } | undefined {
  for (const ref of constants_fn.references) {
    const call = ref.parentPath;
    if (!is.callExpression(call) || !call.node) continue;

    // When wrapped in a unary expression, only `!` is the known shape.
    const wrapper = call.parentPath;
    if (is.unaryExpression(wrapper) && wrapper.node?.operator !== "!") continue;

    const callee = call.node.callee;
    if (!is.functionExpression(callee)) continue;

    const loop = callee.body.body[0];
    if (!is.forStatement(loop)) continue;

    return { scrambler_fn: callee, scrambler: loop };
  }

  return undefined;
}

/**
 * Runs the scrambler loop against a copy of the constants.
 *
 *   for (
 *     var r = 469,
 *       n = 457,
 *       ...
 *       h = Fr,  -> (i) => constants[i - offset]
 *       d = t(); -> constants
 *     ;
 *   )
 *     try {
 *       if (274856 === (parseInt(h(r)) / 1) * (parseInt(h(n)) / 2) + ...)
 *         break;
 *       d.push(d.shift());
 *     } catch (t) {
 *       d.push(d.shift());
 *     }
 *
 * Since we're lazy, the loop is evaluated instead of interpreted. The two
 * declarations not initialised with a number tell us which name is the offset
 * function and which one is the constants function.
 *
 * @returns the rotated array, or `undefined` when the loop has another shape.
 */
function scrambleConstants(
  constants: Array<string>,
  offset: number,
  scrambler: ForStatement
): Array<string> | undefined {
  const decls = scrambler.init;
  if (!is.variableDeclaration(decls)) return undefined;

  let constants_name: string | undefined;
  let offset_fn_name: string | undefined;

  for (const decl of decls.declarations) {
    if (is.callExpression(decl.init) && is.identifier(decl.init.callee)) {
      constants_name = decl.init.callee.name;
    } else if (is.identifier(decl.init)) {
      offset_fn_name = decl.init.name;
    }
  }

  if (!constants_name || !offset_fn_name) return undefined;

  // SECURITY: this executes code taken verbatim from the input file, with the
  // privileges of this process. Only run the tool on files you are willing to
  // execute, or run it in a sandbox.
  const scrambled: unknown = eval(`
    const __constants = ${JSON.stringify(constants)};

    (function() {
      const ${constants_name} = () => __constants;
      const ${offset_fn_name} = (i) => __constants[i - ${offset}];
      ${generate(scrambler)}
    })();

    __constants;
  `);

  return Array.isArray(scrambled) ? (scrambled as Array<string>) : undefined;
}

// The result only depends on the offset function, so it is computed once per
// function instead of once per call site (which would re-run `eval` each time).
const unscramblers = new Map<Node, Unscrambler>();

/** Builds, or returns the cached, lookup data for the given offset function. */
function loadUnscrambler(offset_fn: OffsetFnPath): Unscrambler | undefined {
  const key = offset_fn.node;
  if (!key) return undefined;

  const known = unscramblers.get(key);
  if (known) return known;

  const source = readConstants(offset_fn);
  if (!source) return undefined;

  const offset = readOffset(offset_fn);
  if (offset === undefined) return undefined;

  const found = findScrambler(source.constants_fn);
  if (!found) return undefined;

  const constants = scrambleConstants(source.constants, offset, found.scrambler);
  if (!constants) return undefined;

  const built: Unscrambler = {
    constants,
    offset,
    scrambler_fn: found.scrambler_fn,
  };

  unscramblers.set(key, built);
  return built;
}

const script = await Bun.file(original).text();
const ast = parseScript(script);

traverse(ast, {
  $: { scope: true },

  CallExpression($) {
    // estree's `CallExpression` also covers `new` expressions; the helpers
    // only read properties shared by both.
    const call = $ as CallPath;

    const index = readIndex(call);
    if (index === undefined) return;

    const offset_fn = readOffsetFn(call);
    if (!offset_fn) return;

    const unscrambler = loadUnscrambler(offset_fn);
    if (!unscrambler) return;

    // Never rewrite calls inside the scrambler itself: it has to keep working
    // on the untouched array for every later lookup.
    const parent = call.getFunctionParent();
    if (parent && parent.node === unscrambler.scrambler_fn) return;

    // An index outside the table has no string; keep the call as it is rather
    // than emitting a literal without a value.
    const value = unscrambler.constants[index - unscrambler.offset];
    if (value === undefined) return;

    call.replaceWith({
      type: "Literal",
      value,
    });
  },
});

// Write the file a first time, syntax still mangled.
await Bun.write(destination, generate(ast));

// Demangle the syntax by building the script with itself
// so Bun will automatically simplify the syntax and strings.
const build = await Bun.build({
  entrypoints: [destination],
  minify: {
    syntax: true,
    keepNames: true,
    whitespace: false,
    identifiers: true,
  },
});

const [minified] = build.outputs;
if (!build.success || !minified) {
  // The first-pass output written above is left in place.
  for (const log of build.logs) console.error(log);
  process.exit(1);
}

// Overwrite the file.
await Bun.write(destination, await minified.text());