fix rewriter

This commit is contained in:
velzie 2024-07-27 16:40:50 -04:00
parent 0e37a7f267
commit 396170e4ea
No known key found for this signature in database
GPG key ID: 048413F95F0DDE1F
7 changed files with 115 additions and 43 deletions

View file

@ -6,6 +6,13 @@ edition = "2021"
[lib]
crate-type = ["cdylib"]
[profile.speed]
inherits = "release"
opt-level = 3
[profile.release]
opt-level = "z"
[dependencies]
console_error_panic_hook = "0.1.7"
js-sys = "0.3.69"

View file

@ -1,4 +1,4 @@
RUSTFLAGS='-C target-feature=+atomics,+bulk-memory -Zlocation-detail=none' cargo build --lib --target wasm32-unknown-unknown -Z build-std=panic_abort,std -Z build-std-features=panic_immediate_abort,optimize_for_size --release
RUSTFLAGS='-C target-feature=+atomics,+bulk-memory -Zlocation-detail=none' cargo build --lib --target wasm32-unknown-unknown -Z build-std=panic_abort,std -Z build-std-features=panic_immediate_abort --release
wasm-bindgen --weak-refs --target web --out-dir out/ target/wasm32-unknown-unknown/release/rewriter.wasm
sed -i 's/import.meta.url/""/g' out/rewriter.js
@ -7,7 +7,7 @@ cd ..
WASM=rewriter/out/rewriter_bg.wasm
time wasm-opt -Oz --vacuum --dce --enable-threads --enable-bulk-memory --enable-simd "$WASM" -o rewriter/out/optimized.wasm
time wasm-opt -O4 --vacuum --dce --enable-threads --enable-bulk-memory --enable-simd "$WASM" -o rewriter/out/optimized.wasm
# cp "$WASM" rewriter/out/optimized.wasm
echo -n "self.WASM = '" > static/wasm.js

View file

@ -30,22 +30,42 @@ fn create_encode_function(encode: Function) -> EncodeFn {
}
#[wasm_bindgen]
pub fn rewrite_js(js: &str, url: &str, encode: Function) -> Vec<u8> {
/// Rewrites a JavaScript source string and returns the rewritten bytes.
///
/// `url` is parsed into the base `Url` handed to `rewrite`, and `encode` (a
/// JavaScript function) is converted into an `EncodeFn` via
/// `create_encode_function`. `prefix`, `wrapfn` and `importfn` are forwarded
/// to `rewrite` unchanged.
///
/// # Panics
///
/// Panics if `url` cannot be parsed as a URL.
pub fn rewrite_js(
    js: &str,
    url: &str,
    prefix: String,
    encode: Function,
    wrapfn: String,
    importfn: String,
) -> Vec<u8> {
    // Same evaluation order as passing these inline: URL first, then encoder.
    let base = Url::from_str(url).unwrap();
    let encoder = create_encode_function(encode);
    rewrite(js, base, prefix, encoder, wrapfn, importfn)
}
#[wasm_bindgen]
pub fn rewrite_js_from_arraybuffer(js: &[u8], url: &str, encode: Function) -> Vec<u8> {
/// Rewrites JavaScript supplied as raw bytes and returns the rewritten bytes.
///
/// The bytes are interpreted as UTF-8. Valid input is borrowed without
/// copying; any invalid sequence is replaced with U+FFFD before parsing.
/// `url` is parsed into the base `Url` handed to `rewrite`, and `encode` (a
/// JavaScript function) is converted into an `EncodeFn` via
/// `create_encode_function`. `prefix`, `wrapfn` and `importfn` are forwarded
/// to `rewrite` unchanged.
///
/// # Panics
///
/// Panics if `url` cannot be parsed as a URL.
pub fn rewrite_js_from_arraybuffer(
    js: &[u8],
    url: &str,
    prefix: String,
    encode: Function,
    wrapfn: String,
    importfn: String,
) -> Vec<u8> {
    // The buffer comes from the JavaScript caller and nothing here guarantees
    // it is valid UTF-8. Building a `&str` from invalid bytes is undefined
    // behaviour, so validate instead of using `from_utf8_unchecked`.
    // `from_utf8_lossy` only allocates when it has to substitute U+FFFD.
    let js = String::from_utf8_lossy(js);
    rewrite(
        &js,
        Url::from_str(url).unwrap(),
        prefix,
        create_encode_function(encode),
        wrapfn,
        importfn,
    )
}

View file

@ -112,7 +112,10 @@ fn main() -> std::io::Result<()> {
rewrite(
&source_text,
Url::from_str("https://google.com/glorngle/si.js").unwrap(),
Box::new(encode_string)
"/scrammedjet/".to_string(),
Box::new(encode_string),
"$wrap".to_string(),
"$import".to_string(),
)
.as_slice()
)

View file

@ -31,6 +31,9 @@ pub type EncodeFn = Box<dyn Fn(String) -> String>;
/// State carried through the AST visit: the changes collected so far plus the
/// settings that determine what text the rewritten code contains.
struct Rewriter {
/// Changes pushed by the visitor methods while walking the program.
jschanges: Vec<JsChange>,
/// URL of the script being rewritten; it is interpolated as the argument of
/// the `importfn` call emitted for dynamic `import(...)` expressions.
base: Url,
/// Text placed in front of every encoded URL when a rewritten URL string is
/// built.
prefix: String,
/// Name of the JavaScript function emitted around unsafe global identifiers
/// and `this`.
wrapfn: String,
/// Name of the JavaScript function emitted in place of the `import` keyword
/// of a dynamic import expression.
importfn: String,
/// Callback applied to a URL's string form to produce its encoded form.
encode: EncodeFn,
}
impl Rewriter {
@ -39,7 +42,7 @@ impl Rewriter {
let urlencoded = (self.encode)(url.to_string());
format!("\"/scramjet/{}\"", urlencoded)
format!("\"{}{}\"", self.prefix, urlencoded)
}
}
@ -48,14 +51,14 @@ impl<'a> Visit<'a> for Rewriter {
if UNSAFE_GLOBALS.contains(&it.name.to_string().as_str()) {
self.jschanges.push(JsChange::GenericChange {
span: it.span,
text: format!("(globalThis.$s({}))", it.name),
text: format!("({}({}))", self.wrapfn, it.name),
});
}
}
fn visit_this_expression(&mut self, it: &oxc_ast::ast::ThisExpression) {
self.jschanges.push(JsChange::GenericChange {
span: it.span,
text: "(globalThis.$s(this))".to_string(),
text: format!("({}(this))", self.wrapfn),
});
}
@ -71,7 +74,7 @@ impl<'a> Visit<'a> for Rewriter {
fn visit_import_expression(&mut self, it: &oxc_ast::ast::ImportExpression<'a>) {
self.jschanges.push(JsChange::GenericChange {
span: Span::new(it.span.start, it.span.start + 6),
text: format!("(globalThis.$sImport(\"{}\"))", self.base),
text: format!("({}(\"{}\"))", self.importfn, self.base),
});
walk::walk_import_expression(self, it);
}
@ -106,7 +109,7 @@ impl<'a> Visit<'a> for Rewriter {
if UNSAFE_GLOBALS.contains(&s.name.to_string().as_str()) && p.shorthand {
self.jschanges.push(JsChange::GenericChange {
span: s.span,
text: format!("{}: (globalThis.$s({}))", s.name, s.name),
text: format!("{}: ({}({}))", s.name, self.wrapfn, s.name),
});
return;
}
@ -139,7 +142,7 @@ impl<'a> Visit<'a> for Rewriter {
}
_ => {}
}
walk::walk_expression(self, &it.right);
walk::walk_assignment_expression(self, &it);
}
}
@ -204,7 +207,14 @@ const UNSAFE_GLOBALS: [&str; 8] = [
"document",
];
pub fn rewrite(js: &str, url: Url, encode: EncodeFn) -> Vec<u8> {
pub fn rewrite(
js: &str,
url: Url,
prefix: String,
encode: EncodeFn,
wrapfn: String,
importfn: String,
) -> Vec<u8> {
let allocator = Allocator::default();
let source_type = SourceType::default();
let ret = Parser::new(&allocator, js, source_type).parse();
@ -222,7 +232,10 @@ pub fn rewrite(js: &str, url: Url, encode: EncodeFn) -> Vec<u8> {
let mut ast_pass = Rewriter {
jschanges: Vec::new(),
base: url,
prefix,
encode,
wrapfn,
importfn,
};
ast_pass.visit_program(&program);
@ -294,31 +307,13 @@ pub fn rewrite(js: &str, url: Url, encode: EncodeFn) -> Vec<u8> {
let start = entirespan.start as usize;
buffer.extend_from_slice(js[offset..start].as_bytes());
let opstr = match op {
AssignmentOperator::Assign => "=",
AssignmentOperator::Addition => "+=",
AssignmentOperator::Subtraction => "-=",
AssignmentOperator::Multiplication => "*=",
AssignmentOperator::Division => "/=",
AssignmentOperator::Remainder => "%=",
AssignmentOperator::Exponential => "**=",
AssignmentOperator::ShiftLeft => "<<=",
AssignmentOperator::ShiftRight => ">>=",
AssignmentOperator::ShiftRightZeroFill => ">>>=",
AssignmentOperator::BitwiseAnd => "&=",
AssignmentOperator::BitwiseXOR => "^=",
AssignmentOperator::BitwiseOR => "|=",
AssignmentOperator::LogicalAnd => "&&=",
AssignmentOperator::LogicalOr => "||=",
AssignmentOperator::LogicalNullish => "??=",
};
buffer.extend_from_slice(
let opstr = buffer.extend_from_slice(
format!(
"((t)=>$tryset({},\"{}\",t)||{}=t)({})",
"((t)=>$scramjet$tryset({},\"{}\",t)||{}{}t)({})",
name,
opstr,
fmt_op(*op),
name,
fmt_op(*op),
&js[rhsspan.start as usize..rhsspan.end as usize]
)
.as_bytes(),
@ -333,3 +328,24 @@ pub fn rewrite(js: &str, url: Url, encode: EncodeFn) -> Vec<u8> {
buffer
}
/// Returns the JavaScript source spelling of an assignment operator,
/// e.g. `"="` for `Assign` or `">>>="` for `ShiftRightZeroFill`.
fn fmt_op(op: AssignmentOperator) -> &'static str {
    match op {
        // Plain assignment.
        AssignmentOperator::Assign => "=",

        // Arithmetic compound assignments.
        AssignmentOperator::Addition => "+=",
        AssignmentOperator::Subtraction => "-=",
        AssignmentOperator::Multiplication => "*=",
        AssignmentOperator::Division => "/=",
        AssignmentOperator::Remainder => "%=",
        AssignmentOperator::Exponential => "**=",

        // Shift compound assignments.
        AssignmentOperator::ShiftLeft => "<<=",
        AssignmentOperator::ShiftRight => ">>=",
        AssignmentOperator::ShiftRightZeroFill => ">>>=",

        // Bitwise compound assignments.
        AssignmentOperator::BitwiseAnd => "&=",
        AssignmentOperator::BitwiseOR => "|=",
        AssignmentOperator::BitwiseXOR => "^=",

        // Logical (short-circuiting) compound assignments.
        AssignmentOperator::LogicalAnd => "&&=",
        AssignmentOperator::LogicalOr => "||=",
        AssignmentOperator::LogicalNullish => "??=",
    }
}

View file

@ -6,15 +6,23 @@ export const issw = "ServiceWorkerGlobalScope" in self;
export const isdedicated = "DedicatedWorkerGlobalScope" in self;
export const isshared = "SharedWorkerGlobalScope" in self;
export const wrapfn = "$scramjet$wrap";
export const trysetfn = "$scramjet$tryset";
export const importfn = "$scramjet$import";
export default function (client: ScramjetClient, self: typeof globalThis) {
function scope(identifier: any) {
// this will break iframe postmessage!
// the main magic of the proxy. all attempts to access any "banned objects" will be redirected here, and instead served a proxy object
// this contrasts from how other proxies will leave the root object alone and instead attempt to catch every member access
// this presents some issues (see element.ts), but makes us a good bit faster at runtime!
self[wrapfn] = function (identifier: any) {
if (
iswindow &&
(identifier instanceof self.Window ||
identifier instanceof self.top.window.Window ||
identifier instanceof self.parent.window.Window)
) {
// this will break iframe postmessage!
return client.windowProxy;
} else if (
(iswindow && identifier instanceof Location) ||
@ -28,12 +36,13 @@ export default function (client: ScramjetClient, self: typeof globalThis) {
}
return identifier;
}
};
// shorthand because this can get out of hand really quickly
self.$s = scope;
self.$tryset = function (lhs: any, op: string, rhs: any) {
// location = "..." can't be rewritten as wrapfn(location) = ..., so instead it will actually be rewritten as
// ((t)=>$scramjet$tryset(location,"+=",t)||location+=t)(...);
// it has to be a discrete function because there's always the possibility that "location" is a local variable
// we have to use an IIFE to avoid duplicating side-effects in the getter
self[trysetfn] = function (lhs: any, op: string, rhs: any) {
if (lhs instanceof Location) {
// @ts-ignore
locationProxy.href = rhs;

View file

@ -8,6 +8,7 @@ import {
rewrite_js_from_arraybuffer,
} from "../../../rewriter/out/rewriter.js";
import "../../../static/wasm.js";
import { importfn, wrapfn } from "../../client/shared/wrap";
initSync(
new WebAssembly.Module(
@ -25,9 +26,25 @@ export function rewriteJs(js: string | ArrayBuffer, origin?: URL) {
const before = performance.now();
if (typeof js === "string") {
js = new TextDecoder().decode(rewrite_js(js, origin.toString()));
js = new TextDecoder().decode(
rewrite_js(
js,
origin.toString(),
self.$scramjet.config.prefix,
self.$scramjet.config.codec.encode as any,
wrapfn,
importfn
)
);
} else {
js = rewrite_js_from_arraybuffer(new Uint8Array(js), origin.toString());
js = rewrite_js_from_arraybuffer(
new Uint8Array(js),
origin.toString(),
self.$scramjet.config.prefix,
self.$scramjet.config.codec.encode as any,
wrapfn,
importfn
);
}
const after = performance.now();