diff --git a/rewriter/Cargo.lock b/rewriter/Cargo.lock index c852b67..a375df0 100644 --- a/rewriter/Cargo.lock +++ b/rewriter/Cargo.lock @@ -469,7 +469,6 @@ dependencies = [ "serde", "serde-wasm-bindgen", "url", - "urlencoding", "wasm-bindgen", ] @@ -718,12 +717,6 @@ dependencies = [ "percent-encoding", ] -[[package]] -name = "urlencoding" -version = "2.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" - [[package]] name = "wasm-bindgen" version = "0.2.92" diff --git a/rewriter/Cargo.toml b/rewriter/Cargo.toml index 9d3364a..e433f0d 100644 --- a/rewriter/Cargo.toml +++ b/rewriter/Cargo.toml @@ -17,5 +17,4 @@ oxc_syntax = "0.20.0" serde = "1.0.204" serde-wasm-bindgen = "0.6.5" url = "2.5.2" -urlencoding = "2.1.3" wasm-bindgen = "0.2.92" diff --git a/rewriter/build.sh b/rewriter/build.sh index 58c1620..0f13bad 100644 --- a/rewriter/build.sh +++ b/rewriter/build.sh @@ -1,6 +1,5 @@ -RUSTFLAGS='-C target-feature=+atomics,+bulk-memory,+simd128' cargo build --lib --target wasm32-unknown-unknown -Z build-std=panic_abort,std --release +RUSTFLAGS='-C target-feature=+atomics,+bulk-memory' cargo build --lib --target wasm32-unknown-unknown -Z build-std=panic_abort,std --release wasm-bindgen --weak-refs --target web --out-dir out/ target/wasm32-unknown-unknown/release/rewriter.wasm -# wasm-bindgen --keep-debug --weak-refs --target web --out-dir out/ target/wasm32-unknown-unknown/release/rewriter.wasm sed -i 's/import.meta.url/""/g' out/rewriter.js diff --git a/rewriter/src/lib.rs b/rewriter/src/lib.rs index 30e2962..12919a9 100644 --- a/rewriter/src/lib.rs +++ b/rewriter/src/lib.rs @@ -2,6 +2,7 @@ pub mod rewrite; use std::{panic, str::FromStr}; +use js_sys::encode_uri_component; use rewrite::rewrite; use url::Url; use wasm_bindgen::prelude::*; @@ -12,6 +13,11 @@ extern "C" { fn log(s: &str); } +// import the SCRAM!!! jet encoder here later +fn encode(s: String) -> String { + encode_uri_component(&s).into() +} + #[wasm_bindgen] pub fn init() { panic::set_hook(Box::new(console_error_panic_hook::hook)); @@ -19,7 +25,7 @@ pub fn init() { #[wasm_bindgen] pub fn rewrite_js(js: &str, url: &str) -> Vec { - rewrite(js, Url::from_str(url).unwrap()) + rewrite(js, Url::from_str(url).unwrap(), Box::new(encode)) } #[wasm_bindgen] @@ -28,5 +34,5 @@ pub fn rewrite_js_from_arraybuffer(js: &[u8], url: &str) -> Vec { let js = unsafe { std::str::from_utf8_unchecked(js) }; - rewrite(js, Url::from_str(url).unwrap()) + rewrite(js, Url::from_str(url).unwrap(), Box::new(encode)) } diff --git a/rewriter/src/main.rs b/rewriter/src/main.rs index dde40c8..585c5fa 100644 --- a/rewriter/src/main.rs +++ b/rewriter/src/main.rs @@ -1,5 +1,6 @@ #![allow(clippy::print_stdout)] use std::{ + borrow::Cow, env, path::Path, str::{from_utf8, FromStr}, @@ -15,6 +16,91 @@ use url::Url; // run `cargo run -p oxc_parser --example visitor` // or `cargo watch -x "run -p oxc_parser --example visitor"` +/// Percent-encodes every byte except alphanumerics and `-`, `_`, `.`, `~`. Assumes UTF-8 encoding. +/// +/// Call `.into_owned()` if you need a `String` +#[inline(always)] +#[must_use] +pub fn encode(data: &str) -> Cow<'_, str> { + encode_binary(data.as_bytes()) +} + +/// Percent-encodes every byte except alphanumerics and `-`, `_`, `.`, `~`. +#[inline] +#[must_use] +pub fn encode_binary(data: &[u8]) -> Cow<'_, str> { + // add maybe extra capacity, but try not to exceed allocator's bucket size + let mut escaped = String::new(); + let _ = escaped.try_reserve(data.len() | 15); + let unmodified = append_string(data, &mut escaped, true); + if unmodified { + return Cow::Borrowed(unsafe { + // encode_into has checked it's ASCII + std::str::from_utf8_unchecked(data) + }); + } + Cow::Owned(escaped) +} + +fn append_string(data: &[u8], escaped: &mut String, may_skip: bool) -> bool { + encode_into(data, may_skip, |s| { + escaped.push_str(s); + Ok::<_, std::convert::Infallible>(()) + }) + .unwrap() +} + +fn encode_into( + mut data: &[u8], + may_skip_write: bool, + mut push_str: impl FnMut(&str) -> Result<(), E>, +) -> Result { + let mut pushed = false; + loop { + // Fast path to skip over safe chars at the beginning of the remaining string + let ascii_len = data.iter() + .take_while(|&&c| matches!(c, b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' | b'-' | b'.' | b'_' | b'~')).count(); + + let (safe, rest) = if ascii_len >= data.len() { + if !pushed && may_skip_write { + return Ok(true); + } + (data, &[][..]) // redundatnt to optimize out a panic in split_at + } else { + data.split_at(ascii_len) + }; + pushed = true; + if !safe.is_empty() { + push_str(unsafe { std::str::from_utf8_unchecked(safe) })?; + } + if rest.is_empty() { + break; + } + + match rest.split_first() { + Some((byte, rest)) => { + let enc = &[b'%', to_hex_digit(byte >> 4), to_hex_digit(byte & 15)]; + push_str(unsafe { std::str::from_utf8_unchecked(enc) })?; + data = rest; + } + None => break, + }; + } + Ok(false) +} + +#[inline] +fn to_hex_digit(digit: u8) -> u8 { + match digit { + 0..=9 => b'0' + digit, + 10..=255 => b'A' - 10 + digit, + } +} + +fn encode_string(s: String) -> String { + encode(&s).to_string() +} + fn main() -> std::io::Result<()> { let name = env::args().nth(1).unwrap_or_else(|| "test.js".to_string()); let path = Path::new(&name); @@ -25,7 +111,8 @@ fn main() -> std::io::Result<()> { from_utf8( rewrite( &source_text, - Url::from_str("https://google.com/glorngle/si.js").unwrap() + Url::from_str("https://google.com/glorngle/si.js").unwrap(), + Box::new(encode_string) ) .as_slice() ) diff --git a/rewriter/src/rewrite.rs b/rewriter/src/rewrite.rs index 844db85..e086b70 100644 --- a/rewriter/src/rewrite.rs +++ b/rewriter/src/rewrite.rs @@ -8,7 +8,6 @@ use oxc_parser::Parser; use oxc_span::{SourceType, Span}; use oxc_syntax::operator::AssignmentOperator; use url::Url; -use urlencoding::encode; #[derive(Debug)] enum JsChange { @@ -28,16 +27,17 @@ enum JsChange { }, } -#[derive(Debug)] +type EncodeFn = Box String>; struct Rewriter { jschanges: Vec, base: Url, + encode: EncodeFn, } impl Rewriter { fn rewrite_url(&mut self, url: String) -> String { let url = self.base.join(&url).unwrap(); - let urlencoded = encode(url.as_str()); + let urlencoded = (self.encode)(url.to_string()); format!("\"/scramjet/{}\"", urlencoded) } @@ -204,7 +204,7 @@ const UNSAFE_GLOBALS: [&str; 8] = [ "document", ]; -pub fn rewrite(js: &str, url: Url) -> Vec { +pub fn rewrite(js: &str, url: Url, encode: EncodeFn) -> Vec { let allocator = Allocator::default(); let source_type = SourceType::default(); let ret = Parser::new(&allocator, js, source_type).parse(); @@ -222,6 +222,7 @@ pub fn rewrite(js: &str, url: Url) -> Vec { let mut ast_pass = Rewriter { jschanges: Vec::new(), base: url, + encode, }; ast_pass.visit_program(&program); diff --git a/rewriter/test.js b/rewriter/test.js index 7edd13a..65aeee8 100644 --- a/rewriter/test.js +++ b/rewriter/test.js @@ -24,4 +24,6 @@ location += "http://example.com"; function f() { return import("x") } +let window = (1, window); +