mirror of
https://github.com/MercuryWorkshop/scramjet.git
synced 2025-05-17 08:00:02 -04:00
add encoder function arg
This commit is contained in:
parent
544134f800
commit
11a0c09179
7 changed files with 104 additions and 17 deletions
7
rewriter/Cargo.lock
generated
7
rewriter/Cargo.lock
generated
|
@ -469,7 +469,6 @@ dependencies = [
|
||||||
"serde",
|
"serde",
|
||||||
"serde-wasm-bindgen",
|
"serde-wasm-bindgen",
|
||||||
"url",
|
"url",
|
||||||
"urlencoding",
|
|
||||||
"wasm-bindgen",
|
"wasm-bindgen",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -718,12 +717,6 @@ dependencies = [
|
||||||
"percent-encoding",
|
"percent-encoding",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "urlencoding"
|
|
||||||
version = "2.1.3"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "wasm-bindgen"
|
name = "wasm-bindgen"
|
||||||
version = "0.2.92"
|
version = "0.2.92"
|
||||||
|
|
|
@ -17,5 +17,4 @@ oxc_syntax = "0.20.0"
|
||||||
serde = "1.0.204"
|
serde = "1.0.204"
|
||||||
serde-wasm-bindgen = "0.6.5"
|
serde-wasm-bindgen = "0.6.5"
|
||||||
url = "2.5.2"
|
url = "2.5.2"
|
||||||
urlencoding = "2.1.3"
|
|
||||||
wasm-bindgen = "0.2.92"
|
wasm-bindgen = "0.2.92"
|
||||||
|
|
|
@ -1,6 +1,5 @@
|
||||||
RUSTFLAGS='-C target-feature=+atomics,+bulk-memory,+simd128' cargo build --lib --target wasm32-unknown-unknown -Z build-std=panic_abort,std --release
|
RUSTFLAGS='-C target-feature=+atomics,+bulk-memory' cargo build --lib --target wasm32-unknown-unknown -Z build-std=panic_abort,std --release
|
||||||
wasm-bindgen --weak-refs --target web --out-dir out/ target/wasm32-unknown-unknown/release/rewriter.wasm
|
wasm-bindgen --weak-refs --target web --out-dir out/ target/wasm32-unknown-unknown/release/rewriter.wasm
|
||||||
# wasm-bindgen --keep-debug --weak-refs --target web --out-dir out/ target/wasm32-unknown-unknown/release/rewriter.wasm
|
|
||||||
|
|
||||||
sed -i 's/import.meta.url/""/g' out/rewriter.js
|
sed -i 's/import.meta.url/""/g' out/rewriter.js
|
||||||
|
|
||||||
|
|
|
@ -2,6 +2,7 @@ pub mod rewrite;
|
||||||
|
|
||||||
use std::{panic, str::FromStr};
|
use std::{panic, str::FromStr};
|
||||||
|
|
||||||
|
use js_sys::encode_uri_component;
|
||||||
use rewrite::rewrite;
|
use rewrite::rewrite;
|
||||||
use url::Url;
|
use url::Url;
|
||||||
use wasm_bindgen::prelude::*;
|
use wasm_bindgen::prelude::*;
|
||||||
|
@ -12,6 +13,11 @@ extern "C" {
|
||||||
fn log(s: &str);
|
fn log(s: &str);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// import the SCRAM!!! jet encoder here later
|
||||||
|
fn encode(s: String) -> String {
|
||||||
|
encode_uri_component(&s).into()
|
||||||
|
}
|
||||||
|
|
||||||
#[wasm_bindgen]
|
#[wasm_bindgen]
|
||||||
pub fn init() {
|
pub fn init() {
|
||||||
panic::set_hook(Box::new(console_error_panic_hook::hook));
|
panic::set_hook(Box::new(console_error_panic_hook::hook));
|
||||||
|
@ -19,7 +25,7 @@ pub fn init() {
|
||||||
|
|
||||||
#[wasm_bindgen]
|
#[wasm_bindgen]
|
||||||
pub fn rewrite_js(js: &str, url: &str) -> Vec<u8> {
|
pub fn rewrite_js(js: &str, url: &str) -> Vec<u8> {
|
||||||
rewrite(js, Url::from_str(url).unwrap())
|
rewrite(js, Url::from_str(url).unwrap(), Box::new(encode))
|
||||||
}
|
}
|
||||||
|
|
||||||
#[wasm_bindgen]
|
#[wasm_bindgen]
|
||||||
|
@ -28,5 +34,5 @@ pub fn rewrite_js_from_arraybuffer(js: &[u8], url: &str) -> Vec<u8> {
|
||||||
|
|
||||||
let js = unsafe { std::str::from_utf8_unchecked(js) };
|
let js = unsafe { std::str::from_utf8_unchecked(js) };
|
||||||
|
|
||||||
rewrite(js, Url::from_str(url).unwrap())
|
rewrite(js, Url::from_str(url).unwrap(), Box::new(encode))
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
#![allow(clippy::print_stdout)]
|
#![allow(clippy::print_stdout)]
|
||||||
use std::{
|
use std::{
|
||||||
|
borrow::Cow,
|
||||||
env,
|
env,
|
||||||
path::Path,
|
path::Path,
|
||||||
str::{from_utf8, FromStr},
|
str::{from_utf8, FromStr},
|
||||||
|
@ -15,6 +16,91 @@ use url::Url;
|
||||||
// run `cargo run -p oxc_parser --example visitor`
|
// run `cargo run -p oxc_parser --example visitor`
|
||||||
// or `cargo watch -x "run -p oxc_parser --example visitor"`
|
// or `cargo watch -x "run -p oxc_parser --example visitor"`
|
||||||
|
|
||||||
|
/// Percent-encodes every byte except alphanumerics and `-`, `_`, `.`, `~`. Assumes UTF-8 encoding.
|
||||||
|
///
|
||||||
|
/// Call `.into_owned()` if you need a `String`
|
||||||
|
#[inline(always)]
|
||||||
|
#[must_use]
|
||||||
|
pub fn encode(data: &str) -> Cow<'_, str> {
|
||||||
|
encode_binary(data.as_bytes())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Percent-encodes every byte except alphanumerics and `-`, `_`, `.`, `~`.
|
||||||
|
#[inline]
|
||||||
|
#[must_use]
|
||||||
|
pub fn encode_binary(data: &[u8]) -> Cow<'_, str> {
|
||||||
|
// add maybe extra capacity, but try not to exceed allocator's bucket size
|
||||||
|
let mut escaped = String::new();
|
||||||
|
let _ = escaped.try_reserve(data.len() | 15);
|
||||||
|
let unmodified = append_string(data, &mut escaped, true);
|
||||||
|
if unmodified {
|
||||||
|
return Cow::Borrowed(unsafe {
|
||||||
|
// encode_into has checked it's ASCII
|
||||||
|
std::str::from_utf8_unchecked(data)
|
||||||
|
});
|
||||||
|
}
|
||||||
|
Cow::Owned(escaped)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn append_string(data: &[u8], escaped: &mut String, may_skip: bool) -> bool {
|
||||||
|
encode_into(data, may_skip, |s| {
|
||||||
|
escaped.push_str(s);
|
||||||
|
Ok::<_, std::convert::Infallible>(())
|
||||||
|
})
|
||||||
|
.unwrap()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn encode_into<E>(
|
||||||
|
mut data: &[u8],
|
||||||
|
may_skip_write: bool,
|
||||||
|
mut push_str: impl FnMut(&str) -> Result<(), E>,
|
||||||
|
) -> Result<bool, E> {
|
||||||
|
let mut pushed = false;
|
||||||
|
loop {
|
||||||
|
// Fast path to skip over safe chars at the beginning of the remaining string
|
||||||
|
let ascii_len = data.iter()
|
||||||
|
.take_while(|&&c| matches!(c, b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' | b'-' | b'.' | b'_' | b'~')).count();
|
||||||
|
|
||||||
|
let (safe, rest) = if ascii_len >= data.len() {
|
||||||
|
if !pushed && may_skip_write {
|
||||||
|
return Ok(true);
|
||||||
|
}
|
||||||
|
(data, &[][..]) // redundatnt to optimize out a panic in split_at
|
||||||
|
} else {
|
||||||
|
data.split_at(ascii_len)
|
||||||
|
};
|
||||||
|
pushed = true;
|
||||||
|
if !safe.is_empty() {
|
||||||
|
push_str(unsafe { std::str::from_utf8_unchecked(safe) })?;
|
||||||
|
}
|
||||||
|
if rest.is_empty() {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
match rest.split_first() {
|
||||||
|
Some((byte, rest)) => {
|
||||||
|
let enc = &[b'%', to_hex_digit(byte >> 4), to_hex_digit(byte & 15)];
|
||||||
|
push_str(unsafe { std::str::from_utf8_unchecked(enc) })?;
|
||||||
|
data = rest;
|
||||||
|
}
|
||||||
|
None => break,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
Ok(false)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Maps a nibble (`0..=15`) to its uppercase ASCII hex digit (`b'0'..=b'9'`, `b'A'..=b'F'`).
///
/// Callers must pass a value below 16; larger values do not correspond to a hex digit.
#[inline]
fn to_hex_digit(digit: u8) -> u8 {
    debug_assert!(digit < 16, "to_hex_digit expects a nibble, got {digit}");
    match digit {
        0..=9 => b'0' + digit,
        _ => b'A' - 10 + digit,
    }
}
|
||||||
|
|
||||||
|
fn encode_string(s: String) -> String {
|
||||||
|
encode(&s).to_string()
|
||||||
|
}
|
||||||
|
|
||||||
fn main() -> std::io::Result<()> {
|
fn main() -> std::io::Result<()> {
|
||||||
let name = env::args().nth(1).unwrap_or_else(|| "test.js".to_string());
|
let name = env::args().nth(1).unwrap_or_else(|| "test.js".to_string());
|
||||||
let path = Path::new(&name);
|
let path = Path::new(&name);
|
||||||
|
@ -25,7 +111,8 @@ fn main() -> std::io::Result<()> {
|
||||||
from_utf8(
|
from_utf8(
|
||||||
rewrite(
|
rewrite(
|
||||||
&source_text,
|
&source_text,
|
||||||
Url::from_str("https://google.com/glorngle/si.js").unwrap()
|
Url::from_str("https://google.com/glorngle/si.js").unwrap(),
|
||||||
|
Box::new(encode_string)
|
||||||
)
|
)
|
||||||
.as_slice()
|
.as_slice()
|
||||||
)
|
)
|
||||||
|
|
|
@ -8,7 +8,6 @@ use oxc_parser::Parser;
|
||||||
use oxc_span::{SourceType, Span};
|
use oxc_span::{SourceType, Span};
|
||||||
use oxc_syntax::operator::AssignmentOperator;
|
use oxc_syntax::operator::AssignmentOperator;
|
||||||
use url::Url;
|
use url::Url;
|
||||||
use urlencoding::encode;
|
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
enum JsChange {
|
enum JsChange {
|
||||||
|
@ -28,16 +27,17 @@ enum JsChange {
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
type EncodeFn = Box<dyn Fn(String) -> String>;
|
||||||
struct Rewriter {
|
struct Rewriter {
|
||||||
jschanges: Vec<JsChange>,
|
jschanges: Vec<JsChange>,
|
||||||
base: Url,
|
base: Url,
|
||||||
|
encode: EncodeFn,
|
||||||
}
|
}
|
||||||
impl Rewriter {
|
impl Rewriter {
|
||||||
fn rewrite_url(&mut self, url: String) -> String {
|
fn rewrite_url(&mut self, url: String) -> String {
|
||||||
let url = self.base.join(&url).unwrap();
|
let url = self.base.join(&url).unwrap();
|
||||||
|
|
||||||
let urlencoded = encode(url.as_str());
|
let urlencoded = (self.encode)(url.to_string());
|
||||||
|
|
||||||
format!("\"/scramjet/{}\"", urlencoded)
|
format!("\"/scramjet/{}\"", urlencoded)
|
||||||
}
|
}
|
||||||
|
@ -204,7 +204,7 @@ const UNSAFE_GLOBALS: [&str; 8] = [
|
||||||
"document",
|
"document",
|
||||||
];
|
];
|
||||||
|
|
||||||
pub fn rewrite(js: &str, url: Url) -> Vec<u8> {
|
pub fn rewrite(js: &str, url: Url, encode: EncodeFn) -> Vec<u8> {
|
||||||
let allocator = Allocator::default();
|
let allocator = Allocator::default();
|
||||||
let source_type = SourceType::default();
|
let source_type = SourceType::default();
|
||||||
let ret = Parser::new(&allocator, js, source_type).parse();
|
let ret = Parser::new(&allocator, js, source_type).parse();
|
||||||
|
@ -222,6 +222,7 @@ pub fn rewrite(js: &str, url: Url) -> Vec<u8> {
|
||||||
let mut ast_pass = Rewriter {
|
let mut ast_pass = Rewriter {
|
||||||
jschanges: Vec::new(),
|
jschanges: Vec::new(),
|
||||||
base: url,
|
base: url,
|
||||||
|
encode,
|
||||||
};
|
};
|
||||||
|
|
||||||
ast_pass.visit_program(&program);
|
ast_pass.visit_program(&program);
|
||||||
|
|
|
@ -24,4 +24,6 @@ location += "http://example.com";
|
||||||
function f() { return import("x") }
|
function f() { return import("x") }
|
||||||
|
|
||||||
|
|
||||||
|
let window = (1, window);
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue