use autosorting btreeset (via indexset) and optimize for speed

This commit is contained in:
Toshit Chawda 2024-11-04 20:53:17 -08:00
parent 33744b4e83
commit 9b15201b25
No known key found for this signature in database
GPG key ID: 91480ED99E2B3D9D
5 changed files with 98 additions and 60 deletions

19
rewriter/Cargo.lock generated
View file

@ -332,6 +332,15 @@ dependencies = [
"percent-encoding", "percent-encoding",
] ]
[[package]]
name = "ftree"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "241f9dd9089e67c0b269989e9f884b12a61f68fc07ea8a4be6af8ee164e1abf7"
dependencies = [
"serde",
]
[[package]] [[package]]
name = "getrandom" name = "getrandom"
version = "0.2.15" version = "0.2.15"
@ -497,6 +506,15 @@ dependencies = [
"hashbrown 0.15.0", "hashbrown 0.15.0",
] ]
[[package]]
name = "indexset"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6279c421b4feea3fb8e07ea6bd0934b38b641f6601d5f6677062fad15272ed57"
dependencies = [
"ftree",
]
[[package]] [[package]]
name = "instant" name = "instant"
version = "0.1.13" version = "0.1.13"
@ -1032,6 +1050,7 @@ name = "rewriter"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"boa_engine", "boa_engine",
"indexset",
"instant", "instant",
"js-sys", "js-sys",
"oxc", "oxc",

View file

@ -10,18 +10,15 @@ crate-type = ["cdylib"]
default = ["debug"] default = ["debug"]
debug = [] debug = []
[profile.speed]
inherits = "release"
opt-level = 3
[profile.release] [profile.release]
opt-level = "z" opt-level = 3
debug = true debug = true
lto = true lto = true
codegen-units = 1 codegen-units = 1
panic = "abort" panic = "abort"
[dependencies] [dependencies]
indexset = "0.5.0"
instant = { version = "0.1.13", features = ["wasm-bindgen"] } instant = { version = "0.1.13", features = ["wasm-bindgen"] }
js-sys = "0.3.69" js-sys = "0.3.69"
oxc = "0.34.0" oxc = "0.34.0"

View file

@ -1,5 +1,6 @@
use core::str; use std::str;
use indexset::BTreeSet;
use oxc::{ use oxc::{
allocator::Allocator, allocator::Allocator,
ast::{ ast::{
@ -22,7 +23,7 @@ use url::Url;
use crate::error::{Result, RewriterError}; use crate::error::{Result, RewriterError};
#[derive(Debug)] #[derive(Debug, PartialEq, Eq)]
enum JsChange { enum JsChange {
GenericChange { GenericChange {
span: Span, span: Span,
@ -39,9 +40,47 @@ enum JsChange {
}, },
} }
impl JsChange {
    /// Total order used to keep `JsChange` values sorted inside an ordered set.
    ///
    /// The primary key is the source offset at which a change begins. That
    /// alone is not enough for a set: an ordered set treats two elements that
    /// compare `Equal` as the same element, so comparing only the start offset
    /// makes any two changes that begin at the same offset collide and the
    /// later insert is discarded. To keep distinct changes distinct, and to
    /// keep this order in line with the derived `PartialEq`/`Eq`, ties are
    /// broken by the remaining fields (see `sort_key`).
    ///
    /// Limitations:
    /// - `op` is not part of the key because its type is not known to be
    ///   orderable from here; two `Assignment`s differing only in `op` still
    ///   compare `Equal`.
    /// - Two changes that are identical in every field are still one element
    ///   in a set. If the same insertion can legitimately be pushed twice,
    ///   a set is the wrong container for it.
    ///
    /// NOTE(review): confirm that the code consuming the sorted changes is
    /// happy with this tie order (zero-width changes before wider ones that
    /// start at the same offset).
    fn inner_cmp(&self, other: &Self) -> std::cmp::Ordering {
        self.sort_key().cmp(&other.sort_key())
    }

    /// Builds the tuple that `inner_cmp` compares lexicographically:
    /// `(start, end, variant rank, rhs start, rhs end, text or name)`.
    ///
    /// `SourceTag` has no extent, so its end is its start. Components that do
    /// not apply to a variant are filled with `0` / `""`.
    fn sort_key(&self) -> (u32, u32, u8, u32, u32, &str) {
        match self {
            JsChange::SourceTag { tagstart } => (*tagstart, *tagstart, 0, 0, 0, ""),
            JsChange::GenericChange { span, text } => {
                (span.start, span.end, 1, 0, 0, text.as_str())
            }
            JsChange::Assignment {
                name,
                entirespan,
                rhsspan,
                op: _,
            } => (
                entirespan.start,
                entirespan.end,
                2,
                rhsspan.start,
                rhsspan.end,
                name.as_str(),
            ),
        }
    }
}
impl PartialOrd for JsChange {
    /// Always returns `Some`: the partial order is defined in terms of the
    /// total order from `Ord::cmp`, so the two can never disagree.
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}
impl Ord for JsChange {
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
self.inner_cmp(other)
}
}
pub type EncodeFn = Box<dyn Fn(String) -> String>; pub type EncodeFn = Box<dyn Fn(String) -> String>;
struct Rewriter { struct Rewriter {
jschanges: Vec<JsChange>, jschanges: BTreeSet<JsChange>,
base: Url, base: Url,
config: Config, config: Config,
} }
@ -75,7 +114,7 @@ impl Rewriter {
fn rewrite_ident(&mut self, name: &Atom, span: Span) { fn rewrite_ident(&mut self, name: &Atom, span: Span) {
if UNSAFE_GLOBALS.contains(&name.to_string().as_str()) { if UNSAFE_GLOBALS.contains(&name.to_string().as_str()) {
self.jschanges.push(JsChange::GenericChange { self.jschanges.insert(JsChange::GenericChange {
span, span,
text: format!("({}({}))", self.config.wrapfn, name), text: format!("({}({}))", self.config.wrapfn, name),
}); });
@ -103,7 +142,7 @@ impl Rewriter {
impl<'a> Visit<'a> for Rewriter { impl<'a> Visit<'a> for Rewriter {
fn visit_identifier_reference(&mut self, it: &IdentifierReference<'a>) { fn visit_identifier_reference(&mut self, it: &IdentifierReference<'a>) {
// if self.config.capture_errors { // if self.config.capture_errors {
// self.jschanges.push(JsChange::GenericChange { // self.jschanges.insert(JsChange::GenericChange {
// span: it.span, // span: it.span,
// text: format!( // text: format!(
// "{}({}, typeof arguments != 'undefined' && arguments)", // "{}({}, typeof arguments != 'undefined' && arguments)",
@ -112,7 +151,7 @@ impl<'a> Visit<'a> for Rewriter {
// }); // });
// } else { // } else {
if UNSAFE_GLOBALS.contains(&it.name.to_string().as_str()) { if UNSAFE_GLOBALS.contains(&it.name.to_string().as_str()) {
self.jschanges.push(JsChange::GenericChange { self.jschanges.insert(JsChange::GenericChange {
span: it.span, span: it.span,
text: format!("{}({})", self.config.wrapfn, it.name), text: format!("{}({})", self.config.wrapfn, it.name),
}); });
@ -129,7 +168,7 @@ impl<'a> Visit<'a> for Rewriter {
match it { match it {
MemberExpression::StaticMemberExpression(s) => { MemberExpression::StaticMemberExpression(s) => {
if s.property.name == "postMessage" { if s.property.name == "postMessage" {
self.jschanges.push(JsChange::GenericChange { self.jschanges.insert(JsChange::GenericChange {
span: s.property.span, span: s.property.span,
// an empty object will let us safely reconstruct the realm later // an empty object will let us safely reconstruct the realm later
text: format!("{}({{}}).{}", self.config.setrealmfn, s.property.name), text: format!("{}({{}}).{}", self.config.setrealmfn, s.property.name),
@ -156,11 +195,11 @@ impl<'a> Visit<'a> for Rewriter {
&& !matches!(s.object, Expression::Super(_)) && !matches!(s.object, Expression::Super(_))
{ {
let span = s.object.span(); let span = s.object.span();
self.jschanges.push(JsChange::GenericChange { self.jschanges.insert(JsChange::GenericChange {
span: Span::new(span.start, span.start), span: Span::new(span.start, span.start),
text: " $scramitize(".to_string(), text: " $scramitize(".to_string(),
}); });
self.jschanges.push(JsChange::GenericChange { self.jschanges.insert(JsChange::GenericChange {
span: Span::new(span.end, span.end), span: Span::new(span.end, span.end),
text: ")".to_string(), text: ")".to_string(),
}); });
@ -177,7 +216,7 @@ impl<'a> Visit<'a> for Rewriter {
walk::walk_member_expression(self, it); walk::walk_member_expression(self, it);
} }
fn visit_this_expression(&mut self, it: &ThisExpression) { fn visit_this_expression(&mut self, it: &ThisExpression) {
self.jschanges.push(JsChange::GenericChange { self.jschanges.insert(JsChange::GenericChange {
span: it.span, span: it.span,
text: format!("{}(this)", self.config.wrapthisfn), text: format!("{}(this)", self.config.wrapthisfn),
}); });
@ -185,7 +224,7 @@ impl<'a> Visit<'a> for Rewriter {
fn visit_debugger_statement(&mut self, it: &DebuggerStatement) { fn visit_debugger_statement(&mut self, it: &DebuggerStatement) {
// delete debugger statements entirely. some sites will spam debugger as an anti-debugging measure, and we don't want that! // delete debugger statements entirely. some sites will spam debugger as an anti-debugging measure, and we don't want that!
self.jschanges.push(JsChange::GenericChange { self.jschanges.insert(JsChange::GenericChange {
span: it.span, span: it.span,
text: "".to_string(), text: "".to_string(),
}); });
@ -197,11 +236,11 @@ impl<'a> Visit<'a> for Rewriter {
if let Expression::Identifier(s) = &it.callee { if let Expression::Identifier(s) = &it.callee {
// if it's optional that actually makes it an indirect eval which is handled separately // if it's optional that actually makes it an indirect eval which is handled separately
if s.name == "eval" && !it.optional { if s.name == "eval" && !it.optional {
self.jschanges.push(JsChange::GenericChange { self.jschanges.insert(JsChange::GenericChange {
span: Span::new(s.span.start, s.span.end + 1), span: Span::new(s.span.start, s.span.end + 1),
text: format!("eval({}(", self.config.rewritefn), text: format!("eval({}(", self.config.rewritefn),
}); });
self.jschanges.push(JsChange::GenericChange { self.jschanges.insert(JsChange::GenericChange {
span: Span::new(it.span.end, it.span.end), span: Span::new(it.span.end, it.span.end),
text: ")".to_string(), text: ")".to_string(),
}); });
@ -213,11 +252,11 @@ impl<'a> Visit<'a> for Rewriter {
} }
} }
if self.config.scramitize { if self.config.scramitize {
self.jschanges.push(JsChange::GenericChange { self.jschanges.insert(JsChange::GenericChange {
span: Span::new(it.span.start, it.span.start), span: Span::new(it.span.start, it.span.start),
text: " $scramitize(".to_string(), text: " $scramitize(".to_string(),
}); });
self.jschanges.push(JsChange::GenericChange { self.jschanges.insert(JsChange::GenericChange {
span: Span::new(it.span.end, it.span.end), span: Span::new(it.span.end, it.span.end),
text: ")".to_string(), text: ")".to_string(),
}); });
@ -228,14 +267,14 @@ impl<'a> Visit<'a> for Rewriter {
fn visit_import_declaration(&mut self, it: &ImportDeclaration<'a>) { fn visit_import_declaration(&mut self, it: &ImportDeclaration<'a>) {
let name = it.source.value.to_string(); let name = it.source.value.to_string();
let text = self.rewrite_url(name); let text = self.rewrite_url(name);
self.jschanges.push(JsChange::GenericChange { self.jschanges.insert(JsChange::GenericChange {
span: it.source.span, span: it.source.span,
text, text,
}); });
walk::walk_import_declaration(self, it); walk::walk_import_declaration(self, it);
} }
fn visit_import_expression(&mut self, it: &ImportExpression<'a>) { fn visit_import_expression(&mut self, it: &ImportExpression<'a>) {
self.jschanges.push(JsChange::GenericChange { self.jschanges.insert(JsChange::GenericChange {
span: Span::new(it.span.start, it.span.start + 6), span: Span::new(it.span.start, it.span.start + 6),
text: format!("({}(\"{}\"))", self.config.importfn, self.base), text: format!("({}(\"{}\"))", self.config.importfn, self.base),
}); });
@ -245,7 +284,7 @@ impl<'a> Visit<'a> for Rewriter {
fn visit_export_all_declaration(&mut self, it: &ExportAllDeclaration<'a>) { fn visit_export_all_declaration(&mut self, it: &ExportAllDeclaration<'a>) {
let name = it.source.value.to_string(); let name = it.source.value.to_string();
let text = self.rewrite_url(name); let text = self.rewrite_url(name);
self.jschanges.push(JsChange::GenericChange { self.jschanges.insert(JsChange::GenericChange {
span: it.source.span, span: it.source.span,
text, text,
}); });
@ -255,7 +294,7 @@ impl<'a> Visit<'a> for Rewriter {
if let Some(source) = &it.source { if let Some(source) = &it.source {
let name = source.value.to_string(); let name = source.value.to_string();
let text = self.rewrite_url(name); let text = self.rewrite_url(name);
self.jschanges.push(JsChange::GenericChange { self.jschanges.insert(JsChange::GenericChange {
span: source.span, span: source.span,
text, text,
}); });
@ -271,7 +310,7 @@ impl<'a> Visit<'a> for Rewriter {
if let Some(h) = &it.handler { if let Some(h) = &it.handler {
if let Some(name) = &h.param { if let Some(name) = &h.param {
if let Some(name) = name.pattern.get_identifier() { if let Some(name) = name.pattern.get_identifier() {
self.jschanges.push(JsChange::GenericChange { self.jschanges.insert(JsChange::GenericChange {
span: Span::new(h.body.span.start + 1, h.body.span.start + 1), span: Span::new(h.body.span.start + 1, h.body.span.start + 1),
text: format!("$scramerr({});", name), text: format!("$scramerr({});", name),
}); });
@ -289,7 +328,7 @@ impl<'a> Visit<'a> for Rewriter {
ObjectPropertyKind::ObjectProperty(p) => match &p.value { ObjectPropertyKind::ObjectProperty(p) => match &p.value {
Expression::Identifier(s) => { Expression::Identifier(s) => {
if UNSAFE_GLOBALS.contains(&s.name.to_string().as_str()) && p.shorthand { if UNSAFE_GLOBALS.contains(&s.name.to_string().as_str()) && p.shorthand {
self.jschanges.push(JsChange::GenericChange { self.jschanges.insert(JsChange::GenericChange {
span: s.span, span: s.span,
text: format!("{}: ({}({}))", s.name, self.config.wrapfn, s.name), text: format!("{}: ({}({}))", s.name, self.config.wrapfn, s.name),
}); });
@ -308,7 +347,7 @@ impl<'a> Visit<'a> for Rewriter {
fn visit_function_body(&mut self, it: &FunctionBody<'a>) { fn visit_function_body(&mut self, it: &FunctionBody<'a>) {
// tag function for use in sourcemaps // tag function for use in sourcemaps
if self.config.do_sourcemaps { if self.config.do_sourcemaps {
self.jschanges.push(JsChange::SourceTag { self.jschanges.insert(JsChange::SourceTag {
tagstart: it.span.start, tagstart: it.span.start,
}); });
} }
@ -317,11 +356,11 @@ impl<'a> Visit<'a> for Rewriter {
fn visit_return_statement(&mut self, it: &ReturnStatement<'a>) { fn visit_return_statement(&mut self, it: &ReturnStatement<'a>) {
// if let Some(arg) = &it.argument { // if let Some(arg) = &it.argument {
// self.jschanges.push(JsChange::GenericChange { // self.jschanges.insert(JsChange::GenericChange {
// span: Span::new(it.span.start + 6, it.span.start + 6), // span: Span::new(it.span.start + 6, it.span.start + 6),
// text: format!(" $scramdbg((()=>{{ try {{return arguments}} catch(_){{}} }})(),("), // text: format!(" $scramdbg((()=>{{ try {{return arguments}} catch(_){{}} }})(),("),
// }); // });
// self.jschanges.push(JsChange::GenericChange { // self.jschanges.insert(JsChange::GenericChange {
// span: Span::new(expression_span(arg).end, expression_span(arg).end), // span: Span::new(expression_span(arg).end, expression_span(arg).end),
// text: format!("))"), // text: format!("))"),
// }); // });
@ -351,7 +390,7 @@ impl<'a> Visit<'a> for Rewriter {
fn visit_meta_property(&mut self, it: &MetaProperty<'a>) { fn visit_meta_property(&mut self, it: &MetaProperty<'a>) {
if it.meta.name == "import" { if it.meta.name == "import" {
self.jschanges.push(JsChange::GenericChange { self.jschanges.insert(JsChange::GenericChange {
span: it.span, span: it.span,
text: format!("{}(\"{}\")", self.config.metafn, self.base), text: format!("{}(\"{}\")", self.config.metafn, self.base),
}); });
@ -363,7 +402,7 @@ impl<'a> Visit<'a> for Rewriter {
match &it.left { match &it.left {
AssignmentTarget::AssignmentTargetIdentifier(s) => { AssignmentTarget::AssignmentTargetIdentifier(s) => {
if ["location"].contains(&s.name.to_string().as_str()) { if ["location"].contains(&s.name.to_string().as_str()) {
self.jschanges.push(JsChange::Assignment { self.jschanges.insert(JsChange::Assignment {
name: s.name.to_string(), name: s.name.to_string(),
entirespan: it.span, entirespan: it.span,
rhsspan: it.right.span(), rhsspan: it.right.span(),
@ -404,7 +443,12 @@ const UNSAFE_GLOBALS: &[&str] = &[
"frames", "frames",
]; ];
pub fn rewrite(js: &str, url: Url, sourcetag: String, config: Config) -> Result<(Vec<u8>, Vec<OxcDiagnostic>)> { pub fn rewrite(
js: &str,
url: Url,
sourcetag: String,
config: Config,
) -> Result<(Vec<u8>, Vec<OxcDiagnostic>)> {
let allocator = Allocator::default(); let allocator = Allocator::default();
let source_type = SourceType::default(); let source_type = SourceType::default();
let ret = Parser::new(&allocator, js, source_type) let ret = Parser::new(&allocator, js, source_type)
@ -418,38 +462,13 @@ pub fn rewrite(js: &str, url: Url, sourcetag: String, config: Config) -> Result<
let program = ret.program; let program = ret.program;
let mut ast_pass = Rewriter { let mut ast_pass = Rewriter {
jschanges: Vec::new(), jschanges: BTreeSet::new(),
base: url, base: url,
config, config,
}; };
ast_pass.visit_program(&program); ast_pass.visit_program(&program);
// sort changes
ast_pass.jschanges.sort_by(|a, b| {
let a = match a {
JsChange::GenericChange { span, text: _ } => span.start,
JsChange::Assignment {
name: _,
entirespan,
rhsspan: _,
op: _,
} => entirespan.start,
JsChange::SourceTag { tagstart } => *tagstart,
};
let b = match b {
JsChange::GenericChange { span, text: _ } => span.start,
JsChange::Assignment {
name: _,
entirespan,
rhsspan: _,
op: _,
} => entirespan.start,
JsChange::SourceTag { tagstart } => *tagstart,
};
a.cmp(&b)
});
let original_len = js.len(); let original_len = js.len();
let mut difference = 0i32; let mut difference = 0i32;

View file

@ -54,7 +54,9 @@ function rewriteJsWrapper(
} else { } else {
timespan = "really slow"; timespan = "really slow";
} }
console.log(`oxc rewrite was ${timespan} (${duration}ms)`); console.log(
`oxc rewrite for "${url || "(unknown)"}" was ${timespan} (${duration}ms)`
);
} }
return typeof input === "string" ? decoder.decode(js) : js; return typeof input === "string" ? decoder.decode(js) : js;

View file

@ -302,7 +302,8 @@ async function rewriteBody(
return response.body; return response.body;
} }
case "script": case "script":
return rewriteJs(await response.arrayBuffer(), response.url, meta); // @ts-ignore perc idk why
return rewriteJs(await response.arrayBuffer(), response.finalURL, meta);
case "style": case "style":
return rewriteCss(await response.text(), meta); return rewriteCss(await response.text(), meta);
case "sharedworker": case "sharedworker":