From e8ff38b2e528197b156284e79e1240aa53dc05a6 Mon Sep 17 00:00:00 2001
From: Toshit Chawda
Date: Thu, 6 Mar 2025 19:04:57 -0800
Subject: [PATCH] make scrammaps work (in rust)

---
Review notes (text in this section is ignored when the patch is applied):

The map emitted by changes.rs is a little-endian byte stream:
  u32 count of changes, then one record per change:
    op 0 (INSERT):  u32 pos, u32 size
    op 1 (REPLACE): u32 len, u32 start, u32 end, then `len` bytes of the
                    original text
pos/start/end are the original offsets shifted by `added`, the running net
growth of the output, so dounrewrite() in native/src/main.rs can use them
to index the rewritten js and rebuild the original.

Changes relative to the patch as received:
  * line structure restored; dounrewrite's return type is written out as
    Vec<u8>.
  * the diagnostic panic! in dounrewrite clamps its slice to the bytes left
    in the map (Bytes::slice panics on an out-of-range end, which would
    hide the message). The main.rs post-image blob id on the index line
    predates this one-line edit.

 rewriter/Cargo.lock              |  7 +++
 rewriter/native/Cargo.toml       |  1 +
 rewriter/native/src/main.rs      | 77 ++++++++++++++++++++++++++++++--
 rewriter/rewriter/src/changes.rs | 55 ++++++++++++++---------
 4 files changed, 116 insertions(+), 24 deletions(-)

diff --git a/rewriter/Cargo.lock b/rewriter/Cargo.lock
index 6e8eb10..131380b 100644
--- a/rewriter/Cargo.lock
+++ b/rewriter/Cargo.lock
@@ -233,6 +233,12 @@ version = "1.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
 
+[[package]]
+name = "bytes"
+version = "1.10.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a"
+
 [[package]]
 name = "cast"
 version = "0.3.0"
@@ -768,6 +774,7 @@ version = "0.1.0"
 dependencies = [
  "anyhow",
  "boa_engine",
+ "bytes",
  "criterion",
  "oxc",
  "rewriter",
diff --git a/rewriter/native/Cargo.toml b/rewriter/native/Cargo.toml
index 39f5554..3636550 100644
--- a/rewriter/native/Cargo.toml
+++ b/rewriter/native/Cargo.toml
@@ -8,6 +8,7 @@ workspace = true
 
 [dependencies]
 anyhow = "1.0.97"
+bytes = "1.10.1"
 oxc = { workspace = true }
 rewriter = { version = "0.1.0", path = "../rewriter" }
 url = "2.5.4"
diff --git a/rewriter/native/src/main.rs b/rewriter/native/src/main.rs
index cd558d1..6e4b879 100644
--- a/rewriter/native/src/main.rs
+++ b/rewriter/native/src/main.rs
@@ -1,6 +1,7 @@
 use std::{env, fs, str::FromStr, sync::Arc};
 
 use anyhow::{Context, Result};
+use bytes::{Buf, Bytes, BytesMut};
 use oxc::diagnostics::NamedSource;
 use rewriter::{cfg::Config, rewrite, RewriteResult};
 use url::Url;
@@ -38,6 +39,64 @@ fn dorewrite(data: &str) -> Result {
 	.context("failed to rewrite file")
 }
 
+#[derive(Debug)]
+enum RewriteType {
+	Insert { pos: u32, size: u32 },
+	Replace { start: u32, end: u32, str: Bytes },
+}
+
+fn dounrewrite(res: RewriteResult) -> Vec<u8> {
+	let js = res.js.as_slice();
+	let mut map = Bytes::from(res.sourcemap);
+	let rewrite_cnt = map.get_u32_le();
+	let mut rewrites = Vec::with_capacity(rewrite_cnt as usize);
+
+	for x in 0..rewrite_cnt {
+		let ty = map.get_u8();
+		if ty == 0 {
+			rewrites.push(RewriteType::Insert {
+				pos: map.get_u32_le(),
+				size: map.get_u32_le(),
+			});
+		} else if ty == 1 {
+			let len = map.get_u32_le();
+			rewrites.push(RewriteType::Replace {
+				start: map.get_u32_le(),
+				end: map.get_u32_le(),
+				str: map.split_to(len as usize),
+			});
+		} else {
+			panic!(
+				"{x} {ty} {:X?} {:#?}",
+				map.slice(0..map.len().min(10)).as_ref(),
+				&rewrites.last_chunk::<3>()
+			)
+		}
+	}
+
+	let mut out = BytesMut::with_capacity(res.js.len());
+
+	let mut lastpos: u32 = 0;
+
+	for rewrite in rewrites {
+		match rewrite {
+			RewriteType::Insert { pos, size } => {
+				out.extend_from_slice(&js[lastpos as usize..pos as usize]);
+				lastpos = pos + size;
+			}
+			RewriteType::Replace { start, end, str } => {
+				out.extend_from_slice(&js[lastpos as usize..start as usize]);
+				out.extend_from_slice(&str);
+				lastpos = end;
+			}
+		}
+	}
+
+	out.extend_from_slice(&js[lastpos as usize..]);
+
+	out.to_vec()
+}
+
 fn main() -> Result<()> {
 	let file = env::args().nth(1).unwrap_or_else(|| "test.js".to_string());
 	let data = fs::read_to_string(file).context("failed to read file")?;
@@ -49,23 +108,33 @@ fn main() -> Result<()> {
 			let _ = dorewrite(&data);
 			i += 1;
 			if i % 100 == 0 {
-				println!("{}...", i);
+				println!("{i}...");
 			}
 		}
 	} else {
+		println!("orig:\n{data}");
+
 		let res = dorewrite(&data)?;
 
 		let source = Arc::new(
-			NamedSource::new(data, "https://google.com/glorngle/si.js").with_language("javascript"),
+			NamedSource::new(data.clone(), "https://google.com/glorngle/si.js")
+				.with_language("javascript"),
 		);
 		eprintln!("errors:");
-		for err in res.errors {
+		for err in res.errors.clone() {
 			eprintln!("{}", err.with_source_code(source.clone()));
 		}
 
 		println!(
 			"rewritten:\n{}",
-			String::from_utf8(res.js).context("failed to parse rewritten js")?
+			str::from_utf8(&res.js).context("failed to parse rewritten js")?
+		);
+
+		let unrewritten = dounrewrite(res);
+
+		println!(
+			"unrewritten matches orig: {}",
+			data.as_bytes() == unrewritten.as_slice()
 		);
 	}
 
diff --git a/rewriter/rewriter/src/changes.rs b/rewriter/rewriter/src/changes.rs
index d47a7d6..fc35ac6 100644
--- a/rewriter/rewriter/src/changes.rs
+++ b/rewriter/rewriter/src/changes.rs
@@ -416,6 +416,7 @@ impl JsChanges {
 		E: Clone,
 	{
 		let mut offset = 0;
+		let mut added = 0i64;
 		let mut buffer = Vec::with_capacity(js.len() * 2);
 
 		macro_rules! tryget {
@@ -427,10 +428,14 @@ impl JsChanges {
 		macro_rules! eval {
 			($change:expr) => {
 				match $change {
-					Change::Str(x) => buffer.extend_from_slice(x.as_bytes()),
+					Change::Str(x) => {
+						buffer.extend_from_slice(x.as_bytes());
+						x.len()
+					}
 					Change::Number(x) => {
 						let x = format_compact_str!("{}", x);
 						buffer.extend_from_slice(x.as_bytes());
+						x.len()
 					}
 				}
 			};
@@ -439,7 +444,7 @@ impl JsChanges {
 		let mut map = Vec::with_capacity(js.len() * 2);
 		map.extend_from_slice(&(self.inner.len() as u32).to_le_bytes());
 
-		self.inner.sort_unstable();
+		self.inner.sort();
 
 		for change in &self.inner {
 			let span = change.get_span();
@@ -450,37 +455,47 @@ impl JsChanges {
 
 			match change.to_inner(cfg, offset) {
 				JsChangeInner::Insert { loc, str } => {
-					// INSERT op
-					map.push(0);
-					// offset
-					map.extend_from_slice(&(offset as u32).to_le_bytes());
-					// start
-					map.extend_from_slice(&loc.to_le_bytes());
-					// size
-					map.extend_from_slice(&(str.len() as u32).to_le_bytes());
-
+					let mut len = 0u32;
 					let loc = loc as usize;
 					buffer.extend_from_slice(tryget!(start..loc).as_bytes());
-					for str in str {
-						eval!(str);
+					for str in &str {
+						len += eval!(str) as u32;
 					}
 					buffer.extend_from_slice(tryget!(loc..end).as_bytes());
+
+					// INSERT op
+					map.push(0);
+					// pos
+					map.extend_from_slice(
+						&((loc as u32).wrapping_add_signed(added as i32)).to_le_bytes(),
+					);
+					// size
+					map.extend_from_slice(&len.to_le_bytes());
+
+					added += len as i64;
 				}
 				JsChangeInner::Replace { str } => {
+					let mut len = 0u32;
+					for str in &str {
+						len += eval!(str) as u32;
+					}
+
 					// REPLACE op
 					map.push(1);
-					// offset
-					map.extend_from_slice(&(offset as u32).to_le_bytes());
+					// len
+					map.extend_from_slice(&(span.end - span.start).to_le_bytes());
 					// start
-					map.extend_from_slice(&span.start.to_le_bytes());
+					map.extend_from_slice(
+						&(span.start.wrapping_add_signed(added as i32)).to_le_bytes(),
+					);
 					// end
-					map.extend_from_slice(&span.end.to_le_bytes());
+					map.extend_from_slice(
+						&((span.start + len).wrapping_add_signed(added as i32)).to_le_bytes(),
+					);
 					// oldstr
 					map.extend_from_slice(tryget!(start..end).as_bytes());
 
-					for str in str {
-						eval!(str);
-					}
+					added += len as i64 - (span.end - span.start) as i64;
 				}
 			}
 