Handle Unicode characters when Base64-encoding inline script source

This commit is contained in:
illusionTBA 2024-10-09 13:08:03 -04:00
parent 33bcfaedab
commit 7aaf3b7ceb

View file

@ -10,7 +10,7 @@ export function rewriteHtml(
html: string, html: string,
cookieStore: CookieStore, cookieStore: CookieStore,
meta: URLMeta, meta: URLMeta,
fromTop: boolean = false, fromTop: boolean = false
) { ) {
const handler = new DomHandler((err, dom) => dom); const handler = new DomHandler((err, dom) => dom);
const parser = new Parser(handler); const parser = new Parser(handler);
@ -56,7 +56,7 @@ export function rewriteHtml(
script(self.$scramjet.config["codecs"]), script(self.$scramjet.config["codecs"]),
script("data:application/javascript;base64," + btoa(injected)), script("data:application/javascript;base64," + btoa(injected)),
script(self.$scramjet.config["shared"]), script(self.$scramjet.config["shared"]),
script(self.$scramjet.config["client"]), script(self.$scramjet.config["client"])
); );
} }
@ -107,80 +107,80 @@ export const htmlRules: {
[key: string]: "*" | string[] | ((...any: any[]) => string | null); [key: string]: "*" | string[] | ((...any: any[]) => string | null);
fn: (value: string, meta: URLMeta, cookieStore: CookieStore) => string | null; fn: (value: string, meta: URLMeta, cookieStore: CookieStore) => string | null;
}[] = [ }[] = [
{ {
fn: (value: string, meta: URLMeta) => { fn: (value: string, meta: URLMeta) => {
return encodeUrl(value, meta); return encodeUrl(value, meta);
}, },
// url rewrites // url rewrites
src: [ src: [
"embed", "embed",
"script", "script",
"img", "img",
"image", "image",
"iframe", "iframe",
"source", "source",
"video", "video",
"audio", "audio",
"input", "input",
"track", "track",
], ],
href: ["a", "link", "area"], href: ["a", "link", "area"],
data: ["object"], data: ["object"],
action: ["form"], action: ["form"],
formaction: ["button", "input", "textarea", "submit"], formaction: ["button", "input", "textarea", "submit"],
poster: ["video"], poster: ["video"],
"xlink:href": ["image"], "xlink:href": ["image"],
}, },
{ {
fn: () => null, fn: () => null,
// csp stuff that must be deleted // csp stuff that must be deleted
nonce: "*", nonce: "*",
integrity: ["script", "link"], integrity: ["script", "link"],
csp: ["iframe"], csp: ["iframe"],
}, },
{ {
fn: (value: string, meta: URLMeta) => rewriteSrcset(value, meta), fn: (value: string, meta: URLMeta) => rewriteSrcset(value, meta),
// srcset // srcset
srcset: ["img", "source"], srcset: ["img", "source"],
imagesrcset: ["link"], imagesrcset: ["link"],
}, },
{ {
fn: (value: string, meta: URLMeta, cookieStore: CookieStore) => fn: (value: string, meta: URLMeta, cookieStore: CookieStore) =>
rewriteHtml( rewriteHtml(
value, value,
cookieStore, cookieStore,
{ {
// for srcdoc origin is the origin of the page that the iframe is on. base and path get dropped // for srcdoc origin is the origin of the page that the iframe is on. base and path get dropped
origin: new URL(meta.origin.origin), origin: new URL(meta.origin.origin),
base: new URL(meta.origin.origin), base: new URL(meta.origin.origin),
}, },
true, true
), ),
// srcdoc // srcdoc
srcdoc: ["iframe"], srcdoc: ["iframe"],
},
{
fn: (value: string, meta: URLMeta) => rewriteCss(value, meta),
style: "*",
},
{
fn: (value: string) => {
if (["_parent", "_top", "_unfencedTop"].includes(value)) return "_self";
}, },
{ target: ["a", "base"],
fn: (value: string, meta: URLMeta) => rewriteCss(value, meta), },
style: "*", ];
},
{
fn: (value: string) => {
if (["_parent", "_top", "_unfencedTop"].includes(value)) return "_self";
},
target: ["a", "base"],
},
];
// i need to add the attributes in during rewriting // i need to add the attributes in during rewriting
function traverseParsedHtml( function traverseParsedHtml(
node: any, node: any,
cookieStore: CookieStore, cookieStore: CookieStore,
meta: URLMeta, meta: URLMeta
) { ) {
if (node.name === "base" && node.attribs.href !== undefined) { if (node.name === "base" && node.attribs.href !== undefined) {
meta.base = new URL(node.attribs.href, meta.origin); meta.base = new URL(node.attribs.href, meta.origin);
@ -213,12 +213,15 @@ function traverseParsedHtml(
if ( if (
node.name === "script" && node.name === "script" &&
/(application|text)\/javascript|module|importmap|undefined/.test( /(application|text)\/javascript|module|importmap|undefined/.test(
node.attribs.type, node.attribs.type
) && ) &&
node.children[0] !== undefined node.children[0] !== undefined
) { ) {
let js = node.children[0].data; let js = node.children[0].data;
node.attribs[`data-scramjet-script-source-src`] = btoa(js); // node.attribs[`data-scramjet-script-source-src`] = btoa(js);
node.attribs[`data-scramjet-script-source-src`] = bytesToBase64(
new TextEncoder().encode(js)
);
const htmlcomment = /<!--[\s\S]*?-->/g; const htmlcomment = /<!--[\s\S]*?-->/g;
js = js.replace(htmlcomment, ""); js = js.replace(htmlcomment, "");
node.children[0].data = rewriteJs(js, meta); node.children[0].data = rewriteJs(js, meta);
@ -245,7 +248,7 @@ function traverseParsedHtml(
node.childNodes[childNode] = traverseParsedHtml( node.childNodes[childNode] = traverseParsedHtml(
node.childNodes[childNode], node.childNodes[childNode],
cookieStore, cookieStore,
meta, meta
); );
} }
} }
@ -266,3 +269,15 @@ export function rewriteSrcset(srcset: string, meta: URLMeta) {
return rewrittenUrls.join(""); return rewrittenUrls.join("");
} }
/**
 * Decodes a Base64 string into the raw bytes it represents.
 *
 * @param base64 - Base64-encoded text.
 * @returns One array element per decoded byte.
 * @throws DOMException (`InvalidCharacterError`) from `atob` when the input
 *   is not valid Base64.
 */
function base64ToBytes(base64: string): Uint8Array {
	// atob yields a "binary string": every UTF-16 code unit is in 0..255.
	const binString = atob(base64);
	// charCodeAt always returns a number (codePointAt is typed
	// `number | undefined`), and for code units <= 255 the two agree.
	return Uint8Array.from(binString, (ch) => ch.charCodeAt(0));
}
/**
 * Encodes raw bytes as a Base64 string.
 *
 * Each byte is first mapped to the character with the same code point,
 * producing a string whose characters are all in 0..255, which is then
 * passed to `btoa`.
 *
 * @param bytes - The bytes to encode.
 * @returns The Base64 text for `bytes`.
 */
function bytesToBase64(bytes: Uint8Array) {
	let binary = "";
	for (const value of bytes) {
		binary += String.fromCodePoint(value);
	}
	return btoa(binary);
}