mirror of
https://github.com/MercuryWorkshop/scramjet.git
synced 2025-05-14 06:50:01 -04:00
long awaited html/element rewrite merge
This commit is contained in:
parent
6f51642afb
commit
8e191b45ab
8 changed files with 184 additions and 183 deletions
|
@ -1,6 +1,7 @@
|
|||
import { createDocumentProxy } from "./document";
|
||||
import { createGlobalProxy } from "./global";
|
||||
import { createLocationProxy } from "./location";
|
||||
import { CookieStore, decodeUrl } from "./shared";
|
||||
import { createDocumentProxy, createGlobalProxy } from "./window";
|
||||
|
||||
declare global {
|
||||
interface Window {
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
import { ScramjetClient } from "../client";
|
||||
import { config, decodeUrl } from "../shared";
|
||||
import { config, decodeUrl, htmlRules } from "../shared";
|
||||
import {
|
||||
encodeUrl,
|
||||
rewriteCss,
|
||||
|
@ -48,16 +48,10 @@ export default function (client: ScramjetClient, self: typeof window) {
|
|||
return decodeUrl(descriptor.get.call(this));
|
||||
}
|
||||
|
||||
if (this.$origattrs[attr]) {
|
||||
return this.$origattrs[attr];
|
||||
}
|
||||
|
||||
return descriptor.get.call(this);
|
||||
},
|
||||
|
||||
set(value) {
|
||||
this.$origattrs[attr] = value;
|
||||
|
||||
if (["nonce", "integrity", "csp"].includes(attr)) {
|
||||
return;
|
||||
} else if (
|
||||
|
@ -65,7 +59,7 @@ export default function (client: ScramjetClient, self: typeof window) {
|
|||
) {
|
||||
value = encodeUrl(value);
|
||||
} else if (attr === "srcdoc") {
|
||||
value = rewriteHtml(value, client.cookieStore);
|
||||
value = rewriteHtml(value, client.cookieStore, undefined, true);
|
||||
} else if (["srcset", "imagesrcset"].includes(attr)) {
|
||||
value = rewriteSrcset(value);
|
||||
}
|
||||
|
@ -76,56 +70,35 @@ export default function (client: ScramjetClient, self: typeof window) {
|
|||
}
|
||||
}
|
||||
|
||||
self.Element.prototype.$origattrs = {};
|
||||
client.Proxy("Element.prototype.setAttribute", {
|
||||
apply(ctx) {
|
||||
const [name, value] = ctx.args;
|
||||
|
||||
self.Element.prototype.getAttribute = new Proxy(
|
||||
self.Element.prototype.getAttribute,
|
||||
{
|
||||
apply(target, thisArg, argArray) {
|
||||
if (
|
||||
attrs.includes(argArray[0]) &&
|
||||
thisArg.hasAttribute(`data-${argArray[0]}`)
|
||||
) {
|
||||
return thisArg.getAttribute(`data-${argArray[0]}`);
|
||||
}
|
||||
const rule = htmlRules.find((rule) => {
|
||||
let r = rule[name];
|
||||
if (!r) return false;
|
||||
if (r === "*") return true;
|
||||
if (typeof r === "function") return false; // this can't happen but ts
|
||||
|
||||
if (attrs.includes(argArray[0]) && thisArg.$origattrs[argArray[0]]) {
|
||||
return thisArg.$origattrs[argArray[0]];
|
||||
}
|
||||
return r.includes(ctx.this.tagName.toLowerCase());
|
||||
});
|
||||
|
||||
return Reflect.apply(target, thisArg, argArray);
|
||||
},
|
||||
}
|
||||
);
|
||||
if (rule) {
|
||||
ctx.args[1] = rule.fn(value, client.url, client.cookieStore);
|
||||
ctx.fn.call(ctx.this, `data-scramjet-${ctx.args[0]}`, value);
|
||||
}
|
||||
},
|
||||
});
|
||||
|
||||
self.Element.prototype.setAttribute = new Proxy(
|
||||
self.Element.prototype.setAttribute,
|
||||
{
|
||||
apply(target, thisArg, argArray) {
|
||||
if (attrs.includes(argArray[0])) {
|
||||
thisArg.$origattrs[argArray[0]] = argArray[1];
|
||||
if (["nonce", "integrity", "csp"].includes(argArray[0])) {
|
||||
return;
|
||||
} else if (
|
||||
["src", "data", "href", "action", "formaction"].includes(
|
||||
argArray[0]
|
||||
)
|
||||
) {
|
||||
argArray[1] = encodeUrl(argArray[1]);
|
||||
} else if (argArray[0] === "srcdoc") {
|
||||
// TODO: this will rewrite with the wrong url in mind for iframes!!
|
||||
argArray[1] = rewriteHtml(argArray[1], client.cookieStore);
|
||||
} else if (["srcset", "imagesrcset"].includes(argArray[0])) {
|
||||
argArray[1] = rewriteSrcset(argArray[1]);
|
||||
} else if (argArray[1] === "style") {
|
||||
argArray[1] = rewriteCss(argArray[1]);
|
||||
}
|
||||
}
|
||||
client.Proxy("Element.prototype.getAttribute", {
|
||||
apply(ctx) {
|
||||
const [name] = ctx.args;
|
||||
|
||||
return Reflect.apply(target, thisArg, argArray);
|
||||
},
|
||||
}
|
||||
);
|
||||
if (ctx.fn.call(ctx.this, `data-scramjet-${name}`)) {
|
||||
ctx.return(ctx.fn.call(ctx.this, `data-scramjet-${name}`));
|
||||
}
|
||||
},
|
||||
});
|
||||
|
||||
const innerHTML = Object.getOwnPropertyDescriptor(
|
||||
self.Element.prototype,
|
||||
|
|
|
@ -4,7 +4,6 @@ import { decodeUrl, rewriteCss } from "../shared";
|
|||
export default function (client: ScramjetClient, self: typeof window) {
|
||||
client.Proxy("FontFace", {
|
||||
construct(ctx) {
|
||||
dbg.log("FontFace", ctx.args);
|
||||
ctx.args[1] = rewriteCss(ctx.args[1]);
|
||||
},
|
||||
});
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
export const {
|
||||
util: { isScramjetFile, BareClient },
|
||||
util: { BareClient },
|
||||
url: { encodeUrl, decodeUrl },
|
||||
rewrite: {
|
||||
rewriteCss,
|
||||
|
@ -8,6 +8,7 @@ export const {
|
|||
rewriteJs,
|
||||
rewriteHeaders,
|
||||
rewriteWorkers,
|
||||
htmlRules,
|
||||
},
|
||||
CookieStore,
|
||||
} = self.$scramjet.shared;
|
||||
|
|
|
@ -4,15 +4,14 @@ import { rewriteHtml, rewriteSrcset } from "./rewriters/html";
|
|||
import { rewriteJs } from "./rewriters/js";
|
||||
import { rewriteHeaders } from "./rewriters/headers";
|
||||
import { rewriteWorkers } from "./rewriters/worker";
|
||||
import { isScramjetFile } from "./rewriters/html";
|
||||
import { BareClient } from "@mercuryworkshop/bare-mux";
|
||||
import { parseDomain } from "parse-domain";
|
||||
import { ScramjetHeaders } from "./headers";
|
||||
import { CookieStore } from "./cookie";
|
||||
import { htmlRules } from "./rewriters/html";
|
||||
|
||||
self.$scramjet.shared = {
|
||||
util: {
|
||||
isScramjetFile,
|
||||
parseDomain,
|
||||
BareClient,
|
||||
ScramjetHeaders,
|
||||
|
@ -28,6 +27,7 @@ self.$scramjet.shared = {
|
|||
rewriteJs,
|
||||
rewriteHeaders,
|
||||
rewriteWorkers,
|
||||
htmlRules,
|
||||
},
|
||||
CookieStore,
|
||||
};
|
||||
|
|
|
@ -1,116 +1,178 @@
|
|||
import { ElementType, Parser } from "htmlparser2";
|
||||
import { DomHandler, Element, Text } from "domhandler";
|
||||
import { hasAttrib } from "domutils";
|
||||
import { ChildNode, DomHandler, Element, Node, Text } from "domhandler";
|
||||
import render from "dom-serializer";
|
||||
import { encodeUrl } from "./url";
|
||||
import { rewriteCss } from "./css";
|
||||
import { rewriteJs } from "./js";
|
||||
import { CookieStore } from "../cookie";
|
||||
|
||||
export function isScramjetFile(src: string) {
|
||||
let bool = false;
|
||||
["wasm", "codecs", "client", "shared", "worker"].forEach((file) => {
|
||||
if (src === self.$scramjet.config[file]) bool = true;
|
||||
});
|
||||
|
||||
return bool;
|
||||
}
|
||||
|
||||
export function rewriteHtml(
|
||||
html: string,
|
||||
cookieStore: CookieStore,
|
||||
origin?: URL
|
||||
origin?: URL,
|
||||
fromTop: boolean = false
|
||||
) {
|
||||
const handler = new DomHandler((err, dom) => dom);
|
||||
const parser = new Parser(handler);
|
||||
|
||||
parser.write(html);
|
||||
parser.end();
|
||||
traverseParsedHtml(handler.root, cookieStore, origin);
|
||||
|
||||
return render(traverseParsedHtml(handler.root, cookieStore, origin));
|
||||
function findhead(node) {
|
||||
if (node.type === ElementType.Tag && node.name === "head") {
|
||||
return node as Element;
|
||||
} else if (node.childNodes) {
|
||||
for (const child of node.childNodes) {
|
||||
const head = findhead(child);
|
||||
if (head) return head;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
if (fromTop) {
|
||||
let head = findhead(handler.root);
|
||||
if (!head) {
|
||||
head = new Element("head", {}, []);
|
||||
handler.root.children.unshift(head);
|
||||
}
|
||||
|
||||
const dump = JSON.stringify(cookieStore.dump());
|
||||
const injected = `
|
||||
self.COOKIE = ${dump};
|
||||
self.$scramjet.config = ${JSON.stringify(self.$scramjet.config)};
|
||||
self.$scramjet.codec = self.$scramjet.codecs[self.$scramjet.config.codec];
|
||||
if ("document" in self && document.currentScript) {
|
||||
document.currentScript.remove();
|
||||
}
|
||||
`;
|
||||
|
||||
head.children.unshift(
|
||||
new Element("script", {
|
||||
src: self.$scramjet.config["wasm"],
|
||||
}),
|
||||
new Element("script", {
|
||||
src: self.$scramjet.config["codecs"],
|
||||
}),
|
||||
new Element("script", {
|
||||
src: "data:application/javascript;base64," + btoa(injected),
|
||||
}),
|
||||
new Element("script", {
|
||||
src: self.$scramjet.config["shared"],
|
||||
}),
|
||||
new Element("script", {
|
||||
src: self.$scramjet.config["client"],
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
return render(handler.root);
|
||||
}
|
||||
|
||||
export const htmlRules: {
|
||||
[key: string]: "*" | string[] | Function;
|
||||
fn: (
|
||||
value: string,
|
||||
origin: URL | null,
|
||||
cookieStore: CookieStore
|
||||
) => string | null;
|
||||
}[] = [
|
||||
{
|
||||
fn: (value: string, origin: URL) => {
|
||||
if (["_parent", "_top", "_unfencedTop"].includes(value)) return "_self";
|
||||
|
||||
console.log(value, origin);
|
||||
return encodeUrl(value, origin);
|
||||
},
|
||||
|
||||
// url rewrites
|
||||
src: [
|
||||
"embed",
|
||||
"script",
|
||||
"img",
|
||||
"iframe",
|
||||
"source",
|
||||
"video",
|
||||
"audio",
|
||||
"input",
|
||||
"track",
|
||||
],
|
||||
href: ["a", "link", "base", "area"],
|
||||
data: ["object"],
|
||||
action: ["form"],
|
||||
formaction: ["button", "input", "textarea", "submit"],
|
||||
poster: ["video"],
|
||||
},
|
||||
{
|
||||
fn: () => null,
|
||||
|
||||
// csp stuff that must be deleted
|
||||
nonce: "*",
|
||||
integrity: ["script", "link"],
|
||||
csp: ["iframe"],
|
||||
},
|
||||
{
|
||||
fn: (value: string, origin?: URL) => rewriteSrcset(value, origin),
|
||||
|
||||
// srcset
|
||||
srcset: ["img", "source"],
|
||||
imagesrcset: ["link"],
|
||||
},
|
||||
{
|
||||
fn: (value: string, origin: URL, cookieStore: CookieStore) =>
|
||||
rewriteHtml(value, cookieStore, origin, true),
|
||||
|
||||
// srcdoc
|
||||
srcdoc: ["iframe"],
|
||||
},
|
||||
{
|
||||
fn: (value: string, origin?: URL) => rewriteCss(value, origin),
|
||||
style: "*",
|
||||
},
|
||||
];
|
||||
|
||||
// i need to add the attributes in during rewriting
|
||||
|
||||
function traverseParsedHtml(node, cookieStore: CookieStore, origin?: URL) {
|
||||
/* csp attributes */
|
||||
for (const cspAttr of ["nonce", "integrity", "csp"]) {
|
||||
if (hasAttrib(node, cspAttr)) {
|
||||
node.attribs[`data-${cspAttr}`] = node.attribs[cspAttr];
|
||||
delete node.attribs[cspAttr];
|
||||
function traverseParsedHtml(node: any, cookieStore: CookieStore, origin?: URL) {
|
||||
if (node.attribs)
|
||||
for (const rule of htmlRules) {
|
||||
for (const attr in rule) {
|
||||
const sel = rule[attr];
|
||||
if (typeof sel === "function") continue;
|
||||
|
||||
if (sel === "*" || sel.includes(node.name)) {
|
||||
if (node.attribs[attr] !== undefined) {
|
||||
const value = node.attribs[attr];
|
||||
let v = rule.fn(value, origin, cookieStore);
|
||||
|
||||
if (v === null) delete node.attribs[attr];
|
||||
else {
|
||||
node.attribs[attr] = v;
|
||||
node.attribs[`data-scramjet-${attr}`] = value;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* url attributes */
|
||||
for (const urlAttr of ["src", "href", "action", "formaction", "poster"]) {
|
||||
if (
|
||||
hasAttrib(node, urlAttr) &&
|
||||
!isScramjetFile(node.attribs[urlAttr]) &&
|
||||
[
|
||||
"iframe",
|
||||
"embed",
|
||||
"script",
|
||||
"a",
|
||||
"img",
|
||||
"link",
|
||||
"object",
|
||||
"form",
|
||||
"media",
|
||||
"source",
|
||||
"video",
|
||||
].includes(node.name)
|
||||
) {
|
||||
if (["_parent", "_top", "_unfencedTop"].includes(node.attribs["target"]))
|
||||
node.attribs["target"] = "_self";
|
||||
const value = node.attribs[urlAttr];
|
||||
node.attribs[`data-${urlAttr}`] = value;
|
||||
node.attribs[urlAttr] = encodeUrl(value, origin);
|
||||
}
|
||||
}
|
||||
|
||||
/* other */
|
||||
for (const srcsetAttr of ["srcset", "imagesrcset"]) {
|
||||
if (hasAttrib(node, srcsetAttr)) {
|
||||
const value = node.attribs[srcsetAttr];
|
||||
node.attribs[`data-${srcsetAttr}`] = value;
|
||||
node.attribs[srcsetAttr] = rewriteSrcset(value, origin);
|
||||
}
|
||||
}
|
||||
|
||||
if (node.name === "meta" && hasAttrib(node, "http-equiv")) {
|
||||
const content = node.attribs.content;
|
||||
|
||||
const regex =
|
||||
/(https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}|www\.[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}|https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9]+\.[^\s]{2,}|www\.[a-zA-Z0-9]+\.[^\s]{2,})/;
|
||||
|
||||
if (regex.test(content)) {
|
||||
const url = content.match(regex)[0];
|
||||
|
||||
node.attribs.content = content.replace(url, encodeUrl(url, origin));
|
||||
}
|
||||
}
|
||||
|
||||
if (hasAttrib(node, "srcdoc"))
|
||||
node.attribs.srcdoc = rewriteHtml(node.attribs.srcdoc, cookieStore, origin);
|
||||
if (hasAttrib(node, "style"))
|
||||
node.attribs.style = rewriteCss(node.attribs.style, origin);
|
||||
|
||||
if (node.name === "style" && node.children[0] !== undefined)
|
||||
node.children[0].data = rewriteCss(node.children[0].data, origin);
|
||||
|
||||
if (
|
||||
node.name === "script" &&
|
||||
/(application|text)\/javascript|module|importmap|undefined/.test(
|
||||
node.attribs.type
|
||||
) &&
|
||||
node.children[0] !== undefined &&
|
||||
!node.attribs["data-scramjet"]
|
||||
node.children[0] !== undefined
|
||||
) {
|
||||
let js = node.children[0].data;
|
||||
const htmlcomment = /<!--[\s\S]*?-->/g;
|
||||
js = js.replace(htmlcomment, "");
|
||||
node.children[0].data = rewriteJs(js, origin);
|
||||
}
|
||||
if (node.name === "meta" && hasAttrib(node, "http-equiv")) {
|
||||
|
||||
if (node.name === "meta" && node.attribs["http-equiv"] != undefined) {
|
||||
if (node.attribs["http-equiv"] === "content-security-policy") {
|
||||
node = {};
|
||||
} else if (
|
||||
|
@ -124,50 +186,6 @@ function traverseParsedHtml(node, cookieStore: CookieStore, origin?: URL) {
|
|||
}
|
||||
}
|
||||
|
||||
if (node.name === "head") {
|
||||
const scripts = [];
|
||||
|
||||
const dump = JSON.stringify(cookieStore.dump());
|
||||
|
||||
scripts.push(
|
||||
new Element("script", {
|
||||
src: self.$scramjet.config["wasm"],
|
||||
"data-scramjet": "true",
|
||||
})
|
||||
);
|
||||
const codecs = new Element("script", {
|
||||
src: self.$scramjet.config["codecs"],
|
||||
"data-scramjet": "true",
|
||||
});
|
||||
const config = new Element("script", {
|
||||
src:
|
||||
"data:application/javascript;base64," +
|
||||
btoa(
|
||||
`
|
||||
self.COOKIE = ${dump};
|
||||
self.$scramjet.config = ${JSON.stringify(self.$scramjet.config)};
|
||||
self.$scramjet.codec = self.$scramjet.codecs[self.$scramjet.config.codec];
|
||||
if ("document" in self && document.currentScript) {
|
||||
document.currentScript.remove();
|
||||
}
|
||||
`
|
||||
),
|
||||
"data-scramjet": "true",
|
||||
});
|
||||
const shared = new Element("script", {
|
||||
src: self.$scramjet.config["shared"],
|
||||
"data-scramjet": "true",
|
||||
});
|
||||
const client = new Element("script", {
|
||||
src: self.$scramjet.config["client"],
|
||||
"data-scramjet": "true",
|
||||
});
|
||||
|
||||
scripts.push(codecs, config, shared, client);
|
||||
|
||||
node.children.unshift(...scripts);
|
||||
}
|
||||
|
||||
if (node.childNodes) {
|
||||
for (const childNode in node.childNodes) {
|
||||
node.childNodes[childNode] = traverseParsedHtml(
|
||||
|
|
10
src/types.d.ts
vendored
10
src/types.d.ts
vendored
|
@ -1,17 +1,20 @@
|
|||
import { ScramjetController } from "./bootsrapper/index";
|
||||
import { encodeUrl, decodeUrl } from "./shared/rewriters/url";
|
||||
import { rewriteCss } from "./shared/rewriters/css";
|
||||
import { rewriteHtml, rewriteSrcset } from "./shared/rewriters/html";
|
||||
import { htmlRules, rewriteHtml, rewriteSrcset } from "./shared/rewriters/html";
|
||||
import { rewriteJs } from "./shared/rewriters/js";
|
||||
import { rewriteHeaders } from "./shared/rewriters/headers";
|
||||
import { rewriteWorkers } from "./shared/rewriters/worker";
|
||||
import { isScramjetFile } from "./shared/rewriters/html";
|
||||
import type { Codec } from "./codecs";
|
||||
import { BareClient } from "@mercuryworkshop/bare-mux";
|
||||
import { parseDomain } from "parse-domain";
|
||||
import { ScramjetHeaders } from "./shared/headers";
|
||||
import { CookieStore } from "./shared/cookie";
|
||||
|
||||
type ScramjetFlags = {
|
||||
serviceworkers: boolean;
|
||||
};
|
||||
|
||||
interface ScramjetConfig {
|
||||
prefix: string;
|
||||
codec: string;
|
||||
|
@ -26,6 +29,7 @@ interface ScramjetConfig {
|
|||
thread: string;
|
||||
client: string;
|
||||
codecs: string;
|
||||
flags: ScramjetFlags;
|
||||
}
|
||||
|
||||
declare global {
|
||||
|
@ -43,11 +47,11 @@ declare global {
|
|||
rewriteJs: typeof rewriteJs;
|
||||
rewriteHeaders: typeof rewriteHeaders;
|
||||
rewriteWorkers: typeof rewriteWorkers;
|
||||
htmlRules: typeof htmlRules;
|
||||
};
|
||||
util: {
|
||||
BareClient: typeof BareClient;
|
||||
ScramjetHeaders: typeof ScramjetHeaders;
|
||||
isScramjetFile: typeof isScramjetFile;
|
||||
parseDomain: typeof parseDomain;
|
||||
};
|
||||
CookieStore: typeof CookieStore;
|
||||
|
|
|
@ -165,7 +165,12 @@ async function handleResponse(
|
|||
case "iframe":
|
||||
case "document":
|
||||
if (responseHeaders["content-type"]?.startsWith("text/html")) {
|
||||
responseBody = rewriteHtml(await response.text(), cookieStore, url);
|
||||
responseBody = rewriteHtml(
|
||||
await response.text(),
|
||||
cookieStore,
|
||||
url,
|
||||
true
|
||||
);
|
||||
} else {
|
||||
responseBody = response.body;
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue