Skip to content

Commit

Permalink
Remove and flag client-side redirects in diffs (#639)
Browse files Browse the repository at this point in the history
When viewing a diff of a page that has client-side redirects, the experience can be pretty confusing. Because we are diffing the page's HTML, you never see the actual comparison. The comparison loads, but then immediately redirects somewhere else (often a live webpage instead of an archived copy). The page *looks* fine, but things aren't highlighted like you might expect.

This change finds client-side redirects using the `<meta>` tag (and could handle other types of redirects in the future). It then removes them and adds a banner to the top of the page describing the redirect. This way, a user sees what was actually compared/diffed, and also gets notified about the redirect and has the option to investigate it.

Partially covers #201.
  • Loading branch information
Mr0grog authored Nov 25, 2020
1 parent a3818c4 commit 9640874
Show file tree
Hide file tree
Showing 4 changed files with 113 additions and 4 deletions.
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@
"bundlewatch": "bundlewatch --config .bundlewatch.config.js",
"lint": "eslint --ignore-path .gitignore './**/*.{js,jsx}'",
"start": "node server/app.js",
"test": "jest --colors",
"test": "jest --colors --verbose",
"test-watch": "jest --watch"
},
"husky": {
Expand Down
7 changes: 6 additions & 1 deletion src/components/side-by-side-rendered-diff.jsx
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import React from 'react';
import {
removeStyleAndScript,
removeClientRedirect,
loadSubresourcesFromWayback,
compose,
addTargetBlank
Expand All @@ -26,7 +27,11 @@ import SandboxedHtml from './sandboxed-html';
*/
export default class SideBySideRenderedDiff extends React.Component {
render () {
const baseTransform = compose(this.props.removeFormatting && removeStyleAndScript, addTargetBlank);
const baseTransform = compose(
this.props.removeFormatting && removeStyleAndScript,
addTargetBlank,
removeClientRedirect
);
let transformA = baseTransform;
let transformB = baseTransform;
if (this.props.useWaybackResources) {
Expand Down
50 changes: 49 additions & 1 deletion src/scripts/__tests__/html-transforms.test.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
/* eslint-env jest */

import { removeStyleAndScript, addTargetBlank } from '../html-transforms';
import {
removeClientRedirect,
removeStyleAndScript,
addTargetBlank
} from '../html-transforms';


describe('HtmlTransforms module', () => {
Expand Down Expand Up @@ -61,6 +65,50 @@ describe('HtmlTransforms module', () => {
expect(document.querySelector('.wm-diff-other')).toBeInstanceOf(Element);
});

describe('removeClientRedirect', () => {

test('removes meta refresh elements', () => {
let document = parser.parseFromString(`<!doctype html>
<html>
<head>
<meta charset="utf-8">
<title>Test!</title>
<meta http-equiv="refresh" content="0; url=https://www.epa.gov/pi" />
<meta name="description" content="THIS IS A TEST" />
</head>
<body>
<h1 style="color: orange;">Hello</h1>
</body>
</html>
`, 'text/html');

document = removeClientRedirect(document);
const metas = document.querySelectorAll('meta');
expect(metas).toHaveLength(2);
expect(metas[0].getAttribute('charset')).toEqual('utf-8');
expect(metas[1]).toHaveProperty('name', 'description');
});

test('displays a banner when meta refresh elements are present', () => {
let document = parser.parseFromString(`<!doctype html>
<html>
<head>
<meta http-equiv="refresh" content="0; url=https://www.epa.gov/pi" />
</head>
<body>
<h1 style="color: orange;">Hello</h1>
</body>
</html>
`, 'text/html');

document = removeClientRedirect(document);
const banner = document.querySelector('.wm-redirect-banner');
expect(banner).toBeInstanceOf(HTMLElement);
expect(banner.innerHTML).toContain('redirect');
});

});

test('addTargetBlank adds a target attribute of "_blank" to only <a> tags', () => {
let document = parser.parseFromString(`<!doctype html>
<html>
Expand Down
58 changes: 57 additions & 1 deletion src/scripts/html-transforms.js
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ export function compose (...transforms) {
* the document. If any of them have a class or id that starts with 'wm-',
* it keeps them as an exception.
* Returns the resulting document.
* @param {HTMLDocument} The html document to change.
* @param {HTMLDocument} document The html document to change.
* @returns {HTMLDocument}
*/
export function removeStyleAndScript (document) {
Expand All @@ -48,6 +48,55 @@ export function removeStyleAndScript (document) {
return document;
}

/**
* Removes any client-side redirects we can find and places a notice on the
* page about the redirect.
* @param {HTMLDocument} document The HTML document to change.
* @returns {HTMLDocument}
*/
export function removeClientRedirect (document) {
const metaRefresh = document.querySelector('meta[http-equiv="refresh"]');
if (metaRefresh) {
const targetMatch = (metaRefresh.content || '').match(/^\s*\d+;\s*url=(.*)$/);
if (targetMatch) {
metaRefresh.remove();
const target = targetMatch[1];

const banner = document.createElement('div');
banner.className = 'wm-redirect-banner';
Object.assign(banner.style, {
background: '#fcf8e3',
border: '1px solid #ddd',
borderRadius: '5px',
margin: '1.5em 1em',
padding: '1em'
});
banner.innerHTML = `
<h1>Client-Side Redirect</h1>
<p>
This page contains an automatic redirect that runs
<em style="font-style: italic !important;">after</em> the page loads.
Because the redirect happens after the page loads instead of before,
we can’t show a highlighted comparison of page.
</p>
<p>
This page redirects to:
<a href="${target}">
<code style="font-family: monospace !important">
${escapeHtml(target)}
</code>
</a>
</p>
`;

document.body.prepend(banner);
}

}

return document;
}

/**
* Prevents navigation from within a diff by forcing links to open in a new
* tab when clicked. This helps ensure we don’t get in a state where one side
Expand Down Expand Up @@ -156,3 +205,10 @@ function resolveUrl (url, baseUrl) {
function twoDigit (number) {
return number.toString().padStart(2, '0');
}

function escapeHtml (text) {
return text
.replace(/&/g, '&amp;')
.replace(/</g, '&lt;')
.replace(/>/g, '&gt;');
}

0 comments on commit 9640874

Please sign in to comment.