User:Ingenuity/MergeDuplicateRefs.js

From Wikipedia, the free encyclopedia
Note: After saving, you have to bypass your browser's cache to see the changes. Google Chrome, Firefox, Microsoft Edge and Safari: Hold down the ⇧ Shift key and click the Reload toolbar button. For details and instructions about other browsers, see Wikipedia:Bypass your cache.
// <nowiki>

(function() {
	// Offer the tool only in namespaces 0 (main), 2 (User) and 118
	// (Draft on English Wikipedia).
	if ([0, 2, 118].includes(mw.config.get("wgNamespaceNumber"))) {
		// User scripts may run before these ResourceLoader modules are available,
		// so request them explicitly instead of assuming mw.util / mw.Api exist.
		mw.loader.using(["mediawiki.util", "mediawiki.api"]).then(() => {
			// addPortletLink returns the new list item, or null when the skin has
			// no "p-cactions" portlet; use it directly rather than re-querying the DOM.
			const link = mw.util.addPortletLink("p-cactions", "#", "Remove dup refs", "ca-duprefs");
			if (!link) {
				return;
			}

			link.addEventListener("click", (event) => {
				// The link's href is "#"; without this the browser would also
				// navigate to the empty fragment and jump to the top of the page.
				event.preventDefault();
				removeDups();
			});
		});
	}

	/**
	 * Picks the first name of the form "auto<N>" (N = 1, 2, 3, ...) that does
	 * not occur anywhere in the given text.
	 *
	 * @param {string} content - Text to check candidate names against.
	 * @returns {string} The lowest-numbered "auto<N>" not contained in `content`.
	 */
	function getRefName(content) {
		for (let suffix = 1; ; suffix++) {
			const candidate = `auto${suffix}`;
			if (!content.includes(candidate)) {
				return candidate;
			}
		}
	}

	/**
	 * Computes the wikitext that results from merging references that cite the
	 * same URL. Pure function: it only reads its arguments and calls getRefName.
	 *
	 * For every URL cited by two or more full `<ref ...>...</ref>` tags, the first
	 * occurrence is kept as the definition (given a generated name if it has
	 * none), every later occurrence becomes `<ref name="..." />`, and short
	 * references that pointed at a merged-away name are retargeted.
	 *
	 * Replacements are collected as character ranges found by the regex scan and
	 * applied in a single pass. This avoids two problems of textual
	 * replace()/replaceAll(): "$&", "$$" etc. inside a citation being expanded as
	 * replacement patterns, and a duplicate whose text is identical to the kept
	 * definition causing the definition itself to be collapsed.
	 *
	 * @param {string} content - Original wikitext.
	 * @param {boolean} treatAsSame - When true, the query string and fragment are
	 *     ignored when comparing URLs.
	 * @returns {string} The rewritten wikitext; identical to `content` when there
	 *     is nothing to merge.
	 */
	function mergeDuplicateRefs(content, treatAsSame) {
		const closeTagLength = "</ref>".length;
		const shortTagFor = (name) => `<ref name="${name}" />`;

		// Full references grouped by URL. A Map keeps first-appearance order and
		// is not confused by keys that collide with Object.prototype members.
		const refsByUrl = new Map();
		for (const match of content.matchAll(/<ref(?:[^>]+?)?>([^<]+?)<\/ref>/gi)) {
			const [text, inner] = match;
			// Stop at whitespace, "|" and "]" as well, so that a URL inside a
			// template parameter or bracketed link does not swallow what follows.
			const urlMatch = inner.match(/https?:\/\/[^\s|}<>\]]+/i);
			if (!urlMatch) {
				continue;
			}
			const url = treatAsSame ? urlMatch[0].split("?")[0].split("#")[0] : urlMatch[0];

			// Look for the name in the opening tag only, never in the citation body.
			const openTag = text.slice(0, text.length - inner.length - closeTagLength);
			const nameMatch = openTag.match(/name="?([^<">]+)"?/i);

			if (!refsByUrl.has(url)) {
				refsByUrl.set(url, []);
			}
			refsByUrl.get(url).push({
				start: match.index,
				end: match.index + text.length,
				openTag,
				name: nameMatch ? nameMatch[1].trim() : ""
			});
		}

		// Short (self-closing) references grouped by the name they point at.
		const shortRefsByName = new Map();
		for (const match of content.matchAll(/<ref(?:\s+)?name=\"?([^\">]+)\"?(?:\s+)?\/ ?>/gi)) {
			// Unquoted names are captured with the space before "/>"; trim so they
			// compare equal to the name taken from the full reference.
			const name = match[1].trim();
			if (!shortRefsByName.has(name)) {
				shortRefsByName.set(name, []);
			}
			shortRefsByName.get(name).push({
				start: match.index,
				end: match.index + match[0].length
			});
		}

		// Pending replacements keyed by start offset; the first one registered
		// for a given position wins.
		const edits = new Map();
		const addEdit = (start, end, text) => {
			if (!edits.has(start)) {
				edits.set(start, { start, end, text });
			}
		};

		// Text handed to getRefName: the page plus every name generated so far,
		// so that two groups never receive the same generated name.
		let takenNames = content;

		for (const refs of refsByUrl.values()) {
			if (refs.length < 2) {
				continue;
			}

			const [first, ...duplicates] = refs;
			let canonicalName = first.name;
			if (!canonicalName) {
				canonicalName = getRefName(takenNames);
				takenNames += `\n${canonicalName}`;
				// Insert the name right after "<ref", keeping any other
				// attributes (e.g. group) and the citation body untouched.
				addEdit(
					first.start,
					first.start + first.openTag.length,
					`${first.openTag.slice(0, 4)} name="${canonicalName}"${first.openTag.slice(4)}`
				);
			}

			for (const duplicate of duplicates) {
				addEdit(duplicate.start, duplicate.end, shortTagFor(canonicalName));

				if (!duplicate.name || duplicate.name === canonicalName) {
					continue;
				}
				// The duplicate's own name no longer has a definition, so point
				// its short references at the surviving one.
				for (const shortRef of shortRefsByName.get(duplicate.name) ?? []) {
					addEdit(shortRef.start, shortRef.end, shortTagFor(canonicalName));
				}
			}
		}

		// Apply the edits left to right, skipping any range that overlaps one
		// already applied (only possible with malformed markup).
		const ordered = [...edits.values()].sort((a, b) => a.start - b.start);
		let result = "";
		let cursor = 0;
		for (const edit of ordered) {
			if (edit.start < cursor) {
				continue;
			}
			result += content.slice(cursor, edit.start) + edit.text;
			cursor = edit.end;
		}
		return result + content.slice(cursor);
	}

	/**
	 * Click handler for the "Remove dup refs" link: asks how URLs should be
	 * compared, fetches the current page's wikitext, merges duplicate references
	 * and saves the result, then reloads the page.
	 *
	 * Nothing is saved when the prompt is cancelled, when the wikitext cannot be
	 * read, or when no duplicates are found. API failures are reported through
	 * mw.notify instead of being left as unhandled promise rejections.
	 *
	 * @returns {Promise<void>}
	 */
	async function removeDups() {
		const answer = prompt("Should URLs with different query strings be treated as the same? (y/n)", "y");
		if (answer === null) {
			// Cancel means "do nothing", not "answer no and edit anyway".
			return;
		}
		const treatAsSame = answer.trim().toLowerCase() === "y";

		const api = new mw.Api();
		try {
			const data = await api.get({
				action: "query",
				prop: "revisions",
				rvprop: "content",
				titles: mw.config.get("wgPageName"),
				rvslots: "*"
			});

			const pages = data.query.pages;
			const page = pages[Object.keys(pages)[0]];
			// A missing page has no revisions; fail with a message, not a TypeError.
			const content = page?.revisions?.[0]?.slots?.main?.["*"];
			if (typeof content !== "string") {
				mw.notify("Could not read the wikitext of this page.", { type: "error" });
				return;
			}

			const newContent = mergeDuplicateRefs(content, treatAsSame);
			if (newContent === content) {
				// Avoid a pointless null edit and reload.
				mw.notify("No duplicate references found.");
				return;
			}

			await api.postWithToken("csrf", {
				action: "edit",
				title: mw.config.get("wgPageName"),
				text: newContent,
				summary: "Merging duplicate references"
			});

			location.reload();
		} catch (error) {
			mw.notify(`Merging duplicate references failed: ${error}`, { type: "error" });
		}
	}
})();

// </nowiki>