User:Phlsph7/Readability.js

From Wikipedia, the free encyclopedia
Note: After saving, you have to bypass your browser's cache to see the changes. Google Chrome, Firefox, Microsoft Edge and Safari: Hold down the ⇧ Shift key and click the Reload toolbar button. For details and instructions about other browsers, see Wikipedia:Bypass your cache.
/* Userscript to highlight sentences by readability */

// Background colors for the ten score classes, listed from the lowest score
// band (0-10) to the highest (90-100). `var` is used on purpose: the hoisted
// declaration lets a value that was already defined take precedence, so the
// defaults below are only applied when no colors have been set.
var readabilityScoreColors;
if(!readabilityScoreColors){
	readabilityScoreColors = [
		'rgb(255,96,96)',
		'rgb(255,128,128)',
		'rgb(255,192,128)',
		'rgb(255,224,128)',
		'rgb(255,255,128)',
		'rgb(214,255,128)',
		'rgb(171,255,128)',
		'rgb(128,255,128)',
		'rgb(128,255,171)',
		'rgb(128,255,214)',
	];
}


// Goes through all the p-elements that are direct children of .mw-parser-output
// and splits their content into span-elements with the class "sentence".
// Each span corresponds to a sentence. Paragraphs with 20 or fewer characters of
// text are left untouched, and spans whose text is shorter than 10 characters
// lose the "sentence" class again.
function divideParagraphsIntoSentences(){
	const paragraphs = document.querySelectorAll('.mw-parser-output > p');

	// Periods are the main guide for where sentences start and end.
	// However, not all periods mark sentences, like in different forms of abbreviations.
	// Placeholders are used for exceptions.
	const periodPlaceholder = 'PERIOD_PLACEHOLDER';
	const periodExceptions = buildPeriodExceptions();
	const periodExceptionPlaceholders = periodExceptions.map(function(exception){
		return exception.split('.').join(periodPlaceholder);
	});

	for(const paragraph of paragraphs){
		const textContent = paragraph.textContent.split('\r').join('').split('\n').join('').trim();

		// exclude very short paragraphs
		if(textContent.length > 20){
			divideIntoSentences(paragraph, periodExceptions, periodExceptionPlaceholders);
		}
	}

	// remove very short sentences
	// getElementsByClassName returns a live collection: removing the class while
	// iterating it directly would skip elements, so iterate over a snapshot.
	const sentenceElements = Array.from(document.getElementsByClassName('sentence'));
	for(const sentenceElement of sentenceElements){
		const sentenceText = getSentenceText(sentenceElement);
		if(sentenceText.trim().length < 10){
			sentenceElement.classList.remove('sentence');
		}
	}

	// Builds the ordered list of strings whose periods do not end a sentence.
	// The order matters because the exceptions are applied one after another.
	function buildPeriodExceptions(){
		const exceptions = [
			'...', 'Mr.', 'Mrs.', 'Dr.', 'Jr.', 'Sr.', 'Prof.', 'St.', 'Ave.',
			'Corp.', 'Inc.', 'Ltd.', 'Co.', 'Gov.', 'Capt.', 'Sgt.', 'et al.',
			'vs.', 'e.t.a.',
		];
		const upperCaseLetters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ';
		const lowerCaseLetters = upperCaseLetters.toLowerCase();
		const digits = '0123456789';

		// initials inside abbreviations, e.g. ".S." in "U.S."
		for(const letter of upperCaseLetters){
			exceptions.push('.' + letter + '.');
		}
		// single-letter initials, e.g. " J." in "John J. Doe"
		for(const letter of upperCaseLetters){
			exceptions.push(' ' + letter + '.');
		}
		// lower-case abbreviations, e.g. ".g." in "e.g."
		for(const letter of lowerCaseLetters){
			exceptions.push('.' + letter + '.');
		}
		// a period directly followed by a lower-case letter
		for(const letter of lowerCaseLetters){
			exceptions.push('.' + letter);
		}
		// decimal numbers: digit, period, digit
		for(const integerDigit of digits){
			for(const fractionDigit of digits){
				exceptions.push(integerDigit + '.' + fractionDigit);
			}
		}
		// a period directly followed by a digit
		for(const digit of digits){
			exceptions.push('.' + digit);
		}
		return exceptions;
	}

	// Split the content of a p-element into span-elements. Each span corresponds to a sentence.
	function divideIntoSentences(paragraph, periodExceptions, periodExceptionPlaceholders){
		const spanStartTag = '<span class="sentence">';
		const spanEndTag = '</span>';

		// Loop through all the nodes inside the p-element.
		// Span-open-tags and close-tags are placed through code.
		let innerHTML = spanStartTag;
		for(let currentChild = paragraph.firstChild; currentChild; currentChild = currentChild.nextSibling){
			// Text nodes carry the punctuation relevant for sentences. Their value is
			// plain text, so it has to be escaped before it is re-parsed as HTML.
			if(currentChild.nodeType === Node.TEXT_NODE){
				innerHTML += adjustTextNodes(escapeHtml(currentChild.nodeValue), periodExceptions, periodExceptionPlaceholders);
			}

			// elements are copied unchanged
			else if(currentChild.nodeType === Node.ELEMENT_NODE){
				innerHTML += currentChild.outerHTML;
			}

			// comments are kept as comments instead of becoming visible text
			else if(currentChild.nodeType === Node.COMMENT_NODE){
				innerHTML += '<!--' + currentChild.nodeValue + '-->';
			}
		}

		innerHTML += spanEndTag;

		paragraph.innerHTML = innerHTML;

		// utility function to escape the characters that are special in HTML text
		function escapeHtml(text){
			return text.split('&').join('&amp;')
					.split('<').join('&lt;')
					.split('>').join('&gt;');
		}

		// utility function to replace every searchValues[i] by replacements[i], in order
		function replaceAllInOrder(text, searchValues, replacements){
			let modifiedText = text;
			for(let i = 0; i < searchValues.length; i++){
				modifiedText = modifiedText.split(searchValues[i]).join(replacements[i]);
			}
			return modifiedText;
		}

		// utility function to modify text nodes
		// they contain the punctuation relevant for sentences
		function adjustTextNodes(text, periodExceptions, periodExceptionPlaceholders){
			// use placeholders to remove all periods that do not mark sentences
			let modifiedText = replaceAllInOrder(text, periodExceptions, periodExceptionPlaceholders);

			// split using the remaining punctuation: close the current span after
			// the punctuation mark and open the next one
			for(const punctuation of ['.', '!', '?']){
				modifiedText = modifiedText.split(punctuation).join(punctuation + spanEndTag + spanStartTag);
			}

			// use placeholders to return all periods that do not mark sentences
			return replaceAllInOrder(modifiedText, periodExceptionPlaceholders, periodExceptions);
		}
	}
}

// Function to rate the readability of sentences and give them their class accordingly.
// Every element with the class "sentence" gets a title with its score, data
// attributes with its text and score, and one of ten score classes. Elements
// whose score is NaN lose the "sentence" class instead. Finally a style sheet
// that colors the score classes is appended to the document head.
function rateSentences(){

	// class names for different scores, ordered from the lowest to the highest score band
	const scoreClasses = [
		'score-10-0',
		'score-20-10',
		'score-30-20',
		'score-40-30',
		'score-50-40',
		'score-60-50',
		'score-70-60',
		'score-80-70',
		'score-90-80',
		'score-100-90'];

	// Loop through all sentences, add their score class and their title attribute.
	// getElementsByClassName returns a live collection: removing the class while
	// iterating it directly would skip elements, so iterate over a snapshot.
	const sentenceElements = Array.from(document.body.getElementsByClassName('sentence'));
	for(const sentenceElement of sentenceElements){
		const sentenceText = getSentenceText(sentenceElement);
		const score = getSentenceScore(sentenceText);
		if(Number.isNaN(score)){
			// no score could be computed, so this is not treated as a sentence
			sentenceElement.classList.remove('sentence');
			continue;
		}
		sentenceElement.title = `Score: ${score.toFixed(2)}`;
		sentenceElement.dataset.sentenceText = sentenceText;
		sentenceElement.dataset.score = score;
		sentenceElement.classList.add(getScoreClass(score, scoreClasses));
	}

	// Add the style sheet to color the score classes.
	addScoreStyleSheet(scoreClasses, readabilityScoreColors);

	// Maps a score to its class: one class per band of 10 points, with scores
	// below 0 and above 100 clamped to the first and last class.
	function getScoreClass(score, scoreClasses){
		const index = Math.floor(score / 10);
		return scoreClasses[Math.min(9, Math.max(0, index))];
	}

	// Creates one background-color rule per score class and appends the
	// resulting style element to the document head.
	function addScoreStyleSheet(scoreClasses, readabilityScoreColors){
		const style = document.createElement('style');
		let css = '';
		for(let i = 0; i < scoreClasses.length; i++){
			css += `.${scoreClasses[i]} {background-color: ${readabilityScoreColors[i]}; } `;
		}
		// assign once, as plain text, instead of re-parsing the element on every rule
		style.textContent = css;
		document.head.appendChild(style);
	}
}

// Creates an overview at the top of the page
// This overview shows the readability of the whole article and other information:
// a table with the overall scores and counts, a key for the colors, and a
// table (hidden until its button is clicked) with all sentences ordered by
// lowest score. Does nothing if the element #mw-content-text is missing.
function createOverview(){
	// Readability depends on the number of syllables, words, and sentences
	let totalSyllableCount = 0;
	let totalPolySyllableCount = 0;
	let totalWordCount = 0;
	const sentenceElements = document.getElementsByClassName('sentence');
	const totalSentenceCount = sentenceElements.length;
	for(const sentenceElement of sentenceElements){
		const words = getWords(getSentenceText(sentenceElement));
		totalWordCount += words.length;
		for(const word of words){
			const syllableCount = getSyllableCount(word);
			totalSyllableCount += syllableCount;
			// words with three or more syllables are counted separately for SMOG
			if(syllableCount >= 3){
				totalPolySyllableCount++;
			}
		}
	}

	const totalReadability = getFleschKincaidReadability(totalSyllableCount, totalWordCount, totalSentenceCount);
	const totalGradeLevel = getFleschKincaidGradeLevel(totalSyllableCount, totalWordCount, totalSentenceCount);
	const totalSmogGradeLevel = getSmogGradeLevel(totalPolySyllableCount, totalSentenceCount);

	const mainDiv = document.getElementById('mw-content-text');
	if(!mainDiv){
		// there is no place to put the overview
		return;
	}
	const overviewDiv = document.createElement('div');
	mainDiv.insertBefore(overviewDiv, mainDiv.firstChild);
	const headline = document.createElement('h2');
	overviewDiv.appendChild(headline);
	headline.textContent = 'Readability overview';
	const tableDiv = document.createElement('div');
	overviewDiv.appendChild(tableDiv);
	// the overview table and the key table are shown side by side
	tableDiv.style.display = 'flex';
	insertOverviewTable(tableDiv);
	insertKeyTable(tableDiv);
	insertSentenceTable(overviewDiv);

	// Table with the scores and counts for the whole article
	function insertOverviewTable(parent){
		const overviewTable = document.createElement('table');
		parent.appendChild(overviewTable);
		overviewTable.classList.add('wikitable');
		overviewTable.style.marginRight = '20px';

		const overviewTableBody = document.createElement('tbody');
		overviewTable.appendChild(overviewTableBody);
		// the labels are fixed markup written here, so they are inserted as HTML
		addRow(overviewTableBody, 'Readability (<a href="https://en.wikipedia.org/wiki/Flesch%E2%80%93Kincaid_readability_tests">Flesch</a>)', totalReadability.toFixed(2), true);
		addRow(overviewTableBody, 'Grade level (<a href="https://en.wikipedia.org/wiki/Flesch%E2%80%93Kincaid_readability_tests">Flesch</a>)', totalGradeLevel.toFixed(2), true);
		addRow(overviewTableBody, 'Grade level (<a href="https://en.wikipedia.org/wiki/SMOG">SMOG</a>)', totalSmogGradeLevel.toFixed(2), true);
		addRow(overviewTableBody, 'Sentences', totalSentenceCount);
		addRow(overviewTableBody, 'Words', totalWordCount);
		addRow(overviewTableBody, 'Syllables', totalSyllableCount);
	}

	// Key for the coloring
	function insertKeyTable(parent){
		// score class, score range, and school level, from the highest to the lowest score
		const keyRows = [
			['score-100-90', '100–90', '5th grade'],
			['score-90-80', '90–80', '6th grade'],
			['score-80-70', '80–70', '7th grade'],
			['score-70-60', '70–60', '8th & 9th grade'],
			['score-60-50', '60–50', '10th to 12th grade'],
			['score-50-40', '50–40', 'College'],
			['score-40-30', '40–30', 'College'],
			['score-30-20', '30–20', 'College graduate'],
			['score-20-10', '20–10', 'College graduate'],
			['score-10-0', '10–0', 'College graduate'],
		];

		const keyTable = document.createElement('table');
		parent.appendChild(keyTable);
		keyTable.classList.add('wikitable');

		const keyTableHead = document.createElement('thead');
		keyTable.appendChild(keyTableHead);
		const headRow = document.createElement('tr');
		keyTableHead.appendChild(headRow);
		for(const headerText of ['Score', 'School level']){
			const headerCell = document.createElement('th');
			headerCell.textContent = headerText;
			headRow.appendChild(headerCell);
		}

		const keyTableBody = document.createElement('tbody');
		keyTable.appendChild(keyTableBody);
		for(const keyRow of keyRows){
			// the score class gives the row the same color as the rated sentences
			const row = addRow(keyTableBody, keyRow[1], keyRow[2]);
			row.classList.add(keyRow[0]);
		}
	}

	// sentence table to display all sentences ordered by lowest score
	function insertSentenceTable(parent){
		const showLabel = 'Show sentences ordered by lowest score';
		const hideLabel = 'Hide sentences ordered by lowest score';

		// button to show/hide the table
		const sentenceTableButton = document.createElement('button');
		parent.appendChild(sentenceTableButton);
		sentenceTableButton.textContent = showLabel;
		sentenceTableButton.style.fontSize = '24px';

		// the table itself
		const sentenceTable = document.createElement('table');
		parent.appendChild(sentenceTable);
		sentenceTable.classList.add('wikitable');
		sentenceTable.style.display = 'none';

		const sentenceCaption = document.createElement('caption');
		sentenceTable.appendChild(sentenceCaption);
		sentenceCaption.textContent = 'Sentences ordered by lowest score';

		const sentenceTableBody = document.createElement('tbody');
		sentenceTable.appendChild(sentenceTableBody);

		// matrix to store the table values, read from the data attributes of the sentence elements
		const sentenceMatrix = [];
		for(const sentenceElement of sentenceElements){
			const sentenceText = sentenceElement.dataset.sentenceText;
			const score = parseFloat(sentenceElement.dataset.score);
			sentenceMatrix.push([sentenceText, score]);
		}

		// sort by lowest score
		sentenceMatrix.sort(function(a, b){
			return a[1] - b[1];
		});

		// loop through the matrix and add one row per index
		// the sentence text comes from the page, so it is inserted as plain text
		for(const entry of sentenceMatrix){
			addRow(sentenceTableBody, entry[0], entry[1].toFixed(2));
		}

		// show/hide function of the button
		sentenceTableButton.onclick = function(){
			const isHidden = sentenceTable.style.display === 'none';
			sentenceTable.style.display = isHidden ? '' : 'none';
			sentenceTableButton.textContent = isHidden ? hideLabel : showLabel;
		};
	}

	// utility function to add rows to a table
	// name and value are inserted as plain text, unless nameIsHtml is true,
	// in which case name is inserted as HTML markup. Returns the new row.
	function addRow(tableBody, name, value, nameIsHtml){
		const row = document.createElement('tr');

		const nameCell = document.createElement('td');
		if(nameIsHtml){
			nameCell.innerHTML = name;
		}
		else{
			nameCell.textContent = name;
		}
		row.appendChild(nameCell);

		const valueCell = document.createElement('td');
		valueCell.textContent = value;
		row.appendChild(valueCell);

		tableBody.appendChild(row);
		return row;
	}
}

// Utility function to extract the text from a sentence element
// References and certain inline templates are left out of the text. A single
// leading double quote is removed and the result is trimmed.
function getSentenceText(sentenceElement){
	// hide references and certain templates, remembering their inline display
	// value so that it can be restored exactly afterwards
	const hiddenElements = sentenceElement.querySelectorAll('.reference, .Inline-Template');
	const previousDisplayValues = [];
	for(const hiddenElement of hiddenElements){
		previousDisplayValues.push(hiddenElement.style.display);
		hiddenElement.style.display = 'none';
	}

	// the innerText attribute ignores hidden elements
	let sentenceText = sentenceElement.innerText;

	// show them again
	let index = 0;
	for(const hiddenElement of hiddenElements){
		hiddenElement.style.display = previousDisplayValues[index];
		index++;
	}

	// formatting
	if(sentenceText[0] === '"'){
		sentenceText = sentenceText.substring(1);
	}

	return sentenceText.trim();
}

// utility function to get the readability score of a sentence
// The text is treated as exactly one sentence: its words are extracted, their
// syllables are summed up, and both counts are passed to the readability formula.
function getSentenceScore(sentenceText){
	const words = getWords(sentenceText);
	const syllableCount = words.reduce(function(total, word){
		return total + getSyllableCount(word);
	}, 0);

	return getFleschKincaidReadability(syllableCount, words.length, 1);
}

// utility function: this is the main metric
// Applies the Flesch reading-ease formula to the given counts. The result is
// NaN when wordCount and syllableCount are both 0.
function getFleschKincaidReadability(syllableCount, wordCount, sentenceCount){
	const averageSentenceLength = wordCount / sentenceCount;
	const averageSyllablesPerWord = syllableCount / wordCount;
	return 206.835 - (1.015 * averageSentenceLength) - (84.6 * averageSyllablesPerWord);
}

// utility function: this shows the grade level and is used for the overview
// Applies the Flesch-Kincaid grade-level formula to the given counts.
function getFleschKincaidGradeLevel(syllableCount, wordCount, sentenceCount){
	const averageSentenceLength = wordCount / sentenceCount;
	const averageSyllablesPerWord = syllableCount / wordCount;
	return (0.39 * averageSentenceLength) + (11.8 * averageSyllablesPerWord) - 15.59;
}

// utility function: get SMOG readability for the overview
// polySyllableCount is the number of words with three or more syllables; it is
// scaled to a sample of 30 sentences before the square root is taken.
function getSmogGradeLevel(polySyllableCount, sentenceCount){
	const polySyllablesPerThirtySentences = polySyllableCount * 30 / sentenceCount;
	return 1.0430 * Math.sqrt(polySyllablesPerThirtySentences) + 3.1291;
}

// utility function to extract words from a sentence
// Punctuation characters are removed (so "well-known" becomes "wellknown"),
// then the text is split at any run of whitespace. This includes tabs, line
// breaks and non-breaking spaces, not only the plain space character.
// Returns an array of non-empty words; it is empty if there are no words.
function getWords(sentenceText){
	// the characters . ? ! , ; : " ( ) [ ] { } - / & * # $ % @ + = < > | ~ ^ \ '
	const punctuationPattern = /[.?!,;:"()[\]{}\-\/&*#$%@+=<>|~^\\']/g;
	const whitespacePattern = /\s+/;

	const words = sentenceText.replace(punctuationPattern, '').split(whitespacePattern);

	// leading or trailing whitespace produces empty entries
	return words.filter(function(word){
		return word.length > 0;
	});
}

// utility function to count the syllables of a word
// This is an estimate: after lower-casing the word and dropping one final "e",
// every unbroken run of the letters a, e, i, o, u, y counts as one syllable.
// The result is never smaller than 1.
function getSyllableCount(word){
	let normalizedWord = word.toLowerCase();
	if(normalizedWord.endsWith('e')){
		normalizedWord = normalizedWord.slice(0, -1);
	}

	const vowelGroups = normalizedWord.match(/[aeiouy]+/g);
	const vowelGroupCount = vowelGroups === null ? 0 : vowelGroups.length;

	return Math.max(1, vowelGroupCount);
}

// utility function to iteratively replace a string until no more occurrences are found
// Two inputs could never finish and are handled separately:
// - an empty oldSubstring is found in every string, so the string is returned unchanged
// - if newSubstring contains oldSubstring, every pass creates new occurrences,
//   so only a single replacement pass is done
function fullReplace(string, oldSubstring, newSubstring){
	if(oldSubstring === ''){
		return string;
	}
	if(newSubstring.includes(oldSubstring)){
		return string.split(oldSubstring).join(newSubstring);
	}

	let newString = string;
	while(newString.includes(oldSubstring)){
		newString = newString.split(oldSubstring).join(newSubstring);
	}
	return newString;
}

// anonymous main function
// Adds a "Readability" link to the toolbox. The first click on it runs the
// analysis; further clicks do nothing, because running it again on the already
// modified page would nest the sentence spans and add the overview a second time.
(function(){
	// restrict script to mainspace, userspace, wikipedia, help, and draftspace
	const namespaceNumber = mw.config.get('wgNamespaceNumber');
	const allowedNamespaces = [0, 2, 4, 12, 118];
	if(!allowedNamespaces.includes(namespaceNumber)){
		return;
	}

	// add a link to the toolbox
	$.when(mw.loader.using('mediawiki.util'), $.ready).then(function(){
		const portletLink = mw.util.addPortletLink('p-tb', '#', 'Readability');
		if(!portletLink){
			// the link could not be added, so there is nothing to attach the handler to
			return;
		}

		let hasRun = false;

		// run the main function when the link is clicked
		portletLink.onclick = function(e){
			e.preventDefault();
			if(hasRun){
				return;
			}
			hasRun = true;
			divideParagraphsIntoSentences();
			rateSentences();
			createOverview();
		};
	});
})();