//<pre>
// Analyze the article's structure
// with kind respects to Dr. pda, whose excellent prosesizebytes.js script was the inspiration
//
// To use this script, add the line "importScript('User:Proteins/articlestructure.js');" to the monobook.js
// subpage of your own user page; see User:Proteins/monobook.js for a working example.
function articleStructure() {
var alert_string = "";
var diagnostic_string = "";
var read_entire_article = true;
var show_lead_diagnostics = true;
var show_section_diagnostics = false;
var display_individual_words = false;
var using_Internet_Explorer = false;
var spaced_text = "";
var untagged_text = "";
var stripped_text = "";
var unescaped_text = "";
var anchors;
var temp_anchor;
var section_name = "";
var temp_anchor_name = "";
var num_anchors = 0;
var anchor_index = 0;
var anchor_level = 0;
var prev_anchor_level = 0;
var num_H2_anchors = 0;
var H2_anchor_index = 0;
var cutoff_anchor_index = 0;
var cutoff_H2_anchor_index = 0;
var cutoff_child_node_index = 0;
var last_P_child_node_index = 0;
var cutoff_element_node_index = 0;
var num_sections = 0;
var section_index = 0;
var element_node;
var num_element_nodes = 0;
var element_node_index = 0;
var temp_node_name = "";
var parent_node;
var grandparent_node;
var greatgrandparent_node;
var sibling_node;
var next_sibling_node;
var child_node;
var child_nodes;
var prev_child_node;
var num_child_nodes = 0;
var child_node_index = 0;
var child_node_name = "";
var num_prose_counted_nodes = 0;
var grandchild_node;
var grandchild_nodes;
var num_grandchild_nodes = 0;
var grandchild_node_index = 0;
var path_names;
var file_name = "";
var num_characters = 0;
var del_num_characters = 0;
var temp_num_characters = 0;
var temp_word = "";
var num_words = 0;
var word_count = 0;
var word_index = 0;
var nonempty_word_index = 0;
var tentative_num_words = 0;
var num_spaces = 0;
var paragraph_count = 0;
var list_item_count = 0;
var prose_size_bytes = 0;
var total_word_count = 0;
var total_paragraph_count = 0;
var total_list_item_count = 0;
var total_prose_size_bytes = 0;
var section_word_count = new Array();
var section_paragraph_count = new Array();
var section_list_item_count = new Array();
var section_prose_size_bytes = new Array();
var word_count_string = "";
var paragraph_count_string = "";
var list_item_count_string = "";
var prose_size_bytes_string = "";
var temp_paragraph;
var text_paragraphs;
var num_paragraphs = 0;
var paragraph_index = 0;
var temp_list_item;
var text_list_items;
var num_list_items = 0;
var list_item_index = 0;
var temp_image;
var num_pixels = 0;
var image_index = 0;
var image_counter = 0;
var num_raw_images = 0;
var num_nonicon_images = 0;
var num_anchors = 0;
var num_raw_links = 0;
var num_raw_tables = 0;
var num_raw_references = 0;
// check for Internet Explorer browser
using_Internet_Explorer = false;
if (navigator.userAgent.indexOf("MSIE") > -1) {
using_Internet_Explorer = true;
// alert_string = "This script works correctly in every browser — except Internet Explorer. Please be patient!"
// window.alert(alert_string);
}
// Find the cutoff H2 anchor index, where we stop counting things
alert_string = "";
num_H2_anchors = 0;
section_name = "lead section";
prev_anchor_level = 1; //begin at the H1 heading
read_entire_article = true;
anchors = document.anchors;
num_anchors = anchors.length;
for (anchor_index=1; anchor_index<num_anchors; anchor_index++) {
temp_anchor = anchors[anchor_index];
parent_node = temp_anchor.parentNode;
if (!parent_node) { continue; }
sibling_node = parent_node.nextSibling;
if (!sibling_node) { continue; }
// Check headings for jumps upwards in heading level
anchor_level = 0;
if (sibling_node.nodeName == "H1") {
alert_string += " WARNING: Illegal H1 heading in this section\n";
} else if (sibling_node.nodeName == "H2") {
anchor_level = 2;
} else if (sibling_node.nodeName == "H3") {
anchor_level = 3;
} else if (sibling_node.nodeName == "H4") {
anchor_level = 4;
} else if (sibling_node.nodeName == "H5") {
anchor_level = 5;
} else {
next_sibling_node = sibling_node.nextSibling;
if (!next_sibling_node) { continue; }
// Check headings for jumps upwards in heading level
if (next_sibling_node.nodeName == "H1") {
alert_string += " WARNING: Illegal H1 heading in this section\n";
} else if (next_sibling_node.nodeName == "H2") {
anchor_level = 2;
} else if (next_sibling_node.nodeName == "H3") {
anchor_level = 3;
} else if (next_sibling_node.nodeName == "H4") {
anchor_level = 4;
} else if (next_sibling_node.nodeName == "H5") {
anchor_level = 5;
}
} // closes assignment of the anchor level, if any
if (((anchor_level - prev_anchor_level) > 1) && (prev_anchor_level != 0)) {
if (num_H2_anchors == 0) {
alert_string += " WARNING: H" + prev_anchor_level + " to H" + anchor_level + " jump in the lead\n";
} else {
alert_string += " WARNING: H" + prev_anchor_level + " to H" + anchor_level + " jump in \"" + section_name.replace(/(_+)/ig, " ") + "\"\n";
}
}
if (anchor_level > 0) { prev_anchor_level = anchor_level; }
// For each H2 heading, test whether it names a closing section (See also, Notes, References, External links, ...); the scan stops at the first one
if (anchor_level == 2) {
num_H2_anchors++;
section_name = temp_anchor.name;
temp_anchor_name = temp_anchor.name;
alert_string += "Section " + num_H2_anchors + " : " + section_name.replace(/(_+)/ig, " ") + "\n";
// alert_string += "Section " + num_H2_anchors + " : " + section_name.replace(/(_+)/ig, " ") + " " + temp_anchor.parentNode.nodeName + " " + sibling_node.nodeName + "\n";
temp_anchor_name = temp_anchor_name.replace(/:$/ig,""); // eliminate colons at end
temp_anchor_name = temp_anchor_name.replace(/s$/ig,""); // eliminate plurals at end
temp_anchor_name = temp_anchor_name.replace(/See_also/ig,"");
temp_anchor_name = temp_anchor_name.replace(/Related_topic/ig,"");
temp_anchor_name = temp_anchor_name.replace(/Related_article/ig,"");
temp_anchor_name = temp_anchor_name.replace(/Further_reading/ig,"");
temp_anchor_name = temp_anchor_name.replace(/External_link/ig,"");
temp_anchor_name = temp_anchor_name.replace(/Footnote/ig,"");
temp_anchor_name = temp_anchor_name.replace(/Note/ig,"");
temp_anchor_name = temp_anchor_name.replace(/Reference/ig,"");
temp_anchor_name = temp_anchor_name.replace(/Citation/ig,"");
temp_anchor_name = temp_anchor_name.replace(/Source/ig,"");
temp_anchor_name = temp_anchor_name.replace(/Link/ig,"");
temp_anchor_name = temp_anchor_name.replace(/s([_\s]+)and([_\s]+)/ig,"");
temp_anchor_name = temp_anchor_name.replace(/([_\s]+)and([_\s]+)/ig,"");
temp_anchor_name = temp_anchor_name.replace(/([_\s]+)/ig,"");
if (temp_anchor_name == "") { break; }
// diagnostic_string = "Section " + num_H2_anchors + " : " + temp_anchor_name + " L: " + temp_anchor_name.length;
// window.alert(diagnostic_string);
} // closes check for H2 anchor
} // closes loop over the anchors
cutoff_anchor_index = anchor_index;
cutoff_H2_anchor_index = num_H2_anchors;
if (cutoff_anchor_index < num_anchors) {
read_entire_article = false;
alert_string += "\nProse counting will stop before the \"" + temp_anchor.name.replace(/(_+)/ig, " ") + "\" section.\n";
} else {
read_entire_article = true;
alert_string += "\nProse counting will cover the entire article.\n";
}
window.alert(alert_string);
// Count child and element nodes
alert_string = "";
num_element_nodes = 0;
child_nodes = document.getElementById("bodyContent").childNodes;
num_child_nodes = child_nodes.length;
// if (num_child_nodes > 40) { num_child_nodes = 40;} // truncate loop for testing
for (child_node_index=0; child_node_index < num_child_nodes; child_node_index++) {
child_node = child_nodes[child_node_index];
if (child_node.nodeType != 1) {
// alert_string += "Child node " + child_node_index + " : " + child_node.nodeName + "\n";
continue;
} // examine only Element nodes
num_element_nodes++;
// alert_string += "Element node " + num_element_nodes + " : " + child_node.nodeName + "\n";
} // closes loop counting the element nodes
// window.alert(alert_string);
// Determine the corresponding childNode index cutoff
alert_string = "";
if (read_entire_article == true) {
cutoff_child_node_index = num_child_nodes;
cutoff_element_node_index = num_element_nodes;
} else {
H2_anchor_index = 0;
element_node_index = 0;
last_P_child_node_index = -1;
last_P_element_node_index = -1;
for (child_node_index=0; child_node_index < num_child_nodes; child_node_index++) {
child_node = child_nodes[child_node_index];
if (child_node.nodeType != 1) { continue; } // examine only Element nodes
element_node_index++;
if (child_node.nodeName == "P") {
last_P_child_node_index = child_node_index;
last_P_element_node_index = num_element_nodes;
} else if (child_node.nodeName == "H2") {
H2_anchor_index++;
if (H2_anchor_index == cutoff_H2_anchor_index) {
cutoff_child_node_index = last_P_child_node_index;
cutoff_element_node_index = last_P_element_node_index;
break;
}
}
// alert_string += "Section " + H2_anchor_index + ", Element node " + num_element_nodes + " : " + child_node.nodeName + " " + child_node.childNodes.length + "\n";
// if (num_element_nodes > 45) { break; } // for debugging
} // closes loop over the childNodes of the Document
if (last_P_child_node_index < 0) { // if no cutoff was discovered; should never happen
cutoff_child_node_index = num_child_nodes;
cutoff_element_node_index = num_element_nodes;
}
} // closes check whether to read entire article
alert_string = "\nThe child_node_index and element_node_index cutoffs are " + cutoff_child_node_index + " and " + cutoff_element_node_index + ", respectively.\n";
// window.alert(alert_string);
// Count the words, paragraphs and prose size bytes by section
word_count = 0;
paragraph_count = 0;
list_item_count = 0;
prose_size_bytes = 0;
num_prose_counted_nodes = 0;
H2_anchor_index = 0;
for (child_node_index=0; child_node_index < cutoff_child_node_index; child_node_index++) {
child_node = child_nodes[child_node_index];
if (child_node.nodeType != 1) { continue; } // examine only Element nodes
element_node_index++;
if (child_node.nodeName == "H2") {
section_word_count.push(word_count);
section_paragraph_count.push(paragraph_count);
section_list_item_count.push(list_item_count);
section_prose_size_bytes.push(prose_size_bytes);
H2_anchor_index++;
word_count = 0;
paragraph_count = 0;
list_item_count = 0;
prose_size_bytes = 0;
}
// if the child node meets the criteria, add to the prose size, word and paragraph counts
if ((child_node.nodeName == "P") || (child_node.nodeName == "PRE")) {
untagged_text = child_node.innerHTML;
untagged_text = untagged_text.replace(/<sup>/ig,""); // keep simple superscript text
untagged_text = untagged_text.replace(/(<sup([^>]+)>)(.*?<\/sup>)/ig,""); // remove superscript text
untagged_text = untagged_text.replace(/(<([^>]+)>)/ig,""); // remove remaining tags
untagged_text = untagged_text.replace(/>/ig, ">"); // convert > to a single character >
untagged_text = untagged_text.replace(/</ig, "<"); // convert < to a single character <
untagged_text = untagged_text.replace(/&/ig, "&"); // convert & to a single character &
untagged_text = untagged_text.replace(/—/ig, ", "); // replace em-dashes with comma+space
spaced_text = untagged_text.replace(/ /ig, " "); // convert non-breaking spaces to spaces
spaced_text = spaced_text.replace(/ /ig, " "); // convert non-breaking spaces to spaces
spaced_text = spaced_text.replace(/\s+/ig, " "); // convert all whitespace to a single space
// spaced_text = filterStringForProseSizeCounting(untagged_text);
words = spaced_text.split(' ');
tentative_num_words = words.length;
if (tentative_num_words > 0) { // verify that the paragraph contributes text
num_words = 0;
num_characters = 0;
for (word_index=0; word_index<tentative_num_words; word_index++) {
temp_word = words[word_index];
del_num_characters = temp_word.length;
if (del_num_characters > 0) {
num_words++;
num_characters += del_num_characters;
}
}
if (num_words > 0) {
paragraph_count++;
num_prose_counted_nodes++;
word_count += num_words;
prose_size_bytes += num_characters;
num_spaces = num_words - 1;
prose_size_bytes += num_spaces; // add spaces to character count
child_node.style.cssText = "background-color:yellow";
// Code for testing output
if ((!show_section_diagnostics) && ((!show_lead_diagnostics) || (H2_anchor_index != 0))) {
continue;
}
diagnostic_string = "";
nonempty_word_index = 0;
temp_num_characters = 0;
for (word_index=0; word_index<tentative_num_words; word_index++) {
if ((word_index%45 == 1) && (word_index>45) && (display_individual_words)) {
window.alert(diagnostic_string);
diagnostic_string = "Continued from previous screen:\n\n";
}
temp_word = words[word_index];
del_num_characters = temp_word.length;
if (del_num_characters > 0) {
nonempty_word_index++;
temp_num_characters += del_num_characters;
diagnostic_string += "Section " + H2_anchor_index + ", Paragraph " + paragraph_count + ", Word " + nonempty_word_index + " : " + temp_word + " " + del_num_characters + " " + temp_num_characters + "\n";
}
}
temp_num_characters += num_spaces;
diagnostic_string += "Added " + num_spaces + " spaces to the byte count.\n\n";
if (display_individual_words) {
diagnostic_string += "\nEND of paragraph " + paragraph_count + " of Section " + H2_anchor_index + ": character count = " + temp_num_characters + " total= " + prose_size_bytes + "\n";
window.alert(diagnostic_string);
}
} // closes check for non-empty paragraph
} // tentative check for words
} else if ((child_node.nodeName == "UL") || (child_node.nodeName == "OL")) { // unordered and ordered lists
grandchild_nodes = child_node.childNodes; // not all LI elements because of possible nesting
num_grandchild_nodes = grandchild_nodes.length;
for (grandchild_node_index=0; grandchild_node_index<num_grandchild_nodes; grandchild_node_index++) {
grandchild_node = grandchild_nodes[grandchild_node_index];
if (grandchild_node.nodeName == "LI") {
untagged_text = grandchild_node.innerHTML;
untagged_text = untagged_text.replace(/<sup>/ig,""); // keep simple superscript text
untagged_text = untagged_text.replace(/(<sup([^>]+)>)(.*?<\/sup>)/ig,""); // remove superscript text
untagged_text = untagged_text.replace(/(<([^>]+)>)/ig,""); // remove remaining tags
untagged_text = untagged_text.replace(/>/ig, ">"); // convert > to a single character >
untagged_text = untagged_text.replace(/</ig, "<"); // convert < to a single character <
untagged_text = untagged_text.replace(/&/ig, "&"); // convert & to a single character &
untagged_text = untagged_text.replace(/—/ig, ", "); // replace em-dashes with comma+space
spaced_text = untagged_text.replace(/ /ig, " "); // convert non-breaking spaces to spaces
spaced_text = spaced_text.replace(/ /ig, " "); // convert non-breaking spaces to spaces
spaced_text = spaced_text.replace(/\s+/ig, " "); // convert all whitespace to a single space
words = spaced_text.split(' ');
tentative_num_words = words.length;
if (tentative_num_words > 0) { // verify that the list item contributes text
num_words = 0;
num_characters = 0;
for (word_index=0; word_index<tentative_num_words; word_index++) {
temp_word = words[word_index];
del_num_characters = temp_word.length;
if (del_num_characters > 0) {
num_words++;
num_characters += del_num_characters;
}
}
if (num_words > 0) {
list_item_count++;
num_prose_counted_nodes++;
word_count += num_words;
prose_size_bytes += num_characters;
num_spaces = num_words - 1;
prose_size_bytes += num_spaces; // add spaces to character count
child_node.style.cssText = "background-color:yellow";
// Code for testing output
if ((!show_section_diagnostics) && ((!show_lead_diagnostics) || (H2_anchor_index != 0))) {
continue;
}
diagnostic_string = "";
nonempty_word_index = 0;
temp_num_characters = 0;
for (word_index=0; word_index<tentative_num_words; word_index++) {
if ((word_index%45 == 1) && (word_index>45) && (display_individual_words)) {
window.alert(diagnostic_string);
diagnostic_string = "Continued from previous screen:\n\n";
}
temp_word = words[word_index];
del_num_characters = temp_word.length;
if (del_num_characters > 0) {
nonempty_word_index++;
temp_num_characters += del_num_characters;
diagnostic_string += "Section " + H2_anchor_index + ", Paragraph " + paragraph_count + ", List item " + list_item_count + ", Word " + nonempty_word_index + " : " + temp_word + " " + del_num_characters + "\n";
}
}
temp_num_characters += num_spaces;
diagnostic_string += "Added " + num_spaces + " spaces to the byte count.\n\n";
if (display_individual_words) {
diagnostic_string += "\nEND of list item " + list_item_count + " of Section " + H2_anchor_index + ": character count = " + temp_num_characters + " total= " + prose_size_bytes + "\n";
window.alert(diagnostic_string);
}
} // closes check for non-empty list item
} // tentative check for words
} // closes check for a list item (LI) node
} // closes loop over grandchild nodes of an ordered (OL) or unordered (UL) list
} else if (child_node.nodeName == "DL") { // discursive lists
grandchild_nodes = child_node.childNodes;
num_grandchild_nodes = grandchild_nodes.length;
for (grandchild_node_index=0; grandchild_node_index<num_grandchild_nodes; grandchild_node_index++) {
grandchild_node = grandchild_nodes[grandchild_node_index];
if ((grandchild_node.nodeName == "DT") || (grandchild_node.nodeName == "DD")) {
// Exceptions that shouldn't be counted
if (grandchild_node.childNodes.length > 0) {
temp_node_name = grandchild_node.childNodes[0].nodeName;
if ((temp_node_name == "DIV") || (temp_node_name == "SPAN")) { continue; }
}
if (grandchild_node.childNodes.length > 1) {
temp_node_name = grandchild_node.childNodes[1].nodeName;
if ((temp_node_name == "DIV") || (temp_node_name == "SPAN")) { continue; }
}
untagged_text = grandchild_node.innerHTML;
untagged_text = untagged_text.replace(/<sup>/ig,""); // keep simple superscript text
untagged_text = untagged_text.replace(/(<sup([^>]+)>)(.*?<\/sup>)/ig,""); // remove superscript text
untagged_text = untagged_text.replace(/(<([^>]+)>)/ig,""); // remove remaining tags
untagged_text = untagged_text.replace(/>/ig, ">"); // convert > to a single character >
untagged_text = untagged_text.replace(/</ig, "<"); // convert < to a single character <
untagged_text = untagged_text.replace(/&/ig, "&"); // convert & to a single character &
untagged_text = untagged_text.replace(/—/ig, ", "); // replace em-dashes with comma+space
spaced_text = untagged_text.replace(/ /ig, " "); // convert non-breaking spaces to spaces
spaced_text = spaced_text.replace(/ /ig, " "); // convert non-breaking spaces to spaces
spaced_text = spaced_text.replace(/\s+/ig, " "); // convert all whitespace to a single space
words = spaced_text.split(' ');
tentative_num_words = words.length;
if (tentative_num_words > 0) { // verify that the list item contributes text
num_words = 0;
num_characters = 0;
for (word_index=0; word_index<tentative_num_words; word_index++) {
temp_word = words[word_index];
del_num_characters = temp_word.length;
if (del_num_characters > 0) {
num_words++;
num_characters += del_num_characters;
}
}
if (num_words > 0) {
list_item_count++;
num_prose_counted_nodes++;
word_count += num_words;
prose_size_bytes += num_characters;
num_spaces = num_words - 1;
prose_size_bytes += num_spaces; // add spaces to character count
child_node.style.cssText = "background-color:yellow";
// Code for testing output
if ((!show_section_diagnostics) && ((!show_lead_diagnostics) || (H2_anchor_index != 0))) {
continue;
}
diagnostic_string = "";
nonempty_word_index = 0;
temp_num_characters = 0;
for (word_index=0; word_index<tentative_num_words; word_index++) {
if ((word_index%45 == 1) && (word_index>45) && (display_individual_words)) {
window.alert(diagnostic_string);
diagnostic_string = "Continued from previous screen:\n\n";
}
temp_word = words[word_index];
del_num_characters = temp_word.length;
if (del_num_characters > 0) {
nonempty_word_index++;
temp_num_characters += del_num_characters;
diagnostic_string += "Section " + H2_anchor_index + ", Paragraph " + paragraph_count + ", List item " + list_item_count + ", Word " + nonempty_word_index + " : " + temp_word + " " + del_num_characters + "\n"; }
}
temp_num_characters += num_spaces;
diagnostic_string += "Added " + num_spaces + " spaces to the byte count.\n\n";
if (display_individual_words) {
diagnostic_string += "\nEND of discursive list item " + list_item_count + " of Section " + H2_anchor_index + ": character count = " + temp_num_characters + " total= " + prose_size_bytes + "\n";
window.alert(diagnostic_string);
}
} // closes check for non-empty list item
} // tentative check for words
} // closes check for a discursive list item (DT or DD) node
} // closes loop over grandchild nodes of a discursive list DL
} else if (child_node.nodeName == "BLOCKQUOTE") {
grandchild_nodes = child_node.getElementsByTagName("P");
num_grandchild_nodes = grandchild_nodes.length;
for (grandchild_node_index=0; grandchild_node_index<num_grandchild_nodes; grandchild_node_index++) {
grandchild_node = grandchild_nodes[grandchild_node_index];
if (grandchild_node.nodeName == "P") {
untagged_text = grandchild_node.innerHTML;
untagged_text = untagged_text.replace(/<sup>/ig,""); // keep simple superscript text
untagged_text = untagged_text.replace(/(<sup([^>]+)>)(.*?<\/sup>)/ig,""); // remove superscript text
untagged_text = untagged_text.replace(/(<([^>]+)>)/ig,""); // remove remaining tags
untagged_text = untagged_text.replace(/>/ig, ">"); // convert > to a single character >
untagged_text = untagged_text.replace(/</ig, "<"); // convert < to a single character <
untagged_text = untagged_text.replace(/&/ig, "&"); // convert & to a single character &
untagged_text = untagged_text.replace(/—/ig, ", "); // replace em-dashes with comma+space
spaced_text = untagged_text.replace(/ /ig, " "); // convert non-breaking spaces to spaces
spaced_text = spaced_text.replace(/ /ig, " "); // convert non-breaking spaces to spaces
spaced_text = spaced_text.replace(/\s+/ig, " "); // convert all whitespace to a single space
words = spaced_text.split(' ');
tentative_num_words = words.length;
if (tentative_num_words > 0) { // verify that the list item contributes text
num_words = 0;
num_characters = 0;
for (word_index=0; word_index<tentative_num_words; word_index++) {
temp_word = words[word_index];
del_num_characters = temp_word.length;
if (del_num_characters > 0) {
num_words++;
num_characters += del_num_characters;
}
}
if (num_words > 0) {
// blockquote text adds to the word and byte totals, but is not counted as a paragraph, for now
num_prose_counted_nodes++;
word_count += num_words;
prose_size_bytes += num_characters;
num_spaces = num_words - 1;
prose_size_bytes += num_spaces; // add spaces to character count
child_node.style.cssText = "background-color:yellow";
// Code for testing output
if ((!show_section_diagnostics) && ((!show_lead_diagnostics) || (H2_anchor_index != 0))) {
continue;
}
diagnostic_string = "";
nonempty_word_index = 0;
temp_num_characters = 0;
for (word_index=0; word_index<tentative_num_words; word_index++) {
if ((word_index%45 == 1) && (word_index>45) && (display_individual_words)) {
window.alert(diagnostic_string);
diagnostic_string = "Continued from previous screen:\n\n";
}
temp_word = words[word_index];
del_num_characters = temp_word.length;
if (del_num_characters > 0) {
nonempty_word_index++;
temp_num_characters += del_num_characters;
diagnostic_string += "Section " + H2_anchor_index + ", Paragraph " + paragraph_count + ", Word " + nonempty_word_index + " : " + temp_word + " " + del_num_characters + "\n";
}
}
temp_num_characters += num_spaces;
diagnostic_string += "Added " + num_spaces + " spaces to the byte count.\n\n";
if (display_individual_words) {
diagnostic_string += "\nEND of BLOCKQUOTE in Section " + H2_anchor_index + ": character count = " + temp_num_characters + " total= " + prose_size_bytes + "\n";
window.alert(diagnostic_string);
}
} // closes check for non-empty list item
} // tentative check for words
} // closes check for a paragraph (P) node in a BLOCKQUOTE
} // closes loop over grandchild nodes in a BLOCKQUOTE
} else if (child_node.nodeName == "TABLE") {
if (child_node.className != "cquote") { continue; } // count only tables that are cquotes
grandchild_nodes = child_node.getElementsByTagName("TD");
num_grandchild_nodes = grandchild_nodes.length;
for (grandchild_node_index=0; grandchild_node_index<num_grandchild_nodes; grandchild_node_index++) {
grandchild_node = grandchild_nodes[grandchild_node_index];
if (grandchild_node.nodeName == "TD") {
untagged_text = grandchild_node.innerHTML;
untagged_text = untagged_text.replace(/<sup>/ig,""); // keep simple superscript text
untagged_text = untagged_text.replace(/(<sup([^>]+)>)(.*?<\/sup>)/ig,""); // remove superscript text
untagged_text = untagged_text.replace(/(<([^>]+)>)/ig,""); // remove remaining tags
untagged_text = untagged_text.replace(/>/ig, ">"); // convert > to a single character >
untagged_text = untagged_text.replace(/</ig, "<"); // convert < to a single character <
untagged_text = untagged_text.replace(/&/ig, "&"); // convert & to a single character &
untagged_text = untagged_text.replace(/—/ig, ", "); // replace em-dashes with comma+space
spaced_text = untagged_text.replace(/ /ig, " "); // convert non-breaking spaces to spaces
spaced_text = spaced_text.replace(/ /ig, " "); // convert non-breaking spaces to spaces
spaced_text = spaced_text.replace(/\s+/ig, " "); // convert all whitespace to a single space
words = spaced_text.split(' ');
tentative_num_words = words.length;
if (tentative_num_words > 0) { // verify that the list item contributes text
num_words = 0;
num_characters = 0;
for (word_index=0; word_index<tentative_num_words; word_index++) {
temp_word = words[word_index];
del_num_characters = temp_word.length;
if (del_num_characters > 0) {
num_words++;
num_characters += del_num_characters;
}
}
if (num_words > 0) {
// cquote text adds to the word and byte totals, but is not counted as a paragraph, for now
num_prose_counted_nodes++;
word_count += num_words;
prose_size_bytes += num_characters;
num_spaces = num_words - 1;
prose_size_bytes += num_spaces; // add spaces to character count
child_node.style.cssText = "background-color:yellow";
// Code for testing output
if ((!show_section_diagnostics) && ((!show_lead_diagnostics) || (H2_anchor_index != 0))) {
continue;
}
diagnostic_string = "";
nonempty_word_index = 0;
temp_num_characters = 0;
for (word_index=0; word_index<tentative_num_words; word_index++) {
if ((word_index%45 == 1) && (word_index>45) && (display_individual_words)) {
window.alert(diagnostic_string);
diagnostic_string = "Continued from previous screen:\n\n";
}
temp_word = words[word_index];
del_num_characters = temp_word.length;
if (del_num_characters > 0) {
nonempty_word_index++;
temp_num_characters += del_num_characters;
diagnostic_string += "Section " + H2_anchor_index + ", Paragraph " + paragraph_count + ", Word " + nonempty_word_index + " : " + temp_word + " " + del_num_characters + "\n";
}
}
temp_num_characters += num_spaces;
diagnostic_string += "Added " + num_spaces + " spaces to the byte count.\n\n";
if (display_individual_words) {
diagnostic_string += "\nEND of CQUOTE paragraph in Section " + H2_anchor_index + ": character count = " + temp_num_characters + " total= " + prose_size_bytes + "\n";
window.alert(diagnostic_string);
}
} // closes check for non-empty list item
} // tentative check for words
} // closes check for a paragraph (P) node in a CQUOTE
} // closes loop over grandchild nodes in a CQUOTE
} else if (child_node.nodeName == "DIV") { // Poems
if (child_node.className != "poem") { continue; } // allow only poem DIV's
grandchild_nodes = child_node.getElementsByTagName("P");
num_grandchild_nodes = grandchild_nodes.length;
for (grandchild_node_index=0; grandchild_node_index<num_grandchild_nodes; grandchild_node_index++) {
grandchild_node = grandchild_nodes[grandchild_node_index];
if (grandchild_node.nodeName == "P") {
untagged_text = grandchild_node.innerHTML;
untagged_text = untagged_text.replace(/<sup>/ig,""); // keep simple superscript text
untagged_text = untagged_text.replace(/(<sup([^>]+)>)(.*?<\/sup>)/ig,""); // remove superscript text
untagged_text = untagged_text.replace(/(<([^>]+)>)/ig,""); // remove remaining tags
untagged_text = untagged_text.replace(/>/ig, ">"); // convert > to a single character >
untagged_text = untagged_text.replace(/</ig, "<"); // convert < to a single character <
untagged_text = untagged_text.replace(/&/ig, "&"); // convert & to a single character &
untagged_text = untagged_text.replace(/—/ig, ", "); // replace em-dashes with comma+space
spaced_text = untagged_text.replace(/ /ig, " "); // convert non-breaking spaces to spaces
spaced_text = spaced_text.replace(/ /ig, " "); // convert non-breaking spaces to spaces
spaced_text = spaced_text.replace(/\s+/ig, " "); // convert all whitespace to a single space
words = spaced_text.split(' ');
tentative_num_words = words.length;
if (tentative_num_words > 0) { // verify that the list item contributes text
num_words = 0;
num_characters = 0;
for (word_index=0; word_index<tentative_num_words; word_index++) {
temp_word = words[word_index];
del_num_characters = temp_word.length;
if (del_num_characters > 0) {
num_words++;
num_characters += del_num_characters;
}
}
if (num_words > 0) {
// poem text adds to the word and byte totals, but is not counted as a paragraph, for now
num_prose_counted_nodes++;
word_count += num_words;
prose_size_bytes += num_characters;
num_spaces = num_words - 1;
prose_size_bytes += num_spaces; // add spaces to character count
child_node.style.cssText = "background-color:yellow";
// Code for testing output
if ((!show_section_diagnostics) && ((!show_lead_diagnostics) || (H2_anchor_index != 0))) {
continue;
}
diagnostic_string = "";
nonempty_word_index = 0;
temp_num_characters = 0;
for (word_index=0; word_index<tentative_num_words; word_index++) {
if ((word_index%45 == 1) && (word_index>45) && (display_individual_words)) {
window.alert(diagnostic_string);
diagnostic_string = "Continued from previous screen:\n\n";
}
temp_word = words[word_index];
del_num_characters = temp_word.length;
if (del_num_characters > 0) {
nonempty_word_index++;
temp_num_characters += del_num_characters;
diagnostic_string += "Section " + H2_anchor_index + ", Paragraph " + paragraph_count + ", Word " + nonempty_word_index + " : " + temp_word + " " + del_num_characters + "\n";
}
}
temp_num_characters += num_spaces;
diagnostic_string += "Added " + num_spaces + " spaces to the byte count.\n\n";
if (display_individual_words) {
diagnostic_string += "\nEND of <poem> in Section " + H2_anchor_index + ": character count = " + temp_num_characters + " total= " + prose_size_bytes + "\n";
window.alert(diagnostic_string);
}
} // closes check for non-empty list item
} // tentative check for words
} // closes check for a paragraph (P) node in a poem
} // closes loop over grandchild nodes in a poem
} // closes check for appropriate elements
} // closes loop over the child nodes
section_word_count.push(word_count);
section_paragraph_count.push(paragraph_count);
section_list_item_count.push(list_item_count);
section_prose_size_bytes.push(prose_size_bytes);
// Output the various counts
word_count_string = " word";
paragraph_count_string = " paragraph";
list_item_count_string = " list item";
prose_size_bytes_string = " byte";
if (section_word_count[0] != 1) { word_count_string += "s";}
if (section_paragraph_count[0] != 1) { paragraph_count_string += "s";}
if (section_list_item_count[0] != 1) { list_item_count_string += "s";}
if (section_prose_size_bytes[0] != 1) { prose_size_bytes_string += "s";}
alert_string = "Lead section: " + section_paragraph_count[0] + paragraph_count_string + ", " + section_list_item_count[0] + list_item_count_string + ", " + section_word_count[0] + word_count_string + ", " + section_prose_size_bytes[0] + prose_size_bytes_string + "\n\n";
total_word_count = section_word_count[0];
total_paragraph_count = section_paragraph_count[0];
total_list_item_count = section_list_item_count[0];
total_prose_size_bytes = section_prose_size_bytes[0];
num_sections = section_word_count.length;
for (section_index=1; section_index<num_sections; section_index++) {
total_word_count += section_word_count[section_index];
total_paragraph_count += section_paragraph_count[section_index];
total_list_item_count += section_list_item_count[section_index];
total_prose_size_bytes += section_prose_size_bytes[section_index];
word_count_string = " word";
paragraph_count_string = " paragraph";
list_item_count_string = " list item";
prose_size_bytes_string = " byte";
if (section_word_count[section_index] != 1) { word_count_string += "s";}
if (section_paragraph_count[section_index] != 1) { paragraph_count_string += "s";}
if (section_list_item_count[section_index] != 1) { list_item_count_string += "s";}
if (section_prose_size_bytes[section_index] != 1) { prose_size_bytes_string += "s";}
alert_string += "Section " + section_index + " : " + section_paragraph_count[section_index] + paragraph_count_string + ", " + section_list_item_count[section_index] + list_item_count_string + ", " + section_word_count[section_index] + word_count_string + ", " + section_prose_size_bytes[section_index] + prose_size_bytes_string + "\n";
}
if (num_sections>1) {alert_string += "\n";} // Make space for the totals
word_count_string = " word";
paragraph_count_string = " paragraph";
list_item_count_string = " list item";
prose_size_bytes_string = " byte";
if (total_word_count != 1) { word_count_string += "s";}
if (total_paragraph_count != 1) { paragraph_count_string += "s";}
if (total_list_item_count != 1) { list_item_count_string += "s";}
if (total_prose_size_bytes != 1) { prose_size_bytes_string += "s";}
alert_string += "Totals: " + total_paragraph_count + paragraph_count_string + ", " + total_list_item_count + list_item_count_string + ", " + total_word_count + word_count_string + ", " + total_prose_size_bytes + prose_size_bytes_string + "\n";
window.alert(alert_string);
// Count the article images
num_nonicon_images = 0;
num_raw_images = document.images.length;
alert_string = "This document has " + num_raw_images + " images.\n";
for (image_index=0; image_index<num_raw_images; image_index++) {
temp_image = document.images[image_index];
parent_node = temp_image.parentNode;
grandparent_node = parent_node.parentNode;
greatgrandparent_node = grandparent_node.parentNode;
num_pixels = temp_image.width * temp_image.height;
if (temp_image.src.match(/Replace_this_image_male\.svg/)) { continue; }
if (temp_image.src.match(/Replace_this_image_female\.svg/)) { continue; }
if (num_pixels > 5000) { num_nonicon_images++; }
}
if (num_nonicon_images == 1) {
alert_string = "This document has 1 image with more than 5000 pixels.\n\n";
} else {
alert_string = "This document has " + num_nonicon_images + " images with more than 5000 pixels.\n\n";
}
image_counter = 0;
for (image_index=0; image_index<num_raw_images; image_index++) {
temp_image = document.images[image_index];
parent_node = temp_image.parentNode;
grandparent_node = parent_node.parentNode;
greatgrandparent_node = grandparent_node.parentNode;
num_pixels = temp_image.width * temp_image.height;
if (temp_image.src.match(/Replace_this_image_male\.svg/)) { continue; }
if (temp_image.src.match(/Replace_this_image_female\.svg/)) { continue; }
if (num_pixels < 5001) { continue; }
image_counter++;
alert_string += image_counter + " " + temp_image.width + "x" + temp_image.height + " " + num_pixels + " ";
path_names = temp_image.src.split("/");
file_name = path_names.pop();
file_name = file_name.replace(/^(\d+)px-/, "");
alert_string += file_name + "\n";
}
window.alert(alert_string);
return;
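// NOTE: everything from here to the end of the function is currently unreachable because of the unconditional return directly above.
// Count the article tables and check for infoboxes and navigation templates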
num_raw_tables = document.getElementsByTagName("table").length;
// Check for className = "infobox vcard" or "navbox-group"
alert_string = "This document has " + num_raw_tables + " tables.\n";
window.alert(alert_string);
// Count the article references
num_raw_references = document.getElementsByTagName("li").length;
// Count the article interwikis
num_raw_interwikis = document.getElementsByTagName("li").length;
// Count the article categories
num_raw_categories = document.getElementsByTagName("table").length;
// Count the article anchors; for each anchor...
alert_string = "This document has " + document.anchors.length + " anchors:\n";
for (anchor_index=0; anchor_index<document.anchors.length; anchor_index++) {
temp_anchor = document.anchors[anchor_index];
alert_string += "Name " + anchor_index + ": " + temp_anchor.name + "\n";
}
window.alert(alert_string);
} // closes function articleStructure()
// Add a "structure" tab to the p-cactions portlet whose link runs
// articleStructure() (access key "g").
//
// jQuery's ready handler replaces the deprecated wikibits addOnloadHook(), and
// the mediawiki.util module is requested explicitly because ResourceLoader does
// not guarantee that mw.util is available when a user script executes.
$(function () {
	// Callback form of mw.loader.using(): runs once mediawiki.util is ready.
	mw.loader.using(['mediawiki.util'], function () {
		mw.util.addPortletLink('p-cactions', 'javascript:articleStructure()', 'structure', 'ca-structure', 'Structure of the article', 'g', '');
	});
});
//</pre>