User:Pathoschild/standardise.js

From Wikisource
Jump to navigation Jump to search
Note: After saving, changes may not occur immediately. Click here to learn how to bypass your browser's cache.
  • Firefox / Safari: Hold Shift while clicking Reload, or press either Ctrl-F5 or Ctrl-R (Cmd-R on a Mac)
  • Google Chrome: Press Ctrl-Shift-R (Cmd-Shift-R on a Mac)
  • Internet Explorer: Hold Ctrl while clicking Refresh, or press Ctrl-F5
  • Opera: Clear the cache in Tools → Preferences

For details and instructions about other browsers, see Wikipedia:Bypass your cache.

/* global $, mw, pathoschild */
// <syntaxhighlight lang="javascript">

/*************
*** Wikisource standardization extension (0.6)
*** for [[m:TemplateScript]]
*** by [[m:user:Pathoschild]]
*************/

/**
 * TemplateScript adds configurable templates and scripts to the sidebar, and adds an example regex editor.
 * @see https://meta.wikimedia.org/wiki/TemplateScript
 * @update-token [[File:pathoschild/templatescript.js]]
 */
// Load the TemplateScript library through mw.loader from a protocol-relative URL.
// standardize() falls back to pathoschild.TemplateScript.Context when it is called without
// an editor; presumably that object is defined by this library (confirm).
mw.loader.load('//tools-static.wmflabs.org/meta/scripts/pathoschild.templatescript.js');

/**
 * Standardise the wikitext held by a TemplateScript editor.
 *
 * Steps, in order:
 *  1. main namespace (wgNamespaceNumber 0): rebuild the {{header}} template as {{header2}}
 *     and join lines that were broken in the middle of a paragraph;
 *  2. author namespace (wgNamespaceNumber 102): rebuild the {{author}} template, filling in
 *     years and names from categories or from the old `dates` / `name` parameters;
 *  3. every namespace: tidy template, heading, category, link and list syntax, then move
 *     license templates, categories and interlanguage links to the end of the page;
 *  4. set the edit summary.
 *
 * NOTE(review): several editor.replace() calls pass a number as a third argument. It looks
 * like a repeat count inherited from the older regex menu framework; confirm that the
 * editor's replace() honours it, otherwise those replacements only run once.
 *
 * @param {object} [editor] The editor to change. This function calls its get(), set(),
 *     replace(), escape(), unescape() and setEditSummary() methods. Defaults to
 *     pathoschild.TemplateScript.Context when omitted.
 */
function standardize(editor) {
	editor = editor || pathoschild.TemplateScript.Context; // backwards compatibility for regex menu framework

	var namespace = mw.config.get('wgNamespaceNumber');

	/*******************
	*** helpers
	*******************/
	/**
	 * Read a parameter from a header template whose parameter delimiters were rewritten to '~'.
	 * @param {string} template The template text.
	 * @param {string} name The parameter name.
	 * @returns {string|null} The value of the last '~name=' occurrence, or null if there is none.
	 */
	function readHeaderParam(template, name) {
		var found = template.match(new RegExp('^[\\s\\S]*~' + name + '=([^~}]*)[\\s\\S]*$'));
		return found ? found[1] : null;
	}

	/**
	 * Convert an extracted year like '1850' or '50 BCE' into a number that can be compared.
	 * @param {string} value The extracted text.
	 * @returns {number|null} The year (negative for BCE), or null if the text is not a year.
	 */
	function parseYear(value) {
		var found = /^(\d+)( BCE)?$/i.exec(value);
		if(!found)
			return null;
		var year = parseInt(found[1], 10);
		return found[2] ? -year : year;
	}

	/**
	 * Get whether a link matched by the interlanguage pattern is really an interlanguage link,
	 * i.e. its prefix is not one of the known non-language interwiki prefixes.
	 */
	function isLanguageLink(link) {
		return !/\[\[(?:c2|cej|dcc|mw|rev|rfc|svn|wqy):/i.test(link);
	}

	/* append each value to the end of the text, followed by a newline */
	function appendLines(lines) {
		editor.set(editor.get() + lines.join('\n') + '\n');
	}

	// compare function for case-insensitivity
	// courtesy <http://www.webreference.com/js/tips/000430.html>
	function caseless(a, b) {
		a = a.toLowerCase();
		b = b.toLowerCase();
		if (a < b) return -1;
		if (a > b) return 1;
		return 0;
	}

	// hide exceptions (presumably kept out of reach of the replacements until editor.unescape() below)
	editor.escape(/<(nowiki|poem|pre|source)[^>]*>[\s\S]*?<\/\1>/ig);

	// main namespace
	if(namespace === 0) {
		var pageTitle = mw.config.get('wgTitle');

		/*******************
		*** header normalization
		*******************/
		/* prepare template for parsing if present */
		if(editor.get().match(/{{\s*(?:msg:|template:)?\s*header/i)) {
			editor
				/* header syntax */
				// the prefix is optional (as in the check above) so '{{ header' and '{{Header' are normalized too
				.replace(/{{\s*(?:msg:|template:)?\s*header/i,'{{header')

				/* fix delimiters */
				// header parameters
				.replace(/({{header2?[^\n]*)[\n\s]*\|[\s\n]*(previous|next|title|section|author|notes)\s*=\s*/ig,'$1~$2=',6)

				// nested templates (up to 5)
				.replace(/({{header[\s\S]*?{{[^\|}]*)\|/ig,'$1%%pipe%%',5)
				.replace(/({{header[\s\S]*?){{([^}]+)}}/ig,'$1%%leftcurlies%%$2%%rightcurlies%%',5)

				/* cleanup header */
				// deprecated arrows and brackets
				.replace(/((?:previous|next)[^~]*?)(?:&larr;|&rarr;|←|→)/ig,'$1')
				.replace(/(section\s*=\s*)\(([^~}]*)\)/,'$1$2')

				// trailing whitespace
				.replace(/{{(header[^}]+)\s*}}/i,'{{$1}}')

				// non-semantic line-breaks
				.replace(/({{header[^}]+(?:title|section|author|section)=[^~]*)<[^>]*br[^>]*>/ig,'$1',8);
		}

		/* prepare values */
		// String(null) is 'null' when there is no header, which contains no parameters
		var headerTemplate = String(editor.get().match(/{{header[\s\S]+?}}/i));

		// chapter number, if the page is named like 'Work/Chapter 3'
		var chapterMatch = pageTitle.match(/\/Chapter (\d+)$/);
		var chapterNumber = chapterMatch ? parseInt(chapterMatch[1], 10) : null;

		/* parse */
		// title (defaults to the root page name)
		var headerTitle = readHeaderParam(headerTemplate, 'title');
		if(headerTitle === null) {
			var rootTitle = pageTitle.match(/^[^\/]+/);
			headerTitle = rootTitle ? rootTitle[0] : pageTitle;
		}

		// author
		var headerAuthor = readHeaderParam(headerTemplate, 'author');
		if(headerAuthor === null)
			headerAuthor = '';

		// section (defaults to the last subpage name)
		var headerSection = readHeaderParam(headerTemplate, 'section');
		if(headerSection === null)
			headerSection = pageTitle.match(/.+\//) ? pageTitle.replace(/^.*\/([^\/]+)/,'$1') : '';

		// previous (defaults to the preceding chapter, if any)
		var headerPrevious = readHeaderParam(headerTemplate, 'previous');
		if(headerPrevious === null) {
			if(chapterNumber !== null && chapterNumber - 1 > 0)
				headerPrevious = '[[../Chapter ' + (chapterNumber - 1) + '|Chapter ' + (chapterNumber - 1) + ']]';
			else
				headerPrevious = '';
		}

		// next (defaults to the following chapter)
		var headerNext = readHeaderParam(headerTemplate, 'next');
		if(headerNext === null) {
			if(chapterNumber !== null)
				headerNext = '[[../Chapter ' + (chapterNumber + 1) + '|Chapter ' + (chapterNumber + 1) + ']]';
			else
				headerNext = '';
		}

		// notes
		var headerNotes = readHeaderParam(headerTemplate, 'notes');
		if(headerNotes === null)
			headerNotes = '';
		else
			headerNotes = headerNotes.replace(/^([\s\S]*?)[\n\s]*$/,'$1'); // trailing whitespace

		/* replace template */
		// remove the old header first, so that the text read below no longer contains it
		editor.replace(/{{header[^}]*}}\n*/ig, '');
		editor.set(
			'{{header2'
			+ '\n | title    = ' + headerTitle
			+ '\n | author   = ' + headerAuthor
			+ '\n | section  = ' + headerSection
			+ '\n | previous = ' + headerPrevious
			+ '\n | next     = ' + headerNext
			+ '\n | notes    = ' + headerNotes
			+ '\n}}\n'
			+ editor.get()
		);

		/*******************
		*** fix false newlines
		*******************/
		editor
			/* replace newlines with placeholders */
			.replace(/([^\n])\n(\s*[^\n])/ig,'$1%%newline%%$2')

			/* selectively restore legitimate newlines */
			// paragraphs
			.replace(/%%newline%%%%newline%%|%%newline%%\n|\n%%newline%%/ig,'\n\n')

			// templates
			.replace(/%%newline%%(\s*(?:}}|\|))/ig,'\n$1') // before
			.replace(/}}\s*%%newline%%/ig,'}}\n') // after

			// images, categories, interwiki links
			.replace(/%%newline%%(\s*\[\[(?:Image|Category|[^:]+):[^\]]+\]\])/ig,'\n$1')
			.replace(/(\s*\[\[(?:Image|Category|[^:]+):[^\]]+\]\])%%newline%%/ig,'$1\n')

			// lists
			.replace(/%%newline%%([*#:;])/ig,'\n$1') // lists
			.replace(/([*#:;][^\n]*)%%newline%%/ig,'$1\n') // newlines closing list items

			// tables
			.replace(/%%newline%%{\|/ig,'\n{|')
			.replace(/{\|%%newline%%/ig,'{|\n')

			// rules
			.replace(/%%newline%%(----+)/g,'\n$1')
			.replace(/(----+)%%newline%%/g,'$1\n')

			// tags
			.replace(/(<[^>\n]+>)\s*%%newline%%/ig,'$1\n')
			.replace(/%%newline%%(<[^>\n]+>)/ig,'\n$1')

			/* remove remaining */
			.replace(/-%%newline%%([^\s])/ig,'-$1') // hyphenated words
			.replace(/\s*%%newline%%\s*/ig,' '); // all others

		/*******************
		*** Cleanup
		*******************/
		editor
			/* restore delimiters */
			.replace(/%%pipe%%/g,'|')
			.replace(/%%leftcurlies%%/g,'{{')
			.replace(/%%rightcurlies%%/g,'}}');
	}

	/*******************
	*** Authorspace
	*******************/
	if(namespace === 102) {
		/*******************
		*** {{author}} normalization
		*******************/
		editor
			/* fix delimiters */
			.replace(/[\n\s]*\|\s*((?:first|last)name|last_initial|(?:birth|death)year|description|image|(?:wikipedia|wikiquote|commons)_link|dates|name|defaultsort)\s*=\s*/ig,'~$1=') // author parameter delimiters
			.replace(/({{author[\s\S]*?{{[^\|}]*)\|/ig,'$1%%pipe%%',5) // other template pipes
			.replace(/({{author[\s\S]*?){{([^}]+)}}/ig,'$1%%leftcurlies%%$2%%rightcurlies%%',5) // other template delimiters

			/* cleanup */
			.replace(/{{(author[^}]+)\s*}}/i,'{{$1}}') // rm trailing whitespace

			/* place standard template and move like parameters */
			.replace(/{{author/i,'{{author\n |firstname      =\n |lastname       =\n |last_initial   =\n |birthyear      =\n |deathyear      =\n |description    =\n |image          =\n |wikipedia_link =\n |wikiquote_link =\n |commons_link   =\n}}\n{{author')
			.replace(/(author[\s\S]*?\|((?:first|last)?name|last_initial|(?:birth|death)year|description|image|(?:wikipedia|wikiquote|commons)_link|dates|defaultsort)\s*)=([\s\S]*?)~?\2=([^~]*)/i,'$1=$4$3',10);

		/* get dates if necessary */
		if(editor.get().match(/(?:birth|death)year\s*=\s*\n/)) {
			// cannibalise categories
			editor
				.replace(/(birthyear\s*)=(\s*\n[\s\S]*?)\n?\[\[\s*Category\s*:\s*(\d+(?:\s*BCE)?) births\s*[^\]]*\]\]/,'$1=$3$2')
				.replace(/(deathyear\s*)=(\s*\n[\s\S]*?)\n?\[\[\s*Category\s*:\s*(\d+(?:\s*BCE)?) deaths\s*[^\]]*\]\]/,'$1=$3$2');

			// if that failed, parse from old template
			if(editor.get().match(/(?:birth|death)year\s*=\s*\n/) && editor.get().match(/~dates=[^~]/)) {
				/* get dates */
				// get raw parameter
				var oldDates = editor.get().replace(/^[\s\S]*dates=[^\d~}]*([^~}]+)[\s\S]*$/,'$1');
				// fix eras
				// NOTE(review): 'BC?E' matches 'BCE' and 'BE' but not 'BC' — confirm whether 'BCE?' was intended
				oldDates = oldDates.replace(/^(\d+)\s*BC?E/ig,'$1 BCE');

				// extract dates (each is left as the whole raw value if its pattern doesn't match)
				var birthYear = oldDates.replace(/^(\d+(?: BCE)?)[\s\S]*$/ig,'$1');
				var deathYear = oldDates.replace(/^\d+[^\d]+?(\d+(?: BCE)?)$/ig,'$1');

				// compare as numbers: as strings, '1037' sorts before '980' and an unparsed value passes
				var birthValue = parseYear(birthYear);
				var deathValue = parseYear(deathYear);

				/* fill in empty parameters */
				if(birthValue !== null && editor.get().match(/birthyear\s*=\s*\n/))
					editor.replace(/(birthyear\s*)=/,'$1='+birthYear);
				if(birthValue !== null && deathValue !== null && deathValue > birthValue && editor.get().match(/deathyear\s*=\s*\n/))
					editor.replace(/(deathyear\s*)=/,'$1='+deathYear);
			}
		}

		/* get names */
		// cannibalise name field (only when the old template has a non-empty name to read,
		// otherwise the extraction below would return the whole page)
		if(editor.get().match(/(?:first|last)name\s*=\s*\n/) && editor.get().match(/~name=[^~}]/)) {
			// extract: everything before the last run of whitespace is the first name
			var fullName = editor.get().replace(/^[\s\S]*~name=([^~}]*)[\s\S]*/,'$1');
			var firstName = fullName.replace(/([\s\S]+)\s+[\s\S]*/,'$1');
			var lastName = fullName.replace(/[\s\S]+\s+([\s\S]*)/,'$1');

			// fill in empty parameters
			if(editor.get().match(/firstname\s*=\s*\n/))
				editor.replace(/(firstname\s*)=/,'$1='+firstName);
			if(editor.get().match(/lastname\s*=\s*\n/))
				editor.replace(/(lastname\s*)=/,'$1='+lastName);
		}

		/* cleanup */
		editor
			// remove old template
			.replace(/({{author[\s\S]*?)[\n\s]*{{author[^}]*}}[\n\s]*/ig,'$1\n\n')

			// restore delimiters
			.replace(/%%pipe%%/g,'|')
			.replace(/%%leftcurlies%%/g,'{{')
			.replace(/%%rightcurlies%%/g,'}}')

			// fix whitespace
			.replace(/((?:(?:first|last)name|last_initial|(?:birth|death)year|description|image|(?:wikipedia|wikiquote|commons)_link)\s*)=\s*/ig,'$1= ')
			.replace(/= \|/g,'= \n |')
			.replace(/= }}/g,'= \n}}')

			/* remove old categories */
			.replace(/\[\[\s*Category\s*:\s*\d+[^\]]*?(?:births|deaths)[^\]]*\]\]\n?/ig,'') // authors by year
			.replace(/\[\[\s*Category\s*:\s*(?:Ancient|Early modern|Medieval|Modern|Renaissance) authors[^\]]*\]\]\n?/ig,''); // authors by era

		/*******************
		*** Other tweaks
		*******************/
		/* update license templates */
		editor.replace(/{{\s*(?:msg:|template:)?(?:author-)?(PD-[^\|\}]+)(?:\|[^}]*)?}}/ig,'{{$1}}');

		/* normalize dates */
		editor.replace(/^([#*:]+ \[\[[^\]]+\]\]),\s*(\d+)/mig,'$1 ($2)');
	}

	/*******************
	*** miscellaneous cleanup
	*******************/
	editor
		/* templates */
		.replace(/{{\s*(?:msg:|template:)?([^}]+)}}/ig,'{{$1}}')

		/* syntax */
		// headers
		.replace(/\n*^(=+)\s*(.*?)\s*\1\s*/mig,'\n\n$1$2$1\n') // whitespace
		.replace(/=\n+=/ig,'=\n=') // fix consecutive headers

		// categories
		// (the literal is split in two; presumably so the wiki doesn't read it as a category link on this script page)
		.replace(/\[\[\s*category\s*:\s*([^\|\]]+)(?:\s*(\|)([^\]]*))?\s*\]\]/ig,'[[' + 'category:$1$2$3]]')

		//links
		.replace(/\[\[\s*([^\|\]]+?)\s*(?:(\|)\s*([^\]]+?)\s*)?\]\]/ig,'[[$1$2$3]]') // redundant starting and ending whitespace
		.replace(/\[\[([^\|\]]+?)\s*\|\s*\1\]\]/ig,'[[$1]]') // redundant link text
		.replace(/\[\[([^\|\]]+?)_/ig,'[[$1 ',5) // underscores

		// lists
		.replace(/^([*#:]+)\s*/mig,'$1 ');


	/*******************
	*** sort elements
	*******************/
	/* store elements and remove from code */
	// categories
	var categories = editor.get().match(/\[\[category:[^\]]+\]\]/ig) || [];
	editor.replace(/\[\[category:[^\]]+\]\]\n?/ig,'');

	// interlanguage links: collect only real language links, and remove only those from the
	// text, so links with a known non-language prefix (like [[mw:...]]) stay where they are
	var interwikilinks = (editor.get().match(/\[\[[a-z]{2,3}(?:-[^:\|\]]+)?:[^\]]+\]\]/ig) || []).filter(isLanguageLink);
	if(interwikilinks.length > 0) {
		editor.set(editor.get().replace(/\[\[[a-z]{2,3}(?:-[^:\|\]]+)?:[^\]]+\]\]\n?/ig, function(link) {
			return isLanguageLink(link) ? '' : link;
		}));
	}

	// license templates
	var licenses = editor.get().match(/{{(?:PD-|GFDL)[^}]*}}/ig) || [];
	editor.replace(/{{(?:PD-|GFDL)[^}]*}}\n?/ig,'');

	/* sort and re-add */
	// initial whitespace
	editor.replace(/[\s\n]*$/,'\n\n');

	// licenses, followed by a blank line
	if(licenses.length > 0) {
		appendLines(licenses);
		editor.set(editor.get() + '\n');
	}

	// categories (sorted), followed by a blank line
	if(categories.length > 0) {
		categories.sort(caseless);
		appendLines(categories);
		editor.set(editor.get() + '\n');
	}

	// interlanguage links (sorted)
	if(interwikilinks.length > 0) {
		interwikilinks.sort(caseless);
		appendLines(interwikilinks);
	}

	/* restore exceptions */
	editor.unescape();

	/* edit summary */
	editor.setEditSummary('[[WS:STYLE|standardization]], updates, and cleanup with [[m:TemplateScript|regex]]');
}
//</syntaxhighlight>