User:Inductiveload/dp reformat.js

From Wikisource
Jump to navigation Jump to search
Note: After saving, changes may not occur immediately. Click here to learn how to bypass your browser's cache.
  • Firefox / Safari: Hold Shift while clicking Reload, or press either Ctrl-F5 or Ctrl-R (Cmd-R on a Mac)
  • Google Chrome: Press Ctrl-Shift-R (Cmd-Shift-R on a Mac)
  • Internet Explorer: Hold Ctrl while clicking Refresh, or press Ctrl-F5
  • Opera: Clear the cache in Tools → Preferences

For details and instructions about other browsers, see Wikipedia:Bypass your cache.

(function($, mw) {

	"strict";

	// preload what we need
	mw.loader.load(['oojs-ui-core', 'oojs-ui-windows', 'oojs-ui-widgets']);

	/**
	 * Shared state of the gadget.
	 *
	 *  - signature: prefix of the mw.hook name through which the config is
	 *               published (signature + ".config")
	 *  - ui_init:   flipped to true once the dialog class has been created
	 *  - ui:        receives the lazily created dialog class
	 *  - cfg:       settings that a hook listener may override
	 */
	var ImportDP = {};

	ImportDP.signature = "import_dp";
	ImportDP.ui_init = false;
	ImportDP.ui = {};
	ImportDP.cfg = {
		// retain DP comments as Wikicode comments <!-- like this -->
		keep_comments: false
	};

	/**
	 * Per-wiki replacement templates, keyed by the first label of the server
	 * name ("en", "fr", ...). String values are used as String.replace()
	 * replacement patterns (so "$1" refers to the first capture group);
	 * function values are used as replacement callbacks or formatters.
	 * Entries that a wiki does not define are simply skipped by the formatter.
	 */
	var domains = {};

	// English Wikisource
	domains.en = {
		page_end_nop: "\n{{nop}}",
		ch: "{{ch|$1}}\n\n",
		sc_s: "{{small caps|",
		dhr: "\n{{dhr}}\n",
		peh: "{{peh}}",
		bar_s: "{{bar|",
		missing_img: "{{missing image}}\n$1\n",

		// replacement callback: (whole match, sidenote text)
		sidenote: function(m, content) {
			var single_line = content.split("\n").join(" ");
			return "\n\n{{right sidenote|" + single_line + "}}\n";
		},

		// receives the number of dots in the ellipsis
		dots: function(dots) {
			return dots === 3 ? "{{...}}" : "{{...|" + dots + "}}";
		}
	};

	// French Wikisource
	domains.fr = {
		page_end_nop: "\n{{nulle}}",
		ch: "{{centré|$1}}\n\n",
		sc_s: "{{sc|",
		dhr: "\n{{dhr}}\n"
		// not yet known for this wiki: peh, missing_img, sidenote
	};

	/**
	 * Create the OOUI dialog class that asks for the DP import parameters.
	 *
	 * The class derives from OO.ui.ProcessDialog, so this must only be called
	 * once the OOUI modules are available.
	 *
	 * The dialog reads a `save_callback` function from its opening data. When
	 * the user chooses "Done", that callback receives an object with:
	 *   - index:       the text of the Index field
	 *   - offset:      the Offset field as an integer (0 when empty/invalid)
	 *   - skip_layout: 'noskip', 'skipodd' or 'skipeven'
	 *
	 * @return {Function} the ImportDPParamDialog constructor
	 */
	function create_ImportDPParamDialog() {

		var ImportDPParamDialog = function(config) {
			ImportDPParamDialog.super.call(this, config);

			// mixin constructors
			// Mixin.call(this);
		};

		OO.inheritClass(ImportDPParamDialog, OO.ui.ProcessDialog);
		// OO.mixinClass(ImportDPParamDialog, Mixin);

		// Specify a name for .addWindows()
		ImportDPParamDialog.static.name = 'ImportDP';
		ImportDPParamDialog.static.title = 'DP import';

		ImportDPParamDialog.static.actions = [
			{ action: 'save', label: 'Done', flags: 'primary' },
			{ label: 'Cancel', flags: 'safe' }
		];

		// Build the form: Index name, page offset and page-skipping mode.
		ImportDPParamDialog.prototype.initialize = function () {
			ImportDPParamDialog.super.prototype.initialize.call(this);
			// Create and append a layout and some content.
			this.fieldset = new OO.ui.FieldsetLayout({
				label: 'Parameters',
				css: {"margin": "0 16px"}
			});

			this.$body.append(this.fieldset.$element);

			// NOTE(review): the initial Index name and offset below are
			// hard-coded sample values; the user is expected to edit them.
			this.index_chooser = new OO.ui.TextInputWidget( {
					value: 'The ways of war - Kettle - 1917.pdf'
			} );

			var layout = new OO.ui.FieldLayout(this.index_chooser, {
				label: 'Index',
				help: 'The index to split this DP text to',
				align: 'right'
			});

			this.offset_input = new OO.ui.NumberInputWidget( {
				value: 6,
				step: 1,
				// optional sign followed by digits; note this also accepts
				// an empty field, which the save action treats as 0
				validate: function(s) {
					return /^[\-+]?[0-9]*$/.test(s);
				}
			});

			var offset_layout = new OO.ui.FieldLayout(this.offset_input, {
				label: 'Offset',
				help: 'The offset to the Index pages. Positive means the Index page number is higher than the DP image number.',
				align: 'right'
			});


			this.skip_input = new OO.ui.ButtonSelectWidget( {
				items: [
					new OO.ui.ButtonOptionWidget( {
						data: 'noskip',
						label: 'No skip',
						title: 'No page skipping: include every original page'
					} ),
					new OO.ui.ButtonOptionWidget( {
						data: 'skipodd',
						label: 'Skip odd',
						title: 'Skip odd pages (merge with previous page)'
					} ),
					new OO.ui.ButtonOptionWidget( {
						data: 'skipeven',
						label: 'Skip even',
						title: 'Skip even pages (merge with previous page)'
					} )
				]
			} );
			this.skip_input.selectItemByData( 'noskip' );

			var skip_layout = new OO.ui.FieldLayout(this.skip_input, {
				label: 'Skip',
				help: 'Skip pages: can be used when the PG/DP text is split per-column rather than per-page',
				align: 'right'
			});

			this.fieldset.addItems([layout, offset_layout, skip_layout]);
		};

		// "save" hands the collected parameters to the callback and closes;
		// every other action (i.e. Cancel) just closes the dialog.
		ImportDPParamDialog.prototype.getActionProcess = function (action) {
			var dialog = this;

			if (action === "save") {
				return new OO.ui.Process( function () {

					// An empty or unparsable offset field would give NaN and
					// end up in the page titles, so fall back to 0.
					var offset = parseInt(dialog.offset_input.getValue(), 10);
					var selected = dialog.skip_input.findSelectedItem();

					var params = {
						index: dialog.index_chooser.getValue(),
						offset: isNaN(offset) ? 0 : offset,
						skip_layout: selected ? selected.getData() : 'noskip'
					};

					// Without a callback there is nothing to report to, but
					// the dialog must still close.
					if (typeof dialog.save_callback === "function") {
						dialog.save_callback(params);
					}
					dialog.close();
				});
			}

			return new OO.ui.Process( function () {
				//dialog.cancel_callback();
				dialog.close();
			});
		};

		// Remember the callback passed in the opening data.
		ImportDPParamDialog.prototype.getSetupProcess = function(data) {
			data = data || {};

			var dialog = this;

			return ImportDPParamDialog.super.prototype.getSetupProcess.call(this, data)
				.next(function() {

					dialog.save_callback = data.save_callback;
				});
		};

		ImportDPParamDialog.prototype.getBodyHeight = function () {
			// Note that "expanded: false" must be set in the panel's configuration for this to work.
			// When working with a stack layout, you can use:
			//   return this.panels.getCurrentItem().$element.outerHeight( true );
			return 300;
		};

		return ImportDPParamDialog;
	}

	// Named characters written in square brackets in the source text.
	// replace_chars() turns every key K into the pattern \[K\] and replaces
	// each occurrence with the mapped character, e.g. "[alpha]" -> "α".
	// The keys are used verbatim as regular-expression source, so a new key
	// must not contain unescaped regex metacharacters.
	var chars = {
		// Greek alphabet: capitalised key -> capital letter
		Alpha: 'Α', alpha: 'α',
		Beta: 'Β', beta: 'β',
		Gamma: 'Γ', gamma: 'γ',
		Delta: 'Δ',  delta: 'δ',
		Epsilon: 'Ε', epsilon: 'ε',
		Zeta: 'Ζ', zeta: 'ζ',
		Eta: 'Η', eta: 'η',
		Theta: 'Θ', theta: 'θ',
		Iota: 'Ι', iota: 'ι',
		Kappa: 'Κ', kappa: 'κ',
		Lambda: 'Λ', lambda: 'λ',
		Mu: 'Μ', mu: 'μ',
		Nu: 'Ν', nu: 'ν',
		Xi: 'Ξ', xi: 'ξ',
		Omicron: 'Ο', omicron: 'ο',
		Pi: 'Π', pi: 'π',
		Rho: 'Ρ', rho: 'ρ',
		Sigma: 'Σ', sigma: 'σ',
		Tau: 'Τ', tau: 'τ',
		Upsilon: 'Υ', upsilon: 'υ',
		Phi: 'Φ', phi: 'φ',
		Chi: 'Χ', chi: 'χ',
		Psi: 'Ψ', psi: 'ψ',
		Omega: 'Ω', omega: 'ω',
		// ligatures and other special letters
		AE: 'Æ', ae: 'æ',
		OE: 'Œ', oe: 'œ',
		et: '⁊',
		'-D': 'Ð', '-d': 'ð',
		GH: 'Ȝ', gh: 'ȝ',
		e: 'ə',
	};

	// Diacritics written BEFORE the base character inside the brackets:
	// "[<key><char>]". replace_chars() rewrites such a group as the base
	// character followed by the combining mark with the given code point.
	// Keys are regular-expression fragments, which is why "(" and "." are
	// escaped.
	var combining_before = {
		'`':  0x0300, // combining grave accent
		'´':  0x0301, // combining acute accent
		'^':  0x0302, // combining circumflex
		'=': 0x0304, // combining macron
		'~': 0x0303, // combining tilde
		'\\(': 0x0306, // combining breve
		'"': 0x0308, // combining diaeresis
		'v': 0x030c, // combining caron
		'\\.': 0x0307, // combining dot above
	};

	// Diacritics written AFTER the base character inside the brackets:
	// "[<char><key>]". Same conventions as combining_before: keys are
	// regular-expression fragments, values are combining-mark code points.
	var combining_after = {
		'\\.': 0x0323, // combining dot below (underdot)
	};

	// Caches filled by the first call to replace_chars():
	//  - chars_regexps:     object mapping each key of `chars` to its RegExp
	//  - combining_regexps: array of [RegExp, replacement callback] pairs
	var chars_regexps;
	var combining_regexps;

	/**
	 * Expand bracketed character notation in a string.
	 *
	 * First every "[name]" listed in `chars` is replaced by its character,
	 * then every bracketed diacritic group described by `combining_before`
	 * and `combining_after` is replaced by the base character followed by the
	 * combining mark. The regular expressions are compiled on the first call
	 * and cached in `chars_regexps` / `combining_regexps`.
	 *
	 * @param {string} t text to convert
	 * @return {string} the converted text
	 */
	function replace_chars(t) {
		// Build a String.replace callback that appends the given combining
		// mark to the captured base character.
		function mark_appender(code_point) {
			var mark = String.fromCharCode(code_point);
			return function (m, c) {
				return c + mark;
			};
		}

		if (!chars_regexps) {
			// first call: compile everything once
			chars_regexps = {};
			Object.keys(chars).forEach(function (name) {
				chars_regexps[name] = new RegExp("\\[" + name + "\\]", "g");
			});

			combining_regexps = [];
			Object.keys(combining_before).forEach(function (fragment) {
				combining_regexps.push([
					new RegExp("\\[" + fragment + "(.)\\]", "g"),
					mark_appender(combining_before[fragment])
				]);
			});
			Object.keys(combining_after).forEach(function (fragment) {
				combining_regexps.push([
					new RegExp("\\[(.)" + fragment + "\\]", "g"),
					mark_appender(combining_after[fragment])
				]);
			});
		}

		var result = t;

		Object.keys(chars).forEach(function (name) {
			result = result.replace(chars_regexps[name], chars[name]);
		});

		combining_regexps.forEach(function (pair) {
			result = result.replace(pair[0], pair[1]);
		});

		return result;
	}

	/**
	 * Put a multi-line string on one line by turning each newline into a space.
	 *
	 * @param {string} t text possibly containing newlines
	 * @return {string} the same text with every "\n" replaced by " "
	 */
	function collapse_line_breaks(t) {
		return t.split("\n").join(" ");
	}

	/**
	 * Convert the text of one page from DP/PG markup to wikitext.
	 *
	 * @param {string[]} lines lines of the page
	 * @param {boolean} isDp   false for PG text, which additionally gets its
	 *                         _underscore italics_ converted
	 * @param {Object} repls   replacement templates for the current wiki (an
	 *                         entry of `domains`); the optional entries peh,
	 *                         missing_img, sidenote, bar_s and dots are
	 *                         skipped when absent
	 * @return {string} the wikitext; ends with a newline unless it is empty
	 */
	function format_page(lines, isDp, repls) {
		// Escape a literal string for use inside a RegExp source.
		var escape_regexp = function(s) {
			return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
		};

		var t = lines.join("\n");

		// drop blank lines at the top of the page
		t = t.replace(/^(\n\s*)+/, "");

		// proofers' notes: [** like this]
		if (ImportDP.cfg.keep_comments) {
			t = t.replace(/\[\*\*(.*?)\]/g, "<!-- $1 -->");
		} else {
			t = t.replace(/\[\*\*(.*?)\]/g, "");
		}

		// (PG text carries no small-caps markup, so the <sc> rules below
		// simply find nothing there)
		t = t.replace(/<sc>/gi, repls.sc_s)
				// thought break
				.replace(/\n+<tb>\n+/gi, repls.dhr)
				.replace(/<\/(sc)>/gi, "}}")
				// double hyphen to em dash, but leave <!-- and --> alone
				.replace(/(?<!<!)--(?!>)/g, "—")
				// NOTE(review): this pattern can never match ("$" followed by
				// a literal "*"). It is kept unchanged because the intended
				// rule is unclear -- possibly /^\*/ to drop a leading "*".
				.replace(/$\*/g, "")
				.replace(/^\[\s*Blank page\s*\]$/gim, "")
				// block quote markers
				.replace(/\/#|#\//g, "")
				// no-wrap markers become a poem
				.replace(/\/\*\n*/g, "<poem>\n")
				.replace(/\n*\*\//g, "\n</poem>")

				// headings: first line of the page followed by 2+ blank lines
				.replace(/^(.*)\n{3,}/g, repls.ch);

		t = replace_chars(t);

		// hyphen + asterisk at the very end of the page
		if (repls.peh) {
			t = t.replace(/-\*\s*$/g, repls.peh);
		}

		// do this after {{peh}}
		t = t.replace(/<\/?[if]>/gi, "''")
			.replace(/<\/?b>/gi, "'''");

		// PG-style _italics
		if ( !isDp ) {
			t = t.replace(/_(?!{)/g, "''");
		}

		// do this after other []'s
		if (repls.missing_img) {
			t = t.replace(/\*?\[\s*Illustration:?\s*(.*?)\]\s?/gs, repls.missing_img);
		}

		if (repls.sidenote) {
			t = t.replace(/\s*\[\s*Sidenote:?\s*(.*?)\]\s*/gs, repls.sidenote);
		}

		// collapse italic paras
		// This is a hack, but it'll do
		t = t.replace(/('{2,5})(.*?)('{2,5})(?=\n\n|$)/gs, function(m, s, text, e) {
			return s + collapse_line_breaks(text) + e;
		});

		// runs of em dashes
		if (repls.bar_s) {
			t = t.replace(/(—{2,})/g, function(match, bars) {
				return repls.bar_s + bars.length + "}}";
			});
		}

		// runs of three or more dots
		if (repls.dots) {
			t = t.replace(/(\.{3,})/g, function(match, dots) {
				return repls.dots(dots.length);
			});
		}

		// ^x / ^{xyz} superscripts and _x / _{xyz} subscripts
		t = t.replace(/\^([^{])/g, '<sup>$1</sup>');
		t = t.replace(/\^\{(.*?)\}/g, '<sup>$1</sup>');
		t = t.replace(/\_([^{])/g, '<sub>$1</sub>');
		t = t.replace(/\_\{(.*?)\}/g, '<sub>$1</sub>');

		// Footnotes: pull every "[Footnote NAME: text]" block out of the page,
		// then put its text into a <ref> at the first "[NAME]" anchor.
		var footnotes = [];

		// The name is everything up to the first whitespace, colon or "]",
		// so multi-character names such as "12" are captured whole.
		t = t.replace(/\s*\[\s*Footnote\s+([^\s:\]]+):?\s*(.*?)\]\s*/gs, function(m, name, content) {
			footnotes.push([name, content]);
			return "";
		});

		footnotes.forEach(function(fn) {
			// the name may contain regex metacharacters (e.g. "*")
			var re = new RegExp("\\[" + escape_regexp(fn[0]) + "\\]");
			// a callback keeps "$" sequences in the footnote text literal
			t = t.replace(re, function() {
				return "<ref>" + fn[1] + "</ref>";
			});
		});

		// tidy blank lines
		t = t.replace(/^\s+$/, "") // empty page
			.replace(/(\n\s*){2,}$/, repls.page_end_nop); // para at page end


		if (t.length) {
			t += "\n";
		}

		return t;
	}

	/**
	 * Reformat a whole DP or PG text into wikitext.
	 *
	 * DP text is split at its "-----File: NNN.ext---" separator lines. Each
	 * page is passed through format_page() and preceded by a
	 * "==[[Page:<index>/<n>]]==" heading, where n is the DP image number plus
	 * params.offset. With a skip layout other than 'noskip', pages of the
	 * skipped parity get no heading (they continue the previous page) and the
	 * image numbers are halved relative to the first image number. Text before
	 * the first separator is discarded.
	 *
	 * PG text is formatted as one single page without any heading.
	 *
	 * @param {string} t       the source text
	 * @param {boolean} isDp   true for DP text, false for PG text
	 * @param {Object} params  for DP text: index (string), offset (integer)
	 *                         and skip_layout ('noskip'|'skipodd'|'skipeven');
	 *                         not read for PG text
	 * @return {string} the resulting wikitext
	 */
	function reformat(t, isDp, params) {

		var subdomain = mw.config.get("wgServerName").replace(/\..*$/, "");
		// wikis without an entry of their own use the English templates
		var repls = domains[subdomain] || domains.en;

		if ( !isDp ) {
			// PG text
			return "" + format_page(t.split("\n"), isDp, repls);
		}

		var ot = "";
		var page_lines = [];
		// Index page number of the page being collected. "undefined" (rather
		// than any falsy value) means that no separator has been seen yet, so
		// that a page number of 0 is handled like any other.
		var current_page;
		// whether the page being collected continues the previous one
		var skip_header = false;
		// image number of the first separator; may legitimately be 0
		var firstNum;
		var header_re = /-{5}File: *(\d+)\.\w+\s*-+/;

		// Emit the page collected so far and start a fresh one.
		var append_current_page = function() {
			if ( !skip_header ) {
				ot += "==[[Page:" + params.index + "/" + current_page + "]]==\n";
			}
			ot += format_page(page_lines, isDp, repls);
			page_lines = [];
		};

		t.split("\n").forEach(function(l) {
			var match = header_re.exec(l);

			if ( !match ) {
				if ( current_page !== undefined ) {
					page_lines.push(l);
				}
				return;
			}

			// a separator closes the page collected so far
			if ( current_page !== undefined ) {
				append_current_page();
			}

			// explicit radix: the image numbers are zero-padded
			var num = parseInt( match[1], 10 );

			if ( firstNum === undefined ) {
				firstNum = num;
			}

			skip_header = false;
			if ( params.skip_layout !== 'noskip' ) {
				skip_header =
					( num % 2 === 0 && params.skip_layout === 'skipeven' ) ||
					( num % 2 === 1 && params.skip_layout === 'skipodd' );

				// two source images make up one Index page
				num = Math.floor( ( num - firstNum ) / 2 ) + firstNum;
			}

			current_page = params.offset + num;
		});

		// the last page has no separator after it
		if ( current_page !== undefined ) {
			append_current_page();
		}

		return ot;
	}


	/**
	 * Reformat the contents of the edit box (#wpTextbox1) in place.
	 *
	 * For DP text a parameter dialog is shown first and the text is only
	 * changed when the user confirms it; PG text is converted immediately.
	 * Does nothing when the page has no edit box.
	 *
	 * @param {boolean} dp true for DP text, false for PG text
	 */
	function activate( dp ) {
		console.log("Import DP text starting");

		var $textbox = $("#wpTextbox1");

		if ( !$textbox.length ) {
			// not an edit page: there is nothing to reformat
			console.log("Import DP: no edit box (#wpTextbox1) on this page");
			return;
		}

		var text = $textbox.val();

		var do_transform = function ( params ) {
			$textbox.val( reformat(text, dp, params) );
		};

		if ( !dp ) {
			// PG has no UI
			do_transform( {} );
			return;
		}

		// Create and append a window manager, which will open and close the window.
		var windowManager = new OO.ui.WindowManager();
		$(document.body).append(windowManager.$element);

		// Make the window.
		var dialog = new ImportDP.ui.ImportDPParamDialog({
			size: 'medium'
		});

		windowManager.addWindows([dialog]);

		windowManager.openWindow(dialog, {
			save_callback: function(params) {
				do_transform( params );
			}
		}).closed.then(function() {
			// A fresh manager is created on every activation, so dispose of
			// this one (and its DOM element) once the dialog is closed.
			windowManager.destroy();
		});
	}

	/**
	 * Add the "Reformat DP text" link to the toolbox portlet ('p-tb') and wire
	 * it up. On the first click the user configuration hook
	 * (ImportDP.signature + ".config") is fired and the dialog class is
	 * created; every click then runs activate().
	 */
	function install_portlet() {

		var activator = function( dp ) {
			mw.loader.using([
					'oojs-ui-core', 'oojs-ui-windows', 'oojs-ui-widgets']).done(function() {

				if (!ImportDP.ui_init) {

					// user-provided configs
					mw.hook(ImportDP.signature + ".config")
						.fire(ImportDP.cfg);

					ImportDP.ui.ImportDPParamDialog = create_ImportDPParamDialog();
					ImportDP.ui_init = true;
				}
				activate( dp );
			});
		};

		// mw.util lives in the 'mediawiki.util' module, which is not
		// guaranteed to be loaded when this script runs.
		mw.loader.using('mediawiki.util').done(function() {
			var dp_portlet = mw.util.addPortletLink(
				'p-tb',
				'#',
				'Reformat DP text',
				't-reformat-dp',
				'Reformat a text from Distributed Proofreaders'
			);

			// The Project Gutenberg entry point is currently disabled:
			// var pg_portlet = mw.util.addPortletLink(
			// 	'p-tb',
			// 	'#',
			// 	'Reformat PG text',
			// 	't-reformat-pg',
			// 	'Reformat a text from Project Gutenberg'
			// );

			if (!dp_portlet) {
				// addPortletLink() yields nothing when the portlet is missing
				return;
			}

			$(dp_portlet).on('click', function(e) {
				e.preventDefault();
				activator(true);
			});

			// $(pg_portlet).on('click', function(e) {
			// 	e.preventDefault();
			// 	activator(false);
			// });
		});
	}

	// Install the toolbox link once the DOM is ready.
	$(function on_dom_ready() {
		install_portlet();
	});

}(jQuery, mediaWiki))