User:Nonexyst/OCR-tools.js

From Wikisource
Jump to navigation Jump to search
Note: After saving, your changes may not take effect immediately; you may first need to bypass your browser's cache, using the key combination for your browser listed below.
  • Firefox / Safari: Hold Shift while clicking Reload, or press either Ctrl-F5 or Ctrl-R (Cmd-R on a Mac)
  • Google Chrome: Press Ctrl-Shift-R (Cmd-Shift-R on a Mac)
  • Internet Explorer: Hold Ctrl while clicking Refresh, or press Ctrl-F5
  • Opera: Clear the cache in Tools → Preferences

For details and instructions about other browsers, see Wikipedia:Bypass your cache.

/**
 * Toolbar action: run ocr_1_inner over the text in the #wpTextbox1 edit box.
 *
 * When some text is selected, only the selection is processed (flag 0) and
 * replaced in place. When nothing is selected, the whole content is processed
 * (flag 1) and written back.
 */
function ocr_1() {
	var $editor = $('#wpTextbox1');
	var selected = $editor.textSelection('getSelection');

	if (selected !== '') {
		// Replace just the selected text with its processed version.
		$editor.textSelection('encapsulateSelection', {
			pre: ocr_1_inner(selected, 0),
			replace: true
		});
		return;
	}

	// Nothing selected: process the complete content of the edit box.
	var whole = $editor.textSelection('getContents');
	$editor.textSelection('setContents', ocr_1_inner(whole, 1));
}

/**
 * Normalise whitespace and re-flow hard-wrapped OCR text.
 *
 * Steps, in order:
 *  1. strip trailing and leading spaces/tabs on every line and collapse
 *     runs of spaces/tabs into a single space;
 *  2. join a line ending in a hyphen (`-`, `¬` or soft hyphen U+00AD) with
 *     the following line, dropping the hyphen;
 *  3. join every other pair of adjacent non-empty lines with a single space
 *     (empty lines, i.e. paragraph breaks, are left alone);
 *  4. if `r` is truthy, remove line feeds at the very start and very end of
 *     the text.
 *
 * @param {string} sel Text to clean up.
 * @param {*} r Truthy to also strip leading/trailing line feeds.
 * @return {string} The cleaned-up text.
 */
function ocr_1_inner(sel, r) {
	sel = sel.replace(/[ \t]+$/gm, ''); // remove trailing spaces
	sel = sel.replace(/^[ \t]+/gm, ''); // remove leading spaces
	sel = sel.replace(/[ \t]+/g, ' ');  // collapse multiple spaces

	// The character after the line feed is only looked at (lookahead), not
	// consumed, so it can still act as the "previous character" of the next
	// line break. Consuming it would skip every second break whenever a
	// line is a single character long (e.g. "a\nb\nc").
	sel = sel.replace(/(\S)[\-¬\xad]\n(?=\S)/g, '$1'); // merge hyphenated line breaks
	sel = sel.replace(/(\S)\n(?=\S)/g, '$1 ');         // merge the remaining line breaks

	if (r) {
		// No "m" flag here: ^ and $ must only match the start/end of the
		// whole text, not of every line.
		sel = sel.replace(/^\n+/, '');
		sel = sel.replace(/\n+$/, '');
	}

	return sel;
}

/**
 * Toolbar action: run ocr_2_inner over the whole content of the
 * #wpTextbox1 edit box and write the result back.
 *
 * The current selection is not consulted; the complete text is always
 * processed.
 */
function ocr_2() {
	var textArea = $('#wpTextbox1');

	// Declared with `var` so the value stays local instead of leaking into
	// (or clobbering) a global `txt`; an undeclared assignment also throws
	// in strict mode.
	var txt = textArea.textSelection('getContents');
	txt = ocr_2_inner(txt);
	textArea.textSelection('setContents', txt);
}

/**
 * Wrap the first word of the text in a {{hwe|word|word}} template.
 *
 * The word must be the first thing in the text, optionally preceded by line
 * feeds (which are kept in front of the template). A "word" is a run of
 * hyphens, ASCII letters/digits and the Cyrillic letters listed in the
 * pattern. If the text does not start with such a word it is returned
 * unchanged.
 *
 * @param {string} sel Text to process.
 * @return {string} The text with its first word wrapped.
 */
function ocr_2_inner(sel) {
	// Deliberately no "m" flag: ^ has to match the start of the whole text
	// only. With "m" it would also match after \r, U+2028 and U+2029 and
	// wrap additional words.
	return sel.replace(
		/^(\n*)([\-a-zA-Z0-9А-Яа-яёЁІіѢѣѲѳѴѵ]+)/,
		'$1{{hwe|$2|$2}}'
	);
}

/**
 * Toolbar action: run ocr_3_inner over the whole content of the
 * #wpTextbox1 edit box and write the result back.
 *
 * The current selection is not consulted; the complete text is always
 * processed.
 */
function ocr_3() {
	var textArea = $('#wpTextbox1');

	// Declared with `var` so the value stays local instead of leaking into
	// (or clobbering) a global `txt`; an undeclared assignment also throws
	// in strict mode.
	var txt = textArea.textSelection('getContents');
	txt = ocr_3_inner(txt);
	textArea.textSelection('setContents', txt);
}

/**
 * Wrap the last word of the text in a {{hws|word|word}} template.
 *
 * The word must end the text, optionally followed by one line-break hyphen
 * (`-`, `¬` or soft hyphen U+00AD, the same set ocr_1_inner treats as
 * hyphenation) and any number of trailing line feeds. The hyphen is dropped
 * and the line feeds are kept after the template. A "word" is a run of
 * hyphens, ASCII letters/digits and the Cyrillic letters listed in the
 * pattern, ending in a letter or digit. If the text does not end in such a
 * word it is returned unchanged.
 *
 * @param {string} sel Text to process.
 * @return {string} The text with its last word wrapped.
 */
function ocr_3_inner(sel) {
	// Deliberately no "m" flag: $ has to match the end of the whole text
	// only. With "m" it would also match before \r, U+2028 and U+2029 and
	// wrap additional words.
	return sel.replace(
		/([\-a-zA-Z0-9А-Яа-яёЁІіѢѣѲѳѴѵ]*[a-zA-Z0-9А-Яа-яёЁІіѢѣѲѳѴѵ])[-¬\xad]?(\n*)$/,
		'{{hws|$1|$1}}$2'
	);
}

/**
 * Toolbar action: run ocr_4_inner over the text in the #wpTextbox1 edit box.
 *
 * When some text is selected, only the selection is processed and replaced
 * in place. When nothing is selected, the whole content is processed and
 * written back.
 */
function ocr_4() {
	var $editor = $('#wpTextbox1');
	var selected = $editor.textSelection('getSelection');

	if (selected !== '') {
		// Replace just the selected text with its processed version.
		$editor.textSelection('encapsulateSelection', {
			pre: ocr_4_inner(selected, 0),
			replace: true
		});
		return;
	}

	// Nothing selected: process the complete content of the edit box.
	var whole = $editor.textSelection('getContents');
	$editor.textSelection('setContents', ocr_4_inner(whole, 1));
}

/**
 * Tidy spacing around punctuation and straighten curly quotes.
 *
 * The rules are applied in order, each to the result of the previous one:
 *  1. drop spaces after an opening curly quote;
 *  2. drop spaces before closing punctuation, closing curly quotes and
 *     prime marks;
 *  3. drop spaces on both sides of an em dash;
 *  4. turn curly double quotes into a straight double quote;
 *  5. turn curly single quotes into a straight single quote.
 *
 * @param {string} sel Text to process.
 * @return {string} The processed text.
 */
function ocr_4_inner(sel) {
	// [pattern, replacement] pairs; order matters because rules 1-2 look
	// for the curly quotes that rules 4-5 replace.
	var rules = [
		[/([“‘]) +/gm, '$1'],
		[/ +([!%\):;,.?”’″′])/gm, '$1'],
		[/ *— */gm, '—'],
		[/[“”]/gm, '"'],
		[/[‘’]/gm, '\'']
	];

	var result = sel;
	for (var i = 0; i < rules.length; i++) {
		result = result.replace(rules[i][0], rules[i][1]);
	}
	return result;
}

jQuery( document ).ready( function ( $ ) {

	/* Only continue when wgAction is 'edit' or 'submit'. */
	if ( [ 'edit', 'submit' ].indexOf( mw.config.get( 'wgAction' ) ) === -1 ) {
		return;
	}
	console.log('page is in edit mode');

	/* Customise the toolbar once the WikiEditor module has been loaded. */
	mw.loader.using( 'ext.wikiEditor', function () {

		console.log('adding toolbar buttons');

		// Toolbar section that receives the new group.
		// ('advanced' is the alternative section name noted in the original comments.)
		var sectionName = 'proofreadpage-tools';
		var groupName = 'OCR';
		var $textbox = $( '#wpTextbox1' );

		/**
		 * Build the configuration of one unlabelled toolbar button that runs
		 * a callback when clicked.
		 *
		 * @param {string} iconUrl URL of the button icon.
		 * @param {Function} run Callback stored as the button's `execute` action.
		 * @return {Object} Tool definition for `addToToolbar`.
		 */
		function callbackButton( iconUrl, run ) {
			return {
				type: 'button',
				icon: iconUrl,
				action: {
					type: 'callback',
					execute: run
				}
			};
		}

		// Step 1: create the (initially empty) button group.
		var groupDefinition = {};
		groupDefinition[ groupName ] = { label: 'Additional tools:' };
		$textbox.wikiEditor( 'addToToolbar', {
			section: sectionName,
			groups: groupDefinition
		} );

		// Step 2: fill the group with the four buttons. Note that button
		// 'OCR-2' runs ocr_3 and button 'OCR-3' runs ocr_2.
		$textbox.wikiEditor( 'addToToolbar', {
			section: sectionName,
			group: groupName,
			tools: {
				'OCR-1': callbackButton(
					'//upload.wikimedia.org/wikipedia/commons/thumb/8/8b/Merge-arrows.gif/32px-Merge-arrows.gif',
					function () {
						ocr_1();
					}
				),
				'OCR-2': callbackButton(
					'//upload.wikimedia.org/wikipedia/commons/thumb/6/68/Merge-arrow.gif/32px-Merge-arrow.gif',
					function () {
						ocr_3();
					}
				),
				'OCR-3': callbackButton(
					'//upload.wikimedia.org/wikipedia/commons/thumb/a/ae/Mergefrom.gif/32px-Mergefrom.gif',
					function () {
						ocr_2();
					}
				),
				'OCR-4': callbackButton(
					'//upload.wikimedia.org/wikipedia/commons/d/dd/Dash.svg',
					function () {
						ocr_4();
					}
				)
			}
		} );

	} );

} );