User:Xover/unwrap.js

From Wikisource
Jump to navigation Jump to search
Note: After saving, changes may not occur immediately. Click here to learn how to bypass your browser's cache.
  • Firefox / Safari: Hold Shift while clicking Reload, or press either Ctrl-F5 or Ctrl-R (Cmd-R on a Mac)
  • Google Chrome: Press Ctrl-Shift-R (Cmd-Shift-R on a Mac)
  • Internet Explorer: Hold Ctrl while clicking Refresh, or press Ctrl-F5
  • Opera: Clear the cache in Tools → Preferences

For details and instructions about other browsers, see Wikipedia:Bypass your cache.

/* global $, mw */
"use strict";

// Make sure the necessary modules are loaded
// Bootstrap: once 'mediawiki.util' is available and the document is ready,
// add a "Remove hard line breaks" link to the 'p-tb' portlet. The link is only
// added when editing a 'proofread-page' page in the Page: namespace.
mw.loader.using(['mediawiki.util'], () => {

  // Wait for the page to be parsed (new-style $(document).ready())
  $(() => {

    /*
     *  First check that this is a context we should be active in.
     */

    // Only active on Page:-namespace pages.
    if (mw.config.get('wgCanonicalNamespace') !== 'Page') {
      return;
    }

    // Only active on pages with content model 'proofread-page'.
    if (mw.config.get('wgPageContentModel') !== 'proofread-page') {
      return;
    }

    // Only active when in edit/preview/diff mode.
    const action = mw.config.get('wgAction');
    if (action !== 'edit' && action !== 'submit') {
      return;
    }

    /*
     *  Add portlets for the various commands.
     */

    const unwrapPortlet = mw.util.addPortletLink(
      'p-tb', '#', '↲ Remove hard line breaks', 'ca-unwrap',
      'Remove hard line breaks from OCR text.'
    );

    // Bound with .on('click', ...) rather than the deprecated .click(handler)
    // event shorthand.
    $(unwrapPortlet).on('click', (event) => {
      // The link target is '#'; stop the browser from following it.
      event.preventDefault();
      doUnwrapText();
    });

  }); // END: $(document).ready()
}); // END: mw.loader.using()


/**
 * Apply the OCR clean-up rules to a piece of text.
 *
 * The rules run strictly in the order listed; later rules rely on the output
 * of earlier ones (for example, the hyphen rule only works after line breaks
 * have been turned into spaces).
 *
 * @param {string} text - Raw text to clean up.
 * @returns {string} The text with hard line breaks removed and the
 *   punctuation fixes below applied.
 */
function unwrapOcrText(text) {
  // Ordered [pattern, replacement] pairs. Kept inside the function (not at
  // top level) so the script adds no extra top-level lexical bindings.
  const rules = [
    // Nuke lines consisting of only whitespace.
    [/^\s+$/mg, ''],
    // A newline that is not followed by another newline becomes a space;
    // newlines that are followed by another newline are left in place.
    [/([^\n])\n(?!\n)/gm, '$1 '],
    // Collapse runs of spaces into a single space.
    [/ +/g, ' '],

    // Stash these here for now (disabled):
    //   [/‘/g, '“'],
    //   [/’/g, '”'],

    // A closing double quote directly before s, d or t becomes ’.
    [/”(s|d|t)/g, '’$1'],
    // "s” " (s, closing double quote, space) becomes "s’ ".
    [/s” /g, 's’ '],
    // Replace the &mdash; entity with a literal em dash.
    [/&mdash;/g, '—'],
    // Drop any whitespace on either side of an em dash.
    [/\s*—\s*/g, '—'],
    // Delete every "- " (hyphen + space); presumably this rejoins words that
    // were hyphenated across a line break that is now a space.
    [/- /g, ''],
    // Remove a space before ’ ” ; ! ? or :
    [/ (’|”|;|!|\?|:)/g, '$1'],
    // Remove a space after an opening quote ‘ or “.
    [/(‘|“) /g, '$1'],
    // Strip leading spaces on each line.
    [/^ +(\S)/mg, '$1'],
    // Strip trailing spaces on each line.
    [/(\S) +$/mg, '$1'],
    // Strip all trailing whitespace at the very end of the text (no m flag).
    [/(\S)\s+$/g, '$1'],
    // Two or more dots, optionally separated by spaces, become {{…}}.
    // NOTE(review): "[ .]*" also swallows trailing spaces, so $2 never holds
    // a plain space ("wait . . . now" -> "wait {{…}}now"). Confirm whether
    // that is intended before changing it.
    [/(\s*)\.\s*\.[ .]*(\s*)/g, '$1{{…}}$2'],
    // Two single curly quotes is prolly a double quote.
    [/‘‘/g, '“'],
    [/’’/g, '”'],
    // Line-initial quote followed by "T " becomes quote + "I " (“T -> “I).
    [/^(["'“‘])T /mg, '$1I '],
  ];

  return rules.reduce(
    (result, [pattern, replacement]) => result.replace(pattern, replacement),
    text
  );
}

/**
 * Read the contents of the '#wpTextbox1' element, run the OCR clean-up rules
 * over it, and write the result back.
 *
 * Does nothing when the element is missing: jQuery's .val() returns
 * undefined for an empty collection, which previously caused a TypeError.
 */
function doUnwrapText() {
  const $textbox = $('#wpTextbox1');
  const text = $textbox.val();

  if (typeof text !== 'string') {
    return;
  }

  $textbox.val(unwrapOcrText(text));
} // END: doUnwrapText()