User:Xover/Gadget-ocr.js
Jump to navigation
Jump to search
Note: After saving, changes may not occur immediately. Click here to learn how to bypass your browser's cache.
- Firefox / Safari: Hold Shift while clicking Reload, or press either Ctrl-F5 or Ctrl-R (Cmd-R on a Mac)
- Google Chrome: Press Ctrl-Shift-R (Cmd-Shift-R on a Mac)
- Internet Explorer: Hold Ctrl while clicking Refresh, or press Ctrl-F5
- Opera: Clear the cache in Tools → Preferences
For details and instructions about other browsers, see Wikipedia:Bypass your cache.
Code that you insert on this page could contain malicious content capable of compromising your account. If you are unsure whether code you are adding to this page is safe, you can ask at the central discussion page, Scriptorium. The code will be executed when previewing this page under some skins, including Monobook. You can in the interim if you wish to refresh the content sooner under another skin. |
Documentation for this script can be added at User:Xover/Gadget-ocr. |
/*jshint boss:true*/
/*global $, mw*/
/*
* Query an OCR service for a given Page:. First try to get the hOCR text layer, as it is
* available for most books, fast, and of better quality. If that fails, fall back to the
* older and slower OCR method. hOCR fails for around 1 in 5000 books. OCR should never
* fail, as it uses the image visible on the Page:.
*/
// Language code sent as the `lang` parameter of the OCR requests. Initialised from the
// wiki content language; fraktur_ocr() temporarily overrides it while it builds its request.
var lang = mw.config.get( 'wgContentLanguage' );
/**
 * Lock or unlock the OCR controls while a request is in flight.
 *
 * Locking removes the click handlers from both OCR buttons, disables the edit
 * box and installs an Escape-key handler so the user can unlock manually.
 * Unlocking gives each button exactly one click handler back (do_hocr for
 * #wsOcr1, fraktur_ocr for #wsOcr2), removes the Escape handler and re-enables
 * the edit box.
 *
 * @param {boolean} set true to lock the buttons and textbox, false to unlock them.
 */
function disable_input(set)
{
	var escapeEvent = 'keyup.wsOcr';
	// Remove any Escape handler installed by an earlier call so repeated locks
	// never stack handlers on the document.
	$(document).off(escapeEvent);
	if (set) {
		$(document).on(escapeEvent, function(e) {
			if (e.which === 27) { disable_input(false); }
		});
	}
	// Always clear the buttons before (re)binding: unlocking twice in a row
	// (e.g. Escape followed by a late server response) must not bind twice.
	$('#wsOcr1').off('click');
	$('#wsOcr2').off('click');
	if (!set) {
		$('#wsOcr1').on('click', do_hocr);
		// The Fraktur handler belongs on the second button, not on #wsOcr1.
		$('#wsOcr2').on('click', fraktur_ocr);
	}
	$('#wpTextbox1').prop('disabled', set);
}
/**
 * Success handler for the image-based OCR request issued by do_ocr().
 *
 * On an error reply the server message is shown with alert(); otherwise the
 * recognised text replaces the edit box content. The controls are unlocked in
 * both cases.
 *
 * @param {Object} data Parsed JSON reply; reads `data.error` and `data.text`.
 */
function ocr_callback(data) {
	var textbox;
	if (!data.error) {
		textbox = document.getElementById("wpTextbox1");
		// Only write while the box is still locked: in Chrome, pressing ESC
		// unlocks the box but does not abort the request, so a late reply must
		// not overwrite what the user has since typed.
		if (textbox.disabled) {
			textbox.value = data.text;
		}
	} else {
		alert(data.text);
	}
	disable_input(false);
}
/**
 * Success handler for the hOCR request issued by do_hocr().
 *
 * On an error reply it unlocks the controls and falls back to the slower
 * image-based OCR (do_ocr). Otherwise it stores the raw hOCR markup under the
 * localStorage key `ws_hOCR`, flattens the markup to plain text, re-flows the
 * line breaks and writes the result to the edit box, then unlocks the controls.
 *
 * @param {Object} data Parsed JSON reply; reads `data.error` and `data.text`.
 */
function hocr_callback(data) {
	var tb, text;
	if (data.error) {
		// Fallback to the slow way.
		disable_input(false);
		do_ocr();
		return;
	}
	// Checking if tb is disabled is required with chrome as ESC doesn't kill
	// the query.
	tb = document.getElementById("wpTextbox1");
	if (tb.disabled) {
		try {
			localStorage.ws_hOCR = data.text;
		} catch (e) {
			// Storage may be full or unavailable. Keeping the raw hOCR is
			// best-effort; it must not prevent the text from being inserted
			// or leave the edit box locked.
		}
		// NOTE(review): data.text is parsed as HTML by jQuery; this assumes the
		// OCR service is trusted.
		text = $(data.text).text();
		// Re-flow line breaks: 4+ newlines become a blank line, 2-3 newlines
		// become a single newline, and a lone newline becomes a space. The
		// placeholders keep the three cases apart while replacing.
		text = text.replace(/^ +/mg, '')
			.replace(/\n{4,}/g, '@_@_@_@')
			.replace(/\n{2,}/g, '____SPACE____')
			.replace(/\n/g, ' ')
			.replace(/____SPACE____/g, '\n')
			.replace(/@_@_@_@/g, '\n\n');
		// Native trim() instead of the deprecated $.trim().
		tb.value = text.trim();
	}
	disable_input(false);
}
/**
 * Start the fast OCR path: lock the controls and request the hOCR text layer
 * for the current page title. hocr_callback handles the reply; if the request
 * itself fails, do_ocr is used as the fallback.
 */
function do_hocr() {
	disable_input(true);
	// Every query value is percent-encoded: titles and user names may contain
	// characters such as '&', '#', '+' or spaces that would otherwise corrupt
	// the query string.
	var request_url = '//tools.wmflabs.org/phetools/hocr_cgi.py?cmd=hocr&book='
		+ encodeURIComponent(mw.config.get('wgTitle'))
		+ '&lang=' + encodeURIComponent(lang)
		+ '&user=' + encodeURIComponent(mw.config.get('wgUserName'));
	$.getJSON(request_url).done(hocr_callback).fail(do_ocr);
}
/**
 * Start the slow OCR path: send the URL of the page image shown in
 * `.prp-page-image` to the OCR service. ocr_callback handles the reply.
 *
 * If there is no page image, no request is made; the edit box is unlocked if
 * a previous step (such as do_hocr's failure path) left it locked. If the
 * request fails, the controls are unlocked again.
 */
function do_ocr() {
	var image = $( '.prp-page-image img' ),
		url_image = image.length ? image.attr('src') : undefined,
		request_url;
	if (!url_image) {
		// Nothing to OCR. do_hocr() calls us from its failure handler while the
		// editor is still locked, so release it rather than leave it stuck.
		if ($( '#wpTextbox1' ).prop('disabled')) {
			disable_input(false);
		}
		return;
	}
	disable_input(true);
	// server side can't use protocol relative url, request it as https:
	// (only when the src really is protocol-relative; an absolute URL is kept as is).
	if (url_image.indexOf('//') === 0) {
		url_image = 'https:' + url_image;
	}
	// NOTE(review): url_image is still passed unencoded, as before. Whether the
	// service expects it percent-encoded is not visible from this file - confirm
	// before changing.
	request_url = "//tools.wmflabs.org/phetools/ocr.php?cmd=ocr&url=" + url_image
		+ "&lang=" + encodeURIComponent(lang)
		+ "&user=" + encodeURIComponent(mw.config.get('wgUserName'));
	$.getJSON( request_url ).done( ocr_callback ).fail( function () {
		// Without this a network/server failure would leave the edit box disabled.
		disable_input(false);
	} );
}
/**
 * Run the image-based OCR with the Fraktur language code.
 *
 * The hOCR layers for 'de' are produced without Fraktur support, so this
 * always takes the slow path. The global `lang` is switched to 'de-f' only
 * for the duration of the do_ocr() call and is then reset to the wiki content
 * language.
 */
function fraktur_ocr()
{
	var frakturCode = 'de-f';
	lang = frakturCode;
	do_ocr();
	// Restore the default so later requests use the content language again.
	lang = mw.config.get( 'wgContentLanguage' );
}
/**
 * Add one button to the WikiEditor toolbar ('main' section, 'insert' group).
 *
 * @param {Object} b Button description: `imageId` (tool key, also used as the
 *  button's rel attribute), `speedTip` (label), `imageFile` (icon URL) and
 *  `onClick` (callback executed by the tool).
 */
function addButtonToWikiEditorToolbar( b ){
	var toolDefinition = {
		label: b.speedTip,
		filters: [ 'body.ns-104' ],
		type: 'button',
		icon: b.imageFile,
		action: {
			type: 'callback',
			execute: b.onClick
		}
	};
	var toolbarUpdate = {
		section: 'main',
		group: 'insert',
		tools: {}
	};
	toolbarUpdate.tools[ b.imageId ] = toolDefinition;
	$( '#wpTextbox1' ).wikiEditor( 'addToToolbar', toolbarUpdate );
	// Give the freshly added button a fixed width.
	$( '[rel="' + b.imageId + '"]' ).width( 42 );
}
/**
 * Add one button to the classic toolbar and wire it to the given callback.
 *
 * The button is registered through mw.toolbar.addButton; afterwards any click
 * handler already on it is replaced by one that calls `b.onClick()` and
 * returns false.
 *
 * @param {Object} b Button description: `imageFile`, `speedTip`, `imageId`
 *  (element id of the button) and `onClick`.
 */
function addButtonToClassicToolbar( b ){
	var buttonSpec = {
		imageFile: b.imageFile,
		speedTip: b.speedTip,
		imageId: b.imageId
	};
	var handleClick = function () {
		b.onClick();
		return false;
	};
	mw.toolbar.addButton( buttonSpec );
	$( '#' + b.imageId )
		.off( 'click' )
		.click( handleClick )
		.width( 46 );
}
/**
 * Add the OCR button(s) to whichever edit toolbar the user has enabled.
 *
 * WikiEditor is preferred when `usebetatoolbar` is on, otherwise the classic
 * toolbar is used when `showtoolbar` is on; with neither, nothing is done.
 * Once the toolbar module is loaded and the DOM is ready, a single OCR button
 * is added, plus a second Fraktur button when the content language is 'de'.
 */
function customiseToolbar()
{
	var userOptions = mw.user.options,
		requiredModules, defaultIcon, register, registerButtons;

	// Loose comparison on purpose: the option can be the string "0" if the user
	// disabled the preference ([[Phabricator:T54542#555387]]).
	if ( userOptions.get( 'usebetatoolbar' ) == 1 ) {
		requiredModules = [ 'ext.wikiEditor' ];
		defaultIcon = '//upload.wikimedia.org/wikipedia/commons/c/c9/Toolbaricon_OCR.png';
		register = addButtonToWikiEditorToolbar;
	} else if ( userOptions.get( 'showtoolbar' ) == 1 ) {
		requiredModules = [ 'mediawiki.toolbar' ];
		defaultIcon = '//upload.wikimedia.org/wikipedia/commons/e/e0/Button_ocr.png';
		register = addButtonToClassicToolbar;
	} else {
		return;
	}

	registerButtons = function () {
		var isGerman = mw.config.get( 'wgContentLanguage' ) === 'de';
		// The first button always runs the hOCR path; only its tooltip differs.
		register( {
			imageFile: defaultIcon,
			speedTip: isGerman ? 'Normale OCR' : 'Get the text by OCR',
			imageId: 'wsOcr1',
			onClick: do_hocr
		} );
		if ( isGerman ) {
			register( {
				imageFile: '//upload.wikimedia.org/wikipedia/commons/a/af/Button_Fractur_OCR.png',
				speedTip: 'Fraktur OCR',
				imageId: 'wsOcr2',
				onClick: fraktur_ocr
			} );
		}
	};

	$.when( mw.loader.using( requiredModules ), $.ready ).then( registerButtons );
}
// Entry point: once user options are available, decide whether the OCR buttons
// apply (editing a page in the Page: namespace, OCR not disabled through
// `self.proofreadpage_disable_ocr`, and one of the two toolbars enabled), load
// the needed modules and then run customiseToolbar.
mw.loader.using( 'user.options', function () {
	var isPage = mw.config.get( 'wgCanonicalNamespace' ) === 'Page',
		editing = $.inArray( mw.config.get( 'wgAction' ), [ 'edit', 'submit' ] ) !== -1,
		disableOcr = self.proofreadpage_disable_ocr,
		dependencies = [ 'jquery.textSelection', 'ext.proofreadpage.page.edit' ],
		// User options may arrive as numbers or as strings (customiseToolbar notes
		// the string "0" case), so normalise with Number() before comparing;
		// a strict comparison against 1 would ignore a stored "1".
		usingBetaToolbar = Number( mw.user.options.get( 'usebetatoolbar' ) ) === 1,
		usingOldToolbar = Number( mw.user.options.get( 'showtoolbar' ) ) === 1,
		usingCodeMirror = Number( mw.user.options.get( 'codemirror-syntax-highlight' ) ) === 1;
	if ( isPage && editing && !disableOcr && ( usingBetaToolbar || usingOldToolbar ) ) {
		// Set CodeMirror dependency if required.
		if ( usingCodeMirror ) {
			dependencies.push( 'ext.CodeMirror.lib' );
		}
		// Set dependency for the relevant toolbar module.
		if ( usingBetaToolbar ) {
			dependencies.push( 'ext.wikiEditor' );
		} else if ( usingOldToolbar ) {
			dependencies.push( 'mediawiki.toolbar' );
		}
		// Now run the toolbar customisation.
		mw.loader.using( dependencies, customiseToolbar );
	}
} );