User:Billinghurst/common cleanup.js
Jump to navigation
Jump to search
Note: After saving, changes may not occur immediately. Click here to learn how to bypass your browser's cache.
- Firefox / Safari: Hold Shift while clicking Reload, or press either Ctrl-F5 or Ctrl-R (Cmd-R on a Mac)
- Google Chrome: Press Ctrl-Shift-R (Cmd-Shift-R on a Mac)
- Internet Explorer: Hold Ctrl while clicking Refresh, or press Ctrl-F5
- Opera: Clear the cache in Tools → Preferences
For details and instructions about other browsers, see Wikipedia:Bypass your cache.
Code that you insert on this page could contain malicious content capable of compromising your account. If you are unsure whether code you are adding to this page is safe, you can ask at the central discussion page, Scriptorium. The code will be executed when previewing this page under some skins, including Monobook. You can in the interim if you wish to refresh the content sooner under another skin.
Documentation for this script can be added at User:Billinghurst/common cleanup.
/* not all components added,
some old script components currently stored in [[User:Billinghurst/TemplateScript.js]]
//set to internal namespace link, set specifically for subpage xref
function nslink()
//adds template 'DJVU page link'
function djvupagelink()
*/
//ALL NAMESPACE USE
// cleanup() - all sorts of text cleaning from OCR
{
name: 'cleanup',
position: 'cursor',
script: function(editor) {
var header = editor.forField('#wpHeaderTextbox');
var footer = editor.forField('#wpFooterTextbox');
editor
// Digitized by Google (kill)
.replace(/Digitized[\s\n]+by[^\n]+\n(Google)?/, '')
// remove trailing spaces at the end of each line
.replace(/ +\n/g, '\n')
// remove trailing whitespace preceding a hard line break
.replace(/ +<br *\/?>/g, '<br />')
// remove trailing whitespace at the end of page text
.replace(/\s+$/g, '')
// remove trailing spaces at the end of refs
.replace(/ +<\/ref>/g, '</ref>')
// remove trailing spaces at the end of template calls
.replace(/ +}}/g, '}}')
// convert double-hyphen to mdash (avoiding breaking HTML comment syntax)
.replace(/([^\!])--([^>])/g, '$1—$2')
// remove spacing around mdash, but only if it has spaces on both sides
// (we don't want to remove the trailing space from "...as follows:— ",
// bearing in mind that the space will already be gone if at end of line).
.replace(/ +— +/g, '—')
// join words that are hyphenated across a line break
// (but leave "|-" table syntax alone)
.replace(/([^\|])-\n/g, '$1')
// remove unwanted spaces around punctuation marks
.replace(/ ([);:\?!,])/g, '$1')
//OCR fixes
// convert i9 to 19, etc.
.replace(/[il]([0-9])/g, '1$1')
// li -> h ... "the", "them", "their", and whe etcetera
.replace(/([tT])lie/ig, '$1he')
.replace(/wlie/g, 'whe')
.replace(/(wlicli|ivhic(li|h))/g, 'which')
.replace(/liurcli/g, 'hurch')
// Av -> w
.replace(/Av(ill|ith)/g, 'w$1')
// "U" -> "ll" when preceded by a lowercase letter.
.replace(/([a-z])U/g, '$1ll');
// {{c}} to {{center}}
editor.replace(/{{c\|/g, '{{center|');
header.replace(/{{c\|/g, '{{center|');
footer.replace(/{{c\|/g, '{{center|');
// {{rh}} to {{RunningHeader}}
header.replace(/\n?{{rh\|/gi, '{{RunningHeader|');
editor
//{{hws}} & {{hwe}} expanded
.replace(/{{hws\|/g, '{{hyphenated word start|')
.replace(/{{hwe\|/g, '{{hyphenated word end|')
// {{di}} expanded
.replace(/{{di\|/g, '{{dropinitial|')
// {{hi}} expanded
.replace(/{{hi\|/g, '{{hanging indent|')
// expand diacritical templates
.replace(/{{((ae|oe|\w[:`'~^-]))}}/g, '{{subst'+':$1}}')
//convert {{—}} to —
.replace(/{{—}}/g, '—');
// M<sup>c</sup> to {{Mc}}
editor.replace(/M<sup>c<\/sup>/g, '{{Mc}}');
header.replace(/M<sup>c<\/sup>/g, '{{Mc}}');
editor
//DNB link conversion
.replace(/\[\[(.{0,40}?) \(DNB00\)\|([^\]]+?)\]\]/g, '{{DNB lkpl|$1|$2}}')
.replace(/\[\[(.{0,40}?) \(DNB01\)\|([^\]]+?)\]\]/g, '{{DNB lkpl|$1|$2|year=01}}')
.replace(/\[\[(.{0,40}?) \(DNB12\)\|([^\]]+?)\]\]/g, '{{DNB lkpl|$1|$2|year=12}}')
//section tag fix
.replace(/<section (begin|end)=(\w[^\/]+)\/>/g, '<section $1="$2"/>');
}
}
// DISABLED (commented out below, kept for reference): stuff to do only if the page doesn't contain a <poem> tag:
/*if (editor.get().indexOf("<poem>") === -1) {
editor
// remove single line breaks; preserve multiple.
// but not if there's a tag, template or table syntax either side of the line break
.replace(/([^>}\n])\n([^<{\|\n])/g, '$1 $2')
// collapse sequences of spaces into a single space
.replace(/ +/g, ' ');
}
*/
// merged ... function smaller()
// if (isDictionary) {
// editor
// .replace(/<small>(.+?)<\/small>/g, '{{smaller block|$1}}')
// .replace(/<p style="font-size:smaller">(.+?)<\/p>/g, '{{smaller block|$1}}')
// .replace(/\{\{smaller\|\[/g, '{{smaller block|[');
// }
// else
// editor.replace(/<small>(.+?)<\/small>/g, '{{smaller|$1}}');
//
// if (isDictionary)
// editor.replace(/}} ?({{DNB .{2,5}})[ \n]?(<sect[^>]+?>)/g, '}}\n$1$2\n');
//}*/
//end of cleanup()