MediaWiki:Gadget-TemplateScript.js
Nota: Després de publicar, possiblement necessitareu refrescar la memòria cau del vostre navegador per a veure'n els canvis.
- Firefox / Safari: Premeu Majús i alhora cliqueu el botó Actualitzar, o pressioneu Ctrl+F5 o Ctrl+R (⌘+R en un Mac)
- Google Chrome: Premeu Ctrl+Majús+R (⌘+Shift+R en un Mac)
- Internet Explorer / Edge: Premeu Ctrl i alhora cliqueu a Actualitza o pressioneu Ctrl+F5
- Opera: Premeu Ctrl-F5.
/*
This page defines a TemplateScript library. It's not meant to be referenced
directly. See [[Wikisource:TemplateScript]] for usage.
*/
/* global $, pathoschild */
/**
* TemplateScript adds configurable templates and scripts to the sidebar, and adds an example regex editor.
* @see https://meta.wikimedia.org/wiki/TemplateScript
* @update-token [[File:Pathoschild/templatescript.js]]
*/
// <nowiki>
// Load the i18n/ca.js script from the tools-static host
// (presumably the Catalan interface strings for TemplateScript — confirm).
mw.loader.load('//tools-static.wmflabs.org/meta/scripts/i18n/ca.js');
$.ajax('//tools-static.wmflabs.org/meta/scripts/pathoschild.templatescript.js', { dataType:'script', cache:true }).then(function() {
/*********
** Define library
*********/
// Script entries shown in the sidebar for the proofreading library.
// Each handler is wrapped in a closure on purpose: addPageHeader, pageCleanup & co.
// are `var` function expressions assigned further down in this callback, so they
// are still undefined while this array literal is being evaluated.
var proofreadingScripts = [
    {
        key: 'add-header',
        name: 'Afegir capçalera',
        scriptUrl: 'Special:MyPage/titols.js',
        script: function(editor) { addPageHeader(editor); },
        forNamespaces: 'page',
        accessKey: '1'
    },
    {
        key: 'add-footer',
        name: 'Afegir peu de pàgina',
        scriptUrl: 'Special:MyPage/titols.js',
        script: function(editor) { addPageFooter(editor); },
        forNamespaces: 'page',
        accessKey: '2'
    },
    {
        key: 'ocr',
        name: 'OCR (reconeixement automàtic de text)',
        // the OCR tool works on the raw textbox and does not use the editor helper
        script: function(editor) { do_hocr(); },
        forNamespaces: 'page',
        accessKey: '3'
    },
    {
        key: 'cleanup-ocr',
        name: 'Netejar OCR',
        script: function(editor) { pageCleanup(editor); },
        forNamespaces: 'page',
        accessKey: '4'
    },
    {
        key: 'make-refs',
        name: 'Fer referències',
        script: function(editor) { makeReference(editor); },
        forNamespaces: 'page',
        accessKey: '5'
    },
    {
        key: 'smallcaps',
        name: 'A versaleta',
        script: function(editor) { smallcaps(editor); },
        forNamespaces: 'page'
    },
    {
        key: 'uppercase',
        name: 'A majúscules',
        script: function(editor) { upper(editor); },
        forNamespaces: 'page'
    },
    {
        key: 'lowercase',
        name: 'A minúscules',
        script: function(editor) { lower(editor); },
        forNamespaces: 'page'
    }
];

// Register the library with a single category holding all of the scripts above.
pathoschild.TemplateScript.library.define({
    key: 'wikisource.proofreading',
    name: 'Eines de transcripció',
    url: '//ca.wikisource.org/wiki/Wikisource:TemplateScript',
    description: 'Una sèrie d´eines per facilitar la <a href="/wiki/Ajuda:Pàginas de transcripció">transcripció a l´espai de noms <tt>Pàgina:</tt></a> Inclou eines per millorar l´OCR, afegir seccions d´encapçalament i altre eines de format.',
    categories: [
        {
            name: 'Eines de transcripció',
            scripts: proofreadingScripts
        }
    ]
});
/*********
** Page context
*********/
// Mutable page context shared by the tools in this gadget.
var state = {};
// whether the page context has been initialised
state.initialised = false;
// number: page number parsed from the page heading; proofed: result of the page-quality check
state.page = { number: null, proofed: null };
// work-specific header/footer template formats
state.specialFormats = [];
/*********
** Private methods
*********/
/**
 * Refresh the data needed by the page tools: the page number and proofread
 * status of the current page, plus any user-defined work formats.
 *
 * There is no run-once guard (state.initialised is never set), so this runs on
 * every tool invocation and must therefore be safe to call repeatedly.
 */
var _initialise = function() {
    // get page metadata: the heading is expected to contain "<file>.djvu/<n>" or "<file>.pdf/<n>"
    var pn = /\.(?:djvu|pdf)\/([0-9]+)/.exec($("#firstHeading").html());
    var pq = document.getElementById('pagequality');
    state.page = {
        number: pn !== null ? parseInt(pn[1], 10) : null,
        // truthy when the element's class carries quality0 or quality2..quality4
        proofed: pq && pq.getAttribute('class') && pq.getAttribute('class').match(/quality0|quality[2-4]/)
    };

    // get user-defined work formats
    // expected format:
    // {
    //   title: /History of England /,
    //   evenHeader: '{{rh|...}}',
    //   oddHeader: '{{rh|...}}',
    //   footer: '',
    //   footerWithReferences: '{{smallrefs}}'
    // }
    if (window.specialFormats) {
        // User formats come first so they take precedence; entries already known are
        // appended only once, so repeated calls no longer grow the list with duplicates.
        var merged = window.specialFormats.concat();
        var known = [].concat(state.specialFormats);
        for (var i = 0; i < known.length; i++) {
            if (merged.indexOf(known[i]) === -1)
                merged.push(known[i]);
        }
        state.specialFormats = merged;
    }
};
/**
 * Convert the text to title case. Every word is capitalised except the short
 * articles, conjunctions and prepositions listed in `minorWords`; the first and
 * last words are always capitalised.
 * @param {string} text The text to convert.
 * @returns {string} The title-cased text.
 */
var _titlecase = function(text) {
    // words that stay lower-case unless they open or close the title
    var minorWords = {
        "a": true, "á": true, "à": true,
        "e": true, "é": true, "è": true,
        "i": true, "y": true,
        "o": true, "ó": true, "ò": true,
        "u": true,
        "el": true, "la": true, "los": true, "las": true, "les": true,
        "un": true, "una": true, "unos": true, "unes": true,
        "de": true, "del": true
    };
    var capitalise = function(word) {
        return word.substring(0, 1).toUpperCase() + word.substring(1);
    };

    // split text into individual words and examine them one by one
    var words = text.toLowerCase().split(" ");
    for (var i = 0; i < words.length; i++) {
        if (!Object.prototype.hasOwnProperty.call(minorWords, words[i]))
            words[i] = capitalise(words[i]);
    }

    // capitalise first and last word regardless
    var last = words.length - 1;
    words[0] = capitalise(words[0]);
    words[last] = capitalise(words[last]);

    // reconstruct title
    return words.join(' ');
};
/*********
** Script methods
*********/
/**
 * Append a running-header template to the header box of the page editor.
 * Uses the first work-specific format whose title pattern matches the page
 * title, falling back to a generic {{RH}} with the page number on the outer edge.
 * Does nothing when the page number is unknown.
 * @param {object} editor The script helpers for the page.
 */
var addPageHeader = function(editor) {
    _initialise();

    var pageNumber = state.page.number;
    if (pageNumber === null)
        return;
    var onEvenPage = (pageNumber % 2 === 0);

    // a user-supplied titulos() hook replaces the known formats for this page
    if (window.titulos)
        state.specialFormats = window.titulos(pageNumber);

    var headerText = '';
    var matched = false;
    for (var key in state.specialFormats) {
        var candidate = state.specialFormats[key];
        if (mw.config.get('wgTitle').match(candidate.title)) {
            headerText = onEvenPage ? candidate.evenHeader : candidate.oddHeader;
            matched = true;
            break;
        }
    }

    // no special header matched, use a generic running header
    if (!matched) {
        headerText = onEvenPage
            ? '{{RH|left=' + pageNumber + '|center=}}' // assume verso, with page number at left
            : '{{RH|center=|right=' + pageNumber + '}}';
    }

    $('#wpHeaderTextbox').val(function(index, current) {
        return $.trim(current + '\n' + headerText);
    });
};
/**
 * Clean up OCR errors in the text, and push <noinclude> content at the top
 * & bottom of the page into the header & footer boxes respectively.
 *
 * The replacements run strictly in the order written; later rules rely on the
 * output of earlier ones, so do not reorder them.
 * @param {object} editor The script helpers for the page.
 */
var pageCleanup = function(editor) {
    _initialise();

    // push <noinclude> content at the top & bottom into the header & footer
    var text = editor.get();
    if (text.match(/^<noinclude\>/)) {
        var headerEnd = text.indexOf("</noinclude>");
        // an unclosed <noinclude> is left alone rather than guessed at
        if (headerEnd !== -1) {
            // 11 === "<noinclude>".length, 12 === "</noinclude>".length
            var headerContent = text.substring(11, headerEnd).replace(/^\s+|\s+$/g, '');
            $('#wpHeaderTextbox').val(function(i, val) {
                return $.trim(val + "\n" + headerContent);
            });
            editor.set(text.substring(headerEnd + 12));
        }
    }
    text = editor.get();
    if (text.match(/<\/noinclude\>$/)) {
        var footerStart = text.lastIndexOf("<noinclude>");
        // a stray closing tag without its opener must not wipe the page text
        if (footerStart !== -1) {
            var footerContent = text.substring(footerStart + 11, text.length - 12).replace(/^\s+|\s+$/g, '');
            $('#wpFooterTextbox').val(function(i, val) {
                return $.trim(footerContent + "\n" + val);
            });
            editor.set(text.substring(0, footerStart));
        }
    }

    // clean up text
    editor
        // remove trailing spaces at the end of each line
        .replace(/ +\n/g, '\n')
        // remove trailing whitespace preceding a hard line break
        .replace(/ +<br *\/?>/g, '<br />')
        // remove whitespace and numerals at the end and at the start of the page text
        // (numerals are nearly always page numbers in the header or footer)
        .replace(/[\s\d]+$/g, '')
        .replace(/^[\s\d]+/g,'')
        // remove trailing spaces at the end of refs
        .replace(/ +<\/ref>/g, '</ref>')
        // remove trailing spaces at the end of template calls
        .replace(/ +}}/g, '}}')
        // convert double-hyphen to mdash (avoiding breaking HTML comment syntax)
        .replace(/([^\!])--([^>])/g, '$1—$2')
        // remove spacing around mdash, but only if it has spaces on both sides
        // (we don't want to remove the trailing space from "...as follows:— ",
        // bearing in mind that the space will already be gone if at end of line).
        .replace(/ +— +/g, '—')
        // join words that are hyphenated across a line break
        // (but leave "|-" table syntax alone)
        .replace(/([^\|])-\n/g, '$1');

    // clean up pages if they don't have <poem>
    if (!editor.contains('<poem>')) {
        editor
            // lines that start with " should probably be new lines,
            // if the previous line ends in punctuation,
            // other than a comma or semicolon
            // and let's get rid of trailing space while we're at it
            .replace(/([^\n\w,;])\n\" */g, '$1\n\n"')
            // lines that end with " should probably precede a new line,
            // unless preceded by a comma,
            // or unless the new line starts with a lower-case letter;
            // and let's get rid of preceding space while we're at it
            .replace(/([^,])\ *\"\n([^a-z\n])/g, '$1"\n\n$2')
            // remove single line breaks; preserve multiple.
            // but not if there's a tag, template or table syntax either side of the line break
            .replace(/([^>}\|\n])\n([^:#\*<{\|\n])/g, '$1 $2')
            // collapse sequences of spaces into a single space
            .replace(/ +/g, ' ');
    }

    // more page cleanup
    editor
        // dump spurious hard breaks at the end of paragraphs
        .replace(/<br *\/?>\n\n/g, '\n\n')
        // phantom (replacement) character
        .replace(/[�]/g,'')
        // remove unwanted spaces around punctuation marks
        .replace(/ ([;:\?!,\.])/g, '$1')
        // unicodify: turn HTML entities into the characters they stand for
        .replace(/&mdash;/g, '—')
        .replace(/&ndash;/g, '–')
        .replace(/&quot;/g, '"')
        // straighten quotes and apostrophes.
        .replace(/[“”]/g, '"')
        .replace(/[‘’`]/g, '\'')
        // OCR fixes (common misreadings, roughly alphabetical by the corrected word)
        .replace(/(a[bh]a )/g, 'aba ')
        .replace(/(a[bh]an )/g, 'aban ')
        .replace(/(acccn)/g, 'accen')
        .replace(/([an][iïíìl!1I]x)/g, 'aix')
        .replace(/(x[6d])/g, 'xó')
        .replace(/(a[iïíìl!1I]g[anou][nu])/g, 'algun')
        .replace(/(a[iïíìl!1I]g[un]a)/g, 'aigua')
        .replace(/(aU)/g, 'all')
        .replace(/( attre )/g, ' altre ')
        .replace(/( a[iïíìl!1I][rtl]r)/g, ' altr')
        .replace(/( a[iïíìl!1I][8s] )/g, ' als ')
        .replace(/(anl )/g, 'ant ')
        .replace(/b[ce]rt/g, 'bert')
        .replace(/(b[6d])/g, 'bó')
        .replace(/( calala)/g, ' catala')
        .replace(/(c[iïíìl!1I][56d] )/g, 'ció ')
        .replace(/(c[iïíìl!1I][56d][ns][8s] )/g, 'cions ')
        .replace(/( corn )/g, ' com ')
        .replace(/( co[un][iïíìl!1I] )/g, ' com ')
        .replace(/(cb)/g, 'ch')
        .replace(/(c[iïíìl!1I] )/g, 'ci ') // e.g. "ofici"
        .replace(/(cuU)/g, 'cull')
        .replace(/(d[*'\"])/g, 'd\'')
        .replace(/( d[ocs] )/g, ' de ')
        .replace(/(d[ce][iïíìl!1I]x)/g, 'deix')
        .replace(/( d[ce][iïíìl!1I] )/g, ' del ')
        .replace(/( d[ce][iïíìl!1I][8s] )/g, ' dels ')
        .replace(/(d[ce][s8])/g, 'des')
        .replace(/(D[ce][anu] )/g, 'Deu ')
        .replace(/(d6[nu])/g, 'déu')
        .replace(/(d[!1I])/g, 'di')
        .replace(/(d[iïíìl!1I]g)/g, 'dig')
        .replace(/dfs/g, 'dís')
        .replace(/([nu][bh][ec][nu])/g, 'uhen')
        .replace(/(drc)/g, 'dre')
        .replace(/(dr[iïíìl!1I])/g, 'dri')
        .replace(/([ce]o[nu][ce][ce][bh])/g, 'conech')
        .replace(/(efe[ce][ce])/g, 'efecc')
        .replace(/([BE][iïíìl!1I] )/g, 'El ')
        .replace(/(EU )/g, 'Ell ')
        .replace(/( [ce][iïíìl!1I] )/g, ' el ')
        .replace(/([ -][ce][iïíìl!1I][8s] )/g, ' els ')
        .replace(/( [ce][iïíìl!1I][iïíìl!1I] )/g, ' ell ')
        .replace(/(eU)/g, 'ell')
        .replace(/(cm)/g, 'em')
        .replace(/[BE][nqu] /g, 'En ')
        .replace(/ cn/g, ' en')
        .replace(/ [ce][un] /g,' en ')
        .replace(/[ce]n[lt] /g, 'ent ')
        .replace(/ [ce]n[it]r[ce]/g, ' entre')
        .replace(/ [ce]s /g, ' es ')
        .replace(/[ce][8s][ce]ri/g, 'escri')
        .replace(/[ce][8s][ce]r /g, 'eser ')
        .replace(/ [ceo][8s][8s][ceo]r /g, ' esser ')
        .replace(/ [ce][s8][lt]([aáà])([ t])/g, ' est$1$2')
        .replace(/exlr/g, 'extr')
        .replace(/fc/g, 'fe')
        .replace(/f[ce][nu][lt]/g, 'fent')
        .replace(/f[1!Iil]([bcdfgjlmnpqrstvxyz])/g, 'fi$1') // fic, fim...
        .replace(/[fí][iïíìl!1I][iïíìl!1I][iïíìl!1I]/g, 'fill')
        .replace(/(g[iïíìl!1I][6é]s)/g, 'glés')
        .replace(/g[nu][óé]/g, 'gué')
        .replace(/gn[ce]/g, 'gne') // e.g. "digne"
        .replace(/gu[ce]/g, 'gue')
        .replace(/g[nu]t/g, 'gut')
        .replace(/[hb]a[nu] /g, 'han ')
        .replace(/[hb]ab[ce]m/g, 'havem')
        .replace(/[hb]av[iïíìl!1I]a/g, 'havia')
        .replace(/([hb]av[iïíìl!1I][ce][nu])/g, 'havien')
        .replace(/([hb][iïíìl!1I] [hb]a)/g, 'hi ha')
        .replace(/ [bh]a /g, ' ha ')
        .replace(/h[iïìl!1I]/g, 'hi')
        .replace(/[bh][oe]m[ce]/g, 'home')
        .replace(/(horn)/g, 'hom')
        .replace(/( [bh]o[no]t )/g, ' hont ')
        .replace(/([iïíìl!1]U)/g, 'ill')
        .replace(/([iïíìl!1I]n[lt] )/g, 'int ')
        .replace(/J[ce][s8]/g, 'Jes')
        .replace(/jomada/g, 'jornada')
        .replace(/ [iïíìl!1I][nu]r /g, ' lur ')
        .replace(/ [*'´\\^][iïíìl!1I] /g, ' \'l ')
        .replace(/L[*´\\^]/g, 'L\'')
        .replace(/([ .,;])[iïíìl!1I][*'´\\^]/g, '$1l\'')
        .replace(/Ta([iy])gua/g, 'l\'a$1gua')
        .replace(/V a/g, 'l\' a')
        .replace(/ [iïíìl!1]a /g, ' la ')
        .replace(/ [iïíìl!1][ec][a8s] /g, ' les ')
        .replace(/ [iïíìl!1]i /g, ' li ')
        .replace(/[UI][iïíìl!1I][bh]r[ec]/g, 'Uibre')
        .replace(/ii([aáàeéèiíìoóòuúù])/g, 'll$1') // e.g. "iiibre"
        .replace(/ [iïíìl!1]o /g, ' lo ')
        .replace(/ [iïíìl!1]o[a8s] /g, ' los ')
        .replace(/Uoc/g, 'lloc')
        .replace(/U[nu][nu]/g, 'llun')
        .replace(/([*'´][iïíìl!1I][8s])/g, '\'ls')
        .replace(/( M[8s] )/g, ' \'ls ')
        .replace(/( [iïíìl!1][8s] )/g, ' ls ')
        // keep the surrounding spaces so the neighbouring words are not glued together
        .replace(/ ine /g, ' me ')
        .replace(/m[ce]([ '])/g, 'me$1')
        .replace(/(loient )/g, 'lment ')
        .replace(/(ni[ce][nu]y)/g, 'menys')
        .replace(/(mo[iïíìl!1]t)/g, 'molt')
        .replace(/(—jN)/g, '—¡N')
        .replace(/(n[*'\\^])/g, 'n\'')
        .replace(/([*'\\^]n)/g, '\'n')
        .replace(/ [nu]o /g, ' no ')
        .replace(/nl([ ;.:r])/g, 'nt$1') // entre, -nt
        .replace(/(oU)/g, 'oll')
        .replace(/(—jP)/g, '—¡P')
        .replace(/(prc)/g, 'pre')
        .replace(/pnn/g, 'pun')
        .replace(/(—[ij]Q)/g, '—¡Q')
        .replace(/(Q[anou][ce])/g, 'Que')
        .replace(/(Q[anou][iïíìl!1I])/g, 'Qui')
        .replace(/([çq][*'\\^])/g, 'q\'')
        .replace(/([çq][nu][*'\\^])/g, 'qu\'')
        .replace(/([çq][anou][anou][il])/g, 'qual')
        .replace(/([çq][anou][anou][nu])/g, 'quan')
        .replace(/([çq][anou][ceo])/g, 'que')
        .replace(/([çq]ii[ceo])/g, 'que')
        .replace(/qae/g, 'que')
        .replace(/([çq]u[ce] [iïíìl!1I] )/g, 'que l ')
        .replace(/([çq][anou][óé])/g, 'qué')
        .replace(/([çq][anou][ce]s[lt])/g, 'quest')
        .replace(/([çq][anou][iïíìl!1I])/g, 'qui')
        .replace(/(rcg)/g, 'reg') // could also be "rog"
        .replace(/(rcr)/g, 'rer') // could also be "ror"
        .replace(/rcy/g, 'rey')
        .replace(/(Sl)/g, 'Si')
        .replace(/([s8][*'\\^])/g, 's\'')
        .replace(/([*'\\^]s)/g, '\'s')
        .replace(/ [sa8][ce] /g, ' se ')
        .replace(/([8s]cc)/g, 'sec')
        .replace(/scd/g, 'sed')
        .replace(/scg/g, 'seg')
        .replace(/([8s][ceo][çgq][eo][nu])/g, 'segon')
        .replace(/( [8s]cr )/g, ' ser ') // could also be " sor "
        .replace(/( [8s][ce]r[ce])/g, ' sere')
        .replace(/( [8s]o[nD] )/g, ' son ')
        .replace(/([8s]lr)/g, 'str') // e.g. "vostra"
        .replace(/(t[*'\\^])/g, 't\'')
        .replace(/([*'\\^]t)/g, '\'t')
        .replace(/[lt]am[6b]([éèe])/g, 'tamb$1')
        .replace(/[lt]amp[ceo][ceo]/g, 'tampoc')
        .replace(/( tc )/g, ' te ')
        .replace(/[lt][ce]rr/g, 'terr')
        .replace(/(tU)/g, 'tll')
        .replace(/ [tl][iIïíìÏÌÍIL][un][ce]([h ;.:])/g, ' tinc$1')
        .replace(/(trc)/g, 'tre') // could also be "tro"
        .replace(/( [nu][nu] )/g, ' un ') // could also be " nu "
        .replace(/( [nu][nu][oa] )/g, ' una ') // could also be " nua "
        .replace(/(uU)/g, 'ull')
        .replace(/(▼)/g, 'V')
        .replace(/v[ce]ll/g, 'vell')
        .replace(/(•)/g, '.');
};
/**
 * As you work your way through the page, when you encounter a reference, just mark it with <ref></ref> tags and continue.
 * Once you've got to the end of the page and proofed the references, simply highlight each reference in turn,
 * and use this function to move it to its proper position.
 *
 * The selected text is moved into the first empty <ref></ref> pair, whether the
 * selection lies after or before it. Nothing is moved when there is no selection,
 * no empty pair, or the selection overlaps the pair itself. The footer is refreshed
 * in every case.
 * @param {object} editor The script helpers for the page.
 */
var makeReference = function(editor) {
    _initialise();

    var editbox = $('#wpTextbox1').get(0);
    editbox.focus();
    var refStart = editbox.selectionStart;
    var refEnd = editbox.selectionEnd;
    var value = editbox.value;
    var firstref = value.indexOf('<ref></ref>');

    if (firstref !== -1 && refEnd > refStart) {
        var insertAt = firstref + 5;  // just after "<ref>"
        var tagEnd = firstref + 11;   // just after "</ref>"
        var note = value.slice(refStart, refEnd);

        if (refStart >= tagEnd) {
            // usual case: the note text sits below the marker
            editbox.value = value.slice(0, insertAt)
                + note
                + value.slice(insertAt, refStart)
                + value.slice(refEnd);
        }
        else if (refEnd <= firstref) {
            // the note text sits above the marker
            editbox.value = value.slice(0, refStart)
                + value.slice(refEnd, insertAt)
                + note
                + value.slice(insertAt);
        }
        // otherwise the selection overlaps the tags themselves: leave the text alone
    }

    addPageFooter(editor);
};
/**
 * Set the footer box according to whether the page text contains references.
 * The first work-specific format whose title pattern matches the page title
 * supplies the footer (`footerWithReferences` when the page has refs, `footer`
 * otherwise); without a match the footer is '{{Referències}}' or empty.
 * @param {object} editor The script helpers for the page.
 */
var addPageFooter = function(editor) {
    _initialise();

    var textbox = $('#wpTextbox1').get(0);

    // a user-supplied titulos() hook replaces the known formats for this page
    if (window.titulos)
        state.specialFormats = window.titulos(state.page.number);

    var pageText = textbox.value;
    var hasRefs = pageText.indexOf("<ref") != -1 || pageText.indexOf("{{#tag:ref") != -1;

    // pick the format property to read, and the generic fallback used when nothing matches
    var formatField = hasRefs ? 'footerWithReferences' : 'footer';
    var footerText = hasRefs ? '{{Referències}}' : '';

    for (var key in state.specialFormats) {
        var candidate = state.specialFormats[key];
        if (mw.config.get('wgTitle').match(candidate.title)) {
            footerText = candidate[formatField];
            break;
        }
    }

    $('#wpFooterTextbox').val(footerText);
};
/**
 * Wrap the selected text in the {{maj|...}} template. Text that equals its own
 * upper-case form is converted to title case first.
 * @param {object} editor The script helpers for the page.
 */
var smallcaps = function(editor) {
    _initialise();

    var wrapSelection = function(selected) {
        // Small-caps on all-caps text is pointless, unless the all-caps is OCR of
        // text that is really small-caps; so such text is title-cased before wrapping.
        var isAllCaps = (selected == selected.toUpperCase());
        var content = isAllCaps ? _titlecase(selected) : selected;
        return '{{maj|' + content + '}}';
    };

    editor.replaceSelection(wrapSelection);
};
/**
 * Replace the selected text with its upper-case form.
 * @param {object} editor The script helpers for the page.
 */
var upper = function(editor) {
    _initialise();

    var toUpper = function(selected) {
        return selected.toUpperCase();
    };
    editor.replaceSelection(toUpper);
};
/**
 * Replace the selected text with its lower-case form.
 * @param {object} editor The script helpers for the page.
 */
var lower = function(editor) {
    _initialise();

    var toLower = function(selected) {
        return selected.toLowerCase();
    };
    editor.replaceSelection(toLower);
};
/*global $, mw*/
/*
* Query the OCR text for a given Page:. First try to get the hOCR text layer, as it is
* available for most books, fast, and of better quality. If that fails, fall back to the
* older and slower OCR method. hOCR fails for around 1 in 5000 books. OCR should never
* fail, as it uses the image visible on the Page:.
*/
// Content language code of the wiki; sent as the `lang` parameter of the OCR requests below.
var lang = mw.config.get( 'wgContentLanguage' );
/**
 * Lock or unlock the edit box while an OCR request is in flight.
 * While locked, pressing Esc unlocks it again and the OCR button (#wsOcr1) is
 * unbound; when unlocked, the button is bound to do_hocr again.
 *
 * Safe to call repeatedly with the same value: handlers are removed before being
 * re-added, so neither the Esc handler nor the click handler ever stacks up.
 * @param {boolean} set true to disable the edit box, false to re-enable it.
 */
function disable_input(set)
{
    var $doc = $(document);
    var $button = $('#wsOcr1');

    // the namespace lets us drop only our own keyup handler
    $doc.off('keyup.wsOcr');

    if (set) {
        $doc.on('keyup.wsOcr', function(e) {
            if (e.which === 27) { disable_input(false); } // 27 = Esc
        });
        $button.off('click');
    } else {
        $button.off('click', do_hocr).on('click', do_hocr);
    }

    $('#wpTextbox1').prop('disabled', set);
}
/**
 * Handle the response of the slow OCR service: show the error text on failure,
 * otherwise put the recognised text into the edit box. The edit box is
 * re-enabled afterwards in both cases.
 * @param {object} data The response; reads `data.error` and `data.text`.
 */
function ocr_callback(data) {
    if (data.error) {
        alert(data.text);
        disable_input(false);
        return;
    }

    // Checking if the textbox is disabled is required with chrome as ESC doesn't kill
    // the query.
    var textbox = document.getElementById("wpTextbox1");
    if (textbox.disabled) {
        textbox.value = data.text;
    }
    disable_input(false);
}
/**
 * Handle the response of the hOCR service. On error, fall back to the slow OCR
 * request. Otherwise cache the raw hOCR markup in localStorage (best effort),
 * extract its text, normalise the line breaks and put it into the edit box.
 * The edit box is always re-enabled, even if processing the response throws.
 * @param {object} data The response; reads `data.error` and `data.text`.
 */
function hocr_callback(data) {
    if (data.error) {
        // Fallback to the slow way.
        disable_input(false);
        do_ocr();
        return;
    }

    try {
        // Checking if tb is disabled is required with chrome as ESC doesn't kill
        // the query.
        var tb = document.getElementById("wpTextbox1");
        if (tb.disabled) {
            try {
                localStorage.ws_hOCR = data.text;
            } catch (e) {
                // storage may be full or unavailable; the cached copy is optional
                mw.log.warn('hOCR layer could not be cached in localStorage', e);
            }

            var text = $(data.text).text();
            // Collapse the line breaks: single blank lines are dropped, runs of four
            // newlines become one paragraph break (a placeholder protects them meanwhile).
            text = text.replace(/[ ]*\n[ ]*/g, '\n')
                .replace(/\n\n\n\n/g, '@_@_@_@_@_@')
                .replace(/\n\n/g, '\n')
                .replace(/@_@_@_@_@_@/g, '\n\n')
                .replace(/\n\n\n/g, '\n\n');
            tb.value = $.trim(text);
        }
    } finally {
        // never leave the edit box locked, whatever happened above
        disable_input(false);
    }
}
/**
 * Lock the edit box and request the hOCR text layer for the current page.
 * A successful response goes to hocr_callback; if the request itself fails,
 * the edit box is unlocked and the slow OCR request is tried instead.
 */
function do_hocr() {
    disable_input(true);

    // every value is URL-encoded so titles and user names with spaces, '&' etc. survive
    var request_url = '//tools.wmflabs.org/phetools/hocr_cgi.py?cmd=hocr&book='
        + encodeURIComponent(mw.config.get('wgTitle'))
        + '&lang=' + encodeURIComponent(lang)
        + '&user=' + encodeURIComponent(mw.config.get('wgUserName'));

    $.getJSON(request_url).done(hocr_callback).fail(function() {
        // mirror the error path of hocr_callback: unlock first, then fall back
        disable_input(false);
        do_ocr();
    });
}
/**
 * Request OCR of the page image shown on the Page: (the slow fallback path).
 * Does nothing when no page image is present. The edit box is locked for the
 * duration of the request and unlocked again if the request fails; a successful
 * response is handled by ocr_callback.
 */
function do_ocr() {
    var $image = $( '.prp-page-image img' );
    if (!$image.length)
        return;

    disable_input(true);

    // server side can't use a protocol-relative url, so request it as https:
    // (an already absolute src is passed through unchanged)
    var src = $image.attr('src');
    var url_image = src.indexOf('//') === 0 ? 'https:' + src : src;

    // every value is URL-encoded so that '%', '&' and spaces reach the server intact
    var request_url = '//tools.wmflabs.org/phetools/ocr.php?cmd=ocr&url=' + encodeURIComponent(url_image)
        + '&lang=' + encodeURIComponent(lang)
        + '&user=' + encodeURIComponent(mw.config.get('wgUserName'));

    $.getJSON( request_url ).done( ocr_callback ).fail(function() {
        // don't leave the edit box locked when the service is unreachable
        disable_input(false);
    });
}
});
// </nowiki>