User:Pathosbot/TemplateEditor.cs

From Wikisource
Jump to navigation Jump to search

This is an AutoWikiBrowser plugin written by Pathoschild. It provides a simplified framework for editing template parameters, including correct parsing of both unnamed and named parameters, emulation of template behavior (such as overwriting values when defined more than once), nested templates, and paranoid validation that stops editing when possible errors occur.

ProcessText() contains the text processing script itself, and typically changes from task to task. The code below is for: "Template standardisation" (Scriptorium, September 2008).

using System;
using System.Collections;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;
using System.Windows.Forms;

using WikiFunctions;
using WikiFunctions.AWBSettings;
using WikiFunctions.Parse;

namespace TemplateEditor {
	public class TemplateEditor : WikiFunctions.Plugin.IAWBPlugin {
		#region TemplateEditor functions
		/********************
		 * Functions (UI)
		 *******************/
		/* Display fix and ask what to do (attempt to fix, ignore, abort) */
		bool AcceptFix(string message, string oldText, string newText) {
			// Show the proposed change (old and new text) and let the operator choose.
			string prompt = message + "\nAccept fix below? Yes, No (ignore fix), or Cancel editing? (text below)\n\nold: " + oldText + "\n\nnew: " + newText;
			DialogResult answer = MessageBox.Show(prompt, "Possible fix", MessageBoxButtons.YesNoCancel);

			// Cancel stops the run and unticks bot mode; the fix itself is not applied.
			if(answer == DialogResult.Cancel) {
				AWB.Stop("TemplateEditor");
				AWB.BotModeCheckbox.Checked = false;
			}

			// Only an explicit Yes accepts the fix; every other outcome rejects it.
			return answer == DialogResult.Yes;
		}

		/* Display error and ask what to do (abort, retry, ignore) */
		void QueryError(string message) {
			// Convenience overload: no accompanying text to show below the message.
			QueryError(message, "");
		}
		void QueryError(string message, string text) {
			// Build the dialog text once, then act on the operator's choice.
			string prompt = message + "\nAbort editing, Retry page, Ignore error? (text below)\n\n" + text;
			DialogResult answer = MessageBox.Show(prompt, "Error", MessageBoxButtons.AbortRetryIgnore, MessageBoxIcon.Information);

			if(answer == DialogResult.Abort) {
				// stop the run and untick bot mode
				AWB.Stop("TemplateEditor");
				AWB.BotModeCheckbox.Checked = false;
			}
			else if(answer == DialogResult.Retry) {
				AWB.Start("TemplateEditor");
			}
			// Ignore (or any other result): nothing to do, the caller carries on.
		}

		/* display error and abort */
		void AbortError(string message) {
			// Critical errors only offer Retry or Cancel; there is no "ignore" option.
			DialogResult answer = MessageBox.Show(message, "Critical error", MessageBoxButtons.RetryCancel, MessageBoxIcon.Error);

			if(answer == DialogResult.Retry) {
				AWB.Start("TemplateEditor");
			}
			else if(answer == DialogResult.Cancel) {
				// stop the run and untick bot mode
				AWB.Stop("TemplateEditor");
				AWB.BotModeCheckbox.Checked = false;
			}
		}


		/********************
		 * Functions (text processing)
		 *******************/
		/* Explicitly delimit templates in text:
		 * {{foo|bar}} -> <start foo>foo<pipe>bar</end foo> */
		string DelimitTemplates(string text) {
			// Upper bound on rewrite passes; each pass delimits the templates that
			// currently contain no braces of their own, so nesting needs several passes.
			const int maxPasses = 10;

			// escape nowiki/pre blocks so their contents are not touched below
			MatchCollection escaped = Regex.Matches(text, "<(nowiki|pre)>.*?</\\1>", RegexOptions.Singleline | RegexOptions.IgnoreCase);
			for(int i = 0; i < escaped.Count; i++)
				text = text.Replace(escaped[i].Value, "_pathosbot_escape_" + i + "_");

			// delimit parameters
			text = text.Replace("|", "<pipe>");
			text = Regex.Replace(text, "\\[\\[([^\\]]+)<pipe>", "[[$1|"); // unescape wikilinks

			// delimit templates
			for(int pass = 0; pass < maxPasses && text.Contains("{{"); pass++) {
				string delimited = Regex.Replace(text, "{{([\\s\\n\\r]*([^<{}]+?)[\\s\\n\\r]*(?:<pipe>[^{}]*)?)}}", "<start $2>$1<end $2>", RegexOptions.Singleline);

				// a pass that changes nothing will never change anything: stop early
				if(delimited == text)
					break;
				text = delimited;
			}

			// Report only when braces are really left over. Checking after the loop
			// avoids a false alarm when the final allowed pass resolved everything.
			if(text.Contains("{{"))
				QueryError("exceeded loop limit for template escaping.");

			// restore nowiki/pre blocks
			for(int i = 0; i < escaped.Count; i++)
				text = text.Replace("_pathosbot_escape_" + i + "_", escaped[i].Value);

			// exit
			return text;
		}

		/* Reverse explicit template delimiting */
		string UndelimitTemplates(string text) {
			// Turn every start marker, end marker and pipe marker back into wiki syntax.
			string restored = Regex.Replace(text, "<start[^>]+>", "{{");
			restored = Regex.Replace(restored, "<end[^>]+>", "}}");
			return Regex.Replace(restored, "<pipe>", "|");
		}
		string UndelimitTemplates(string text, string regexSearch) {
			// Same as the three-argument form, with default regex options.
			return UndelimitTemplates(text, regexSearch, RegexOptions.None);
		}
		string UndelimitTemplates(string text, string regexSearch, RegexOptions options) {
			// Only the fragments matched by regexSearch are undelimited; each matched
			// fragment is swapped (wherever it occurs) for its undelimited form.
			foreach(Match hit in Regex.Matches(text, regexSearch, options)) {
				string fragment = hit.Value;
				text = text.Replace(fragment, UndelimitTemplates(fragment));
			}

			return text;
		}

		/* Given a delimited template, returns a hash table of its parameters */
		Hashtable GetParameters(string text) {
			// By default, a template without parameters is reported as an error.
			return GetParameters(text, true);
		}

		Hashtable GetParameters(string text, bool error_on_no_parameters) {
			const RegexOptions dotAll = RegexOptions.Singleline;
			Hashtable result = new Hashtable();

			// strip the outer start/end markers, keeping only the template body
			string body = Regex.Replace(text, "^[\\r\\n\\s]*<start([^>]+)>[\\r\\n\\s]*(.+?)[\\r\\n\\s]*<end\\1>[\\r\\n\\s]*$", "$2", dotAll);

			// nested templates go back to plain wiki syntax so their pipes are not split on
			body = UndelimitTemplates(body, "<start([^>]+)>.*<end\\1>", dotAll);

			// normalize whitespace around pipes, around '=' of named parameters, and at the end
			body = Regex.Replace(body, "[\\r\\n\\s]*<pipe>[\\r\\n\\s]*", "<pipe>");
			body = Regex.Replace(body, "<pipe>([a-z_0-9]+?)[\\r\\n\\s]*=[\\r\\n\\s]*", "<pipe>$1=", RegexOptions.IgnoreCase);
			body = Regex.Replace(body, "[\\r\\n\\s]*$", "");

			// nothing to parse: optionally report it, then return the empty table
			if(!body.Contains("<pipe>")) {
				if(error_on_no_parameters)
					QueryError("no parameters detected.");
				return result;
			}

			// walk the pipe-separated pieces
			int positional = 0;
			foreach(Match piece in Regex.Matches(body, "(?<=<pipe>)(.*?)(?=<pipe>|$)", dotAll)) {
				string raw = piece.Value;
				string key;
				string content;

				if(!Regex.IsMatch(raw, "^[a-z_0-9]+=", RegexOptions.IgnoreCase)) {
					// unnamed parameter: keyed by its 1-based position among unnamed ones
					positional++;
					key = positional.ToString();
					content = raw;
				}
				else {
					// named parameter: split at the first '='
					key = Regex.Replace(raw, "=.*$", "", dotAll);
					content = Regex.Replace(raw, "^[a-z_0-9]+=", "", dotAll | RegexOptions.IgnoreCase);
				}

				// a later definition of the same key overwrites the earlier one
				result[key] = content;
			}

			return result;
		}

		/* Given text, returns whether it contains HTML formatting */
		bool HasFormatting(string text) {
			// HTML tag names are case-insensitive, so <BR> and <Span ...> must be
			// caught too. The \b stops tag-name prefixes such as "<brackets>" from
			// counting as a <br> tag.
			return Regex.IsMatch(text, "<(?:br|font|div|span)\\b[^>]*>", RegexOptions.IgnoreCase);
		}

		/********************
		 * Functions (parameter manipulation)
		 *******************/
		/* sets the value of a parameter, creating it if necessary */
		void SetParameter(Hashtable _parameters, string name, string text) {
			// The Hashtable indexer adds the key when missing and overwrites it otherwise.
			_parameters[name] = text;
		}

		/********************
		 * Functions (header title parsing)
		 *******************/
		/* guesses navigation links (uses en-Wikisource template) */
		Hashtable GuessNavigation(WikiFunctions.Plugin.ProcessArticleEventArgs eventargs) {
			// Result always carries these three keys; they stay empty when the
			// subpage title cannot be parsed.
			Hashtable navigation = new Hashtable();
			navigation.Add("current", "");
			navigation.Add("previous", "");
			navigation.Add("next", "");

			// get subpage name (everything after the first slash of the title)
			string subpage_name = Regex.Match(eventargs.ArticleTitle, "(?<=/).+?$").ToString();

			// break into label & value, e.g. "Chapter 12" -> "Chapter" + "12"
			Match subpage_name_parts = Regex.Match(subpage_name, "^([a-z]+) ([0-9]+)$", RegexOptions.IgnoreCase);

			// Groups.Count reflects the pattern (always 3 here), not whether it
			// matched, so success has to be tested explicitly.
			if(!subpage_name_parts.Success) {
				QueryError("Cannot parse subpage title into label and value (expected \"<label> <number>\").", subpage_name);
				return navigation;
			}
			string label_text = subpage_name_parts.Groups[1].Value;
			string value_text = subpage_name_parts.Groups[2].Value;

			// generate values; previous/next are left for the substituted
			// "generate navigation" template to compute
			navigation["current"] = subpage_name;
			navigation["previous"] = "{{subst:generate navigation|direction=back|label=" + label_text + "|value=" + value_text + "}}";
			navigation["next"] = "{{subst:generate navigation|direction=next|label=" + label_text + "|value=" + value_text + "}}";

			// return
			return navigation;
		}

		#endregion

		#region ProcessText
		/********************
		 * Text processor
		 *******************/
		/* main script */
		string ProcessText(WikiFunctions.Plugin.IAutoWikiBrowser sender, WikiFunctions.Plugin.ProcessArticleEventArgs eventargs) {
			const RegexOptions headerOptions = RegexOptions.IgnoreCase | RegexOptions.Singleline;
			string text = eventargs.ArticleText;

			/********************
			* Substitute templates
			*******************/
			text = DelimitTemplates(text); // delimit valid templates (prevents false positives with <pre>, <nowiki>, etc)
			text = Regex.Replace(text, "<start [Pp]rettytable>", "<start prettytable>subst:");
			text = UndelimitTemplates(text);

			/*******************
			* Clean up header
			*******************/
			// Accept either capitalisation here: the normalisation just below is
			// written for both, so a page using {{Header must not be skipped.
			if(Regex.IsMatch(text, "{{[hH]eader")) {
				/* get titles for back/next linking */
				// get and normalize text
				text = Regex.Replace(text, "{{[hH]eader2?", "{{header");
				text = DelimitTemplates(text);

				/* Exit on error */
				// The text is in delimited form at this point, so it is undelimited
				// before being handed back; otherwise the internal <start ...>,
				// <end ...> and <pipe> markers would end up in the article.
				if(!Regex.IsMatch(text, "<start header>.+?<end header>", headerOptions)) {
					QueryError("Template not detected.");
					return UndelimitTemplates(text);
				}
				if(Regex.IsMatch(text, "<start header>.+<start header>", headerOptions)) {
					AbortError("multiple instances detected, cannot automate.");
					return UndelimitTemplates(text);
				}

				/* Process parameters */
				// extract
				string template = Regex.Match(text, "<start header>.+?<end header>", headerOptions).Value;
				Hashtable parameters = GetParameters(template, true); // true == report an error if no parameters are found

				/*
				if(parameters["previous"].ToString() == "" && parameters["next"].ToString() == "") {
					string[] titles = { "Acoemeti", "Acolyte", "Acominatus, Michael", "Aconcagua", "Aconcio, Giacomo", "Aconite", "Acontius", "Acorn", "Acorus Calamus", "Acosta, Jose de", "Acosta, Uriel", "Acotyledones", "Acoustics", "Acqui", "Acre (state)", "Acre (town)", "Acre (land measure)", "Acridine", "Acro, Helenius", "Acrobat", "Acrogenae", "Acroliths", "Acromegaly", "Acron", "Acropolis", "Acropolita, George", "Acrostic", "Acroterium", "Act", "Acta Diurna", "Actaeon", "Acta Senatus", "Actinometer", "Actinomycosis", "Actinozoa", "Action", "Actium", "Act of Parliament", "Acton (John Emerich Edward Dalbert Acton), 1st Baron", "Acton, Sir John Francis Edward, Bart.", "Acton", "Act on Petition", "Acts of the Apostles", "Actuary", "Acuminate", "Acuna, Christoval de", "Acupressure", "Acupuncture", "Adabazar", "Adad", "Adagio", "Adair, John", "Adalberon", "Adalbert (archbishop)", "Adalvert (bishop)", "Adalia", "Adam", "Adam of Bremen", "Adam (or Adan) de le Hale", "Adam, Alexander", "Adam, Sir Frederick", "Adam, Juliette", "Adam, Lambert Sigisbert", "Adam, Melchior", "Adam, Paul", "Adam, Robert", "Adam, William", "Adamant", "Adamawa", "Adamites", "Adamnan", "Adams, Andrew Leith", "Adams, Charles Francis", "Adams, Henry", "Adams, Henry Carter", "Adams, Herbert" };

					// what title are we at now?
					string current_title = Regex.Replace(eventargs.ArticleTitle, "1911 Encyclop(?:æ|ae|e)dia Britannica/", "");
					int current_index = Array.IndexOf(titles, current_title);

					if(current_index > 0 && current_index < titles.GetLength(0)) {
						// add values to template
						parameters.Add("previous", titles[current_index - 1]);
						parameters.Add("next", titles[current_index + 1]);
					}
					else {
						QueryError("Back/next links not found!");
					}
				}
				else {
					if(parameters["previous"].ToString() == "" || parameters["next"].ToString() == "") {
						QueryError("Missing one navigation link.");
					}
				}
				*/

				// validate & cleanup (iterate over a copy of the keys, since values
				// may be reassigned inside the loop)
				foreach(string parameter in new ArrayList(parameters.Keys)) {
					string value = parameters[parameter].ToString();

					switch(parameter) {
						// parameters whose only check is "no HTML formatting"
						case "author":
						case "section":
						case "title":
						case "translator":
							if(HasFormatting(value))
								QueryError("formatting detected in '" + parameter + "' parameter.");
							break;

						case "next":
							if(Regex.IsMatch(value, "(?:→|&rarr;)")) {
								string temp = Regex.Replace(value, "\\s*(?:→|&rarr;)\\s*", "");
								if(AcceptFix("arrow detected in 'next' parameter after cleanup.", value, temp))
									parameters["next"] = temp;
							}
							if(HasFormatting(value))
								QueryError("formatting detected in 'next' parameter.");
							break;

						case "noauthor":
							if(value != "")
								QueryError("text detected in 'noauthor' parameter.", value);
							break;

						case "notes":
							break;

						case "override_author":
							// show both conflicting values (not the parameter name)
							if(parameters.Contains("author") && parameters["author"].ToString() != "")
								QueryError("Both 'author' and 'override_author' parameters assigned.", parameters["author"].ToString() + "\n" + value);
							if(HasFormatting(value))
								QueryError("formatting detected in 'override_author' parameter.");
							break;

						case "previous":
							if(Regex.IsMatch(value, "(?:←|&larr;)")) {
								string temp = Regex.Replace(value, "\\s*(?:←|&larr;)\\s*", "");
								if(AcceptFix("arrow detected in 'previous' parameter after cleanup.", value, temp))
									parameters["previous"] = temp;
							}
							if(HasFormatting(value))
								QueryError("formatting detected in 'previous' parameter.");
							break;

						default:
							// unknown parameters are not copied into the new template below
							QueryError("unknown parameter detected (ignore to remove it).", parameter + " = " + value);
							break;
					}
				}

				/* generate new template */
				// When override_author is set, the author field is left empty and the
				// override is emitted as its own parameter on the same line.
				string newTemplate = "{{header"
					+ "\r\n | title    = " + ((parameters.Contains("title")) ? parameters["title"] : "")
					+ "\r\n | author   = " + ((parameters.Contains("override_author") && parameters["override_author"].ToString() != "") ? "|override_author=" + parameters["override_author"] : (parameters.Contains("author")) ? parameters["author"] : "")
					+ ((parameters.Contains("translator")) ? "\r\n | translator = " + parameters["translator"] : "")
					+ "\r\n | section  = " + ((parameters.Contains("section")) ? parameters["section"] : "")
					+ "\r\n | previous = " + ((parameters.Contains("previous")) ? parameters["previous"] : "")
					+ "\r\n | next     = " + ((parameters.Contains("next")) ? parameters["next"] : "")
					+ "\r\n | notes    = " + ((parameters.Contains("notes")) ? parameters["notes"] : "")
					+ "\r\n}}";

				/* insert into text */
				// '$' is special in a regex replacement string; double it so parameter
				// values containing '$' are inserted literally.
				text = Regex.Replace(text, "<start header>.+?<end header>", newTemplate.Replace("$", "$$"), headerOptions);
				text = UndelimitTemplates(text);
			}

			return text;
		}
		#endregion

		#region IAWBPlugin Members
		internal WikiFunctions.Plugin.IAutoWikiBrowser AWB;
		/********************
		 * initialize plugin
		 *******************/
		public void Initialise(WikiFunctions.Plugin.IAutoWikiBrowser sender) {
			// Guard: a host reference is required.
			if(sender == null)
				throw new ArgumentNullException("sender");

			// Keep the host so the dialogs can start/stop it later.
			AWB = sender;
		}

		/********************
		 * return plugin name
		*******************/
		// Both interface properties report the same display string.
		const string PluginDisplayName = "Pathosbot (TemplateEditor plugin)";

		string WikiFunctions.Plugin.IAWBPlugin.Name {
			get { return PluginDisplayName; }
		}

		string WikiFunctions.Plugin.IAWBPlugin.WikiName {
			get { return PluginDisplayName; }
		}

		/********************
		* Process article text
		*******************/
		public string ProcessArticle(WikiFunctions.Plugin.IAutoWikiBrowser sender, WikiFunctions.Plugin.ProcessArticleEventArgs eventargs) {
			// Public entry point: hand the article to the task-specific script
			// and return whatever text it produces.
			string processed = ProcessText(sender, eventargs);
			return processed;
		}

		/********************
		 * Not implemented
		 *******************/
		public void LoadSettings(object[] prefs) {
			// intentionally empty: the supplied prefs are not used
		}
		public object[] SaveSettings() {
			// no settings are produced
			return null;
		}
		public void Nudge(out bool Cancel) {
			// always reports "not cancelled"
			Cancel = false;
		}
		public void Nudged(int Nudges) {
			// intentionally empty
		}
		public void Reset() {
			// intentionally empty
		}

		#endregion
	}
}