User:Inductiveload/pwb quick.py

From Wikisource
Jump to navigation Jump to search

Simple script that wraps PWB: it converts a simple command file into a command-line invocation

  • Lines starting # are ignored
  • Lines like $VAR=foo set variables
    • $_SCRIPT sets the PWB script; the default is replace. Others include listpages and purge
  • Other lines are PWB inputs, in order

Example:

$REF=< *references */ *>
$DIV=< */ *(div|DIV) *>
-namespace:Page
-linter:stripped-tag
-summary:Removing <references/></div>
-regex
$REF *$DIV
<references/>


#! /usr/bin/env python3

import argparse
import logging

import subprocess

import re


# A variable definition line, e.g. ``$NAME=value``.  ``.`` does not match the
# trailing newline, so the captured value never includes it.
_VAR_DEF_RE = re.compile(r"^\$([A-Z0-9_-]+)=(.*)$")


def _interpolate(text, variables):
    """Return *text* with each ``$NAME`` / ``${NAME}`` replaced by its value.

    Longer names are substituted first so that a variable whose name is a
    prefix of another one (``$REF`` vs ``$REFS``) cannot clobber the longer
    reference.
    """
    for key in sorted(variables, key=len, reverse=True):
        value = variables[key]
        text = text.replace("$" + key, value)
        text = text.replace("${" + key + "}", value)
    return text


def _expand_prefixindex(line, variables):
    """Translate one ``-prefixindex`` line into a list of PWB arguments.

    ``-prefixindex:NS:Title`` becomes ``-namespace:NS`` followed by
    ``-prefixindex:Title``.  When NS is ``Page``, every ``/<digits>`` part is
    removed from the title.  A line without a namespace part is passed
    through as a single argument.
    """
    parts = line.strip().split(":", 2)

    if len(parts) < 3:
        # No namespace part: forward as-is, minus the line terminator.
        return [_interpolate(line.rstrip("\n"), variables)]

    namespace, title = parts[1], parts[2]
    if namespace == "Page":
        title = re.sub(r"/\d+", "", title)

    return [
        "-namespace:" + _interpolate(namespace, variables),
        "-prefixindex:" + _interpolate(title, variables),
    ]


def _parse_command_lines(lines):
    """Parse command-file lines into ``(script_name, pwb_args)``.

    * Lines whose first non-blank character is ``#`` are skipped.
    * ``$NAME=value`` lines define a variable; ``$_SCRIPT=value`` selects the
      PWB script instead (the default is ``replace``).
    * Every other line becomes one PWB argument, in file order, after
      variable interpolation.  Empty lines are kept as empty arguments.
    """
    script_name = "replace"
    variables = {}
    pwb_args = []

    for line in lines:
        if line.lstrip().startswith("#"):
            continue

        definition = _VAR_DEF_RE.match(line)
        if definition:
            name, value = definition.groups()
            if name == "_SCRIPT":
                script_name = value
            else:
                variables[name] = value
            continue

        if line.startswith("-prefixindex"):
            pwb_args.extend(_expand_prefixindex(line, variables))
            continue

        # Auto-link templates mentioned in the edit summary.
        if line.startswith("-summary"):
            line = re.sub(r"\{\{([^\[].*?)\}\}", r"{{[[Template:\1|\1]]}}", line)

        pwb_args.append(_interpolate(line.rstrip("\n"), variables))

    return script_name, pwb_args


def main(argv=None):
    """Build a ``python pwb.py <script> ...`` command from a file and run it.

    Args:
        argv: Command-line arguments to parse.  ``None`` (the default) lets
            argparse read them from ``sys.argv``.

    Returns:
        The exit status reported by ``subprocess.call`` for the PWB process.
    """
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='show debugging information')
    parser.add_argument("file", metavar="FILE",
                        help='Info file')
    args = parser.parse_args(argv)

    log_level = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=log_level)

    # Read as UTF-8 explicitly so the result does not depend on the locale.
    with open(args.file, encoding="utf-8") as af:
        script_name, pwb_args = _parse_command_lines(af)

    cmd = ["python", "pwb.py", script_name] + pwb_args
    logging.debug(cmd)

    return subprocess.call(cmd)


if __name__ == "__main__":
    # Exit with whatever status main() returns so the calling shell can see
    # it; a None return value still exits with status 0.
    raise SystemExit(main())