# User:Inductiveload/Scripts/Page namespace editor
#
# From Wikisource
# Jump to: navigation, search
import pw_script_header
import wikipedia
import codecs
import re
# Name of the fix to apply; not referenced by the code visible in this file.
FIX = "newline"
# Path of the input list; main() reads it line by line, one page title per
# line (presumably -- the call that opens it is incomplete in this file).
FILE = r"/home/john/src/pw/zz_filelist0.txt"
# Edit summary passed to page.put() for every uploaded page.
SUMMARY = "[bot] Tidying formatting."
# Expected layout: <noinclude>header</noinclude>body<noinclude>footer</noinclude>.
# Compiled once at import time rather than on every call.  Written as u''
# instead of ur'': the pattern has no backslashes, so the value is identical
# and the literal also parses on Python 3.
_PAGE_PARTS_RE = re.compile(
    u'(?ms)^<noinclude>(.*)</noinclude>(.*?)<noinclude>(.*)</noinclude>$')


def decomposePage(wikiText):
    """Split a page's wikitext into header, body and footer.

    Parameters:
        wikiText: the complete wikitext of one page.

    Returns:
        A ``(header, body, footer)`` tuple holding the three groups captured
        by the noinclude/body/noinclude pattern, or ``None`` (after printing
        a diagnostic) when the text does not have that layout.  Callers must
        check for ``None`` before unpacking.
    """
    # NOTE(review): the matching call was missing from the recovered source;
    # ``search`` is assumed here -- confirm against the upstream script.
    m = _PAGE_PARTS_RE.search(wikiText)
    if m is None:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Can't find header, body, footer")
        return None
    return m.group(1), m.group(2), m.group(3)
def composePage(header, body, footer):
    """Reassemble page wikitext from its three parts.

    The header and footer are each wrapped in a noinclude element and the
    body is placed, unwrapped, between them.
    """
    template = '<noinclude>%s</noinclude>%s<noinclude>%s</noinclude>'
    parts = (header, body, footer)
    return template % parts
# A single line break (plus any spaces directly before it) that has a
# non-newline character on both sides.  The neighbours are checked with
# lookarounds instead of being captured: a captured trailing character is
# consumed by the match, so it could not also start the next one, and runs of
# one-character lines ("a\nb\nc") were only partly joined.
# Plain r'' rather than ur'' so the literal parses on Python 2 and Python 3.
_SOFT_BREAK_RE = re.compile(r'(?<=[^\n]) *\n(?=[^\n])')


def process_body(body):
    """Join hard-wrapped lines of the page body with single spaces.

    Every lone newline between two non-newline characters, together with
    any spaces immediately before it, is replaced by one space.  Runs of
    two or more newlines are left untouched, as are newlines at the very
    start or end of the text.

    Parameters:
        body: the body wikitext.

    Returns:
        The body with lone line breaks replaced by spaces.
    """
    return _SOFT_BREAK_RE.sub(u' ', body)
def process_header(header):
    """Hook for header clean-up; currently hands *header* back unchanged."""
    processed = header
    return processed
def process_footer(footer):
    """Hook for footer clean-up; currently hands *footer* back unchanged."""
    processed = footer
    return processed
def main():
    """Tidy every page named in FILE, asking before each upload.

    For each title the page's wikitext is fetched, split with
    decomposePage(), run through process_header() / process_body() /
    process_footer(), and put back together with composePage().  The diff
    and the new text are shown, and the page is saved as a minor edit with
    SUMMARY only when the operator answers y/Y/yes/Yes.
    """
    # NOTE(review): the call opening the list was truncated in the recovered
    # source; codecs.open(FILE, ...) is assumed from the surviving
    # "'r', 'utf-8')" arguments -- confirm against the upstream script.
    in_file = codecs.open(FILE, 'r', 'utf-8')
    # try/finally (not "with") so the handle is always closed without
    # relying on syntax newer than the rest of this script uses.
    try:
        ws_site = wikipedia.getSite("en", "wikisource")
        for line in in_file:
            # Lines read from the file keep their terminator; drop it and
            # skip blank lines so only real titles are processed.
            page_title = line.strip()
            if not page_title:
                continue
            print('(INF) Processing page: %s' % page_title)
            page = wikipedia.Page(ws_site, page_title)  # get the page
            old_wikitext = page.get()  # extract wikitext
            parts = decomposePage(old_wikitext)  # decompose the page
            if parts is None:
                # Unpacking None would raise TypeError and abort the run.
                print('(ERR) Skipping page: %s' % page_title)
                continue
            header, body, footer = parts
            body = process_body(body)
            header = process_header(header)
            footer = process_footer(footer)
            # make a well formed Page: namespace page
            new_wikitext = composePage(header, body, footer)
            wikipedia.showDiff(old_wikitext, new_wikitext)
            print(new_wikitext)
            cont = raw_input("Upload? [y/n]: ")
            if cont in ['y', 'Y', 'yes', 'Yes']:
                page.put(new_wikitext, SUMMARY, minorEdit=True)
    finally:
        in_file.close()
if __name__ == "__main__":