User:Inductiveload/Scripts/Page namespace editor
From Wikisource
< User:Inductiveload (Redirected from User:Inductiveload/Page namespace editor)
"""Tidy the formatting of Wikisource ``Page:`` namespace pages.

Reads page titles (one per line) from FILE, fetches each page, splits its
wikitext into the leading ``<noinclude>`` header, the body and the trailing
``<noinclude>`` footer, cleans up each part, shows a diff and uploads the
result after interactive confirmation.

Targets Python 2: it relies on ``raw_input`` and on the ``wikipedia`` module
API used below (``getSite``, ``Page``, ``showDiff``).
"""

# NOTE(review): import order is kept exactly as in the original, in case
# pw_script_header has to run before `wikipedia` is imported -- confirm.
import pw_script_header
import wikipedia
import codecs
import re

FIX = 'newline'
FILE = r'/home/john/src/pw/zz_filelist0.txt'
SUMMARY = "[bot] Tidying formatting."

# <noinclude>header</noinclude>body<noinclude>footer</noinclude>
# The pattern text is unchanged; it is pure ASCII, so a plain raw string
# matches unicode wikitext the same way the former ur'' literal did.
_PAGE_RE = re.compile(
    r'(?ms)^<noinclude>(.*)</noinclude>(.*?)<noinclude>(.*)</noinclude>$')

# A single line break (plus any spaces just before it) that sits between two
# non-newline characters.  Lookbehind/lookahead keep the neighbouring
# characters out of the match, so consecutive one-character lines are all
# joined; a consuming pattern would skip every other break in that case.
_SOFT_BREAK_RE = re.compile(r'(?<=[^\n]) *\n(?=[^\n])')

# Answers to the upload prompt that count as "yes".
_UPLOAD_ANSWERS = ('y', 'Y', 'yes', 'Yes')


def decomposePage(wikiText):
    """Split Page: namespace wikitext into its three parts.

    Returns a ``(header, body, footer)`` tuple, where header and footer are
    the contents of the leading and trailing ``<noinclude>`` sections.
    Prints a message and returns ``None`` when the text does not have that
    shape; callers must check for ``None`` before unpacking.
    """
    m = _PAGE_RE.search(wikiText)
    if m is None:
        print("Can't find header, body, footer")
        return None
    return m.group(1), m.group(2), m.group(3)


def composePage(header, body, footer):
    """Reassemble a page from its parts; the inverse of decomposePage()."""
    return '<noinclude>%s</noinclude>%s<noinclude>%s</noinclude>' % (
        header, body, footer)


def process_body(body):
    """Join single line breaks in the body into single spaces.

    Spaces directly before a joined break are dropped.  Runs of two or more
    newlines, and newlines at the very start or end of the text, are left
    untouched.
    """
    return _SOFT_BREAK_RE.sub(' ', body)


def process_header(header):
    """Return the header unchanged (hook for future header clean-ups)."""
    return header


def process_footer(footer):
    """Return the footer unchanged (hook for future footer clean-ups)."""
    return footer


def main():
    """Process every page listed in FILE, asking before each upload."""
    ws_site = wikipedia.getSite("en", "wikisource")

    # The context manager closes the list file even if a page fails.
    with codecs.open(FILE, 'r', 'utf-8') as in_file:
        for line in in_file:
            # Lines keep their trailing newline; strip it and skip blanks so
            # they are not treated as page titles.
            page_title = line.strip()
            if not page_title:
                continue

            print('(INF) Processing page: %s' % page_title)

            page = wikipedia.Page(ws_site, page_title)  # get the page
            old_wikitext = page.get()  # extract wikitext

            parts = decomposePage(old_wikitext)  # decompose the page
            if parts is None:
                # decomposePage() has already reported the problem; move on
                # to the next page instead of crashing on the unpacking.
                print('(WRN) Skipping page: %s' % page_title)
                continue
            header, body, footer = parts

            body = process_body(body)
            header = process_header(header)
            footer = process_footer(footer)

            # make a well formed Page: namespace page
            new_wikitext = composePage(header, body, footer)

            wikipedia.showDiff(old_wikitext, new_wikitext)
            print(new_wikitext)

            cont = raw_input("Upload? [y/n]: ")
            if cont in _UPLOAD_ANSWERS:
                page.put(new_wikitext, SUMMARY, minorEdit=True)


if __name__ == "__main__":
    main()