User:TalBot/spot-double-redirects.py
Jump to navigation
Jump to search
#! /usr/bin/env python
# -*- coding: utf-8 -*-
#
# Detect double or otherwise faulty soft or hard redirects
#
# Copyright (C) 2007, GrafZahl (en.wikisource.org user)
#
# Licence: GPLv2
#
# run with standard args "-log -putthrottle:xx"
#
# Further arguments:
#
# -cat:xxx
# Specifies the category for which soft redirects should be
# checked, for example: -cat:'Soft redirects/August 2006'
# (replace the single quotes with whatever is appropriate for
# your shell)
#
import catlib, sys, wikipedia
# Configure the framework's "get" throttle with a delay value of 5.
wikipedia.get_throttle.setDelay(5)

# Hand the command line to the framework first; what it returns is scanned
# below for this script's own -cat:xxx option.
# NOTE(review): presumably handleArgs() strips the standard framework
# options (-log, -putthrottle:xx, ...) -- confirm against wikipedia.py.
args = wikipedia.handleArgs()

# Gather the value of every -cat:xxx option. When the option is repeated,
# the last occurrence is the one that counts.
cat_option = u'-cat:'
cat_values = [
    argument[len(cat_option):]
    for argument in args
    if argument.startswith(cat_option)
]
if cat_values:
    month = cat_values[-1]
else:
    month = False

# A missing or empty category name aborts the run with exit status 1.
if not month:
    wikipedia.output(u'(FFF) No category given (-cat:xxx)')
    sys.exit(1)

# Look up the requested category on the site returned by getSite() and
# obtain the pages it contains.
site = wikipedia.getSite()
cattitle = u'Category:%s' % month
cat = catlib.Category(site, cattitle)
articles = cat.articles()
# First level: walk over the pages of the category and collect, for every
# page that looks like a well-formed soft redirect, the page it links to.
wikipedia.output(u'(III) Starting first level check')
second_level = set()
for candidate in articles:
    try:
        # Diagnose the page. `problem` ends up holding the message template
        # of the first check that fails, or None when the page passed all
        # checks and its target was recorded.
        problem = None
        if not candidate.exists():
            problem = u'(EEE) [[%s]] does not exist'
        elif candidate.isRedirectPage():
            problem = u'(EEE) [[%s]] is a hard redirect'
        elif u'oft redirect' not in candidate.get():
            # The page text is expected to mention "oft redirect"
            # (presumably written without the first letter so that both
            # "Soft" and "soft" match).
            problem = u'(EEE) [[%s]] may not be a soft redirect'
        else:
            targets = candidate.linkedPages()
            if len(targets) == 1:
                # wikipedia.py does not always recognise correct section
                # anchors, so the section part of the link is dropped
                # before the target is queued for the second level check.
                # This is acceptable: the worst that can happen is that
                # the user reaches the correct page without being
                # scrolled to the correct section.
                #
                # See bug #2928239
                target_title = targets[0].sectionFreeTitle()
                second_level.add(wikipedia.Page(site, target_title))
            else:
                problem = (u'(EEE) [[%s]] does not have an unambiguous'
                           u' reference to a page on this wiki')
        if problem is not None:
            wikipedia.output(problem % candidate.title())
    except wikipedia.Error:
        # Any framework error raised while examining the page is reported
        # and the loop moves on to the next page.
        wikipedia.output(
            u'(EEE) Exception processing [[%s]]' % candidate.title())
# Second level: inspect every page that a first level soft redirect links
# to. Such a page must exist and must not be a redirect (hard or soft)
# itself.
wikipedia.output(u'(III) Starting second level check')
for target in second_level:
    try:
        if target.exists():
            # Check for a hard redirect first; the page text is fetched
            # and searched for the soft redirect marker only when the
            # page is not a hard redirect.
            redirects_again = target.isRedirectPage()
            if not redirects_again:
                redirects_again = u'oft redirect' in target.get()
            if redirects_again:
                wikipedia.output(
                    u'(EEE) There exist double redirects pointing to [[%s]]'
                    % target.title())
        else:
            wikipedia.output(
                u'(EEE) [[%s]] does not exist; soft redirects pointing to there are dangling'
                % target.title())
    except wikipedia.Error:
        # Report framework errors and carry on with the next target.
        wikipedia.output(
            u'(EEE) Exception processing [[%s]]' % target.title())