# -*- coding: utf-8 -*-
"""
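This bot copies geographic coordinates from the English Wikipedia. For every
selected page that has an interwiki link to en:, it looks for a coordinate
template in the English article and, after confirmation, adds the
corresponding {{Coord}} template at the top of the local page.
The pages to work on are selected with the parameters below.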
You can run the bot with the following commandline parameters:
-file - Work on all pages given in a local text file.
Will read any [[wiki link]] and use these articles.
Argument can also be given as "-file:filename".
-cat - Work on all pages which are in a specific category.
Argument can also be given as "-cat:categoryname".
-page - Only edit a specific page.
Argument can also be given as "-page:pagetitle". You can give this
parameter multiple times to edit multiple pages.
-ref - Work on all pages that link to a certain page.
Argument can also be given as "-ref:referredpagetitle".
-filelinks - Works on all pages that link to a certain image.
Argument can also be given as "-filelinks:ImageName".
-links - Work on all pages that are linked to from a certain page.
Argument can also be given as "-links:linkingpagetitle".
-start - Work on all pages in the wiki, starting at a given page. Choose
"-start:!" to start at the beginning.
NOTE: You are advised to use -xml instead of this option; this is
meant for cases where there is no recent XML dump.
-except:XYZ - Ignore pages which contain XYZ. If the -regex argument is given,
XYZ will be regarded as a regular expression.
-summary:XYZ - Set the summary message text for the edit to XYZ, bypassing the
predefined message texts with original and replacements inserted.
-template:XYZ - Accepted on the command line; the value is currently not used.
-namespace:n - Number of namespace to process. The parameter can be used
multiple times. It works in combination with all other
parameters, except for the -start parameter. If you e.g. want to
iterate over all user pages starting at User:M, use
-start:User:M.
-always - Don't prompt you for each replacement
other: -
NOTE: Only use either -xml or -file or -page, but don't mix them.
Examples:
"""
#
# [[Utente:Wiso]] 2007
#
# Distributed under the terms of the GPL licence
#
from __future__ import generators
import sys, re
import wikipedia, pagegenerators,catlib, config
__version__='$Id: coordbot.py,v 0.1 $'
# Summary messages in different languages
# NOTE: Predefined replacement tasks might use their own dictionary, see 'fixes'
# below.
# Edit summary; %s is filled with a link to the English source page.
msg = u'robot Aggiungo [[Template:Coord]] dalla pagina %s'

# Search/replace tables: each entry is (pattern, replacement) where the pattern
# recognises one coordinate template and the replacement rewrites it as a
# {{Coord}} call followed by a newline.  Only the 'safe' list is applied by
# CoordRobot.run(); 'notsafe' is compiled but not otherwise used in this file.
templates = {
    'safe': [
        (r'\{\{ ?[Cc]oord(.*?)\}\}', r"{{Coord\1}}\n"),
        (r'{{coor[_ ]title[_ ]d\|([0-9\.-]+)\|([NS])\|([0-9\.-]+)\|([EW])\|?([^}]*?)}}', r"{{coord|\1|\2|\3|\4|\5|display=title}}\n"),
        (r'{{coor[_ ]title[_ ]dm\|([0-9\.-]+)\|([0-9\.-]+)\|([NS])\|([0-9\.-]+)\|([0-9\.-]+)\|([EW])\|?([^\}]*?)\}\}', r"{{coord|\1|\2|\3|\4|\5|\6|\7|display=title}}\n"),
        (r'{{coor[_ ]title[_ ]dms\|([0-9\.-]+)\|([0-9\.-]+)\|([0-9\.-]+)\|([NS])\|([0-9\.-]+)\|([0-9\.-]+)\|([0-9\.-]+)\|([EW])\|?([^}]*?)}}', r"{{coord|\1|\2|\3|\4|\5|\6|\7|\8|\9|display=title}}\n"),
        # The longitude group needs '+' like the latitude group; with a
        # single-character group a call carrying a third parameter
        # ({{coor d|lat|long|extra}}) could never match.
        (r'\{\{ ?[Cc]oor[ _]d\|([0-9\.+-]+)\|([0-9\.+-]+)(\|?[^\|]*)\}\}', r"{{Coord|\1|\2\3|display=title}}\n"),
    ],
    'notsafe': [
        # Spelled 'coor dm' to agree with the 'coor d' / 'coor dms' patterns.
        (r'\{\{ ?[Cc]oor[ _]dm\|([0-9]+)\|([0-9\.]+)\|([NS])\|([0-9\.]+)\|([0-9\.]+)\|([EW])(\|?[^\|]*)\}\}', r"{{Coord|\1|\2|\3|\4|\5|\6\7|display=title}}\n"),
        (r'\{\{ ?[Cc]oor[ _]dms\|([0-9]+)\|([0-9\.]+)\|([0-9\.]+)\|([NS])\|([0-9\.]+)\|([0-9\.]+)\|([0-9\.]+)\|([EW])(\|?[^\|]*)\}\}', r"{{Coord|\1|\2|\3|\4|\5|\6|\7|\8\9|display=title}}\n"),
        (r'\{\{.*latd *= *([0-9\.]+).*longd ?= ?([0-9\.]+)', r"{{Coord|\1|\2|display=title}}\n"),
    ],
}
# Patterns that mark a local page as "do not touch": CoordRobot skips every
# page whose text matches one of them.
# The two entries containing a non-ASCII letter are unicode literals: as plain
# byte strings Python 2 would compare their UTF-8 bytes against the unicode
# page text and they would never match.
exceptions = [
    r'\{\{ *?Geobox',
    r'\{\{ ?[Cc]oord',
    r'\{\{ ?Template:[Cc]oord',
    r'\{\{ ?[mM]ontagna',
    r'\{\{ ?(Template:)?[cC]omune',
    u'\\{\\{ ?[cC]ittà',
    u'\\{\\{ ?[mM]unicipalità',
    r'\{\{ ?[aA]eroporto\|',
    r'\{\{ ?[Mm]unicipi',
    r'\{\{ ?[iI]nfobox[ _]Azienda\|',
    r'\{\{ ?[Ss]\|aziende',
    r'\{\{ ?[Dd]isambigua\|',
    r'\{\{ ?[Ff]razione',
    r'\{\{ ?[Ss]quadra',
    r'\{\{ ?[Pp]asso ?(\||\n)',
    r'\{\{ ?[Bb]undesland[ _]tedesco',
]
class CoordRobot:
    """
    Copy coordinate templates from the English Wikipedia to local pages.

    For every page of the generator that has an interwiki link to the
    English Wikipedia, the English text is searched with the 'safe' patterns
    of the module-level ``templates`` table.  A hit is rewritten as a
    ``{{Coord}}`` call and, after interactive confirmation, prepended to the
    local page.  Local pages matching one of the module-level ``exceptions``
    are skipped.
    """

    def __init__(self, generator, autoTitle = False, autoText = False):
        """
        generator -- iterable yielding the pages to process.
        autoTitle, autoText -- kept on the instance; no method of this class
                               reads them.
        """
        self.generator = generator
        self.autoTitle = autoTitle
        self.autoText = autoText
        self.compileregex()

    @staticmethod
    def _compile_once(pattern, flags = 0):
        """
        Return pattern as a compiled regular expression.

        An object that already offers search() is returned unchanged, because
        re.compile() raises ValueError when given a compiled pattern together
        with flags.
        """
        if hasattr(pattern, 'search'):
            return pattern
        return re.compile(pattern, flags)

    def compileregex(self):
        """
        Compile the module-level ``templates`` and ``exceptions`` in place.

        Safe to call repeatedly (e.g. when a second robot is created):
        entries that are already compiled are left alone.
        """
        for pairs in templates.values():
            for index, (old, new) in enumerate(pairs):
                pairs[index] = self._compile_once(old, re.UNICODE), new
        for index, pattern in enumerate(exceptions):
            exceptions[index] = self._compile_once(pattern)

    def checkExceptions(self, text):
        """
        Return the first piece of text matched by an exception pattern, or
        None when no exception applies.
        """
        for exception in exceptions:
            hit = exception.search(text)
            if hit:
                return hit.group(0)
        return None

    def change(self, page, new_text):
        """
        Save new_text to page.

        Edit conflicts and spam-filter rejections are reported and the page
        is left as it is; other exceptions propagate to the caller.
        """
        try:
            page.put(new_text)
        except wikipedia.EditConflict:
            wikipedia.output(u'Skipping %s because of edit conflict' % (page.title()))
        except wikipedia.SpamfilterError as url:
            wikipedia.output(u'Cannot change %s because of blacklist entry %s' % (page.title(), url))

    def run(self):
        """
        Process every page of the generator.

        A page is skipped when it is locked, missing, a redirect, has no
        English interwiki link, or matches one of the exceptions.  For the
        remaining pages each 'safe' pattern found in the English text is
        shown to the operator; the first accepted proposal is saved and ends
        the work on that page.
        """
        sen = wikipedia.Site('en')
        for page in self.generator:
            try:
                if not page.canBeEdited():
                    wikipedia.output(u'Skipping locked page %s' % page.title())
                    continue
                interwiki_list = page.interwiki()
            except wikipedia.NoPage:
                wikipedia.output(u'Page %s not found' % page.title())
                continue
            except wikipedia.IsRedirectPage:
                wikipedia.output(u'Page %s is a redirect, skip' % page.title())
                continue

            # Pick the interwiki target that lives on the English site.
            page_en = None
            for candidate in interwiki_list:
                if candidate.site() == sen:
                    page_en = candidate
                    break
            if page_en is None:
                continue

            wikipedia.output(page.title())
            wikipedia.output(u'en: %s' % page_en.title())
            text_it = page.get()

            # Skip all pages that contain certain texts.
            match = self.checkExceptions(text_it)
            if match:
                # 9 == len(u'Skipping '), 21 == len(u' because it contains ');
                # only the matched text gets a colour code.
                colors = [None] * 9 + [None] * len(page.title()) + [None] * 21 + [10] * len(match)
                wikipedia.output(u'Skipping %s because it contains %s' % (page.title(), match), colors = colors)
                continue

            try:
                text_en = page_en.get()
            except wikipedia.NoPage:
                wikipedia.output(u'Page %s not found' % page_en.title())
                continue
            except wikipedia.IsRedirectPage:
                wikipedia.output(u'Page %s is a redirect, follow redirect' % page_en.title())
                # NOTE(review): get_redirect=True presumably returns the text
                # of the redirect page itself rather than of its target --
                # confirm against the framework.
                text_en = page_en.get(get_redirect=True)

            for old, new in templates['safe']:
                match = old.search(text_en)
                if not match:
                    continue
                found = match.group(0)
                # 5 == len(u'\n>>> '), 4 == len(u' <<<'); the title is coloured.
                colors = [None] * 5 + [13] * len(page.title()) + [None] * 4
                wikipedia.output(u'\n>>> %s <<<' % page.title(), colors = colors)
                wikipedia.output(u'Trovato %s: ' % found)
                template_new = old.sub(new, found)
                wikipedia.output(template_new)
                new_text_it = template_new + text_it
                choice = wikipedia.inputChoice(u'Do you want to accept these changes?', ['Yes', 'No'], ['y', 'N'], 'N')
                if choice in ['y', 'Y']:
                    wikipedia.setAction(msg % page_en.aslink())
                    self.change(page, new_text_it)
                    # text_it is stale once the page has been saved; a further
                    # proposal would be built on the old text and undo this
                    # edit, so stop working on this page.
                    break
def main():
    """
    Parse the command line, build the page generator and run the bot.

    Options handled here: -autotitle, -autotext, -page[:title], -except:text,
    -template:text, -namespace:n and -summary:text.  Every other argument is
    offered to pagegenerators.GeneratorFactory.  When no page source results,
    the help text is shown and the program exits.
    """
    gen = None
    # Which namespaces should be processed?
    # Default to [] which means all namespaces will be processed.
    namespaces = []
    PageTitles = []
    autoText = False
    autoTitle = False
    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()
    # Load default summary message.
    # BUG WARNING: This is probably incompatible with the -lang parameter.
    wikipedia.setAction(msg)
    # Read commandline parameters.
    for arg in wikipedia.handleArgs():
        if arg == '-autotitle':
            autoTitle = True
        elif arg == '-autotext':
            autoText = True
        elif arg.startswith('-page'):
            if len(arg) == 5:
                PageTitles.append(wikipedia.input(u'Which page do you want to chage?'))
            else:
                PageTitles.append(arg[6:])
        elif arg.startswith('-except:'):
            # Add the text to the module-level exception list, which
            # CoordRobot compiles and checks against every page.  It is
            # escaped so that it is matched literally, not as a regex.
            exceptions.append(re.escape(arg[8:]))
        elif arg.startswith('-template:'):
            # Accepted so that it is not passed on to the generator factory;
            # the value is not used.
            pass
        elif arg.startswith('-namespace:'):
            namespaces.append(int(arg[11:]))
        elif arg.startswith('-summary:'):
            # NOTE: CoordRobot.run() calls setAction() again before each
            # save, so this summary does not reach the saved edits.
            wikipedia.setAction(arg[9:])
        else:
            generator = genFactory.handleArg(arg)
            if generator:
                gen = generator
    print(namespaces)
    # Explicitly named pages take precedence over any other page source.
    if PageTitles:
        pages = [wikipedia.Page(wikipedia.getSite(), PageTitle) for PageTitle in PageTitles]
        gen = iter(pages)
    if not gen:
        # syntax error, show help text from the top of this file
        wikipedia.showHelp('coordbot')
        wikipedia.stopme()
        sys.exit()
    if namespaces:
        gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
    preloadingGen = pagegenerators.PreloadingGenerator(gen, pageNumber = 20)
    bot = CoordRobot(preloadingGen, autoTitle, autoText)
    bot.run()
# Script entry point: run the bot only when executed directly, not on import.
if __name__ == "__main__":
    try:
        main()
    finally:
        # Runs whether main() returns, raises or calls sys.exit().
        wikipedia.stopme()