import wikipedia, pagegenerators, re
# Rank the wiki pages listed in 'lista.txt' by how many distinct references
# (<ref ...> tags) their wikitext contains, and write the ranking to 'out.txt'
# as a wikitext bullet list: "*[[Title]] (count)", highest count first.
site = wikipedia.getSite()

# One page title per line, stored as UTF-8 bytes.
with open('lista.txt', 'r') as f:
    lista = f.read().split('\n')

pages = []
for title in lista:
    title = title.strip()
    # A trailing newline (or any blank line) yields an empty entry; building
    # a Page from an empty title is never what the list author intended.
    if not title:
        continue
    pages.append(wikipedia.Page(site, title.decode('utf8')))
gen = pagegenerators.PreloadingGenerator(pages)

# Captures everything between "<ref" and the closing ">" (minus an optional
# trailing "/").  Because the pattern only anchors on "ref", it also matches
# "<references ...>", whose captured text then starts with "erences".
ref_tag = re.compile(r'< *ref(.*?)/?>')

# Maps reference count (int) -> list of page titles having that count.
d = {}
for p in gen:
    txt = p.get()
    named = set()   # distinct attribute strings of named refs, e.g. 'name="x"'
    unnamed = 0     # every attribute-less <ref> counts as its own reference
    for r in ref_tag.findall(txt):
        name = r.strip()
        # Skip <references/> tags, including attribute-carrying forms such as
        # <references group="n"/>, which are not references themselves.
        if name.startswith('erences'):
            continue
        if name:
            # The same named ref reused several times is counted only once.
            named.add(name)
        else:
            unnamed += 1
    d.setdefault(unnamed + len(named), []).append(p.title())

with open('out.txt', 'w') as f:
    # Highest reference count first; titles sorted within each count.
    for k in sorted(d, reverse=True):
        for t in sorted(d[k]):
            line = u"*[[%s]] (%d)\n" % (t, k)
            f.write(line.encode('utf8'))