forked from volpino/toolserver-scripts
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathget_edit_count.py
More file actions
90 lines (82 loc) · 2.67 KB
/
get_edit_count.py
File metadata and controls
90 lines (82 loc) · 2.67 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
#!/usr/bin/env python
import sys
import MySQLdb
import csv
import simplejson
import urllib
from toolserver import ToolserverConfig
# The script needs exactly two command-line arguments: the file listing the
# page titles (one per line) and the path of the output file to write.
if len(sys.argv) != 3:
    print("Usage: %s pages_list output_file" % sys.argv[0])
    print("Error: Wrong parameters!")
    # Exit with a non-zero status so that a calling shell or script can
    # tell the invocation failed.
    sys.exit(1)
# Wiki to analyse; LANG and FAMILY select the row of the toolserver "wiki"
# table and are also used to build the API host name later in the script.
LANG = "en"
FAMILY = "wikipedia"
# Filled in below from the "wiki" table lookup.
SERVER = ""
DBNAME = ""

# connect to the MySQL server
tsc = ToolserverConfig()
try:
    conn = MySQLdb.connect(host=tsc.host,
                           user=tsc.user,
                           passwd=tsc.password,
                           db="toolserver")
except MySQLdb.Error as e:
    print("Error %d: %s" % (e.args[0], e.args[1]))
    sys.exit(1)

# Look up which server and database hold the selected wiki.  The connection
# is only needed for this one query, so release it on every path, including
# a failing query or an unknown wiki.
try:
    cursor = conn.cursor()
    cursor.execute(
        "SELECT server, dbname FROM wiki WHERE lang=%s AND family=%s",
        (LANG, FAMILY))
    row = cursor.fetchone()
finally:
    conn.close()

if row is None:
    print("Invalid wiki name!")
    sys.exit(1)

SERVER = "sql-s%d" % row[0]
DBNAME = row[1]
print("%s %s %s" % ("Now connecting to ", SERVER, DBNAME))
# Open the connection to the wiki's own database (SERVER / DBNAME), reusing
# the toolserver credentials; abort the script if the server refuses it.
try:
    conn = MySQLdb.connect(
        db=DBNAME,
        passwd=tsc.password,
        user=tsc.user,
        host=SERVER,
    )
except MySQLdb.Error as err:
    error_code = err.args[0]
    error_text = err.args[1]
    print("Error %d: %s" % (error_code, error_text))
    sys.exit(1)
# Counts the distinct revisions of one page and the distinct users who made
# them.  Rows whose joined user_groups entry is "bot" are filtered out.  The
# page id is bound by the database driver instead of being pasted into the
# SQL text.
EDIT_COUNT_QUERY = """SELECT COUNT(DISTINCT rev_id) AS total_edits,
    COUNT(DISTINCT rev_user) AS unique_editors
    FROM revision JOIN user ON rev_user=user_id
    LEFT OUTER JOIN user_groups ON user_id=ug_user
    WHERE rev_page=%s AND
    (ug_group IS NULL OR ug_group!="bot");"""


def _fetch_page_id(title):
    """Return the numeric page id of `title`, or None if it has none.

    Asks the wiki's web API for the page.  The title is percent-encoded so
    that spaces, "&", "?" and similar characters cannot corrupt the URL.
    """
    url = "http://%s.%s.org/w/api.php?action=query&titles=%s&format=json" \
        % (LANG, FAMILY, urllib.quote(title))
    data = simplejson.load(urllib.urlopen(url))
    pages = data.get("query", {}).get("pages", {})
    page_ids = list(pages)
    if not page_ids:
        return None
    # The keys of "pages" are strings, so convert before comparing; a
    # negative id is how the API marks a title without an existing page.
    page_id = int(page_ids[0])
    if page_id < 0:
        return None
    return page_id


def _count_edits(cursor, page_id):
    """Run EDIT_COUNT_QUERY for `page_id` and return its single result row."""
    cursor.execute(EDIT_COUNT_QUERY, (page_id,))
    return cursor.fetchone()


cursor = conn.cursor()
queue = []      # rows waiting to be written to the output file
counter = 0     # number of result rows produced so far

# Both files are closed (and the output flushed) when the blocks end, even
# if an API call or a query raises half-way through.
with open(sys.argv[1]) as infile:
    with open(sys.argv[2], 'w') as outfile:
        csv_writer = csv.writer(outfile, delimiter='|')
        csv_writer.writerow(["page", "type", "total_edits", "unique_editors"])

        for line in infile:
            # Strip only the line terminator, so a final line without a
            # trailing newline keeps its last character.
            page = line.rstrip("\r\n")
            if not page:
                continue

            # Each listed page is measured twice: the page itself (type 1)
            # and its talk page (type 0).
            for title, page_type in [(page, 1), ("Talk:%s" % page, 0)]:
                page_id = _fetch_page_id(title)
                row = None
                if page_id is not None:
                    row = _count_edits(cursor, page_id)
                if not row:
                    print("Error: page %s" % title)
                    continue

                counter += 1
                # Write the buffered rows out every 100 results.
                if counter % 100 == 0:
                    print("%s %s" % ("flushing", counter))
                    csv_writer.writerows(queue)
                    queue = []
                queue.append([title, page_type] + list(row))

        # Write whatever is still buffered after the last page.
        csv_writer.writerows(queue)

conn.commit()
conn.close()