]> git.openstreetmap.org Git - osqa.git/blob - forum_modules/sximporter/importer.py
fix in sximporter, some tags don't have creator
[osqa.git] / forum_modules / sximporter / importer.py
1 # -*- coding: utf-8 -*-\r
2 \r
3 from xml.dom import minidom\r
4 from datetime import datetime\r
5 import time\r
6 import re\r
7 from django.utils.translation import ugettext as _\r
8 from django.template.defaultfilters import slugify\r
9 from orm import orm\r
10 \r
def getText(el):
    """Return the text held directly inside ``el``, stripped of whitespace.

    Only immediate children whose node type is TEXT_NODE contribute; text
    nested inside child elements is not included.
    """
    pieces = [child.data for child in el.childNodes
              if child.nodeType == child.TEXT_NODE]
    return "".join(pieces).strip()
17 \r
# Captures everything before the last ".<digits>" group of a timestamp.
msstrip = re.compile(r'^(.*)\.\d+')
def readTime(ts):
    """Parse a 'YYYY-MM-DDTHH:MM:SS' timestamp string into a datetime.

    A trailing fractional-seconds part (".123") is discarded before parsing.
    """
    fractional = msstrip.match(ts)
    if fractional is not None:
        ts = fractional.group(1)

    parsed = time.strptime(ts, '%Y-%m-%dT%H:%M:%S')
    year, month, day, hour, minute, second = parsed[0:6]
    return datetime(year, month, day, hour, minute, second)
25 \r
def readEl(el):
    """Turn the child elements of ``el`` into a dict.

    Keys are the lower-cased tag names of the direct element children and
    values are their text as returned by getText. When a tag name occurs
    more than once, the last occurrence wins.
    """
    fields = {}
    for child in el.childNodes:
        if child.nodeType != el.ELEMENT_NODE:
            continue
        fields[child.tagName.lower()] = getText(child)
    return fields
28 \r
def readTable(dump, name):
    """Load "<name>.xml" from ``dump`` and return one dict per <row> element.

    ``dump`` only needs a ``read(filename)`` method returning the XML text
    (presumably a zip archive object; confirm against the caller).
    """
    xml_source = dump.read("%s.xml" % name)
    document = minidom.parseString(xml_source)

    rows = []
    for row in document.getElementsByTagName('row'):
        rows.append(readEl(row))
    return rows
31 \r
class UnknownUser(object):
    """Stand-in name for an imported user that has no usable name field.

    Every instance takes the next value of a class-wide counter, so each
    placeholder renders as a distinct "Unknown user N" label.
    """
    # Number of placeholders handed out so far (shared by all instances).
    counter = 0

    def __init__(self):
        serial = UnknownUser.counter + 1
        UnknownUser.counter = serial
        self.number = serial

    def __str__(self):
        template = _("Unknown user %(number)d")
        return template % dict(number=self.number)

    def __unicode__(self):
        return self.__str__()

    def encode(self, *args):
        # Accepts and ignores any encoding arguments, returning the label.
        return self.__str__()
46 \r
class IdMapper(dict):
    """Dict of int -> int that falls back to the identity mapping.

    Keys and values are coerced with int() on the way in, and keys are
    coerced on lookup, so '5' and 5 address the same entry. Looking up an
    id that was never stored returns that id (as an int) instead of
    raising KeyError.
    """

    def __getitem__(self, key):
        numeric = int(key)
        if numeric in self:
            return dict.__getitem__(self, numeric)
        return numeric

    def __setitem__(self, key, value):
        dict.__setitem__(self, int(key), int(value))
54 \r
# Only OpenID values that look like http(s) URLs are imported as auth keys.
openidre = re.compile(r'^https?\:\/\/')

def _sx_username(sxu):
    """Return the first name field present in the row, else an UnknownUser.

    The placeholder is only instantiated when no name field exists, so the
    UnknownUser counter is not advanced for rows that do have a name.
    """
    for field in ('displayname', 'displaynamecleaned', 'realname'):
        if field in sxu:
            return sxu[field]
    return UnknownUser()

def userimport(dump, options):
    """Import the rows of the "Users" table as OSQA users.

    Parameters:
        dump: source object handed to readTable.
        options: dict-like settings. 'owneruid' names the row that is
            folded into the existing user with id 1; 'mergesimilar'
            enables merging rows that share both name and e-mail with an
            already imported user.

    Returns:
        A tuple (uidmapper, merged_users): an IdMapper from source user
        ids to OSQA user ids, and the list of OSQA user ids that received
        merged data.

    The row with id '-1' is skipped and that id is mapped to user 1.
    """
    users = readTable(dump, "Users")

    user_by_name = {}
    uidmapper = IdMapper()
    merged_users = []

    owneruid = options.get('owneruid', None)
    # Empty values mean "no owner configured". Converting only a real value
    # avoids the TypeError that int(None) raised inside the loop.
    if owneruid:
        owneruid = int(owneruid)
    else:
        owneruid = None

    for sxu in users:
        if sxu.get('id') == '-1':
            continue

        create = True
        username = _sx_username(sxu)

        if owneruid is not None and int(sxu.get('id')) == owneruid:
            # The configured owner is folded into the pre-existing user 1.
            osqau = orm.User.objects.get(id=1)
            uidmapper[owneruid] = 1
            uidmapper[-1] = 1
            create = False
        elif not isinstance(username, UnknownUser) and username in user_by_name:
            if options.get('mergesimilar', False) and sxu.get('email', 'INVALID') == user_by_name[username].email:
                osqau = user_by_name[username]
                create = False
                uidmapper[sxu.get('id')] = osqau.id
            else:
                # Same name but a different person: append the first free
                # numeric suffix to keep user names distinct.
                inc = 1
                while ("%s %d" % (username, inc)) in user_by_name:
                    inc += 1

                username = "%s %d" % (username, inc)

        # 'badgesummary' is a whitespace separated list of "class=count".
        sxbadges = sxu.get('badgesummary', None)
        badges = {'1':'0','2':'0','3':'0'}

        if sxbadges:
            badges.update(dict([b.split('=') for b in sxbadges.split()]))

        if create:
            birthday = sxu.get('birthday', None)
            if birthday:
                date_of_birth = readTime(birthday)
            else:
                date_of_birth = None

            osqau = orm.User(
                id           = sxu.get('id'),
                username     = unicode(username),
                password     = '!',
                email        = sxu.get('email', ''),
                is_superuser = sxu.get('usertypeid') == '5',
                is_staff     = sxu.get('usertypeid') == '4',
                is_active    = True,
                date_joined  = readTime(sxu.get('creationdate')),
                about         = sxu.get('aboutme', ''),
                date_of_birth = date_of_birth,
                email_isvalid = int(sxu.get('usertypeid')) > 2,
                website       = sxu.get('websiteurl', ''),
                reputation    = int(sxu.get('reputation')),
                gold          = int(badges['1']),
                silver        = int(badges['2']),
                bronze        = int(badges['3']),
                real_name     = sxu.get('realname', ''),
            )

            osqau.save()

            s = orm.SubscriptionSettings(user=osqau)
            s.save()

            user_by_name[osqau.username] = osqau
        else:
            # Merge this row into the already existing user.
            new_about = sxu.get('aboutme', None)
            if new_about and osqau.about != new_about:
                if osqau.about:
                    osqau.about = "%s\n|\n%s" % (osqau.about, new_about)
                else:
                    osqau.about = new_about

            # On both merge paths the name is still the raw one from the row.
            osqau.username = username
            osqau.email = sxu.get('email', '')
            osqau.reputation += int(sxu.get('reputation'))
            osqau.gold += int(badges['1'])
            osqau.silver += int(badges['2'])
            osqau.bronze += int(badges['3'])

            merged_users.append(osqau.id)
            osqau.save()


        openid = sxu.get('openid', None)
        if openid and openidre.match(openid):
            assoc = orm.AuthKeyUserAssociation(user=osqau, key=openid, provider="openidurl")
            assoc.save()

    # IdMapper returns the key itself when unmapped, so -1 still being -1
    # means no owner row claimed it; fall back to user 1.
    if uidmapper[-1] == -1:
        uidmapper[-1] = 1

    return (uidmapper, merged_users)
155 \r
def tagsimport(dump, uidmap):
    """Import the "Tags" table and return a dict of tag name -> saved Tag.

    A tag row without a 'userid' is attributed to user id 1 (looked up
    through ``uidmap`` like any other id).
    """
    tagmap = {}

    for row in readTable(dump, "Tags"):
        tag_id = int(row['id'])
        tag_name = row['name']
        times_used = int(row['count'])
        creator_id = uidmap[row.get('userid', 1)]

        tag = orm.Tag(id=tag_id, name=tag_name, used_count=times_used,
                      created_by_id=creator_id)
        tag.save()

        tagmap[tag.name] = tag

    return tagmap
173 \r
def postimport(dump, uidmap, tagmap):
    """Build (but do not save) Question/Answer objects from the "Posts" table.

    Parameters:
        dump: source object handed to readTable.
        uidmap: mapping from source user ids to OSQA user ids.
        tagmap: accepted for interface compatibility; not used here.

    Returns:
        dict mapping int post id -> unsaved post object.

    Accepted answers are linked in a second pass after all rows are read,
    so the link does not depend on the row order in the dump. Nothing is
    saved here; the caller persists the returned objects.
    """
    all_posts = {}
    # Questions waiting for their accepted answer, keyed by that answer's id.
    # This must outlive a single row: resetting it per row made the
    # accepted-answer handling unreachable.
    accepted = {}

    for sxpost in readTable(dump, "Posts"):
        is_question = sxpost.get('posttypeid') == '1'
        if is_question:
            postclass = orm.Question
        else:
            postclass = orm.Answer

        post = postclass(
            id = sxpost['id'],
            added_at = readTime(sxpost['creationdate']),
            body = sxpost['body'],
            score = sxpost.get('score', 0),
            vote_up_count = 0,
            vote_down_count = 0
        )

        if sxpost.get('deletiondate', None):
            post.deleted = True
            post.deleted_at = readTime(sxpost['deletiondate'])
            # Deleted posts are attributed to user 1.
            post.author_id = 1
        else:
            post.author_id = uidmap[sxpost['owneruserid']]

        if sxpost.get('lasteditoruserid', None):
            post.last_edited_by_id = uidmap[sxpost.get('lasteditoruserid')]
            post.last_edited_at = readTime(sxpost['lasteditdate'])

        if sxpost.get('communityowneddate', None):
            post.wiki = True
            post.wikified_at = readTime(sxpost['communityowneddate'])

        if is_question:
            post.node_type = "question"
            post.title = sxpost['title']

            tagnames = sxpost['tags'].replace(u'ö', '-').replace(u'é', '').replace(u'à', '')
            post.tagnames = tagnames

            post.view_count = sxpost.get('viewcount', 0)
            post.favourite_count = sxpost.get('favoritecount', 0)
            post.answer_count = sxpost.get('answercount', 0)

            if sxpost.get('lastactivityuserid', None):
                post.last_activity_by_id = uidmap[sxpost['lastactivityuserid']]
                post.last_activity_at = readTime(sxpost['lastactivitydate'])

            if sxpost.get('closeddate', None):
                # Closer and close time are not taken from the dump.
                post.closed = True
                post.closed_by_id = 1
                post.closed_at = datetime.now()

            if sxpost.get('acceptedanswerid', None):
                accepted[int(sxpost.get('acceptedanswerid'))] = post

        else:
            post.node_type = "answer"
            post.parent_id = sxpost['parentid']

        all_posts[int(post.id)] = post

    # Second pass: connect each question to its accepted answer.
    for answer_id, question in accepted.items():
        answer = all_posts.get(answer_id)
        if answer is None or answer.node_type != "answer":
            continue

        # Store the id (not the object) in the *_id attribute.
        question.accepted_answer_id = answer_id

        answer.accepted = True
        answer.accepted_at = datetime.now()
        answer.accepted_by_id = question.author_id

    return all_posts
254 \r
def comment_import(dump, uidmap, posts):
    """Create (unsaved) comment nodes from the "PostComments" table.

    Parameters:
        dump: source object handed to readTable.
        uidmap: mapping from source user ids to OSQA user ids.
        posts: dict of int id -> post; it is extended in place with the
            new comment nodes.

    Returns:
        (posts, mapping) where mapping translates the source comment id
        to the node id assigned here.

    Comments receive fresh ids that continue after the highest id already
    present in ``posts``.
    """
    comments = readTable(dump, "PostComments")
    # max() of an empty sequence raises ValueError; start from 0 when there
    # are no posts at all.
    if posts:
        currid = max(posts.keys())
    else:
        currid = 0
    mapping = {}

    for sxc in comments:
        currid += 1
        oc = orm.Node(
            id = currid,
            node_type = "comment",
            added_at = readTime(sxc['creationdate']),
            author_id = uidmap[sxc['userid']],
            body = sxc['text'],
            parent_id = sxc.get('postid'),
            vote_up_count = 0,
            vote_down_count = 0
        )

        if sxc.get('deletiondate', None):
            oc.deleted = True
            oc.deleted_at = readTime(sxc['deletiondate'])
            oc.deleted_by_id = uidmap[sxc['deletionuserid']]
            # A deleted comment is attributed to the user who deleted it.
            oc.author_id = uidmap[sxc['deletionuserid']]

        posts[oc.id] = oc
        mapping[int(sxc['id'])] = int(oc.id)

    return posts, mapping
286 \r
287 \r
def save_posts(posts, tagmap):
    """Save every post, attach known tags to questions, add a first revision.

    For questions, tag names with no entry in ``tagmap`` are dropped and
    ``tagnames`` is rewritten from the tags that were found.
    """
    for post in posts.values():
        post.save()

        if post.node_type == "question":
            tags = []
            for tagname in post.tagnames.split():
                tag = tagmap.get(tagname, None)
                if tag is not None:
                    tags.append(tag)

            post.tagnames = " ".join([t.name for t in tags]).strip()
            post.tags = tags

        create_and_activate_revision(post)
298 \r
299 \r
def create_and_activate_revision(post):
    """Save revision 1 for ``post`` and mark it as the active revision.

    The revision copies author, body, tag names and title from the post
    and is dated with the post's creation time; the post is saved again
    afterwards with ``active_revision_id`` set.
    """
    fields = {
        'author_id': post.author_id,
        'body': post.body,
        'node_id': post.id,
        'revised_at': post.added_at,
        'revision': 1,
        'summary': 'Initial revision',
        'tagnames': post.tagnames,
        'title': post.title,
    }
    rev = orm.NodeRevision(**fields)
    rev.save()

    post.active_revision_id = rev.id
    post.save()
315 \r
316 \r
def post_vote_import(dump, uidmap, posts):
    """Import up/down votes on posts from the "Posts2Votes" table.

    Only vote types '2' (stored as +1) and '3' (stored as -1) are
    imported. The matching counter on the in-memory post is incremented;
    the post itself is not saved here.
    """
    for sxv in readTable(dump, "Posts2Votes"):
        votetype = sxv['votetypeid']
        if votetype not in ('2', '3'):
            continue

        is_upvote = votetype == '2'
        if is_upvote:
            value = 1
        else:
            value = -1

        ov = orm.Vote(
            node_id = sxv['postid'],
            user_id = uidmap[sxv['userid']],
            voted_at = readTime(sxv['creationdate']),
            vote = value,
        )

        target = posts[int(sxv['postid'])]
        if is_upvote:
            target.vote_up_count += 1
        else:
            target.vote_down_count += 1

        ov.save()
335 \r
def comment_vote_import(dump, uidmap, comments, posts):
    """Import up/down votes on comments from the "Comments2Votes" table.

    ``comments`` translates a source comment id to the node id used as key
    in ``posts``. Only vote types '2' (stored as +1) and '3' (stored as
    -1) are imported; the counters on the in-memory node are updated but
    the node is not saved here.
    """
    for sxv in readTable(dump, "Comments2Votes"):
        votetype = sxv['votetypeid']
        if votetype not in ('2', '3'):
            continue

        is_upvote = votetype == '2'
        if is_upvote:
            value = 1
        else:
            value = -1

        node_id = comments[int(sxv['postcommentid'])]
        ov = orm.Vote(
            node_id = node_id,
            user_id = uidmap[sxv['userid']],
            voted_at = readTime(sxv['creationdate']),
            vote = value,
        )

        target = posts[node_id]
        if is_upvote:
            target.vote_up_count += 1
        else:
            target.vote_down_count += 1

        ov.save()
354 \r
355 \r
356 \r
def badges_import(dump, uidmap):
    """Import badge definitions ("Badges") and awards ("Users2Badges").

    A source badge is matched to an existing Badge by slug, where the slug
    is the slugified name with '&' replaced by 'and'. Unmatched badges are
    created under that same slug, so a later lookup with the same rule
    finds them again.

    Every award is attached to the 'node' content type with object_id 1,
    and each badge's awarded_count is incremented per award before the
    badges and awards are saved.
    """
    node_ctype = orm['contenttypes.contenttype'].objects.get(name='node')

    obadges = {}
    for existing in orm.Badge.objects.all():
        obadges[existing.slug] = existing

    sxbadges = {}
    for row in readTable(dump, "Badges"):
        sxbadges[int(row['id'])] = row

    sx_to_osqa = {}

    for sx_id, sxb in sxbadges.items():
        slug = slugify(sxb['name'].replace('&', 'and'))
        if slug in obadges:
            sx_to_osqa[sx_id] = obadges[slug]
        else:
            osqab = orm.Badge(
                name = sxb['name'],
                # Store the slug that the lookup above uses; previously the
                # '&' replacement was skipped here.
                slug = slug,
                description = sxb['description'],
                multiple = sxb.get('single', 'false') == 'false',
                awarded_count = 0,
                type = sxb['class']
            )
            osqab.save()
            sx_to_osqa[sx_id] = osqab

    sxawards = readTable(dump, "Users2Badges")
    osqaawards = []

    for sxa in sxawards:
        badge = sx_to_osqa[int(sxa['badgeid'])]
        osqaa = orm.Award(
            user_id = uidmap[sxa['userid']],
            badge = badge,
            content_type = node_ctype,
            object_id = 1
        )

        osqaawards.append(osqaa)
        badge.awarded_count += 1

    # Persist the updated award counters before the awards themselves.
    for b in sx_to_osqa.values():
        b.save()

    for a in osqaawards:
        a.save()
400 \r
401 \r
def reset_sequences():
    """Run PG_SEQUENCE_RESETS in its own transaction on the postgres backend.

    Does nothing for any other database backend.
    """
    from south.db import db

    if db.backend_name != "postgres":
        return

    db.start_transaction()
    db.execute_many(PG_SEQUENCE_RESETS)
    db.commit_transaction()
408 \r
def sximport(dump, options):
    """Run the whole import: users, tags, posts, comments, votes, badges.

    Posts and comments are saved once before the votes are imported and
    once more afterwards, so the vote counters updated in memory by the
    vote importers are written out. The transaction is then committed and
    the database sequences are reset.
    """
    user_result = userimport(dump, options)
    uidmap, merged_users = user_result

    tagmap = tagsimport(dump, uidmap)

    nodes = postimport(dump, uidmap, tagmap)
    nodes, comment_ids = comment_import(dump, uidmap, nodes)
    save_posts(nodes, tagmap)

    post_vote_import(dump, uidmap, nodes)
    comment_vote_import(dump, uidmap, comment_ids, nodes)

    # Second save: write out the counters changed by the vote importers.
    for node in nodes.values():
        node.save()

    badges_import(dump, uidmap)

    from south.db import db
    db.commit_transaction()

    reset_sequences()
425 \r
426     \r
427     \r
# SQL executed by reset_sequences() on the postgres backend. The import
# inserts rows with explicit ids, so each id sequence is moved past the
# highest id present in its table.
#
# Each statement calls setval(sequence, coalesce(max(id), 1) + 2, flag),
# where flag is true when the table has rows and false when it is empty.
#
# NOTE(review): "forum_node_tags_id_seq" is reset by several identical
# statements; the repeats look redundant. Confirm before removing.
PG_SEQUENCE_RESETS = """
SELECT setval('"auth_user_groups_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "auth_user_groups";
SELECT setval('"auth_user_user_permissions_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "auth_user_user_permissions";
SELECT setval('"activity_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "activity";
SELECT setval('"forum_subscriptionsettings_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_subscriptionsettings";
SELECT setval('"forum_validationhash_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_validationhash";
SELECT setval('"forum_authkeyuserassociation_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_authkeyuserassociation";
SELECT setval('"tag_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "tag";
SELECT setval('"forum_markedtag_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_markedtag";
SELECT setval('"forum_node_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_node";
SELECT setval('"forum_node_tags_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_node_tags";
SELECT setval('"forum_noderevision_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_noderevision";
SELECT setval('"forum_node_tags_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_node_tags";
SELECT setval('"forum_node_tags_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_node_tags";
SELECT setval('"favorite_question_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "favorite_question";
SELECT setval('"forum_questionsubscription_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_questionsubscription";
SELECT setval('"forum_node_tags_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_node_tags";
SELECT setval('"vote_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "vote";
SELECT setval('"flagged_item_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "flagged_item";
SELECT setval('"badge_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "badge";
SELECT setval('"award_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "award";
SELECT setval('"repute_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "repute";
SELECT setval('"forum_node_tags_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_node_tags";
SELECT setval('"forum_keyvalue_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_keyvalue";
SELECT setval('"forum_openidnonce_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_openidnonce";
SELECT setval('"forum_openidassociation_id_seq"', coalesce(max("id"), 1) + 2, max("id") IS NOT null) FROM "forum_openidassociation";
"""
455 \r
456 \r
457     \r
458