git.openstreetmap.org Git - osqa.git/blob - forum_modules/exporter/importer.py
Several improvements in the exporter. Finished the importer engine. Need to adapt...
[osqa.git] / forum_modules / exporter / importer.py
1 import os, tarfile, datetime
2
3 from xml.sax import make_parser
4 from xml.sax.handler import ContentHandler, ErrorHandler
5
6 from exporter import TMP_FOLDER, DATETIME_FORMAT, DATE_FORMAT
7 from orm import orm
8
# Sentinel meaning "the caller supplied no default", so that None can be
# passed as a real default value.
NO_DEFAULT = object()


class ContentElement(object):
    """Wrap a raw text value from the dump and convert it on demand.

    Every converter works on the stripped text returned by ``content()``.
    """

    def __init__(self, content):
        self._content = content

    def content(self):
        """Return the wrapped text with surrounding whitespace removed."""
        return self._content.strip()

    def as_bool(self):
        """Return True only when the text is exactly ``"true"``."""
        return self.content() == "true"

    def as_date(self, default=NO_DEFAULT):
        """Parse the text with ``DATE_FORMAT`` and return a ``datetime.date``.

        When the text cannot be parsed, ``default`` is returned if one was
        given (``None`` is accepted); otherwise the date of timestamp 0.
        """
        try:
            # strptime() produces a datetime; narrow it so that the parsed
            # value and the fallback below are both plain dates.
            return datetime.datetime.strptime(self.content(), DATE_FORMAT).date()
        except (AttributeError, TypeError, ValueError):
            if default is NO_DEFAULT:
                return datetime.date.fromtimestamp(0)
            return default

    def as_datetime(self, default=NO_DEFAULT):
        """Parse the text with ``DATETIME_FORMAT`` and return a datetime.

        When the text cannot be parsed, ``default`` is returned if one was
        given (``None`` is accepted); otherwise the datetime of timestamp 0.
        """
        try:
            return datetime.datetime.strptime(self.content(), DATETIME_FORMAT)
        except (AttributeError, TypeError, ValueError):
            if default is NO_DEFAULT:
                return datetime.datetime.fromtimestamp(0)
            return default

    def as_int(self, default=0):
        """Return the text converted with ``int()``, or ``default`` on failure."""
        try:
            return int(self.content())
        except (AttributeError, TypeError, ValueError):
            # AttributeError covers a wrapped value without strip(), e.g. None.
            return default

    def __str__(self):
        return self.content()
48
49
class RowElement(ContentElement):
    """One XML element of a dump row: its text, attributes and children.

    Element and attribute names are lower-cased when stored and when looked
    up. Children are grouped by name, each group in the order it was added.
    """

    def __init__(self, name, attrs, parent=None):
        self.name = name.lower()
        self.parent = parent
        self.attrs = dict([(k.lower(), ContentElement(v)) for k, v in attrs.items()])
        self._content = ''
        self.sub_elements = {}

        if parent is not None:
            parent.add(self)

    def add_to_content(self, ch):
        """Append a chunk of character data to this element's text."""
        self._content += ch

    def add(self, sub):
        """Register ``sub`` as a child, grouped under its name."""
        self.sub_elements.setdefault(sub.name, []).append(sub)

    def get(self, name, default=None):
        """Return the last child called ``name``, or ``default`` if none exists."""
        return self.sub_elements.get(name.lower(), [default])[-1]

    def get_list(self, name):
        """Return every child called ``name`` (an empty list if none exists)."""
        return self.sub_elements.get(name.lower(), [])

    def get_listc(self, name):
        """Return the stripped text of every child called ``name``."""
        return [r.content() for r in self.get_list(name)]

    def getc(self, name, default=""):
        """Return the stripped text of the last child called ``name``, or ``default``."""
        el = self.get(name, None)

        if el is None:
            return default
        return el.content()

    def get_attr(self, name, default=""):
        """Return the attribute ``name`` as a ContentElement, or ``default`` as given."""
        return self.attrs.get(name.lower(), default)

    def as_pickled(self, default=None):
        """Decode the ``value`` child of this element, or return ``default`` without one."""
        value_el = self.get('value')

        if value_el is None:
            return default
        return value_el._as_pickled(default)

    TYPES_MAP = dict([(c.__name__, c) for c in (int, long, str, unicode, float)])

    def _as_pickled(self, default=None):
        """Decode this element according to its ``type`` attribute.

        ``dict`` and ``list`` are rebuilt from the ``item`` children, ``bool``
        compares the text with ``"true"`` case-insensitively, names found in
        ``TYPES_MAP`` are converted with that type, and anything else yields
        the stripped text. ``default`` is returned when decoding fails.
        """
        # A missing attribute falls back to an empty wrapper, so an element
        # without ``type`` is decoded as plain text instead of raising.
        type_name = self.get_attr('type', ContentElement('')).content()

        try:
            if type_name == 'dict':
                # Use the text of the ``key`` attribute as the key: the
                # ContentElement wrapper itself would never match a lookup
                # by string.
                return dict([
                    (item.get_attr('key', ContentElement('')).content(), item.as_pickled())
                    for item in self.get_list('item')
                ])
            elif type_name == 'list':
                return [item.as_pickled() for item in self.get_list('item')]
            elif type_name == 'bool':
                return self.content().lower() == 'true'
            elif type_name in RowElement.TYPES_MAP:
                return RowElement.TYPES_MAP[type_name](self.content())
            else:
                return self.content()
        except (AttributeError, TypeError, ValueError):
            return default
119
120
121
122
class TableHandler(ContentHandler):
    """SAX handler that builds one RowElement tree per row and hands it to a callback.

    ``root_name`` is the tag that wraps the rows and ``row_name`` the tag of
    each row; both are matched case-insensitively. When a row closes,
    ``callback(row_element, *callback_args)`` is called.
    """

    def __init__(self, root_name, row_name, callback, callback_args=None):
        ContentHandler.__init__(self)
        self.root_name = root_name.lower()
        self.row_name = row_name.lower()
        self.callback = callback
        # A fresh list per handler instead of one shared default object.
        if callback_args is None:
            callback_args = []
        self.callback_args = callback_args

        self._reset()

    def _reset(self):
        self.curr_element = None
        self.in_tag = None

    def startElement(self, name, attrs):
        name = name.lower()

        if self.curr_element is None:
            if name == self.root_name:
                return
            # Start of a row (or of any other top-level element): no parent.
            self.curr_element = RowElement(name, attrs)
        else:
            # Inside an element everything is a child, even a tag that
            # happens to be named like the row or the root. Restarting a row
            # here would throw away the one being built.
            self.curr_element = RowElement(name, attrs, self.curr_element)

    def characters(self, ch):
        if self.curr_element is not None:
            self.curr_element.add_to_content(ch)

    def endElement(self, name):
        name = name.lower()
        current = self.curr_element

        if current is None:
            # Nothing is open, so this closes the root wrapper.
            return

        if current.parent is None:
            # Only a top-level element named like the row is delivered.
            if name == self.row_name:
                self.callback(current, *self.callback_args)
            self._reset()
        else:
            self.curr_element = current.parent
160
161
class SaxErrorHandler(ErrorHandler):
    """Error handler that re-raises whatever the parser reports, warnings included."""

    def _propagate(self, e):
        raise e

    # All three severities are treated the same way.
    error = _propagate
    fatalError = _propagate
    warning = _propagate
171
# Import steps in registration order: file_handler() appends to this list and
# start_import() calls every entry.
FILE_HANDLERS = []
173
def start_import(fname, user):
    """Call every registered file handler with ``TMP_FOLDER`` and ``user``.

    ``fname`` is currently unused because unpacking the archive is commented
    out; presumably the dump files are expected to be in ``TMP_FOLDER``
    already (TODO confirm).
    """
    #dump = tarfile.open(fname, 'r')
    #dump.extractall(TMP_FOLDER)

    for run_step in FILE_HANDLERS:
        run_step(TMP_FOLDER, user)
180
def file_handler(file_name, root_tag, el_tag, args_handler=None, pre_callback=None, post_callback=None):
    """Build a decorator that turns a per-row function into an import step.

    The step is appended to ``FILE_HANDLERS`` and replaces the decorated
    function. When called with ``(location, current_user)`` it:

    1. calls ``pre_callback(current_user)``, if given;
    2. gets the extra row arguments from ``args_handler(current_user)``
       (no extra arguments without an ``args_handler``);
    3. parses ``file_name`` inside ``location``, calling the decorated
       function once per ``el_tag`` element found under ``root_tag``;
    4. calls ``post_callback()``, if given.
    """
    def register(fn):
        def run_import(location, current_user):
            if pre_callback:
                pre_callback(current_user)

            # Resolved after pre_callback, which may prepare what it reads.
            extra_args = args_handler(current_user) if args_handler else []

            sax_parser = make_parser()
            sax_parser.setContentHandler(TableHandler(root_tag, el_tag, fn, extra_args))
            #parser.setErrorHandler(SaxErrorHandler())

            sax_parser.parse(os.path.join(location, file_name))

            if post_callback:
                post_callback()

        FILE_HANDLERS.append(run_import)
        return run_import
    return register
205
206
@file_handler('users.xml', 'users', 'user', args_handler=lambda u: [u])
def user_import(row, current_user):
    """Save the user in ``row`` with its auth keys and subscription settings.

    The row whose id equals ``current_user.id`` is skipped.
    """
    if row.getc('id') == str(current_user.id):
        return

    role_names = row.get('roles').get_listc('role')
    email_validated = row.get('email').get_attr('validated').as_bool()
    badge_counts = row.get('badges')

    user = orm.User(
            id           = row.getc('id'),
            username     = row.getc('username'),
            password     = row.getc('password'),
            email        = row.getc('email'),
            email_isvalid= email_validated,
            is_superuser = 'superuser' in role_names,
            is_staff     = 'moderator' in role_names,
            is_active    = True,
            date_joined  = row.get('joindate').as_datetime(),
            about         = row.getc('bio'),
            date_of_birth = row.get('birthdate').as_date(None),
            website       = row.getc('website'),
            reputation    = row.get('reputation').as_int(),
            gold          = badge_counts.get_attr('gold').as_int(),
            silver        = badge_counts.get_attr('silver').as_int(),
            bronze        = badge_counts.get_attr('bronze').as_int(),
            real_name     = row.getc('realname'),
            location      = row.getc('location'),
    )
    user.save()

    for auth_key in row.get('authKeys').get_list('key'):
        association = orm.AuthKeyUserAssociation(
            user=user,
            key=auth_key.getc('key'),
            provider=auth_key.getc('provider'),
        )
        association.save()

    notifications = row.get('notifications')

    # Later groups overwrite earlier ones if an option name repeats.
    settings = {}
    for option, flag in notifications.get('notify').attrs.items():
        settings[str(option)] = 'i' if flag.as_bool() else 'n'
    for option, flag in notifications.get('autoSubscribe').attrs.items():
        settings[str(option)] = flag.as_bool()
    for option, flag in notifications.get('notifyOnSubscribed').attrs.items():
        settings[str("notify_%s" % option)] = flag.as_bool()

    notifications_enabled = notifications.get_attr('enabled').as_bool()
    orm.SubscriptionSettings(user=user, enable_notifications=notifications_enabled, **settings).save()
251
def pre_tag_import(user):
    """Give ``tag_import`` an empty ``tag_mappings`` dict; ``user`` is not used."""
    tag_import.tag_mappings = dict()
254
255
@file_handler('tags.xml', 'tags', 'tag', pre_callback=pre_tag_import)
def tag_import(row):
    """Save the tag described by ``row`` and record it in ``tag_mappings`` by name."""
    new_tag = orm.Tag(
        name=row.getc('name'),
        used_count=row.get('used').as_int(),
        created_by_id=row.get('author').as_int(),
    )
    new_tag.save()

    tag_import.tag_mappings[new_tag.name] = new_tag
261
262
def post_node_import():
    """Clear ``tag_import.tag_mappings`` (runs as the node import's post callback)."""
    # Drops the reference to the name -> tag dict built by the tag import.
    tag_import.tag_mappings = None
265
@file_handler('nodes.xml', 'nodes', 'node', args_handler=lambda u: [tag_import.tag_mappings], post_callback=post_node_import)
def node_import(row, tags):
    """Save the node described by ``row`` together with its revisions.

    ``tags`` maps a tag name to the tag object saved by the tag import; a
    tag name missing from it raises ``KeyError``. The revision whose number
    equals the ``active`` attribute of ``revisions`` becomes the node's
    active revision.
    """
    ntags = [tags[t.content()] for t in row.get('tags').get_list('tag')]

    last_act = row.get('lastactivity')

    node = orm.Node(
            id            = row.getc('id'),
            node_type     = row.getc('type'),
            author_id     = row.get('author').as_int(),
            added_at      = row.get('date').as_datetime(),
            parent_id     = row.get('parent').as_int(None),
            abs_parent_id = row.get('absparent').as_int(None),

            last_activity_by_id = last_act.get('by').as_int(None),
            last_activity_at    = last_act.get('at').as_datetime(None),

            title         = row.getc('title'),
            body          = row.getc('body'),
            tagnames      = " ".join([t.name for t in ntags]),

            marked        = row.get('marked').as_bool(),
            extra_ref_id  = row.get('extraRef').as_int(None),
            extra_count   = row.get('extraCount').as_int(0),
            extra         = row.get('extraData').as_pickled()
    )

    # The node is saved before its tags are assigned.
    node.save()
    node.tags = ntags

    revisions = row.get('revisions')
    active_number = revisions.get_attr('active').as_int()
    active_revision = None

    for r in revisions.get_list('revision'):
        rev = orm.NodeRevision(
            author_id = r.getc('author'),
            body = r.getc('body'),
            node = node,
            revised_at = r.get('date').as_datetime(),
            revision = r.get('number').as_int(),
            summary = r.getc('summary'),
            tagnames = " ".join(r.getc('tags').split(',')),
            title = r.getc('title'),
        )

        rev.save()
        # Keep the first match only.
        if active_revision is None and rev.revision == active_number:
            active_revision = rev

    # Number and object are tracked separately so that a dump without a
    # matching revision never assigns a bare int to the relation. The node
    # then stays as it was first saved, without an active revision.
    if active_revision is not None:
        node.active_revision = active_revision
        node.save()
321
# Maps an action type name to its hook, a callable taking (row, action).
# Filled by post_action() and looked up by actions_import().
POST_ACTION = {}
323
def post_action(*types):
    """Return a decorator that registers a function in ``POST_ACTION`` for each of ``types``.

    The function is returned unchanged. A type registered twice keeps the
    later function.
    """
    def register(fn):
        for action_type in types:
            POST_ACTION[action_type] = fn
        return fn

    return register
330
def post_action_import_callback():
    """Rebuild ``state_string`` on every node referenced by a NodeState."""
    node_ids = orm.NodeState.objects.values_list('node_id', flat=True).distinct()

    for node in orm.Node.objects.filter(id__in=node_ids):
        # Each entry is formatted as "(<state_type>)" and the pieces are
        # concatenated without a separator.
        markers = ["(%s)" % state for state in node.states.values_list('state_type')]
        node.state_string = "".join(markers)
        node.save()
337
@file_handler('actions.xml', 'actions', 'action', post_callback=post_action_import_callback)
def actions_import(row):
    """Save the action described by ``row`` together with its reputation changes.

    If the action is not canceled and a hook is registered in
    ``POST_ACTION`` for its type, the hook is called with ``(row, action)``
    after everything has been saved.
    """
    action = orm.Action(
        id           = row.get('id').as_int(),
        action_type  = row.getc('type'),
        action_date  = row.get('date').as_datetime(),
        node_id      = row.get('node').as_int(None),
        user_id      = row.get('user').as_int(),
        real_user_id = row.get('realUser').as_int(None),
        ip           = row.getc('ip'),
        extra        = row.get('extraData').as_pickled(),
    )

    canceled = row.get('canceled')
    if canceled.get_attr('state').as_bool():
        # Mark the action itself as canceled; the hook check at the end of
        # this function relies on this flag.
        action.canceled = True
        action.canceled_by_id = canceled.get('user').as_int()
        # No trailing comma here: it would store a 1-tuple, not a datetime.
        action.canceled_at = canceled.get('date').as_datetime()
        action.canceled_ip = canceled.getc('ip')

    action.save()

    for r in row.get('reputes').get_list('repute'):
        by_canceled = r.get_attr('byCanceled').as_bool()

        # Date a repute caused by the cancellation at the cancel time, and
        # fall back to the action date when no cancel time is available.
        if by_canceled and action.canceled_at:
            repute_date = action.canceled_at
        else:
            repute_date = action.action_date

        orm.ActionRepute(
            action = action,
            user_id = r.get('user').as_int(),
            value = r.get('value').as_int(),

            date = repute_date,
            by_canceled = by_canceled
        ).save()

    if (not action.canceled) and action.action_type in POST_ACTION:
        POST_ACTION[action.action_type](row, action)
373
374
375
376
@post_action('voteup', 'votedown', 'voteupcomment')
def vote_action(row, action):
    """Save the Vote for ``action``: -1 for a 'votedown', +1 for any other type."""
    if action.action_type == 'votedown':
        vote_value = -1
    else:
        vote_value = 1

    vote = orm.Vote(
        user_id=action.user_id,
        node_id=action.node_id,
        action=action,
        voted_at=action.action_date,
        value=vote_value,
    )
    vote.save()
381
def state_action(state):
    """Return a hook ``(row, action)`` that saves a NodeState of type ``state``.

    The saved state points at the action and at the action's node.
    """
    def save_state(row, action):
        node_state = orm.NodeState(state_type=state, node_id=action.node_id, action=action)
        node_state.save()

    return save_state
390
# Register one NodeState-saving hook per action type; the argument of
# state_action() is the state_type that the hook stores.
post_action('wikify')(state_action('wiki'))
post_action('delete')(state_action('deleted'))
post_action('acceptanswer')(state_action('accepted'))
post_action('publish')(state_action('published'))
395
396
@post_action('flag')
def flag_action(row, action):
    """Save a Flag for the action's user and node, with ``action.extra`` as the reason."""
    flag = orm.Flag(
        user_id=action.user_id,
        node_id=action.node_id,
        action=action,
        reason=action.extra,
    )
    flag.save()
400
401
def award_import_args(user):
    """Return the extra arguments for ``awards_import``: a one-item list
    holding a dict of every Badge keyed by its ``cls`` value.

    ``user`` is not used.
    """
    badges_by_cls = {}
    for badge in orm.Badge.objects.all():
        badges_by_cls[badge.cls] = badge

    return [badges_by_cls]
404
405
@file_handler('awards.xml', 'awards', 'award', args_handler=award_import_args)
def awards_import(row, badges):
    """Save the award described by ``row``.

    ``badges`` is indexed with the text of the row's ``badge`` element; an
    unknown value raises ``KeyError``.
    """
    new_award = orm.Award(
        user_id=row.get('user').as_int(),
        badge=badges[row.getc('badge')],
        node_id=row.get('node').as_int(None),
        action_id=row.get('action').as_int(None),
        trigger_id=row.get('trigger').as_int(None),
    )
    new_award.save()
415
416
417
418
419
420
421
422
423