1 import os, tarfile, datetime
3 from xml.sax import make_parser
4 from xml.sax.handler import ContentHandler, ErrorHandler
6 from exporter import TMP_FOLDER, DATETIME_FORMAT, DATE_FORMAT
11 class ContentElement():
12 def __init__(self, content):
13 self._content = content
16 return self._content.strip()
19 return self.content() == "true"
21 def as_date(self, default=NO_DEFAULT):
23 return datetime.datetime.strptime(self.content(), DATE_FORMAT)
25 if default == NO_DEFAULT:
26 return datetime.date.fromtimestamp(0)
31 def as_datetime(self, default=NO_DEFAULT):
33 return datetime.datetime.strptime(self.content(), DATETIME_FORMAT)
35 if default == NO_DEFAULT:
36 return datetime.datetime.fromtimestamp(0)
40 def as_int(self, default=0):
42 return int(self.content())
50 class RowElement(ContentElement):
51 def __init__(self, name, attrs, parent=None):
52 self.name = name.lower()
54 self.attrs = dict([(k.lower(), ContentElement(v)) for k, v in attrs.items()])
56 self.sub_elements = {}
61 def add_to_content(self, ch):
65 curr = self.sub_elements.get(sub.name, None)
69 self.sub_elements[sub.name] = curr
def get(self, name, default=None):
    """Return the most recently added sub-element called `name`.

    The lookup key is lower-cased before it is matched against
    `self.sub_elements`. When nothing is stored under that key,
    `default` is returned unchanged.
    """
    key = name.lower()
    if key in self.sub_elements:
        # Each entry is a list of elements; the last one wins.
        return self.sub_elements[key][-1]
    return default
def get_list(self, name):
    """Return every sub-element stored under `name` (lower-cased).

    The stored list itself is returned when the key exists; a new empty
    list is returned otherwise.
    """
    key = name.lower()
    if key in self.sub_elements:
        return self.sub_elements[key]
    return []
def get_listc(self, name):
    """Return the `content()` of every sub-element stored under `name`.

    Elements are taken from `self.get_list(name)` and kept in the same
    order.
    """
    contents = []
    for element in self.get_list(name):
        contents.append(element.content())
    return contents
82 def getc(self, name, default=""):
83 el = self.get(name, None)
def get_attr(self, name, default=""):
    """Return the attribute stored under `name` (lower-cased), else `default`.

    NOTE(review): `default` is handed back as-is (a plain string unless the
    caller overrides it), while the values kept in `self.attrs` are wrapped
    objects. Callers that chain `.content()` / `.as_int()` / `.as_bool()`
    on the result will fail when the attribute is missing -- confirm that
    this is intended.
    """
    key = name.lower()
    if key in self.attrs:
        return self.attrs[key]
    return default
93 def as_pickled(self, default=None):
94 value_el = self.get('value')
97 return value_el._as_pickled(default)
101 TYPES_MAP = dict([(c.__name__, c) for c in (int, long, str, unicode, float)])
103 def _as_pickled(self, default=None):
104 type = self.get_attr('type').content()
108 return dict([ (item.get_attr('key'), item.as_pickled()) for item in self.get_list('item') ])
110 return [item.as_pickled() for item in self.get_list('item')]
112 return self.content().lower() == 'true'
113 elif type in RowElement.TYPES_MAP:
114 return RowElement.TYPES_MAP[type](self.content())
116 return self.content()
123 class TableHandler(ContentHandler):
124 def __init__(self, root_name, row_name, callback, callback_args = []):
125 self.root_name = root_name.lower()
126 self.row_name = row_name.lower()
127 self.callback = callback
128 self.callback_args = callback_args
133 self.curr_element = None
136 def startElement(self, name, attrs):
139 if name == self.root_name.lower():
141 elif name == self.row_name:
142 self.curr_element = RowElement(name, attrs)
144 self.curr_element = RowElement(name, attrs, self.curr_element)
def characters(self, ch):
    """Forward character data to the element currently being built.

    Text received while `self.curr_element` is unset (or falsy) is
    discarded.
    """
    element = self.curr_element
    if not element:
        return
    element.add_to_content(ch)
150 def endElement(self, name):
153 if name == self.root_name:
155 elif name == self.row_name:
156 self.callback(self.curr_element, *self.callback_args)
159 self.curr_element = self.curr_element.parent
162 class SaxErrorHandler(ErrorHandler):
166 def fatalError(self, e):
169 def warning(self, e):
174 def start_import(fname, user):
175 #dump = tarfile.open(fname, 'r')
176 #dump.extractall(TMP_FOLDER)
178 for h in FILE_HANDLERS:
181 def file_handler(file_name, root_tag, el_tag, args_handler=None, pre_callback=None, post_callback=None):
183 def decorated(location, current_user):
185 pre_callback(current_user)
188 args = args_handler(current_user)
192 parser = make_parser()
193 handler = TableHandler(root_tag, el_tag, fn, args)
194 parser.setContentHandler(handler)
195 #parser.setErrorHandler(SaxErrorHandler())
197 parser.parse(os.path.join(location, file_name))
202 FILE_HANDLERS.append(decorated)
207 @file_handler('users.xml', 'users', 'user', args_handler=lambda u: [u])
208 def user_import(row, current_user):
209 if str(current_user.id) == row.getc('id'):
212 roles = row.get('roles').get_listc('role')
213 valid_email = row.get('email').get_attr('validated').as_bool()
214 badges = row.get('badges')
218 username = row.getc('username'),
219 password = row.getc('password'),
220 email = row.getc('email'),
221 email_isvalid= valid_email,
222 is_superuser = 'superuser' in roles,
223 is_staff = 'moderator' in roles,
225 date_joined = row.get('joindate').as_datetime(),
226 about = row.getc('bio'),
227 date_of_birth = row.get('birthdate').as_date(None),
228 website = row.getc('website'),
229 reputation = row.get('reputation').as_int(),
230 gold = badges.get_attr('gold').as_int(),
231 silver = badges.get_attr('silver').as_int(),
232 bronze = badges.get_attr('bronze').as_int(),
233 real_name = row.getc('realname'),
234 location = row.getc('location'),
239 authKeys = row.get('authKeys')
241 for key in authKeys.get_list('key'):
242 orm.AuthKeyUserAssociation(user=user, key=key.getc('key'), provider=key.getc('provider')).save()
244 notifications = row.get('notifications')
246 attributes = dict([(str(k), v.as_bool() and 'i' or 'n') for k, v in notifications.get('notify').attrs.items()])
247 attributes.update(dict([(str(k), v.as_bool()) for k, v in notifications.get('autoSubscribe').attrs.items()]))
248 attributes.update(dict([(str("notify_%s" % k), v.as_bool()) for k, v in notifications.get('notifyOnSubscribed').attrs.items()]))
250 orm.SubscriptionSettings(user=user, enable_notifications=notifications.get_attr('enabled').as_bool(), **attributes).save()
def pre_tag_import(user):
    """Reset the tag lookup table before the tags file is processed.

    Stores a fresh, empty dict on `tag_import.tag_mappings`. The `user`
    argument is accepted but not used here.
    """
    tag_import.tag_mappings = dict()
256 @file_handler('tags.xml', 'tags', 'tag', pre_callback=pre_tag_import)
258 tag = orm.Tag(name=row.getc('name'), used_count=row.get('used').as_int(), created_by_id=row.get('author').as_int())
260 tag_import.tag_mappings[tag.name] = tag
def post_node_import():
    """Drop the tag lookup table once it is no longer needed.

    Replaces `tag_import.tag_mappings` with None so the mapping built
    while importing tags can be released.
    """
    setattr(tag_import, 'tag_mappings', None)
266 @file_handler('nodes.xml', 'nodes', 'node', args_handler=lambda u: [tag_import.tag_mappings], post_callback=post_node_import)
267 def node_import(row, tags):
271 for t in row.get('tags').get_list('tag'):
272 ntags.append(tags[t.content()])
274 last_act = row.get('lastactivity')
278 node_type = row.getc('type'),
279 author_id = row.get('author').as_int(),
280 added_at = row.get('date').as_datetime(),
281 parent_id = row.get('parent').as_int(None),
282 abs_parent_id = row.get('absparent').as_int(None),
284 last_activity_by_id = last_act.get('by').as_int(None),
285 last_activity_at = last_act.get('at').as_datetime(None),
287 title = row.getc('title'),
288 body = row.getc('body'),
289 tagnames = " ".join([t.name for t in ntags]),
291 marked = row.get('marked').as_bool(),
292 extra_ref_id = row.get('extraRef').as_int(None),
293 extra_count = row.get('extraCount').as_int(0),
294 extra = row.get('extraData').as_pickled()
300 revisions = row.get('revisions')
301 active = revisions.get_attr('active').as_int()
303 for r in revisions.get_list('revision'):
304 rev = orm.NodeRevision(
305 author_id = r.getc('author'),
306 body = r.getc('body'),
308 revised_at = r.get('date').as_datetime(),
309 revision = r.get('number').as_int(),
310 summary = r.getc('summary'),
311 tagnames = " ".join(r.getc('tags').split(',')),
312 title = r.getc('title'),
316 if rev.revision == active:
319 node.active_revision = active
324 def post_action(*types):
331 def post_action_import_callback():
332 with_state = orm.Node.objects.filter(id__in=orm.NodeState.objects.values_list('node_id', flat=True).distinct())
335 n.state_string = "".join(["(%s)" % s for s in n.states.values_list('state_type')])
338 @file_handler('actions.xml', 'actions', 'action', post_callback=post_action_import_callback)
339 def actions_import(row):
341 id = row.get('id').as_int(),
342 action_type = row.getc('type'),
343 action_date = row.get('date').as_datetime(),
344 node_id = row.get('node').as_int(None),
345 user_id = row.get('user').as_int(),
346 real_user_id = row.get('realUser').as_int(None),
348 extra = row.get('extraData').as_pickled(),
canceled = row.get('canceled')
if canceled.get_attr('state').as_bool():
    # Copy the cancellation details from the sub-elements of `canceled`.
    action.canceled_by_id = canceled.get('user').as_int()
    # No trailing comma here: with one, the attribute would be bound to a
    # 1-tuple wrapping the datetime instead of the datetime itself, and
    # that tuple would then be reused as the repute date further down.
    action.canceled_at = canceled.get('date').as_datetime()
    action.canceled_ip = canceled.getc('ip')
359 for r in row.get('reputes').get_list('repute'):
360 by_canceled = r.get_attr('byCanceled').as_bool()
364 user_id = r.get('user').as_int(),
365 value = r.get('value').as_int(),
367 date = by_canceled and action.canceled_at or action.action_date,
368 by_canceled = by_canceled
371 if (not action.canceled) and action.action_type in POST_ACTION:
372 POST_ACTION[action.action_type](row, action)
@post_action('voteup', 'votedown', 'voteupcomment')
def vote_action(row, action):
    """Save an `orm.Vote` mirroring the given imported action.

    The vote value is -1 when the action type is 'votedown' and +1 for
    any other type. User, node and date are copied from `action`; `row`
    is not used.
    """
    if action.action_type != 'votedown':
        vote_value = 1
    else:
        vote_value = -1

    vote = orm.Vote(
        user_id=action.user_id,
        node_id=action.node_id,
        action=action,
        voted_at=action.action_date,
        value=vote_value
    )
    vote.save()
382 def state_action(state):
386 node_id = action.node_id,
391 post_action('wikify')(state_action('wiki'))
392 post_action('delete')(state_action('deleted'))
393 post_action('acceptanswer')(state_action('accepted'))
394 post_action('publish')(state_action('published'))
def flag_action(row, action):
    """Save an `orm.Flag` for the given imported action.

    User and node ids are copied from `action`, and the action's `extra`
    value is stored as the flag reason. `row` is not used.
    """
    flag = orm.Flag(
        user_id=action.user_id,
        node_id=action.node_id,
        action=action,
        reason=action.extra
    )
    flag.save()
def award_import_args(user):
    """Build the extra argument list passed to the awards row handler.

    Returns a one-item list holding a dict that maps each badge's `cls`
    value to the badge object, for every badge returned by
    `orm.Badge.objects.all()`. The `user` argument is not used.
    """
    badges_by_cls = {}
    for badge in orm.Badge.objects.all():
        # A later badge with the same `cls` replaces an earlier one.
        badges_by_cls[badge.cls] = badge
    return [badges_by_cls]
406 @file_handler('awards.xml', 'awards', 'award', args_handler=award_import_args)
407 def awards_import(row, badges):
409 user_id = row.get('user').as_int(),
410 badge = badges[row.getc('badge')],
411 node_id = row.get('node').as_int(None),
412 action_id = row.get('action').as_int(None),
413 trigger_id = row.get('trigger').as_int(None)