import re

import MySQLdb as ms
from MySQLdb.cursors import DictCursor
from django.core.management.base import BaseCommand

from rp.models import Article


class Command(BaseCommand):
    help = """Import data from the old press review.

    Should only be used by LQDN staff, since the data format of the old
    website only makes sense in their context.

    It takes three arguments:
        --host hostname
        --sql_user username
        --sql_password password

    host defaults to 127.0.0.1.
    """

    def add_arguments(self, parser):
        parser.add_argument('--host', default='127.0.0.1', dest='host', nargs='?')
        parser.add_argument('--sql_user', dest='user', nargs='+')
        parser.add_argument('--sql_password', dest='password', nargs='+')

    def handle(self, *args, **options):
        db = ms.connect(host=options['host'],
                        user=options['user'][0],
                        password=options['password'][0],
                        db='site')
        c = db.cursor(DictCursor)

        # First, let's get the data from the presse table.
        # We're also grouping a lot of queries: one row per node, with its
        # revision body and its tags concatenated. MySQLdb's execute()
        # returns the number of rows, which we use for the progress display.
        presse = c.execute("""SELECT *, nr.body AS body,
                                     GROUP_CONCAT(DISTINCT t.name) AS tags
                              FROM presse p
                              JOIN node n ON n.nid = p.nid
                              JOIN node_revisions nr ON nr.vid = n.vid
                              JOIN term_node tn ON tn.vid = n.vid
                              JOIN term_data t ON t.tid = tn.tid
                              GROUP BY n.nid""")
        print("Importing 0/{} from previous database".format(presse), end='\r')

        # And here we go
        done = 0
        errors = 0
        for item in c.fetchall():
            done += 1
            print("Importing {}/{} from previous database "
                  "({} errors for now)".format(done, presse, errors), end='\r')

            # Fetch the article, or create it
            article = Article.add_new_url(url=item['url'])
            if item['lang'] != "":
                article.lang = item['lang']
            article.published_at = item['date_publi']
            article.title = item['title']

            # Let's extract the website from the square brackets in the title
            website = re.search(r'\[(.*)\]', item['title'])
            if website:
                article.website = website.group(1)

            # Adjust the score if needed
            if item['note'] > 0:
                article.und_score_up = item['note']
            if item['note'] < 0:
                article.und_score_down = abs(item['note'])

            article.save()
            article.refresh_from_db()

            # Insert tags
            article.tags.set(*item['tags'].lower().split(','))

            # Publish or draft as needed
            if item['published'] >= 1:
                # Let's get the extracts
                article.extracts = item['body']
                try:
                    article.fetch_content()
                    article.fetch_image()
                    article.fetch_metadata()
                except Exception:
                    errors += 1
                if article.status not in ("DRAFT", "PUBLISHED", ):
                    article.recover()
            if item['published'] >= 2:
                if article.status != "PUBLISHED":
                    article.publish()

            article.save()
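
# A minimal usage sketch. The management command name comes from this file's
# name under rp/management/commands/, which is not shown here; "import_presse"
# is an assumed placeholder, and the host/user/password values are examples.
#
#   python manage.py import_presse --host 127.0.0.1 \
#       --sql_user olduser --sql_password oldpassword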