# import_old_rp.py
import re

import MySQLdb as ms
from MySQLdb.cursors import DictCursor
from django.core.management.base import BaseCommand

from rp.models import Article


class Command(BaseCommand):
    help = """
    Import data from the old press review. Should only be used by
    LQDN staff since the dataformat of the old website only make
    sense in their context.

    It takes three arguments:
        --host hostname
        --sql_user username
        --sql_password password
    host defaults to 127.0.0.1.
    """

    def add_arguments(self, parser):
        # --host is optional with a sensible default; the credentials
        # use nargs='+' so argparse stores them as lists, which is why
        # handle() unpacks them with [0].
        parser.add_argument('--host',
                            default='127.0.0.1',
                            dest='host',
                            nargs='?')
        parser.add_argument('--sql_user',
                            dest='user',
                            nargs='+')
        parser.add_argument('--sql_password',
                            dest='password',
                            nargs='+')

    def handle(self, *args, **options):
        """Mirror every row of the legacy Drupal `presse` table as an
        Article: language, publication date, title, website (parsed from
        the "[Site] Title" prefix), score, tags and publication status.

        The MySQL connection is closed in a finally block so it is not
        leaked if an import raises halfway through.
        """
        db = ms.connect(host=options['host'],
                        user=options['user'][0],
                        password=options['password'][0],
                        db='site')
        try:
            c = db.cursor(DictCursor)

            # First, let's get the data from presse table.
            # We're also grouping a lot of queries.
            # MySQLdb's execute() returns the number of matched rows,
            # so `presse` is the row total used for progress reporting.
            presse = c.execute("""SELECT *, nr.body as body, GROUP_CONCAT(DISTINCT t.name) as tags
                FROM presse p
                JOIN node n
                    ON n.nid = p.nid
                JOIN node_revisions nr
                    ON nr.vid = n.vid
                JOIN term_node tn
                    ON tn.vid = n.vid
                JOIN term_data t
                    ON t.tid = tn.tid
                GROUP BY n.nid""")

            print("Importing 0/{} from previous database".format(presse), end='\r')

            # And here we go
            done = 0
            errors = 0
            for item in c.fetchall():
                done += 1
                print("Importing {}/{} from previous database ({} errors for now)".format(
                    done, presse, errors), end='\r')

                # Fetch the article, or creates it
                article = Article.add_new_url(url=item['url'])
                if item['lang'] != "":
                    article.lang = item['lang']
                article.published_at = item['date_publi']
                article.title = item['title']

                # Let's extract the website from the title ("[Site] Title")
                website = re.search(r'\[(.*)]', item['title'])
                if website:
                    article.website = website.group(1)

                # Raise the score if needed: a positive legacy note maps to
                # up-votes, a negative one to down-votes.
                if item['note'] > 0:
                    article.und_score_up = item['note']
                if item['note'] < 0:
                    article.und_score_down = abs(item['note'])
                article.save()
                article.refresh_from_db()

                # Insert tags (GROUP_CONCAT yields a comma-separated string).
                # NOTE(review): django-taggit >= 2.0 expects set() to take an
                # iterable, not *args — confirm against the pinned version.
                article.tags.set(*item['tags'].lower().split(','))

                # Publish or draft as needed
                if item['published'] >= 1:
                    # Let's get the extracts
                    article.extracts = item['body']
                    try:
                        # Best effort: the remote pages may be long gone, so
                        # any fetch failure is only counted, never fatal.
                        article.fetch_content()
                        article.fetch_image()
                        article.fetch_metadata()
                    except Exception:
                        errors += 1
                    if article.status not in ("DRAFT", "PUBLISHED", ):
                        article.recover()

                if item['published'] >= 2:
                    if article.status != "PUBLISHED":
                        article.publish()

                article.save()
        finally:
            db.close()