# import_old_rp.py
import re

import MySQLdb as ms
from MySQLdb.cursors import DictCursor
from django.core.management.base import BaseCommand

from rp.models import Article


class Command(BaseCommand):
    """Management command importing the legacy press review from MySQL.

    Connects to the ``site`` database on the requested host, walks every row
    of the ``presse`` table and feeds it into the ``Article`` model.
    """

    help = """
    Import data from the old press review. Should only be used by
    LQDN staff since the dataformat of the old website only make
    sense in their context.

    It takes three arguments:
        --host hostname
        --sql_user username
        --sql_password password
    host defaults to 127.0.0.1.
    """

    # The website name is read from the title, between square brackets.
    # The match is non-greedy so that a title holding several bracketed
    # groups yields only the first one instead of everything between the
    # first "[" and the last "]".
    # NOTE(review): assumes the website is the first bracketed group -
    # confirm against the legacy data.
    _WEBSITE_RE = re.compile(r'\[(.*?)\]')

    def add_arguments(self, parser):
        """Declare the --host, --sql_user and --sql_password options."""
        parser.add_argument('--host',
                            default='127.0.0.1',
                            dest='host',
                            nargs='?')
        parser.add_argument('--sql_user',
                            dest='user',
                            nargs='+')
        parser.add_argument('--sql_password',
                            dest='password',
                            nargs='+')

    def handle(self, *args, **options):
        """Copy every row of the legacy ``presse`` table into ``Article``.

        Raises:
            CommandError: if ``--sql_user`` or ``--sql_password`` is missing.
        """
        # Imported locally so the module-level import block stays untouched.
        from django.core.management.base import CommandError

        user = options['user']
        password = options['password']
        # Both options use nargs='+': argparse hands over a list, or None
        # when the flag was omitted. Fail with a clear message instead of a
        # TypeError on the [0] lookups below.
        if not user or not password:
            raise CommandError(
                "--sql_user and --sql_password are both required")

        db = ms.connect(host=options['host'],
                        user=user[0],
                        password=password[0],
                        db='site')
        # Always release the cursor and the connection, even when the
        # import aborts half-way through.
        try:
            c = db.cursor(DictCursor)
            try:
                self._import_presse(c)
            finally:
                c.close()
        finally:
            db.close()

    def _import_presse(self, c):
        """Import all ``presse`` rows through cursor *c*, printing progress."""
        # First, let's get the data from presse table; the value returned
        # by execute() is used below as the total row count.
        presse = c.execute("SELECT * FROM presse")

        # NOTE: print() appends a newline after the trailing "\r", so each
        # progress message ends up on its own line.
        print("Importing 0/{} from previous database\r".format(presse))
        # And here we go
        done = 0
        errors = 0
        # fetchall() loads the rows up front, which leaves the cursor free
        # for the per-row lookups done while importing each item.
        for item in c.fetchall():
            done += 1
            print("Importing {}/{} from previous database - {} errors\r".format(done,
                                                                  presse,
                                                                  errors))
            errors += self._import_item(c, item)

    def _import_item(self, c, item):
        """Import one ``presse`` row and return the error count (0 or 1)."""
        c.execute("SELECT nid, vid FROM node WHERE nid=%s", (item['nid'],))
        node = c.fetchone()
        if node is None:
            # Rows without a matching node are skipped and not counted as
            # errors.
            return 0

        c.execute("SELECT body FROM node_revisions WHERE vid=%s",
                  (node['vid'],))
        revision = c.fetchone()
        if revision is None:
            return 1

        # Get the article for this URL.
        # NOTE(review): add_new_url presumably returns the existing article
        # when the URL is already known - confirm in rp.models.
        article = Article.add_new_url(url=item['url'])
        if item['lang'] != "":
            article.lang = item['lang']
        article.published_at = item['date_publi']
        article.title = item['title']

        # Let's extract the website from the title
        website = self._WEBSITE_RE.search(item['title'])
        if website:
            article.website = website.group(1)

        # Raise the up/down score if needed. A missing (NULL) note is
        # treated as 0 so it cannot break the comparisons.
        note = item['note'] or 0
        if note > 0:
            article.und_score_up = note
        if note < 0:
            article.und_score_down = abs(note)
        article.save()
        article.refresh_from_db()

        errors = 0
        if item['published'] >= 1:
            # Let's get the extracts
            article.extracts = revision['body']
            try:
                article.fetch_content()
                article.fetch_image()
                article.fetch_metadata()
            except Exception as exc:
                # Best effort: a failed fetch must not stop the import, but
                # report which URL failed instead of only counting it.
                errors = 1
                self.stderr.write(
                    "Could not fetch {}: {}".format(item['url'], exc))
            if article.status not in ("DRAFT", "PUBLISHED", ):
                article.recover()

        if item['published'] >= 2:
            if article.status != "PUBLISHED":
                article.publish()

        article.save()
        return errors