import re

import MySQLdb as ms
from MySQLdb.cursors import DictCursor
from django.core.management.base import BaseCommand

from rp.models import Article


class Command(BaseCommand):
    help = """
    Import data from the old press review. Should only be used by
    LQDN staff, since the data format of the old website only makes
    sense in their context.

    It takes three arguments:
        --host hostname
        --sql_user username
        --sql_password password
    host defaults to 127.0.0.1.
    """

    def add_arguments(self, parser):
        parser.add_argument('--host',
                            default='127.0.0.1',
                            dest='host',
                            nargs='?')
        parser.add_argument('--sql_user',
                            dest='user',
                            nargs='+')
        parser.add_argument('--sql_password',
                            dest='password',
                            nargs='+')

    def handle(self, *args, **options):
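        # Connect to the legacy database (hardcoded name 'site') with the
        # credentials passed on the command line.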
        db = ms.connect(host=options['host'],
                        user=options['user'][0],
                        password=options['password'][0],
                        db='site')
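        # DictCursor returns each row as a dict keyed by column name, which
        # keeps the lookups below readable.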
        c = db.cursor(DictCursor)
        # First, let's get the data from presse table
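        # MySQLdb's execute() returns the number of matching rows; it is used
        # as the total for the progress output below.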
        presse = c.execute("SELECT * FROM presse")

        print("Importing 0/{} from previous database".format(presse))
        # And here we go
        done = 0
        for item in c.fetchall():
            done += 1
            print("Importing {}/{} from previous database".format(done,
                                                                  presse))
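            # Find the matching node row to get its current revision id; skip
            # items whose node no longer exists.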
            c.execute("SELECT nid, vid FROM node WHERE nid=%s", (item['nid'],))
            node = c.fetchone()
            if node is None:
                continue

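            # The article body lives in the node_revisions table, keyed by vid.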
            c.execute("SELECT body FROM node_revisions WHERE vid=%s",
                      (node['vid'],))
            revision = c.fetchone()
            if revision is None:
                continue

            # Retrieve the article if it already exists in the database
            article = Article.add_new_url(url=item['url'])
            if item['lang'] != "":
                article.lang = item['lang']
            article.published_at = item['date_publi']
            article.title = item['title']

            # Let's extract the website from the title
            website = re.search(r'\[(.*)]', item['title'])
            if website:
                article.website = website.group(1)

            # Bump the score if needed
            if item['note'] > 0:
                article.und_score_up = item['note']
            if item['note'] < 0:
                article.und_score_down = abs(item['note'])
            article.save()
            article.refresh_from_db()

            if item['published'] >= 1:
                # Let's get the extracts
                article.extracts = revision['body']
                try:
                    article.fetch_content()
                    article.fetch_image()
                except Exception:
                    pass
                if article.status not in ("DRAFT", "PUBLISHED", ):
                    article.recover()

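            # Entries flagged 2 or more also get published here.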
            if item['published'] >= 2:
                if article.status != "PUBLISHED":
                    article.publish()

            article.save()