#!/usr/bin/python
# coding=utf8
import sqlite3
import json
import datetime
import sys
import os

# import the helper functions from easydb-migration-tools
# adjust this path to your local checkout of the repository, e.g.:
# sys.path.append('../easydb-migration-tools/json_migration/')
sys.path.append('/home/pf/migration/easydb-migration/easydb-migration-tools/json_migration/')
import migration_util
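
# This script reads rows from an sqlite database (built from the CSV sources)
# and generates JSON payload files plus a manifest.json that lists them in
# import order; the generated directory is intended for the easydb JSON importer.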

# helper function
# convert the date format from the CSV file (d.m.yyyy) to the valid easydb format (yyyy-mm-dd)
def format_date(date_string):
    # try to parse the input string into a datetime object and format it
    try:
        d = datetime.datetime.strptime(date_string, '%d.%m.%Y')
        return migration_util.datetime_to_date(d)
    except Exception as e:
        print 'Datetime format error:', e
        return None
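
# usage example, assuming migration_util.datetime_to_date formats the parsed
# datetime as a 'yyyy-mm-dd' string (the easydb date format named above):
#   format_date('24.12.1999')  # -> '1999-12-24'
#   format_date('')            # -> None (the parse error is printed)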

# helper function
# dump the dict `payload_data` as a json object into the file `payload_path`
def save_payload(payload_path, payload_data):
    # `with` closes the file even if the json serialization fails
    with open(payload_path, 'w') as payload_file:
        payload_file.write(json.dumps(payload_data, indent=2))
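
# note: open() fails if the target directory does not exist, so the __main__
# block below creates `payload_path` before the first payload is written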

def tags_payload(payload_path):
    # create a simple hard coded payload that includes the tag
    payload = {
        'import_type': 'tags',
        'tags': [
            {
                '_tags': [
                    {
                        'tag': {
                            'displayname': {
                                'en-US': 'Public Access'
                            },
                            'displaytype': 'facet',
                            'enabled': True,
                            'frontend_prefs': {
                                'webfrontend': {
                                    'color': 'green',
                                    'icon': 'fa-eye'
                                }
                            },
                            'is_default': False,
                            'reference': 'public',
                            'sticky': False,
                            'type': 'individual'
                        }
                    }
                ],
                'taggroup': {
                    'displayname': {
                        'en-US': 'Tag Group 1'
                    },
                    'reference': 'taggroup1',
                    'type': 'checkbox'
                }
            }
        ]
    }
    # save the payload
    filename = 'basetype_tags.json'
    save_payload(payload_path + '/' + filename, payload)
    return filename
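
# the tag reference 'public' defined above is looked up again in
# main_objects_payloads(): every row whose `public` column is 'true' gets
# this tag attached via 'lookup:_id'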

def pools_payload(payload_path):
    # create a simple hard coded payload that includes the pool
    payload = {
        'import_type': 'pool',
        'pools': [
            {
                '_basetype': 'pool',
                'pool': {
                    '_version': 1,
                    'lookup:_id_parent': {
                        'reference': 'system:root'
                    },
                    'name': {
                        'en-US': 'Migrated Objects'
                    },
                    'reference': 'migrated_objects'
                }
            }
        ]
    }
    # save the payload
    filename = 'basetype_pools.json'
    save_payload(payload_path + '/' + filename, payload)
    return filename
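
# the pool reference 'migrated_objects' is resolved via 'lookup:_id' in the
# `bilder` and `objekte` objects below, so this payload must be imported
# before the user objects (the manifest keeps that order)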

def orte_to_payloads(connection, payload_path, parent_id='', parent_lookup='', level=0):
    # array of ordered payload names that will be returned as the result
    payload_names = []
    # ordered payload names collected from the recursive calls for all children
    new_payload_names = []
    # get a connection cursor
    cursor = connection.cursor()
    # perform a select statement to get data from the sqlite database:
    # find all rows from "source.orte.csv" whose parent is the given parent value
    # (the id comes from the source CSV; this interpolation assumes it contains no quotes)
    result = migration_util.execute_statement(
        cursor=cursor,
        sql="""
            SELECT id, name
            FROM "source.orte.csv"
            WHERE parent = '%s'
        """ % parent_id)
    # list of the generated linked objects that are saved in the payload for the current level
    orte_objects = []
    # iterate over all rows from the result and generate payloads
    # each row is a tuple with the values for the columns id and name
    for row in result:
        # get the values from the row by using the column index
        row_id = row[0]
        name = row[1]
        # generate and append the payload object
        orte_object = {
            '_objecttype': 'orte',
            '_mask': '_all_fields',
            'orte': {
                '_version': 1,
                'name': name
            }
        }
        # if there is a parent, include the lookup for the parent id
        if len(parent_lookup) > 0:
            orte_object['orte']['lookup:_id_parent'] = {
                'name': parent_lookup
            }
        print 'orte: level %d: %s' % (level, name)
        orte_objects.append(orte_object)
        # recursively collect the payload names for the children of the current object;
        # extend the list instead of overwriting it, so the payloads generated for the
        # children of earlier siblings are not lost
        new_payload_names += orte_to_payloads(connection, payload_path, parent_id=row_id, parent_lookup=name, level=level + 1)
    # close the connection cursor
    cursor.close()
    # if there are generated objects for the payload, generate and save this payload file
    if len(orte_objects) > 0:
        payload = {
            'import_type': 'db',
            'objecttype': 'orte',
            'objects': orte_objects
        }
        # generate a filename that includes the objecttype, the current level and the
        # parent id, so sibling subtrees on the same level do not overwrite each other
        if parent_id:
            filename = 'userobject-orte-level-%d-parent-%s.json' % (level, parent_id)
        else:
            filename = 'userobject-orte-level-%d.json' % level
        # dump the dict object with the payload data as a json object and save it in the file
        save_payload(payload_path + '/' + filename, payload)
        # the returned array must have the current payload name as the first element,
        # followed by the ordered payload names of all children, so parents are
        # always imported before their children
        payload_names = [filename] + new_payload_names
    return payload_names
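
# example with hypothetical data: for a hierarchy 'Deutschland' (id 1) with
# child 'Berlin', the function writes two payload files and returns them in
# import order, parents before children:
#   ['userobject-orte-level-0.json',           # contains 'Deutschland'
#    'userobject-orte-level-1-parent-1.json']  # contains 'Berlin' with a
#                                              # 'lookup:_id_parent' by name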

def linked_objects_payloads(connection, payload_path):
    # array of ordered payload names that will be returned as the result
    payload_names = []
    # perform a select statement to get data from the sqlite database
    cursor = connection.cursor()
    result = migration_util.execute_statement(
        cursor=cursor,
        sql="""
            SELECT fotograf, schlagworte
            FROM "source.bilder.csv"
        """)
    cursor.close()
    # collect the distinct values for the linked objects in sets
    personen = set()
    schlagwoerter = set()
    for row in result:
        # different columns contain the values for different linked objects
        # that are later linked into the main objects
        # column 'fotograf' (index 0) -> objecttype personen
        text = row[0]
        if len(text) > 0:
            personen.add(text)
        # column 'schlagworte' (index 1) -> objecttype schlagwoerter
        text = row[1]
        if len(text) > 0:
            # split multiple values
            values = text.split(';')
            for v in values:
                v = v.strip()
                if len(v) > 0:
                    schlagwoerter.add(v)
    # generate payloads from the collected values
    if len(personen) > 0:
        payload = {
            'import_type': 'db',
            'objecttype': 'personen',
            'objects': []
        }
        for p in personen:
            payload['objects'].append({
                '_objecttype': 'personen',
                '_mask': '_all_fields',
                'personen': {
                    '_version': 1,
                    'name': p
                }
            })
            print 'personen: %s' % p
        # dump the dict object with the payload data as a json object and save it in the file
        filename = 'userobject-personen.json'
        save_payload(payload_path + '/' + filename, payload)
        # add the filename to the payload list
        payload_names.append(filename)
    if len(schlagwoerter) > 0:
        payload = {
            'import_type': 'db',
            'objecttype': 'schlagwoerter',
            'objects': []
        }
        for s in schlagwoerter:
            payload['objects'].append({
                '_objecttype': 'schlagwoerter',
                '_mask': '_all_fields',
                'schlagwoerter': {
                    '_version': 1,
                    'name': s
                }
            })
            print 'schlagwoerter: %s' % s
        # dump the dict object with the payload data as a json object and save it in the file
        filename = 'userobject-schlagwoerter.json'
        save_payload(payload_path + '/' + filename, payload)
        # add the filename to the payload list
        payload_names.append(filename)
    return payload_names
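
# using sets above deduplicates the values, so each person and each keyword
# is created exactly once; the main objects reference them by name via
# 'lookup:_id' instead of embedding duplicates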

def main_objects_payloads(connection, payload_path):
    # array of ordered payload names that will be returned as the result
    payload_names = []
    # objects for the list in the payload
    objects = []
    # perform a select statement to get data from the sqlite database
    cursor = connection.cursor()
    result = migration_util.execute_statement(
        cursor=cursor,
        sql="""
            SELECT inventarnummer,
                "date-from", "date-to",
                "title-de", "title-en",
                "date",
                image,
                ort, fotograf, schlagworte,
                public = 'true',
                __source_unique_id
            FROM "source.bilder.csv"
            WHERE inventarnummer != ''
        """)
    cursor.close()
    for row in result:
        # main object of objecttype `bilder`, reverse linked in objecttype `objekte`
        # create two objects from one row
        ##################################
        # structure for the `bilder` object
        # this object will be included in the reverse nested table in the `objekte` object
        # use the __source_unique_id (index 11) to build a unique reference for the object
        reference = 'bild_' + row[11].strip()
        print 'bilder:', reference
        obj_bilder = {
            '_pool': {
                'pool': {
                    'lookup:_id': {
                        'reference': 'migrated_objects'
                    }
                }
            },
            '_version': 1,
            'reference': reference
        }
        # fill the `bilder` object with data
        # for every text in the result row, strip the text to remove surrounding whitespace
        # only use the data if the text is not an empty string (since there is no NULL)
        # bilder.titel (index 3, 4)
        # l10n field, build an object with values for the different languages
        title_de = row[3].strip()
        title_en = row[4].strip()
        obj_bilder['titel'] = {
            'de-DE': title_de if len(title_de) else None,
            'en-US': title_en if len(title_en) else None
        }
        # bilder.aufnahmedatum (index 5)
        # date field, build an object with the date value
        obj_bilder['aufnahmedatum'] = {
            'value': format_date(row[5].strip())  # helper function to format the date
        }
        # bilder.datei (index 6)
        # asset field, build an array with one object with the asset url
        url = row[6].strip()
        if len(url) > 0:
            obj_bilder['datei'] = [
                {
                    'eas:url': url,
                    'preferred': True
                }
            ]
        # bilder.aufnahmeort (index 7)
        # linked object of objecttype `orte`, lookup by name
        ort = row[7].strip()
        if len(ort) > 0:
            obj_bilder['aufnahmeort'] = {
                '_mask': '_all_fields',
                '_objecttype': 'orte',
                'orte': {
                    'lookup:_id': {
                        'name': ort
                    }
                }
            }
        # bilder.personen (index 8)
        # linked object of objecttype `personen` in a nested table, lookup by name
        person = row[8].strip()
        if len(person) > 0:
            obj_bilder['_nested:bilder__personen'] = [
                {
                    'bemerkung': 'Fotograf',
                    'person': {
                        '_mask': '_all_fields',
                        '_objecttype': 'personen',
                        'personen': {
                            'lookup:_id': {
                                'name': person
                            }
                        }
                    }
                }
            ]
        # bilder.schlagwoerter (index 9)
        # linked objects of objecttype `schlagwoerter` in a nested table, lookup by name
        # multiple values are separated by `;`: split the text and insert one linked object for each value
        obj_bilder['_nested:bilder__schlagwoerter'] = []
        schlagwoerter = row[9].strip()
        if len(schlagwoerter) > 0:
            values = schlagwoerter.split(';')
            for v in values:
                v = v.strip()
                if len(v) > 0:
                    # insert a new linked object with a lookup for this value
                    obj_bilder['_nested:bilder__schlagwoerter'].append(
                        {
                            'schlagwort': {
                                '_mask': '_all_fields',
                                '_objecttype': 'schlagwoerter',
                                'schlagwoerter': {
                                    'lookup:_id': {
                                        'name': v
                                    }
                                }
                            }
                        }
                    )
        ##################################
        # structure for the `objekte` object
        obj_objekte = {
            '_mask': '_all_fields',
            '_objecttype': 'objekte',
            '_tags': [],
            'objekte': {
                '_pool': {
                    'pool': {
                        'lookup:_id': {
                            'reference': 'migrated_objects'
                        }
                    }
                },
                '_version': 1,
                # directly include the `bilder` object that was just created
                '_reverse_nested:bilder:objekte': [
                    obj_bilder
                ]
            }
        }
        # fill the `objekte` object with data
        # check if the row is marked as `public`; if so, add the tag
        # column: index 10, the value here is a boolean (see the SELECT statement)
        if row[10]:
            obj_objekte['_tags'].append(
                {
                    'lookup:_id': {
                        'reference': 'public'
                    }
                }
            )
        # objekte.inventarnummer (index 0)
        # simple text field
        obj_objekte['objekte']['inventarnummer'] = row[0].strip()
        print 'objekte:', obj_objekte['objekte']['inventarnummer']
        # objekte.datierung (index 1, 2)
        # date range field, build an object with two date values
        # start value (from): index 1
        # end value (to): index 2
        obj_objekte['objekte']['datierung'] = {
            'from': format_date(row[1].strip()),  # helper function to format the date
            'to': format_date(row[2].strip())     # helper function to format the date
        }
        # append the generated object to the list of objects in the payload
        objects.append(obj_objekte)
    # build the payload with the objects
    if len(objects) > 0:
        payload = {
            'import_type': 'db',
            'objecttype': 'objekte',
            'objects': objects
        }
        # save the payload in a file and add the filename to the return value array
        filename = 'userobject-objekte-1.json'
        save_payload(payload_path + '/' + filename, payload)
        payload_names.append(filename)
    return payload_names
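
# note: each source row yields one `objekte` object with its `bilder` object
# embedded via '_reverse_nested:bilder:objekte', so the importer creates both
# objects and their link from a single payload entry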

if __name__ == "__main__":
    payload_path = 'generated_payloads'
    payloads = []
    # make sure the output directory for the generated payload files exists
    if not os.path.isdir(payload_path):
        os.makedirs(payload_path)
    # connect to the sqlite database
    connection = migration_util.connect_to_sqlite('migration_data.sqlite')
    # basetype tag
    payloads.append(tags_payload(payload_path))
    # basetype pool
    payloads.append(pools_payload(payload_path))
    # read the hierarchic place names from orte.csv
    payloads += orte_to_payloads(connection, payload_path)
    # read the main table and generate payloads for all collected linked objects
    payloads += linked_objects_payloads(connection, payload_path)
    # read the main table again and generate payloads for the main objects
    payloads += main_objects_payloads(connection, payload_path)
    # create the manifest that lists all payloads in import order
    manifest = {
        'source': 'Example Migration',
        'batch_size': 100,
        'eas_type': 'url',
        'payloads': payloads
    }
    # save the manifest file
    save_payload(payload_path + '/manifest.json', manifest)
    # close the connection to the sqlite database
    connection.close()
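
# expected result: the directory 'generated_payloads' contains manifest.json
# plus the payload files listed in it, in import order: basetype tags and
# pools first, then orte (parents before children), then the linked personen
# and schlagwoerter objects, and finally the main objekte payload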