From f21e89898f01544cf99b87faab17aa6335528cc7 Mon Sep 17 00:00:00 2001
From: kieranjol <kieran.o.leary@gmail.com>
Date: Sun, 30 Jul 2017 14:01:32 +0100
Subject: [PATCH 01/31] creates premis CSV proof of concept scripts

---
 README.md        |  35 +++++++---
 premiscsv.py     | 117 ++++++++++++++++++++++++++++++++
 premisobjects.py | 170 +++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 314 insertions(+), 8 deletions(-)
 create mode 100755 premiscsv.py
 create mode 100755 premisobjects.py

diff --git a/README.md b/README.md
index 9c74f4a..c9688c2 100644
--- a/README.md
+++ b/README.md
@@ -7,22 +7,25 @@ table of contents
 1. [summary](https://github.com/kieranjol/IFIscripts#summary)
 2. [Arrangement](https://github.com/kieranjol/IFIscripts#arrangement)
     * [sipcreator.py](https://github.com/kieranjol/IFIscripts#sipcreator)
-3. [Transcodes](https://github.com/kieranjol/IFIscripts#transcodes)
+3. [PREMIS](https://github.com/kieranjol/IFIscripts#PREMIS)
+    * [premisobjects.py](https://github.com/kieranjol/IFIscripts#premisobjectspy)
+    * [premiscsv.py](https://github.com/kieranjol/IFIscripts#premiscsvpy)
+4. [Transcodes](https://github.com/kieranjol/IFIscripts#transcodes)
     * [makeffv1.py](https://github.com/kieranjol/IFIscripts#makeffv1py)
     * [bitc.py](https://github.com/kieranjol/IFIscripts#bitcpy)
     * [prores.py](https://github.com/kieranjol/IFIscripts#prorespy)
     * [concat.py](https://github.com/kieranjol/IFIscripts#concatpy)
-4. [Digital Cinema Package Scripts](https://github.com/kieranjol/IFIscripts#digital-cinema-package-scripts)
+5. [Digital Cinema Package Scripts](https://github.com/kieranjol/IFIscripts#digital-cinema-package-scripts)
 	* [dcpaccess.py](https://github.com/kieranjol/IFIscripts#dcpaccesspy)
     * [dcpfixity.py](https://github.com/kieranjol/IFIscripts#dcpfixitypy)
     * [dcpsubs2srt.py](https://github.com/kieranjol/IFIscripts#dcpsubs2srtpy)
-5. [Fixity Scripts](https://github.com/kieranjol/IFIscripts#fixity-scripts)
+6. [Fixity Scripts](https://github.com/kieranjol/IFIscripts#fixity-scripts)
     * [copyit.py](https://github.com/kieranjol/IFIscripts#copyitpy)
     * [manifest.py](https://github.com/kieranjol/IFIscripts#manifestpy)
     * [sha512deep.py](https://github.com/kieranjol/IFIscripts#sha512deeppy)
     * [validate.py](https://github.com/kieranjol/IFIscripts#validatepy)
     * [batchfixity.py](https://github.com/kieranjol/IFIscripts#batchfixitypy)
-6. [Image Sequences](https://github.com/kieranjol/IFIscripts#image-sequences)
+7. [Image Sequences](https://github.com/kieranjol/IFIscripts#image-sequences)
     * [makedpx.py](https://github.com/kieranjol/IFIscripts#makedpxpy)
     * [seq2ffv1.py](https://github.com/kieranjol/IFIscripts#seq2ffv1py)
     * [seq2prores.py](https://github.com/kieranjol/IFIscripts#seq2prorespy)
@@ -33,19 +36,19 @@ table of contents
     * [seq2dv.py](https://github.com/kieranjol/IFIscripts#seq2dvpy)
     * [batchmetadata.py](https://github.com/kieranjol/IFIscripts#batchmetadata)
 	* [batchrename.py](https://github.com/kieranjol/IFIscripts#batchrename)
-7. [Quality Control](https://github.com/kieranjol/IFIscripts#quality-control)
+8. [Quality Control](https://github.com/kieranjol/IFIscripts#quality-control)
     * [qctools.py](https://github.com/kieranjol/IFIscripts#qctoolspy)
-8. [Specific Workflows](https://github.com/kieranjol/IFIscripts#specific-workflows)
+9. [Specific Workflows](https://github.com/kieranjol/IFIscripts#specific-workflows)
     * [mezzaninecheck.py](https://github.com/kieranjol/IFIscripts#mezzaninecheckpy)
     * [loopline.py](https://github.com/kieranjol/IFIscripts#looplinepy)
     * [masscopy.py](https://github.com/kieranjol/IFIscripts#masscopypy)
     * [dvsip.py](https://github.com/kieranjol/IFIscripts#dvsippy)
-9. [Misc](https://github.com/kieranjol/IFIscripts#misc)
+10. [Misc](https://github.com/kieranjol/IFIscripts#misc)
     * [update.py](https://github.com/kieranjol/IFIscripts#updatepy)
     * [giffer.py](https://github.com/kieranjol/IFIscripts#gifferpy)
     * [makeuuid.py](https://github.com/kieranjol/IFIscripts#makeuuidpy)
     * [durationcheck.py](https://github.com/kieranjol/IFIscripts#durationcheck.py)
-10. [Experimental-Premis](https://github.com/kieranjol/IFIscripts#experimental-premis)
+11. [Experimental-Premis](https://github.com/kieranjol/IFIscripts#experimental-premis)
     * [premis.py](https://github.com/kieranjol/IFIscripts#premispy)
     * [revtmd.py](https://github.com/kieranjol/IFIscripts#revtmdpy)
     * [as11fixity.py](https://github.com/kieranjol/IFIscripts#as11fixitypy)
@@ -68,6 +71,22 @@ Note: Documentation template has been copied from [mediamicroservices](https://g
 * Usage for more than one directory - `sipcreator.py -i /path/to/directory_name1 /path/to/directory_name2 -o /path/to/output_folder`
 * Run `sipcreator.py -h` for all options.
 
+## PREMIS ##
+
+### premisobjects.py ###
+* Creates a somewhat PREMIS compliant CSV file describing objects in a package. A separate script will need to be written in order to transform these CSV files into XML.
+* As the flat CSV structure prevents maintaining some of the complex relationships between units, some semantic units have been merged, for example:`relationship_structural_includes` is really a combination of the `relationshipType` and `relationshipSubType` units, which each have the values: `Structural` and `Includes` respectively.
+* Assumptions for now: representation UUID already exists as part of the SIP/AIP folder structure. Find a way to supply this, probably via argparse.
+* For more information, run `pydoc premisobjects`
+* Usage: `premisobjects.py path/to/SIP path/to/manifest.md5`
+
+### premiscsv.py ###
+* Extracts preservation events from an IFI plain text log file and converts to a CSV using the PREMIS data dictionary.
+* For more information, run `pydoc premiscsv`
+* Usage: `premiscsv.py path/to/logfile.log`
+
+
+
 ## Transcodes ##
 
 ### makeffv1.py ###
diff --git a/premiscsv.py b/premiscsv.py
new file mode 100755
index 0000000..8ee66b8
--- /dev/null
+++ b/premiscsv.py
@@ -0,0 +1,117 @@
+#!/usr/bin/env python
+'''
+Extracts preservation events from an IFI plain text log file and converts
+to a CSV using the PREMIS data dictionary
+'''
+import os
+import sys
+# from lxml import etree
+import ififuncs
+def find_events(logfile):
+    '''
+    A very hacky attempt to extract the relevant preservation events from our
+    log files.
+    '''
+    sip_test = os.path.basename(logfile).replace('_sip_log.log', '')
+    if ififuncs.validate_uuid4(sip_test) != False:
+        linking_object_identifier_value = sip_test
+    with open(logfile, 'r') as logfile_object:
+        log_lines = logfile_object.readlines()
+    for event_test in log_lines:
+        if 'eventDetail=copyit.py' in event_test:
+            logsplit = event_test.split(',')
+            for line_fragment in logsplit:
+                manifest_event = line_fragment.replace(
+                    'eventDetail', ''
+                ).replace('\n', '').split('=')[1]
+    for log_entry in log_lines:
+        valid_entries = [
+            'eventType',
+            'eventDetail=sipcreator.py',
+            'eventDetail=Mediatrace',
+            'eventDetail=Technical',
+            'eventDetail=copyit.py'
+        ]
+        for entry in valid_entries:
+            if entry in log_entry:
+                break_loop = ''
+                event_outcome = ''
+                event_detail = ''
+                event_outcome_detail_note = ''
+                event_type = ''
+                event_row = []
+                datetime = log_entry[:19]
+                logsplit = log_entry.split(',')
+                for line_fragment in logsplit:
+                    if 'eventType' in line_fragment:
+                        if 'EVENT =' in line_fragment:
+                            line_fragment = line_fragment.split('EVENT =')[1]
+                        event_type = line_fragment.replace(
+                            ' eventType=', ''
+                        ).replace('assignement', 'assignment')
+                    if ' value' in line_fragment:
+                        # this assumes that the value is the outcome of an identifier assigment.
+                        event_outcome = line_fragment[7:].replace('\n', '')
+                    # we are less concerned with events starting.
+                    if 'status=started' in line_fragment:
+                        break_loop = 'continue'
+                    if 'Generating destination manifest:' in line_fragment:
+                        break_loop = ''
+                        event_detail = manifest_event
+                    # ugh, this might run multiple times.
+                    if 'eventDetail=sipcreator.py' in log_entry:
+                        event_type = 'Information Package Creation'
+                        event_detail = line_fragment.replace(
+                            'eventDetail', ''
+                        ).replace('\n', '').split('=')[1]
+                        event_outcome_detail_note = 'Submission Information Package'
+                    if ('eventDetail=Mediatrace' in log_entry) or ('eventDetail=Technical' in log_entry):
+                        event_type = 'metadata extraction'
+                        event_outcome = log_entry.split(
+                            'eventOutcome=', 1
+                        )[1].replace(', agentName=mediainfo', '').replace('\n', '')
+                        if 'eventDetail=Mediatrace' in log_entry:
+                            event_outcome = event_outcome.replace('mediainfo.xml', 'mediatrace.xml')
+                if (break_loop == 'continue') or (event_type == ''):
+                    continue
+                print event_type
+                event_row = [
+                    'UUID', ififuncs.create_uuid(),
+                    event_type, datetime, event_detail,
+                    '',
+                    event_outcome, '',
+                    event_outcome_detail_note, '',
+                    '', '',
+                    '', 'UUID',
+                    linking_object_identifier_value, ''
+                ]
+                ififuncs.append_csv('bla.csv', event_row)
+
+def make_events_csv():
+    '''
+    Generates a CSV with PREMIS-esque headings. Currently it's just called
+    'bla.csv' but it will probably be called:
+    UUID_premisevents.csv
+    and sit in the metadata directory.
+    '''
+    premis_events = [
+        'eventIdentifierType', 'eventIdentifierValue',
+        'eventType', 'eventDateTime', 'eventDetail',
+        'eventDetailExtension',
+        'eventOutcome',	'eventOutcomeDetail',
+        'eventOutcomeDetailNote', 'eventOutcomeDetailExtension',
+        'linkingAgentIdentifierType', 'linkingAgentIdentifierValue',
+        'linkingAgentIdentifierRole', 'linkingObjectIdentifierType',
+        'linkingObjectIdentifierValue', 'linkingObjectRole'
+    ]
+    ififuncs.create_csv('bla.csv', premis_events)
+
+def main():
+    '''
+    Launches all the other functions when run from the command line.
+    '''
+    logfile = sys.argv[1]
+    find_events(logfile)
+
+if __name__ == '__main__':
+    main()
diff --git a/premisobjects.py b/premisobjects.py
new file mode 100755
index 0000000..a534280
--- /dev/null
+++ b/premisobjects.py
@@ -0,0 +1,170 @@
+#!/usr/bin/env python
+'''
+Creates a somewhat PREMIS compliant CSV file describing objects in a package.
+A separate script will need to be written in order to transform these
+CSV files into XML.
+As the flat CSV structure prevents maintaining some of the complex
+relationships between units, some semantic units have been merged, for example:
+relationship_structural_includes is really a combination of the
+relationshipType and relationshipSubType units, which each have the values:
+Structural and Includes respectively.
+
+todo:
+Document identifier assignment for files and IE. Probably in events sheet?
+Allow for derivation to be entered
+Link with events sheet
+Link mediainfo xml in /metadata to the objectCharacteristicsExtension field.
+
+
+Assumptions for now: representation UUID already exists as part of the
+SIP/AIP folder structure. Find a way to supply this, probably via argparse.
+'''
+
+import os
+import sys
+import ififuncs
+
+
+def get_checksum(manifest, filename):
+    '''
+    Extracts checksum from manifest, rather than generating a fresh one.
+    '''
+    if os.path.isfile(manifest):
+        with open(manifest, 'r') as manifest_object:
+            manifest_lines = manifest_object.readlines()
+            for md5 in manifest_lines:
+                if filename in md5:
+                    return md5[:32]
+
+
+def make_skeleton_csv():
+    '''
+    Generates a CSV with PREMIS-esque headings. Currently it's just called
+    'cle.csv' but it will probably be called:
+    UUID_premisobjects.csv
+    and sit in the metadata directory.
+    '''
+    premis_object_units = [
+        'objectIdentifier',
+        'objectCategory',
+        'messageDigestAlgorithm', 'messageDigest', 'messageDigestOriginator',
+        'size',	'formatName', 'formatVersion',
+        'objectCharacteristicsExtension', 'originalName',
+        'contentLocationType', 'contentLocationValue',
+        'relatedObjectIdentifierType', 'relatedObjectIdentifierValue',
+        'relatedObjectSequence',
+        'relatedEventIdentifierType', 'relatedEventIdentifierValue',
+        'relatedEventSequence',
+        'linkingEventIdentifierType', 'linkingEventIdentifierValue',
+        'relationship_structural_includes',
+        'relationship_structural_isincludedin',
+        'relationship_structural_represents',
+        'relationship_structural_hasroot',
+        'relationship_derivation_hassource'
+    ]
+    ififuncs.create_csv('cle.csv', premis_object_units)
+
+
+def file_description(source, manifest, representation_uuid):
+    '''
+    Generate PREMIS descriptions for items and write to CSV.
+    '''
+    item_ids = []
+    for root, _, filenames in os.walk(source):
+        if os.path.basename(root) == 'objects':
+            filenames = [f for f in filenames if f[0] != '.']
+            for item in filenames:
+                item_uuid = ififuncs.create_uuid()
+                full_path = os.path.join(root, item)
+                item_dictionary = {}
+                item_dictionary['objectIdentifier'] = ['UUID', item_uuid]
+                item_dictionary['objectCategory'] = 'file'
+                item_dictionary['size'] = str(os.path.getsize(full_path))
+                item_dictionary['originalName'] = item
+                item_dictionary['relationship_structural_isincludedin'] = representation_uuid
+                item_ids.append(item_uuid)
+                file_data = [
+                    item_dictionary['objectIdentifier'],
+                    item_dictionary['objectCategory'],
+                    'md5', get_checksum(manifest, item), 'internal',
+                    item_dictionary['size'], '', '',
+                    '', '',
+                    '', '',
+                    '', '',
+                    '',
+                    '', '',
+                    '',
+                    '', '',
+                    '',
+                    item_dictionary['relationship_structural_isincludedin'],
+                    '',
+                    '',
+                    ''
+                ]
+                ififuncs.append_csv('cle.csv', file_data)
+    return item_ids
+def representation_description(representation_uuid, item_ids):
+    '''
+    Generate PREMIS descriptions for a representation and write to CSV.
+    '''
+
+    representation_dictionary = {}
+    representation_dictionary['objectIdentifier'] = ['UUID', representation_uuid]
+    representation_dictionary['objectCategory'] = 'representation'
+    representation_dictionary['relationship_structural_includes'] = ''
+    for item_id in item_ids:
+        representation_dictionary['relationship_structural_includes'] += item_id + '|'
+    representation_data = [
+        representation_dictionary['objectIdentifier'],
+        representation_dictionary['objectCategory'],
+        '', '', '',
+        '', '', '',
+        '', '',
+        '', '',
+        '', '',
+        '',
+        '', '',
+        '',
+        '', '',
+        representation_dictionary['relationship_structural_includes'],
+        '',
+        '',
+        '',
+        ''
+    ]
+    ififuncs.append_csv('cle.csv', representation_data)
+
+
+def intellectual_entity_description():
+    '''
+    Generate PREMIS descriptions for Intellectual Entities and write to CSV.
+    '''
+    intellectual_entity_dictionary = {}
+    intellectual_entity_dictionary['objectIdentifier'] = ['UUID', ififuncs.create_uuid()]
+    intellectual_entity_dictionary['objectCategory'] = 'intellectual entity'
+    print intellectual_entity_dictionary
+def find_representation_uuid(source):
+    '''
+    This extracts the representation UUID from a directory name.
+    This should be moved to ififuncs as it can be used by other scripts.
+    '''
+    for root, _, _ in os.walk(source):
+        if 'objects' in root:
+            return os.path.basename(os.path.dirname(root))
+
+
+def main():
+    '''
+    Launches all the other functions when run from the command line.
+    '''
+    make_skeleton_csv()
+    source = sys.argv[1]
+    manifest = sys.argv[2]
+    representation_uuid = find_representation_uuid(source)
+    item_ids = file_description(source, manifest, representation_uuid)
+    #intellectual_entity_description()
+    representation_description(representation_uuid, item_ids)
+
+if __name__ == '__main__':
+    main()
+

From 53aac7d6e2a6721076ce60b05a7d760a7e6a9ecd Mon Sep 17 00:00:00 2001
From: kieranjol <kieran.o.leary@gmail.com>
Date: Sun, 30 Jul 2017 22:39:48 +0100
Subject: [PATCH 02/31] adds premis csv to xml draft script

---
 premiscsv2xml.py | 71 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 71 insertions(+)
 create mode 100755 premiscsv2xml.py

diff --git a/premiscsv2xml.py b/premiscsv2xml.py
new file mode 100755
index 0000000..c595f8d
--- /dev/null
+++ b/premiscsv2xml.py
@@ -0,0 +1,71 @@
+#!/usr/bin/env python
+
+import os
+import sys
+import csv
+from lxml import etree
+
+def extract_metadata(csv_file):
+    object_dictionaries = []
+    input_file = csv.DictReader(open(csv_file))
+    for rows in input_file:
+        object_dictionaries.append(rows)
+    return object_dictionaries
+
+def add_value(value, element):
+    element.text = value
+
+
+def write_premis(doc, premisxml):
+    with open(premisxml, 'w') as outFile:
+        doc.write(outFile, pretty_print=True)
+
+
+def create_unit(index, parent, unitname):
+    premis_namespace = "http://www.loc.gov/premis/v3"
+    unitname = etree.Element("{%s}%s" % (premis_namespace, unitname))
+    parent.insert(index, unitname)
+    return unitname
+
+def setup_xml(object_dictionaries):
+    namespace = '<premis:premis xmlns:premis="http://www.loc.gov/premis/v3" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.loc.gov/premis/v3 https://www.loc.gov/standards/premis/premis.xsd" version="3.0"></premis:premis>'
+    premis_namespace = "http://www.loc.gov/premis/v3"
+    premis = etree.fromstring(namespace)
+    doc = etree.ElementTree(premis)
+    for objects in object_dictionaries:
+        id_list = objects['objectIdentifier'].replace(
+            '[', ''
+        ).replace(']', '').replace('\'', '').split(', ')
+        object_parent = create_unit(0, premis, 'object')
+        object_identifier_uuid = create_unit(2, object_parent, 'objectIdentifier')
+        object_identifier_uuid_type = create_unit(1, object_identifier_uuid, 'objectIdentifierType')
+        object_identifier_uuid_value = create_unit(2, object_identifier_uuid, 'objectIdentifierValue')
+        object_category = create_unit(4, object_parent, 'objectCategory')
+        add_value(id_list[0], object_identifier_uuid_type)
+        add_value(id_list[1], object_identifier_uuid_value)
+        add_value(objects['objectCategory'], object_category)
+        if objects['objectCategory'] == 'file':
+            object_characteristics = create_unit(10, object_parent, 'objectCharacteristics')
+            fixity = create_unit(0, object_characteristics, 'fixity')
+            size = create_unit(1, object_characteristics, 'size')
+            size.text = objects['size']
+            message_digest_algorithm = create_unit(0, fixity, 'messageDigestAlgorithm')
+            message_digest = create_unit(1, fixity, 'messageDigest')
+            message_digest_originator = create_unit(2, fixity, 'messageDigestOriginator')
+            message_digest_originator.text = objects['messageDigestOriginator']
+            message_digest.text = objects['messageDigest']
+            message_digest_algorithm.text = objects['messageDigestAlgorithm']
+    print(etree.tostring(doc, pretty_print=True))
+    return premis_namespace, doc, premis
+def main():
+    csv_file = sys.argv[1]
+    object_dictionaries = extract_metadata(csv_file)
+    setup_xml(object_dictionaries)
+    for x in object_dictionaries:
+        for i in x:
+            if x[i] != '':
+                print i, x[i]
+        print '\n'
+
+if __name__ == '__main__':
+    main()

From 88b94bea1a3f3adadc8967d94f668fb3d10b4be5 Mon Sep 17 00:00:00 2001
From: kieranjol <kieran.o.leary@gmail.com>
Date: Sun, 30 Jul 2017 22:56:58 +0100
Subject: [PATCH 03/31] premiscsv2xml - PEP-08 cleanup

---
 premiscsv2xml.py | 73 ++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 58 insertions(+), 15 deletions(-)

diff --git a/premiscsv2xml.py b/premiscsv2xml.py
index c595f8d..8483bb4 100755
--- a/premiscsv2xml.py
+++ b/premiscsv2xml.py
@@ -1,11 +1,15 @@
 #!/usr/bin/env python
-
-import os
+'''
+Takes a PREMIS CSV file, as generated by premisobjects.py, and transforms it into XML.
+'''
 import sys
 import csv
 from lxml import etree
 
 def extract_metadata(csv_file):
+    '''
+    Read the PREMIS csv and store the metadata in a list of dictionaries.
+    '''
     object_dictionaries = []
     input_file = csv.DictReader(open(csv_file))
     for rows in input_file:
@@ -13,21 +17,34 @@ def extract_metadata(csv_file):
     return object_dictionaries
 
 def add_value(value, element):
+    '''
+    Useless helper function - remove!
+    '''
     element.text = value
 
 
 def write_premis(doc, premisxml):
-    with open(premisxml, 'w') as outFile:
-        doc.write(outFile, pretty_print=True)
+    '''
+    Writes the PREMIS object to a file.
+    '''
+    with open(premisxml, 'w') as out_file:
+        doc.write(out_file, pretty_print=True)
 
 
 def create_unit(index, parent, unitname):
+    '''
+    Helper function that adds an XML element.
+    '''
     premis_namespace = "http://www.loc.gov/premis/v3"
     unitname = etree.Element("{%s}%s" % (premis_namespace, unitname))
     parent.insert(index, unitname)
     return unitname
 
 def setup_xml(object_dictionaries):
+    '''
+    This should just create the PREMIS lxml object.
+    Actual metadata generation should be moved to other functions.
+    '''
     namespace = '<premis:premis xmlns:premis="http://www.loc.gov/premis/v3" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.loc.gov/premis/v3 https://www.loc.gov/standards/premis/premis.xsd" version="3.0"></premis:premis>'
     premis_namespace = "http://www.loc.gov/premis/v3"
     premis = etree.fromstring(namespace)
@@ -36,28 +53,54 @@ def setup_xml(object_dictionaries):
         id_list = objects['objectIdentifier'].replace(
             '[', ''
         ).replace(']', '').replace('\'', '').split(', ')
-        object_parent = create_unit(0, premis, 'object')
-        object_identifier_uuid = create_unit(2, object_parent, 'objectIdentifier')
-        object_identifier_uuid_type = create_unit(1, object_identifier_uuid, 'objectIdentifierType')
-        object_identifier_uuid_value = create_unit(2, object_identifier_uuid, 'objectIdentifierValue')
-        object_category = create_unit(4, object_parent, 'objectCategory')
+        object_parent = create_unit(
+            0, premis, 'object'
+        )
+        object_identifier_uuid = create_unit(
+            2, object_parent, 'objectIdentifier'
+        )
+        object_identifier_uuid_type = create_unit(
+            1, object_identifier_uuid, 'objectIdentifierType'
+        )
+        object_identifier_uuid_value = create_unit(
+            2, object_identifier_uuid, 'objectIdentifierValue'
+        )
+        object_category = create_unit(
+            4, object_parent, 'objectCategory'
+        )
         add_value(id_list[0], object_identifier_uuid_type)
         add_value(id_list[1], object_identifier_uuid_value)
         add_value(objects['objectCategory'], object_category)
         if objects['objectCategory'] == 'file':
-            object_characteristics = create_unit(10, object_parent, 'objectCharacteristics')
-            fixity = create_unit(0, object_characteristics, 'fixity')
-            size = create_unit(1, object_characteristics, 'size')
+            object_characteristics = create_unit(
+                10, object_parent, 'objectCharacteristics'
+            )
+            fixity = create_unit(
+                0, object_characteristics, 'fixity'
+            )
+            size = create_unit(
+                1, object_characteristics, 'size'
+            )
             size.text = objects['size']
-            message_digest_algorithm = create_unit(0, fixity, 'messageDigestAlgorithm')
-            message_digest = create_unit(1, fixity, 'messageDigest')
-            message_digest_originator = create_unit(2, fixity, 'messageDigestOriginator')
+            message_digest_algorithm = create_unit(
+                0, fixity, 'messageDigestAlgorithm'
+            )
+            message_digest = create_unit(
+                1, fixity, 'messageDigest'
+            )
+            message_digest_originator = create_unit(
+                2, fixity, 'messageDigestOriginator'
+            )
             message_digest_originator.text = objects['messageDigestOriginator']
             message_digest.text = objects['messageDigest']
             message_digest_algorithm.text = objects['messageDigestAlgorithm']
     print(etree.tostring(doc, pretty_print=True))
     return premis_namespace, doc, premis
 def main():
+    '''
+    Launches all the other functions when run from the command line.
+    For debugging purposes, the contents of the CSV is printed to screen.
+    '''
     csv_file = sys.argv[1]
     object_dictionaries = extract_metadata(csv_file)
     setup_xml(object_dictionaries)

From 713d2aff3eaa1fbcc2af8d94edd7668202a22b72 Mon Sep 17 00:00:00 2001
From: kieranjol <kieran.o.leary@gmail.com>
Date: Mon, 31 Jul 2017 00:53:10 +0100
Subject: [PATCH 04/31] premiscsv2xml - removes objectCategory, move
 information to attribute in order to validate against schema

---
 premiscsv2xml.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/premiscsv2xml.py b/premiscsv2xml.py
index 8483bb4..8d85c45 100755
--- a/premiscsv2xml.py
+++ b/premiscsv2xml.py
@@ -47,6 +47,7 @@ def setup_xml(object_dictionaries):
     '''
     namespace = '<premis:premis xmlns:premis="http://www.loc.gov/premis/v3" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.loc.gov/premis/v3 https://www.loc.gov/standards/premis/premis.xsd" version="3.0"></premis:premis>'
     premis_namespace = "http://www.loc.gov/premis/v3"
+    xsi_namespace = "http://www.w3.org/2001/XMLSchema-instance"
     premis = etree.fromstring(namespace)
     doc = etree.ElementTree(premis)
     for objects in object_dictionaries:
@@ -56,6 +57,9 @@ def setup_xml(object_dictionaries):
         object_parent = create_unit(
             0, premis, 'object'
         )
+        object_parent.attrib[
+            "{%s}type" % xsi_namespace
+        ] = "premis:%s" % objects['objectCategory']
         object_identifier_uuid = create_unit(
             2, object_parent, 'objectIdentifier'
         )
@@ -65,12 +69,8 @@ def setup_xml(object_dictionaries):
         object_identifier_uuid_value = create_unit(
             2, object_identifier_uuid, 'objectIdentifierValue'
         )
-        object_category = create_unit(
-            4, object_parent, 'objectCategory'
-        )
         add_value(id_list[0], object_identifier_uuid_type)
         add_value(id_list[1], object_identifier_uuid_value)
-        add_value(objects['objectCategory'], object_category)
         if objects['objectCategory'] == 'file':
             object_characteristics = create_unit(
                 10, object_parent, 'objectCharacteristics'

From de5b4b025b22b203f5090f3db953c83e3534d36d Mon Sep 17 00:00:00 2001
From: kieranjol <kieran.o.leary@gmail.com>
Date: Mon, 31 Jul 2017 11:12:55 +0100
Subject: [PATCH 05/31] premiscsv2xml - performs recursive item search

---
 premiscsv2xml.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/premiscsv2xml.py b/premiscsv2xml.py
index 8d85c45..f761127 100755
--- a/premiscsv2xml.py
+++ b/premiscsv2xml.py
@@ -104,11 +104,12 @@ def main():
     csv_file = sys.argv[1]
     object_dictionaries = extract_metadata(csv_file)
     setup_xml(object_dictionaries)
+    '''
     for x in object_dictionaries:
         for i in x:
             if x[i] != '':
                 print i, x[i]
         print '\n'
-
+    '''
 if __name__ == '__main__':
     main()

From 1986b32696d0cf23986aedd0db378b579e7cfe49 Mon Sep 17 00:00:00 2001
From: kieranjol <kieran.o.leary@gmail.com>
Date: Mon, 31 Jul 2017 11:13:21 +0100
Subject: [PATCH 06/31] premisobjects - remove debug statements

---
 premisobjects.py | 63 ++++++++++++++++++++++++------------------------
 1 file changed, 32 insertions(+), 31 deletions(-)

diff --git a/premisobjects.py b/premisobjects.py
index a534280..1024344 100755
--- a/premisobjects.py
+++ b/premisobjects.py
@@ -72,36 +72,37 @@ def file_description(source, manifest, representation_uuid):
     item_ids = []
     for root, _, filenames in os.walk(source):
         if os.path.basename(root) == 'objects':
-            filenames = [f for f in filenames if f[0] != '.']
-            for item in filenames:
-                item_uuid = ififuncs.create_uuid()
-                full_path = os.path.join(root, item)
-                item_dictionary = {}
-                item_dictionary['objectIdentifier'] = ['UUID', item_uuid]
-                item_dictionary['objectCategory'] = 'file'
-                item_dictionary['size'] = str(os.path.getsize(full_path))
-                item_dictionary['originalName'] = item
-                item_dictionary['relationship_structural_isincludedin'] = representation_uuid
-                item_ids.append(item_uuid)
-                file_data = [
-                    item_dictionary['objectIdentifier'],
-                    item_dictionary['objectCategory'],
-                    'md5', get_checksum(manifest, item), 'internal',
-                    item_dictionary['size'], '', '',
-                    '', '',
-                    '', '',
-                    '', '',
-                    '',
-                    '', '',
-                    '',
-                    '', '',
-                    '',
-                    item_dictionary['relationship_structural_isincludedin'],
-                    '',
-                    '',
-                    ''
-                ]
-                ififuncs.append_csv('cle.csv', file_data)
+            for root, _, filenames in os.walk(root):
+                filenames = [f for f in filenames if f[0] != '.']
+                for item in filenames:
+                    item_uuid = ififuncs.create_uuid()
+                    full_path = os.path.join(root, item)
+                    item_dictionary = {}
+                    item_dictionary['objectIdentifier'] = ['UUID', item_uuid]
+                    item_dictionary['objectCategory'] = 'file'
+                    item_dictionary['size'] = str(os.path.getsize(full_path))
+                    item_dictionary['originalName'] = item
+                    item_dictionary['relationship_structural_isincludedin'] = representation_uuid
+                    item_ids.append(item_uuid)
+                    file_data = [
+                        item_dictionary['objectIdentifier'],
+                        item_dictionary['objectCategory'],
+                        'md5', get_checksum(manifest, item), 'internal',
+                        item_dictionary['size'], '', '',
+                        '', '',
+                        '', '',
+                        '', '',
+                        '',
+                        '', '',
+                        '',
+                        '', '',
+                        '',
+                        item_dictionary['relationship_structural_isincludedin'],
+                        '',
+                        '',
+                        ''
+                    ]
+                    ififuncs.append_csv('cle.csv', file_data)
     return item_ids
 def representation_description(representation_uuid, item_ids):
     '''
@@ -142,7 +143,7 @@ def intellectual_entity_description():
     intellectual_entity_dictionary = {}
     intellectual_entity_dictionary['objectIdentifier'] = ['UUID', ififuncs.create_uuid()]
     intellectual_entity_dictionary['objectCategory'] = 'intellectual entity'
-    print intellectual_entity_dictionary
+    #print intellectual_entity_dictionary
 def find_representation_uuid(source):
     '''
     This extracts the representation UUID from a directory name.

From 2af635607add7b86c0a8de1acf3dc810bbeca58c Mon Sep 17 00:00:00 2001
From: kieranjol <kieran.o.leary@gmail.com>
Date: Wed, 2 Aug 2017 22:54:50 +0100
Subject: [PATCH 07/31] ififuncs - adds pronom/siegfried function

---
 ififuncs.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/ififuncs.py b/ififuncs.py
index 385c6d1..d587049 100755
--- a/ififuncs.py
+++ b/ififuncs.py
@@ -12,6 +12,7 @@
 import uuid
 import tempfile
 import csv
+import json
 from glob import glob
 from email.mime.multipart import MIMEMultipart
 from email.mime.audio import MIMEAudio
@@ -774,3 +775,18 @@ def checksum_replace(manifest, logname):
     with open(manifest, 'wb') as fo:
         for lines in updated_manifest:
             fo.write(lines)
+
+def get_pronom_format(filename):
+    '''
+    Uses siegfried to return a tuple that contains:
+    pronom_id, authority, siegfried version
+    '''
+    siegfried_json = subprocess.check_output(
+        ['sf', '-json', filename]
+    )
+    json_object = json.loads(siegfried_json)
+    pronom_id = str(json_object['files'][0]['matches'][0]['id'])
+    authority = str(json_object['files'][0]['matches'][0]['ns'])
+    version = str(json_object['siegfried'])
+    return (pronom_id, authority, version)
+

From 55b6c86e535840ec84cfbd2525ab7ff32bbcab5e Mon Sep 17 00:00:00 2001
From: kieranjol <kieran.o.leary@gmail.com>
Date: Wed, 2 Aug 2017 23:14:03 +0100
Subject: [PATCH 08/31] premiscsv - adds siegfried/pronom format registry

---
 premiscsv2xml.py | 18 ++++++++++++++++++
 premisobjects.py |  6 ++++++
 2 files changed, 24 insertions(+)

diff --git a/premiscsv2xml.py b/premiscsv2xml.py
index f761127..8e5319e 100755
--- a/premiscsv2xml.py
+++ b/premiscsv2xml.py
@@ -81,6 +81,21 @@ def setup_xml(object_dictionaries):
             size = create_unit(
                 1, object_characteristics, 'size'
             )
+            format_ = create_unit(
+                2, object_characteristics, 'format'
+            )
+            format_registry = create_unit(
+                1, format_, 'formatRegistry'
+            )
+            format_registry_name = create_unit(
+                0, format_registry, 'formatRegistryName'
+            )
+            format_registry_key = create_unit(
+                1, format_registry, 'formatRegistryKey'
+            )
+            format_registry_role = create_unit(
+                2, format_registry, 'formatRegistryRole'
+            )
             size.text = objects['size']
             message_digest_algorithm = create_unit(
                 0, fixity, 'messageDigestAlgorithm'
@@ -94,6 +109,9 @@ def setup_xml(object_dictionaries):
             message_digest_originator.text = objects['messageDigestOriginator']
             message_digest.text = objects['messageDigest']
             message_digest_algorithm.text = objects['messageDigestAlgorithm']
+            format_registry_name.text = objects['formatRegistryName']
+            format_registry_key.text = objects['formatRegistryKey']
+            format_registry_role.text = objects['formatRegistryRole']
     print(etree.tostring(doc, pretty_print=True))
     return premis_namespace, doc, premis
 def main():
diff --git a/premisobjects.py b/premisobjects.py
index 1024344..082466e 100755
--- a/premisobjects.py
+++ b/premisobjects.py
@@ -49,6 +49,7 @@ def make_skeleton_csv():
         'objectCategory',
         'messageDigestAlgorithm', 'messageDigest', 'messageDigestOriginator',
         'size',	'formatName', 'formatVersion',
+        'formatRegistryName', 'formatRegistryKey', 'formatRegistryRole',
         'objectCharacteristicsExtension', 'originalName',
         'contentLocationType', 'contentLocationValue',
         'relatedObjectIdentifierType', 'relatedObjectIdentifierValue',
@@ -77,6 +78,9 @@ def file_description(source, manifest, representation_uuid):
                 for item in filenames:
                     item_uuid = ififuncs.create_uuid()
                     full_path = os.path.join(root, item)
+                    pronom_id, authority, version = ififuncs.get_pronom_format(
+                        full_path
+                    )
                     item_dictionary = {}
                     item_dictionary['objectIdentifier'] = ['UUID', item_uuid]
                     item_dictionary['objectCategory'] = 'file'
@@ -89,6 +93,7 @@ def file_description(source, manifest, representation_uuid):
                         item_dictionary['objectCategory'],
                         'md5', get_checksum(manifest, item), 'internal',
                         item_dictionary['size'], '', '',
+                        authority, pronom_id, 'identification',
                         '', '',
                         '', '',
                         '', '',
@@ -120,6 +125,7 @@ def representation_description(representation_uuid, item_ids):
         representation_dictionary['objectCategory'],
         '', '', '',
         '', '', '',
+        '', '', '',
         '', '',
         '', '',
         '', '',

From f4af73e27e08eae269dde1b62fc5f5cc8ec8eb52 Mon Sep 17 00:00:00 2001
From: kieranjol <kieran.o.leary@gmail.com>
Date: Thu, 3 Aug 2017 22:47:35 +0100
Subject: [PATCH 09/31] premiscsv - fix mediainfo eventDetail

---
 premiscsv.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/premiscsv.py b/premiscsv.py
index 8ee66b8..c0dde87 100755
--- a/premiscsv.py
+++ b/premiscsv.py
@@ -67,6 +67,9 @@ def find_events(logfile):
                         event_outcome_detail_note = 'Submission Information Package'
                     if ('eventDetail=Mediatrace' in log_entry) or ('eventDetail=Technical' in log_entry):
                         event_type = 'metadata extraction'
+                        event_detail = log_entry.split(
+                            'eventDetail=', 1
+                        )[1].split(',')[0]
                         event_outcome = log_entry.split(
                             'eventOutcome=', 1
                         )[1].replace(', agentName=mediainfo', '').replace('\n', '')
@@ -85,7 +88,7 @@ def find_events(logfile):
                     '', 'UUID',
                     linking_object_identifier_value, ''
                 ]
-                ififuncs.append_csv('bla.csv', event_row)
+                ififuncs.append_csv('events.csv', event_row)
 
 def make_events_csv():
     '''
@@ -104,12 +107,13 @@ def make_events_csv():
         'linkingAgentIdentifierRole', 'linkingObjectIdentifierType',
         'linkingObjectIdentifierValue', 'linkingObjectRole'
     ]
-    ififuncs.create_csv('bla.csv', premis_events)
+    ififuncs.create_csv('events.csv', premis_events)
 
 def main():
     '''
     Launches all the other functions when run from the command line.
     '''
+    make_events_csv()
     logfile = sys.argv[1]
     find_events(logfile)
 

From 33b78d5c9a1d446dcd0a7055dc1302e25f195404 Mon Sep 17 00:00:00 2001
From: kieranjol <kieran.o.leary@gmail.com>
Date: Thu, 3 Aug 2017 23:06:06 +0100
Subject: [PATCH 10/31] premisobjects - extract relative path of object for
 contentLocation

---
 premisobjects.py | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/premisobjects.py b/premisobjects.py
index 082466e..ae0ba84 100755
--- a/premisobjects.py
+++ b/premisobjects.py
@@ -27,14 +27,14 @@
 
 def get_checksum(manifest, filename):
     '''
-    Extracts checksum from manifest, rather than generating a fresh one.
+    Extracts the checksum and path within a manifest, returning both as a tuple.
     '''
     if os.path.isfile(manifest):
         with open(manifest, 'r') as manifest_object:
             manifest_lines = manifest_object.readlines()
             for md5 in manifest_lines:
                 if filename in md5:
-                    return md5[:32]
+                    return md5[:32], md5[34:]
 
 
 def make_skeleton_csv():
@@ -63,7 +63,7 @@ def make_skeleton_csv():
         'relationship_structural_hasroot',
         'relationship_derivation_hassource'
     ]
-    ififuncs.create_csv('cle.csv', premis_object_units)
+    ififuncs.create_csv('objects.csv', premis_object_units)
 
 
 def file_description(source, manifest, representation_uuid):
@@ -76,6 +76,7 @@ def file_description(source, manifest, representation_uuid):
             for root, _, filenames in os.walk(root):
                 filenames = [f for f in filenames if f[0] != '.']
                 for item in filenames:
+                    md5, urn = get_checksum(manifest, item)
                     item_uuid = ififuncs.create_uuid()
                     full_path = os.path.join(root, item)
                     pronom_id, authority, version = ififuncs.get_pronom_format(
@@ -91,11 +92,11 @@ def file_description(source, manifest, representation_uuid):
                     file_data = [
                         item_dictionary['objectIdentifier'],
                         item_dictionary['objectCategory'],
-                        'md5', get_checksum(manifest, item), 'internal',
+                        'md5', md5, 'internal',
                         item_dictionary['size'], '', '',
                         authority, pronom_id, 'identification',
-                        '', '',
-                        '', '',
+                        '', item,
+                        'urn', urn,
                         '', '',
                         '',
                         '', '',
@@ -107,7 +108,7 @@ def file_description(source, manifest, representation_uuid):
                         '',
                         ''
                     ]
-                    ififuncs.append_csv('cle.csv', file_data)
+                    ififuncs.append_csv('objects.csv', file_data)
     return item_ids
 def representation_description(representation_uuid, item_ids):
     '''
@@ -139,7 +140,7 @@ def representation_description(representation_uuid, item_ids):
         '',
         ''
     ]
-    ififuncs.append_csv('cle.csv', representation_data)
+    ififuncs.append_csv('objects.csv', representation_data)
 
 
 def intellectual_entity_description():

From 7bc4538ca2c3ed5f413b5a1a813c1773e4a3f19c Mon Sep 17 00:00:00 2001
From: kieranjol <kieran.o.leary@gmail.com>
Date: Fri, 4 Aug 2017 00:51:49 +0100
Subject: [PATCH 11/31] premiscsv - extract linkingObjectIdentifier for
 metadata extraction

---
 premiscsv.py     | 26 ++++++++++++++++++++++++++
 premisobjects.py |  5 +++--
 2 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/premiscsv.py b/premiscsv.py
index c0dde87..5874219 100755
--- a/premiscsv.py
+++ b/premiscsv.py
@@ -5,8 +5,19 @@
 '''
 import os
 import sys
+import csv
 # from lxml import etree
 import ififuncs
+
+def extract_metadata(csv_file):
+    '''
+    Read the PREMIS csv and store the metadata in a list of dictionaries.
+    '''
+    object_dictionaries = []
+    input_file = csv.DictReader(open(csv_file))
+    for rows in input_file:
+        object_dictionaries.append(rows)
+    return object_dictionaries
 def find_events(logfile):
     '''
     A very hacky attempt to extract the relevant preservation events from our
@@ -24,6 +35,10 @@ def find_events(logfile):
                 manifest_event = line_fragment.replace(
                     'eventDetail', ''
                 ).replace('\n', '').split('=')[1]
+    object_info = extract_metadata('objects.csv')
+    object_locations = {}
+    for i in object_info:
+        object_locations[i['contentLocationValue']] = i['objectIdentifier'].split(', ')[1].replace(']', '')
     for log_entry in log_lines:
         valid_entries = [
             'eventType',
@@ -75,6 +90,17 @@ def find_events(logfile):
                         )[1].replace(', agentName=mediainfo', '').replace('\n', '')
                         if 'eventDetail=Mediatrace' in log_entry:
                             event_outcome = event_outcome.replace('mediainfo.xml', 'mediatrace.xml')
+                        for x in object_locations:
+                            '''
+                            This is trying to get the UUID of the source object
+                            that relates to the mediainfo xmls. This is
+                            achieved via a dictionary.
+                            '''
+                            if 'objects' in x:
+                                a = os.path.basename(event_outcome).replace('_mediainfo.xml', '').replace('_mediatrace.xml', '')[:-1]
+                                b = os.path.basename(x)
+                                if a == b:
+                                    linking_object_identifier_value = object_locations[x].replace('\'','')
                 if (break_loop == 'continue') or (event_type == ''):
                     continue
                 print event_type
diff --git a/premisobjects.py b/premisobjects.py
index ae0ba84..503fc9e 100755
--- a/premisobjects.py
+++ b/premisobjects.py
@@ -33,8 +33,9 @@ def get_checksum(manifest, filename):
         with open(manifest, 'r') as manifest_object:
             manifest_lines = manifest_object.readlines()
             for md5 in manifest_lines:
-                if filename in md5:
-                    return md5[:32], md5[34:]
+                if 'objects' in md5:
+                    if filename in md5:
+                        return md5[:32], md5[34:].rstrip()
 
 
 def make_skeleton_csv():

From 0f6e048728c408eae69545e418c097314724988d Mon Sep 17 00:00:00 2001
From: kieranjol <kieran.o.leary@gmail.com>
Date: Fri, 4 Aug 2017 01:13:34 +0100
Subject: [PATCH 12/31] premiscsv - adds contentLocation to xml transform

---
 premiscsv2xml.py | 16 +++++++++++++++-
 premisobjects.py |  4 ++--
 2 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/premiscsv2xml.py b/premiscsv2xml.py
index 8e5319e..266a28d 100755
--- a/premiscsv2xml.py
+++ b/premiscsv2xml.py
@@ -73,7 +73,19 @@ def setup_xml(object_dictionaries):
         add_value(id_list[1], object_identifier_uuid_value)
         if objects['objectCategory'] == 'file':
             object_characteristics = create_unit(
-                10, object_parent, 'objectCharacteristics'
+                5, object_parent, 'objectCharacteristics'
+            )
+            storage = create_unit(
+                7, object_parent, 'storage'
+            )
+            content_location = create_unit(
+                0, storage, 'contentLocation'
+            )
+            content_location_type = create_unit(
+                0, content_location, 'contentLocationType'
+            )
+            content_location_value = create_unit(
+                1, content_location, 'contentLocationValue'
             )
             fixity = create_unit(
                 0, object_characteristics, 'fixity'
@@ -112,6 +124,8 @@ def setup_xml(object_dictionaries):
             format_registry_name.text = objects['formatRegistryName']
             format_registry_key.text = objects['formatRegistryKey']
             format_registry_role.text = objects['formatRegistryRole']
+            content_location_type.text = objects['contentLocationType']
+            content_location_value.text = objects['contentLocationValue']
     print(etree.tostring(doc, pretty_print=True))
     return premis_namespace, doc, premis
 def main():
diff --git a/premisobjects.py b/premisobjects.py
index 503fc9e..991249b 100755
--- a/premisobjects.py
+++ b/premisobjects.py
@@ -77,7 +77,7 @@ def file_description(source, manifest, representation_uuid):
             for root, _, filenames in os.walk(root):
                 filenames = [f for f in filenames if f[0] != '.']
                 for item in filenames:
-                    md5, urn = get_checksum(manifest, item)
+                    md5, uri = get_checksum(manifest, item)
                     item_uuid = ififuncs.create_uuid()
                     full_path = os.path.join(root, item)
                     pronom_id, authority, version = ififuncs.get_pronom_format(
@@ -97,7 +97,7 @@ def file_description(source, manifest, representation_uuid):
                         item_dictionary['size'], '', '',
                         authority, pronom_id, 'identification',
                         '', item,
-                        'urn', urn,
+                        'uri', uri,
                         '', '',
                         '',
                         '', '',

From 4ab7049d3f1ff2e2fbb305ee1d0c274ced0caac6 Mon Sep 17 00:00:00 2001
From: kieranjol <kieran.o.leary@gmail.com>
Date: Sat, 5 Aug 2017 18:45:40 +0100
Subject: [PATCH 13/31] adds linkingEventIdentifierValue to object descriptions

---
 premiscsv.py | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)

diff --git a/premiscsv.py b/premiscsv.py
index 5874219..0270d96 100755
--- a/premiscsv.py
+++ b/premiscsv.py
@@ -116,6 +116,53 @@ def find_events(logfile):
                 ]
                 ififuncs.append_csv('events.csv', event_row)
 
+def update_objects():
+    '''
+    Update the object description with the linkingEventIdentifiers
+    '''
+    link_dict = {}
+    event_dicts = extract_metadata('events.csv')
+    for i in event_dicts:
+        a =  i['eventIdentifierValue']
+        try:
+            link_dict[i['linkingObjectIdentifierValue']]  += a + '|'
+        except KeyError:
+            link_dict[i['linkingObjectIdentifierValue']]  = a + '|'
+    print link_dict
+    object_dicts = extract_metadata('objects.csv')
+    for x in object_dicts:
+        for link in link_dict:
+
+            if link ==  x['objectIdentifier'].split(', ')[1].replace(']', '').replace('\'',''):
+                x['linkingEventIdentifierValue'] = link_dict[link]
+    premis_object_units = [
+        'objectIdentifier',
+        'objectCategory',
+        'messageDigestAlgorithm', 'messageDigest', 'messageDigestOriginator',
+        'size',	'formatName', 'formatVersion',
+        'formatRegistryName', 'formatRegistryKey', 'formatRegistryRole',
+        'objectCharacteristicsExtension', 'originalName',
+        'contentLocationType', 'contentLocationValue',
+        'relatedObjectIdentifierType', 'relatedObjectIdentifierValue',
+        'relatedObjectSequence',
+        'relatedEventIdentifierType', 'relatedEventIdentifierValue',
+        'relatedEventSequence',
+        'linkingEventIdentifierType', 'linkingEventIdentifierValue',
+        'relationship_structural_includes',
+        'relationship_structural_isincludedin',
+        'relationship_structural_represents',
+        'relationship_structural_hasroot',
+        'relationship_derivation_hassource'
+    ]
+    with open('mycsvfile.csv', 'wb') as f:
+        counter = 0
+        for i in object_dicts:
+            w = csv.DictWriter(f, fieldnames=premis_object_units)
+            if counter == 0:
+                w.writeheader()
+            counter += 1
+            w.writerow(i)
+
 def make_events_csv():
     '''
     Generates a CSV with PREMIS-esque headings. Currently it's just called
@@ -142,6 +189,7 @@ def main():
     make_events_csv()
     logfile = sys.argv[1]
     find_events(logfile)
+    update_objects()
 
 if __name__ == '__main__':
     main()

From 494a167f8913f09a984e40c4a6d0a6343e67449d Mon Sep 17 00:00:00 2001
From: kieranjol <kieran.o.leary@gmail.com>
Date: Sat, 5 Aug 2017 19:24:44 +0100
Subject: [PATCH 14/31] premiscsv2xml - converts Events CSV to XML

---
 premiscsv2xml.py | 29 +++++++++++++++++++++++++++--
 1 file changed, 27 insertions(+), 2 deletions(-)

diff --git a/premiscsv2xml.py b/premiscsv2xml.py
index 266a28d..1469bf8 100755
--- a/premiscsv2xml.py
+++ b/premiscsv2xml.py
@@ -40,7 +40,7 @@ def create_unit(index, parent, unitname):
     parent.insert(index, unitname)
     return unitname
 
-def setup_xml(object_dictionaries):
+def setup_xml(object_dictionaries, event_dictionaries):
     '''
     This should just create the PREMIS lxml object.
     Actual metadata generation should be moved to other functions.
@@ -126,6 +126,29 @@ def setup_xml(object_dictionaries):
             format_registry_role.text = objects['formatRegistryRole']
             content_location_type.text = objects['contentLocationType']
             content_location_value.text = objects['contentLocationValue']
+    for x in event_dictionaries:
+        event_parent = create_unit(
+                99, premis, 'event'
+            )
+        event_identifier_uuid = create_unit(
+            1, event_parent, 'eventIdentifier'
+        )
+        event_identifier_uuid_type = create_unit(
+            1, event_identifier_uuid, 'eventIdentifierType'
+        )
+        event_identifier_uuid_value = create_unit(
+            2, event_identifier_uuid, 'eventIdentifierValue'
+        )
+        event_type = create_unit(
+            1, event_parent, 'eventType'
+        )
+        event_date_time = create_unit(
+            1, event_parent, 'eventDateTime'
+        )
+        event_identifier_uuid_type.text = x['eventIdentifierType']
+        event_identifier_uuid_value.text = x['eventIdentifierValue']
+        event_type.text = x['eventType']
+        event_date_time.text = x['eventDateTime']
     print(etree.tostring(doc, pretty_print=True))
     return premis_namespace, doc, premis
 def main():
@@ -134,8 +157,10 @@ def main():
     For debugging purposes, the contents of the CSV is printed to screen.
     '''
     csv_file = sys.argv[1]
+    events_csv = sys.argv[2]
     object_dictionaries = extract_metadata(csv_file)
-    setup_xml(object_dictionaries)
+    event_dictionaries = extract_metadata(events_csv)
+    setup_xml(object_dictionaries, event_dictionaries)
     '''
     for x in object_dictionaries:
         for i in x:

From 39c3235fa27af94c700d3f51e391bc66e595ab6c Mon Sep 17 00:00:00 2001
From: kieranjol <kieran.o.leary@gmail.com>
Date: Sat, 5 Aug 2017 19:27:08 +0100
Subject: [PATCH 15/31] premiscsv2xml - fixes element order, XML validates
 again against schema

---
 premiscsv2xml.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/premiscsv2xml.py b/premiscsv2xml.py
index 1469bf8..1361432 100755
--- a/premiscsv2xml.py
+++ b/premiscsv2xml.py
@@ -143,7 +143,7 @@ def setup_xml(object_dictionaries, event_dictionaries):
             1, event_parent, 'eventType'
         )
         event_date_time = create_unit(
-            1, event_parent, 'eventDateTime'
+            2, event_parent, 'eventDateTime'
         )
         event_identifier_uuid_type.text = x['eventIdentifierType']
         event_identifier_uuid_value.text = x['eventIdentifierValue']

From 63cc919bae0bcfcd4f0c4f4ac3e60af535c9fec4 Mon Sep 17 00:00:00 2001
From: kieranjol <kieran.o.leary@gmail.com>
Date: Sat, 5 Aug 2017 23:04:18 +0100
Subject: [PATCH 16/31] premiscsv2xml - more PREMIS event info

---
 premiscsv2xml.py | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/premiscsv2xml.py b/premiscsv2xml.py
index 1361432..1815719 100755
--- a/premiscsv2xml.py
+++ b/premiscsv2xml.py
@@ -145,10 +145,31 @@ def setup_xml(object_dictionaries, event_dictionaries):
         event_date_time = create_unit(
             2, event_parent, 'eventDateTime'
         )
+        event_detail_information = create_unit(
+            3, event_parent, 'eventDetailInformation'
+        )
+        event_detail = create_unit(
+            1, event_detail_information, 'eventDetail'
+        )
+        event_outcome_information = create_unit(
+            4, event_parent, 'eventOutcomeInformation'
+        )
+        event_outcome = create_unit(
+            1, event_outcome_information, 'eventOutcome'
+        )
+        event_outcome_detail = create_unit(
+            2, event_outcome_information, 'eventOutcomeDetail'
+        )
+        event_outcome_detail_note = create_unit(
+            1, event_outcome_detail, 'eventOutcomeDetailNote'
+        )
         event_identifier_uuid_type.text = x['eventIdentifierType']
         event_identifier_uuid_value.text = x['eventIdentifierValue']
         event_type.text = x['eventType']
         event_date_time.text = x['eventDateTime']
+        event_detail.text = x['eventDetail']
+        event_outcome.text = x['eventOutcome']
+        event_outcome_detail_note.text = x['eventOutcomeDetailNote']
     print(etree.tostring(doc, pretty_print=True))
     return premis_namespace, doc, premis
 def main():

From d079604d57ea4a488ccfb46b47028c6f0290d864 Mon Sep 17 00:00:00 2001
From: kieranjol <kieran.o.leary@gmail.com>
Date: Sat, 5 Aug 2017 23:22:32 +0100
Subject: [PATCH 17/31] premiscsv2xml - adds linkingEventIdentifiers

---
 premiscsv2xml.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/premiscsv2xml.py b/premiscsv2xml.py
index 1815719..920b9ee 100755
--- a/premiscsv2xml.py
+++ b/premiscsv2xml.py
@@ -126,6 +126,20 @@ def setup_xml(object_dictionaries, event_dictionaries):
             format_registry_role.text = objects['formatRegistryRole']
             content_location_type.text = objects['contentLocationType']
             content_location_value.text = objects['contentLocationValue']
+        linked_events = objects['linkingEventIdentifierValue'].split('|')
+        for event in linked_events:
+            if event != '':
+                linking_event_identifier = create_unit(
+                        99, object_parent, 'linkingEventIdentifier'
+                    )
+                linking_event_identifier_type = create_unit(
+                        1, linking_event_identifier, 'linkingEventIdentifierType'
+                    )
+                linking_event_identifier_value = create_unit(
+                        2, linking_event_identifier, 'linkingEventIdentifierValue'
+                    )
+                linking_event_identifier_type.text = 'UUID'
+                linking_event_identifier_value.text = event
     for x in event_dictionaries:
         event_parent = create_unit(
                 99, premis, 'event'

From bd4259f04833ad94de4b16a717ea4dc8f2f922c9 Mon Sep 17 00:00:00 2001
From: kieranjol <kieran.o.leary@gmail.com>
Date: Sun, 6 Aug 2017 18:33:25 +0100
Subject: [PATCH 18/31] ififuncs/premiscsv - moves functions from premis
 scripts into ififuncs

---
 ififuncs.py      | 31 +++++++++++++++++++++++++++++++
 premiscsv.py     | 18 ++++++------------
 premiscsv2xml.py | 15 +++------------
 premisobjects.py | 24 +++---------------------
 4 files changed, 43 insertions(+), 45 deletions(-)

diff --git a/ififuncs.py b/ififuncs.py
index d587049..87a8d1d 100755
--- a/ififuncs.py
+++ b/ififuncs.py
@@ -790,3 +790,34 @@ def get_pronom_format(filename):
     version = str(json_object['siegfried'])
     return (pronom_id, authority, version)
 
+def get_checksum(manifest, filename):
+    '''
+    Extracts the checksum and path within a manifest, returning both as a tuple.
+    '''
+    if os.path.isfile(manifest):
+        with open(manifest, 'r') as manifest_object:
+            manifest_lines = manifest_object.readlines()
+            for md5 in manifest_lines:
+                if 'objects' in md5:
+                    if filename in md5:
+                        return md5[:32], md5[34:].rstrip()
+
+def find_representation_uuid(source):
+    '''
+    This extracts the representation UUID from a directory name.
+    This should be moved to ififuncs as it can be used by other scripts.
+    '''
+    for root, _, _ in os.walk(source):
+        if 'objects' in root:
+            return os.path.basename(os.path.dirname(root))
+
+def extract_metadata(csv_file):
+    '''
+    Read the PREMIS csv and store the metadata in a list of dictionaries.
+    '''
+    object_dictionaries = []
+    input_file = csv.DictReader(open(csv_file))
+    for rows in input_file:
+        object_dictionaries.append(rows)
+    return object_dictionaries
+
diff --git a/premiscsv.py b/premiscsv.py
index 0270d96..f0a2317 100755
--- a/premiscsv.py
+++ b/premiscsv.py
@@ -6,18 +6,11 @@
 import os
 import sys
 import csv
+import shutil
 # from lxml import etree
 import ififuncs
 
-def extract_metadata(csv_file):
-    '''
-    Read the PREMIS csv and store the metadata in a list of dictionaries.
-    '''
-    object_dictionaries = []
-    input_file = csv.DictReader(open(csv_file))
-    for rows in input_file:
-        object_dictionaries.append(rows)
-    return object_dictionaries
+
 def find_events(logfile):
     '''
     A very hacky attempt to extract the relevant preservation events from our
@@ -35,7 +28,7 @@ def find_events(logfile):
                 manifest_event = line_fragment.replace(
                     'eventDetail', ''
                 ).replace('\n', '').split('=')[1]
-    object_info = extract_metadata('objects.csv')
+    object_info = ififuncs.extract_metadata('objects.csv')
     object_locations = {}
     for i in object_info:
         object_locations[i['contentLocationValue']] = i['objectIdentifier'].split(', ')[1].replace(']', '')
@@ -121,7 +114,7 @@ def update_objects():
     Update the object description with the linkingEventIdentifiers
     '''
     link_dict = {}
-    event_dicts = extract_metadata('events.csv')
+    event_dicts = ififuncs.extract_metadata('events.csv')
     for i in event_dicts:
         a =  i['eventIdentifierValue']
         try:
@@ -129,7 +122,7 @@ def update_objects():
         except KeyError:
             link_dict[i['linkingObjectIdentifierValue']]  = a + '|'
     print link_dict
-    object_dicts = extract_metadata('objects.csv')
+    object_dicts = ififuncs.extract_metadata('objects.csv')
     for x in object_dicts:
         for link in link_dict:
 
@@ -162,6 +155,7 @@ def update_objects():
                 w.writeheader()
             counter += 1
             w.writerow(i)
+    shutil.move('mycsvfile.csv', 'objects.csv')
 
 def make_events_csv():
     '''
diff --git a/premiscsv2xml.py b/premiscsv2xml.py
index 920b9ee..1fc0921 100755
--- a/premiscsv2xml.py
+++ b/premiscsv2xml.py
@@ -5,16 +5,7 @@
 import sys
 import csv
 from lxml import etree
-
-def extract_metadata(csv_file):
-    '''
-    Read the PREMIS csv and store the metadata in a list of dictionaries.
-    '''
-    object_dictionaries = []
-    input_file = csv.DictReader(open(csv_file))
-    for rows in input_file:
-        object_dictionaries.append(rows)
-    return object_dictionaries
+import ififuncs
 
 def add_value(value, element):
     '''
@@ -193,8 +184,8 @@ def main():
     '''
     csv_file = sys.argv[1]
     events_csv = sys.argv[2]
-    object_dictionaries = extract_metadata(csv_file)
-    event_dictionaries = extract_metadata(events_csv)
+    object_dictionaries = ififuncs.extract_metadata(csv_file)
+    event_dictionaries = ififuncs.extract_metadata(events_csv)
     setup_xml(object_dictionaries, event_dictionaries)
     '''
     for x in object_dictionaries:
diff --git a/premisobjects.py b/premisobjects.py
index 991249b..f0fe6db 100755
--- a/premisobjects.py
+++ b/premisobjects.py
@@ -25,17 +25,7 @@
 import ififuncs
 
 
-def get_checksum(manifest, filename):
-    '''
-    Extracts the checksum and path within a manifest, returning both as a tuple.
-    '''
-    if os.path.isfile(manifest):
-        with open(manifest, 'r') as manifest_object:
-            manifest_lines = manifest_object.readlines()
-            for md5 in manifest_lines:
-                if 'objects' in md5:
-                    if filename in md5:
-                        return md5[:32], md5[34:].rstrip()
+
 
 
 def make_skeleton_csv():
@@ -77,7 +67,7 @@ def file_description(source, manifest, representation_uuid):
             for root, _, filenames in os.walk(root):
                 filenames = [f for f in filenames if f[0] != '.']
                 for item in filenames:
-                    md5, uri = get_checksum(manifest, item)
+                    md5, uri = ififuncs.get_checksum(manifest, item)
                     item_uuid = ififuncs.create_uuid()
                     full_path = os.path.join(root, item)
                     pronom_id, authority, version = ififuncs.get_pronom_format(
@@ -152,14 +142,6 @@ def intellectual_entity_description():
     intellectual_entity_dictionary['objectIdentifier'] = ['UUID', ififuncs.create_uuid()]
     intellectual_entity_dictionary['objectCategory'] = 'intellectual entity'
     #print intellectual_entity_dictionary
-def find_representation_uuid(source):
-    '''
-    This extracts the representation UUID from a directory name.
-    This should be moved to ififuncs as it can be used by other scripts.
-    '''
-    for root, _, _ in os.walk(source):
-        if 'objects' in root:
-            return os.path.basename(os.path.dirname(root))
 
 
 def main():
@@ -169,7 +151,7 @@ def main():
     make_skeleton_csv()
     source = sys.argv[1]
     manifest = sys.argv[2]
-    representation_uuid = find_representation_uuid(source)
+    representation_uuid = ififuncs.find_representation_uuid(source)
     item_ids = file_description(source, manifest, representation_uuid)
     #intellectual_entity_description()
     representation_description(representation_uuid, item_ids)

From a455a45cedf2f92deedeb6fec0b88391ffa2ee7b Mon Sep 17 00:00:00 2001
From: kieranjol <kieran.o.leary@gmail.com>
Date: Sun, 6 Aug 2017 20:36:38 +0100
Subject: [PATCH 19/31] premiscsv2xml - removes stupid add_value() function

---
 premiscsv2xml.py | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/premiscsv2xml.py b/premiscsv2xml.py
index 1fc0921..90ae7f5 100755
--- a/premiscsv2xml.py
+++ b/premiscsv2xml.py
@@ -7,12 +7,6 @@
 from lxml import etree
 import ififuncs
 
-def add_value(value, element):
-    '''
-    Useless helper function - remove!
-    '''
-    element.text = value
-
 
 def write_premis(doc, premisxml):
     '''
@@ -60,8 +54,8 @@ def setup_xml(object_dictionaries, event_dictionaries):
         object_identifier_uuid_value = create_unit(
             2, object_identifier_uuid, 'objectIdentifierValue'
         )
-        add_value(id_list[0], object_identifier_uuid_type)
-        add_value(id_list[1], object_identifier_uuid_value)
+        object_identifier_uuid_type.text = id_list[0]
+        object_identifier_uuid_value.text = id_list[1]
         if objects['objectCategory'] == 'file':
             object_characteristics = create_unit(
                 5, object_parent, 'objectCharacteristics'

From 4d762b53adf1cac2aa6f1073066f058759b67194 Mon Sep 17 00:00:00 2001
From: kieranjol <kieran.o.leary@gmail.com>
Date: Sun, 6 Aug 2017 21:55:28 +0100
Subject: [PATCH 20/31] premiscsv2xml - cleanup and docstrings

---
 premiscsv2xml.py | 43 +++++++++++++++++++++++++++----------------
 1 file changed, 27 insertions(+), 16 deletions(-)

diff --git a/premiscsv2xml.py b/premiscsv2xml.py
index 90ae7f5..be5bea5 100755
--- a/premiscsv2xml.py
+++ b/premiscsv2xml.py
@@ -3,7 +3,6 @@
 Takes a PREMIS CSV file, as generated by premiscsv.py, and transform into XML.
 '''
 import sys
-import csv
 from lxml import etree
 import ififuncs
 
@@ -25,16 +24,20 @@ def create_unit(index, parent, unitname):
     parent.insert(index, unitname)
     return unitname
 
-def setup_xml(object_dictionaries, event_dictionaries):
+def setup_xml():
     '''
     This should just create the PREMIS lxml object.
     Actual metadata generation should be moved to other functions.
     '''
     namespace = '<premis:premis xmlns:premis="http://www.loc.gov/premis/v3" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.loc.gov/premis/v3 https://www.loc.gov/standards/premis/premis.xsd" version="3.0"></premis:premis>'
-    premis_namespace = "http://www.loc.gov/premis/v3"
-    xsi_namespace = "http://www.w3.org/2001/XMLSchema-instance"
     premis = etree.fromstring(namespace)
-    doc = etree.ElementTree(premis)
+    return premis
+
+def describe_objects(premis, object_dictionaries):
+    '''
+    Converts the CSV object metadata into PREMIS XML.
+    '''
+    xsi_namespace = "http://www.w3.org/2001/XMLSchema-instance"
     for objects in object_dictionaries:
         id_list = objects['objectIdentifier'].replace(
             '[', ''
@@ -115,20 +118,25 @@ def setup_xml(object_dictionaries, event_dictionaries):
         for event in linked_events:
             if event != '':
                 linking_event_identifier = create_unit(
-                        99, object_parent, 'linkingEventIdentifier'
-                    )
+                    99, object_parent, 'linkingEventIdentifier'
+                )
                 linking_event_identifier_type = create_unit(
-                        1, linking_event_identifier, 'linkingEventIdentifierType'
-                    )
+                    1, linking_event_identifier, 'linkingEventIdentifierType'
+                )
                 linking_event_identifier_value = create_unit(
-                        2, linking_event_identifier, 'linkingEventIdentifierValue'
-                    )
+                    2, linking_event_identifier, 'linkingEventIdentifierValue'
+                )
                 linking_event_identifier_type.text = 'UUID'
                 linking_event_identifier_value.text = event
+    return premis, linked_events
+def describe_events(premis, event_dictionaries):
+    '''
+    Converts the CSV object metadata into PREMIS XML.
+    '''
     for x in event_dictionaries:
         event_parent = create_unit(
-                99, premis, 'event'
-            )
+            99, premis, 'event'
+        )
         event_identifier_uuid = create_unit(
             1, event_parent, 'eventIdentifier'
         )
@@ -169,8 +177,9 @@ def setup_xml(object_dictionaries, event_dictionaries):
         event_detail.text = x['eventDetail']
         event_outcome.text = x['eventOutcome']
         event_outcome_detail_note.text = x['eventOutcomeDetailNote']
-    print(etree.tostring(doc, pretty_print=True))
-    return premis_namespace, doc, premis
+    print(etree.tostring(premis, pretty_print=True))
+
+
 def main():
     '''
     Launches all the other functions when run from the command line.
@@ -180,7 +189,9 @@ def main():
     events_csv = sys.argv[2]
     object_dictionaries = ififuncs.extract_metadata(csv_file)
     event_dictionaries = ififuncs.extract_metadata(events_csv)
-    setup_xml(object_dictionaries, event_dictionaries)
+    premis = setup_xml()
+    premis = describe_objects(premis, object_dictionaries)
+    describe_events(premis, event_dictionaries)
     '''
     for x in object_dictionaries:
         for i in x:

From 73f08d619a92c6d5f8f3d6c463c9ff74e15fb539 Mon Sep 17 00:00:00 2001
From: kieranjol <kieran.o.leary@gmail.com>
Date: Sun, 6 Aug 2017 23:10:48 +0100
Subject: [PATCH 21/31] premisobjects/premiscsv2xml - more cleanup

---
 premiscsv2xml.py | 11 +++--------
 premisobjects.py |  1 -
 2 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/premiscsv2xml.py b/premiscsv2xml.py
index be5bea5..127fb8d 100755
--- a/premiscsv2xml.py
+++ b/premiscsv2xml.py
@@ -128,7 +128,7 @@ def describe_objects(premis, object_dictionaries):
                 )
                 linking_event_identifier_type.text = 'UUID'
                 linking_event_identifier_value.text = event
-    return premis, linked_events
+    return premis
 def describe_events(premis, event_dictionaries):
     '''
     Converts the CSV object metadata into PREMIS XML.
@@ -192,12 +192,7 @@ def main():
     premis = setup_xml()
     premis = describe_objects(premis, object_dictionaries)
     describe_events(premis, event_dictionaries)
-    '''
-    for x in object_dictionaries:
-        for i in x:
-            if x[i] != '':
-                print i, x[i]
-        print '\n'
-    '''
+
+
 if __name__ == '__main__':
     main()
diff --git a/premisobjects.py b/premisobjects.py
index f0fe6db..af788ce 100755
--- a/premisobjects.py
+++ b/premisobjects.py
@@ -12,7 +12,6 @@
 todo:
 Document identifier assignment for files and IE. Probably in events sheet?
 Allow for derivation to be entered
-Link with events sheet
 Link mediainfo xml in /metadata to the objectCharacteristicsExtension field.
 
 

From a6288cc032c3af17de1fe97b0c6770dd4ab1aba7 Mon Sep 17 00:00:00 2001
From: kieranjol <kieran.o.leary@gmail.com>
Date: Mon, 7 Aug 2017 00:57:09 +0100
Subject: [PATCH 22/31] ififuncs/premiscsv - adds argparse and changes
 variables in all PREMIS CSV scripts

---
 premiscsv.py     | 58 ++++++++++++++++++++++++++++++++++++------------
 premiscsv2xml.py | 33 ++++++++++++++++++++++-----
 premisobjects.py | 56 ++++++++++++++++++++++++++++++++++------------
 3 files changed, 114 insertions(+), 33 deletions(-)

diff --git a/premiscsv.py b/premiscsv.py
index f0a2317..3d8a4dd 100755
--- a/premiscsv.py
+++ b/premiscsv.py
@@ -7,11 +7,12 @@
 import sys
 import csv
 import shutil
+import argparse
 # from lxml import etree
 import ififuncs
 
 
-def find_events(logfile):
+def find_events(logfile, objects_csv, output):
     '''
     A very hacky attempt to extract the relevant preservation events from our
     log files.
@@ -107,14 +108,14 @@ def find_events(logfile):
                     '', 'UUID',
                     linking_object_identifier_value, ''
                 ]
-                ififuncs.append_csv('events.csv', event_row)
+                ififuncs.append_csv(output, event_row)
 
-def update_objects():
+def update_objects(output, objects_csv):
     '''
     Update the object description with the linkingEventIdentifiers
     '''
     link_dict = {}
-    event_dicts = ififuncs.extract_metadata('events.csv')
+    event_dicts = ififuncs.extract_metadata(output)
     for i in event_dicts:
         a =  i['eventIdentifierValue']
         try:
@@ -122,7 +123,7 @@ def update_objects():
         except KeyError:
             link_dict[i['linkingObjectIdentifierValue']]  = a + '|'
     print link_dict
-    object_dicts = ififuncs.extract_metadata('objects.csv')
+    object_dicts = ififuncs.extract_metadata(objects_csv)
     for x in object_dicts:
         for link in link_dict:
 
@@ -155,9 +156,9 @@ def update_objects():
                 w.writeheader()
             counter += 1
             w.writerow(i)
-    shutil.move('mycsvfile.csv', 'objects.csv')
+    shutil.move('mycsvfile.csv', objects_csv)
 
-def make_events_csv():
+def make_events_csv(output):
     '''
     Generates a CSV with PREMIS-esque headings. Currently it's just called
     'bla.csv' but it will probably be called:
@@ -174,16 +175,45 @@ def make_events_csv():
         'linkingAgentIdentifierRole', 'linkingObjectIdentifierType',
         'linkingObjectIdentifierValue', 'linkingObjectRole'
     ]
-    ififuncs.create_csv('events.csv', premis_events)
+    ififuncs.create_csv(output, premis_events)
 
-def main():
+def parse_args(args_):
+    '''
+    Parse command line arguments.
+    '''
+    parser = argparse.ArgumentParser(
+        description='Describes events using PREMIS data dictionary via CSV'
+        ' Written by Kieran O\'Leary.'
+    )
+    parser.add_argument(
+        '-i',
+        help='full path of a log textfile', required=True
+    )
+    parser.add_argument(
+        '-o',
+        help='full path of output csv', required=True
+    )
+    parser.add_argument(
+        '-object_csv',
+        help='full path of object description csv', required=True
+    )
+    parser.add_argument(
+        '-user',
+        help='Declare who you are. If this is not set, you will be prompted.'
+    )
+    parsed_args = parser.parse_args(args_)
+    return parsed_args
+def main(args_):
     '''
     Launches all the other functions when run from the command line.
     '''
-    make_events_csv()
-    logfile = sys.argv[1]
-    find_events(logfile)
-    update_objects()
+    args = parse_args(args_)
+    logfile = args.i
+    output = args.o
+    objects_csv = args.object_csv
+    make_events_csv(output)
+    find_events(logfile, objects_csv, output)
+    update_objects(output, objects_csv)
 
 if __name__ == '__main__':
-    main()
+    main(sys.argv[1:])
diff --git a/premiscsv2xml.py b/premiscsv2xml.py
index 127fb8d..3611923 100755
--- a/premiscsv2xml.py
+++ b/premiscsv2xml.py
@@ -3,6 +3,7 @@
 Takes a PREMIS CSV file, as generated by premiscsv.py, and transform into XML.
 '''
 import sys
+import argparse
 from lxml import etree
 import ififuncs
 
@@ -179,14 +180,36 @@ def describe_events(premis, event_dictionaries):
         event_outcome_detail_note.text = x['eventOutcomeDetailNote']
     print(etree.tostring(premis, pretty_print=True))
 
-
-def main():
+def parse_args(args_):
+    '''
+    Parse command line arguments.
+    '''
+    parser = argparse.ArgumentParser(
+        description='Converts PREMIS CSV to XML'
+        ' Written by Kieran O\'Leary.'
+    )
+    parser.add_argument(
+        '-i',
+        help='full path of objects csv', required=True
+    )
+    parser.add_argument(
+        '-ev',
+        help='full path of events csv', required=True
+    )
+    parser.add_argument(
+        '-user',
+        help='Declare who you are. If this is not set, you will be prompted.'
+    )
+    parsed_args = parser.parse_args(args_)
+    return parsed_args
+def main(args_):
     '''
     Launches all the other functions when run from the command line.
     For debugging purposes, the contents of the CSV is printed to screen.
     '''
-    csv_file = sys.argv[1]
-    events_csv = sys.argv[2]
+    args = parse_args(args_)
+    csv_file = args.i
+    events_csv = args.ev
     object_dictionaries = ififuncs.extract_metadata(csv_file)
     event_dictionaries = ififuncs.extract_metadata(events_csv)
     premis = setup_xml()
@@ -195,4 +218,4 @@ def main():
 
 
 if __name__ == '__main__':
-    main()
+    main(sys.argv[1:])
diff --git a/premisobjects.py b/premisobjects.py
index af788ce..f8d8f84 100755
--- a/premisobjects.py
+++ b/premisobjects.py
@@ -21,13 +21,14 @@
 
 import os
 import sys
+import argparse
 import ififuncs
 
 
 
 
 
-def make_skeleton_csv():
+def make_skeleton_csv(output):
     '''
     Generates a CSV with PREMIS-esque headings. Currently it's just called
     'cle.csv' but it will probably be called:
@@ -53,10 +54,10 @@ def make_skeleton_csv():
         'relationship_structural_hasroot',
         'relationship_derivation_hassource'
     ]
-    ififuncs.create_csv('objects.csv', premis_object_units)
+    ififuncs.create_csv(output, premis_object_units)
 
 
-def file_description(source, manifest, representation_uuid):
+def file_description(source, manifest, representation_uuid, output):
     '''
     Generate PREMIS descriptions for items and write to CSV.
     '''
@@ -98,9 +99,9 @@ def file_description(source, manifest, representation_uuid):
                         '',
                         ''
                     ]
-                    ififuncs.append_csv('objects.csv', file_data)
+                    ififuncs.append_csv(output, file_data)
     return item_ids
-def representation_description(representation_uuid, item_ids):
+def representation_description(representation_uuid, item_ids, output):
     '''
     Generate PREMIS descriptions for a representation and write to CSV.
     '''
@@ -130,7 +131,7 @@ def representation_description(representation_uuid, item_ids):
         '',
         ''
     ]
-    ififuncs.append_csv('objects.csv', representation_data)
+    ififuncs.append_csv(output, representation_data)
 
 
 def intellectual_entity_description():
@@ -142,19 +143,46 @@ def intellectual_entity_description():
     intellectual_entity_dictionary['objectCategory'] = 'intellectual entity'
     #print intellectual_entity_dictionary
 
-
-def main():
+def parse_args(args_):
+    '''
+    Parse command line arguments.
+    '''
+    parser = argparse.ArgumentParser(
+        description='Describes objects using PREMIS data dictionary using CSV'
+        ' Written by Kieran O\'Leary.'
+    )
+    parser.add_argument(
+        '-i',
+        help='full path of input directory', required=True
+    )
+    parser.add_argument(
+        '-o', '-output',
+        help='full path of output directory', required=True
+    )
+    parser.add_argument(
+        '-m', '-manifest',
+        help='full path to a pre-existing manifest', required=True
+    )
+    parser.add_argument(
+        '-user',
+        help='Declare who you are. If this is not set, you will be prompted.'
+    )
+    parsed_args = parser.parse_args(args_)
+    return parsed_args
+def main(args_):
     '''
     Launches all the other functions when run from the command line.
     '''
-    make_skeleton_csv()
-    source = sys.argv[1]
-    manifest = sys.argv[2]
+    args = parse_args(args_)
+    source = args.i
+    output = args.o
+    manifest = args.m
+    make_skeleton_csv(output)
     representation_uuid = ififuncs.find_representation_uuid(source)
-    item_ids = file_description(source, manifest, representation_uuid)
+    item_ids = file_description(source, manifest, representation_uuid, output)
     #intellectual_entity_description()
-    representation_description(representation_uuid, item_ids)
+    representation_description(representation_uuid, item_ids, output)
 
 if __name__ == '__main__':
-    main()
+    main(sys.argv[1:])
 

From 8c452284a2f41207602b1bee78be520ac95c35cf Mon Sep 17 00:00:00 2001
From: kieranjol <kieran.o.leary@gmail.com>
Date: Mon, 7 Aug 2017 01:50:34 +0100
Subject: [PATCH 23/31] cleans up PREMIS csv scripts

---
 premiscsv.py     | 25 ++++++++++++++++---------
 premiscsv2xml.py |  7 +++++++
 premisobjects.py | 10 ++++++----
 3 files changed, 29 insertions(+), 13 deletions(-)

diff --git a/premiscsv.py b/premiscsv.py
index 3d8a4dd..9d6b34d 100755
--- a/premiscsv.py
+++ b/premiscsv.py
@@ -12,7 +12,7 @@
 import ififuncs
 
 
-def find_events(logfile, objects_csv, output):
+def find_events(logfile, output):
     '''
     A very hacky attempt to extract the relevant preservation events from our
     log files.
@@ -32,7 +32,9 @@ def find_events(logfile, objects_csv, output):
     object_info = ififuncs.extract_metadata('objects.csv')
     object_locations = {}
     for i in object_info:
-        object_locations[i['contentLocationValue']] = i['objectIdentifier'].split(', ')[1].replace(']', '')
+        object_locations[
+            i['contentLocationValue']
+        ] = i['objectIdentifier'].split(', ')[1].replace(']', '')
     for log_entry in log_lines:
         valid_entries = [
             'eventType',
@@ -94,7 +96,7 @@ def find_events(logfile, objects_csv, output):
                                 a = os.path.basename(event_outcome).replace('_mediainfo.xml', '').replace('_mediatrace.xml', '')[:-1]
                                 b = os.path.basename(x)
                                 if a == b:
-                                    linking_object_identifier_value = object_locations[x].replace('\'','')
+                                    linking_object_identifier_value = object_locations[x].replace('\'', '')
                 if (break_loop == 'continue') or (event_type == ''):
                     continue
                 print event_type
@@ -110,6 +112,7 @@ def find_events(logfile, objects_csv, output):
                 ]
                 ififuncs.append_csv(output, event_row)
 
+
 def update_objects(output, objects_csv):
     '''
     Update the object description with the linkingEventIdentifiers
@@ -117,17 +120,16 @@ def update_objects(output, objects_csv):
     link_dict = {}
     event_dicts = ififuncs.extract_metadata(output)
     for i in event_dicts:
-        a =  i['eventIdentifierValue']
+        a = i['eventIdentifierValue']
         try:
-            link_dict[i['linkingObjectIdentifierValue']]  += a + '|'
+            link_dict[i['linkingObjectIdentifierValue']] += a + '|'
         except KeyError:
-            link_dict[i['linkingObjectIdentifierValue']]  = a + '|'
+            link_dict[i['linkingObjectIdentifierValue']] = a + '|'
     print link_dict
     object_dicts = ififuncs.extract_metadata(objects_csv)
     for x in object_dicts:
         for link in link_dict:
-
-            if link ==  x['objectIdentifier'].split(', ')[1].replace(']', '').replace('\'',''):
+            if link == x['objectIdentifier'].split(', ')[1].replace(']', '').replace('\'', ''):
                 x['linkingEventIdentifierValue'] = link_dict[link]
     premis_object_units = [
         'objectIdentifier',
@@ -158,6 +160,7 @@ def update_objects(output, objects_csv):
             w.writerow(i)
     shutil.move('mycsvfile.csv', objects_csv)
 
+
 def make_events_csv(output):
     '''
     Generates a CSV with PREMIS-esque headings. Currently it's just called
@@ -177,6 +180,7 @@ def make_events_csv(output):
     ]
     ififuncs.create_csv(output, premis_events)
 
+
 def parse_args(args_):
     '''
     Parse command line arguments.
@@ -203,6 +207,8 @@ def parse_args(args_):
     )
     parsed_args = parser.parse_args(args_)
     return parsed_args
+
+
 def main(args_):
     '''
     Launches all the other functions when run from the command line.
@@ -212,8 +218,9 @@ def main(args_):
     output = args.o
     objects_csv = args.object_csv
     make_events_csv(output)
-    find_events(logfile, objects_csv, output)
+    find_events(logfile, output)
     update_objects(output, objects_csv)
 
+
 if __name__ == '__main__':
     main(sys.argv[1:])
diff --git a/premiscsv2xml.py b/premiscsv2xml.py
index 3611923..610799a 100755
--- a/premiscsv2xml.py
+++ b/premiscsv2xml.py
@@ -25,6 +25,7 @@ def create_unit(index, parent, unitname):
     parent.insert(index, unitname)
     return unitname
 
+
 def setup_xml():
     '''
     This should just create the PREMIS lxml object.
@@ -34,6 +35,7 @@ def setup_xml():
     premis = etree.fromstring(namespace)
     return premis
 
+
 def describe_objects(premis, object_dictionaries):
     '''
     Converts the CSV object metadata into PREMIS XML.
@@ -130,6 +132,8 @@ def describe_objects(premis, object_dictionaries):
                 linking_event_identifier_type.text = 'UUID'
                 linking_event_identifier_value.text = event
     return premis
+
+
 def describe_events(premis, event_dictionaries):
     '''
     Converts the CSV object metadata into PREMIS XML.
@@ -180,6 +184,7 @@ def describe_events(premis, event_dictionaries):
         event_outcome_detail_note.text = x['eventOutcomeDetailNote']
     print(etree.tostring(premis, pretty_print=True))
 
+
 def parse_args(args_):
     '''
     Parse command line arguments.
@@ -202,6 +207,8 @@ def parse_args(args_):
     )
     parsed_args = parser.parse_args(args_)
     return parsed_args
+
+
 def main(args_):
     '''
     Launches all the other functions when run from the command line.
diff --git a/premisobjects.py b/premisobjects.py
index f8d8f84..c10f65d 100755
--- a/premisobjects.py
+++ b/premisobjects.py
@@ -25,13 +25,10 @@
 import ififuncs
 
 
-
-
-
 def make_skeleton_csv(output):
     '''
     Generates a CSV with PREMIS-esque headings. Currently it's just called
-    'cle.csv' but it will probably be called:
+    'objects.csv' but it will probably be called:
     UUID_premisobjects.csv
     and sit in the metadata directory.
     '''
@@ -70,6 +67,7 @@ def file_description(source, manifest, representation_uuid, output):
                     md5, uri = ififuncs.get_checksum(manifest, item)
                     item_uuid = ififuncs.create_uuid()
                     full_path = os.path.join(root, item)
+                    print 'Using Siegfriend to analyze %s' % item
                     pronom_id, authority, version = ififuncs.get_pronom_format(
                         full_path
                     )
@@ -143,6 +141,7 @@ def intellectual_entity_description():
     intellectual_entity_dictionary['objectCategory'] = 'intellectual entity'
     #print intellectual_entity_dictionary
 
+
 def parse_args(args_):
     '''
     Parse command line arguments.
@@ -169,6 +168,8 @@ def parse_args(args_):
     )
     parsed_args = parser.parse_args(args_)
     return parsed_args
+
+
 def main(args_):
     '''
     Launches all the other functions when run from the command line.
@@ -183,6 +184,7 @@ def main(args_):
     #intellectual_entity_description()
     representation_description(representation_uuid, item_ids, output)
 
+
 if __name__ == '__main__':
     main(sys.argv[1:])
 

From 9bad84c12172b44544ded8540fab4d636b90bc7f Mon Sep 17 00:00:00 2001
From: kieranjol <kieran.o.leary@gmail.com>
Date: Mon, 7 Aug 2017 11:05:46 +0100
Subject: [PATCH 24/31] premisobjects - be less IFI folder structure specific

---
 premisobjects.py | 80 +++++++++++++++++++++++-------------------------
 1 file changed, 39 insertions(+), 41 deletions(-)

diff --git a/premisobjects.py b/premisobjects.py
index c10f65d..421fd91 100755
--- a/premisobjects.py
+++ b/premisobjects.py
@@ -10,7 +10,8 @@
 Structural and Includes respectively.
 
 todo:
-Document identifier assignment for files and IE. Probably in events sheet?
+Document identifier assignment for files and IE. Probably in events sheet?
+This would ideally just add to the log in the helper script.
 Allow for derivation to be entered
 Link mediainfo xml in /metadata to the objectCharacteristicsExtension field.
 
@@ -60,50 +61,47 @@ def file_description(source, manifest, representation_uuid, output):
     '''
     item_ids = []
     for root, _, filenames in os.walk(source):
-        if os.path.basename(root) == 'objects':
-            for root, _, filenames in os.walk(root):
-                filenames = [f for f in filenames if f[0] != '.']
-                for item in filenames:
-                    md5, uri = ififuncs.get_checksum(manifest, item)
-                    item_uuid = ififuncs.create_uuid()
-                    full_path = os.path.join(root, item)
-                    print 'Using Siegfriend to analyze %s' % item
-                    pronom_id, authority, version = ififuncs.get_pronom_format(
-                        full_path
-                    )
-                    item_dictionary = {}
-                    item_dictionary['objectIdentifier'] = ['UUID', item_uuid]
-                    item_dictionary['objectCategory'] = 'file'
-                    item_dictionary['size'] = str(os.path.getsize(full_path))
-                    item_dictionary['originalName'] = item
-                    item_dictionary['relationship_structural_isincludedin'] = representation_uuid
-                    item_ids.append(item_uuid)
-                    file_data = [
-                        item_dictionary['objectIdentifier'],
-                        item_dictionary['objectCategory'],
-                        'md5', md5, 'internal',
-                        item_dictionary['size'], '', '',
-                        authority, pronom_id, 'identification',
-                        '', item,
-                        'uri', uri,
-                        '', '',
-                        '',
-                        '', '',
-                        '',
-                        '', '',
-                        '',
-                        item_dictionary['relationship_structural_isincludedin'],
-                        '',
-                        '',
-                        ''
-                    ]
-                    ififuncs.append_csv(output, file_data)
+        filenames = [f for f in filenames if f[0] != '.']
+        for item in filenames:
+            md5, uri = ififuncs.get_checksum(manifest, item)
+            item_uuid = ififuncs.create_uuid()
+            full_path = os.path.join(root, item)
+            print 'Using Siegfriend to analyze %s' % item
+            pronom_id, authority, version = ififuncs.get_pronom_format(
+                full_path
+            )
+            item_dictionary = {}
+            item_dictionary['objectIdentifier'] = ['UUID', item_uuid]
+            item_dictionary['objectCategory'] = 'file'
+            item_dictionary['size'] = str(os.path.getsize(full_path))
+            item_dictionary['originalName'] = item
+            item_dictionary['relationship_structural_isincludedin'] = representation_uuid
+            item_ids.append(item_uuid)
+            file_data = [
+                item_dictionary['objectIdentifier'],
+                item_dictionary['objectCategory'],
+                'md5', md5, 'internal',
+                item_dictionary['size'], '', '',
+                authority, pronom_id, 'identification',
+                '', item,
+                'uri', uri,
+                '', '',
+                '',
+                '', '',
+                '',
+                '', '',
+                '',
+                item_dictionary['relationship_structural_isincludedin'],
+                '',
+                '',
+                ''
+            ]
+            ififuncs.append_csv(output, file_data)
     return item_ids
 def representation_description(representation_uuid, item_ids, output):
     '''
     Generate PREMIS descriptions for a representation and write to CSV.
     '''
-
     representation_dictionary = {}
     representation_dictionary['objectIdentifier'] = ['UUID', representation_uuid]
     representation_dictionary['objectCategory'] = 'representation'
@@ -152,7 +150,7 @@ def parse_args(args_):
     )
     parser.add_argument(
         '-i',
-        help='full path of input directory', required=True
+        help='full path of input objects directory', required=True
     )
     parser.add_argument(
         '-o', '-output',

From f0683108b673f258b63c3768b50e83ce51c16236 Mon Sep 17 00:00:00 2001
From: kieranjol <kieran.o.leary@gmail.com>
Date: Mon, 7 Aug 2017 11:14:31 +0100
Subject: [PATCH 25/31] logs2premis - renames premiscsv to logs2premis

---
 logs2premis.py | 226 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 226 insertions(+)
 create mode 100755 logs2premis.py

diff --git a/logs2premis.py b/logs2premis.py
new file mode 100755
index 0000000..9d6b34d
--- /dev/null
+++ b/logs2premis.py
@@ -0,0 +1,226 @@
+#!/usr/bin/env python
+'''
+Extracts preservation events from an IFI plain text log file and converts
+to a CSV using the PREMIS data dictionary
+'''
+import os
+import sys
+import csv
+import shutil
+import argparse
+# from lxml import etree
+import ififuncs
+
+
+def find_events(logfile, output):
+    '''
+    A very hacky attempt to extract the relevant preservation events from our
+    log files.
+    '''
+    sip_test = os.path.basename(logfile).replace('_sip_log.log', '')
+    if ififuncs.validate_uuid4(sip_test) != False:
+        linking_object_identifier_value = sip_test
+    with open(logfile, 'r') as logfile_object:
+        log_lines = logfile_object.readlines()
+    for event_test in log_lines:
+        if 'eventDetail=copyit.py' in event_test:
+            logsplit = event_test.split(',')
+            for line_fragment in logsplit:
+                manifest_event = line_fragment.replace(
+                    'eventDetail', ''
+                ).replace('\n', '').split('=')[1]
+    object_info = ififuncs.extract_metadata('objects.csv')
+    object_locations = {}
+    for i in object_info:
+        object_locations[
+            i['contentLocationValue']
+        ] = i['objectIdentifier'].split(', ')[1].replace(']', '')
+    for log_entry in log_lines:
+        valid_entries = [
+            'eventType',
+            'eventDetail=sipcreator.py',
+            'eventDetail=Mediatrace',
+            'eventDetail=Technical',
+            'eventDetail=copyit.py'
+        ]
+        for entry in valid_entries:
+            if entry in log_entry:
+                break_loop = ''
+                event_outcome = ''
+                event_detail = ''
+                event_outcome_detail_note = ''
+                event_type = ''
+                event_row = []
+                datetime = log_entry[:19]
+                logsplit = log_entry.split(',')
+                for line_fragment in logsplit:
+                    if 'eventType' in line_fragment:
+                        if 'EVENT =' in line_fragment:
+                            line_fragment = line_fragment.split('EVENT =')[1]
+                        event_type = line_fragment.replace(
+                            ' eventType=', ''
+                        ).replace('assignement', 'assignment')
+                    if ' value' in line_fragment:
+                        # this assumes that the value is the outcome of an identifier assignment.
+                        event_outcome = line_fragment[7:].replace('\n', '')
+                    # we are less concerned with events starting.
+                    if 'status=started' in line_fragment:
+                        break_loop = 'continue'
+                    if 'Generating destination manifest:' in line_fragment:
+                        break_loop = ''
+                        event_detail = manifest_event
+                    # ugh, this might run multiple times.
+                    if 'eventDetail=sipcreator.py' in log_entry:
+                        event_type = 'Information Package Creation'
+                        event_detail = line_fragment.replace(
+                            'eventDetail', ''
+                        ).replace('\n', '').split('=')[1]
+                        event_outcome_detail_note = 'Submission Information Package'
+                    if ('eventDetail=Mediatrace' in log_entry) or ('eventDetail=Technical' in log_entry):
+                        event_type = 'metadata extraction'
+                        event_detail = log_entry.split(
+                            'eventDetail=', 1
+                        )[1].split(',')[0]
+                        event_outcome = log_entry.split(
+                            'eventOutcome=', 1
+                        )[1].replace(', agentName=mediainfo', '').replace('\n', '')
+                        if 'eventDetail=Mediatrace' in log_entry:
+                            event_outcome = event_outcome.replace('mediainfo.xml', 'mediatrace.xml')
+                        for x in object_locations:
+                            '''
+                            This is trying to get the UUID of the source object
+                            that relates to the mediainfo xmls. This is
+                            achieved via a dictionary.
+                            '''
+                            if 'objects' in x:
+                                a = os.path.basename(event_outcome).replace('_mediainfo.xml', '').replace('_mediatrace.xml', '')[:-1]
+                                b = os.path.basename(x)
+                                if a == b:
+                                    linking_object_identifier_value = object_locations[x].replace('\'', '')
+                if (break_loop == 'continue') or (event_type == ''):
+                    continue
+                print event_type
+                event_row = [
+                    'UUID', ififuncs.create_uuid(),
+                    event_type, datetime, event_detail,
+                    '',
+                    event_outcome, '',
+                    event_outcome_detail_note, '',
+                    '', '',
+                    '', 'UUID',
+                    linking_object_identifier_value, ''
+                ]
+                ififuncs.append_csv(output, event_row)
+
+
+def update_objects(output, objects_csv):
+    '''
+    Update the object description with the linkingEventIdentifiers
+    '''
+    link_dict = {}
+    event_dicts = ififuncs.extract_metadata(output)
+    for i in event_dicts:
+        a = i['eventIdentifierValue']
+        try:
+            link_dict[i['linkingObjectIdentifierValue']] += a + '|'
+        except KeyError:
+            link_dict[i['linkingObjectIdentifierValue']] = a + '|'
+    print link_dict
+    object_dicts = ififuncs.extract_metadata(objects_csv)
+    for x in object_dicts:
+        for link in link_dict:
+            if link == x['objectIdentifier'].split(', ')[1].replace(']', '').replace('\'', ''):
+                x['linkingEventIdentifierValue'] = link_dict[link]
+    premis_object_units = [
+        'objectIdentifier',
+        'objectCategory',
+        'messageDigestAlgorithm', 'messageDigest', 'messageDigestOriginator',
+        'size',	'formatName', 'formatVersion',
+        'formatRegistryName', 'formatRegistryKey', 'formatRegistryRole',
+        'objectCharacteristicsExtension', 'originalName',
+        'contentLocationType', 'contentLocationValue',
+        'relatedObjectIdentifierType', 'relatedObjectIdentifierValue',
+        'relatedObjectSequence',
+        'relatedEventIdentifierType', 'relatedEventIdentifierValue',
+        'relatedEventSequence',
+        'linkingEventIdentifierType', 'linkingEventIdentifierValue',
+        'relationship_structural_includes',
+        'relationship_structural_isincludedin',
+        'relationship_structural_represents',
+        'relationship_structural_hasroot',
+        'relationship_derivation_hassource'
+    ]
+    with open('mycsvfile.csv', 'wb') as f:
+        counter = 0
+        for i in object_dicts:
+            w = csv.DictWriter(f, fieldnames=premis_object_units)
+            if counter == 0:
+                w.writeheader()
+            counter += 1
+            w.writerow(i)
+    shutil.move('mycsvfile.csv', objects_csv)
+
+
+def make_events_csv(output):
+    '''
+    Generates a CSV with PREMIS-esque headings. The filename is whatever
+    path is passed in as output, but it will probably be called:
+    UUID_premisevents.csv
+    and sit in the metadata directory.
+    '''
+    premis_events = [
+        'eventIdentifierType', 'eventIdentifierValue',
+        'eventType', 'eventDateTime', 'eventDetail',
+        'eventDetailExtension',
+        'eventOutcome',	'eventOutcomeDetail',
+        'eventOutcomeDetailNote', 'eventOutcomeDetailExtension',
+        'linkingAgentIdentifierType', 'linkingAgentIdentifierValue',
+        'linkingAgentIdentifierRole', 'linkingObjectIdentifierType',
+        'linkingObjectIdentifierValue', 'linkingObjectRole'
+    ]
+    ififuncs.create_csv(output, premis_events)
+
+
+def parse_args(args_):
+    '''
+    Parse command line arguments.
+    '''
+    parser = argparse.ArgumentParser(
+        description='Describes events using PREMIS data dictionary via CSV'
+        ' Written by Kieran O\'Leary.'
+    )
+    parser.add_argument(
+        '-i',
+        help='full path of a log textfile', required=True
+    )
+    parser.add_argument(
+        '-o',
+        help='full path of output csv', required=True
+    )
+    parser.add_argument(
+        '-object_csv',
+        help='full path of object description csv', required=True
+    )
+    parser.add_argument(
+        '-user',
+        help='Declare who you are. If this is not set, you will be prompted.'
+    )
+    parsed_args = parser.parse_args(args_)
+    return parsed_args
+
+
+def main(args_):
+    '''
+    Launches all the other functions when run from the command line.
+    '''
+    args = parse_args(args_)
+    logfile = args.i
+    output = args.o
+    objects_csv = args.object_csv
+    make_events_csv(output)
+    find_events(logfile, output)
+    update_objects(output, objects_csv)
+
+
+if __name__ == '__main__':
+    main(sys.argv[1:])

From 784665dd1ce9a1c236982b6f79c1a2a15d362e8e Mon Sep 17 00:00:00 2001
From: kieranjol <kieran.o.leary@gmail.com>
Date: Mon, 7 Aug 2017 12:34:51 +0100
Subject: [PATCH 26/31] deletes renamed premiscsv

---
 premiscsv.py | 226 ---------------------------------------------------
 1 file changed, 226 deletions(-)
 delete mode 100755 premiscsv.py

diff --git a/premiscsv.py b/premiscsv.py
deleted file mode 100755
index 9d6b34d..0000000
--- a/premiscsv.py
+++ /dev/null
@@ -1,226 +0,0 @@
-#!/usr/bin/env python
-'''
-Extracts preservation events from an IFI plain text log file and converts
-to a CSV using the PREMIS data dictionary
-'''
-import os
-import sys
-import csv
-import shutil
-import argparse
-# from lxml import etree
-import ififuncs
-
-
-def find_events(logfile, output):
-    '''
-    A very hacky attempt to extract the relevant preservation events from our
-    log files.
-    '''
-    sip_test = os.path.basename(logfile).replace('_sip_log.log', '')
-    if ififuncs.validate_uuid4(sip_test) != False:
-        linking_object_identifier_value = sip_test
-    with open(logfile, 'r') as logfile_object:
-        log_lines = logfile_object.readlines()
-    for event_test in log_lines:
-        if 'eventDetail=copyit.py' in event_test:
-            logsplit = event_test.split(',')
-            for line_fragment in logsplit:
-                manifest_event = line_fragment.replace(
-                    'eventDetail', ''
-                ).replace('\n', '').split('=')[1]
-    object_info = ififuncs.extract_metadata('objects.csv')
-    object_locations = {}
-    for i in object_info:
-        object_locations[
-            i['contentLocationValue']
-        ] = i['objectIdentifier'].split(', ')[1].replace(']', '')
-    for log_entry in log_lines:
-        valid_entries = [
-            'eventType',
-            'eventDetail=sipcreator.py',
-            'eventDetail=Mediatrace',
-            'eventDetail=Technical',
-            'eventDetail=copyit.py'
-        ]
-        for entry in valid_entries:
-            if entry in log_entry:
-                break_loop = ''
-                event_outcome = ''
-                event_detail = ''
-                event_outcome_detail_note = ''
-                event_type = ''
-                event_row = []
-                datetime = log_entry[:19]
-                logsplit = log_entry.split(',')
-                for line_fragment in logsplit:
-                    if 'eventType' in line_fragment:
-                        if 'EVENT =' in line_fragment:
-                            line_fragment = line_fragment.split('EVENT =')[1]
-                        event_type = line_fragment.replace(
-                            ' eventType=', ''
-                        ).replace('assignement', 'assignment')
-                    if ' value' in line_fragment:
-                        # this assumes that the value is the outcome of an identifier assigment.
-                        event_outcome = line_fragment[7:].replace('\n', '')
-                    # we are less concerned with events starting.
-                    if 'status=started' in line_fragment:
-                        break_loop = 'continue'
-                    if 'Generating destination manifest:' in line_fragment:
-                        break_loop = ''
-                        event_detail = manifest_event
-                    # ugh, this might run multiple times.
-                    if 'eventDetail=sipcreator.py' in log_entry:
-                        event_type = 'Information Package Creation'
-                        event_detail = line_fragment.replace(
-                            'eventDetail', ''
-                        ).replace('\n', '').split('=')[1]
-                        event_outcome_detail_note = 'Submission Information Package'
-                    if ('eventDetail=Mediatrace' in log_entry) or ('eventDetail=Technical' in log_entry):
-                        event_type = 'metadata extraction'
-                        event_detail = log_entry.split(
-                            'eventDetail=', 1
-                        )[1].split(',')[0]
-                        event_outcome = log_entry.split(
-                            'eventOutcome=', 1
-                        )[1].replace(', agentName=mediainfo', '').replace('\n', '')
-                        if 'eventDetail=Mediatrace' in log_entry:
-                            event_outcome = event_outcome.replace('mediainfo.xml', 'mediatrace.xml')
-                        for x in object_locations:
-                            '''
-                            This is trying to get the UUID of the source object
-                            that relates to the mediainfo xmls. This is
-                            achieved via a dictionary.
-                            '''
-                            if 'objects' in x:
-                                a = os.path.basename(event_outcome).replace('_mediainfo.xml', '').replace('_mediatrace.xml', '')[:-1]
-                                b = os.path.basename(x)
-                                if a == b:
-                                    linking_object_identifier_value = object_locations[x].replace('\'', '')
-                if (break_loop == 'continue') or (event_type == ''):
-                    continue
-                print event_type
-                event_row = [
-                    'UUID', ififuncs.create_uuid(),
-                    event_type, datetime, event_detail,
-                    '',
-                    event_outcome, '',
-                    event_outcome_detail_note, '',
-                    '', '',
-                    '', 'UUID',
-                    linking_object_identifier_value, ''
-                ]
-                ififuncs.append_csv(output, event_row)
-
-
-def update_objects(output, objects_csv):
-    '''
-    Update the object description with the linkingEventIdentifiers
-    '''
-    link_dict = {}
-    event_dicts = ififuncs.extract_metadata(output)
-    for i in event_dicts:
-        a = i['eventIdentifierValue']
-        try:
-            link_dict[i['linkingObjectIdentifierValue']] += a + '|'
-        except KeyError:
-            link_dict[i['linkingObjectIdentifierValue']] = a + '|'
-    print link_dict
-    object_dicts = ififuncs.extract_metadata(objects_csv)
-    for x in object_dicts:
-        for link in link_dict:
-            if link == x['objectIdentifier'].split(', ')[1].replace(']', '').replace('\'', ''):
-                x['linkingEventIdentifierValue'] = link_dict[link]
-    premis_object_units = [
-        'objectIdentifier',
-        'objectCategory',
-        'messageDigestAlgorithm', 'messageDigest', 'messageDigestOriginator',
-        'size',	'formatName', 'formatVersion',
-        'formatRegistryName', 'formatRegistryKey', 'formatRegistryRole',
-        'objectCharacteristicsExtension', 'originalName',
-        'contentLocationType', 'contentLocationValue',
-        'relatedObjectIdentifierType', 'relatedObjectIdentifierValue',
-        'relatedObjectSequence',
-        'relatedEventIdentifierType', 'relatedEventIdentifierValue',
-        'relatedEventSequence',
-        'linkingEventIdentifierType', 'linkingEventIdentifierValue',
-        'relationship_structural_includes',
-        'relationship_structural_isincludedin',
-        'relationship_structural_represents',
-        'relationship_structural_hasroot',
-        'relationship_derivation_hassource'
-    ]
-    with open('mycsvfile.csv', 'wb') as f:
-        counter = 0
-        for i in object_dicts:
-            w = csv.DictWriter(f, fieldnames=premis_object_units)
-            if counter == 0:
-                w.writeheader()
-            counter += 1
-            w.writerow(i)
-    shutil.move('mycsvfile.csv', objects_csv)
-
-
-def make_events_csv(output):
-    '''
-    Generates a CSV with PREMIS-esque headings. Currently it's just called
-    'bla.csv' but it will probably be called:
-    UUID_premisevents.csv
-    and sit in the metadata directory.
-    '''
-    premis_events = [
-        'eventIdentifierType', 'eventIdentifierValue',
-        'eventType', 'eventDateTime', 'eventDetail',
-        'eventDetailExtension',
-        'eventOutcome',	'eventOutcomeDetail',
-        'eventOutcomeDetailNote', 'eventOutcomeDetailExtension',
-        'linkingAgentIdentifierType', 'linkingAgentIdentifierValue',
-        'linkingAgentIdentifierRole', 'linkingObjectIdentifierType',
-        'linkingObjectIdentifierValue', 'linkingObjectRole'
-    ]
-    ififuncs.create_csv(output, premis_events)
-
-
-def parse_args(args_):
-    '''
-    Parse command line arguments.
-    '''
-    parser = argparse.ArgumentParser(
-        description='Describes events using PREMIS data dictionary via CSV'
-        ' Written by Kieran O\'Leary.'
-    )
-    parser.add_argument(
-        '-i',
-        help='full path of a log textfile', required=True
-    )
-    parser.add_argument(
-        '-o',
-        help='full path of output csv', required=True
-    )
-    parser.add_argument(
-        '-object_csv',
-        help='full path of object description csv', required=True
-    )
-    parser.add_argument(
-        '-user',
-        help='Declare who you are. If this is not set, you will be prompted.'
-    )
-    parsed_args = parser.parse_args(args_)
-    return parsed_args
-
-
-def main(args_):
-    '''
-    Launches all the other functions when run from the command line.
-    '''
-    args = parse_args(args_)
-    logfile = args.i
-    output = args.o
-    objects_csv = args.object_csv
-    make_events_csv(output)
-    find_events(logfile, output)
-    update_objects(output, objects_csv)
-
-
-if __name__ == '__main__':
-    main(sys.argv[1:])

From 951ae948e56e6b7e528c0fa27ea8ed0224678d68 Mon Sep 17 00:00:00 2001
From: kieranjol <kieran.o.leary@gmail.com>
Date: Mon, 7 Aug 2017 13:30:51 +0100
Subject: [PATCH 27/31] makepremis - makepremis - adds helper script that
 launches premisobjects/logs2premis/premiscsv2xml

---
 makepremis.py | 73 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 73 insertions(+)
 create mode 100755 makepremis.py

diff --git a/makepremis.py b/makepremis.py
new file mode 100755
index 0000000..d02be77
--- /dev/null
+++ b/makepremis.py
@@ -0,0 +1,73 @@
+#!/usr/bin/env python
+'''
+Creates PREMIS CSV and XML descriptions by launching other IFIscripts,
+such as logs2premis.py, premisobjects.py, premiscsv2xml.py.
+'''
+import os
+import argparse
+import premisobjects
+import premiscsv2xml
+import logs2premis
+
+
+def parse_args():
+    '''
+    Parse command line arguments.
+    '''
+    parser = argparse.ArgumentParser(
+        description='Creates PREMIS CSV and XML descriptions by launching'
+        'other IFIscripts, such as logs2premis.py, premisobjects.py,'
+        'premiscsv2xml.py'
+        ' Written by Kieran O\'Leary.'
+    )
+    parser.add_argument(
+        'input',
+        help='full path to your input directory'
+    )
+    parser.add_argument(
+        '-user',
+        help='Declare who you are. If this is not set, you will be prompted.'
+    )
+    parsed_args = parser.parse_args()
+    return parsed_args
+
+
+def launch_scripts(source):
+    '''
+    Launches premisobjects, logs2premis and premiscsv2xml in input directory
+    '''
+    for root, _, _ in os.walk(source):
+        if os.path.basename(root) == 'objects':
+            objects_csv = 'kelvin.csv'
+            events_csv = 'temp.csv'
+            uuid_dir = os.path.dirname(root)
+            logs_dir = os.path.join(
+                uuid_dir, 'logs'
+            )
+            logname = os.path.join(
+                logs_dir, os.path.basename(uuid_dir + '_sip_log.log')
+            )
+            manifest = os.path.join(
+                os.path.dirname(uuid_dir), os.path.basename(uuid_dir + '_manifest.md5')
+            )
+            premisobjects.main(
+                ['-i', root, '-m', manifest, '-o', objects_csv]
+            )
+            logs2premis.main(
+                ['-i', logname, '-object_csv', objects_csv, '-o', events_csv]
+            )
+            premiscsv2xml.main(
+                ['-i', objects_csv, '-ev', events_csv]
+            )
+
+
+def main():
+    '''
+    Launch the other functions when called from the command line
+    '''
+    args = parse_args()
+    source = args.input
+    launch_scripts(source)
+
+if __name__ == '__main__':
+    main()

From 01a52546d4a69c167eb1e23f12211afe6558f3eb Mon Sep 17 00:00:00 2001
From: Kieran O'Leary <kieran.o.leary@gmail.com>
Date: Fri, 18 Aug 2017 21:00:23 +0100
Subject: [PATCH 28/31] makepremis - adds arguments for object/events csv
 filenames

---
 makepremis.py | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/makepremis.py b/makepremis.py
index d02be77..b8819aa 100755
--- a/makepremis.py
+++ b/makepremis.py
@@ -28,18 +28,26 @@ def parse_args():
         '-user',
         help='Declare who you are. If this is not set, you will be prompted.'
     )
+    parser.add_argument(
+        '-object_csv', required=True,
+        help='full path of objects csv'
+    )
+    parser.add_argument(
+        '-event_csv', required=True,
+        help='full path of events csv'
+    )
     parsed_args = parser.parse_args()
     return parsed_args
 
 
-def launch_scripts(source):
+def launch_scripts(source, args):
     '''
     Launches premisobjects, logs2premis and premiscsv2xml in input directory
     '''
     for root, _, _ in os.walk(source):
         if os.path.basename(root) == 'objects':
-            objects_csv = 'kelvin.csv'
-            events_csv = 'temp.csv'
+            objects_csv = args.object_csv
+            events_csv = args.event_csv
             uuid_dir = os.path.dirname(root)
             logs_dir = os.path.join(
                 uuid_dir, 'logs'
@@ -67,7 +75,7 @@ def main():
     '''
     args = parse_args()
     source = args.input
-    launch_scripts(source)
+    launch_scripts(source, args)
 
 if __name__ == '__main__':
     main()

From 61fd02e90f382e1c93cb08cea6b9bf75c83239c8 Mon Sep 17 00:00:00 2001
From: Kieran O'Leary <kieran.o.leary@gmail.com>
Date: Sat, 19 Aug 2017 16:31:21 +0100
Subject: [PATCH 29/31] README.md - updates PREMIS scripts documentation

---
 README.md | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index 9a8fcc5..9debac5 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,9 @@ table of contents
     * [sipcreator.py](https://github.com/kieranjol/IFIscripts#sipcreator)
 3. [PREMIS](https://github.com/kieranjol/IFIscripts#PREMIS)
     * [premisobjects.py](https://github.com/kieranjol/IFIscripts#premisobjectspy)
-    * [premiscsv.py](https://github.com/kieranjol/IFIscripts#premiscsvpy)
+    * [logs2premis.py](https://github.com/kieranjol/IFIscripts#logs2premispy)
+    * [makepremis.py](https://github.com/kieranjol/IFIscripts#makepremispy)
+    * [premiscsv2xml.py](https://github.com/kieranjol/IFIscripts#premiscsv2xmlpy)
 4. [Transcodes](https://github.com/kieranjol/IFIscripts#transcodes)
     * [makeffv1.py](https://github.com/kieranjol/IFIscripts#makeffv1py)
     * [bitc.py](https://github.com/kieranjol/IFIscripts#bitcpy)
@@ -74,18 +76,28 @@ Note: Documentation template has been copied from [mediamicroservices](https://g
 
 ## PREMIS ##
 
+### makepremis.py ###
+* Creates PREMIS CSV and XML descriptions by launching other IFIscripts, such as logs2premis.py, premisobjects.py, premiscsv2xml.py.
+* Assumptions for now: representation UUID already exists as part of the SIP/AIP folder structure. Find a way to supply this, probably via argparse.
+* For more information, run `pydoc makepremis`
+* Usage: `makepremis.py path/to/input_directory -event_csv path/to/events.csv -object_csv path/to/objects.csv`
+
 ### premisobjects.py ###
 * Creates a somewhat PREMIS compliant CSV file describing objects in a package. A seperate script will need to be written in order to transform these CSV files into XML.
-* As the flat CSV structure prevents maintaining some of the complex relationships between units, some semantic units have been merged, for example:`relationship_structural_includes` is really a combination of the `relationshipType` and `relationshipSubType` units, which each have the values: `Structural` and `Includes` respectively.
+* As the flat CSV structure prevents maintaining some of the relationships between units, some semantic units have been merged, for example: `relationship_structural_includes` is really a combination of the `relationshipType` and `relationshipSubType` units, which each have the values: `Structural` and `Includes` respectively.
 * Assumptions for now: representation UUID already exists as part of the SIP/AIP folder structure. Find a way to supply this, probably via argparse.
-* For more information, run `pydoc premisobjects `
-* Usage: `premiscsv.py path/to/SIP path/to/manifest.md5`
+* For more information, run `pydoc premisobjects`
+* Usage: `premisobjects.py -i path/to/SIP -m path/to/manifest.md5 -o path/to/output.csv`
 
-### premiscsv.py ###
+### logs2premis.py ###
 * Extracts preservation events from an IFI plain text log file and converts to a CSV using the PREMIS data dictionary.
 * For more information, run `pydoc premiscsv`
-* Usage: - `premiscsv.py path/to/logfile.log`
+* Usage: `logs2premis.py -i path/to/logfile.log -o path/to/output.csv -object_csv path/to/objects.csv`
 
+### premiscsv2xml.py ###
+* Transforms PREMIS csv files into XML.
+* For more information, run `pydoc premiscsv2xml`
+* Usage: `premiscsv2xml.py -ev path/to/events.csv -i path/to/objects.csv`
 
 
 ## Transcodes ##

From ebc6ef4b1d5941341d65d619999c72690702d2a7 Mon Sep 17 00:00:00 2001
From: kieranjol <kieran.o.leary@gmail.com>
Date: Tue, 16 Jan 2018 14:41:15 +0000
Subject: [PATCH 30/31] premisobjects/makepremis - clarifies argparse and fixes
 typo

---
 makepremis.py    | 4 ++--
 premisobjects.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/makepremis.py b/makepremis.py
index b8819aa..5180869 100755
--- a/makepremis.py
+++ b/makepremis.py
@@ -30,11 +30,11 @@ def parse_args():
     )
     parser.add_argument(
         '-object_csv', required=True,
-        help='full path of objects csv'
+        help='full path and filename of the output objects CSV.'
     )
     parser.add_argument(
         '-event_csv', required=True,
-        help='full path of events csv'
+        help='full path and filename of the output events CSV'
     )
     parsed_args = parser.parse_args()
     return parsed_args
diff --git a/premisobjects.py b/premisobjects.py
index 421fd91..c2c5f5e 100755
--- a/premisobjects.py
+++ b/premisobjects.py
@@ -66,7 +66,7 @@ def file_description(source, manifest, representation_uuid, output):
             md5, uri = ififuncs.get_checksum(manifest, item)
             item_uuid = ififuncs.create_uuid()
             full_path = os.path.join(root, item)
-            print 'Using Siegfriend to analyze %s' % item
+            print 'Using Siegfried to analyze %s' % item
             pronom_id, authority, version = ififuncs.get_pronom_format(
                 full_path
             )

From ec2e17b924d94baaa7b0d39795850c2f8c44ba93 Mon Sep 17 00:00:00 2001
From: kieranjol <kieran.o.leary@gmail.com>
Date: Wed, 17 Jan 2018 22:26:22 +0000
Subject: [PATCH 31/31] premisobjects - adds placeholder relationships function

---
 premisobjects.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/premisobjects.py b/premisobjects.py
index c2c5f5e..883d5e8 100755
--- a/premisobjects.py
+++ b/premisobjects.py
@@ -98,6 +98,23 @@ def file_description(source, manifest, representation_uuid, output):
             ]
             ififuncs.append_csv(output, file_data)
     return item_ids
+
+def build_relationships():
+    '''
+    Placeholder function that will produce a CSV containing the relationships 
+    within a PREMIS object description. 
+    '''
+    relationships = [
+        "relationship_uuid",
+        "objectIdentifierValue",
+        "relationshipType",
+        "relationshipSubType",
+        "relatedObjectIdentifierType",
+        "relatedObjectIdentifierValue",
+        "relatedEventIdentifierType",
+        "relatedEventIdentifierValue",
+        "relatedEventSequence"
+    ]
 def representation_description(representation_uuid, item_ids, output):
     '''
     Generate PREMIS descriptions for a representation and write to CSV.