From 5baca3b992c51d7b9b279c24e6a4fad48c4b28f4 Mon Sep 17 00:00:00 2001 From: kieranjol Date: Mon, 6 Mar 2017 00:15:45 +0000 Subject: [PATCH 01/30] tapepremis - adds placeholder tape premis workflow --- tapepremis.py | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100755 tapepremis.py diff --git a/tapepremis.py b/tapepremis.py new file mode 100755 index 0000000..a31209f --- /dev/null +++ b/tapepremis.py @@ -0,0 +1,40 @@ +#!/usr/bin/env python +import subprocess +import argparse +import sys +import os +import hashlib +import shutil +import uuid +import time +import uuid +from glob import glob +from ififuncs import hashlib_manifest +from ififuncs import get_date_modified +from premis import make_premis +from premis import write_premis +from premis import make_agent +from premis import make_event +from premis import setup_xml +from premis import create_representation +from premis import create_intellectual_entity + + +def main(): + premisxml, premis_namespace, doc, premis = setup_xml(sys.argv[1]) + print premisxml, doc, premis + source_file = sys.argv[1] + items = {"workflow":"raw audio","oe":os.path.basename(source_file), "filmographic":'n/a', "sourceAccession":os.path.basename(source_file), "interventions":['placeholder'], "prepList":['placeholder'], "user":'Kieran O\' Leary'} + representation_uuid = str(uuid.uuid4()) + xml_info = make_premis(source_file, items, premis, premis_namespace, premisxml,representation_uuid,'not_sequence') + capture_uuid = str(uuid.uuid4()) + transcode_uuid = str(uuid.uuid4()) + framemd5_uuid = str(uuid.uuid4()) + manifest_uuid = str(uuid.uuid4()) + ffmpegAgent = make_agent(premis,[transcode_uuid] , 'ee83e19e-cdb1-4d83-91fb-7faf7eff738e') + make_event(premis, 'creation', 'transcode to ffv1 (figure out wording later)', [ffmpegAgent], transcode_uuid,xml_info[4], 'outcome', 'now-placeholder') + + write_premis(doc, premisxml) + +if __name__ == '__main__': + main() From 16df6da797d8fd492641138d5a722fe4db91eba3 Mon Sep 17 00:00:00 2001 From: kieranjol Date: Mon, 6 Mar 2017 21:53:12 +0000 Subject: [PATCH 02/30] tapepremis/premis extract md5 from manifest --- premis.py | 11 ++++++++--- tapepremis.py | 13 ++++++++++++- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/premis.py b/premis.py index bd92ccd..20d89dc 100755 --- a/premis.py +++ b/premis.py @@ -315,9 +315,14 @@ def create_object(source_file, items, premis, premis_namespace, premisxml, repre relationshipType.text = 'structural' relationshipSubType = create_unit(1,relationship, 'relationshipSubType') relationshipSubType.text = 'is included in' - - md5_output = hashlib_md5(source_file, image) - messageDigest.text = md5_output + # this is a total hack. if sequence = loopline', do not generate hash as it already exists in manifest :( + print len(sequence) + print sequence + if not len(sequence) == 32: + md5_output = hashlib_md5(source_file, image) + messageDigest.text = md5_output + else: + messageDigest.text = sequence messageDigestAlgorithm.text = 'md5' mediainfo_counter += 1 # When the image info has been grabbed, add info about the representation to the wav file. This may be problematic if makedpx is run first.. diff --git a/tapepremis.py b/tapepremis.py index a31209f..997eab2 100755 --- a/tapepremis.py +++ b/tapepremis.py @@ -19,14 +19,25 @@ from premis import create_representation from premis import create_intellectual_entity +def get_checksum(manifest): + if os.path.isfile(manifest): + with open(manifest, 'r') as fo: + manifest_lines = fo.readlines() + for md5 in manifest_lines: + if md5[-5:].rsplit()[0] == '.mkv': + return md5[:32] + + def main(): premisxml, premis_namespace, doc, premis = setup_xml(sys.argv[1]) print premisxml, doc, premis source_file = sys.argv[1] + md5 = get_checksum(sys.argv[2]) items = {"workflow":"raw audio","oe":os.path.basename(source_file), "filmographic":'n/a', "sourceAccession":os.path.basename(source_file), "interventions":['placeholder'], "prepList":['placeholder'], "user":'Kieran O\' Leary'} representation_uuid = str(uuid.uuid4()) - xml_info = make_premis(source_file, items, premis, premis_namespace, premisxml,representation_uuid,'not_sequence') + # the final argument here is 'loopline' which tells premis.py to not generate a checksum + xml_info = make_premis(source_file, items, premis, premis_namespace, premisxml,representation_uuid,md5) capture_uuid = str(uuid.uuid4()) transcode_uuid = str(uuid.uuid4()) framemd5_uuid = str(uuid.uuid4()) From fe6dea4f87f9fcb2e97fd65be7bd5252b7f55515 Mon Sep 17 00:00:00 2001 From: kieranjol Date: Mon, 6 Mar 2017 22:28:16 +0000 Subject: [PATCH 03/30] tapepremis.py - adds workstation interview placeholder and manifest check --- tapepremis.py | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/tapepremis.py b/tapepremis.py index 997eab2..9ae18bb 100755 --- a/tapepremis.py +++ b/tapepremis.py @@ -28,12 +28,37 @@ def get_checksum(manifest): return md5[:32] +def get_capture_workstation(): + capture_station = '' + if not capture_station == '1' or capture_station == '2' or capture_station == '3': + capture_station = raw_input('\n\n**** Where was tape captured?\nPress 1, 2 or 3\n\n1. es2\n2. loopline\n3. ingest 1\n' ) + while capture_station not in ('1','2','3'): + capture_station = raw_input('\n\n**** Where was tape captured?\nPress 1, 2 or 3\n\n1. es2\n2. loopline\n3. ingest 1\n' ) + if capture_station == '1': + capture_station = 'telecine' + elif capture_station == '2': + capture_station = 'ca_machine' + elif capture_station == '3': + capture_station = 'ca_machine' + return capture_station + def main(): premisxml, premis_namespace, doc, premis = setup_xml(sys.argv[1]) - print premisxml, doc, premis + get_capture_workstation() source_file = sys.argv[1] - md5 = get_checksum(sys.argv[2]) + sip_dir = os.path.dirname(source_file) + parent_dir = os.path.dirname(sip_dir) + ''' + /home/kieranjol/ifigit/ifiscripts/massive/objects sip + /home/kieranjol/ifigit/ifiscripts/massive parent + + ''' + manifest = parent_dir + '_manifest.md5' + if not os.path.isfile(manifest): + print 'no manifest found' + sys.exit() + md5 = get_checksum(manifest) items = {"workflow":"raw audio","oe":os.path.basename(source_file), "filmographic":'n/a', "sourceAccession":os.path.basename(source_file), "interventions":['placeholder'], "prepList":['placeholder'], "user":'Kieran O\' Leary'} representation_uuid = str(uuid.uuid4()) # the final argument here is 'loopline' which tells premis.py to not generate a checksum From b23bb6c97b34e0f407c8746c0d0220705b08f4a0 Mon Sep 17 00:00:00 2001 From: kieranjol Date: Tue, 7 Mar 2017 00:44:36 +0000 Subject: [PATCH 04/30] tapepremis - adds some placeholder capture agents/events --- tapepremis.py | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/tapepremis.py b/tapepremis.py index 9ae18bb..a9de0bd 100755 --- a/tapepremis.py +++ b/tapepremis.py @@ -19,6 +19,27 @@ from premis import create_representation from premis import create_intellectual_entity + +def capture_description(premis, xml_info): + ''' + Events: + 1. capture - glean from v210 mediainfo xml + 2. ffv1 - ffmpeg logs but get time from sip log also user input + 3. lossless verification - ffmpeg logs and time/judgement from sip log + 4. whole file manifest - sip log + that's it? + ''' + capture_uuid = str(uuid.uuid4()) + transcode_uuid = str(uuid.uuid4()) + framemd5_uuid = str(uuid.uuid4()) + manifest_uuid = str(uuid.uuid4()) + ffmpegAgent = make_agent(premis,[transcode_uuid] , 'ee83e19e-cdb1-4d83-91fb-7faf7eff738e') + m200pAgent = make_agent(premis,[transcode_uuid] , '60ae3a85-b595-45e0-8e4a-b95e90a6c422') + + capture_agents = [ffmpegAgent, m200pAgent] + make_event(premis, 'creation', 'transcode to ffv1 (figure out wording later)', capture_agents, transcode_uuid,xml_info[4], 'outcome', 'now-placeholder') + + def get_checksum(manifest): if os.path.isfile(manifest): with open(manifest, 'r') as fo: @@ -63,12 +84,9 @@ def main(): representation_uuid = str(uuid.uuid4()) # the final argument here is 'loopline' which tells premis.py to not generate a checksum xml_info = make_premis(source_file, items, premis, premis_namespace, premisxml,representation_uuid,md5) - capture_uuid = str(uuid.uuid4()) - transcode_uuid = str(uuid.uuid4()) - framemd5_uuid = str(uuid.uuid4()) - manifest_uuid = str(uuid.uuid4()) - ffmpegAgent = make_agent(premis,[transcode_uuid] , 'ee83e19e-cdb1-4d83-91fb-7faf7eff738e') - make_event(premis, 'creation', 'transcode to ffv1 (figure out wording later)', [ffmpegAgent], transcode_uuid,xml_info[4], 'outcome', 'now-placeholder') + capture_description(premis, xml_info) + + write_premis(doc, premisxml) From ac27c65a530a718c8c0379e65710d380078ef607 Mon Sep 17 00:00:00 2001 From: kieranjol Date: Tue, 7 Mar 2017 00:45:41 +0000 Subject: [PATCH 05/30] premis_agents - adds dvw-m2000p --- premis_agents.csv | 94 +++++++++++++++++++++++------------------------ 1 file changed, 47 insertions(+), 47 deletions(-) diff --git a/premis_agents.csv b/premis_agents.csv index f679f8e..ec5ac87 100644 --- a/premis_agents.csv +++ b/premis_agents.csv @@ -30,53 +30,53 @@ UUID,8c02d962-5ac5-4e51-a30c-002553134320,Baselight,software,4.4.7725,Plugin for UUID,52adf876-bf30-431c-b0c6-80cc4fd9406c,osx,software,10.9.5,,implementer UUID,9e59e772-14b0-4f9e-95b3-b88b6e751c3b,Aoife Fitzmaurice,person,,,implementer UUID,bc3de900-3903-4764-ab91-2ce89977d0d2,AEO-Light,software,2.2,,executing program -,60ae3a85-b595-45e0-8e4a-b95e90a6c422,,,,, -,dbdbb06b-ab10-49db-97a1-ff2ad285f9d2,,,,, -,e2ca7ad2-8edf-4e4e-a3c7-36e970c796c9,,,,, -,f47b98a2-b879-4786-9f6b-11fc3234a91e,,,,, -,c5e504ca-b4d5-410f-b87b-4b7ed794e44d,,,,, -,75a0b9ff-1f04-43bd-aa87-c31b73b1b61c,,,,, -,c5fc84fc-cc96-42a1-a5be-830b4e3012ae,,,,, -,be3060a8-6ccf-4339-97d5-a265687c3a5a,,,,, -,c93ee9a5-4c0c-4670-b857-8726bfd23cae,,,,, -,5fd99e09-63d7-4e9f-8383-1902f727d2a5,,,,, -,d13fae39-ac71-446e-88df-96c0d267b26c,,,,, -,8fac66ac-05de-478c-9d93-c89d46233197,,,,, -,005d4f57-69a4-40d9-9360-d45ff8e44f64,,,,, -,acf3de69-f7c2-4010-a96b-e2e5018a43b3,,,,, -,6d0a4ad3-7dc6-4607-b3bc-a284228e0c73,,,,, -,8419bfcb-51cb-4ea6-a8b2-e51a9af2e68a,,,,, -,14b935f1-8a9e-45b7-abf2-2593e89e77fc,,,,, -,b499818d-aecd-4ad7-bc61-f4fed329b18b,,,,, -,e37a0f86-816b-416f-8da7-4f5780d80a12,,,,, -,a3dd197d-ed80-41ff-bb84-f13e79a6e6d6,,,,, -,6d782410-db75-497b-97a6-c41a0fcc285c,,,,, -,bfc78799-ddfb-452d-b0ab-bf48c9dbcaa1,,,,, -,81a11bf9-65c4-4688-ab4a-954dbb1be241,,,,, -,29db18b3-1a90-4f7e-8432-adbf1ae40507,,,,, -,1be59fbb-bd99-40a9-8593-94d07f0e89f4,,,,, -,ee377e44-8635-4bda-a444-f7e632d32096,,,,, -,175468eb-c087-43b8-8444-222378604a95,,,,, -,2b45a5a3-1e90-4918-a6aa-41d9a93aef5e,,,,, -,f98bf550-a3fe-4f66-925e-8944799f930d,,,,, -,491e18c3-e4e1-4a47-b945-dadb53b565d1,,,,, -,4ac05527-bc27-4848-9f57-b2dc0820fab4,,,,, -,aa82e005-3246-4b2c-ba18-61de26a0855a,,,,, -,3acdc15f-ffd1-4be9-b279-351cb369ec15,,,,, -,62d87cd8-fc74-4895-b8c1-c2173feae060,,,,, -,2f68cf72-3fde-4e40-ac7b-ec7e382fbcf8,,,,, -,8d87bd1c-1936-46ee-ab56-5d571462f9ec,,,,, -,199f8f98-90d4-4ca1-9d84-bcde93d4b698,,,,, -,efb8371f-7d46-4f41-9133-011478b4b97a,,,,, -,b6a7bddf-08a0-46b7-a2b3-09e5fc1ec23b,,,,, -,f1b47f5f-fa70-42e3-ae39-6c56665e4a30,,,,, -,88e13b20-ba85-49c0-a8d0-ac32398adf6c,,,,, -,0ca96616-d585-436e-af3d-7a10202ce1c5,,,,, -,2178af8e-19e7-43c3-9bcd-24c462853672,,,,, -,f9e0a990-afc9-4411-bd2a-930f7a521778,,,,, -,3eec23de-facc-4a5e-a4ed-f8ad34626251,,,,, -,b924d919-fea5-47b7-8106-eeff86233b3d,,,,, -,2178d1d3-85be-4bfa-8be1-114a8a17ebbd,,,,, +UUID,60ae3a85-b595-45e0-8e4a-b95e90a6c422,DVW-M2000p,hardware,,Sony Digital Betacam VTR,implementer +UUID,dbdbb06b-ab10-49db-97a1-ff2ad285f9d2,,,,, +UUID,e2ca7ad2-8edf-4e4e-a3c7-36e970c796c9,,,,, +UUID,f47b98a2-b879-4786-9f6b-11fc3234a91e,,,,, +UUID,c5e504ca-b4d5-410f-b87b-4b7ed794e44d,,,,, +UUID,75a0b9ff-1f04-43bd-aa87-c31b73b1b61c,,,,, +UUID,c5fc84fc-cc96-42a1-a5be-830b4e3012ae,,,,, +UUID,be3060a8-6ccf-4339-97d5-a265687c3a5a,,,,, +UUID,c93ee9a5-4c0c-4670-b857-8726bfd23cae,,,,, +UUID,5fd99e09-63d7-4e9f-8383-1902f727d2a5,,,,, +UUID,d13fae39-ac71-446e-88df-96c0d267b26c,,,,, +UUID,8fac66ac-05de-478c-9d93-c89d46233197,,,,, +UUID,005d4f57-69a4-40d9-9360-d45ff8e44f64,,,,, +UUID,acf3de69-f7c2-4010-a96b-e2e5018a43b3,,,,, +UUID,6d0a4ad3-7dc6-4607-b3bc-a284228e0c73,,,,, +UUID,8419bfcb-51cb-4ea6-a8b2-e51a9af2e68a,,,,, +UUID,14b935f1-8a9e-45b7-abf2-2593e89e77fc,,,,, +UUID,b499818d-aecd-4ad7-bc61-f4fed329b18b,,,,, +UUID,e37a0f86-816b-416f-8da7-4f5780d80a12,,,,, +UUID,a3dd197d-ed80-41ff-bb84-f13e79a6e6d6,,,,, +UUID,6d782410-db75-497b-97a6-c41a0fcc285c,,,,, +UUID,bfc78799-ddfb-452d-b0ab-bf48c9dbcaa1,,,,, +UUID,81a11bf9-65c4-4688-ab4a-954dbb1be241,,,,, +UUID,29db18b3-1a90-4f7e-8432-adbf1ae40507,,,,, +UUID,1be59fbb-bd99-40a9-8593-94d07f0e89f4,,,,, +UUID,ee377e44-8635-4bda-a444-f7e632d32096,,,,, +UUID,175468eb-c087-43b8-8444-222378604a95,,,,, +UUID,2b45a5a3-1e90-4918-a6aa-41d9a93aef5e,,,,, +UUID,f98bf550-a3fe-4f66-925e-8944799f930d,,,,, +UUID,491e18c3-e4e1-4a47-b945-dadb53b565d1,,,,, +UUID,4ac05527-bc27-4848-9f57-b2dc0820fab4,,,,, +UUID,aa82e005-3246-4b2c-ba18-61de26a0855a,,,,, +UUID,3acdc15f-ffd1-4be9-b279-351cb369ec15,,,,, +UUID,62d87cd8-fc74-4895-b8c1-c2173feae060,,,,, +UUID,2f68cf72-3fde-4e40-ac7b-ec7e382fbcf8,,,,, +UUID,8d87bd1c-1936-46ee-ab56-5d571462f9ec,,,,, +UUID,199f8f98-90d4-4ca1-9d84-bcde93d4b698,,,,, +UUID,efb8371f-7d46-4f41-9133-011478b4b97a,,,,, +UUID,b6a7bddf-08a0-46b7-a2b3-09e5fc1ec23b,,,,, +UUID,f1b47f5f-fa70-42e3-ae39-6c56665e4a30,,,,, +UUID,88e13b20-ba85-49c0-a8d0-ac32398adf6c,,,,, +UUID,0ca96616-d585-436e-af3d-7a10202ce1c5,,,,, +UUID,2178af8e-19e7-43c3-9bcd-24c462853672,,,,, +UUID,f9e0a990-afc9-4411-bd2a-930f7a521778,,,,, +UUID,3eec23de-facc-4a5e-a4ed-f8ad34626251,,,,, +UUID,b924d919-fea5-47b7-8106-eeff86233b3d,,,,, +UUID,2178d1d3-85be-4bfa-8be1-114a8a17ebbd,,,,, ,97eb6b34-9eb8-4d18-88a8-1949ef439ccc,,,,, ,0edd5887-d142-4788-a63f-3a66e861085f,,,,, ,edb34421-9849-4ed7-8949-32d8ed663e63,,,,, From 9a76550f21bacbeceae7939afadf98b8e49033a6 Mon Sep 17 00:00:00 2001 From: kieranjol Date: Tue, 7 Mar 2017 09:23:46 +0000 Subject: [PATCH 06/30] tapepremis - guesses which capture used based on metadata --- tapepremis.py | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/tapepremis.py b/tapepremis.py index a9de0bd..2c35375 100755 --- a/tapepremis.py +++ b/tapepremis.py @@ -9,6 +9,7 @@ import time import uuid from glob import glob +import lxml.etree as ET from ififuncs import hashlib_manifest from ififuncs import get_date_modified from premis import make_premis @@ -49,7 +50,20 @@ def get_checksum(manifest): return md5[:32] -def get_capture_workstation(): +def get_capture_workstation(mediaxml): + mediaxml_object = ET.parse(mediaxml) + mxml = mediaxml_object.getroot() + mediaExpress_check = len(mxml.xpath('//COMAPPLEPROAPPSLOGNOTE')) + fcp7_check = len(mxml.xpath('//COMAPPLEFINALCUTSTUDIOMEDIAUUID')) + if mediaExpress_check > 0: + print 'this was probably Media Express?' + elif fcp7_check > 0: + print 'this was probably FCP7?' + else: + # i can't find any meaningful distinctive metadata that control room writes. + print 'this was probably Control Room?' + + sys.exit() capture_station = '' if not capture_station == '1' or capture_station == '2' or capture_station == '3': capture_station = raw_input('\n\n**** Where was tape captured?\nPress 1, 2 or 3\n\n1. es2\n2. loopline\n3. ingest 1\n' ) @@ -66,10 +80,14 @@ def get_capture_workstation(): def main(): premisxml, premis_namespace, doc, premis = setup_xml(sys.argv[1]) - get_capture_workstation() + source_file = sys.argv[1] sip_dir = os.path.dirname(source_file) parent_dir = os.path.dirname(sip_dir) + metadata_dir = os.path.join(parent_dir, 'metadata') + ffv1_xml = os.path.join(metadata_dir, os.path.basename(sys.argv[1] + '_mediainfo.xml')) + if os.path.isfile(ffv1_xml): + get_capture_workstation(ffv1_xml) ''' /home/kieranjol/ifigit/ifiscripts/massive/objects sip /home/kieranjol/ifigit/ifiscripts/massive parent From 9717bce38b156adaec210300d87f95094928de20 Mon Sep 17 00:00:00 2001 From: kieranjol Date: Tue, 7 Mar 2017 09:43:34 +0000 Subject: [PATCH 07/30] premis_agents - adds some more tape hardware --- premis_agents.csv | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/premis_agents.csv b/premis_agents.csv index ec5ac87..3328ee4 100644 --- a/premis_agents.csv +++ b/premis_agents.csv @@ -30,11 +30,11 @@ UUID,8c02d962-5ac5-4e51-a30c-002553134320,Baselight,software,4.4.7725,Plugin for UUID,52adf876-bf30-431c-b0c6-80cc4fd9406c,osx,software,10.9.5,,implementer UUID,9e59e772-14b0-4f9e-95b3-b88b6e751c3b,Aoife Fitzmaurice,person,,,implementer UUID,bc3de900-3903-4764-ab91-2ce89977d0d2,AEO-Light,software,2.2,,executing program -UUID,60ae3a85-b595-45e0-8e4a-b95e90a6c422,DVW-M2000p,hardware,,Sony Digital Betacam VTR,implementer -UUID,dbdbb06b-ab10-49db-97a1-ff2ad285f9d2,,,,, -UUID,e2ca7ad2-8edf-4e4e-a3c7-36e970c796c9,,,,, -UUID,f47b98a2-b879-4786-9f6b-11fc3234a91e,,,,, -UUID,c5e504ca-b4d5-410f-b87b-4b7ed794e44d,,,,, +UUID,60ae3a85-b595-45e0-8e4a-b95e90a6c422,Sony DVW-M2000P,hardware,,Sony Digital Betacam VTR,implementer +UUID,dbdbb06b-ab10-49db-97a1-ff2ad285f9d2,Sony DVW-A510P,hardware,,Sony Digital Betacam VTR,implementer +UUID,e2ca7ad2-8edf-4e4e-a3c7-36e970c796c9,Sony J30-SDI,hardware,,Sony Digital Betacam VTR,implementer +UUID,f47b98a2-b879-4786-9f6b-11fc3234a91e,Blackmagic UltraStudio 4K,hardware,,,implementer +UUID,c5e504ca-b4d5-410f-b87b-4b7ed794e44d,AJA KONA 3,hardware,,, UUID,75a0b9ff-1f04-43bd-aa87-c31b73b1b61c,,,,, UUID,c5fc84fc-cc96-42a1-a5be-830b4e3012ae,,,,, UUID,be3060a8-6ccf-4339-97d5-a265687c3a5a,,,,, From 00a229bf893da81956838f164e0d35614fd9e95c Mon Sep 17 00:00:00 2001 From: kieranjol Date: Tue, 7 Mar 2017 23:30:53 +0000 Subject: [PATCH 08/30] tapepremis - makes a bit more progress w/ interview --- tapepremis.py | 47 +++++++++++++++++++++++++++++------------------ 1 file changed, 29 insertions(+), 18 deletions(-) diff --git a/tapepremis.py b/tapepremis.py index 2c35375..bcc3411 100755 --- a/tapepremis.py +++ b/tapepremis.py @@ -21,7 +21,7 @@ from premis import create_intellectual_entity -def capture_description(premis, xml_info): +def capture_description(premis, xml_info,capture_station): ''' Events: 1. capture - glean from v210 mediainfo xml @@ -35,9 +35,9 @@ def capture_description(premis, xml_info): framemd5_uuid = str(uuid.uuid4()) manifest_uuid = str(uuid.uuid4()) ffmpegAgent = make_agent(premis,[transcode_uuid] , 'ee83e19e-cdb1-4d83-91fb-7faf7eff738e') - m200pAgent = make_agent(premis,[transcode_uuid] , '60ae3a85-b595-45e0-8e4a-b95e90a6c422') + m2000pAgent = make_agent(premis,[transcode_uuid] , '60ae3a85-b595-45e0-8e4a-b95e90a6c422') - capture_agents = [ffmpegAgent, m200pAgent] + capture_agents = [ffmpegAgent, m2000pAgent] make_event(premis, 'creation', 'transcode to ffv1 (figure out wording later)', capture_agents, transcode_uuid,xml_info[4], 'outcome', 'now-placeholder') @@ -57,25 +57,33 @@ def get_capture_workstation(mediaxml): fcp7_check = len(mxml.xpath('//COMAPPLEFINALCUTSTUDIOMEDIAUUID')) if mediaExpress_check > 0: print 'this was probably Media Express?' + capture_station = 'es2' elif fcp7_check > 0: print 'this was probably FCP7?' + capture_station = 'loopline' else: # i can't find any meaningful distinctive metadata that control room writes. print 'this was probably Control Room?' + capture_station = 'ingest1' + print 'Does this sound ok? Y/N?' + station_confirm = '' + while station_confirm not in ('Y','y','N','n'): + station_confirm = raw_input() + if station_confirm not in ('Y','y','N','n'): + print 'Incorrect input. Please enter Y or N' - sys.exit() - capture_station = '' - if not capture_station == '1' or capture_station == '2' or capture_station == '3': - capture_station = raw_input('\n\n**** Where was tape captured?\nPress 1, 2 or 3\n\n1. es2\n2. loopline\n3. ingest 1\n' ) - while capture_station not in ('1','2','3'): - capture_station = raw_input('\n\n**** Where was tape captured?\nPress 1, 2 or 3\n\n1. es2\n2. loopline\n3. ingest 1\n' ) - if capture_station == '1': - capture_station = 'telecine' - elif capture_station == '2': - capture_station = 'ca_machine' - elif capture_station == '3': - capture_station = 'ca_machine' - return capture_station + capture_station = '' + if not capture_station == '1' or capture_station == '2' or capture_station == '3': + capture_station = raw_input('\n\n**** Where was tape captured?\nPress 1, 2 or 3\n\n1. es2\n2. loopline\n3. ingest 1\n' ) + while capture_station not in ('1','2','3'): + capture_station = raw_input('\n\n**** Where was tape captured?\nPress 1, 2 or 3\n\n1. es2\n2. loopline\n3. ingest 1\n' ) + if capture_station == '1': + capture_station = 'es2' + elif capture_station == '2': + capture_station = 'loopline' + elif capture_station == '3': + capture_station = 'ingest1' + return capture_station def main(): @@ -87,7 +95,10 @@ def main(): metadata_dir = os.path.join(parent_dir, 'metadata') ffv1_xml = os.path.join(metadata_dir, os.path.basename(sys.argv[1] + '_mediainfo.xml')) if os.path.isfile(ffv1_xml): - get_capture_workstation(ffv1_xml) + capture_sation = get_capture_workstation(ffv1_xml) + else: + print('Can\'t find XML of FFv1 file. Exiting!') + sys.exit() ''' /home/kieranjol/ifigit/ifiscripts/massive/objects sip /home/kieranjol/ifigit/ifiscripts/massive parent @@ -102,7 +113,7 @@ def main(): representation_uuid = str(uuid.uuid4()) # the final argument here is 'loopline' which tells premis.py to not generate a checksum xml_info = make_premis(source_file, items, premis, premis_namespace, premisxml,representation_uuid,md5) - capture_description(premis, xml_info) + capture_description(premis, xml_info, capture_station) From b056658c7271f51f39496288e3d1f3de6a24858a Mon Sep 17 00:00:00 2001 From: kieranjol Date: Tue, 7 Mar 2017 23:47:59 +0000 Subject: [PATCH 09/30] tapepremis - few more events placeholders --- tapepremis.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tapepremis.py b/tapepremis.py index bcc3411..25b22f6 100755 --- a/tapepremis.py +++ b/tapepremis.py @@ -35,10 +35,12 @@ def capture_description(premis, xml_info,capture_station): framemd5_uuid = str(uuid.uuid4()) manifest_uuid = str(uuid.uuid4()) ffmpegAgent = make_agent(premis,[transcode_uuid] , 'ee83e19e-cdb1-4d83-91fb-7faf7eff738e') - m2000pAgent = make_agent(premis,[transcode_uuid] , '60ae3a85-b595-45e0-8e4a-b95e90a6c422') + m2000pAgent = make_agent(premis,[transcode_uuid] , '60ae3a85-b595-45e0-8e4a-b95e90a6c422') capture_agents = [ffmpegAgent, m2000pAgent] - make_event(premis, 'creation', 'transcode to ffv1 (figure out wording later)', capture_agents, transcode_uuid,xml_info[4], 'outcome', 'now-placeholder') + make_event(premis, 'creation', 'tape capture', capture_agents, transcode_uuid,xml_info[4], 'outcome', 'now-placeholder') + make_event(premis, 'compression', 'transcode to ffv1 (figure out wording later)', capture_agents, transcode_uuid,xml_info[4], 'outcome', 'now-placeholder') + make_event(premis, 'fixity check', 'lossless verification via framemd5 (figure out wording later)', capture_agents, transcode_uuid,xml_info[4], 'outcome', 'now-placeholder') def get_checksum(manifest): @@ -95,7 +97,7 @@ def main(): metadata_dir = os.path.join(parent_dir, 'metadata') ffv1_xml = os.path.join(metadata_dir, os.path.basename(sys.argv[1] + '_mediainfo.xml')) if os.path.isfile(ffv1_xml): - capture_sation = get_capture_workstation(ffv1_xml) + capture_station = get_capture_workstation(ffv1_xml) else: print('Can\'t find XML of FFv1 file. Exiting!') sys.exit() From c679f484c3b5ac0f946bd295641778eb8a1a2dce Mon Sep 17 00:00:00 2001 From: kieranjol Date: Wed, 8 Mar 2017 09:33:47 +0000 Subject: [PATCH 10/30] tapepremis/premis_agents - more events and agents --- premis_agents.csv | 10 +++++----- tapepremis.py | 43 ++++++++++++++++++++++++++----------------- 2 files changed, 31 insertions(+), 22 deletions(-) diff --git a/premis_agents.csv b/premis_agents.csv index 3328ee4..5a1f2b4 100644 --- a/premis_agents.csv +++ b/premis_agents.csv @@ -20,11 +20,11 @@ UUID,838a1a1b-7ddd-4846-ae8e-3b5ecb4aae55,Mac Pro,hardware,,Image restoration te UUID,55003bbd-49a4-4c7b-8da2-0d5b9bf10168,ProTools,software,12.5.1,,implementer UUID,ca731b64-638f-4dc3-9d27-0fc14387e38c,Steadyframe Computer,hardware,,Host computer for 1369-e9d1-425b-a810-6db1150955ba,implementer UUID,946e5d40-a07f-47d1-9637-def5cb7854ba,HP Z800,hardware,Z800,,implementer -UUID,230d72da-07e7-4a79-96ca-998b9f7a3e41,mac mini,hardware,,,implementer +UUID,230d72da-07e7-4a79-96ca-998b9f7a3e41,mac mini,hardware,,Telecine room Mac Mini,implementer UUID,634edcae-65a7-48b1-9758-bcfbdd12f80b,Aaron Healy,person,,,implementer UUID,25cdb926-12b5-4647-bef1-6f9bca3c18bc,Felix Meehan,person,,,implementer UUID,642208d9-6f65-4498-998f-9a17379319f9,Eoin O'Donohoe,person,,,implementer -UUID,68f56ede-a1cf-48aa-b1d8-dc9850d5bfcc,osx,software,10.11.4,,implementer +UUID,68f56ede-a1cf-48aa-b1d8-dc9850d5bfcc,osx,software,10.11.4,El Capitan,implementer UUID,e5872957-8ee8-4c20-bd8e-d76e1de01b34,iZotope Rx5,software,5.01.184,,implementer UUID,8c02d962-5ac5-4e51-a30c-002553134320,Baselight,software,4.4.7725,Plugin for 11e157a3-1aa7-4195-b816-009a3d47148c,implementer UUID,52adf876-bf30-431c-b0c6-80cc4fd9406c,osx,software,10.9.5,,implementer @@ -33,9 +33,9 @@ UUID,bc3de900-3903-4764-ab91-2ce89977d0d2,AEO-Light,software,2.2,,executing prog UUID,60ae3a85-b595-45e0-8e4a-b95e90a6c422,Sony DVW-M2000P,hardware,,Sony Digital Betacam VTR,implementer UUID,dbdbb06b-ab10-49db-97a1-ff2ad285f9d2,Sony DVW-A510P,hardware,,Sony Digital Betacam VTR,implementer UUID,e2ca7ad2-8edf-4e4e-a3c7-36e970c796c9,Sony J30-SDI,hardware,,Sony Digital Betacam VTR,implementer -UUID,f47b98a2-b879-4786-9f6b-11fc3234a91e,Blackmagic UltraStudio 4K,hardware,,,implementer -UUID,c5e504ca-b4d5-410f-b87b-4b7ed794e44d,AJA KONA 3,hardware,,, -UUID,75a0b9ff-1f04-43bd-aa87-c31b73b1b61c,,,,, +UUID,f47b98a2-b879-4786-9f6b-11fc3234a91e,Blackmagic UltraStudio 4K,hardware,,Edit Suite 2,implementer +UUID,c5e504ca-b4d5-410f-b87b-4b7ed794e44d,AJA KONA 3,hardware,,,implementer +UUID,75a0b9ff-1f04-43bd-aa87-c31b73b1b61c,Mac Pro,hardware,,Edit Suite 2,implementer UUID,c5fc84fc-cc96-42a1-a5be-830b4e3012ae,,,,, UUID,be3060a8-6ccf-4339-97d5-a265687c3a5a,,,,, UUID,c93ee9a5-4c0c-4670-b857-8726bfd23cae,,,,, diff --git a/tapepremis.py b/tapepremis.py index 25b22f6..5852cda 100755 --- a/tapepremis.py +++ b/tapepremis.py @@ -35,12 +35,20 @@ def capture_description(premis, xml_info,capture_station): framemd5_uuid = str(uuid.uuid4()) manifest_uuid = str(uuid.uuid4()) ffmpegAgent = make_agent(premis,[transcode_uuid] , 'ee83e19e-cdb1-4d83-91fb-7faf7eff738e') - m2000pAgent = make_agent(premis,[transcode_uuid] , '60ae3a85-b595-45e0-8e4a-b95e90a6c422') - - capture_agents = [ffmpegAgent, m2000pAgent] - make_event(premis, 'creation', 'tape capture', capture_agents, transcode_uuid,xml_info[4], 'outcome', 'now-placeholder') + j30sdiAgent = make_agent(premis,[capture_uuid] , 'e2ca7ad2-8edf-4e4e-a3c7-36e970c796c9') + bm4kAgent = make_agent(premis,[capture_uuid] , 'f47b98a2-b879-4786-9f6b-11fc3234a91e') + edit_suite2_macAgent = make_agent(premis,[capture_uuid] , '75a0b9ff-1f04-43bd-aa87-c31b73b1b61c') + m2000pAgent = make_agent(premis,[capture_uuid] , '60ae3a85-b595-45e0-8e4a-b95e90a6c422') + elcapitanAgent = make_agent(premis,[capture_uuid] , '68f56ede-a1cf-48aa-b1d8-dc9850d5bfcc') + print capture_station + if capture_station == 'es2': + capture_agents = [j30sdiAgent, bm4kAgent, edit_suite2_macAgent, elcapitanAgent] + if capture_station == 'loopline': + capture_agents = [j30sdiAgent, bm4kAgent, edit_suite2_macAgent, elcapitanAgent] + make_event(premis, 'creation', 'tape capture', capture_agents, capture_uuid,xml_info[4], 'outcome', 'now-placeholder') make_event(premis, 'compression', 'transcode to ffv1 (figure out wording later)', capture_agents, transcode_uuid,xml_info[4], 'outcome', 'now-placeholder') - make_event(premis, 'fixity check', 'lossless verification via framemd5 (figure out wording later)', capture_agents, transcode_uuid,xml_info[4], 'outcome', 'now-placeholder') + make_event(premis, 'fixity check', 'lossless verification via framemd5 (figure out wording later)', capture_agents, framemd5_uuid,xml_info[4], 'source', 'now-placeholder') + make_event(premis, 'message digest calculation', 'whole file checksum manifest of SIP', capture_agents, manifest_uuid,xml_info[4], 'source', 'now-placeholder') def get_checksum(manifest): @@ -68,24 +76,25 @@ def get_capture_workstation(mediaxml): print 'this was probably Control Room?' capture_station = 'ingest1' print 'Does this sound ok? Y/N?' + print capture_station station_confirm = '' while station_confirm not in ('Y','y','N','n'): station_confirm = raw_input() if station_confirm not in ('Y','y','N','n'): print 'Incorrect input. Please enter Y or N' - - capture_station = '' - if not capture_station == '1' or capture_station == '2' or capture_station == '3': - capture_station = raw_input('\n\n**** Where was tape captured?\nPress 1, 2 or 3\n\n1. es2\n2. loopline\n3. ingest 1\n' ) - while capture_station not in ('1','2','3'): + elif station_confirm not in ('Y','y'): + capture_station = '' + if not capture_station == '1' or capture_station == '2' or capture_station == '3': capture_station = raw_input('\n\n**** Where was tape captured?\nPress 1, 2 or 3\n\n1. es2\n2. loopline\n3. ingest 1\n' ) - if capture_station == '1': - capture_station = 'es2' - elif capture_station == '2': - capture_station = 'loopline' - elif capture_station == '3': - capture_station = 'ingest1' - return capture_station + while capture_station not in ('1','2','3'): + capture_station = raw_input('\n\n**** Where was tape captured?\nPress 1, 2 or 3\n\n1. es2\n2. loopline\n3. ingest 1\n' ) + if capture_station == '1': + capture_station = 'es2' + elif capture_station == '2': + capture_station = 'loopline' + elif capture_station == '3': + capture_station = 'ingest1' + return capture_station def main(): From 88a80cf9edd1acc720ec8210bdba3bebee355ebb Mon Sep 17 00:00:00 2001 From: kieranjol Date: Wed, 8 Mar 2017 09:41:21 +0000 Subject: [PATCH 11/30] tapepremis - hacky method for getting source capture time --- tapepremis.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tapepremis.py b/tapepremis.py index 5852cda..9900cc0 100755 --- a/tapepremis.py +++ b/tapepremis.py @@ -59,7 +59,12 @@ def get_checksum(manifest): if md5[-5:].rsplit()[0] == '.mkv': return md5[:32] +def get_times(sourcexml): + mediaxml_object = ET.parse(sourcexml) + mxml = mediaxml_object.getroot() + capture_date = mxml.xpath('//File_Modified_Date_Local')[0].text #encoded date is probably better + print capture_date def get_capture_workstation(mediaxml): mediaxml_object = ET.parse(mediaxml) mxml = mediaxml_object.getroot() @@ -105,6 +110,9 @@ def main(): parent_dir = os.path.dirname(sip_dir) metadata_dir = os.path.join(parent_dir, 'metadata') ffv1_xml = os.path.join(metadata_dir, os.path.basename(sys.argv[1] + '_mediainfo.xml')) + # the replace here is a terrible hack. Sad! Fix! + source_xml = os.path.join(metadata_dir, os.path.basename(sys.argv[1].replace('.mkv', '.mov') + '_source_mediainfo.xml')) + get_times(source_xml) if os.path.isfile(ffv1_xml): capture_station = get_capture_workstation(ffv1_xml) else: From 6a8c69f3bfb1348f4873d22c8bfaa2e9b56ddad3 Mon Sep 17 00:00:00 2001 From: kieranjol Date: Fri, 10 Mar 2017 17:19:24 +0000 Subject: [PATCH 12/30] tapepremis/premis_agents - adds a few more agents --- premis_agents.csv | 10 +++++----- tapepremis.py | 30 +++++++++++++++++++++--------- 2 files changed, 26 insertions(+), 14 deletions(-) diff --git a/premis_agents.csv b/premis_agents.csv index 5a1f2b4..28e59db 100644 --- a/premis_agents.csv +++ b/premis_agents.csv @@ -11,7 +11,8 @@ UUID,b22baa5c-8160-427d-9e2f-b62a7263439d,debian,software,4.0r5,Operating system UUID,9486b779-907c-4cc4-802c-22e07dc1242f,osx,software,10.11.1,,implementer UUID,a3bc371f-11fa-4319-a656-1e53c2527552,osx,software,10.11.2,,implementer UUID,f2b79d56-dcd7-4f32-8d80-497851a8fd6b,osx,software,10.11.3,,implementer -UUID,192f61b1-8130-4236-a827-a194a20557fe,Windows 7,software,Service Pack 2,Operating system for 946e5d40-a07f-47d1-9637-def5cb7854ba,implementer +UUID,c5fc84fc-cc96-42a1-a5be-830b4e3012ae,osx,software,10.7.5,Mac OSX Lion,implementer +UUID,192f61b1-8130-4236-a827-a194a20557fe,Windows 7,software,Service Pack 2,,implementer UUID,9281e196-b691-4f03-a004-0ea2bb032a93,SteadyFrame Transcoder,software,,,implementer UUID,b342d3f7-d87e-4fe3-8da5-89e16a30b59e,Raelene Casey,person,,,implementer UUID,cb638899-7ade-4a18-9bba-b8b1a20132a7,Dean Kavanagh,person,,,implementer @@ -36,10 +37,9 @@ UUID,e2ca7ad2-8edf-4e4e-a3c7-36e970c796c9,Sony J30-SDI,hardware,,Sony Digital Be UUID,f47b98a2-b879-4786-9f6b-11fc3234a91e,Blackmagic UltraStudio 4K,hardware,,Edit Suite 2,implementer UUID,c5e504ca-b4d5-410f-b87b-4b7ed794e44d,AJA KONA 3,hardware,,,implementer UUID,75a0b9ff-1f04-43bd-aa87-c31b73b1b61c,Mac Pro,hardware,,Edit Suite 2,implementer -UUID,c5fc84fc-cc96-42a1-a5be-830b4e3012ae,,,,, -UUID,be3060a8-6ccf-4339-97d5-a265687c3a5a,,,,, -UUID,c93ee9a5-4c0c-4670-b857-8726bfd23cae,,,,, -UUID,5fd99e09-63d7-4e9f-8383-1902f727d2a5,,,,, +UUID,be3060a8-6ccf-4339-97d5-a265687c3a5a,Mac Pro,hardware,,Loopline edit suite,implementer +UUID,c93ee9a5-4c0c-4670-b857-8726bfd23cae,AJA KONA LHe Plus,hardware,00T59106,,implementer +UUID,5fd99e09-63d7-4e9f-8383-1902f727d2a5,Hewlett Packard Z420 Workstation,hardware,CZC4310HNZ,Ingest 1,implementer UUID,d13fae39-ac71-446e-88df-96c0d267b26c,,,,, UUID,8fac66ac-05de-478c-9d93-c89d46233197,,,,, UUID,005d4f57-69a4-40d9-9360-d45ff8e44f64,,,,, diff --git a/tapepremis.py b/tapepremis.py index 9900cc0..088f477 100755 --- a/tapepremis.py +++ b/tapepremis.py @@ -35,18 +35,29 @@ def capture_description(premis, xml_info,capture_station): framemd5_uuid = str(uuid.uuid4()) manifest_uuid = str(uuid.uuid4()) ffmpegAgent = make_agent(premis,[transcode_uuid] , 'ee83e19e-cdb1-4d83-91fb-7faf7eff738e') - j30sdiAgent = make_agent(premis,[capture_uuid] , 'e2ca7ad2-8edf-4e4e-a3c7-36e970c796c9') - bm4kAgent = make_agent(premis,[capture_uuid] , 'f47b98a2-b879-4786-9f6b-11fc3234a91e') - edit_suite2_macAgent = make_agent(premis,[capture_uuid] , '75a0b9ff-1f04-43bd-aa87-c31b73b1b61c') - m2000pAgent = make_agent(premis,[capture_uuid] , '60ae3a85-b595-45e0-8e4a-b95e90a6c422') - elcapitanAgent = make_agent(premis,[capture_uuid] , '68f56ede-a1cf-48aa-b1d8-dc9850d5bfcc') - print capture_station if capture_station == 'es2': + j30sdiAgent = make_agent(premis,[capture_uuid] , 'e2ca7ad2-8edf-4e4e-a3c7-36e970c796c9') + bm4kAgent = make_agent(premis,[capture_uuid] , 'f47b98a2-b879-4786-9f6b-11fc3234a91e') + edit_suite2_macAgent = make_agent(premis,[capture_uuid] , '75a0b9ff-1f04-43bd-aa87-c31b73b1b61c') + elcapitanAgent = make_agent(premis,[capture_uuid] , '68f56ede-a1cf-48aa-b1d8-dc9850d5bfcc') capture_agents = [j30sdiAgent, bm4kAgent, edit_suite2_macAgent, elcapitanAgent] - if capture_station == 'loopline': - capture_agents = [j30sdiAgent, bm4kAgent, edit_suite2_macAgent, elcapitanAgent] + elif capture_station == 'loopline': + m2000pAgent = make_agent(premis,[capture_uuid] , '60ae3a85-b595-45e0-8e4a-b95e90a6c422') + kona3Agent = make_agent(premis,[capture_uuid] , 'c5e504ca-b4d5-410f-b87b-4b7ed794e44d') + osxLionAgent = make_agent(premis,[capture_uuid] , 'c5fc84fc-cc96-42a1-a5be-830b4e3012ae') + looplineMacAgent = make_agent(premis,[capture_uuid] , 'be3060a8-6ccf-4339-97d5-a265687c3a5a') + capture_agents = [m2000pAgent, kona3Agent, looplineMacAgent, osxLionAgent] + elif capture_station == 'ingest1': + sony510pAgent = make_agent(premis,[capture_uuid] , 'dbdbb06b-ab10-49db-97a1-ff2ad285f9d2') + ingest1Agent = make_agent(premis,[capture_uuid] , '5fd99e09-63d7-4e9f-8383-1902f727d2a5') + windows7Agent = make_agent(premis,[capture_uuid] , '192f61b1-8130-4236-a827-a194a20557fe') + ingest1konaAgent = make_agent(premis,[capture_uuid] , 'c93ee9a5-4c0c-4670-b857-8726bfd23cae') + capture_agents = [sony510pAgent, ingest1konaAgent, ingest1Agent, windows7Agent] make_event(premis, 'creation', 'tape capture', capture_agents, capture_uuid,xml_info[4], 'outcome', 'now-placeholder') - make_event(premis, 'compression', 'transcode to ffv1 (figure out wording later)', capture_agents, transcode_uuid,xml_info[4], 'outcome', 'now-placeholder') + if capture_station == 'loopline': + make_event(premis, 'compression', 'transcode to ffv1 while specifying 4:3 DAR and Top Field First interlacement', capture_agents, transcode_uuid,xml_info[4], 'outcome', 'now-placeholder') + else: + make_event(premis, 'compression', 'transcode to ffv1 (figure out wording later)', capture_agents, transcode_uuid,xml_info[4], 'outcome', 'now-placeholder') make_event(premis, 'fixity check', 'lossless verification via framemd5 (figure out wording later)', capture_agents, framemd5_uuid,xml_info[4], 'source', 'now-placeholder') make_event(premis, 'message digest calculation', 'whole file checksum manifest of SIP', capture_agents, manifest_uuid,xml_info[4], 'source', 'now-placeholder') @@ -132,6 +143,7 @@ def main(): representation_uuid = str(uuid.uuid4()) # the final argument here is 'loopline' which tells premis.py to not generate a checksum xml_info = make_premis(source_file, items, premis, premis_namespace, premisxml,representation_uuid,md5) + print xml_info capture_description(premis, xml_info, capture_station) From 02c0d66a4c4dc1bd42574546b4077e3c140a0375 Mon Sep 17 00:00:00 2001 From: kieranjol Date: Fri, 10 Mar 2017 19:27:19 +0000 Subject: [PATCH 13/30] tapepremis - adds a very broken but promising representation --- tapepremis.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tapepremis.py b/tapepremis.py index 088f477..f93d7b9 100755 --- a/tapepremis.py +++ b/tapepremis.py @@ -115,7 +115,6 @@ def get_capture_workstation(mediaxml): def main(): premisxml, premis_namespace, doc, premis = setup_xml(sys.argv[1]) - source_file = sys.argv[1] sip_dir = os.path.dirname(source_file) parent_dir = os.path.dirname(sip_dir) @@ -140,9 +139,12 @@ def main(): sys.exit() md5 = get_checksum(manifest) items = {"workflow":"raw audio","oe":os.path.basename(source_file), "filmographic":'n/a', "sourceAccession":os.path.basename(source_file), "interventions":['placeholder'], "prepList":['placeholder'], "user":'Kieran O\' Leary'} + representation_uuid = str(uuid.uuid4()) # the final argument here is 'loopline' which tells premis.py to not generate a checksum xml_info = make_premis(source_file, items, premis, premis_namespace, premisxml,representation_uuid,md5) + linkinguuids = [xml_info[4][0],'n/a',os.path.basename(source_file)] + create_representation(premisxml, premis_namespace, doc, premis, items,linkinguuids, representation_uuid, 'no_sequence', 'n/a') print xml_info capture_description(premis, xml_info, capture_station) From cd8c093c9975bf4158b46e5db1687fc7ae1e004a Mon Sep 17 00:00:00 2001 From: kieranjol Date: Sat, 11 Mar 2017 11:30:16 +0000 Subject: [PATCH 14/30] tapepremis - pull capture and ffv1 times from mediainfo --- tapepremis.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/tapepremis.py b/tapepremis.py index f93d7b9..c3b6f13 100755 --- a/tapepremis.py +++ b/tapepremis.py @@ -21,7 +21,8 @@ from premis import create_intellectual_entity -def capture_description(premis, xml_info,capture_station): +def capture_description(premis, xml_info,capture_station, times): + print times ''' Events: 1. capture - glean from v210 mediainfo xml @@ -53,11 +54,11 @@ def capture_description(premis, xml_info,capture_station): windows7Agent = make_agent(premis,[capture_uuid] , '192f61b1-8130-4236-a827-a194a20557fe') ingest1konaAgent = make_agent(premis,[capture_uuid] , 'c93ee9a5-4c0c-4670-b857-8726bfd23cae') capture_agents = [sony510pAgent, ingest1konaAgent, ingest1Agent, windows7Agent] - make_event(premis, 'creation', 'tape capture', capture_agents, capture_uuid,xml_info[4], 'outcome', 'now-placeholder') + make_event(premis, 'creation', 'tape capture', capture_agents, capture_uuid,xml_info[4], 'outcome', times[0]) if capture_station == 'loopline': - make_event(premis, 'compression', 'transcode to ffv1 while specifying 4:3 DAR and Top Field First interlacement', capture_agents, transcode_uuid,xml_info[4], 'outcome', 'now-placeholder') + make_event(premis, 'compression', 'transcode to ffv1 while specifying 4:3 DAR and Top Field First interlacement', capture_agents, transcode_uuid,xml_info[4], 'outcome', times[1]) else: - make_event(premis, 'compression', 'transcode to ffv1 (figure out wording later)', capture_agents, transcode_uuid,xml_info[4], 'outcome', 'now-placeholder') + make_event(premis, 'compression', 'transcode to ffv1 (figure out wording later)', capture_agents, transcode_uuid,xml_info[4], 'outcome', times[1]) make_event(premis, 'fixity check', 'lossless verification via framemd5 (figure out wording later)', capture_agents, framemd5_uuid,xml_info[4], 'source', 'now-placeholder') make_event(premis, 'message digest calculation', 'whole file checksum manifest of SIP', capture_agents, manifest_uuid,xml_info[4], 'source', 'now-placeholder') @@ -75,7 +76,7 @@ def get_times(sourcexml): mediaxml_object = ET.parse(sourcexml) mxml = mediaxml_object.getroot() capture_date = mxml.xpath('//File_Modified_Date_Local')[0].text #encoded date is probably better - print capture_date + return capture_date def get_capture_workstation(mediaxml): mediaxml_object = ET.parse(mediaxml) mxml = mediaxml_object.getroot() @@ -122,7 +123,9 @@ def main(): ffv1_xml = os.path.join(metadata_dir, os.path.basename(sys.argv[1] + '_mediainfo.xml')) # the replace here is a terrible hack. Sad! Fix! source_xml = os.path.join(metadata_dir, os.path.basename(sys.argv[1].replace('.mkv', '.mov') + '_source_mediainfo.xml')) - get_times(source_xml) + capture_time = get_times(source_xml) + transcode_time = get_times(ffv1_xml) + times = [capture_time, transcode_time] if os.path.isfile(ffv1_xml): capture_station = get_capture_workstation(ffv1_xml) else: @@ -146,7 +149,7 @@ def main(): linkinguuids = [xml_info[4][0],'n/a',os.path.basename(source_file)] create_representation(premisxml, premis_namespace, doc, premis, items,linkinguuids, representation_uuid, 'no_sequence', 'n/a') print xml_info - capture_description(premis, xml_info, capture_station) + capture_description(premis, xml_info, capture_station, times) From 348b22d99fcb77049ecd787c9083959d0cd96d48 Mon Sep 17 00:00:00 2001 From: kieranjol Date: Sat, 11 Mar 2017 11:47:01 +0000 Subject: [PATCH 15/30] tapepremis - remove redundant modules and some housekeeping --- tapepremis.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/tapepremis.py b/tapepremis.py index c3b6f13..8bc0398 100755 --- a/tapepremis.py +++ b/tapepremis.py @@ -1,16 +1,11 @@ #!/usr/bin/env python import subprocess -import argparse import sys import os -import hashlib import shutil -import uuid import time import uuid -from glob import glob import lxml.etree as ET -from ififuncs import hashlib_manifest from ififuncs import get_date_modified from premis import make_premis from premis import write_premis @@ -22,7 +17,6 @@ def capture_description(premis, xml_info,capture_station, times): - print times ''' Events: 1. capture - glean from v210 mediainfo xml @@ -49,10 +43,10 @@ def capture_description(premis, xml_info,capture_station, times): looplineMacAgent = make_agent(premis,[capture_uuid] , 'be3060a8-6ccf-4339-97d5-a265687c3a5a') capture_agents = [m2000pAgent, kona3Agent, looplineMacAgent, osxLionAgent] elif capture_station == 'ingest1': - sony510pAgent = make_agent(premis,[capture_uuid] , 'dbdbb06b-ab10-49db-97a1-ff2ad285f9d2') + sony510pAgent = make_agent(premis,[capture_uuid] , 'dbdbb06b-ab10-49db-97a1-ff2ad285f9d2') ingest1Agent = make_agent(premis,[capture_uuid] , '5fd99e09-63d7-4e9f-8383-1902f727d2a5') windows7Agent = make_agent(premis,[capture_uuid] , '192f61b1-8130-4236-a827-a194a20557fe') - ingest1konaAgent = make_agent(premis,[capture_uuid] , 'c93ee9a5-4c0c-4670-b857-8726bfd23cae') + ingest1konaAgent = make_agent(premis,[capture_uuid] , 'c93ee9a5-4c0c-4670-b857-8726bfd23cae') capture_agents = [sony510pAgent, ingest1konaAgent, ingest1Agent, windows7Agent] make_event(premis, 'creation', 'tape capture', capture_agents, capture_uuid,xml_info[4], 'outcome', times[0]) if capture_station == 'loopline': From d702362887a5c2ac793af6fc6edc65a673228266 Mon Sep 17 00:00:00 2001 From: kieranjol Date: Sat, 11 Mar 2017 21:59:09 +0000 Subject: [PATCH 16/30] tapepremis - tries to be more PEP 8 compliant --- tapepremis.py | 217 +++++++++++++++++++++++++++++++++----------------- 1 file changed, 145 insertions(+), 72 deletions(-) diff --git a/tapepremis.py b/tapepremis.py index 8bc0398..0ec29f1 100755 --- a/tapepremis.py +++ b/tapepremis.py @@ -1,9 +1,6 @@ #!/usr/bin/env python -import subprocess import sys import os -import shutil -import time import uuid import lxml.etree as ET from ififuncs import get_date_modified @@ -13,10 +10,9 @@ from premis import make_event from premis import setup_xml from premis import create_representation -from premis import create_intellectual_entity -def capture_description(premis, xml_info,capture_station, times): +def capture_description(premis, xml_info, capture_station, times): ''' Events: 1. capture - glean from v210 mediainfo xml @@ -25,38 +21,87 @@ def capture_description(premis, xml_info,capture_station, times): 4. whole file manifest - sip log that's it? ''' - capture_uuid = str(uuid.uuid4()) - transcode_uuid = str(uuid.uuid4()) - framemd5_uuid = str(uuid.uuid4()) - manifest_uuid = str(uuid.uuid4()) - ffmpegAgent = make_agent(premis,[transcode_uuid] , 'ee83e19e-cdb1-4d83-91fb-7faf7eff738e') + capture_uuid = str(uuid.uuid4()) + transcode_uuid = str(uuid.uuid4()) + framemd5_uuid = str(uuid.uuid4()) + manifest_uuid = str(uuid.uuid4()) if capture_station == 'es2': - j30sdiAgent = make_agent(premis,[capture_uuid] , 'e2ca7ad2-8edf-4e4e-a3c7-36e970c796c9') - bm4kAgent = make_agent(premis,[capture_uuid] , 'f47b98a2-b879-4786-9f6b-11fc3234a91e') - edit_suite2_macAgent = make_agent(premis,[capture_uuid] , '75a0b9ff-1f04-43bd-aa87-c31b73b1b61c') - elcapitanAgent = make_agent(premis,[capture_uuid] , '68f56ede-a1cf-48aa-b1d8-dc9850d5bfcc') - capture_agents = [j30sdiAgent, bm4kAgent, edit_suite2_macAgent, elcapitanAgent] + j30sdi_agent = make_agent( + premis, [capture_uuid], 'e2ca7ad2-8edf-4e4e-a3c7-36e970c796c9' + ) + bm4k_agent = make_agent( + premis, [capture_uuid], 'f47b98a2-b879-4786-9f6b-11fc3234a91e' + ) + edit_suite2_mac_agent = make_agent( + premis, [capture_uuid], '75a0b9ff-1f04-43bd-aa87-c31b73b1b61c' + ) + elcapitan_agent = make_agent( + premis, [capture_uuid], '68f56ede-a1cf-48aa-b1d8-dc9850d5bfcc' + ) + capture_agents = [ + j30sdi_agent, bm4k_agent, edit_suite2_mac_agent, elcapitan_agent + ] elif capture_station == 'loopline': - m2000pAgent = make_agent(premis,[capture_uuid] , '60ae3a85-b595-45e0-8e4a-b95e90a6c422') - kona3Agent = make_agent(premis,[capture_uuid] , 'c5e504ca-b4d5-410f-b87b-4b7ed794e44d') - osxLionAgent = make_agent(premis,[capture_uuid] , 'c5fc84fc-cc96-42a1-a5be-830b4e3012ae') - looplineMacAgent = make_agent(premis,[capture_uuid] , 'be3060a8-6ccf-4339-97d5-a265687c3a5a') - capture_agents = [m2000pAgent, kona3Agent, looplineMacAgent, osxLionAgent] + m2000p_agent = make_agent( + premis, [capture_uuid], '60ae3a85-b595-45e0-8e4a-b95e90a6c422' + ) + kona3_agent = make_agent( + premis, [capture_uuid], 'c5e504ca-b4d5-410f-b87b-4b7ed794e44d' + ) + osx_lion_agent = make_agent( + premis, [capture_uuid], 'c5fc84fc-cc96-42a1-a5be-830b4e3012ae' + ) + loopline_mac_agent = make_agent( + premis, [capture_uuid], 'be3060a8-6ccf-4339-97d5-a265687c3a5a' + ) + capture_agents = [ + m2000p_agent, kona3_agent, loopline_mac_agent, osx_lion_agent + ] elif capture_station == 'ingest1': - sony510pAgent = make_agent(premis,[capture_uuid] , 'dbdbb06b-ab10-49db-97a1-ff2ad285f9d2') - ingest1Agent = make_agent(premis,[capture_uuid] , '5fd99e09-63d7-4e9f-8383-1902f727d2a5') - windows7Agent = make_agent(premis,[capture_uuid] , '192f61b1-8130-4236-a827-a194a20557fe') - ingest1konaAgent = make_agent(premis,[capture_uuid] , 'c93ee9a5-4c0c-4670-b857-8726bfd23cae') - capture_agents = [sony510pAgent, ingest1konaAgent, ingest1Agent, windows7Agent] - make_event(premis, 'creation', 'tape capture', capture_agents, capture_uuid,xml_info[4], 'outcome', times[0]) + sony510p_agent = make_agent( + premis, [capture_uuid], 'dbdbb06b-ab10-49db-97a1-ff2ad285f9d2' + ) + ingest1_agent = make_agent( + premis, [capture_uuid], '5fd99e09-63d7-4e9f-8383-1902f727d2a5' + ) + windows7_agent = make_agent( + premis, [capture_uuid], '192f61b1-8130-4236-a827-a194a20557fe' + ) + ingest1kona_agent = make_agent( + premis, [capture_uuid], 'c93ee9a5-4c0c-4670-b857-8726bfd23cae' + ) + capture_agents = [ + sony510p_agent, ingest1kona_agent, ingest1_agent, windows7_agent + ] + make_event( + premis, 'creation', 'tape capture', + capture_agents, capture_uuid, xml_info[4], 'outcome', times[0] + ) if capture_station == 'loopline': - make_event(premis, 'compression', 'transcode to ffv1 while specifying 4:3 DAR and Top Field First interlacement', capture_agents, transcode_uuid,xml_info[4], 'outcome', times[1]) + make_event( + premis, 'compression', + 'transcode to ffv1 while specifying 4:3 DAR' + ' and Top Field First interlacement', + capture_agents, transcode_uuid, xml_info[4], 'outcome', times[1] + ) else: - make_event(premis, 'compression', 'transcode to ffv1 (figure out wording later)', capture_agents, transcode_uuid,xml_info[4], 'outcome', times[1]) - make_event(premis, 'fixity check', 'lossless verification via framemd5 (figure out wording later)', capture_agents, framemd5_uuid,xml_info[4], 'source', 'now-placeholder') - make_event(premis, 'message digest calculation', 'whole file checksum manifest of SIP', capture_agents, manifest_uuid,xml_info[4], 'source', 'now-placeholder') - - + make_event( + premis, 'compression', + 'transcode to ffv1 (figure out wording later)', + capture_agents, transcode_uuid, xml_info[4], 'outcome', times[1] + ) + make_event( + premis, 'fixity check', + 'lossless verification via framemd5 (figure out wording later)', + capture_agents, framemd5_uuid, xml_info[4], 'source', 'now-placeholder' + ) + make_event( + premis, 'message digest calculation', + 'whole file checksum manifest of SIP', capture_agents, + manifest_uuid, xml_info[4], 'source', 'now-placeholder' + ) + + def get_checksum(manifest): if os.path.isfile(manifest): with open(manifest, 'r') as fo: @@ -65,46 +110,55 @@ def get_checksum(manifest): if md5[-5:].rsplit()[0] == '.mkv': return md5[:32] -def get_times(sourcexml): - mediaxml_object = ET.parse(sourcexml) - mxml = mediaxml_object.getroot() - capture_date = mxml.xpath('//File_Modified_Date_Local')[0].text #encoded date is probably better +def get_times(sourcexml): + mediaxml_object = ET.parse(sourcexml) + mxml = mediaxml_object.getroot() + # encoded date is probably better + capture_date = mxml.xpath('//File_Modified_Date_Local')[0].text return capture_date + + def get_capture_workstation(mediaxml): - mediaxml_object = ET.parse(mediaxml) - mxml = mediaxml_object.getroot() - mediaExpress_check = len(mxml.xpath('//COMAPPLEPROAPPSLOGNOTE')) - fcp7_check = len(mxml.xpath('//COMAPPLEFINALCUTSTUDIOMEDIAUUID')) - if mediaExpress_check > 0: + mediaxml_object = ET.parse(mediaxml) + mxml = mediaxml_object.getroot() + mediaexpress_check = len(mxml.xpath('//COMAPPLEPROAPPSLOGNOTE')) + fcp7_check = len(mxml.xpath('//COMAPPLEFINALCUTSTUDIOMEDIAUUID')) + if mediaexpress_check > 0: print 'this was probably Media Express?' capture_station = 'es2' elif fcp7_check > 0: print 'this was probably FCP7?' capture_station = 'loopline' else: - # i can't find any meaningful distinctive metadata that control room writes. + # i can't find any distinctive metadata that control room writes. print 'this was probably Control Room?' capture_station = 'ingest1' print 'Does this sound ok? Y/N?' print capture_station - station_confirm = '' - while station_confirm not in ('Y','y','N','n'): - station_confirm = raw_input() - if station_confirm not in ('Y','y','N','n'): - print 'Incorrect input. Please enter Y or N' - elif station_confirm not in ('Y','y'): - capture_station = '' - if not capture_station == '1' or capture_station == '2' or capture_station == '3': - capture_station = raw_input('\n\n**** Where was tape captured?\nPress 1, 2 or 3\n\n1. es2\n2. loopline\n3. ingest 1\n' ) - while capture_station not in ('1','2','3'): - capture_station = raw_input('\n\n**** Where was tape captured?\nPress 1, 2 or 3\n\n1. es2\n2. loopline\n3. ingest 1\n' ) - if capture_station == '1': - capture_station = 'es2' - elif capture_station == '2': - capture_station = 'loopline' - elif capture_station == '3': - capture_station = 'ingest1' + station_confirm = '' + while station_confirm not in ('Y', 'y', 'N', 'n'): + station_confirm = raw_input() + if station_confirm not in ('Y', 'y', 'N', 'n'): + print 'Incorrect input. Please enter Y or N' + elif station_confirm not in ('Y', 'y'): + capture_station = '' + if capture_station not in range(1, 4): + capture_station = raw_input( + '\n\n**** Where was tape captured?\n' + 'Press 1, 2 or 3\n\n1. es2\n2. loopline\n3. ingest 1\n' + ) + while capture_station not in range(1, 4): + capture_station = raw_input( + '\n\n**** Where was tape captured?\n' + 'Press 1, 2 or 3\n\n1. es2\n2. loopline\n3. ingest 1\n' + ) + if capture_station == '1': + capture_station = 'es2' + elif capture_station == '2': + capture_station = 'loopline' + elif capture_station == '3': + capture_station = 'ingest1' return capture_station @@ -114,17 +168,26 @@ def main(): sip_dir = os.path.dirname(source_file) parent_dir = os.path.dirname(sip_dir) metadata_dir = os.path.join(parent_dir, 'metadata') - ffv1_xml = os.path.join(metadata_dir, os.path.basename(sys.argv[1] + '_mediainfo.xml')) + ffv1_xml = os.path.join( + metadata_dir, os.path.basename( + sys.argv[1] + + '_mediainfo.xml' + ) + ) # the replace here is a terrible hack. Sad! Fix! - source_xml = os.path.join(metadata_dir, os.path.basename(sys.argv[1].replace('.mkv', '.mov') + '_source_mediainfo.xml')) + source_xml = os.path.join( + metadata_dir, + os.path.basename( + sys.argv[1].replace('.mkv', '.mov') + + '_source_mediainfo.xml')) capture_time = get_times(source_xml) transcode_time = get_times(ffv1_xml) times = [capture_time, transcode_time] if os.path.isfile(ffv1_xml): capture_station = get_capture_workstation(ffv1_xml) else: - print('Can\'t find XML of FFv1 file. Exiting!') - sys.exit() + print 'Can\'t find XML of FFv1 file. Exiting!' + sys.exit() ''' /home/kieranjol/ifigit/ifiscripts/massive/objects sip /home/kieranjol/ifigit/ifiscripts/massive parent @@ -135,18 +198,28 @@ def main(): print 'no manifest found' sys.exit() md5 = get_checksum(manifest) - items = {"workflow":"raw audio","oe":os.path.basename(source_file), "filmographic":'n/a', "sourceAccession":os.path.basename(source_file), "interventions":['placeholder'], "prepList":['placeholder'], "user":'Kieran O\' Leary'} - + items = { + "workflow":"raw audio", + "oe":os.path.basename(source_file), + "filmographic":'n/a', + "sourceAccession":os.path.basename(source_file), + "interventions":['placeholder'], + "prepList":['placeholder'], + "user":'Kieran O\' Leary' + } representation_uuid = str(uuid.uuid4()) - # the final argument here is 'loopline' which tells premis.py to not generate a checksum - xml_info = make_premis(source_file, items, premis, premis_namespace, premisxml,representation_uuid,md5) - linkinguuids = [xml_info[4][0],'n/a',os.path.basename(source_file)] - create_representation(premisxml, premis_namespace, doc, premis, items,linkinguuids, representation_uuid, 'no_sequence', 'n/a') + # looks like loopline isn't the keyword any longer. it's len = 32? + xml_info = make_premis( + source_file, items, premis, + premis_namespace, premisxml, representation_uuid, md5 + ) + linkinguuids = [xml_info[4][0], 'n/a', os.path.basename(source_file)] + create_representation( + premisxml, premis_namespace, doc, premis, + items, linkinguuids, representation_uuid, 'no_sequence', 'n/a' + ) print xml_info capture_description(premis, xml_info, capture_station, times) - - - write_premis(doc, premisxml) if __name__ == '__main__': From fd2d9d1dde2eb2591653b9a30a77d86ecca294e7 Mon Sep 17 00:00:00 2001 From: kieranjol Date: Sat, 11 Mar 2017 22:25:10 +0000 Subject: [PATCH 17/30] tapepremis - fixes capture_station interview --- tapepremis.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tapepremis.py b/tapepremis.py index 0ec29f1..69ebcc6 100755 --- a/tapepremis.py +++ b/tapepremis.py @@ -148,7 +148,7 @@ def get_capture_workstation(mediaxml): '\n\n**** Where was tape captured?\n' 'Press 1, 2 or 3\n\n1. es2\n2. loopline\n3. ingest 1\n' ) - while capture_station not in range(1, 4): + while int(capture_station) not in range(1, 4): capture_station = raw_input( '\n\n**** Where was tape captured?\n' 'Press 1, 2 or 3\n\n1. es2\n2. loopline\n3. ingest 1\n' From 0b6714828f8f94255db4280ef6ec51781a5f6a61 Mon Sep 17 00:00:00 2001 From: kieranjol Date: Sun, 12 Mar 2017 19:39:17 +0000 Subject: [PATCH 18/30] tapepremis - implements agents/events refactored - needs cleanup --- tapepremis.py | 285 ++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 232 insertions(+), 53 deletions(-) diff --git a/tapepremis.py b/tapepremis.py index 69ebcc6..dae0a60 100755 --- a/tapepremis.py +++ b/tapepremis.py @@ -2,17 +2,57 @@ import sys import os import uuid +import csv import lxml.etree as ET from ififuncs import get_date_modified from premis import make_premis -from premis import write_premis from premis import make_agent -from premis import make_event +from premis import write_premis from premis import setup_xml from premis import create_representation +from premis import create_unit +def make_event(premis,event_type, event_detail, agentlist, eventID, eventLinkingObjectIdentifier, eventLinkingObjectRole, event_time): + premis_namespace = "http://www.loc.gov/premis/v3" + event = ET.SubElement(premis, "{%s}event" % (premis_namespace)) + premis.insert(-1,event) + event_Identifier = create_unit(1,event,'eventIdentifier') + event_id_type = ET.Element("{%s}eventIdentifierType" % (premis_namespace)) + event_Identifier.insert(0,event_id_type) + event_id_value = ET.Element("{%s}eventIdentifierValue" % (premis_namespace)) + event_Identifier.insert(0,event_id_value) + event_Type = ET.Element("{%s}eventType" % (premis_namespace)) + event.insert(2,event_Type) + event_DateTime = ET.Element("{%s}eventDateTime" % (premis_namespace)) + event.insert(3,event_DateTime) + if event_time == 'now': + event_DateTime.text = time.strftime("%Y-%m-%dT%H:%M:%S") + else: + event_DateTime.text = event_time + event_Type.text = event_type + event_id_value.text = eventID + event_id_type.text = 'UUID' + eventDetailInformation = create_unit(4,event,'eventDetailInformation') + eventDetail = create_unit(0,eventDetailInformation,'eventDetail') + eventDetail.text = event_detail + for i in eventLinkingObjectIdentifier: + linkingObjectIdentifier = create_unit(5,event,'linkingObjectIdentifier') + linkingObjectIdentifierType = create_unit(0,linkingObjectIdentifier,'linkingObjectIdentifierType') + linkingObjectIdentifierValue = create_unit(1,linkingObjectIdentifier,'linkingObjectIdentifierValue') + linkingObjectIdentifierValue.text = i + linkingObjectRole = create_unit(2,linkingObjectIdentifier,'linkingObjectRole') + linkingObjectIdentifierType.text = 'UUID' + linkingObjectRole.text = eventLinkingObjectRole + for i in agentlist: + linkingAgentIdentifier = create_unit(-1,event,'linkingAgentIdentifier') + linkingAgentIdentifierType = create_unit(0,linkingAgentIdentifier,'linkingAgentIdentifierType') + linkingAgentIdentifierValue = create_unit(1,linkingAgentIdentifier,'linkingAgentIdentifierValue') + linkingAgentIdentifierRole = create_unit(2,linkingAgentIdentifier,'linkingAgentRole') + linkingAgentIdentifierRole.text = 'implementer' + linkingAgentIdentifierType.text = 'UUID' + linkingAgentIdentifierValue.text = i -def capture_description(premis, xml_info, capture_station, times): +def capture_description(premis, xml_info, capture_station, times, total_agents): ''' Events: 1. capture - glean from v210 mediainfo xml @@ -22,54 +62,29 @@ def capture_description(premis, xml_info, capture_station, times): that's it? ''' capture_uuid = str(uuid.uuid4()) - transcode_uuid = str(uuid.uuid4()) - framemd5_uuid = str(uuid.uuid4()) - manifest_uuid = str(uuid.uuid4()) + capture_dict = {} if capture_station == 'es2': - j30sdi_agent = make_agent( - premis, [capture_uuid], 'e2ca7ad2-8edf-4e4e-a3c7-36e970c796c9' - ) - bm4k_agent = make_agent( - premis, [capture_uuid], 'f47b98a2-b879-4786-9f6b-11fc3234a91e' - ) - edit_suite2_mac_agent = make_agent( - premis, [capture_uuid], '75a0b9ff-1f04-43bd-aa87-c31b73b1b61c' - ) - elcapitan_agent = make_agent( - premis, [capture_uuid], '68f56ede-a1cf-48aa-b1d8-dc9850d5bfcc' - ) + j30sdi_agent = 'e2ca7ad2-8edf-4e4e-a3c7-36e970c796c9' + bm4k_agent = 'f47b98a2-b879-4786-9f6b-11fc3234a91e' + edit_suite2_mac_agent = '75a0b9ff-1f04-43bd-aa87-c31b73b1b61c' + elcapitan_agent = '68f56ede-a1cf-48aa-b1d8-dc9850d5bfcc' capture_agents = [ j30sdi_agent, bm4k_agent, edit_suite2_mac_agent, elcapitan_agent ] elif capture_station == 'loopline': - m2000p_agent = make_agent( - premis, [capture_uuid], '60ae3a85-b595-45e0-8e4a-b95e90a6c422' - ) - kona3_agent = make_agent( - premis, [capture_uuid], 'c5e504ca-b4d5-410f-b87b-4b7ed794e44d' - ) - osx_lion_agent = make_agent( - premis, [capture_uuid], 'c5fc84fc-cc96-42a1-a5be-830b4e3012ae' - ) - loopline_mac_agent = make_agent( - premis, [capture_uuid], 'be3060a8-6ccf-4339-97d5-a265687c3a5a' - ) + m2000p_agent = '60ae3a85-b595-45e0-8e4a-b95e90a6c422' + kona3_agent = 'c5e504ca-b4d5-410f-b87b-4b7ed794e44d' + loopline_mac_agent = 'be3060a8-6ccf-4339-97d5-a265687c3a5a' + osx_lion_agent = 'c5fc84fc-cc96-42a1-a5be-830b4e3012ae' capture_agents = [ m2000p_agent, kona3_agent, loopline_mac_agent, osx_lion_agent ] + elif capture_station == 'ingest1': - sony510p_agent = make_agent( - premis, [capture_uuid], 'dbdbb06b-ab10-49db-97a1-ff2ad285f9d2' - ) - ingest1_agent = make_agent( - premis, [capture_uuid], '5fd99e09-63d7-4e9f-8383-1902f727d2a5' - ) - windows7_agent = make_agent( - premis, [capture_uuid], '192f61b1-8130-4236-a827-a194a20557fe' - ) - ingest1kona_agent = make_agent( - premis, [capture_uuid], 'c93ee9a5-4c0c-4670-b857-8726bfd23cae' - ) + sony510p_agent = 'dbdbb06b-ab10-49db-97a1-ff2ad285f9d2' + ingest1_agent = '5fd99e09-63d7-4e9f-8383-1902f727d2a5' + windows7_agent = '192f61b1-8130-4236-a827-a194a20557fe' + ingest1kona_agent = 'c93ee9a5-4c0c-4670-b857-8726bfd23cae' capture_agents = [ sony510p_agent, ingest1kona_agent, ingest1_agent, windows7_agent ] @@ -77,29 +92,105 @@ def capture_description(premis, xml_info, capture_station, times): premis, 'creation', 'tape capture', capture_agents, capture_uuid, xml_info[4], 'outcome', times[0] ) - if capture_station == 'loopline': + event_dict = {} + for agent in capture_agents: + # Just the UUID is returned. + event_dict[agent] = [capture_uuid] + print capture_uuid, 'capture' + print event_dict, 0 + return event_dict + +def ffv1_description(premis, xml_info, capture_station, times, event_dict): + transcode_uuid = str(uuid.uuid4()) + framemd5_uuid = str(uuid.uuid4()) + manifest_uuid = str(uuid.uuid4()) + if capture_station == 'es2': + edit_suite2_mac_agent = '75a0b9ff-1f04-43bd-aa87-c31b73b1b61c' + elcapitan_agent = '68f56ede-a1cf-48aa-b1d8-dc9850d5bfcc' + ffv1_agents = [ + edit_suite2_mac_agent, elcapitan_agent + ] make_event( premis, 'compression', - 'transcode to ffv1 while specifying 4:3 DAR' - ' and Top Field First interlacement', - capture_agents, transcode_uuid, xml_info[4], 'outcome', times[1] + 'transcode to FFV1/Matroska (figure out wording later)', + ffv1_agents, transcode_uuid, xml_info[4], 'outcome', times[1] ) - else: + + elif capture_station == 'ingest1': + ingest1_agent = '5fd99e09-63d7-4e9f-8383-1902f727d2a5' + windows7_agent = '192f61b1-8130-4236-a827-a194a20557fe' + ffv1_agents = [ + ingest1_agent, windows7_agent + ] make_event( premis, 'compression', - 'transcode to ffv1 (figure out wording later)', - capture_agents, transcode_uuid, xml_info[4], 'outcome', times[1] + 'transcode to FFV1/Matroska (figure out wording later)', + ffv1_agents, transcode_uuid, xml_info[4], 'outcome', times[1] ) + elif capture_station == 'loopline': + osx_lion_agent = 'c5fc84fc-cc96-42a1-a5be-830b4e3012ae' + loopline_mac_agent = 'be3060a8-6ccf-4339-97d5-a265687c3a5a' + ffv1_agents = [ + osx_lion_agent, loopline_mac_agent + ] + make_event( + premis, 'compression', + 'transcode to FFV1/Matroska while specifying 4:3 DAR ' + 'and Top Field First interlacement', + ffv1_agents, transcode_uuid, xml_info[4], 'outcome', times[1] + ) make_event( premis, 'fixity check', 'lossless verification via framemd5 (figure out wording later)', - capture_agents, framemd5_uuid, xml_info[4], 'source', 'now-placeholder' + ffv1_agents, framemd5_uuid, xml_info[4], 'source', 'now-placeholder' ) make_event( premis, 'message digest calculation', - 'whole file checksum manifest of SIP', capture_agents, + 'whole file checksum manifest of SIP', ffv1_agents, manifest_uuid, xml_info[4], 'source', 'now-placeholder' ) + for agent in ffv1_agents: + # Just the UUID is returned. + event_dict[agent] += [transcode_uuid] + event_dict[agent] += [framemd5_uuid] + event_dict[agent] += [manifest_uuid] + + print event_dict + for agent in event_dict: + make_agent( + premis, event_dict[agent],agent + ) +def ingest1_description(premis): + # this really just lists all the permanent agents at this workstation + ingest1_agent = make_agent( + premis, '5fd99e09-63d7-4e9f-8383-1902f727d2a5', 'not-write' + ) + windows7_agent = make_agent( + premis, '192f61b1-8130-4236-a827-a194a20557fe', 'not-write' + ) + return ingest1_agent, windows7_agent + + +def es2_description(premis): + # this really just lists all the permanent agents at this workstation + edit_suite2_mac_agent = make_agent( + premis, '75a0b9ff-1f04-43bd-aa87-c31b73b1b61c', 'not-write' + ) + elcapitan_agent = make_agent( + premis, '68f56ede-a1cf-48aa-b1d8-dc9850d5bfcc', 'not-write' + ) + return edit_suite_mac_agent, elcapitan_agent + + +def loopline_description(premis): + osx_lion_agent = make_agent( + premis, 'c5fc84fc-cc96-42a1-a5be-830b4e3012ae', 'not-write' + ) + loopline_mac_agent = make_agent( + premis, 'be3060a8-6ccf-4339-97d5-a265687c3a5a', 'not-write' + ) + return osx_lion_agent, loopline_mac_agent + def get_checksum(manifest): @@ -118,7 +209,67 @@ def get_times(sourcexml): capture_date = mxml.xpath('//File_Modified_Date_Local')[0].text return capture_date +''' +def make_agent(premis, agentId, write): + # write argument will either return agent info without writing xml + csv_file = os.path.expanduser("~/ifigit/ifiscripts/premis_agents.csv") + if os.path.isfile(csv_file): + read_object = open(csv_file) + reader = csv.reader(read_object) + csv_list = list(reader) + read_object.close() + for lists in csv_list: + for item in lists: + if item == agentId: + agent_info = lists + ( + agentIdType_value, agentIdValue_value, + agentName_value, agentType_value, agentVersion_value, + agentNote_value,agentRole + ) = agent_info + if agentVersion_value == 'ffmpeg_autoextract': + agentVersion_value = subprocess.check_output( + ['ffmpeg','-version','-v','0'] + ).splitlines()[0] + if write == 'write': + premis_namespace = "http://www.loc.gov/premis/v3" + agent = ET.SubElement( + premis, "{%s}agent" % (premis_namespace) + ) + premis.insert(-1, agent) + agentIdentifier = create_unit( + 1,agent,'agentIdentifier' + ) + agentIdType = create_unit( + 2,agentIdentifier,'agentIdentifierType' + ) + agentIdValue = create_unit( + 2,agentIdentifier,'agentIdentifierValue' + ) + agentName = create_unit(2,agent,'agentName') + agentName.text = agentName_value + if not agentNote_value == '': + agentNote = create_unit( + 5,agent,'agentNote' + ) + agentNote.text = agentNote_value + agentType = create_unit( + 3,agent,'agentType' + ) + if not agentVersion_value == '': + agentVersion = create_unit( + 4,agent,'agentVersion' + ) + agentVersion.text = agentVersion_value + agentIdType.text = agentIdType_value + agentIdValue.text = agentIdValue_value + agentType.text = agentType_value + agent_info = [agentIdType_value,agentIdValue_value] + return agent_info + else: + return agent_info +''' def get_capture_workstation(mediaxml): mediaxml_object = ET.parse(mediaxml) mxml = mediaxml_object.getroot() @@ -162,7 +313,30 @@ def get_capture_workstation(mediaxml): return capture_station + +def get_user(question): + user = '' + if not user == '1' or user == '2' or user =='3': + user = raw_input( + '\n\n%s' + '\nPress 1 or 2 or 3\n\n' + '1. Kieran O\'Leary\n2. Aoife Fitzmaurice\n3. Raelene Casey\n' % question) + while user not in ('1', '2', '3'): + user = raw_input( + '\n\n%s' + '\nPress 1 or 2 or 3\n\n' + '1. Kieran O\'Leary\n2. Aoife Fitzmaurice\n3. Raelene Casey\n' % question) + if user == '1': + user = 'Kieran O\'Leary' + elif user == '2': + user = 'Aoife Fitzmaurice' + elif user == '3': + user = 'Raelene Casey' + return user def main(): + total_agents = [] + script_user = get_user('**** Who is running this script?') + user = get_user('**** Who captured the actual tape?') premisxml, premis_namespace, doc, premis = setup_xml(sys.argv[1]) source_file = sys.argv[1] sip_dir = os.path.dirname(source_file) @@ -198,6 +372,8 @@ def main(): print 'no manifest found' sys.exit() md5 = get_checksum(manifest) + # this items var is sad,clearly there's hardcoded workflow crap in premis.py + # I don't even know if any of these are relevant anymore items = { "workflow":"raw audio", "oe":os.path.basename(source_file), @@ -218,8 +394,11 @@ def main(): premisxml, premis_namespace, doc, premis, items, linkinguuids, representation_uuid, 'no_sequence', 'n/a' ) - print xml_info - capture_description(premis, xml_info, capture_station, times) + event_dict = capture_description( + premis, xml_info, capture_station, times, total_agents + ) + + ffv1_description(premis, xml_info, capture_station, times, event_dict) write_premis(doc, premisxml) if __name__ == '__main__': From 5215d0e970b4cc74eabbc1d8d1e3c7bf6e39470a Mon Sep 17 00:00:00 2001 From: kieranjol Date: Sun, 12 Mar 2017 21:58:21 +0000 Subject: [PATCH 19/30] tapepremis - add users to premis/cleanup --- tapepremis.py | 242 +++++++++++++++++++++----------------------------- 1 file changed, 102 insertions(+), 140 deletions(-) diff --git a/tapepremis.py b/tapepremis.py index dae0a60..7a52bcc 100755 --- a/tapepremis.py +++ b/tapepremis.py @@ -12,47 +12,75 @@ from premis import create_representation from premis import create_unit -def make_event(premis,event_type, event_detail, agentlist, eventID, eventLinkingObjectIdentifier, eventLinkingObjectRole, event_time): - premis_namespace = "http://www.loc.gov/premis/v3" + +def make_event( + premis,event_type, event_detail, + agentlist, eventID, eventLinkingObjectIdentifier, + eventLinkingObjectRole, event_time + ): + # This is really only here because the premis.py version handles the \ + # linkingAgentIdentifiers differently. + premis_namespace = "http://www.loc.gov/premis/v3" event = ET.SubElement(premis, "{%s}event" % (premis_namespace)) premis.insert(-1,event) - event_Identifier = create_unit(1,event,'eventIdentifier') - event_id_type = ET.Element("{%s}eventIdentifierType" % (premis_namespace)) + event_Identifier = create_unit(1,event,'eventIdentifier') + event_id_type = ET.Element("{%s}eventIdentifierType" % (premis_namespace)) event_Identifier.insert(0,event_id_type) - event_id_value = ET.Element("{%s}eventIdentifierValue" % (premis_namespace)) + event_id_value = ET.Element("{%s}eventIdentifierValue" % (premis_namespace)) event_Identifier.insert(0,event_id_value) - event_Type = ET.Element("{%s}eventType" % (premis_namespace)) + event_Type = ET.Element("{%s}eventType" % (premis_namespace)) event.insert(2,event_Type) - event_DateTime = ET.Element("{%s}eventDateTime" % (premis_namespace)) + event_DateTime = ET.Element("{%s}eventDateTime" % (premis_namespace)) event.insert(3,event_DateTime) if event_time == 'now': - event_DateTime.text = time.strftime("%Y-%m-%dT%H:%M:%S") + event_DateTime.text = time.strftime("%Y-%m-%dT%H:%M:%S") else: - event_DateTime.text = event_time - event_Type.text = event_type - event_id_value.text = eventID - event_id_type.text = 'UUID' - eventDetailInformation = create_unit(4,event,'eventDetailInformation') - eventDetail = create_unit(0,eventDetailInformation,'eventDetail') - eventDetail.text = event_detail + event_DateTime.text = event_time + event_Type.text = event_type + event_id_value.text = eventID + event_id_type.text = 'UUID' + eventDetailInformation = create_unit( + 4,event,'eventDetailInformation' + ) + eventDetail = create_unit( + 0,eventDetailInformation,'eventDetail' + ) + eventDetail.text = event_detail for i in eventLinkingObjectIdentifier: - linkingObjectIdentifier = create_unit(5,event,'linkingObjectIdentifier') - linkingObjectIdentifierType = create_unit(0,linkingObjectIdentifier,'linkingObjectIdentifierType') - linkingObjectIdentifierValue = create_unit(1,linkingObjectIdentifier,'linkingObjectIdentifierValue') - linkingObjectIdentifierValue.text = i - linkingObjectRole = create_unit(2,linkingObjectIdentifier,'linkingObjectRole') - linkingObjectIdentifierType.text = 'UUID' - linkingObjectRole.text = eventLinkingObjectRole + linkingObjectIdentifier = create_unit( + 5,event,'linkingObjectIdentifier' + ) + linkingObjectIdentifierType = create_unit( + 0,linkingObjectIdentifier,'linkingObjectIdentifierType' + ) + linkingObjectIdentifierValue = create_unit( + 1,linkingObjectIdentifier,'linkingObjectIdentifierValue' + ) + linkingObjectIdentifierValue.text = i + linkingObjectRole = create_unit( + 2,linkingObjectIdentifier,'linkingObjectRole' + ) + linkingObjectIdentifierType.text = 'UUID' + linkingObjectRole.text = eventLinkingObjectRole for i in agentlist: - linkingAgentIdentifier = create_unit(-1,event,'linkingAgentIdentifier') - linkingAgentIdentifierType = create_unit(0,linkingAgentIdentifier,'linkingAgentIdentifierType') - linkingAgentIdentifierValue = create_unit(1,linkingAgentIdentifier,'linkingAgentIdentifierValue') - linkingAgentIdentifierRole = create_unit(2,linkingAgentIdentifier,'linkingAgentRole') - linkingAgentIdentifierRole.text = 'implementer' - linkingAgentIdentifierType.text = 'UUID' - linkingAgentIdentifierValue.text = i + linkingAgentIdentifier = create_unit( + -1,event,'linkingAgentIdentifier' + ) + linkingAgentIdentifierType = create_unit( + 0,linkingAgentIdentifier,'linkingAgentIdentifierType' + ) + linkingAgentIdentifierValue = create_unit( + 1,linkingAgentIdentifier,'linkingAgentIdentifierValue' + ) + linkingAgentIdentifierRole = create_unit( + 2,linkingAgentIdentifier,'linkingAgentRole' + ) + linkingAgentIdentifierRole.text = 'implementer' + linkingAgentIdentifierType.text = 'UUID' + linkingAgentIdentifierValue.text = i + -def capture_description(premis, xml_info, capture_station, times, total_agents): +def capture_description(premis, xml_info, capture_station, times, total_agents, engineer): ''' Events: 1. capture - glean from v210 mediainfo xml @@ -61,6 +89,14 @@ def capture_description(premis, xml_info, capture_station, times, total_agents): 4. whole file manifest - sip log that's it? ''' + if engineer == 'Kieran O\'Leary': + engineer_agent = '0b3b7e69-80e1-48ec-bf07-62b04669117d' + elif engineer == 'Aoife Fitzmaurice': + engineer_agent = '9e59e772-14b0-4f9e-95b3-b88b6e751c3b' + elif engineer == 'Raelene Casey': + engineer_agent = 'b342d3f7-d87e-4fe3-8da5-89e16a30b59e' + + capture_uuid = str(uuid.uuid4()) capture_dict = {} if capture_station == 'es2': @@ -69,7 +105,9 @@ def capture_description(premis, xml_info, capture_station, times, total_agents): edit_suite2_mac_agent = '75a0b9ff-1f04-43bd-aa87-c31b73b1b61c' elcapitan_agent = '68f56ede-a1cf-48aa-b1d8-dc9850d5bfcc' capture_agents = [ - j30sdi_agent, bm4k_agent, edit_suite2_mac_agent, elcapitan_agent + j30sdi_agent, bm4k_agent, + edit_suite2_mac_agent, elcapitan_agent, + engineer_agent ] elif capture_station == 'loopline': m2000p_agent = '60ae3a85-b595-45e0-8e4a-b95e90a6c422' @@ -77,7 +115,9 @@ def capture_description(premis, xml_info, capture_station, times, total_agents): loopline_mac_agent = 'be3060a8-6ccf-4339-97d5-a265687c3a5a' osx_lion_agent = 'c5fc84fc-cc96-42a1-a5be-830b4e3012ae' capture_agents = [ - m2000p_agent, kona3_agent, loopline_mac_agent, osx_lion_agent + m2000p_agent, kona3_agent, + loopline_mac_agent, osx_lion_agent, + engineer_agent ] elif capture_station == 'ingest1': @@ -86,7 +126,9 @@ def capture_description(premis, xml_info, capture_station, times, total_agents): windows7_agent = '192f61b1-8130-4236-a827-a194a20557fe' ingest1kona_agent = 'c93ee9a5-4c0c-4670-b857-8726bfd23cae' capture_agents = [ - sony510p_agent, ingest1kona_agent, ingest1_agent, windows7_agent + sony510p_agent, ingest1kona_agent, + ingest1_agent, windows7_agent, + engineer_agent ] make_event( premis, 'creation', 'tape capture', @@ -96,11 +138,16 @@ def capture_description(premis, xml_info, capture_station, times, total_agents): for agent in capture_agents: # Just the UUID is returned. event_dict[agent] = [capture_uuid] - print capture_uuid, 'capture' - print event_dict, 0 return event_dict -def ffv1_description(premis, xml_info, capture_station, times, event_dict): + +def ffv1_description(premis, xml_info, capture_station, times, event_dict, script_user): + if script_user == 'Kieran O\'Leary': + script_user_agent = '0b3b7e69-80e1-48ec-bf07-62b04669117d' + elif script_user == 'Aoife Fitzmaurice': + script_user_agent = '9e59e772-14b0-4f9e-95b3-b88b6e751c3b' + elif script_user == 'Raelene Casey': + script_user_agent = 'b342d3f7-d87e-4fe3-8da5-89e16a30b59e' transcode_uuid = str(uuid.uuid4()) framemd5_uuid = str(uuid.uuid4()) manifest_uuid = str(uuid.uuid4()) @@ -108,7 +155,7 @@ def ffv1_description(premis, xml_info, capture_station, times, event_dict): edit_suite2_mac_agent = '75a0b9ff-1f04-43bd-aa87-c31b73b1b61c' elcapitan_agent = '68f56ede-a1cf-48aa-b1d8-dc9850d5bfcc' ffv1_agents = [ - edit_suite2_mac_agent, elcapitan_agent + edit_suite2_mac_agent, elcapitan_agent, script_user_agent ] make_event( premis, 'compression', @@ -120,7 +167,7 @@ def ffv1_description(premis, xml_info, capture_station, times, event_dict): ingest1_agent = '5fd99e09-63d7-4e9f-8383-1902f727d2a5' windows7_agent = '192f61b1-8130-4236-a827-a194a20557fe' ffv1_agents = [ - ingest1_agent, windows7_agent + ingest1_agent, windows7_agent, script_user_agent ] make_event( premis, 'compression', @@ -131,7 +178,7 @@ def ffv1_description(premis, xml_info, capture_station, times, event_dict): osx_lion_agent = 'c5fc84fc-cc96-42a1-a5be-830b4e3012ae' loopline_mac_agent = 'be3060a8-6ccf-4339-97d5-a265687c3a5a' ffv1_agents = [ - osx_lion_agent, loopline_mac_agent + osx_lion_agent, loopline_mac_agent, script_user_agent ] make_event( premis, 'compression', @@ -149,48 +196,21 @@ def ffv1_description(premis, xml_info, capture_station, times, event_dict): 'whole file checksum manifest of SIP', ffv1_agents, manifest_uuid, xml_info[4], 'source', 'now-placeholder' ) + print ffv1_agents for agent in ffv1_agents: # Just the UUID is returned. - event_dict[agent] += [transcode_uuid] - event_dict[agent] += [framemd5_uuid] - event_dict[agent] += [manifest_uuid] - - print event_dict + if agent in event_dict: + event_dict[agent] += [transcode_uuid] + event_dict[agent] += [framemd5_uuid] + event_dict[agent] += [manifest_uuid] + else: + event_dict[agent] = [transcode_uuid] + event_dict[agent] += [framemd5_uuid] + event_dict[agent] += [manifest_uuid] for agent in event_dict: make_agent( premis, event_dict[agent],agent ) -def ingest1_description(premis): - # this really just lists all the permanent agents at this workstation - ingest1_agent = make_agent( - premis, '5fd99e09-63d7-4e9f-8383-1902f727d2a5', 'not-write' - ) - windows7_agent = make_agent( - premis, '192f61b1-8130-4236-a827-a194a20557fe', 'not-write' - ) - return ingest1_agent, windows7_agent - - -def es2_description(premis): - # this really just lists all the permanent agents at this workstation - edit_suite2_mac_agent = make_agent( - premis, '75a0b9ff-1f04-43bd-aa87-c31b73b1b61c', 'not-write' - ) - elcapitan_agent = make_agent( - premis, '68f56ede-a1cf-48aa-b1d8-dc9850d5bfcc', 'not-write' - ) - return edit_suite_mac_agent, elcapitan_agent - - -def loopline_description(premis): - osx_lion_agent = make_agent( - premis, 'c5fc84fc-cc96-42a1-a5be-830b4e3012ae', 'not-write' - ) - loopline_mac_agent = make_agent( - premis, 'be3060a8-6ccf-4339-97d5-a265687c3a5a', 'not-write' - ) - return osx_lion_agent, loopline_mac_agent - def get_checksum(manifest): @@ -209,67 +229,7 @@ def get_times(sourcexml): capture_date = mxml.xpath('//File_Modified_Date_Local')[0].text return capture_date -''' -def make_agent(premis, agentId, write): - # write argument will either return agent info without writing xml - csv_file = os.path.expanduser("~/ifigit/ifiscripts/premis_agents.csv") - if os.path.isfile(csv_file): - read_object = open(csv_file) - reader = csv.reader(read_object) - csv_list = list(reader) - read_object.close() - for lists in csv_list: - for item in lists: - if item == agentId: - agent_info = lists - ( - agentIdType_value, agentIdValue_value, - agentName_value, agentType_value, agentVersion_value, - agentNote_value,agentRole - ) = agent_info - if agentVersion_value == 'ffmpeg_autoextract': - agentVersion_value = subprocess.check_output( - ['ffmpeg','-version','-v','0'] - ).splitlines()[0] - if write == 'write': - premis_namespace = "http://www.loc.gov/premis/v3" - agent = ET.SubElement( - premis, "{%s}agent" % (premis_namespace) - ) - premis.insert(-1, agent) - agentIdentifier = create_unit( - 1,agent,'agentIdentifier' - ) - agentIdType = create_unit( - 2,agentIdentifier,'agentIdentifierType' - ) - agentIdValue = create_unit( - 2,agentIdentifier,'agentIdentifierValue' - ) - agentName = create_unit(2,agent,'agentName') - agentName.text = agentName_value - if not agentNote_value == '': - agentNote = create_unit( - 5,agent,'agentNote' - ) - agentNote.text = agentNote_value - agentType = create_unit( - 3,agent,'agentType' - ) - if not agentVersion_value == '': - agentVersion = create_unit( - 4,agent,'agentVersion' - ) - agentVersion.text = agentVersion_value - agentIdType.text = agentIdType_value - agentIdValue.text = agentIdValue_value - agentType.text = agentType_value - agent_info = [agentIdType_value,agentIdValue_value] - return agent_info - else: - return agent_info -''' def get_capture_workstation(mediaxml): mediaxml_object = ET.parse(mediaxml) mxml = mediaxml_object.getroot() @@ -286,7 +246,6 @@ def get_capture_workstation(mediaxml): print 'this was probably Control Room?' capture_station = 'ingest1' print 'Does this sound ok? Y/N?' - print capture_station station_confirm = '' while station_confirm not in ('Y', 'y', 'N', 'n'): station_confirm = raw_input() @@ -333,10 +292,12 @@ def get_user(question): elif user == '3': user = 'Raelene Casey' return user + + def main(): total_agents = [] script_user = get_user('**** Who is running this script?') - user = get_user('**** Who captured the actual tape?') + engineer = get_user('**** Who captured the actual tape?') premisxml, premis_namespace, doc, premis = setup_xml(sys.argv[1]) source_file = sys.argv[1] sip_dir = os.path.dirname(source_file) @@ -395,10 +356,11 @@ def main(): items, linkinguuids, representation_uuid, 'no_sequence', 'n/a' ) event_dict = capture_description( - premis, xml_info, capture_station, times, total_agents + premis, xml_info, capture_station, times, total_agents, engineer + ) + ffv1_description( + premis, xml_info, capture_station, times, event_dict, script_user ) - - ffv1_description(premis, xml_info, capture_station, times, event_dict) write_premis(doc, premisxml) if __name__ == '__main__': From c3628f6ef149339d95561e1a54a19399d1b7186b Mon Sep 17 00:00:00 2001 From: kieranjol Date: Sun, 12 Mar 2017 23:29:51 +0000 Subject: [PATCH 20/30] tapepremis - adds some more pep 8 improvements --- tapepremis.py | 95 +++++++++++++++++++++++++++------------------------ 1 file changed, 50 insertions(+), 45 deletions(-) diff --git a/tapepremis.py b/tapepremis.py index 7a52bcc..f6442ff 100755 --- a/tapepremis.py +++ b/tapepremis.py @@ -2,7 +2,6 @@ import sys import os import uuid -import csv import lxml.etree as ET from ififuncs import get_date_modified from premis import make_premis @@ -14,73 +13,75 @@ def make_event( - premis,event_type, event_detail, - agentlist, eventID, eventLinkingObjectIdentifier, - eventLinkingObjectRole, event_time + premis, event_type, event_detail, + agentlist, event_id, event_linking_object_identifier, + event_linking_object_role, event_time ): # This is really only here because the premis.py version handles the \ # linkingAgentIdentifiers differently. premis_namespace = "http://www.loc.gov/premis/v3" event = ET.SubElement(premis, "{%s}event" % (premis_namespace)) - premis.insert(-1,event) - event_Identifier = create_unit(1,event,'eventIdentifier') + premis.insert(-1, event) + event_Identifier = create_unit(1, event, 'eventIdentifier') event_id_type = ET.Element("{%s}eventIdentifierType" % (premis_namespace)) - event_Identifier.insert(0,event_id_type) + event_Identifier.insert(0, event_id_type) event_id_value = ET.Element("{%s}eventIdentifierValue" % (premis_namespace)) - event_Identifier.insert(0,event_id_value) + event_Identifier.insert(0, event_id_value) event_Type = ET.Element("{%s}eventType" % (premis_namespace)) - event.insert(2,event_Type) + event.insert(2, event_Type) event_DateTime = ET.Element("{%s}eventDateTime" % (premis_namespace)) - event.insert(3,event_DateTime) + event.insert(3, event_DateTime) if event_time == 'now': event_DateTime.text = time.strftime("%Y-%m-%dT%H:%M:%S") else: event_DateTime.text = event_time event_Type.text = event_type - event_id_value.text = eventID + event_id_value.text = event_id event_id_type.text = 'UUID' eventDetailInformation = create_unit( - 4,event,'eventDetailInformation' + 4, event, 'eventDetailInformation' ) eventDetail = create_unit( - 0,eventDetailInformation,'eventDetail' + 0, eventDetailInformation, 'eventDetail' ) eventDetail.text = event_detail - for i in eventLinkingObjectIdentifier: + for i in event_linking_object_identifier: linkingObjectIdentifier = create_unit( - 5,event,'linkingObjectIdentifier' + 5, event, 'linkingObjectIdentifier' ) linkingObjectIdentifierType = create_unit( - 0,linkingObjectIdentifier,'linkingObjectIdentifierType' + 0, linkingObjectIdentifier, 'linkingObjectIdentifierType' ) linkingObjectIdentifierValue = create_unit( - 1,linkingObjectIdentifier,'linkingObjectIdentifierValue' + 1, linkingObjectIdentifier, 'linkingObjectIdentifierValue' ) linkingObjectIdentifierValue.text = i linkingObjectRole = create_unit( - 2,linkingObjectIdentifier,'linkingObjectRole' + 2, linkingObjectIdentifier, 'linkingObjectRole' ) linkingObjectIdentifierType.text = 'UUID' - linkingObjectRole.text = eventLinkingObjectRole + linkingObjectRole.text = event_linking_object_role for i in agentlist: linkingAgentIdentifier = create_unit( - -1,event,'linkingAgentIdentifier' + -1, event, 'linkingAgentIdentifier' ) linkingAgentIdentifierType = create_unit( - 0,linkingAgentIdentifier,'linkingAgentIdentifierType' + 0, linkingAgentIdentifier, 'linkingAgentIdentifierType' ) linkingAgentIdentifierValue = create_unit( - 1,linkingAgentIdentifier,'linkingAgentIdentifierValue' + 1, linkingAgentIdentifier, 'linkingAgentIdentifierValue' ) linkingAgentIdentifierRole = create_unit( - 2,linkingAgentIdentifier,'linkingAgentRole' + 2, linkingAgentIdentifier, 'linkingAgentRole' ) linkingAgentIdentifierRole.text = 'implementer' linkingAgentIdentifierType.text = 'UUID' linkingAgentIdentifierValue.text = i -def capture_description(premis, xml_info, capture_station, times, total_agents, engineer): +def capture_description( + premis, xml_info, capture_station, times, total_agents, engineer + ): ''' Events: 1. capture - glean from v210 mediainfo xml @@ -95,8 +96,8 @@ def capture_description(premis, xml_info, capture_station, times, total_agents, engineer_agent = '9e59e772-14b0-4f9e-95b3-b88b6e751c3b' elif engineer == 'Raelene Casey': engineer_agent = 'b342d3f7-d87e-4fe3-8da5-89e16a30b59e' - - + + capture_uuid = str(uuid.uuid4()) capture_dict = {} if capture_station == 'es2': @@ -107,7 +108,7 @@ def capture_description(premis, xml_info, capture_station, times, total_agents, capture_agents = [ j30sdi_agent, bm4k_agent, edit_suite2_mac_agent, elcapitan_agent, - engineer_agent + engineer_agent ] elif capture_station == 'loopline': m2000p_agent = '60ae3a85-b595-45e0-8e4a-b95e90a6c422' @@ -119,7 +120,7 @@ def capture_description(premis, xml_info, capture_station, times, total_agents, loopline_mac_agent, osx_lion_agent, engineer_agent ] - + elif capture_station == 'ingest1': sony510p_agent = 'dbdbb06b-ab10-49db-97a1-ff2ad285f9d2' ingest1_agent = '5fd99e09-63d7-4e9f-8383-1902f727d2a5' @@ -141,13 +142,15 @@ def capture_description(premis, xml_info, capture_station, times, total_agents, return event_dict -def ffv1_description(premis, xml_info, capture_station, times, event_dict, script_user): +def ffv1_description( + premis, xml_info, capture_station, times, event_dict, script_user + ): if script_user == 'Kieran O\'Leary': script_user_agent = '0b3b7e69-80e1-48ec-bf07-62b04669117d' elif script_user == 'Aoife Fitzmaurice': script_user_agent = '9e59e772-14b0-4f9e-95b3-b88b6e751c3b' elif script_user == 'Raelene Casey': - script_user_agent = 'b342d3f7-d87e-4fe3-8da5-89e16a30b59e' + script_user_agent = 'b342d3f7-d87e-4fe3-8da5-89e16a30b59e' transcode_uuid = str(uuid.uuid4()) framemd5_uuid = str(uuid.uuid4()) manifest_uuid = str(uuid.uuid4()) @@ -156,13 +159,13 @@ def ffv1_description(premis, xml_info, capture_station, times, event_dict, scrip elcapitan_agent = '68f56ede-a1cf-48aa-b1d8-dc9850d5bfcc' ffv1_agents = [ edit_suite2_mac_agent, elcapitan_agent, script_user_agent - ] + ] make_event( premis, 'compression', 'transcode to FFV1/Matroska (figure out wording later)', ffv1_agents, transcode_uuid, xml_info[4], 'outcome', times[1] ) - + elif capture_station == 'ingest1': ingest1_agent = '5fd99e09-63d7-4e9f-8383-1902f727d2a5' windows7_agent = '192f61b1-8130-4236-a827-a194a20557fe' @@ -179,13 +182,13 @@ def ffv1_description(premis, xml_info, capture_station, times, event_dict, scrip loopline_mac_agent = 'be3060a8-6ccf-4339-97d5-a265687c3a5a' ffv1_agents = [ osx_lion_agent, loopline_mac_agent, script_user_agent - ] + ] make_event( premis, 'compression', 'transcode to FFV1/Matroska while specifying 4:3 DAR ' 'and Top Field First interlacement', ffv1_agents, transcode_uuid, xml_info[4], 'outcome', times[1] - ) + ) make_event( premis, 'fixity check', 'lossless verification via framemd5 (figure out wording later)', @@ -196,9 +199,9 @@ def ffv1_description(premis, xml_info, capture_station, times, event_dict, scrip 'whole file checksum manifest of SIP', ffv1_agents, manifest_uuid, xml_info[4], 'source', 'now-placeholder' ) - print ffv1_agents for agent in ffv1_agents: - # Just the UUID is returned. + # Just the UUID is returned. This prevents errors if the engineer and + # script user are different if agent in event_dict: event_dict[agent] += [transcode_uuid] event_dict[agent] += [framemd5_uuid] @@ -209,7 +212,7 @@ def ffv1_description(premis, xml_info, capture_station, times, event_dict, scrip event_dict[agent] += [manifest_uuid] for agent in event_dict: make_agent( - premis, event_dict[agent],agent + premis, event_dict[agent], agent ) @@ -272,19 +275,21 @@ def get_capture_workstation(mediaxml): return capture_station - def get_user(question): user = '' - if not user == '1' or user == '2' or user =='3': - user = raw_input( + if not user == '1' or user == '2' or user == '3': + user = raw_input( '\n\n%s' '\nPress 1 or 2 or 3\n\n' - '1. Kieran O\'Leary\n2. Aoife Fitzmaurice\n3. Raelene Casey\n' % question) + '1. Kieran O\'Leary\n2. Aoife Fitzmaurice\n3. Raelene Casey\n' + % question) while user not in ('1', '2', '3'): - user = raw_input( - '\n\n%s' - '\nPress 1 or 2 or 3\n\n' - '1. Kieran O\'Leary\n2. Aoife Fitzmaurice\n3. Raelene Casey\n' % question) + user = raw_input( + '\n\n%s' + '\nPress 1 or 2 or 3\n\n' + '1. Kieran O\'Leary\n2. Aoife Fitzmaurice\n3. Raelene Casey\n' + % question + ) if user == '1': user = 'Kieran O\'Leary' elif user == '2': From 3748919a5849c16159ecddfa57bf564b4c32ee2f Mon Sep 17 00:00:00 2001 From: kieranjol Date: Sun, 12 Mar 2017 23:53:05 +0000 Subject: [PATCH 21/30] tapepremis - adds log file datetime extraction --- tapepremis.py | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/tapepremis.py b/tapepremis.py index f6442ff..fb437f2 100755 --- a/tapepremis.py +++ b/tapepremis.py @@ -192,12 +192,12 @@ def ffv1_description( make_event( premis, 'fixity check', 'lossless verification via framemd5 (figure out wording later)', - ffv1_agents, framemd5_uuid, xml_info[4], 'source', 'now-placeholder' + ffv1_agents, framemd5_uuid, xml_info[4], 'source', times[3] ) make_event( premis, 'message digest calculation', 'whole file checksum manifest of SIP', ffv1_agents, - manifest_uuid, xml_info[4], 'source', 'now-placeholder' + manifest_uuid, xml_info[4], 'source', times[2] ) for agent in ffv1_agents: # Just the UUID is returned. This prevents errors if the engineer and @@ -299,6 +299,23 @@ def get_user(question): return user +def analyze_log(logfile): + losslessness = '' + framemd5_time = '' + manifest_time = '' + with open(logfile, 'r') as fo: + log_lines = fo.readlines() + for line in log_lines: + if 'Transcode was lossless' in line: + losslessness = 'lossless' + if 'Framemd5 generation of output file completed' in line: + framemd5_time = line[:19] + if 'MD5 manifest started' in line: + manifest_time = line[:19] + + return manifest_time, framemd5_time, losslessness + + def main(): total_agents = [] script_user = get_user('**** Who is running this script?') @@ -308,6 +325,7 @@ def main(): sip_dir = os.path.dirname(source_file) parent_dir = os.path.dirname(sip_dir) metadata_dir = os.path.join(parent_dir, 'metadata') + logs_dir = os.path.join(parent_dir, 'logs') ffv1_xml = os.path.join( metadata_dir, os.path.basename( sys.argv[1] @@ -320,9 +338,17 @@ def main(): os.path.basename( sys.argv[1].replace('.mkv', '.mov') + '_source_mediainfo.xml')) + logfile = os.path.join( + logs_dir, + os.path.basename( + sys.argv[1].replace('.mkv', '.mov') + + '_log.log')) capture_time = get_times(source_xml) transcode_time = get_times(ffv1_xml) - times = [capture_time, transcode_time] + manifest_time, framemd5_time, losslessness = analyze_log(logfile) + times = [ + capture_time, transcode_time, manifest_time, framemd5_time, losslessness + ] if os.path.isfile(ffv1_xml): capture_station = get_capture_workstation(ffv1_xml) else: From bb91c4cca2c549532a6b1e8c4a88b2bffa9c8ca0 Mon Sep 17 00:00:00 2001 From: kieranjol Date: Sun, 12 Mar 2017 23:55:43 +0000 Subject: [PATCH 22/30] premis - removes debug prints --- premis.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/premis.py b/premis.py index 20d89dc..f6000dd 100755 --- a/premis.py +++ b/premis.py @@ -179,7 +179,6 @@ def setup_xml(source_file): namespace = '' premis_namespace = "http://www.loc.gov/premis/v3" xsi_namespace = "http://www.w3.org/2001/XMLSchema-instance" - print premisxml if os.path.isfile(premisxml): print 'looks like premis already exists?' parser = ET.XMLParser(remove_blank_text=True) @@ -316,8 +315,7 @@ def create_object(source_file, items, premis, premis_namespace, premisxml, repre relationshipSubType = create_unit(1,relationship, 'relationshipSubType') relationshipSubType.text = 'is included in' # this is a total hack. if sequence = loopline', do not generate hash as it already exists in manifest :( - print len(sequence) - print sequence + # looks like loopline isn't the keyword any longer. it's len = 32? if not len(sequence) == 32: md5_output = hashlib_md5(source_file, image) messageDigest.text = md5_output From b986a6d0a8e54c6ab159b871fb96de154ae6ec71 Mon Sep 17 00:00:00 2001 From: kieranjol Date: Thu, 16 Mar 2017 20:44:00 +0000 Subject: [PATCH 23/30] tapepremis - adds intellectual entity --- tapepremis.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tapepremis.py b/tapepremis.py index fb437f2..0b884ae 100755 --- a/tapepremis.py +++ b/tapepremis.py @@ -9,6 +9,7 @@ from premis import write_premis from premis import setup_xml from premis import create_representation +from premis import create_intellectual_entity from premis import create_unit @@ -376,12 +377,17 @@ def main(): "user":'Kieran O\' Leary' } representation_uuid = str(uuid.uuid4()) + intellectual_entity_uuid = str(uuid.uuid4()) # looks like loopline isn't the keyword any longer. it's len = 32? xml_info = make_premis( source_file, items, premis, premis_namespace, premisxml, representation_uuid, md5 ) linkinguuids = [xml_info[4][0], 'n/a', os.path.basename(source_file)] + create_intellectual_entity( + premisxml, premis_namespace, doc, premis, + items, intellectual_entity_uuid + ) create_representation( premisxml, premis_namespace, doc, premis, items, linkinguuids, representation_uuid, 'no_sequence', 'n/a' From 5fd2f7c99f03d03b180577cfd600a354e9250d4f Mon Sep 17 00:00:00 2001 From: kieranjol Date: Thu, 16 Mar 2017 20:53:03 +0000 Subject: [PATCH 24/30] tapepremis - adds a bunch of placeholders to be updated later in workflow --- tapepremis.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tapepremis.py b/tapepremis.py index 0b884ae..1f54136 100755 --- a/tapepremis.py +++ b/tapepremis.py @@ -369,12 +369,12 @@ def main(): # I don't even know if any of these are relevant anymore items = { "workflow":"raw audio", - "oe":os.path.basename(source_file), + "oe":'n/a', "filmographic":'n/a', - "sourceAccession":os.path.basename(source_file), + "sourceAccession":'unknown at present', "interventions":['placeholder'], "prepList":['placeholder'], - "user":'Kieran O\' Leary' + "user":'n/a' } representation_uuid = str(uuid.uuid4()) intellectual_entity_uuid = str(uuid.uuid4()) @@ -383,7 +383,7 @@ def main(): source_file, items, premis, premis_namespace, premisxml, representation_uuid, md5 ) - linkinguuids = [xml_info[4][0], 'n/a', os.path.basename(source_file)] + linkinguuids = [xml_info[4][0], 'n/a', 'n/a'] create_intellectual_entity( premisxml, premis_namespace, doc, premis, items, intellectual_entity_uuid From e5383805ca0aa25fcd7eab626cbbec6a0c9eb949 Mon Sep 17 00:00:00 2001 From: kieranjol Date: Thu, 16 Mar 2017 23:37:11 +0000 Subject: [PATCH 25/30] tapepremis - add deck interview for ingest1 --- tapepremis.py | 39 ++++++++++++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/tapepremis.py b/tapepremis.py index 1f54136..afcbd12 100755 --- a/tapepremis.py +++ b/tapepremis.py @@ -81,7 +81,7 @@ def make_event( def capture_description( - premis, xml_info, capture_station, times, total_agents, engineer + premis, xml_info, capture_station, times, engineer ): ''' Events: @@ -100,7 +100,6 @@ def capture_description( capture_uuid = str(uuid.uuid4()) - capture_dict = {} if capture_station == 'es2': j30sdi_agent = 'e2ca7ad2-8edf-4e4e-a3c7-36e970c796c9' bm4k_agent = 'f47b98a2-b879-4786-9f6b-11fc3234a91e' @@ -122,13 +121,18 @@ def capture_description( engineer_agent ] - elif capture_station == 'ingest1': + elif 'ingest1' in capture_station: sony510p_agent = 'dbdbb06b-ab10-49db-97a1-ff2ad285f9d2' + sony1200p_agent = 'd13fae39-ac71-446e-88df-96c0d267b26c' ingest1_agent = '5fd99e09-63d7-4e9f-8383-1902f727d2a5' windows7_agent = '192f61b1-8130-4236-a827-a194a20557fe' ingest1kona_agent = 'c93ee9a5-4c0c-4670-b857-8726bfd23cae' - capture_agents = [ - sony510p_agent, ingest1kona_agent, + if capture_station == 'ingest1-dvw': + capture_agents = [sony510p_agent] + elif capture_station == 'ingest1-uvw': + capture_agents = [sony1200p_agent] + capture_agents += [ + ingest1kona_agent, ingest1_agent, windows7_agent, engineer_agent ] @@ -144,7 +148,7 @@ def capture_description( def ffv1_description( - premis, xml_info, capture_station, times, event_dict, script_user + premis, xml_info, capture_station, times, event_dict, script_user ): if script_user == 'Kieran O\'Leary': script_user_agent = '0b3b7e69-80e1-48ec-bf07-62b04669117d' @@ -167,7 +171,7 @@ def ffv1_description( ffv1_agents, transcode_uuid, xml_info[4], 'outcome', times[1] ) - elif capture_station == 'ingest1': + elif 'ingest1' in capture_station: ingest1_agent = '5fd99e09-63d7-4e9f-8383-1902f727d2a5' windows7_agent = '192f61b1-8130-4236-a827-a194a20557fe' ffv1_agents = [ @@ -273,6 +277,24 @@ def get_capture_workstation(mediaxml): capture_station = 'loopline' elif capture_station == '3': capture_station = 'ingest1' + if capture_station == 'ingest1': + ingest_deck = '0' + while int(ingest_deck) not in range(1,3): + ingest_deck = raw_input( + '\n\n**** Where was tape captured?\n' + 'Press 1, 2\n1. DVW-510p (Digi)\n2. UVW-1200p (BetaSP)\n' + ) + if int(ingest_deck) not in range(1,3): + print 'Incorrect input. Please enter 1 or 2 plz' + while int(ingest_deck) not in range(1,3): + ingest_deck = raw_input( + '\n\n**** Where was tape captured?\n' + 'Press 1, 2\n2. DVW-510p (Digi)\n3. UVW-1200p (BetaSP)\n' + ) + if ingest_deck == '1': + capture_station = 'ingest1-dvw' + elif ingest_deck == '2': + capture_station = 'ingest1-uvw' return capture_station @@ -318,7 +340,6 @@ def analyze_log(logfile): def main(): - total_agents = [] script_user = get_user('**** Who is running this script?') engineer = get_user('**** Who captured the actual tape?') premisxml, premis_namespace, doc, premis = setup_xml(sys.argv[1]) @@ -393,7 +414,7 @@ def main(): items, linkinguuids, representation_uuid, 'no_sequence', 'n/a' ) event_dict = capture_description( - premis, xml_info, capture_station, times, total_agents, engineer + premis, xml_info, capture_station, times, engineer ) ffv1_description( premis, xml_info, capture_station, times, event_dict, script_user From 24aa0f05d3bd89fca92ac108f6c1f04281f4042a Mon Sep 17 00:00:00 2001 From: kieranjol Date: Thu, 16 Mar 2017 23:53:14 +0000 Subject: [PATCH 26/30] tapepremis - accepts folder as input --- tapepremis.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/tapepremis.py b/tapepremis.py index afcbd12..632322f 100755 --- a/tapepremis.py +++ b/tapepremis.py @@ -342,15 +342,23 @@ def analyze_log(logfile): def main(): script_user = get_user('**** Who is running this script?') engineer = get_user('**** Who captured the actual tape?') - premisxml, premis_namespace, doc, premis = setup_xml(sys.argv[1]) - source_file = sys.argv[1] + if not os.path.isdir(sys.argv[1]): + print 'Input should be a directory' + sys.exit() + for root, dirs, filenames in os.walk(sys.argv[1]): + for filename in filenames: + if filename.endswith('.mkv'): + if os.path.isfile(os.path.join(root, filename)): + source_file = os.path.join(root, filename) + print 'Processing: %s' % source_file + premisxml, premis_namespace, doc, premis = setup_xml(source_file) sip_dir = os.path.dirname(source_file) parent_dir = os.path.dirname(sip_dir) metadata_dir = os.path.join(parent_dir, 'metadata') logs_dir = os.path.join(parent_dir, 'logs') ffv1_xml = os.path.join( metadata_dir, os.path.basename( - sys.argv[1] + source_file + '_mediainfo.xml' ) ) @@ -358,12 +366,12 @@ def main(): source_xml = os.path.join( metadata_dir, os.path.basename( - sys.argv[1].replace('.mkv', '.mov') + source_file.replace('.mkv', '.mov') + '_source_mediainfo.xml')) logfile = os.path.join( logs_dir, os.path.basename( - sys.argv[1].replace('.mkv', '.mov') + source_file.replace('.mkv', '.mov') + '_log.log')) capture_time = get_times(source_xml) transcode_time = get_times(ffv1_xml) From a7b88641b91448f5c83b7a5d362894cfea7988e2 Mon Sep 17 00:00:00 2001 From: kieranjol Date: Fri, 17 Mar 2017 17:56:45 +0000 Subject: [PATCH 27/30] tapepremis - moves create_object() into tapepremis, adds pbCore2 extension --- tapepremis.py | 138 +++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 132 insertions(+), 6 deletions(-) diff --git a/tapepremis.py b/tapepremis.py index 632322f..065c75b 100755 --- a/tapepremis.py +++ b/tapepremis.py @@ -2,17 +2,143 @@ import sys import os import uuid +import subprocess import lxml.etree as ET from ififuncs import get_date_modified -from premis import make_premis from premis import make_agent from premis import write_premis from premis import setup_xml from premis import create_representation from premis import create_intellectual_entity from premis import create_unit +from premis import get_input - +def create_object( + source_file, items, premis, premis_namespace, + premisxml, representation_uuid, sequence + ): + video_files = get_input(source_file) + mediainfo_counter = 1 + image_uuids = [] + rep_counter = 0 + print('Generating PREMIS metadata about each file object - this may take' + ' some time if on a network and/or working with an image sequence') + for image in video_files: + object_parent = create_unit( + -1, premis, 'object' + ) + object_identifier_uuid = create_unit( + 1, object_parent, 'objectIdentifier' + ) + object_identifier_uuid_type = create_unit( + 1, object_identifier_uuid, 'objectIdentifierType' + ) + object_identifier_uuid_type.text = 'UUID' + object_identifier_uuid_value = create_unit( + 2, object_identifier_uuid, 'objectIdentifierValue' + ) + file_uuid = str(uuid.uuid4()) + image_uuids.append(file_uuid) + object_identifier_uuid_value.text = file_uuid + object_category = ET.Element( + "{%s}objectCategory" % (premis_namespace) + ) + object_parent.insert( + 5, object_category + ) + object_category.text = 'file' + if rep_counter == 0: + root_uuid = file_uuid + rep_counter += 1 + format_ = ET.Element("{%s}format" % (premis_namespace)) + object_characteristics = create_unit( + 10, object_parent, 'objectCharacteristics' + ) + object_characteristics.insert(2, format_) + mediainfo = subprocess.check_output( + ['mediainfo', '--Output=PBCore2', image] + ) + parser = ET.XMLParser( + remove_blank_text=True, remove_comments=True + ) + mediainfo_xml = ET.fromstring((mediainfo), parser=parser) + fixity = create_unit( + 0, object_characteristics, 'fixity' + ) + size = create_unit( + 1, object_characteristics, 'size' + ) + size.text = str(os.path.getsize(image)) + format_designation = create_unit( + 0, format_, 'formatDesignation' + ) + format_name = create_unit( + 1, format_designation, 'formatName' + ) + format_name_mediainfo = subprocess.check_output( + ['mediainfo', '--Inform=General;%InternetMediaType%', image] + ).rstrip() + if format_name_mediainfo == '': + format_name_mediainfo = subprocess.check_output( + ['mediainfo', '--Inform=General;%Format_Commercial%', image] + ).rstrip() + format_name.text = format_name_mediainfo + message_digest_algorithm = create_unit( + 0, fixity, 'messageDigestAlgorithm' + ) + message_digest = create_unit( + 1, fixity, 'messageDigest' + ) + message_digestOriginator = create_unit( + 2, fixity, 'messageDigestOriginator' + ) + message_digestOriginator.text = 'internal' + object_characteristicsExtension = create_unit( + 4, object_characteristics, 'objectCharacteristicsExtension' + ) + object_characteristicsExtension.insert( + mediainfo_counter, mediainfo_xml + ) + relationship = create_unit( + 7, object_parent, 'relationship' + ) + relatedObjectIdentifier = create_unit( + 2, relationship, 'relatedObjectIdentifier' + ) + relatedObjectIdentifierType = create_unit( + 2, relatedObjectIdentifier, 'relatedObjectIdentifierType' + ) + relatedObjectIdentifierType.text = 'UUID' + relatedObjectIdentifierValue = create_unit( + 3, relatedObjectIdentifier, 'relatedObjectIdentifierValue' + ) + relatedObjectIdentifierValue.text = representation_uuid + if sequence == 'sequence': + relatedObjectSequence = create_unit( + 4, relationship, 'relatedObjectSequence' + ) + relatedObjectSequence.text = str(mediainfo_counter) + relationshipType = create_unit( + 0, relationship, 'relationshipType' + ) + relationshipType.text = 'structural' + relationshipSubType = create_unit( + 1, relationship, 'relationshipSubType' + ) + relationshipSubType.text = 'is included in' + # this is a total hack. if sequence = loopline', do not generate hash as it already exists in manifest :( + # looks like loopline isn't the keyword any longer. it's len = 32? + if not len(sequence) == 32: + md5_output = hashlib_md5(source_file, image) + message_digest.text = md5_output + else: + message_digest.text = sequence + message_digest_algorithm.text = 'md5' + mediainfo_counter += 1 + # When the image info has been grabbed, add info about the representation to the wav file. This may be problematic if makedpx is run first.. + doc = ET.ElementTree(premis) + xml_info = [doc, premisxml, root_uuid, sequence, image_uuids] + return xml_info def make_event( premis, event_type, event_detail, agentlist, event_id, event_linking_object_identifier, @@ -279,14 +405,14 @@ def get_capture_workstation(mediaxml): capture_station = 'ingest1' if capture_station == 'ingest1': ingest_deck = '0' - while int(ingest_deck) not in range(1,3): + while int(ingest_deck) not in range(1, 3): ingest_deck = raw_input( '\n\n**** Where was tape captured?\n' 'Press 1, 2\n1. DVW-510p (Digi)\n2. UVW-1200p (BetaSP)\n' ) - if int(ingest_deck) not in range(1,3): + if int(ingest_deck) not in range(1, 3): print 'Incorrect input. Please enter 1 or 2 plz' - while int(ingest_deck) not in range(1,3): + while int(ingest_deck) not in range(1, 3): ingest_deck = raw_input( '\n\n**** Where was tape captured?\n' 'Press 1, 2\n2. DVW-510p (Digi)\n3. UVW-1200p (BetaSP)\n' @@ -408,7 +534,7 @@ def main(): representation_uuid = str(uuid.uuid4()) intellectual_entity_uuid = str(uuid.uuid4()) # looks like loopline isn't the keyword any longer. it's len = 32? - xml_info = make_premis( + xml_info = create_object( source_file, items, premis, premis_namespace, premisxml, representation_uuid, md5 ) From a6015734ed2eb708100ff3984bbb71dffbc2fdbf Mon Sep 17 00:00:00 2001 From: kieranjol Date: Fri, 17 Mar 2017 18:00:01 +0000 Subject: [PATCH 28/30] premis - revert to master --- premis.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/premis.py b/premis.py index f6000dd..bd92ccd 100755 --- a/premis.py +++ b/premis.py @@ -179,6 +179,7 @@ def setup_xml(source_file): namespace = '' premis_namespace = "http://www.loc.gov/premis/v3" xsi_namespace = "http://www.w3.org/2001/XMLSchema-instance" + print premisxml if os.path.isfile(premisxml): print 'looks like premis already exists?' parser = ET.XMLParser(remove_blank_text=True) @@ -314,13 +315,9 @@ def create_object(source_file, items, premis, premis_namespace, premisxml, repre relationshipType.text = 'structural' relationshipSubType = create_unit(1,relationship, 'relationshipSubType') relationshipSubType.text = 'is included in' - # this is a total hack. if sequence = loopline', do not generate hash as it already exists in manifest :( - # looks like loopline isn't the keyword any longer. it's len = 32? - if not len(sequence) == 32: - md5_output = hashlib_md5(source_file, image) - messageDigest.text = md5_output - else: - messageDigest.text = sequence + + md5_output = hashlib_md5(source_file, image) + messageDigest.text = md5_output messageDigestAlgorithm.text = 'md5' mediainfo_counter += 1 # When the image info has been grabbed, add info about the representation to the wav file. This may be problematic if makedpx is run first.. From 6865628e3c2c87d1d4b97be8a6e56366a06309ec Mon Sep 17 00:00:00 2001 From: kieranjol Date: Fri, 17 Mar 2017 23:37:53 +0000 Subject: [PATCH 29/30] premis_agents - adds uvw1200p --- premis_agents.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/premis_agents.csv b/premis_agents.csv index 28e59db..4801157 100644 --- a/premis_agents.csv +++ b/premis_agents.csv @@ -40,7 +40,7 @@ UUID,75a0b9ff-1f04-43bd-aa87-c31b73b1b61c,Mac Pro,hardware,,Edit Suite 2,impleme UUID,be3060a8-6ccf-4339-97d5-a265687c3a5a,Mac Pro,hardware,,Loopline edit suite,implementer UUID,c93ee9a5-4c0c-4670-b857-8726bfd23cae,AJA KONA LHe Plus,hardware,00T59106,,implementer UUID,5fd99e09-63d7-4e9f-8383-1902f727d2a5,Hewlett Packard Z420 Workstation,hardware,CZC4310HNZ,Ingest 1,implementer -UUID,d13fae39-ac71-446e-88df-96c0d267b26c,,,,, +UUID,d13fae39-ac71-446e-88df-96c0d267b26c,Sony UVW-1200P,hardware,,Sony Betacam SP VTR,implementer UUID,8fac66ac-05de-478c-9d93-c89d46233197,,,,, UUID,005d4f57-69a4-40d9-9360-d45ff8e44f64,,,,, UUID,acf3de69-f7c2-4010-a96b-e2e5018a43b3,,,,, From a061ea7ee1a3e4bf23d1bd4b17b9276664c458ce Mon Sep 17 00:00:00 2001 From: kieranjol Date: Sun, 19 Mar 2017 23:29:02 +0000 Subject: [PATCH 30/30] tapepremis - moves create_representation into tapepremis/guesses workstation --- tapepremis.py | 127 ++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 117 insertions(+), 10 deletions(-) diff --git a/tapepremis.py b/tapepremis.py index 065c75b..6c95d5d 100755 --- a/tapepremis.py +++ b/tapepremis.py @@ -8,10 +8,115 @@ from premis import make_agent from premis import write_premis from premis import setup_xml -from premis import create_representation from premis import create_intellectual_entity from premis import create_unit from premis import get_input +from premis import representation_uuid_csv + + +def create_representation( + premisxml, premis_namespace, doc, premis, + items, linkinguuids, representation_uuid, + sequence, intellectual_entity_uuid + ): + object_parent = create_unit( + 1, premis, 'object' + ) + object_identifier_parent = create_unit( + 1, object_parent, 'objectIdentifier' + ) + object_identifier_uuid = create_unit( + 0, object_parent, 'objectIdentifier' + ) + object_identifier_uuid_type = create_unit( + 1, object_identifier_uuid, 'objectIdentifierType' + ) + object_identifier_uuid_type.text = 'UUID' + object_identifier_uuid_value = create_unit( + 2, object_identifier_uuid, 'objectIdentifierValue' + ) + object_identifier_uuid_value.text = representation_uuid + # add uuids to csv so that other workflows can use them as linking identifiers. + representation_uuid_csv( + items['filmographic'], items['sourceAccession'], representation_uuid + ) + object_parent.insert( + 1, object_identifier_parent + ) + ob_id_type = ET.Element("{%s}objectIdentifierType" % (premis_namespace)) + ob_id_type.text = 'Irish Film Archive Object Entry Register' + objectIdentifierValue = create_unit( + 1, object_identifier_parent, 'objectIdentifierValue' + ) + objectIdentifierValue.text = items['oe'] + object_identifier_parent.insert( + 0, ob_id_type + ) + objectCategory = create_unit( + 2, object_parent, 'objectCategory' + ) + objectCategory.text = 'representation' + # These hardcoded relationships do not really belong here. They should be stipulated by another microservice + if sequence == 'sequence': + representation_relationship( + object_parent, premisxml, items, + 'structural', 'has root', linkinguuids[1][0], + 'root_sequence', 'UUID' + ) + for i in linkinguuids[1]: + representation_relationship( + object_parent, premisxml, items, 'structural', + 'includes', i, 'includes', 'UUID' + ) + representation_relationship( + object_parent, premisxml, items, 'structural', + 'includes',linkinguuids[0], 'n/a', 'UUID' + ) + representation_relationship( + object_parent, premisxml, items, 'derivation', + 'has source',linkinguuids[2], 'n/a', + 'Irish Film Archive Film Accession Register 2010 -' + ) + representation_relationship( + object_parent, premisxml, items, + 'structural', 'represents', intellectual_entity_uuid, 'n/a', 'UUID' + ) + return object_parent + +def representation_relationship( + object_parent, premisxml, items, relationshiptype, + relationshipsubtype, linking_identifier, root_sequence, linkingtype + ): + relationship = create_unit( + -1, object_parent, 'relationship' + ) + representationrelatedObjectIdentifier = create_unit( + 2, relationship, 'relatedObjectIdentifier' + ) + representationrelatedObjectIdentifierType = create_unit( + 2, representationrelatedObjectIdentifier, + 'relatedObjectIdentifierType' + ) + representationrelatedObjectIdentifierValue = create_unit( + 3, representationrelatedObjectIdentifier, + 'relatedObjectIdentifierValue' + ) + if root_sequence == 'root_sequence': + relatedObjectSequence = create_unit( + 4, relationship, 'relatedObjectSequence' + ) + relatedObjectSequence.text = '1' + relationshipType = create_unit( + 0, relationship, 'relationshipType' + ) + relationshipType.text = relationshiptype + relationshipSubType = create_unit( + 1, relationship, 'relationshipSubType' + ) + relationshipSubType.text = relationshipsubtype + representationrelatedObjectIdentifierType.text = linkingtype + representationrelatedObjectIdentifierValue.text = linking_identifier + def create_object( source_file, items, premis, premis_namespace, @@ -274,7 +379,7 @@ def capture_description( def ffv1_description( - premis, xml_info, capture_station, times, event_dict, script_user + premis, xml_info, workstation, times, event_dict, script_user ): if script_user == 'Kieran O\'Leary': script_user_agent = '0b3b7e69-80e1-48ec-bf07-62b04669117d' @@ -285,7 +390,7 @@ def ffv1_description( transcode_uuid = str(uuid.uuid4()) framemd5_uuid = str(uuid.uuid4()) manifest_uuid = str(uuid.uuid4()) - if capture_station == 'es2': + if 'admin' in workstation: edit_suite2_mac_agent = '75a0b9ff-1f04-43bd-aa87-c31b73b1b61c' elcapitan_agent = '68f56ede-a1cf-48aa-b1d8-dc9850d5bfcc' ffv1_agents = [ @@ -297,7 +402,7 @@ def ffv1_description( ffv1_agents, transcode_uuid, xml_info[4], 'outcome', times[1] ) - elif 'ingest1' in capture_station: + elif 'kieranjol' in workstation: ingest1_agent = '5fd99e09-63d7-4e9f-8383-1902f727d2a5' windows7_agent = '192f61b1-8130-4236-a827-a194a20557fe' ffv1_agents = [ @@ -308,7 +413,7 @@ def ffv1_description( 'transcode to FFV1/Matroska (figure out wording later)', ffv1_agents, transcode_uuid, xml_info[4], 'outcome', times[1] ) - elif capture_station == 'loopline': + elif 'kaja' in workstation: osx_lion_agent = 'c5fc84fc-cc96-42a1-a5be-830b4e3012ae' loopline_mac_agent = 'be3060a8-6ccf-4339-97d5-a265687c3a5a' ffv1_agents = [ @@ -452,6 +557,7 @@ def analyze_log(logfile): losslessness = '' framemd5_time = '' manifest_time = '' + logged_workstation = '' with open(logfile, 'r') as fo: log_lines = fo.readlines() for line in log_lines: @@ -461,8 +567,10 @@ def analyze_log(logfile): framemd5_time = line[:19] if 'MD5 manifest started' in line: manifest_time = line[:19] + workstation = log_lines[0][20:35] + print workstation - return manifest_time, framemd5_time, losslessness + return manifest_time, framemd5_time, losslessness, workstation def main(): @@ -501,7 +609,7 @@ def main(): + '_log.log')) capture_time = get_times(source_xml) transcode_time = get_times(ffv1_xml) - manifest_time, framemd5_time, losslessness = analyze_log(logfile) + manifest_time, framemd5_time, losslessness, workstation = analyze_log(logfile) times = [ capture_time, transcode_time, manifest_time, framemd5_time, losslessness ] @@ -513,7 +621,6 @@ def main(): ''' /home/kieranjol/ifigit/ifiscripts/massive/objects sip /home/kieranjol/ifigit/ifiscripts/massive parent - ''' manifest = parent_dir + '_manifest.md5' if not os.path.isfile(manifest): @@ -543,7 +650,7 @@ def main(): premisxml, premis_namespace, doc, premis, items, intellectual_entity_uuid ) - create_representation( + representation_object = create_representation( premisxml, premis_namespace, doc, premis, items, linkinguuids, representation_uuid, 'no_sequence', 'n/a' ) @@ -551,7 +658,7 @@ def main(): premis, xml_info, capture_station, times, engineer ) ffv1_description( - premis, xml_info, capture_station, times, event_dict, script_user + premis, xml_info, workstation, times, event_dict, script_user ) write_premis(doc, premisxml)