diff --git a/premis_agents.csv b/premis_agents.csv index f679f8e..4801157 100644 --- a/premis_agents.csv +++ b/premis_agents.csv @@ -11,7 +11,8 @@ UUID,b22baa5c-8160-427d-9e2f-b62a7263439d,debian,software,4.0r5,Operating system UUID,9486b779-907c-4cc4-802c-22e07dc1242f,osx,software,10.11.1,,implementer UUID,a3bc371f-11fa-4319-a656-1e53c2527552,osx,software,10.11.2,,implementer UUID,f2b79d56-dcd7-4f32-8d80-497851a8fd6b,osx,software,10.11.3,,implementer -UUID,192f61b1-8130-4236-a827-a194a20557fe,Windows 7,software,Service Pack 2,Operating system for 946e5d40-a07f-47d1-9637-def5cb7854ba,implementer +UUID,c5fc84fc-cc96-42a1-a5be-830b4e3012ae,osx,software,10.7.5,Mac OSX Lion,implementer +UUID,192f61b1-8130-4236-a827-a194a20557fe,Windows 7,software,Service Pack 2,,implementer UUID,9281e196-b691-4f03-a004-0ea2bb032a93,SteadyFrame Transcoder,software,,,implementer UUID,b342d3f7-d87e-4fe3-8da5-89e16a30b59e,Raelene Casey,person,,,implementer UUID,cb638899-7ade-4a18-9bba-b8b1a20132a7,Dean Kavanagh,person,,,implementer @@ -20,63 +21,62 @@ UUID,838a1a1b-7ddd-4846-ae8e-3b5ecb4aae55,Mac Pro,hardware,,Image restoration te UUID,55003bbd-49a4-4c7b-8da2-0d5b9bf10168,ProTools,software,12.5.1,,implementer UUID,ca731b64-638f-4dc3-9d27-0fc14387e38c,Steadyframe Computer,hardware,,Host computer for 1369-e9d1-425b-a810-6db1150955ba,implementer UUID,946e5d40-a07f-47d1-9637-def5cb7854ba,HP Z800,hardware,Z800,,implementer -UUID,230d72da-07e7-4a79-96ca-998b9f7a3e41,mac mini,hardware,,,implementer +UUID,230d72da-07e7-4a79-96ca-998b9f7a3e41,mac mini,hardware,,Telecine room Mac Mini,implementer UUID,634edcae-65a7-48b1-9758-bcfbdd12f80b,Aaron Healy,person,,,implementer UUID,25cdb926-12b5-4647-bef1-6f9bca3c18bc,Felix Meehan,person,,,implementer UUID,642208d9-6f65-4498-998f-9a17379319f9,Eoin O'Donohoe,person,,,implementer -UUID,68f56ede-a1cf-48aa-b1d8-dc9850d5bfcc,osx,software,10.11.4,,implementer +UUID,68f56ede-a1cf-48aa-b1d8-dc9850d5bfcc,osx,software,10.11.4,El Capitan,implementer UUID,e5872957-8ee8-4c20-bd8e-d76e1de01b34,iZotope Rx5,software,5.01.184,,implementer UUID,8c02d962-5ac5-4e51-a30c-002553134320,Baselight,software,4.4.7725,Plugin for 11e157a3-1aa7-4195-b816-009a3d47148c,implementer UUID,52adf876-bf30-431c-b0c6-80cc4fd9406c,osx,software,10.9.5,,implementer UUID,9e59e772-14b0-4f9e-95b3-b88b6e751c3b,Aoife Fitzmaurice,person,,,implementer UUID,bc3de900-3903-4764-ab91-2ce89977d0d2,AEO-Light,software,2.2,,executing program -,60ae3a85-b595-45e0-8e4a-b95e90a6c422,,,,, -,dbdbb06b-ab10-49db-97a1-ff2ad285f9d2,,,,, -,e2ca7ad2-8edf-4e4e-a3c7-36e970c796c9,,,,, -,f47b98a2-b879-4786-9f6b-11fc3234a91e,,,,, -,c5e504ca-b4d5-410f-b87b-4b7ed794e44d,,,,, -,75a0b9ff-1f04-43bd-aa87-c31b73b1b61c,,,,, -,c5fc84fc-cc96-42a1-a5be-830b4e3012ae,,,,, -,be3060a8-6ccf-4339-97d5-a265687c3a5a,,,,, -,c93ee9a5-4c0c-4670-b857-8726bfd23cae,,,,, -,5fd99e09-63d7-4e9f-8383-1902f727d2a5,,,,, -,d13fae39-ac71-446e-88df-96c0d267b26c,,,,, -,8fac66ac-05de-478c-9d93-c89d46233197,,,,, -,005d4f57-69a4-40d9-9360-d45ff8e44f64,,,,, -,acf3de69-f7c2-4010-a96b-e2e5018a43b3,,,,, -,6d0a4ad3-7dc6-4607-b3bc-a284228e0c73,,,,, -,8419bfcb-51cb-4ea6-a8b2-e51a9af2e68a,,,,, -,14b935f1-8a9e-45b7-abf2-2593e89e77fc,,,,, -,b499818d-aecd-4ad7-bc61-f4fed329b18b,,,,, -,e37a0f86-816b-416f-8da7-4f5780d80a12,,,,, -,a3dd197d-ed80-41ff-bb84-f13e79a6e6d6,,,,, -,6d782410-db75-497b-97a6-c41a0fcc285c,,,,, -,bfc78799-ddfb-452d-b0ab-bf48c9dbcaa1,,,,, -,81a11bf9-65c4-4688-ab4a-954dbb1be241,,,,, -,29db18b3-1a90-4f7e-8432-adbf1ae40507,,,,, -,1be59fbb-bd99-40a9-8593-94d07f0e89f4,,,,, -,ee377e44-8635-4bda-a444-f7e632d32096,,,,, -,175468eb-c087-43b8-8444-222378604a95,,,,, -,2b45a5a3-1e90-4918-a6aa-41d9a93aef5e,,,,, -,f98bf550-a3fe-4f66-925e-8944799f930d,,,,, -,491e18c3-e4e1-4a47-b945-dadb53b565d1,,,,, -,4ac05527-bc27-4848-9f57-b2dc0820fab4,,,,, -,aa82e005-3246-4b2c-ba18-61de26a0855a,,,,, -,3acdc15f-ffd1-4be9-b279-351cb369ec15,,,,, -,62d87cd8-fc74-4895-b8c1-c2173feae060,,,,, -,2f68cf72-3fde-4e40-ac7b-ec7e382fbcf8,,,,, -,8d87bd1c-1936-46ee-ab56-5d571462f9ec,,,,, -,199f8f98-90d4-4ca1-9d84-bcde93d4b698,,,,, -,efb8371f-7d46-4f41-9133-011478b4b97a,,,,, -,b6a7bddf-08a0-46b7-a2b3-09e5fc1ec23b,,,,, -,f1b47f5f-fa70-42e3-ae39-6c56665e4a30,,,,, -,88e13b20-ba85-49c0-a8d0-ac32398adf6c,,,,, -,0ca96616-d585-436e-af3d-7a10202ce1c5,,,,, -,2178af8e-19e7-43c3-9bcd-24c462853672,,,,, -,f9e0a990-afc9-4411-bd2a-930f7a521778,,,,, -,3eec23de-facc-4a5e-a4ed-f8ad34626251,,,,, -,b924d919-fea5-47b7-8106-eeff86233b3d,,,,, -,2178d1d3-85be-4bfa-8be1-114a8a17ebbd,,,,, +UUID,60ae3a85-b595-45e0-8e4a-b95e90a6c422,Sony DVW-M2000P,hardware,,Sony Digital Betacam VTR,implementer +UUID,dbdbb06b-ab10-49db-97a1-ff2ad285f9d2,Sony DVW-A510P,hardware,,Sony Digital Betacam VTR,implementer +UUID,e2ca7ad2-8edf-4e4e-a3c7-36e970c796c9,Sony J30-SDI,hardware,,Sony Digital Betacam VTR,implementer +UUID,f47b98a2-b879-4786-9f6b-11fc3234a91e,Blackmagic UltraStudio 4K,hardware,,Edit Suite 2,implementer +UUID,c5e504ca-b4d5-410f-b87b-4b7ed794e44d,AJA KONA 3,hardware,,,implementer +UUID,75a0b9ff-1f04-43bd-aa87-c31b73b1b61c,Mac Pro,hardware,,Edit Suite 2,implementer +UUID,be3060a8-6ccf-4339-97d5-a265687c3a5a,Mac Pro,hardware,,Loopline edit suite,implementer +UUID,c93ee9a5-4c0c-4670-b857-8726bfd23cae,AJA KONA LHe Plus,hardware,00T59106,,implementer +UUID,5fd99e09-63d7-4e9f-8383-1902f727d2a5,Hewlett Packard Z420 Workstation,hardware,CZC4310HNZ,Ingest 1,implementer +UUID,d13fae39-ac71-446e-88df-96c0d267b26c,Sony UVW-1200P,hardware,,Sony Betacam SP VTR,implementer +UUID,8fac66ac-05de-478c-9d93-c89d46233197,,,,, +UUID,005d4f57-69a4-40d9-9360-d45ff8e44f64,,,,, +UUID,acf3de69-f7c2-4010-a96b-e2e5018a43b3,,,,, +UUID,6d0a4ad3-7dc6-4607-b3bc-a284228e0c73,,,,, +UUID,8419bfcb-51cb-4ea6-a8b2-e51a9af2e68a,,,,, +UUID,14b935f1-8a9e-45b7-abf2-2593e89e77fc,,,,, +UUID,b499818d-aecd-4ad7-bc61-f4fed329b18b,,,,, +UUID,e37a0f86-816b-416f-8da7-4f5780d80a12,,,,, +UUID,a3dd197d-ed80-41ff-bb84-f13e79a6e6d6,,,,, +UUID,6d782410-db75-497b-97a6-c41a0fcc285c,,,,, +UUID,bfc78799-ddfb-452d-b0ab-bf48c9dbcaa1,,,,, +UUID,81a11bf9-65c4-4688-ab4a-954dbb1be241,,,,, +UUID,29db18b3-1a90-4f7e-8432-adbf1ae40507,,,,, +UUID,1be59fbb-bd99-40a9-8593-94d07f0e89f4,,,,, +UUID,ee377e44-8635-4bda-a444-f7e632d32096,,,,, +UUID,175468eb-c087-43b8-8444-222378604a95,,,,, +UUID,2b45a5a3-1e90-4918-a6aa-41d9a93aef5e,,,,, +UUID,f98bf550-a3fe-4f66-925e-8944799f930d,,,,, +UUID,491e18c3-e4e1-4a47-b945-dadb53b565d1,,,,, +UUID,4ac05527-bc27-4848-9f57-b2dc0820fab4,,,,, +UUID,aa82e005-3246-4b2c-ba18-61de26a0855a,,,,, +UUID,3acdc15f-ffd1-4be9-b279-351cb369ec15,,,,, +UUID,62d87cd8-fc74-4895-b8c1-c2173feae060,,,,, +UUID,2f68cf72-3fde-4e40-ac7b-ec7e382fbcf8,,,,, +UUID,8d87bd1c-1936-46ee-ab56-5d571462f9ec,,,,, +UUID,199f8f98-90d4-4ca1-9d84-bcde93d4b698,,,,, +UUID,efb8371f-7d46-4f41-9133-011478b4b97a,,,,, +UUID,b6a7bddf-08a0-46b7-a2b3-09e5fc1ec23b,,,,, +UUID,f1b47f5f-fa70-42e3-ae39-6c56665e4a30,,,,, +UUID,88e13b20-ba85-49c0-a8d0-ac32398adf6c,,,,, +UUID,0ca96616-d585-436e-af3d-7a10202ce1c5,,,,, +UUID,2178af8e-19e7-43c3-9bcd-24c462853672,,,,, +UUID,f9e0a990-afc9-4411-bd2a-930f7a521778,,,,, +UUID,3eec23de-facc-4a5e-a4ed-f8ad34626251,,,,, +UUID,b924d919-fea5-47b7-8106-eeff86233b3d,,,,, +UUID,2178d1d3-85be-4bfa-8be1-114a8a17ebbd,,,,, ,97eb6b34-9eb8-4d18-88a8-1949ef439ccc,,,,, ,0edd5887-d142-4788-a63f-3a66e861085f,,,,, ,edb34421-9849-4ed7-8949-32d8ed663e63,,,,, diff --git a/tapepremis.py b/tapepremis.py new file mode 100755 index 0000000..6c95d5d --- /dev/null +++ b/tapepremis.py @@ -0,0 +1,666 @@ +#!/usr/bin/env python +import sys +import os +import uuid +import subprocess +import lxml.etree as ET +from ififuncs import get_date_modified +from premis import make_agent +from premis import write_premis +from premis import setup_xml +from premis import create_intellectual_entity +from premis import create_unit +from premis import get_input +from premis import representation_uuid_csv + + +def create_representation( + premisxml, premis_namespace, doc, premis, + items, linkinguuids, representation_uuid, + sequence, intellectual_entity_uuid + ): + object_parent = create_unit( + 1, premis, 'object' + ) + object_identifier_parent = create_unit( + 1, object_parent, 'objectIdentifier' + ) + object_identifier_uuid = create_unit( + 0, object_parent, 'objectIdentifier' + ) + object_identifier_uuid_type = create_unit( + 1, object_identifier_uuid, 'objectIdentifierType' + ) + object_identifier_uuid_type.text = 'UUID' + object_identifier_uuid_value = create_unit( + 2, object_identifier_uuid, 'objectIdentifierValue' + ) + object_identifier_uuid_value.text = representation_uuid + # add uuids to csv so that other workflows can use them as linking identifiers. + representation_uuid_csv( + items['filmographic'], items['sourceAccession'], representation_uuid + ) + object_parent.insert( + 1, object_identifier_parent + ) + ob_id_type = ET.Element("{%s}objectIdentifierType" % (premis_namespace)) + ob_id_type.text = 'Irish Film Archive Object Entry Register' + objectIdentifierValue = create_unit( + 1, object_identifier_parent, 'objectIdentifierValue' + ) + objectIdentifierValue.text = items['oe'] + object_identifier_parent.insert( + 0, ob_id_type + ) + objectCategory = create_unit( + 2, object_parent, 'objectCategory' + ) + objectCategory.text = 'representation' + # These hardcoded relationships do not really belong here. They should be stipulated by another microservice + if sequence == 'sequence': + representation_relationship( + object_parent, premisxml, items, + 'structural', 'has root', linkinguuids[1][0], + 'root_sequence', 'UUID' + ) + for i in linkinguuids[1]: + representation_relationship( + object_parent, premisxml, items, 'structural', + 'includes', i, 'includes', 'UUID' + ) + representation_relationship( + object_parent, premisxml, items, 'structural', + 'includes',linkinguuids[0], 'n/a', 'UUID' + ) + representation_relationship( + object_parent, premisxml, items, 'derivation', + 'has source',linkinguuids[2], 'n/a', + 'Irish Film Archive Film Accession Register 2010 -' + ) + representation_relationship( + object_parent, premisxml, items, + 'structural', 'represents', intellectual_entity_uuid, 'n/a', 'UUID' + ) + return object_parent + +def representation_relationship( + object_parent, premisxml, items, relationshiptype, + relationshipsubtype, linking_identifier, root_sequence, linkingtype + ): + relationship = create_unit( + -1, object_parent, 'relationship' + ) + representationrelatedObjectIdentifier = create_unit( + 2, relationship, 'relatedObjectIdentifier' + ) + representationrelatedObjectIdentifierType = create_unit( + 2, representationrelatedObjectIdentifier, + 'relatedObjectIdentifierType' + ) + representationrelatedObjectIdentifierValue = create_unit( + 3, representationrelatedObjectIdentifier, + 'relatedObjectIdentifierValue' + ) + if root_sequence == 'root_sequence': + relatedObjectSequence = create_unit( + 4, relationship, 'relatedObjectSequence' + ) + relatedObjectSequence.text = '1' + relationshipType = create_unit( + 0, relationship, 'relationshipType' + ) + relationshipType.text = relationshiptype + relationshipSubType = create_unit( + 1, relationship, 'relationshipSubType' + ) + relationshipSubType.text = relationshipsubtype + representationrelatedObjectIdentifierType.text = linkingtype + representationrelatedObjectIdentifierValue.text = linking_identifier + + +def create_object( + source_file, items, premis, premis_namespace, + premisxml, representation_uuid, sequence + ): + video_files = get_input(source_file) + mediainfo_counter = 1 + image_uuids = [] + rep_counter = 0 + print('Generating PREMIS metadata about each file object - this may take' + ' some time if on a network and/or working with an image sequence') + for image in video_files: + object_parent = create_unit( + -1, premis, 'object' + ) + object_identifier_uuid = create_unit( + 1, object_parent, 'objectIdentifier' + ) + object_identifier_uuid_type = create_unit( + 1, object_identifier_uuid, 'objectIdentifierType' + ) + object_identifier_uuid_type.text = 'UUID' + object_identifier_uuid_value = create_unit( + 2, object_identifier_uuid, 'objectIdentifierValue' + ) + file_uuid = str(uuid.uuid4()) + image_uuids.append(file_uuid) + object_identifier_uuid_value.text = file_uuid + object_category = ET.Element( + "{%s}objectCategory" % (premis_namespace) + ) + object_parent.insert( + 5, object_category + ) + object_category.text = 'file' + if rep_counter == 0: + root_uuid = file_uuid + rep_counter += 1 + format_ = ET.Element("{%s}format" % (premis_namespace)) + object_characteristics = create_unit( + 10, object_parent, 'objectCharacteristics' + ) + object_characteristics.insert(2, format_) + mediainfo = subprocess.check_output( + ['mediainfo', '--Output=PBCore2', image] + ) + parser = ET.XMLParser( + remove_blank_text=True, remove_comments=True + ) + mediainfo_xml = ET.fromstring((mediainfo), parser=parser) + fixity = create_unit( + 0, object_characteristics, 'fixity' + ) + size = create_unit( + 1, object_characteristics, 'size' + ) + size.text = str(os.path.getsize(image)) + format_designation = create_unit( + 0, format_, 'formatDesignation' + ) + format_name = create_unit( + 1, format_designation, 'formatName' + ) + format_name_mediainfo = subprocess.check_output( + ['mediainfo', '--Inform=General;%InternetMediaType%', image] + ).rstrip() + if format_name_mediainfo == '': + format_name_mediainfo = subprocess.check_output( + ['mediainfo', '--Inform=General;%Format_Commercial%', image] + ).rstrip() + format_name.text = format_name_mediainfo + message_digest_algorithm = create_unit( + 0, fixity, 'messageDigestAlgorithm' + ) + message_digest = create_unit( + 1, fixity, 'messageDigest' + ) + message_digestOriginator = create_unit( + 2, fixity, 'messageDigestOriginator' + ) + message_digestOriginator.text = 'internal' + object_characteristicsExtension = create_unit( + 4, object_characteristics, 'objectCharacteristicsExtension' + ) + object_characteristicsExtension.insert( + mediainfo_counter, mediainfo_xml + ) + relationship = create_unit( + 7, object_parent, 'relationship' + ) + relatedObjectIdentifier = create_unit( + 2, relationship, 'relatedObjectIdentifier' + ) + relatedObjectIdentifierType = create_unit( + 2, relatedObjectIdentifier, 'relatedObjectIdentifierType' + ) + relatedObjectIdentifierType.text = 'UUID' + relatedObjectIdentifierValue = create_unit( + 3, relatedObjectIdentifier, 'relatedObjectIdentifierValue' + ) + relatedObjectIdentifierValue.text = representation_uuid + if sequence == 'sequence': + relatedObjectSequence = create_unit( + 4, relationship, 'relatedObjectSequence' + ) + relatedObjectSequence.text = str(mediainfo_counter) + relationshipType = create_unit( + 0, relationship, 'relationshipType' + ) + relationshipType.text = 'structural' + relationshipSubType = create_unit( + 1, relationship, 'relationshipSubType' + ) + relationshipSubType.text = 'is included in' + # this is a total hack. if sequence = loopline', do not generate hash as it already exists in manifest :( + # looks like loopline isn't the keyword any longer. it's len = 32? + if not len(sequence) == 32: + md5_output = hashlib_md5(source_file, image) + message_digest.text = md5_output + else: + message_digest.text = sequence + message_digest_algorithm.text = 'md5' + mediainfo_counter += 1 + # When the image info has been grabbed, add info about the representation to the wav file. This may be problematic if makedpx is run first.. + doc = ET.ElementTree(premis) + xml_info = [doc, premisxml, root_uuid, sequence, image_uuids] + return xml_info +def make_event( + premis, event_type, event_detail, + agentlist, event_id, event_linking_object_identifier, + event_linking_object_role, event_time + ): + # This is really only here because the premis.py version handles the \ + # linkingAgentIdentifiers differently. + premis_namespace = "http://www.loc.gov/premis/v3" + event = ET.SubElement(premis, "{%s}event" % (premis_namespace)) + premis.insert(-1, event) + event_Identifier = create_unit(1, event, 'eventIdentifier') + event_id_type = ET.Element("{%s}eventIdentifierType" % (premis_namespace)) + event_Identifier.insert(0, event_id_type) + event_id_value = ET.Element("{%s}eventIdentifierValue" % (premis_namespace)) + event_Identifier.insert(0, event_id_value) + event_Type = ET.Element("{%s}eventType" % (premis_namespace)) + event.insert(2, event_Type) + event_DateTime = ET.Element("{%s}eventDateTime" % (premis_namespace)) + event.insert(3, event_DateTime) + if event_time == 'now': + event_DateTime.text = time.strftime("%Y-%m-%dT%H:%M:%S") + else: + event_DateTime.text = event_time + event_Type.text = event_type + event_id_value.text = event_id + event_id_type.text = 'UUID' + eventDetailInformation = create_unit( + 4, event, 'eventDetailInformation' + ) + eventDetail = create_unit( + 0, eventDetailInformation, 'eventDetail' + ) + eventDetail.text = event_detail + for i in event_linking_object_identifier: + linkingObjectIdentifier = create_unit( + 5, event, 'linkingObjectIdentifier' + ) + linkingObjectIdentifierType = create_unit( + 0, linkingObjectIdentifier, 'linkingObjectIdentifierType' + ) + linkingObjectIdentifierValue = create_unit( + 1, linkingObjectIdentifier, 'linkingObjectIdentifierValue' + ) + linkingObjectIdentifierValue.text = i + linkingObjectRole = create_unit( + 2, linkingObjectIdentifier, 'linkingObjectRole' + ) + linkingObjectIdentifierType.text = 'UUID' + linkingObjectRole.text = event_linking_object_role + for i in agentlist: + linkingAgentIdentifier = create_unit( + -1, event, 'linkingAgentIdentifier' + ) + linkingAgentIdentifierType = create_unit( + 0, linkingAgentIdentifier, 'linkingAgentIdentifierType' + ) + linkingAgentIdentifierValue = create_unit( + 1, linkingAgentIdentifier, 'linkingAgentIdentifierValue' + ) + linkingAgentIdentifierRole = create_unit( + 2, linkingAgentIdentifier, 'linkingAgentRole' + ) + linkingAgentIdentifierRole.text = 'implementer' + linkingAgentIdentifierType.text = 'UUID' + linkingAgentIdentifierValue.text = i + + +def capture_description( + premis, xml_info, capture_station, times, engineer + ): + ''' + Events: + 1. capture - glean from v210 mediainfo xml + 2. ffv1 - ffmpeg logs but get time from sip log also user input + 3. lossless verification - ffmpeg logs and time/judgement from sip log + 4. whole file manifest - sip log + that's it? + ''' + if engineer == 'Kieran O\'Leary': + engineer_agent = '0b3b7e69-80e1-48ec-bf07-62b04669117d' + elif engineer == 'Aoife Fitzmaurice': + engineer_agent = '9e59e772-14b0-4f9e-95b3-b88b6e751c3b' + elif engineer == 'Raelene Casey': + engineer_agent = 'b342d3f7-d87e-4fe3-8da5-89e16a30b59e' + + + capture_uuid = str(uuid.uuid4()) + if capture_station == 'es2': + j30sdi_agent = 'e2ca7ad2-8edf-4e4e-a3c7-36e970c796c9' + bm4k_agent = 'f47b98a2-b879-4786-9f6b-11fc3234a91e' + edit_suite2_mac_agent = '75a0b9ff-1f04-43bd-aa87-c31b73b1b61c' + elcapitan_agent = '68f56ede-a1cf-48aa-b1d8-dc9850d5bfcc' + capture_agents = [ + j30sdi_agent, bm4k_agent, + edit_suite2_mac_agent, elcapitan_agent, + engineer_agent + ] + elif capture_station == 'loopline': + m2000p_agent = '60ae3a85-b595-45e0-8e4a-b95e90a6c422' + kona3_agent = 'c5e504ca-b4d5-410f-b87b-4b7ed794e44d' + loopline_mac_agent = 'be3060a8-6ccf-4339-97d5-a265687c3a5a' + osx_lion_agent = 'c5fc84fc-cc96-42a1-a5be-830b4e3012ae' + capture_agents = [ + m2000p_agent, kona3_agent, + loopline_mac_agent, osx_lion_agent, + engineer_agent + ] + + elif 'ingest1' in capture_station: + sony510p_agent = 'dbdbb06b-ab10-49db-97a1-ff2ad285f9d2' + sony1200p_agent = 'd13fae39-ac71-446e-88df-96c0d267b26c' + ingest1_agent = '5fd99e09-63d7-4e9f-8383-1902f727d2a5' + windows7_agent = '192f61b1-8130-4236-a827-a194a20557fe' + ingest1kona_agent = 'c93ee9a5-4c0c-4670-b857-8726bfd23cae' + if capture_station == 'ingest1-dvw': + capture_agents = [sony510p_agent] + elif capture_station == 'ingest1-uvw': + capture_agents = [sony1200p_agent] + capture_agents += [ + ingest1kona_agent, + ingest1_agent, windows7_agent, + engineer_agent + ] + make_event( + premis, 'creation', 'tape capture', + capture_agents, capture_uuid, xml_info[4], 'outcome', times[0] + ) + event_dict = {} + for agent in capture_agents: + # Just the UUID is returned. + event_dict[agent] = [capture_uuid] + return event_dict + + +def ffv1_description( + premis, xml_info, workstation, times, event_dict, script_user + ): + if script_user == 'Kieran O\'Leary': + script_user_agent = '0b3b7e69-80e1-48ec-bf07-62b04669117d' + elif script_user == 'Aoife Fitzmaurice': + script_user_agent = '9e59e772-14b0-4f9e-95b3-b88b6e751c3b' + elif script_user == 'Raelene Casey': + script_user_agent = 'b342d3f7-d87e-4fe3-8da5-89e16a30b59e' + transcode_uuid = str(uuid.uuid4()) + framemd5_uuid = str(uuid.uuid4()) + manifest_uuid = str(uuid.uuid4()) + if 'admin' in workstation: + edit_suite2_mac_agent = '75a0b9ff-1f04-43bd-aa87-c31b73b1b61c' + elcapitan_agent = '68f56ede-a1cf-48aa-b1d8-dc9850d5bfcc' + ffv1_agents = [ + edit_suite2_mac_agent, elcapitan_agent, script_user_agent + ] + make_event( + premis, 'compression', + 'transcode to FFV1/Matroska (figure out wording later)', + ffv1_agents, transcode_uuid, xml_info[4], 'outcome', times[1] + ) + + elif 'kieranjol' in workstation: + ingest1_agent = '5fd99e09-63d7-4e9f-8383-1902f727d2a5' + windows7_agent = '192f61b1-8130-4236-a827-a194a20557fe' + ffv1_agents = [ + ingest1_agent, windows7_agent, script_user_agent + ] + make_event( + premis, 'compression', + 'transcode to FFV1/Matroska (figure out wording later)', + ffv1_agents, transcode_uuid, xml_info[4], 'outcome', times[1] + ) + elif 'kaja' in workstation: + osx_lion_agent = 'c5fc84fc-cc96-42a1-a5be-830b4e3012ae' + loopline_mac_agent = 'be3060a8-6ccf-4339-97d5-a265687c3a5a' + ffv1_agents = [ + osx_lion_agent, loopline_mac_agent, script_user_agent + ] + make_event( + premis, 'compression', + 'transcode to FFV1/Matroska while specifying 4:3 DAR ' + 'and Top Field First interlacement', + ffv1_agents, transcode_uuid, xml_info[4], 'outcome', times[1] + ) + make_event( + premis, 'fixity check', + 'lossless verification via framemd5 (figure out wording later)', + ffv1_agents, framemd5_uuid, xml_info[4], 'source', times[3] + ) + make_event( + premis, 'message digest calculation', + 'whole file checksum manifest of SIP', ffv1_agents, + manifest_uuid, xml_info[4], 'source', times[2] + ) + for agent in ffv1_agents: + # Just the UUID is returned. This prevents errors if the engineer and + # script user are different + if agent in event_dict: + event_dict[agent] += [transcode_uuid] + event_dict[agent] += [framemd5_uuid] + event_dict[agent] += [manifest_uuid] + else: + event_dict[agent] = [transcode_uuid] + event_dict[agent] += [framemd5_uuid] + event_dict[agent] += [manifest_uuid] + for agent in event_dict: + make_agent( + premis, event_dict[agent], agent + ) + + +def get_checksum(manifest): + if os.path.isfile(manifest): + with open(manifest, 'r') as fo: + manifest_lines = fo.readlines() + for md5 in manifest_lines: + if md5[-5:].rsplit()[0] == '.mkv': + return md5[:32] + + +def get_times(sourcexml): + mediaxml_object = ET.parse(sourcexml) + mxml = mediaxml_object.getroot() + # encoded date is probably better + capture_date = mxml.xpath('//File_Modified_Date_Local')[0].text + return capture_date + + +def get_capture_workstation(mediaxml): + mediaxml_object = ET.parse(mediaxml) + mxml = mediaxml_object.getroot() + mediaexpress_check = len(mxml.xpath('//COMAPPLEPROAPPSLOGNOTE')) + fcp7_check = len(mxml.xpath('//COMAPPLEFINALCUTSTUDIOMEDIAUUID')) + if mediaexpress_check > 0: + print 'this was probably Media Express?' + capture_station = 'es2' + elif fcp7_check > 0: + print 'this was probably FCP7?' + capture_station = 'loopline' + else: + # i can't find any distinctive metadata that control room writes. + print 'this was probably Control Room?' + capture_station = 'ingest1' + print 'Does this sound ok? Y/N?' + station_confirm = '' + while station_confirm not in ('Y', 'y', 'N', 'n'): + station_confirm = raw_input() + if station_confirm not in ('Y', 'y', 'N', 'n'): + print 'Incorrect input. Please enter Y or N' + elif station_confirm not in ('Y', 'y'): + capture_station = '' + if capture_station not in range(1, 4): + capture_station = raw_input( + '\n\n**** Where was tape captured?\n' + 'Press 1, 2 or 3\n\n1. es2\n2. loopline\n3. ingest 1\n' + ) + while int(capture_station) not in range(1, 4): + capture_station = raw_input( + '\n\n**** Where was tape captured?\n' + 'Press 1, 2 or 3\n\n1. es2\n2. loopline\n3. ingest 1\n' + ) + if capture_station == '1': + capture_station = 'es2' + elif capture_station == '2': + capture_station = 'loopline' + elif capture_station == '3': + capture_station = 'ingest1' + if capture_station == 'ingest1': + ingest_deck = '0' + while int(ingest_deck) not in range(1, 3): + ingest_deck = raw_input( + '\n\n**** Where was tape captured?\n' + 'Press 1, 2\n1. DVW-510p (Digi)\n2. UVW-1200p (BetaSP)\n' + ) + if int(ingest_deck) not in range(1, 3): + print 'Incorrect input. Please enter 1 or 2 plz' + while int(ingest_deck) not in range(1, 3): + ingest_deck = raw_input( + '\n\n**** Where was tape captured?\n' + 'Press 1, 2\n2. DVW-510p (Digi)\n3. UVW-1200p (BetaSP)\n' + ) + if ingest_deck == '1': + capture_station = 'ingest1-dvw' + elif ingest_deck == '2': + capture_station = 'ingest1-uvw' + return capture_station + + +def get_user(question): + user = '' + if not user == '1' or user == '2' or user == '3': + user = raw_input( + '\n\n%s' + '\nPress 1 or 2 or 3\n\n' + '1. Kieran O\'Leary\n2. Aoife Fitzmaurice\n3. Raelene Casey\n' + % question) + while user not in ('1', '2', '3'): + user = raw_input( + '\n\n%s' + '\nPress 1 or 2 or 3\n\n' + '1. Kieran O\'Leary\n2. Aoife Fitzmaurice\n3. Raelene Casey\n' + % question + ) + if user == '1': + user = 'Kieran O\'Leary' + elif user == '2': + user = 'Aoife Fitzmaurice' + elif user == '3': + user = 'Raelene Casey' + return user + + +def analyze_log(logfile): + losslessness = '' + framemd5_time = '' + manifest_time = '' + logged_workstation = '' + with open(logfile, 'r') as fo: + log_lines = fo.readlines() + for line in log_lines: + if 'Transcode was lossless' in line: + losslessness = 'lossless' + if 'Framemd5 generation of output file completed' in line: + framemd5_time = line[:19] + if 'MD5 manifest started' in line: + manifest_time = line[:19] + workstation = log_lines[0][20:35] + print workstation + + return manifest_time, framemd5_time, losslessness, workstation + + +def main(): + script_user = get_user('**** Who is running this script?') + engineer = get_user('**** Who captured the actual tape?') + if not os.path.isdir(sys.argv[1]): + print 'Input should be a directory' + sys.exit() + for root, dirs, filenames in os.walk(sys.argv[1]): + for filename in filenames: + if filename.endswith('.mkv'): + if os.path.isfile(os.path.join(root, filename)): + source_file = os.path.join(root, filename) + print 'Processing: %s' % source_file + premisxml, premis_namespace, doc, premis = setup_xml(source_file) + sip_dir = os.path.dirname(source_file) + parent_dir = os.path.dirname(sip_dir) + metadata_dir = os.path.join(parent_dir, 'metadata') + logs_dir = os.path.join(parent_dir, 'logs') + ffv1_xml = os.path.join( + metadata_dir, os.path.basename( + source_file + + '_mediainfo.xml' + ) + ) + # the replace here is a terrible hack. Sad! Fix! + source_xml = os.path.join( + metadata_dir, + os.path.basename( + source_file.replace('.mkv', '.mov') + + '_source_mediainfo.xml')) + logfile = os.path.join( + logs_dir, + os.path.basename( + source_file.replace('.mkv', '.mov') + + '_log.log')) + capture_time = get_times(source_xml) + transcode_time = get_times(ffv1_xml) + manifest_time, framemd5_time, losslessness, workstation = analyze_log(logfile) + times = [ + capture_time, transcode_time, manifest_time, framemd5_time, losslessness + ] + if os.path.isfile(ffv1_xml): + capture_station = get_capture_workstation(ffv1_xml) + else: + print 'Can\'t find XML of FFv1 file. Exiting!' + sys.exit() + ''' + /home/kieranjol/ifigit/ifiscripts/massive/objects sip + /home/kieranjol/ifigit/ifiscripts/massive parent + ''' + manifest = parent_dir + '_manifest.md5' + if not os.path.isfile(manifest): + print 'no manifest found' + sys.exit() + md5 = get_checksum(manifest) + # this items var is sad,clearly there's hardcoded workflow crap in premis.py + # I don't even know if any of these are relevant anymore + items = { + "workflow":"raw audio", + "oe":'n/a', + "filmographic":'n/a', + "sourceAccession":'unknown at present', + "interventions":['placeholder'], + "prepList":['placeholder'], + "user":'n/a' + } + representation_uuid = str(uuid.uuid4()) + intellectual_entity_uuid = str(uuid.uuid4()) + # looks like loopline isn't the keyword any longer. it's len = 32? + xml_info = create_object( + source_file, items, premis, + premis_namespace, premisxml, representation_uuid, md5 + ) + linkinguuids = [xml_info[4][0], 'n/a', 'n/a'] + create_intellectual_entity( + premisxml, premis_namespace, doc, premis, + items, intellectual_entity_uuid + ) + representation_object = create_representation( + premisxml, premis_namespace, doc, premis, + items, linkinguuids, representation_uuid, 'no_sequence', 'n/a' + ) + event_dict = capture_description( + premis, xml_info, capture_station, times, engineer + ) + ffv1_description( + premis, xml_info, workstation, times, event_dict, script_user + ) + write_premis(doc, premisxml) + +if __name__ == '__main__': + main()