From 78d8f70f6682ce28ea0ab7fd7dd41cf8c9fffa6f Mon Sep 17 00:00:00 2001 From: Syd Pleno Date: Tue, 26 Apr 2022 23:47:55 +1000 Subject: [PATCH 1/2] Initial commit. --- dfimagetools/docker.py | 298 ++++++++++++++++++++++++++++++++ tools/list_container_entries.py | 154 +++++++++++++++++ 2 files changed, 452 insertions(+) create mode 100644 dfimagetools/docker.py create mode 100755 tools/list_container_entries.py diff --git a/dfimagetools/docker.py b/dfimagetools/docker.py new file mode 100644 index 0000000..28a8d4e --- /dev/null +++ b/dfimagetools/docker.py @@ -0,0 +1,298 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +"""Script to enumerate Docker containers. + +Code is based on Docker-Explorer""" + +import json +import os + +from dfvfs.lib import definitions +from dfvfs.path import factory +from dfvfs.path import path_spec +from dfvfs.resolver import context +from dfvfs.resolver import resolver +from dfvfs.vfs import overlay_file_system + + +_DEFAULT_DOCKER_VERSION = 2 + +_resolver_context = context.Context() + + +class DockerContainer: + """Class for a Docker container object. + + Attributes: + config_image_name (str): the name of the container's image (eg: 'busybox'). + config_labels (list(str)): labels attached to the container. + container_id (str): the ID of the container. + creation_timestamp (str): the container's creation timestamp. + docker_version (int): the version number of the storage system. + image_id (str): the ID of the container's image. + mount_points (list(dict)): list of mount points to bind from host to the + container. (Docker storage backend v2). + name (str): the name of the container. + running (boolean): True if the container is running. + start_timestamp (str): the container's start timestamp. + storage_name (str): the container's storage driver name. + storage_object (BaseStorage): the container's storage backend object. + upper_dir (str): path to upper_dir folder. + volumes (list(tuple)): list of mount points to bind from host to the + container. (Docker storage backend v1). + exposed_ports (dict): list of exposed ports from the container + """ + + CONFIG_FILENAME_MAP = { + 1: 'config.json', + 2: 'config.v2.json' + } + + def __init__(self, container_spec, docker_instance, + resolver_context=_resolver_context): + """Initializes the Container class. + + Args: + container_spec (path_spec.PathSpec): the path spec to the container + directory in the docker folder. + docker_instance (DockerInstance): the Docker instance the container + belongs to. + resolver_context (Optional[resolver.Context]): the resolver context. + """ + self.container_spec = container_spec + self.docker_instance = docker_instance + + if resolver_context: + self.resolver_context = resolver_context + else: + resolver_context = resolver.Context() + + # build the path spec for the container config file + config_file_segments = [self.container_spec.location] + config_file_segments.append( + self.CONFIG_FILENAME_MAP[self.docker_instance.docker_version]) + config_file_location = self.docker_instance.file_system.JoinPath(config_file_segments) + + self.config_path_spec = factory.Factory.NewPathSpec( + type_indicator=self.container_spec.TYPE_INDICATOR, location=config_file_location, + parent=self.container_spec.parent) + + self._ParseConfigFile() + + self._ParseMountId() + + def _GetDictValue(self, nested_dict, values): + """Gets a value from a nested dictionary. + + Args: + nested_dict (Dict[str, Any]): a nested dictionary where keys are all + strings. + values (List[str]): a list of key names, ordered from the root to desired + key in the nested_dict. + + Returns: + Any: the nested dictionary value or None if the desired key does not + exist. + """ + current = nested_dict + for value in values: + if not current or not isinstance(current, dict): + return None + if value not in current: + return None + current = current[value] + return current + + def _ParseMountId(self): + """Assumes overlay2.""" + # build the path spec for the container config file + mount_id_segments = ['image'] + mount_id_segments.append(self.storage_driver) + mount_id_segments.append('layerdb') + mount_id_segments.append('mounts') + mount_id_segments.append(self.identifier) + mount_id_segments.append('mount-id') + print(mount_id_segments) + mount_id_spec = self.docker_instance.GetDockerDirectorySpec( + mount_id_segments) + + mount_id_file_io = resolver.Resolver.OpenFileObject( + mount_id_spec, self.resolver_context) + + self.mount_id = mount_id_file_io.read().decode() + print(self.mount_id) + + def _ParseConfigFile(self): + """Parses the container configuration file. + + Raises: + ValueError: if there was an error when parsing the container + configuration file. + """ + container_file_io = resolver.Resolver.OpenFileObject( + self.config_path_spec, self.resolver_context) + + #container_file_io = container_file.GetFileObject() + container_info_dict = json.load(container_file_io) + + # Parse the 'Config' key, which relates to the Image configuration + self.name = container_info_dict.get('Name') + self.identifier = container_info_dict.get('ID') + self.created = container_info_dict.get('Created') + self.image_id = container_info_dict.get('Image') + + self.storage_driver = container_info_dict.get('Driver') + + if self.storage_driver is None: + raise ValueError(f'{self.config_path_spec.location} lacks Driver key.') + + if self.storage_driver not in {'overlay2'}: + raise ValueError( + f'Unsupported Driver {self.storage_driver} in ' + f'{self.config_path_spec.location}.') + + self.running = self._GetDictValue( + container_info_dict, ['State', 'Running']) + self.started_at = self._GetDictValue( + container_info_dict, ['State', 'StartedAt']) + self.finished_at = self._GetDictValue( + container_info_dict, ['State', 'FinishedAt']) + + self.config_image_name = self._GetDictValue( + container_info_dict, ['Config', 'Image']) + self.config_labels = self._GetDictValue( + container_info_dict, ['Config', 'Labels']) + self.creation_timestamp = self._GetDictValue( + container_info_dict, ['Created']) + self.image_id = self._GetDictValue( + container_info_dict, ['Image']) + + def GetLowerLayerSpecs(self): + """Gets the lower layer path specs of the container file system. + + Returns: + List[path_spec.PathSpec]: a list of path specifications. + """ + lower_directory_segments = [self.storage_driver, + self.mount_id, + 'lower'] + lower_spec = self.docker_instance.GetDockerDirectorySpec( + lower_directory_segments) + lower_file_io = resolver.Resolver.OpenFileObject( + lower_spec, self.resolver_context) + lower_specs = lower_file_io.read().decode() + + path_specs = [] + for lower_spec_segment in lower_specs.split(':'): + layer_segments = [self.storage_driver, lower_spec_segment] + layer_spec = self.docker_instance.GetDockerDirectorySpec(layer_segments) + layer_file_entry = resolver.Resolver.OpenFileEntry( + layer_spec, self.resolver_context) + + layer_spec_segments = [self.storage_driver, 'l', layer_file_entry.link] + path_spec = self.docker_instance.GetDockerDirectorySpec( + layer_spec_segments) + # TODO: is there another way to normalise path? + path_spec.location = os.path.normpath(path_spec.location) + path_specs.append(path_spec) + return path_specs + + def GetUpperLayerSpec(self) -> path_spec.PathSpec: + """Gets the upper layer of the container file system. + + Returns: + path_spec.PathSpec: a path specification. + """ + upper_directory_segments = [self.storage_driver, + self.mount_id, + 'diff'] + return self.docker_instance.GetDockerDirectorySpec(upper_directory_segments) + + def GetOverlayFileSystem(self): + """Returns the Overlay root path specification. + + Returns: + overlay_file_system.OverlayFileSystem: the overlay file system.""" + + lower_layer_specs = self.GetLowerLayerSpecs() + upper_layer_spec = self.GetUpperLayerSpec() + + resolver_context = context.Context() + overlay_path_spec = factory.Factory.NewPathSpec( + type_indicator=definitions.TYPE_INDICATOR_OVERLAY, + location='/') + + return overlay_file_system.OverlayFileSystem( + resolver_context, overlay_path_spec, + lower_layer_specs, upper_layer_spec) + + +class DockerInstance: + """Class for a Docker instance object.""" + + def __init__(self, path_spec, docker_version=_DEFAULT_DOCKER_VERSION, + context=_resolver_context): + """Initializes the DockerExplorer class. + + Args: + path_spec (PathSpec): path specification for the docker instance. + docker_version (int): the Docker version. + context (resolver.Context): the resolver context. + """ + self.file_system = resolver.Resolver.OpenFileSystem(path_spec) + self.path_spec = path_spec + self.docker_directory = path_spec.location + self.docker_version = docker_version + self.resolver_context = context + + def GetDockerDirectorySpec(self, subdirectory): + """Returns a path spec for a subdirectory in the docker instance. + + Args: + subdirectory (List[str]): the desired subdirectory. + + Returns: + path_spec.PathSpec: the path specification for the subdirectory. + """ + segments = [self.path_spec.location] + segments.extend(subdirectory) + location = self.file_system.JoinPath(segments) + return factory.Factory.NewPathSpec( + type_indicator=self.path_spec.TYPE_INDICATOR, location=location, + parent=self.path_spec.parent) + + def GetContainerByIdentifier(self, container_id: str) -> DockerContainer: + """Returns a Docker container specified by it's identifier. + + Args: + container_id: the container ID. + + Returns: + A DockerContainer object. + + Raises: + ValueError: when the specified container ID does not exist. + """ + container_spec = self.GetDockerDirectorySpec(['containers', container_id]) + + if not self.file_system.FileEntryExistsByPathSpec( + container_spec): + raise ValueError('Container folder does not exist.') + + return DockerContainer(container_spec, self) + + def GetContainierEntries(self): + """Returns the containers in the Docker instance. + + Returns: + List[path_spec.PathSpec]: a list of path specifications for containers. + """ + container_root_path_spec = self.GetDockerDirectorySpec(['containers']) + + container_root_file_entry = resolver.Resolver.OpenFileEntry( + container_root_path_spec, self.resolver_context) + + if not container_root_file_entry.IsDirectory(): + return [] # TODO: raise error instead? + + return list(container_root_file_entry.sub_file_entries) diff --git a/tools/list_container_entries.py b/tools/list_container_entries.py new file mode 100755 index 0000000..e3b8af4 --- /dev/null +++ b/tools/list_container_entries.py @@ -0,0 +1,154 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +"""Script to list file entries from an overlay filesystem.""" + +import argparse +import logging +import os +import sys +from unittest.loader import VALID_MODULE_NAME + +from dfimagetools import docker +from dfvfs.helpers import command_line as dfvfs_command_line +from dfvfs.helpers import volume_scanner as dfvfs_volume_scanner +from dfvfs.lib import errors as dfvfs_errors +from dfvfs.resolver import resolver as dfvfs_resolver +from dfvfs.path import factory as dfvfs_factory + + +_DEFAULT_DOCKER_DIRECTORY = '/var/lib/docker' + +def Main(): + """The main program function. + + Returns: + bool: True if successful or False if not. + """ + argument_parser = argparse.ArgumentParser(description=( + 'Lists metadata of file entries in a container on storage media image.')) + + argument_parser.add_argument( + '--container_id', '--container-id', dest='container_id', + action='store', type=str, default='all', help=( + 'Docker folder location. Default is all.')) + + argument_parser.add_argument( + '--docker_folder', '--docker-folder', dest='docker_folder', + action='store', type=str, default=_DEFAULT_DOCKER_DIRECTORY, help=( + 'Docker folder location. Default is /var/lib/docker')) + + argument_parser.add_argument( + '--partitions', '--partition', dest='partitions', action='store', + type=str, default=None, help=( + 'Define partitions to be processed. A range of partitions can be ' + 'defined as: "3..5". Multiple partitions can be defined as: "1,3,5" ' + '(a list of comma separated values). Ranges and lists can also be ' + 'combined as: "1,3..5". The first partition is 1. All partitions ' + 'can be specified with: "all".')) + + argument_parser.add_argument( + '--snapshots', '--snapshot', dest='snapshots', action='store', type=str, + default=None, help=( + 'Define snapshots to be processed. A range of snapshots can be ' + 'defined as: "3..5". Multiple snapshots can be defined as: "1,3,5" ' + '(a list of comma separated values). Ranges and lists can also be ' + 'combined as: "1,3..5". The first snapshot is 1. All snapshots can ' + 'be specified with: "all".')) + + argument_parser.add_argument( + '--volumes', '--volume', dest='volumes', action='store', type=str, + default=None, help=( + 'Define volumes to be processed. A range of volumes can be defined ' + 'as: "3..5". Multiple volumes can be defined as: "1,3,5" (a list ' + 'of comma separated values). Ranges and lists can also be combined ' + 'as: "1,3..5". The first volume is 1. All volumes can be specified ' + 'with: "all".')) + + argument_parser.add_argument( + 'source', nargs='?', action='store', metavar='image.raw', + default=None, help='path of the storage media image.') + + options = argument_parser.parse_args() + + if not options.source: + print('Source value is missing.') + print('') + argument_parser.print_help() + print('') + return False + + logging.basicConfig( + level=logging.INFO, format='[%(levelname)s] %(message)s') + + mediator = dfvfs_command_line.CLIVolumeScannerMediator() + + volume_scanner_options = dfvfs_volume_scanner.VolumeScannerOptions() + volume_scanner_options.partitions = mediator.ParseVolumeIdentifiersString( + options.partitions) + + if options.snapshots == 'none': + volume_scanner_options.snapshots = ['none'] + else: + volume_scanner_options.snapshots = mediator.ParseVolumeIdentifiersString( + options.snapshots) + + volume_scanner_options.volumes = mediator.ParseVolumeIdentifiersString( + options.volumes) + + volume_scanner = dfvfs_volume_scanner.VolumeScanner(mediator=mediator) + + try: + base_path_specs = volume_scanner.GetBasePathSpecs( + options.source, options=volume_scanner_options) + if not base_path_specs: + print('No supported file ystem found in source.') + print('') + return False + + for base_path_spec in base_path_specs: + docker_path_spec = dfvfs_factory.Factory.NewPathSpec( + base_path_spec.TYPE_INDICATOR, location=options.docker_folder, + parent=base_path_spec.parent) + + file_system = dfvfs_resolver.Resolver.OpenFileSystem(docker_path_spec) + if not file_system.FileEntryExistsByPathSpec(docker_path_spec): + print('[INFO] docker folder not found.', file=sys.stderr) + continue + + docker_instance = docker.DockerInstance(docker_path_spec) + for container_entry in docker_instance.GetContainierEntries(): + container = docker_instance.GetContainerByIdentifier( + container_entry.name) + + print(f'[INFO] Enumerating overlay for {container.name} ' + f'{container.identifier}.') + fs = container.GetOverlayFileSystem() + ListEntries(fs.GetRootFileEntry()) + + + except dfvfs_errors.ScannerError as exception: + print('[ERROR] {0!s}'.format(exception), file=sys.stderr) + print('') + return False + + except KeyboardInterrupt: + print('Aborted by user.', file=sys.stderr) + print('') + return False + + return True + + +def ListEntries(current_entry): + for entry in current_entry.sub_file_entries: + print(entry.path_spec.location, entry.path_spec.parent.location) + if entry.IsDirectory(): + ListEntries(entry) + + + +if __name__ == '__main__': + if not Main(): + sys.exit(1) + else: + sys.exit(0) From bd8fb678381068ab975fc890bfa5effa74037b59 Mon Sep 17 00:00:00 2001 From: Syd Pleno Date: Wed, 18 May 2022 05:32:16 +1000 Subject: [PATCH 2/2] Update cli script. --- tools/list_container_entries.py | 76 ++++++++++++++++++++++----------- 1 file changed, 50 insertions(+), 26 deletions(-) diff --git a/tools/list_container_entries.py b/tools/list_container_entries.py index e3b8af4..02f4d18 100755 --- a/tools/list_container_entries.py +++ b/tools/list_container_entries.py @@ -4,9 +4,7 @@ import argparse import logging -import os import sys -from unittest.loader import VALID_MODULE_NAME from dfimagetools import docker from dfvfs.helpers import command_line as dfvfs_command_line @@ -15,9 +13,48 @@ from dfvfs.resolver import resolver as dfvfs_resolver from dfvfs.path import factory as dfvfs_factory +from dfimagetools import bodyfile + _DEFAULT_DOCKER_DIRECTORY = '/var/lib/docker' + +def _ListEntries(current_entry, bodyfile_generator, parent_path_segments): + """Recursively print the bodyfile of a FileEntry. + + Args: + current_entry (file_entry.FileEntry): the file entry to list bodyfile + entries. + bodyfile_generator (bodyfile.BodyFileGenerator): a generator for + bodyfile entries. + parent_path_segments (List[str]): path segments of the full path of the + parent file entry + """ + for entry in current_entry.sub_file_entries: + for bodyfile_entry in bodyfile_generator.GetEntries( + entry, parent_path_segments + [entry.name]): + print(bodyfile_entry) + if entry.IsDirectory(): + _ListEntries(entry, bodyfile_generator, parent_path_segments + + [entry.name]) + + +def ProcessContainer(container: docker.DockerContainer): + """Produces a bodyfile output of the container filesystem. + + Args: + container (docker.DockerContainer): the Docker container to process. + """ + bodyfile_generator = bodyfile.BodyfileGenerator() + + print(f'[INFO] Enumerating overlay for {container.name} ' + f'{container.identifier}.', file=sys.stderr) + + filesystem = container.GetOverlayFileSystem() + filesystem.Open() + _ListEntries(filesystem.GetRootFileEntry(), bodyfile_generator, ['']) + + def Main(): """The main program function. @@ -28,12 +65,12 @@ def Main(): 'Lists metadata of file entries in a container on storage media image.')) argument_parser.add_argument( - '--container_id', '--container-id', dest='container_id', - action='store', type=str, default='all', help=( - 'Docker folder location. Default is all.')) + '--container_id', '--container-id', dest='container_id', + action='store', type=str, required=True, help=( + 'Docker folder location.')) argument_parser.add_argument( - '--docker_folder', '--docker-folder', dest='docker_folder', + '--docker_folder', '--docker-folder', dest='docker_folder', action='store', type=str, default=_DEFAULT_DOCKER_DIRECTORY, help=( 'Docker folder location. Default is /var/lib/docker')) @@ -99,16 +136,16 @@ def Main(): try: base_path_specs = volume_scanner.GetBasePathSpecs( - options.source, options=volume_scanner_options) + options.source, options=volume_scanner_options) if not base_path_specs: - print('No supported file ystem found in source.') + print('No supported file system found in source.') print('') return False for base_path_spec in base_path_specs: docker_path_spec = dfvfs_factory.Factory.NewPathSpec( - base_path_spec.TYPE_INDICATOR, location=options.docker_folder, - parent=base_path_spec.parent) + base_path_spec.TYPE_INDICATOR, location=options.docker_folder, + parent=base_path_spec.parent) file_system = dfvfs_resolver.Resolver.OpenFileSystem(docker_path_spec) if not file_system.FileEntryExistsByPathSpec(docker_path_spec): @@ -116,15 +153,10 @@ def Main(): continue docker_instance = docker.DockerInstance(docker_path_spec) - for container_entry in docker_instance.GetContainierEntries(): - container = docker_instance.GetContainerByIdentifier( - container_entry.name) - - print(f'[INFO] Enumerating overlay for {container.name} ' - f'{container.identifier}.') - fs = container.GetOverlayFileSystem() - ListEntries(fs.GetRootFileEntry()) + container = docker_instance.GetContainerByIdentifier( + options.container_id) + ProcessContainer(container) except dfvfs_errors.ScannerError as exception: print('[ERROR] {0!s}'.format(exception), file=sys.stderr) @@ -139,14 +171,6 @@ def Main(): return True -def ListEntries(current_entry): - for entry in current_entry.sub_file_entries: - print(entry.path_spec.location, entry.path_spec.parent.location) - if entry.IsDirectory(): - ListEntries(entry) - - - if __name__ == '__main__': if not Main(): sys.exit(1)