diff --git a/.cache/v/cache/lastfailed b/.cache/v/cache/lastfailed deleted file mode 100644 index 9e26dfee..00000000 --- a/.cache/v/cache/lastfailed +++ /dev/null @@ -1 +0,0 @@ -{} \ No newline at end of file diff --git a/.coveragerc b/.coveragerc index 46041110..8c2c9130 100644 --- a/.coveragerc +++ b/.coveragerc @@ -10,3 +10,4 @@ exclude_lines = ignore_errors = True omit = tests/* + crawler/utils/plugincont/* diff --git a/.travis.yml b/.travis.yml index d319e200..acf2e146 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,7 +2,6 @@ sudo: required dist: trusty group: deprecated-2017Q4 language: python - services: - docker @@ -35,6 +34,8 @@ before_install: - cp -r psvmi/maps maps - cp -r psvmi/offsets offsets - cp psvmi/header.h . + # for safe plugin mode + - sudo apt-get install libcap-dev iptables iptables-dev # command to install dependencies # XXX: Now mock complains if we don't `sudo pip install`. diff --git a/Dockerfile b/Dockerfile index d5549a73..221a4934 100644 --- a/Dockerfile +++ b/Dockerfile @@ -14,6 +14,9 @@ COPY \ RUN dpkg -i /tmp/python-socket-datacollector_*_all.deb && \ apt-get -y update && \ apt-get -y install libpcap0.8 && \ + apt-get -y install libcap-dev && \ + apt-get -y install iptables && \ + apt-get -y install iptables-dev && \ dpkg -i /tmp/softflowd_0.9.*_amd64.deb && \ pip install pyroute2 py-radix requests-unixsocket json-rpc && \ dpkg -i /tmp/python-conntrackprobe_*_all.deb && \ diff --git a/crawler/.gitignore b/crawler/.gitignore index cff5a5e1..8902896e 100644 --- a/crawler/.gitignore +++ b/crawler/.gitignore @@ -4,5 +4,4 @@ binaries/ kafka-producer.py timeout.py alchemy.py -*.json *.sh diff --git a/crawler/containers.py b/crawler/containers.py index 1c10cde7..28d10df6 100644 --- a/crawler/containers.py +++ b/crawler/containers.py @@ -10,7 +10,8 @@ def list_all_containers(user_list='ALL', host_namespace='', - ignore_raw_containers=True): + ignore_raw_containers=True, + group_by_pid_namespace=True): """ Returns a list of all running 
containers in the host. @@ -24,10 +25,13 @@ def list_all_containers(user_list='ALL', host_namespace='', for _container in get_docker_containers(host_namespace=host_namespace, user_list=user_list): - curr_ns = _container.process_namespace - if curr_ns not in visited_ns: - visited_ns.add(curr_ns) + if group_by_pid_namespace is False: yield _container + else: + curr_ns = _container.process_namespace + if curr_ns not in visited_ns: + visited_ns.add(curr_ns) + yield _container # XXX get list of rkt containers @@ -62,7 +66,8 @@ def get_containers( environment='cloudsight', host_namespace=misc.get_host_ipaddr(), user_list='ALL', - ignore_raw_containers=True + ignore_raw_containers=True, + group_by_pid_namespace=True ): """ Returns a list of all containers running in the host. @@ -79,7 +84,8 @@ def get_containers( """ filtered_list = [] containers_list = list_all_containers(user_list, host_namespace, - ignore_raw_containers) + ignore_raw_containers, + group_by_pid_namespace) for _container in containers_list: default_environment = 'cloudsight' if (environment != default_environment and diff --git a/crawler/crawler.conf b/crawler/crawler.conf index bed0d526..eb55a150 100644 --- a/crawler/crawler.conf +++ b/crawler/crawler.conf @@ -17,9 +17,9 @@ [[ process_host ]] - [[ ruby_pkg ]] + [[ rubypackage ]] - [[ python_pkg ]] + [[ pythonpackage ]] avoid_setns = False [[ fprobe_container ]] diff --git a/crawler/crawler.py b/crawler/crawler.py index f4faa8cd..3508d756 100755 --- a/crawler/crawler.py +++ b/crawler/crawler.py @@ -7,6 +7,7 @@ from worker import Worker from containers_crawler import ContainersCrawler +from safe_containers_crawler import SafeContainersCrawler from utils import misc from crawlmodes import Modes from emitters_manager import EmittersManager @@ -93,11 +94,12 @@ def main(): Modes.OUTVM, Modes.MOUNTPOINT, Modes.OUTCONTAINER, + Modes.OUTCONTAINERSAFE, Modes.MESOS, ], default=Modes.INVM, help='The crawler mode: ' - '{INVM,OUTVM,MOUNTPOINT,OUTCONTAINER}. 
' + '{INVM,OUTVM,MOUNTPOINT,OUTCONTAINER,OUTCONTAINERSAFE}. ' 'Defaults to INVM', ) parser.add_argument( @@ -222,6 +224,15 @@ def main(): host_namespace=args.namespace, plugin_places=args.plugin_places, options=options) + elif args.crawlmode == 'OUTCONTAINERSAFE': + crawler = SafeContainersCrawler( + features=args.features, + environment=args.environment, + user_list=args.crawlContainers, + host_namespace=args.namespace, + plugin_places=args.plugin_places, + frequency=args.frequency, + options=options) else: raise NotImplementedError('Invalid crawlmode') diff --git a/crawler/crawlmodes.py b/crawler/crawlmodes.py index ce91ed83..251fa8d8 100644 --- a/crawler/crawlmodes.py +++ b/crawler/crawlmodes.py @@ -4,4 +4,5 @@ OUTVM='OUTVM', MOUNTPOINT='MOUNTPOINT', OUTCONTAINER='OUTCONTAINER', + OUTCONTAINERSAFE='OUTCONTAINERSAFE', MESOS='MESOS') diff --git a/crawler/dockercontainer.py b/crawler/dockercontainer.py index a874e8e4..fbeb767a 100644 --- a/crawler/dockercontainer.py +++ b/crawler/dockercontainer.py @@ -162,7 +162,7 @@ def __init__( self.volumes = inspect.get('Volumes') self.image_name = inspect['Config']['Image'] self.inspect = inspect - + self.plugincont = None self.process_namespace = (process_namespace or namespace.get_pid_namespace(self.pid)) diff --git a/crawler/plugin_containers_manager.py b/crawler/plugin_containers_manager.py new file mode 100644 index 00000000..5d48c45c --- /dev/null +++ b/crawler/plugin_containers_manager.py @@ -0,0 +1,367 @@ +import os +import sys +import time +import json +import docker +import iptc +import shutil +import ctypes +import utils.dockerutils +from utils.crawler_exceptions import ContainerWithoutCgroups +from utils.namespace import run_as_another_namespace + + +class PluginContainersManager(): + + def __init__(self, frequency=-1): + self.frequency = frequency + self.pluginconts = dict() + self.plugincont_image = 'plugincont_image' + self.plugincont_name_prefix = 'plugin_cont' + self.plugincont_username = 'plugincont_user' + 
self.plugincont_framedir = '/home/' + \ + self.plugincont_username + '/features/' + self.plugincont_py_path = '/usr/bin/python2.7' + self.plugincont_seccomp_profile_path = os.getcwd( + ) + '/crawler/utils/plugincont/seccomp-no-ptrace.json' + self.plugincont_image_path = os.getcwd() + \ + '/crawler/utils/plugincont/plugincont_img' + self.plugincont_guestcont_mountpoint = '/rootfs_local' + self.plugincont_guestcont_sysfs_mountpoint = '/sysfs_local' + self.docker_client = docker.DockerClient( + base_url='unix://var/run/docker.sock', version='auto') + self.docker_APIclient = docker.APIClient( + base_url='unix://var/run/docker.sock', version='auto') + if self.set_plugincont_uid() == -1: + raise ValueError('Failed to verify docker userns-remap settings') + if self.set_plugincont_cgroup_netclsid() == -1: + raise ValueError('Failed to set cgroup netclsid') + if self.build_plugincont_img() != 0: + raise ValueError('Failed to build image') + + def is_int(self, s): + try: + int(s) + return True + except ValueError: + return False + + def _get_next_uid(self): + # TODO: check uid is within [UID_MIN,UID_MAX] in /etc/login.defs + # alternate approach: useradd nextid; id -u nextid; userdel nextid + # assumption the unused uid on host is also an unused uid in container + # exact ranges maybe found in /etc/subuid + uid = 1010 + uids_in_use = [] + try: + fd = open('/etc/passwd', 'r') + for users in fd.readlines(): + uids_in_use.append(users.split(':')[2]) + fd.close() + while str(uid) in uids_in_use: + uid = uid + 1 + except Exception as exc: + print sys.exc_info()[0], exc, sys.exc_info()[-1].tb_lineno + uid = -1 + return uid + + def set_plugincont_dockerfile_uid(self, uid): + retVal = 0 + uid = str(uid) + user = self.plugincont_username + try: + shutil.copyfile( + self.plugincont_image_path + '/Dockerfile.template', + self.plugincont_image_path + '/Dockerfile') + fd = open(self.plugincont_image_path + '/Dockerfile', 'a') + fd.write('RUN groupadd -r ' + user + ' -g ' + uid + '\n') + 
fd.write('RUN useradd -u ' + uid + + ' -m ' + user + ' -g ' + user + '\n') + fd.write('RUN usermod -a -G ' + user + ' ' + user + '\n') + fd.write('RUN chsh -s /bin/bash ' + user + '\n') + fd.close() + except Exception as exc: + print sys.exc_info()[0], exc, sys.exc_info()[-1].tb_lineno + retVal = -1 + return retVal + + def set_plugincont_uid(self): + self.plugincont_host_uid = -1 + try: + uid = self._get_next_uid() + if uid <= 0: + return -1 + if self.set_plugincont_dockerfile_uid(uid) != 0: + return -1 + self.plugincont_host_uid = uid + # /var/lib/docker/165536.16553 from docker userns remapping + docker_root_dir = utils.dockerutils._get_docker_root_dir() + leaf_dir = docker_root_dir.split('/')[-1] # 165536.165536 + possible_sub_uid = leaf_dir.split('.')[0] # 165536 + if self.is_int(possible_sub_uid) is True: + self.plugincont_host_uid = int(possible_sub_uid) + uid + except Exception as exc: + print sys.exc_info()[0], exc, sys.exc_info()[-1].tb_lineno + self.plugincont_host_uid = -1 + return self.plugincont_host_uid + + def set_plugincont_cgroup_netclsid(self): + # self.plugincont_cgroup_netclsid = '43' #random cgroup net cls id + res_clsid = -1 + try: + cgroup_netcls_path = self._get_cgroup_dir( + ['net_cls', 'net_cls,net_prio']) + for root, dirs, files in os.walk(cgroup_netcls_path): + for file in files: + if file.endswith('net_cls.classid'): + fd = open(root + '/' + file, 'r') + clsid = int(fd.readline(), 16) + if res_clsid <= clsid: + res_clsid = clsid + 1 + fd.close() + res_clsid = res_clsid + 2 + except Exception as exc: + print sys.exc_info()[0], exc, sys.exc_info()[-1].tb_lineno + res_clsid = -1 + self.plugincont_cgroup_netclsid = res_clsid + + def destroy_cont(self, id=None, name=None): + client = self.docker_APIclient + if name is None and id is None: + return + if name is not None: + _id = name + filter = {'name': name} + else: + _id = id + filter = {'id': id} + if client.containers(all=True, filters=filter) != []: + client.stop(_id) + 
client.remove_container(_id) + + def set_plugincont_py_cap(self, plugincont_id): + retVal = 0 + verify = False + try: + rootfs = utils.dockerutils.get_docker_container_rootfs_path( + plugincont_id) + py_path = rootfs + self.plugincont_py_path + libcap = ctypes.cdll.LoadLibrary("libcap.so") + caps = libcap.cap_from_text( + 'cap_dac_read_search,cap_sys_chroot,cap_sys_ptrace+ep') + retVal = libcap.cap_set_file(py_path, caps) + if verify is True: + libcap.cap_to_text.restype = ctypes.c_char_p + caps_set = libcap.cap_get_file(py_path, caps) + caps_set_str = libcap.cap_to_text(caps_set, None) + assert 'cap_dac_read_search' in caps_set_str + assert 'cap_sys_chroot' in caps_set_str + assert 'cap_sys_ptrace' in caps_set_str + except Exception as exc: + print sys.exc_info()[0], exc, sys.exc_info()[-1].tb_lineno + retVal = -1 + return retVal + + def build_plugincont_img(self): + retVal = 0 + build_status = list(self.docker_APIclient.build( + path=self.plugincont_image_path, tag=self.plugincont_image)) + assert 'Successfully built' in build_status[-1] + try: + plugincont = self.docker_client.containers.run( + image=self.plugincont_image, + command="tail -f /dev/null", + detach=True) + time.sleep(5) + retVal = self.set_plugincont_py_cap(plugincont.id) + if retVal == 0: + self.docker_APIclient.commit( + plugincont.id, repository=self.plugincont_image) + self.destroy_cont(id=plugincont.id) + except Exception as exc: + print sys.exc_info()[0], exc, sys.exc_info()[-1].tb_lineno + retVal = -1 + return retVal + + def get_plugincont_framedir(self, guestcont): + frame_dir = None + if guestcont is not None and guestcont.plugincont is not None: + plugincont_id = guestcont.plugincont.id + rootfs = utils.dockerutils.get_docker_container_rootfs_path( + plugincont_id) + frame_dir = rootfs + self.plugincont_framedir + return frame_dir + + def create_plugincont(self, guestcont): + guestcont_id = guestcont.long_id + guestcont_rootfs = utils.dockerutils.get_docker_container_rootfs_path( + 
guestcont_id) + guestcont_sysfs_mem = os.path.join( + self._get_cgroup_dir(['memory']), 'docker', guestcont_id) + guestcont_sysfs_cpu = os.path.join( + self._get_cgroup_dir(['cpuacct', 'cpu,cpuacct']), + 'docker', guestcont_id) + plugincont = None + plugincont_name = self.plugincont_name_prefix + '_' + guestcont_id + seccomp_attr = json.dumps( + json.load(open(self.plugincont_seccomp_profile_path))) + client = self.docker_client + try: + self.destroy_cont(name=plugincont_name) + plugincont = client.containers.run( + image=self.plugincont_image, + name=plugincont_name, + user=self.plugincont_username, + command="/usr/bin/python2.7 /crawler/crawler_lite.py " + "--frequency=" + str(self.frequency), + pids_limit=10, + mem_limit='256m', + cpu_period=100000, + cpu_quota=25000, + pid_mode='container:' + guestcont_id, + network_mode='container:' + guestcont_id, + cap_add=["SYS_PTRACE", "DAC_READ_SEARCH"], + security_opt=['seccomp:' + seccomp_attr], + volumes={ + guestcont_rootfs: { + 'bind': self.plugincont_guestcont_mountpoint, + 'mode': 'ro'}, + guestcont_sysfs_mem: { + 'bind': self.plugincont_guestcont_sysfs_mountpoint + '' + '/sys/fs/cgroup/memory'}, + guestcont_sysfs_cpu: { + 'bind': self.plugincont_guestcont_sysfs_mountpoint + '' + '/sys/fs/cgroup/cpu,cpuacct'}}, + detach=True) + time.sleep(5) + except Exception as exc: + print sys.exc_info()[0], exc, sys.exc_info()[-1].tb_lineno + + self.pluginconts[str(guestcont_id)] = plugincont + guestcont.plugincont = plugincont + + def _add_iptable_rules_in(self): + retVal = 0 + try: + rule = iptc.Rule() + match = iptc.Match(rule, "cgroup") + match.cgroup = str(self.plugincont_cgroup_netclsid) + rule.add_match(match) + rule.src = "!127.0.0.1" + rule.target = iptc.Target(rule, "DROP") + chain = iptc.Chain(iptc.Table(iptc.Table.FILTER), "INPUT") + chain.insert_rule(rule) + except Exception as exc: + print sys.exc_info()[0], exc, sys.exc_info()[-1].tb_lineno + retVal = -1 + return retVal + + def _add_iptable_rules_out(self): + 
retVal = 0 + try: + rule = iptc.Rule() + match = iptc.Match(rule, "owner") + match.uid_owner = str(self.plugincont_host_uid) + rule.add_match(match) + rule.dst = "!127.0.0.1" + rule.target = iptc.Target(rule, "DROP") + chain = iptc.Chain(iptc.Table(iptc.Table.FILTER), "OUTPUT") + chain.insert_rule(rule) + except Exception as exc: + print sys.exc_info()[0], exc, sys.exc_info()[-1].tb_lineno + retVal = -1 + return retVal + + def _add_iptable_rules(self): + retVal1 = 0 + retVal2 = 0 + retVal1 = self._add_iptable_rules_in() + retVal2 = self._add_iptable_rules_out() + return (retVal1 + retVal2) / 2 + + def _get_cgroup_dir(self, devlist=[]): + for dev in devlist: + paths = [os.path.join('/cgroup/', dev), + os.path.join('/sys/fs/cgroup/', dev)] + for path in paths: + if os.path.ismount(path): + return path + + # Try getting the mount point from /proc/mounts + for l in open('/proc/mounts', 'r'): + _type, mnt, _, _, _, _ = l.split(' ') + if _type == 'cgroup' and mnt.endswith('cgroup/' + dev): + return mnt + + raise ContainerWithoutCgroups('Can not find the cgroup dir') + + def _setup_netcls_cgroup(self, plugincont_id): + retVal = 0 + try: + # cgroup_netcls_path = + # '/sys/fs/cgroup/net_cls/docker/'+plugincont_id + cgroup_netcls_path = self._get_cgroup_dir( + ['net_cls', 'net_cls,net_prio']) + '/docker/' + plugincont_id + tasks_path = cgroup_netcls_path + '/tasks' + block_path = cgroup_netcls_path + '/block' + block_classid_path = block_path + '/net_cls.classid' + block_tasks_path = block_path + '/tasks' + + if not os.path.isdir(block_path): + os.makedirs(block_path) + + fd = open(block_classid_path, 'w') + fd.write(str(self.plugincont_cgroup_netclsid)) + fd.close() + + fd = open(tasks_path, 'r') + plugincont_pids = fd.readlines() + # should be just one pid == plugincont_pid + fd.close() + + fd = open(block_tasks_path, 'w') + for pid in plugincont_pids: + fd.write(pid) + fd.close() + except Exception as exc: + print sys.exc_info()[0], exc, sys.exc_info()[-1].tb_lineno + 
retVal = -1 + return retVal + + def set_plugincont_iptables(self, plugincont_id): + retVal = 0 + try: + client = self.docker_APIclient + plugincont_pid = client.inspect_container( + plugincont_id)['State']['Pid'] + retVal = self._setup_netcls_cgroup(plugincont_id) + if retVal == 0: + retVal = run_as_another_namespace(str(plugincont_pid), + ['net'], + self._add_iptable_rules) + except Exception as exc: + print sys.exc_info()[0], exc, sys.exc_info()[-1].tb_lineno + retVal = -1 + return retVal + + def destroy_plugincont(self, guestcont): + guestcont_id = str(guestcont.long_id) + plugincont_id = guestcont.plugincont.id + self.destroy_cont(id=plugincont_id) + guestcont.plugincont = None + self.pluginconts.pop(str(guestcont_id)) + + def setup_plugincont(self, guestcont): + guestcont_id = str(guestcont.long_id) + if guestcont_id in self.pluginconts.keys(): + guestcont.plugincont = self.pluginconts[guestcont_id] + return + + self.create_plugincont(guestcont) + if guestcont.plugincont is None: + return + + plugincont_id = guestcont.plugincont.id + if self.set_plugincont_iptables(plugincont_id) != 0: + self.destroy_plugincont(guestcont) + return diff --git a/crawler/plugins/systems/pythonpackage_container_crawler.plugin b/crawler/plugins/systems/pythonpackage_container_crawler.plugin index 9bf66e80..0d45bb0b 100644 --- a/crawler/plugins/systems/pythonpackage_container_crawler.plugin +++ b/crawler/plugins/systems/pythonpackage_container_crawler.plugin @@ -1,5 +1,5 @@ [Core] -Name = python_pkg +Name = pythonpackage Module = pythonpackage_container_crawler [Documentation] diff --git a/crawler/plugins/systems/pythonpackage_container_crawler.py b/crawler/plugins/systems/pythonpackage_container_crawler.py index 45f2af31..8e6b7663 100644 --- a/crawler/plugins/systems/pythonpackage_container_crawler.py +++ b/crawler/plugins/systems/pythonpackage_container_crawler.py @@ -14,7 +14,7 @@ class PythonPackageCrawler(IContainerCrawler): def get_feature(self): - return 'python-package' + 
return 'pythonpackage' def _crawl_files(self, path, extensions): output = [] @@ -59,7 +59,7 @@ def _get_packages_by_extension(self, mountpoint): yield ( pkg_name, {"pkgname": pkg_name, "pkgversion": pkg_version}, - 'python-package') + 'pythonpackage') def _get_packages_by_cmd(self): # better coverage with pkg_resources.working_set than @@ -87,7 +87,7 @@ def _get_packages_by_cmd(self): yield ( pkg_name, {"pkgname": pkg_name, "pkgversion": pkg_version}, - 'python-package') + 'pythonpackage') def _crawl_without_setns(self, container_id): mountpoint = utils.dockerutils.get_docker_container_rootfs_path( diff --git a/crawler/plugins/systems/rubypackage_container_crawler.plugin b/crawler/plugins/systems/rubypackage_container_crawler.plugin index d89d3dcf..a4a38dc2 100644 --- a/crawler/plugins/systems/rubypackage_container_crawler.plugin +++ b/crawler/plugins/systems/rubypackage_container_crawler.plugin @@ -1,5 +1,5 @@ [Core] -Name = ruby_pkg +Name = rubypackage Module = rubypackage_container_crawler [Documentation] diff --git a/crawler/plugins/systems/rubypackage_container_crawler.py b/crawler/plugins/systems/rubypackage_container_crawler.py index 7cd351da..3ef7a990 100644 --- a/crawler/plugins/systems/rubypackage_container_crawler.py +++ b/crawler/plugins/systems/rubypackage_container_crawler.py @@ -14,7 +14,7 @@ class RubyPackageCrawler(IContainerCrawler): def get_feature(self): - return 'ruby-package' + return 'rubypackage' def _crawl_files(self, path, extension): output = [] @@ -46,7 +46,7 @@ def _get_packages_by_extension(self, mountpoint): yield ( pkg_name, {"pkgname": pkg_name, "pkgversion": pkg_version}, - 'ruby-package') + 'rubypackage') def _get_packages_by_cmd(self): proc = subprocess.Popen( @@ -65,7 +65,7 @@ def _get_packages_by_cmd(self): yield ( pkg_name, {"pkgname": pkg_name, "pkgversion": pkg_version}, - 'ruby-package') + 'rubypackage') def _crawl_without_setns(self, container_id): mountpoint = utils.dockerutils.get_docker_container_rootfs_path( diff --git 
a/crawler/safe_containers_crawler.py b/crawler/safe_containers_crawler.py new file mode 100644 index 00000000..b4cfaf78 --- /dev/null +++ b/crawler/safe_containers_crawler.py @@ -0,0 +1,146 @@ +import ast +import os +import sys +import time +import plugins_manager +from base_crawler import BaseCrawler, BaseFrame +from plugin_containers_manager import PluginContainersManager +from containers import get_containers + + +class ContainerFrame(BaseFrame): + + def __init__(self, feature_types, container): + BaseFrame.__init__(self, feature_types) + self.metadata.update(container.get_metadata_dict()) + self.metadata['system_type'] = 'container' + + +class SafeContainersCrawler(BaseCrawler): + + def __init__(self, + features=['os', 'cpu'], + environment='cloudsight', + user_list='ALL', + host_namespace='', + plugin_places=['plugins'], + frequency=-1, + options={}): + + BaseCrawler.__init__( + self, + features=features, + plugin_places=plugin_places, + options=options) + plugins_manager.reload_env_plugin(environment, plugin_places) + plugins_manager.reload_container_crawl_plugins( + features, plugin_places, options) + self.plugins = plugins_manager.get_container_crawl_plugins(features) + self.environment = environment + self.host_namespace = host_namespace + self.user_list = user_list + self.pluginconts_manager = None + try: + self.pluginconts_manager = PluginContainersManager(frequency) + except ValueError as err: + print(err.args) + + # Return list of features after reading frame from plugin cont + def get_plugincont_features(self, guestcont): + features = [] + if self.pluginconts_manager is None: + return features + + if guestcont.plugincont is None: + self.pluginconts_manager.setup_plugincont(guestcont) + if guestcont.plugincont is None: + return features + frame_dir = self.pluginconts_manager.get_plugincont_framedir(guestcont) + try: + frame_list = os.listdir(frame_dir) + frame_list.sort(key=int) + if frame_list != []: + earliest_frame_file = frame_dir + frame_list[0] + 
fd = open(earliest_frame_file) + for feature_line in fd.readlines(): + (type, key, val) = feature_line.strip().split('\t') + features.append( + (ast.literal_eval(key), ast.literal_eval(val), type)) + fd.close() + os.remove(earliest_frame_file) + except Exception as exc: + print exc + print sys.exc_info()[0] + + return features + + def crawl_container_mini(self, container, ignore_plugin_exception=True): + frame = ContainerFrame(self.features, container) + try: + frame.add_features(self.get_plugincont_features(container)) + except Exception as exc: + if not ignore_plugin_exception: + raise exc + return frame + + def crawl_container(self, container, ignore_plugin_exception=True): + """ + Crawls a specific container and returns a Frame for it. + + :param container: a Container object + :param ignore_plugin_exception: just ignore exceptions in a plugin + :return: a Frame object. The returned frame can have 0 features and + still have metadata. This can occur if there were no plugins, or all + the plugins raised an exception (and ignore_plugin_exception was True). + """ + frame = ContainerFrame(self.features, container) + + # collect plugin crawl output for privileged plugins run at host + for (plugin_obj, plugin_args) in self.plugins: + try: + frame.add_features( + plugin_obj.crawl( + container_id=container.long_id, + **plugin_args)) + except Exception as exc: + if not ignore_plugin_exception: + raise exc + + # collect plugin crawl output from inside plugin sidecar container + try: + frame.add_features(self.get_plugincont_features(container)) + except Exception as exc: + if not ignore_plugin_exception: + raise exc + + return frame + + def polling_crawl(self, timeout, ignore_plugin_exception=True): + """ + Crawls any container created before `timeout` seconds have elapsed. 
+ + :param timeout: seconds to wait for new containers + :param ignore_plugin_exception: just ignore exceptions in a plugin + :return: a Frame object + """ + # Not implemented + time.sleep(timeout) + return None + + def crawl(self, ignore_plugin_exception=True): + """ + Crawls all containers. + + :param ignore_plugin_exception: just ignore exceptions in a plugin + :return: a list generator of Frame objects + """ + if self.pluginconts_manager is None: + return + containers_list = get_containers( + user_list=self.user_list, + host_namespace=self.host_namespace, + group_by_pid_namespace=False) + for container in containers_list: + plugincont_prefix = self.pluginconts_manager.plugincont_name_prefix + if not container.name.startswith(plugincont_prefix): + yield self.crawl_container(container, ignore_plugin_exception) diff --git a/crawler/utils/dockerutils.py b/crawler/utils/dockerutils.py index 184a3822..6e7df176 100644 --- a/crawler/utils/dockerutils.py +++ b/crawler/utils/dockerutils.py @@ -30,7 +30,7 @@ def exec_dockerps(): This call executes the `docker inspect` command every time it is invoked. 
""" try: - client = docker.Client( + client = docker.APIClient( base_url='unix://var/run/docker.sock', version='auto') containers = client.containers() inspect_arr = [] @@ -46,8 +46,8 @@ def exec_dockerps(): def exec_docker_history(long_id): try: - client = docker.Client(base_url='unix://var/run/docker.sock', - version='auto') + client = docker.APIClient( + base_url='unix://var/run/docker.sock', version='auto') image = client.inspect_container(long_id)['Image'] history = client.history(image) return history @@ -70,7 +70,7 @@ def _reformat_inspect(inspect): def exec_dockerinspect(long_id): try: - client = docker.Client( + client = docker.APIClient( base_url='unix://var/run/docker.sock', version='auto') inspect = client.inspect_container(long_id) _reformat_inspect(inspect) @@ -110,7 +110,7 @@ def _get_docker_storage_driver(): # Step 1, get it from "docker info" try: - client = docker.Client( + client = docker.APIClient( base_url='unix://var/run/docker.sock', version='auto') driver = client.info()['Driver'] except (docker.errors.DockerException, KeyError): @@ -196,7 +196,7 @@ def _get_docker_server_version(): """Run the `docker info` command to get server version """ try: - client = docker.Client( + client = docker.APIClient( base_url='unix://var/run/docker.sock', version='auto') return client.version()['Version'] except (docker.errors.DockerException, KeyError) as e: @@ -288,14 +288,28 @@ def _get_container_rootfs_path_btrfs(long_id, inspect=None): return rootfs_path +def _get_docker_root_dir(): + try: + client = docker.APIClient( + base_url='unix://var/run/docker.sock', version='auto') + docker_info = client.info() + root_dir = str(docker_info['DockerRootDir']) + return root_dir + except docker.errors.APIError as e: + logger.warning(str(e)) + raise DockerutilsException('Failed to get docker info') + + def _get_container_rootfs_path_aufs(long_id, inspect=None): rootfs_path = None + root_dir_prefix = _get_docker_root_dir() + if 
VERSION_SPEC.match(semantic_version.Version(_fix_version( server_version))): aufs_path = None - mountid_path = ('/var/lib/docker/image/aufs/layerdb/mounts/' + + mountid_path = (root_dir_prefix + '/image/aufs/layerdb/mounts/' + long_id + '/mount-id') try: with open(mountid_path, 'r') as f: @@ -304,11 +318,11 @@ def _get_container_rootfs_path_aufs(long_id, inspect=None): logger.warning(str(e)) if not aufs_path: raise DockerutilsException('Failed to get rootfs on aufs') - rootfs_path = '/var/lib/docker/aufs/mnt/' + aufs_path + rootfs_path = root_dir_prefix + '/aufs/mnt/' + aufs_path else: rootfs_path = None - for _path in ['/var/lib/docker/aufs/mnt/' + long_id, - '/var/lib/docker/aufs/diff/' + long_id]: + for _path in [root_dir_prefix + '/aufs/mnt/' + long_id, + root_dir_prefix + '/aufs/diff/' + long_id]: if os.path.isdir(_path) and os.listdir(_path): rootfs_path = _path break @@ -386,8 +400,8 @@ def get_docker_container_rootfs_path(long_id, inspect=None): def poll_container_create_events(timeout=0.1): try: - client = docker.Client(base_url='unix://var/run/docker.sock', - version='auto') + client = docker.APIClient( + base_url='unix://var/run/docker.sock', version='auto') filters = dict() filters['type'] = 'container' filters['event'] = 'start' diff --git a/crawler/utils/plugincont/namespace.py b/crawler/utils/plugincont/namespace.py new file mode 100644 index 00000000..50d533c6 --- /dev/null +++ b/crawler/utils/plugincont/namespace.py @@ -0,0 +1,268 @@ +#This namespace.py is needed for setns() in userns-remap world +#!/usr/bin/python +# -*- coding: utf-8 -*- +import os +import multiprocessing +import Queue +import logging +import sys +import types +import signal +import ctypes +import misc +import traceback +import time +from crawler_exceptions import CrawlTimeoutError, CrawlError + +logger = logging.getLogger('crawlutils') + +try: + libc = ctypes.CDLL('libc.so.6') +except Exception as e: + libc = None + +ALL_NAMESPACES = [ + 'user', + 'pid', + 'uts', + 'ipc', + 
# NOTE(review): reconstructed from a whitespace-mangled patch hunk of
# crawler/utils/namespace.py; diff '+' prefixes and interleaved patch
# metadata were dropped.  Depends on names imported/defined earlier in
# the file (os, sys, time, signal, logging, multiprocessing, Queue,
# types, traceback, libc, logger, CrawlError, CrawlTimeoutError).

# Maximum seconds to wait for a crawl running inside a container's
# namespaces before giving up.
IN_CONTAINER_TIMEOUT = 300


def get_pid_namespace(pid):
    """Return the pid-namespace inode of `pid`, or None if unavailable."""
    try:
        return os.stat('/proc/' + str(pid) + '/ns/pid').st_ino
    except OSError:
        # The /proc entry is gone, i.e. the process is not running.
        logger.debug('There is no container with pid=%s running.'
                     % pid)
        return None


class ProcessContext:
    """Attaches the current process to the namespaces of another pid."""

    def __init__(self, pid, namespaces):
        self.namespaces = namespaces
        self.pid = pid
        self.container_ns_fds = {}

    def attach(self):
        """Open the target pid's namespace files and setns() into them.

        Raises on failure; on a partial failure the namespace fds opened
        so far are closed again.
        """
        # Disable logging so log rotation is not triggered while we are
        # inside the container's mnt namespace.
        logging.disable(logging.CRITICAL)

        self.container_ns_fds = {}
        try:
            open_process_namespaces(self.pid, self.container_ns_fds,
                                    self.namespaces)
        except Exception as e:
            logging.disable(logging.NOTSET)
            logger.debug(e)
            try:
                # BUG FIX: the original closed `self.host_ns_fds`, an
                # attribute that is never assigned anywhere (guaranteed
                # AttributeError); close the fds this method opened.
                close_process_namespaces(self.container_ns_fds,
                                         self.namespaces)
            except Exception as e:
                logger.warning('Could not close the namespaces: %s' % e)
            raise

        try:
            attach_to_process_namespaces(self.container_ns_fds,
                                         self.namespaces)
        except Exception as e:
            logging.disable(logging.NOTSET)
            error_msg = ('Could not attach to a pid={pid} namespace, '
                         'Exception: {exc}'.format(pid=self.pid, exc=e))
            logger.error(error_msg)
            raise


def run_as_another_namespace(
    pid,
    namespaces,
    function,
    *args,
    **kwargs
):
    """Run `function(*args, **kwargs)` inside the namespaces of `pid`.

    Forks a helper process that attaches to the target namespaces, runs
    the function in a grandchild, and ships the result back over a
    multiprocessing queue.

    :raises CrawlTimeoutError: if the child does not respond in time
    :raises CrawlError: if the child dies without reporting a result
    """
    # Create the queue and its pipes before attaching to the container
    # mnt namespace.
    queue = multiprocessing.Queue(2 ** 15)

    context = ProcessContext(pid, namespaces)

    # Fork before attaching to the container mnt namespace to drop to a
    # single thread.
    child_process = multiprocessing.Process(
        target=_run_as_another_namespace_executor,
        args=(queue, context, pid, function, args),
        kwargs=kwargs)
    child_process.start()

    result = None
    grandchild_exception = None
    try:
        (result, grandchild_exception) = queue.get(
            timeout=IN_CONTAINER_TIMEOUT)
    except Queue.Empty:
        grandchild_exception = CrawlTimeoutError(
            'Timed out waiting for response from crawler process')
    except Exception:
        result = None
    if grandchild_exception:
        result = None

    child_process.join(1)
    # If the join timed out the process might still be alive
    if child_process.is_alive():
        errmsg = ('Timed out waiting for process %d to exit.' %
                  child_process.pid)
        queue.close()
        os.kill(child_process.pid, 9)
        logger.error(errmsg)
        raise CrawlTimeoutError(errmsg)

    if result is None:
        if grandchild_exception:
            raise grandchild_exception
        raise CrawlError('Unknown crawl error.')
    return result


def signal_handler_sighup(*args):
    """Exit when the crawler parent process dies (see PR_SET_PDEATHSIG)."""
    logger.warning('Crawler parent process died, so exiting... Bye!')
    exit(1)


def cache_modules_from_crawler_mnt_namespace():
    """Exercise multiprocessing Process/Queue once before switching mnt
    namespaces, so their lazily-loaded modules are cached from the
    crawler's own filesystem rather than the container's."""
    prime_process = multiprocessing.Process(target=time.sleep, args=(1,))
    prime_process.start()
    prime_process.is_alive()
    prime_process.join(0.001)
    prime_process.terminate()
    prime_process.join()
    prime_process.is_alive()
    del prime_process
    prime_queue = multiprocessing.Queue(2 ** 15)
    prime_queue.put('something')
    prime_queue.get()
    prime_queue.close()
    prime_queue.join_thread()
    del prime_queue


def wait_for_linux_thread_cleanup(expected_threads):
    """Busy-wait until /proc/<pid>/task shows at most `expected_threads`
    entries, i.e. the kernel has reaped terminated threads."""
    start_time = os.times()[4]
    while True:
        task_count = len(os.listdir('/proc/{}/task'.format(os.getpid())))
        if task_count > expected_threads:
            time.sleep(0.001)
        else:
            break
    logger.debug(
        'Waited {} seconds for Linux to cleanup terminated threads'.format(
            os.times()[4] - start_time))


def _run_as_another_namespace_executor(queue, context, pid, function,
                                       args, **kwargs):
    """Child process body: attach to the namespaces and spawn the
    grandchild that runs the crawl function."""
    # Die if the parent dies
    PR_SET_PDEATHSIG = 1
    libc.prctl(PR_SET_PDEATHSIG, signal.SIGHUP)
    signal.signal(signal.SIGHUP, signal_handler_sighup)

    cache_modules_from_crawler_mnt_namespace()
    wait_for_linux_thread_cleanup(1)
    try:
        context.attach()
    except Exception as e:
        queue.put((None, e))
        sys.exit(1)

    try:
        grandchild_process = multiprocessing.Process(
            name='crawler-%s' % pid,
            target=function_wrapper,
            args=(queue, function, args),
            kwargs=kwargs)
        grandchild_process.start()
    except OSError:
        sys.exit(1)

    grandchild_process.join(IN_CONTAINER_TIMEOUT)
    # If the join timed out the process might still be alive
    if grandchild_process.is_alive():
        os.kill(grandchild_process.pid, 9)
        sys.exit(1)


def function_wrapper(
    queue,
    function,
    *args,
    **kwargs
):
    """Grandchild body: call `function`, materialize generator results,
    and report (result, exception) back through `queue`."""
    # Die if the parent dies
    PR_SET_PDEATHSIG = 1
    libc.prctl(PR_SET_PDEATHSIG, signal.SIGHUP)
    signal.signal(signal.SIGHUP, signal_handler_sighup)

    result = None
    try:
        args = args[0]
        result = function(*args)

        # if result is a generator (i.e. function uses yield), it must
        # be materialized before it can cross the process boundary
        if isinstance(result, types.GeneratorType):
            result = list(result)

        queue.put((result, None))
        queue.close()
        sys.exit(0)
    except Exception as e:
        e.traceback = traceback.format_exc()
        queue.put((None, e))
        queue.close()
        sys.exit(1)


def open_process_namespaces(pid, namespace_fd, namespaces):
    """Open /proc/<pid>/ns/<ns> read-only for each requested namespace,
    storing the fds in `namespace_fd`; raises OSError on failure."""
    for ct_ns in namespaces:
        try:
            # arg 0 means readonly
            namespace_fd[ct_ns] = libc.open(
                '/proc/' + str(pid) + '/ns/' + ct_ns, 0)
            if namespace_fd[ct_ns] == -1:
                errno_msg = get_errno_msg(libc)
                error_msg = ('Opening the %s namespace file failed: %s'
                             % (ct_ns, errno_msg))
                logger.warning(error_msg)
                raise OSError(
                    'Failed to open {ns} namespace of {pid}: {err}'.format(
                        ns=ct_ns, pid=pid, err=error_msg))
        except Exception as e:
            error_msg = 'The open() syscall failed with: %s' % e
            logger.warning(error_msg)
            raise


def close_process_namespaces(namespace_fd, namespaces):
    """Best-effort close of the namespace fds; failures are only logged."""
    for ct_ns in namespaces:
        try:
            libc.close(namespace_fd[ct_ns])
        except Exception as e:
            error_msg = 'The close() syscall failed with: %s' % e
            logger.warning(error_msg)


def attach_to_process_namespaces(namespace_fd, ct_namespaces):
    """setns() into each namespace fd; raises OSError on failure."""
    for ct_ns in ct_namespaces:
        try:
            if hasattr(libc, 'setns'):
                r = libc.setns(namespace_fd[ct_ns], 0)
            else:
                # The Linux kernel ABI should be stable enough;
                # 308 is __NR_setns on x86_64 — TODO confirm for other
                # architectures.
                __NR_setns = 308
                r = libc.syscall(__NR_setns, namespace_fd[ct_ns], 0)
            if r == -1:
                errno_msg = get_errno_msg(libc)
                error_msg = ('Could not attach to the container %s '
                             'namespace (fd=%s): %s' %
                             (ct_ns, namespace_fd[ct_ns], errno_msg))
                logger.warning(error_msg)
                raise OSError(
                    'Failed to attach to {ns} namespace of {fd}: '
                    '{err}'.format(ns=ct_ns, fd=namespace_fd[ct_ns],
                                   err=error_msg))
        except Exception as e:
            error_msg = 'The setns() syscall failed with: %s' % e
            logger.warning(error_msg)
            logger.exception(e)
            raise


def get_errno_msg(libc):
    """Return a human-readable message for libc's current errno."""
    try:
        import ctypes
        libc.__errno_location.restype = ctypes.POINTER(ctypes.c_int)
        errno = libc.__errno_location().contents.value
        return os.strerror(errno)
    except Exception:
        return 'unknown error'
# NOTE(review): reconstructed from a whitespace-mangled patch hunk of
# plugincont_img/crawler/base_crawler.py; diff prefixes and surrounding
# patch metadata (.gitignore, __init__.py hunks) dropped.

import time
import uuid


class BaseFrame:
    """A snapshot frame: a list of crawled features plus frame metadata."""

    def __init__(self, feature_types):
        """
        :param feature_types: list of feature types, e.g. ['os', 'cpu'].
            This list is only used to describe the features in a frame;
            no check is made that every listed type has an actual
            feature in .data.
        """
        self.data = []
        self.metadata = {}
        self.metadata['features'] = ','.join(map(str, feature_types))
        self.metadata['timestamp'] = time.strftime('%Y-%m-%dT%H:%M:%S%z')
        self.metadata['uuid'] = str(uuid.uuid4())
        self.num_features = 0

    def add_features(self, features=None):
        """Append features, a list of (type, key, value) tuples."""
        # BUG FIX: the default used to be a shared mutable list ([]).
        if features:
            self.data.extend(features)
            self.num_features += len(features)

    def add_feature(self, feature_type, feature_key, feature_value):
        """Append a single (type, key, value) feature."""
        self.data.append((feature_type, feature_key, feature_value))
        self.num_features += 1

    def __str__(self):
        return '\n'.join(str(feature) for feature in self.data)


class BaseCrawler:
    """Abstract base class for crawlers; subclasses implement crawl()."""

    def __init__(self,
                 features=None,
                 plugin_places=None,
                 options=None):
        """
        Store the crawler configuration.

        :param features: feature types to crawl; defaults to
            ['os', 'cpu']
        :param plugin_places: plugin search directories; defaults to
            ['plugins']
        :param options: dict of options passed to crawler plugins;
            defaults to {}
        """
        # BUG FIX: the defaults used to be shared mutable objects
        # (lists/dict) stored directly, so all instances created with
        # the defaults silently shared state.  The old docstring also
        # documented a nonexistent 'frequency' parameter.
        self.features = ['os', 'cpu'] if features is None else features
        self.plugin_places = (['plugins'] if plugin_places is None
                              else plugin_places)
        self.options = {} if options is None else options

    def crawl(self, ignore_plugin_exception=True):
        """
        Crawl to get a list of snapshot frames for all systems.

        :param ignore_plugin_exception: ignore exceptions raised on a
            plugin
        :return: a list generator of Frame objects
        """
        raise NotImplementedError('crawl method implementation is missing.')

    def polling_crawl(self, timeout, ignore_plugin_exception=True):
        """
        Crawl to get a snapshot frame of any new system created before
        `timeout` seconds.

        :param timeout: seconds to wait for new systems
        :param ignore_plugin_exception: ignore exceptions raised on a
            plugin
        :return: a Frame object or None if no system was created.
        """
        if timeout > 0:
            time.sleep(timeout)
        return None
apply defaults + vdt = Validator() + _config.validate(vdt) + + +def get_config(): + global _config + + if not _config: + parse_crawler_config() + + return _config diff --git a/crawler/utils/plugincont/plugincont_img/crawler/config_spec_and_defaults.conf b/crawler/utils/plugincont/plugincont_img/crawler/config_spec_and_defaults.conf new file mode 100644 index 00000000..f314c8e0 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/config_spec_and_defaults.conf @@ -0,0 +1,10 @@ +[ general ] +features_to_crawl = string_list(default=list('os', 'cpu')) +environment = string(min=1, max=30, default='cloudsight') + +plugin_places = string_list(default=list('plugins')) +compress = boolean(default=False) + +link_container_log_files = boolean(default=False) +default_mountpoint = string(default='/') +docker_containers_list = string(default='ALL') diff --git a/crawler/utils/plugincont/plugincont_img/crawler/container.py b/crawler/utils/plugincont/plugincont_img/crawler/container.py new file mode 100644 index 00000000..31a41d0f --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/container.py @@ -0,0 +1,114 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +import logging +import os + +import psutil + +from utils import misc, namespace + +logger = logging.getLogger('crawlutils') + + +def list_raw_containers(user_list='ALL'): + """ + A running container is defined as a group of processes with the + `pid` namespace different to the `init` process `pid` namespace. 
+ """ + init_ns = namespace.get_pid_namespace(1) + for p in psutil.process_iter(): + pid = (p.pid() if hasattr(p.pid, '__call__') else p.pid) + if pid == 1 or pid == '1': + + # don't confuse the init process as a container + + continue + if user_list not in ['ALL', 'all', 'All']: + if str(pid) not in user_list: + + # skip containers not in the list + + continue + if misc.process_is_crawler(pid): + + # don't confuse the crawler process with a container + + continue + curr_ns = namespace.get_pid_namespace(pid) + if not curr_ns: + + # invalid container + + continue + if curr_ns == init_ns: + continue + yield Container(pid, curr_ns) + + +class Container(object): + + """ + This class abstracts a running Linux container. + """ + + def __init__(self, pid, process_namespace=None): + self.pid = str(pid) + self.short_id = str(hash(pid)) + self.long_id = str(hash(pid)) + self.name = str(pid) + self.namespace = str(pid) + self.image = None + self.root_fs = None + self.log_prefix = None + self.log_file_list = None + self.process_namespace = (process_namespace or + namespace.get_pid_namespace(pid)) + + def __eq__(self, other): + """ + A container is equal to another if they have the same PID + """ + if isinstance(other, Container): + return self.pid == other.pid + else: + return False + + def __hash__(self): + return 1 + + def __ne__(self, other): + return not self.__eq__(other) + + def is_docker_container(self): + return False + + def __str__(self): + return str(self.__dict__) + + def get_metadata_dict(self): + metadata = { + 'namespace': self.namespace, + 'container_long_id': self.long_id, + 'container_short_id': self.short_id, + 'container_name': self.name, + 'container_image': self.image, + 'emit_shortname': self.short_id, + } + return metadata + + def get_memory_cgroup_path(self, node='memory.stat'): + raise NotImplementedError() + + def get_cpu_cgroup_path(self, node='cpuacct.usage'): + raise NotImplementedError() + + def is_running(self): + return os.path.exists('/proc/' 
+ self.pid) + + def link_logfiles(self): + # no-op + pass + + def unlink_logfiles(self): + # no-op + pass diff --git a/crawler/utils/plugincont/plugincont_img/crawler/containers.py b/crawler/utils/plugincont/plugincont_img/crawler/containers.py new file mode 100644 index 00000000..1c10cde7 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/containers.py @@ -0,0 +1,91 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +import logging + +import container +from utils import misc +from dockercontainer import get_docker_containers, poll_docker_containers + +logger = logging.getLogger('crawlutils') + + +def list_all_containers(user_list='ALL', host_namespace='', + ignore_raw_containers=True): + """ + Returns a list of all running containers in the host. + + :param user_list: list of Docker container IDs. TODO: include rkt Ids. + :param host_namespace: string representing the host name (e.g. host IP) + :param ignore_raw_containers: if True, only include Docker or rkt. + An example of a non-docker container is a chromium-browser process. + :return: a list of Container objects + """ + visited_ns = set() # visited PID namespaces + + for _container in get_docker_containers(host_namespace=host_namespace, + user_list=user_list): + curr_ns = _container.process_namespace + if curr_ns not in visited_ns: + visited_ns.add(curr_ns) + yield _container + + # XXX get list of rkt containers + + if ignore_raw_containers: + return + + for _container in container.list_raw_containers(user_list): + curr_ns = _container.process_namespace + if curr_ns not in visited_ns: + visited_ns.add(curr_ns) + yield _container + + +def poll_containers(timeout, user_list='ALL', host_namespace='', + ignore_raw_containers=True): + """ + Returns a list of all running containers in the host. + + :param timeout: seconds to wait for a new container + :param user_list: list of Docker container IDs. TODO: include rkt Ids. + :param host_namespace: string representing the host name (e.g. 
host IP) + :param ignore_raw_containers: if True, only include Docker or rkt. + An example of a non-docker container is a chromium-browser process. + :return: a list of Container objects + """ + # XXX: we only support polling docker containers + return poll_docker_containers(timeout, user_list=user_list, + host_namespace=host_namespace) + + +def get_containers( + environment='cloudsight', + host_namespace=misc.get_host_ipaddr(), + user_list='ALL', + ignore_raw_containers=True +): + """ + Returns a list of all containers running in the host. + + XXX This list excludes non-docker containers when running in non-cloudsight + environment. TODO: fix this weird behaviour. + + :param environment: this defines how the name (namespace) is constructed. + :param host_namespace: string representing the host name (e.g. host IP) + :param user_list: list of Docker container IDs. TODO: include rkt. + :param ignore_raw_containers: if True, only include Docker or rkt. + An example of a non-docker container is a chromium-browser process. + :return: a list of Container objects. 
+ """ + filtered_list = [] + containers_list = list_all_containers(user_list, host_namespace, + ignore_raw_containers) + for _container in containers_list: + default_environment = 'cloudsight' + if (environment != default_environment and + not _container.is_docker_container()): + continue + + filtered_list.append(_container) + + return filtered_list diff --git a/crawler/utils/plugincont/plugincont_img/crawler/containers_crawler.py b/crawler/utils/plugincont/plugincont_img/crawler/containers_crawler.py new file mode 100644 index 00000000..d1305210 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/containers_crawler.py @@ -0,0 +1,87 @@ +from containers import poll_containers, get_containers +import plugins_manager +from base_crawler import BaseCrawler, BaseFrame + + +class ContainerFrame(BaseFrame): + + def __init__(self, feature_types, container): + BaseFrame.__init__(self, feature_types) + self.metadata.update(container.get_metadata_dict()) + self.metadata['system_type'] = 'container' + + +class ContainersCrawler(BaseCrawler): + + def __init__(self, + features=['os', 'cpu'], + environment='cloudsight', + user_list='ALL', + host_namespace='', + plugin_places=['plugins'], + options={}): + + BaseCrawler.__init__( + self, + features=features, + plugin_places=plugin_places, + options=options) + plugins_manager.reload_env_plugin(environment, plugin_places) + plugins_manager.reload_container_crawl_plugins( + features, plugin_places, options) + self.plugins = plugins_manager.get_container_crawl_plugins(features) + self.environment = environment + self.host_namespace = host_namespace + self.user_list = user_list + + def crawl_container(self, container, ignore_plugin_exception=True): + """ + Crawls a specific container and returns a Frame for it. + + :param container: a Container object + :param ignore_plugin_exception: just ignore exceptions in a plugin + :return: a Frame object. The returned frame can have 0 features and + still have metadata. 
This can occur if there were no plugins, or all + the plugins raised an exception (and ignore_plugin_exception was True). + """ + frame = ContainerFrame(self.features, container) + for (plugin_obj, plugin_args) in self.plugins: + try: + frame.add_features( + plugin_obj.crawl( + container_id=container.long_id, + **plugin_args)) + except Exception as exc: + if not ignore_plugin_exception: + raise exc + return frame + + def polling_crawl(self, timeout, ignore_plugin_exception=True): + """ + Crawls any container created before `timeout` seconds have elapsed. + + :param timeout: seconds to wait for new containers + :param ignore_plugin_exception: just ignore exceptions in a plugin + :return: a Frame object + """ + container = poll_containers( + timeout, + user_list=self.user_list, + host_namespace=self.host_namespace) + if container: + return self.crawl_container(container, ignore_plugin_exception) + + return None + + def crawl(self, ignore_plugin_exception=True): + """ + Crawls all containers. + + :param ignore_plugin_exception: just ignore exceptions in a plugin + :return: a list generator of Frame objects + """ + containers_list = get_containers( + user_list=self.user_list, + host_namespace=self.host_namespace) + for container in containers_list: + yield self.crawl_container(container, ignore_plugin_exception) diff --git a/crawler/utils/plugincont/plugincont_img/crawler/containers_logs_linker.py b/crawler/utils/plugincont/plugincont_img/crawler/containers_logs_linker.py new file mode 100644 index 00000000..0e9d6c07 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/containers_logs_linker.py @@ -0,0 +1,123 @@ +import argparse +import os + +from base_crawler import BaseCrawler +from worker import Worker +from containers import get_containers +from utils import misc + + +class DockerContainersLogsLinker(BaseCrawler): + """ + Class used to maintain symlinks to container log files. 
The idea with this + is to symlink all log files of interest (from all containers of interest) + to some known directory in the host. Then point some log collector like + logstash to it (and get container logs). + """ + + def __init__(self, + environment='cloudsight', + user_list='ALL', + host_namespace=''): + self.containers_list = set() + self.new = set() + self.deleted = set() + self.environment = environment + self.host_namespace = host_namespace + self.user_list = user_list + + def update_containers_list(self): + """ + Actually poll for new containers. This updates the list of new and + deleted containers, in self.new and self.deleted. + :return: None + """ + curr_containers = set( + get_containers( + environment=self.environment, + user_list=self.user_list, + host_namespace=self.host_namespace)) + self.new = curr_containers - self.containers_list + self.deleted = self.containers_list - curr_containers + self.containers_list = curr_containers + + def link_containers(self): + for container in self.deleted: + container.unlink_logfiles() + for container in self.new: + container.link_logfiles() + + def crawl(self): + self.update_containers_list() + self.link_containers() + return [] + + +if __name__ == '__main__': + + euid = os.geteuid() + if euid != 0: + print 'Need to run this as root.' + exit(1) + + parser = argparse.ArgumentParser() + parser.add_argument( + '--namespace', + dest='namespace', + type=str, + nargs='?', + default=misc.get_host_ipaddr(), + help='Data source this crawler is associated with. Defaults to ' + '/localhost', + ) + parser.add_argument( + '--frequency', + dest='frequency', + type=int, + default=-1, + help='Target time period for iterations. Defaults to -1 which ' + 'means only run one iteration.' + ) + parser.add_argument('--logfile', dest='logfile', type=str, + default='crawler.log', + help='Logfile path. 
Defaults to crawler.log' + ) + parser.add_argument( + '--crawlContainers', + dest='crawlContainers', + type=str, + nargs='?', + default='ALL', + help='List of containers to crawl as a list of Docker container IDs. ' + 'If this is not passed, then just the host is crawled. ' + 'Alternatively the word "ALL" can be used to crawl every ' + 'container. "ALL" will crawl all namespaces including the host ' + 'itself. This option is only valid for INVM crawl mode. Example: ' + '--crawlContainers 5f3380d2319e,681be3e32661', + ) + parser.add_argument( + '--environment', + dest='environment', + type=str, + default='cloudsight', + help='This speficies some environment specific behavior, like how ' + 'to name a container. The way to add a new behavior is by ' + 'implementing a plugin (see plugins/cloudsight_environment.py ' + 'as an example. Defaults to "cloudsight".', + ) + + misc.setup_logger('crawlutils', 'linker.log') + misc.setup_logger('yapsy', 'yapsy.log') + args = parser.parse_args() + crawler = DockerContainersLogsLinker(environment=args.environment, + user_list=args.crawlContainers, + host_namespace=args.namespace) + + worker = Worker(emitters=None, + frequency=args.frequency, + crawler=crawler) + + try: + worker.run() + except KeyboardInterrupt: + pass diff --git a/crawler/utils/plugincont/plugincont_img/crawler/crawler.conf b/crawler/utils/plugincont/plugincont_img/crawler/crawler.conf new file mode 100644 index 00000000..591c6e1f --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/crawler.conf @@ -0,0 +1,80 @@ +[ general ] + #enabled_plugins = os_container, cpu_container + #enabled_emitter_plugins = Stdout Emitter, File Emitter +[ crawlers ] + + [[ os_container ]] + target = CONTAINER + + [[ process_container ]] + + [[ os_vm ]] + + [[ process_vm ]] + + [[ os_host ]] + + [[ process_host ]] + + [[ rubypackage ]] + + [[ pythonpackage ]] + avoid_setns = False + + [[ fprobe_container ]] + # parameters for softflowd timeouts + maxlife_timeout = 5 + + # 
flow probe must create the chosen netflow version + netflow_version = 10 + + # The directory where all the flow probe's output data will be written to + fprobe_output_dir = /tmp/crawler-fprobe + + # The filename pattern of the files that the data collector will produce + # container-id, pid, and timestamp will be replaced with concrete values + output_filepattern = fprobe-{ifname}-{timestamp} + + # The user to switch socket-datafile collector to in order to + # drop root privileges + fprobe_user = nobody + + # Terminate the started netflow probe process when terminating the crawler; + # this is useful when running the crawler as a process and all started + # flow probe processes should automatically terminate, thus ending to + # produce further data; set to 'false' or '0' to disable, enable otherwise; + # the default value is 'false' + terminate_fprobe = 1 + + # Berkel packet filter for the probe + fprobe_bpf = (tcp[tcpflags] & (tcp-syn|tcp-ack|tcp-fin) != 0) or not tcp + + [[ ctprobe_container ]] + + # The user to switch socket-datafile collector and conntrackprobe to + # in order to drop root privileges + ctprobe_user = nobody + + # The directory where all the probe's output data will be written to + ctprobe_output_dir = /tmp/crawler-ctprobe + + # The filename pattern of the files that the data collector will produce + # container-id, pid, and timestamp will be replaced with concrete values + output_filepattern = fprobe-{ifname}-{timestamp} + +[ emitters ] + + [[ Stdout Emitter ]] + arg_from_conf = 1 + format = csv + + [[ File Emitter ]] + url = file://tmp/crawler-out + format = csv + arg_from_conf = 2 + + [[ SAS Https Emitter ]] + token_filepath = /etc/sas-secrets/token + access_group_filepath = /etc/sas-secrets/access_group + cloudoe_filepath = /etc/sas-secrets/cloudoe + ssl_verification = False diff --git a/crawler/utils/plugincont/plugincont_img/crawler/crawler.py b/crawler/utils/plugincont/plugincont_img/crawler/crawler.py new file mode 100755 index 
00000000..f4faa8cd --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/crawler.py @@ -0,0 +1,239 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +import argparse +import json +import os + +from worker import Worker +from containers_crawler import ContainersCrawler +from utils import misc +from crawlmodes import Modes +from emitters_manager import EmittersManager +from host_crawler import HostCrawler +from vms_crawler import VirtualMachinesCrawler + +logger = None + + +def csv_list(string): + return string.split(',') + + +def json_parser(string): + return json.loads(string) + + +def main(): + + euid = os.geteuid() + if euid != 0: + print 'Need to run this as root.' + exit(1) + + parser = argparse.ArgumentParser() + parser.add_argument( + '--options', + dest='options', + type=json_parser, + default={}, + help='JSON dict of crawler options used to be passed as arguments' + 'to the crawler plugins.' + ) + parser.add_argument( + '--url', + dest='url', + type=csv_list, + default=['stdout://'], + help='Send the snapshot data to URL. Defaults to the console.', + ) + parser.add_argument( + '--namespace', + dest='namespace', + type=str, + nargs='?', + default=misc.get_host_ipaddr(), + help='Data source this crawler is associated with. Defaults to ' + '/localhost', + ) + parser.add_argument( + '--features', + dest='features', + type=csv_list, + default=['os', 'cpu'], + help='Comma-separated list of feature-types to crawl. Defaults to ' + 'os,cpu', + ) + parser.add_argument( + '--frequency', + dest='frequency', + type=int, + default=-1, + help='Target time period for iterations. Defaults to -1 which ' + 'means only run one iteration.' + ) + parser.add_argument( + '--compress', + dest='compress', + action='store_true', + default=False, + help='Whether to GZIP-compress the output frame data, must be one of ' + '{true,false}. Defaults to false', + ) + parser.add_argument('--logfile', dest='logfile', type=str, + default='crawler.log', + help='Logfile path. 
Defaults to crawler.log' + ) + parser.add_argument( + '--crawlmode', + dest='crawlmode', + type=str, + choices=[ + Modes.INVM, + Modes.OUTVM, + Modes.MOUNTPOINT, + Modes.OUTCONTAINER, + Modes.MESOS, + ], + default=Modes.INVM, + help='The crawler mode: ' + '{INVM,OUTVM,MOUNTPOINT,OUTCONTAINER}. ' + 'Defaults to INVM', + ) + parser.add_argument( + '--mountpoint', + dest='mountpoint', + type=str, + default='/', + help='Mountpoint location used as the / for features like packages,' + 'files, config' + ) + parser.add_argument( + '--format', + dest='format', + type=str, + default='csv', + choices=['csv', 'graphite', 'json', 'logstash'], + help='Emitted data format.', + ) + parser.add_argument( + '--crawlContainers', + dest='crawlContainers', + type=str, + nargs='?', + default='ALL', + help='List of containers to crawl as a list of Docker container IDs' + '(only Docker is supported at the moment). ' 'Defaults to all ' + 'running containers. Example: --crawlContainers aaa,bbb', + ) + parser.add_argument( + '--crawlVMs', + dest='vm_descs_list', + nargs='+', + default='ALL', + help='List of VMs to crawl' + 'Default is \'ALL\' VMs' + 'Currently need following as input for each VM' + '\'vm_name, kernel_version_long, linux_flavour, arch\'' + 'Auto kernel version detection in future, when only vm names' + '(\'ALL\' by default) would need to be passed' + 'Example --crawlVM' + 'vm1,3.13.0-24-generic_3.13.0-24.x86_64,ubuntu,x86_64' + 'vm2,4.0.3.x86_64,vanilla,x86_64', + ) + parser.add_argument( + '--environment', + dest='environment', + type=str, + default='cloudsight', + help='This speficies some environment specific behavior, like how ' + 'to name a container. The way to add a new behavior is by ' + 'implementing a plugin (see plugins/cloudsight_environment.py ' + 'as an example. 
Defaults to "cloudsight".', + ) + parser.add_argument( + '--plugins', + dest='plugin_places', + type=csv_list, + default=['plugins'], + help='This is a comma separated list of directories where to find ' + 'plugins. Each path can be an absolute, or a relative to the ' + 'location of the crawler.py. Default is "plugins"', + ) + parser.add_argument( + '--numprocesses', + dest='numprocesses', + type=int, + default=1, + help='Number of processes used for container crawling. Defaults ' + 'to the number of cores. NOT SUPPORTED.' + ) + parser.add_argument( + '--extraMetadata', + dest='extraMetadata', + type=json_parser, + default={}, + help='Json with data to annotate all features. It can be used ' + 'to append a set of system identifiers to the metadata feature ' + 'and if the --extraMetadataForAll' + ) + parser.add_argument( + '--avoidSetns', + dest='avoid_setns', + action='store_true', + default=False, + help='Avoids the use of the setns() syscall to crawl containers. ' + 'Some features like process will not work with this option. 
' + 'Only applies to the OUTCONTAINER mode' + ) + + args = parser.parse_args() + misc.setup_logger('crawlutils', args.logfile) + misc.setup_logger('yapsy', 'yapsy.log') + + options = args.options + options['avoid_setns'] = args.avoid_setns + options['mountpoint'] = args.mountpoint + + emitters = EmittersManager(urls=args.url, + format=args.format, + compress=args.compress, + extra_metadata=args.extraMetadata, + plugin_places=args.plugin_places) + + if args.crawlmode == 'OUTCONTAINER': + crawler = ContainersCrawler( + features=args.features, + environment=args.environment, + user_list=args.crawlContainers, + host_namespace=args.namespace, + plugin_places=args.plugin_places, + options=options) + elif args.crawlmode == 'INVM' or args.crawlmode == 'MOUNTPOINT': + crawler = HostCrawler( + features=args.features, + namespace=args.namespace, + plugin_places=args.plugin_places, + options=options) + elif args.crawlmode == 'OUTVM': + crawler = VirtualMachinesCrawler( + features=args.features, + user_list=args.vm_descs_list, + host_namespace=args.namespace, + plugin_places=args.plugin_places, + options=options) + else: + raise NotImplementedError('Invalid crawlmode') + + worker = Worker(emitters=emitters, + frequency=args.frequency, + crawler=crawler) + + try: + worker.run() + except KeyboardInterrupt: + pass + + +if __name__ == '__main__': + main() diff --git a/crawler/utils/plugincont/plugincont_img/crawler/crawler_lite.py b/crawler/utils/plugincont/plugincont_img/crawler/crawler_lite.py new file mode 100644 index 00000000..fe3ce557 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/crawler_lite.py @@ -0,0 +1,182 @@ +import os +import sys +import inspect +import imp +import time +import argparse +import shutil +import cStringIO +import json +import logging +logging.basicConfig() +from icrawl_plugin import IContainerCrawler + +plugins_dir = '/crawler/plugins/systems/' # might eventually become /home/user1/crawler/plugins/... 
# Module state for the lightweight in-container crawler loop.
guestcont_plugins_file = '/rootfs_local/crawlplugins'
plugins_file = '/rootfs_local/crawlplugins'  # should eventually be /home/user1/crawlplugins
frame_dir = os.path.expanduser('~') + '/features/'  # '/home/plugincont_user/features/'
plugin_objs = []      # instantiated IContainerCrawler plugin objects
active_plugins = []   # plugin names already instantiated, to avoid duplicates
frequency = -1        # seconds between iterations; -1 = single run (fixes 'frquency' typo)
next_iteration_time = None  # absolute timestamp of the next scheduled iteration


def get_plugin_obj(plugin_name):
    """
    Load <plugin_name>_container_crawler.py from plugins_dir and return an
    instance of its IContainerCrawler subclass.

    :param plugin_name: bare plugin name (surrounding whitespace is ignored)
    :return: plugin instance, or None when no matching file/class is found
    """
    module_name = plugin_name.strip() + '_container_crawler'
    module_filename = plugin_name.strip() + '_container_crawler.py'
    for filename in os.listdir(plugins_dir):
        if filename != module_filename:
            continue
        plugin_module = imp.load_source(module_name,
                                        plugins_dir + module_filename)
        for class_name, klass in inspect.getmembers(plugin_module,
                                                    inspect.isclass):
            # '!=' instead of the original 'is not': identity comparison
            # against a string literal is undefined behavior.
            if class_name != 'IContainerCrawler' and \
                    issubclass(klass, IContainerCrawler):
                return klass()
        break  # only the first matching file is considered
    return None


def run_plugins_org():
    """One-shot debug helper: run every plugin named in /crawlercmd/crawlplugins."""
    with open('/crawlercmd/crawlplugins', 'r') as fd:
        plugin_names = tuple(fd)
    for plugin_name in plugin_names:
        print(plugin_name)
        plugin_obj = get_plugin_obj(plugin_name)
        print(plugin_obj.get_feature())
        try:
            for feature in plugin_obj.crawl('some_cont_id', avoid_setns=False):
                print(feature)
        except Exception:
            # Best-effort debug output; narrowed from a bare 'except:' so
            # KeyboardInterrupt/SystemExit still propagate.
            print(sys.exc_info()[0])


def parse_args():
    """Parse CLI options and set the module-level iteration frequency."""
    global frequency
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--frequency',
        dest='frequency',
        type=int,
        default=-1,
        help='Target time period for iterations. Defaults to -1 which '
             'means only run one iteration.'
    )
    args = parser.parse_args()
    frequency = args.frequency


def _get_next_iteration_time(snapshot_time):
    """
    Return the number of seconds to sleep before the next iteration.

    :param snapshot_time: start timestamp of the current iteration.
    :return: seconds to sleep as a float (can be <= 0 when we are behind).
    """
    global next_iteration_time
    if frequency == 0:
        return 0

    if next_iteration_time is None:
        next_iteration_time = snapshot_time + frequency
    else:
        next_iteration_time += frequency

    # If we fell more than one full period behind, skip the missed slots.
    while next_iteration_time + frequency < time.time():
        next_iteration_time += frequency

    return next_iteration_time - time.time()


def format(frame):  # NOTE: shadows the builtin format(); name kept for callers
    """
    Serialize an iterable of (key, value, feature_type) tuples into a
    tab-separated buffer, one feature per line.

    :param frame: iterable of (key, val, feature_type); val may be a dict
                  or a namedtuple-like object exposing _asdict()
    :return: a cStringIO buffer holding the formatted features
    """
    iostream = cStringIO.StringIO()
    for (key, val, feature_type) in frame:
        if not isinstance(val, dict):
            val = val._asdict()
        iostream.write('%s\t%s\t%s\n' % (
            feature_type, json.dumps(key),
            json.dumps(val, separators=(',', ':'))))
    return iostream


def iterate(snapshot_time=0, timeout=0):
    """
    Run one crawl iteration, optionally after sleeping `timeout` seconds,
    and write the resulting frame to frame_dir/<int(snapshot_time)>.
    Failures are logged and swallowed (best-effort loop).
    """
    if timeout > 0:
        time.sleep(timeout)
    try:
        reload_plugins()
        frame_file = frame_dir + str(int(snapshot_time))
        # 'with' guarantees the frame file is closed even when a plugin fails
        with open(frame_file, 'w') as fd:
            for plugin_obj in plugin_objs:
                crawl_output = plugin_obj.crawl('some_cont_id',
                                                avoid_setns=False)
                iostream = format(crawl_output)
                iostream.seek(0)
                shutil.copyfileobj(iostream, fd)
    except Exception:
        print(sys.exc_info()[0])


def run_plugins():
    """Main loop: crawl once, or every `frequency` seconds when frequency >= 0."""
    if os.path.isdir(frame_dir):
        shutil.rmtree(frame_dir)
    os.makedirs(frame_dir)
    time_to_sleep = 0
    while True:
        snapshot_time = time.time()
        iterate(snapshot_time, time_to_sleep)
        # Frequency < 0 means only one run.
        if frequency < 0:
            break
        time_to_sleep = _get_next_iteration_time(snapshot_time)


def get_plugin_external(url):
    """
    Fetch a plugin bundle (tar or .plugin+.py) from `url` into plugins_dir,
    install its requirements, and register its name in plugins_file.
    """
    # TODO: not implemented yet
    pass


def get_plugin_local(plugin_name):
    """
    Fetch `plugin_name` from a central crawler-specific repo into
    plugins_dir, install its requirements, and register it in plugins_file.
    Central-repo plugins can also be preloaded in the plugin container.
    """
    # TODO: not implemented yet
    pass


def gather_plugins():
    """
    Read the guest-container plugin list (no-op when absent), fetch any
    plugins it names, then instantiate every plugin listed in plugins_file
    exactly once (duplicates are skipped via active_plugins).
    """
    if not os.path.exists(guestcont_plugins_file):
        return

    with open(guestcont_plugins_file, 'r') as fd:
        for plugin_line in fd:
            if plugin_line.startswith('http'):
                get_plugin_external(plugin_line)
            else:
                get_plugin_local(plugin_line)

    # 'with' fixes the leaked handle from tuple(open(...))
    with open(plugins_file, 'r') as fd:
        plugin_names = tuple(fd)
    for plugin_name in plugin_names:
        if plugin_name in active_plugins:
            continue
        plugin_obj = get_plugin_obj(plugin_name)
        if plugin_obj is not None:
            print(plugin_name, plugin_obj.get_feature())
            plugin_objs.append(plugin_obj)
            active_plugins.append(plugin_name)


def reload_plugins():
    """Re-scan the plugin list; new plugins are picked up, existing ones kept."""
    gather_plugins()


def sleep_forever():
    """Block forever so the plugin container stays alive after the last run."""
    while True:
        time.sleep(10)


# Guarded entry point: previously these ran at import time, which made the
# module un-importable (sleep_forever() never returns) and let argparse
# consume a host program's argv.
if __name__ == '__main__':
    parse_args()
    gather_plugins()
    run_plugins()
    sleep_forever()
plugin_name.strip()+'_container_crawler' + for filename in os.listdir('/crawler/crawler/plugins/systems'): + if filename == plugin_file: + print filename + import plugin_module + diff --git a/crawler/utils/plugincont/plugincont_img/crawler/crawlmodes.py b/crawler/utils/plugincont/plugincont_img/crawler/crawlmodes.py new file mode 100644 index 00000000..ce91ed83 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/crawlmodes.py @@ -0,0 +1,7 @@ +from utils.misc import enum + +Modes = enum(INVM='INVM', + OUTVM='OUTVM', + MOUNTPOINT='MOUNTPOINT', + OUTCONTAINER='OUTCONTAINER', + MESOS='MESOS') diff --git a/crawler/utils/plugincont/plugincont_img/crawler/dockercontainer.py b/crawler/utils/plugincont/plugincont_img/crawler/dockercontainer.py new file mode 100644 index 00000000..7d8ca25a --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/dockercontainer.py @@ -0,0 +1,510 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +import glob +import json +import logging +import os +import shutil + +from requests.exceptions import HTTPError + +import plugins_manager +from container import Container +from utils import misc, namespace + +from utils.crawler_exceptions import (ContainerInvalidEnvironment, + ContainerNonExistent, + DockerutilsNoJsonLog, + DockerutilsException, + ContainerWithoutCgroups) +from utils.dockerutils import (exec_dockerps, + get_docker_container_json_logs_path, + get_docker_container_rootfs_path, + exec_dockerinspect, + poll_container_create_events) + +logger = logging.getLogger('crawlutils') + +HOST_LOG_BASEDIR = '/var/log/crawler_container_logs/' +LOG_TYPES_FILE = 'd464347c-3b99-11e5-b0e9-062dcffc249f.type-mapping' +DEFAULT_LOG_FILES = [{'name': '/var/log/messages', + 'type': None}, + {'name': '/etc/csf_env.properties', + 'type': None}, ] + + +def get_docker_containers(user_list=None, host_namespace=''): + """ + Get the list of running Docker containers, as `DockerContainer` objects. + This is basically polling. 
Ideally, we should subscribe to Docker + events so we can keep the containers list up to date without having to + poll like this. + + :param host_namespace: string representing the host name (e.g. host IP) + :param user_list: list of Docker container IDs. `None` means all + containers. + :return: a list of DockerContainer objects + """ + for inspect in exec_dockerps(): + long_id = inspect['Id'] + + if user_list not in ['ALL', 'all', 'All', None]: + user_ctrs = [cid[:12] for cid in user_list.split(',')] + short_id = long_id[:12] + if short_id not in user_ctrs: + continue + + try: + c = DockerContainer(long_id, inspect=inspect, + host_namespace=host_namespace) + if c.namespace: + yield c + except ContainerInvalidEnvironment as e: + logger.exception(e) + + +def poll_docker_containers(timeout, user_list=None, host_namespace=''): + """ + Get the first container created before `timeout` seconds have elapsed. + + :param timeout: seconds to wait for a new container. + :param host_namespace: string representing the host name (e.g. host IP) + :param user_list: list of Docker container IDs. `None` means all + containers. + :return: a DockerContainer object (just the first container created). + """ + if timeout <= 0: + return None + + try: + cEvent = poll_container_create_events(timeout) + + if not cEvent: + return None + c = DockerContainer(cEvent.get_containerid(), inspect=None, + host_namespace=host_namespace) + if c.namespace: + return c + except ContainerInvalidEnvironment as e: + logger.exception(e) + + +class LogFileLink(): + """ + If `host_log_dir is not None`, then we should prefix `dest` with + `host_log_dir`. 
+ """ + + def __init__(self, name=None, type=None, source=None, + dest=None, host_log_dir=None): + self.name = name + self.type = type + self.source = source + self.dest = dest + self.host_log_dir = host_log_dir + + def __str__(self): + return "%s: %s --> %s" % (self.name, self.source, self.dest) + + def get_dest(self): + if self.host_log_dir: + return misc.join_abs_paths(self.host_log_dir, self.dest) + return self.dest + + +class DockerContainer(Container): + + DOCKER_JSON_LOG_FILE = "docker.log" + + def __init__( + self, + long_id, + inspect=None, + host_namespace='', + process_namespace=None, + ): + + # Some quick sanity checks + if not isinstance(long_id, basestring): + raise TypeError('long_id should be a string') + if inspect and not isinstance(inspect, dict): + raise TypeError('inspect should be a dict.') + + if not inspect: + try: + inspect = exec_dockerinspect(long_id) + except HTTPError: + raise ContainerNonExistent('No docker container with ID: %s' + % long_id) + + state = inspect['State'] + self.image = inspect['Image'] + + assert(long_id == inspect['Id']) + self.long_id = long_id + self.host_namespace = host_namespace + self.pid = str(state['Pid']) + self.name = inspect['Name'] + self.running = state['Running'] + self.created = inspect['Created'] + self.network_settings = inspect['NetworkSettings'] + self.cmd = inspect['Config']['Cmd'] + self.mounts = inspect.get('Mounts') + self.volumes = inspect.get('Volumes') + self.image_name = inspect['Config']['Image'] + self.inspect = inspect + + self.process_namespace = (process_namespace or + namespace.get_pid_namespace(self.pid)) + + # This short ID is mainly used for logging purposes + self.short_id = long_id[:12] + + # Docker prepends a '/' to the name. Let's remove it. 
+ if self.name[0] == '/': + self.name = self.name[1:] + + self._set_image_fields(inspect.get('RepoTag', '')) + self._set_mounts_list() + + try: + self.root_fs = get_docker_container_rootfs_path(self.long_id) + except (HTTPError, RuntimeError, DockerutilsException) as e: + logger.exception(e) + self.root_fs = None + + self._set_logs_list_input() + self._set_environment_specific_options() + self._set_logs_list() + + def _set_image_fields(self, repo_tag): + """ + This function parses the image repository:tag string to try + to get info like the registry, and the "owner_namespace". + This "owner_namespace" field is not exactly officially a docker + concept, but it usually points to the owner of the image. + """ + self.docker_image_long_name = repo_tag + self.docker_image_short_name = os.path.basename(repo_tag) + if (':' in repo_tag) and ('/' not in repo_tag.rsplit(':', 1)[1]): + self.docker_image_tag = repo_tag.rsplit(':', 1)[1] + else: + self.docker_image_tag = '' + self.docker_image_registry = os.path.dirname(repo_tag).split('/')[0] + try: + # This is the 'abc' in 'registry/abc/bla:latest' + self.owner_namespace = os.path.dirname(repo_tag).split('/', 1)[1] + except IndexError: + self.owner_namespace = '' + + def is_docker_container(self): + return True + + def get_container_ip(self): + ip = self.inspect['NetworkSettings'][ + 'Networks']['bridge']['IPAddress'] + return ip + + def get_container_ports(self): + ports = [] + for item in self.inspect['Config']['ExposedPorts'].keys(): + ports.append(item.split('/')[0]) + return ports + + def get_metadata_dict(self): + metadata = super(DockerContainer, self).get_metadata_dict() + metadata['owner_namespace'] = self.owner_namespace + metadata['docker_image_long_name'] = self.docker_image_long_name + metadata['docker_image_short_name'] = self.docker_image_short_name + metadata['docker_image_tag'] = self.docker_image_tag + metadata['docker_image_registry'] = self.docker_image_registry + + return metadata + + def 
_set_environment_specific_options(self): + """ + This function is used to setup these environment specific fields: + namespace, log_prefix, and logfile_links. + """ + + logger.info('setup_namespace_and_metadata: long_id=' + + self.long_id) + + try: + _options = { + 'root_fs': self.root_fs, + 'type': 'docker', + 'name': self.name, + 'host_namespace': self.host_namespace, + 'container_logs': DEFAULT_LOG_FILES} + env = plugins_manager.get_runtime_env_plugin() + namespace = env.get_container_namespace( + self.long_id, _options) + if not namespace: + _env = env.get_environment_name() + logger.warning('Container %s does not have %s ' + 'metadata.' % (self.short_id, _env)) + raise ContainerInvalidEnvironment('') + self.namespace = namespace + + self.log_prefix = env.get_container_log_prefix( + self.long_id, _options) + + self.logs_list_input.extend([LogFileLink(name=log['name']) + for log in + env.get_container_log_file_list( + self.long_id, _options)]) + except ValueError: + # XXX-kollerr: plugins are not supposed to throw ValueError + logger.warning('Container %s does not have a valid alchemy ' + 'metadata json file.' 
% self.short_id) + raise ContainerInvalidEnvironment() + + def _set_mounts_list(self): + """ + Create self.mounts out of Volumes for old versions of Docker + """ + + if not self.mounts and self.volumes: + self.mounts = [{'Destination': vol, + 'Source': self.volumes[vol]} + for vol in self.volumes] + elif not self.mounts and not self.volumes: + self.mounts = [] + + # Find the mount point of the specified cgroup + + def _get_cgroup_dir(self, devlist=[]): + for dev in devlist: + paths = [os.path.join('/cgroup/', dev), + os.path.join('/sys/fs/cgroup/', dev)] + for path in paths: + if os.path.ismount(path): + return path + + # Try getting the mount point from /proc/mounts + for l in open('/proc/mounts', 'r'): + _type, mnt, _, _, _, _ = l.split(' ') + if _type == 'cgroup' and mnt.endswith('cgroup/' + dev): + return mnt + + raise ContainerWithoutCgroups('Can not find the cgroup dir') + + def get_memory_cgroup_path(self, node='memory.stat'): + return os.path.join(self._get_cgroup_dir(['memory']), 'docker', + self.long_id, node) + + def get_cpu_cgroup_path(self, node='cpuacct.usage'): + # In kernels 4.x, the node is actually called 'cpu,cpuacct' + cgroup_dir = self._get_cgroup_dir(['cpuacct', 'cpu,cpuacct']) + return os.path.join(cgroup_dir, 'docker', self.long_id, node) + + def __str__(self): + return str(self.__dict__) + + def link_logfiles(self): + + host_log_dir = self._get_logfiles_links_dest(HOST_LOG_BASEDIR) + + logger.debug('Linking log files for container %s' % self.short_id) + + # create an empty dir for the container logs + + if not os.path.exists(host_log_dir): + os.makedirs(host_log_dir) + + # Create a symlink from src to dst + + for log in self.logs_list: + dest = log.get_dest() + try: + if not os.path.exists(log.source): + logger.debug( + 'Log file %s does not exist, but linking it anyway' + % log.source) + dest_dir = os.path.dirname(dest) + if not os.path.exists(dest_dir): + os.makedirs(dest_dir) + os.symlink(log.source, dest) + logger.info( + 'Linking 
container %s %s logfile %s -> %s' % + (self.short_id, log.name, log.source, dest)) + except (OSError, IOError) as e: + logger.debug(e) + logger.debug('Link already exists: %s -> %s' + % (log.source, dest)) + except Exception as e: + logger.warning(e) + + # Keep record of what is linked in a file. + + try: + types_host_log_path = os.path.join(host_log_dir, + LOG_TYPES_FILE) + with open(types_host_log_path, 'w') as outfile: + logs_dict = [{'name': log.name, 'type': log.type} + for log in self.logs_list] + json.dump(logs_dict, outfile) + except (OSError, IOError) as e: + # Not a critical error: move on + logger.exception(e) + + def unlink_logfiles(self): + + host_log_dir = self._get_logfiles_links_dest(HOST_LOG_BASEDIR) + + logger.info('Un-linking log files for container %s.' + % self.short_id) + + logger.info('Trying to delete this directory and its symlinks: %s.' + % host_log_dir) + assert(host_log_dir.startswith('/var/log/crawler_container_logs/')) + + try: + shutil.rmtree(host_log_dir) + except (IOError, OSError) as exc: + logger.error('Could not delete directory %s: %s' % + (host_log_dir, exc)) + + def _parse_user_input_logs(self, var='LOG_LOCATIONS'): + """ + The user can provide a list of logfiles in a container for us + to maintain links to. This list of log files is passed as with + the `var` environment variable. + """ + + container = self + logs = [] # list of LogFileLink's + try: + logs = [LogFileLink(name=name) for name in + misc.get_process_env(container.pid)[var].split(',')] + except (IOError, KeyError, ValueError) as e: + logger.debug('There is a problem with the env. variables: %s' % e) + return logs + + def _set_logs_list_input(self): + """ + Sets the list of container logs that we should maintain links for. + + The paths are relative to the filesystem of the container. For example + the path for /var/log/messages in the container will be just + /var/log/messages in this list. 
+ """ + + self.logs_list_input = self._parse_user_input_logs(var='LOG_LOCATIONS') + + def _expand_and_map_log_link(self, log, host_log_dir, rootfs_path): + """ + Returns a list of LogFileLinks with all the fields set after + expanding the globs and mapping mount points. + """ + _logs = [] + if not self.mounts: + source = misc.join_abs_paths(rootfs_path, log.name) + if "*" in source: + _logs = [LogFileLink(name=log.name, + source=s, + type=log.type, + dest=s.split(rootfs_path, 1)[1], + host_log_dir=host_log_dir) + for s in glob.glob(source)] + else: + _logs = [LogFileLink(name=log.name, + type=log.type, + source=source, + dest=log.name, + host_log_dir=host_log_dir)] + + for mount in self.mounts: + mount_src = mount['Source'] + mount_dst = mount['Destination'] + if log.name.startswith(mount['Destination']): + source = log.name.replace(mount_dst, mount_src) + if "*" in source: + _logs = [LogFileLink(name=log.name, + source=s, + type=log.type, + dest=s.replace(mount_src, + mount_dst), + host_log_dir=host_log_dir) + for s in glob.glob(source)] + else: + _logs = [LogFileLink(name=log.name, + source=source, + dest=log.name, + type=log.type, + host_log_dir=host_log_dir)] + else: + source = misc.join_abs_paths(rootfs_path, log.name) + if "*" in source: + _logs = [LogFileLink(name=log.name, + source=s, + type=log.type, + dest=s.split(rootfs_path, 1)[1], + host_log_dir=host_log_dir) + for s in glob.glob(source)] + else: + _logs = [LogFileLink(name=log.name, + source=source, + dest=log.name, + type=log.type, + host_log_dir=host_log_dir)] + return _logs + + def _set_logs_list(self): + """ + Initializes the LogFileLinks list in `self.logs_list` + """ + + host_log_dir = self._get_logfiles_links_dest(HOST_LOG_BASEDIR) + + self.logs_list = [] + + rootfs_path = self.root_fs + if not rootfs_path: + logger.warning( + 'Container %s does not have a rootfs_path set' % + self.short_id) + return + + # remove relative paths + for log in self.logs_list_input: + # remove relative paths + if 
(not os.path.isabs(log.name)) or ('../' in log.name): + logger.warning('User provided a log file path that is not ' + 'absolute: %s' % log.name) + continue + + _logs = self._expand_and_map_log_link(log, + host_log_dir, + rootfs_path) + for log in _logs: + if log not in self.logs_list: + self.logs_list.append(log) + + logger.debug('logmap %s' % self.logs_list) + + # Link the container json log file name if there is one + + try: + docker_log_source = get_docker_container_json_logs_path( + self.long_id, self.inspect) + docker_log_dest = os.path.join(host_log_dir, + self.DOCKER_JSON_LOG_FILE) + self.logs_list.append(LogFileLink(name=self.DOCKER_JSON_LOG_FILE, + type=None, + source=docker_log_source, + dest=docker_log_dest)) + except DockerutilsNoJsonLog as e: + logger.exception(e) + + def _get_logfiles_links_dest( + self, + host_log_basedir + ): + """ + Returns the path in the host file system where the container's log + files should be linked to. + """ + + return os.path.join(host_log_basedir, self.log_prefix) diff --git a/crawler/utils/plugincont/plugincont_img/crawler/emitters_manager.py b/crawler/utils/plugincont/plugincont_img/crawler/emitters_manager.py new file mode 100644 index 00000000..f8ca5988 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/emitters_manager.py @@ -0,0 +1,66 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +import logging + +import plugins_manager +from base_crawler import BaseFrame +from utils.crawler_exceptions import EmitterUnsupportedProtocol + +logger = logging.getLogger('crawlutils') + + +class EmittersManager: + + """ + Class that manages a list of formatter and emitter objects, one per url. + The formatter takes a frame and writes it into an iostream, and the + emitter takes the iostream and emits it. + + This class should be instantiated at the beginning of the program, + and emit() should be called for each frame. 
+ """ + + def __init__( + self, + urls, + format='csv', + compress=False, + extra_metadata={}, + plugin_places=['plugins'] + ): + """ + Initializes a list of emitter objects; also stores all the args. + + :param urls: list of URLs to send to + :param format: format of each feature string + :param compress: gzip each emitter frame or not + :param extra_metadata: dict added to the metadata of each frame + """ + self.extra_metadata = extra_metadata + self.compress = compress + + # Create a list of Emitter objects based on urls + self.emitter_plugins = plugins_manager.get_emitter_plugins( + urls, + format, + plugin_places) + if not self.emitter_plugins: + raise EmitterUnsupportedProtocol('Emit protocols not supported') + + def emit(self, frame, snapshot_num=0): + """ + Sends a frame to the URLs specified at __init__ + + :param frame: frame of type BaseFrame + :param snapshot_num: iteration count (from worker.py). This is just + used to differentiate successive frame files (when url is file://). + :return: None + """ + if not isinstance(frame, BaseFrame): + raise TypeError('frame is not of type BaseFrame') + + metadata = frame.metadata + metadata.update(self.extra_metadata) + for (emitter_obj, emitter_args) in self.emitter_plugins: + emitter_obj.emit(frame, self.compress, + metadata, snapshot_num, **(emitter_args or {})) diff --git a/crawler/utils/plugincont/plugincont_img/crawler/formatters.py b/crawler/utils/plugincont/plugincont_img/crawler/formatters.py new file mode 100644 index 00000000..babd5a06 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/formatters.py @@ -0,0 +1,140 @@ +import json +import time +from morph import flatten + + +def write_in_csv_format(iostream, frame): + """ + Writes frame data and metadata into iostream in csv format. + + :param iostream: a CStringIO used to buffer the formatted features. 
+ :param frame: a BaseFrame object to be written into iostream + :return: None + """ + iostream.write('%s\t%s\t%s\n' % + ('metadata', json.dumps('metadata'), + json.dumps(frame.metadata, separators=(',', ':')))) + for (key, val, feature_type) in frame.data: + if not isinstance(val, dict): + val = val._asdict() + iostream.write('%s\t%s\t%s\n' % ( + feature_type, json.dumps(key), + json.dumps(val, separators=(',', ':')))) + + +def write_in_json_format(iostream, frame): + """ + Writes frame data and metadata into iostream in json format. + + :param iostream: a CStringIO used to buffer the formatted features. + :param frame: a BaseFrame object to be written into iostream + :return: None + """ + iostream.write('%s\n' % json.dumps(frame.metadata)) + for (key, val, feature_type) in frame.data: + if not isinstance(val, dict): + val = val._asdict() + val['feature_type'] = feature_type + val['namespace'] = frame.metadata.get('namespace', '') + iostream.write('%s\n' % json.dumps(val)) + + +def write_in_logstash_format(iostream, frame): + """ + Writes frame data and meta data in json format. + Similar to write_in_json_format, but this method concatenate them + in to a single json object. + + :param iostream: a CStringIO used to buffer the formatted features. + :param frame: a BaseFrame Object to be written into iostream + :return: None + """ + payload = {} + payload['metadata'] = frame.metadata + for (key, val, feature_type) in frame.data: + if not isinstance(val, dict): + val = val._asdict() + if feature_type not in payload: + payload[feature_type] = {} + payload[feature_type][key] = val + iostream.write('%s\n' % json.dumps(payload)) + + +def write_in_graphite_format(iostream, frame): + """ + Writes frame data and metadata into iostream in graphite format. + + :param iostream: a CStringIO used to buffer the formatted features. 
+ :param frame: a BaseFrame object to be written into iostream + :return: None + """ + namespace = frame.metadata.get('namespace', '') + timestamp = frame.metadata.get('timestamp', '') + for (key, val, feature_type) in frame.data: + if not isinstance(val, dict): + val = val._asdict() + write_feature_in_graphite_format(iostream, namespace, timestamp, + key, val, feature_type) + + +def write_feature_in_graphite_format(iostream, namespace, timestamp, + feature_key, feature_val, + feature_type): + """ + Write a feature in graphite format into iostream. The graphite format + looks like this, one line per metric value: + + [namespace].[feature_key].[metric] [value] [timestamp]\r\n + [namespace].[feature_key].[metric] [value] [timestamp]\r\n + [namespace].[feature_key].[metric] [value] [timestamp]\r\n + + This function converts a feature into that string and writes it into + the iostream. + + :param namespace: Frame namespace for this feature + :param timestamp: From frame metadata, fmt: %Y-%m-%dT%H:%M:%S%z + :param feature_type: + :param feature_key: + :param feature_val: + :param iostream: a CStringIO used to buffer the formatted features. 
+ :return: None + """ + # to convert 2017-02-07T13:20:15-0500 to 1486491615 (=epoch) + # for python >=3.2, following works + # time.strptime(timestamp,'%Y-%m-%dT%H:%M:%S%z'), + # but previous pyhon versions don't respect %z timezone info, + # so skipping time zone conversion assuming + # timezone during str timestamp creation in metadata is same for reverse + + timestamp = time.mktime(time.strptime(timestamp[:-5], '%Y-%m-%dT%H:%M:%S')) + + items = flatten(feature_val).items() + if isinstance(namespace, dict): + namespace = json.dumps(namespace) + else: + namespace = namespace.replace('/', '.') + + for (metric, value) in items: + try: + # Only emit values that we can cast as floats + value = float(value) + except (TypeError, ValueError): + continue + + metric = metric.replace('(', '_').replace(')', '') + metric = metric.replace(' ', '_').replace('-', '_') + metric = metric.replace('/', '_').replace('\\', '_') + + feature_key = feature_key.replace('_', '-') + if 'cpu' in feature_key or 'memory' in feature_key: + metric = metric.replace('_', '-') + if 'if' in metric: + metric = metric.replace('_tx', '.tx') + metric = metric.replace('_rx', '.rx') + if feature_key == 'load': + feature_key = 'load.load' + feature_key = feature_key.replace('/', '$') + + tmp_message = '%s.%s.%s %f %d\r\n' % (namespace, feature_key, + metric, value, timestamp) + iostream.write(tmp_message) diff --git a/crawler/utils/plugincont/plugincont_img/crawler/host_crawler.py b/crawler/utils/plugincont/plugincont_img/crawler/host_crawler.py new file mode 100644 index 00000000..422daa99 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/host_crawler.py @@ -0,0 +1,42 @@ +import plugins_manager +from base_crawler import BaseCrawler, BaseFrame + + +class HostFrame(BaseFrame): + + def __init__(self, feature_types, namespace): + BaseFrame.__init__(self, feature_types) + self.metadata['namespace'] = namespace + self.metadata['system_type'] = 'host' + + +class HostCrawler(BaseCrawler): + + 
def __init__(self, + features=['os', 'cpu'], namespace='', + plugin_places=['plugins'], options={}): + BaseCrawler.__init__( + self, + features=features, + plugin_places=plugin_places) + plugins_manager.reload_host_crawl_plugins( + features, plugin_places, options) + self.plugins = plugins_manager.get_host_crawl_plugins( + features=features) + self.namespace = namespace + + def crawl(self, ignore_plugin_exception=True): + """ + Crawl the host with all the plugins loaded on __init__ + + :param ignore_plugin_exception: just ignore exceptions on a plugin + :return: a list generator with a frame object + """ + frame = HostFrame(self.features, self.namespace) + for (plugin_obj, plugin_args) in self.plugins: + try: + frame.add_features(plugin_obj.crawl(**plugin_args)) + except Exception as exc: + if not ignore_plugin_exception: + raise exc + yield frame diff --git a/crawler/utils/plugincont/plugincont_img/crawler/icrawl_plugin.py b/crawler/utils/plugincont/plugincont_img/crawler/icrawl_plugin.py new file mode 100644 index 00000000..7e9fcaa7 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/icrawl_plugin.py @@ -0,0 +1,73 @@ +from yapsy.IPlugin import IPlugin + + +class IContainerCrawler(IPlugin): + + """ + Crawler plugin interface + + Subclasses of this class can be used to implement crawling functions + for different systems. + """ + + def crawl(self, container_id): + """ + Crawling function that should return a list of features for + `container_id`. This function is called once for every container + at every crawling interval. + """ + raise NotImplementedError() + + def get_feature(self): + """ + Returns the feature type as a string. + """ + raise NotImplementedError() + + +class IVMCrawler(IPlugin): + + """ + Crawler plugin interface + + Subclasses of this class can be used to implement crawling functions + for different systems. + """ + + def crawl(self, vm_desc): + """ + Crawling function that should return a list of features for + `vm_desc`. 
This should change to 'vm_name' after auto kernel version + detection. This function is called once for every VM + at every crawling interval. + """ + raise NotImplementedError() + + def get_feature(self): + """ + Returns the feature type as a string. + """ + raise NotImplementedError() + + +class IHostCrawler(IPlugin): + + """ + Crawler plugin interface + + Subclasses of this class can be used to implement crawling functions + for different host features (e.g. processes running in the host). + """ + + def crawl(self): + """ + Crawling function that should return a list of features for the host. + This function is called once at every crawling interval. + """ + raise NotImplementedError() + + def get_feature(self): + """ + Returns the feature type as a string. + """ + raise NotImplementedError() diff --git a/crawler/utils/plugincont/plugincont_img/crawler/iemit_plugin.py b/crawler/utils/plugincont/plugincont_img/crawler/iemit_plugin.py new file mode 100644 index 00000000..7bf0c597 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/iemit_plugin.py @@ -0,0 +1,57 @@ +import cStringIO +from yapsy.IPlugin import IPlugin +from formatters import (write_in_csv_format, + write_in_json_format, + write_in_graphite_format, + write_in_logstash_format) +from utils.crawler_exceptions import (EmitterUnsupportedFormat) + + +class IEmitter(IPlugin): + + """ + Base emitter class from which emitters like FileEmitter, StdoutEmitter + should inherit. The main idea is that all emitters get a url, and should + implement an emit() function given an iostream (a buffer with the features + to emit). 
+ """ + + def init(self, url, timeout=1, max_retries=5, emit_format='csv'): + self.url = url + self.timeout = timeout + self.max_retries = max_retries + self.emit_per_line = False + + self.supported_formats = {'csv': write_in_csv_format, + 'graphite': write_in_graphite_format, + 'json': write_in_json_format, + 'logstash': write_in_logstash_format} + + if emit_format in self.supported_formats: + self.formatter = self.supported_formats[emit_format] + else: + raise EmitterUnsupportedFormat('Not supported: %s' % emit_format) + + def get_emitter_protocol(self): + raise NotImplementedError() + + def format(self, frame): + # this writes the frame metadata and data into iostream + # Pass iostream to the emitters so they can send its content to their + # respective url + iostream = cStringIO.StringIO() + self.formatter(iostream, frame) + return iostream + + def emit(self, frame, compress=False, + metadata={}, snapshot_num=0, **kwargs): + """ + + :param iostream: a CStringIO used to buffer the formatted features. 
+ :param compress: + :param metadata: + :param snapshot_num: + :return: + """ + # this formats and emits an input frame + raise NotImplementedError() diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/__init__.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/__init__.py new file mode 100644 index 00000000..836e3e88 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/__init__.py @@ -0,0 +1,2 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/__init__.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/__init__.py new file mode 100644 index 00000000..836e3e88 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/__init__.py @@ -0,0 +1,2 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/apache/__init__.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/apache/__init__.py new file mode 100644 index 00000000..836e3e88 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/apache/__init__.py @@ -0,0 +1,2 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/apache/apache_container_crawler.plugin b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/apache/apache_container_crawler.plugin new file mode 100644 index 00000000..c32738ca --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/apache/apache_container_crawler.plugin @@ -0,0 +1,9 @@ +[Core] +Name = application_apache_container +Module = apache_container_crawler + +[Documentation] +Author = IBM +Version = 0.1 +Description = "Apache httpd server" + diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/apache/apache_container_crawler.py 
b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/apache/apache_container_crawler.py new file mode 100644 index 00000000..d7c0eb75 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/apache/apache_container_crawler.py @@ -0,0 +1,51 @@ +import logging + + +import dockercontainer +from icrawl_plugin import IContainerCrawler +from plugins.applications.apache import apache_crawler +from utils.crawler_exceptions import CrawlError + +logger = logging.getLogger('crawlutils') + + +class ApacheContainerCrawler(IContainerCrawler): + feature_type = 'application' + feature_key = 'apache' + + def get_feature(self): + return self.feature_key + + def crawl(self, container_id=None, **kwargs): + + c = dockercontainer.DockerContainer(container_id) + + # check image name + if c.image_name.find("httpd") == -1: + + logger.error("%s is not %s container", + c.image_name, + self.feature_key) + raise CrawlError("%s does not have expected name for %s (name=%s)", + container_id, + self.feature_key, + c.image_name) + + # extract IP and Port information + ip = c.get_container_ip() + ports = c.get_container_ports() + + # crawl all candidate ports + for port in ports: + try: + metrics = apache_crawler.retrieve_metrics(ip, port) + except CrawlError: + logger.error("can't find metrics endpoint at http://%s:%s", + ip, + port) + continue + return [(self.feature_key, metrics, self.feature_type)] + + raise CrawlError("%s has no accessible endpoint for %s", + container_id, + self.feature_key) diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/apache/apache_crawler.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/apache/apache_crawler.py new file mode 100644 index 00000000..a6aa4cd1 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/apache/apache_crawler.py @@ -0,0 +1,75 @@ +import urllib2 +from plugins.applications.apache import feature +from 
collections import defaultdict +from utils.crawler_exceptions import CrawlError + + +def retrieve_status_page(host, port): + statusPage = "http://%s:%s/server-status?auto" % (host, port) + req = urllib2.Request(statusPage) + response = urllib2.urlopen(req) + return response.read() + + +def parse_score_board(line, stats): + switch = { + "_": 'waiting_for_connection', + "S": 'starting_up', + "R": 'reading_request', + "W": 'sending_reply', + "K": 'keepalive_read', + "D": 'dns_lookup', + "C": 'closing_connection', + "L": 'logging', + "G": 'graceful_finishing', + "I": 'idle_worker_cleanup', + } + res = line.split(': ') + + workcounts = defaultdict(int) + for i in res[1]: + workcounts[i] += 1 + + for x, y in workcounts.iteritems(): + stats[switch.get(x)] = str(y) + + +def retrieve_metrics(host='localhost', port=80): + try: + status = retrieve_status_page(host, port).splitlines() + except Exception: + raise CrawlError("can't access to http://%s:%s", + host, port) + switch = { + "Total kBytes": 'Total_kBytes', + "Total Accesses": 'Total_Accesses', + "BusyWorkers": "BusyWorkers", + "IdleWorkers": "IdleWorkers", + "BytesPerSec": "BytesPerSec", + "BytesPerReq": "BytesPerReq", + "ReqPerSec": "ReqPerSec", + "Uptime": "Uptime" + } + + stats = {} + + for line in status: + if "Scoreboard" in line: + parse_score_board(line, stats) + + else: + res = line.split(': ') + if res[0] in switch: + stats[switch.get(res[0])] = res[1] + + feature_attributes = feature.ApacheFeature + + if len(stats) == 0: + raise CrawlError("failure to parse http://%s:%s", host, port) + + for name in feature_attributes._fields: + if name not in stats: + stats[name] = '0' + + feature_attributes = feature.get_feature(stats) + return feature_attributes diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/apache/apache_host_crawler.plugin b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/apache/apache_host_crawler.plugin new file mode 100644 index 
00000000..9a72f283 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/apache/apache_host_crawler.plugin @@ -0,0 +1,9 @@ +[Core] +Name = application_apache_host +Module = apache_host_crawler + +[Documentation] +Author = IBM +Version = 0.1 +Description = "Apache httpd server" + diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/apache/apache_host_crawler.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/apache/apache_host_crawler.py new file mode 100644 index 00000000..9c7a7bc8 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/apache/apache_host_crawler.py @@ -0,0 +1,21 @@ +from icrawl_plugin import IHostCrawler +from plugins.applications.apache import apache_crawler +import logging + +logger = logging.getLogger('crawlutils') + + +class ApacheHostCrawler(IHostCrawler): + feature_type = 'application' + feature_key = 'apache' + default_port = 80 + + def get_feature(self): + return self.feature_key + + def crawl(self): + metrics = apache_crawler.retrieve_metrics( + host='localhost', + port=self.default_port + ) + return [(self.feature_key, metrics, self.feature_type)] diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/apache/feature.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/apache/feature.py new file mode 100644 index 00000000..cc88c6a2 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/apache/feature.py @@ -0,0 +1,46 @@ +from collections import namedtuple + + +def get_feature(stats): + feature_attributes = ApacheFeature( + stats['BusyWorkers'], + stats['IdleWorkers'], + stats['waiting_for_connection'], + stats['starting_up'], + stats['reading_request'], + stats['sending_reply'], + stats['keepalive_read'], + stats['dns_lookup'], + stats['closing_connection'], + stats['logging'], + stats['graceful_finishing'], + stats['idle_worker_cleanup'], 
+ stats['BytesPerSec'], + stats['BytesPerReq'], + stats['ReqPerSec'], + stats['Uptime'], + stats['Total_kBytes'], + stats['Total_Accesses'] + ) + return feature_attributes + +ApacheFeature = namedtuple('ApacheFeature', [ + 'BusyWorkers', + 'IdleWorkers', + 'waiting_for_connection', + 'starting_up', + 'reading_request', + 'sending_reply', + 'keepalive_read', + 'dns_lookup', + 'closing_connection', + 'logging', + 'graceful_finishing', + 'idle_worker_cleanup', + 'BytesPerSec', + 'BytesPerReq', + 'ReqPerSec', + 'Uptime', + 'Total_kBytes', + 'Total_Accesses' +]) diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/db2/__init__.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/db2/__init__.py new file mode 100644 index 00000000..836e3e88 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/db2/__init__.py @@ -0,0 +1,2 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/db2/db2_container_crawler.plugin b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/db2/db2_container_crawler.plugin new file mode 100644 index 00000000..7639fcdb --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/db2/db2_container_crawler.plugin @@ -0,0 +1,14 @@ +[Core] +Name = application_db2_container +Module = db2_container_crawler + +[Documentation] +Author = IBM +Version = 0.1 +Description = "DB2 server" + +[Options] +user = administrator user name, Default is db2inst1 +password = administrator password, Default is db2inst1-pwd +db = db name to connect, Default is sample + diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/db2/db2_container_crawler.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/db2/db2_container_crawler.py new file mode 100644 index 00000000..7843b291 --- /dev/null +++ 
b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/db2/db2_container_crawler.py @@ -0,0 +1,65 @@ +import logging + +import dockercontainer +from icrawl_plugin import IContainerCrawler +from plugins.applications.db2 import db2_crawler +from utils.crawler_exceptions import CrawlError + +logger = logging.getLogger('crawlutils') + + +class DB2ContainerCrawler(IContainerCrawler): + feature_type = 'application' + feature_key = 'db2' + + def get_feature(self): + return self.feature_key + + def crawl(self, container_id=None, **kwargs): + password = "db2inst1-pwd" + user = "db2inst1" + db = "sample" + + if "password" in kwargs: + password = kwargs["password"] + + if "user" in kwargs: + user = kwargs["user"] + + if "db" in kwargs: + db = kwargs["db"] + + c = dockercontainer.DockerContainer(container_id) + + # check image name + if c.image_name.find(self.feature_key) == -1: + logger.error("%s is not %s container", + c.image_name, + self.feature_key) + raise CrawlError("%s does not have expected name for %s (name=%s)", + container_id, + self.feature_key, + c.image_name) + + # extract IP and Port information + ip = c.get_container_ip() + ports = c.get_container_ports() + + # crawl all candidate ports + for each_port in ports: + try: + metrics = db2_crawler.retrieve_metrics( + host=ip, + user=user, + password=password, + db=db, + ) + except CrawlError: + logger.error("can't find metrics endpoint at %s db %s", + ip, db) + continue + return [(self.feature_key, metrics, self.feature_type)] + + raise CrawlError("%s has no accessible endpoint for %s", + container_id, + self.feature_key) diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/db2/db2_crawler.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/db2/db2_crawler.py new file mode 100644 index 00000000..24f4323b --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/db2/db2_crawler.py @@ -0,0 +1,158 @@ +import logging +from
plugins.applications.db2 import feature +from utils.crawler_exceptions import CrawlError + +logger = logging.getLogger('crawlutils') + + +def retrieve_metrics(host='localhost', + user='db2inst1', password='db2inst1-pwd', + db='sample'): + import pip + pip.main(['install', 'ibm_db']) + import ibm_db_dbi + import ibm_db + + sql_list = ["SELECT db_size FROM systools.stmg_dbsize_info", + "SELECT db_capacity FROM systools.stmg_dbsize_info", + "select service_level concat ' FP'" + "concat fixpack_num from sysibmadm.env_inst_info", + "select inst_name from sysibmadm.env_inst_info", + "Select PRODUCT_NAME from sysibmadm.snapdbm", + "Select DB_NAME from sysibmadm.snapdb", + "Select SERVICE_LEVEL from sysibmadm.snapdbm", + "Select REM_CONS_IN + LOCAL_CONS from sysibmadm.snapdbm", + "Select sum(POOL_CUR_SIZE) from sysibmadm.SNAPDBM_MEMORY_POOL", + "Select TOTAL_CONS from sysibmadm.snapdb", + "Select TOTAL_LOG_USED *1. / " + "TOTAL_LOG_AVAILABLE * 100. from sysibmadm.snapdb", + "Select NUM_INDOUBT_TRANS from sysibmadm.snapdb", + "Select X_LOCK_ESCALS from sysibmadm.snapdb", + "Select LOCK_ESCALS from sysibmadm.snapdb", + "Select LOCK_TIMEOUTS from sysibmadm.snapdb", + "Select DEADLOCKS from sysibmadm.snapdb", + "Select LAST_BACKUP from sysibmadm.snapdb", + "Select DB_STATUS from sysibmadm.snapdb", + "select DB2_STATUS from sysibmadm.snapdbm", + "select case POOL_INDEX_L_READS when 0 then 1 else " + "(POOL_INDEX_L_READS * 1. - POOL_INDEX_P_READS * 1.) / " + "POOL_INDEX_L_READS end * 100. from sysibmadm.snapdb", + "select case POOL_DATA_L_READS when 0 then 1 else " + "(POOL_DATA_L_READS * 1. - POOL_DATA_P_READS * 1.) / " + "POOL_DATA_L_READS end *100. from sysibmadm.snapdb", + "select case TOTAL_SORTS when 0 then 0 else SORT_OVERFLOWS " + "*1. / TOTAL_SORTS *1. end * 100. 
from sysibmadm.snapdb", + "select COALESCE(AGENTS_WAITING_TOP,0) from sysibmadm.snapdbm", + "Select ROWS_UPDATED from sysibmadm.snapdb", + "Select ROWS_INSERTED from sysibmadm.snapdb", + "Select ROWS_SELECTED from sysibmadm.snapdb", + "Select ROWS_DELETED from sysibmadm.snapdb", + "Select SELECT_SQL_STMTS from sysibmadm.snapdb", + "Select STATIC_SQL_STMTS from sysibmadm.snapdb", + "Select DYNAMIC_SQL_STMTS from sysibmadm.snapdb", + "Select ROLLBACK_SQL_STMTS from sysibmadm.snapdb", + "Select COMMIT_SQL_STMTS from sysibmadm.snapdb", + "select case POOL_TEMP_INDEX_L_READS when 0 then 1 " + "else (POOL_TEMP_INDEX_L_READS * 1. - " + "POOL_TEMP_INDEX_P_READS * 1.) / POOL_TEMP_INDEX_L_READS end " + "* 100 from sysibmadm.snapdb", + "select case POOL_TEMP_DATA_L_READS when 0 then 1 else " + "(POOL_TEMP_DATA_L_READS * 1. - POOL_TEMP_DATA_P_READS * 1.) /" + " POOL_TEMP_DATA_L_READS end * 100. from sysibmadm.snapdb" + ] + + sql_stats = ["dbSize", + "dbCapacity", + "dbVersion", + "instanceName", + "productName", + "dbName", + "serviceLevel", + "instanceConn", + "instanceUsedMem", + "dbConn", + "usedLog", + "transcationInDoubt", + "xlocksEscalation", + "locksEscalation", + "locksTimeOut", + "deadLock", + "lastBackupTime", + "dbStatus", + "instanceStatus", + "bpIndexHitRatio", + "bpDatahitRatio", + "sortsInOverflow", + "agetnsWait", + "updateRows", + "insertRows", + "selectedRows", + "deleteRows", + "selects", + "selectSQLs", + "dynamicSQLs", + "rollbacks", + "commits", + "bpTempIndexHitRatio", + "bpTempDataHitRatio" + ] + + sql_stats_list = {} + + try: + ibm_db_conn = ibm_db.connect("DATABASE=" + db + + ";HOSTNAME=" + host + + ";UID=" + user + + ";PWD="+password+";", "", "") + conn = ibm_db_dbi.Connection(ibm_db_conn) + except: + raise CrawlError("cannot connect to database," + " db: %s, host: %s ", db, host) + + c = conn.cursor() + + i = 0 + for sql in sql_list: + try: + c.execute(sql) + except: + raise CrawlError("cannot execute sql %s", sql) + sql_stats_list[sql_stats[i]] = 
str(c.fetchone()[0]) + i += 1 + + db2_attributes = feature.DB2Feature( + sql_stats_list["dbCapacity"], + sql_stats_list["dbVersion"], + sql_stats_list["instanceName"], + sql_stats_list["productName"], + sql_stats_list["dbName"], + sql_stats_list["serviceLevel"], + sql_stats_list["instanceConn"], + sql_stats_list["instanceUsedMem"], + sql_stats_list["dbConn"], + sql_stats_list["usedLog"], + sql_stats_list["transcationInDoubt"], + sql_stats_list["xlocksEscalation"], + sql_stats_list["locksEscalation"], + sql_stats_list["locksTimeOut"], + sql_stats_list["deadLock"], + sql_stats_list["lastBackupTime"], + sql_stats_list["dbStatus"], + sql_stats_list["instanceStatus"], + sql_stats_list["bpIndexHitRatio"], + sql_stats_list["bpDatahitRatio"], + sql_stats_list["sortsInOverflow"], + sql_stats_list["agetnsWait"], + sql_stats_list["updateRows"], + sql_stats_list["insertRows"], + sql_stats_list["selectedRows"], + sql_stats_list["deleteRows"], + sql_stats_list["selects"], + sql_stats_list["selectSQLs"], + sql_stats_list["dynamicSQLs"], + sql_stats_list["rollbacks"], + sql_stats_list["commits"], + sql_stats_list["bpTempIndexHitRatio"], + sql_stats_list["bpTempDataHitRatio"] + ) + return db2_attributes diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/db2/db2_host_crawler.plugin b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/db2/db2_host_crawler.plugin new file mode 100644 index 00000000..87e917eb --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/db2/db2_host_crawler.plugin @@ -0,0 +1,14 @@ +[Core] +Name = application_db2_host +Module = db2_host_crawler + +[Documentation] +Author = IBM +Version = 0.1 +Description = "DB2 server" + +[Options] +user = administrator user name, Default is db2inst1 +password = administrator password, Default is db2inst1-pwd +db = db name to connect, Default is sample + diff --git 
a/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/db2/db2_host_crawler.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/db2/db2_host_crawler.py new file mode 100644 index 00000000..cd7f8a36 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/db2/db2_host_crawler.py @@ -0,0 +1,39 @@ +from icrawl_plugin import IHostCrawler +from plugins.applications.db2 import db2_crawler +from utils.crawler_exceptions import CrawlError +import logging + +logger = logging.getLogger('crawlutils') + + +class DB2HostCrawler(IHostCrawler): + feature_type = 'application' + feature_key = 'db2' + + def get_feature(self): + return self.feature_key + + def crawl(self, **options): + password = "db2inst1-pwd" + user = "db2inst1" + db = "sample" + + if "password" in options: + password = options["password"] + + if "user" in options: + user = options["user"] + + if "db" in options: + db = options["db"] + + try: + metrics = db2_crawler.retrieve_metrics( + host="localhost", + user=user, + password=password, + db=db + ) + return [(self.feature_key, metrics, self.feature_type)] + except: + raise CrawlError("cannot retrice metrics db %s", db) diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/db2/feature.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/db2/feature.py new file mode 100644 index 00000000..766ea8f6 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/db2/feature.py @@ -0,0 +1,37 @@ +from collections import namedtuple + +DB2Feature = namedtuple('DB2Feature', [ + "dbCapacity", + "dbVersion", + "instanceName", + "productName", + "dbName", + "serviceLevel", + "instanceConn", + "instanceUsedMem", + "dbConn", + "usedLog", + "transcationInDoubt", + "xlocksEscalation", + "locksEscalation", + "locksTimeOut", + "deadLock", + "lastBackupTime", + "dbStatus", + "instanceStatus", + "bpIndexHitRatio", + "bpDatahitRatio", + 
"sortsInOverflow", + "agetnsWait", + "updateRows", + "insertRows", + "selectedRows", + "deleteRows", + "selects", + "selectSQLs", + "dynamicSQLs", + "rollbacks", + "commits", + "bpTempIndexHitRatio", + "bpTempDataHitRatio" +]) diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/liberty/__init__.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/liberty/__init__.py new file mode 100644 index 00000000..836e3e88 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/liberty/__init__.py @@ -0,0 +1,2 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/liberty/feature.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/liberty/feature.py new file mode 100644 index 00000000..0cf6c75c --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/liberty/feature.py @@ -0,0 +1,45 @@ +from collections import namedtuple + +LibertyServletFeature = namedtuple('LibertyServletFeature', [ + 'name', + 'appName', + 'reqCount', + 'responseMean', + 'responseMax', + 'responseMin' + ]) + +LibertyJVMFeature = namedtuple('LibertyJVMFeature', [ + 'heap', + 'freeMemory', + 'usedMemory', + 'processCPU', + 'gcCount', + 'gcTime', + 'upTime' + ]) + +LibertyThreadFeature = namedtuple('LibertyThreadFeature', [ + 'activeThreads', + 'poolSize', + 'poolName' + ]) + +LibertySessionFeature = namedtuple('LibertySessionFeature', [ + 'name', + 'createCount', + 'liveCount', + 'activeCount', + 'invalidatedCount', + 'invalidatedCountByTimeout', + ]) + +LibertyMongoConnectionFeature = namedtuple('LibertyMongoConnectionFeature', [ + 'checkedOutCount', + 'waitQueueSize', + 'maxSize', + 'minSize', + 'host', + 'port', + 'size', + ]) diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/liberty/liberty_container_crawler.plugin 
b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/liberty/liberty_container_crawler.plugin new file mode 100644 index 00000000..3df968c3 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/liberty/liberty_container_crawler.plugin @@ -0,0 +1,13 @@ +[Core] +Name = application_liberty_container +Module = liberty_container_crawler + +[Documentation] +Author = IBM +Version = 0.1 +Description = Liberty crawling function for containers on the host + +[Options] +user = administrator user name, Default is user +password = administrator password, Default is password + diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/liberty/liberty_container_crawler.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/liberty/liberty_container_crawler.py new file mode 100644 index 00000000..098ec01a --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/liberty/liberty_container_crawler.py @@ -0,0 +1,57 @@ +import logging + +import dockercontainer +from icrawl_plugin import IContainerCrawler +from plugins.applications.liberty import liberty_crawler +from utils.crawler_exceptions import CrawlError + +logger = logging.getLogger('crawlutils') + + +class LibertyContainerCrawler(IContainerCrawler): + feature_type = 'application' + feature_key = 'liberty' + default_port = 9443 + + def get_feature(self): + return self.feature_key + + def crawl(self, container_id=None, **kwargs): + password = "password" + user = "user" + + if "password" in kwargs: + password = kwargs["password"] + + if "user" in kwargs: + user = kwargs["user"] + + c = dockercontainer.DockerContainer(container_id) + + # check image name + if c.image_name.find(self.feature_key) == -1: + logger.error("%s is not %s container", + c.image_name, + self.feature_key) + raise CrawlError("%s does not have expected name for %s (name=%s)", + container_id, + self.feature_key, + c.image_name) + + # 
extract IP and Port information + ip = c.get_container_ip() + ports = c.get_container_ports() + + # crawl all candidate ports + for each_port in ports: + return liberty_crawler.retrieve_metrics( + host=ip, + port=each_port, + user=user, + password=password, + feature_type=self.feature_type + ) + + raise CrawlError("%s has no accessible endpoint for %s", + container_id, + self.feature_key) diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/liberty/liberty_crawler.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/liberty/liberty_crawler.py new file mode 100644 index 00000000..f2a58425 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/liberty/liberty_crawler.py @@ -0,0 +1,243 @@ +import urllib2 +import ssl +import json +import re +from plugins.applications.liberty import feature +from utils.crawler_exceptions import CrawlError + + +def retrieve_status_page(user, password, url): + + try: + ssl._create_unverified_context + except AttributeError: + pass + else: + ssl._create_default_https_context = ssl._create_unverified_context + + password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm() + password_mgr.add_password(None, url, user, password) + handler = urllib2.HTTPBasicAuthHandler(password_mgr) + opener = urllib2.build_opener(handler) + urllib2.install_opener(opener) + + req = urllib2.Request(url) + try: + response = urllib2.urlopen(req) + return response.read() + except Exception: + raise CrawlError("can't access to http://%s", url) + + +def get_url(json_array, className): + urllist = [] + + for each_json in json_array: + if each_json.get("className") == className: + urllist.append(each_json.get("URL")) + + return urllist + + +def get_url_and_name(json_array, className): + url_name_list = [] + r = re.compile("name=(.+)") + for each_json in json_array: + + if each_json.get("className") == className: + m = r.search(each_json.get("objectName")) + if m: + 
url_name_list.append([each_json.get("URL"), m.group(1)]) + else: + url_name_list.append([each_json.get("URL"), 0]) + + return url_name_list + + +def servlet_get_url(attribute_array, name): + for attribute in attribute_array: + if attribute.get("name") == name: + return attribute.get("URL") + + +def get_servlet_stats(base_url, url, user, password): + monitor_status = json.loads(retrieve_status_page( + user, password, base_url+url)) + serv_stats = {} + + attribute_array = monitor_status.get("attributes") + servlet_url = servlet_get_url(attribute_array, "ResponseTimeDetails") + servlet_status = json.loads(retrieve_status_page( + user, password, base_url+servlet_url)) + + serv_stats["reqCount"] = servlet_status.get("value").get("count") + serv_stats["responseMean"] = servlet_status.get("value").get("mean") + serv_stats["responseMax"] = servlet_status.get("value").get("maximumValue") + serv_stats["responseMin"] = servlet_status.get("value").get("minimumValue") + + servlet_url = servlet_get_url(attribute_array, "ServletName") + servlet_status = json.loads(retrieve_status_page( + user, password, base_url + servlet_url)) + serv_stats["name"] = servlet_status.get("value") + + servlet_url = servlet_get_url(attribute_array, "AppName") + servlet_status = json.loads(retrieve_status_page( + user, password, base_url + servlet_url)) + serv_stats["appName"] = servlet_status.get("value") + return serv_stats + + +def get_jvm_stats(base_url, url, user, password): + monitor_status = json.loads(retrieve_status_page( + user, password, base_url+url)) + jvm_stats = {} + + attribute_array = monitor_status.get("attributes") + stats_name_array = ["Heap", "FreeMemory", "UsedMemory", + "ProcessCPU", "GcCount", "GcTime", "UpTime"] + for stat_name in stats_name_array: + jvm_url = servlet_get_url(attribute_array, stat_name) + jvm_status = json.loads(retrieve_status_page( + user, password, base_url+jvm_url)) + jvm_stats[stat_name] = jvm_status.get("value") + + return jvm_stats + + +def 
get_thread_stats(base_url, url, user, password): + monitor_status = json.loads(retrieve_status_page( + user, password, base_url+url)) + thread_stats = {} + + attribute_array = monitor_status.get("attributes") + stats_name_array = ["ActiveThreads", "PoolSize", "PoolName"] + for stat_name in stats_name_array: + thread_url = servlet_get_url(attribute_array, stat_name) + thread_status = json.loads(retrieve_status_page( + user, password, base_url+thread_url)) + thread_stats[stat_name] = thread_status.get("value") + + return thread_stats + + +def get_session_stats(base_url, url, user, password): + monitor_status = json.loads(retrieve_status_page( + user, password, base_url+url)) + session_stats = {} + + attribute_array = monitor_status.get("attributes") + session_name_array = ["CreateCount", "LiveCount", "ActiveCount", + "InvalidatedCount", "InvalidatedCountbyTimeout"] + for stat_name in session_name_array: + session_url = servlet_get_url(attribute_array, stat_name) + session_status = json.loads(retrieve_status_page( + user, password, base_url+session_url)) + session_stats[stat_name] = session_status.get("value") + + return session_stats + + +def get_mongo_connection_stats(base_url, url, user, password): + monitor_status = json.loads(retrieve_status_page( + user, password, base_url+url)) + connection_stats = {} + + attribute_array = monitor_status.get("attributes") + connection_name_array = ["CheckedOutCount", "WaitQueueSize", + "MinSize", "MaxSize", "Size", "Host", "Port"] + for stat_name in connection_name_array: + connection_url = servlet_get_url(attribute_array, stat_name) + connection_status = json.loads(retrieve_status_page( + user, password, base_url+connection_url)) + connection_stats[stat_name] = connection_status.get("value") + + return connection_stats + + +def retrieve_metrics(host='localhost', port=9443, + user='user', password='password', + feature_type='application'): + url = "https://%s:%s/IBMJMXConnectorREST/mbeans/" % (host, port) + + status = 
retrieve_status_page(user, password, url) + json_obj = json.loads(status) + base_url = "https://%s:%s" % (host, port) + + mbeans_url_array = get_url(json_obj, + "com.ibm.ws.webcontainer.monitor.ServletStats") + for url in mbeans_url_array: + serv_stats = get_servlet_stats(base_url, url, user, password) + servlet_attributes = feature.LibertyServletFeature( + serv_stats.get("name"), + serv_stats.get("appName"), + serv_stats.get("reqCount"), + serv_stats.get("responseMean"), + serv_stats.get("responseMax"), + serv_stats.get("responseMin") + ) + yield ('liberty_servlet_status', servlet_attributes, feature_type) + + mbeans_url_array = get_url(json_obj, "com.ibm.ws.monitors.helper.JvmStats") + + for url in mbeans_url_array: + jvm_stats = get_jvm_stats(base_url, url, user, password) + jvm_attributes = feature.LibertyJVMFeature( + jvm_stats.get("Heap"), + jvm_stats.get("FreeMemory"), + jvm_stats.get("UsedMemory"), + jvm_stats.get("ProcessCPU"), + jvm_stats.get("GcCount"), + jvm_stats.get("GcTime"), + jvm_stats.get("UpTime") + ) + yield ('liberty_jvm_status', jvm_attributes, feature_type) + + mbeans_url_array = get_url(json_obj, + "com.ibm.ws.monitors.helper.ThreadPoolStats") + + for url in mbeans_url_array: + thread_stats = get_thread_stats(base_url, url, user, password) + thread_attributes = feature.LibertyThreadFeature( + thread_stats.get("ActiveThreads"), + thread_stats.get("PoolSize"), + thread_stats.get("PoolName") + ) + yield ('liberty_thread_status', thread_attributes, feature_type) + + mbeans_url_name_array = get_url_and_name(json_obj, + "com.ibm.ws.session.monitor" + ".SessionStats") + + for url_name in mbeans_url_name_array: + session_stats = get_session_stats(base_url, + url_name[0], user, password) + session_attributes = feature.LibertySessionFeature( + url_name[1], + session_stats.get("CreateCount"), + session_stats.get("LiveCount"), + session_stats.get("ActiveCount"), + session_stats.get("InvalidatedCount"), + session_stats.get("InvalidatedCountbyTimeout"), + 
) + yield ('liberty_session_status', session_attributes, feature_type) + + mbeans_url_name_array = get_url_and_name(json_obj, + "com.mongodb.management" + ".ConnectionPoolStatistics") + + for url_name in mbeans_url_name_array: + connection_stats = get_mongo_connection_stats(base_url, + url_name[0], + user, password) + + connection_attributes = feature.LibertyMongoConnectionFeature( + connection_stats.get("CheckedOutCount"), + connection_stats.get("WaitQueueSize"), + connection_stats.get("MaxSize"), + connection_stats.get("MinSize"), + connection_stats.get("Host"), + connection_stats.get("Port"), + connection_stats.get("Size") + ) + yield ('liberty_mongo_connection_status', + connection_attributes, feature_type) diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/liberty/liberty_host_crawler.plugin b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/liberty/liberty_host_crawler.plugin new file mode 100644 index 00000000..a5419ec8 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/liberty/liberty_host_crawler.plugin @@ -0,0 +1,13 @@ +[Core] +Name = application_liberty_host +Module = liberty_host_crawler + +[Documentation] +Author = IBM +Version = 0.1 +Description = "WAS liberty server" + +[Options] +user = administrator user name, Default is user +password = administrator password, Default is password + diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/liberty/liberty_host_crawler.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/liberty/liberty_host_crawler.py new file mode 100644 index 00000000..136c1506 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/liberty/liberty_host_crawler.py @@ -0,0 +1,32 @@ +from icrawl_plugin import IHostCrawler +from plugins.applications.liberty import liberty_crawler +import logging + +logger = logging.getLogger('crawlutils') + + +class 
LibertyHostCrawler(IHostCrawler): + feature_type = 'application' + feature_key = 'liberty' + default_port = 9443 + + def get_feature(self): + return self.feature_key + + def crawl(self, **options): + password = "password" + user = "user" + + if "password" in options: + password = options["password"] + + if "user" in options: + user = options["user"] + + return liberty_crawler.retrieve_metrics( + host='localhost', + port=self.default_port, + user=user, + password=password, + feature_type=self.feature_type + ) diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/nginx/__init__.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/nginx/__init__.py new file mode 100644 index 00000000..836e3e88 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/nginx/__init__.py @@ -0,0 +1,2 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/nginx/feature.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/nginx/feature.py new file mode 100644 index 00000000..16b72ec0 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/nginx/feature.py @@ -0,0 +1,22 @@ +from collections import namedtuple + + +def get_feature(match1, match2, match3): + feature_attributes = NginxFeature( + int(match1.group(1)), + int(match2.group(1)), + int(match2.group(3)), + int(match3.group(1)), + int(match3.group(2)), + int(match3.group(3)) + ) + return feature_attributes + +NginxFeature = namedtuple('NginxFeature', [ + 'Connections', + 'Accepted', + 'Requests', + 'Reading', + 'Writing', + 'Waiting' +]) diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/nginx/nginx_container_crawler.plugin b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/nginx/nginx_container_crawler.plugin new file mode 100644 index 00000000..cbe9ab0a --- /dev/null +++ 
b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/nginx/nginx_container_crawler.plugin @@ -0,0 +1,9 @@ +[Core] +Name = application_nginx_container +Module = nginx_container_crawler + +[Documentation] +Author = IBM +Version = 0.1 +Description = "nginx httpd server" + diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/nginx/nginx_container_crawler.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/nginx/nginx_container_crawler.py new file mode 100644 index 00000000..006ffab4 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/nginx/nginx_container_crawler.py @@ -0,0 +1,48 @@ +import logging + +import dockercontainer +from icrawl_plugin import IContainerCrawler +from plugins.applications.nginx import nginx_crawler +from utils.crawler_exceptions import CrawlError + +logger = logging.getLogger('crawlutils') + + +class NginxContainerCrawler(IContainerCrawler): + feature_type = 'application' + feature_key = 'nginx' + + def get_feature(self): + return self.feature_key + + def crawl(self, container_id=None, **kwargs): + c = dockercontainer.DockerContainer(container_id) + + # check image name + if c.image_name.find(self.feature_key) == -1: + logger.error("%s is not %s container", + c.image_name, + self.feature_key) + raise CrawlError("%s does not have expected name for %s (name=%s)", + container_id, + self.feature_key, + c.image_name) + + # extract IP and Port information + ip = c.get_container_ip() + ports = c.get_container_ports() + + # crawl all candidate ports + for port in ports: + try: + metrics = nginx_crawler.retrieve_metrics(ip, port) + except CrawlError: + logger.error("can't find metrics endpoint at http://%s:%s", + ip, + port) + continue + return [(self.feature_key, metrics, self.feature_type)] + + raise CrawlError("%s has no accessible endpoint for %s", + container_id, + self.feature_key) diff --git 
a/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/nginx/nginx_crawler.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/nginx/nginx_crawler.py new file mode 100644 index 00000000..b4e0536a --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/nginx/nginx_crawler.py @@ -0,0 +1,34 @@ +import re +import urllib2 + +from plugins.applications.nginx import feature +from utils.crawler_exceptions import CrawlError + + +def retrieve_status_page(host, port): + status_page = "http://%s:%s/nginx_status" % (host, port) + req = urllib2.Request(status_page) + response = urllib2.urlopen(req) + return response.read() + + +def retrieve_metrics(host='localhost', port=80): + try: + status = retrieve_status_page(host, port) + except Exception: + raise CrawlError("can't access to http://%s:%s", + host, port) + + match1 = re.search(r'Active connections:\s+(\d+)', status) + match2 = re.search(r'\s*(\d+)\s+(\d+)\s+(\d+)', status) + match3 = re.search(r'Reading:\s*(\d+)\s*Writing:\s*(\d+)\s*' + 'Waiting:\s*(\d+)', status) + + try: + feature_attributes = feature.get_feature( + match1, + match2, + match3) + return feature_attributes + except Exception: + raise CrawlError("failure to parse http://%s:%s", host, port) diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/nginx/nginx_host_crawler.plugin b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/nginx/nginx_host_crawler.plugin new file mode 100644 index 00000000..0bb1d099 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/nginx/nginx_host_crawler.plugin @@ -0,0 +1,9 @@ +[Core] +Name = application_nginx_host +Module = nginx_host_crawler + +[Documentation] +Author = IBM +Version = 0.1 +Description = "nginx httpd server" + diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/nginx/nginx_host_crawler.py 
b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/nginx/nginx_host_crawler.py new file mode 100644 index 00000000..ce237e92 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/nginx/nginx_host_crawler.py @@ -0,0 +1,21 @@ +from icrawl_plugin import IHostCrawler +from plugins.applications.nginx import nginx_crawler +import logging + +logger = logging.getLogger('crawlutils') + + +class NginxHostCrawler(IHostCrawler): + feature_type = 'application' + feature_key = 'nginx' + default_port = 80 + + def get_feature(self): + return self.feature_key + + def crawl(self): + metrics = nginx_crawler.retrieve_metrics( + host='localhost', + port=self.default_port + ) + return [(self.feature_key, metrics, self.feature_type)] diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/redis/__init__.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/redis/__init__.py new file mode 100644 index 00000000..836e3e88 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/redis/__init__.py @@ -0,0 +1,2 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/redis/feature.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/redis/feature.py new file mode 100644 index 00000000..a3e263fc --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/redis/feature.py @@ -0,0 +1,174 @@ +from collections import namedtuple + + +def create_feature(metrics): + fields = RedisFeature._fields + + for field_name in fields: + if field_name not in metrics: + metrics[field_name] = "" + + feature_attributes = RedisFeature( + metrics['aof_current_rewrite_time_sec'], + metrics['aof_enabled'], + metrics['aof_last_bgrewrite_status'], + metrics['aof_last_rewrite_time_sec'], + metrics['aof_last_write_status'], + metrics['aof_rewrite_in_progress'], + 
metrics['aof_rewrite_scheduled'], + metrics['arch_bits'], + metrics['blocked_clients'], + metrics['client_biggest_input_buf'], + metrics['client_longest_output_list'], + metrics['cluster_enabled'], + metrics['config_file'], + metrics['connected_clients'], + metrics['connected_slaves'], + metrics['evicted_keys'], + metrics['executable'], + metrics['expired_keys'], + metrics['gcc_version'], + metrics['hz'], + metrics['instantaneous_input_kbps'], + metrics['instantaneous_ops_per_sec'], + metrics['instantaneous_output_kbps'], + metrics['keyspace_hits'], + metrics['keyspace_misses'], + metrics['latest_fork_usec'], + metrics['loading'], + metrics['lru_clock'], + metrics['master_repl_offset'], + metrics['maxmemory'], + metrics['maxmemory_human'], + metrics['maxmemory_policy'], + metrics['mem_allocator'], + metrics['mem_fragmentation_ratio'], + metrics['migrate_cached_sockets'], + metrics['multiplexing_api'], + metrics['os'], + metrics['process_id'], + metrics['pubsub_channels'], + metrics['pubsub_patterns'], + metrics['rdb_bgsave_in_progress'], + metrics['rdb_changes_since_last_save'], + metrics['rdb_current_bgsave_time_sec'], + metrics['rdb_last_bgsave_status'], + metrics['rdb_last_bgsave_time_sec'], + metrics['rdb_last_save_time'], + metrics['redis_build_id'], + metrics['redis_git_dirty'], + metrics['redis_git_sha1'], + metrics['redis_mode'], + metrics['redis_version'], + metrics['rejected_connections'], + metrics['repl_backlog_active'], + metrics['repl_backlog_first_byte_offset'], + metrics['repl_backlog_histlen'], + metrics['repl_backlog_size'], + metrics['role'], + metrics['run_id'], + metrics['sync_full'], + metrics['sync_partial_err'], + metrics['sync_partial_ok'], + metrics['tcp_port'], + metrics['total_commands_processed'], + metrics['total_connections_received'], + metrics['total_net_input_bytes'], + metrics['total_net_output_bytes'], + metrics['total_system_memory'], + metrics['total_system_memory_human'], + metrics['uptime_in_days'], + 
metrics['uptime_in_seconds'], + metrics['used_cpu_sys'], + metrics['used_cpu_sys_children'], + metrics['used_cpu_user'], + metrics['used_cpu_user_children'], + metrics['used_memory'], + metrics['used_memory_peak'], + metrics['used_memory_peak_human'], + metrics['used_memory_rss'], + metrics['used_memory_rss_human'] + ) + return feature_attributes + +RedisFeature = namedtuple('RedisFeature', [ + 'aof_current_rewrite_time_sec', + 'aof_enabled', + 'aof_last_bgrewrite_status', + 'aof_last_rewrite_time_sec', + 'aof_last_write_status', + 'aof_rewrite_in_progress', + 'aof_rewrite_scheduled', + 'arch_bits', + 'blocked_clients', + 'client_biggest_input_buf', + 'client_longest_output_list', + 'cluster_enabled', + 'config_file', + 'connected_clients', + 'connected_slaves', + 'evicted_keys', + 'executable', + 'expired_keys', + 'gcc_version', + 'hz', + 'instantaneous_input_kbps', + 'instantaneous_ops_per_sec', + 'instantaneous_output_kbps', + 'keyspace_hits', + 'keyspace_misses', + 'latest_fork_usec', + 'loading', + 'lru_clock', + 'master_repl_offset', + 'maxmemory', + 'maxmemory_human', + 'maxmemory_policy', + 'mem_allocator', + 'mem_fragmentation_ratio', + 'migrate_cached_sockets', + 'multiplexing_api', + 'os', + 'process_id', + 'pubsub_channels', + 'pubsub_patterns', + 'rdb_bgsave_in_progress', + 'rdb_changes_since_last_save', + 'rdb_current_bgsave_time_sec', + 'rdb_last_bgsave_status', + 'rdb_last_bgsave_time_sec', + 'rdb_last_save_time', + 'redis_build_id', + 'redis_git_dirty', + 'redis_git_sha1', + 'redis_mode', + 'redis_version', + 'rejected_connections', + 'repl_backlog_active', + 'repl_backlog_first_byte_offset', + 'repl_backlog_histlen', + 'repl_backlog_size', + 'role', + 'run_id', + 'sync_full', + 'sync_partial_err', + 'sync_partial_ok', + 'tcp_port', + 'total_commands_processed', + 'total_connections_received', + 'total_net_input_bytes', + 'total_net_output_bytes', + 'total_system_memory', + 'total_system_memory_human', + 'uptime_in_days', + 'uptime_in_seconds', + 
'used_cpu_sys', + 'used_cpu_sys_children', + 'used_cpu_user', + 'used_cpu_user_children', + 'used_memory', + 'used_memory_peak', + 'used_memory_peak_human', + 'used_memory_rss', + 'used_memory_rss_human', +]) diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/redis/redis_container_crawler.plugin b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/redis/redis_container_crawler.plugin new file mode 100644 index 00000000..51aba1af --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/redis/redis_container_crawler.plugin @@ -0,0 +1,8 @@ +[Core] +Name = application_redis_container +Module = redis_container_crawler + +[Documentation] +Author = Tatsuhiro Chiba (chiba@jp.ibm.com) +Version = 0.1 +Description = Redis crawling function for containers on the host diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/redis/redis_container_crawler.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/redis/redis_container_crawler.py new file mode 100644 index 00000000..f5d5314d --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/redis/redis_container_crawler.py @@ -0,0 +1,57 @@ +from icrawl_plugin import IContainerCrawler +from plugins.applications.redis import feature +import dockercontainer +from requests.exceptions import ConnectionError +import logging + + +logger = logging.getLogger('crawlutils') + + +class RedisContainerCrawler(IContainerCrawler): + ''' + Crawling app provided metrics for redis container on docker. + Usually redis listens on port 6379. + ''' + + feature_type = "application" + feature_key = "redis" + default_port = 6379 + + def get_feature(self): + return self.feature_key + + def crawl(self, container_id=None, **kwargs): + + import pip + pip.main(['install', 'redis']) + import redis + + # only crawl redis container. Otherwise, quit. 
+ c = dockercontainer.DockerContainer(container_id) + if c.image_name.find(self.feature_key) == -1: + logger.debug("%s is not %s container" % + (c.image_name, self.feature_key)) + raise NameError("this is not target crawl container") + + # extract IP and Port information + ip = c.get_container_ip() + ports = c.get_container_ports() + + # set default port number + if len(ports) == 0: + ports.append(self.default_port) + + # query to all available ports + for port in ports: + client = redis.Redis(host=ip, port=port) + try: + metrics = client.info() + except ConnectionError: + logger.info("redis does not listen on port:%d", port) + continue + feature_attributes = feature.create_feature(metrics) + return [(self.feature_key, feature_attributes, self.feature_type)] + + # any ports are not available + raise ConnectionError("no listen ports") diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/redis/redis_host_crawler.plugin b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/redis/redis_host_crawler.plugin new file mode 100644 index 00000000..c9e55de7 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/redis/redis_host_crawler.plugin @@ -0,0 +1,8 @@ +[Core] +Name = application_redis_host +Module = redis_host_crawler + +[Documentation] +Author = Tatsuhiro Chiba (chiba@jp.ibm.com) +Version = 0.1 +Description = Redis crawling function for the local host diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/redis/redis_host_crawler.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/redis/redis_host_crawler.py new file mode 100644 index 00000000..697d24c1 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/redis/redis_host_crawler.py @@ -0,0 +1,37 @@ +from icrawl_plugin import IHostCrawler +from plugins.applications.redis import feature +from requests.exceptions import ConnectionError +import logging + 
+logger = logging.getLogger('crawlutils') + + +class RedisHostCrawler(IHostCrawler): + ''' + Crawling app provided metrics for redis on host. + Usually redis listens on port 6379. + ''' + + feature_type = "application" + feature_key = "redis" + default_port = 6379 + + def get_feature(self): + return self.feature_key + + # TODO: prepare a useful way to set host/port + def crawl(self, root_dir='/', **kwargs): + import pip + pip.main(['install', 'redis']) + import redis + + try: + client = redis.Redis(host='localhost', port=self.default_port) + metrics = client.info() + except ConnectionError: + logger.info("redis does not listen on port:%d", self.default_port) + raise ConnectionError("no listen at %d", self.default_port) + + feature_attributes = feature.create_feature(metrics) + + return [(self.feature_key, feature_attributes, self.feature_type)] diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/tomcat/__init__.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/tomcat/__init__.py new file mode 100644 index 00000000..836e3e88 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/tomcat/__init__.py @@ -0,0 +1,2 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/tomcat/feature.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/tomcat/feature.py new file mode 100644 index 00000000..8fb45603 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/tomcat/feature.py @@ -0,0 +1,41 @@ +from collections import namedtuple + + +TomcatJVMFeature = namedtuple('TomcatJVMFeature', [ + 'free', + 'total', + 'max' + ]) + +TomcatMemoryFeature = namedtuple('TomcatMemoryFeature', [ + 'name', + 'type', + 'initial', + 'committed', + 'maximum', + 'used' + ]) + +TomcatConnectorFeature = namedtuple('TomcatConnectorFeature', [ + 'connector', + 'maxThread', 
'currentThread', + 'currentThreadBusy', + 'requestMaxTime', + 'processingTime', + 'requestCount', + 'errorCount', + 'byteReceived', + 'byteSent' + ]) + +TomcatWorkerFeature = namedtuple('TomcatWorkerFeature', [ + 'connector', + 'stage', + 'time', + 'byteSent', + 'byteReceived', + 'client', + 'vhost', + 'request' + ]) diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/tomcat/tomcat_container_crawler.plugin b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/tomcat/tomcat_container_crawler.plugin new file mode 100644 index 00000000..571695b6 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/tomcat/tomcat_container_crawler.plugin @@ -0,0 +1,12 @@ +[Core] +Name = application_tomcat_container +Module = tomcat_container_crawler + +[Documentation] +Author = IBM +Version = 0.1 +Description = Tomcat crawling function for containers on the host + +[Options] +user = administrator user name, Default is tomcat +password = administrator password, Default is password diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/tomcat/tomcat_container_crawler.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/tomcat/tomcat_container_crawler.py new file mode 100644 index 00000000..69fea3d0 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/tomcat/tomcat_container_crawler.py @@ -0,0 +1,57 @@ +import logging + +import dockercontainer +from icrawl_plugin import IContainerCrawler +from plugins.applications.tomcat import tomcat_crawler +from utils.crawler_exceptions import CrawlError + +logger = logging.getLogger('crawlutils') + + +class TomcatContainerCrawler(IContainerCrawler): + feature_type = 'application' + feature_key = 'tomcat' + default_port = 8080 + + def get_feature(self): + return self.feature_key + + def crawl(self, container_id=None, **kwargs): + password = "password" + user = "tomcat" + + if "password" in 
kwargs: + password = kwargs["password"] + + if "user" in kwargs: + user = kwargs["user"] + + c = dockercontainer.DockerContainer(container_id) + + # check image name + if c.image_name.find(self.feature_key) == -1: + logger.error("%s is not %s container", + c.image_name, + self.feature_key) + raise CrawlError("%s does not have expected name for %s (name=%s)", + container_id, + self.feature_key, + c.image_name) + + # extract IP and Port information + ip = c.get_container_ip() + ports = c.get_container_ports() + + # crawl all candidate ports + for each_port in ports: + return tomcat_crawler.retrieve_metrics( + host=ip, + port=each_port, + user=user, + password=password, + feature_type=self.feature_type + ) + + raise CrawlError("%s has no accessible endpoint for %s", + container_id, + self.feature_key) diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/tomcat/tomcat_crawler.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/tomcat/tomcat_crawler.py new file mode 100644 index 00000000..9511ba70 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/tomcat/tomcat_crawler.py @@ -0,0 +1,82 @@ +import urllib2 +from plugins.applications.tomcat import feature +from xml.etree import ElementTree +from utils.crawler_exceptions import CrawlError + + +def retrieve_status_page(hostname, port, user, password): + statusPage = "http://%s:%s/manager/status?XML=true" % (hostname, port) + + password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm() + password_mgr.add_password(None, statusPage, user, password) + handler = urllib2.HTTPBasicAuthHandler(password_mgr) + opener = urllib2.build_opener(handler) + urllib2.install_opener(opener) + + req = urllib2.Request(statusPage) + try: + response = urllib2.urlopen(req) + return response.read() + except Exception: + raise CrawlError("can't access to http://%s:%s", + hostname, port) + + +def retrieve_metrics(host='localhost', port=8080, + user='tomcat', 
password='password', + feature_type='application'): + + status = retrieve_status_page(host, port, user, password) + tree = ElementTree.XML(status) + memoryNode = tree.find('jvm/memory') + jvm_attributes = feature.TomcatJVMFeature( + memoryNode.get("free"), + memoryNode.get("total"), + memoryNode.get("max") + ) + + yield('tomcat_jvm', jvm_attributes, feature_type) + + for node in tree.iter('memorypool'): + memory_pool_attributes = feature.TomcatMemoryFeature( + node.get("name"), + node.get("type"), + node.get("usageInit"), + node.get("usageCommitted"), + node.get("usageMax"), + node.get("usageUsed") + ) + yield('tomcat_memory', memory_pool_attributes, feature_type) + + ConnectorNode = tree.iter('connector') + for node in ConnectorNode: + threadInfo = node.find("threadInfo") + reqInfo = node.find("requestInfo") + + connector_feature_attributes = feature.TomcatConnectorFeature( + node.get("name"), + threadInfo.get("maxThreads"), + threadInfo.get("currentThreadCount"), + threadInfo.get("currentThreadsBusy"), + reqInfo.get("maxTime"), + reqInfo.get("processingTime"), + reqInfo.get("requestCount"), + reqInfo.get("errorCount"), + reqInfo.get("bytesReceived"), + reqInfo.get("bytesSent") + ) + yield('tomcat_connector', connector_feature_attributes, feature_type) + + workNode = node.iter("worker") + for work in workNode: + worker_feature_attributes = feature.TomcatWorkerFeature( + node.get("name"), + work.get("stage"), + work.get("requestProcessingTime"), + work.get("requestBytesSent"), + work.get("requestBytesReceived"), + work.get("remoteAddr"), + work.get("virtualHost"), + work.get("currentUri") + ) + yield('tomcat_worker', worker_feature_attributes, feature_type) diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/tomcat/tomcat_host_crawler.plugin b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/tomcat/tomcat_host_crawler.plugin new file mode 100644 index 00000000..3869c3f5 --- /dev/null +++ 
b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/tomcat/tomcat_host_crawler.plugin @@ -0,0 +1,12 @@ +[Core] +Name = application_tomcat_host +Module = tomcat_host_crawler + +[Documentation] +Author = IBM +Version = 0.1 +Description = "Apache httpd server" + +[Options] +user = administrator user name, Default is tomcat +password = administrator password, Default is password diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/tomcat/tomcat_host_crawler.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/tomcat/tomcat_host_crawler.py new file mode 100644 index 00000000..3091971e --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/applications/tomcat/tomcat_host_crawler.py @@ -0,0 +1,32 @@ +from icrawl_plugin import IHostCrawler +from plugins.applications.tomcat import tomcat_crawler +import logging + +logger = logging.getLogger('crawlutils') + + +class TomcatHostCrawler(IHostCrawler): + feature_type = 'application' + feature_key = 'tomcat' + default_port = 8080 + + def get_feature(self): + return self.feature_key + + def crawl(self, **options): + password = "password" + user = "tomcat" + + if "password" in options: + password = options["password"] + + if "user" in options: + user = options["user"] + + return tomcat_crawler.retrieve_metrics( + host='localhost', + port=self.default_port, + user=user, + password=password, + feature_type=self.feature_type + ) diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/emitters/__init__.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/emitters/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/emitters/base_emitter.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/emitters/base_emitter.py new file mode 100644 index 00000000..ab65ba6f --- /dev/null +++ 
b/crawler/utils/plugincont/plugincont_img/crawler/plugins/emitters/base_emitter.py @@ -0,0 +1,27 @@ + +class BaseEmitter: + """ + Base emitter class from which emitters like FileEmitter, StdoutEmitter + should inherit. The main idea is that all emitters get a url, and should + implement an emit() function given an iostream (a buffer with the features + to emit). + """ + + def __init__(self, url, timeout=1, max_retries=5, + emit_per_line=False): + self.url = url + self.timeout = timeout + self.max_retries = max_retries + self.emit_per_line = emit_per_line + + def emit(self, iostream, compress=False, + metadata={}, snapshot_num=0): + """ + + :param iostream: a CStringIO used to buffer the formatted features. + :param compress: + :param metadata: + :param snapshot_num: + :return: + """ + pass diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/emitters/base_http_emitter.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/emitters/base_http_emitter.py new file mode 100644 index 00000000..a6f142e2 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/emitters/base_http_emitter.py @@ -0,0 +1,80 @@ +import logging +import time + +import requests + +from iemit_plugin import IEmitter + +logger = logging.getLogger('crawlutils') + + +class BaseHttpEmitter: + """ + Base emitter class for HTTP/HTTPS protocol. + HTTP/HTTPS emitter inherit init(), emit() and post() function. + They should implement get_emitter_protocol(). 
+ """ + + def get_emitter_protocol(self): + raise NotImplementedError("Use http or https emitter plugin instead.") + + def init(self, url, timeout=1, max_retries=5, emit_format='csv'): + IEmitter.init(self, url, + timeout=timeout, + max_retries=max_retries, + emit_format=emit_format) + if emit_format == 'json': + self.emit_per_line = True + + def emit(self, frame, compress=False, + metadata={}, snapshot_num=0, **kwargs): + """ + + :param frame: a frame containing extracted features + :param compress: + :param metadata: + :param snapshot_num: + :return: None + """ + iostream = self.format(frame) + if compress: + proto = self.get_emitter_protocol() + raise NotImplementedError( + '%s emitter does not support gzip.' % proto + ) + if self.emit_per_line: + iostream.seek(0) + for line in iostream.readlines(): + self.post(line, metadata) + else: + self.post(iostream.getvalue(), metadata) + + def post(self, content='', metadata={}): + headers = {'content-type': 'application/csv'} + params = {} + for attempt in range(self.max_retries): + try: + response = requests.post(self.url, headers=headers, + params=params, + data=content) + except requests.exceptions.ChunkedEncodingError as e: + logger.exception(e) + logger.error( + "POST to %s resulted in exception (attempt %d of %d), " + "Exiting." 
% (self.url, attempt + 1, self.max_retries)) + break + except requests.exceptions.RequestException as e: + logger.exception(e) + logger.error( + "POST to %s resulted in exception (attempt %d of %d)" % + (self.url, attempt + 1, self.max_retries)) + time.sleep(2.0 ** attempt * 0.1) + continue + if response.status_code != requests.codes.ok: + logger.error("POST to %s resulted in status code %s: %s " + "(attempt %d of %d)" % + (self.url, str(response.status_code), + response.text, attempt + 1, self.max_retries)) + time.sleep(2.0 ** attempt * 0.1) + else: + break diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/emitters/file_emitter.plugin b/crawler/utils/plugincont/plugincont_img/crawler/plugins/emitters/file_emitter.plugin new file mode 100644 index 00000000..fbb901bd --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/emitters/file_emitter.plugin @@ -0,0 +1,8 @@ +[Core] +Name = File Emitter +Module = file_emitter + +[Documentation] +Author = IBM +Version = 0.1 +Description = Plugin to emit frame to file diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/emitters/file_emitter.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/emitters/file_emitter.py new file mode 100644 index 00000000..85cb0988 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/emitters/file_emitter.py @@ -0,0 +1,46 @@ +import gzip +import shutil + +from iemit_plugin import IEmitter + + +class FileEmitter(IEmitter): + + """ + Emitter to file. This creates one file per frame. The file names + are the ones in the url. For example: for file:///tmp/a the file for + the first frame would be /tmp/a.0 for a host, and /tmp/a.xyz.0 for a + container with id xyz. + """ + + def get_emitter_protocol(self): + return 'file' + + def emit(self, frame, compress=False, + metadata={}, snapshot_num=0, **kwargs): + """ + + :param iostream: a CStringIO used to buffer the formatted features. 
+ :param compress: + :param metadata: + :param snapshot_num: + :return: + """ + iostream = self.format(frame) + output_path = self.url[len('file://'):] + short_name = metadata.get('emit_shortname', None) + if not short_name: + file_suffix = str(snapshot_num) + else: + file_suffix = '{0}.{1}'.format(short_name, snapshot_num) + output_path = '{0}.{1}'.format(output_path, file_suffix) + output_path += '.gz' if compress else '' + + with open(output_path, 'w') as fd: + if compress: + gzip_file = gzip.GzipFile(fileobj=fd, mode='w') + gzip_file.write(iostream.getvalue()) + gzip_file.close() + else: + iostream.seek(0) + shutil.copyfileobj(iostream, fd) diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/emitters/fluentd_emitter.plugin b/crawler/utils/plugincont/plugincont_img/crawler/plugins/emitters/fluentd_emitter.plugin new file mode 100644 index 00000000..9c80ca91 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/emitters/fluentd_emitter.plugin @@ -0,0 +1,8 @@ +[Core] +Name = Fluentd Emitter +Module = fluentd_emitter + +[Documentation] +Author = IBM +Version = 0.1 +Description = Plugin to emit frame to Fluentd diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/emitters/fluentd_emitter.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/emitters/fluentd_emitter.py new file mode 100644 index 00000000..1fa4ed34 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/emitters/fluentd_emitter.py @@ -0,0 +1,98 @@ +import logging + +from iemit_plugin import IEmitter +from utils.crawler_exceptions import EmitterUnsupportedFormat +from utils.misc import call_with_retries +from fluent import sender +import time + +logger = logging.getLogger('crawlutils') + + +class FluentdEmitter(IEmitter): + + def get_emitter_protocol(self): + return 'fluentd' + + def init(self, url, timeout=1, max_retries=5, emit_format='fluentd'): + self.url = url + self.timeout = timeout + self.max_retries = max_retries 
+ self.emit_per_line = True + + if emit_format != 'json': + raise EmitterUnsupportedFormat('Not supported: %s' % emit_format) + + try: + # assumption: URL for fluentd engine is of form fluentd://IP:PORT + host, port = url[len('fluentd://'):].split(':') + except (KeyError, TypeError) as exc: + logger.warn('Can not parse the url provided.') + raise exc + + self.fluentd_sender = None + + call_with_retries(self.connect_to_fluentd_engine, + max_retries=self.max_retries, + _args=tuple((host, int(port)))) + + def connect_to_fluentd_engine(self, host, port): + self.fluentd_sender = sender.FluentSender( + 'crawler', host=host, port=port) + if self.fluentd_sender.socket is None: + raise Exception + + def get_json_item(self, frame): + yield frame.metadata + for (key, val, feature_type) in frame.data: + output = dict() + if not isinstance(val, dict): + val = val._asdict() + output['feature_type'] = feature_type + output['feature_key'] = key + output['feature_val'] = val + yield output + + def emit_frame_atonce(self, tag, timestamp, frame): + combined_dict = dict() + item_count = 0 + + for json_item in self.get_json_item(frame): + key = 'feature' + str(item_count) + combined_dict[key] = json_item + item_count += 1 + + self._emit(tag, timestamp, combined_dict) + + def _emit(self, tag, timestamp, item): + self.fluentd_sender.emit_with_time(tag, timestamp, item) + if self.fluentd_sender.last_error is not None: + self.fluentd_sender.clear_last_error() + raise Exception + + def emit(self, frame, compress=False, + metadata={}, snapshot_num=0, **kwargs): + """ + + :param compress: + :param metadata: + :param snapshot_num: + :return: + """ + if compress: + raise NotImplementedError('Compress not implemented.') + + tag = frame.metadata.get('namespace', '') + timestamp = frame.metadata.get('timestamp', '') + timestamp = time.mktime( + time.strptime(timestamp[:-5], '%Y-%m-%dT%H:%M:%S')) + + if self.emit_per_line: + for json_item in self.get_json_item(frame): + call_with_retries(self._emit, 
+ max_retries=self.max_retries, + _args=tuple((tag, timestamp, json_item))) + else: + call_with_retries(self.emit_frame_atonce, + max_retries=self.max_retries, + _args=tuple((tag, timestamp, frame))) diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/emitters/http_emitter.plugin b/crawler/utils/plugincont/plugincont_img/crawler/plugins/emitters/http_emitter.plugin new file mode 100644 index 00000000..c2d7a759 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/emitters/http_emitter.plugin @@ -0,0 +1,8 @@ +[Core] +Name = Http Emitter +Module = http_emitter + +[Documentation] +Author = IBM +Version = 0.1 +Description = Plugin to post frame data to http server diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/emitters/http_emitter.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/emitters/http_emitter.py new file mode 100644 index 00000000..58731f10 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/emitters/http_emitter.py @@ -0,0 +1,12 @@ +import logging + +from iemit_plugin import IEmitter +from plugins.emitters.base_http_emitter import BaseHttpEmitter + +logger = logging.getLogger('crawlutils') + + +class HttpEmitter(BaseHttpEmitter, IEmitter): + + def get_emitter_protocol(self): + return 'http' diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/emitters/https_emitter.plugin b/crawler/utils/plugincont/plugincont_img/crawler/plugins/emitters/https_emitter.plugin new file mode 100644 index 00000000..bb4a44e3 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/emitters/https_emitter.plugin @@ -0,0 +1,8 @@ +[Core] +Name = Https Emitter +Module = https_emitter + +[Documentation] +Author = IBM +Version = 0.1 +Description = Plugin to post frame data to https server diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/emitters/https_emitter.py 
b/crawler/utils/plugincont/plugincont_img/crawler/plugins/emitters/https_emitter.py new file mode 100644 index 00000000..d43a29fb --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/emitters/https_emitter.py @@ -0,0 +1,12 @@ +import logging + +from iemit_plugin import IEmitter +from plugins.emitters.base_http_emitter import BaseHttpEmitter + +logger = logging.getLogger('crawlutils') + + +class HttpsEmitter(BaseHttpEmitter, IEmitter): + + def get_emitter_protocol(self): + return 'https' diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/emitters/kafka_emitter.plugin b/crawler/utils/plugincont/plugincont_img/crawler/plugins/emitters/kafka_emitter.plugin new file mode 100644 index 00000000..e917d99c --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/emitters/kafka_emitter.plugin @@ -0,0 +1,8 @@ +[Core] +Name = Kafka Emitter +Module = kafka_emitter + +[Documentation] +Author = IBM +Version = 0.1 +Description = Plugin to emit frame over kafka diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/emitters/kafka_emitter.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/emitters/kafka_emitter.py new file mode 100644 index 00000000..b633e49a --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/emitters/kafka_emitter.py @@ -0,0 +1,71 @@ +import logging + +import kafka as kafka_python +import pykafka + +from iemit_plugin import IEmitter +from utils.misc import (NullHandler, call_with_retries) + +logger = logging.getLogger('crawlutils') +# Kafka logs too much +logging.getLogger('kafka').addHandler(NullHandler()) + + +class KafkaEmitter(IEmitter): + + def get_emitter_protocol(self): + return 'kafka' + + def init(self, url, timeout=1, max_retries=10, emit_format='csv'): + IEmitter.init(self, url, + timeout=timeout, + max_retries=max_retries, + emit_format=emit_format) + + if emit_format == 'json': + self.emit_per_line = True + + try: + broker, topic = 
url[len('kafka://'):].split('/') + except (KeyError, TypeError) as exc: + logger.warn('Can not parse the url provided.') + raise exc + + self.client = None + self.producer = None + + call_with_retries(self.connect_to_broker, + max_retries=self.max_retries, + _args=tuple((broker, topic))) + + def connect_to_broker(self, broker, topic): + kafka_python_client = kafka_python.SimpleClient(broker) + kafka_python_client.ensure_topic_exists(topic) + + self.client = pykafka.KafkaClient(hosts=broker) + self.producer = self.client.topics[topic].get_producer() + + def emit(self, frame, compress=False, + metadata={}, snapshot_num=0, **kwargs): + """ + + :param compress: + :param metadata: + :param snapshot_num: + :return: + """ + iostream = self.format(frame) + if compress: + raise NotImplementedError('Compress not implemented.') + + if self.emit_per_line: + iostream.seek(0) + for line in iostream.readlines(): + call_with_retries(lambda io: self.producer.produce([line]), + max_retries=self.max_retries, + _args=tuple([iostream])) + else: + call_with_retries( + lambda io: self.producer.produce([io.getvalue()]), + max_retries=self.max_retries, + _args=tuple([iostream])) diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/emitters/mtgraphite_emitter.plugin b/crawler/utils/plugincont/plugincont_img/crawler/plugins/emitters/mtgraphite_emitter.plugin new file mode 100644 index 00000000..af6da177 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/emitters/mtgraphite_emitter.plugin @@ -0,0 +1,8 @@ +[Core] +Name = MTGraphite Emitter +Module = mtgraphite_emitter + +[Documentation] +Author = IBM +Version = 0.1 +Description = Plugin to emit frame to MTGraphite server diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/emitters/mtgraphite_emitter.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/emitters/mtgraphite_emitter.py new file mode 100644 index 00000000..1d841e5a --- /dev/null +++ 
b/crawler/utils/plugincont/plugincont_img/crawler/plugins/emitters/mtgraphite_emitter.py @@ -0,0 +1,43 @@ +import logging + +from iemit_plugin import IEmitter +from utils.mtgraphite import MTGraphiteClient +from formatters import write_in_graphite_format +from utils.crawler_exceptions import EmitterUnsupportedFormat + +logger = logging.getLogger('crawlutils') + + +class MtGraphiteEmitter(IEmitter): + + def get_emitter_protocol(self): + return 'mtgraphite' + + def init(self, url, timeout=1, max_retries=5, emit_format='graphite'): + self.url = url + self.timeout = timeout + self.max_retries = max_retries + self.emit_per_line = True + + if emit_format != 'graphite': + raise EmitterUnsupportedFormat('Not supported: %s' % emit_format) + + self.formatter = write_in_graphite_format + self.mtgraphite_client = MTGraphiteClient(self.url) + + def emit(self, frame, compress=False, + metadata={}, snapshot_num=0, **kwargs): + """ + + :param compress: + :param metadata: + :param snapshot_num: + :return: + """ + iostream = self.format(frame) + if self.emit_per_line: + iostream.seek(0) + num = self.mtgraphite_client.send_messages(iostream.readlines()) + else: + num = self.mtgraphite_client.send_messages([iostream.getvalue()]) + logger.debug('Pushed %d messages to mtgraphite queue' % num) diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/emitters/sas_emitter.plugin b/crawler/utils/plugincont/plugincont_img/crawler/plugins/emitters/sas_emitter.plugin new file mode 100644 index 00000000..8134d8f3 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/emitters/sas_emitter.plugin @@ -0,0 +1,14 @@ +[Core] +Name = SAS Https Emitter +Module = sas_emitter + +[Documentation] +Author = IBM +Version = 0.1 +Description = Plugin to post frame data to SAS (security analytics service) https server + +[Options] +token_filepath = /etc/sas-secrets/token +access_group_filepath = /etc/sas-secrets/access_group +cloudoe_filepath = /etc/sas-secrets/cloudoe 
+ssl_verification = False
diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/emitters/sas_emitter.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/emitters/sas_emitter.py
new file mode 100644
index 00000000..0943e1b3
--- /dev/null
+++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/emitters/sas_emitter.py
@@ -0,0 +1,146 @@
+import logging
+import os
+import json
+import time
+
+import requests
+
+from iemit_plugin import IEmitter
+from plugins.emitters.base_http_emitter import BaseHttpEmitter
+from utils.crawler_exceptions import EmitterUnsupportedFormat
+
+logger = logging.getLogger('crawlutils')
+
+
+class SasEmitter(BaseHttpEmitter, IEmitter):
+
+    def get_emitter_protocol(self):
+        return 'sas'
+
+    def init(self, url, timeout=1, max_retries=5, emit_format='csv'):
+        IEmitter.init(self, url,
+                      timeout=timeout,
+                      max_retries=max_retries,
+                      emit_format=emit_format)
+        if emit_format != 'csv':
+            raise EmitterUnsupportedFormat('Not supported: %s' % emit_format)
+
+    def emit(self, frame, compress=False,
+             metadata={}, snapshot_num=0, **kwargs):
+        """
+
+        :param frame: a frame containing extracted features
+        :param compress:
+        :param metadata:
+        :param snapshot_num:
+        :return: None
+        """
+        self.token_filepath = kwargs.get("token_filepath", "")
+        self.access_group_filepath = kwargs.get("access_group_filepath", "")
+        self.cloudoe_filepath = kwargs.get("cloudoe_filepath", "")
+        self.ssl_verification = kwargs.get("ssl_verification", "")
+
+        iostream = self.format(frame)
+        if compress:
+            proto = self.get_emitter_protocol()
+            raise NotImplementedError(
+                '%s emitter does not support gzip.' % proto
+            )
+        if self.emit_per_line:
+            iostream.seek(0)
+            for line in iostream.readlines():
+                self.post(line, metadata)
+        else:
+            self.post(iostream.getvalue(), metadata)
+
+    '''
+    This function retrieves sas token information from k8s secrets.
+    Current model of secret deployment in k8s is through mounting
+    'secret' inside crawler container.
+ ''' + def get_sas_tokens(self): + assert(os.path.exists(self.token_filepath)) + assert(os.path.exists(self.access_group_filepath)) + assert(os.path.exists(self.cloudoe_filepath)) + + fp = open(self.access_group_filepath) + access_group = fp.read().rstrip('\n') + fp.close() + + fp = open(self.cloudoe_filepath) + cloudoe = fp.read().rstrip('\n') + fp.close() + + fp = open(self.token_filepath) + token = fp.read().rstrip('\n') + fp.close() + + return(token, cloudoe, access_group) + + ''' + SAS requires following crawl metadata about entity + being crawled. + - timestamp + - namespace + - features + - source type + This function parses the crawled metadata feature and + gets these information. + ''' + def __parse_crawl_metadata(self, content=''): + metadata_str = content.split('\n')[0].split()[2] + metadata_json = json.loads(metadata_str) + timestamp = metadata_json.get('timestamp', '') + namespace = metadata_json.get('namespace', '') + features = metadata_json.get('features', '') + system_type = metadata_json.get('system_type', '') + + return (namespace, timestamp, features, system_type) + + def post(self, content='', metadata={}): + (namespace, timestamp, features, system_type) =\ + self.__parse_crawl_metadata(content) + (token, cloudoe, access_group) = self.get_sas_tokens() + headers = {'content-type': 'application/csv'} + headers.update({'Cloud-OE-ID': cloudoe}) + headers.update({'X-Auth-Token': token}) + + params = {} + params.update({'access_group': access_group}) + params.update({'namespace': namespace}) + params.update({'features': features}) + params.update({'timestamp': timestamp}) + params.update({'source_type': system_type}) + + self.url = self.url.replace('sas:', 'https:') + + verify = True + if self.ssl_verification == "False": + verify = False + + for attempt in range(self.max_retries): + try: + response = requests.post(self.url, headers=headers, + params=params, + data=content, verify=verify) + except requests.exceptions.ChunkedEncodingError as e: + 
logger.exception(e) + logger.error( + "POST to %s resulted in exception (attempt %d of %d), " + "Exiting." % (self.url, attempt + 1, self.max_retries)) + break + except requests.exceptions.RequestException as e: + logger.exception(e) + logger.error( + "POST to %s resulted in exception (attempt %d of %d)" % + (self.url, attempt + 1, self.max_retries)) + time.sleep(2.0 ** attempt * 0.1) + continue + if response.status_code != requests.codes.ok: + logger.error("POST to %s resulted in status code %s: %s " + "(attempt %d of %d)" % + (self.url, str(response.status_code), + response.text, attempt + 1, self.max_retries)) + time.sleep(2.0 ** attempt * 0.1) + else: + break diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/emitters/stdout_emitter.plugin b/crawler/utils/plugincont/plugincont_img/crawler/plugins/emitters/stdout_emitter.plugin new file mode 100644 index 00000000..d00b6a34 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/emitters/stdout_emitter.plugin @@ -0,0 +1,8 @@ +[Core] +Name = Stdout Emitter +Module = stdout_emitter + +[Documentation] +Author = IBM +Version = 0.1 +Description = Plugin to emit frame to console diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/emitters/stdout_emitter.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/emitters/stdout_emitter.py new file mode 100644 index 00000000..88c52c75 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/emitters/stdout_emitter.py @@ -0,0 +1,40 @@ +import cStringIO +import gzip +import sys + +from iemit_plugin import IEmitter + + +class StdoutEmitter(IEmitter): + + def get_emitter_protocol(self): + return 'stdout' + + def emit(self, frame, compress=False, + metadata={}, snapshot_num=0, **kwargs): + """ + + :param iostream: a CStringIO used to buffer the formatted features. 
+ :param compress: + :param metadata: + :param snapshot_num: + :return: + """ + iostream = self.format(frame) + if self.emit_per_line: + iostream.seek(0) + for line in iostream.readlines(): + self.emit_string(line, compress) + else: + self.emit_string(iostream.getvalue().strip(), compress) + + def emit_string(self, string, compress): + if compress: + tempio = cStringIO.StringIO() + gzip_file = gzip.GzipFile(fileobj=tempio, mode='w') + gzip_file.write(string) + gzip_file.close() + print tempio.getvalue() + else: + print "%s" % string + sys.stdout.flush() diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/environments/cloudsight_environment.plugin b/crawler/utils/plugincont/plugincont_img/crawler/plugins/environments/cloudsight_environment.plugin new file mode 100644 index 00000000..e83c226e --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/environments/cloudsight_environment.plugin @@ -0,0 +1,8 @@ +[Core] +Name = Cloudsight Environment +Module = cloudsight_environment + +[Documentation] +Author = IBM +Version = 0.1 +Description = Default environment diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/environments/cloudsight_environment.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/environments/cloudsight_environment.py new file mode 100644 index 00000000..c2a73560 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/environments/cloudsight_environment.py @@ -0,0 +1,42 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +import os +import logging +import copy + +from runtime_environment import IRuntimeEnvironment + +logger = logging.getLogger('crawlutils') + + +class CloudsightEnvironment(IRuntimeEnvironment): + name = 'cloudsight' + + def get_environment_name(self): + return self.name + + def get_container_namespace(self, long_id, options): + assert isinstance(long_id, str) or unicode, "long_id is not a string" + assert 'name' in options and 'host_namespace' in options + 
name = options['name'] + name = (name if len(name) > 0 else long_id[:12]) + name = (name[1:] if name[0] == '/' else name) + return options['host_namespace'] + '/' + name + + def get_container_log_file_list(self, long_id, options): + assert isinstance(long_id, str) or unicode, "long_id is not a string" + assert 'container_logs' in options + container_logs = copy.deepcopy(options['container_logs']) + for log in container_logs: + name = log['name'] + if not os.path.isabs(name) or '..' in name: + container_logs.remove(log) + logger.warning( + 'User provided a log file path that is not absolute: %s' % + name) + return container_logs + + def get_container_log_prefix(self, long_id, options): + assert isinstance(long_id, str) or unicode, "long_id is not a string" + return self.get_container_namespace(long_id, options) diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/environments/kubernetes_environment.plugin b/crawler/utils/plugincont/plugincont_img/crawler/plugins/environments/kubernetes_environment.plugin new file mode 100644 index 00000000..400cd125 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/environments/kubernetes_environment.plugin @@ -0,0 +1,8 @@ +[Core] +Name = Kubernetes Environment +Module = kubernetes_environment + +[Documentation] +Author = IBM +Version = 0.1 +Description = Kubernetes environment diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/environments/kubernetes_environment.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/environments/kubernetes_environment.py new file mode 100644 index 00000000..5a38cf6c --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/environments/kubernetes_environment.py @@ -0,0 +1,69 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +import os +import logging +import copy + +from runtime_environment import IRuntimeEnvironment +from utils.dockerutils import exec_dockerinspect + +logger = logging.getLogger('crawlutils') + 
+META_CONFIG = 'Config' +META_LABELS = 'Labels' +META_UUID = 'Id' +META_HOSTNAME = 'Hostname' + +K8S_NS_LABEL = "io.kubernetes.pod.namespace" +K8S_POD_LABEL = "io.kubernetes.pod.name" +K8S_CONTAINER_NAME_LABEL = "io.kubernetes.container.name" + +CRAWLER_NAMESPACE_FORMAT = "{K8S_NS}/{K8S_POD}/{K8S_CONT_NAME}/{K8S_CONT_ID}" + + +class KubernetesEnvironment(IRuntimeEnvironment): + name = 'kubernetes' + + def get_environment_name(self): + return self.name + + def get_container_namespace(self, long_id, options): + assert isinstance(long_id, str) or unicode, "long_id is not a string" + crawler_k8s_ns = "" + container_meta = exec_dockerinspect(long_id) + try: + labels = container_meta.get(META_CONFIG).get(META_LABELS) + if labels: + crawler_k8s_ns = CRAWLER_NAMESPACE_FORMAT.format( + K8S_NS=labels.get(K8S_NS_LABEL, ""), + K8S_POD=labels.get(K8S_POD_LABEL, ""), + K8S_CONT_NAME=labels.get(K8S_CONTAINER_NAME_LABEL, ""), + K8S_CONT_ID=long_id) + except KeyError: + logger.error('Error retrieving container labels for: %s' % + long_id) + pass + + return crawler_k8s_ns + + def get_container_log_file_list(self, long_id, options): + assert isinstance(long_id, str) or unicode, "long_id is not a string" + assert 'container_logs' in options + container_logs = copy.deepcopy(options['container_logs']) + for log in container_logs: + name = log['name'] + if not os.path.isabs(name) or '..' 
in name: + container_logs.remove(log) + logger.warning( + 'User provided a log file path that is not absolute: %s' % + name) + return container_logs + + def get_container_log_prefix(self, long_id, options): + assert isinstance(long_id, str) or unicode, "long_id is not a string" + assert 'name' in options and 'host_namespace' in options + name = options['name'] + name = (name if len(name) > 0 else long_id[:12]) + name = (name[1:] if name[0] == '/' else name) + return options['host_namespace'] + '/' + name diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/sahil.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/sahil.py new file mode 100644 index 00000000..aa2dc124 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/sahil.py @@ -0,0 +1,11 @@ +import subprocess + +proc = subprocess.Popen( + ['python', '-c', 'import pkg_resources; pkgs = [ (p.key, p.version) for p in pkg_resources.working_set]; print pkgs'], + #['sh', '-c', 'pip list'], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) +output, err = proc.communicate() + +if output: + print output diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/__init__.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/c b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/c new file mode 100644 index 00000000..b4d2a445 Binary files /dev/null and b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/c differ diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/config_container_crawler.plugin b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/config_container_crawler.plugin new file mode 100644 index 00000000..ba0f8bc5 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/config_container_crawler.plugin @@ -0,0 
+1,8 @@ +[Core] +Name = config_container +Module = config_container_crawler + +[Documentation] +Author = IBM +Version = 0.1 +Description = Config crawling function for containers diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/config_container_crawler.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/config_container_crawler.py new file mode 100644 index 00000000..0decb84b --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/config_container_crawler.py @@ -0,0 +1,65 @@ +import logging +import os +import utils.misc +from icrawl_plugin import IContainerCrawler +from utils.config_utils import crawl_config_files + +logger = logging.getLogger('crawlutils') + + +class ConfigContainerCrawler(IContainerCrawler): + + def get_feature(self): + return 'config' + + def crawl( + self, + container_id=None, + avoid_setns=False, + root_dir='/', + exclude_dirs=[ + '/dev', + '/proc', + '/mnt', + '/tmp', + '/var/cache', + '/usr/share/man', + '/usr/share/doc', + '/usr/share/mime'], + known_config_files=[ + '/etc/passwd', + '/etc/group', + '/etc/hosts', + '/etc/hostname', + '/etc/mtab', + '/etc/fstab', + '/etc/aliases', + '/etc/ssh/ssh_config', + '/etc/ssh/sshd_config', + '/etc/sudoers'], + discover_config_files=False, + **kwargs): + logger.debug('Crawling config for container %s' % container_id) + + if avoid_setns: + rootfs_dir = '/rootfs_local' + exclude_dirs = [utils.misc.join_abs_paths(rootfs_dir, d) + for d in exclude_dirs] + return list(crawl_config_files( + root_dir=utils.misc.join_abs_paths(rootfs_dir, root_dir), + exclude_dirs=exclude_dirs, + root_dir_alias=root_dir, + known_config_files=known_config_files, + discover_config_files=discover_config_files)) + else: # in all other cases, including wrong mode set + real_root = os.open('/', os.O_RDONLY) + os.chroot('/rootfs_local') + os.chdir('/') + config_list = list(crawl_config_files(root_dir, + exclude_dirs, + None, + known_config_files, + 
discover_config_files)) + os.fchdir(real_root) + os.chroot('.') + return config_list diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/config_host_crawler.plugin b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/config_host_crawler.plugin new file mode 100644 index 00000000..4b8ff556 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/config_host_crawler.plugin @@ -0,0 +1,8 @@ +[Core] +Name = config_host +Module = config_host_crawler + +[Documentation] +Author = IBM +Version = 0.1 +Description = Config crawling function for hosts diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/config_host_crawler.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/config_host_crawler.py new file mode 100644 index 00000000..0a861d79 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/config_host_crawler.py @@ -0,0 +1,43 @@ +import logging + +from icrawl_plugin import IHostCrawler +from utils.config_utils import crawl_config_files + +logger = logging.getLogger('crawlutils') + + +class ConfigHostCrawler(IHostCrawler): + + def get_feature(self): + return 'config' + + def crawl( + self, + root_dir='/', + exclude_dirs=[ + '/dev', + '/proc', + '/mnt', + '/tmp', + '/var/cache', + '/usr/share/man', + '/usr/share/doc', + '/usr/share/mime'], + known_config_files=[ + '/etc/passwd', + '/etc/group', + '/etc/hosts', + '/etc/hostname', + '/etc/mtab', + '/etc/fstab', + '/etc/aliases', + '/etc/ssh/ssh_config', + '/etc/ssh/sshd_config', + '/etc/sudoers'], + discover_config_files=False, + **kwargs): + return crawl_config_files( + root_dir=root_dir, + exclude_dirs=exclude_dirs, + known_config_files=known_config_files, + discover_config_files=discover_config_files) diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/connection_container_crawler.plugin 
b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/connection_container_crawler.plugin new file mode 100644 index 00000000..876aa5a4 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/connection_container_crawler.plugin @@ -0,0 +1,8 @@ +[Core] +Name = connection_container +Module = connection_container_crawler + +[Documentation] +Author = IBM +Version = 0.1 +Description = "Connection crawling function for containers" diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/connection_container_crawler.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/connection_container_crawler.py new file mode 100644 index 00000000..77810e2d --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/connection_container_crawler.py @@ -0,0 +1,22 @@ +import logging + +from icrawl_plugin import IContainerCrawler +from utils.connection_utils import crawl_connections + +logger = logging.getLogger('crawlutils') + + +class ConnectionContainerCrawler(IContainerCrawler): + + def get_feature(self): + return 'connection' + + def crawl(self, container_id, avoid_setns=False, **kwargs): + logger.debug( + 'Crawling %s for container %s' % + (self.get_feature(), container_id)) + + if avoid_setns: + raise NotImplementedError('avoidsetns mode not implemented') + else: # in all other cases, including wrong mode set + return crawl_connections() diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/connection_host_crawler.plugin b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/connection_host_crawler.plugin new file mode 100644 index 00000000..dc301888 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/connection_host_crawler.plugin @@ -0,0 +1,8 @@ +[Core] +Name = connection_host +Module = connection_host_crawler + +[Documentation] +Author = IBM +Version = 0.1 +Description = "Connection crawling function for hosts" diff 
--git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/connection_host_crawler.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/connection_host_crawler.py new file mode 100644 index 00000000..312eca0a --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/connection_host_crawler.py @@ -0,0 +1,17 @@ +import logging + +from icrawl_plugin import IHostCrawler +from utils.connection_utils import crawl_connections + +logger = logging.getLogger('crawlutils') + + +class ConnectionHostCrawler(IHostCrawler): + + def get_feature(self): + return 'connection' + + def crawl(self, **kwargs): + logger.debug('Crawling %s' % (self.get_feature())) + + return crawl_connections() diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/connection_vm_crawler.plugin b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/connection_vm_crawler.plugin new file mode 100644 index 00000000..fc25c7cb --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/connection_vm_crawler.plugin @@ -0,0 +1,8 @@ +[Core] +Name = connection_vm +Module = connection_vm_crawler + +[Documentation] +Author = IBM +Version = 0.1 +Description = "Connection crawling function for VMs" diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/connection_vm_crawler.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/connection_vm_crawler.py new file mode 100644 index 00000000..8e6caaa4 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/connection_vm_crawler.py @@ -0,0 +1,50 @@ +import logging + +import psutil + +from icrawl_plugin import IVMCrawler +from utils.connection_utils import crawl_single_connection + +try: + import psvmi +except ImportError: + psvmi = None + +logger = logging.getLogger('crawlutils') + + +class ConnectionVmCrawler(IVMCrawler): + + def get_feature(self): + return 'connection' + + def crawl(self, vm_desc, 
**kwargs): + created_since = -1 + + if psvmi is None: + raise NotImplementedError() + else: + (domain_name, kernel_version, distro, arch) = vm_desc + # XXX: this has to be read from some cache instead of + # instead of once per plugin/feature + vm_context = psvmi.context_init( + domain_name, domain_name, kernel_version, distro, arch) + proc_list = psvmi.process_iter(vm_context) + + for p in proc_list: + pid = (p.pid() if hasattr(p.pid, '__call__') else p.pid) + status = (p.status() if hasattr(p.status, '__call__' + ) else p.status) + if status == psutil.STATUS_ZOMBIE: + continue + + create_time = ( + p.create_time() if hasattr( + p.create_time, + '__call__') else p.create_time) + name = (p.name() if hasattr(p.name, '__call__') else p.name) + + if create_time <= created_since: + continue + for conn in p.get_connections(): + yield crawl_single_connection(conn, pid, name) diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/cpu_container_crawler.plugin b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/cpu_container_crawler.plugin new file mode 100644 index 00000000..d4a8501b --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/cpu_container_crawler.plugin @@ -0,0 +1,8 @@ +[Core] +Name = cpu_container +Module = cpu_container_crawler + +[Documentation] +Author = IBM +Version = 0.1 +Description = "Cpu crawling function for containers" diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/cpu_container_crawler.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/cpu_container_crawler.py new file mode 100644 index 00000000..4ce5b2c8 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/cpu_container_crawler.py @@ -0,0 +1,172 @@ +import logging +import re +import time +import os +import psutil + +from icrawl_plugin import IContainerCrawler +from utils.features import CpuFeature + +logger = logging.getLogger('crawlutils') + + +class 
CpuContainerCrawler(IContainerCrawler): + + """ + To calculate rates like packets sent per second, we need to + store the last measurement. We store it in this dictionary. + """ + + def __init__(self): + self._cached_values = {} + + def _cache_put_value(self, key, value): + self._cached_values[key] = (value, time.time()) + + def _cache_get_value(self, key): + if key in self._cached_values: + return self._cached_values[key] + else: + return None, None + + def _save_container_cpu_times(self, container_long_id, times): + cache_key = container_long_id + self._cache_put_value(cache_key, times) + + def _get_prev_container_cpu_times(self, container_long_id): + cache_key = container_long_id + return self._cache_get_value(cache_key) + + def _get_cgroup_dir(self, devlist=[]): + for dev in devlist: + paths = [os.path.join('/cgroup/', dev), + os.path.join('/sys/fs/cgroup/', dev)] + for path in paths: + if os.path.ismount(path): + return path + + for dev in devlist: + # Try getting the mount point from /proc/mounts + for l in open('/proc/mounts', 'r'): + _type, mnt, _, _, _, _ = l.split(' ') + if _type == 'cgroup' and mnt.endswith('cgroup/' + dev): + return mnt + + raise ValueError('Can not find the cgroup dir') + + def get_cpu_cgroup_path(self, node='cpuacct.usage'): + # In kernels 4.x, the node is actually called 'cpu,cpuacct' + cgroup_dir = self._get_cgroup_dir(['cpuacct', 'cpu,cpuacct']) + return os.path.join(cgroup_dir, node) + + def get_feature(self): + return 'cpu' + + def crawl(self, container_id, avoid_setns=False, per_cpu=False, **kwargs): + logger.debug( + 'Crawling %s for container %s' % + (self.get_feature(), container_id)) + + host_cpu_feature = {} + for (idx, cpu) in enumerate(psutil.cpu_times_percent(percpu=True)): + host_cpu_feature[idx] = CpuFeature( + cpu.idle, + cpu.nice, + cpu.user, + cpu.iowait, + cpu.system, + cpu.irq, + cpu.steal, + 100 - int(cpu.idle), + ) + + real_root = os.open('/', os.O_RDONLY) + os.chroot('/sysfs_local') + os.chdir('/') + + if 
per_cpu: + stat_file_name = 'cpuacct.usage_percpu' + else: + stat_file_name = 'cpuacct.usage' + + (cpu_usage_t1, prev_time) = ( + self._get_prev_container_cpu_times(container_id)) + + if cpu_usage_t1: + logger.debug('Using previous cpu times for container %s' + % container_id) + interval = time.time() - prev_time + else: + logger.debug( + 'There are no previous cpu times for container %s ' + 'so we will be sleeping for 100 milliseconds' % + container_id) + + with open(self.get_cpu_cgroup_path(stat_file_name), + 'r') as f: + cpu_usage_t1 = f.readline().strip().split(' ') + interval = 0.1 # sleep for 100ms + time.sleep(interval) + + with open(self.get_cpu_cgroup_path(stat_file_name), + 'r') as f: + cpu_usage_t2 = f.readline().strip().split(' ') + + # Store the cpu times for the next crawl + + self._save_container_cpu_times(container_id, + cpu_usage_t2) + + cpu_user_system = {} + path = self.get_cpu_cgroup_path('cpuacct.stat') + with open(path, 'r') as f: + for line in f: + m = re.search(r"(system|user)\s+(\d+)", line) + if m: + cpu_user_system[m.group(1)] = \ + float(m.group(2)) + + os.fchdir(real_root) + os.chroot('.') + + for (index, cpu_usage_ns) in enumerate(cpu_usage_t1): + usage_secs = (float(cpu_usage_t2[index]) - + float(cpu_usage_ns)) / float(1e9) + + # Interval is never 0 because of step 0 (forcing a sleep) + + usage_percent = usage_secs / interval * 100.0 + if usage_percent > 100.0: + usage_percent = 100.0 + idle = 100.0 - usage_percent + + # Approximation 1 + + user_plus_sys_hz = cpu_user_system['user'] \ + + cpu_user_system['system'] + if user_plus_sys_hz == 0: + # Fake value to avoid divide by zero. 
+ user_plus_sys_hz = 0.1 + user = usage_percent * (cpu_user_system['user'] / + user_plus_sys_hz) + system = usage_percent * (cpu_user_system['system'] / + user_plus_sys_hz) + + # Approximation 2 + + nice = host_cpu_feature[index][1] + wait = host_cpu_feature[index][3] + interrupt = host_cpu_feature[index][5] + steal = host_cpu_feature[index][6] + feature_key = '{0}-{1}'.format('cpu', index) + feature_attributes = CpuFeature( + idle, + nice, + user, + wait, + system, + interrupt, + steal, + usage_percent, + ) + yield (feature_key, feature_attributes, 'cpu') diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/cpu_host_crawler.plugin b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/cpu_host_crawler.plugin new file mode 100644 index 00000000..f993c9ca --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/cpu_host_crawler.plugin @@ -0,0 +1,8 @@ +[Core] +Name = cpu_host +Module = cpu_host_crawler + +[Documentation] +Author = IBM +Version = 0.1 +Description = "Cpu crawling function for hosts" diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/cpu_host_crawler.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/cpu_host_crawler.py new file mode 100644 index 00000000..449d1595 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/cpu_host_crawler.py @@ -0,0 +1,31 @@ +import logging + +import psutil + +from icrawl_plugin import IHostCrawler +from utils.features import CpuFeature + +logger = logging.getLogger('crawlutils') + + +class CpuHostCrawler(IHostCrawler): + + def get_feature(self): + return 'cpu' + + def crawl(self, **kwargs): + logger.debug('Crawling %s' % (self.get_feature())) + + for (idx, cpu) in enumerate(psutil.cpu_times_percent(percpu=True)): + feature_attributes = CpuFeature( + cpu.idle, + cpu.nice, + cpu.user, + cpu.iowait, + cpu.system, + cpu.irq, + cpu.steal, + 100 - int(cpu.idle), + ) + feature_key = 
'{0}-{1}'.format('cpu', idx) + yield (feature_key, feature_attributes, 'cpu') diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/cpu_vm_crawler.plugin b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/cpu_vm_crawler.plugin new file mode 100644 index 00000000..cf30eb4e --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/cpu_vm_crawler.plugin @@ -0,0 +1,8 @@ +[Core] +Name = cpu_vm +Module = cpu_vm_crawler + +[Documentation] +Author = IBM +Version = 0.1 +Description = "Cpu crawling function for VMs" diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/cpu_vm_crawler.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/cpu_vm_crawler.py new file mode 100644 index 00000000..042aa0d2 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/cpu_vm_crawler.py @@ -0,0 +1,20 @@ +from icrawl_plugin import IVMCrawler +import logging + +# External dependencies that must be pip install'ed separately + +try: + import psvmi +except ImportError: + psvmi = None + +logger = logging.getLogger('crawlutils') + + +class cpu_vm_crawler(IVMCrawler): + + def get_feature(self): + return 'cpu' + + def crawl(self, vm_desc, **kwargs): + raise NotImplementedError('Unsupported crawl mode') diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/ctprobe_container_crawler.plugin b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/ctprobe_container_crawler.plugin new file mode 100644 index 00000000..1d6b8ae6 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/ctprobe_container_crawler.plugin @@ -0,0 +1,8 @@ +[Core] +Name = ctprobe_container +Module = ctprobe_container_crawler + +[Documentation] +Author = IBM +Version = 0.1 +Description = Crawling function for containers to start conntrackprobe diff --git 
a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/ctprobe_container_crawler.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/ctprobe_container_crawler.py new file mode 100644 index 00000000..5f3864fa --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/ctprobe_container_crawler.py @@ -0,0 +1,438 @@ + +import errno +import glob +import json +import logging +import os +import pwd +import signal +import time + +from collections import namedtuple + +import netifaces +import psutil +import utils.dockerutils +import requests_unixsocket + +from icrawl_plugin import IContainerCrawler +from utils.ethtool import ethtool_get_peer_ifindex +from utils.namespace import run_as_another_namespace +from utils.process_utils import start_child +from utils.socket_utils import if_indextoname + +logger = logging.getLogger('crawlutils') + +PeerInterface = namedtuple('PeerInterface', ['peer_ifindex', 'ip_addresses']) +NetlinkFeature = namedtuple('NetlinkFeature', ['data']) + +DEFAULT_UNIX_PATH = '/var/run/conntrackprobe.sock' + + +class ConntrackProbeClient(object): + """ Client class for talking to the conntrack probe """ + def __init__(self, sockpath=DEFAULT_UNIX_PATH): + self.sockpath = sockpath + + def add_collector(self, url, ipaddresses, ifname): + """ + Add a collector for the given IP addresses and tied to the given + interface. 
+ """ + code, content = self.send_request('add_collector', + [url, ipaddresses, ifname]) + if code == 200: + return True + else: + raise Exception('HTTP Error %d: %s' % (code, content['error'])) + + def send_request(self, method, params): + req = { + 'jsonrpc': '2.0', + 'method': method, + 'params': params, + 'id': 1, + } + sp = self.sockpath.replace('/', '%2f') + session = requests_unixsocket.Session() + r = session.get('http+unix://%s' % sp, data=json.dumps(req)) + + return r.status_code, json.loads(r.content) + + +class CTProbeContainerCrawler(IContainerCrawler): + # Class for acquiring netlink data via a conntrackprobe + + BIND_ADDRESS = '127.0.0.1' + STALE_FILE_TIMEOUT = 3600 + + # whether the conntrackprobe process has been started + ctprobe_pid = 0 + + # Interfaces for which conntrackprobe has been configured. + # This is a list of interfaces for which conntrackprobe + # has been configured. + ifaces_monitored = [] + + # Since we don't get notified when a container dies + # we need to periodically check the interfaces on the host + # against those in ctprobes_monitored. + next_cleanup = 0 + + def get_feature(self): + return 'ctprobe' + + def setup_outputdir(self, output_dir, uid, gid): + """ + If necessary create or change ownership of the output directory. 
+ """ + if not os.path.exists(output_dir): + try: + os.makedirs(output_dir) + except Exception as ex: + logger.error('Could not created dir %s : %s' % + (output_dir, str(ex))) + return False + + try: + os.chown(output_dir, uid, gid) + except Exception as ex: + logger.error('Could not change ownership of %s: %s' % + (output_dir, str(ex))) + return False + + return True + + def _get_user(self, **kwargs): + """ Get the deprivileged user we are supposed to use """ + ctprobe_user = kwargs.get('ctprobe_user', 'nobody') + try: + passwd = pwd.getpwnam(ctprobe_user) + return ctprobe_user, passwd + except Exception as ex: + logger.error('Could not find user %s on this system: %s' % + (ctprobe_user, ex)) + return ctprobe_user, None + + def start_ctprobe(self, sockpath=DEFAULT_UNIX_PATH, **kwargs): + """ + Start the conntrackprobe process; + use the bindaddr and port as the collector. + This function returns the process ID of the started process + and an errcode (errno) in case an error was encountered in + the start_child function. + """ + ctprobe_user, passwd = self._get_user(**kwargs) + if not passwd: + return -1, errno.ENOENT + + params = ['conntrackprobe', + '--unix', sockpath, + '--user', ctprobe_user, + '--logfile', '/var/log/conntrackprobe.log'] + + try: + pid, errcode = start_child(params, [], [0, 1, 2], + [], + setsid=False, + max_close_fd=128) + logger.info('Started conntrackprobe as pid %d' % pid) + except Exception: + pid = -1 + errcode = errno.EINVAL + + return pid, errcode + + def terminate_ctprobe(self, pid): + """ + Terminate the conntrackprobe process given its PID + """ + proc = psutil.Process(pid=pid) + if proc and proc.name() == 'conntrackprobe': + os.kill(pid, signal.SIGKILL) + CTProbeContainerCrawler.ifaces_monitored = [] + + def check_ctprobe_alive(self, pid): + """ + Check whether the conntrackprobe with the given PID is still running + Returns True if the conntrackprobe is still alive, false otherwise. 
+ """ + gone = False + try: + proc = psutil.Process(pid=pid) + if not proc or proc.name() != 'conntrackprobe': + gone = True + except Exception: + gone = True + + if gone: + CTProbeContainerCrawler.ifaces_monitored = [] + return not gone + + def configure_ctprobe(self, ipaddresses, ifname, filepath, **kwargs): + """ + Configure the CTprobe to listen for data from the current + container and have it write the data to files specific to + that container. + """ + coll = 'file+json://%s' % filepath + + cpc = ConntrackProbeClient(DEFAULT_UNIX_PATH) + try: + cpc.add_collector(coll, ipaddresses, ifname) + except Exception as ex: + logger.error('Could not add collector: %s' % ex) + return False + + return True + + def start_netlink_collection(self, ifname, ip_addresses, container_id, + **kwargs): + """ + Start the collector and program conntrackprobe. Return False in case + of an error, True otherwise + """ + + ctprobe_user, passwd = self._get_user(**kwargs) + if not passwd: + return False + + ctprobe_output_dir = kwargs.get('ctprobe_output_dir', + '/tmp/crawler-ctprobe') + if not self.setup_outputdir(ctprobe_output_dir, passwd.pw_uid, + passwd.pw_gid): + return False + + filepattern = kwargs.get('output_filepattern', + 'conntrack-{ifname}-{timestamp}') + filepath = '%s/%s' % (ctprobe_output_dir, filepattern) + + success = self.configure_ctprobe(ip_addresses, ifname, + filepath, **kwargs) + if not success: + logger.warn('Terminating malfunctioning conntrackprobe') + self.terminate_ctprobe(CTProbeContainerCrawler.ctprobe_pid) + # setting the PID to zero will cause it to be restarted + # upon next crawl() + CTProbeContainerCrawler.ctprobe_pid = 0 + + return success + + def cleanup(self, **kwargs): + """ + Check the available interfaces on the host versus those ones we + have flow probes running and remove those where the interface has + disappeared. We clean up the files with netlink data that were + written for those interfaces. 
+ """ + devices = netifaces.interfaces() + + lst = [] + + for ifname in CTProbeContainerCrawler.ifaces_monitored: + if ifname not in devices: + self.remove_datafiles(ifname, **kwargs) + else: + lst.append(ifname) + + CTProbeContainerCrawler.ifaces_monitored = lst + + @classmethod + def remove_old_files(cls, **kwargs): + """ + Remove all old files that the crawler would never pick up. + """ + now = time.time() + output_dir = kwargs.get('ctprobe_output_dir', '/tmp/crawler-ctprobe') + + for filename in glob.glob('%s/*' % output_dir): + try: + statbuf = os.stat(filename) + # files older than 1 hour are removed + if statbuf.st_mtime + \ + CTProbeContainerCrawler.STALE_FILE_TIMEOUT < now: + os.remove(filename) + except Exception: + continue + + def crawl(self, container_id, avoid_setns=False, **kwargs): + """ + Start flow probe + data collector pairs on the interfaces of + the given container; collect the files that the collector + wrote and return their content. + """ + if not self.check_ctprobe_alive(CTProbeContainerCrawler.ctprobe_pid): + CTProbeContainerCrawler.ctprobe_pid = 0 + + if CTProbeContainerCrawler.ctprobe_pid == 0: + pid, errcode = self.start_ctprobe(**kwargs) + CTProbeContainerCrawler.ctprobe_pid = pid + if pid < 0: + logger.info('Starting conntrackprobe failed: %s' % + errcode) + + if CTProbeContainerCrawler.ctprobe_pid < 0: + return + + if time.time() > CTProbeContainerCrawler.next_cleanup: + # we won't run the cleanup of old files the first time + # but let the crawler do one full round of picking up + # relevant files and then only we do a proper cleaning + if CTProbeContainerCrawler.next_cleanup > 0: + CTProbeContainerCrawler.remove_old_files(**kwargs) + + self.cleanup(**kwargs) + CTProbeContainerCrawler.next_cleanup = time.time() + 30 + + ifnames = self.start_container_ctprobes(container_id, avoid_setns, + **kwargs) + + return self.collect_files(container_id, ifnames, **kwargs) + + def create_filenamepattern(self, **kwargs): + """ + Create the 
filename pattern for the files where the + socket-datacollector writes its data into. + """ + output_dir = kwargs.get('ctprobe_output_dir', '/tmp/crawler-ctprobe') + filepattern = kwargs.get('output_filepattern', + 'conntrack-{ifname}-{timestamp}') + filenamepattern = os.path.join(output_dir, filepattern) + + return filenamepattern.format(**kwargs) + + def remove_datafiles(self, ifname, **kwargs): + """ + Remove conntrack netlink data files that belong to an interface + """ + kwargs.update({ + 'container-id': '*', + 'ifname': ifname, + 'pid': '*', + 'timestamp': '*', + }) + filenamepattern = self.create_filenamepattern(**kwargs) + + for filename in glob.glob(filenamepattern): + try: + os.remove(filename) + except Exception: + pass + + def collect_files(self, container_id, ifnames, **kwargs): + """ + Collect the files with netlink data for the given interface + and container_id; + remove the files after reading their content + """ + for ifname in ifnames: + kwargs.update({ + 'container-id': container_id, + 'ifname': ifname, + 'pid': '*', + 'timestamp': '*', + }) + filenamepattern = self.create_filenamepattern(**kwargs) + + globs = glob.glob(filenamepattern) + for filename in globs: + # skip over files currently being written + if filename.endswith(".tmp"): + continue + try: + with open(filename, 'r') as f: + raw = f.read() + data = json.loads(raw) + except Exception as ex: + logger.info('Error reading datafile: %s' % ex) + continue + + try: + os.remove(filename) + except Exception as ex: + logger.info('Error removing datafile: %s' % ex) + continue + + feature_key = '{0}-{1}'.format('netlink', ifname) + + yield (feature_key, NetlinkFeature( + data + ), 'netlink') + + def start_container_ctprobes(self, container_id, avoid_setns=False, + **kwargs): + """ + Unless flow probes are already running on the interfaces of the + given container, we start them. 
+ """ + inspect = utils.dockerutils.exec_dockerinspect(container_id) + state = inspect['State'] + pid = str(state['Pid']) + + if avoid_setns: + raise NotImplementedError('avoidsetns mode not implemented') + + ifnames = [] + + try: + peers = run_as_another_namespace(pid, + ['net'], + self._crawl_in_system) + for peer in peers or []: + # in rare cases we get an interface without IP address + # assigned ot it, yet; we skip it for now and try again + # on the next crawl + if len(peer.ip_addresses) == 0: + continue + + try: + ifname = if_indextoname(peer.peer_ifindex) + except Exception: + continue + + ifnames.append(ifname) + + if ifname not in CTProbeContainerCrawler.ifaces_monitored: + ok = self.start_netlink_collection(ifname, + peer.ip_addresses, + container_id, + **kwargs) + if ok: + CTProbeContainerCrawler.ifaces_monitored.append(ifname) + except Exception as ex: + logger.info("Error: %s" % str(ex)) + + return ifnames + + def get_ifaddresses(self, ifname): + """ + Get the list of IPv4 addresses on an interface name; in + case none could be found yet, wait a bit and try again + """ + + for ctr in range(0, 4): + res = [] + + for data in netifaces.ifaddresses(ifname).get(2, []): + addr = data.get('addr') + if addr: + res.append(addr) + if len(res): + break + time.sleep(0.01) + + return res + + def _crawl_in_system(self): + for ifname in netifaces.interfaces(): + if ifname == 'lo': + continue + + try: + peer_ifindex = ethtool_get_peer_ifindex(ifname) + except Exception: + peer_ifindex = -1 + + if peer_ifindex >= 0: + yield PeerInterface(peer_ifindex, + self.get_ifaddresses(ifname)) diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/disk_container_crawler.plugin b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/disk_container_crawler.plugin new file mode 100644 index 00000000..732d9da1 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/disk_container_crawler.plugin @@ -0,0 +1,8 @@ +[Core] +Name 
= disk_container +Module = disk_container_crawler + +[Documentation] +Author = IBM +Version = 0.1 +Description = "Disk crawling function for containers" diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/disk_container_crawler.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/disk_container_crawler.py new file mode 100644 index 00000000..8d75d988 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/disk_container_crawler.py @@ -0,0 +1,22 @@ +import logging + +from icrawl_plugin import IContainerCrawler +from utils.disk_utils import crawl_disk_partitions + +logger = logging.getLogger('crawlutils') + + +class DiskContainerCrawler(IContainerCrawler): + + def get_feature(self): + return 'disk' + + def crawl(self, container_id, avoid_setns=False, **kwargs): + logger.debug( + 'Crawling %s for container %s' % + (self.get_feature(), container_id)) + + if avoid_setns: + raise NotImplementedError('avoidsetns mode not implemented') + else: # in all other cases, including wrong mode set + return crawl_disk_partitions() diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/disk_host_crawler.plugin b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/disk_host_crawler.plugin new file mode 100644 index 00000000..95d243fb --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/disk_host_crawler.plugin @@ -0,0 +1,8 @@ +[Core] +Name = disk_host +Module = disk_host_crawler + +[Documentation] +Author = IBM +Version = 0.1 +Description = "Disk crawling function for hosts" diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/disk_host_crawler.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/disk_host_crawler.py new file mode 100644 index 00000000..4523779b --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/disk_host_crawler.py @@ -0,0 +1,17 @@ +import logging + +from 
icrawl_plugin import IHostCrawler +from utils.disk_utils import crawl_disk_partitions + +logger = logging.getLogger('crawlutils') + + +class DiskHostCrawler(IHostCrawler): + + def get_feature(self): + return 'disk' + + def crawl(self, **kwargs): + logger.debug('Crawling %s' % (self.get_feature())) + + return crawl_disk_partitions() diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/disk_vm_crawler.plugin b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/disk_vm_crawler.plugin new file mode 100644 index 00000000..65607cda --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/disk_vm_crawler.plugin @@ -0,0 +1,8 @@ +[Core] +Name = disk_vm +Module = disk_vm_crawler + +[Documentation] +Author = IBM +Version = 0.1 +Description = "Disk crawling function for VMs" diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/disk_vm_crawler.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/disk_vm_crawler.py new file mode 100644 index 00000000..a8030f48 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/disk_vm_crawler.py @@ -0,0 +1,19 @@ +from icrawl_plugin import IVMCrawler + +import logging + +try: + import psvmi +except ImportError: + psvmi = None + +logger = logging.getLogger('crawlutils') + + +class disk_vm_crawler(IVMCrawler): + + def get_feature(self): + return 'disk' + + def crawl(self, vm_desc, **kwargs): + raise NotImplementedError() diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/dockerhistory_container_crawler.plugin b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/dockerhistory_container_crawler.plugin new file mode 100644 index 00000000..9bccdfd7 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/dockerhistory_container_crawler.plugin @@ -0,0 +1,8 @@ +[Core] +Name = dockerhistory_container +Module = dockerhistory_container_crawler + 
+[Documentation] +Author = IBM +Version = 0.1 +Description = "Dockerhistory crawling function for containers" diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/dockerhistory_container_crawler.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/dockerhistory_container_crawler.py new file mode 100644 index 00000000..8d5be3a1 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/dockerhistory_container_crawler.py @@ -0,0 +1,17 @@ +from utils.dockerutils import exec_docker_history +from icrawl_plugin import IContainerCrawler + +import logging + +logger = logging.getLogger('crawlutils') + + +class DockerhistoryContainerCrawler(IContainerCrawler): + + def get_feature(self): + return 'dockerhistory' + + def crawl(self, container_id, avoid_setns=False, **kwargs): + history = exec_docker_history(container_id) + image_id = history[0]['Id'] + yield (image_id, {'history': history}, 'dockerhistory') diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/dockerinspect_container_crawler.plugin b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/dockerinspect_container_crawler.plugin new file mode 100644 index 00000000..d62ffbb6 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/dockerinspect_container_crawler.plugin @@ -0,0 +1,8 @@ +[Core] +Name = dockerinspect_container +Module = dockerinspect_container_crawler + +[Documentation] +Author = IBM +Version = 0.1 +Description = "Dockerinspect crawling function for containers" diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/dockerinspect_container_crawler.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/dockerinspect_container_crawler.py new file mode 100644 index 00000000..0e851660 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/dockerinspect_container_crawler.py @@ -0,0 +1,16 @@ +from utils.dockerutils import 
exec_dockerinspect +from icrawl_plugin import IContainerCrawler + +import logging + +logger = logging.getLogger('crawlutils') + + +class DockerinspectContainerCrawler(IContainerCrawler): + + def get_feature(self): + return 'dockerinspect' + + def crawl(self, container_id, avoid_setns=False, **kwargs): + inspect = exec_dockerinspect(container_id) + yield (container_id, inspect, 'dockerinspect') diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/dockerps_host_crawler.plugin b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/dockerps_host_crawler.plugin new file mode 100644 index 00000000..3fca91e6 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/dockerps_host_crawler.plugin @@ -0,0 +1,8 @@ +[Core] +Name = dockerps_host +Module = dockerps_host_crawler + +[Documentation] +Author = IBM +Version = 0.1 +Description = "Dockerps crawling function for hosts" diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/dockerps_host_crawler.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/dockerps_host_crawler.py new file mode 100644 index 00000000..b92c954a --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/dockerps_host_crawler.py @@ -0,0 +1,27 @@ +import logging + +from icrawl_plugin import IHostCrawler +from utils.dockerutils import exec_dockerps +from utils.features import DockerPSFeature + +logger = logging.getLogger('crawlutils') + + +class DockerpsHostCrawler(IHostCrawler): + + def get_feature(self): + return 'dockerps' + + def crawl(self, **kwargs): + logger.debug('Crawling %s' % (self.get_feature())) + + for inspect in exec_dockerps(): + yield (inspect['Id'], DockerPSFeature._make([ + inspect['State']['Running'], + 0, + inspect['Image'], + [], + inspect['Config']['Cmd'], + inspect['Name'], + inspect['Id'], + ]), 'dockerps') diff --git 
a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/evil_container_crawler.plugin b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/evil_container_crawler.plugin new file mode 100644 index 00000000..c97c9cf9 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/evil_container_crawler.plugin @@ -0,0 +1,8 @@ +[Core] +Name = evil +Module = evil_container_crawler + +[Documentation] +Author = IBM +Version = 0.1 +Description = This plugin's operations should not succeed when run inside sandbox diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/evil_container_crawler.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/evil_container_crawler.py new file mode 100644 index 00000000..3def6c70 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/evil_container_crawler.py @@ -0,0 +1,159 @@ +import logging +import os +import psutil + +from icrawl_plugin import IContainerCrawler + +logger = logging.getLogger('crawlutils') + + +class EvilContainerCrawler(IContainerCrawler): + + def get_feature(self): + return 'evil' + + def crawl(self, container_id, avoid_setns=False, **kwargs): + if avoid_setns: + raise NotImplementedError() + return self.crawl_in_system() + + def crawl_in_system(self): + yield self.kill_proc() + yield self.trace_proc() + yield self.write_guest_rootfs() + yield self.rm_guest_rootfs() + yield self.nw() + + def kill_proc(self): + for p in psutil.process_iter(): + status = (p.status() if hasattr(p.status, '__call__' + ) else p.status) + if status == psutil.STATUS_ZOMBIE: + continue + name = (p.name() if hasattr(p.name, '__call__' + ) else p.name) + pid = (p.pid() if hasattr(p.pid, '__call__') else p.pid) + try: + username = (p.username() if hasattr(p, 'username') and + hasattr(p.username, '__call__') else + p.username) + if username == 'plugincont_user': + continue + p.kill() + except psutil.AccessDenied: + return ( + 'kill_proc', + 
{"pname": name, "pid": pid, "username": + username, "kill_status": "expected_failed"}, + 'evil' + ) + break + except: + continue + return ( + 'kill_proc', + {"pname": name, "pid": pid, "username": + username, "kill_status": "unexpected_succeeded"}, + 'evil' + ) + break + + def trace_proc(self): + for p in psutil.process_iter(): + status = (p.status() if hasattr(p.status, '__call__' + ) else p.status) + if status == psutil.STATUS_ZOMBIE: + continue + name = (p.name() if hasattr(p.name, '__call__' + ) else p.name) + pid = (p.pid() if hasattr(p.pid, '__call__') else p.pid) + try: + username = (p.username() if hasattr(p, 'username') and + hasattr(p.username, '__call__') else + p.username) + except: + username = 'unknown' + try: + import ptrace + import ptrace.debugger + import ptrace.error + debugger = ptrace.debugger.PtraceDebugger() + process = debugger.addProcess(int(pid), False) + ret = ( + 'trace_proc', + {"pname": name, "pid": pid, "username": username, + "trace_status": "unexpected_succeeded"}, + 'evil' + ) + process.detach() + break + except ptrace.error.PtraceError: + ret = ( + 'trace_proc', + {"pname": name, "pid": pid, "username": + username, "trace_status": "expected_failed"}, + 'evil' + ) + break + return ret + + def write_guest_rootfs(self): + real_root = os.open('/', os.O_RDONLY) + os.chroot('/rootfs_local') + filename = '/bin/ls' + try: + fd = open(filename, 'w') + ret = ( + 'write_to_file', + {"filename": filename, "write_status": "unexpected_succeeded"}, + 'evil' + ) + fd.close() + except IOError: + ret = ( + 'write_to_file', + {"filename": filename, "write_status": "expected_failed"}, + 'evil' + ) + os.fchdir(real_root) + os.chroot('.') + return ret + + def rm_guest_rootfs(self): + real_root = os.open('/', os.O_RDONLY) + os.chroot('/rootfs_local') + filename = '/bin/ls' + try: + os.remove(filename) + ret = ( + 'rm_file', + {"filename": filename, "rm_status": "unexpected_succeeded"}, + 'evil' + ) + fd.close() + except OSError: + ret = ( + 'rm_file', + 
{"filename": filename, "rm_status": "expected_failed"}, + 'evil' + ) + os.fchdir(real_root) + os.chroot('.') + return ret + + def nw(self): + hostname = 'www.google.com' + r = os.system("wget " + hostname) + if r != 0: + ret = ( + 'nw', + {"host": hostname, "nw_status": "expected_failed"}, + 'evil' + ) + else: + ret = ( + 'nw', + {"host": hostname, "nw_status": "unexpected_succeeded"}, + 'evil' + ) + return ret diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/file_container_crawler.plugin b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/file_container_crawler.plugin new file mode 100644 index 00000000..ad8133f9 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/file_container_crawler.plugin @@ -0,0 +1,8 @@ +[Core] +Name = file_container +Module = file_container_crawler + +[Documentation] +Author = IBM +Version = 0.1 +Description = File crawling function for containers diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/file_container_crawler.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/file_container_crawler.py new file mode 100644 index 00000000..49ca65c4 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/file_container_crawler.py @@ -0,0 +1,54 @@ +import logging + +import utils.dockerutils +import utils.misc +from icrawl_plugin import IContainerCrawler +from utils.file_utils import crawl_files +from utils.namespace import run_as_another_namespace + +logger = logging.getLogger('crawlutils') + + +class FileContainerCrawler(IContainerCrawler): + + def get_feature(self): + return 'file' + + def crawl( + self, + container_id=None, + avoid_setns=False, + root_dir='/', + exclude_dirs=[ + '/boot', + '/dev', + '/proc', + '/sys', + '/mnt', + '/tmp', + '/var/cache', + '/usr/share/man', + '/usr/share/doc', + '/usr/share/mime'], + **kwargs): + inspect = utils.dockerutils.exec_dockerinspect(container_id) + state = 
inspect['State'] + pid = str(state['Pid']) + logger.debug('Crawling file for container %s' % container_id) + + if avoid_setns: + rootfs_dir = utils.dockerutils.get_docker_container_rootfs_path( + container_id) + exclude_dirs = [utils.misc.join_abs_paths(rootfs_dir, d) + for d in exclude_dirs] + return crawl_files( + root_dir=utils.misc.join_abs_paths(rootfs_dir, root_dir), + exclude_dirs=exclude_dirs, + root_dir_alias=root_dir) + else: # in all other cases, including wrong mode set + return run_as_another_namespace(pid, + ['mnt'], + crawl_files, + root_dir, + exclude_dirs, + None) diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/file_host_crawler.plugin b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/file_host_crawler.plugin new file mode 100644 index 00000000..08996c84 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/file_host_crawler.plugin @@ -0,0 +1,8 @@ +[Core] +Name = file_host +Module = file_host_crawler + +[Documentation] +Author = IBM +Version = 0.1 +Description = File crawling function for the local host diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/file_host_crawler.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/file_host_crawler.py new file mode 100644 index 00000000..c176a72a --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/file_host_crawler.py @@ -0,0 +1,26 @@ +from icrawl_plugin import IHostCrawler +from utils.file_utils import crawl_files + + +class FileHostCrawler(IHostCrawler): + + def get_feature(self): + return 'file' + + def crawl( + self, + root_dir='/', + exclude_dirs=[ + '/boot', + '/dev', + '/proc', + '/sys', + '/mnt', + '/tmp', + '/var/cache', + '/usr/share/man', + '/usr/share/doc', + '/usr/share/mime'], + **kwargs): + return crawl_files(root_dir=root_dir, + exclude_dirs=exclude_dirs) diff --git 
a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/fprobe_container_crawler.plugin b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/fprobe_container_crawler.plugin new file mode 100644 index 00000000..6ade3330 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/fprobe_container_crawler.plugin @@ -0,0 +1,8 @@ +[Core] +Name = fprobe_container +Module = fprobe_container_crawler + +[Documentation] +Author = IBM +Version = 0.1 +Description = Crawling function for containers to start fprobe diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/fprobe_container_crawler.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/fprobe_container_crawler.py new file mode 100644 index 00000000..ed13898c --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/fprobe_container_crawler.py @@ -0,0 +1,478 @@ + +import errno +import glob +import json +import logging +import os +import pwd +import signal +import time +import pdb + +from collections import namedtuple + +import netifaces +import psutil + +from icrawl_plugin import IContainerCrawler +from utils.ethtool import ethtool_get_peer_ifindex +from utils.misc import get_uint_arg +from utils.process_utils import start_child +from utils.socket_utils import open_udp_port, if_indextoname + +logger = logging.getLogger('crawlutils') + +PeerInterface = namedtuple('PeerInterface', ['peer_ifindex', 'ip_addresses']) +Interface = namedtuple('Interface', ['ifname', 'ip_addresses']) +NetflowFeature = namedtuple('NetflowFeature', ['data']) +fprobe_out_dir = '/home/user1/fprobe-out' + + +class FprobeContainerCrawler(IContainerCrawler): + # Class for acquiring netflow data via a 'flow probe' (softflowd) + + BIND_ADDRESS = '127.0.0.1' + STALE_FILE_TIMEOUT = 3600 + + # Interface where netflow probes were started on. 
+ # This is a map with interface names and softflowd process IDs + fprobes_started = {} + + # Since we don't get notified when a container dies + # we need to periodically check the interfaces on the host + # against those in fprobes_started. + next_cleanup = 0 + + def get_feature(self): + return 'fprobe' + + @staticmethod + def is_my_fprobe(proc): + """ + Check whether the given process is an softflowd that was started by + this plugin. We only recognize softflowd with target address for + the collector being 127.0.0.1.We determine the parameter passed + after '-i', which is the name of the interface. + + Return the interface on which it is running on, None otherwise + """ + if proc.name() == 'softflowd': + params = proc.cmdline() + targetaddress = params[-1].split(':')[0] + if targetaddress == FprobeContainerCrawler.BIND_ADDRESS: + try: + i = params.index('-i') + logger.info('softflowd running on iface %s (pid=%s)' % + (params[i+1], proc.pid)) + return params[i+1] + except: + pass + return None + + @staticmethod + def is_my_fprobe_by_pid(pid): + """ + Given a pid, check whether 'my' flow probe is running there. Return + the name of the interface for which the flow probe is running, + None otherwise. + """ + try: + proc = psutil.Process(pid=pid) + return FprobeContainerCrawler.is_my_fprobe(proc) + except: + return None + + @staticmethod + def interfaces_with_fprobes(): + """ + Get a set of interfaces for which flow probe is already running + We walk the list of processes and check the 'softflowd' ones + and record those that could have been started by this plugin. + """ + res = {} + + for proc in psutil.process_iter(): + ifname = FprobeContainerCrawler.is_my_fprobe(proc) + if ifname: + res[ifname] = proc.pid + + return res + + def setup_outputdir(self, output_dir, uid, gid): + """ + If necessary create or change ownership of the output directory. 
+ """ + if not os.path.exists(output_dir): + try: + os.makedirs(output_dir) + except Exception as ex: + logger.error('Could not created dir %s : %s' % + (output_dir, str(ex))) + return False + + try: + os.chown(output_dir, uid, gid) + except Exception as ex: + logger.error('Could not change ownership of %s: %s' % + (output_dir, str(ex))) + return False + + return True + + def start_fprobe(self, ifname, user, bindaddr, port, **kwargs): + """ + Start the flow probe process on the given interface; + use the bindaddr and port as the collector. + This function returns the process ID of the started process + and an errcode (errno) in case an error was encountered in + the start_child function. + """ + maxlife_timeout = get_uint_arg('maxlife_timeout', 30, **kwargs) + netflow_version = get_uint_arg('netflow_version', 5, **kwargs) + if netflow_version not in [1, 5, 9, 10]: + logger.info('Unsupported netflow version was chosen: %d' % + netflow_version) + netflow_version = 5 + + terminate_process = kwargs.get('terminate_fprobe', 'FALSE').upper() + #setsid = terminate_process in ['0', 'FALSE'] + setsid = False + fprobe_bpf = kwargs.get('fprobe_bpf', '') + + params = ['softflowd', + '-i', ifname, + '-v', '%d' % netflow_version, + '-d', + '-t', 'maxlife=%d' % maxlife_timeout, + '-n', '%s:%d' % (bindaddr, port), + '-c', '/home/user1/softflowd.ctl'] + if len(fprobe_bpf.strip()): + params.insert(1, fprobe_bpf) + if netflow_version == 10: + params.insert(1, '-b') + try: + pid, errcode = start_child(params, [], [0, 1, 2], + [signal.SIGCHLD], + setsid=setsid, + max_close_fd=128) + logger.info('Started softflowd as pid %d' % pid) + except: + pid = -1 + errcode = errno.EINVAL + + return pid, errcode + + def start_collector(self, user, socket, output_dir, watch_pid, metadata, + **kwargs): + """ + Start the collector process; have it drop privileges by + switching to the given user; have it write the data to the + output_dir and use a filename pattern given by + filenamepattern; have it 
watch the process with the given + watch_pid + """ + #pdb.set_trace() + filepattern = kwargs.get('output_filepattern', + 'fprobe-{ifname}-{timestamp}') + + # '--user', user, + params = ['socket-datacollector', + '--sockfd', str(socket.fileno()), + '--dir', output_dir, + '--filepattern', filepattern, + '--watch-pid', str(watch_pid), + '--metadata', json.dumps(metadata), + '--md-filter', 'ip-addresses'] + #params = ['sh', '-c', + # '/usr/bin/python /usr/bin/socket-datacollector --sockfd ' + str(socket.fileno()) + ' --dir ' + output_dir + ' --filepattern ' + filepattern + ' --watch-pid ' + str(watch_pid) + ' --metadata ' + json.dumps(metadata) + ' --md-filter ip-addresses &'] + try: + pid, errcode = start_child(params, [socket.fileno()], [], + [signal.SIGCHLD], + setsid=True, + max_close_fd=128) + logger.info('Started collector as pid %d' % pid) + except: + pid = -1 + errcode = errno.EINVAL + + return pid, errcode + + def start_netflow_collection(self, ifname, ip_addresses, container_id, + **kwargs): + """ + Start the collector and the softflowd. Return None in case of an + error, the process ID of softflowd otherwise + + Note: Fprobe will terminate when the container ends. The collector + watches the softflowd via its PID and will terminate once + softflowd is gone. To enable this, we have to start the + collector after softflowd. Since this is relatively quick, + we won't miss any netflow packets in the collector. 
+ """ + #pdb.set_trace() + fprobe_user = kwargs.get('fprobe_user', 'user1') + try: + passwd = pwd.getpwnam(fprobe_user) + except Exception as ex: + logger.error('Could not find user %s on this system: %s' % + (fprobe_user, str(ex))) + return None + + fprobe_output_dir = kwargs.get('fprobe_output_dir', + fprobe_out_dir) + if not self.setup_outputdir(fprobe_output_dir, passwd.pw_uid, + passwd.pw_gid): + return None + + # Find an open port; we pass the port number for the flow probe and the + # file descriptor of the listening socket to the collector + bindaddr = FprobeContainerCrawler.BIND_ADDRESS + sock, port = open_udp_port(bindaddr, 40000, 65535) + if not sock: + return None + + #pdb.set_trace() + fprobe_pid, errcode = self.start_fprobe(ifname, fprobe_user, + bindaddr, port, + **kwargs) + + if fprobe_pid < 0: + logger.error('Could not start softflowd: %s' % + os.strerror(errcode)) + sock.close() + return None + + metadata = { + 'ifname': ifname, + 'ip-addresses': ip_addresses, + } + + #pdb.set_trace() + collector_pid, errcode = self.start_collector(fprobe_user, sock, + fprobe_output_dir, + fprobe_pid, + metadata, + **kwargs) + + sock.close() + + if collector_pid == -1: + logger.error('Could not start collector: %s' % + os.strerror(errcode)) + os.kill(fprobe_pid, signal.SIGKILL) + return None + + return fprobe_pid + + def cleanup(self, **kwargs): + """ + Check the available interfaces on the host versus those ones we + have flow probes running and remove those where the interface has + disappeared. We clean up the files with netflow data that were + written for those interfaces. + """ + devices = netifaces.interfaces() + + for ifname in FprobeContainerCrawler.fprobes_started.keys(): + if ifname not in devices: + del FprobeContainerCrawler.fprobes_started[ifname] + self.remove_datafiles(ifname, **kwargs) + + @classmethod + def remove_old_files(cls, **kwargs): + """ + Remove all old files that the crawler would never pick up. 
+ """ + now = time.time() + output_dir = kwargs.get('fprobe_output_dir', fprobe_out_dir) + + for filename in glob.glob('%s/*' % output_dir): + try: + statbuf = os.stat(filename) + # files older than 1 hour are removed + if statbuf.st_mtime + \ + FprobeContainerCrawler.STALE_FILE_TIMEOUT < now: + os.remove(filename) + except: + continue + + def crawl(self, container_id, avoid_setns=False, **kwargs): + """ + Start flow probe + data collector pairs on the interfaces of + the given container; collect the files that the collector + wrote and return their content. + """ + if time.time() > FprobeContainerCrawler.next_cleanup: + # we won't run the cleanup of old files the first time + # but let the crawler do one full round of picking up + # relevant files and then only we do a proper cleaning + if FprobeContainerCrawler.next_cleanup > 0: + FprobeContainerCrawler.remove_old_files(**kwargs) + + self.cleanup(**kwargs) + FprobeContainerCrawler.next_cleanup = time.time() + 30 + + ifnames = self.start_container_fprobes(container_id, avoid_setns, + **kwargs) + + return self.collect_files(container_id, ifnames, **kwargs) + + def create_filenamepattern(self, **kwargs): + """ + Create the filename pattern for the files where the + socket-datacollector writes its data into. 
+ """ + output_dir = kwargs.get('fprobe_output_dir', fprobe_out_dir) + filepattern = kwargs.get('output_filepattern', + 'fprobe-{ifname}-{timestamp}') + filenamepattern = os.path.join(output_dir, filepattern) + + return filenamepattern.format(**kwargs) + + def remove_datafiles(self, ifname, **kwargs): + """ + Remove netflow data files that belong to an interface + """ + kwargs.update({ + 'container-id': '*', + 'ifname': ifname, + 'pid': '*', + 'timestamp': '*', + }) + filenamepattern = self.create_filenamepattern(**kwargs) + + for filename in glob.glob(filenamepattern): + try: + os.remove(filename) + except: + pass + + def collect_files(self, container_id, ifnames, **kwargs): + """ + Collect the files with netflow data for the given interface + and container_id; + remove the files after reading their content + """ + for ifname in ifnames: + kwargs.update({ + 'container-id': container_id, + 'ifname': ifname, + 'pid': '*', + 'timestamp': '*', + }) + filenamepattern = self.create_filenamepattern(**kwargs) + + globs = glob.glob(filenamepattern) + for filename in globs: + # skip over files currently being written + if filename.endswith(".tmp"): + continue + try: + with open(filename, 'r') as f: + raw = f.read() + data = json.loads(raw) + except Exception as ex: + logger.info('Error reading datafile: %s' % str(ex)) + continue + + try: + os.remove(filename) + except Exception as ex: + logger.info('Error removing datafile: %s' % str(ex)) + continue + + feature_key = '{0}-{1}'.format('fprobe', ifname) + + yield (feature_key, NetflowFeature( + data + ), 'fprobe') + + def need_start_fprobe(self, ifname): + """ + Check whether we need to start a flow probe on this interface + We need to start it + - if no softflowd process is running on it. 
+ - if the process id now represents a different process + (pid reused) + """ + pid = FprobeContainerCrawler.fprobes_started.get(ifname) + if not pid: + return True + if ifname != FprobeContainerCrawler.is_my_fprobe_by_pid(pid): + # something different runs under this pid... + del FprobeContainerCrawler.fprobes_started[ifname] + return True + return False + + def start_container_fprobes(self, container_id, avoid_setns=False, + **kwargs): + """ + Unless flow probes are already running on the interfaces of the + given container, we start them. + """ + if avoid_setns: + raise NotImplementedError('avoidsetns mode not implemented') + + ifnames = [] + + try: + ifaces = self._crawl_in_system() + for iface in ifaces: + # in rare cases we get an interface without IP address + # assigned ot it, yet; we skip it for now and try again + # on the next crawl + if len(iface.ip_addresses) == 0: + continue + + ifname = iface.ifname + ifnames.append(ifname) + + if self.need_start_fprobe(ifname): + logger.info('Need to start softflowd on %s' % ifname) + pid = self.start_netflow_collection(ifname, + iface.ip_addresses, + container_id, + **kwargs) + if pid: + FprobeContainerCrawler.fprobes_started[ifname] = pid + except Exception as ex: + logger.info("Error: %s" % str(ex)) + + return ifnames + + def get_ifaddresses(self, ifname): + """ + Get the list of IPv4 addresses on an interface name; in + case none could be found yet, wait a bit and try again + """ + + for ctr in range(0, 4): + res = [] + + for data in netifaces.ifaddresses(ifname).get(2, []): + addr = data.get('addr') + if addr: + res.append(addr) + if len(res): + break + time.sleep(0.01) + + return res + + def _crawl_in_system(self): + for ifname in netifaces.interfaces(): + if ifname == 'lo': + continue + + #try: + # peer_ifindex = ethtool_get_peer_ifindex(ifname) + #except Exception: + # peer_ifindex = -1 + + #if peer_ifindex >= 0: + # yield PeerInterface(peer_ifindex, + # self.get_ifaddresses(ifname)) + yield 
Interface(ifname,self.get_ifaddresses(ifname)) + +FprobeContainerCrawler.fprobes_started = \ + FprobeContainerCrawler.interfaces_with_fprobes() diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/interface_container_crawler.plugin b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/interface_container_crawler.plugin new file mode 100644 index 00000000..85396963 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/interface_container_crawler.plugin @@ -0,0 +1,8 @@ +[Core] +Name = interface_container +Module = interface_container_crawler + +[Documentation] +Author = IBM +Version = 0.1 +Description = "Interface crawling function for containers" diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/interface_container_crawler.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/interface_container_crawler.py new file mode 100644 index 00000000..6b79b160 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/interface_container_crawler.py @@ -0,0 +1,81 @@ +import logging +import time + +import psutil + +from icrawl_plugin import IContainerCrawler +from utils.features import InterfaceFeature + +logger = logging.getLogger('crawlutils') + + +class InterfaceContainerCrawler(IContainerCrawler): + + """ + To calculate rates like packets sent per second, we need to + store the last measurement. We store it in this dictionary. 
+ """ + + def __init__(self): + self._cached_values = {} + + def _cache_put_value(self, key, value): + self._cached_values[key] = (value, time.time()) + + def _cache_get_value(self, key): + if key in self._cached_values: + return self._cached_values[key] + else: + return None, None + + def _crawl_interface_counters(self): + _counters = psutil.net_io_counters(pernic=True) + for ifname in _counters: + interface = _counters[ifname] + curr_count = [ + interface.bytes_sent, + interface.bytes_recv, + interface.packets_sent, + interface.packets_recv, + interface.errout, + interface.errin, + ] + yield (ifname, curr_count) + + def get_feature(self): + return 'interface' + + def crawl(self, container_id, avoid_setns=False, **kwargs): + + logger.debug( + 'Crawling %s for container %s' % + (self.get_feature(), container_id)) + + if avoid_setns: + raise NotImplementedError('avoidsetns mode not implemented') + else: + interfaces = self._crawl_interface_counters() + + for (ifname, curr_count) in interfaces: + feature_key = '{0}-{1}'.format('interface', ifname) + + cache_key = '{0}-{1}-{2}'.format(container_id, + container_id, + feature_key) + + (prev_count, prev_time) = self._cache_get_value(cache_key) + self._cache_put_value(cache_key, curr_count) + + if prev_count and prev_time: + d = time.time() - prev_time + diff = [(a - b) / d for (a, b) in zip(curr_count, + prev_count)] + else: + + # first measurement + + diff = [0] * 6 + + feature_attributes = InterfaceFeature._make(diff) + + yield (feature_key, feature_attributes, 'interface') diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/interface_host_crawler.plugin b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/interface_host_crawler.plugin new file mode 100644 index 00000000..2b30607f --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/interface_host_crawler.plugin @@ -0,0 +1,8 @@ +[Core] +Name = interface_host +Module = interface_host_crawler + 
+[Documentation] +Author = IBM +Version = 0.1 +Description = "Interface crawling function for hosts" diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/interface_host_crawler.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/interface_host_crawler.py new file mode 100644 index 00000000..01cb47d6 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/interface_host_crawler.py @@ -0,0 +1,73 @@ +import logging +import time + +import psutil + +from icrawl_plugin import IHostCrawler +from utils.features import InterfaceFeature + +logger = logging.getLogger('crawlutils') + + +class InterfaceHostCrawler(IHostCrawler): + + """ + To calculate rates like packets sent per second, we need to + store the last measurement. We store it in this dictionary. + """ + + def __init__(self): + self._cached_values = {} + + def _cache_put_value(self, key, value): + self._cached_values[key] = (value, time.time()) + + def _cache_get_value(self, key): + if key in self._cached_values: + return self._cached_values[key] + else: + return None, None + + def _crawl_interface_counters(self): + _counters = psutil.net_io_counters(pernic=True) + for ifname in _counters: + interface = _counters[ifname] + curr_count = [ + interface.bytes_sent, + interface.bytes_recv, + interface.packets_sent, + interface.packets_recv, + interface.errout, + interface.errin, + ] + yield (ifname, curr_count) + + def get_feature(self): + return 'interface' + + def crawl(self, **kwargs): + + logger.debug('Crawling %s' % self.get_feature()) + + interfaces = self._crawl_interface_counters() + + for (ifname, curr_count) in interfaces: + feature_key = '{0}-{1}'.format('interface', ifname) + cache_key = '{0}-{1}'.format('INVM', feature_key) + + (prev_count, prev_time) = self._cache_get_value(cache_key) + self._cache_put_value(cache_key, curr_count) + + if prev_count and prev_time: + d = time.time() - prev_time + diff = [(a - b) / d for (a, b) in 
zip(curr_count, + prev_count)] + else: + + # first measurement + + diff = [0] * 6 + + feature_attributes = InterfaceFeature._make(diff) + + yield (feature_key, feature_attributes, 'interface') diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/interface_vm_crawler.plugin b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/interface_vm_crawler.plugin new file mode 100644 index 00000000..4c685f98 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/interface_vm_crawler.plugin @@ -0,0 +1,8 @@ +[Core] +Name = interface_vm +Module = interface_vm_crawler + +[Documentation] +Author = IBM +Version = 0.1 +Description = "Interface crawling function for VMs" diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/interface_vm_crawler.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/interface_vm_crawler.py new file mode 100644 index 00000000..69186de8 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/interface_vm_crawler.py @@ -0,0 +1,83 @@ +import logging +import time + +from icrawl_plugin import IVMCrawler +from utils.features import InterfaceFeature + +try: + import psvmi +except ImportError: + psvmi = None + + +logger = logging.getLogger('crawlutils') + + +class InterfaceVmCrawler(IVMCrawler): + + """ + To calculate rates like packets sent per second, we need to + store the last measurement. We store it in this dictionary. 
+ """ + + def __init__(self): + self._cached_values = {} + + def _cache_put_value(self, key, value): + self._cached_values[key] = (value, time.time()) + + def _cache_get_value(self, key): + if key in self._cached_values: + return self._cached_values[key] + else: + return None, None + + def _crawl_interface_counters(self, vm_context): + for interface in psvmi.interface_iter(vm_context): + curr_count = [ + interface.bytes_sent, + interface.bytes_recv, + interface.packets_sent, + interface.packets_recv, + interface.errout, + interface.errin, + ] + yield (interface.ifname, curr_count) + + def get_feature(self): + return 'interface' + + def crawl(self, vm_desc, **kwargs): + + logger.debug('Crawling %s' % self.get_feature()) + + if psvmi is None: + raise NotImplementedError() + else: + (domain_name, kernel_version, distro, arch) = vm_desc + # XXX: this has to be read from some cache instead of + # instead of once per plugin/feature + vm_context = psvmi.context_init( + domain_name, domain_name, kernel_version, distro, arch) + interfaces = self._crawl_interface_counters(vm_context) + + for (interface_name, curr_count) in interfaces: + feature_key = '{0}-{1}'.format('interface', interface_name) + cache_key = '{0}-{1}'.format('OUTVM', feature_key) + + (prev_count, prev_time) = self._cache_get_value(cache_key) + self._cache_put_value(cache_key, curr_count) + + if prev_count and prev_time: + d = time.time() - prev_time + diff = [(a - b) / d for (a, b) in zip(curr_count, + prev_count)] + else: + + # first measurement + + diff = [0] * 6 + + feature_attributes = InterfaceFeature._make(diff) + + yield (feature_key, feature_attributes, 'interface') diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/load_container_crawler.plugin b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/load_container_crawler.plugin new file mode 100644 index 00000000..eafdc984 --- /dev/null +++ 
b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/load_container_crawler.plugin @@ -0,0 +1,8 @@ +[Core] +Name = load_container +Module = load_container_crawler + +[Documentation] +Author = IBM +Version = 0.1 +Description = "Load crawling function for containers" diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/load_container_crawler.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/load_container_crawler.py new file mode 100644 index 00000000..f641a35a --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/load_container_crawler.py @@ -0,0 +1,29 @@ +import logging +import os + +from icrawl_plugin import IContainerCrawler +from utils.features import LoadFeature + +logger = logging.getLogger('crawlutils') + + +class LoadContainerCrawler(IContainerCrawler): + + def get_feature(self): + return 'load' + + def crawl_load(self): + load = os.getloadavg() + feature_key = 'load' + feature_attributes = LoadFeature(load[0], load[1], load[2]) + yield (feature_key, feature_attributes, 'load') + + def crawl(self, container_id, avoid_setns=False, **kwargs): + logger.debug( + 'Crawling %s for container %s' % + (self.get_feature(), container_id)) + + if avoid_setns: + raise NotImplementedError() + else: # in all other cases, including wrong mode set + return self.crawl_load() diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/load_host_crawler.plugin b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/load_host_crawler.plugin new file mode 100644 index 00000000..05571103 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/load_host_crawler.plugin @@ -0,0 +1,8 @@ +[Core] +Name = load_host +Module = load_host_crawler + +[Documentation] +Author = IBM +Version = 0.1 +Description = "Load crawling function for hosts" diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/load_host_crawler.py 
b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/load_host_crawler.py new file mode 100644 index 00000000..24fcd531 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/load_host_crawler.py @@ -0,0 +1,24 @@ +import logging +import os + +from icrawl_plugin import IHostCrawler +from utils.features import LoadFeature + +logger = logging.getLogger('crawlutils') + + +class LoadHostCrawler(IHostCrawler): + + def get_feature(self): + return 'load' + + def crawl_load(self): + load = os.getloadavg() + feature_key = 'load' + feature_attributes = LoadFeature(load[0], load[1], load[2]) + yield (feature_key, feature_attributes, 'load') + + def crawl(self, **kwargs): + logger.debug('Crawling %s' % (self.get_feature())) + + return self.crawl_load() diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/load_vm_crawler.plugin b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/load_vm_crawler.plugin new file mode 100644 index 00000000..38c932eb --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/load_vm_crawler.plugin @@ -0,0 +1,8 @@ +[Core] +Name = load_vm +Module = load_vm_crawler + +[Documentation] +Author = IBM +Version = 0.1 +Description = "Load crawling function for VMs" diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/load_vm_crawler.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/load_vm_crawler.py new file mode 100644 index 00000000..49b2dbbc --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/load_vm_crawler.py @@ -0,0 +1,18 @@ +from icrawl_plugin import IVMCrawler +import logging + +try: + import psvmi +except ImportError: + psvmi = None + +logger = logging.getLogger('crawlutils') + + +class load_vm_crawler(IVMCrawler): + + def get_feature(self): + return 'load' + + def crawl(self, vm_desc, **kwargs): + raise NotImplementedError() diff --git 
a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/memory_container_crawler.plugin b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/memory_container_crawler.plugin new file mode 100644 index 00000000..9bd753fe --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/memory_container_crawler.plugin @@ -0,0 +1,8 @@ +[Core] +Name = memory_container +Module = memory_container_crawler + +[Documentation] +Author = IBM +Version = 0.1 +Description = "Memory crawling function for containers" diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/memory_container_crawler.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/memory_container_crawler.py new file mode 100644 index 00000000..2257feda --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/memory_container_crawler.py @@ -0,0 +1,72 @@ +import logging +import os +import psutil + +from icrawl_plugin import IContainerCrawler +from utils.features import MemoryFeature + +logger = logging.getLogger('crawlutils') + + +class MemoryContainerCrawler(IContainerCrawler): + + def get_feature(self): + return 'memory' + + def _get_cgroup_dir(self, devlist=[]): + for dev in devlist: + paths = [os.path.join('/cgroup/', dev), + os.path.join('/sys/fs/cgroup/', dev)] + for path in paths: + if os.path.ismount(path): + return path + + # Try getting the mount point from /proc/mounts + for l in open('/proc/mounts', 'r'): + _type, mnt, _, _, _, _ = l.split(' ') + if _type == 'cgroup' and mnt.endswith('cgroup/' + dev): + return mnt + + raise ValueError('Can not find the cgroup dir') + + def get_memory_cgroup_path(self, node='memory.stat'): + return os.path.join(self._get_cgroup_dir(['memory']), node) + + def crawl(self, container_id, avoid_setns=False, **kwargs): + real_root = os.open('/', os.O_RDONLY) + os.chroot('/sysfs_local') + os.chdir('/') + + used = buffered = cached = free = 'unknown' + with 
open(self.get_memory_cgroup_path('memory.stat' + ), 'r') as f: + for line in f: + (key, value) = line.strip().split(' ') + if key == 'total_cache': + cached = int(value) + if key == 'total_active_file': + buffered = int(value) + + with open(self.get_memory_cgroup_path( + 'memory.limit_in_bytes'), 'r') as f: + limit = int(f.readline().strip()) + + with open(self.get_memory_cgroup_path( + 'memory.usage_in_bytes'), 'r') as f: + used = int(f.readline().strip()) + + os.fchdir(real_root) + os.chroot('.') + + host_free = psutil.virtual_memory().free + container_total = used + min(host_free, limit - used) + free = container_total - used + + if 'unknown' not in [used, free] and (free + used) > 0: + util_percentage = float(used) / (free + used) * 100.0 + else: + util_percentage = 'unknown' + + return [('memory', MemoryFeature(used, buffered, + cached, free, util_percentage), + 'memory')] diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/memory_host_crawler.plugin b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/memory_host_crawler.plugin new file mode 100644 index 00000000..5cd3d585 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/memory_host_crawler.plugin @@ -0,0 +1,8 @@ +[Core] +Name = memory_host +Module = memory_host_crawler + +[Documentation] +Author = IBM +Version = 0.1 +Description = "Memory crawling function for hosts" diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/memory_host_crawler.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/memory_host_crawler.py new file mode 100644 index 00000000..4454c7ee --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/memory_host_crawler.py @@ -0,0 +1,29 @@ +import logging + +import psutil + +from icrawl_plugin import IHostCrawler +from utils.features import MemoryFeature + +logger = logging.getLogger('crawlutils') + + +class MemoryHostCrawler(IHostCrawler): + + def 
get_feature(self): + return 'memory' + + def crawl(self, **kwargs): + logger.debug('Crawling %s' % (self.get_feature())) + + vm = psutil.virtual_memory() + + if (vm.free + vm.used) > 0: + util_percentage = float(vm.used) / (vm.free + vm.used) * 100.0 + else: + util_percentage = 'unknown' + + feature_attributes = MemoryFeature(vm.used, vm.buffers, vm.cached, + vm.free, util_percentage) + + return [('memory', feature_attributes, 'memory')] diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/memory_vm_crawler.plugin b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/memory_vm_crawler.plugin new file mode 100644 index 00000000..8f851230 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/memory_vm_crawler.plugin @@ -0,0 +1,8 @@ +[Core] +Name = memory_vm +Module = memory_vm_crawler + +[Documentation] +Author = IBM +Version = 0.1 +Description = "Memory crawling function for VMs" diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/memory_vm_crawler.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/memory_vm_crawler.py new file mode 100644 index 00000000..b1eb3a56 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/memory_vm_crawler.py @@ -0,0 +1,37 @@ +import logging + +from icrawl_plugin import IVMCrawler +from utils.features import MemoryFeature + +try: + import psvmi +except ImportError: + psvmi = None + +logger = logging.getLogger('crawlutils') + + +class MemoryVmCrawler(IVMCrawler): + + def get_feature(self): + return 'memory' + + def crawl(self, vm_desc, **kwargs): + if psvmi is None: + raise NotImplementedError() + else: + (domain_name, kernel_version, distro, arch) = vm_desc + # XXX: this has to be read from some cache instead of + # instead of once per plugin/feature + vm_context = psvmi.context_init( + domain_name, domain_name, kernel_version, distro, arch) + + sysmem = psvmi.system_memory_info(vm_context) + 
feature_attributes = MemoryFeature( + sysmem.memory_used, + sysmem.memory_buffered, + sysmem.memory_cached, + sysmem.memory_free, + (sysmem.memory_used * 100 / (sysmem.memory_used + + sysmem.memory_free))) + return [('memory', feature_attributes, 'memory')] diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/metric_container_crawler.plugin b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/metric_container_crawler.plugin new file mode 100644 index 00000000..7a16da31 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/metric_container_crawler.plugin @@ -0,0 +1,8 @@ +[Core] +Name = metric_container +Module = metric_container_crawler + +[Documentation] +Author = IBM +Version = 0.1 +Description = "Metric crawling function for containers" diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/metric_container_crawler.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/metric_container_crawler.py new file mode 100644 index 00000000..8a949202 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/metric_container_crawler.py @@ -0,0 +1,22 @@ +import logging + +from icrawl_plugin import IContainerCrawler +from utils.metric_utils import crawl_metrics + +logger = logging.getLogger('crawlutils') + + +class MetricContainerCrawler(IContainerCrawler): + + def get_feature(self): + return 'metric' + + def crawl(self, container_id, avoid_setns=False, **kwargs): + logger.debug( + 'Crawling %s for container %s' % + (self.get_feature(), container_id)) + + if avoid_setns: + raise NotImplementedError('avoidsetns mode not implemented') + else: # in all other cases, including wrong mode set + return list(crawl_metrics()) diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/metric_host_crawler.plugin b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/metric_host_crawler.plugin new file mode 100644 index 
00000000..2bf62970 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/metric_host_crawler.plugin @@ -0,0 +1,8 @@ +[Core] +Name = metric_host +Module = metric_host_crawler + +[Documentation] +Author = IBM +Version = 0.1 +Description = "Metric crawling function for hosts" diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/metric_host_crawler.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/metric_host_crawler.py new file mode 100644 index 00000000..336a3628 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/metric_host_crawler.py @@ -0,0 +1,17 @@ +import logging + +from icrawl_plugin import IHostCrawler +from utils.metric_utils import crawl_metrics + +logger = logging.getLogger('crawlutils') + + +class MetricHostCrawler(IHostCrawler): + + def get_feature(self): + return 'metric' + + def crawl(self, **kwargs): + logger.debug('Crawling %s' % (self.get_feature())) + + return crawl_metrics() diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/metric_vm_crawler.plugin b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/metric_vm_crawler.plugin new file mode 100644 index 00000000..39e39f18 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/metric_vm_crawler.plugin @@ -0,0 +1,8 @@ +[Core] +Name = metric_vm +Module = metric_vm_crawler + +[Documentation] +Author = IBM +Version = 0.1 +Description = "Metric crawling function for VMs" diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/metric_vm_crawler.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/metric_vm_crawler.py new file mode 100644 index 00000000..b1d97713 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/metric_vm_crawler.py @@ -0,0 +1,132 @@ +import logging +import time + +import psutil + +from icrawl_plugin import IVMCrawler +from utils.features import 
MetricFeature + +try: + import psvmi +except ImportError: + psvmi = None + +logger = logging.getLogger('crawlutils') + + +class MetricVmCrawler(IVMCrawler): + + """ + To calculate rates like packets sent per second, we need to + store the last measurement. We store it in this dictionary. + """ + + def __init__(self): + self._cached_values = {} + + def _cache_put_value(self, key, value): + self._cached_values[key] = (value, time.time()) + + def _cache_get_value(self, key): + if key in self._cached_values: + return self._cached_values[key] + else: + return None, None + + def _crawl_metrics_cpu_percent(self, process): + p = process + cpu_percent = 0 + + feature_key = '{0}-{1}'.format('process', p.ident()) + cache_key = '{0}-{1}'.format('OUTVM', feature_key) + + curr_proc_cpu_time, curr_sys_cpu_time = p.get_cpu_times() + + (cputimeList, timestamp) = self._cache_get_value(cache_key) + self._cache_put_value( + cache_key, [curr_proc_cpu_time, curr_sys_cpu_time]) + + if cputimeList is not None: + prev_proc_cpu_time = cputimeList[0] + prev_sys_cpu_time = cputimeList[1] + + if prev_proc_cpu_time and prev_sys_cpu_time: + if curr_proc_cpu_time == -1 or prev_proc_cpu_time == -1: + cpu_percent = -1 # unsupported for this VM + else: + if curr_sys_cpu_time == prev_sys_cpu_time: + cpu_percent = 0 + else: + cpu_percent = (float(curr_proc_cpu_time - + prev_proc_cpu_time) * 100 / + float(curr_sys_cpu_time - + prev_sys_cpu_time)) + + return cpu_percent + + def crawl(self, vm_desc, **kwargs): + + created_since = -1 + logger.debug('Crawling Metrics') + + if psvmi is None: + raise NotImplementedError() + else: + (domain_name, kernel_version, distro, arch) = vm_desc + # XXX: this has to be read from some cache instead of + # instead of once per plugin/feature + vm_context = psvmi.context_init( + domain_name, domain_name, kernel_version, distro, arch) + list = psvmi.process_iter(vm_context) + + for p in list: + create_time = ( + p.create_time() if hasattr( + p.create_time, + '__call__') 
else p.create_time) + if create_time <= created_since: + continue + + name = (p.name() if hasattr(p.name, '__call__' + ) else p.name) + pid = (p.pid() if hasattr(p.pid, '__call__') else p.pid) + status = (p.status() if hasattr(p.status, '__call__' + ) else p.status) + if status == psutil.STATUS_ZOMBIE: + continue + username = ( + p.username() if hasattr( + p.username, + '__call__') else p.username) + meminfo = ( + p.get_memory_info() if hasattr( + p.get_memory_info, + '__call__') else p.memory_info) + ioinfo = ( + p.get_io_counters() if hasattr( + p.get_io_counters, + '__call__') else p.io_counters) + + cpu_percent = self._crawl_metrics_cpu_percent(p) + + memory_percent = ( + p.get_memory_percent() if hasattr( + p.get_memory_percent, + '__call__') else p.memory_percent) + + feature_key = '{0}/{1}'.format(name, pid) + yield (feature_key, MetricFeature( + round(cpu_percent, 2), + round(memory_percent, 2), + name, + pid, + ioinfo.read_bytes, + meminfo.rss, + str(status), + username, + meminfo.vms, + ioinfo.write_bytes, + ), 'metric') + + def get_feature(self): + return 'metric' diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/os_container_crawler.plugin b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/os_container_crawler.plugin new file mode 100644 index 00000000..a99ea6fc --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/os_container_crawler.plugin @@ -0,0 +1,10 @@ +[Core] +Name = os_container +Module = os_container_crawler + +[Documentation] +Author = IBM +Version = 0.1 +Description = "OS crawling function for containers. Takes following optional arguments: + crawl_mode = {MOUNPOINT, OUTCONTAINER}. 
Former uses docker rootfs, latter setns()" + diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/os_container_crawler.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/os_container_crawler.py new file mode 100644 index 00000000..cb2d9437 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/os_container_crawler.py @@ -0,0 +1,26 @@ +import logging +import os +from icrawl_plugin import IContainerCrawler +from utils.os_utils import crawl_os, crawl_os_mountpoint + +logger = logging.getLogger('crawlutils') + + +class OSContainerCrawler(IContainerCrawler): + + def get_feature(self): + return 'os' + + def crawl(self, container_id, avoid_setns=False, **kwargs): + logger.debug('Crawling OS for container %s' % container_id) + + if avoid_setns: + return crawl_os_mountpoint('/rootfs_local') + else: # in all other cases, including wrong mode set + real_root = os.open('/', os.O_RDONLY) + os.chroot('/rootfs_local') + os.chdir('/') + os_info = crawl_os() + os.fchdir(real_root) + os.chroot('.') + return os_info diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/os_host_crawler.plugin b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/os_host_crawler.plugin new file mode 100644 index 00000000..2c2b7125 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/os_host_crawler.plugin @@ -0,0 +1,8 @@ +[Core] +Name = os_host +Module = os_host_crawler + +[Documentation] +Author = IBM +Version = 0.1 +Description = OS crawling function for the local host diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/os_host_crawler.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/os_host_crawler.py new file mode 100644 index 00000000..552d0b8f --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/os_host_crawler.py @@ -0,0 +1,14 @@ +from icrawl_plugin import IHostCrawler +from 
utils.os_utils import crawl_os, crawl_os_mountpoint + + +class OSHostCrawler(IHostCrawler): + + def get_feature(self): + return 'os' + + def crawl(self, root_dir='/', **kwargs): + if root_dir == '/': + return crawl_os() + else: + return crawl_os_mountpoint(root_dir) diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/os_vm_crawler.plugin b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/os_vm_crawler.plugin new file mode 100644 index 00000000..92db3d40 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/os_vm_crawler.plugin @@ -0,0 +1,8 @@ +[Core] +Name = os_vm +Module = os_vm_crawler + +[Documentation] +Author = IBM +Version = 0.1 +Description = OS crawling function for VMs diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/os_vm_crawler.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/os_vm_crawler.py new file mode 100644 index 00000000..bda8a380 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/os_vm_crawler.py @@ -0,0 +1,41 @@ +import logging + +from icrawl_plugin import IVMCrawler +from utils.features import OSFeature + +# External dependencies that must be pip install'ed separately + +try: + import psvmi +except ImportError: + psvmi = None + +logger = logging.getLogger('crawlutils') + + +class os_vm_crawler(IVMCrawler): + + def get_feature(self): + return 'os' + + def crawl(self, vm_desc, **kwargs): + if psvmi is None: + raise NotImplementedError() + else: + (domain_name, kernel_version, distro, arch) = vm_desc + # XXX: not good, context_init was being done once per VM + # in previous monolithic model, now it's once per plugin/feature + vm_context = psvmi.context_init( + domain_name, domain_name, kernel_version, distro, arch) + sys = psvmi.system_info(vm_context) + feature_attributes = OSFeature( + sys.boottime, + 'unknown', + sys.ipaddr, + sys.ostype, + sys.osversion, + sys.osrelease, + sys.osplatform + ) + 
feature_key = sys.ostype + return [(feature_key, feature_attributes, 'os')] diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/package_container_crawler.plugin b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/package_container_crawler.plugin new file mode 100644 index 00000000..0335c437 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/package_container_crawler.plugin @@ -0,0 +1,8 @@ +[Core] +Name = package_container +Module = package_container_crawler + +[Documentation] +Author = IBM +Version = 0.1 +Description = Package crawling function for containers diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/package_container_crawler.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/package_container_crawler.py new file mode 100644 index 00000000..d86a9785 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/package_container_crawler.py @@ -0,0 +1,46 @@ +import logging +import os + +from icrawl_plugin import IContainerCrawler +from utils.crawler_exceptions import CrawlError +from utils.misc import join_abs_paths +from utils.package_utils import crawl_packages + +logger = logging.getLogger('crawlutils') + + +class PackageContainerCrawler(IContainerCrawler): + + def get_feature(self): + return 'package' + + def crawl(self, container_id=None, avoid_setns=False, + root_dir='/', **kwargs): + logger.debug('Crawling packages for container %s' % container_id) + + if avoid_setns: + rootfs_dir = '/rootfs_local' + return crawl_packages( + root_dir=join_abs_paths(rootfs_dir, root_dir), + reload_needed=True) + else: # in all other cases, including wrong mode set + try: + print "in package plugin" + real_root = os.open('/', os.O_RDONLY) + os.chroot('/rootfs_local') + os.chdir('/') + pkg_list = list(crawl_packages(None, root_dir, 0, False)) + os.fchdir(real_root) + os.chroot('.') + return pkg_list + except CrawlError: + + # Retry the 
crawl avoiding the setns() syscall. This is + # needed for PPC where we can not jump into the container and + # run its apt or rpm commands. + + print "Got CrawlError in package plugin" + rootfs_dir = '/rootfs_local' + return crawl_packages( + root_dir=join_abs_paths(rootfs_dir, root_dir), + reload_needed=True) diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/package_host_crawler.plugin b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/package_host_crawler.plugin new file mode 100644 index 00000000..b0ad3d74 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/package_host_crawler.plugin @@ -0,0 +1,8 @@ +[Core] +Name = package_host +Module = package_host_crawler + +[Documentation] +Author = IBM +Version = 0.1 +Description = Package crawling function for hosts diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/package_host_crawler.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/package_host_crawler.py new file mode 100644 index 00000000..4460c83b --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/package_host_crawler.py @@ -0,0 +1,15 @@ +import logging + +from icrawl_plugin import IHostCrawler +from utils.package_utils import crawl_packages + +logger = logging.getLogger('crawlutils') + + +class PackageHostCrawler(IHostCrawler): + + def get_feature(self): + return 'package' + + def crawl(self, root_dir='/', **kwargs): + return crawl_packages(root_dir=root_dir, reload_needed=False) diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/process_container_crawler.plugin b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/process_container_crawler.plugin new file mode 100644 index 00000000..5e3761de --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/process_container_crawler.plugin @@ -0,0 +1,8 @@ +[Core] +Name = process_container +Module = 
process_container_crawler + +[Documentation] +Author = IBM +Version = 0.1 +Description = Process crawling function for containers diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/process_container_crawler.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/process_container_crawler.py new file mode 100644 index 00000000..7a901b9d --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/process_container_crawler.py @@ -0,0 +1,97 @@ +import logging + +import psutil + +from icrawl_plugin import IContainerCrawler +from utils.features import ProcessFeature + +logger = logging.getLogger('crawlutils') + + +class ProcessContainerCrawler(IContainerCrawler): + + def get_feature(self): + return 'process' + + def crawl(self, container_id, avoid_setns=False, **kwargs): + if avoid_setns: + raise NotImplementedError() + return self._crawl_in_system() + + def _crawl_in_system(self): + created_since = -1 + for p in psutil.process_iter(): + create_time = ( + p.create_time() if hasattr( + p.create_time, + '__call__') else p.create_time) + if create_time <= created_since: + continue + yield self._crawl_single_process(p) + + def _crawl_single_process(self, p): + """Returns a ProcessFeature""" + create_time = ( + p.create_time() if hasattr( + p.create_time, + '__call__') else p.create_time) + + name = (p.name() if hasattr(p.name, '__call__' + ) else p.name) + cmdline = (p.cmdline() if hasattr(p.cmdline, '__call__' + ) else p.cmdline) + pid = (p.pid() if hasattr(p.pid, '__call__') else p.pid) + status = (p.status() if hasattr(p.status, '__call__' + ) else p.status) + if status == psutil.STATUS_ZOMBIE: + cwd = 'unknown' # invalid + else: + try: + cwd = (p.cwd() if hasattr(p, 'cwd') and + hasattr(p.cwd, '__call__') else p.getcwd()) + except Exception: + logger.error('Error crawling process %s for cwd' + % pid, exc_info=True) + cwd = 'unknown' + ppid = (p.ppid() if hasattr(p.ppid, '__call__' + ) else p.ppid) + try: + if 
(hasattr(p, 'num_threads') and + hasattr(p.num_threads, '__call__')): + num_threads = p.num_threads() + else: + num_threads = p.get_num_threads() + except: + num_threads = 'unknown' + + try: + username = (p.username() if hasattr(p, 'username') and + hasattr(p.username, '__call__') else + p.username) + except: + username = 'unknown' + + if username == 'nobody': + return + + openfiles = [] + try: + for f in p.get_open_files(): + openfiles.append(f.path) + openfiles.sort() + except psutil.AccessDenied: + print "got psutil.AccessDenied" + openfiles = [] + + feature_key = '{0}/{1}'.format(name, pid) + return (feature_key, ProcessFeature( + str(' '.join(cmdline)), + create_time, + cwd, + name, + openfiles, + pid, + ppid, + num_threads, + username, + ), 'process') diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/process_container_crawler.py.org b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/process_container_crawler.py.org new file mode 100644 index 00000000..28332d29 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/process_container_crawler.py.org @@ -0,0 +1,99 @@ +import logging + +import psutil + +import utils.dockerutils +from icrawl_plugin import IContainerCrawler +from utils.features import ProcessFeature +from utils.namespace import run_as_another_namespace, ALL_NAMESPACES + +logger = logging.getLogger('crawlutils') + + +class ProcessContainerCrawler(IContainerCrawler): + + def get_feature(self): + return 'process' + + def crawl(self, container_id, avoid_setns=False, **kwargs): + inspect = utils.dockerutils.exec_dockerinspect(container_id) + state = inspect['State'] + pid = str(state['Pid']) + logger.debug('Crawling Processes for container %s' % container_id) + + if avoid_setns: + raise NotImplementedError() + + return run_as_another_namespace(pid, + ALL_NAMESPACES, + self._crawl_in_system) + + def _crawl_in_system(self): + created_since = -1 + for p in psutil.process_iter(): + 
create_time = ( + p.create_time() if hasattr( + p.create_time, + '__call__') else p.create_time) + if create_time <= created_since: + continue + yield self._crawl_single_process(p) + + def _crawl_single_process(self, p): + """Returns a ProcessFeature""" + create_time = ( + p.create_time() if hasattr( + p.create_time, + '__call__') else p.create_time) + + name = (p.name() if hasattr(p.name, '__call__' + ) else p.name) + cmdline = (p.cmdline() if hasattr(p.cmdline, '__call__' + ) else p.cmdline) + pid = (p.pid() if hasattr(p.pid, '__call__') else p.pid) + status = (p.status() if hasattr(p.status, '__call__' + ) else p.status) + if status == psutil.STATUS_ZOMBIE: + cwd = 'unknown' # invalid + else: + try: + cwd = (p.cwd() if hasattr(p, 'cwd') and + hasattr(p.cwd, '__call__') else p.getcwd()) + except Exception: + logger.error('Error crawling process %s for cwd' + % pid, exc_info=True) + cwd = 'unknown' + ppid = (p.ppid() if hasattr(p.ppid, '__call__' + ) else p.ppid) + try: + if (hasattr(p, 'num_threads') and + hasattr(p.num_threads, '__call__')): + num_threads = p.num_threads() + else: + num_threads = p.get_num_threads() + except: + num_threads = 'unknown' + + try: + username = (p.username() if hasattr(p, 'username') and + hasattr(p.username, '__call__') else + p.username) + except: + username = 'unknown' + + openfiles = [] + for f in p.get_open_files(): + openfiles.append(f.path) + openfiles.sort() + feature_key = '{0}/{1}'.format(name, pid) + return (feature_key, ProcessFeature( + str(' '.join(cmdline)), + create_time, + cwd, + name, + openfiles, + pid, + ppid, + num_threads, + username, + ), 'process') diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/process_host_crawler.plugin b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/process_host_crawler.plugin new file mode 100644 index 00000000..2c14e8ef --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/process_host_crawler.plugin @@ -0,0 
+1,8 @@ +[Core] +Name = process_host +Module = process_host_crawler + +[Documentation] +Author = IBM +Version = 0.1 +Description = Process crawling function for the local host diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/process_host_crawler.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/process_host_crawler.py new file mode 100644 index 00000000..27714b99 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/process_host_crawler.py @@ -0,0 +1,87 @@ +import logging + +import psutil + +from icrawl_plugin import IHostCrawler +from utils.features import ProcessFeature + +logger = logging.getLogger('crawlutils') + + +class ProcessHostCrawler(IHostCrawler): + + def get_feature(self): + return 'process' + + def crawl(self, **kwargs): + return self._crawl_in_system() + + def _crawl_in_system(self): + created_since = -1 + for p in psutil.process_iter(): + create_time = ( + p.create_time() if hasattr( + p.create_time, + '__call__') else p.create_time) + if create_time <= created_since: + continue + yield self._crawl_single_process(p) + + def _crawl_single_process(self, p): + """Returns a ProcessFeature""" + create_time = ( + p.create_time() if hasattr( + p.create_time, + '__call__') else p.create_time) + + name = (p.name() if hasattr(p.name, '__call__' + ) else p.name) + cmdline = (p.cmdline() if hasattr(p.cmdline, '__call__' + ) else p.cmdline) + pid = (p.pid() if hasattr(p.pid, '__call__') else p.pid) + status = (p.status() if hasattr(p.status, '__call__' + ) else p.status) + if status == psutil.STATUS_ZOMBIE: + cwd = 'unknown' # invalid + else: + try: + cwd = (p.cwd() if hasattr(p, 'cwd') and + hasattr(p.cwd, '__call__') else p.getcwd()) + except Exception: + logger.error('Error crawling process %s for cwd' + % pid, exc_info=True) + cwd = 'unknown' + ppid = (p.ppid() if hasattr(p.ppid, '__call__' + ) else p.ppid) + try: + if (hasattr(p, 'num_threads') and + hasattr(p.num_threads, 
'__call__')): + num_threads = p.num_threads() + else: + num_threads = p.get_num_threads() + except: + num_threads = 'unknown' + + try: + username = (p.username() if hasattr(p, 'username') and + hasattr(p.username, '__call__') else + p.username) + except: + username = 'unknown' + + openfiles = [] + for f in p.get_open_files(): + openfiles.append(f.path) + openfiles.sort() + feature_key = '{0}/{1}'.format(name, pid) + return (feature_key, ProcessFeature( + str(' '.join(cmdline)), + create_time, + cwd, + name, + openfiles, + pid, + ppid, + num_threads, + username, + ), 'process') diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/process_vm_crawler.plugin b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/process_vm_crawler.plugin new file mode 100644 index 00000000..7ba6c0c0 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/process_vm_crawler.plugin @@ -0,0 +1,8 @@ +[Core] +Name = process_vm +Module = process_vm_crawler + +[Documentation] +Author = IBM +Version = 0.1 +Description = Process crawling function for VMs diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/process_vm_crawler.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/process_vm_crawler.py new file mode 100644 index 00000000..8ee595b7 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/process_vm_crawler.py @@ -0,0 +1,98 @@ +import logging + +import psutil + +from icrawl_plugin import IVMCrawler +from utils.features import ProcessFeature + +try: + import psvmi +except ImportError: + psvmi = None + +logger = logging.getLogger('crawlutils') + + +class process_vm_crawler(IVMCrawler): + + def get_feature(self): + return 'process' + + def crawl(self, vm_desc, **kwargs): + if psvmi is None: + raise NotImplementedError() + else: + (domain_name, kernel_version, distro, arch) = vm_desc + # XXX: this has to be read from some cache instead of + # instead of once 
per plugin/feature + vm_context = psvmi.context_init( + domain_name, domain_name, kernel_version, distro, arch) + + created_since = -1 + for p in psvmi.process_iter(vm_context): + create_time = ( + p.create_time() if hasattr( + p.create_time, + '__call__') else p.create_time) + if create_time <= created_since: + continue + yield self._crawl_single_process(p) + + def _crawl_single_process(self, p): + """Returns a ProcessFeature""" + create_time = ( + p.create_time() if hasattr( + p.create_time, + '__call__') else p.create_time) + + name = (p.name() if hasattr(p.name, '__call__' + ) else p.name) + cmdline = (p.cmdline() if hasattr(p.cmdline, '__call__' + ) else p.cmdline) + pid = (p.pid() if hasattr(p.pid, '__call__') else p.pid) + status = (p.status() if hasattr(p.status, '__call__' + ) else p.status) + if status == psutil.STATUS_ZOMBIE: + cwd = 'unknown' # invalid + else: + try: + cwd = (p.cwd() if hasattr(p, 'cwd') and + hasattr(p.cwd, '__call__') else p.getcwd()) + except Exception: + logger.error('Error crawling process %s for cwd' + % pid, exc_info=True) + cwd = 'unknown' + ppid = (p.ppid() if hasattr(p.ppid, '__call__' + ) else p.ppid) + try: + if (hasattr(p, 'num_threads') and + hasattr(p.num_threads, '__call__')): + num_threads = p.num_threads() + else: + num_threads = p.get_num_threads() + except: + num_threads = 'unknown' + + try: + username = (p.username() if hasattr(p, 'username') and + hasattr(p.username, '__call__') else + p.username) + except: + username = 'unknown' + + openfiles = [] + for f in p.get_open_files(): + openfiles.append(f.path) + openfiles.sort() + feature_key = '{0}/{1}'.format(name, pid) + return (feature_key, ProcessFeature( + str(' '.join(cmdline)), + create_time, + cwd, + name, + openfiles, + pid, + ppid, + num_threads, + username, + ), 'process') diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/pythonpackage_container_crawler.plugin 
b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/pythonpackage_container_crawler.plugin new file mode 100644 index 00000000..0d45bb0b --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/pythonpackage_container_crawler.plugin @@ -0,0 +1,12 @@ +[Core] +Name = pythonpackage +Module = pythonpackage_container_crawler + +[Documentation] +Author = IBM +Version = 0.1 +Description = Crawler for Python PyPi packages for containers and images +Format = [(pkg_name, pkg_version)] + +[Options] +avoid_setns = True|False. Default is True. diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/pythonpackage_container_crawler.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/pythonpackage_container_crawler.py new file mode 100644 index 00000000..d852c42c --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/pythonpackage_container_crawler.py @@ -0,0 +1,114 @@ +import logging +import os +import re +import subprocess + + +from icrawl_plugin import IContainerCrawler + +logger = logging.getLogger('crawlutils') + + +class PythonPackageCrawler(IContainerCrawler): + + def get_feature(self): + return 'pythonpackage' + + def _crawl_files(self, path, extensions): + output = [] + if os.path.isdir(path): + for (root_dirpath, dirs, files) in os.walk(path): + output += [ + f for ext in extensions for f in files if f.endswith(ext)] + output += [ + d for ext in extensions for d in dirs if d.endswith(ext)] + return output + + def _get_packages_by_extension(self, mountpoint): + candidate_paths = [ + "usr/lib/", + "usr/share/", + "usr/local/lib/", + "usr/local/share/", + "usr/local/bundle/", + "var/lib/"] + + packages = [] + + for path in candidate_paths: + path = os.path.join(mountpoint, path) + packages += self._crawl_files(path, ['.egg-info', '.dist-info']) + + for pkg in packages: + pkg_name = None + name_parts = re.match( + r'(.*)-([\d\.]*)(\.egg-info|\.dist-info)', pkg) + if 
name_parts is not None: + pkg_name = name_parts.group(1) + pkg_version = name_parts.group(2) + else: + name_parts = re.match(r'(.*)(\.egg-info|\.dist-info)', pkg) + if name_parts is not None: + pkg_name = name_parts.group(1) + pkg_version = 'unknown' + # TODO: get version from 'Version:' field in such files + # ex: /usr/lib/python2.7/argparse.egg-info: Version: 1.2.1 + if pkg_name is not None: + yield ( + pkg_name, + {"pkgname": pkg_name, "pkgversion": pkg_version}, + 'pythonpackage') + + def _get_packages_by_cmd(self): + # better coverage with pkg_resources.working_set than + # pip list, pip freeze, pip.get_installed_distributions() + # but following throws child exception from + # namespace.py:run_as_another_namespace() + # with ERROR string index out of range + # but works fine in a standalalone python file: + # ['python', '-c', 'import pkg_resources; pkgs = + # [ (p.key, p.version) for p in pkg_resources.working_set]; + # print pkgs'], + + proc = subprocess.Popen( + ['sh', '-c', ' export LC_ALL=C; pip list'], + # othewrwise pip says locale.Error: unsupported locale setting + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + output, err = proc.communicate() + + if output: + pkg_list = output.strip('\n') + for pkg in pkg_list.split('\n'): + pkg_name = pkg.split()[0] + pkg_version = pkg.split()[1][1:-1] + yield ( + pkg_name, + {"pkgname": pkg_name, "pkgversion": pkg_version}, + 'pythonpackage') + + def _crawl_without_setns(self, container_id): + return self._get_packages_by_extension('/rootfs_local') + + def _crawl_in_system(self): + real_root = os.open('/', os.O_RDONLY) + os.chroot('/rootfs_local') + os.chdir('/') + + if self.get_packages_generic is True: + mountpoint = '/' + pkg_list = list(self._get_packages_by_extension(mountpoint)) + else: + pkg_list = list(self._get_packages_by_cmd()) + + os.fchdir(real_root) + os.chroot('.') + return pkg_list + + def crawl(self, container_id, avoid_setns=False, **kwargs): + + if avoid_setns: + return 
self._crawl_without_setns(container_id) + else: # in all other cases, including wrong mode set + self.get_packages_generic = True # can be made an arg to crawl() + return self._crawl_in_system() diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/rubypackage_container_crawler.plugin b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/rubypackage_container_crawler.plugin new file mode 100644 index 00000000..a4a38dc2 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/rubypackage_container_crawler.plugin @@ -0,0 +1,12 @@ +[Core] +Name = rubypackage +Module = rubypackage_container_crawler + +[Documentation] +Author = IBM +Version = 0.1 +Description = Crawler for Ruby GEM packages for containers and images +Format = [(pkg_name, pkg_version)] + +[Options] +avoid_setns = True|False. Default is True. diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/rubypackage_container_crawler.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/rubypackage_container_crawler.py new file mode 100644 index 00000000..ff1b8453 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/rubypackage_container_crawler.py @@ -0,0 +1,91 @@ +import logging +import os +import re +import subprocess + +from icrawl_plugin import IContainerCrawler + +logger = logging.getLogger('crawlutils') + + +class RubyPackageCrawler(IContainerCrawler): + + def get_feature(self): + return 'rubypackage' + + def _crawl_files(self, path, extension): + output = [] + if os.path.isdir(path): + for (root_dirpath, dirs, files) in os.walk(path): + output += [f for f in files if f.endswith(extension)] + return output + + def _get_packages_by_extension(self, mountpoint): + candidate_paths = [ + "usr/lib/", + "usr/share/", + "usr/local/lib/", + "usr/local/share/", + "usr/local/bundle/", + "var/lib/"] + + packages = [] + + for path in candidate_paths: + path = os.path.join(mountpoint, path) + 
packages += self._crawl_files(path, ".gemspec") + + for pkg in packages: + name_parts = re.match(r'(.*)-([\d\.]*)(\.gemspec)', pkg) + if name_parts is not None: + pkg_name = name_parts.group(1) + pkg_version = name_parts.group(2) + yield ( + pkg_name, + {"pkgname": pkg_name, "pkgversion": pkg_version}, + 'rubypackage') + + def _get_packages_by_cmd(self): + proc = subprocess.Popen( + ['sh', '-c', 'gem list'], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + output, err = proc.communicate() + + if output: + pkg_list = output.strip('\n') + if pkg_list: + for pkg in pkg_list.split('\n'): + pkg_name = pkg.split()[0] + pkg_versions = re.findall(r'[\d\.]+', pkg) + for pkg_version in pkg_versions: + yield ( + pkg_name, + {"pkgname": pkg_name, "pkgversion": pkg_version}, + 'rubypackage') + + def _crawl_without_setns(self, container_id): + return self._get_packages_by_extension('/rootfs_local') + + def _crawl_in_system(self): + real_root = os.open('/', os.O_RDONLY) + os.chroot('/rootfs_local') + os.chdir('/') + + if self.get_packages_generic is True: + mountpoint = '/' + pkg_list = list(self._get_packages_by_extension(mountpoint)) + else: + pkg_list = list(self._get_packages_by_cmd()) + + os.fchdir(real_root) + os.chroot('.') + + return pkg_list + + def crawl(self, container_id, avoid_setns=False, **kwargs): + if avoid_setns: + return self._crawl_without_setns(container_id) + else: # in all other cases, including wrong mode set + self.get_packages_generic = False # can be made an arg to crawl() + return self._crawl_in_system() diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/rubypackage_container_crawler.py.org b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/rubypackage_container_crawler.py.org new file mode 100644 index 00000000..7cd351da --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins/systems/rubypackage_container_crawler.py.org @@ -0,0 +1,94 @@ +import logging +import os +import re +import 
subprocess + +import utils.dockerutils + +from icrawl_plugin import IContainerCrawler +from utils.namespace import run_as_another_namespace, ALL_NAMESPACES + +logger = logging.getLogger('crawlutils') + + +class RubyPackageCrawler(IContainerCrawler): + + def get_feature(self): + return 'ruby-package' + + def _crawl_files(self, path, extension): + output = [] + if os.path.isdir(path): + for (root_dirpath, dirs, files) in os.walk(path): + output += [f for f in files if f.endswith(extension)] + return output + + def _get_packages_by_extension(self, mountpoint): + candidate_paths = [ + "usr/lib/", + "usr/share/", + "usr/local/lib/", + "usr/local/share/", + "usr/local/bundle/", + "var/lib/"] + + packages = [] + + for path in candidate_paths: + path = os.path.join(mountpoint, path) + packages += self._crawl_files(path, ".gemspec") + + for pkg in packages: + name_parts = re.match(r'(.*)-([\d\.]*)(\.gemspec)', pkg) + if name_parts is not None: + pkg_name = name_parts.group(1) + pkg_version = name_parts.group(2) + yield ( + pkg_name, + {"pkgname": pkg_name, "pkgversion": pkg_version}, + 'ruby-package') + + def _get_packages_by_cmd(self): + proc = subprocess.Popen( + ['sh', '-c', 'gem list'], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + output, err = proc.communicate() + + if output: + pkg_list = output.strip('\n') + if pkg_list: + for pkg in pkg_list.split('\n'): + pkg_name = pkg.split()[0] + pkg_versions = re.findall(r'[\d\.]+', pkg) + for pkg_version in pkg_versions: + yield ( + pkg_name, + {"pkgname": pkg_name, "pkgversion": pkg_version}, + 'ruby-package') + + def _crawl_without_setns(self, container_id): + mountpoint = utils.dockerutils.get_docker_container_rootfs_path( + container_id) + return self._get_packages_by_extension(mountpoint) + + def _crawl_in_system(self): + if self.get_packages_generic is True: + mountpoint = '/' + return self._get_packages_by_extension(mountpoint) + else: + return self._get_packages_by_cmd() + + def crawl(self, container_id, 
avoid_setns=False, **kwargs): + inspect = utils.dockerutils.exec_dockerinspect(container_id) + state = inspect['State'] + pid = str(state['Pid']) + logger.debug('Crawling OS for container %s' % container_id) + + if avoid_setns: + return self._crawl_without_setns(container_id) + else: # in all other cases, including wrong mode set + self.get_packages_generic = False # can be made an arg to crawl() + return run_as_another_namespace(pid, + ALL_NAMESPACES, + self._crawl_in_system) diff --git a/crawler/utils/plugincont/plugincont_img/crawler/plugins_manager.py b/crawler/utils/plugincont/plugincont_img/crawler/plugins_manager.py new file mode 100644 index 00000000..9425a061 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/plugins_manager.py @@ -0,0 +1,267 @@ +import logging + +from yapsy.PluginManager import PluginManager +import urlparse +import config_parser +from icrawl_plugin import IContainerCrawler, IVMCrawler, IHostCrawler +from iemit_plugin import IEmitter +from runtime_environment import IRuntimeEnvironment +from utils import misc +from utils.crawler_exceptions import RuntimeEnvironmentPluginNotFound + +logger = logging.getLogger('crawlutils') + +# default runtime environment: cloudsigth and plugins in 'plugins/' +runtime_env = None + +container_crawl_plugins = [] +vm_crawl_plugins = [] +host_crawl_plugins = [] +emitter_plugins = [] + +# XXX make this a class + + +def get_plugins( + category_filter={}, + plugin_places=['plugins']): + + pm = PluginManager(plugin_info_ext='plugin') + + # Normalize the paths to the location of this file. + # XXX-ricarkol: there has to be a better way to do this. 
+ plugin_places = [misc.execution_path(x) for x in plugin_places] + + pm.setPluginPlaces(plugin_places) + pm.setCategoriesFilter(category_filter) + pm.collectPlugins() + return pm.getAllPlugins() + + +def get_emitter_plugin_args(plugin, config): + plugin_args = {} + if plugin.name in config['emitters']: + plugin_args = config['emitters'][plugin.name] + return plugin_args + + +def load_emitter_plugins(urls=['stdout://'], + format='csv', + plugin_places=['plugins']): + category_filter = {"emitter": IEmitter} + + # getting all emitter plugins from crawelr/plugins/emitters/* + all_emitter_plugins = get_plugins(category_filter, plugin_places) + + # getting enabled emitter pluggins from crawler.conf file + conf_enabled_plugins = [] + config = config_parser.get_config() + if 'enabled_emitter_plugins' in config['general']: + conf_enabled_plugins = config['general']['enabled_emitter_plugins'] + if 'ALL' in conf_enabled_plugins: + conf_enabled_plugins = [p for p in config['emitters']] + + for plugin in all_emitter_plugins: + plugin_obj = plugin.plugin_object + found_plugin = False + # iterate over CLI provided emitters + for url in urls: + parsed = urlparse.urlparse(url) + proto = parsed.scheme + if plugin_obj.get_emitter_protocol() == proto: + plugin_args = get_emitter_plugin_args(plugin, config) + plugin_obj.init(url, emit_format=format) + yield (plugin_obj, plugin_args) + found_plugin = True + if found_plugin is True: + continue + # iterate over conf provided emitters + if plugin.name in conf_enabled_plugins: + plugin_args = get_emitter_plugin_args(plugin, config) + plugin_obj.init(url=plugin_args.get('url', 'missing_url'), + emit_format=plugin_args.get( + 'format', 'missing_format')) + yield (plugin_obj, plugin_args) + + # Note1: 'Same' emitters would either be picked from CLI (preference 1) + # or crawler.conf (preference 2), not both + # Note3: This does not allow different 'same' emitters to have + # different args + # Note2: This does not properly process multiple 
'same' emitter plugins + # inside crawler.conf, e.g.: two 'File Emitters' + + +def get_emitter_plugins(urls=['stdout://'], + format='csv', + plugin_places=['plugins']): + global emitter_plugins + if not emitter_plugins: + emitter_plugins = list( + load_emitter_plugins(urls=urls, + format=format, + plugin_places=plugin_places)) + return emitter_plugins + + +def reload_env_plugin(environment='cloudsight', plugin_places=['plugins']): + global runtime_env + + category_filter = {"env": IRuntimeEnvironment} + env_plugins = get_plugins(category_filter, plugin_places) + + for plugin in env_plugins: + plugin_obj = plugin.plugin_object + if plugin_obj.get_environment_name() == environment: + runtime_env = plugin_obj + break + + if runtime_env is None: + raise RuntimeEnvironmentPluginNotFound('Could not find a valid "%s" ' + 'environment plugin at %s' % + (environment, plugin_places)) + return runtime_env + + +def get_runtime_env_plugin(): + global runtime_env + if not runtime_env: + runtime_env = reload_env_plugin() + return runtime_env + + +def get_plugin_args(plugin, config, options): + plugin_args = {} + + if plugin.name in config['crawlers']: + plugin_args = config['crawlers'][plugin.name] + if 'avoid_setns' in plugin_args: + plugin_args['avoid_setns'] = plugin_args.as_bool('avoid_setns') + + is_feature_crawler = getattr(plugin.plugin_object, 'get_feature', None) + if is_feature_crawler is not None: + feature = plugin.plugin_object.get_feature() + if feature in options: + for arg in options[feature]: + plugin_args[arg] = options[feature][arg] + # the alternative: plugin_args = options.get(feature) + # might overwrite options from crawler.conf + + try: + if options['avoid_setns'] is True: + plugin_args['avoid_setns'] = options['avoid_setns'] + if options['mountpoint'] != '/': + plugin_args['root_dir'] = options['mountpoint'] + except KeyError as exc: + logger.warning( + 'Can not apply users --options configuration: %s' % exc) + + return plugin_args + + +def 
load_crawl_plugins( + category_filter={}, + features=['os', 'cpu'], + plugin_places=['plugins'], + options={}): + + crawl_plugins = get_plugins(category_filter, plugin_places) + config = config_parser.get_config() + + enabled_plugins = [] + if 'enabled_plugins' in config['general']: + enabled_plugins = config['general']['enabled_plugins'] + if 'ALL' in enabled_plugins: + enabled_plugins = [p for p in config['crawlers']] + # Reading from 'crawlers' section inside crawler.conf + # Alternatively, 'ALL' can be made to signify + # all crawlers in plugins/* + + for plugin in crawl_plugins: + if ((plugin.name in enabled_plugins) or ( + plugin.plugin_object.get_feature() in features)): + plugin_args = get_plugin_args(plugin, config, options) + yield (plugin.plugin_object, plugin_args) + + +def reload_container_crawl_plugins( + features=['os', 'cpu'], + plugin_places=['plugins'], + options={}): + global container_crawl_plugins + + container_crawl_plugins = list( + load_crawl_plugins( + category_filter={ + "crawler": IContainerCrawler}, + features=features, + plugin_places=plugin_places, + options=options)) + + +def reload_vm_crawl_plugins( + features=['os', 'cpu'], + plugin_places=['plugins'], + options={}): + global vm_crawl_plugins + + vm_crawl_plugins = list( + load_crawl_plugins( + category_filter={ + "crawler": IVMCrawler}, + features=features, + plugin_places=plugin_places, + options=options)) + + +def reload_host_crawl_plugins( + features=['os', 'cpu'], + plugin_places=['plugins'], + options={}): + global host_crawl_plugins + + host_crawl_plugins = list( + load_crawl_plugins( + category_filter={ + "crawler": IHostCrawler}, + features=features, + plugin_places=plugin_places, + options=options)) + + +def get_container_crawl_plugins( + features=[ + 'package', + 'os', + 'process', + 'file', + 'config']): + global container_crawl_plugins + if not container_crawl_plugins: + reload_container_crawl_plugins(features=features) + return container_crawl_plugins + + +def 
get_vm_crawl_plugins( + features=[ + 'package', + 'os', + 'process', + 'file', + 'config']): + global vm_crawl_plugins + if not vm_crawl_plugins: + reload_vm_crawl_plugins(features=features) + return vm_crawl_plugins + + +def get_host_crawl_plugins( + features=[ + 'package', + 'os', + 'process', + 'file', + 'config']): + global host_crawl_plugins + if not host_crawl_plugins: + reload_host_crawl_plugins(features=features) + return host_crawl_plugins diff --git a/crawler/utils/plugincont/plugincont_img/crawler/runtime_environment.py b/crawler/utils/plugincont/plugincont_img/crawler/runtime_environment.py new file mode 100644 index 00000000..14b00d7a --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/runtime_environment.py @@ -0,0 +1,58 @@ +from yapsy.IPlugin import IPlugin + + +class IRuntimeEnvironment(IPlugin): + + """ + Runtime Environment Plugin Interface + + Subclasses of this class can be used to specify environment specific + parameters for the crawls. These include: how to name a container, how to + link the container logs in the host (i.e. the --linkContainerLogs arg). + """ + # TODO-ricarkol: only applies to containers at the moment. + # TODO-ricarkol: options should define an actual explicit list of params. + + def get_environment_name(self): + """Returns a unique string that identifies this environment + """ + raise NotImplementedError() + + def get_container_namespace(self, long_id, options): + """ + Specifies how to create the namespace of a container. This is a string + that uniquely identifies a container instance. The default + implementation, class CloudsightEnvironment, uses + /, but some organizations might prefer something + else like: //. This is done by + implementing the get_container_namespace() method. + + :param long_id: The container ID. + :param options: Dictionary with "options". XXX-ricarkol should define + an actual explicit list of params. 
+ """ + raise NotImplementedError() + + def get_container_log_file_list(self, long_id, options): + """ + Specifies what are the containers logs linked in the host (i.e. the + --linkContainerLogs arg). The default implementation, class + CloudsightEnvironment, uses the list in defaults.py:default_log_files. + + :param long_id: The container ID. + :param options: Dictionary with "options". + """ + raise NotImplementedError() + + def get_container_log_prefix(self, long_id, options): + """ + Specifies where are the containers logs linked in the host (i.e. the + --linkContainerLogs arg). By default, a container log like /log/a.log + is linked to ///log/a.log, but + it might be desirable to specify another way of constructing this path. + This is done by implementing the get_container_log_prefix() function. + + :param long_id: The container ID. + :param options: Dictionary with "options". + """ + raise NotImplementedError() diff --git a/crawler/utils/plugincont/plugincont_img/crawler/utils/__init__.py b/crawler/utils/plugincont/plugincont_img/crawler/utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/crawler/utils/plugincont/plugincont_img/crawler/utils/config_utils.py b/crawler/utils/plugincont/plugincont_img/crawler/utils/config_utils.py new file mode 100644 index 00000000..9cbe6d3f --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/utils/config_utils.py @@ -0,0 +1,102 @@ +import codecs +import fnmatch +import logging +import os +import re + +import utils.misc +from utils.features import ConfigFeature + +logger = logging.getLogger('crawlutils') + + +def crawl_config_files( + root_dir='/', + exclude_dirs=[], + root_dir_alias=None, + known_config_files=[], + discover_config_files=False, + accessed_since=0 +): + + saved_args = locals() + logger.debug('Crawling config files: %s' % (saved_args)) + + if not os.path.isdir(root_dir): + return + + root_dir_alias = root_dir_alias or root_dir + exclude_dirs = 
[utils.misc.join_abs_paths(root_dir, d) for d in + exclude_dirs] + exclude_regex = r'|'.join([fnmatch.translate(d) for d in + exclude_dirs]) or r'$.' + known_config_files[:] = [utils.misc.join_abs_paths(root_dir, f) for f in + known_config_files] + known_config_files[:] = [f for f in known_config_files + if not re.match(exclude_regex, f)] + config_file_set = set() + for fpath in known_config_files: + if os.path.exists(fpath): + lstat = os.lstat(fpath) + if (lstat.st_atime > accessed_since or + lstat.st_ctime > accessed_since): + config_file_set.add(fpath) + + if discover_config_files: + discover_config_file_paths(accessed_since, config_file_set, + exclude_regex, root_dir) + + for fpath in config_file_set: + (_, fname) = os.path.split(fpath) + # realpath sanitizes the path a bit, for example: '//abc/' to '/abc/' + frelpath = os.path.realpath(fpath.replace(root_dir, root_dir_alias, 1)) + with codecs.open(filename=fpath, mode='r', + encoding='utf-8', errors='ignore') as \ + config_file: + + # Encode the contents of config_file as utf-8. + + yield (frelpath, ConfigFeature(fname, + config_file.read(), + frelpath), 'config') + + +def discover_config_file_paths(accessed_since, config_file_set, + exclude_regex, root_dir): + # Walk the directory hierarchy starting at 'root_dir' in BFS + # order looking for config files. 
+ for (root_dirpath, dirs, files) in os.walk(root_dir): + dirs[:] = [os.path.join(root_dirpath, d) for d in + dirs] + dirs[:] = [d for d in dirs + if not re.match(exclude_regex, d)] + files = [os.path.join(root_dirpath, f) for f in + files] + files = [f for f in files + if not re.match(exclude_regex, f)] + for fpath in files: + if os.path.exists(fpath) \ + and _is_config_file(fpath): + lstat = os.lstat(fpath) + if lstat.st_atime > accessed_since \ + or lstat.st_ctime > accessed_since: + config_file_set.add(fpath) + + +def _is_config_file(fpath): + (_, ext) = os.path.splitext(fpath) + if os.path.isfile(fpath) and ext in [ + '.xml', + '.ini', + '.properties', + '.conf', + '.cnf', + '.cfg', + '.cf', + '.config', + '.allow', + '.deny', + '.lst', + ] and os.path.getsize(fpath) <= 204800: + return True + return False diff --git a/crawler/utils/plugincont/plugincont_img/crawler/utils/connection_utils.py b/crawler/utils/plugincont/plugincont_img/crawler/utils/connection_utils.py new file mode 100644 index 00000000..3550dd65 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/utils/connection_utils.py @@ -0,0 +1,65 @@ +import psutil + +from utils.features import ConnectionFeature + + +def crawl_connections(): + created_since = -1 + + proc_list = psutil.process_iter() + + for p in proc_list: + pid = (p.pid() if hasattr(p.pid, '__call__') else p.pid) + status = (p.status() if hasattr(p.status, '__call__' + ) else p.status) + if status == psutil.STATUS_ZOMBIE: + continue + + create_time = ( + p.create_time() if hasattr( + p.create_time, + '__call__') else p.create_time) + name = (p.name() if hasattr(p.name, '__call__') else p.name) + + if create_time <= created_since: + continue + for conn in p.get_connections(): + yield crawl_single_connection(conn, pid, name) + + +def crawl_single_connection(c, pid, name): + """Returns a ConnectionFeature""" + try: + (localipaddr, localport) = c.laddr[:] + except: + + # Older version of psutil uses local_address instead of + 
# laddr. + + (localipaddr, localport) = c.local_address[:] + try: + if c.raddr: + (remoteipaddr, remoteport) = c.raddr[:] + else: + (remoteipaddr, remoteport) = (None, None) + except: + + # Older version of psutil uses remote_address instead + # of raddr. + + if c.remote_address: + (remoteipaddr, remoteport) = \ + c.remote_address[:] + else: + (remoteipaddr, remoteport) = (None, None) + feature_key = '{0}/{1}/{2}'.format(pid, + localipaddr, localport) + return (feature_key, ConnectionFeature( + localipaddr, + localport, + name, + pid, + remoteipaddr, + remoteport, + str(c.status), + ), 'connection') diff --git a/crawler/utils/plugincont/plugincont_img/crawler/utils/crawler_exceptions.py b/crawler/utils/plugincont/plugincont_img/crawler/utils/crawler_exceptions.py new file mode 100644 index 00000000..aef4bc0b --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/utils/crawler_exceptions.py @@ -0,0 +1,122 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + + +class CrawlError(Exception): + + """Indicates that a crawl timed out.""" + + pass + + +class CrawlTimeoutError(CrawlError): + + """Indicates some error during crawling.""" + + pass + + +class CrawlUnsupportedPackageManager(CrawlError): + + """Could not detect what is the package manager.""" + + pass + + +class ContainerInvalidEnvironment(Exception): + + """Indicates that the environment can not be applied to the operation.""" + + pass + + +class ContainerNonExistent(Exception): + + """The container does not exist.""" + + pass + + +class ContainerWithoutCgroups(Exception): + + """Can not find the cgroup node for a container""" + + pass + + +class DockerutilsException(Exception): + + """Exception from the dockerutils module.""" + + pass + + +class DockerutilsNoJsonLog(DockerutilsException): + + """Could not find the json log for the container. 
Most likely because the + docker logging driver is not json-file.""" + + pass + + +class AlchemyInvalidMetadata(ContainerInvalidEnvironment): + + """Invalid or non-present alchemy metadata file.""" + + pass + + +class AlchemyInvalidContainer(ContainerInvalidEnvironment): + + """Invalid or non-present alchemy metadata file.""" + + pass + + +class RuntimeEnvironmentPluginNotFound(Exception): + + """Invalid or non-present plugin for the given environment.""" + + pass + + +class EmitterUnsupportedProtocol(Exception): + + """User requested an unsupported protocol for the frame emision""" + + pass + + +class EmitterUnsupportedFormat(Exception): + + """User requested an unsupported format for the emitted frame""" + + pass + + +class EmitterBadURL(Exception): + + """The emit URL is invalid""" + + pass + + +class EmitterEmitTimeout(Exception): + + """The emit timed out""" + + pass + + +class MTGraphiteInvalidTenant(Exception): + + """Invalid tenant""" + + pass + + +class NamespaceFailedSetns(Exception): + + """Invalid tenant""" + + pass diff --git a/crawler/utils/plugincont/plugincont_img/crawler/utils/disk_utils.py b/crawler/utils/plugincont/plugincont_img/crawler/utils/disk_utils.py new file mode 100644 index 00000000..c8221cce --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/utils/disk_utils.py @@ -0,0 +1,21 @@ +import psutil + +from utils.features import DiskFeature + + +def crawl_disk_partitions(): + partitions = [] + for partition in psutil.disk_partitions(all=True): + try: + pdiskusage = psutil.disk_usage(partition.mountpoint) + partitions.append((partition.mountpoint, DiskFeature( + partition.device, + 100.0 - pdiskusage.percent, + partition.fstype, + partition.mountpoint, + partition.opts, + pdiskusage.total, + ), 'disk')) + except OSError: + continue + return partitions diff --git a/crawler/utils/plugincont/plugincont_img/crawler/utils/dockerevent.py b/crawler/utils/plugincont/plugincont_img/crawler/utils/dockerevent.py new file mode 100644 
index 00000000..96083be0 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/utils/dockerevent.py @@ -0,0 +1,23 @@ +""" +Docker container event object +""" + + +class DockerContainerEvent(object): + def __init__(self, contId, imgId, event, etime): + self.contId = contId + self.imgId = imgId + self.event = event + self.eventTime = etime + + def get_containerid(self): + return self.contId + + def get_imgageid(self): + return self.imgId + + def get_event(self): + return self.event + + def get_eventTime(self): + return self.eventTime diff --git a/crawler/utils/plugincont/plugincont_img/crawler/utils/dockerutils.py b/crawler/utils/plugincont/plugincont_img/crawler/utils/dockerutils.py new file mode 100644 index 00000000..d94e85db --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/utils/dockerutils.py @@ -0,0 +1,409 @@ +#!usr/bin/python +# -*- coding: utf-8 -*- +import logging +import os + +import dateutil.parser as dp +import docker +import semantic_version +import itertools +import re + +from utils import misc +from crawler_exceptions import (DockerutilsNoJsonLog, + DockerutilsException) +from timeout_utils import (Timeout, TimeoutError) +from dockerevent import DockerContainerEvent + +# version at which docker image layer organization changed +VERSION_SPEC = semantic_version.Spec('>=1.10.0') + +logger = logging.getLogger('crawlutils') + +SUPPORTED_DRIVERS = ['btrfs', 'devicemapper', 'aufs', 'vfs'] + + +def exec_dockerps(): + """ + Returns a list of docker inspect jsons, one for each container. + + This call executes the `docker inspect` command every time it is invoked. 
+ """ + try: + client = docker.Client( + base_url='unix://var/run/docker.sock', version='auto') + containers = client.containers() + inspect_arr = [] + for container in containers: + inspect = exec_dockerinspect(container['Id']) + inspect_arr.append(inspect) + except docker.errors.DockerException as e: + logger.warning(str(e)) + raise DockerutilsException('Failed to exec dockerps') + + return inspect_arr + + +def exec_docker_history(long_id): + try: + client = docker.Client(base_url='unix://var/run/docker.sock', + version='auto') + image = client.inspect_container(long_id)['Image'] + history = client.history(image) + return history + except docker.errors.DockerException as e: + logger.warning(str(e)) + raise DockerutilsException('Failed to exec dockerhistory') + + +def _reformat_inspect(inspect): + """Fixes some basic issues with the inspect json returned by docker. + """ + # For some reason, Docker inspect sometimes returns the pid in scientific + # notation. + inspect['State']['Pid'] = '%.0f' % float(inspect['State']['Pid']) + + docker_datetime = dp.parse(inspect['Created']) + epoch_seconds = docker_datetime.strftime('%s') + inspect['Created'] = epoch_seconds + + +def exec_dockerinspect(long_id): + try: + client = docker.Client( + base_url='unix://var/run/docker.sock', version='auto') + inspect = client.inspect_container(long_id) + _reformat_inspect(inspect) + except docker.errors.DockerException as e: + logger.warning(str(e)) + raise DockerutilsException('Failed to exec dockerinspect') + + try: + # get the first RepoTag + inspect['RepoTag'] = client.inspect_image( + inspect['Image'])['RepoTags'][0] + except (docker.errors.DockerException, KeyError, IndexError): + inspect['RepoTag'] = '' + + return inspect + + +def _get_docker_storage_driver_using_proc_mounts(): + for l in open('/proc/mounts', 'r'): + _, mnt, _, _, _, _ = l.split(' ') + for driver in SUPPORTED_DRIVERS: + if mnt == '/var/lib/docker/' + driver: + return driver + raise OSError('Could not find the 
driver in /proc/mounts') + + +def _get_docker_storage_driver(): + """ + We will try several steps in order to ensure that we return + one of the 4 types (btrfs, devicemapper, aufs, vfs). + """ + driver = None + + # Step 1, get it from "docker info" + + try: + client = docker.Client( + base_url='unix://var/run/docker.sock', version='auto') + driver = client.info()['Driver'] + except (docker.errors.DockerException, KeyError): + pass # try to continue with the default of 'devicemapper' + + if driver in SUPPORTED_DRIVERS: + return driver + + # Step 2, get it from /proc/mounts + + try: + driver = _get_docker_storage_driver_using_proc_mounts() + except (OSError, IOError): + logger.debug('Could not read /proc/mounts') + + if driver in SUPPORTED_DRIVERS: + return driver + + # Step 3, we default to "devicemapper" (last resort) + + if driver not in SUPPORTED_DRIVERS: + + driver = 'devicemapper' + + return driver + + +def get_docker_container_json_logs_path(long_id, inspect=None): + """ + Returns the path to a container (with ID=long_id) docker logs file in the + docker host file system. + + There are 2 big potential problems with this: + + 1. This assumes that the docker Logging Driver is `json-file`. Other + drivers are detailed here: + https://docs.docker.com/engine/reference/logging/overview/ + + 2. This is an abstraction violation as we are breaking the Docker + abstraction barrier. But, it is so incredibly useful to do this kind of + introspection that we are willing to pay the price. 
+ """ + # First try is the default location + + path = '/var/lib/docker/containers/%s/%s-json.log' % (long_id, + long_id) + if os.path.isfile(path): + return path + + # Second try is to get docker inspect LogPath + + if not inspect: + inspect = exec_dockerinspect(long_id) + + path = None + try: + path = inspect['LogPath'] + except KeyError: + pass + + if path and os.path.isfile(path): + return path + + # Third try is to guess the LogPath based on the HostnamePath + + path = None + try: + path = inspect['HostnamePath'] + path = os.path.join(os.path.dirname(path), '%s-json.log' + % long_id) + except KeyError: + pass + + if path and os.path.isfile(path): + return path + + raise DockerutilsNoJsonLog( + 'Container %s does not have a json log.' % + long_id) + + +def _get_docker_server_version(): + """Run the `docker info` command to get server version + """ + try: + client = docker.Client( + base_url='unix://var/run/docker.sock', version='auto') + return client.version()['Version'] + except (docker.errors.DockerException, KeyError) as e: + logger.warning(str(e)) + raise DockerutilsException('Failed to get the docker version') + + +try: + server_version = _get_docker_server_version() + driver = _get_docker_storage_driver() +except DockerutilsException: + server_version = None + driver = None + + +def _get_container_rootfs_path_dm(long_id, inspect=None): + + if not inspect: + inspect = exec_dockerinspect(long_id) + + pid = str(inspect['State']['Pid']) + + rootfs_path = None + device = None + try: + with open('/proc/' + pid + '/mounts', 'r') as f: + for line in f: + _device, _mountpoint, _, _, _, _ = line.split() + if _mountpoint == '/' and _device != 'rootfs': + device = _device + with open('/proc/mounts', 'r') as f: + for line in f: + _device, _mountpoint, _, _, _, _ = line.split() + if device in line and _mountpoint != '/': + rootfs_path = _mountpoint + break + except IOError as e: + logger.warning(str(e)) + if not rootfs_path or rootfs_path == '/': + raise 
DockerutilsException('Failed to get rootfs on devicemapper') + + return rootfs_path + '/rootfs' + + +def _fix_version(v): + # removing leading zeroes from docker version + # which are not liked by semantic_version + version_parts = re.match(r'(\d+).(\d+).(\d+)', v) + if version_parts is not None: + fixed_v = '' + for item in version_parts.groups(): + if len(item) > 1 and item.startswith('0'): + item = item[1:] + fixed_v = fixed_v + item + '.' + return fixed_v[:-1] + + +def _get_container_rootfs_path_btrfs(long_id, inspect=None): + + rootfs_path = None + + if VERSION_SPEC.match(semantic_version.Version(_fix_version( + server_version))): + btrfs_path = None + mountid_path = ('/var/lib/docker/image/btrfs/layerdb/mounts/' + + long_id + '/mount-id') + try: + with open(mountid_path, 'r') as f: + btrfs_path = f.read().strip() + except IOError as e: + logger.warning(str(e)) + if not btrfs_path: + raise DockerutilsException('Failed to get rootfs on btrfs') + rootfs_path = '/var/lib/docker/btrfs/subvolumes/' + btrfs_path + else: + btrfs_path = None + try: + for submodule in misc.btrfs_list_subvolumes('/var/lib/docker'): + _, _, _, _, _, _, _, _, mountpoint = submodule + if (long_id in mountpoint) and ('init' not in mountpoint): + btrfs_path = mountpoint + break + except RuntimeError: + pass + if not btrfs_path: + raise DockerutilsException('Failed to get rootfs on btrfs') + rootfs_path = '/var/lib/docker/' + btrfs_path + + return rootfs_path + + +def _get_container_rootfs_path_aufs(long_id, inspect=None): + + rootfs_path = None + + if VERSION_SPEC.match(semantic_version.Version(_fix_version( + server_version))): + aufs_path = None + mountid_path = ('/var/lib/docker/image/aufs/layerdb/mounts/' + + long_id + '/mount-id') + try: + with open(mountid_path, 'r') as f: + aufs_path = f.read().strip() + except IOError as e: + logger.warning(str(e)) + if not aufs_path: + raise DockerutilsException('Failed to get rootfs on aufs') + rootfs_path = '/var/lib/docker/aufs/mnt/' + aufs_path 
+ else: + rootfs_path = None + for _path in ['/var/lib/docker/aufs/mnt/' + long_id, + '/var/lib/docker/aufs/diff/' + long_id]: + if os.path.isdir(_path) and os.listdir(_path): + rootfs_path = _path + break + if not rootfs_path: + raise DockerutilsException('Failed to get rootfs on aufs') + + return rootfs_path + + +def _get_container_rootfs_path_vfs(long_id, inspect=None): + + rootfs_path = None + + vfs_path = None + mountid_path = ('/var/lib/docker/image/vfs/layerdb/mounts/' + + long_id + '/mount-id') + try: + with open(mountid_path, 'r') as f: + vfs_path = f.read().strip() + except IOError as e: + logger.warning(str(e)) + if not vfs_path: + raise DockerutilsException('Failed to get rootfs on vfs') + + rootfs_path = '/var/lib/docker/vfs/dir/' + vfs_path + + return rootfs_path + + +def get_docker_container_rootfs_path(long_id, inspect=None): + """ + Returns the path to a container root (with ID=long_id) in the docker host + file system. + + This is an abstraction violation as we are breaking the Docker abstraction + barrier. But, it is so incredibly useful to do this kind of introspection + that we are willing to pay the price. + + FIXME The mount has to be a `shared mount`, otherwise the container + rootfs will not be accessible from the host. As an example, in Docker v + 1.7.1 the daemon is started like this: + + unshare -m -- /usr/bin/docker -d + + This means that for a device mapper driver, whenever the docker daemon + mounts a dm device, this mount will only be accessible to the docker + daemon and containers. 
+ """ + global server_version + global driver + + rootfs_path = None + + if (not server_version) or (not driver): + raise DockerutilsException('Not supported docker storage driver.') + + # should be debug, for now info + logger.info('get_docker_container_rootfs_path: long_id=' + + long_id + ', deriver=' + driver + + ', server_version=' + server_version) + + if driver == 'devicemapper': + rootfs_path = _get_container_rootfs_path_dm(long_id, inspect) + elif driver == 'btrfs': + rootfs_path = _get_container_rootfs_path_btrfs(long_id, inspect) + elif driver == 'aufs': + rootfs_path = _get_container_rootfs_path_aufs(long_id, inspect) + elif driver == 'vfs': + rootfs_path = _get_container_rootfs_path_vfs(long_id, inspect) + else: + raise DockerutilsException('Not supported docker storage driver.') + + return rootfs_path + + +def poll_container_create_events(timeout=0.1): + try: + client = docker.Client(base_url='unix://var/run/docker.sock', + version='auto') + filters = dict() + filters['type'] = 'container' + filters['event'] = 'start' + events = client.events(filters=filters, decode=True) + with Timeout(seconds=timeout): + # we are expecting a single event + event = list(itertools.islice(events, 1))[0] + + containerid = event['id'] + imageid = event['from'] + epochtime = event['time'] + cEvent = DockerContainerEvent(containerid, imageid, + event['Action'], epochtime) + return cEvent + except docker.errors.DockerException as e: + logger.warning(str(e)) + raise DockerutilsException('Failed to exec dockerhistory') + except TimeoutError: + logger.info("Container event timeout") + pass + + return None diff --git a/crawler/utils/plugincont/plugincont_img/crawler/utils/ethtool.py b/crawler/utils/plugincont/plugincont_img/crawler/utils/ethtool.py new file mode 100644 index 00000000..9a84a667 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/utils/ethtool.py @@ -0,0 +1,92 @@ + +import array +import fcntl +import socket +import struct + +SIOCETHTOOL = 0x8946 + 
+ETHTOOL_GSET = 0x00000001 +ETHTOOL_GSTRINGS = 0x0000001b +ETHTOOL_GSTATS = 0x0000001d +ETHTOOL_GSSET_INFO = 0x00000037 + +ETH_SS_STATS = 1 + + +def stripped(name): + return "".join(i for i in name if 31 < ord(i) < 127) + + +def ethtool_get_stats(nic): + sockfd = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + + ecmd_sset_info = array.array('B', struct.pack('@IIQI', + ETHTOOL_GSSET_INFO, + 0, + 1 << ETH_SS_STATS, + 0)) + ifreq = struct.pack('@16sP16x', nic, ecmd_sset_info.buffer_info()[0]) + try: + fcntl.ioctl(sockfd, SIOCETHTOOL, ifreq) + except IOError as err: + raise err + res = ecmd_sset_info.tostring() + _, _, _, n_stats = struct.unpack('IIQI', res) + + if not n_stats: + return {} + + ecmd_gstrings = array.array('B', struct.pack('@III%ds' % (n_stats * 32), + ETHTOOL_GSTRINGS, + ETH_SS_STATS, + 0, + '\x00' * 32 * n_stats)) + ifreq = struct.pack('@16sP16x', nic, ecmd_gstrings.buffer_info()[0]) + try: + fcntl.ioctl(sockfd, SIOCETHTOOL, ifreq) + except IOError as err: + raise err + + gstrings = ecmd_gstrings.tostring() + name = gstrings[12:32].strip() + + # Get the peer ifindex number + ecmd_gstats = array.array('B', struct.pack('@II%ds' % (n_stats * 8), + ETHTOOL_GSTATS, + ETH_SS_STATS, + '\x00' * 8 * n_stats)) + ifreq = struct.pack('@16sP16x', nic, ecmd_gstats.buffer_info()[0]) + try: + fcntl.ioctl(sockfd, SIOCETHTOOL, ifreq) + except IOError as err: + raise err + + gstats = ecmd_gstats.tostring() + + res = {} + gstrings_idx = 12 + gstats_idx = 8 + + while n_stats > 0: + name = stripped(gstrings[gstrings_idx:gstrings_idx + 32]) + gstrings_idx += 32 + value, = struct.unpack('@Q', gstats[gstats_idx:gstats_idx + 8]) + gstats_idx += 8 + res[name] = value + n_stats -= 1 + + return res + + +def ethtool_get_peer_ifindex(nic): + """ + Get the interface index of the peer device of a veth device. + Returns a positive number in case the peer device's interface + index could be determined, a negative value otherwise. 
+ """ + try: + res = ethtool_get_stats(nic) + return int(res.get('peer_ifindex', -1)) + except: + return -2 diff --git a/crawler/utils/plugincont/plugincont_img/crawler/utils/features.py b/crawler/utils/plugincont/plugincont_img/crawler/utils/features.py new file mode 100644 index 00000000..d55f2895 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/utils/features.py @@ -0,0 +1,117 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +from collections import namedtuple + +OSFeature = namedtuple('OSFeature', [ + 'boottime', + 'uptime', + 'ipaddr', + 'os', + 'os_version', + 'os_kernel', + 'architecture', +]) +FileFeature = namedtuple('FileFeature', [ + 'atime', + 'ctime', + 'gid', + 'linksto', + 'mode', + 'mtime', + 'name', + 'path', + 'size', + 'type', + 'uid', +]) +ConfigFeature = namedtuple('ConfigFeature', ['name', 'content', 'path']) +DiskFeature = namedtuple('DiskFeature', [ + 'partitionname', + 'freepct', + 'fstype', + 'mountpt', + 'mountopts', + 'partitionsize', +]) +ProcessFeature = namedtuple('ProcessFeature', [ + 'cmd', + 'created', + 'cwd', + 'pname', + 'openfiles', + 'pid', + 'ppid', + 'threads', + 'user', +]) +MetricFeature = namedtuple('MetricFeature', [ + 'cpupct', + 'mempct', + 'pname', + 'pid', + 'read', + 'rss', + 'status', + 'user', + 'vms', + 'write', +]) +ConnectionFeature = namedtuple('ConnectionFeature', [ + 'localipaddr', + 'localport', + 'pname', + 'pid', + 'remoteipaddr', + 'remoteport', + 'connstatus', +]) +PackageFeature = namedtuple('PackageFeature', ['installed', 'pkgname', + 'pkgsize', 'pkgversion', + 'pkgarchitecture']) +MemoryFeature = namedtuple('MemoryFeature', [ + 'memory_used', + 'memory_buffered', + 'memory_cached', + 'memory_free', + 'memory_util_percentage' +]) +CpuFeature = namedtuple('CpuFeature', [ + 'cpu_idle', + 'cpu_nice', + 'cpu_user', + 'cpu_wait', + 'cpu_system', + 'cpu_interrupt', + 'cpu_steal', + 'cpu_util', +]) +InterfaceFeature = namedtuple('InterfaceFeature', [ + 'if_octets_tx', + 'if_octets_rx', + 
'if_packets_tx', + 'if_packets_rx', + 'if_errors_tx', + 'if_errors_rx', +]) +LoadFeature = namedtuple('LoadFeature', ['shortterm', 'midterm', + 'longterm']) +DockerPSFeature = namedtuple('DockerPSFeature', [ + 'Status', + 'Created', + 'Image', + 'Ports', + 'Command', + 'Names', + 'Id', +]) +DockerHistoryFeature = namedtuple('DockerHistoryFeature', ['history']) +ModuleFeature = namedtuple('ModuleFeature', ['name', 'state']) +CpuHwFeature = namedtuple('CpuHwFeature', [ + 'cpu_family', + 'cpu_vendor', + 'cpu_model', + 'cpu_vedor_id', + 'cpu_module_id', + 'cpu_khz', + 'cpu_cache_size_kb', + 'cpu_num_cores']) diff --git a/crawler/utils/plugincont/plugincont_img/crawler/utils/file_utils.py b/crawler/utils/plugincont/plugincont_img/crawler/utils/file_utils.py new file mode 100644 index 00000000..889232df --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/utils/file_utils.py @@ -0,0 +1,164 @@ +import fnmatch +import logging +import os +import re +import stat + +from utils.features import FileFeature + +logger = logging.getLogger('crawlutils') + + +def crawl_files( + root_dir='/', + exclude_dirs=[], + root_dir_alias=None, + accessed_since=0): + + if not os.path.isdir(root_dir): + return + + saved_args = locals() + logger.debug('crawl_files: %s' % (saved_args)) + + assert os.path.isdir(root_dir) + if root_dir_alias is None: + root_dir_alias = root_dir + exclude_dirs = [os.path.join(root_dir, d) for d in + exclude_dirs] + exclude_regex = r'|'.join([fnmatch.translate(d) + for d in exclude_dirs]) or r'$.' 
+ + # walk the directory hierarchy starting at 'root_dir' in BFS + # order + + feature = _crawl_file(root_dir, root_dir, + root_dir_alias) + if feature and (feature.ctime > accessed_since or + feature.atime > accessed_since): + yield (feature.path, feature, 'file') + for (root_dirpath, dirs, files) in os.walk(root_dir): + dirs[:] = [os.path.join(root_dirpath, d) for d in + dirs] + dirs[:] = [d for d in dirs + if not re.match(exclude_regex, d)] + files = [os.path.join(root_dirpath, f) for f in + files] + files = [f for f in files + if not re.match(exclude_regex, f)] + for fpath in files: + feature = _crawl_file(root_dir, fpath, + root_dir_alias) + if feature and (feature.ctime > accessed_since or + feature.atime > accessed_since): + yield (feature.path, feature, 'file') + for fpath in dirs: + feature = _crawl_file(root_dir, fpath, + root_dir_alias) + if feature and (feature.ctime > accessed_since or + feature.atime > accessed_since): + yield (feature.path, feature, 'file') + + +def _filetype(fpath, fperm): + modebit = fperm[0] + ftype = { + 'l': 'link', + '-': 'file', + 'b': 'block', + 'd': 'dir', + 'c': 'char', + 'p': 'pipe', + }.get(modebit) + return ftype + +_filemode_table = ( + ( + (stat.S_IFLNK, 'l'), + (stat.S_IFREG, '-'), + (stat.S_IFBLK, 'b'), + (stat.S_IFDIR, 'd'), + (stat.S_IFCHR, 'c'), + (stat.S_IFIFO, 'p'), + ), + ((stat.S_IRUSR, 'r'), ), + ((stat.S_IWUSR, 'w'), ), + ((stat.S_IXUSR | stat.S_ISUID, 's'), (stat.S_ISUID, 'S'), + (stat.S_IXUSR, 'x')), + ((stat.S_IRGRP, 'r'), ), + ((stat.S_IWGRP, 'w'), ), + ((stat.S_IXGRP | stat.S_ISGID, 's'), (stat.S_ISGID, 'S'), + (stat.S_IXGRP, 'x')), + ((stat.S_IROTH, 'r'), ), + ((stat.S_IWOTH, 'w'), ), + ((stat.S_IXOTH | stat.S_ISVTX, 't'), (stat.S_ISVTX, 'T'), + (stat.S_IXOTH, 'x')), +) + + +def _fileperm(mode): + + # Convert a file's mode to a string of the form '-rwxrwxrwx' + + perm = [] + for table in _filemode_table: + for (bit, char) in table: + if mode & bit == bit: + perm.append(char) + break + else: + 
perm.append('-') + return ''.join(perm) + + +def _is_executable(fpath): + return os.access(fpath, os.X_OK) + +# crawl a single file + + +def _crawl_file( + root_dir, + fpath, + root_dir_alias, +): + lstat = os.lstat(fpath) + fmode = lstat.st_mode + fperm = _fileperm(fmode) + ftype = _filetype(fpath, fperm) + flinksto = None + if ftype == 'link': + try: + + # This has to be an absolute path, not a root-relative path + + flinksto = os.readlink(fpath) + except: + logger.error('Error reading linksto info for file %s' + % fpath, exc_info=True) + fgroup = lstat.st_gid + fuser = lstat.st_uid + + # This replaces `//a/b/c` with `//a/b/c` + + frelpath = os.path.join(root_dir_alias, + os.path.relpath(fpath, root_dir)) + + # This converts something like `/.` to `/` + + frelpath = os.path.normpath(frelpath) + + (_, fname) = os.path.split(frelpath) + return FileFeature( + lstat.st_atime, + lstat.st_ctime, + fgroup, + flinksto, + fmode, + lstat.st_mtime, + fname, + frelpath, + lstat.st_size, + ftype, + fuser, + ) diff --git a/crawler/utils/plugincont/plugincont_img/crawler/utils/mesos.py b/crawler/utils/plugincont/plugincont_img/crawler/utils/mesos.py new file mode 100644 index 00000000..6dbcf8c9 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/utils/mesos.py @@ -0,0 +1,90 @@ +#! /usr/bin/python +# Copyright 2015 Ray Rodriguez + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import json +import urllib2 +import logging +import logging.handlers +import collections +import os + +logger = None +PREFIX = "mesos-master" +MESOS_INSTANCE = "" +MESOS_HOST = "localhost" +MESOS_PORT = 5050 +MESOS_VERSION = "0.22.0" +MESOS_URL = "" +VERBOSE_LOGGING = False + +CONFIGS = [] + +Stat = collections.namedtuple('Stat', ('type', 'path')) + +logger = logging.getLogger('crawlutils') + + +def configure_crawler_mesos(inurl): + logger.debug('Mesos url %s' % inurl) + CONFIGS.append({ + 'mesos_url': inurl + }) + + +def fetch_stats(mesos_version): + if CONFIGS == []: + CONFIGS.append({ + 'mesos_url': 'http://localhost:5050/metrics/snapshot' + }) + logger.debug('connecting to %s' % CONFIGS[0]['mesos_url']) + try: + result = json.loads( + urllib2.urlopen(CONFIGS[0]['mesos_url'], timeout=10).read()) + except urllib2.URLError: + logger.exception('Exception opening mesos url %s', None) + return None + logger.debug('mesos_stats %s' % result) + return result + + +def setup_logger(logger_name, logfile='crawler.log', process_id=None): + _logger = logging.getLogger(logger_name) + _logger.setLevel(logging.DEBUG) + (logfile_name, logfile_xtnsion) = os.path.splitext(logfile) + if process_id is None: + fname = logfile + else: + fname = '{0}-{1}{2}'.format(logfile_name, process_id, + logfile_xtnsion) + h = logging.handlers.RotatingFileHandler(filename=fname, + maxBytes=10e6, backupCount=1) + f = logging.Formatter( + '%(asctime)s %(processName)-10s %(levelname)-8s %(message)s') + h.setFormatter(f) + _logger.addHandler(h) + + +def log_verbose(enabled, msg): + if not enabled: + return + logger.debug('mesos-master plugin [verbose]: %s' % msg) + + +def snapshot_crawler_mesos_frame(inurl='http://localhost:9092'): + setup_logger('crawler-mesos', 'crawler-mesos.log') + mesos_version = MESOS_VERSION + configure_crawler_mesos(inurl) + + return fetch_stats(mesos_version) diff --git a/crawler/utils/plugincont/plugincont_img/crawler/utils/metric_utils.py 
b/crawler/utils/plugincont/plugincont_img/crawler/utils/metric_utils.py new file mode 100644 index 00000000..ec30064c --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/utils/metric_utils.py @@ -0,0 +1,76 @@ +import os +import psutil +from collections import namedtuple +from utils.features import MetricFeature + + +def _crawl_metrics_cpu_percent(process): + cpu_percent = ( + process.get_cpu_percent( + interval=0) if hasattr( + process.get_cpu_percent, + '__call__') else process.cpu_percent) + return cpu_percent + + +def crawl_metrics(): + created_since = -1 + + for p in psutil.process_iter(): + create_time = ( + p.create_time() if hasattr( + p.create_time, + '__call__') else p.create_time) + if create_time <= created_since: + continue + + name = (p.name() if hasattr(p.name, '__call__' + ) else p.name) + pid = (p.pid() if hasattr(p.pid, '__call__') else p.pid) + status = (p.status() if hasattr(p.status, '__call__' + ) else p.status) + if status == psutil.STATUS_ZOMBIE: + continue + username = ( + p.username() if hasattr( + p.username, + '__call__') else p.username) + meminfo = ( + p.get_memory_info() if hasattr( + p.get_memory_info, + '__call__') else p.memory_info) + try: + ioinfo = ( + p.get_io_counters() if hasattr( + p.get_io_counters, + '__call__') else p.io_counters) + except psutil.AccessDenied: + selfpid = os.getpid() + if pid != selfpid: + # http://lukasz.langa.pl/5/error-opening-file-for-reading-permission-denied/ + print "got psutil.AccessDenied for pid:", pid + ioinfo = namedtuple('ioinfo', ['read_count', 'write_count', + 'read_bytes', 'write_bytes']) + ioinfo.read_bytes = 0 + ioinfo.write_bytes = 0 + + cpu_percent = _crawl_metrics_cpu_percent(p) + + memory_percent = ( + p.get_memory_percent() if hasattr( + p.get_memory_percent, + '__call__') else p.memory_percent) + + feature_key = '{0}/{1}'.format(name, pid) + yield (feature_key, MetricFeature( + round(cpu_percent, 2), + round(memory_percent, 2), + name, + pid, + ioinfo.read_bytes, + 
meminfo.rss, + str(status), + username, + meminfo.vms, + ioinfo.write_bytes, + ), 'metric') diff --git a/crawler/utils/plugincont/plugincont_img/crawler/utils/misc.py b/crawler/utils/plugincont/plugincont_img/crawler/utils/misc.py new file mode 100644 index 00000000..e666c02c --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/utils/misc.py @@ -0,0 +1,253 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +import os +import sys +import inspect +import socket +import subprocess +import psutil +import logging +import logging.handlers +import time +import random + +# Additional modules + +# External dependencies that must be pip install'ed separately + +from netifaces import interfaces, ifaddresses, AF_INET + +logger = logging.getLogger('crawlutils') + + +def setup_logger(logger_name, logfile='crawler.log'): + """ + Setup a logger node called logger_name with rotation every 10MBs. + + :param logger_name: logger node + :param logfile: filename for the log + :return: a logger object + """ + _logger = logging.getLogger(logger_name) + _logger.setLevel(logging.INFO) + h = logging.handlers.RotatingFileHandler(filename=logfile, + maxBytes=10e6, backupCount=1) + f = logging.Formatter( + '%(asctime)s %(processName)-10s %(levelname)-8s %(message)s') + h.setFormatter(f) + _logger.addHandler(h) + return _logger + + +def subprocess_run(cmd, ignore_failure=False, shell=True): + """ + Runs cmd_string as a shell command. It returns stdout as a string, and + raises RuntimeError if the return code is not equal to `good_rc`. 
+ + It returns the tuple: (stdout, stderr, returncode) + Can raise AttributeError or RuntimeError: + """ + try: + proc = subprocess.Popen( + cmd, + shell=shell, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + out, err = proc.communicate() + rc = proc.returncode + + except OSError as exc: + raise RuntimeError('Failed to run ' + cmd + ': [Errno: %d] ' % + exc.errno + exc.strerror + ' [Exception: ' + + type(exc).__name__ + ']') + if (not ignore_failure) and (rc != 0): + raise RuntimeError('(%s) failed with rc=%s: %s' % + (cmd, rc, err)) + return out + + +def enum(**enums): + return type('Enum', (), enums) + + +def get_process_env(pid=1): + """the environment settings from the processes perpective, + @return C{dict} + """ + + try: + pid = int(pid) + except ValueError: + raise TypeError('pid has to be an integer') + + env = {} + envlist = open('/proc/%s/environ' % pid).read().split('\000') + for e in envlist: + (k, _, v) = e.partition('=') + (k, v) = (k.strip(), v.strip()) + if not k: + continue + env[k] = v + return env + + +def process_is_crawler(pid): + """This is really checking if proc is the current process. + """ + try: + pid = int(pid) + except ValueError: + raise TypeError('pid has to be an integer') + + try: + proc = psutil.Process(pid) + cmdline = (proc.cmdline() if hasattr(proc.cmdline, '__call__' + ) else proc.cmdline) + # curr is the crawler process + + curr = psutil.Process(os.getpid()) + curr_cmdline = ( + curr.cmdline() if hasattr( + curr.cmdline, + '__call__') else curr.cmdline) + if cmdline == curr_cmdline: + return True + + # Process not found + return False + except psutil.NoSuchProcess: + # If the process does not exist, then it's definitely not the crawler + return False + except psutil.AccessDenied: + # If we don't have permissions to see that process details, then it can + # not be this process. 
+ return False + + +class NullHandler(logging.Handler): + + def emit(self, record): + pass + + +# try to determine this host's IP address + +def get_host_ipaddr(): + s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + try: + s.connect(('www.ibm.com', 9)) + return s.getsockname()[0] + except socket.error: + return socket.gethostname() + finally: + del s + + +def get_host_ip4_addresses(): + ip_list = [] + for interface in interfaces(): + if AF_INET in ifaddresses(interface): + for link in ifaddresses(interface)[AF_INET]: + ip_list.append(link['addr']) + return ip_list + + +# Find the mountpoint of a given path + +def find_mount_point(path): + path = os.path.abspath(path) + while not os.path.ismount(path): + path = os.path.dirname(path) + return path + + +def join_abs_paths(root, appended_root): + """ Join absolute paths: appended_root is appended after root + """ + if not os.path.isabs(appended_root): + appended_root = '/' + appended_root + return os.path.normpath(os.path.join(root, + os.path.relpath(appended_root, '/'))) + + +def is_process_running(pid): + """ Check For the existence of a unix pid. + """ + try: + pid = int(pid) + except ValueError: + raise TypeError('pid has to be an integer') + + try: + os.kill(pid, 0) + except OSError as exc: + if 'not permitted' in str(exc): + return True + return False + else: + return True + + +def execution_path(filename): + # if filename is an absolute path, os.path.join will return filename + return os.path.join(os.path.dirname(inspect.getfile(sys._getframe(1))), + filename) + + +def btrfs_list_subvolumes(path): + out = subprocess_run('btrfs subvolume list ' + path) + + for line in out.strip().split('\n'): + submodule = line.split() + if len(submodule) != 9: + raise RuntimeError('Expecting the output of `btrfs subvolume` to' + ' have 9 columns. 
Received this: %s' % line) + yield submodule + + +def call_with_retries(function, max_retries=10, + exception_types=(Exception), + _args=(), _kwargs={}): + """ + Call `function` with up to `max_retries` retries. A retry is only + performed if the exception thrown is in `exception_types`. + + :param function: Function to be called. + :param max_retries: Max number of retries. For example if retries is 1, + then a failing function will be called twice before exiting with the + latest exception thrown. + :param exception_types: List of exceptions for which `function` will + be retried. + :param _args: List of args passed to the called function. + :param _kwargs: Key value arguments passed to the called function. + :return: Return value of `function`. + """ + assert max_retries >= 0 + + retries = 0 + last_exc = Exception('Unknown exception') + while retries <= max_retries: + try: + return function(*_args, **_kwargs) + except exception_types as exc: + retries += 1 + wait = 2.0 ** retries * 0.1 + (random.randint(0, 1000) / 1000) + time.sleep(wait) + last_exc = exc + raise last_exc + + +def get_uint_arg(name, default, **kwargs): + """ + Get an unsigned int argument. Return the default value + if no parameter with the given name can be found. 
+ """ + try: + val = int(kwargs.get(name, default)) + if val < 0: + logger.error('Parameter %s must not be negative') + val = default + return val + except: + logger.error('Parameter %s is not an integer' % name) + return default diff --git a/crawler/utils/plugincont/plugincont_img/crawler/utils/mtgraphite.py b/crawler/utils/plugincont/plugincont_img/crawler/utils/mtgraphite.py new file mode 100644 index 00000000..21277618 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/utils/mtgraphite.py @@ -0,0 +1,297 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +import logging +import socket +import ssl +import struct +import time +import re + +from crawler_exceptions import MTGraphiteInvalidTenant + +# This code is based upon the Kafka producer/client classes + +logger = logging.getLogger('crawlutils') + +DEFAULT_SOCKET_TIMEOUT_SECONDS = 120 + + +class MTGraphiteClient(object): + + """ + xxx + """ + + def __init__( + self, + host_url, + batch_send_every_t=5, + batch_send_every_n=1000, + ): + self.host_url = host_url + + # A MTGraphite URL should look like: + # mtgraphite://:/: + + regex = re.compile( + r'^mtgraphite://([^:/]+):([^:/]+)/([^:/]+):([^:/]+)$') + if not regex.match(host_url): + raise ValueError('The MTGraphite URL is invalid. 
It should be:' + ' mtgraphite://:/:') + + matches = regex.search(host_url) + self.host = matches.group(1) + self.port = matches.group(2) + self.tenant = matches.group(3) + self.password = matches.group(4) + + # create a connection only when we need it, but keep it alive + + self.conn = None + self.socket = None + self.batch_send_every_n = batch_send_every_n + self.batch_send_every_t = batch_send_every_t + self.msgset = [] + self.next_timeout = time.time() + batch_send_every_t + + # + # Private API # + # + + def _create_identification_message(self, self_identifier): + identification_message = """""" + identification_message += '1I' + identification_message += chr(len(self_identifier)) + identification_message += self_identifier + return identification_message + + def _create_authentication_msg( + self, + tenant, + password, + supertenant=True): + authentication_message = """""" + if supertenant: + authentication_message += '2S' + else: + authentication_message += '2T' + authentication_message += chr(len(tenant)) + authentication_message += tenant + authentication_message += \ + chr(len(password)) + authentication_message += password + return authentication_message + + def _send_and_check_identification_message(self, identification_message): + identification_message_sent = self.conn.write(identification_message) + + if identification_message_sent != len(identification_message): + logger.warning( + 'Identification message not sent properly, returned ' + 'len = %d', identification_message_sent) + return False + else: + return True + + def _send_and_check_authentication_message(self, authentication_message): + authentication_message_sent = self.conn.write(authentication_message) + logger.info( + 'Sent authentication with mtgraphite, returned length = ' + '%d' % authentication_message_sent) + if authentication_message_sent != len(authentication_message): + raise RuntimeError('failed to send tenant/password') + chunk = self.conn.read(6) # Expecting "1A" + code = 
bytearray(chunk)[:2] + + logger.info('MTGraphite authentication server response of %s' + % code) + if code == '0A': + raise MTGraphiteInvalidTenant('Invalid password') + + def _get_socket(self): + '''Get or create a connection to a broker using host and port''' + if self.conn is not None: + return self.conn + + logger.debug('Creating a new socket with _get_socket()') + while self.conn is None: + try: + self.sequence = 1 # start with 1 as last_ack = 0 + self.socket = socket.socket(socket.AF_INET, + socket.SOCK_STREAM) + self.socket.settimeout(DEFAULT_SOCKET_TIMEOUT_SECONDS) + self.conn = ssl.wrap_socket(self.socket, + cert_reqs=ssl.CERT_NONE) + self.conn.connect((self.host, int(self.port))) + + # We send this identifier message so that the server-side can + # identify this specific crawler in the logs (its behind + # load-balancer so it never sees our source-ip without this). + + self_identifier = str(self.conn.getsockname()[0]) + logger.debug('self_identifier = %s', self_identifier) + identification_message = self._create_identification_message( + self_identifier) + self._send_and_check_identification_message( + identification_message) + + msg = self._create_authentication_msg(self.tenant, + self.password, + supertenant=True) + # We first try with a super tenant account. 
+ try: + self._send_and_check_authentication_message(msg) + except Exception as e: + logger.info("Attempting to log in as tenant") + msg = self._create_authentication_msg(self.tenant, + self.password, + supertenant=False) + self._send_and_check_authentication_message(msg) + return self.conn + + except Exception as e: + logger.exception(e) + if self.conn: + self.conn.close() + self.conn = None + time.sleep(2) # sleep for 2 seconds for now + raise e + + def _write_messages_no_retries(self, msgset): + s = self._get_socket() + messages_string = bytearray('1W') + messages_string.extend(bytearray(struct.pack('!I', + len(msgset)))) + for m in msgset: + if m == msgset[0]: + + # logger.debug the first message + + logger.debug(m.strip()) + messages_string.extend('1M') + messages_string.extend(bytearray(struct.pack('!I', + self.sequence))) + messages_string.extend(bytearray(struct.pack('!I', len(m)))) + messages_string.extend(m) + self.sequence += 1 + len_to_send = len(messages_string) + len_sent = 0 + while len_sent < len_to_send: + t = time.time() * 1000 + logger.debug( + 'About to write to the socket (already sent %d out of %d ' + 'bytes)' % (len_sent, len_to_send)) + written = s.write(buffer(messages_string, len_sent)) + write_time = time.time() * 1000 - t + logger.debug('Written %d bytes to socket in %f ms' + % (written, write_time)) + if written == 0: + raise RuntimeError('socket connection broken') + self.close() + return False + len_sent += written + logger.debug('Waiting for response from mtgraphite server') + chunk = s.read(6) # Expecting "1A"+4byte_num_of_metrics_received + code = bytearray(chunk)[:2] + logger.debug('MTGraphite server response of %s' + % bytearray(chunk).strip()) + if code == '1A': + logger.info('Confirmed write to mtgraphite socket.') + return True + + def _write_messages(self, msgset, max_emit_retries=10): + msg_sent = False + retries = 0 + while not msg_sent and retries <= max_emit_retries: + try: + retries += 1 + 
self._write_messages_no_retries(msgset) + msg_sent = True + except Exception: + if retries <= max_emit_retries: + + # Wait for (2^retries * 100) milliseconds + + wait_time = 2.0 ** retries * 0.1 + logger.error( + 'Could not connect to the mtgraphite server.Retry in ' + '%f seconds.' % wait_time) + + # The connection will be created again by + # _write_messages_no_retries(). + + self.close() + time.sleep(wait_time) + else: + logger.error('Bail out on sending to mtgraphite server' + ) + raise + + # + # Public API # + # + + def close(self): + if self.conn: + try: + self.conn.close() + except Exception as e: + logger.exception(e) + self.conn = None + + def send_messages(self, messages): + """ + Helper method to send produce requests + @param: *messages, one or more message payloads -- type str + @returns: # of messages sent + raises on error + """ + + # Guarantee that messages is actually a list or tuple (should always be + # true) + + if not isinstance(messages, (list, tuple)): + raise TypeError('messages is not a list or tuple!') + + # Raise TypeError if any message is not encoded as a str + + for m in messages: + if not isinstance(m, str): + raise TypeError('all produce message payloads must be type str' + ) + + logger.debug("""""") + logger.debug('New message:') + logger.debug('len(msgset)=%d, batch_every_n=%d, time=%d, ' + 'next_timeout=%d' % (len(self.msgset), + self.batch_send_every_n, + time.time(), + self.next_timeout)) + if messages: + self.msgset.extend(messages) + if len(self.msgset) >= self.batch_send_every_n or time.time() \ + > self.next_timeout: + self._write_messages(self.msgset) + self.msgset = [] + self.next_timeout = time.time() + self.batch_send_every_t + + return len(messages) + + def construct_message(self, space_id, group_id, metric_type, value, + timestamp=None): + """ + Message constructor. Creates a message that you can then append to a + list and send using send_messages. 
+ + params: + :param string space_id: space id (you can get this via logmet) + :param string group_id: group id to access the metric + :param string metric_type: type of metric (e.g., cpu, memory) + :param int value: value of the metric + :param int timestamp: None by default. If left as None, the current + time is used instead. + + returns: a string that contains the message you want to send. + """ + return '%s.%s.%s %d %d\r\n' % (space_id, group_id, metric_type, + value, timestamp or int(time.time())) diff --git a/crawler/utils/plugincont/plugincont_img/crawler/utils/namespace.py b/crawler/utils/plugincont/plugincont_img/crawler/utils/namespace.py new file mode 100644 index 00000000..7077e186 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/utils/namespace.py @@ -0,0 +1,256 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +import os +import multiprocessing +import Queue +import logging +import sys +import types +import signal +import ctypes +from crawler_exceptions import (CrawlTimeoutError, + CrawlError, + NamespaceFailedSetns) + +logger = logging.getLogger('crawlutils') + +try: + libc = ctypes.CDLL('libc.so.6') +except Exception as e: + logger.warning('Can not crawl containers as there is no libc: %s' % e) + libc = None + + +ALL_NAMESPACES = 'user pid uts ipc net mnt'.split() +IN_PROCESS_TIMEOUT = 30 + + +def get_errno_msg(): + try: + libc.__errno_location.restype = ctypes.POINTER(ctypes.c_int) + errno = libc.__errno_location().contents.value + errno_msg = os.strerror(errno) + return errno_msg + except (OSError, AttributeError): + # Getting an error while trying to get the errorno + return 'unknown error' + + +def get_libc(): + global libc + return libc + + +def get_pid_namespace(pid): + try: + ns = os.stat('/proc/' + str(pid) + '/ns/pid').st_ino + return ns + except OSError: + logger.debug('There is no container with pid=%s running.' 
+ % pid) + return None + + +class ProcessContext: + + def __init__(self, pid, namespaces): + self.namespaces = namespaces + self.pid = pid + self.host_ns_fds = {} + self.container_ns_fds = {} + self.host_cwd = os.getcwd() + open_process_namespaces('self', self.host_ns_fds, + self.namespaces) + open_process_namespaces(self.pid, self.container_ns_fds, + self.namespaces) + + def attach(self): + # Disable logging just to be sure log rotation does not happen in + # the container. + logging.disable(logging.CRITICAL) + attach_to_process_namespaces(self.container_ns_fds, self.namespaces) + + def detach(self): + try: + # Re-attach to the process original namespaces. + attach_to_process_namespaces(self.host_ns_fds, + self.namespaces) + # We are now in host context + os.chdir(self.host_cwd) + close_process_namespaces(self.container_ns_fds, + self.namespaces) + close_process_namespaces(self.host_ns_fds, self.namespaces) + finally: + # Enable logging again + logging.disable(logging.NOTSET) + + +def run_as_another_namespace( + pid, + namespaces, + function, + *args, + **kwargs +): + hack_to_pre_load_modules() + + _args = (pid, namespaces, function) + _kwargs = {'_args': tuple(args), '_kwargs': dict(kwargs)} + return run_as_another_process(_run_as_another_namespace, _args, _kwargs) + + +def run_as_another_process(function, _args=(), _kwargs={}): + try: + queue = multiprocessing.Queue(2 ** 15) + except OSError: + # try again with a smaller queue + queue = multiprocessing.Queue(2 ** 14) + + child_process = multiprocessing.Process( + target=_function_wrapper, + args=(queue, function), + kwargs={'_args': _args, '_kwargs': _kwargs}) + child_process.start() + + child_exception, result = None, None + try: + (result, child_exception) = queue.get(timeout=IN_PROCESS_TIMEOUT) + except Queue.Empty: + child_exception = CrawlTimeoutError() + except Exception as exc: + logger.warn(exc) + + child_process.join(IN_PROCESS_TIMEOUT) + + # The join failed and the process might still be alive + + if 
def _function_wrapper(
    queue,
    function,
    _args=(),
    _kwargs=None
):
    """
    Child-process entry point used by run_as_another_process to wrap
    `function` and call it with _args and _kwargs.

    The queue always receives a single (result, exception) pair: on
    success the exception slot is None; on failure the result slot is
    None.  Generators are drained into a list first so the values can
    be pickled across the process boundary.

    :param queue: multiprocessing.Queue used to send back the result.
    :param function: callable to invoke.
    :param _args: positional arguments for `function`.
    :param _kwargs: keyword arguments for `function`; None means {}
        (a None default avoids the shared-mutable-default pitfall the
        original `_kwargs={}` had).
    :return: never returns normally; exits the process with 0 or 1.
    """
    if _kwargs is None:
        _kwargs = {}

    # Ask the kernel to SIGHUP us if the parent dies, so we never
    # linger as an orphan.
    PR_SET_PDEATHSIG = 1
    get_libc().prctl(PR_SET_PDEATHSIG, signal.SIGHUP)

    def signal_handler_sighup(*args):
        logger.warning('Crawler parent process died, so exiting... Bye!')
        queue.close()
        exit(1)

    signal.signal(signal.SIGHUP, signal_handler_sighup)

    try:
        result = function(*_args, **_kwargs)

        # If result is a generator (i.e. function uses yield), drain it
        # now so it can be pickled and sent through the queue.
        if isinstance(result, types.GeneratorType):
            result = list(result)
        queue.put((result, None))
        queue.close()
        sys.exit(0)
    except Exception as e:
        # Ship the exception to the parent instead of dying silently.
        queue.put((None, e))
        queue.close()
        sys.exit(1)
def crawl_os():
    """
    Collect OS-level facts for the live host.

    :return: a single-element list of (feature_key, OSFeature, 'os')
        tuples, where feature_key is the lowercased platform name.
    """
    feature_key = platform.system().lower()
    try:
        os_kernel = platform.platform()
    except:
        # platform.platform() can fail in stripped-down environments.
        os_kernel = 'unknown'

    osinfo_result = osinfo.get_osinfo(mount_point='/')
    if osinfo_result:
        os_distro = osinfo_result['os']
        os_version = osinfo_result['version']
    else:
        os_distro = 'unknown'
        os_version = 'unknown'

    ips = utils.misc.get_host_ip4_addresses()

    # Boot time and uptime are deliberately reported as 'unknown'
    # (the psutil.boot_time() based computation was disabled upstream).
    feature_attributes = OSFeature(
        'unknown',
        'unknown',
        ips,
        os_distro,
        os_version,
        os_kernel,
        platform.machine()
    )

    return [(feature_key, feature_attributes, 'os')]
def parse_os_release(data):
    """
    Parse /etc/os-release style lines into a small dict.

    :param data: iterable of lines (e.g. from file.readlines()).
    :return: dict with 'os' (from ID=) and/or 'version'
        (from VERSION_ID=) keys, lowercased and unquoted; empty dict
        when neither field is present.
    """
    result = {}
    for line in data:
        if line.startswith('ID='):
            # split('=', 1) so a value containing '=' is kept intact
            # (the original split('=') truncated it).
            result['os'] = line.strip().split('=', 1)[1].lower().strip('"')
        if line.startswith('VERSION_ID'):
            result['version'] = \
                line.strip().split('=', 1)[1].lower().strip('"')
    return result
def get_osinfo_from_os_release(mount_point='/'):
    """
    Read distro info from etc/os-release under `mount_point`, falling
    back to usr/lib/os-release (the systemd-documented fallback path).

    :param mount_point: root of the filesystem being crawled.
    :return: {'os': ..., 'version': ...} or {} if neither file is
        readable.
    """
    try:
        with open(_get_file_name(mount_point, OS_RELEASE), 'r') as lsbp:
            return parse_os_release(lsbp.readlines())
    except IOError:
        try:
            # Bug fix: honour mount_point for the fallback too; the
            # original opened USR_OS_RELEASE relative to the CWD,
            # ignoring the crawled image entirely.
            with open(_get_file_name(mount_point,
                                     USR_OS_RELEASE), 'r') as lsbp:
                return parse_os_release(lsbp.readlines())
        except IOError:
            return {}
def get_rpm_packages(
        root_dir='/',
        dbpath='var/lib/rpm',
        installed_since=0,
        reload_needed=False):
    """
    Generator of (feature_key, PackageFeature) tuples for rpm packages.

    :param root_dir: prefix prepended to dbpath (ignored, with a
        warning, if dbpath is absolute).
    :param dbpath: rpm database directory, relative to root_dir.
    :param installed_since: only yield packages installed after this
        epoch timestamp.
    :param reload_needed: dump and reload the rpm db into a temp dir
        first (needed when the db comes from a different bdb version).
    :raises: whatever subprocess_run raises if rpm cannot be executed.
    """
    if os.path.isabs(dbpath):
        logger.warning(
            'dbpath: ' +
            dbpath +
            ' is defined absolute. Ignoring prefix: ' +
            root_dir +
            '.')

    # Rebase the db path into the crawled root.
    dbpath = os.path.join(root_dir, dbpath)

    try:
        if reload_needed:
            reloaded_db_dir = tempfile.mkdtemp()
            _rpm_reload_db(root_dir, dbpath, reloaded_db_dir)
            dbpath = reloaded_db_dir

        output = subprocess_run(['rpm',
                                 '--dbpath',
                                 dbpath,
                                 '-qa',
                                 '--queryformat',
                                 '%{installtime}|%{name}|%{version}'
                                 '-%{release}|%{arch}|%{size}\n'],
                                shell=False,
                                ignore_failure=True)
        # We ignore failures because sometimes rpm returns rc=1 but
        # still outputs all the data.
        rpmlist = output.strip('\n')
    finally:
        if reload_needed:
            logger.debug('Deleting directory: %s' % (reloaded_db_dir))
            shutil.rmtree(reloaded_db_dir)

    if rpmlist:
        for rpminfo in rpmlist.split('\n'):
            (installtime, name, version, architecture, size) = \
                rpminfo.split(r'|')

            # Bug fix: installtime is a string, so the original
            # `installtime <= installed_since` never worked (always
            # False on Python 2, TypeError on Python 3).  Convert
            # defensively: rpm can report '(none)' for installtime.
            try:
                if int(installtime) <= installed_since:
                    continue
            except ValueError:
                pass

            # Keyed by name only (changed from name/version upstream).
            feature_key = '{0}'.format(name)
            yield (feature_key,
                   PackageFeature(installtime,
                                  name, size, version, architecture))
def start_child1(params, pass_fds, null_fds, ign_sigs, setsid=False,
                 max_close_fd=None, **kwargs):
    """
    Simplified child starter: run `params` via subprocess.Popen and
    wait for it to finish.

    Unlike start_child(), this helper does not close inherited file
    descriptors or redirect anything; pass_fds/null_fds/ign_sigs/
    setsid/max_close_fd are accepted only for signature compatibility.

    :param params: argv-style list for subprocess.Popen().
    :param kwargs: forwarded to subprocess.Popen().
    :return: (pid, errcode) -- pid of the finished child and 0 on
        success, or (-1, errno) if the process could not be started.
    """
    try:
        process = subprocess.Popen(params, **kwargs)
    except OSError as err:
        # Bug fix: the original fell through to process.communicate()
        # here, hitting an unbound `process` (and `pid`) variable.
        return -1, err.errno

    # Wait for the child and drain its pipes (if any were requested).
    process.communicate()
    return process.pid, 0
def start_child(params, pass_fds, null_fds, ign_sigs, setsid=False,
                max_close_fd=None, **kwargs):
    """
    Start a child process without leaking file descriptors of the
    current process: fork a helper, sanitize its fds, then Popen
    `params` from inside it.

    This function is a wrapper for subprocess.Popen().

    @params: start the process with the given parameters.
    @pass_fds: file descriptors to pass to the child process; all
               other descriptors from 3 upwards are closed.
    @null_fds: file descriptors to redirect to /dev/null; typically
               0, 1 and 2 for stdin, stdout and stderr.
    @ign_sigs: a list of signals to ignore in the child.
    @setsid: whether to call os.setsid() in the child.
    @max_close_fd: max. file descriptor to close; can be a low number
               when the program doesn't have many open descriptors.
    @**kwargs: kwargs to pass to subprocess.Popen().

    Returns (pid, errcode): a positive pid on success, -1 otherwise;
    errcode is the errno reported by fork()/Popen() (0 on success).
    """
    rfd, wfd = os.pipe()

    try:
        pid = os.fork()
    except OSError as err:
        os.close(rfd)
        os.close(wfd)
        return -1, err.errno

    if pid == 0:
        # --- forked helper child ---
        os.close(rfd)
        # Close-on-exec so the started program never inherits our pipe.
        flags = fcntl.fcntl(wfd, fcntl.F_GETFD)
        fcntl.fcntl(wfd, fcntl.F_SETFD, flags | fcntl.FD_CLOEXEC)

        if len(null_fds):
            nullfd = os.open('/dev/null', os.O_RDWR)
            for fd in null_fds:
                os.dup2(nullfd, fd)
            os.close(nullfd)

        keep_fds = pass_fds
        keep_fds.extend(null_fds)
        keep_fds.append(wfd)

        _close_fds(keep_fds, max_close_fd=max_close_fd)

        for ign_sig in ign_sigs:
            signal.signal(ign_sig, signal.SIG_IGN)
        if setsid:
            os.setsid()

        errcode = 0
        pid = -1

        try:
            process = subprocess.Popen(params, **kwargs)
            pid = process.pid
        except OSError as err:
            errcode = err.errno

        # Report (pid, errno) to the parent over the pipe, then die.
        data = struct.pack('ii', pid, errcode)
        os.write(wfd, data)

        os._exit(0)
    else:
        # --- parent ---
        os.close(wfd)

        try:
            message = os.read(rfd, 8)
            pid, errcode, = struct.unpack('ii', message)
        except Exception:
            # Bug fix: the original left `errcode` unbound on this
            # path, turning a failed read into a NameError on return.
            pid, errcode = -1, 0
        os.close(rfd)
        # Reap the helper; it _exit()s right after reporting.
        os.waitpid(-1, 0)

        return pid, errcode
def open_udp_port(bindaddr, min, max):
    """
    Try to open a UDP listening port in the given range.

    :param bindaddr: local address to bind to.
    :param min: lowest port to try (the parameter names shadow the
        builtins but are kept for backward compatibility).
    :param max: highest port to try, inclusive.
    :return: (socket, port) on success, (None, None) if no port in
        the range could be bound.
    """
    sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    for port in range(min, max + 1):
        try:
            sock.bind((bindaddr, port))
            return sock, port
        except socket.error:
            # Port busy or not bindable; try the next one.  Narrowed
            # from the original bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit.
            continue

    sock.close()
    return None, None
def get_virtual_machines(user_list=[], host_namespace=''):
    """
    Returns the list of Virtual Machines running in the system.

    XXX: Only QEMU VMs are supported at the moment, this includes
    kvm and non-kvm VMs.

    :param user_list: a list of VM descriptor strings
        'name,kernel,distro,arch'
    :param host_namespace: namespace prefix for each VM's namespace.
    :return: A list of VirtualMachine objects
    :raises NotImplementedError: when no descriptors are given, since
        auto-discovery of VMs is not implemented.
    """
    # Bug fix: the original tested `user_list is []`, which is always
    # False (identity, not equality), so the guard never fired.
    if not user_list:
        raise NotImplementedError(
            'Discovery of virtual machines is not supported')

    vms = []
    for vm_desc in user_list:
        try:
            name, kernel, distro, arch = vm_desc.split(',')
            vms.append(QemuVirtualMachine(name, kernel, distro, arch,
                                          host_namespace=host_namespace))
        except (ValueError, KeyError):
            # Malformed descriptor, or no running VM with that name.
            continue
    return vms
class VirtualMachineFrame(BaseFrame):

    """A BaseFrame whose metadata identifies a single virtual machine."""

    def __init__(self, feature_types, vm):
        BaseFrame.__init__(self, feature_types)
        # Merge the VM's identifying metadata, then tag the frame kind.
        self.metadata.update(vm.get_metadata_dict())
        self.metadata['system_type'] = 'vm'
+ """ + frame = VirtualMachineFrame(self.features, vm) + for (plugin_obj, plugin_args) in self.plugins: + try: + frame.add_features(plugin_obj.crawl(vm_desc=vm.get_vm_desc(), + **plugin_args)) + except Exception as exc: + if not ignore_plugin_exception: + raise exc + return frame + + def crawl_vms(self, ignore_plugin_exception=True): + """ + Crawl all vms stored in self.vms_list + + :param ignore_plugin_exception: just ignore exceptions in a plugin + :return: a list generator of Frame objects + """ + for vm in self.vms_list: + yield self.crawl_vm(vm, ignore_plugin_exception) + + def crawl(self, ignore_plugin_exception=True): + """ + Crawl all vms running in the system. + + :param ignore_plugin_exception: just ignore exceptions in a plugin + :return: a list generator of Frame objects + """ + self.update_vms_list() + return self.crawl_vms(ignore_plugin_exception) diff --git a/crawler/utils/plugincont/plugincont_img/crawler/worker.py b/crawler/utils/plugincont/plugincont_img/crawler/worker.py new file mode 100644 index 00000000..f4d0aa7b --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/crawler/worker.py @@ -0,0 +1,87 @@ +import time + + +class Worker: + + def __init__(self, + emitters=None, + frequency=-1, + crawler=None): + """ + Store and check the types of the arguments. + + :param emitters: EmittersManager that holds the list of Emitters. + If it is None, then no emit is done. + :param frequency: Sleep seconds between iterations + """ + self.iter_count = 0 + self.frequency = frequency + self.next_iteration_time = None + self.emitters = emitters + self.crawler = crawler + + def iterate(self, timeout=0): + """ + Function called at each iteration. + + Side effects: increments iter_count + + :param timeout: seconds to wait for polling crawls. If 0, then + just use the regular crawl() method and do not poll. 
+ :return: None + """ + + # Start by polling new systems created within `timeout` seconds + end_time = time.time() + timeout + while timeout > 0: + # If polling is not implemented, this is a sleep(timeout) + frame = self.crawler.polling_crawl(timeout) + if frame and self.emitters: + self.emitters.emit(frame, snapshot_num=self.iter_count) + timeout = end_time - time.time() + # just used for output purposes + self.iter_count += 1 + + # Crawl all systems now + for frame in self.crawler.crawl(): + if self.emitters is not None: + self.emitters.emit(frame, snapshot_num=self.iter_count) + + # just used for output purposes + self.iter_count += 1 + + def _get_next_iteration_time(self, snapshot_time): + """ + Returns the number of seconds to sleep before the next iteration. + + :param snapshot_time: Start timestamp of the current iteration. + :return: Seconds to sleep as a float. + """ + if self.frequency == 0: + return 0 + + if self.next_iteration_time is None: + self.next_iteration_time = snapshot_time + self.frequency + else: + self.next_iteration_time += self.frequency + + while self.next_iteration_time + self.frequency < time.time(): + self.next_iteration_time += self.frequency + + time_to_sleep = self.next_iteration_time - time.time() + return time_to_sleep + + def run(self): + """ + Main crawler loop. Each iteration is one crawl and a sleep. + + :return: None + """ + time_to_sleep = 0 + while True: + snapshot_time = time.time() + self.iterate(time_to_sleep) + # Frequency < 0 means only one run. 
+ if self.frequency < 0: + break + time_to_sleep = self._get_next_iteration_time(snapshot_time) diff --git a/crawler/utils/plugincont/plugincont_img/dependencies/python-conntrackprobe_0.2.1-1_all.deb b/crawler/utils/plugincont/plugincont_img/dependencies/python-conntrackprobe_0.2.1-1_all.deb new file mode 100644 index 00000000..1cbf3427 Binary files /dev/null and b/crawler/utils/plugincont/plugincont_img/dependencies/python-conntrackprobe_0.2.1-1_all.deb differ diff --git a/crawler/utils/plugincont/plugincont_img/dependencies/python-socket-datacollector_0.1.4-1_all.deb b/crawler/utils/plugincont/plugincont_img/dependencies/python-socket-datacollector_0.1.4-1_all.deb new file mode 100644 index 00000000..8e5cf550 Binary files /dev/null and b/crawler/utils/plugincont/plugincont_img/dependencies/python-socket-datacollector_0.1.4-1_all.deb differ diff --git a/crawler/utils/plugincont/plugincont_img/dependencies/softflowd_0.9.9902-1_amd64.deb b/crawler/utils/plugincont/plugincont_img/dependencies/softflowd_0.9.9902-1_amd64.deb new file mode 100644 index 00000000..8b069cf8 Binary files /dev/null and b/crawler/utils/plugincont/plugincont_img/dependencies/softflowd_0.9.9902-1_amd64.deb differ diff --git a/crawler/utils/plugincont/plugincont_img/requirements.txt.template b/crawler/utils/plugincont/plugincont_img/requirements.txt.template new file mode 100644 index 00000000..7fe20159 --- /dev/null +++ b/crawler/utils/plugincont/plugincont_img/requirements.txt.template @@ -0,0 +1,14 @@ +psutil==2.1.3 +requests>=2.7.13 +netifaces==0.10.4 +kafka-python==1.3.1 +pykafka==1.1.0 +kafka==1.3.3 +docker-py==1.10.6 +python-dateutil==2.4.2 +semantic_version==2.5.0 +Yapsy==1.11.223 +configobj==4.7.0 +morph==0.1.2 +fluent-logger==0.4.6 +requests_unixsocket==0.1.5 diff --git a/crawler/utils/plugincont/plugincont_img/requirements.txt.testing b/crawler/utils/plugincont/plugincont_img/requirements.txt.testing new file mode 100644 index 00000000..723e7a0d --- /dev/null +++ 
b/crawler/utils/plugincont/plugincont_img/requirements.txt.testing @@ -0,0 +1,15 @@ +psutil==2.1.3 +requests>=2.7.13 +netifaces==0.10.4 +kafka-python==1.3.1 +pykafka==1.1.0 +kafka==1.3.3 +docker-py==1.10.6 +python-dateutil==2.4.2 +semantic_version==2.5.0 +Yapsy==1.11.223 +configobj==4.7.0 +morph==0.1.2 +fluent-logger==0.4.6 +requests_unixsocket==0.1.5 +python-ptrace==0.9.3 diff --git a/crawler/utils/plugincont/seccomp-no-ptrace.json b/crawler/utils/plugincont/seccomp-no-ptrace.json new file mode 100644 index 00000000..17cd3623 --- /dev/null +++ b/crawler/utils/plugincont/seccomp-no-ptrace.json @@ -0,0 +1,1593 @@ +{ + "defaultAction": "SCMP_ACT_ERRNO", + "architectures": [ + "SCMP_ARCH_X86_64", + "SCMP_ARCH_X86", + "SCMP_ARCH_X32" + ], + "syscalls": [ + { + "name": "accept", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "accept4", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "access", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "alarm", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "bind", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "brk", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "capget", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "capset", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "chdir", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "chmod", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "chown", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "chown32", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "clock_getres", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "clock_gettime", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "clock_nanosleep", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "close", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "connect", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, 
+ { + "name": "copy_file_range", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "creat", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "dup", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "dup2", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "dup3", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "epoll_create", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "epoll_create1", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "epoll_ctl", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "epoll_ctl_old", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "epoll_pwait", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "epoll_wait", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "epoll_wait_old", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "eventfd", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "eventfd2", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "execve", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "execveat", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "exit", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "exit_group", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "faccessat", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "fadvise64", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "fadvise64_64", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "fallocate", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "fanotify_mark", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "fchdir", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "fchmod", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "fchmodat", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "fchown", + "action": "SCMP_ACT_ALLOW", + 
"args": [] + }, + { + "name": "fchown32", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "fchownat", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "fcntl", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "fcntl64", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "fdatasync", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "fgetxattr", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "flistxattr", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "flock", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "fork", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "fremovexattr", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "fsetxattr", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "fstat", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "fstat64", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "fstatat64", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "fstatfs", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "fstatfs64", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "fsync", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "ftruncate", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "ftruncate64", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "futex", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "futimesat", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "getcpu", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "getcwd", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "getdents", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "getdents64", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "getegid", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "getegid32", + "action": "SCMP_ACT_ALLOW", + "args": 
[] + }, + { + "name": "geteuid", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "geteuid32", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "getgid", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "getgid32", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "getgroups", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "getgroups32", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "getitimer", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "getpeername", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "getpgid", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "getpgrp", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "getpid", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "getppid", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "getpriority", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "getrandom", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "getresgid", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "getresgid32", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "getresuid", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "getresuid32", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "getrlimit", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "get_robust_list", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "getrusage", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "getsid", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "getsockname", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "getsockopt", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "get_thread_area", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "gettid", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "gettimeofday", + 
"action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "getuid", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "getuid32", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "getxattr", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "inotify_add_watch", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "inotify_init", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "inotify_init1", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "inotify_rm_watch", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "io_cancel", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "ioctl", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "io_destroy", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "io_getevents", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "ioprio_get", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "ioprio_set", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "io_setup", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "io_submit", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "ipc", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "kill", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "lchown", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "lchown32", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "lgetxattr", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "link", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "linkat", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "listen", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "listxattr", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "llistxattr", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "_llseek", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + 
"name": "lremovexattr", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "lseek", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "lsetxattr", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "lstat", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "lstat64", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "madvise", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "memfd_create", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "mincore", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "mkdir", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "mkdirat", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "mknod", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "mknodat", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "mlock", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "mlock2", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "mlockall", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "mmap", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "mmap2", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "mprotect", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "mq_getsetattr", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "mq_notify", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "mq_open", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "mq_timedreceive", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "mq_timedsend", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "mq_unlink", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "mremap", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "msgctl", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "msgget", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": 
"msgrcv", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "msgsnd", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "msync", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "munlock", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "munlockall", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "munmap", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "nanosleep", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "newfstatat", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "_newselect", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "open", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "openat", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "pause", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "personality", + "action": "SCMP_ACT_ALLOW", + "args": [ + { + "index": 0, + "value": 0, + "valueTwo": 0, + "op": "SCMP_CMP_EQ" + } + ] + }, + { + "name": "personality", + "action": "SCMP_ACT_ALLOW", + "args": [ + { + "index": 0, + "value": 8, + "valueTwo": 0, + "op": "SCMP_CMP_EQ" + } + ] + }, + { + "name": "personality", + "action": "SCMP_ACT_ALLOW", + "args": [ + { + "index": 0, + "value": 4294967295, + "valueTwo": 0, + "op": "SCMP_CMP_EQ" + } + ] + }, + { + "name": "pipe", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "pipe2", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "poll", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "ppoll", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "prctl", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "pread64", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "preadv", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "prlimit64", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "pselect6", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": 
"pwrite64", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "pwritev", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "read", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "readahead", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "readlink", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "readlinkat", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "readv", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "recv", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "recvfrom", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "recvmmsg", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "recvmsg", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "remap_file_pages", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "removexattr", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "rename", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "renameat", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "renameat2", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "restart_syscall", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "rmdir", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "rt_sigaction", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "rt_sigpending", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "rt_sigprocmask", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "rt_sigqueueinfo", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "rt_sigreturn", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "rt_sigsuspend", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "rt_sigtimedwait", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "rt_tgsigqueueinfo", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "sched_getaffinity", + 
"action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "sched_getattr", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "sched_getparam", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "sched_get_priority_max", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "sched_get_priority_min", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "sched_getscheduler", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "sched_rr_get_interval", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "sched_setaffinity", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "sched_setattr", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "sched_setparam", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "sched_setscheduler", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "sched_yield", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "seccomp", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "select", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "semctl", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "semget", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "semop", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "semtimedop", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "send", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "sendfile", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "sendfile64", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "sendmmsg", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "sendmsg", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "sendto", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "setfsgid", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "setfsgid32", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": 
"setfsuid", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "setfsuid32", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "setgid", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "setgid32", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "setgroups", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "setgroups32", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "setitimer", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "setpgid", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "setpriority", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "setregid", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "setregid32", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "setresgid", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "setresgid32", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "setresuid", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "setresuid32", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "setreuid", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "setreuid32", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "setrlimit", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "set_robust_list", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "setsid", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "setsockopt", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "set_thread_area", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "set_tid_address", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "setuid", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "setuid32", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "setxattr", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "shmat", + "action": 
"SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "shmctl", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "shmdt", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "shmget", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "shutdown", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "sigaltstack", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "signalfd", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "signalfd4", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "sigreturn", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "socket", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "socketcall", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "socketpair", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "splice", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "stat", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "stat64", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "statfs", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "statfs64", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "symlink", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "symlinkat", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "sync", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "sync_file_range", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "syncfs", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "sysinfo", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "syslog", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "tee", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "tgkill", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "time", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "timer_create", + "action": "SCMP_ACT_ALLOW", + 
"args": [] + }, + { + "name": "timer_delete", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "timerfd_create", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "timerfd_gettime", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "timerfd_settime", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "timer_getoverrun", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "timer_gettime", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "timer_settime", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "times", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "tkill", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "truncate", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "truncate64", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "ugetrlimit", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "umask", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "uname", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "unlink", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "unlinkat", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "utime", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "utimensat", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "utimes", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "vfork", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "vmsplice", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "wait4", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "waitid", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "waitpid", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "write", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "writev", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "arch_prctl", + "action": 
"SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "modify_ldt", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "chroot", + "action": "SCMP_ACT_ALLOW", + "args": [] + }, + { + "name": "clone", + "action": "SCMP_ACT_ALLOW", + "args": [ + { + "index": 0, + "value": 2080505856, + "valueTwo": 0, + "op": "SCMP_CMP_MASKED_EQ" + } + ] + } + ] +} diff --git a/requirements.txt b/requirements.txt index 7fe20159..286b10e6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ netifaces==0.10.4 kafka-python==1.3.1 pykafka==1.1.0 kafka==1.3.3 -docker-py==1.10.6 +docker==2.7.0 python-dateutil==2.4.2 semantic_version==2.5.0 Yapsy==1.11.223 @@ -12,3 +12,4 @@ configobj==4.7.0 morph==0.1.2 fluent-logger==0.4.6 requests_unixsocket==0.1.5 +python-iptables==0.12.0 diff --git a/tests/functional/test_functional_apk_package_crawler.py b/tests/functional/test_functional_apk_package_crawler.py index 8e4bdaaf..0d269ba8 100644 --- a/tests/functional/test_functional_apk_package_crawler.py +++ b/tests/functional/test_functional_apk_package_crawler.py @@ -31,8 +31,8 @@ def setUp(self): ch.setFormatter(formatter) root.addHandler(ch) - self.docker = docker.Client(base_url='unix://var/run/docker.sock', - version='auto') + self.docker = docker.APIClient(base_url='unix://var/run/docker.sock', + version='auto') try: if len(self.docker.containers()) != 0: raise Exception( diff --git a/tests/functional/test_functional_containers_crawler.py b/tests/functional/test_functional_containers_crawler.py index 26f322e1..d03b658b 100644 --- a/tests/functional/test_functional_containers_crawler.py +++ b/tests/functional/test_functional_containers_crawler.py @@ -31,8 +31,8 @@ def setUp(self): ch.setFormatter(formatter) root.addHandler(ch) - self.docker = docker.Client(base_url='unix://var/run/docker.sock', - version='auto') + self.docker = docker.APIClient(base_url='unix://var/run/docker.sock', + version='auto') try: if len(self.docker.containers()) != 0: raise Exception( @@ -229,8 
+229,8 @@ def testCrawlContainerAvoidSetns(self): output = str(frames[0]) print output # only printed if the test fails # interface in avoid_setns mode is not supported - #assert 'interface-lo' in output - #assert 'if_octets_tx=' in output + # assert 'interface-lo' in output + # assert 'if_octets_tx=' in output assert 'cpu-0' in output assert 'cpu_nice=' in output assert 'memory' in output diff --git a/tests/functional/test_functional_ctprobe.py b/tests/functional/test_functional_ctprobe.py index 379103e0..79192602 100644 --- a/tests/functional/test_functional_ctprobe.py +++ b/tests/functional/test_functional_ctprobe.py @@ -90,7 +90,7 @@ class CtprobeFunctionalTests(unittest.TestCase): image_name = 'alpine:latest' def setUp(self): - self.docker = docker.Client( + self.docker = docker.APIClient( base_url='unix://var/run/docker.sock', version='auto') try: if len(self.docker.containers()) != 0: diff --git a/tests/functional/test_functional_dockerevents.py b/tests/functional/test_functional_dockerevents.py index f837e0d8..ac30c8a7 100644 --- a/tests/functional/test_functional_dockerevents.py +++ b/tests/functional/test_functional_dockerevents.py @@ -20,7 +20,7 @@ class CrawlerDockerEventTests(unittest.TestCase): def setUp(self): - self.docker = docker.Client( + self.docker = docker.APIClient( base_url='unix://var/run/docker.sock', version='auto') try: if len(self.docker.containers()) != 0: diff --git a/tests/functional/test_functional_dockerutils.py b/tests/functional/test_functional_dockerutils.py index 7dd2b1e2..576f0ef6 100644 --- a/tests/functional/test_functional_dockerutils.py +++ b/tests/functional/test_functional_dockerutils.py @@ -22,7 +22,7 @@ class DockerUtilsTests(unittest.TestCase): long_image_name = 'docker.io/alpine:latest' def setUp(self): - self.docker = docker.Client( + self.docker = docker.APIClient( base_url='unix://var/run/docker.sock', version='auto') try: if len(self.docker.containers()) != 0: diff --git 
a/tests/functional/test_functional_fprobe.py b/tests/functional/test_functional_fprobe.py index d0160e77..0584ead5 100644 --- a/tests/functional/test_functional_fprobe.py +++ b/tests/functional/test_functional_fprobe.py @@ -97,7 +97,7 @@ class FprobeFunctionalTests(unittest.TestCase): image_name = 'alpine:latest' def setUp(self): - self.docker = docker.Client( + self.docker = docker.APIClient( base_url='unix://var/run/docker.sock', version='auto') try: if len(self.docker.containers()) != 0: diff --git a/tests/functional/test_functional_k8s_environment.py b/tests/functional/test_functional_k8s_environment.py index 2984357e..707cff51 100644 --- a/tests/functional/test_functional_k8s_environment.py +++ b/tests/functional/test_functional_k8s_environment.py @@ -24,6 +24,7 @@ POD_NS = "io.kubernetes.pod.namespace" K8S_DELIMITER = "/" + class ContainersCrawlerTests(unittest.TestCase): def setUp(self): @@ -36,8 +37,8 @@ def setUp(self): ch.setFormatter(formatter) root.addHandler(ch) - self.docker = docker.Client(base_url='unix://var/run/docker.sock', - version='auto') + self.docker = docker.APIClient(base_url='unix://var/run/docker.sock', + version='auto') self.k8s_labels = dict() self.k8s_labels[CONT_NAME] = "simson" self.k8s_labels[POD_NAME] = "pod-test" @@ -58,7 +59,7 @@ def start_crawled_container(self): # start a container to be crawled self.docker.pull(repository='ubuntu', tag='latest') self.container = self.docker.create_container( - image='ubuntu:latest', labels = self.k8s_labels, command='/bin/sleep 60') + image='ubuntu:latest', labels=self.k8s_labels, command='/bin/sleep 60') self.tempd = tempfile.mkdtemp(prefix='crawlertest.') self.docker.start(container=self.container['Id']) @@ -94,6 +95,7 @@ def testCrawlContainer1(self): ''' Test for graphite o/p format. 
''' + def testCrawlContainer2(self): env = os.environ.copy() mypath = os.path.dirname(os.path.realpath(__file__)) @@ -141,6 +143,7 @@ def testCrawlContainer2(self): ''' Test for csv o/p format ''' + def testCrawlContainer3(self): env = os.environ.copy() mypath = os.path.dirname(os.path.realpath(__file__)) @@ -191,6 +194,7 @@ def testCrawlContainer3(self): ''' Test for json o/p format ''' + def testCrawlContainer4(self): env = os.environ.copy() mypath = os.path.dirname(os.path.realpath(__file__)) diff --git a/tests/functional/test_functional_logs_linker.py b/tests/functional/test_functional_logs_linker.py index d1b02da7..90e23fab 100644 --- a/tests/functional/test_functional_logs_linker.py +++ b/tests/functional/test_functional_logs_linker.py @@ -36,7 +36,7 @@ def setUp(self): pass def startContainer(self): - self.docker = docker.Client( + self.docker = docker.APIClient( base_url='unix://var/run/docker.sock', version='auto') self.docker.pull(repository='ubuntu', tag='latest') self.container = self.docker.create_container( diff --git a/tests/functional/test_functional_namespace.py b/tests/functional/test_functional_namespace.py index 50baee2d..95514a88 100644 --- a/tests/functional/test_functional_namespace.py +++ b/tests/functional/test_functional_namespace.py @@ -48,7 +48,7 @@ class NamespaceLibTests(unittest.TestCase): image_name = 'alpine:latest' def setUp(self): - self.docker = docker.Client( + self.docker = docker.APIClient( base_url='unix://var/run/docker.sock', version='auto') try: if len(self.docker.containers()) != 0: diff --git a/tests/functional/test_functional_plugins.py b/tests/functional/test_functional_plugins.py index 8fd9fc69..59bdd269 100644 --- a/tests/functional/test_functional_plugins.py +++ b/tests/functional/test_functional_plugins.py @@ -21,7 +21,7 @@ class HostAndContainerPluginsFunctionalTests(unittest.TestCase): image_name = 'alpine:latest' def setUp(self): - self.docker = docker.Client( + self.docker = docker.APIClient( 
base_url='unix://var/run/docker.sock', version='auto') try: if len(self.docker.containers()) != 0: diff --git a/tests/functional/test_functional_safecontainers_crawler.py b/tests/functional/test_functional_safecontainers_crawler.py new file mode 100644 index 00000000..8311f261 --- /dev/null +++ b/tests/functional/test_functional_safecontainers_crawler.py @@ -0,0 +1,366 @@ +import unittest +import docker +import requests.exceptions +import tempfile +import os +import time +import shutil +import subprocess +import sys +import pykafka +import semantic_version +import platform +# Tests for crawlers in kraken crawlers configuration. + +from safe_containers_crawler import SafeContainersCrawler +from worker import Worker +from emitters_manager import EmittersManager +from utils.dockerutils import get_docker_container_rootfs_path +from utils.dockerutils import _fix_version +from utils.dockerutils import _get_docker_server_version + +import logging + +# Tests conducted with a single container running. + + +class SafeContainersCrawlerTests(unittest.TestCase): + + def setUp(self): + root = logging.getLogger() + root.setLevel(logging.INFO) + ch = logging.StreamHandler(sys.stdout) + ch.setLevel(logging.INFO) + formatter = logging.Formatter( + '%(asctime)s - %(name)s - %(levelname)s - %(message)s') + ch.setFormatter(formatter) + root.addHandler(ch) + + self.docker = docker.APIClient(base_url='unix://var/run/docker.sock', + version='auto') + try: + if len(self.docker.containers()) != 0: + raise Exception( + "Sorry, this test requires a machine with no docker" + "containers running.") + except requests.exceptions.ConnectionError: + print ("Error connecting to docker daemon, are you in the docker" + "group? 
You need to be in the docker group.") + + self.version_check() + self.setup_plugincont_testing2() + self.start_crawled_container() + # start a kakfa+zookeeper container to send data to (to test our + # kafka emitter) + self.start_kafka_container() + + def setup_plugincont_testing2(self): + plugincont_image_path = os.getcwd() + \ + '/crawler/utils/plugincont/plugincont_img' + shutil.copyfile( + plugincont_image_path + '/requirements.txt.testing', + plugincont_image_path + '/requirements.txt') + _platform = platform.linux_distribution() + if _platform[0] != 'Ubuntu' or _platform[1] < '16.04': + self.seccomp = False + src_file = os.getcwd() + \ + '/crawler/plugin_containers_manager.py' + os.system("sed -i.bak '/security_opt=/d; " + "/self._add_iptable_rules_in/d' " + src_file) + else: + self.seccomp = True + + def fix_test_artifacts(self): + plugincont_image_path = os.getcwd() + \ + '/crawler/utils/plugincont/plugincont_img' + shutil.copyfile( + plugincont_image_path + '/requirements.txt.template', + plugincont_image_path + '/requirements.txt') + if self.seccomp is False: + src_file = os.getcwd() + \ + '/crawler/plugin_containers_manager.py.bak' + dst_file = os.getcwd() + \ + '/crawler/plugin_containers_manager.py' + shutil.move(src_file, dst_file) + pass + + def version_check(self): + self.version_ok = False + VERSION_SPEC = semantic_version.Spec('>=1.12.1') + server_version = _get_docker_server_version() + if VERSION_SPEC.match(semantic_version.Version(_fix_version( + server_version))): + self.version_ok = True + + def start_kafka_container(self): + self.docker.pull(repository='spotify/kafka', tag='latest') + self.kafka_container = self.docker.create_container( + image='spotify/kafka', ports=[9092, 2181], + host_config=self.docker.create_host_config(port_bindings={ + 9092: 9092, + 2181: 2181 + }), + environment={'ADVERTISED_HOST': 'localhost', + 'ADVERTISED_PORT': '9092'}) + self.docker.start(container=self.kafka_container['Id']) + + def 
start_crawled_container(self): + # start a container to be crawled + self.docker.pull(repository='ruby', tag='latest') + self.container = self.docker.create_container( + image='ruby:latest', command='tail -f /dev/null', + ports=[8192], + host_config=self.docker.create_host_config(port_bindings={ + 8192: 8192, + }), + environment={'ADVERTISED_HOST': 'localhost', + 'ADVERTISED_PORT': '8192'}) + self.tempd = tempfile.mkdtemp(prefix='crawlertest.') + self.docker.start(container=self.container['Id']) + time.sleep(5) + rootfs = get_docker_container_rootfs_path(self.container['Id']) + fd = open(rootfs + '/crawlplugins', 'w') + fd.write('cpu\n') + fd.write('os\n') + fd.write('memory\n') + fd.write('interface\n') + fd.write('process\n') + fd.write('rubypackage\n') + fd.close() + + def tearDown(self): + self.fix_test_artifacts() + self.remove_crawled_container() + self.remove_kafka_container() + shutil.rmtree(self.tempd) + + def remove_kafka_container(self): + self.docker.stop(container=self.kafka_container['Id']) + self.docker.remove_container(container=self.kafka_container['Id']) + + def remove_crawled_container(self): + self.docker.stop(container=self.container['Id']) + self.docker.remove_container(container=self.container['Id']) + + def _testCrawlContainer1(self): + if self.version_ok is False: + pass + return + crawler = SafeContainersCrawler( + features=[], user_list=self.container['Id']) + frames = list(crawler.crawl()) + output = str(frames[0]) + print output # only printed if the test fails + assert 'interface-lo' in output + assert 'if_octets_tx' in output + assert 'cpu-0' in output + assert 'cpu_nice' in output + assert 'memory' in output + assert 'memory_buffered' in output + assert 'os' in output + assert 'linux' in output + assert 'process' in output + assert 'tail' in output + assert 'plugincont_user' in output + assert 'rubypackage' in output + assert 'rake' in output + + def _testCrawlContainer2(self): + if self.version_ok is False: + pass + return + env = 
os.environ.copy() + mypath = os.path.dirname(os.path.realpath(__file__)) + os.makedirs(self.tempd + '/out') + + # crawler itself needs to be root + process = subprocess.Popen( + [ + '/usr/bin/python', mypath + '/../../crawler/crawler.py', + '--url', 'file://' + self.tempd + '/out/crawler', + '--features', 'none', + '--crawlContainers', self.container['Id'], + '--format', 'graphite', + '--crawlmode', 'OUTCONTAINERSAFE', + ], + env=env) + time.sleep(30) + stdout, stderr = process.communicate() + assert process.returncode == 0 + + print stderr + print stdout + + subprocess.call(['/bin/chmod', '-R', '777', self.tempd]) + + files = os.listdir(self.tempd + '/out') + assert len(files) == 1 + + f = open(self.tempd + '/out/' + files[0], 'r') + output = f.read() + print output # only printed if the test fails + assert 'interface-lo.if_octets.tx' in output + assert 'cpu-0.cpu-idle' in output + assert 'memory.memory-used' in output + f.close() + + def testCrawlContainerNoPlugins(self): + if self.version_ok is False: + pass + return + rootfs = get_docker_container_rootfs_path(self.container['Id']) + fd = open(rootfs + '/crawlplugins', 'w') + fd.write('noplugin\n') + fd.close() + + env = os.environ.copy() + mypath = os.path.dirname(os.path.realpath(__file__)) + os.makedirs(self.tempd + '/out') + + # crawler itself needs to be root + process = subprocess.Popen( + [ + '/usr/bin/python', mypath + '/../../crawler/crawler.py', + '--url', 'file://' + self.tempd + '/out/crawler', + '--features', 'none', + '--crawlContainers', self.container['Id'], + '--crawlmode', 'OUTCONTAINERSAFE', + ], + env=env) + time.sleep(30) + stdout, stderr = process.communicate() + assert process.returncode == 0 + + print stderr + print stdout + + subprocess.call(['/bin/chmod', '-R', '777', self.tempd]) + + files = os.listdir(self.tempd + '/out') + assert len(files) == 1 + + f = open(self.tempd + '/out/' + files[0], 'r') + output = f.read() + print output # only printed if the test fails + assert 'metadata' 
in output + assert 'interface-lo' not in output + assert 'cpu-0' not in output + assert 'memory' not in output + f.close() + + def testCrawlContainerKafka(self): + # import pdb + # pdb.set_trace() + if self.version_ok is False: + pass + return + env = os.environ.copy() + mypath = os.path.dirname(os.path.realpath(__file__)) + os.makedirs(self.tempd + '/out') + + # crawler itself needs to be root + process = subprocess.Popen( + [ + '/usr/bin/python', mypath + '/../../crawler/crawler.py', + '--url', 'kafka://localhost:9092/test', + '--features', 'none', + '--crawlContainers', self.container['Id'], + '--crawlmode', 'OUTCONTAINERSAFE', + '--numprocesses', '1' + ], + env=env) + time.sleep(30) + + print self.docker.containers() + stdout, stderr = process.communicate() + assert process.returncode == 0 + + print stderr + print stdout + + kafka = pykafka.KafkaClient(hosts='localhost:9092') + topic = kafka.topics['test'] + consumer = topic.get_simple_consumer() + message = consumer.consume() + print message.value + assert '"cmd":"tail -f /dev/null"' in message.value + assert 'interface-lo' in message.value + assert 'if_octets_tx' in message.value + assert 'cpu-0' in message.value + assert 'cpu_nice' in message.value + assert 'memory' in message.value + assert 'memory_buffered' in message.value + assert 'os' in message.value + assert 'linux' in message.value + assert 'process' in message.value + assert 'tail' in message.value + assert 'plugincont_user' in message.value + assert 'rubypackage' in message.value + assert 'rake' in message.value + + def _setup_plugincont_testing1(self): + plugincont_name = '/plugin_cont_' + self.container['Id'] + for container in self.docker.containers(): + if plugincont_name in container['Names']: + plugincont_id = container['Id'] + exec_instance = self.docker.exec_create( + container=plugincont_id, + user='root', + cmd='pip install python-ptrace') + self.docker.exec_start(exec_instance.get("Id")) + + def testCrawlContainerEvilPlugin(self): + if 
self.version_ok is False: + pass + return + rootfs = get_docker_container_rootfs_path(self.container['Id']) + fd = open(rootfs + '/crawlplugins', 'w') + fd.write('evil\n') + fd.close() + + env = os.environ.copy() + mypath = os.path.dirname(os.path.realpath(__file__)) + os.makedirs(self.tempd + '/out') + + # crawler itself needs to be root + process = subprocess.Popen( + [ + '/usr/bin/python', mypath + '/../../crawler/crawler.py', + '--url', 'file://' + self.tempd + '/out/crawler', + '--features', 'none', + '--crawlContainers', self.container['Id'], + '--crawlmode', 'OUTCONTAINERSAFE', + ], + env=env) + time.sleep(30) + stdout, stderr = process.communicate() + assert process.returncode == 0 + + print self.docker.containers() + print stderr + print stdout + + subprocess.call(['/bin/chmod', '-R', '777', self.tempd]) + + files = os.listdir(self.tempd + '/out') + assert len(files) == 1 + + f = open(self.tempd + '/out/' + files[0], 'r') + output = f.read() + f.close() + print output # only printed if the test fails + assert 'kill_status' in output + assert 'trace_status' in output + assert 'write_status' in output + assert 'rm_status' in output + assert 'nw_status' in output + assert 'expected_failed' in output + ctr = output.count('unexpected_succeeded') + if self.seccomp is True: + assert ctr == 0 + else: + assert ctr == 1 + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/unit/.gitignore b/tests/unit/.gitignore deleted file mode 100644 index 9e1ea78e..00000000 --- a/tests/unit/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -*.pyc -*alchemy* -*.log* diff --git a/tests/unit/test_dockerutils.py b/tests/unit/test_dockerutils.py index 999fcad7..2294f307 100644 --- a/tests/unit/test_dockerutils.py +++ b/tests/unit/test_dockerutils.py @@ -14,8 +14,8 @@ def containers(self): return [{'Id': 'good_id'}] def info(self): - return {'Driver': 'btrfs'} - + return {'Driver': 'btrfs', 'DockerRootDir': '/var/lib/docker'} + def version(self): return {'Version': '1.10.1'} @@ 
-88,7 +88,7 @@ def setUp(self): def tearDown(self): pass - @mock.patch('utils.dockerutils.docker.Client', + @mock.patch('utils.dockerutils.docker.APIClient', side_effect=lambda base_url, version: MockedClient()) def test_exec_dockerps(self, *args): for c in utils.dockerutils.exec_dockerps(): @@ -118,7 +118,7 @@ def test_exec_dockerps(self, *args): 'HostIp': ''}]}}, 'Id': 'good_id'} - @mock.patch('utils.dockerutils.docker.Client', + @mock.patch('utils.dockerutils.docker.APIClient', side_effect=lambda base_url, version: MockedClient()) @mock.patch('utils.dockerutils.exec_dockerinspect', side_effect=throw_docker_exception) @@ -126,19 +126,19 @@ def test_exec_dockerps_failure(self, *args): with self.assertRaises(DockerutilsException): utils.dockerutils.exec_dockerps() - @mock.patch('utils.dockerutils.docker.Client', + @mock.patch('utils.dockerutils.docker.APIClient', side_effect=lambda base_url, version: MockedClient()) def test_exec_docker_history(self, *args): h = utils.dockerutils.exec_docker_history('ididid') assert h == [{'History': 'xxx'}] - @mock.patch('utils.dockerutils.docker.Client', + @mock.patch('utils.dockerutils.docker.APIClient', side_effect=throw_docker_exception) def test_exec_docker_history_failure(self, *args): with self.assertRaises(DockerutilsException): utils.dockerutils.exec_docker_history('ididid') - @mock.patch('utils.dockerutils.docker.Client', + @mock.patch('utils.dockerutils.docker.APIClient', side_effect=lambda base_url, version: MockedClient()) def test_exec_docker_inspect(self, *args): i = utils.dockerutils.exec_dockerinspect('ididid') @@ -167,13 +167,13 @@ def test_exec_docker_inspect(self, *args): 'HostIp': ''}]}}, 'Id': 'good_id'} - @mock.patch('utils.dockerutils.docker.Client', + @mock.patch('utils.dockerutils.docker.APIClient', side_effect=throw_docker_exception) def test_exec_docker_inspect_failure(self, *args): with self.assertRaises(DockerutilsException): utils.dockerutils.exec_dockerinspect('ididid') - 
@mock.patch('utils.dockerutils.docker.Client', + @mock.patch('utils.dockerutils.docker.APIClient', side_effect=throw_docker_exception) @mock.patch('utils.dockerutils.open') def test_get_docker_storage_driver_step1a(self, mock_open, mock_client): @@ -187,32 +187,32 @@ def test_get_docker_storage_driver_step1a(self, mock_open, mock_client): mock_open.return_value = open('tests/unit/proc_mounts_btrfs') assert utils.dockerutils._get_docker_storage_driver() == 'btrfs' - @mock.patch('utils.dockerutils.docker.Client', + @mock.patch('utils.dockerutils.docker.APIClient', side_effect=lambda base_url, version: MockedClient()) @mock.patch('utils.dockerutils.open', side_effect=throw_io_error) def test_get_docker_storage_driver_step2(self, mock_open, mock_client): assert utils.dockerutils._get_docker_storage_driver() == 'btrfs' - @mock.patch('utils.dockerutils.docker.Client', + @mock.patch('utils.dockerutils.docker.APIClient', side_effect=throw_docker_exception) @mock.patch('utils.dockerutils.open', side_effect=throw_io_error) def test_get_docker_storage_driver_failure(self, mock_open, mock_client): assert utils.dockerutils._get_docker_storage_driver() == 'devicemapper' - @mock.patch('utils.dockerutils.docker.Client', + @mock.patch('utils.dockerutils.docker.APIClient', side_effect=lambda base_url, version: MockedClient()) def test_get_docker_server_version(self, mock_client): assert utils.dockerutils._get_docker_server_version() == '1.10.1' - @mock.patch('utils.dockerutils.docker.Client', + @mock.patch('utils.dockerutils.docker.APIClient', side_effect=throw_docker_exception) def test_get_docker_server_version_failure(self, mock_client): with self.assertRaises(DockerutilsException): utils.dockerutils._get_docker_server_version() - @mock.patch('utils.dockerutils.docker.Client', + @mock.patch('utils.dockerutils.docker.APIClient', side_effect=lambda base_url, version: MockedClient()) @mock.patch( 'crawler.utils.dockerutils.os.path.isfile', @@ -223,7 +223,7 @@ def 
test_get_json_logs_path_from_path(self, mock_isfile, mock_client): assert utils.dockerutils.get_docker_container_json_logs_path( 'id') == '/var/lib/docker/containers/id/id-json.log' - @mock.patch('utils.dockerutils.docker.Client', + @mock.patch('utils.dockerutils.docker.APIClient', side_effect=lambda base_url, version: MockedClient()) @mock.patch('utils.dockerutils.os.path.isfile', side_effect=lambda p: @@ -232,7 +232,7 @@ def test_get_json_logs_path_from_daemon(self, mock_isfile, mock_client): assert utils.dockerutils.get_docker_container_json_logs_path( 'id') == '/a/b/c/log.json' - @mock.patch('utils.dockerutils.docker.Client', + @mock.patch('utils.dockerutils.docker.APIClient', side_effect=lambda base_url, version: MockedClient()) @mock.patch('utils.dockerutils.os.path.isfile', side_effect=lambda p: False) @@ -240,7 +240,7 @@ def test_get_json_logs_path_failure(self, mock_isfile, mock_client): with self.assertRaises(DockerutilsNoJsonLog): utils.dockerutils.get_docker_container_json_logs_path('id') - @mock.patch('utils.dockerutils.docker.Client', + @mock.patch('utils.dockerutils.docker.APIClient', side_effect=lambda base_url, version: MockedClient()) @mock.patch('utils.dockerutils.open', side_effect=throw_io_error) @@ -250,7 +250,7 @@ def test_get_rootfs_not_supported_driver_failure( with self.assertRaises(DockerutilsException): utils.dockerutils.get_docker_container_rootfs_path('id') - @mock.patch('utils.dockerutils.docker.Client', + @mock.patch('utils.dockerutils.docker.APIClient', side_effect=lambda base_url, version: MockedClient()) @mock.patch('utils.dockerutils.open', side_effect=[open('tests/unit/proc_pid_mounts_devicemapper'), @@ -262,7 +262,7 @@ def test_get_rootfs_devicemapper(self, mock_open, mock_client): "65fe676c24fe1faea1f06e222cc3811cc" "9b651c381702ca4f787ffe562a5e39b/rootfs") - @mock.patch('utils.dockerutils.docker.Client', + @mock.patch('utils.dockerutils.docker.APIClient', side_effect=lambda base_url, version: MockedClient()) 
@mock.patch('utils.dockerutils.open', side_effect=throw_io_error) @@ -280,7 +280,7 @@ def test_get_rootfs_devicemapper_failure(self, mock_open, mock_client): 'level', '5', 'path', 'sub1/abcde/sub2'), ] ) - @mock.patch('utils.dockerutils.docker.Client', + @mock.patch('utils.dockerutils.docker.APIClient', side_effect=lambda base_url, version: MockedClient()) def test_get_rootfs_btrfs_v1_8(self, mock_client, mock_list): utils.dockerutils.driver = 'btrfs' @@ -290,7 +290,7 @@ def test_get_rootfs_btrfs_v1_8(self, mock_client, mock_list): @mock.patch('utils.dockerutils.misc.btrfs_list_subvolumes', side_effect=throw_runtime_error) - @mock.patch('utils.dockerutils.docker.Client', + @mock.patch('utils.dockerutils.docker.APIClient', side_effect=lambda base_url, version: MockedClient()) def test_get_rootfs_btrfs_v1_8_failure(self, mock_client, mock_list): utils.dockerutils.driver = 'btrfs' @@ -298,7 +298,7 @@ def test_get_rootfs_btrfs_v1_8_failure(self, mock_client, mock_list): with self.assertRaises(DockerutilsException): utils.dockerutils.get_docker_container_rootfs_path('abcde') - @mock.patch('utils.dockerutils.docker.Client', + @mock.patch('utils.dockerutils.docker.APIClient', side_effect=lambda base_url, version: MockedClient()) @mock.patch('utils.dockerutils.open', side_effect=[open('tests/unit/btrfs_mount_init-id')]) @@ -308,7 +308,7 @@ def test_get_rootfs_btrfs_v1_10(self, mock_open, mock_client): assert utils.dockerutils.get_docker_container_rootfs_path( 'id') == '/var/lib/docker/btrfs/subvolumes/vol1/id/rootfs-a-b-c' - @mock.patch('utils.dockerutils.docker.Client', + @mock.patch('utils.dockerutils.docker.APIClient', side_effect=lambda base_url, version: MockedClient()) @mock.patch('utils.dockerutils.open', side_effect=throw_io_error) @@ -322,7 +322,7 @@ def test_get_rootfs_btrfs_v1_10_failure(self, mock_open, mock_client): side_effect=lambda d: True) @mock.patch('utils.dockerutils.os.listdir', side_effect=lambda d: ['usr', 'boot', 'var']) - 
@mock.patch('utils.dockerutils.docker.Client', + @mock.patch('utils.dockerutils.docker.APIClient', side_effect=lambda base_url, version: MockedClient()) def test_get_rootfs_aufs_v1_8(self, *args): utils.dockerutils.driver = 'aufs' @@ -334,7 +334,7 @@ def test_get_rootfs_aufs_v1_8(self, *args): side_effect=lambda d: False) @mock.patch('utils.dockerutils.os.listdir', side_effect=lambda d: ['usr', 'boot', 'var']) - @mock.patch('utils.dockerutils.docker.Client', + @mock.patch('utils.dockerutils.docker.APIClient', side_effect=lambda base_url, version: MockedClient()) def test_get_rootfs_aufs_v1_8_failure(self, *args): utils.dockerutils.driver = 'aufs' @@ -342,7 +342,7 @@ def test_get_rootfs_aufs_v1_8_failure(self, *args): with self.assertRaises(DockerutilsException): utils.dockerutils.get_docker_container_rootfs_path('abcde') - @mock.patch('utils.dockerutils.docker.Client', + @mock.patch('utils.dockerutils.docker.APIClient', side_effect=lambda base_url, version: MockedClient()) @mock.patch('utils.dockerutils.open', side_effect=[open('tests/unit/aufs_mount_init-id')]) @@ -352,7 +352,7 @@ def test_get_rootfs_aufs_v1_10(self, *args): assert utils.dockerutils.get_docker_container_rootfs_path( 'abcde') == '/var/lib/docker/aufs/mnt/vol1/id/rootfs-a-b-c' - @mock.patch('utils.dockerutils.docker.Client', + @mock.patch('utils.dockerutils.docker.APIClient', side_effect=lambda base_url, version: MockedClient()) @mock.patch('utils.dockerutils.open', side_effect=throw_io_error) @@ -362,7 +362,7 @@ def test_get_rootfs_aufs_v1_10_failure(self, *args): with self.assertRaises(DockerutilsException): utils.dockerutils.get_docker_container_rootfs_path('abcde') - @mock.patch('utils.dockerutils.docker.Client', + @mock.patch('utils.dockerutils.docker.APIClient', side_effect=lambda base_url, version: MockedClient()) @mock.patch('utils.dockerutils.open', side_effect=[open('tests/unit/vfs_mount_init-id')]) @@ -372,7 +372,7 @@ def test_get_rootfs_vfs_v1_10(self, *args): assert 
utils.dockerutils.get_docker_container_rootfs_path( 'abcde') == '/var/lib/docker/vfs/dir/vol1/id/rootfs-a-b-c' - @mock.patch('utils.dockerutils.docker.Client', + @mock.patch('utils.dockerutils.docker.APIClient', side_effect=lambda base_url, version: MockedClient()) @mock.patch('utils.dockerutils.open', side_effect=throw_io_error) diff --git a/tox.ini b/tox.ini new file mode 100644 index 00000000..671a93a7 --- /dev/null +++ b/tox.ini @@ -0,0 +1,2 @@ +[flake8] +exclude = crawler/utils/plugincont