From 1d03b5ae85a20aa6746af0c30756bc1c69a9de2f Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Wed, 27 May 2020 10:45:08 -0400 Subject: [PATCH 1/8] created new file with arrow and modified base function --- tfx_bsl/beam/run_inference_arrow.py | 1166 ++++++++++++++++++++++ tfx_bsl/beam/run_inference_arrow_test.py | 581 +++++++++++ 2 files changed, 1747 insertions(+) create mode 100644 tfx_bsl/beam/run_inference_arrow.py create mode 100644 tfx_bsl/beam/run_inference_arrow_test.py diff --git a/tfx_bsl/beam/run_inference_arrow.py b/tfx_bsl/beam/run_inference_arrow.py new file mode 100644 index 00000000..316b65a5 --- /dev/null +++ b/tfx_bsl/beam/run_inference_arrow.py @@ -0,0 +1,1166 @@ +# Copyright 2019 Google LLC. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Run batch inference on saved model.""" + +from __future__ import absolute_import +from __future__ import division +# Standard __future__ imports +from __future__ import print_function + +import abc +import base64 +import collections +import os +import platform +import sys +import time +try: + import resource +except ImportError: + resource = None + +from absl import logging +import apache_beam as beam +import pyarrow as pa +from apache_beam.options.pipeline_options import GoogleCloudOptions +from apache_beam.options.pipeline_options import PipelineOptions +from apache_beam.utils import retry +import googleapiclient +from googleapiclient import discovery +from googleapiclient import http +import numpy as np +import six +import tensorflow as tf +from tfx_bsl.beam import shared +from tfx_bsl.public.proto import model_spec_pb2 +from tfx_bsl.telemetry import util +from typing import Any, Generator, Iterable, List, Mapping, Sequence, Text, \ + Tuple, Union + +# TODO(b/140306674): stop using the internal TF API. +from tensorflow.python.saved_model import loader_impl +from tensorflow_serving.apis import classification_pb2 +from tensorflow_serving.apis import inference_pb2 +from tensorflow_serving.apis import prediction_log_pb2 +from tensorflow_serving.apis import regression_pb2 + + +# TODO(b/131873699): Remove once 1.x support is dropped. +# pylint: disable=g-import-not-at-top +try: + # We need to import this in order to register all quantiles ops, even though + # it's not directly used. 
+ from tensorflow.contrib.boosted_trees.python.ops import quantile_ops as _ # pylint: disable=unused-import +except ImportError: + pass + +_DEFAULT_INPUT_KEY = 'examples' +_METRICS_DESCRIPTOR_INFERENCE = 'BulkInferrer' +_METRICS_DESCRIPTOR_IN_PROCESS = 'InProcess' +_METRICS_DESCRIPTOR_CLOUD_AI_PREDICTION = 'CloudAIPlatformPrediction' +_MILLISECOND_TO_MICROSECOND = 1000 +_MICROSECOND_TO_NANOSECOND = 1000 +_SECOND_TO_MICROSECOND = 1000000 +_REMOTE_INFERENCE_NUM_RETRIES = 5 + +# We define the following aliases of Any because the actual types are not +# public. +_SignatureDef = Any +_MetaGraphDef = Any +_SavedModel = Any + +# TODO (Maxine): what is this? +_BulkInferResult = Union[prediction_log_pb2.PredictLog, + Tuple[tf.train.Example, regression_pb2.Regression], + Tuple[tf.train.Example, + inference_pb2.MultiInferenceResponse], + Tuple[tf.train.Example, + classification_pb2.Classifications]] + + +# TODO(b/151468119): Converts this into enum once we stop supporting Python 2.7 +class OperationType(object): + CLASSIFICATION = 'CLASSIFICATION' + REGRESSION = 'REGRESSION' + PREDICTION = 'PREDICTION' + MULTIHEAD = 'MULTIHEAD' + + +# TODO (Me): pTransform from examples/sequence example here + +# TODO (Me): Union[bytes, pa.RecordBatch]? +@beam.ptransform_fn +@beam.typehints.with_input_types(pa.RecordBatch) +@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) +def RunInferenceImpl( # pylint: disable=invalid-name + examples: beam.pvalue.PCollection, + inference_spec_type: model_spec_pb2.InferenceSpecType +) -> beam.pvalue.PCollection: + """Implementation of RunInference API. + + Args: + examples: A PCollection containing RecordBatch. + inference_spec_type: Model inference endpoint. + + Returns: + A PCollection containing prediction logs. + + Raises: + ValueError; when operation is not supported. 
+ """ + logging.info('RunInference on model: %s', inference_spec_type) + + batched_examples = examples | 'BatchExamples' >> beam.BatchElements() + operation_type = _get_operation_type(inference_spec_type) + if operation_type == OperationType.CLASSIFICATION: + return batched_examples | 'Classify' >> _Classify(inference_spec_type) + elif operation_type == OperationType.REGRESSION: + return batched_examples | 'Regress' >> _Regress(inference_spec_type) + elif operation_type == OperationType.PREDICTION: + return batched_examples | 'Predict' >> _Predict(inference_spec_type) + elif operation_type == OperationType.MULTIHEAD: + return (batched_examples + | 'MultiInference' >> _MultiInference(inference_spec_type)) + else: + raise ValueError('Unsupported operation_type %s' % operation_type) + + +_IOTensorSpec = collections.namedtuple( + '_IOTensorSpec', + ['input_tensor_alias', 'input_tensor_name', 'output_alias_tensor_names']) + +_Signature = collections.namedtuple('_Signature', ['name', 'signature_def']) + + +@beam.ptransform_fn +@beam.typehints.with_input_types(pa.RecordBatch) +@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) +def _Classify(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name + inference_spec_type: model_spec_pb2.InferenceSpecType): + """Performs classify PTransform.""" + if _using_in_process_inference(inference_spec_type): + return (pcoll + | 'Classify' >> beam.ParDo( + _BatchClassifyDoFn(inference_spec_type, shared.Shared())) + | 'BuildPredictionLogForClassifications' >> beam.ParDo( + _BuildPredictionLogForClassificationsDoFn())) + else: + raise NotImplementedError + + +@beam.ptransform_fn +@beam.typehints.with_input_types(pa.RecordBatch) +@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) +def _Regress(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name + inference_spec_type: model_spec_pb2.InferenceSpecType): + """Performs regress PTransform.""" + if 
_using_in_process_inference(inference_spec_type): + return (pcoll + | 'Regress' >> beam.ParDo( + _BatchRegressDoFn(inference_spec_type, shared.Shared())) + | 'BuildPredictionLogForRegressions' >> beam.ParDo( + _BuildPredictionLogForRegressionsDoFn())) + else: + raise NotImplementedError + + +@beam.ptransform_fn +@beam.typehints.with_input_types(pa.RecordBatch) +@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) +def _Predict(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name + inference_spec_type: model_spec_pb2.InferenceSpecType): + """Performs predict PTransform.""" + if _using_in_process_inference(inference_spec_type): + predictions = ( + pcoll + | 'Predict' >> beam.ParDo( + _BatchPredictDoFn(inference_spec_type, shared.Shared()))) + else: + predictions = ( + pcoll + | 'RemotePredict' >> beam.ParDo( + _RemotePredictDoFn(inference_spec_type, pcoll.pipeline.options))) + return (predictions + | 'BuildPredictionLogForPredictions' >> beam.ParDo( + _BuildPredictionLogForPredictionsDoFn())) + + +@beam.ptransform_fn +@beam.typehints.with_input_types(pa.RecordBatch) +@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) +def _MultiInference(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name + inference_spec_type: model_spec_pb2.InferenceSpecType): + """Performs multi inference PTransform.""" + if _using_in_process_inference(inference_spec_type): + return ( + pcoll + | 'MultiInference' >> beam.ParDo( + _BatchMultiInferenceDoFn(inference_spec_type, shared.Shared())) + | 'BuildMultiInferenceLog' >> beam.ParDo(_BuildMultiInferenceLogDoFn())) + else: + raise NotImplementedError + + +@six.add_metaclass(abc.ABCMeta) +class _BaseDoFn(beam.DoFn): + """Base DoFn that performs bulk inference.""" + + class _MetricsCollector(object): + """A collector for beam metrics.""" + + def __init__(self, inference_spec_type: model_spec_pb2.InferenceSpecType): + operation_type = _get_operation_type(inference_spec_type) + proximity_descriptor 
= ( + _METRICS_DESCRIPTOR_IN_PROCESS + if _using_in_process_inference(inference_spec_type) else + _METRICS_DESCRIPTOR_CLOUD_AI_PREDICTION) + namespace = util.MakeTfxNamespace( + [_METRICS_DESCRIPTOR_INFERENCE, operation_type, proximity_descriptor]) + + # Metrics + self._inference_counter = beam.metrics.Metrics.counter( + namespace, 'num_inferences') + self._num_instances = beam.metrics.Metrics.counter( + namespace, 'num_instances') + self._inference_request_batch_size = beam.metrics.Metrics.distribution( + namespace, 'inference_request_batch_size') + self._inference_request_batch_byte_size = ( + beam.metrics.Metrics.distribution( + namespace, 'inference_request_batch_byte_size')) + # Batch inference latency in microseconds. + self._inference_batch_latency_micro_secs = ( + beam.metrics.Metrics.distribution( + namespace, 'inference_batch_latency_micro_secs')) + self._model_byte_size = beam.metrics.Metrics.distribution( + namespace, 'model_byte_size') + # Model load latency in milliseconds. 
+ self._load_model_latency_milli_secs = beam.metrics.Metrics.distribution( + namespace, 'load_model_latency_milli_secs') + + # Metrics cache + self.load_model_latency_milli_secs_cache = None + self.model_byte_size_cache = None + + def update_metrics_with_cache(self): + if self.load_model_latency_milli_secs_cache is not None: + self._load_model_latency_milli_secs.update( + self.load_model_latency_milli_secs_cache) + self.load_model_latency_milli_secs_cache = None + if self.model_byte_size_cache is not None: + self._model_byte_size.update(self.model_byte_size_cache) + self.model_byte_size_cache = None + + def update(self, elements: List[str], latency_micro_secs: int) -> None: + self._inference_batch_latency_micro_secs.update(latency_micro_secs) + self._num_instances.inc(len(elements)) + self._inference_counter.inc(len(elements)) + self._inference_request_batch_size.update(len(elements)) + self._inference_request_batch_byte_size.update( + sum(element.ByteSize() for element in elements)) + + def __init__(self, inference_spec_type: model_spec_pb2.InferenceSpecType): + super(_BaseDoFn, self).__init__() + self._clock = None + self._metrics_collector = self._MetricsCollector(inference_spec_type) + + def setup(self): + self._clock = _ClockFactory.make_clock() + + def process( + self, elements: pa.RecordBatch + ) -> Iterable[Any]: + batch_start_time = self._clock.get_current_time_in_microseconds() + # TODO (Maxine): set ARROW_INPUT_COLUMN or take as a parameter + # extract record batch from here, assuming first column + serialized_examples = elements.column(0) + outputs = self.run_inference(serialized_examples) + result = self._post_process(serialized_examples, outputs) + self._metrics_collector.update( + elements, + self._clock.get_current_time_in_microseconds() - batch_start_time) + return result + + def finish_bundle(self): + self._metrics_collector.update_metrics_with_cache() + + @abc.abstractmethod + def run_inference( + self, elements: List[str] + ) -> 
Union[Mapping[Text, np.ndarray], Sequence[Mapping[Text, Any]]]: + raise NotImplementedError + + @abc.abstractmethod + def _post_process(self, elements: List[str], outputs: Any) -> Iterable[Any]: + raise NotImplementedError + + +def _retry_on_unavailable_and_resource_error_filter(exception: Exception): + """Retries for HttpError. + + Retries if error is unavailable (503) or resource exhausted (429). + Resource exhausted may happen when qps or bandwidth exceeds quota. + + Args: + exception: Exception from inference http request execution. + Returns: + A boolean of whether retry. + """ + + return (isinstance(exception, googleapiclient.errors.HttpError) and + exception.resp.status in (503, 429)) + +# TODO (Maxine): change all example to serialized +@beam.typehints.with_input_types(List[str]) +# Using output typehints triggers NotImplementedError('BEAM-2717)' on +# streaming mode on Dataflow runner. +# TODO(b/151468119): Consider to re-batch with online serving request size +# limit, and re-batch with RPC failures(InvalidArgument) regarding request size. +# @beam.typehints.with_output_types(prediction_log_pb2.PredictLog) +class _RemotePredictDoFn(_BaseDoFn): + """A DoFn that performs predictions from a cloud-hosted TensorFlow model. + + Supports both batch and streaming processing modes. + NOTE: Does not work on DirectRunner for streaming jobs [BEAM-7885]. + + In order to request predictions, you must deploy your trained model to AI + Platform Prediction in the TensorFlow SavedModel format. See + [Exporting a SavedModel for prediction] + (https://cloud.google.com/ai-platform/prediction/docs/exporting-savedmodel-for-prediction) + for more details. + + To send binary data, you have to make sure that the name of an input ends in + `_bytes`. + + NOTE: The returned `PredictLog` instances do not have `PredictRequest` part + filled. The reason is that it is difficult to determine the input tensor name + without having access to cloud-hosted model's signatures. 
+ """ + + def __init__(self, inference_spec_type: model_spec_pb2.InferenceSpecType, + pipeline_options: PipelineOptions): + super(_RemotePredictDoFn, self).__init__(inference_spec_type) + self._api_client = None + + project_id = ( + inference_spec_type.ai_platform_prediction_model_spec.project_id or + pipeline_options.view_as(GoogleCloudOptions).project) + if not project_id: + raise ValueError('Either a non-empty project id or project flag in ' + ' beam pipeline options needs be provided.') + + model_name = ( + inference_spec_type.ai_platform_prediction_model_spec.model_name) + if not model_name: + raise ValueError('A non-empty model name must be provided.') + + version_name = ( + inference_spec_type.ai_platform_prediction_model_spec.version_name) + name_spec = 'projects/{}/models/{}' + # If version is not specified, the default version for a model is used. + if version_name: + name_spec += '/versions/{}' + self._full_model_name = name_spec.format(project_id, model_name, + version_name) + + def setup(self): + super(_RemotePredictDoFn, self).setup() + # TODO(b/151468119): Add tfx_bsl_version and tfx_bsl_py_version to + # user agent once custom header is supported in googleapiclient. + self._api_client = discovery.build('ml', 'v1') + + # Retry _REMOTE_INFERENCE_NUM_RETRIES times with exponential backoff. 
+ @retry.with_exponential_backoff( + initial_delay_secs=1.0, + num_retries=_REMOTE_INFERENCE_NUM_RETRIES, + retry_filter=_retry_on_unavailable_and_resource_error_filter) + def _execute_request( + self, + request: http.HttpRequest) -> Mapping[Text, Sequence[Mapping[Text, Any]]]: + result = request.execute() + if 'error' in result: + raise ValueError(result['error']) + return result + + def _make_request(self, body: Mapping[Text, List[Any]]) -> http.HttpRequest: + return self._api_client.projects().predict( + name=self._full_model_name, body=body) + + @classmethod + def _prepare_instances( + cls, elements: List[tf.train.Example] + ) -> Generator[Mapping[Text, Any], None, None]: + for example in elements: + # TODO(b/151468119): support tf.train.SequenceExample + if not isinstance(example, tf.train.Example): + raise ValueError('Remote prediction only supports tf.train.Example') + + instance = {} + for input_name, feature in example.features.feature.items(): + attr_name = feature.WhichOneof('kind') + if attr_name is None: + continue + attr = getattr(feature, attr_name) + values = cls._parse_feature_content(attr.value, attr_name, + cls._sending_as_binary(input_name)) + # Flatten a sequence if its length is 1 + values = (values[0] if len(values) == 1 else values) + instance[input_name] = values + yield instance + + @staticmethod + def _sending_as_binary(input_name: Text) -> bool: + """Whether data should be sent as binary.""" + return input_name.endswith('_bytes') + + @staticmethod + def _parse_feature_content(values: Sequence[Any], attr_name: Text, + as_binary: bool) -> Sequence[Any]: + """Parse the content of tf.train.Feature object. + + If bytes_list, parse a list of bytes-like objects to a list of strings so + that it would be JSON serializable. + + If float_list or int64_list, do nothing. + + If data should be sent as binary, mark it as binary by replacing it with + a single attribute named 'b64'. 
+ """ + if as_binary: + return [{'b64': base64.b64encode(x).decode()} for x in values] + elif attr_name == 'bytes_list': + return [x.decode() for x in values] + else: + return values + + def run_inference( + self, elements: List[Union[tf.train.Example, tf.train.SequenceExample]] + ) -> Sequence[Mapping[Text, Any]]: + body = {'instances': list(self._prepare_instances(elements))} + request = self._make_request(body) + response = self._execute_request(request) + return response['predictions'] + + def _post_process( + self, elements: List[Union[tf.train.Example, tf.train.SequenceExample]], + outputs: Sequence[Mapping[Text, Any]] + ) -> Iterable[prediction_log_pb2.PredictLog]: + result = [] + for output in outputs: + predict_log = prediction_log_pb2.PredictLog() + for output_alias, values in output.items(): + values = np.array(values) + tensor_proto = tf.make_tensor_proto( + values=values, + dtype=tf.as_dtype(values.dtype).as_datatype_enum, + shape=np.expand_dims(values, axis=0).shape) + predict_log.response.outputs[output_alias].CopyFrom(tensor_proto) + result.append(predict_log) + return result + + +# TODO(b/131873699): Add typehints once +# [BEAM-8381](https://issues.apache.org/jira/browse/BEAM-8381) +# is fixed. +# TODO(b/143484017): Add batch_size back off in the case there are functional +# reasons large batch sizes cannot be handled. +class _BaseBatchSavedModelDoFn(_BaseDoFn): + """A DoFn that runs in-process batch inference with a model. + + Models need to have the required serving signature as mentioned in + [Tensorflow Serving](https://www.tensorflow.org/tfx/serving/signature_defs) + + This function will check model signatures first. Then it will load and run + model inference in batch. 
+ """ + + def __init__( + self, + inference_spec_type: model_spec_pb2.InferenceSpecType, + shared_model_handle: shared.Shared, + ): + super(_BaseBatchSavedModelDoFn, self).__init__(inference_spec_type) + self._inference_spec_type = inference_spec_type + self._shared_model_handle = shared_model_handle + self._model_path = inference_spec_type.saved_model_spec.model_path + self._tags = None + self._signatures = _get_signatures( + inference_spec_type.saved_model_spec.model_path, + inference_spec_type.saved_model_spec.signature_name, + _get_tags(inference_spec_type)) + self._session = None + self._io_tensor_spec = None + + def setup(self): + """Load the model. + + Note that worker may crash if exception is thrown in setup due + to b/139207285. + """ + + super(_BaseBatchSavedModelDoFn, self).setup() + self._tags = _get_tags(self._inference_spec_type) + self._io_tensor_spec = self._pre_process() + + if self._has_tpu_tag(): + # TODO(b/131873699): Support TPU inference. + raise ValueError('TPU inference is not supported yet.') + self._session = self._load_model() + + def _load_model(self): + """Load a saved model into memory. + + Returns: + Session instance. + """ + + def load(): + """Function for constructing shared LoadedModel.""" + # TODO(b/143484017): Do warmup and other heavy model construction here. 
+ result = tf.compat.v1.Session(graph=tf.compat.v1.Graph()) + memory_before = _get_current_process_memory_in_bytes() + start_time = self._clock.get_current_time_in_microseconds() + tf.compat.v1.saved_model.loader.load(result, self._tags, self._model_path) + end_time = self._clock.get_current_time_in_microseconds() + memory_after = _get_current_process_memory_in_bytes() + self._metrics_collector.load_model_latency_milli_secs_cache = ( + (end_time - start_time) / _MILLISECOND_TO_MICROSECOND) + self._metrics_collector.model_byte_size_cache = ( + memory_after - memory_before) + return result + + if not self._model_path: + raise ValueError('Model path is not valid.') + return self._shared_model_handle.acquire(load) + + def _pre_process(self) -> _IOTensorSpec: + # Pre process functions will validate for each signature. + io_tensor_specs = [] + for signature in self._signatures: + if len(signature.signature_def.inputs) != 1: + raise ValueError('Signature should have 1 and only 1 inputs') + if (list(signature.signature_def.inputs.values())[0].dtype != + tf.string.as_datatype_enum): + raise ValueError( + 'Input dtype is expected to be %s, got %s' % + tf.string.as_datatype_enum, + list(signature.signature_def.inputs.values())[0].dtype) + io_tensor_specs.append(_signature_pre_process(signature.signature_def)) + input_tensor_name = '' + input_tensor_alias = '' + output_alias_tensor_names = {} + for io_tensor_spec in io_tensor_specs: + if not input_tensor_name: + input_tensor_name = io_tensor_spec.input_tensor_name + input_tensor_alias = io_tensor_spec.input_tensor_alias + elif input_tensor_name != io_tensor_spec.input_tensor_name: + raise ValueError('Input tensor must be the same for all Signatures.') + for alias, tensor_name in io_tensor_spec.output_alias_tensor_names.items( + ): + output_alias_tensor_names[alias] = tensor_name + if (not output_alias_tensor_names or not input_tensor_name or + not input_tensor_alias): + raise ValueError('No valid fetch tensors or feed 
tensors.') + return _IOTensorSpec(input_tensor_alias, input_tensor_name, + output_alias_tensor_names) + + def _has_tpu_tag(self) -> bool: + return (len(self._tags) == 2 and tf.saved_model.SERVING in self._tags and + tf.saved_model.TPU in self._tags) + + def run_inference(self, elements: List[str]) -> Mapping[Text, np.ndarray]: + self._check_elements(elements) + outputs = self._run_tf_operations(elements) + return outputs + + def _run_tf_operations(self, elements: List[str]) -> Mapping[Text, np.ndarray]: + result = self._session.run( + self._io_tensor_spec.output_alias_tensor_names, + feed_dict={self._io_tensor_spec.input_tensor_name: elements}) + if len(result) != len(self._io_tensor_spec.output_alias_tensor_names): + raise RuntimeError('Output length does not match fetches') + return result + + def _check_elements( + self, elements: List[Union[tf.train.Example, + tf.train.SequenceExample]]) -> None: + """Unimplemented.""" + + raise NotImplementedError + + +@beam.typehints.with_input_types(List[str]) +@beam.typehints.with_output_types(Tuple[tf.train.Example, + classification_pb2.Classifications]) +class _BatchClassifyDoFn(_BaseBatchSavedModelDoFn): + """A DoFn that run inference on classification model.""" + + def setup(self): + signature_def = self._signatures[0].signature_def + if signature_def.method_name != tf.saved_model.CLASSIFY_METHOD_NAME: + raise ValueError( + 'BulkInferrerClassifyDoFn requires signature method ' + 'name %s, got: %s' % tf.saved_model.CLASSIFY_METHOD_NAME, + signature_def.method_name) + super(_BatchClassifyDoFn, self).setup() + + def _check_elements( + self, elements: List[Union[tf.train.Example, + tf.train.SequenceExample]]) -> None: + if not all(isinstance(element, tf.train.Example) for element in elements): + raise ValueError('Classify only supports tf.train.Example') + + def _post_process( + self, elements: Sequence[tf.train.Example], outputs: Mapping[Text, + np.ndarray] + ) -> Iterable[Tuple[tf.train.Example, 
classification_pb2.Classifications]]: + classifications = _post_process_classify( + self._io_tensor_spec.output_alias_tensor_names, elements, outputs) + return zip(elements, classifications) + + +@beam.typehints.with_input_types(List[str]) +@beam.typehints.with_output_types(Tuple[tf.train.Example, + regression_pb2.Regression]) +class _BatchRegressDoFn(_BaseBatchSavedModelDoFn): + """A DoFn that run inference on regression model.""" + + def setup(self): + super(_BatchRegressDoFn, self).setup() + + def _check_elements( + self, elements: List[Union[tf.train.Example, + tf.train.SequenceExample]]) -> None: + if not all(isinstance(element, tf.train.Example) for element in elements): + raise ValueError('Regress only supports tf.train.Example') + + def _post_process( + self, elements: Sequence[tf.train.Example], outputs: Mapping[Text, + np.ndarray] + ) -> Iterable[Tuple[tf.train.Example, regression_pb2.Regression]]: + regressions = _post_process_regress(elements, outputs) + return zip(elements, regressions) + + +@beam.typehints.with_input_types(List[str]) +@beam.typehints.with_output_types(prediction_log_pb2.PredictLog) +class _BatchPredictDoFn(_BaseBatchSavedModelDoFn): + """A DoFn that runs inference on predict model.""" + + def setup(self): + signature_def = self._signatures[0].signature_def + if signature_def.method_name != tf.saved_model.PREDICT_METHOD_NAME: + raise ValueError( + 'BulkInferrerPredictDoFn requires signature method ' + 'name %s, got: %s' % tf.saved_model.PREDICT_METHOD_NAME, + signature_def.method_name) + super(_BatchPredictDoFn, self).setup() + + def _check_elements( + self, elements: List[Union[tf.train.Example, + tf.train.SequenceExample]]) -> None: + pass + + def _post_process( + self, elements: Union[Sequence[tf.train.Example], + Sequence[tf.train.SequenceExample]], + outputs: Mapping[Text, np.ndarray] + ) -> Iterable[prediction_log_pb2.PredictLog]: + input_tensor_alias = self._io_tensor_spec.input_tensor_alias + signature_name = 
self._signatures[0].name + batch_size = len(elements) + for output_alias, output in outputs.items(): + if len(output.shape) < 1 or output.shape[0] != batch_size: + raise ValueError( + 'Expected output tensor %s to have at least one ' + 'dimension, with the first having a size equal to the input batch ' + 'size %s. Instead found %s' % + (output_alias, batch_size, output.shape)) + predict_log_tmpl = prediction_log_pb2.PredictLog() + predict_log_tmpl.request.model_spec.signature_name = signature_name + predict_log_tmpl.response.model_spec.signature_name = signature_name + input_tensor_proto = predict_log_tmpl.request.inputs[input_tensor_alias] + input_tensor_proto.dtype = tf.string.as_datatype_enum + input_tensor_proto.tensor_shape.dim.add().size = 1 + + result = [] + for i in range(batch_size): + predict_log = prediction_log_pb2.PredictLog() + predict_log.CopyFrom(predict_log_tmpl) + predict_log.request.inputs[input_tensor_alias].string_val.append( + elements[i].SerializeToString()) + for output_alias, output in outputs.items(): + # Mimic tensor::Split + tensor_proto = tf.make_tensor_proto( + values=output[i], + dtype=tf.as_dtype(output[i].dtype).as_datatype_enum, + shape=np.expand_dims(output[i], axis=0).shape) + predict_log.response.outputs[output_alias].CopyFrom(tensor_proto) + result.append(predict_log) + return result + + +@beam.typehints.with_input_types(List[str]) +@beam.typehints.with_output_types(Tuple[tf.train.Example, + inference_pb2.MultiInferenceResponse]) +class _BatchMultiInferenceDoFn(_BaseBatchSavedModelDoFn): + """A DoFn that runs inference on multi-head model.""" + + def _check_elements( + self, elements: List[Union[tf.train.Example, + tf.train.SequenceExample]]) -> None: + if not all(isinstance(element, tf.train.Example) for element in elements): + raise ValueError('Multi inference only supports tf.train.Example') + + def _post_process( + self, elements: Sequence[tf.train.Example], outputs: Mapping[Text, + np.ndarray] + ) -> 
Iterable[Tuple[tf.train.Example, inference_pb2.MultiInferenceResponse]]: + classifications = None + regressions = None + for signature in self._signatures: + signature_def = signature.signature_def + if signature_def.method_name == tf.saved_model.CLASSIFY_METHOD_NAME: + classifications = _post_process_classify( + self._io_tensor_spec.output_alias_tensor_names, elements, outputs) + elif signature_def.method_name == tf.saved_model.REGRESS_METHOD_NAME: + regressions = _post_process_regress(elements, outputs) + else: + raise ValueError('Signature method %s is not supported for ' + 'multi inference' % signature_def.method_name) + result = [] + for i in range(len(elements)): + response = inference_pb2.MultiInferenceResponse() + for signature in self._signatures: + signature_def = signature.signature_def + inference_result = response.results.add() + if (signature_def.method_name == tf.saved_model.CLASSIFY_METHOD_NAME and + classifications): + inference_result.classification_result.classifications.add().CopyFrom( + classifications[i]) + elif ( + signature_def.method_name == tf.saved_model.REGRESS_METHOD_NAME and + regressions): + inference_result.regression_result.regressions.add().CopyFrom( + regressions[i]) + else: + raise ValueError('Signature method %s is not supported for ' + 'multi inference' % signature_def.method_name) + inference_result.model_spec.signature_name = signature.name + if len(response.results) != len(self._signatures): + raise RuntimeError('Multi inference response result length does not ' + 'match the number of signatures') + result.append((elements[i], response)) + return result + + +@beam.typehints.with_input_types(Tuple[tf.train.Example, + classification_pb2.Classifications]) +@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) +class _BuildPredictionLogForClassificationsDoFn(beam.DoFn): + """A DoFn that builds prediction log from classifications.""" + + def process( + self, element: Tuple[tf.train.Example, 
classification_pb2.Classifications] + ) -> Iterable[prediction_log_pb2.PredictionLog]: + (train_example, classifications) = element + result = prediction_log_pb2.PredictionLog() + result.classify_log.request.input.example_list.examples.add().CopyFrom( + train_example) + result.classify_log.response.result.classifications.add().CopyFrom( + classifications) + yield result + + +@beam.typehints.with_input_types(Tuple[tf.train.Example, + regression_pb2.Regression]) +@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) +class _BuildPredictionLogForRegressionsDoFn(beam.DoFn): + """A DoFn that builds prediction log from regressions.""" + + def process( + self, element: Tuple[tf.train.Example, regression_pb2.Regression] + ) -> Iterable[prediction_log_pb2.PredictionLog]: + (train_example, regression) = element + result = prediction_log_pb2.PredictionLog() + result.regress_log.request.input.example_list.examples.add().CopyFrom( + train_example) + result.regress_log.response.result.regressions.add().CopyFrom(regression) + yield result + + +@beam.typehints.with_input_types(prediction_log_pb2.PredictLog) +@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) +class _BuildPredictionLogForPredictionsDoFn(beam.DoFn): + """A DoFn that builds prediction log from predictions.""" + + def process( + self, element: prediction_log_pb2.PredictLog + ) -> Iterable[prediction_log_pb2.PredictionLog]: + result = prediction_log_pb2.PredictionLog() + result.predict_log.CopyFrom(element) + yield result + + +@beam.typehints.with_input_types(Tuple[tf.train.Example, + inference_pb2.MultiInferenceResponse]) +@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) +class _BuildMultiInferenceLogDoFn(beam.DoFn): + """A DoFn that builds prediction log from multi-head inference result.""" + + def process( + self, element: Tuple[tf.train.Example, + inference_pb2.MultiInferenceResponse] + ) -> Iterable[prediction_log_pb2.PredictionLog]: + (train_example, 
multi_inference_response) = element + result = prediction_log_pb2.PredictionLog() + (result.multi_inference_log.request.input.example_list.examples.add() + .CopyFrom(train_example)) + result.multi_inference_log.response.CopyFrom(multi_inference_response) + yield result + + +# TODO (Maxine): moving these into class? +def _post_process_classify( + output_alias_tensor_names: Mapping[Text, Text], + elements: Sequence[tf.train.Example], outputs: Mapping[Text, np.ndarray] +) -> Sequence[classification_pb2.Classifications]: + """Returns classifications from inference output.""" + + # This is to avoid error "The truth value of an array with + # more than one element is ambiguous." + has_classes = False + has_scores = False + if tf.saved_model.CLASSIFY_OUTPUT_CLASSES in output_alias_tensor_names: + classes = outputs[tf.saved_model.CLASSIFY_OUTPUT_CLASSES] + has_classes = True + if tf.saved_model.CLASSIFY_OUTPUT_SCORES in output_alias_tensor_names: + scores = outputs[tf.saved_model.CLASSIFY_OUTPUT_SCORES] + has_scores = True + if has_classes: + if classes.ndim != 2: + raise ValueError('Expected Tensor shape: [batch_size num_classes] but ' + 'got %s' % classes.shape) + if classes.dtype != tf.string.as_numpy_dtype: + raise ValueError('Expected classes Tensor of %s. Got: %s' % + (tf.string.as_numpy_dtype, classes.dtype)) + if classes.shape[0] != len(elements): + raise ValueError('Expected classes output batch size of %s, got %s' % + (len(elements), classes.shape[0])) + if has_scores: + if scores.ndim != 2: + raise ValueError("""Expected Tensor shape: [batch_size num_classes] but + got %s""" % scores.shape) + if scores.dtype != tf.float32.as_numpy_dtype: + raise ValueError('Expected classes Tensor of %s. 
Got: %s' % + (tf.float32.as_numpy_dtype, scores.dtype)) + if scores.shape[0] != len(elements): + raise ValueError('Expected classes output batch size of %s, got %s' % + (len(elements), scores.shape[0])) + num_classes = 0 + if has_classes and has_scores: + if scores.shape[1] != classes.shape[1]: + raise ValueError('Tensors class and score should match in shape[1]. ' + 'Got %s vs %s' % (classes.shape[1], scores.shape[1])) + num_classes = classes.shape[1] + elif has_classes: + num_classes = classes.shape[1] + elif has_scores: + num_classes = scores.shape[1] + + result = [] + for i in range(len(elements)): + a_classification = classification_pb2.Classifications() + for c in range(num_classes): + a_class = a_classification.classes.add() + if has_classes: + a_class.label = classes[i][c] + if has_scores: + a_class.score = scores[i][c] + result.append(a_classification) + if len(result) != len(elements): + raise RuntimeError('Classifications length does not match elements') + return result + + +def _post_process_regress( + elements: Sequence[tf.train.Example], + outputs: Mapping[Text, np.ndarray]) -> Sequence[regression_pb2.Regression]: + """Returns regressions from inference output.""" + + if tf.saved_model.REGRESS_OUTPUTS not in outputs: + raise ValueError('No regression outputs found in outputs: %s' % + outputs.keys()) + output = outputs[tf.saved_model.REGRESS_OUTPUTS] + batch_size = len(elements) + if not (output.ndim == 1 or (output.ndim == 2 and output.shape[1] == 1)): + raise ValueError("""Expected output Tensor shape to be either [batch_size] + or [batch_size, 1] but got %s""" % output.shape) + if batch_size != output.shape[0]: + raise ValueError( + 'Input batch size did not match output batch size: %s vs %s' % + (batch_size, output.shape[0])) + if output.dtype != tf.float32.as_numpy_dtype: + raise ValueError('Expected output Tensor of %s. 
Got: %s' % + (tf.float32.as_numpy_dtype, output.dtype)) + if output.size != batch_size: + raise ValueError('Expected output batch size to be %s. Got: %s' % + (batch_size, output.size)) + flatten_output = output.flatten() + result = [] + for regression_result in flatten_output: + regression = regression_pb2.Regression() + regression.value = regression_result + result.append(regression) + + # Add additional check to save downstream consumer checks. + if len(result) != len(elements): + raise RuntimeError('Regression length does not match elements') + return result + + + def _signature_pre_process(signature: _SignatureDef) -> _IOTensorSpec: + """Returns IOTensorSpec from signature.""" + + if len(signature.inputs) != 1: + raise ValueError('Signature should have 1 and only 1 inputs') + input_tensor_alias = list(signature.inputs.keys())[0] + if list(signature.inputs.values())[0].dtype != tf.string.as_datatype_enum: + raise ValueError( + 'Input dtype is expected to be %s, got %s' % + (tf.string.as_datatype_enum, list(signature.inputs.values())[0].dtype)) + if signature.method_name == tf.saved_model.CLASSIFY_METHOD_NAME: + input_tensor_name, output_alias_tensor_names = ( + _signature_pre_process_classify(signature)) + elif signature.method_name == tf.saved_model.PREDICT_METHOD_NAME: + input_tensor_name, output_alias_tensor_names = ( + _signature_pre_process_predict(signature)) + elif signature.method_name == tf.saved_model.REGRESS_METHOD_NAME: + input_tensor_name, output_alias_tensor_names = ( + _signature_pre_process_regress(signature)) + else: + raise ValueError('Signature method %s is not supported' % + signature.method_name) + return _IOTensorSpec(input_tensor_alias, input_tensor_name, + output_alias_tensor_names) + + + def _signature_pre_process_classify( + signature: _SignatureDef) -> Tuple[Text, Mapping[Text, Text]]: + """Returns input tensor name and output alias tensor names from signature. 
+ + Args: + signature: SignatureDef + + Returns: + A tuple of input tensor name and output alias tensor names. + """ + + if len(signature.outputs) != 1 and len(signature.outputs) != 2: + raise ValueError('Classify signature should have 1 or 2 outputs') + if tf.saved_model.CLASSIFY_INPUTS not in signature.inputs: + raise ValueError('No classification inputs found in SignatureDef: %s' % + signature.inputs) + input_tensor_name = signature.inputs[tf.saved_model.CLASSIFY_INPUTS].name + output_alias_tensor_names = {} + if (tf.saved_model.CLASSIFY_OUTPUT_CLASSES not in signature.outputs and + tf.saved_model.CLASSIFY_OUTPUT_SCORES not in signature.outputs): + raise ValueError( + """Expected classification signature outputs to contain at + least one of %s or %s. Signature was: %s""" % + (tf.saved_model.CLASSIFY_OUTPUT_CLASSES, + tf.saved_model.CLASSIFY_OUTPUT_SCORES, signature)) + if tf.saved_model.CLASSIFY_OUTPUT_CLASSES in signature.outputs: + output_alias_tensor_names[tf.saved_model.CLASSIFY_OUTPUT_CLASSES] = ( + signature.outputs[tf.saved_model.CLASSIFY_OUTPUT_CLASSES].name) + if tf.saved_model.CLASSIFY_OUTPUT_SCORES in signature.outputs: + output_alias_tensor_names[tf.saved_model.CLASSIFY_OUTPUT_SCORES] = ( + signature.outputs[tf.saved_model.CLASSIFY_OUTPUT_SCORES].name) + return input_tensor_name, output_alias_tensor_names + + + def _signature_pre_process_predict( + signature: _SignatureDef) -> Tuple[Text, Mapping[Text, Text]]: + """Returns input tensor name and output alias tensor names from signature. + + Args: + signature: SignatureDef + + Returns: + A tuple of input tensor name and output alias tensor names. 
+ """ + + input_tensor_name = list(signature.inputs.values())[0].name + output_alias_tensor_names = dict([ + (key, output.name) for key, output in signature.outputs.items() + ]) + return input_tensor_name, output_alias_tensor_names + + +def _signature_pre_process_regress( + signature: _SignatureDef) -> Tuple[Text, Mapping[Text, Text]]: + """Returns input tensor name and output alias tensor names from signature. + + Args: + signature: SignatureDef + + Returns: + A tuple of input tensor name and output alias tensor names. + """ + + if len(signature.outputs) != 1: + raise ValueError('Regress signature should have 1 output') + if tf.saved_model.REGRESS_INPUTS not in signature.inputs: + raise ValueError('No regression inputs found in SignatureDef: %s' % + signature.inputs) + input_tensor_name = signature.inputs[tf.saved_model.REGRESS_INPUTS].name + if tf.saved_model.REGRESS_OUTPUTS not in signature.outputs: + raise ValueError('No regression outputs found in SignatureDef: %s' % + signature.outputs) + output_alias_tensor_names = { + tf.saved_model.REGRESS_OUTPUTS: + signature.outputs[tf.saved_model.REGRESS_OUTPUTS].name + } + return input_tensor_name, output_alias_tensor_names + + +def _using_in_process_inference( + inference_spec_type: model_spec_pb2.InferenceSpecType) -> bool: + return inference_spec_type.WhichOneof('type') == 'saved_model_spec' + + +def _get_signatures(model_path: Text, signatures: Sequence[Text], + tags: Sequence[Text]) -> Sequence[_Signature]: + """Returns a sequence of {model_signature_name: signature}.""" + + if signatures: + signature_names = signatures + else: + signature_names = [tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY] + + saved_model_pb = loader_impl.parse_saved_model(model_path) + meta_graph_def = _get_meta_graph_def(saved_model_pb, tags) + result = [] + for signature_name in signature_names: + if signature_name in meta_graph_def.signature_def: + result.append( + _Signature(signature_name, + 
meta_graph_def.signature_def[signature_name])) + else: + raise RuntimeError('Signature %s could not be found in SavedModel' % + signature_name) + return result + + +def _get_operation_type( + inference_spec_type: model_spec_pb2.InferenceSpecType) -> Text: + if _using_in_process_inference(inference_spec_type): + signatures = _get_signatures( + inference_spec_type.saved_model_spec.model_path, + inference_spec_type.saved_model_spec.signature_name, + _get_tags(inference_spec_type)) + if not signatures: + raise ValueError('Model does not have valid signature to use') + + if len(signatures) == 1: + method_name = signatures[0].signature_def.method_name + if method_name == tf.saved_model.CLASSIFY_METHOD_NAME: + return OperationType.CLASSIFICATION + elif method_name == tf.saved_model.REGRESS_METHOD_NAME: + return OperationType.REGRESSION + elif method_name == tf.saved_model.PREDICT_METHOD_NAME: + return OperationType.PREDICTION + else: + raise ValueError('Unsupported signature method_name %s' % method_name) + else: + for signature in signatures: + method_name = signature.signature_def.method_name + if (method_name != tf.saved_model.CLASSIFY_METHOD_NAME and + method_name != tf.saved_model.REGRESS_METHOD_NAME): + raise ValueError('Unsupported signature method_name for multi-head ' + 'model inference: %s' % method_name) + return OperationType.MULTIHEAD + else: + # Remote inference supports predictions only. 
+ return OperationType.PREDICTION + + +def _get_meta_graph_def(saved_model_pb: _SavedModel, + tags: Sequence[Text]) -> _MetaGraphDef: + """Returns MetaGraphDef from SavedModel.""" + + for meta_graph_def in saved_model_pb.meta_graphs: + if set(meta_graph_def.meta_info_def.tags) == set(tags): + return meta_graph_def + raise RuntimeError('MetaGraphDef associated with tags %s could not be ' + 'found in SavedModel' % tags) + + +def _get_current_process_memory_in_bytes(): + """Returns memory usage in bytes.""" + + if resource is not None: + usage = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss + if _is_darwin(): + return usage + return usage * 1024 + else: + logging.warning('Resource module is not available for current platform, ' + 'memory usage cannot be fetched.') + return 0 + + +def _get_tags( + inference_spec_type: model_spec_pb2.InferenceSpecType) -> Sequence[Text]: + """Returns tags from ModelSpec.""" + + if inference_spec_type.saved_model_spec.tag: + return list(inference_spec_type.saved_model_spec.tag) + else: + return [tf.saved_model.SERVING] + + +def _is_darwin() -> bool: + return sys.platform == 'darwin' + + +def _is_windows() -> bool: + return platform.system() == 'Windows' or os.name == 'nt' + + +def _is_cygwin() -> bool: + return platform.system().startswith('CYGWIN_NT') + + +class _Clock(object): + + def get_current_time_in_microseconds(self) -> int: + return int(time.time() * _SECOND_TO_MICROSECOND) + + +class _FineGrainedClock(_Clock): + + def get_current_time_in_microseconds(self) -> int: + return int( + time.clock_gettime_ns(time.CLOCK_REALTIME) / # pytype: disable=module-attr + _MICROSECOND_TO_NANOSECOND) + + +class _ClockFactory(object): + + @staticmethod + def make_clock() -> _Clock: + if (hasattr(time, 'clock_gettime_ns') and not _is_windows() + and not _is_cygwin()): + return _FineGrainedClock() + return _Clock() diff --git a/tfx_bsl/beam/run_inference_arrow_test.py b/tfx_bsl/beam/run_inference_arrow_test.py new file mode 100644 index 
00000000..a4eed521 --- /dev/null +++ b/tfx_bsl/beam/run_inference_arrow_test.py @@ -0,0 +1,581 @@ +# Copyright 2019 Google LLC. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for tfx_bsl.run_inference.""" + +from __future__ import absolute_import +from __future__ import division +# Standard __future__ imports +from __future__ import print_function + +import json +import os +try: + import unittest.mock as mock +except ImportError: + import mock + +import apache_beam as beam +from apache_beam.metrics.metric import MetricsFilter +from apache_beam.testing.util import assert_that +from apache_beam.testing.util import equal_to +from googleapiclient import discovery +from googleapiclient import http +from six.moves import http_client +import tensorflow as tf +from tfx_bsl.beam import run_inference +from tfx_bsl.public.proto import model_spec_pb2 + +from google.protobuf import text_format + +from tensorflow_serving.apis import prediction_log_pb2 + + +class RunInferenceFixture(tf.test.TestCase): + + def setUp(self): + super(RunInferenceFixture, self).setUp() + self._predict_examples = [ + text_format.Parse( + """ + features { + feature { key: "input1" value { float_list { value: 0 }}} + } + """, tf.train.Example()), + ] + + def _get_output_data_dir(self, sub_dir=None): + test_dir = self._testMethodName + path = os.path.join( + os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()), + test_dir) + if not tf.io.gfile.exists(path): + 
tf.io.gfile.makedirs(path) + if sub_dir is not None: + path = os.path.join(path, sub_dir) + return path + + def _prepare_predict_examples(self, example_path): + with tf.io.TFRecordWriter(example_path) as output_file: + for example in self._predict_examples: + output_file.write(example.SerializeToString()) + + +ARROW_INPUT_COLUMN = '__raw_record__' +class RunOfflineInferenceTest(RunInferenceFixture): + + def setUp(self): + super(RunOfflineInferenceTest, self).setUp() + + self._predict_examples = [ + text_format.Parse( + """ + features { + feature { key: "input1" value { float_list { value: 0 }}} + } + """, tf.train.Example()), + text_format.Parse( + """ + features { + feature { key: "input1" value { float_list { value: 1 }}} + } + """, tf.train.Example()), + ] + self._multihead_examples = [ + text_format.Parse( + """ + features { + feature {key: "x" value { float_list { value: 0.8 }}} + feature {key: "y" value { float_list { value: 0.2 }}} + } + """, tf.train.Example()), + text_format.Parse( + """ + features { + feature {key: "x" value { float_list { value: 0.6 }}} + feature {key: "y" value { float_list { value: 0.1 }}} + } + """, tf.train.Example()), + ] + + # TODO: Ask if these example can directly transform to recordBatch + + + def _prepare_multihead_examples(self, example_path): + with tf.io.TFRecordWriter(example_path) as output_file: + for example in self._multihead_examples: + output_file.write(example.SerializeToString()) + + def _build_predict_model(self, model_path): + """Exports the dummy sum predict model.""" + + with tf.compat.v1.Graph().as_default(): + input_tensors = { + 'x': tf.compat.v1.io.FixedLenFeature( + [1], dtype=tf.float32, default_value=0) + } + serving_receiver = ( + tf.compat.v1.estimator.export.build_parsing_serving_input_receiver_fn( + input_tensors)()) + output_tensors = {'y': serving_receiver.features['x'] * 2} + sess = tf.compat.v1.Session() + sess.run(tf.compat.v1.initializers.global_variables()) + signature_def = 
tf.compat.v1.estimator.export.PredictOutput( + output_tensors).as_signature_def(serving_receiver.receiver_tensors) + builder = tf.compat.v1.saved_model.builder.SavedModelBuilder(model_path) + builder.add_meta_graph_and_variables( + sess, [tf.compat.v1.saved_model.tag_constants.SERVING], + signature_def_map={ + tf.compat.v1.saved_model.signature_constants + .DEFAULT_SERVING_SIGNATURE_DEF_KEY: + signature_def, + }) + builder.save() + + def _build_regression_signature(self, input_tensor, output_tensor): + """Helper function for building a regression SignatureDef.""" + input_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( + input_tensor) + signature_inputs = { + tf.compat.v1.saved_model.signature_constants.REGRESS_INPUTS: + input_tensor_info + } + output_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( + output_tensor) + signature_outputs = { + tf.compat.v1.saved_model.signature_constants.REGRESS_OUTPUTS: + output_tensor_info + } + return tf.compat.v1.saved_model.signature_def_utils.build_signature_def( + signature_inputs, signature_outputs, + tf.compat.v1.saved_model.signature_constants.REGRESS_METHOD_NAME) + + def _build_classification_signature(self, input_tensor, scores_tensor): + """Helper function for building a classification SignatureDef.""" + input_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( + input_tensor) + signature_inputs = { + tf.compat.v1.saved_model.signature_constants.CLASSIFY_INPUTS: + input_tensor_info + } + output_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( + scores_tensor) + signature_outputs = { + tf.compat.v1.saved_model.signature_constants.CLASSIFY_OUTPUT_SCORES: + output_tensor_info + } + return tf.compat.v1.saved_model.signature_def_utils.build_signature_def( + signature_inputs, signature_outputs, + tf.compat.v1.saved_model.signature_constants.CLASSIFY_METHOD_NAME) + + def _build_multihead_model(self, model_path): + with tf.compat.v1.Graph().as_default(): + input_example = 
tf.compat.v1.placeholder( + tf.string, name='input_examples_tensor') + config = { + 'x': tf.compat.v1.io.FixedLenFeature( + [1], dtype=tf.float32, default_value=0), + 'y': tf.compat.v1.io.FixedLenFeature( + [1], dtype=tf.float32, default_value=0), + } + features = tf.compat.v1.parse_example(input_example, config) + x = features['x'] + y = features['y'] + sum_pred = x + y + diff_pred = tf.abs(x - y) + sess = tf.compat.v1.Session() + sess.run(tf.compat.v1.initializers.global_variables()) + signature_def_map = { + 'regress_diff': + self._build_regression_signature(input_example, diff_pred), + 'classify_sum': + self._build_classification_signature(input_example, sum_pred), + tf.compat.v1.saved_model.signature_constants + .DEFAULT_SERVING_SIGNATURE_DEF_KEY: + self._build_regression_signature(input_example, sum_pred) + } + builder = tf.compat.v1.saved_model.builder.SavedModelBuilder(model_path) + builder.add_meta_graph_and_variables( + sess, [tf.compat.v1.saved_model.tag_constants.SERVING], + signature_def_map=signature_def_map) + builder.save() + + def _run_inference_with_beam(self, example_path, inference_spec_type, + prediction_log_path): + with beam.Pipeline() as pipeline: + _ = ( + pipeline + | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) + | 'ParseExamples' >> beam.Map(tf.train.Example.FromString) + | + 'RunInference' >> run_inference.RunInferenceImpl(inference_spec_type) + | 'WritePredictions' >> beam.io.WriteToTFRecord( + prediction_log_path, + coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) + + def _get_results(self, prediction_log_path): + results = [] + for f in tf.io.gfile.glob(prediction_log_path + '-?????-of-?????'): + record_iterator = tf.compat.v1.io.tf_record_iterator(path=f) + for record_string in record_iterator: + prediction_log = prediction_log_pb2.PredictionLog() + prediction_log.MergeFromString(record_string) + results.append(prediction_log) + return results + + def testModelPathInvalid(self): + example_path = 
self._get_output_data_dir('examples') + self._prepare_predict_examples(example_path) + prediction_log_path = self._get_output_data_dir('predictions') + with self.assertRaisesRegexp(IOError, 'SavedModel file does not exist.*'): + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=self._get_output_data_dir())), prediction_log_path) + + def testEstimatorModelPredict(self): + example_path = self._get_output_data_dir('examples') + self._prepare_predict_examples(example_path) + model_path = self._get_output_data_dir('model') + self._build_predict_model(model_path) + prediction_log_path = self._get_output_data_dir('predictions') + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path)), prediction_log_path) + + results = self._get_results(prediction_log_path) + self.assertLen(results, 2) + self.assertEqual( + results[0].predict_log.request.inputs[ + run_inference._DEFAULT_INPUT_KEY].string_val[0], + self._predict_examples[0].SerializeToString()) + self.assertEqual(results[0].predict_log.response.outputs['y'].dtype, + tf.float32) + self.assertLen( + results[0].predict_log.response.outputs['y'].tensor_shape.dim, 2) + self.assertEqual( + results[0].predict_log.response.outputs['y'].tensor_shape.dim[0].size, + 1) + self.assertEqual( + results[0].predict_log.response.outputs['y'].tensor_shape.dim[1].size, + 1) + + def testClassifyModel(self): + example_path = self._get_output_data_dir('examples') + self._prepare_multihead_examples(example_path) + model_path = self._get_output_data_dir('model') + self._build_multihead_model(model_path) + prediction_log_path = self._get_output_data_dir('predictions') + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path, 
signature_name=['classify_sum'])), + prediction_log_path) + + results = self._get_results(prediction_log_path) + self.assertLen(results, 2) + classify_log = results[0].classify_log + self.assertLen(classify_log.request.input.example_list.examples, 1) + self.assertEqual(classify_log.request.input.example_list.examples[0], + self._multihead_examples[0]) + self.assertLen(classify_log.response.result.classifications, 1) + self.assertLen(classify_log.response.result.classifications[0].classes, 1) + self.assertAlmostEqual( + classify_log.response.result.classifications[0].classes[0].score, 1.0) + + def testRegressModel(self): + example_path = self._get_output_data_dir('examples') + self._prepare_multihead_examples(example_path) + model_path = self._get_output_data_dir('model') + self._build_multihead_model(model_path) + prediction_log_path = self._get_output_data_dir('predictions') + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path, signature_name=['regress_diff'])), + prediction_log_path) + + results = self._get_results(prediction_log_path) + self.assertLen(results, 2) + regress_log = results[0].regress_log + self.assertLen(regress_log.request.input.example_list.examples, 1) + self.assertEqual(regress_log.request.input.example_list.examples[0], + self._multihead_examples[0]) + self.assertLen(regress_log.response.result.regressions, 1) + self.assertAlmostEqual(regress_log.response.result.regressions[0].value, + 0.6) + + def testMultiInferenceModel(self): + example_path = self._get_output_data_dir('examples') + self._prepare_multihead_examples(example_path) + model_path = self._get_output_data_dir('model') + self._build_multihead_model(model_path) + prediction_log_path = self._get_output_data_dir('predictions') + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + 
model_path=model_path, + signature_name=['regress_diff', 'classify_sum'])), + prediction_log_path) + results = self._get_results(prediction_log_path) + self.assertLen(results, 2) + multi_inference_log = results[0].multi_inference_log + self.assertLen(multi_inference_log.request.input.example_list.examples, 1) + self.assertEqual(multi_inference_log.request.input.example_list.examples[0], + self._multihead_examples[0]) + self.assertLen(multi_inference_log.response.results, 2) + signature_names = [] + for result in multi_inference_log.response.results: + signature_names.append(result.model_spec.signature_name) + self.assertIn('regress_diff', signature_names) + self.assertIn('classify_sum', signature_names) + result = multi_inference_log.response.results[0] + self.assertEqual(result.model_spec.signature_name, 'regress_diff') + self.assertLen(result.regression_result.regressions, 1) + self.assertAlmostEqual(result.regression_result.regressions[0].value, 0.6) + result = multi_inference_log.response.results[1] + self.assertEqual(result.model_spec.signature_name, 'classify_sum') + self.assertLen(result.classification_result.classifications, 1) + self.assertLen(result.classification_result.classifications[0].classes, 1) + self.assertAlmostEqual( + result.classification_result.classifications[0].classes[0].score, 1.0) + + def testKerasModelPredict(self): + inputs = tf.keras.Input(shape=(1,), name='input1') + output1 = tf.keras.layers.Dense( + 1, activation=tf.nn.sigmoid, name='output1')( + inputs) + output2 = tf.keras.layers.Dense( + 1, activation=tf.nn.sigmoid, name='output2')( + inputs) + inference_model = tf.keras.models.Model(inputs, [output1, output2]) + + class TestKerasModel(tf.keras.Model): + + def __init__(self, inference_model): + super(TestKerasModel, self).__init__(name='test_keras_model') + self.inference_model = inference_model + + @tf.function(input_signature=[ + tf.TensorSpec(shape=[None], dtype=tf.string, name='inputs') + ]) + def call(self, 
serialized_example): + features = { + 'input1': + tf.compat.v1.io.FixedLenFeature([1], + dtype=tf.float32, + default_value=0) + } + input_tensor_dict = tf.io.parse_example(serialized_example, features) + return inference_model(input_tensor_dict['input1']) + + model = TestKerasModel(inference_model) + model.compile( + optimizer=tf.keras.optimizers.Adam(lr=.001), + loss=tf.keras.losses.binary_crossentropy, + metrics=['accuracy']) + + model_path = self._get_output_data_dir('model') + tf.compat.v1.keras.experimental.export_saved_model( + model, model_path, serving_only=True) + + example_path = self._get_output_data_dir('examples') + self._prepare_predict_examples(example_path) + prediction_log_path = self._get_output_data_dir('predictions') + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path)), prediction_log_path) + + results = self._get_results(prediction_log_path) + self.assertLen(results, 2) + + def testTelemetry(self): + example_path = self._get_output_data_dir('examples') + self._prepare_multihead_examples(example_path) + model_path = self._get_output_data_dir('model') + self._build_multihead_model(model_path) + inference_spec_type = model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path, signature_name=['classify_sum'])) + pipeline = beam.Pipeline() + _ = ( + pipeline | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) + | 'ParseExamples' >> beam.Map(tf.train.Example.FromString) + | 'RunInference' >> run_inference.RunInferenceImpl(inference_spec_type)) + run_result = pipeline.run() + run_result.wait_until_finish() + + num_inferences = run_result.metrics().query( + MetricsFilter().with_name('num_inferences')) + self.assertTrue(num_inferences['counters']) + self.assertEqual(num_inferences['counters'][0].result, 2) + num_instances = run_result.metrics().query( + 
MetricsFilter().with_name('num_instances')) + self.assertTrue(num_instances['counters']) + self.assertEqual(num_instances['counters'][0].result, 2) + inference_request_batch_size = run_result.metrics().query( + MetricsFilter().with_name('inference_request_batch_size')) + self.assertTrue(inference_request_batch_size['distributions']) + self.assertEqual( + inference_request_batch_size['distributions'][0].result.sum, 2) + inference_request_batch_byte_size = run_result.metrics().query( + MetricsFilter().with_name('inference_request_batch_byte_size')) + self.assertTrue(inference_request_batch_byte_size['distributions']) + self.assertEqual( + inference_request_batch_byte_size['distributions'][0].result.sum, + sum(element.ByteSize() for element in self._multihead_examples)) + inference_batch_latency_micro_secs = run_result.metrics().query( + MetricsFilter().with_name('inference_batch_latency_micro_secs')) + self.assertTrue(inference_batch_latency_micro_secs['distributions']) + self.assertGreaterEqual( + inference_batch_latency_micro_secs['distributions'][0].result.sum, 0) + load_model_latency_milli_secs = run_result.metrics().query( + MetricsFilter().with_name('load_model_latency_milli_secs')) + self.assertTrue(load_model_latency_milli_secs['distributions']) + self.assertGreaterEqual( + load_model_latency_milli_secs['distributions'][0].result.sum, 0) + + +class RunRemoteInferenceTest(RunInferenceFixture): + + def setUp(self): + super(RunRemoteInferenceTest, self).setUp() + self.example_path = self._get_output_data_dir('example') + self._prepare_predict_examples(self.example_path) + # This is from https://ml.googleapis.com/$discovery/rest?version=v1. 
+ self._discovery_testdata_dir = os.path.join( + os.path.join(os.path.dirname(__file__), 'testdata'), + 'ml_discovery.json') + + @staticmethod + def _make_response_body(content, successful): + if successful: + response_dict = {'predictions': content} + else: + response_dict = {'error': content} + return json.dumps(response_dict) + + def _set_up_pipeline(self, inference_spec_type): + self.pipeline = beam.Pipeline() + self.pcoll = ( + self.pipeline + | 'ReadExamples' >> beam.io.ReadFromTFRecord(self.example_path) + | 'ParseExamples' >> beam.Map(tf.train.Example.FromString) + | 'RunInference' >> run_inference.RunInferenceImpl(inference_spec_type)) + + def _run_inference_with_beam(self): + self.pipeline_result = self.pipeline.run() + self.pipeline_result.wait_until_finish() + + def test_model_predict(self): + predictions = [{'output_1': [0.901], 'output_2': [0.997]}] + builder = http.RequestMockBuilder({ + 'ml.projects.predict': + (None, self._make_response_body(predictions, successful=True)) + }) + resource = discovery.build( + 'ml', + 'v1', + http=http.HttpMock(self._discovery_testdata_dir, + {'status': http_client.OK}), + requestBuilder=builder) + with mock.patch('googleapiclient.discovery.' 
'build') as response_mock: + response_mock.side_effect = lambda service, version: resource + inference_spec_type = model_spec_pb2.InferenceSpecType( + ai_platform_prediction_model_spec=model_spec_pb2 + .AIPlatformPredictionModelSpec( + project_id='test-project', + model_name='test-model', + )) + + prediction_log = prediction_log_pb2.PredictionLog() + prediction_log.predict_log.response.outputs['output_1'].CopyFrom( + tf.make_tensor_proto(values=[0.901], dtype=tf.double, shape=(1, 1))) + prediction_log.predict_log.response.outputs['output_2'].CopyFrom( + tf.make_tensor_proto(values=[0.997], dtype=tf.double, shape=(1, 1))) + + self._set_up_pipeline(inference_spec_type) + assert_that(self.pcoll, equal_to([prediction_log])) + self._run_inference_with_beam() + + def test_exception_raised_when_response_body_contains_error_entry(self): + error_msg = 'Base64 decode failed.' + builder = http.RequestMockBuilder({ + 'ml.projects.predict': + (None, self._make_response_body(error_msg, successful=False)) + }) + resource = discovery.build( + 'ml', + 'v1', + http=http.HttpMock(self._discovery_testdata_dir, + {'status': http_client.OK}), + requestBuilder=builder) + with mock.patch('googleapiclient.discovery.' 
'build') as response_mock: + response_mock.side_effect = lambda service, version: resource + inference_spec_type = model_spec_pb2.InferenceSpecType( + ai_platform_prediction_model_spec=model_spec_pb2 + .AIPlatformPredictionModelSpec( + project_id='test-project', + model_name='test-model', + )) + + try: + self._set_up_pipeline(inference_spec_type) + self._run_inference_with_beam() + except ValueError as exc: + actual_error_msg = str(exc) + self.assertTrue(actual_error_msg.startswith(error_msg)) + else: + self.fail('Test was expected to throw ValueError exception') + + def test_exception_raised_when_project_id_is_empty(self): + inference_spec_type = model_spec_pb2.InferenceSpecType( + ai_platform_prediction_model_spec=model_spec_pb2 + .AIPlatformPredictionModelSpec(model_name='test-model',)) + + with self.assertRaises(ValueError): + self._set_up_pipeline(inference_spec_type) + self._run_inference_with_beam() + + def test_request_body_with_binary_data(self): + example = text_format.Parse( + """ + features { + feature { key: "x_bytes" value { bytes_list { value: ["ASa8asdf"] }}} + feature { key: "x" value { bytes_list { value: "JLK7ljk3" }}} + feature { key: "y" value { int64_list { value: [1, 2] }}} + } + """, tf.train.Example()) + result = list( + run_inference._RemotePredictDoFn._prepare_instances([example])) + self.assertEqual([ + { + 'x_bytes': { + 'b64': 'QVNhOGFzZGY=' + }, + 'x': 'JLK7ljk3', + 'y': [1, 2] + }, + ], result) + + +if __name__ == '__main__': + tf.test.main() From 1d553019cc0d48409553716f3e3389825d633203 Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Wed, 27 May 2020 11:57:39 -0400 Subject: [PATCH 2/8] make master the same as before for comparison --- tfx_bsl/beam/run_inference_arrow.py | 72 ++++++++++++++---------- tfx_bsl/beam/run_inference_arrow_test.py | 6 +- 2 files changed, 43 insertions(+), 35 deletions(-) diff --git a/tfx_bsl/beam/run_inference_arrow.py b/tfx_bsl/beam/run_inference_arrow.py index 316b65a5..98e45148 100644 --- 
a/tfx_bsl/beam/run_inference_arrow.py +++ b/tfx_bsl/beam/run_inference_arrow.py @@ -32,7 +32,6 @@ from absl import logging import apache_beam as beam -import pyarrow as pa from apache_beam.options.pipeline_options import GoogleCloudOptions from apache_beam.options.pipeline_options import PipelineOptions from apache_beam.utils import retry @@ -80,7 +79,6 @@ _MetaGraphDef = Any _SavedModel = Any -# TODO (Maxine): what is this? _BulkInferResult = Union[prediction_log_pb2.PredictLog, Tuple[tf.train.Example, regression_pb2.Regression], Tuple[tf.train.Example, @@ -97,11 +95,9 @@ class OperationType(object): MULTIHEAD = 'MULTIHEAD' -# TODO (Me): pTransform from examples/sequence example here - -# TODO (Me): Union[bytes, pa.RecordBatch]? @beam.ptransform_fn -@beam.typehints.with_input_types(pa.RecordBatch) +@beam.typehints.with_input_types(Union[tf.train.Example, + tf.train.SequenceExample]) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def RunInferenceImpl( # pylint: disable=invalid-name examples: beam.pvalue.PCollection, @@ -110,7 +106,7 @@ def RunInferenceImpl( # pylint: disable=invalid-name """Implementation of RunInference API. Args: - examples: A PCollection containing RecordBatch. + examples: A PCollection containing examples. inference_spec_type: Model inference endpoint. 
Returns: @@ -144,7 +140,8 @@ def RunInferenceImpl( # pylint: disable=invalid-name @beam.ptransform_fn -@beam.typehints.with_input_types(pa.RecordBatch) +@beam.typehints.with_input_types(Union[tf.train.Example, + tf.train.SequenceExample]) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _Classify(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name inference_spec_type: model_spec_pb2.InferenceSpecType): @@ -160,7 +157,8 @@ def _Classify(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name @beam.ptransform_fn -@beam.typehints.with_input_types(pa.RecordBatch) +@beam.typehints.with_input_types(Union[tf.train.Example, + tf.train.SequenceExample]) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _Regress(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name inference_spec_type: model_spec_pb2.InferenceSpecType): @@ -176,7 +174,8 @@ def _Regress(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name @beam.ptransform_fn -@beam.typehints.with_input_types(pa.RecordBatch) +@beam.typehints.with_input_types(Union[tf.train.Example, + tf.train.SequenceExample]) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _Predict(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name inference_spec_type: model_spec_pb2.InferenceSpecType): @@ -197,7 +196,8 @@ def _Predict(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name @beam.ptransform_fn -@beam.typehints.with_input_types(pa.RecordBatch) +@beam.typehints.with_input_types(Union[tf.train.Example, + tf.train.SequenceExample]) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _MultiInference(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name inference_spec_type: model_spec_pb2.InferenceSpecType): @@ -261,7 +261,9 @@ def update_metrics_with_cache(self): self._model_byte_size.update(self.model_byte_size_cache) self.model_byte_size_cache = None - def update(self, elements: List[str], 
latency_micro_secs: int) -> None: + def update(self, elements: List[Union[tf.train.Example, + tf.train.SequenceExample]], + latency_micro_secs: int) -> None: self._inference_batch_latency_micro_secs.update(latency_micro_secs) self._num_instances.inc(len(elements)) self._inference_counter.inc(len(elements)) @@ -278,14 +280,11 @@ def setup(self): self._clock = _ClockFactory.make_clock() def process( - self, elements: pa.RecordBatch + self, elements: List[Union[tf.train.Example, tf.train.SequenceExample]] ) -> Iterable[Any]: batch_start_time = self._clock.get_current_time_in_microseconds() - # TODO (Maxine): set ARROW_INPUT_COLUMN or take as a parameter - # extract record batch from here, assuming first column - serialized_examples = elements.column(0) - outputs = self.run_inference(serialized_examples) - result = self._post_process(serialized_examples, outputs) + outputs = self.run_inference(elements) + result = self._post_process(elements, outputs) self._metrics_collector.update( elements, self._clock.get_current_time_in_microseconds() - batch_start_time) @@ -296,12 +295,14 @@ def finish_bundle(self): @abc.abstractmethod def run_inference( - self, elements: List[str] + self, elements: List[Union[tf.train.Example, tf.train.SequenceExample]] ) -> Union[Mapping[Text, np.ndarray], Sequence[Mapping[Text, Any]]]: raise NotImplementedError @abc.abstractmethod - def _post_process(self, elements: List[str], outputs: Any) -> Iterable[Any]: + def _post_process(self, elements: List[Union[tf.train.Example, + tf.train.SequenceExample]], + outputs: Any) -> Iterable[Any]: raise NotImplementedError @@ -320,8 +321,9 @@ def _retry_on_unavailable_and_resource_error_filter(exception: Exception): return (isinstance(exception, googleapiclient.errors.HttpError) and exception.resp.status in (503, 429)) -# TODO (Maxine): change all example to serialized -@beam.typehints.with_input_types(List[str]) + +@beam.typehints.with_input_types(List[Union[tf.train.Example, + tf.train.SequenceExample]]) 
# Using output typehints triggers NotImplementedError('BEAM-2717)' on # streaming mode on Dataflow runner. # TODO(b/151468119): Consider to re-batch with online serving request size @@ -578,15 +580,22 @@ def _has_tpu_tag(self) -> bool: return (len(self._tags) == 2 and tf.saved_model.SERVING in self._tags and tf.saved_model.TPU in self._tags) - def run_inference(self, elements: List[str]) -> Mapping[Text, np.ndarray]: + def run_inference( + self, elements: List[Union[tf.train.Example, tf.train.SequenceExample]] + ) -> Mapping[Text, np.ndarray]: self._check_elements(elements) outputs = self._run_tf_operations(elements) return outputs - def _run_tf_operations(self, elements: List[str]) -> Mapping[Text, np.ndarray]: + def _run_tf_operations( + self, elements: List[Union[tf.train.Example, tf.train.SequenceExample]] + ) -> Mapping[Text, np.ndarray]: + input_values = [] + for element in elements: + input_values.append(element.SerializeToString()) result = self._session.run( self._io_tensor_spec.output_alias_tensor_names, - feed_dict={self._io_tensor_spec.input_tensor_name: elements}) + feed_dict={self._io_tensor_spec.input_tensor_name: input_values}) if len(result) != len(self._io_tensor_spec.output_alias_tensor_names): raise RuntimeError('Output length does not match fetches') return result @@ -599,7 +608,8 @@ def _check_elements( raise NotImplementedError -@beam.typehints.with_input_types(List[str]) +@beam.typehints.with_input_types(List[Union[tf.train.Example, + tf.train.SequenceExample]]) @beam.typehints.with_output_types(Tuple[tf.train.Example, classification_pb2.Classifications]) class _BatchClassifyDoFn(_BaseBatchSavedModelDoFn): @@ -629,7 +639,8 @@ def _post_process( return zip(elements, classifications) -@beam.typehints.with_input_types(List[str]) +@beam.typehints.with_input_types(List[Union[tf.train.Example, + tf.train.SequenceExample]]) @beam.typehints.with_output_types(Tuple[tf.train.Example, regression_pb2.Regression]) class 
_BatchRegressDoFn(_BaseBatchSavedModelDoFn): @@ -652,7 +663,8 @@ def _post_process( return zip(elements, regressions) -@beam.typehints.with_input_types(List[str]) +@beam.typehints.with_input_types(List[Union[tf.train.Example, + tf.train.SequenceExample]]) @beam.typehints.with_output_types(prediction_log_pb2.PredictLog) class _BatchPredictDoFn(_BaseBatchSavedModelDoFn): """A DoFn that runs inference on predict model.""" @@ -710,7 +722,8 @@ def _post_process( return result -@beam.typehints.with_input_types(List[str]) +@beam.typehints.with_input_types(List[Union[tf.train.Example, + tf.train.SequenceExample]]) @beam.typehints.with_output_types(Tuple[tf.train.Example, inference_pb2.MultiInferenceResponse]) class _BatchMultiInferenceDoFn(_BaseBatchSavedModelDoFn): @@ -830,7 +843,6 @@ def process( yield result -# TODO (Maxine): moving these into class? def _post_process_classify( output_alias_tensor_names: Mapping[Text, Text], elements: Sequence[tf.train.Example], outputs: Mapping[Text, np.ndarray] diff --git a/tfx_bsl/beam/run_inference_arrow_test.py b/tfx_bsl/beam/run_inference_arrow_test.py index a4eed521..ce9ac4d0 100644 --- a/tfx_bsl/beam/run_inference_arrow_test.py +++ b/tfx_bsl/beam/run_inference_arrow_test.py @@ -71,12 +71,10 @@ def _prepare_predict_examples(self, example_path): output_file.write(example.SerializeToString()) -ARROW_INPUT_COLUMN = '__raw_record__' class RunOfflineInferenceTest(RunInferenceFixture): - + def setUp(self): super(RunOfflineInferenceTest, self).setUp() - self._predict_examples = [ text_format.Parse( """ @@ -108,8 +106,6 @@ def setUp(self): """, tf.train.Example()), ] - # TODO: Ask if these example can directly transform to recordBatch - def _prepare_multihead_examples(self, example_path): with tf.io.TFRecordWriter(example_path) as output_file: From de87e4c2a98daa27acb2253d8fd1b24c441a24eb Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Mon, 20 Jul 2020 15:34:42 -0400 Subject: [PATCH 3/8] add benchmarks for the 2 modules --- 
.../run_inference_arrow_benchmark.py | 87 +++++++++++++++++++ .../benchmarks/run_inference_benchmark.py | 84 ++++++++++++++++++ 2 files changed, 171 insertions(+) create mode 100644 tfx_bsl/beam/benchmarks/run_inference_arrow_benchmark.py create mode 100644 tfx_bsl/beam/benchmarks/run_inference_benchmark.py diff --git a/tfx_bsl/beam/benchmarks/run_inference_arrow_benchmark.py b/tfx_bsl/beam/benchmarks/run_inference_arrow_benchmark.py new file mode 100644 index 00000000..3e57944e --- /dev/null +++ b/tfx_bsl/beam/benchmarks/run_inference_arrow_benchmark.py @@ -0,0 +1,87 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Script to use run_inference_arrow from command line +Below is a complete command line for running this script +for benchmarks + +python3 run_inference_arrow_benchemark.py \ +PATH_TO_MODEL \ +PATH_TO_DATA \ +--output gs://YOUR_BUCKET/results/output \ +--project YOUR_PROJECT \ +--runner DataflowRunner \ +--temp_location gs://YOUR_BUCKET/temp \ +--job_name run-inference-arrow-metrics \ +--region us-central1 +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import apache_beam as beam +from tfx_bsl.tfxio import raw_tf_record +from tfx_bsl.beam import run_inference_arrow +from tfx_bsl.public.proto import model_spec_pb2 +from apache_beam.options.pipeline_options import PipelineOptions +from apache_beam.options.pipeline_options import SetupOptions + + +def run(argv=None, save_main_session=True): + """Main entry point; defines and runs the user_score pipeline.""" + parser = argparse.ArgumentParser() + + parser.add_argument( + 'model_path', + type=str, + help='The path to input model') + parser.add_argument( + 'input', + type=str, + help='Path to the data file(s) containing data.') + parser.add_argument( + '--output', + type=str, + required=True, + help='Path to the output file(s).') + + args, pipeline_args = parser.parse_known_args(argv) + options = PipelineOptions(pipeline_args) + + setup_options = options.view_as(SetupOptions) + # Path of the wheel file tfx-bsl + setup_options.extra_packages = ['./tfx-bsl/dist/tfx_bsl-0.23.0.dev0-cp37-cp37m-linux_x86_64.whl'] + setup_options.save_main_session = save_main_session + + def get_saved_model_spec(model_path): + '''returns an InferenceSpecType object for a saved model path''' + return model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path)) + + inference_spec_type = get_saved_model_spec(args.model_path) + converter = raw_tf_record.RawTfRecordTFXIO( + args.input, 
raw_record_column_name='__RAW_RECORD__') + + with beam.Pipeline(options=options) as p: + (p + | "GetRawRecordAndConvertToRecordBatch" >> converter.BeamSource() + | 'RunInferenceImpl' >> run_inference_arrow.RunInferenceImpl( + inference_spec_type)) + + +if __name__ == '__main__': + run() diff --git a/tfx_bsl/beam/benchmarks/run_inference_benchmark.py b/tfx_bsl/beam/benchmarks/run_inference_benchmark.py new file mode 100644 index 00000000..d42ab62d --- /dev/null +++ b/tfx_bsl/beam/benchmarks/run_inference_benchmark.py @@ -0,0 +1,84 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+"""Script to use run_inference from command line
+Below is a complete command line for running this script
+for benchmarks
+
+python3 run_inference_benchmark.py \
+PATH_TO_MODEL \
+PATH_TO_DATA \
+--output gs://YOUR_BUCKET/results/output \
+--project YOUR_PROJECT \
+--runner DataflowRunner \
+--temp_location gs://YOUR_BUCKET/temp \
+--job_name run-inference-metrics \
+--region us-central1
+"""
+
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import argparse
+import apache_beam as beam
+from tfx_bsl.beam import run_inference
+from tfx_bsl.public.proto import model_spec_pb2
+from apache_beam.options.pipeline_options import PipelineOptions
+from apache_beam.options.pipeline_options import SetupOptions
+
+
+def run(argv=None, save_main_session=True):
+  """Main entry point; defines and runs the inference pipeline."""
+  parser = argparse.ArgumentParser()
+
+  parser.add_argument(
+      'model_path',
+      type=str,
+      help='The path to input model')
+  parser.add_argument(
+      'input',
+      type=str,
+      help='Path to the data file(s) containing game data.')
+  parser.add_argument(
+      '--output',
+      type=str,
+      required=True,
+      help='Path to the output file(s).')
+
+  args, pipeline_args = parser.parse_known_args(argv)
+  options = PipelineOptions(pipeline_args)
+
+  setup_options = options.view_as(SetupOptions)
+  # Path of the wheel file tfx-bsl
+  setup_options.extra_packages = ['./tfx-bsl/dist/tfx_bsl-0.23.0.dev0-cp37-cp37m-linux_x86_64.whl']
+  setup_options.save_main_session = save_main_session
+
+  def get_saved_model_spec(model_path):
+    '''returns an InferenceSpecType object for a saved model path'''
+    return model_spec_pb2.InferenceSpecType(
+        saved_model_spec=model_spec_pb2.SavedModelSpec(
+            model_path=model_path))
+
+  inference_spec_type = get_saved_model_spec(args.model_path)
+  with beam.Pipeline(options=options) as p:
+    (p
+     | 'ReadInputText' >> beam.io.ReadFromText(args.input)
+     | 'RunInferenceImpl' >> 
run_inference.RunInferenceImpl( + inference_spec_type)) + + +if __name__ == '__main__': + run() From 265dd2e6831366497346f97b0726944757e57278 Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Thu, 23 Jul 2020 18:16:10 -0400 Subject: [PATCH 4/8] remove outdated command line entry script --- .../run_inference_arrow_benchmark.py | 87 ------------------- .../benchmarks/run_inference_benchmark.py | 11 ++- 2 files changed, 7 insertions(+), 91 deletions(-) delete mode 100644 tfx_bsl/beam/benchmarks/run_inference_arrow_benchmark.py diff --git a/tfx_bsl/beam/benchmarks/run_inference_arrow_benchmark.py b/tfx_bsl/beam/benchmarks/run_inference_arrow_benchmark.py deleted file mode 100644 index 3e57944e..00000000 --- a/tfx_bsl/beam/benchmarks/run_inference_arrow_benchmark.py +++ /dev/null @@ -1,87 +0,0 @@ -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -"""Script to use run_inference_arrow from command line -Below is a complete command line for running this script -for benchmarks - -python3 run_inference_arrow_benchemark.py \ -PATH_TO_MODEL \ -PATH_TO_DATA \ ---output gs://YOUR_BUCKET/results/output \ ---project YOUR_PROJECT \ ---runner DataflowRunner \ ---temp_location gs://YOUR_BUCKET/temp \ ---job_name run-inference-arrow-metrics \ ---region us-central1 -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import argparse -import apache_beam as beam -from tfx_bsl.tfxio import raw_tf_record -from tfx_bsl.beam import run_inference_arrow -from tfx_bsl.public.proto import model_spec_pb2 -from apache_beam.options.pipeline_options import PipelineOptions -from apache_beam.options.pipeline_options import SetupOptions - - -def run(argv=None, save_main_session=True): - """Main entry point; defines and runs the user_score pipeline.""" - parser = argparse.ArgumentParser() - - parser.add_argument( - 'model_path', - type=str, - help='The path to input model') - parser.add_argument( - 'input', - type=str, - help='Path to the data file(s) containing data.') - parser.add_argument( - '--output', - type=str, - required=True, - help='Path to the output file(s).') - - args, pipeline_args = parser.parse_known_args(argv) - options = PipelineOptions(pipeline_args) - - setup_options = options.view_as(SetupOptions) - # Path of the wheel file tfx-bsl - setup_options.extra_packages = ['./tfx-bsl/dist/tfx_bsl-0.23.0.dev0-cp37-cp37m-linux_x86_64.whl'] - setup_options.save_main_session = save_main_session - - def get_saved_model_spec(model_path): - '''returns an InferenceSpecType object for a saved model path''' - return model_spec_pb2.InferenceSpecType( - saved_model_spec=model_spec_pb2.SavedModelSpec( - model_path=model_path)) - - inference_spec_type = get_saved_model_spec(args.model_path) - converter = raw_tf_record.RawTfRecordTFXIO( - args.input, 
raw_record_column_name='__RAW_RECORD__') - - with beam.Pipeline(options=options) as p: - (p - | "GetRawRecordAndConvertToRecordBatch" >> converter.BeamSource() - | 'RunInferenceImpl' >> run_inference_arrow.RunInferenceImpl( - inference_spec_type)) - - -if __name__ == '__main__': - run() diff --git a/tfx_bsl/beam/benchmarks/run_inference_benchmark.py b/tfx_bsl/beam/benchmarks/run_inference_benchmark.py index d42ab62d..133532a0 100644 --- a/tfx_bsl/beam/benchmarks/run_inference_benchmark.py +++ b/tfx_bsl/beam/benchmarks/run_inference_benchmark.py @@ -1,4 +1,4 @@ -# Copyright 2019 Google LLC +# Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -27,13 +27,13 @@ --region us-central1 """ - from __future__ import absolute_import from __future__ import division from __future__ import print_function import argparse import apache_beam as beam +from tfx_bsl.tfxio import raw_tf_record from tfx_bsl.beam import run_inference from tfx_bsl.public.proto import model_spec_pb2 from apache_beam.options.pipeline_options import PipelineOptions @@ -51,7 +51,7 @@ def run(argv=None, save_main_session=True): parser.add_argument( 'input', type=str, - help='Path to the data file(s) containing game data.') + help='Path to the data file(s) containing data.') parser.add_argument( '--output', type=str, @@ -73,9 +73,12 @@ def get_saved_model_spec(model_path): model_path=model_path)) inference_spec_type = get_saved_model_spec(args.model_path) + converter = raw_tf_record.RawTfRecordTFXIO( + args.input, raw_record_column_name='__RAW_RECORD__') + with beam.Pipeline(options=options) as p: (p - | 'ReadInputText' >> beam.io.ReadFromText(args.input) + | "GetRawRecordAndConvertToRecordBatch" >> converter.BeamSource() | 'RunInferenceImpl' >> run_inference.RunInferenceImpl( inference_spec_type)) From 9067ef5265c6524ed7a2707e7156758f2283f826 Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Thu, 
23 Jul 2020 18:16:37 -0400 Subject: [PATCH 5/8] Delete run_inference_arrow_test.py --- tfx_bsl/beam/run_inference_arrow_test.py | 577 ----------------------- 1 file changed, 577 deletions(-) delete mode 100644 tfx_bsl/beam/run_inference_arrow_test.py diff --git a/tfx_bsl/beam/run_inference_arrow_test.py b/tfx_bsl/beam/run_inference_arrow_test.py deleted file mode 100644 index ce9ac4d0..00000000 --- a/tfx_bsl/beam/run_inference_arrow_test.py +++ /dev/null @@ -1,577 +0,0 @@ -# Copyright 2019 Google LLC. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""Tests for tfx_bsl.run_inference.""" - -from __future__ import absolute_import -from __future__ import division -# Standard __future__ imports -from __future__ import print_function - -import json -import os -try: - import unittest.mock as mock -except ImportError: - import mock - -import apache_beam as beam -from apache_beam.metrics.metric import MetricsFilter -from apache_beam.testing.util import assert_that -from apache_beam.testing.util import equal_to -from googleapiclient import discovery -from googleapiclient import http -from six.moves import http_client -import tensorflow as tf -from tfx_bsl.beam import run_inference -from tfx_bsl.public.proto import model_spec_pb2 - -from google.protobuf import text_format - -from tensorflow_serving.apis import prediction_log_pb2 - - -class RunInferenceFixture(tf.test.TestCase): - - def setUp(self): - super(RunInferenceFixture, self).setUp() - self._predict_examples = [ - text_format.Parse( - """ - features { - feature { key: "input1" value { float_list { value: 0 }}} - } - """, tf.train.Example()), - ] - - def _get_output_data_dir(self, sub_dir=None): - test_dir = self._testMethodName - path = os.path.join( - os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()), - test_dir) - if not tf.io.gfile.exists(path): - tf.io.gfile.makedirs(path) - if sub_dir is not None: - path = os.path.join(path, sub_dir) - return path - - def _prepare_predict_examples(self, example_path): - with tf.io.TFRecordWriter(example_path) as output_file: - for example in self._predict_examples: - output_file.write(example.SerializeToString()) - - -class RunOfflineInferenceTest(RunInferenceFixture): - - def setUp(self): - super(RunOfflineInferenceTest, self).setUp() - self._predict_examples = [ - text_format.Parse( - """ - features { - feature { key: "input1" value { float_list { value: 0 }}} - } - """, tf.train.Example()), - text_format.Parse( - """ - features { - feature { key: "input1" value { float_list { value: 1 }}} - } - """, 
tf.train.Example()), - ] - self._multihead_examples = [ - text_format.Parse( - """ - features { - feature {key: "x" value { float_list { value: 0.8 }}} - feature {key: "y" value { float_list { value: 0.2 }}} - } - """, tf.train.Example()), - text_format.Parse( - """ - features { - feature {key: "x" value { float_list { value: 0.6 }}} - feature {key: "y" value { float_list { value: 0.1 }}} - } - """, tf.train.Example()), - ] - - - def _prepare_multihead_examples(self, example_path): - with tf.io.TFRecordWriter(example_path) as output_file: - for example in self._multihead_examples: - output_file.write(example.SerializeToString()) - - def _build_predict_model(self, model_path): - """Exports the dummy sum predict model.""" - - with tf.compat.v1.Graph().as_default(): - input_tensors = { - 'x': tf.compat.v1.io.FixedLenFeature( - [1], dtype=tf.float32, default_value=0) - } - serving_receiver = ( - tf.compat.v1.estimator.export.build_parsing_serving_input_receiver_fn( - input_tensors)()) - output_tensors = {'y': serving_receiver.features['x'] * 2} - sess = tf.compat.v1.Session() - sess.run(tf.compat.v1.initializers.global_variables()) - signature_def = tf.compat.v1.estimator.export.PredictOutput( - output_tensors).as_signature_def(serving_receiver.receiver_tensors) - builder = tf.compat.v1.saved_model.builder.SavedModelBuilder(model_path) - builder.add_meta_graph_and_variables( - sess, [tf.compat.v1.saved_model.tag_constants.SERVING], - signature_def_map={ - tf.compat.v1.saved_model.signature_constants - .DEFAULT_SERVING_SIGNATURE_DEF_KEY: - signature_def, - }) - builder.save() - - def _build_regression_signature(self, input_tensor, output_tensor): - """Helper function for building a regression SignatureDef.""" - input_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( - input_tensor) - signature_inputs = { - tf.compat.v1.saved_model.signature_constants.REGRESS_INPUTS: - input_tensor_info - } - output_tensor_info = 
tf.compat.v1.saved_model.utils.build_tensor_info( - output_tensor) - signature_outputs = { - tf.compat.v1.saved_model.signature_constants.REGRESS_OUTPUTS: - output_tensor_info - } - return tf.compat.v1.saved_model.signature_def_utils.build_signature_def( - signature_inputs, signature_outputs, - tf.compat.v1.saved_model.signature_constants.REGRESS_METHOD_NAME) - - def _build_classification_signature(self, input_tensor, scores_tensor): - """Helper function for building a classification SignatureDef.""" - input_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( - input_tensor) - signature_inputs = { - tf.compat.v1.saved_model.signature_constants.CLASSIFY_INPUTS: - input_tensor_info - } - output_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( - scores_tensor) - signature_outputs = { - tf.compat.v1.saved_model.signature_constants.CLASSIFY_OUTPUT_SCORES: - output_tensor_info - } - return tf.compat.v1.saved_model.signature_def_utils.build_signature_def( - signature_inputs, signature_outputs, - tf.compat.v1.saved_model.signature_constants.CLASSIFY_METHOD_NAME) - - def _build_multihead_model(self, model_path): - with tf.compat.v1.Graph().as_default(): - input_example = tf.compat.v1.placeholder( - tf.string, name='input_examples_tensor') - config = { - 'x': tf.compat.v1.io.FixedLenFeature( - [1], dtype=tf.float32, default_value=0), - 'y': tf.compat.v1.io.FixedLenFeature( - [1], dtype=tf.float32, default_value=0), - } - features = tf.compat.v1.parse_example(input_example, config) - x = features['x'] - y = features['y'] - sum_pred = x + y - diff_pred = tf.abs(x - y) - sess = tf.compat.v1.Session() - sess.run(tf.compat.v1.initializers.global_variables()) - signature_def_map = { - 'regress_diff': - self._build_regression_signature(input_example, diff_pred), - 'classify_sum': - self._build_classification_signature(input_example, sum_pred), - tf.compat.v1.saved_model.signature_constants - .DEFAULT_SERVING_SIGNATURE_DEF_KEY: - 
self._build_regression_signature(input_example, sum_pred) - } - builder = tf.compat.v1.saved_model.builder.SavedModelBuilder(model_path) - builder.add_meta_graph_and_variables( - sess, [tf.compat.v1.saved_model.tag_constants.SERVING], - signature_def_map=signature_def_map) - builder.save() - - def _run_inference_with_beam(self, example_path, inference_spec_type, - prediction_log_path): - with beam.Pipeline() as pipeline: - _ = ( - pipeline - | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) - | 'ParseExamples' >> beam.Map(tf.train.Example.FromString) - | - 'RunInference' >> run_inference.RunInferenceImpl(inference_spec_type) - | 'WritePredictions' >> beam.io.WriteToTFRecord( - prediction_log_path, - coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) - - def _get_results(self, prediction_log_path): - results = [] - for f in tf.io.gfile.glob(prediction_log_path + '-?????-of-?????'): - record_iterator = tf.compat.v1.io.tf_record_iterator(path=f) - for record_string in record_iterator: - prediction_log = prediction_log_pb2.PredictionLog() - prediction_log.MergeFromString(record_string) - results.append(prediction_log) - return results - - def testModelPathInvalid(self): - example_path = self._get_output_data_dir('examples') - self._prepare_predict_examples(example_path) - prediction_log_path = self._get_output_data_dir('predictions') - with self.assertRaisesRegexp(IOError, 'SavedModel file does not exist.*'): - self._run_inference_with_beam( - example_path, - model_spec_pb2.InferenceSpecType( - saved_model_spec=model_spec_pb2.SavedModelSpec( - model_path=self._get_output_data_dir())), prediction_log_path) - - def testEstimatorModelPredict(self): - example_path = self._get_output_data_dir('examples') - self._prepare_predict_examples(example_path) - model_path = self._get_output_data_dir('model') - self._build_predict_model(model_path) - prediction_log_path = self._get_output_data_dir('predictions') - self._run_inference_with_beam( - example_path, 
- model_spec_pb2.InferenceSpecType( - saved_model_spec=model_spec_pb2.SavedModelSpec( - model_path=model_path)), prediction_log_path) - - results = self._get_results(prediction_log_path) - self.assertLen(results, 2) - self.assertEqual( - results[0].predict_log.request.inputs[ - run_inference._DEFAULT_INPUT_KEY].string_val[0], - self._predict_examples[0].SerializeToString()) - self.assertEqual(results[0].predict_log.response.outputs['y'].dtype, - tf.float32) - self.assertLen( - results[0].predict_log.response.outputs['y'].tensor_shape.dim, 2) - self.assertEqual( - results[0].predict_log.response.outputs['y'].tensor_shape.dim[0].size, - 1) - self.assertEqual( - results[0].predict_log.response.outputs['y'].tensor_shape.dim[1].size, - 1) - - def testClassifyModel(self): - example_path = self._get_output_data_dir('examples') - self._prepare_multihead_examples(example_path) - model_path = self._get_output_data_dir('model') - self._build_multihead_model(model_path) - prediction_log_path = self._get_output_data_dir('predictions') - self._run_inference_with_beam( - example_path, - model_spec_pb2.InferenceSpecType( - saved_model_spec=model_spec_pb2.SavedModelSpec( - model_path=model_path, signature_name=['classify_sum'])), - prediction_log_path) - - results = self._get_results(prediction_log_path) - self.assertLen(results, 2) - classify_log = results[0].classify_log - self.assertLen(classify_log.request.input.example_list.examples, 1) - self.assertEqual(classify_log.request.input.example_list.examples[0], - self._multihead_examples[0]) - self.assertLen(classify_log.response.result.classifications, 1) - self.assertLen(classify_log.response.result.classifications[0].classes, 1) - self.assertAlmostEqual( - classify_log.response.result.classifications[0].classes[0].score, 1.0) - - def testRegressModel(self): - example_path = self._get_output_data_dir('examples') - self._prepare_multihead_examples(example_path) - model_path = self._get_output_data_dir('model') - 
self._build_multihead_model(model_path) - prediction_log_path = self._get_output_data_dir('predictions') - self._run_inference_with_beam( - example_path, - model_spec_pb2.InferenceSpecType( - saved_model_spec=model_spec_pb2.SavedModelSpec( - model_path=model_path, signature_name=['regress_diff'])), - prediction_log_path) - - results = self._get_results(prediction_log_path) - self.assertLen(results, 2) - regress_log = results[0].regress_log - self.assertLen(regress_log.request.input.example_list.examples, 1) - self.assertEqual(regress_log.request.input.example_list.examples[0], - self._multihead_examples[0]) - self.assertLen(regress_log.response.result.regressions, 1) - self.assertAlmostEqual(regress_log.response.result.regressions[0].value, - 0.6) - - def testMultiInferenceModel(self): - example_path = self._get_output_data_dir('examples') - self._prepare_multihead_examples(example_path) - model_path = self._get_output_data_dir('model') - self._build_multihead_model(model_path) - prediction_log_path = self._get_output_data_dir('predictions') - self._run_inference_with_beam( - example_path, - model_spec_pb2.InferenceSpecType( - saved_model_spec=model_spec_pb2.SavedModelSpec( - model_path=model_path, - signature_name=['regress_diff', 'classify_sum'])), - prediction_log_path) - results = self._get_results(prediction_log_path) - self.assertLen(results, 2) - multi_inference_log = results[0].multi_inference_log - self.assertLen(multi_inference_log.request.input.example_list.examples, 1) - self.assertEqual(multi_inference_log.request.input.example_list.examples[0], - self._multihead_examples[0]) - self.assertLen(multi_inference_log.response.results, 2) - signature_names = [] - for result in multi_inference_log.response.results: - signature_names.append(result.model_spec.signature_name) - self.assertIn('regress_diff', signature_names) - self.assertIn('classify_sum', signature_names) - result = multi_inference_log.response.results[0] - 
self.assertEqual(result.model_spec.signature_name, 'regress_diff') - self.assertLen(result.regression_result.regressions, 1) - self.assertAlmostEqual(result.regression_result.regressions[0].value, 0.6) - result = multi_inference_log.response.results[1] - self.assertEqual(result.model_spec.signature_name, 'classify_sum') - self.assertLen(result.classification_result.classifications, 1) - self.assertLen(result.classification_result.classifications[0].classes, 1) - self.assertAlmostEqual( - result.classification_result.classifications[0].classes[0].score, 1.0) - - def testKerasModelPredict(self): - inputs = tf.keras.Input(shape=(1,), name='input1') - output1 = tf.keras.layers.Dense( - 1, activation=tf.nn.sigmoid, name='output1')( - inputs) - output2 = tf.keras.layers.Dense( - 1, activation=tf.nn.sigmoid, name='output2')( - inputs) - inference_model = tf.keras.models.Model(inputs, [output1, output2]) - - class TestKerasModel(tf.keras.Model): - - def __init__(self, inference_model): - super(TestKerasModel, self).__init__(name='test_keras_model') - self.inference_model = inference_model - - @tf.function(input_signature=[ - tf.TensorSpec(shape=[None], dtype=tf.string, name='inputs') - ]) - def call(self, serialized_example): - features = { - 'input1': - tf.compat.v1.io.FixedLenFeature([1], - dtype=tf.float32, - default_value=0) - } - input_tensor_dict = tf.io.parse_example(serialized_example, features) - return inference_model(input_tensor_dict['input1']) - - model = TestKerasModel(inference_model) - model.compile( - optimizer=tf.keras.optimizers.Adam(lr=.001), - loss=tf.keras.losses.binary_crossentropy, - metrics=['accuracy']) - - model_path = self._get_output_data_dir('model') - tf.compat.v1.keras.experimental.export_saved_model( - model, model_path, serving_only=True) - - example_path = self._get_output_data_dir('examples') - self._prepare_predict_examples(example_path) - prediction_log_path = self._get_output_data_dir('predictions') - self._run_inference_with_beam( - 
example_path, - model_spec_pb2.InferenceSpecType( - saved_model_spec=model_spec_pb2.SavedModelSpec( - model_path=model_path)), prediction_log_path) - - results = self._get_results(prediction_log_path) - self.assertLen(results, 2) - - def testTelemetry(self): - example_path = self._get_output_data_dir('examples') - self._prepare_multihead_examples(example_path) - model_path = self._get_output_data_dir('model') - self._build_multihead_model(model_path) - inference_spec_type = model_spec_pb2.InferenceSpecType( - saved_model_spec=model_spec_pb2.SavedModelSpec( - model_path=model_path, signature_name=['classify_sum'])) - pipeline = beam.Pipeline() - _ = ( - pipeline | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) - | 'ParseExamples' >> beam.Map(tf.train.Example.FromString) - | 'RunInference' >> run_inference.RunInferenceImpl(inference_spec_type)) - run_result = pipeline.run() - run_result.wait_until_finish() - - num_inferences = run_result.metrics().query( - MetricsFilter().with_name('num_inferences')) - self.assertTrue(num_inferences['counters']) - self.assertEqual(num_inferences['counters'][0].result, 2) - num_instances = run_result.metrics().query( - MetricsFilter().with_name('num_instances')) - self.assertTrue(num_instances['counters']) - self.assertEqual(num_instances['counters'][0].result, 2) - inference_request_batch_size = run_result.metrics().query( - MetricsFilter().with_name('inference_request_batch_size')) - self.assertTrue(inference_request_batch_size['distributions']) - self.assertEqual( - inference_request_batch_size['distributions'][0].result.sum, 2) - inference_request_batch_byte_size = run_result.metrics().query( - MetricsFilter().with_name('inference_request_batch_byte_size')) - self.assertTrue(inference_request_batch_byte_size['distributions']) - self.assertEqual( - inference_request_batch_byte_size['distributions'][0].result.sum, - sum(element.ByteSize() for element in self._multihead_examples)) - inference_batch_latency_micro_secs = 
run_result.metrics().query( - MetricsFilter().with_name('inference_batch_latency_micro_secs')) - self.assertTrue(inference_batch_latency_micro_secs['distributions']) - self.assertGreaterEqual( - inference_batch_latency_micro_secs['distributions'][0].result.sum, 0) - load_model_latency_milli_secs = run_result.metrics().query( - MetricsFilter().with_name('load_model_latency_milli_secs')) - self.assertTrue(load_model_latency_milli_secs['distributions']) - self.assertGreaterEqual( - load_model_latency_milli_secs['distributions'][0].result.sum, 0) - - -class RunRemoteInferenceTest(RunInferenceFixture): - - def setUp(self): - super(RunRemoteInferenceTest, self).setUp() - self.example_path = self._get_output_data_dir('example') - self._prepare_predict_examples(self.example_path) - # This is from https://ml.googleapis.com/$discovery/rest?version=v1. - self._discovery_testdata_dir = os.path.join( - os.path.join(os.path.dirname(__file__), 'testdata'), - 'ml_discovery.json') - - @staticmethod - def _make_response_body(content, successful): - if successful: - response_dict = {'predictions': content} - else: - response_dict = {'error': content} - return json.dumps(response_dict) - - def _set_up_pipeline(self, inference_spec_type): - self.pipeline = beam.Pipeline() - self.pcoll = ( - self.pipeline - | 'ReadExamples' >> beam.io.ReadFromTFRecord(self.example_path) - | 'ParseExamples' >> beam.Map(tf.train.Example.FromString) - | 'RunInference' >> run_inference.RunInferenceImpl(inference_spec_type)) - - def _run_inference_with_beam(self): - self.pipeline_result = self.pipeline.run() - self.pipeline_result.wait_until_finish() - - def test_model_predict(self): - predictions = [{'output_1': [0.901], 'output_2': [0.997]}] - builder = http.RequestMockBuilder({ - 'ml.projects.predict': - (None, self._make_response_body(predictions, successful=True)) - }) - resource = discovery.build( - 'ml', - 'v1', - http=http.HttpMock(self._discovery_testdata_dir, - {'status': http_client.OK}), - 
requestBuilder=builder) - with mock.patch('googleapiclient.discovery.' 'build') as response_mock: - response_mock.side_effect = lambda service, version: resource - inference_spec_type = model_spec_pb2.InferenceSpecType( - ai_platform_prediction_model_spec=model_spec_pb2 - .AIPlatformPredictionModelSpec( - project_id='test-project', - model_name='test-model', - )) - - prediction_log = prediction_log_pb2.PredictionLog() - prediction_log.predict_log.response.outputs['output_1'].CopyFrom( - tf.make_tensor_proto(values=[0.901], dtype=tf.double, shape=(1, 1))) - prediction_log.predict_log.response.outputs['output_2'].CopyFrom( - tf.make_tensor_proto(values=[0.997], dtype=tf.double, shape=(1, 1))) - - self._set_up_pipeline(inference_spec_type) - assert_that(self.pcoll, equal_to([prediction_log])) - self._run_inference_with_beam() - - def test_exception_raised_when_response_body_contains_error_entry(self): - error_msg = 'Base64 decode failed.' - builder = http.RequestMockBuilder({ - 'ml.projects.predict': - (None, self._make_response_body(error_msg, successful=False)) - }) - resource = discovery.build( - 'ml', - 'v1', - http=http.HttpMock(self._discovery_testdata_dir, - {'status': http_client.OK}), - requestBuilder=builder) - with mock.patch('googleapiclient.discovery.' 
'build') as response_mock: - response_mock.side_effect = lambda service, version: resource - inference_spec_type = model_spec_pb2.InferenceSpecType( - ai_platform_prediction_model_spec=model_spec_pb2 - .AIPlatformPredictionModelSpec( - project_id='test-project', - model_name='test-model', - )) - - try: - self._set_up_pipeline(inference_spec_type) - self._run_inference_with_beam() - except ValueError as exc: - actual_error_msg = str(exc) - self.assertTrue(actual_error_msg.startswith(error_msg)) - else: - self.fail('Test was expected to throw ValueError exception') - - def test_exception_raised_when_project_id_is_empty(self): - inference_spec_type = model_spec_pb2.InferenceSpecType( - ai_platform_prediction_model_spec=model_spec_pb2 - .AIPlatformPredictionModelSpec(model_name='test-model',)) - - with self.assertRaises(ValueError): - self._set_up_pipeline(inference_spec_type) - self._run_inference_with_beam() - - def test_request_body_with_binary_data(self): - example = text_format.Parse( - """ - features { - feature { key: "x_bytes" value { bytes_list { value: ["ASa8asdf"] }}} - feature { key: "x" value { bytes_list { value: "JLK7ljk3" }}} - feature { key: "y" value { int64_list { value: [1, 2] }}} - } - """, tf.train.Example()) - result = list( - run_inference._RemotePredictDoFn._prepare_instances([example])) - self.assertEqual([ - { - 'x_bytes': { - 'b64': 'QVNhOGFzZGY=' - }, - 'x': 'JLK7ljk3', - 'y': [1, 2] - }, - ], result) - - -if __name__ == '__main__': - tf.test.main() From 60fb92fe66ebe756d193ec3d0f91db089e0fc0cb Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Thu, 23 Jul 2020 18:16:49 -0400 Subject: [PATCH 6/8] Delete run_inference_arrow.py --- tfx_bsl/beam/run_inference_arrow.py | 1178 --------------------------- 1 file changed, 1178 deletions(-) delete mode 100644 tfx_bsl/beam/run_inference_arrow.py diff --git a/tfx_bsl/beam/run_inference_arrow.py b/tfx_bsl/beam/run_inference_arrow.py deleted file mode 100644 index 98e45148..00000000 --- 
a/tfx_bsl/beam/run_inference_arrow.py +++ /dev/null @@ -1,1178 +0,0 @@ -# Copyright 2019 Google LLC. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Run batch inference on saved model.""" - -from __future__ import absolute_import -from __future__ import division -# Standard __future__ imports -from __future__ import print_function - -import abc -import base64 -import collections -import os -import platform -import sys -import time -try: - import resource -except ImportError: - resource = None - -from absl import logging -import apache_beam as beam -from apache_beam.options.pipeline_options import GoogleCloudOptions -from apache_beam.options.pipeline_options import PipelineOptions -from apache_beam.utils import retry -import googleapiclient -from googleapiclient import discovery -from googleapiclient import http -import numpy as np -import six -import tensorflow as tf -from tfx_bsl.beam import shared -from tfx_bsl.public.proto import model_spec_pb2 -from tfx_bsl.telemetry import util -from typing import Any, Generator, Iterable, List, Mapping, Sequence, Text, \ - Tuple, Union - -# TODO(b/140306674): stop using the internal TF API. 
-from tensorflow.python.saved_model import loader_impl -from tensorflow_serving.apis import classification_pb2 -from tensorflow_serving.apis import inference_pb2 -from tensorflow_serving.apis import prediction_log_pb2 -from tensorflow_serving.apis import regression_pb2 - - -# TODO(b/131873699): Remove once 1.x support is dropped. -# pylint: disable=g-import-not-at-top -try: - # We need to import this in order to register all quantiles ops, even though - # it's not directly used. - from tensorflow.contrib.boosted_trees.python.ops import quantile_ops as _ # pylint: disable=unused-import -except ImportError: - pass - -_DEFAULT_INPUT_KEY = 'examples' -_METRICS_DESCRIPTOR_INFERENCE = 'BulkInferrer' -_METRICS_DESCRIPTOR_IN_PROCESS = 'InProcess' -_METRICS_DESCRIPTOR_CLOUD_AI_PREDICTION = 'CloudAIPlatformPrediction' -_MILLISECOND_TO_MICROSECOND = 1000 -_MICROSECOND_TO_NANOSECOND = 1000 -_SECOND_TO_MICROSECOND = 1000000 -_REMOTE_INFERENCE_NUM_RETRIES = 5 - -# We define the following aliases of Any because the actual types are not -# public. -_SignatureDef = Any -_MetaGraphDef = Any -_SavedModel = Any - -_BulkInferResult = Union[prediction_log_pb2.PredictLog, - Tuple[tf.train.Example, regression_pb2.Regression], - Tuple[tf.train.Example, - inference_pb2.MultiInferenceResponse], - Tuple[tf.train.Example, - classification_pb2.Classifications]] - - -# TODO(b/151468119): Converts this into enum once we stop supporting Python 2.7 -class OperationType(object): - CLASSIFICATION = 'CLASSIFICATION' - REGRESSION = 'REGRESSION' - PREDICTION = 'PREDICTION' - MULTIHEAD = 'MULTIHEAD' - - -@beam.ptransform_fn -@beam.typehints.with_input_types(Union[tf.train.Example, - tf.train.SequenceExample]) -@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) -def RunInferenceImpl( # pylint: disable=invalid-name - examples: beam.pvalue.PCollection, - inference_spec_type: model_spec_pb2.InferenceSpecType -) -> beam.pvalue.PCollection: - """Implementation of RunInference API. 
- - Args: - examples: A PCollection containing examples. - inference_spec_type: Model inference endpoint. - - Returns: - A PCollection containing prediction logs. - - Raises: - ValueError; when operation is not supported. - """ - logging.info('RunInference on model: %s', inference_spec_type) - - batched_examples = examples | 'BatchExamples' >> beam.BatchElements() - operation_type = _get_operation_type(inference_spec_type) - if operation_type == OperationType.CLASSIFICATION: - return batched_examples | 'Classify' >> _Classify(inference_spec_type) - elif operation_type == OperationType.REGRESSION: - return batched_examples | 'Regress' >> _Regress(inference_spec_type) - elif operation_type == OperationType.PREDICTION: - return batched_examples | 'Predict' >> _Predict(inference_spec_type) - elif operation_type == OperationType.MULTIHEAD: - return (batched_examples - | 'MultiInference' >> _MultiInference(inference_spec_type)) - else: - raise ValueError('Unsupported operation_type %s' % operation_type) - - -_IOTensorSpec = collections.namedtuple( - '_IOTensorSpec', - ['input_tensor_alias', 'input_tensor_name', 'output_alias_tensor_names']) - -_Signature = collections.namedtuple('_Signature', ['name', 'signature_def']) - - -@beam.ptransform_fn -@beam.typehints.with_input_types(Union[tf.train.Example, - tf.train.SequenceExample]) -@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) -def _Classify(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name - inference_spec_type: model_spec_pb2.InferenceSpecType): - """Performs classify PTransform.""" - if _using_in_process_inference(inference_spec_type): - return (pcoll - | 'Classify' >> beam.ParDo( - _BatchClassifyDoFn(inference_spec_type, shared.Shared())) - | 'BuildPredictionLogForClassifications' >> beam.ParDo( - _BuildPredictionLogForClassificationsDoFn())) - else: - raise NotImplementedError - - -@beam.ptransform_fn -@beam.typehints.with_input_types(Union[tf.train.Example, - 
tf.train.SequenceExample]) -@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) -def _Regress(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name - inference_spec_type: model_spec_pb2.InferenceSpecType): - """Performs regress PTransform.""" - if _using_in_process_inference(inference_spec_type): - return (pcoll - | 'Regress' >> beam.ParDo( - _BatchRegressDoFn(inference_spec_type, shared.Shared())) - | 'BuildPredictionLogForRegressions' >> beam.ParDo( - _BuildPredictionLogForRegressionsDoFn())) - else: - raise NotImplementedError - - -@beam.ptransform_fn -@beam.typehints.with_input_types(Union[tf.train.Example, - tf.train.SequenceExample]) -@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) -def _Predict(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name - inference_spec_type: model_spec_pb2.InferenceSpecType): - """Performs predict PTransform.""" - if _using_in_process_inference(inference_spec_type): - predictions = ( - pcoll - | 'Predict' >> beam.ParDo( - _BatchPredictDoFn(inference_spec_type, shared.Shared()))) - else: - predictions = ( - pcoll - | 'RemotePredict' >> beam.ParDo( - _RemotePredictDoFn(inference_spec_type, pcoll.pipeline.options))) - return (predictions - | 'BuildPredictionLogForPredictions' >> beam.ParDo( - _BuildPredictionLogForPredictionsDoFn())) - - -@beam.ptransform_fn -@beam.typehints.with_input_types(Union[tf.train.Example, - tf.train.SequenceExample]) -@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) -def _MultiInference(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name - inference_spec_type: model_spec_pb2.InferenceSpecType): - """Performs multi inference PTransform.""" - if _using_in_process_inference(inference_spec_type): - return ( - pcoll - | 'MultiInference' >> beam.ParDo( - _BatchMultiInferenceDoFn(inference_spec_type, shared.Shared())) - | 'BuildMultiInferenceLog' >> beam.ParDo(_BuildMultiInferenceLogDoFn())) - else: - raise NotImplementedError - - 
-@six.add_metaclass(abc.ABCMeta) -class _BaseDoFn(beam.DoFn): - """Base DoFn that performs bulk inference.""" - - class _MetricsCollector(object): - """A collector for beam metrics.""" - - def __init__(self, inference_spec_type: model_spec_pb2.InferenceSpecType): - operation_type = _get_operation_type(inference_spec_type) - proximity_descriptor = ( - _METRICS_DESCRIPTOR_IN_PROCESS - if _using_in_process_inference(inference_spec_type) else - _METRICS_DESCRIPTOR_CLOUD_AI_PREDICTION) - namespace = util.MakeTfxNamespace( - [_METRICS_DESCRIPTOR_INFERENCE, operation_type, proximity_descriptor]) - - # Metrics - self._inference_counter = beam.metrics.Metrics.counter( - namespace, 'num_inferences') - self._num_instances = beam.metrics.Metrics.counter( - namespace, 'num_instances') - self._inference_request_batch_size = beam.metrics.Metrics.distribution( - namespace, 'inference_request_batch_size') - self._inference_request_batch_byte_size = ( - beam.metrics.Metrics.distribution( - namespace, 'inference_request_batch_byte_size')) - # Batch inference latency in microseconds. - self._inference_batch_latency_micro_secs = ( - beam.metrics.Metrics.distribution( - namespace, 'inference_batch_latency_micro_secs')) - self._model_byte_size = beam.metrics.Metrics.distribution( - namespace, 'model_byte_size') - # Model load latency in milliseconds. 
- self._load_model_latency_milli_secs = beam.metrics.Metrics.distribution( - namespace, 'load_model_latency_milli_secs') - - # Metrics cache - self.load_model_latency_milli_secs_cache = None - self.model_byte_size_cache = None - - def update_metrics_with_cache(self): - if self.load_model_latency_milli_secs_cache is not None: - self._load_model_latency_milli_secs.update( - self.load_model_latency_milli_secs_cache) - self.load_model_latency_milli_secs_cache = None - if self.model_byte_size_cache is not None: - self._model_byte_size.update(self.model_byte_size_cache) - self.model_byte_size_cache = None - - def update(self, elements: List[Union[tf.train.Example, - tf.train.SequenceExample]], - latency_micro_secs: int) -> None: - self._inference_batch_latency_micro_secs.update(latency_micro_secs) - self._num_instances.inc(len(elements)) - self._inference_counter.inc(len(elements)) - self._inference_request_batch_size.update(len(elements)) - self._inference_request_batch_byte_size.update( - sum(element.ByteSize() for element in elements)) - - def __init__(self, inference_spec_type: model_spec_pb2.InferenceSpecType): - super(_BaseDoFn, self).__init__() - self._clock = None - self._metrics_collector = self._MetricsCollector(inference_spec_type) - - def setup(self): - self._clock = _ClockFactory.make_clock() - - def process( - self, elements: List[Union[tf.train.Example, tf.train.SequenceExample]] - ) -> Iterable[Any]: - batch_start_time = self._clock.get_current_time_in_microseconds() - outputs = self.run_inference(elements) - result = self._post_process(elements, outputs) - self._metrics_collector.update( - elements, - self._clock.get_current_time_in_microseconds() - batch_start_time) - return result - - def finish_bundle(self): - self._metrics_collector.update_metrics_with_cache() - - @abc.abstractmethod - def run_inference( - self, elements: List[Union[tf.train.Example, tf.train.SequenceExample]] - ) -> Union[Mapping[Text, np.ndarray], Sequence[Mapping[Text, Any]]]: - 
raise NotImplementedError - - @abc.abstractmethod - def _post_process(self, elements: List[Union[tf.train.Example, - tf.train.SequenceExample]], - outputs: Any) -> Iterable[Any]: - raise NotImplementedError - - -def _retry_on_unavailable_and_resource_error_filter(exception: Exception): - """Retries for HttpError. - - Retries if error is unavailable (503) or resource exhausted (429). - Resource exhausted may happen when qps or bandwidth exceeds quota. - - Args: - exception: Exception from inference http request execution. - Returns: - A boolean of whether retry. - """ - - return (isinstance(exception, googleapiclient.errors.HttpError) and - exception.resp.status in (503, 429)) - - -@beam.typehints.with_input_types(List[Union[tf.train.Example, - tf.train.SequenceExample]]) -# Using output typehints triggers NotImplementedError('BEAM-2717)' on -# streaming mode on Dataflow runner. -# TODO(b/151468119): Consider to re-batch with online serving request size -# limit, and re-batch with RPC failures(InvalidArgument) regarding request size. -# @beam.typehints.with_output_types(prediction_log_pb2.PredictLog) -class _RemotePredictDoFn(_BaseDoFn): - """A DoFn that performs predictions from a cloud-hosted TensorFlow model. - - Supports both batch and streaming processing modes. - NOTE: Does not work on DirectRunner for streaming jobs [BEAM-7885]. - - In order to request predictions, you must deploy your trained model to AI - Platform Prediction in the TensorFlow SavedModel format. See - [Exporting a SavedModel for prediction] - (https://cloud.google.com/ai-platform/prediction/docs/exporting-savedmodel-for-prediction) - for more details. - - To send binary data, you have to make sure that the name of an input ends in - `_bytes`. - - NOTE: The returned `PredictLog` instances do not have `PredictRequest` part - filled. The reason is that it is difficult to determine the input tensor name - without having access to cloud-hosted model's signatures. 
- """ - - def __init__(self, inference_spec_type: model_spec_pb2.InferenceSpecType, - pipeline_options: PipelineOptions): - super(_RemotePredictDoFn, self).__init__(inference_spec_type) - self._api_client = None - - project_id = ( - inference_spec_type.ai_platform_prediction_model_spec.project_id or - pipeline_options.view_as(GoogleCloudOptions).project) - if not project_id: - raise ValueError('Either a non-empty project id or project flag in ' - ' beam pipeline options needs be provided.') - - model_name = ( - inference_spec_type.ai_platform_prediction_model_spec.model_name) - if not model_name: - raise ValueError('A non-empty model name must be provided.') - - version_name = ( - inference_spec_type.ai_platform_prediction_model_spec.version_name) - name_spec = 'projects/{}/models/{}' - # If version is not specified, the default version for a model is used. - if version_name: - name_spec += '/versions/{}' - self._full_model_name = name_spec.format(project_id, model_name, - version_name) - - def setup(self): - super(_RemotePredictDoFn, self).setup() - # TODO(b/151468119): Add tfx_bsl_version and tfx_bsl_py_version to - # user agent once custom header is supported in googleapiclient. - self._api_client = discovery.build('ml', 'v1') - - # Retry _REMOTE_INFERENCE_NUM_RETRIES times with exponential backoff. 
- @retry.with_exponential_backoff( - initial_delay_secs=1.0, - num_retries=_REMOTE_INFERENCE_NUM_RETRIES, - retry_filter=_retry_on_unavailable_and_resource_error_filter) - def _execute_request( - self, - request: http.HttpRequest) -> Mapping[Text, Sequence[Mapping[Text, Any]]]: - result = request.execute() - if 'error' in result: - raise ValueError(result['error']) - return result - - def _make_request(self, body: Mapping[Text, List[Any]]) -> http.HttpRequest: - return self._api_client.projects().predict( - name=self._full_model_name, body=body) - - @classmethod - def _prepare_instances( - cls, elements: List[tf.train.Example] - ) -> Generator[Mapping[Text, Any], None, None]: - for example in elements: - # TODO(b/151468119): support tf.train.SequenceExample - if not isinstance(example, tf.train.Example): - raise ValueError('Remote prediction only supports tf.train.Example') - - instance = {} - for input_name, feature in example.features.feature.items(): - attr_name = feature.WhichOneof('kind') - if attr_name is None: - continue - attr = getattr(feature, attr_name) - values = cls._parse_feature_content(attr.value, attr_name, - cls._sending_as_binary(input_name)) - # Flatten a sequence if its length is 1 - values = (values[0] if len(values) == 1 else values) - instance[input_name] = values - yield instance - - @staticmethod - def _sending_as_binary(input_name: Text) -> bool: - """Whether data should be sent as binary.""" - return input_name.endswith('_bytes') - - @staticmethod - def _parse_feature_content(values: Sequence[Any], attr_name: Text, - as_binary: bool) -> Sequence[Any]: - """Parse the content of tf.train.Feature object. - - If bytes_list, parse a list of bytes-like objects to a list of strings so - that it would be JSON serializable. - - If float_list or int64_list, do nothing. - - If data should be sent as binary, mark it as binary by replacing it with - a single attribute named 'b64'. 
- """ - if as_binary: - return [{'b64': base64.b64encode(x).decode()} for x in values] - elif attr_name == 'bytes_list': - return [x.decode() for x in values] - else: - return values - - def run_inference( - self, elements: List[Union[tf.train.Example, tf.train.SequenceExample]] - ) -> Sequence[Mapping[Text, Any]]: - body = {'instances': list(self._prepare_instances(elements))} - request = self._make_request(body) - response = self._execute_request(request) - return response['predictions'] - - def _post_process( - self, elements: List[Union[tf.train.Example, tf.train.SequenceExample]], - outputs: Sequence[Mapping[Text, Any]] - ) -> Iterable[prediction_log_pb2.PredictLog]: - result = [] - for output in outputs: - predict_log = prediction_log_pb2.PredictLog() - for output_alias, values in output.items(): - values = np.array(values) - tensor_proto = tf.make_tensor_proto( - values=values, - dtype=tf.as_dtype(values.dtype).as_datatype_enum, - shape=np.expand_dims(values, axis=0).shape) - predict_log.response.outputs[output_alias].CopyFrom(tensor_proto) - result.append(predict_log) - return result - - -# TODO(b/131873699): Add typehints once -# [BEAM-8381](https://issues.apache.org/jira/browse/BEAM-8381) -# is fixed. -# TODO(b/143484017): Add batch_size back off in the case there are functional -# reasons large batch sizes cannot be handled. -class _BaseBatchSavedModelDoFn(_BaseDoFn): - """A DoFn that runs in-process batch inference with a model. - - Models need to have the required serving signature as mentioned in - [Tensorflow Serving](https://www.tensorflow.org/tfx/serving/signature_defs) - - This function will check model signatures first. Then it will load and run - model inference in batch. 
- """ - - def __init__( - self, - inference_spec_type: model_spec_pb2.InferenceSpecType, - shared_model_handle: shared.Shared, - ): - super(_BaseBatchSavedModelDoFn, self).__init__(inference_spec_type) - self._inference_spec_type = inference_spec_type - self._shared_model_handle = shared_model_handle - self._model_path = inference_spec_type.saved_model_spec.model_path - self._tags = None - self._signatures = _get_signatures( - inference_spec_type.saved_model_spec.model_path, - inference_spec_type.saved_model_spec.signature_name, - _get_tags(inference_spec_type)) - self._session = None - self._io_tensor_spec = None - - def setup(self): - """Load the model. - - Note that worker may crash if exception is thrown in setup due - to b/139207285. - """ - - super(_BaseBatchSavedModelDoFn, self).setup() - self._tags = _get_tags(self._inference_spec_type) - self._io_tensor_spec = self._pre_process() - - if self._has_tpu_tag(): - # TODO(b/131873699): Support TPU inference. - raise ValueError('TPU inference is not supported yet.') - self._session = self._load_model() - - def _load_model(self): - """Load a saved model into memory. - - Returns: - Session instance. - """ - - def load(): - """Function for constructing shared LoadedModel.""" - # TODO(b/143484017): Do warmup and other heavy model construction here. 
- result = tf.compat.v1.Session(graph=tf.compat.v1.Graph()) - memory_before = _get_current_process_memory_in_bytes() - start_time = self._clock.get_current_time_in_microseconds() - tf.compat.v1.saved_model.loader.load(result, self._tags, self._model_path) - end_time = self._clock.get_current_time_in_microseconds() - memory_after = _get_current_process_memory_in_bytes() - self._metrics_collector.load_model_latency_milli_secs_cache = ( - (end_time - start_time) / _MILLISECOND_TO_MICROSECOND) - self._metrics_collector.model_byte_size_cache = ( - memory_after - memory_before) - return result - - if not self._model_path: - raise ValueError('Model path is not valid.') - return self._shared_model_handle.acquire(load) - - def _pre_process(self) -> _IOTensorSpec: - # Pre process functions will validate for each signature. - io_tensor_specs = [] - for signature in self._signatures: - if len(signature.signature_def.inputs) != 1: - raise ValueError('Signature should have 1 and only 1 inputs') - if (list(signature.signature_def.inputs.values())[0].dtype != - tf.string.as_datatype_enum): - raise ValueError( - 'Input dtype is expected to be %s, got %s' % - tf.string.as_datatype_enum, - list(signature.signature_def.inputs.values())[0].dtype) - io_tensor_specs.append(_signature_pre_process(signature.signature_def)) - input_tensor_name = '' - input_tensor_alias = '' - output_alias_tensor_names = {} - for io_tensor_spec in io_tensor_specs: - if not input_tensor_name: - input_tensor_name = io_tensor_spec.input_tensor_name - input_tensor_alias = io_tensor_spec.input_tensor_alias - elif input_tensor_name != io_tensor_spec.input_tensor_name: - raise ValueError('Input tensor must be the same for all Signatures.') - for alias, tensor_name in io_tensor_spec.output_alias_tensor_names.items( - ): - output_alias_tensor_names[alias] = tensor_name - if (not output_alias_tensor_names or not input_tensor_name or - not input_tensor_alias): - raise ValueError('No valid fetch tensors or feed 
tensors.') - return _IOTensorSpec(input_tensor_alias, input_tensor_name, - output_alias_tensor_names) - - def _has_tpu_tag(self) -> bool: - return (len(self._tags) == 2 and tf.saved_model.SERVING in self._tags and - tf.saved_model.TPU in self._tags) - - def run_inference( - self, elements: List[Union[tf.train.Example, tf.train.SequenceExample]] - ) -> Mapping[Text, np.ndarray]: - self._check_elements(elements) - outputs = self._run_tf_operations(elements) - return outputs - - def _run_tf_operations( - self, elements: List[Union[tf.train.Example, tf.train.SequenceExample]] - ) -> Mapping[Text, np.ndarray]: - input_values = [] - for element in elements: - input_values.append(element.SerializeToString()) - result = self._session.run( - self._io_tensor_spec.output_alias_tensor_names, - feed_dict={self._io_tensor_spec.input_tensor_name: input_values}) - if len(result) != len(self._io_tensor_spec.output_alias_tensor_names): - raise RuntimeError('Output length does not match fetches') - return result - - def _check_elements( - self, elements: List[Union[tf.train.Example, - tf.train.SequenceExample]]) -> None: - """Unimplemented.""" - - raise NotImplementedError - - -@beam.typehints.with_input_types(List[Union[tf.train.Example, - tf.train.SequenceExample]]) -@beam.typehints.with_output_types(Tuple[tf.train.Example, - classification_pb2.Classifications]) -class _BatchClassifyDoFn(_BaseBatchSavedModelDoFn): - """A DoFn that run inference on classification model.""" - - def setup(self): - signature_def = self._signatures[0].signature_def - if signature_def.method_name != tf.saved_model.CLASSIFY_METHOD_NAME: - raise ValueError( - 'BulkInferrerClassifyDoFn requires signature method ' - 'name %s, got: %s' % tf.saved_model.CLASSIFY_METHOD_NAME, - signature_def.method_name) - super(_BatchClassifyDoFn, self).setup() - - def _check_elements( - self, elements: List[Union[tf.train.Example, - tf.train.SequenceExample]]) -> None: - if not all(isinstance(element, tf.train.Example) for 
element in elements): - raise ValueError('Classify only supports tf.train.Example') - - def _post_process( - self, elements: Sequence[tf.train.Example], outputs: Mapping[Text, - np.ndarray] - ) -> Iterable[Tuple[tf.train.Example, classification_pb2.Classifications]]: - classifications = _post_process_classify( - self._io_tensor_spec.output_alias_tensor_names, elements, outputs) - return zip(elements, classifications) - - -@beam.typehints.with_input_types(List[Union[tf.train.Example, - tf.train.SequenceExample]]) -@beam.typehints.with_output_types(Tuple[tf.train.Example, - regression_pb2.Regression]) -class _BatchRegressDoFn(_BaseBatchSavedModelDoFn): - """A DoFn that run inference on regression model.""" - - def setup(self): - super(_BatchRegressDoFn, self).setup() - - def _check_elements( - self, elements: List[Union[tf.train.Example, - tf.train.SequenceExample]]) -> None: - if not all(isinstance(element, tf.train.Example) for element in elements): - raise ValueError('Regress only supports tf.train.Example') - - def _post_process( - self, elements: Sequence[tf.train.Example], outputs: Mapping[Text, - np.ndarray] - ) -> Iterable[Tuple[tf.train.Example, regression_pb2.Regression]]: - regressions = _post_process_regress(elements, outputs) - return zip(elements, regressions) - - -@beam.typehints.with_input_types(List[Union[tf.train.Example, - tf.train.SequenceExample]]) -@beam.typehints.with_output_types(prediction_log_pb2.PredictLog) -class _BatchPredictDoFn(_BaseBatchSavedModelDoFn): - """A DoFn that runs inference on predict model.""" - - def setup(self): - signature_def = self._signatures[0].signature_def - if signature_def.method_name != tf.saved_model.PREDICT_METHOD_NAME: - raise ValueError( - 'BulkInferrerPredictDoFn requires signature method ' - 'name %s, got: %s' % tf.saved_model.PREDICT_METHOD_NAME, - signature_def.method_name) - super(_BatchPredictDoFn, self).setup() - - def _check_elements( - self, elements: List[Union[tf.train.Example, - 
tf.train.SequenceExample]]) -> None: - pass - - def _post_process( - self, elements: Union[Sequence[tf.train.Example], - Sequence[tf.train.SequenceExample]], - outputs: Mapping[Text, np.ndarray] - ) -> Iterable[prediction_log_pb2.PredictLog]: - input_tensor_alias = self._io_tensor_spec.input_tensor_alias - signature_name = self._signatures[0].name - batch_size = len(elements) - for output_alias, output in outputs.items(): - if len(output.shape) < 1 or output.shape[0] != batch_size: - raise ValueError( - 'Expected output tensor %s to have at least one ' - 'dimension, with the first having a size equal to the input batch ' - 'size %s. Instead found %s' % - (output_alias, batch_size, output.shape)) - predict_log_tmpl = prediction_log_pb2.PredictLog() - predict_log_tmpl.request.model_spec.signature_name = signature_name - predict_log_tmpl.response.model_spec.signature_name = signature_name - input_tensor_proto = predict_log_tmpl.request.inputs[input_tensor_alias] - input_tensor_proto.dtype = tf.string.as_datatype_enum - input_tensor_proto.tensor_shape.dim.add().size = 1 - - result = [] - for i in range(batch_size): - predict_log = prediction_log_pb2.PredictLog() - predict_log.CopyFrom(predict_log_tmpl) - predict_log.request.inputs[input_tensor_alias].string_val.append( - elements[i].SerializeToString()) - for output_alias, output in outputs.items(): - # Mimic tensor::Split - tensor_proto = tf.make_tensor_proto( - values=output[i], - dtype=tf.as_dtype(output[i].dtype).as_datatype_enum, - shape=np.expand_dims(output[i], axis=0).shape) - predict_log.response.outputs[output_alias].CopyFrom(tensor_proto) - result.append(predict_log) - return result - - -@beam.typehints.with_input_types(List[Union[tf.train.Example, - tf.train.SequenceExample]]) -@beam.typehints.with_output_types(Tuple[tf.train.Example, - inference_pb2.MultiInferenceResponse]) -class _BatchMultiInferenceDoFn(_BaseBatchSavedModelDoFn): - """A DoFn that runs inference on multi-head model.""" - - def 
_check_elements( - self, elements: List[Union[tf.train.Example, - tf.train.SequenceExample]]) -> None: - if not all(isinstance(element, tf.train.Example) for element in elements): - raise ValueError('Multi inference only supports tf.train.Example') - - def _post_process( - self, elements: Sequence[tf.train.Example], outputs: Mapping[Text, - np.ndarray] - ) -> Iterable[Tuple[tf.train.Example, inference_pb2.MultiInferenceResponse]]: - classifications = None - regressions = None - for signature in self._signatures: - signature_def = signature.signature_def - if signature_def.method_name == tf.saved_model.CLASSIFY_METHOD_NAME: - classifications = _post_process_classify( - self._io_tensor_spec.output_alias_tensor_names, elements, outputs) - elif signature_def.method_name == tf.saved_model.REGRESS_METHOD_NAME: - regressions = _post_process_regress(elements, outputs) - else: - raise ValueError('Signature method %s is not supported for ' - 'multi inference' % signature_def.method_name) - result = [] - for i in range(len(elements)): - response = inference_pb2.MultiInferenceResponse() - for signature in self._signatures: - signature_def = signature.signature_def - inference_result = response.results.add() - if (signature_def.method_name == tf.saved_model.CLASSIFY_METHOD_NAME and - classifications): - inference_result.classification_result.classifications.add().CopyFrom( - classifications[i]) - elif ( - signature_def.method_name == tf.saved_model.REGRESS_METHOD_NAME and - regressions): - inference_result.regression_result.regressions.add().CopyFrom( - regressions[i]) - else: - raise ValueError('Signature method %s is not supported for ' - 'multi inference' % signature_def.method_name) - inference_result.model_spec.signature_name = signature.name - if len(response.results) != len(self._signatures): - raise RuntimeError('Multi inference response result length does not ' - 'match the number of signatures') - result.append((elements[i], response)) - return result - - 
-@beam.typehints.with_input_types(Tuple[tf.train.Example, - classification_pb2.Classifications]) -@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) -class _BuildPredictionLogForClassificationsDoFn(beam.DoFn): - """A DoFn that builds prediction log from classifications.""" - - def process( - self, element: Tuple[tf.train.Example, classification_pb2.Classifications] - ) -> Iterable[prediction_log_pb2.PredictionLog]: - (train_example, classifications) = element - result = prediction_log_pb2.PredictionLog() - result.classify_log.request.input.example_list.examples.add().CopyFrom( - train_example) - result.classify_log.response.result.classifications.add().CopyFrom( - classifications) - yield result - - -@beam.typehints.with_input_types(Tuple[tf.train.Example, - regression_pb2.Regression]) -@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) -class _BuildPredictionLogForRegressionsDoFn(beam.DoFn): - """A DoFn that builds prediction log from regressions.""" - - def process( - self, element: Tuple[tf.train.Example, regression_pb2.Regression] - ) -> Iterable[prediction_log_pb2.PredictionLog]: - (train_example, regression) = element - result = prediction_log_pb2.PredictionLog() - result.regress_log.request.input.example_list.examples.add().CopyFrom( - train_example) - result.regress_log.response.result.regressions.add().CopyFrom(regression) - yield result - - -@beam.typehints.with_input_types(prediction_log_pb2.PredictLog) -@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) -class _BuildPredictionLogForPredictionsDoFn(beam.DoFn): - """A DoFn that builds prediction log from predictions.""" - - def process( - self, element: prediction_log_pb2.PredictLog - ) -> Iterable[prediction_log_pb2.PredictionLog]: - result = prediction_log_pb2.PredictionLog() - result.predict_log.CopyFrom(element) - yield result - - -@beam.typehints.with_input_types(Tuple[tf.train.Example, - inference_pb2.MultiInferenceResponse]) 
-@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) -class _BuildMultiInferenceLogDoFn(beam.DoFn): - """A DoFn that builds prediction log from multi-head inference result.""" - - def process( - self, element: Tuple[tf.train.Example, - inference_pb2.MultiInferenceResponse] - ) -> Iterable[prediction_log_pb2.PredictionLog]: - (train_example, multi_inference_response) = element - result = prediction_log_pb2.PredictionLog() - (result.multi_inference_log.request.input.example_list.examples.add() - .CopyFrom(train_example)) - result.multi_inference_log.response.CopyFrom(multi_inference_response) - yield result - - -def _post_process_classify( - output_alias_tensor_names: Mapping[Text, Text], - elements: Sequence[tf.train.Example], outputs: Mapping[Text, np.ndarray] -) -> Sequence[classification_pb2.Classifications]: - """Returns classifications from inference output.""" - - # This is to avoid error "The truth value of an array with - # more than one element is ambiguous." - has_classes = False - has_scores = False - if tf.saved_model.CLASSIFY_OUTPUT_CLASSES in output_alias_tensor_names: - classes = outputs[tf.saved_model.CLASSIFY_OUTPUT_CLASSES] - has_classes = True - if tf.saved_model.CLASSIFY_OUTPUT_SCORES in output_alias_tensor_names: - scores = outputs[tf.saved_model.CLASSIFY_OUTPUT_SCORES] - has_scores = True - if has_classes: - if classes.ndim != 2: - raise ValueError('Expected Tensor shape: [batch_size num_classes] but ' - 'got %s' % classes.shape) - if classes.dtype != tf.string.as_numpy_dtype: - raise ValueError('Expected classes Tensor of %s. 
Got: %s' % - (tf.string.as_numpy_dtype, classes.dtype)) - if classes.shape[0] != len(elements): - raise ValueError('Expected classes output batch size of %s, got %s' % - (len(elements), classes.shape[0])) - if has_scores: - if scores.ndim != 2: - raise ValueError("""Expected Tensor shape: [batch_size num_classes] but - got %s""" % scores.shape) - if scores.dtype != tf.float32.as_numpy_dtype: - raise ValueError('Expected classes Tensor of %s. Got: %s' % - (tf.float32.as_numpy_dtype, scores.dtype)) - if scores.shape[0] != len(elements): - raise ValueError('Expected classes output batch size of %s, got %s' % - (len(elements), scores.shape[0])) - num_classes = 0 - if has_classes and has_scores: - if scores.shape[1] != classes.shape[1]: - raise ValueError('Tensors class and score should match in shape[1]. ' - 'Got %s vs %s' % (classes.shape[1], scores.shape[1])) - num_classes = classes.shape[1] - elif has_classes: - num_classes = classes.shape[1] - elif has_scores: - num_classes = scores.shape[1] - - result = [] - for i in range(len(elements)): - a_classification = classification_pb2.Classifications() - for c in range(num_classes): - a_class = a_classification.classes.add() - if has_classes: - a_class.label = classes[i][c] - if has_scores: - a_class.score = scores[i][c] - result.append(a_classification) - if len(result) != len(elements): - raise RuntimeError('Classifications length does not match elements') - return result - - -def _post_process_regress( - elements: Sequence[tf.train.Example], - outputs: Mapping[Text, np.ndarray]) -> Sequence[regression_pb2.Regression]: - """Returns regressions from inference output.""" - - if tf.saved_model.REGRESS_OUTPUTS not in outputs: - raise ValueError('No regression outputs found in outputs: %s' % - outputs.keys()) - output = outputs[tf.saved_model.REGRESS_OUTPUTS] - batch_size = len(elements) - if not (output.ndim == 1 or (output.ndim == 2 and output.shape[1] == 1)): - raise ValueError("""Expected output Tensor shape to be 
either [batch_size] - or [batch_size, 1] but got %s""" % output.shape) - if batch_size != output.shape[0]: - raise ValueError( - 'Input batch size did not match output batch size: %s vs %s' % - (batch_size, output.shape[0])) - if output.dtype != tf.float32.as_numpy_dtype: - raise ValueError('Expected output Tensor of %s. Got: %s' % - (tf.float32.as_numpy_dtype, output.dtype)) - if output.size != batch_size: - raise ValueError('Expected output batch size to be %s. Got: %s' % - (batch_size, output.size)) - flatten_output = output.flatten() - result = [] - for regression_result in flatten_output: - regression = regression_pb2.Regression() - regression.value = regression_result - result.append(regression) - - # Add additional check to save downstream consumer checks. - if len(result) != len(elements): - raise RuntimeError('Regression length does not match elements') - return result - - -def _signature_pre_process(signature: _SignatureDef) -> _IOTensorSpec: - """Returns IOTensorSpec from signature.""" - - if len(signature.inputs) != 1: - raise ValueError('Signature should have 1 and only 1 inputs') - input_tensor_alias = list(signature.inputs.keys())[0] - if list(signature.inputs.values())[0].dtype != tf.string.as_datatype_enum: - raise ValueError( - 'Input dtype is expected to be %s, got %s' % tf.string.as_datatype_enum, - list(signature.inputs.values())[0].dtype) - if signature.method_name == tf.saved_model.CLASSIFY_METHOD_NAME: - input_tensor_name, output_alias_tensor_names = ( - _signature_pre_process_classify(signature)) - elif signature.method_name == tf.saved_model.PREDICT_METHOD_NAME: - input_tensor_name, output_alias_tensor_names = ( - _signature_pre_process_predict(signature)) - elif signature.method_name == tf.saved_model.REGRESS_METHOD_NAME: - input_tensor_name, output_alias_tensor_names = ( - _signature_pre_process_regress(signature)) - else: - raise ValueError('Signature method %s is not supported' % - signature.method_name) - return 
_IOTensorSpec(input_tensor_alias, input_tensor_name, - output_alias_tensor_names) - - -def _signature_pre_process_classify( - signature: _SignatureDef) -> Tuple[Text, Mapping[Text, Text]]: - """Returns input tensor name and output alias tensor names from signature. - - Args: - signature: SignatureDef - - Returns: - A tuple of input tensor name and output alias tensor names. - """ - - if len(signature.outputs) != 1 and len(signature.outputs) != 2: - raise ValueError('Classify signature should have 1 or 2 outputs') - if tf.saved_model.CLASSIFY_INPUTS not in signature.inputs: - raise ValueError('No classification inputs found in SignatureDef: %s' % - signature.inputs) - input_tensor_name = signature.inputs[tf.saved_model.CLASSIFY_INPUTS].name - output_alias_tensor_names = {} - if (tf.saved_model.CLASSIFY_OUTPUT_CLASSES not in signature.outputs and - tf.saved_model.CLASSIFY_OUTPUT_SCORES not in signature.outputs): - raise ValueError( - """Expected classification signature outputs to contain at - least one of %s or %s. Signature was: %s""" % - tf.saved_model.CLASSIFY_OUTPUT_CLASSES, - tf.saved_model.CLASSIFY_OUTPUT_SCORES, signature) - if tf.saved_model.CLASSIFY_OUTPUT_CLASSES in signature.outputs: - output_alias_tensor_names[tf.saved_model.CLASSIFY_OUTPUT_CLASSES] = ( - signature.outputs[tf.saved_model.CLASSIFY_OUTPUT_CLASSES].name) - if tf.saved_model.CLASSIFY_OUTPUT_SCORES in signature.outputs: - output_alias_tensor_names[tf.saved_model.CLASSIFY_OUTPUT_SCORES] = ( - signature.outputs[tf.saved_model.CLASSIFY_OUTPUT_SCORES].name) - return input_tensor_name, output_alias_tensor_names - - -def _signature_pre_process_predict( - signature: _SignatureDef) -> Tuple[Text, Mapping[Text, Text]]: - """Returns input tensor name and output alias tensor names from signature. - - Args: - signature: SignatureDef - - Returns: - A tuple of input tensor name and output alias tensor names. 
- """ - - input_tensor_name = list(signature.inputs.values())[0].name - output_alias_tensor_names = dict([ - (key, output.name) for key, output in signature.outputs.items() - ]) - return input_tensor_name, output_alias_tensor_names - - -def _signature_pre_process_regress( - signature: _SignatureDef) -> Tuple[Text, Mapping[Text, Text]]: - """Returns input tensor name and output alias tensor names from signature. - - Args: - signature: SignatureDef - - Returns: - A tuple of input tensor name and output alias tensor names. - """ - - if len(signature.outputs) != 1: - raise ValueError('Regress signature should have 1 output') - if tf.saved_model.REGRESS_INPUTS not in signature.inputs: - raise ValueError('No regression inputs found in SignatureDef: %s' % - signature.inputs) - input_tensor_name = signature.inputs[tf.saved_model.REGRESS_INPUTS].name - if tf.saved_model.REGRESS_OUTPUTS not in signature.outputs: - raise ValueError('No regression outputs found in SignatureDef: %s' % - signature.outputs) - output_alias_tensor_names = { - tf.saved_model.REGRESS_OUTPUTS: - signature.outputs[tf.saved_model.REGRESS_OUTPUTS].name - } - return input_tensor_name, output_alias_tensor_names - - -def _using_in_process_inference( - inference_spec_type: model_spec_pb2.InferenceSpecType) -> bool: - return inference_spec_type.WhichOneof('type') == 'saved_model_spec' - - -def _get_signatures(model_path: Text, signatures: Sequence[Text], - tags: Sequence[Text]) -> Sequence[_Signature]: - """Returns a sequence of {model_signature_name: signature}.""" - - if signatures: - signature_names = signatures - else: - signature_names = [tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY] - - saved_model_pb = loader_impl.parse_saved_model(model_path) - meta_graph_def = _get_meta_graph_def(saved_model_pb, tags) - result = [] - for signature_name in signature_names: - if signature_name in meta_graph_def.signature_def: - result.append( - _Signature(signature_name, - 
meta_graph_def.signature_def[signature_name])) - else: - raise RuntimeError('Signature %s could not be found in SavedModel' % - signature_name) - return result - - -def _get_operation_type( - inference_spec_type: model_spec_pb2.InferenceSpecType) -> Text: - if _using_in_process_inference(inference_spec_type): - signatures = _get_signatures( - inference_spec_type.saved_model_spec.model_path, - inference_spec_type.saved_model_spec.signature_name, - _get_tags(inference_spec_type)) - if not signatures: - raise ValueError('Model does not have valid signature to use') - - if len(signatures) == 1: - method_name = signatures[0].signature_def.method_name - if method_name == tf.saved_model.CLASSIFY_METHOD_NAME: - return OperationType.CLASSIFICATION - elif method_name == tf.saved_model.REGRESS_METHOD_NAME: - return OperationType.REGRESSION - elif method_name == tf.saved_model.PREDICT_METHOD_NAME: - return OperationType.PREDICTION - else: - raise ValueError('Unsupported signature method_name %s' % method_name) - else: - for signature in signatures: - method_name = signature.signature_def.method_name - if (method_name != tf.saved_model.CLASSIFY_METHOD_NAME and - method_name != tf.saved_model.REGRESS_METHOD_NAME): - raise ValueError('Unsupported signature method_name for multi-head ' - 'model inference: %s' % method_name) - return OperationType.MULTIHEAD - else: - # Remote inference supports predictions only. 
- return OperationType.PREDICTION - - -def _get_meta_graph_def(saved_model_pb: _SavedModel, - tags: Sequence[Text]) -> _MetaGraphDef: - """Returns MetaGraphDef from SavedModel.""" - - for meta_graph_def in saved_model_pb.meta_graphs: - if set(meta_graph_def.meta_info_def.tags) == set(tags): - return meta_graph_def - raise RuntimeError('MetaGraphDef associated with tags %s could not be ' - 'found in SavedModel' % tags) - - -def _get_current_process_memory_in_bytes(): - """Returns memory usage in bytes.""" - - if resource is not None: - usage = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss - if _is_darwin(): - return usage - return usage * 1024 - else: - logging.warning('Resource module is not available for current platform, ' - 'memory usage cannot be fetched.') - return 0 - - -def _get_tags( - inference_spec_type: model_spec_pb2.InferenceSpecType) -> Sequence[Text]: - """Returns tags from ModelSpec.""" - - if inference_spec_type.saved_model_spec.tag: - return list(inference_spec_type.saved_model_spec.tag) - else: - return [tf.saved_model.SERVING] - - -def _is_darwin() -> bool: - return sys.platform == 'darwin' - - -def _is_windows() -> bool: - return platform.system() == 'Windows' or os.name == 'nt' - - -def _is_cygwin() -> bool: - return platform.system().startswith('CYGWIN_NT') - - -class _Clock(object): - - def get_current_time_in_microseconds(self) -> int: - return int(time.time() * _SECOND_TO_MICROSECOND) - - -class _FineGrainedClock(_Clock): - - def get_current_time_in_microseconds(self) -> int: - return int( - time.clock_gettime_ns(time.CLOCK_REALTIME) / # pytype: disable=module-attr - _MICROSECOND_TO_NANOSECOND) - - -class _ClockFactory(object): - - @staticmethod - def make_clock() -> _Clock: - if (hasattr(time, 'clock_gettime_ns') and not _is_windows() - and not _is_cygwin()): - return _FineGrainedClock() - return _Clock() From ab21c43ebf99c2da019331168825170f7a162bec Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Thu, 30 Jul 2020 12:10:20 -0400 
Subject: [PATCH 7/8] make additional package a command line argument and fix comments --- .../beam/benchmarks/run_inference_benchmark.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/tfx_bsl/beam/benchmarks/run_inference_benchmark.py b/tfx_bsl/beam/benchmarks/run_inference_benchmark.py index 133532a0..7933bb04 100644 --- a/tfx_bsl/beam/benchmarks/run_inference_benchmark.py +++ b/tfx_bsl/beam/benchmarks/run_inference_benchmark.py @@ -14,17 +14,20 @@ """Script to use run_inference from command line Below is a complete command line for running this script -for benchmarks +for benchmarks on dataflow python3 run_inference_benchemark.py \ PATH_TO_MODEL \ PATH_TO_DATA \ --output gs://YOUR_BUCKET/results/output \ +--extra_packages PACKAGE1 PACKAGE2 \ --project YOUR_PROJECT \ --runner DataflowRunner \ --temp_location gs://YOUR_BUCKET/temp \ --job_name run-inference-metrics \ --region us-central1 + +*In this case, one of the extra_packages should be the wheel file for tfx-bsl """ from __future__ import absolute_import @@ -57,17 +60,21 @@ def run(argv=None, save_main_session=True): type=str, required=True, help='Path to the output file(s).') + parser.add_argument( + '--extra_packages', + type=str, + nargs='*', + help='Wheel file(s) for any additional required package(s) to Beam packages') args, pipeline_args = parser.parse_known_args(argv) options = PipelineOptions(pipeline_args) setup_options = options.view_as(SetupOptions) - # Path of the wheel file tfx-bsl - setup_options.extra_packages = ['./tfx-bsl/dist/tfx_bsl-0.23.0.dev0-cp37-cp37m-linux_x86_64.whl'] + setup_options.extra_packages = args.extra_packages setup_options.save_main_session = save_main_session def get_saved_model_spec(model_path): - '''returns an InferenceSpecType object for a saved model path''' + '''Returns an InferenceSpecType object for a saved model path''' return model_spec_pb2.InferenceSpecType( saved_model_spec=model_spec_pb2.SavedModelSpec( model_path=model_path)) 
From 87f604b5bbc13ff6cc43292e17fa9ff4e36d5b74 Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Thu, 30 Jul 2020 14:28:55 -0400 Subject: [PATCH 8/8] add benchmark scripts for public API --- .../benchmarks/run_inference_api_benchmark.py | 92 +++++++++++++++++++ .../benchmarks/run_inference_benchmark.py | 6 +- 2 files changed, 95 insertions(+), 3 deletions(-) create mode 100644 tfx_bsl/beam/benchmarks/run_inference_api_benchmark.py diff --git a/tfx_bsl/beam/benchmarks/run_inference_api_benchmark.py b/tfx_bsl/beam/benchmarks/run_inference_api_benchmark.py new file mode 100644 index 00000000..3708d169 --- /dev/null +++ b/tfx_bsl/beam/benchmarks/run_inference_api_benchmark.py @@ -0,0 +1,92 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Script to use public.run_inference from command line +Below is a complete command in terminal for running this script +on dataflow for benchmarks. 
+ +python3 run_inference_api_benchmark.py \ +PATH_TO_MODEL \ +PATH_TO_DATA \ +--output gs://YOUR_BUCKET/results/output \ +--extra_packages PACKAGE1 PACKAGE2 \ +--project YOUR_PROJECT \ +--runner DataflowRunner \ +--temp_location gs://YOUR_BUCKET/temp \ +--job_name run-inference-api-metrics \ +--region us-central1 + +*In this case, one of the extra_packages should be the wheel file for tfx-bsl +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import apache_beam as beam +import tensorflow as tf +from tfx_bsl.public.beam import run_inference +from tfx_bsl.public.proto import model_spec_pb2 +from apache_beam.options.pipeline_options import PipelineOptions +from apache_beam.options.pipeline_options import SetupOptions + + +def run(argv=None, save_main_session=True): + """Main entry point; defines and runs the inference benchmark pipeline.""" + parser = argparse.ArgumentParser() + + parser.add_argument( + 'model_path', + type=str, + help='The path to input model') + parser.add_argument( + 'input', + type=str, + help='Path to the data file(s) containing data.') + parser.add_argument( + '--output', + type=str, + required=True, + help='Path to the output file(s).') + parser.add_argument( + '--extra_packages', + type=str, + nargs='*', + help='Wheel file(s) for any additional required package(s) to Beam packages') + + args, pipeline_args = parser.parse_known_args(argv) + options = PipelineOptions(pipeline_args) + + setup_options = options.view_as(SetupOptions) + setup_options.extra_packages = args.extra_packages + setup_options.save_main_session = save_main_session + + def get_saved_model_spec(model_path): + '''Returns an InferenceSpecType object for a saved model path''' + return model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path)) + + inference_spec_type = get_saved_model_spec(args.model_path) + with beam.Pipeline(options=options) as p: + 
(p + | 'ReadInputText' >> beam.io.ReadFromTFRecord(args.input) + | 'ParseExamples' >> beam.Map(tf.train.Example.FromString) + | 'RunInferenceImpl' >> run_inference.RunInference( + inference_spec_type)) + + +if __name__ == '__main__': + run() diff --git a/tfx_bsl/beam/benchmarks/run_inference_benchmark.py b/tfx_bsl/beam/benchmarks/run_inference_benchmark.py index 7933bb04..55e4d15c 100644 --- a/tfx_bsl/beam/benchmarks/run_inference_benchmark.py +++ b/tfx_bsl/beam/benchmarks/run_inference_benchmark.py @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Script to use run_inference from command line -Below is a complete command line for running this script -for benchmarks on dataflow +"""Script to use beam.run_inference from command line +Below is a complete command in terminal for running this script +on dataflow for benchmarks. python3 run_inference_benchemark.py \ PATH_TO_MODEL \