From 1d03b5ae85a20aa6746af0c30756bc1c69a9de2f Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Wed, 27 May 2020 10:45:08 -0400 Subject: [PATCH 1/8] created new file with arrow and modified base function --- tfx_bsl/beam/run_inference_arrow.py | 1166 ++++++++++++++++++++++ tfx_bsl/beam/run_inference_arrow_test.py | 581 +++++++++++ 2 files changed, 1747 insertions(+) create mode 100644 tfx_bsl/beam/run_inference_arrow.py create mode 100644 tfx_bsl/beam/run_inference_arrow_test.py diff --git a/tfx_bsl/beam/run_inference_arrow.py b/tfx_bsl/beam/run_inference_arrow.py new file mode 100644 index 00000000..316b65a5 --- /dev/null +++ b/tfx_bsl/beam/run_inference_arrow.py @@ -0,0 +1,1166 @@ +# Copyright 2019 Google LLC. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Run batch inference on saved model.""" + +from __future__ import absolute_import +from __future__ import division +# Standard __future__ imports +from __future__ import print_function + +import abc +import base64 +import collections +import os +import platform +import sys +import time +try: + import resource +except ImportError: + resource = None + +from absl import logging +import apache_beam as beam +import pyarrow as pa +from apache_beam.options.pipeline_options import GoogleCloudOptions +from apache_beam.options.pipeline_options import PipelineOptions +from apache_beam.utils import retry +import googleapiclient +from googleapiclient import discovery +from googleapiclient import http +import numpy as np +import six +import tensorflow as tf +from tfx_bsl.beam import shared +from tfx_bsl.public.proto import model_spec_pb2 +from tfx_bsl.telemetry import util +from typing import Any, Generator, Iterable, List, Mapping, Sequence, Text, \ + Tuple, Union + +# TODO(b/140306674): stop using the internal TF API. +from tensorflow.python.saved_model import loader_impl +from tensorflow_serving.apis import classification_pb2 +from tensorflow_serving.apis import inference_pb2 +from tensorflow_serving.apis import prediction_log_pb2 +from tensorflow_serving.apis import regression_pb2 + + +# TODO(b/131873699): Remove once 1.x support is dropped. +# pylint: disable=g-import-not-at-top +try: + # We need to import this in order to register all quantiles ops, even though + # it's not directly used. 
+ from tensorflow.contrib.boosted_trees.python.ops import quantile_ops as _ # pylint: disable=unused-import +except ImportError: + pass + +_DEFAULT_INPUT_KEY = 'examples' +_METRICS_DESCRIPTOR_INFERENCE = 'BulkInferrer' +_METRICS_DESCRIPTOR_IN_PROCESS = 'InProcess' +_METRICS_DESCRIPTOR_CLOUD_AI_PREDICTION = 'CloudAIPlatformPrediction' +_MILLISECOND_TO_MICROSECOND = 1000 +_MICROSECOND_TO_NANOSECOND = 1000 +_SECOND_TO_MICROSECOND = 1000000 +_REMOTE_INFERENCE_NUM_RETRIES = 5 + +# We define the following aliases of Any because the actual types are not +# public. +_SignatureDef = Any +_MetaGraphDef = Any +_SavedModel = Any + +# TODO (Maxine): what is this? +_BulkInferResult = Union[prediction_log_pb2.PredictLog, + Tuple[tf.train.Example, regression_pb2.Regression], + Tuple[tf.train.Example, + inference_pb2.MultiInferenceResponse], + Tuple[tf.train.Example, + classification_pb2.Classifications]] + + +# TODO(b/151468119): Converts this into enum once we stop supporting Python 2.7 +class OperationType(object): + CLASSIFICATION = 'CLASSIFICATION' + REGRESSION = 'REGRESSION' + PREDICTION = 'PREDICTION' + MULTIHEAD = 'MULTIHEAD' + + +# TODO (Me): pTransform from examples/sequence example here + +# TODO (Me): Union[bytes, pa.RecordBatch]? +@beam.ptransform_fn +@beam.typehints.with_input_types(pa.RecordBatch) +@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) +def RunInferenceImpl( # pylint: disable=invalid-name + examples: beam.pvalue.PCollection, + inference_spec_type: model_spec_pb2.InferenceSpecType +) -> beam.pvalue.PCollection: + """Implementation of RunInference API. + + Args: + examples: A PCollection containing RecordBatch. + inference_spec_type: Model inference endpoint. + + Returns: + A PCollection containing prediction logs. + + Raises: + ValueError; when operation is not supported. 
+ """ + logging.info('RunInference on model: %s', inference_spec_type) + + batched_examples = examples | 'BatchExamples' >> beam.BatchElements() + operation_type = _get_operation_type(inference_spec_type) + if operation_type == OperationType.CLASSIFICATION: + return batched_examples | 'Classify' >> _Classify(inference_spec_type) + elif operation_type == OperationType.REGRESSION: + return batched_examples | 'Regress' >> _Regress(inference_spec_type) + elif operation_type == OperationType.PREDICTION: + return batched_examples | 'Predict' >> _Predict(inference_spec_type) + elif operation_type == OperationType.MULTIHEAD: + return (batched_examples + | 'MultiInference' >> _MultiInference(inference_spec_type)) + else: + raise ValueError('Unsupported operation_type %s' % operation_type) + + +_IOTensorSpec = collections.namedtuple( + '_IOTensorSpec', + ['input_tensor_alias', 'input_tensor_name', 'output_alias_tensor_names']) + +_Signature = collections.namedtuple('_Signature', ['name', 'signature_def']) + + +@beam.ptransform_fn +@beam.typehints.with_input_types(pa.RecordBatch) +@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) +def _Classify(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name + inference_spec_type: model_spec_pb2.InferenceSpecType): + """Performs classify PTransform.""" + if _using_in_process_inference(inference_spec_type): + return (pcoll + | 'Classify' >> beam.ParDo( + _BatchClassifyDoFn(inference_spec_type, shared.Shared())) + | 'BuildPredictionLogForClassifications' >> beam.ParDo( + _BuildPredictionLogForClassificationsDoFn())) + else: + raise NotImplementedError + + +@beam.ptransform_fn +@beam.typehints.with_input_types(pa.RecordBatch) +@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) +def _Regress(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name + inference_spec_type: model_spec_pb2.InferenceSpecType): + """Performs regress PTransform.""" + if 
_using_in_process_inference(inference_spec_type): + return (pcoll + | 'Regress' >> beam.ParDo( + _BatchRegressDoFn(inference_spec_type, shared.Shared())) + | 'BuildPredictionLogForRegressions' >> beam.ParDo( + _BuildPredictionLogForRegressionsDoFn())) + else: + raise NotImplementedError + + +@beam.ptransform_fn +@beam.typehints.with_input_types(pa.RecordBatch) +@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) +def _Predict(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name + inference_spec_type: model_spec_pb2.InferenceSpecType): + """Performs predict PTransform.""" + if _using_in_process_inference(inference_spec_type): + predictions = ( + pcoll + | 'Predict' >> beam.ParDo( + _BatchPredictDoFn(inference_spec_type, shared.Shared()))) + else: + predictions = ( + pcoll + | 'RemotePredict' >> beam.ParDo( + _RemotePredictDoFn(inference_spec_type, pcoll.pipeline.options))) + return (predictions + | 'BuildPredictionLogForPredictions' >> beam.ParDo( + _BuildPredictionLogForPredictionsDoFn())) + + +@beam.ptransform_fn +@beam.typehints.with_input_types(pa.RecordBatch) +@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) +def _MultiInference(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name + inference_spec_type: model_spec_pb2.InferenceSpecType): + """Performs multi inference PTransform.""" + if _using_in_process_inference(inference_spec_type): + return ( + pcoll + | 'MultiInference' >> beam.ParDo( + _BatchMultiInferenceDoFn(inference_spec_type, shared.Shared())) + | 'BuildMultiInferenceLog' >> beam.ParDo(_BuildMultiInferenceLogDoFn())) + else: + raise NotImplementedError + + +@six.add_metaclass(abc.ABCMeta) +class _BaseDoFn(beam.DoFn): + """Base DoFn that performs bulk inference.""" + + class _MetricsCollector(object): + """A collector for beam metrics.""" + + def __init__(self, inference_spec_type: model_spec_pb2.InferenceSpecType): + operation_type = _get_operation_type(inference_spec_type) + proximity_descriptor 
= ( + _METRICS_DESCRIPTOR_IN_PROCESS + if _using_in_process_inference(inference_spec_type) else + _METRICS_DESCRIPTOR_CLOUD_AI_PREDICTION) + namespace = util.MakeTfxNamespace( + [_METRICS_DESCRIPTOR_INFERENCE, operation_type, proximity_descriptor]) + + # Metrics + self._inference_counter = beam.metrics.Metrics.counter( + namespace, 'num_inferences') + self._num_instances = beam.metrics.Metrics.counter( + namespace, 'num_instances') + self._inference_request_batch_size = beam.metrics.Metrics.distribution( + namespace, 'inference_request_batch_size') + self._inference_request_batch_byte_size = ( + beam.metrics.Metrics.distribution( + namespace, 'inference_request_batch_byte_size')) + # Batch inference latency in microseconds. + self._inference_batch_latency_micro_secs = ( + beam.metrics.Metrics.distribution( + namespace, 'inference_batch_latency_micro_secs')) + self._model_byte_size = beam.metrics.Metrics.distribution( + namespace, 'model_byte_size') + # Model load latency in milliseconds. 
+ self._load_model_latency_milli_secs = beam.metrics.Metrics.distribution( + namespace, 'load_model_latency_milli_secs') + + # Metrics cache + self.load_model_latency_milli_secs_cache = None + self.model_byte_size_cache = None + + def update_metrics_with_cache(self): + if self.load_model_latency_milli_secs_cache is not None: + self._load_model_latency_milli_secs.update( + self.load_model_latency_milli_secs_cache) + self.load_model_latency_milli_secs_cache = None + if self.model_byte_size_cache is not None: + self._model_byte_size.update(self.model_byte_size_cache) + self.model_byte_size_cache = None + + def update(self, elements: List[str], latency_micro_secs: int) -> None: + self._inference_batch_latency_micro_secs.update(latency_micro_secs) + self._num_instances.inc(len(elements)) + self._inference_counter.inc(len(elements)) + self._inference_request_batch_size.update(len(elements)) + self._inference_request_batch_byte_size.update( + sum(element.ByteSize() for element in elements)) + + def __init__(self, inference_spec_type: model_spec_pb2.InferenceSpecType): + super(_BaseDoFn, self).__init__() + self._clock = None + self._metrics_collector = self._MetricsCollector(inference_spec_type) + + def setup(self): + self._clock = _ClockFactory.make_clock() + + def process( + self, elements: pa.RecordBatch + ) -> Iterable[Any]: + batch_start_time = self._clock.get_current_time_in_microseconds() + # TODO (Maxine): set ARROW_INPUT_COLUMN or take as a parameter + # extract record batch from here, assuming first column + serialized_examples = elements.column(0) + outputs = self.run_inference(serialized_examples) + result = self._post_process(serialized_examples, outputs) + self._metrics_collector.update( + elements, + self._clock.get_current_time_in_microseconds() - batch_start_time) + return result + + def finish_bundle(self): + self._metrics_collector.update_metrics_with_cache() + + @abc.abstractmethod + def run_inference( + self, elements: List[str] + ) -> 
Union[Mapping[Text, np.ndarray], Sequence[Mapping[Text, Any]]]: + raise NotImplementedError + + @abc.abstractmethod + def _post_process(self, elements: List[str], outputs: Any) -> Iterable[Any]: + raise NotImplementedError + + +def _retry_on_unavailable_and_resource_error_filter(exception: Exception): + """Retries for HttpError. + + Retries if error is unavailable (503) or resource exhausted (429). + Resource exhausted may happen when qps or bandwidth exceeds quota. + + Args: + exception: Exception from inference http request execution. + Returns: + A boolean of whether retry. + """ + + return (isinstance(exception, googleapiclient.errors.HttpError) and + exception.resp.status in (503, 429)) + +# TODO (Maxine): change all example to serialized +@beam.typehints.with_input_types(List[str]) +# Using output typehints triggers NotImplementedError('BEAM-2717)' on +# streaming mode on Dataflow runner. +# TODO(b/151468119): Consider to re-batch with online serving request size +# limit, and re-batch with RPC failures(InvalidArgument) regarding request size. +# @beam.typehints.with_output_types(prediction_log_pb2.PredictLog) +class _RemotePredictDoFn(_BaseDoFn): + """A DoFn that performs predictions from a cloud-hosted TensorFlow model. + + Supports both batch and streaming processing modes. + NOTE: Does not work on DirectRunner for streaming jobs [BEAM-7885]. + + In order to request predictions, you must deploy your trained model to AI + Platform Prediction in the TensorFlow SavedModel format. See + [Exporting a SavedModel for prediction] + (https://cloud.google.com/ai-platform/prediction/docs/exporting-savedmodel-for-prediction) + for more details. + + To send binary data, you have to make sure that the name of an input ends in + `_bytes`. + + NOTE: The returned `PredictLog` instances do not have `PredictRequest` part + filled. The reason is that it is difficult to determine the input tensor name + without having access to cloud-hosted model's signatures. 
+ """ + + def __init__(self, inference_spec_type: model_spec_pb2.InferenceSpecType, + pipeline_options: PipelineOptions): + super(_RemotePredictDoFn, self).__init__(inference_spec_type) + self._api_client = None + + project_id = ( + inference_spec_type.ai_platform_prediction_model_spec.project_id or + pipeline_options.view_as(GoogleCloudOptions).project) + if not project_id: + raise ValueError('Either a non-empty project id or project flag in ' + ' beam pipeline options needs be provided.') + + model_name = ( + inference_spec_type.ai_platform_prediction_model_spec.model_name) + if not model_name: + raise ValueError('A non-empty model name must be provided.') + + version_name = ( + inference_spec_type.ai_platform_prediction_model_spec.version_name) + name_spec = 'projects/{}/models/{}' + # If version is not specified, the default version for a model is used. + if version_name: + name_spec += '/versions/{}' + self._full_model_name = name_spec.format(project_id, model_name, + version_name) + + def setup(self): + super(_RemotePredictDoFn, self).setup() + # TODO(b/151468119): Add tfx_bsl_version and tfx_bsl_py_version to + # user agent once custom header is supported in googleapiclient. + self._api_client = discovery.build('ml', 'v1') + + # Retry _REMOTE_INFERENCE_NUM_RETRIES times with exponential backoff. 
+ @retry.with_exponential_backoff( + initial_delay_secs=1.0, + num_retries=_REMOTE_INFERENCE_NUM_RETRIES, + retry_filter=_retry_on_unavailable_and_resource_error_filter) + def _execute_request( + self, + request: http.HttpRequest) -> Mapping[Text, Sequence[Mapping[Text, Any]]]: + result = request.execute() + if 'error' in result: + raise ValueError(result['error']) + return result + + def _make_request(self, body: Mapping[Text, List[Any]]) -> http.HttpRequest: + return self._api_client.projects().predict( + name=self._full_model_name, body=body) + + @classmethod + def _prepare_instances( + cls, elements: List[tf.train.Example] + ) -> Generator[Mapping[Text, Any], None, None]: + for example in elements: + # TODO(b/151468119): support tf.train.SequenceExample + if not isinstance(example, tf.train.Example): + raise ValueError('Remote prediction only supports tf.train.Example') + + instance = {} + for input_name, feature in example.features.feature.items(): + attr_name = feature.WhichOneof('kind') + if attr_name is None: + continue + attr = getattr(feature, attr_name) + values = cls._parse_feature_content(attr.value, attr_name, + cls._sending_as_binary(input_name)) + # Flatten a sequence if its length is 1 + values = (values[0] if len(values) == 1 else values) + instance[input_name] = values + yield instance + + @staticmethod + def _sending_as_binary(input_name: Text) -> bool: + """Whether data should be sent as binary.""" + return input_name.endswith('_bytes') + + @staticmethod + def _parse_feature_content(values: Sequence[Any], attr_name: Text, + as_binary: bool) -> Sequence[Any]: + """Parse the content of tf.train.Feature object. + + If bytes_list, parse a list of bytes-like objects to a list of strings so + that it would be JSON serializable. + + If float_list or int64_list, do nothing. + + If data should be sent as binary, mark it as binary by replacing it with + a single attribute named 'b64'. 
+ """ + if as_binary: + return [{'b64': base64.b64encode(x).decode()} for x in values] + elif attr_name == 'bytes_list': + return [x.decode() for x in values] + else: + return values + + def run_inference( + self, elements: List[Union[tf.train.Example, tf.train.SequenceExample]] + ) -> Sequence[Mapping[Text, Any]]: + body = {'instances': list(self._prepare_instances(elements))} + request = self._make_request(body) + response = self._execute_request(request) + return response['predictions'] + + def _post_process( + self, elements: List[Union[tf.train.Example, tf.train.SequenceExample]], + outputs: Sequence[Mapping[Text, Any]] + ) -> Iterable[prediction_log_pb2.PredictLog]: + result = [] + for output in outputs: + predict_log = prediction_log_pb2.PredictLog() + for output_alias, values in output.items(): + values = np.array(values) + tensor_proto = tf.make_tensor_proto( + values=values, + dtype=tf.as_dtype(values.dtype).as_datatype_enum, + shape=np.expand_dims(values, axis=0).shape) + predict_log.response.outputs[output_alias].CopyFrom(tensor_proto) + result.append(predict_log) + return result + + +# TODO(b/131873699): Add typehints once +# [BEAM-8381](https://issues.apache.org/jira/browse/BEAM-8381) +# is fixed. +# TODO(b/143484017): Add batch_size back off in the case there are functional +# reasons large batch sizes cannot be handled. +class _BaseBatchSavedModelDoFn(_BaseDoFn): + """A DoFn that runs in-process batch inference with a model. + + Models need to have the required serving signature as mentioned in + [Tensorflow Serving](https://www.tensorflow.org/tfx/serving/signature_defs) + + This function will check model signatures first. Then it will load and run + model inference in batch. 
+ """ + + def __init__( + self, + inference_spec_type: model_spec_pb2.InferenceSpecType, + shared_model_handle: shared.Shared, + ): + super(_BaseBatchSavedModelDoFn, self).__init__(inference_spec_type) + self._inference_spec_type = inference_spec_type + self._shared_model_handle = shared_model_handle + self._model_path = inference_spec_type.saved_model_spec.model_path + self._tags = None + self._signatures = _get_signatures( + inference_spec_type.saved_model_spec.model_path, + inference_spec_type.saved_model_spec.signature_name, + _get_tags(inference_spec_type)) + self._session = None + self._io_tensor_spec = None + + def setup(self): + """Load the model. + + Note that worker may crash if exception is thrown in setup due + to b/139207285. + """ + + super(_BaseBatchSavedModelDoFn, self).setup() + self._tags = _get_tags(self._inference_spec_type) + self._io_tensor_spec = self._pre_process() + + if self._has_tpu_tag(): + # TODO(b/131873699): Support TPU inference. + raise ValueError('TPU inference is not supported yet.') + self._session = self._load_model() + + def _load_model(self): + """Load a saved model into memory. + + Returns: + Session instance. + """ + + def load(): + """Function for constructing shared LoadedModel.""" + # TODO(b/143484017): Do warmup and other heavy model construction here. 
+ result = tf.compat.v1.Session(graph=tf.compat.v1.Graph()) + memory_before = _get_current_process_memory_in_bytes() + start_time = self._clock.get_current_time_in_microseconds() + tf.compat.v1.saved_model.loader.load(result, self._tags, self._model_path) + end_time = self._clock.get_current_time_in_microseconds() + memory_after = _get_current_process_memory_in_bytes() + self._metrics_collector.load_model_latency_milli_secs_cache = ( + (end_time - start_time) / _MILLISECOND_TO_MICROSECOND) + self._metrics_collector.model_byte_size_cache = ( + memory_after - memory_before) + return result + + if not self._model_path: + raise ValueError('Model path is not valid.') + return self._shared_model_handle.acquire(load) + + def _pre_process(self) -> _IOTensorSpec: + # Pre process functions will validate for each signature. + io_tensor_specs = [] + for signature in self._signatures: + if len(signature.signature_def.inputs) != 1: + raise ValueError('Signature should have 1 and only 1 inputs') + if (list(signature.signature_def.inputs.values())[0].dtype != + tf.string.as_datatype_enum): + raise ValueError( + 'Input dtype is expected to be %s, got %s' % + tf.string.as_datatype_enum, + list(signature.signature_def.inputs.values())[0].dtype) + io_tensor_specs.append(_signature_pre_process(signature.signature_def)) + input_tensor_name = '' + input_tensor_alias = '' + output_alias_tensor_names = {} + for io_tensor_spec in io_tensor_specs: + if not input_tensor_name: + input_tensor_name = io_tensor_spec.input_tensor_name + input_tensor_alias = io_tensor_spec.input_tensor_alias + elif input_tensor_name != io_tensor_spec.input_tensor_name: + raise ValueError('Input tensor must be the same for all Signatures.') + for alias, tensor_name in io_tensor_spec.output_alias_tensor_names.items( + ): + output_alias_tensor_names[alias] = tensor_name + if (not output_alias_tensor_names or not input_tensor_name or + not input_tensor_alias): + raise ValueError('No valid fetch tensors or feed 
tensors.') + return _IOTensorSpec(input_tensor_alias, input_tensor_name, + output_alias_tensor_names) + + def _has_tpu_tag(self) -> bool: + return (len(self._tags) == 2 and tf.saved_model.SERVING in self._tags and + tf.saved_model.TPU in self._tags) + + def run_inference(self, elements: List[str]) -> Mapping[Text, np.ndarray]: + self._check_elements(elements) + outputs = self._run_tf_operations(elements) + return outputs + + def _run_tf_operations(self, elements: List[str]) -> Mapping[Text, np.ndarray]: + result = self._session.run( + self._io_tensor_spec.output_alias_tensor_names, + feed_dict={self._io_tensor_spec.input_tensor_name: elements}) + if len(result) != len(self._io_tensor_spec.output_alias_tensor_names): + raise RuntimeError('Output length does not match fetches') + return result + + def _check_elements( + self, elements: List[Union[tf.train.Example, + tf.train.SequenceExample]]) -> None: + """Unimplemented.""" + + raise NotImplementedError + + +@beam.typehints.with_input_types(List[str]) +@beam.typehints.with_output_types(Tuple[tf.train.Example, + classification_pb2.Classifications]) +class _BatchClassifyDoFn(_BaseBatchSavedModelDoFn): + """A DoFn that run inference on classification model.""" + + def setup(self): + signature_def = self._signatures[0].signature_def + if signature_def.method_name != tf.saved_model.CLASSIFY_METHOD_NAME: + raise ValueError( + 'BulkInferrerClassifyDoFn requires signature method ' + 'name %s, got: %s' % tf.saved_model.CLASSIFY_METHOD_NAME, + signature_def.method_name) + super(_BatchClassifyDoFn, self).setup() + + def _check_elements( + self, elements: List[Union[tf.train.Example, + tf.train.SequenceExample]]) -> None: + if not all(isinstance(element, tf.train.Example) for element in elements): + raise ValueError('Classify only supports tf.train.Example') + + def _post_process( + self, elements: Sequence[tf.train.Example], outputs: Mapping[Text, + np.ndarray] + ) -> Iterable[Tuple[tf.train.Example, 
classification_pb2.Classifications]]: + classifications = _post_process_classify( + self._io_tensor_spec.output_alias_tensor_names, elements, outputs) + return zip(elements, classifications) + + +@beam.typehints.with_input_types(List[str]) +@beam.typehints.with_output_types(Tuple[tf.train.Example, + regression_pb2.Regression]) +class _BatchRegressDoFn(_BaseBatchSavedModelDoFn): + """A DoFn that run inference on regression model.""" + + def setup(self): + super(_BatchRegressDoFn, self).setup() + + def _check_elements( + self, elements: List[Union[tf.train.Example, + tf.train.SequenceExample]]) -> None: + if not all(isinstance(element, tf.train.Example) for element in elements): + raise ValueError('Regress only supports tf.train.Example') + + def _post_process( + self, elements: Sequence[tf.train.Example], outputs: Mapping[Text, + np.ndarray] + ) -> Iterable[Tuple[tf.train.Example, regression_pb2.Regression]]: + regressions = _post_process_regress(elements, outputs) + return zip(elements, regressions) + + +@beam.typehints.with_input_types(List[str]) +@beam.typehints.with_output_types(prediction_log_pb2.PredictLog) +class _BatchPredictDoFn(_BaseBatchSavedModelDoFn): + """A DoFn that runs inference on predict model.""" + + def setup(self): + signature_def = self._signatures[0].signature_def + if signature_def.method_name != tf.saved_model.PREDICT_METHOD_NAME: + raise ValueError( + 'BulkInferrerPredictDoFn requires signature method ' + 'name %s, got: %s' % tf.saved_model.PREDICT_METHOD_NAME, + signature_def.method_name) + super(_BatchPredictDoFn, self).setup() + + def _check_elements( + self, elements: List[Union[tf.train.Example, + tf.train.SequenceExample]]) -> None: + pass + + def _post_process( + self, elements: Union[Sequence[tf.train.Example], + Sequence[tf.train.SequenceExample]], + outputs: Mapping[Text, np.ndarray] + ) -> Iterable[prediction_log_pb2.PredictLog]: + input_tensor_alias = self._io_tensor_spec.input_tensor_alias + signature_name = 
self._signatures[0].name + batch_size = len(elements) + for output_alias, output in outputs.items(): + if len(output.shape) < 1 or output.shape[0] != batch_size: + raise ValueError( + 'Expected output tensor %s to have at least one ' + 'dimension, with the first having a size equal to the input batch ' + 'size %s. Instead found %s' % + (output_alias, batch_size, output.shape)) + predict_log_tmpl = prediction_log_pb2.PredictLog() + predict_log_tmpl.request.model_spec.signature_name = signature_name + predict_log_tmpl.response.model_spec.signature_name = signature_name + input_tensor_proto = predict_log_tmpl.request.inputs[input_tensor_alias] + input_tensor_proto.dtype = tf.string.as_datatype_enum + input_tensor_proto.tensor_shape.dim.add().size = 1 + + result = [] + for i in range(batch_size): + predict_log = prediction_log_pb2.PredictLog() + predict_log.CopyFrom(predict_log_tmpl) + predict_log.request.inputs[input_tensor_alias].string_val.append( + elements[i].SerializeToString()) + for output_alias, output in outputs.items(): + # Mimic tensor::Split + tensor_proto = tf.make_tensor_proto( + values=output[i], + dtype=tf.as_dtype(output[i].dtype).as_datatype_enum, + shape=np.expand_dims(output[i], axis=0).shape) + predict_log.response.outputs[output_alias].CopyFrom(tensor_proto) + result.append(predict_log) + return result + + +@beam.typehints.with_input_types(List[str]) +@beam.typehints.with_output_types(Tuple[tf.train.Example, + inference_pb2.MultiInferenceResponse]) +class _BatchMultiInferenceDoFn(_BaseBatchSavedModelDoFn): + """A DoFn that runs inference on multi-head model.""" + + def _check_elements( + self, elements: List[Union[tf.train.Example, + tf.train.SequenceExample]]) -> None: + if not all(isinstance(element, tf.train.Example) for element in elements): + raise ValueError('Multi inference only supports tf.train.Example') + + def _post_process( + self, elements: Sequence[tf.train.Example], outputs: Mapping[Text, + np.ndarray] + ) -> 
Iterable[Tuple[tf.train.Example, inference_pb2.MultiInferenceResponse]]: + classifications = None + regressions = None + for signature in self._signatures: + signature_def = signature.signature_def + if signature_def.method_name == tf.saved_model.CLASSIFY_METHOD_NAME: + classifications = _post_process_classify( + self._io_tensor_spec.output_alias_tensor_names, elements, outputs) + elif signature_def.method_name == tf.saved_model.REGRESS_METHOD_NAME: + regressions = _post_process_regress(elements, outputs) + else: + raise ValueError('Signature method %s is not supported for ' + 'multi inference' % signature_def.method_name) + result = [] + for i in range(len(elements)): + response = inference_pb2.MultiInferenceResponse() + for signature in self._signatures: + signature_def = signature.signature_def + inference_result = response.results.add() + if (signature_def.method_name == tf.saved_model.CLASSIFY_METHOD_NAME and + classifications): + inference_result.classification_result.classifications.add().CopyFrom( + classifications[i]) + elif ( + signature_def.method_name == tf.saved_model.REGRESS_METHOD_NAME and + regressions): + inference_result.regression_result.regressions.add().CopyFrom( + regressions[i]) + else: + raise ValueError('Signature method %s is not supported for ' + 'multi inference' % signature_def.method_name) + inference_result.model_spec.signature_name = signature.name + if len(response.results) != len(self._signatures): + raise RuntimeError('Multi inference response result length does not ' + 'match the number of signatures') + result.append((elements[i], response)) + return result + + +@beam.typehints.with_input_types(Tuple[tf.train.Example, + classification_pb2.Classifications]) +@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) +class _BuildPredictionLogForClassificationsDoFn(beam.DoFn): + """A DoFn that builds prediction log from classifications.""" + + def process( + self, element: Tuple[tf.train.Example, 
classification_pb2.Classifications] + ) -> Iterable[prediction_log_pb2.PredictionLog]: + (train_example, classifications) = element + result = prediction_log_pb2.PredictionLog() + result.classify_log.request.input.example_list.examples.add().CopyFrom( + train_example) + result.classify_log.response.result.classifications.add().CopyFrom( + classifications) + yield result + + +@beam.typehints.with_input_types(Tuple[tf.train.Example, + regression_pb2.Regression]) +@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) +class _BuildPredictionLogForRegressionsDoFn(beam.DoFn): + """A DoFn that builds prediction log from regressions.""" + + def process( + self, element: Tuple[tf.train.Example, regression_pb2.Regression] + ) -> Iterable[prediction_log_pb2.PredictionLog]: + (train_example, regression) = element + result = prediction_log_pb2.PredictionLog() + result.regress_log.request.input.example_list.examples.add().CopyFrom( + train_example) + result.regress_log.response.result.regressions.add().CopyFrom(regression) + yield result + + +@beam.typehints.with_input_types(prediction_log_pb2.PredictLog) +@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) +class _BuildPredictionLogForPredictionsDoFn(beam.DoFn): + """A DoFn that builds prediction log from predictions.""" + + def process( + self, element: prediction_log_pb2.PredictLog + ) -> Iterable[prediction_log_pb2.PredictionLog]: + result = prediction_log_pb2.PredictionLog() + result.predict_log.CopyFrom(element) + yield result + + +@beam.typehints.with_input_types(Tuple[tf.train.Example, + inference_pb2.MultiInferenceResponse]) +@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) +class _BuildMultiInferenceLogDoFn(beam.DoFn): + """A DoFn that builds prediction log from multi-head inference result.""" + + def process( + self, element: Tuple[tf.train.Example, + inference_pb2.MultiInferenceResponse] + ) -> Iterable[prediction_log_pb2.PredictionLog]: + (train_example, 
multi_inference_response) = element + result = prediction_log_pb2.PredictionLog() + (result.multi_inference_log.request.input.example_list.examples.add() + .CopyFrom(train_example)) + result.multi_inference_log.response.CopyFrom(multi_inference_response) + yield result + + +# TODO (Maxine): moving these into class? +def _post_process_classify( + output_alias_tensor_names: Mapping[Text, Text], + elements: Sequence[tf.train.Example], outputs: Mapping[Text, np.ndarray] +) -> Sequence[classification_pb2.Classifications]: + """Returns classifications from inference output.""" + + # This is to avoid error "The truth value of an array with + # more than one element is ambiguous." + has_classes = False + has_scores = False + if tf.saved_model.CLASSIFY_OUTPUT_CLASSES in output_alias_tensor_names: + classes = outputs[tf.saved_model.CLASSIFY_OUTPUT_CLASSES] + has_classes = True + if tf.saved_model.CLASSIFY_OUTPUT_SCORES in output_alias_tensor_names: + scores = outputs[tf.saved_model.CLASSIFY_OUTPUT_SCORES] + has_scores = True + if has_classes: + if classes.ndim != 2: + raise ValueError('Expected Tensor shape: [batch_size num_classes] but ' + 'got %s' % classes.shape) + if classes.dtype != tf.string.as_numpy_dtype: + raise ValueError('Expected classes Tensor of %s. Got: %s' % + (tf.string.as_numpy_dtype, classes.dtype)) + if classes.shape[0] != len(elements): + raise ValueError('Expected classes output batch size of %s, got %s' % + (len(elements), classes.shape[0])) + if has_scores: + if scores.ndim != 2: + raise ValueError("""Expected Tensor shape: [batch_size num_classes] but + got %s""" % scores.shape) + if scores.dtype != tf.float32.as_numpy_dtype: + raise ValueError('Expected classes Tensor of %s. 
Got: %s' % + (tf.float32.as_numpy_dtype, scores.dtype)) + if scores.shape[0] != len(elements): + raise ValueError('Expected classes output batch size of %s, got %s' % + (len(elements), scores.shape[0])) + num_classes = 0 + if has_classes and has_scores: + if scores.shape[1] != classes.shape[1]: + raise ValueError('Tensors class and score should match in shape[1]. ' + 'Got %s vs %s' % (classes.shape[1], scores.shape[1])) + num_classes = classes.shape[1] + elif has_classes: + num_classes = classes.shape[1] + elif has_scores: + num_classes = scores.shape[1] + + result = [] + for i in range(len(elements)): + a_classification = classification_pb2.Classifications() + for c in range(num_classes): + a_class = a_classification.classes.add() + if has_classes: + a_class.label = classes[i][c] + if has_scores: + a_class.score = scores[i][c] + result.append(a_classification) + if len(result) != len(elements): + raise RuntimeError('Classifications length does not match elements') + return result + + +def _post_process_regress( + elements: Sequence[tf.train.Example], + outputs: Mapping[Text, np.ndarray]) -> Sequence[regression_pb2.Regression]: + """Returns regressions from inference output.""" + + if tf.saved_model.REGRESS_OUTPUTS not in outputs: + raise ValueError('No regression outputs found in outputs: %s' % + outputs.keys()) + output = outputs[tf.saved_model.REGRESS_OUTPUTS] + batch_size = len(elements) + if not (output.ndim == 1 or (output.ndim == 2 and output.shape[1] == 1)): + raise ValueError("""Expected output Tensor shape to be either [batch_size] + or [batch_size, 1] but got %s""" % output.shape) + if batch_size != output.shape[0]: + raise ValueError( + 'Input batch size did not match output batch size: %s vs %s' % + (batch_size, output.shape[0])) + if output.dtype != tf.float32.as_numpy_dtype: + raise ValueError('Expected output Tensor of %s. 
Got: %s' % + (tf.float32.as_numpy_dtype, output.dtype)) + if output.size != batch_size: + raise ValueError('Expected output batch size to be %s. Got: %s' % + (batch_size, output.size)) + flatten_output = output.flatten() + result = [] + for regression_result in flatten_output: + regression = regression_pb2.Regression() + regression.value = regression_result + result.append(regression) + + # Add additional check to save downstream consumer checks. + if len(result) != len(elements): + raise RuntimeError('Regression length does not match elements') + return result + + + def _signature_pre_process(signature: _SignatureDef) -> _IOTensorSpec: + """Returns IOTensorSpec from signature.""" + + if len(signature.inputs) != 1: + raise ValueError('Signature should have 1 and only 1 inputs') + input_tensor_alias = list(signature.inputs.keys())[0] + if list(signature.inputs.values())[0].dtype != tf.string.as_datatype_enum: + raise ValueError( + 'Input dtype is expected to be %s, got %s' % + (tf.string.as_datatype_enum, list(signature.inputs.values())[0].dtype)) + if signature.method_name == tf.saved_model.CLASSIFY_METHOD_NAME: + input_tensor_name, output_alias_tensor_names = ( + _signature_pre_process_classify(signature)) + elif signature.method_name == tf.saved_model.PREDICT_METHOD_NAME: + input_tensor_name, output_alias_tensor_names = ( + _signature_pre_process_predict(signature)) + elif signature.method_name == tf.saved_model.REGRESS_METHOD_NAME: + input_tensor_name, output_alias_tensor_names = ( + _signature_pre_process_regress(signature)) + else: + raise ValueError('Signature method %s is not supported' % + signature.method_name) + return _IOTensorSpec(input_tensor_alias, input_tensor_name, + output_alias_tensor_names) + + + def _signature_pre_process_classify( + signature: _SignatureDef) -> Tuple[Text, Mapping[Text, Text]]: + """Returns input tensor name and output alias tensor names from signature. 
+ + Args: + signature: SignatureDef + + Returns: + A tuple of input tensor name and output alias tensor names. + """ + + if len(signature.outputs) != 1 and len(signature.outputs) != 2: + raise ValueError('Classify signature should have 1 or 2 outputs') + if tf.saved_model.CLASSIFY_INPUTS not in signature.inputs: + raise ValueError('No classification inputs found in SignatureDef: %s' % + signature.inputs) + input_tensor_name = signature.inputs[tf.saved_model.CLASSIFY_INPUTS].name + output_alias_tensor_names = {} + if (tf.saved_model.CLASSIFY_OUTPUT_CLASSES not in signature.outputs and + tf.saved_model.CLASSIFY_OUTPUT_SCORES not in signature.outputs): + raise ValueError( + """Expected classification signature outputs to contain at + least one of %s or %s. Signature was: %s""" % + (tf.saved_model.CLASSIFY_OUTPUT_CLASSES, + tf.saved_model.CLASSIFY_OUTPUT_SCORES, signature)) + if tf.saved_model.CLASSIFY_OUTPUT_CLASSES in signature.outputs: + output_alias_tensor_names[tf.saved_model.CLASSIFY_OUTPUT_CLASSES] = ( + signature.outputs[tf.saved_model.CLASSIFY_OUTPUT_CLASSES].name) + if tf.saved_model.CLASSIFY_OUTPUT_SCORES in signature.outputs: + output_alias_tensor_names[tf.saved_model.CLASSIFY_OUTPUT_SCORES] = ( + signature.outputs[tf.saved_model.CLASSIFY_OUTPUT_SCORES].name) + return input_tensor_name, output_alias_tensor_names + + + def _signature_pre_process_predict( + signature: _SignatureDef) -> Tuple[Text, Mapping[Text, Text]]: + """Returns input tensor name and output alias tensor names from signature. + + Args: + signature: SignatureDef + + Returns: + A tuple of input tensor name and output alias tensor names. 
+ """ + + input_tensor_name = list(signature.inputs.values())[0].name + output_alias_tensor_names = dict([ + (key, output.name) for key, output in signature.outputs.items() + ]) + return input_tensor_name, output_alias_tensor_names + + +def _signature_pre_process_regress( + signature: _SignatureDef) -> Tuple[Text, Mapping[Text, Text]]: + """Returns input tensor name and output alias tensor names from signature. + + Args: + signature: SignatureDef + + Returns: + A tuple of input tensor name and output alias tensor names. + """ + + if len(signature.outputs) != 1: + raise ValueError('Regress signature should have 1 output') + if tf.saved_model.REGRESS_INPUTS not in signature.inputs: + raise ValueError('No regression inputs found in SignatureDef: %s' % + signature.inputs) + input_tensor_name = signature.inputs[tf.saved_model.REGRESS_INPUTS].name + if tf.saved_model.REGRESS_OUTPUTS not in signature.outputs: + raise ValueError('No regression outputs found in SignatureDef: %s' % + signature.outputs) + output_alias_tensor_names = { + tf.saved_model.REGRESS_OUTPUTS: + signature.outputs[tf.saved_model.REGRESS_OUTPUTS].name + } + return input_tensor_name, output_alias_tensor_names + + +def _using_in_process_inference( + inference_spec_type: model_spec_pb2.InferenceSpecType) -> bool: + return inference_spec_type.WhichOneof('type') == 'saved_model_spec' + + +def _get_signatures(model_path: Text, signatures: Sequence[Text], + tags: Sequence[Text]) -> Sequence[_Signature]: + """Returns a sequence of {model_signature_name: signature}.""" + + if signatures: + signature_names = signatures + else: + signature_names = [tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY] + + saved_model_pb = loader_impl.parse_saved_model(model_path) + meta_graph_def = _get_meta_graph_def(saved_model_pb, tags) + result = [] + for signature_name in signature_names: + if signature_name in meta_graph_def.signature_def: + result.append( + _Signature(signature_name, + 
meta_graph_def.signature_def[signature_name])) + else: + raise RuntimeError('Signature %s could not be found in SavedModel' % + signature_name) + return result + + +def _get_operation_type( + inference_spec_type: model_spec_pb2.InferenceSpecType) -> Text: + if _using_in_process_inference(inference_spec_type): + signatures = _get_signatures( + inference_spec_type.saved_model_spec.model_path, + inference_spec_type.saved_model_spec.signature_name, + _get_tags(inference_spec_type)) + if not signatures: + raise ValueError('Model does not have valid signature to use') + + if len(signatures) == 1: + method_name = signatures[0].signature_def.method_name + if method_name == tf.saved_model.CLASSIFY_METHOD_NAME: + return OperationType.CLASSIFICATION + elif method_name == tf.saved_model.REGRESS_METHOD_NAME: + return OperationType.REGRESSION + elif method_name == tf.saved_model.PREDICT_METHOD_NAME: + return OperationType.PREDICTION + else: + raise ValueError('Unsupported signature method_name %s' % method_name) + else: + for signature in signatures: + method_name = signature.signature_def.method_name + if (method_name != tf.saved_model.CLASSIFY_METHOD_NAME and + method_name != tf.saved_model.REGRESS_METHOD_NAME): + raise ValueError('Unsupported signature method_name for multi-head ' + 'model inference: %s' % method_name) + return OperationType.MULTIHEAD + else: + # Remote inference supports predictions only. 
+ return OperationType.PREDICTION + + +def _get_meta_graph_def(saved_model_pb: _SavedModel, + tags: Sequence[Text]) -> _MetaGraphDef: + """Returns MetaGraphDef from SavedModel.""" + + for meta_graph_def in saved_model_pb.meta_graphs: + if set(meta_graph_def.meta_info_def.tags) == set(tags): + return meta_graph_def + raise RuntimeError('MetaGraphDef associated with tags %s could not be ' + 'found in SavedModel' % tags) + + +def _get_current_process_memory_in_bytes(): + """Returns memory usage in bytes.""" + + if resource is not None: + usage = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss + if _is_darwin(): + return usage + return usage * 1024 + else: + logging.warning('Resource module is not available for current platform, ' + 'memory usage cannot be fetched.') + return 0 + + +def _get_tags( + inference_spec_type: model_spec_pb2.InferenceSpecType) -> Sequence[Text]: + """Returns tags from ModelSpec.""" + + if inference_spec_type.saved_model_spec.tag: + return list(inference_spec_type.saved_model_spec.tag) + else: + return [tf.saved_model.SERVING] + + +def _is_darwin() -> bool: + return sys.platform == 'darwin' + + +def _is_windows() -> bool: + return platform.system() == 'Windows' or os.name == 'nt' + + +def _is_cygwin() -> bool: + return platform.system().startswith('CYGWIN_NT') + + +class _Clock(object): + + def get_current_time_in_microseconds(self) -> int: + return int(time.time() * _SECOND_TO_MICROSECOND) + + +class _FineGrainedClock(_Clock): + + def get_current_time_in_microseconds(self) -> int: + return int( + time.clock_gettime_ns(time.CLOCK_REALTIME) / # pytype: disable=module-attr + _MICROSECOND_TO_NANOSECOND) + + +class _ClockFactory(object): + + @staticmethod + def make_clock() -> _Clock: + if (hasattr(time, 'clock_gettime_ns') and not _is_windows() + and not _is_cygwin()): + return _FineGrainedClock() + return _Clock() diff --git a/tfx_bsl/beam/run_inference_arrow_test.py b/tfx_bsl/beam/run_inference_arrow_test.py new file mode 100644 index 
00000000..a4eed521 --- /dev/null +++ b/tfx_bsl/beam/run_inference_arrow_test.py @@ -0,0 +1,581 @@ +# Copyright 2019 Google LLC. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for tfx_bsl.run_inference.""" + +from __future__ import absolute_import +from __future__ import division +# Standard __future__ imports +from __future__ import print_function + +import json +import os +try: + import unittest.mock as mock +except ImportError: + import mock + +import apache_beam as beam +from apache_beam.metrics.metric import MetricsFilter +from apache_beam.testing.util import assert_that +from apache_beam.testing.util import equal_to +from googleapiclient import discovery +from googleapiclient import http +from six.moves import http_client +import tensorflow as tf +from tfx_bsl.beam import run_inference +from tfx_bsl.public.proto import model_spec_pb2 + +from google.protobuf import text_format + +from tensorflow_serving.apis import prediction_log_pb2 + + +class RunInferenceFixture(tf.test.TestCase): + + def setUp(self): + super(RunInferenceFixture, self).setUp() + self._predict_examples = [ + text_format.Parse( + """ + features { + feature { key: "input1" value { float_list { value: 0 }}} + } + """, tf.train.Example()), + ] + + def _get_output_data_dir(self, sub_dir=None): + test_dir = self._testMethodName + path = os.path.join( + os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()), + test_dir) + if not tf.io.gfile.exists(path): + 
tf.io.gfile.makedirs(path) + if sub_dir is not None: + path = os.path.join(path, sub_dir) + return path + + def _prepare_predict_examples(self, example_path): + with tf.io.TFRecordWriter(example_path) as output_file: + for example in self._predict_examples: + output_file.write(example.SerializeToString()) + + +ARROW_INPUT_COLUMN = '__raw_record__' +class RunOfflineInferenceTest(RunInferenceFixture): + + def setUp(self): + super(RunOfflineInferenceTest, self).setUp() + + self._predict_examples = [ + text_format.Parse( + """ + features { + feature { key: "input1" value { float_list { value: 0 }}} + } + """, tf.train.Example()), + text_format.Parse( + """ + features { + feature { key: "input1" value { float_list { value: 1 }}} + } + """, tf.train.Example()), + ] + self._multihead_examples = [ + text_format.Parse( + """ + features { + feature {key: "x" value { float_list { value: 0.8 }}} + feature {key: "y" value { float_list { value: 0.2 }}} + } + """, tf.train.Example()), + text_format.Parse( + """ + features { + feature {key: "x" value { float_list { value: 0.6 }}} + feature {key: "y" value { float_list { value: 0.1 }}} + } + """, tf.train.Example()), + ] + + # TODO: Ask if these example can directly transform to recordBatch + + + def _prepare_multihead_examples(self, example_path): + with tf.io.TFRecordWriter(example_path) as output_file: + for example in self._multihead_examples: + output_file.write(example.SerializeToString()) + + def _build_predict_model(self, model_path): + """Exports the dummy sum predict model.""" + + with tf.compat.v1.Graph().as_default(): + input_tensors = { + 'x': tf.compat.v1.io.FixedLenFeature( + [1], dtype=tf.float32, default_value=0) + } + serving_receiver = ( + tf.compat.v1.estimator.export.build_parsing_serving_input_receiver_fn( + input_tensors)()) + output_tensors = {'y': serving_receiver.features['x'] * 2} + sess = tf.compat.v1.Session() + sess.run(tf.compat.v1.initializers.global_variables()) + signature_def = 
tf.compat.v1.estimator.export.PredictOutput( + output_tensors).as_signature_def(serving_receiver.receiver_tensors) + builder = tf.compat.v1.saved_model.builder.SavedModelBuilder(model_path) + builder.add_meta_graph_and_variables( + sess, [tf.compat.v1.saved_model.tag_constants.SERVING], + signature_def_map={ + tf.compat.v1.saved_model.signature_constants + .DEFAULT_SERVING_SIGNATURE_DEF_KEY: + signature_def, + }) + builder.save() + + def _build_regression_signature(self, input_tensor, output_tensor): + """Helper function for building a regression SignatureDef.""" + input_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( + input_tensor) + signature_inputs = { + tf.compat.v1.saved_model.signature_constants.REGRESS_INPUTS: + input_tensor_info + } + output_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( + output_tensor) + signature_outputs = { + tf.compat.v1.saved_model.signature_constants.REGRESS_OUTPUTS: + output_tensor_info + } + return tf.compat.v1.saved_model.signature_def_utils.build_signature_def( + signature_inputs, signature_outputs, + tf.compat.v1.saved_model.signature_constants.REGRESS_METHOD_NAME) + + def _build_classification_signature(self, input_tensor, scores_tensor): + """Helper function for building a classification SignatureDef.""" + input_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( + input_tensor) + signature_inputs = { + tf.compat.v1.saved_model.signature_constants.CLASSIFY_INPUTS: + input_tensor_info + } + output_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( + scores_tensor) + signature_outputs = { + tf.compat.v1.saved_model.signature_constants.CLASSIFY_OUTPUT_SCORES: + output_tensor_info + } + return tf.compat.v1.saved_model.signature_def_utils.build_signature_def( + signature_inputs, signature_outputs, + tf.compat.v1.saved_model.signature_constants.CLASSIFY_METHOD_NAME) + + def _build_multihead_model(self, model_path): + with tf.compat.v1.Graph().as_default(): + input_example = 
tf.compat.v1.placeholder( + tf.string, name='input_examples_tensor') + config = { + 'x': tf.compat.v1.io.FixedLenFeature( + [1], dtype=tf.float32, default_value=0), + 'y': tf.compat.v1.io.FixedLenFeature( + [1], dtype=tf.float32, default_value=0), + } + features = tf.compat.v1.parse_example(input_example, config) + x = features['x'] + y = features['y'] + sum_pred = x + y + diff_pred = tf.abs(x - y) + sess = tf.compat.v1.Session() + sess.run(tf.compat.v1.initializers.global_variables()) + signature_def_map = { + 'regress_diff': + self._build_regression_signature(input_example, diff_pred), + 'classify_sum': + self._build_classification_signature(input_example, sum_pred), + tf.compat.v1.saved_model.signature_constants + .DEFAULT_SERVING_SIGNATURE_DEF_KEY: + self._build_regression_signature(input_example, sum_pred) + } + builder = tf.compat.v1.saved_model.builder.SavedModelBuilder(model_path) + builder.add_meta_graph_and_variables( + sess, [tf.compat.v1.saved_model.tag_constants.SERVING], + signature_def_map=signature_def_map) + builder.save() + + def _run_inference_with_beam(self, example_path, inference_spec_type, + prediction_log_path): + with beam.Pipeline() as pipeline: + _ = ( + pipeline + | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) + | 'ParseExamples' >> beam.Map(tf.train.Example.FromString) + | + 'RunInference' >> run_inference.RunInferenceImpl(inference_spec_type) + | 'WritePredictions' >> beam.io.WriteToTFRecord( + prediction_log_path, + coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) + + def _get_results(self, prediction_log_path): + results = [] + for f in tf.io.gfile.glob(prediction_log_path + '-?????-of-?????'): + record_iterator = tf.compat.v1.io.tf_record_iterator(path=f) + for record_string in record_iterator: + prediction_log = prediction_log_pb2.PredictionLog() + prediction_log.MergeFromString(record_string) + results.append(prediction_log) + return results + + def testModelPathInvalid(self): + example_path = 
self._get_output_data_dir('examples') + self._prepare_predict_examples(example_path) + prediction_log_path = self._get_output_data_dir('predictions') + with self.assertRaisesRegexp(IOError, 'SavedModel file does not exist.*'): + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=self._get_output_data_dir())), prediction_log_path) + + def testEstimatorModelPredict(self): + example_path = self._get_output_data_dir('examples') + self._prepare_predict_examples(example_path) + model_path = self._get_output_data_dir('model') + self._build_predict_model(model_path) + prediction_log_path = self._get_output_data_dir('predictions') + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path)), prediction_log_path) + + results = self._get_results(prediction_log_path) + self.assertLen(results, 2) + self.assertEqual( + results[0].predict_log.request.inputs[ + run_inference._DEFAULT_INPUT_KEY].string_val[0], + self._predict_examples[0].SerializeToString()) + self.assertEqual(results[0].predict_log.response.outputs['y'].dtype, + tf.float32) + self.assertLen( + results[0].predict_log.response.outputs['y'].tensor_shape.dim, 2) + self.assertEqual( + results[0].predict_log.response.outputs['y'].tensor_shape.dim[0].size, + 1) + self.assertEqual( + results[0].predict_log.response.outputs['y'].tensor_shape.dim[1].size, + 1) + + def testClassifyModel(self): + example_path = self._get_output_data_dir('examples') + self._prepare_multihead_examples(example_path) + model_path = self._get_output_data_dir('model') + self._build_multihead_model(model_path) + prediction_log_path = self._get_output_data_dir('predictions') + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path, 
signature_name=['classify_sum'])), + prediction_log_path) + + results = self._get_results(prediction_log_path) + self.assertLen(results, 2) + classify_log = results[0].classify_log + self.assertLen(classify_log.request.input.example_list.examples, 1) + self.assertEqual(classify_log.request.input.example_list.examples[0], + self._multihead_examples[0]) + self.assertLen(classify_log.response.result.classifications, 1) + self.assertLen(classify_log.response.result.classifications[0].classes, 1) + self.assertAlmostEqual( + classify_log.response.result.classifications[0].classes[0].score, 1.0) + + def testRegressModel(self): + example_path = self._get_output_data_dir('examples') + self._prepare_multihead_examples(example_path) + model_path = self._get_output_data_dir('model') + self._build_multihead_model(model_path) + prediction_log_path = self._get_output_data_dir('predictions') + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path, signature_name=['regress_diff'])), + prediction_log_path) + + results = self._get_results(prediction_log_path) + self.assertLen(results, 2) + regress_log = results[0].regress_log + self.assertLen(regress_log.request.input.example_list.examples, 1) + self.assertEqual(regress_log.request.input.example_list.examples[0], + self._multihead_examples[0]) + self.assertLen(regress_log.response.result.regressions, 1) + self.assertAlmostEqual(regress_log.response.result.regressions[0].value, + 0.6) + + def testMultiInferenceModel(self): + example_path = self._get_output_data_dir('examples') + self._prepare_multihead_examples(example_path) + model_path = self._get_output_data_dir('model') + self._build_multihead_model(model_path) + prediction_log_path = self._get_output_data_dir('predictions') + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + 
model_path=model_path, + signature_name=['regress_diff', 'classify_sum'])), + prediction_log_path) + results = self._get_results(prediction_log_path) + self.assertLen(results, 2) + multi_inference_log = results[0].multi_inference_log + self.assertLen(multi_inference_log.request.input.example_list.examples, 1) + self.assertEqual(multi_inference_log.request.input.example_list.examples[0], + self._multihead_examples[0]) + self.assertLen(multi_inference_log.response.results, 2) + signature_names = [] + for result in multi_inference_log.response.results: + signature_names.append(result.model_spec.signature_name) + self.assertIn('regress_diff', signature_names) + self.assertIn('classify_sum', signature_names) + result = multi_inference_log.response.results[0] + self.assertEqual(result.model_spec.signature_name, 'regress_diff') + self.assertLen(result.regression_result.regressions, 1) + self.assertAlmostEqual(result.regression_result.regressions[0].value, 0.6) + result = multi_inference_log.response.results[1] + self.assertEqual(result.model_spec.signature_name, 'classify_sum') + self.assertLen(result.classification_result.classifications, 1) + self.assertLen(result.classification_result.classifications[0].classes, 1) + self.assertAlmostEqual( + result.classification_result.classifications[0].classes[0].score, 1.0) + + def testKerasModelPredict(self): + inputs = tf.keras.Input(shape=(1,), name='input1') + output1 = tf.keras.layers.Dense( + 1, activation=tf.nn.sigmoid, name='output1')( + inputs) + output2 = tf.keras.layers.Dense( + 1, activation=tf.nn.sigmoid, name='output2')( + inputs) + inference_model = tf.keras.models.Model(inputs, [output1, output2]) + + class TestKerasModel(tf.keras.Model): + + def __init__(self, inference_model): + super(TestKerasModel, self).__init__(name='test_keras_model') + self.inference_model = inference_model + + @tf.function(input_signature=[ + tf.TensorSpec(shape=[None], dtype=tf.string, name='inputs') + ]) + def call(self, 
serialized_example): + features = { + 'input1': + tf.compat.v1.io.FixedLenFeature([1], + dtype=tf.float32, + default_value=0) + } + input_tensor_dict = tf.io.parse_example(serialized_example, features) + return inference_model(input_tensor_dict['input1']) + + model = TestKerasModel(inference_model) + model.compile( + optimizer=tf.keras.optimizers.Adam(lr=.001), + loss=tf.keras.losses.binary_crossentropy, + metrics=['accuracy']) + + model_path = self._get_output_data_dir('model') + tf.compat.v1.keras.experimental.export_saved_model( + model, model_path, serving_only=True) + + example_path = self._get_output_data_dir('examples') + self._prepare_predict_examples(example_path) + prediction_log_path = self._get_output_data_dir('predictions') + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path)), prediction_log_path) + + results = self._get_results(prediction_log_path) + self.assertLen(results, 2) + + def testTelemetry(self): + example_path = self._get_output_data_dir('examples') + self._prepare_multihead_examples(example_path) + model_path = self._get_output_data_dir('model') + self._build_multihead_model(model_path) + inference_spec_type = model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path, signature_name=['classify_sum'])) + pipeline = beam.Pipeline() + _ = ( + pipeline | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) + | 'ParseExamples' >> beam.Map(tf.train.Example.FromString) + | 'RunInference' >> run_inference.RunInferenceImpl(inference_spec_type)) + run_result = pipeline.run() + run_result.wait_until_finish() + + num_inferences = run_result.metrics().query( + MetricsFilter().with_name('num_inferences')) + self.assertTrue(num_inferences['counters']) + self.assertEqual(num_inferences['counters'][0].result, 2) + num_instances = run_result.metrics().query( + 
MetricsFilter().with_name('num_instances')) + self.assertTrue(num_instances['counters']) + self.assertEqual(num_instances['counters'][0].result, 2) + inference_request_batch_size = run_result.metrics().query( + MetricsFilter().with_name('inference_request_batch_size')) + self.assertTrue(inference_request_batch_size['distributions']) + self.assertEqual( + inference_request_batch_size['distributions'][0].result.sum, 2) + inference_request_batch_byte_size = run_result.metrics().query( + MetricsFilter().with_name('inference_request_batch_byte_size')) + self.assertTrue(inference_request_batch_byte_size['distributions']) + self.assertEqual( + inference_request_batch_byte_size['distributions'][0].result.sum, + sum(element.ByteSize() for element in self._multihead_examples)) + inference_batch_latency_micro_secs = run_result.metrics().query( + MetricsFilter().with_name('inference_batch_latency_micro_secs')) + self.assertTrue(inference_batch_latency_micro_secs['distributions']) + self.assertGreaterEqual( + inference_batch_latency_micro_secs['distributions'][0].result.sum, 0) + load_model_latency_milli_secs = run_result.metrics().query( + MetricsFilter().with_name('load_model_latency_milli_secs')) + self.assertTrue(load_model_latency_milli_secs['distributions']) + self.assertGreaterEqual( + load_model_latency_milli_secs['distributions'][0].result.sum, 0) + + +class RunRemoteInferenceTest(RunInferenceFixture): + + def setUp(self): + super(RunRemoteInferenceTest, self).setUp() + self.example_path = self._get_output_data_dir('example') + self._prepare_predict_examples(self.example_path) + # This is from https://ml.googleapis.com/$discovery/rest?version=v1. 
+ self._discovery_testdata_dir = os.path.join( + os.path.join(os.path.dirname(__file__), 'testdata'), + 'ml_discovery.json') + + @staticmethod + def _make_response_body(content, successful): + if successful: + response_dict = {'predictions': content} + else: + response_dict = {'error': content} + return json.dumps(response_dict) + + def _set_up_pipeline(self, inference_spec_type): + self.pipeline = beam.Pipeline() + self.pcoll = ( + self.pipeline + | 'ReadExamples' >> beam.io.ReadFromTFRecord(self.example_path) + | 'ParseExamples' >> beam.Map(tf.train.Example.FromString) + | 'RunInference' >> run_inference.RunInferenceImpl(inference_spec_type)) + + def _run_inference_with_beam(self): + self.pipeline_result = self.pipeline.run() + self.pipeline_result.wait_until_finish() + + def test_model_predict(self): + predictions = [{'output_1': [0.901], 'output_2': [0.997]}] + builder = http.RequestMockBuilder({ + 'ml.projects.predict': + (None, self._make_response_body(predictions, successful=True)) + }) + resource = discovery.build( + 'ml', + 'v1', + http=http.HttpMock(self._discovery_testdata_dir, + {'status': http_client.OK}), + requestBuilder=builder) + with mock.patch('googleapiclient.discovery.' 
'build') as response_mock: + response_mock.side_effect = lambda service, version: resource + inference_spec_type = model_spec_pb2.InferenceSpecType( + ai_platform_prediction_model_spec=model_spec_pb2 + .AIPlatformPredictionModelSpec( + project_id='test-project', + model_name='test-model', + )) + + prediction_log = prediction_log_pb2.PredictionLog() + prediction_log.predict_log.response.outputs['output_1'].CopyFrom( + tf.make_tensor_proto(values=[0.901], dtype=tf.double, shape=(1, 1))) + prediction_log.predict_log.response.outputs['output_2'].CopyFrom( + tf.make_tensor_proto(values=[0.997], dtype=tf.double, shape=(1, 1))) + + self._set_up_pipeline(inference_spec_type) + assert_that(self.pcoll, equal_to([prediction_log])) + self._run_inference_with_beam() + + def test_exception_raised_when_response_body_contains_error_entry(self): + error_msg = 'Base64 decode failed.' + builder = http.RequestMockBuilder({ + 'ml.projects.predict': + (None, self._make_response_body(error_msg, successful=False)) + }) + resource = discovery.build( + 'ml', + 'v1', + http=http.HttpMock(self._discovery_testdata_dir, + {'status': http_client.OK}), + requestBuilder=builder) + with mock.patch('googleapiclient.discovery.' 
'build') as response_mock: + response_mock.side_effect = lambda service, version: resource + inference_spec_type = model_spec_pb2.InferenceSpecType( + ai_platform_prediction_model_spec=model_spec_pb2 + .AIPlatformPredictionModelSpec( + project_id='test-project', + model_name='test-model', + )) + + try: + self._set_up_pipeline(inference_spec_type) + self._run_inference_with_beam() + except ValueError as exc: + actual_error_msg = str(exc) + self.assertTrue(actual_error_msg.startswith(error_msg)) + else: + self.fail('Test was expected to throw ValueError exception') + + def test_exception_raised_when_project_id_is_empty(self): + inference_spec_type = model_spec_pb2.InferenceSpecType( + ai_platform_prediction_model_spec=model_spec_pb2 + .AIPlatformPredictionModelSpec(model_name='test-model',)) + + with self.assertRaises(ValueError): + self._set_up_pipeline(inference_spec_type) + self._run_inference_with_beam() + + def test_request_body_with_binary_data(self): + example = text_format.Parse( + """ + features { + feature { key: "x_bytes" value { bytes_list { value: ["ASa8asdf"] }}} + feature { key: "x" value { bytes_list { value: "JLK7ljk3" }}} + feature { key: "y" value { int64_list { value: [1, 2] }}} + } + """, tf.train.Example()) + result = list( + run_inference._RemotePredictDoFn._prepare_instances([example])) + self.assertEqual([ + { + 'x_bytes': { + 'b64': 'QVNhOGFzZGY=' + }, + 'x': 'JLK7ljk3', + 'y': [1, 2] + }, + ], result) + + +if __name__ == '__main__': + tf.test.main() From 1d553019cc0d48409553716f3e3389825d633203 Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Wed, 27 May 2020 11:57:39 -0400 Subject: [PATCH 2/8] make master the same as before for comparison --- tfx_bsl/beam/run_inference_arrow.py | 72 ++++++++++++++---------- tfx_bsl/beam/run_inference_arrow_test.py | 6 +- 2 files changed, 43 insertions(+), 35 deletions(-) diff --git a/tfx_bsl/beam/run_inference_arrow.py b/tfx_bsl/beam/run_inference_arrow.py index 316b65a5..98e45148 100644 --- 
a/tfx_bsl/beam/run_inference_arrow.py +++ b/tfx_bsl/beam/run_inference_arrow.py @@ -32,7 +32,6 @@ from absl import logging import apache_beam as beam -import pyarrow as pa from apache_beam.options.pipeline_options import GoogleCloudOptions from apache_beam.options.pipeline_options import PipelineOptions from apache_beam.utils import retry @@ -80,7 +79,6 @@ _MetaGraphDef = Any _SavedModel = Any -# TODO (Maxine): what is this? _BulkInferResult = Union[prediction_log_pb2.PredictLog, Tuple[tf.train.Example, regression_pb2.Regression], Tuple[tf.train.Example, @@ -97,11 +95,9 @@ class OperationType(object): MULTIHEAD = 'MULTIHEAD' -# TODO (Me): pTransform from examples/sequence example here - -# TODO (Me): Union[bytes, pa.RecordBatch]? @beam.ptransform_fn -@beam.typehints.with_input_types(pa.RecordBatch) +@beam.typehints.with_input_types(Union[tf.train.Example, + tf.train.SequenceExample]) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def RunInferenceImpl( # pylint: disable=invalid-name examples: beam.pvalue.PCollection, @@ -110,7 +106,7 @@ def RunInferenceImpl( # pylint: disable=invalid-name """Implementation of RunInference API. Args: - examples: A PCollection containing RecordBatch. + examples: A PCollection containing examples. inference_spec_type: Model inference endpoint. 
Returns: @@ -144,7 +140,8 @@ def RunInferenceImpl( # pylint: disable=invalid-name @beam.ptransform_fn -@beam.typehints.with_input_types(pa.RecordBatch) +@beam.typehints.with_input_types(Union[tf.train.Example, + tf.train.SequenceExample]) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _Classify(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name inference_spec_type: model_spec_pb2.InferenceSpecType): @@ -160,7 +157,8 @@ def _Classify(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name @beam.ptransform_fn -@beam.typehints.with_input_types(pa.RecordBatch) +@beam.typehints.with_input_types(Union[tf.train.Example, + tf.train.SequenceExample]) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _Regress(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name inference_spec_type: model_spec_pb2.InferenceSpecType): @@ -176,7 +174,8 @@ def _Regress(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name @beam.ptransform_fn -@beam.typehints.with_input_types(pa.RecordBatch) +@beam.typehints.with_input_types(Union[tf.train.Example, + tf.train.SequenceExample]) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _Predict(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name inference_spec_type: model_spec_pb2.InferenceSpecType): @@ -197,7 +196,8 @@ def _Predict(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name @beam.ptransform_fn -@beam.typehints.with_input_types(pa.RecordBatch) +@beam.typehints.with_input_types(Union[tf.train.Example, + tf.train.SequenceExample]) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _MultiInference(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name inference_spec_type: model_spec_pb2.InferenceSpecType): @@ -261,7 +261,9 @@ def update_metrics_with_cache(self): self._model_byte_size.update(self.model_byte_size_cache) self.model_byte_size_cache = None - def update(self, elements: List[str], 
latency_micro_secs: int) -> None: + def update(self, elements: List[Union[tf.train.Example, + tf.train.SequenceExample]], + latency_micro_secs: int) -> None: self._inference_batch_latency_micro_secs.update(latency_micro_secs) self._num_instances.inc(len(elements)) self._inference_counter.inc(len(elements)) @@ -278,14 +280,11 @@ def setup(self): self._clock = _ClockFactory.make_clock() def process( - self, elements: pa.RecordBatch + self, elements: List[Union[tf.train.Example, tf.train.SequenceExample]] ) -> Iterable[Any]: batch_start_time = self._clock.get_current_time_in_microseconds() - # TODO (Maxine): set ARROW_INPUT_COLUMN or take as a parameter - # extract record batch from here, assuming first column - serialized_examples = elements.column(0) - outputs = self.run_inference(serialized_examples) - result = self._post_process(serialized_examples, outputs) + outputs = self.run_inference(elements) + result = self._post_process(elements, outputs) self._metrics_collector.update( elements, self._clock.get_current_time_in_microseconds() - batch_start_time) @@ -296,12 +295,14 @@ def finish_bundle(self): @abc.abstractmethod def run_inference( - self, elements: List[str] + self, elements: List[Union[tf.train.Example, tf.train.SequenceExample]] ) -> Union[Mapping[Text, np.ndarray], Sequence[Mapping[Text, Any]]]: raise NotImplementedError @abc.abstractmethod - def _post_process(self, elements: List[str], outputs: Any) -> Iterable[Any]: + def _post_process(self, elements: List[Union[tf.train.Example, + tf.train.SequenceExample]], + outputs: Any) -> Iterable[Any]: raise NotImplementedError @@ -320,8 +321,9 @@ def _retry_on_unavailable_and_resource_error_filter(exception: Exception): return (isinstance(exception, googleapiclient.errors.HttpError) and exception.resp.status in (503, 429)) -# TODO (Maxine): change all example to serialized -@beam.typehints.with_input_types(List[str]) + +@beam.typehints.with_input_types(List[Union[tf.train.Example, + tf.train.SequenceExample]]) 
# Using output typehints triggers NotImplementedError('BEAM-2717)' on # streaming mode on Dataflow runner. # TODO(b/151468119): Consider to re-batch with online serving request size @@ -578,15 +580,22 @@ def _has_tpu_tag(self) -> bool: return (len(self._tags) == 2 and tf.saved_model.SERVING in self._tags and tf.saved_model.TPU in self._tags) - def run_inference(self, elements: List[str]) -> Mapping[Text, np.ndarray]: + def run_inference( + self, elements: List[Union[tf.train.Example, tf.train.SequenceExample]] + ) -> Mapping[Text, np.ndarray]: self._check_elements(elements) outputs = self._run_tf_operations(elements) return outputs - def _run_tf_operations(self, elements: List[str]) -> Mapping[Text, np.ndarray]: + def _run_tf_operations( + self, elements: List[Union[tf.train.Example, tf.train.SequenceExample]] + ) -> Mapping[Text, np.ndarray]: + input_values = [] + for element in elements: + input_values.append(element.SerializeToString()) result = self._session.run( self._io_tensor_spec.output_alias_tensor_names, - feed_dict={self._io_tensor_spec.input_tensor_name: elements}) + feed_dict={self._io_tensor_spec.input_tensor_name: input_values}) if len(result) != len(self._io_tensor_spec.output_alias_tensor_names): raise RuntimeError('Output length does not match fetches') return result @@ -599,7 +608,8 @@ def _check_elements( raise NotImplementedError -@beam.typehints.with_input_types(List[str]) +@beam.typehints.with_input_types(List[Union[tf.train.Example, + tf.train.SequenceExample]]) @beam.typehints.with_output_types(Tuple[tf.train.Example, classification_pb2.Classifications]) class _BatchClassifyDoFn(_BaseBatchSavedModelDoFn): @@ -629,7 +639,8 @@ def _post_process( return zip(elements, classifications) -@beam.typehints.with_input_types(List[str]) +@beam.typehints.with_input_types(List[Union[tf.train.Example, + tf.train.SequenceExample]]) @beam.typehints.with_output_types(Tuple[tf.train.Example, regression_pb2.Regression]) class 
_BatchRegressDoFn(_BaseBatchSavedModelDoFn): @@ -652,7 +663,8 @@ def _post_process( return zip(elements, regressions) -@beam.typehints.with_input_types(List[str]) +@beam.typehints.with_input_types(List[Union[tf.train.Example, + tf.train.SequenceExample]]) @beam.typehints.with_output_types(prediction_log_pb2.PredictLog) class _BatchPredictDoFn(_BaseBatchSavedModelDoFn): """A DoFn that runs inference on predict model.""" @@ -710,7 +722,8 @@ def _post_process( return result -@beam.typehints.with_input_types(List[str]) +@beam.typehints.with_input_types(List[Union[tf.train.Example, + tf.train.SequenceExample]]) @beam.typehints.with_output_types(Tuple[tf.train.Example, inference_pb2.MultiInferenceResponse]) class _BatchMultiInferenceDoFn(_BaseBatchSavedModelDoFn): @@ -830,7 +843,6 @@ def process( yield result -# TODO (Maxine): moving these into class? def _post_process_classify( output_alias_tensor_names: Mapping[Text, Text], elements: Sequence[tf.train.Example], outputs: Mapping[Text, np.ndarray] diff --git a/tfx_bsl/beam/run_inference_arrow_test.py b/tfx_bsl/beam/run_inference_arrow_test.py index a4eed521..ce9ac4d0 100644 --- a/tfx_bsl/beam/run_inference_arrow_test.py +++ b/tfx_bsl/beam/run_inference_arrow_test.py @@ -71,12 +71,10 @@ def _prepare_predict_examples(self, example_path): output_file.write(example.SerializeToString()) -ARROW_INPUT_COLUMN = '__raw_record__' class RunOfflineInferenceTest(RunInferenceFixture): - + def setUp(self): super(RunOfflineInferenceTest, self).setUp() - self._predict_examples = [ text_format.Parse( """ @@ -108,8 +106,6 @@ def setUp(self): """, tf.train.Example()), ] - # TODO: Ask if these example can directly transform to recordBatch - def _prepare_multihead_examples(self, example_path): with tf.io.TFRecordWriter(example_path) as output_file: From de87e4c2a98daa27acb2253d8fd1b24c441a24eb Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Mon, 20 Jul 2020 15:34:42 -0400 Subject: [PATCH 3/8] add benchmarks for the 2 modules --- 
.../run_inference_arrow_benchmark.py | 87 +++++++++++++++++++ .../benchmarks/run_inference_benchmark.py | 84 ++++++++++++++++++ 2 files changed, 171 insertions(+) create mode 100644 tfx_bsl/beam/benchmarks/run_inference_arrow_benchmark.py create mode 100644 tfx_bsl/beam/benchmarks/run_inference_benchmark.py diff --git a/tfx_bsl/beam/benchmarks/run_inference_arrow_benchmark.py b/tfx_bsl/beam/benchmarks/run_inference_arrow_benchmark.py new file mode 100644 index 00000000..3e57944e --- /dev/null +++ b/tfx_bsl/beam/benchmarks/run_inference_arrow_benchmark.py @@ -0,0 +1,87 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Script to use run_inference_arrow from command line +Below is a complete command line for running this script +for benchmarks + +python3 run_inference_arrow_benchemark.py \ +PATH_TO_MODEL \ +PATH_TO_DATA \ +--output gs://YOUR_BUCKET/results/output \ +--project YOUR_PROJECT \ +--runner DataflowRunner \ +--temp_location gs://YOUR_BUCKET/temp \ +--job_name run-inference-arrow-metrics \ +--region us-central1 +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import apache_beam as beam +from tfx_bsl.tfxio import raw_tf_record +from tfx_bsl.beam import run_inference_arrow +from tfx_bsl.public.proto import model_spec_pb2 +from apache_beam.options.pipeline_options import PipelineOptions +from apache_beam.options.pipeline_options import SetupOptions + + +def run(argv=None, save_main_session=True): + """Main entry point; defines and runs the user_score pipeline.""" + parser = argparse.ArgumentParser() + + parser.add_argument( + 'model_path', + type=str, + help='The path to input model') + parser.add_argument( + 'input', + type=str, + help='Path to the data file(s) containing data.') + parser.add_argument( + '--output', + type=str, + required=True, + help='Path to the output file(s).') + + args, pipeline_args = parser.parse_known_args(argv) + options = PipelineOptions(pipeline_args) + + setup_options = options.view_as(SetupOptions) + # Path of the wheel file tfx-bsl + setup_options.extra_packages = ['./tfx-bsl/dist/tfx_bsl-0.23.0.dev0-cp37-cp37m-linux_x86_64.whl'] + setup_options.save_main_session = save_main_session + + def get_saved_model_spec(model_path): + '''returns an InferenceSpecType object for a saved model path''' + return model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path)) + + inference_spec_type = get_saved_model_spec(args.model_path) + converter = raw_tf_record.RawTfRecordTFXIO( + args.input, 
raw_record_column_name='__RAW_RECORD__') + + with beam.Pipeline(options=options) as p: + (p + | "GetRawRecordAndConvertToRecordBatch" >> converter.BeamSource() + | 'RunInferenceImpl' >> run_inference_arrow.RunInferenceImpl( + inference_spec_type)) + + +if __name__ == '__main__': + run() diff --git a/tfx_bsl/beam/benchmarks/run_inference_benchmark.py b/tfx_bsl/beam/benchmarks/run_inference_benchmark.py new file mode 100644 index 00000000..d42ab62d --- /dev/null +++ b/tfx_bsl/beam/benchmarks/run_inference_benchmark.py @@ -0,0 +1,84 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+"""Script to use run_inference from command line
+Below is a complete command line for running this script
+for benchmarks
+
+python3 run_inference_benchmark.py \
+PATH_TO_MODEL \
+PATH_TO_DATA \
+--output gs://YOUR_BUCKET/results/output \
+--project YOUR_PROJECT \
+--runner DataflowRunner \
+--temp_location gs://YOUR_BUCKET/temp \
+--job_name run-inference-metrics \
+--region us-central1
+"""
+
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import argparse
+import apache_beam as beam
+from tfx_bsl.beam import run_inference
+from tfx_bsl.public.proto import model_spec_pb2
+from apache_beam.options.pipeline_options import PipelineOptions
+from apache_beam.options.pipeline_options import SetupOptions
+
+
+def run(argv=None, save_main_session=True):
+  """Main entry point; defines and runs the inference pipeline."""
+  parser = argparse.ArgumentParser()
+
+  parser.add_argument(
+      'model_path',
+      type=str,
+      help='The path to input model')
+  parser.add_argument(
+      'input',
+      type=str,
+      help='Path to the data file(s) containing game data.')
+  parser.add_argument(
+      '--output',
+      type=str,
+      required=True,
+      help='Path to the output file(s).')
+
+  args, pipeline_args = parser.parse_known_args(argv)
+  options = PipelineOptions(pipeline_args)
+
+  setup_options = options.view_as(SetupOptions)
+  # Path of the wheel file tfx-bsl
+  setup_options.extra_packages = ['./tfx-bsl/dist/tfx_bsl-0.23.0.dev0-cp37-cp37m-linux_x86_64.whl']
+  setup_options.save_main_session = save_main_session
+
+  def get_saved_model_spec(model_path):
+    '''returns an InferenceSpecType object for a saved model path'''
+    return model_spec_pb2.InferenceSpecType(
+        saved_model_spec=model_spec_pb2.SavedModelSpec(
+            model_path=model_path))
+
+  inference_spec_type = get_saved_model_spec(args.model_path)
+  with beam.Pipeline(options=options) as p:
+    (p
+     | 'ReadInputText' >> beam.io.ReadFromText(args.input)
+     | 'RunInferenceImpl' >> 
run_inference.RunInferenceImpl( + inference_spec_type)) + + +if __name__ == '__main__': + run() From 265dd2e6831366497346f97b0726944757e57278 Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Thu, 23 Jul 2020 18:16:10 -0400 Subject: [PATCH 4/8] remove outdated command line entry script --- .../run_inference_arrow_benchmark.py | 87 ------------------- .../benchmarks/run_inference_benchmark.py | 11 ++- 2 files changed, 7 insertions(+), 91 deletions(-) delete mode 100644 tfx_bsl/beam/benchmarks/run_inference_arrow_benchmark.py diff --git a/tfx_bsl/beam/benchmarks/run_inference_arrow_benchmark.py b/tfx_bsl/beam/benchmarks/run_inference_arrow_benchmark.py deleted file mode 100644 index 3e57944e..00000000 --- a/tfx_bsl/beam/benchmarks/run_inference_arrow_benchmark.py +++ /dev/null @@ -1,87 +0,0 @@ -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -"""Script to use run_inference_arrow from command line -Below is a complete command line for running this script -for benchmarks - -python3 run_inference_arrow_benchemark.py \ -PATH_TO_MODEL \ -PATH_TO_DATA \ ---output gs://YOUR_BUCKET/results/output \ ---project YOUR_PROJECT \ ---runner DataflowRunner \ ---temp_location gs://YOUR_BUCKET/temp \ ---job_name run-inference-arrow-metrics \ ---region us-central1 -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import argparse -import apache_beam as beam -from tfx_bsl.tfxio import raw_tf_record -from tfx_bsl.beam import run_inference_arrow -from tfx_bsl.public.proto import model_spec_pb2 -from apache_beam.options.pipeline_options import PipelineOptions -from apache_beam.options.pipeline_options import SetupOptions - - -def run(argv=None, save_main_session=True): - """Main entry point; defines and runs the user_score pipeline.""" - parser = argparse.ArgumentParser() - - parser.add_argument( - 'model_path', - type=str, - help='The path to input model') - parser.add_argument( - 'input', - type=str, - help='Path to the data file(s) containing data.') - parser.add_argument( - '--output', - type=str, - required=True, - help='Path to the output file(s).') - - args, pipeline_args = parser.parse_known_args(argv) - options = PipelineOptions(pipeline_args) - - setup_options = options.view_as(SetupOptions) - # Path of the wheel file tfx-bsl - setup_options.extra_packages = ['./tfx-bsl/dist/tfx_bsl-0.23.0.dev0-cp37-cp37m-linux_x86_64.whl'] - setup_options.save_main_session = save_main_session - - def get_saved_model_spec(model_path): - '''returns an InferenceSpecType object for a saved model path''' - return model_spec_pb2.InferenceSpecType( - saved_model_spec=model_spec_pb2.SavedModelSpec( - model_path=model_path)) - - inference_spec_type = get_saved_model_spec(args.model_path) - converter = raw_tf_record.RawTfRecordTFXIO( - args.input, 
raw_record_column_name='__RAW_RECORD__') - - with beam.Pipeline(options=options) as p: - (p - | "GetRawRecordAndConvertToRecordBatch" >> converter.BeamSource() - | 'RunInferenceImpl' >> run_inference_arrow.RunInferenceImpl( - inference_spec_type)) - - -if __name__ == '__main__': - run() diff --git a/tfx_bsl/beam/benchmarks/run_inference_benchmark.py b/tfx_bsl/beam/benchmarks/run_inference_benchmark.py index d42ab62d..133532a0 100644 --- a/tfx_bsl/beam/benchmarks/run_inference_benchmark.py +++ b/tfx_bsl/beam/benchmarks/run_inference_benchmark.py @@ -1,4 +1,4 @@ -# Copyright 2019 Google LLC +# Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -27,13 +27,13 @@ --region us-central1 """ - from __future__ import absolute_import from __future__ import division from __future__ import print_function import argparse import apache_beam as beam +from tfx_bsl.tfxio import raw_tf_record from tfx_bsl.beam import run_inference from tfx_bsl.public.proto import model_spec_pb2 from apache_beam.options.pipeline_options import PipelineOptions @@ -51,7 +51,7 @@ def run(argv=None, save_main_session=True): parser.add_argument( 'input', type=str, - help='Path to the data file(s) containing game data.') + help='Path to the data file(s) containing data.') parser.add_argument( '--output', type=str, @@ -73,9 +73,12 @@ def get_saved_model_spec(model_path): model_path=model_path)) inference_spec_type = get_saved_model_spec(args.model_path) + converter = raw_tf_record.RawTfRecordTFXIO( + args.input, raw_record_column_name='__RAW_RECORD__') + with beam.Pipeline(options=options) as p: (p - | 'ReadInputText' >> beam.io.ReadFromText(args.input) + | "GetRawRecordAndConvertToRecordBatch" >> converter.BeamSource() | 'RunInferenceImpl' >> run_inference.RunInferenceImpl( inference_spec_type)) From 9067ef5265c6524ed7a2707e7156758f2283f826 Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Thu, 
23 Jul 2020 18:16:37 -0400 Subject: [PATCH 5/8] Delete run_inference_arrow_test.py --- tfx_bsl/beam/run_inference_arrow_test.py | 577 ----------------------- 1 file changed, 577 deletions(-) delete mode 100644 tfx_bsl/beam/run_inference_arrow_test.py diff --git a/tfx_bsl/beam/run_inference_arrow_test.py b/tfx_bsl/beam/run_inference_arrow_test.py deleted file mode 100644 index ce9ac4d0..00000000 --- a/tfx_bsl/beam/run_inference_arrow_test.py +++ /dev/null @@ -1,577 +0,0 @@ -# Copyright 2019 Google LLC. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""Tests for tfx_bsl.run_inference.""" - -from __future__ import absolute_import -from __future__ import division -# Standard __future__ imports -from __future__ import print_function - -import json -import os -try: - import unittest.mock as mock -except ImportError: - import mock - -import apache_beam as beam -from apache_beam.metrics.metric import MetricsFilter -from apache_beam.testing.util import assert_that -from apache_beam.testing.util import equal_to -from googleapiclient import discovery -from googleapiclient import http -from six.moves import http_client -import tensorflow as tf -from tfx_bsl.beam import run_inference -from tfx_bsl.public.proto import model_spec_pb2 - -from google.protobuf import text_format - -from tensorflow_serving.apis import prediction_log_pb2 - - -class RunInferenceFixture(tf.test.TestCase): - - def setUp(self): - super(RunInferenceFixture, self).setUp() - self._predict_examples = [ - text_format.Parse( - """ - features { - feature { key: "input1" value { float_list { value: 0 }}} - } - """, tf.train.Example()), - ] - - def _get_output_data_dir(self, sub_dir=None): - test_dir = self._testMethodName - path = os.path.join( - os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()), - test_dir) - if not tf.io.gfile.exists(path): - tf.io.gfile.makedirs(path) - if sub_dir is not None: - path = os.path.join(path, sub_dir) - return path - - def _prepare_predict_examples(self, example_path): - with tf.io.TFRecordWriter(example_path) as output_file: - for example in self._predict_examples: - output_file.write(example.SerializeToString()) - - -class RunOfflineInferenceTest(RunInferenceFixture): - - def setUp(self): - super(RunOfflineInferenceTest, self).setUp() - self._predict_examples = [ - text_format.Parse( - """ - features { - feature { key: "input1" value { float_list { value: 0 }}} - } - """, tf.train.Example()), - text_format.Parse( - """ - features { - feature { key: "input1" value { float_list { value: 1 }}} - } - """, 
tf.train.Example()), - ] - self._multihead_examples = [ - text_format.Parse( - """ - features { - feature {key: "x" value { float_list { value: 0.8 }}} - feature {key: "y" value { float_list { value: 0.2 }}} - } - """, tf.train.Example()), - text_format.Parse( - """ - features { - feature {key: "x" value { float_list { value: 0.6 }}} - feature {key: "y" value { float_list { value: 0.1 }}} - } - """, tf.train.Example()), - ] - - - def _prepare_multihead_examples(self, example_path): - with tf.io.TFRecordWriter(example_path) as output_file: - for example in self._multihead_examples: - output_file.write(example.SerializeToString()) - - def _build_predict_model(self, model_path): - """Exports the dummy sum predict model.""" - - with tf.compat.v1.Graph().as_default(): - input_tensors = { - 'x': tf.compat.v1.io.FixedLenFeature( - [1], dtype=tf.float32, default_value=0) - } - serving_receiver = ( - tf.compat.v1.estimator.export.build_parsing_serving_input_receiver_fn( - input_tensors)()) - output_tensors = {'y': serving_receiver.features['x'] * 2} - sess = tf.compat.v1.Session() - sess.run(tf.compat.v1.initializers.global_variables()) - signature_def = tf.compat.v1.estimator.export.PredictOutput( - output_tensors).as_signature_def(serving_receiver.receiver_tensors) - builder = tf.compat.v1.saved_model.builder.SavedModelBuilder(model_path) - builder.add_meta_graph_and_variables( - sess, [tf.compat.v1.saved_model.tag_constants.SERVING], - signature_def_map={ - tf.compat.v1.saved_model.signature_constants - .DEFAULT_SERVING_SIGNATURE_DEF_KEY: - signature_def, - }) - builder.save() - - def _build_regression_signature(self, input_tensor, output_tensor): - """Helper function for building a regression SignatureDef.""" - input_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( - input_tensor) - signature_inputs = { - tf.compat.v1.saved_model.signature_constants.REGRESS_INPUTS: - input_tensor_info - } - output_tensor_info = 
tf.compat.v1.saved_model.utils.build_tensor_info( - output_tensor) - signature_outputs = { - tf.compat.v1.saved_model.signature_constants.REGRESS_OUTPUTS: - output_tensor_info - } - return tf.compat.v1.saved_model.signature_def_utils.build_signature_def( - signature_inputs, signature_outputs, - tf.compat.v1.saved_model.signature_constants.REGRESS_METHOD_NAME) - - def _build_classification_signature(self, input_tensor, scores_tensor): - """Helper function for building a classification SignatureDef.""" - input_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( - input_tensor) - signature_inputs = { - tf.compat.v1.saved_model.signature_constants.CLASSIFY_INPUTS: - input_tensor_info - } - output_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( - scores_tensor) - signature_outputs = { - tf.compat.v1.saved_model.signature_constants.CLASSIFY_OUTPUT_SCORES: - output_tensor_info - } - return tf.compat.v1.saved_model.signature_def_utils.build_signature_def( - signature_inputs, signature_outputs, - tf.compat.v1.saved_model.signature_constants.CLASSIFY_METHOD_NAME) - - def _build_multihead_model(self, model_path): - with tf.compat.v1.Graph().as_default(): - input_example = tf.compat.v1.placeholder( - tf.string, name='input_examples_tensor') - config = { - 'x': tf.compat.v1.io.FixedLenFeature( - [1], dtype=tf.float32, default_value=0), - 'y': tf.compat.v1.io.FixedLenFeature( - [1], dtype=tf.float32, default_value=0), - } - features = tf.compat.v1.parse_example(input_example, config) - x = features['x'] - y = features['y'] - sum_pred = x + y - diff_pred = tf.abs(x - y) - sess = tf.compat.v1.Session() - sess.run(tf.compat.v1.initializers.global_variables()) - signature_def_map = { - 'regress_diff': - self._build_regression_signature(input_example, diff_pred), - 'classify_sum': - self._build_classification_signature(input_example, sum_pred), - tf.compat.v1.saved_model.signature_constants - .DEFAULT_SERVING_SIGNATURE_DEF_KEY: - 
self._build_regression_signature(input_example, sum_pred) - } - builder = tf.compat.v1.saved_model.builder.SavedModelBuilder(model_path) - builder.add_meta_graph_and_variables( - sess, [tf.compat.v1.saved_model.tag_constants.SERVING], - signature_def_map=signature_def_map) - builder.save() - - def _run_inference_with_beam(self, example_path, inference_spec_type, - prediction_log_path): - with beam.Pipeline() as pipeline: - _ = ( - pipeline - | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) - | 'ParseExamples' >> beam.Map(tf.train.Example.FromString) - | - 'RunInference' >> run_inference.RunInferenceImpl(inference_spec_type) - | 'WritePredictions' >> beam.io.WriteToTFRecord( - prediction_log_path, - coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) - - def _get_results(self, prediction_log_path): - results = [] - for f in tf.io.gfile.glob(prediction_log_path + '-?????-of-?????'): - record_iterator = tf.compat.v1.io.tf_record_iterator(path=f) - for record_string in record_iterator: - prediction_log = prediction_log_pb2.PredictionLog() - prediction_log.MergeFromString(record_string) - results.append(prediction_log) - return results - - def testModelPathInvalid(self): - example_path = self._get_output_data_dir('examples') - self._prepare_predict_examples(example_path) - prediction_log_path = self._get_output_data_dir('predictions') - with self.assertRaisesRegexp(IOError, 'SavedModel file does not exist.*'): - self._run_inference_with_beam( - example_path, - model_spec_pb2.InferenceSpecType( - saved_model_spec=model_spec_pb2.SavedModelSpec( - model_path=self._get_output_data_dir())), prediction_log_path) - - def testEstimatorModelPredict(self): - example_path = self._get_output_data_dir('examples') - self._prepare_predict_examples(example_path) - model_path = self._get_output_data_dir('model') - self._build_predict_model(model_path) - prediction_log_path = self._get_output_data_dir('predictions') - self._run_inference_with_beam( - example_path, 
- model_spec_pb2.InferenceSpecType( - saved_model_spec=model_spec_pb2.SavedModelSpec( - model_path=model_path)), prediction_log_path) - - results = self._get_results(prediction_log_path) - self.assertLen(results, 2) - self.assertEqual( - results[0].predict_log.request.inputs[ - run_inference._DEFAULT_INPUT_KEY].string_val[0], - self._predict_examples[0].SerializeToString()) - self.assertEqual(results[0].predict_log.response.outputs['y'].dtype, - tf.float32) - self.assertLen( - results[0].predict_log.response.outputs['y'].tensor_shape.dim, 2) - self.assertEqual( - results[0].predict_log.response.outputs['y'].tensor_shape.dim[0].size, - 1) - self.assertEqual( - results[0].predict_log.response.outputs['y'].tensor_shape.dim[1].size, - 1) - - def testClassifyModel(self): - example_path = self._get_output_data_dir('examples') - self._prepare_multihead_examples(example_path) - model_path = self._get_output_data_dir('model') - self._build_multihead_model(model_path) - prediction_log_path = self._get_output_data_dir('predictions') - self._run_inference_with_beam( - example_path, - model_spec_pb2.InferenceSpecType( - saved_model_spec=model_spec_pb2.SavedModelSpec( - model_path=model_path, signature_name=['classify_sum'])), - prediction_log_path) - - results = self._get_results(prediction_log_path) - self.assertLen(results, 2) - classify_log = results[0].classify_log - self.assertLen(classify_log.request.input.example_list.examples, 1) - self.assertEqual(classify_log.request.input.example_list.examples[0], - self._multihead_examples[0]) - self.assertLen(classify_log.response.result.classifications, 1) - self.assertLen(classify_log.response.result.classifications[0].classes, 1) - self.assertAlmostEqual( - classify_log.response.result.classifications[0].classes[0].score, 1.0) - - def testRegressModel(self): - example_path = self._get_output_data_dir('examples') - self._prepare_multihead_examples(example_path) - model_path = self._get_output_data_dir('model') - 
self._build_multihead_model(model_path) - prediction_log_path = self._get_output_data_dir('predictions') - self._run_inference_with_beam( - example_path, - model_spec_pb2.InferenceSpecType( - saved_model_spec=model_spec_pb2.SavedModelSpec( - model_path=model_path, signature_name=['regress_diff'])), - prediction_log_path) - - results = self._get_results(prediction_log_path) - self.assertLen(results, 2) - regress_log = results[0].regress_log - self.assertLen(regress_log.request.input.example_list.examples, 1) - self.assertEqual(regress_log.request.input.example_list.examples[0], - self._multihead_examples[0]) - self.assertLen(regress_log.response.result.regressions, 1) - self.assertAlmostEqual(regress_log.response.result.regressions[0].value, - 0.6) - - def testMultiInferenceModel(self): - example_path = self._get_output_data_dir('examples') - self._prepare_multihead_examples(example_path) - model_path = self._get_output_data_dir('model') - self._build_multihead_model(model_path) - prediction_log_path = self._get_output_data_dir('predictions') - self._run_inference_with_beam( - example_path, - model_spec_pb2.InferenceSpecType( - saved_model_spec=model_spec_pb2.SavedModelSpec( - model_path=model_path, - signature_name=['regress_diff', 'classify_sum'])), - prediction_log_path) - results = self._get_results(prediction_log_path) - self.assertLen(results, 2) - multi_inference_log = results[0].multi_inference_log - self.assertLen(multi_inference_log.request.input.example_list.examples, 1) - self.assertEqual(multi_inference_log.request.input.example_list.examples[0], - self._multihead_examples[0]) - self.assertLen(multi_inference_log.response.results, 2) - signature_names = [] - for result in multi_inference_log.response.results: - signature_names.append(result.model_spec.signature_name) - self.assertIn('regress_diff', signature_names) - self.assertIn('classify_sum', signature_names) - result = multi_inference_log.response.results[0] - 
self.assertEqual(result.model_spec.signature_name, 'regress_diff') - self.assertLen(result.regression_result.regressions, 1) - self.assertAlmostEqual(result.regression_result.regressions[0].value, 0.6) - result = multi_inference_log.response.results[1] - self.assertEqual(result.model_spec.signature_name, 'classify_sum') - self.assertLen(result.classification_result.classifications, 1) - self.assertLen(result.classification_result.classifications[0].classes, 1) - self.assertAlmostEqual( - result.classification_result.classifications[0].classes[0].score, 1.0) - - def testKerasModelPredict(self): - inputs = tf.keras.Input(shape=(1,), name='input1') - output1 = tf.keras.layers.Dense( - 1, activation=tf.nn.sigmoid, name='output1')( - inputs) - output2 = tf.keras.layers.Dense( - 1, activation=tf.nn.sigmoid, name='output2')( - inputs) - inference_model = tf.keras.models.Model(inputs, [output1, output2]) - - class TestKerasModel(tf.keras.Model): - - def __init__(self, inference_model): - super(TestKerasModel, self).__init__(name='test_keras_model') - self.inference_model = inference_model - - @tf.function(input_signature=[ - tf.TensorSpec(shape=[None], dtype=tf.string, name='inputs') - ]) - def call(self, serialized_example): - features = { - 'input1': - tf.compat.v1.io.FixedLenFeature([1], - dtype=tf.float32, - default_value=0) - } - input_tensor_dict = tf.io.parse_example(serialized_example, features) - return inference_model(input_tensor_dict['input1']) - - model = TestKerasModel(inference_model) - model.compile( - optimizer=tf.keras.optimizers.Adam(lr=.001), - loss=tf.keras.losses.binary_crossentropy, - metrics=['accuracy']) - - model_path = self._get_output_data_dir('model') - tf.compat.v1.keras.experimental.export_saved_model( - model, model_path, serving_only=True) - - example_path = self._get_output_data_dir('examples') - self._prepare_predict_examples(example_path) - prediction_log_path = self._get_output_data_dir('predictions') - self._run_inference_with_beam( - 
example_path, - model_spec_pb2.InferenceSpecType( - saved_model_spec=model_spec_pb2.SavedModelSpec( - model_path=model_path)), prediction_log_path) - - results = self._get_results(prediction_log_path) - self.assertLen(results, 2) - - def testTelemetry(self): - example_path = self._get_output_data_dir('examples') - self._prepare_multihead_examples(example_path) - model_path = self._get_output_data_dir('model') - self._build_multihead_model(model_path) - inference_spec_type = model_spec_pb2.InferenceSpecType( - saved_model_spec=model_spec_pb2.SavedModelSpec( - model_path=model_path, signature_name=['classify_sum'])) - pipeline = beam.Pipeline() - _ = ( - pipeline | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) - | 'ParseExamples' >> beam.Map(tf.train.Example.FromString) - | 'RunInference' >> run_inference.RunInferenceImpl(inference_spec_type)) - run_result = pipeline.run() - run_result.wait_until_finish() - - num_inferences = run_result.metrics().query( - MetricsFilter().with_name('num_inferences')) - self.assertTrue(num_inferences['counters']) - self.assertEqual(num_inferences['counters'][0].result, 2) - num_instances = run_result.metrics().query( - MetricsFilter().with_name('num_instances')) - self.assertTrue(num_instances['counters']) - self.assertEqual(num_instances['counters'][0].result, 2) - inference_request_batch_size = run_result.metrics().query( - MetricsFilter().with_name('inference_request_batch_size')) - self.assertTrue(inference_request_batch_size['distributions']) - self.assertEqual( - inference_request_batch_size['distributions'][0].result.sum, 2) - inference_request_batch_byte_size = run_result.metrics().query( - MetricsFilter().with_name('inference_request_batch_byte_size')) - self.assertTrue(inference_request_batch_byte_size['distributions']) - self.assertEqual( - inference_request_batch_byte_size['distributions'][0].result.sum, - sum(element.ByteSize() for element in self._multihead_examples)) - inference_batch_latency_micro_secs = 
run_result.metrics().query( - MetricsFilter().with_name('inference_batch_latency_micro_secs')) - self.assertTrue(inference_batch_latency_micro_secs['distributions']) - self.assertGreaterEqual( - inference_batch_latency_micro_secs['distributions'][0].result.sum, 0) - load_model_latency_milli_secs = run_result.metrics().query( - MetricsFilter().with_name('load_model_latency_milli_secs')) - self.assertTrue(load_model_latency_milli_secs['distributions']) - self.assertGreaterEqual( - load_model_latency_milli_secs['distributions'][0].result.sum, 0) - - -class RunRemoteInferenceTest(RunInferenceFixture): - - def setUp(self): - super(RunRemoteInferenceTest, self).setUp() - self.example_path = self._get_output_data_dir('example') - self._prepare_predict_examples(self.example_path) - # This is from https://ml.googleapis.com/$discovery/rest?version=v1. - self._discovery_testdata_dir = os.path.join( - os.path.join(os.path.dirname(__file__), 'testdata'), - 'ml_discovery.json') - - @staticmethod - def _make_response_body(content, successful): - if successful: - response_dict = {'predictions': content} - else: - response_dict = {'error': content} - return json.dumps(response_dict) - - def _set_up_pipeline(self, inference_spec_type): - self.pipeline = beam.Pipeline() - self.pcoll = ( - self.pipeline - | 'ReadExamples' >> beam.io.ReadFromTFRecord(self.example_path) - | 'ParseExamples' >> beam.Map(tf.train.Example.FromString) - | 'RunInference' >> run_inference.RunInferenceImpl(inference_spec_type)) - - def _run_inference_with_beam(self): - self.pipeline_result = self.pipeline.run() - self.pipeline_result.wait_until_finish() - - def test_model_predict(self): - predictions = [{'output_1': [0.901], 'output_2': [0.997]}] - builder = http.RequestMockBuilder({ - 'ml.projects.predict': - (None, self._make_response_body(predictions, successful=True)) - }) - resource = discovery.build( - 'ml', - 'v1', - http=http.HttpMock(self._discovery_testdata_dir, - {'status': http_client.OK}), - 
requestBuilder=builder) - with mock.patch('googleapiclient.discovery.' 'build') as response_mock: - response_mock.side_effect = lambda service, version: resource - inference_spec_type = model_spec_pb2.InferenceSpecType( - ai_platform_prediction_model_spec=model_spec_pb2 - .AIPlatformPredictionModelSpec( - project_id='test-project', - model_name='test-model', - )) - - prediction_log = prediction_log_pb2.PredictionLog() - prediction_log.predict_log.response.outputs['output_1'].CopyFrom( - tf.make_tensor_proto(values=[0.901], dtype=tf.double, shape=(1, 1))) - prediction_log.predict_log.response.outputs['output_2'].CopyFrom( - tf.make_tensor_proto(values=[0.997], dtype=tf.double, shape=(1, 1))) - - self._set_up_pipeline(inference_spec_type) - assert_that(self.pcoll, equal_to([prediction_log])) - self._run_inference_with_beam() - - def test_exception_raised_when_response_body_contains_error_entry(self): - error_msg = 'Base64 decode failed.' - builder = http.RequestMockBuilder({ - 'ml.projects.predict': - (None, self._make_response_body(error_msg, successful=False)) - }) - resource = discovery.build( - 'ml', - 'v1', - http=http.HttpMock(self._discovery_testdata_dir, - {'status': http_client.OK}), - requestBuilder=builder) - with mock.patch('googleapiclient.discovery.' 
'build') as response_mock: - response_mock.side_effect = lambda service, version: resource - inference_spec_type = model_spec_pb2.InferenceSpecType( - ai_platform_prediction_model_spec=model_spec_pb2 - .AIPlatformPredictionModelSpec( - project_id='test-project', - model_name='test-model', - )) - - try: - self._set_up_pipeline(inference_spec_type) - self._run_inference_with_beam() - except ValueError as exc: - actual_error_msg = str(exc) - self.assertTrue(actual_error_msg.startswith(error_msg)) - else: - self.fail('Test was expected to throw ValueError exception') - - def test_exception_raised_when_project_id_is_empty(self): - inference_spec_type = model_spec_pb2.InferenceSpecType( - ai_platform_prediction_model_spec=model_spec_pb2 - .AIPlatformPredictionModelSpec(model_name='test-model',)) - - with self.assertRaises(ValueError): - self._set_up_pipeline(inference_spec_type) - self._run_inference_with_beam() - - def test_request_body_with_binary_data(self): - example = text_format.Parse( - """ - features { - feature { key: "x_bytes" value { bytes_list { value: ["ASa8asdf"] }}} - feature { key: "x" value { bytes_list { value: "JLK7ljk3" }}} - feature { key: "y" value { int64_list { value: [1, 2] }}} - } - """, tf.train.Example()) - result = list( - run_inference._RemotePredictDoFn._prepare_instances([example])) - self.assertEqual([ - { - 'x_bytes': { - 'b64': 'QVNhOGFzZGY=' - }, - 'x': 'JLK7ljk3', - 'y': [1, 2] - }, - ], result) - - -if __name__ == '__main__': - tf.test.main() From 60fb92fe66ebe756d193ec3d0f91db089e0fc0cb Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Thu, 23 Jul 2020 18:16:49 -0400 Subject: [PATCH 6/8] Delete run_inference_arrow.py --- tfx_bsl/beam/run_inference_arrow.py | 1178 --------------------------- 1 file changed, 1178 deletions(-) delete mode 100644 tfx_bsl/beam/run_inference_arrow.py diff --git a/tfx_bsl/beam/run_inference_arrow.py b/tfx_bsl/beam/run_inference_arrow.py deleted file mode 100644 index 98e45148..00000000 --- 
a/tfx_bsl/beam/run_inference_arrow.py +++ /dev/null @@ -1,1178 +0,0 @@ -# Copyright 2019 Google LLC. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Run batch inference on saved model.""" - -from __future__ import absolute_import -from __future__ import division -# Standard __future__ imports -from __future__ import print_function - -import abc -import base64 -import collections -import os -import platform -import sys -import time -try: - import resource -except ImportError: - resource = None - -from absl import logging -import apache_beam as beam -from apache_beam.options.pipeline_options import GoogleCloudOptions -from apache_beam.options.pipeline_options import PipelineOptions -from apache_beam.utils import retry -import googleapiclient -from googleapiclient import discovery -from googleapiclient import http -import numpy as np -import six -import tensorflow as tf -from tfx_bsl.beam import shared -from tfx_bsl.public.proto import model_spec_pb2 -from tfx_bsl.telemetry import util -from typing import Any, Generator, Iterable, List, Mapping, Sequence, Text, \ - Tuple, Union - -# TODO(b/140306674): stop using the internal TF API. 
-from tensorflow.python.saved_model import loader_impl -from tensorflow_serving.apis import classification_pb2 -from tensorflow_serving.apis import inference_pb2 -from tensorflow_serving.apis import prediction_log_pb2 -from tensorflow_serving.apis import regression_pb2 - - -# TODO(b/131873699): Remove once 1.x support is dropped. -# pylint: disable=g-import-not-at-top -try: - # We need to import this in order to register all quantiles ops, even though - # it's not directly used. - from tensorflow.contrib.boosted_trees.python.ops import quantile_ops as _ # pylint: disable=unused-import -except ImportError: - pass - -_DEFAULT_INPUT_KEY = 'examples' -_METRICS_DESCRIPTOR_INFERENCE = 'BulkInferrer' -_METRICS_DESCRIPTOR_IN_PROCESS = 'InProcess' -_METRICS_DESCRIPTOR_CLOUD_AI_PREDICTION = 'CloudAIPlatformPrediction' -_MILLISECOND_TO_MICROSECOND = 1000 -_MICROSECOND_TO_NANOSECOND = 1000 -_SECOND_TO_MICROSECOND = 1000000 -_REMOTE_INFERENCE_NUM_RETRIES = 5 - -# We define the following aliases of Any because the actual types are not -# public. -_SignatureDef = Any -_MetaGraphDef = Any -_SavedModel = Any - -_BulkInferResult = Union[prediction_log_pb2.PredictLog, - Tuple[tf.train.Example, regression_pb2.Regression], - Tuple[tf.train.Example, - inference_pb2.MultiInferenceResponse], - Tuple[tf.train.Example, - classification_pb2.Classifications]] - - -# TODO(b/151468119): Converts this into enum once we stop supporting Python 2.7 -class OperationType(object): - CLASSIFICATION = 'CLASSIFICATION' - REGRESSION = 'REGRESSION' - PREDICTION = 'PREDICTION' - MULTIHEAD = 'MULTIHEAD' - - -@beam.ptransform_fn -@beam.typehints.with_input_types(Union[tf.train.Example, - tf.train.SequenceExample]) -@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) -def RunInferenceImpl( # pylint: disable=invalid-name - examples: beam.pvalue.PCollection, - inference_spec_type: model_spec_pb2.InferenceSpecType -) -> beam.pvalue.PCollection: - """Implementation of RunInference API. 
- - Args: - examples: A PCollection containing examples. - inference_spec_type: Model inference endpoint. - - Returns: - A PCollection containing prediction logs. - - Raises: - ValueError; when operation is not supported. - """ - logging.info('RunInference on model: %s', inference_spec_type) - - batched_examples = examples | 'BatchExamples' >> beam.BatchElements() - operation_type = _get_operation_type(inference_spec_type) - if operation_type == OperationType.CLASSIFICATION: - return batched_examples | 'Classify' >> _Classify(inference_spec_type) - elif operation_type == OperationType.REGRESSION: - return batched_examples | 'Regress' >> _Regress(inference_spec_type) - elif operation_type == OperationType.PREDICTION: - return batched_examples | 'Predict' >> _Predict(inference_spec_type) - elif operation_type == OperationType.MULTIHEAD: - return (batched_examples - | 'MultiInference' >> _MultiInference(inference_spec_type)) - else: - raise ValueError('Unsupported operation_type %s' % operation_type) - - -_IOTensorSpec = collections.namedtuple( - '_IOTensorSpec', - ['input_tensor_alias', 'input_tensor_name', 'output_alias_tensor_names']) - -_Signature = collections.namedtuple('_Signature', ['name', 'signature_def']) - - -@beam.ptransform_fn -@beam.typehints.with_input_types(Union[tf.train.Example, - tf.train.SequenceExample]) -@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) -def _Classify(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name - inference_spec_type: model_spec_pb2.InferenceSpecType): - """Performs classify PTransform.""" - if _using_in_process_inference(inference_spec_type): - return (pcoll - | 'Classify' >> beam.ParDo( - _BatchClassifyDoFn(inference_spec_type, shared.Shared())) - | 'BuildPredictionLogForClassifications' >> beam.ParDo( - _BuildPredictionLogForClassificationsDoFn())) - else: - raise NotImplementedError - - -@beam.ptransform_fn -@beam.typehints.with_input_types(Union[tf.train.Example, - 
tf.train.SequenceExample]) -@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) -def _Regress(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name - inference_spec_type: model_spec_pb2.InferenceSpecType): - """Performs regress PTransform.""" - if _using_in_process_inference(inference_spec_type): - return (pcoll - | 'Regress' >> beam.ParDo( - _BatchRegressDoFn(inference_spec_type, shared.Shared())) - | 'BuildPredictionLogForRegressions' >> beam.ParDo( - _BuildPredictionLogForRegressionsDoFn())) - else: - raise NotImplementedError - - -@beam.ptransform_fn -@beam.typehints.with_input_types(Union[tf.train.Example, - tf.train.SequenceExample]) -@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) -def _Predict(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name - inference_spec_type: model_spec_pb2.InferenceSpecType): - """Performs predict PTransform.""" - if _using_in_process_inference(inference_spec_type): - predictions = ( - pcoll - | 'Predict' >> beam.ParDo( - _BatchPredictDoFn(inference_spec_type, shared.Shared()))) - else: - predictions = ( - pcoll - | 'RemotePredict' >> beam.ParDo( - _RemotePredictDoFn(inference_spec_type, pcoll.pipeline.options))) - return (predictions - | 'BuildPredictionLogForPredictions' >> beam.ParDo( - _BuildPredictionLogForPredictionsDoFn())) - - -@beam.ptransform_fn -@beam.typehints.with_input_types(Union[tf.train.Example, - tf.train.SequenceExample]) -@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) -def _MultiInference(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name - inference_spec_type: model_spec_pb2.InferenceSpecType): - """Performs multi inference PTransform.""" - if _using_in_process_inference(inference_spec_type): - return ( - pcoll - | 'MultiInference' >> beam.ParDo( - _BatchMultiInferenceDoFn(inference_spec_type, shared.Shared())) - | 'BuildMultiInferenceLog' >> beam.ParDo(_BuildMultiInferenceLogDoFn())) - else: - raise NotImplementedError - - 
-@six.add_metaclass(abc.ABCMeta) -class _BaseDoFn(beam.DoFn): - """Base DoFn that performs bulk inference.""" - - class _MetricsCollector(object): - """A collector for beam metrics.""" - - def __init__(self, inference_spec_type: model_spec_pb2.InferenceSpecType): - operation_type = _get_operation_type(inference_spec_type) - proximity_descriptor = ( - _METRICS_DESCRIPTOR_IN_PROCESS - if _using_in_process_inference(inference_spec_type) else - _METRICS_DESCRIPTOR_CLOUD_AI_PREDICTION) - namespace = util.MakeTfxNamespace( - [_METRICS_DESCRIPTOR_INFERENCE, operation_type, proximity_descriptor]) - - # Metrics - self._inference_counter = beam.metrics.Metrics.counter( - namespace, 'num_inferences') - self._num_instances = beam.metrics.Metrics.counter( - namespace, 'num_instances') - self._inference_request_batch_size = beam.metrics.Metrics.distribution( - namespace, 'inference_request_batch_size') - self._inference_request_batch_byte_size = ( - beam.metrics.Metrics.distribution( - namespace, 'inference_request_batch_byte_size')) - # Batch inference latency in microseconds. - self._inference_batch_latency_micro_secs = ( - beam.metrics.Metrics.distribution( - namespace, 'inference_batch_latency_micro_secs')) - self._model_byte_size = beam.metrics.Metrics.distribution( - namespace, 'model_byte_size') - # Model load latency in milliseconds. 
- self._load_model_latency_milli_secs = beam.metrics.Metrics.distribution( - namespace, 'load_model_latency_milli_secs') - - # Metrics cache - self.load_model_latency_milli_secs_cache = None - self.model_byte_size_cache = None - - def update_metrics_with_cache(self): - if self.load_model_latency_milli_secs_cache is not None: - self._load_model_latency_milli_secs.update( - self.load_model_latency_milli_secs_cache) - self.load_model_latency_milli_secs_cache = None - if self.model_byte_size_cache is not None: - self._model_byte_size.update(self.model_byte_size_cache) - self.model_byte_size_cache = None - - def update(self, elements: List[Union[tf.train.Example, - tf.train.SequenceExample]], - latency_micro_secs: int) -> None: - self._inference_batch_latency_micro_secs.update(latency_micro_secs) - self._num_instances.inc(len(elements)) - self._inference_counter.inc(len(elements)) - self._inference_request_batch_size.update(len(elements)) - self._inference_request_batch_byte_size.update( - sum(element.ByteSize() for element in elements)) - - def __init__(self, inference_spec_type: model_spec_pb2.InferenceSpecType): - super(_BaseDoFn, self).__init__() - self._clock = None - self._metrics_collector = self._MetricsCollector(inference_spec_type) - - def setup(self): - self._clock = _ClockFactory.make_clock() - - def process( - self, elements: List[Union[tf.train.Example, tf.train.SequenceExample]] - ) -> Iterable[Any]: - batch_start_time = self._clock.get_current_time_in_microseconds() - outputs = self.run_inference(elements) - result = self._post_process(elements, outputs) - self._metrics_collector.update( - elements, - self._clock.get_current_time_in_microseconds() - batch_start_time) - return result - - def finish_bundle(self): - self._metrics_collector.update_metrics_with_cache() - - @abc.abstractmethod - def run_inference( - self, elements: List[Union[tf.train.Example, tf.train.SequenceExample]] - ) -> Union[Mapping[Text, np.ndarray], Sequence[Mapping[Text, Any]]]: - 
raise NotImplementedError - - @abc.abstractmethod - def _post_process(self, elements: List[Union[tf.train.Example, - tf.train.SequenceExample]], - outputs: Any) -> Iterable[Any]: - raise NotImplementedError - - -def _retry_on_unavailable_and_resource_error_filter(exception: Exception): - """Retries for HttpError. - - Retries if error is unavailable (503) or resource exhausted (429). - Resource exhausted may happen when qps or bandwidth exceeds quota. - - Args: - exception: Exception from inference http request execution. - Returns: - A boolean of whether retry. - """ - - return (isinstance(exception, googleapiclient.errors.HttpError) and - exception.resp.status in (503, 429)) - - -@beam.typehints.with_input_types(List[Union[tf.train.Example, - tf.train.SequenceExample]]) -# Using output typehints triggers NotImplementedError('BEAM-2717)' on -# streaming mode on Dataflow runner. -# TODO(b/151468119): Consider to re-batch with online serving request size -# limit, and re-batch with RPC failures(InvalidArgument) regarding request size. -# @beam.typehints.with_output_types(prediction_log_pb2.PredictLog) -class _RemotePredictDoFn(_BaseDoFn): - """A DoFn that performs predictions from a cloud-hosted TensorFlow model. - - Supports both batch and streaming processing modes. - NOTE: Does not work on DirectRunner for streaming jobs [BEAM-7885]. - - In order to request predictions, you must deploy your trained model to AI - Platform Prediction in the TensorFlow SavedModel format. See - [Exporting a SavedModel for prediction] - (https://cloud.google.com/ai-platform/prediction/docs/exporting-savedmodel-for-prediction) - for more details. - - To send binary data, you have to make sure that the name of an input ends in - `_bytes`. - - NOTE: The returned `PredictLog` instances do not have `PredictRequest` part - filled. The reason is that it is difficult to determine the input tensor name - without having access to cloud-hosted model's signatures. 
- """ - - def __init__(self, inference_spec_type: model_spec_pb2.InferenceSpecType, - pipeline_options: PipelineOptions): - super(_RemotePredictDoFn, self).__init__(inference_spec_type) - self._api_client = None - - project_id = ( - inference_spec_type.ai_platform_prediction_model_spec.project_id or - pipeline_options.view_as(GoogleCloudOptions).project) - if not project_id: - raise ValueError('Either a non-empty project id or project flag in ' - ' beam pipeline options needs be provided.') - - model_name = ( - inference_spec_type.ai_platform_prediction_model_spec.model_name) - if not model_name: - raise ValueError('A non-empty model name must be provided.') - - version_name = ( - inference_spec_type.ai_platform_prediction_model_spec.version_name) - name_spec = 'projects/{}/models/{}' - # If version is not specified, the default version for a model is used. - if version_name: - name_spec += '/versions/{}' - self._full_model_name = name_spec.format(project_id, model_name, - version_name) - - def setup(self): - super(_RemotePredictDoFn, self).setup() - # TODO(b/151468119): Add tfx_bsl_version and tfx_bsl_py_version to - # user agent once custom header is supported in googleapiclient. - self._api_client = discovery.build('ml', 'v1') - - # Retry _REMOTE_INFERENCE_NUM_RETRIES times with exponential backoff. 
- @retry.with_exponential_backoff( - initial_delay_secs=1.0, - num_retries=_REMOTE_INFERENCE_NUM_RETRIES, - retry_filter=_retry_on_unavailable_and_resource_error_filter) - def _execute_request( - self, - request: http.HttpRequest) -> Mapping[Text, Sequence[Mapping[Text, Any]]]: - result = request.execute() - if 'error' in result: - raise ValueError(result['error']) - return result - - def _make_request(self, body: Mapping[Text, List[Any]]) -> http.HttpRequest: - return self._api_client.projects().predict( - name=self._full_model_name, body=body) - - @classmethod - def _prepare_instances( - cls, elements: List[tf.train.Example] - ) -> Generator[Mapping[Text, Any], None, None]: - for example in elements: - # TODO(b/151468119): support tf.train.SequenceExample - if not isinstance(example, tf.train.Example): - raise ValueError('Remote prediction only supports tf.train.Example') - - instance = {} - for input_name, feature in example.features.feature.items(): - attr_name = feature.WhichOneof('kind') - if attr_name is None: - continue - attr = getattr(feature, attr_name) - values = cls._parse_feature_content(attr.value, attr_name, - cls._sending_as_binary(input_name)) - # Flatten a sequence if its length is 1 - values = (values[0] if len(values) == 1 else values) - instance[input_name] = values - yield instance - - @staticmethod - def _sending_as_binary(input_name: Text) -> bool: - """Whether data should be sent as binary.""" - return input_name.endswith('_bytes') - - @staticmethod - def _parse_feature_content(values: Sequence[Any], attr_name: Text, - as_binary: bool) -> Sequence[Any]: - """Parse the content of tf.train.Feature object. - - If bytes_list, parse a list of bytes-like objects to a list of strings so - that it would be JSON serializable. - - If float_list or int64_list, do nothing. - - If data should be sent as binary, mark it as binary by replacing it with - a single attribute named 'b64'. 
- """ - if as_binary: - return [{'b64': base64.b64encode(x).decode()} for x in values] - elif attr_name == 'bytes_list': - return [x.decode() for x in values] - else: - return values - - def run_inference( - self, elements: List[Union[tf.train.Example, tf.train.SequenceExample]] - ) -> Sequence[Mapping[Text, Any]]: - body = {'instances': list(self._prepare_instances(elements))} - request = self._make_request(body) - response = self._execute_request(request) - return response['predictions'] - - def _post_process( - self, elements: List[Union[tf.train.Example, tf.train.SequenceExample]], - outputs: Sequence[Mapping[Text, Any]] - ) -> Iterable[prediction_log_pb2.PredictLog]: - result = [] - for output in outputs: - predict_log = prediction_log_pb2.PredictLog() - for output_alias, values in output.items(): - values = np.array(values) - tensor_proto = tf.make_tensor_proto( - values=values, - dtype=tf.as_dtype(values.dtype).as_datatype_enum, - shape=np.expand_dims(values, axis=0).shape) - predict_log.response.outputs[output_alias].CopyFrom(tensor_proto) - result.append(predict_log) - return result - - -# TODO(b/131873699): Add typehints once -# [BEAM-8381](https://issues.apache.org/jira/browse/BEAM-8381) -# is fixed. -# TODO(b/143484017): Add batch_size back off in the case there are functional -# reasons large batch sizes cannot be handled. -class _BaseBatchSavedModelDoFn(_BaseDoFn): - """A DoFn that runs in-process batch inference with a model. - - Models need to have the required serving signature as mentioned in - [Tensorflow Serving](https://www.tensorflow.org/tfx/serving/signature_defs) - - This function will check model signatures first. Then it will load and run - model inference in batch. 
- """ - - def __init__( - self, - inference_spec_type: model_spec_pb2.InferenceSpecType, - shared_model_handle: shared.Shared, - ): - super(_BaseBatchSavedModelDoFn, self).__init__(inference_spec_type) - self._inference_spec_type = inference_spec_type - self._shared_model_handle = shared_model_handle - self._model_path = inference_spec_type.saved_model_spec.model_path - self._tags = None - self._signatures = _get_signatures( - inference_spec_type.saved_model_spec.model_path, - inference_spec_type.saved_model_spec.signature_name, - _get_tags(inference_spec_type)) - self._session = None - self._io_tensor_spec = None - - def setup(self): - """Load the model. - - Note that worker may crash if exception is thrown in setup due - to b/139207285. - """ - - super(_BaseBatchSavedModelDoFn, self).setup() - self._tags = _get_tags(self._inference_spec_type) - self._io_tensor_spec = self._pre_process() - - if self._has_tpu_tag(): - # TODO(b/131873699): Support TPU inference. - raise ValueError('TPU inference is not supported yet.') - self._session = self._load_model() - - def _load_model(self): - """Load a saved model into memory. - - Returns: - Session instance. - """ - - def load(): - """Function for constructing shared LoadedModel.""" - # TODO(b/143484017): Do warmup and other heavy model construction here. 
- result = tf.compat.v1.Session(graph=tf.compat.v1.Graph()) - memory_before = _get_current_process_memory_in_bytes() - start_time = self._clock.get_current_time_in_microseconds() - tf.compat.v1.saved_model.loader.load(result, self._tags, self._model_path) - end_time = self._clock.get_current_time_in_microseconds() - memory_after = _get_current_process_memory_in_bytes() - self._metrics_collector.load_model_latency_milli_secs_cache = ( - (end_time - start_time) / _MILLISECOND_TO_MICROSECOND) - self._metrics_collector.model_byte_size_cache = ( - memory_after - memory_before) - return result - - if not self._model_path: - raise ValueError('Model path is not valid.') - return self._shared_model_handle.acquire(load) - - def _pre_process(self) -> _IOTensorSpec: - # Pre process functions will validate for each signature. - io_tensor_specs = [] - for signature in self._signatures: - if len(signature.signature_def.inputs) != 1: - raise ValueError('Signature should have 1 and only 1 inputs') - if (list(signature.signature_def.inputs.values())[0].dtype != - tf.string.as_datatype_enum): - raise ValueError( - 'Input dtype is expected to be %s, got %s' % - tf.string.as_datatype_enum, - list(signature.signature_def.inputs.values())[0].dtype) - io_tensor_specs.append(_signature_pre_process(signature.signature_def)) - input_tensor_name = '' - input_tensor_alias = '' - output_alias_tensor_names = {} - for io_tensor_spec in io_tensor_specs: - if not input_tensor_name: - input_tensor_name = io_tensor_spec.input_tensor_name - input_tensor_alias = io_tensor_spec.input_tensor_alias - elif input_tensor_name != io_tensor_spec.input_tensor_name: - raise ValueError('Input tensor must be the same for all Signatures.') - for alias, tensor_name in io_tensor_spec.output_alias_tensor_names.items( - ): - output_alias_tensor_names[alias] = tensor_name - if (not output_alias_tensor_names or not input_tensor_name or - not input_tensor_alias): - raise ValueError('No valid fetch tensors or feed 
tensors.') - return _IOTensorSpec(input_tensor_alias, input_tensor_name, - output_alias_tensor_names) - - def _has_tpu_tag(self) -> bool: - return (len(self._tags) == 2 and tf.saved_model.SERVING in self._tags and - tf.saved_model.TPU in self._tags) - - def run_inference( - self, elements: List[Union[tf.train.Example, tf.train.SequenceExample]] - ) -> Mapping[Text, np.ndarray]: - self._check_elements(elements) - outputs = self._run_tf_operations(elements) - return outputs - - def _run_tf_operations( - self, elements: List[Union[tf.train.Example, tf.train.SequenceExample]] - ) -> Mapping[Text, np.ndarray]: - input_values = [] - for element in elements: - input_values.append(element.SerializeToString()) - result = self._session.run( - self._io_tensor_spec.output_alias_tensor_names, - feed_dict={self._io_tensor_spec.input_tensor_name: input_values}) - if len(result) != len(self._io_tensor_spec.output_alias_tensor_names): - raise RuntimeError('Output length does not match fetches') - return result - - def _check_elements( - self, elements: List[Union[tf.train.Example, - tf.train.SequenceExample]]) -> None: - """Unimplemented.""" - - raise NotImplementedError - - -@beam.typehints.with_input_types(List[Union[tf.train.Example, - tf.train.SequenceExample]]) -@beam.typehints.with_output_types(Tuple[tf.train.Example, - classification_pb2.Classifications]) -class _BatchClassifyDoFn(_BaseBatchSavedModelDoFn): - """A DoFn that run inference on classification model.""" - - def setup(self): - signature_def = self._signatures[0].signature_def - if signature_def.method_name != tf.saved_model.CLASSIFY_METHOD_NAME: - raise ValueError( - 'BulkInferrerClassifyDoFn requires signature method ' - 'name %s, got: %s' % tf.saved_model.CLASSIFY_METHOD_NAME, - signature_def.method_name) - super(_BatchClassifyDoFn, self).setup() - - def _check_elements( - self, elements: List[Union[tf.train.Example, - tf.train.SequenceExample]]) -> None: - if not all(isinstance(element, tf.train.Example) for 
element in elements): - raise ValueError('Classify only supports tf.train.Example') - - def _post_process( - self, elements: Sequence[tf.train.Example], outputs: Mapping[Text, - np.ndarray] - ) -> Iterable[Tuple[tf.train.Example, classification_pb2.Classifications]]: - classifications = _post_process_classify( - self._io_tensor_spec.output_alias_tensor_names, elements, outputs) - return zip(elements, classifications) - - -@beam.typehints.with_input_types(List[Union[tf.train.Example, - tf.train.SequenceExample]]) -@beam.typehints.with_output_types(Tuple[tf.train.Example, - regression_pb2.Regression]) -class _BatchRegressDoFn(_BaseBatchSavedModelDoFn): - """A DoFn that run inference on regression model.""" - - def setup(self): - super(_BatchRegressDoFn, self).setup() - - def _check_elements( - self, elements: List[Union[tf.train.Example, - tf.train.SequenceExample]]) -> None: - if not all(isinstance(element, tf.train.Example) for element in elements): - raise ValueError('Regress only supports tf.train.Example') - - def _post_process( - self, elements: Sequence[tf.train.Example], outputs: Mapping[Text, - np.ndarray] - ) -> Iterable[Tuple[tf.train.Example, regression_pb2.Regression]]: - regressions = _post_process_regress(elements, outputs) - return zip(elements, regressions) - - -@beam.typehints.with_input_types(List[Union[tf.train.Example, - tf.train.SequenceExample]]) -@beam.typehints.with_output_types(prediction_log_pb2.PredictLog) -class _BatchPredictDoFn(_BaseBatchSavedModelDoFn): - """A DoFn that runs inference on predict model.""" - - def setup(self): - signature_def = self._signatures[0].signature_def - if signature_def.method_name != tf.saved_model.PREDICT_METHOD_NAME: - raise ValueError( - 'BulkInferrerPredictDoFn requires signature method ' - 'name %s, got: %s' % tf.saved_model.PREDICT_METHOD_NAME, - signature_def.method_name) - super(_BatchPredictDoFn, self).setup() - - def _check_elements( - self, elements: List[Union[tf.train.Example, - 
tf.train.SequenceExample]]) -> None: - pass - - def _post_process( - self, elements: Union[Sequence[tf.train.Example], - Sequence[tf.train.SequenceExample]], - outputs: Mapping[Text, np.ndarray] - ) -> Iterable[prediction_log_pb2.PredictLog]: - input_tensor_alias = self._io_tensor_spec.input_tensor_alias - signature_name = self._signatures[0].name - batch_size = len(elements) - for output_alias, output in outputs.items(): - if len(output.shape) < 1 or output.shape[0] != batch_size: - raise ValueError( - 'Expected output tensor %s to have at least one ' - 'dimension, with the first having a size equal to the input batch ' - 'size %s. Instead found %s' % - (output_alias, batch_size, output.shape)) - predict_log_tmpl = prediction_log_pb2.PredictLog() - predict_log_tmpl.request.model_spec.signature_name = signature_name - predict_log_tmpl.response.model_spec.signature_name = signature_name - input_tensor_proto = predict_log_tmpl.request.inputs[input_tensor_alias] - input_tensor_proto.dtype = tf.string.as_datatype_enum - input_tensor_proto.tensor_shape.dim.add().size = 1 - - result = [] - for i in range(batch_size): - predict_log = prediction_log_pb2.PredictLog() - predict_log.CopyFrom(predict_log_tmpl) - predict_log.request.inputs[input_tensor_alias].string_val.append( - elements[i].SerializeToString()) - for output_alias, output in outputs.items(): - # Mimic tensor::Split - tensor_proto = tf.make_tensor_proto( - values=output[i], - dtype=tf.as_dtype(output[i].dtype).as_datatype_enum, - shape=np.expand_dims(output[i], axis=0).shape) - predict_log.response.outputs[output_alias].CopyFrom(tensor_proto) - result.append(predict_log) - return result - - -@beam.typehints.with_input_types(List[Union[tf.train.Example, - tf.train.SequenceExample]]) -@beam.typehints.with_output_types(Tuple[tf.train.Example, - inference_pb2.MultiInferenceResponse]) -class _BatchMultiInferenceDoFn(_BaseBatchSavedModelDoFn): - """A DoFn that runs inference on multi-head model.""" - - def 
_check_elements( - self, elements: List[Union[tf.train.Example, - tf.train.SequenceExample]]) -> None: - if not all(isinstance(element, tf.train.Example) for element in elements): - raise ValueError('Multi inference only supports tf.train.Example') - - def _post_process( - self, elements: Sequence[tf.train.Example], outputs: Mapping[Text, - np.ndarray] - ) -> Iterable[Tuple[tf.train.Example, inference_pb2.MultiInferenceResponse]]: - classifications = None - regressions = None - for signature in self._signatures: - signature_def = signature.signature_def - if signature_def.method_name == tf.saved_model.CLASSIFY_METHOD_NAME: - classifications = _post_process_classify( - self._io_tensor_spec.output_alias_tensor_names, elements, outputs) - elif signature_def.method_name == tf.saved_model.REGRESS_METHOD_NAME: - regressions = _post_process_regress(elements, outputs) - else: - raise ValueError('Signature method %s is not supported for ' - 'multi inference' % signature_def.method_name) - result = [] - for i in range(len(elements)): - response = inference_pb2.MultiInferenceResponse() - for signature in self._signatures: - signature_def = signature.signature_def - inference_result = response.results.add() - if (signature_def.method_name == tf.saved_model.CLASSIFY_METHOD_NAME and - classifications): - inference_result.classification_result.classifications.add().CopyFrom( - classifications[i]) - elif ( - signature_def.method_name == tf.saved_model.REGRESS_METHOD_NAME and - regressions): - inference_result.regression_result.regressions.add().CopyFrom( - regressions[i]) - else: - raise ValueError('Signature method %s is not supported for ' - 'multi inference' % signature_def.method_name) - inference_result.model_spec.signature_name = signature.name - if len(response.results) != len(self._signatures): - raise RuntimeError('Multi inference response result length does not ' - 'match the number of signatures') - result.append((elements[i], response)) - return result - - 
-@beam.typehints.with_input_types(Tuple[tf.train.Example, - classification_pb2.Classifications]) -@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) -class _BuildPredictionLogForClassificationsDoFn(beam.DoFn): - """A DoFn that builds prediction log from classifications.""" - - def process( - self, element: Tuple[tf.train.Example, classification_pb2.Classifications] - ) -> Iterable[prediction_log_pb2.PredictionLog]: - (train_example, classifications) = element - result = prediction_log_pb2.PredictionLog() - result.classify_log.request.input.example_list.examples.add().CopyFrom( - train_example) - result.classify_log.response.result.classifications.add().CopyFrom( - classifications) - yield result - - -@beam.typehints.with_input_types(Tuple[tf.train.Example, - regression_pb2.Regression]) -@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) -class _BuildPredictionLogForRegressionsDoFn(beam.DoFn): - """A DoFn that builds prediction log from regressions.""" - - def process( - self, element: Tuple[tf.train.Example, regression_pb2.Regression] - ) -> Iterable[prediction_log_pb2.PredictionLog]: - (train_example, regression) = element - result = prediction_log_pb2.PredictionLog() - result.regress_log.request.input.example_list.examples.add().CopyFrom( - train_example) - result.regress_log.response.result.regressions.add().CopyFrom(regression) - yield result - - -@beam.typehints.with_input_types(prediction_log_pb2.PredictLog) -@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) -class _BuildPredictionLogForPredictionsDoFn(beam.DoFn): - """A DoFn that builds prediction log from predictions.""" - - def process( - self, element: prediction_log_pb2.PredictLog - ) -> Iterable[prediction_log_pb2.PredictionLog]: - result = prediction_log_pb2.PredictionLog() - result.predict_log.CopyFrom(element) - yield result - - -@beam.typehints.with_input_types(Tuple[tf.train.Example, - inference_pb2.MultiInferenceResponse]) 
-@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) -class _BuildMultiInferenceLogDoFn(beam.DoFn): - """A DoFn that builds prediction log from multi-head inference result.""" - - def process( - self, element: Tuple[tf.train.Example, - inference_pb2.MultiInferenceResponse] - ) -> Iterable[prediction_log_pb2.PredictionLog]: - (train_example, multi_inference_response) = element - result = prediction_log_pb2.PredictionLog() - (result.multi_inference_log.request.input.example_list.examples.add() - .CopyFrom(train_example)) - result.multi_inference_log.response.CopyFrom(multi_inference_response) - yield result - - -def _post_process_classify( - output_alias_tensor_names: Mapping[Text, Text], - elements: Sequence[tf.train.Example], outputs: Mapping[Text, np.ndarray] -) -> Sequence[classification_pb2.Classifications]: - """Returns classifications from inference output.""" - - # This is to avoid error "The truth value of an array with - # more than one element is ambiguous." - has_classes = False - has_scores = False - if tf.saved_model.CLASSIFY_OUTPUT_CLASSES in output_alias_tensor_names: - classes = outputs[tf.saved_model.CLASSIFY_OUTPUT_CLASSES] - has_classes = True - if tf.saved_model.CLASSIFY_OUTPUT_SCORES in output_alias_tensor_names: - scores = outputs[tf.saved_model.CLASSIFY_OUTPUT_SCORES] - has_scores = True - if has_classes: - if classes.ndim != 2: - raise ValueError('Expected Tensor shape: [batch_size num_classes] but ' - 'got %s' % classes.shape) - if classes.dtype != tf.string.as_numpy_dtype: - raise ValueError('Expected classes Tensor of %s. 
Got: %s' % - (tf.string.as_numpy_dtype, classes.dtype)) - if classes.shape[0] != len(elements): - raise ValueError('Expected classes output batch size of %s, got %s' % - (len(elements), classes.shape[0])) - if has_scores: - if scores.ndim != 2: - raise ValueError("""Expected Tensor shape: [batch_size num_classes] but - got %s""" % scores.shape) - if scores.dtype != tf.float32.as_numpy_dtype: - raise ValueError('Expected classes Tensor of %s. Got: %s' % - (tf.float32.as_numpy_dtype, scores.dtype)) - if scores.shape[0] != len(elements): - raise ValueError('Expected classes output batch size of %s, got %s' % - (len(elements), scores.shape[0])) - num_classes = 0 - if has_classes and has_scores: - if scores.shape[1] != classes.shape[1]: - raise ValueError('Tensors class and score should match in shape[1]. ' - 'Got %s vs %s' % (classes.shape[1], scores.shape[1])) - num_classes = classes.shape[1] - elif has_classes: - num_classes = classes.shape[1] - elif has_scores: - num_classes = scores.shape[1] - - result = [] - for i in range(len(elements)): - a_classification = classification_pb2.Classifications() - for c in range(num_classes): - a_class = a_classification.classes.add() - if has_classes: - a_class.label = classes[i][c] - if has_scores: - a_class.score = scores[i][c] - result.append(a_classification) - if len(result) != len(elements): - raise RuntimeError('Classifications length does not match elements') - return result - - -def _post_process_regress( - elements: Sequence[tf.train.Example], - outputs: Mapping[Text, np.ndarray]) -> Sequence[regression_pb2.Regression]: - """Returns regressions from inference output.""" - - if tf.saved_model.REGRESS_OUTPUTS not in outputs: - raise ValueError('No regression outputs found in outputs: %s' % - outputs.keys()) - output = outputs[tf.saved_model.REGRESS_OUTPUTS] - batch_size = len(elements) - if not (output.ndim == 1 or (output.ndim == 2 and output.shape[1] == 1)): - raise ValueError("""Expected output Tensor shape to be 
either [batch_size] - or [batch_size, 1] but got %s""" % output.shape) - if batch_size != output.shape[0]: - raise ValueError( - 'Input batch size did not match output batch size: %s vs %s' % - (batch_size, output.shape[0])) - if output.dtype != tf.float32.as_numpy_dtype: - raise ValueError('Expected output Tensor of %s. Got: %s' % - (tf.float32.as_numpy_dtype, output.dtype)) - if output.size != batch_size: - raise ValueError('Expected output batch size to be %s. Got: %s' % - (batch_size, output.size)) - flatten_output = output.flatten() - result = [] - for regression_result in flatten_output: - regression = regression_pb2.Regression() - regression.value = regression_result - result.append(regression) - - # Add additional check to save downstream consumer checks. - if len(result) != len(elements): - raise RuntimeError('Regression length does not match elements') - return result - - -def _signature_pre_process(signature: _SignatureDef) -> _IOTensorSpec: - """Returns IOTensorSpec from signature.""" - - if len(signature.inputs) != 1: - raise ValueError('Signature should have 1 and only 1 inputs') - input_tensor_alias = list(signature.inputs.keys())[0] - if list(signature.inputs.values())[0].dtype != tf.string.as_datatype_enum: - raise ValueError( - 'Input dtype is expected to be %s, got %s' % tf.string.as_datatype_enum, - list(signature.inputs.values())[0].dtype) - if signature.method_name == tf.saved_model.CLASSIFY_METHOD_NAME: - input_tensor_name, output_alias_tensor_names = ( - _signature_pre_process_classify(signature)) - elif signature.method_name == tf.saved_model.PREDICT_METHOD_NAME: - input_tensor_name, output_alias_tensor_names = ( - _signature_pre_process_predict(signature)) - elif signature.method_name == tf.saved_model.REGRESS_METHOD_NAME: - input_tensor_name, output_alias_tensor_names = ( - _signature_pre_process_regress(signature)) - else: - raise ValueError('Signature method %s is not supported' % - signature.method_name) - return 
_IOTensorSpec(input_tensor_alias, input_tensor_name, - output_alias_tensor_names) - - -def _signature_pre_process_classify( - signature: _SignatureDef) -> Tuple[Text, Mapping[Text, Text]]: - """Returns input tensor name and output alias tensor names from signature. - - Args: - signature: SignatureDef - - Returns: - A tuple of input tensor name and output alias tensor names. - """ - - if len(signature.outputs) != 1 and len(signature.outputs) != 2: - raise ValueError('Classify signature should have 1 or 2 outputs') - if tf.saved_model.CLASSIFY_INPUTS not in signature.inputs: - raise ValueError('No classification inputs found in SignatureDef: %s' % - signature.inputs) - input_tensor_name = signature.inputs[tf.saved_model.CLASSIFY_INPUTS].name - output_alias_tensor_names = {} - if (tf.saved_model.CLASSIFY_OUTPUT_CLASSES not in signature.outputs and - tf.saved_model.CLASSIFY_OUTPUT_SCORES not in signature.outputs): - raise ValueError( - """Expected classification signature outputs to contain at - least one of %s or %s. Signature was: %s""" % - tf.saved_model.CLASSIFY_OUTPUT_CLASSES, - tf.saved_model.CLASSIFY_OUTPUT_SCORES, signature) - if tf.saved_model.CLASSIFY_OUTPUT_CLASSES in signature.outputs: - output_alias_tensor_names[tf.saved_model.CLASSIFY_OUTPUT_CLASSES] = ( - signature.outputs[tf.saved_model.CLASSIFY_OUTPUT_CLASSES].name) - if tf.saved_model.CLASSIFY_OUTPUT_SCORES in signature.outputs: - output_alias_tensor_names[tf.saved_model.CLASSIFY_OUTPUT_SCORES] = ( - signature.outputs[tf.saved_model.CLASSIFY_OUTPUT_SCORES].name) - return input_tensor_name, output_alias_tensor_names - - -def _signature_pre_process_predict( - signature: _SignatureDef) -> Tuple[Text, Mapping[Text, Text]]: - """Returns input tensor name and output alias tensor names from signature. - - Args: - signature: SignatureDef - - Returns: - A tuple of input tensor name and output alias tensor names. 
- """ - - input_tensor_name = list(signature.inputs.values())[0].name - output_alias_tensor_names = dict([ - (key, output.name) for key, output in signature.outputs.items() - ]) - return input_tensor_name, output_alias_tensor_names - - -def _signature_pre_process_regress( - signature: _SignatureDef) -> Tuple[Text, Mapping[Text, Text]]: - """Returns input tensor name and output alias tensor names from signature. - - Args: - signature: SignatureDef - - Returns: - A tuple of input tensor name and output alias tensor names. - """ - - if len(signature.outputs) != 1: - raise ValueError('Regress signature should have 1 output') - if tf.saved_model.REGRESS_INPUTS not in signature.inputs: - raise ValueError('No regression inputs found in SignatureDef: %s' % - signature.inputs) - input_tensor_name = signature.inputs[tf.saved_model.REGRESS_INPUTS].name - if tf.saved_model.REGRESS_OUTPUTS not in signature.outputs: - raise ValueError('No regression outputs found in SignatureDef: %s' % - signature.outputs) - output_alias_tensor_names = { - tf.saved_model.REGRESS_OUTPUTS: - signature.outputs[tf.saved_model.REGRESS_OUTPUTS].name - } - return input_tensor_name, output_alias_tensor_names - - -def _using_in_process_inference( - inference_spec_type: model_spec_pb2.InferenceSpecType) -> bool: - return inference_spec_type.WhichOneof('type') == 'saved_model_spec' - - -def _get_signatures(model_path: Text, signatures: Sequence[Text], - tags: Sequence[Text]) -> Sequence[_Signature]: - """Returns a sequence of {model_signature_name: signature}.""" - - if signatures: - signature_names = signatures - else: - signature_names = [tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY] - - saved_model_pb = loader_impl.parse_saved_model(model_path) - meta_graph_def = _get_meta_graph_def(saved_model_pb, tags) - result = [] - for signature_name in signature_names: - if signature_name in meta_graph_def.signature_def: - result.append( - _Signature(signature_name, - 
meta_graph_def.signature_def[signature_name])) - else: - raise RuntimeError('Signature %s could not be found in SavedModel' % - signature_name) - return result - - -def _get_operation_type( - inference_spec_type: model_spec_pb2.InferenceSpecType) -> Text: - if _using_in_process_inference(inference_spec_type): - signatures = _get_signatures( - inference_spec_type.saved_model_spec.model_path, - inference_spec_type.saved_model_spec.signature_name, - _get_tags(inference_spec_type)) - if not signatures: - raise ValueError('Model does not have valid signature to use') - - if len(signatures) == 1: - method_name = signatures[0].signature_def.method_name - if method_name == tf.saved_model.CLASSIFY_METHOD_NAME: - return OperationType.CLASSIFICATION - elif method_name == tf.saved_model.REGRESS_METHOD_NAME: - return OperationType.REGRESSION - elif method_name == tf.saved_model.PREDICT_METHOD_NAME: - return OperationType.PREDICTION - else: - raise ValueError('Unsupported signature method_name %s' % method_name) - else: - for signature in signatures: - method_name = signature.signature_def.method_name - if (method_name != tf.saved_model.CLASSIFY_METHOD_NAME and - method_name != tf.saved_model.REGRESS_METHOD_NAME): - raise ValueError('Unsupported signature method_name for multi-head ' - 'model inference: %s' % method_name) - return OperationType.MULTIHEAD - else: - # Remote inference supports predictions only. 
- return OperationType.PREDICTION - - -def _get_meta_graph_def(saved_model_pb: _SavedModel, - tags: Sequence[Text]) -> _MetaGraphDef: - """Returns MetaGraphDef from SavedModel.""" - - for meta_graph_def in saved_model_pb.meta_graphs: - if set(meta_graph_def.meta_info_def.tags) == set(tags): - return meta_graph_def - raise RuntimeError('MetaGraphDef associated with tags %s could not be ' - 'found in SavedModel' % tags) - - -def _get_current_process_memory_in_bytes(): - """Returns memory usage in bytes.""" - - if resource is not None: - usage = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss - if _is_darwin(): - return usage - return usage * 1024 - else: - logging.warning('Resource module is not available for current platform, ' - 'memory usage cannot be fetched.') - return 0 - - -def _get_tags( - inference_spec_type: model_spec_pb2.InferenceSpecType) -> Sequence[Text]: - """Returns tags from ModelSpec.""" - - if inference_spec_type.saved_model_spec.tag: - return list(inference_spec_type.saved_model_spec.tag) - else: - return [tf.saved_model.SERVING] - - -def _is_darwin() -> bool: - return sys.platform == 'darwin' - - -def _is_windows() -> bool: - return platform.system() == 'Windows' or os.name == 'nt' - - -def _is_cygwin() -> bool: - return platform.system().startswith('CYGWIN_NT') - - -class _Clock(object): - - def get_current_time_in_microseconds(self) -> int: - return int(time.time() * _SECOND_TO_MICROSECOND) - - -class _FineGrainedClock(_Clock): - - def get_current_time_in_microseconds(self) -> int: - return int( - time.clock_gettime_ns(time.CLOCK_REALTIME) / # pytype: disable=module-attr - _MICROSECOND_TO_NANOSECOND) - - -class _ClockFactory(object): - - @staticmethod - def make_clock() -> _Clock: - if (hasattr(time, 'clock_gettime_ns') and not _is_windows() - and not _is_cygwin()): - return _FineGrainedClock() - return _Clock() From ab21c43ebf99c2da019331168825170f7a162bec Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Thu, 30 Jul 2020 12:10:20 -0400 
Subject: [PATCH 7/8] make additional package a command line argument and fix comments --- .../beam/benchmarks/run_inference_benchmark.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/tfx_bsl/beam/benchmarks/run_inference_benchmark.py b/tfx_bsl/beam/benchmarks/run_inference_benchmark.py index 133532a0..7933bb04 100644 --- a/tfx_bsl/beam/benchmarks/run_inference_benchmark.py +++ b/tfx_bsl/beam/benchmarks/run_inference_benchmark.py @@ -14,17 +14,20 @@ """Script to use run_inference from command line Below is a complete command line for running this script -for benchmarks +for benchmarks on dataflow python3 run_inference_benchemark.py \ PATH_TO_MODEL \ PATH_TO_DATA \ --output gs://YOUR_BUCKET/results/output \ +--extra_packages PACKAGE1 PACKAGE2 \ --project YOUR_PROJECT \ --runner DataflowRunner \ --temp_location gs://YOUR_BUCKET/temp \ --job_name run-inference-metrics \ --region us-central1 + +*In this case, one of the extra_packages should be the wheel file for tfx-bsl """ from __future__ import absolute_import @@ -57,17 +60,21 @@ def run(argv=None, save_main_session=True): type=str, required=True, help='Path to the output file(s).') + parser.add_argument( + '--extra_packages', + type=str, + nargs='*', + help='Wheel file(s) for any additional required package(s) to Beam packages') args, pipeline_args = parser.parse_known_args(argv) options = PipelineOptions(pipeline_args) setup_options = options.view_as(SetupOptions) - # Path of the wheel file tfx-bsl - setup_options.extra_packages = ['./tfx-bsl/dist/tfx_bsl-0.23.0.dev0-cp37-cp37m-linux_x86_64.whl'] + setup_options.extra_packages = args.extra_packages setup_options.save_main_session = save_main_session def get_saved_model_spec(model_path): - '''returns an InferenceSpecType object for a saved model path''' + '''Returns an InferenceSpecType object for a saved model path''' return model_spec_pb2.InferenceSpecType( saved_model_spec=model_spec_pb2.SavedModelSpec( model_path=model_path)) 
From 87f604b5bbc13ff6cc43292e17fa9ff4e36d5b74 Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Thu, 30 Jul 2020 14:28:55 -0400 Subject: [PATCH 8/8] add benchmark scripts for public API --- .../benchmarks/run_inference_api_benchmark.py | 92 +++++++++++++++++++ .../benchmarks/run_inference_benchmark.py | 6 +- 2 files changed, 95 insertions(+), 3 deletions(-) create mode 100644 tfx_bsl/beam/benchmarks/run_inference_api_benchmark.py diff --git a/tfx_bsl/beam/benchmarks/run_inference_api_benchmark.py b/tfx_bsl/beam/benchmarks/run_inference_api_benchmark.py new file mode 100644 index 00000000..3708d169 --- /dev/null +++ b/tfx_bsl/beam/benchmarks/run_inference_api_benchmark.py @@ -0,0 +1,92 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Script to use public.run_inference from command line +Below is a complete command in terminal for running this script +on dataflow for benchmarks. 
+ +python3 run_inference_api_benchmark.py \ +PATH_TO_MODEL \ +PATH_TO_DATA \ +--output gs://YOUR_BUCKET/results/output \ +--extra_packages PACKAGE1 PACKAGE2 \ +--project YOUR_PROJECT \ +--runner DataflowRunner \ +--temp_location gs://YOUR_BUCKET/temp \ +--job_name run-inference-api-metrics \ +--region us-central1 + +*In this case, one of the extra_packages should be the wheel file for tfx-bsl +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import apache_beam as beam +import tensorflow as tf +from tfx_bsl.public.beam import run_inference +from tfx_bsl.public.proto import model_spec_pb2 +from apache_beam.options.pipeline_options import PipelineOptions +from apache_beam.options.pipeline_options import SetupOptions + + +def run(argv=None, save_main_session=True): + """Main entry point; defines and runs the inference benchmark pipeline.""" + parser = argparse.ArgumentParser() + + parser.add_argument( + 'model_path', + type=str, + help='The path to input model') + parser.add_argument( + 'input', + type=str, + help='Path to the data file(s) containing data.') + parser.add_argument( + '--output', + type=str, + required=True, + help='Path to the output file(s).') + parser.add_argument( + '--extra_packages', + type=str, + nargs='*', + help='Wheel file(s) for any additional required package(s) to Beam packages') + + args, pipeline_args = parser.parse_known_args(argv) + options = PipelineOptions(pipeline_args) + + setup_options = options.view_as(SetupOptions) + setup_options.extra_packages = args.extra_packages + setup_options.save_main_session = save_main_session + + def get_saved_model_spec(model_path): + '''Returns an InferenceSpecType object for a saved model path''' + return model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path)) + + inference_spec_type = get_saved_model_spec(args.model_path) + with beam.Pipeline(options=options) as p: + 
(p + | 'ReadInputText' >> beam.io.ReadFromTFRecord(args.input) + | 'ParseExamples' >> beam.Map(tf.train.Example.FromString) + | 'RunInferenceImpl' >> run_inference.RunInference( + inference_spec_type)) + + +if __name__ == '__main__': + run() diff --git a/tfx_bsl/beam/benchmarks/run_inference_benchmark.py b/tfx_bsl/beam/benchmarks/run_inference_benchmark.py index 7933bb04..55e4d15c 100644 --- a/tfx_bsl/beam/benchmarks/run_inference_benchmark.py +++ b/tfx_bsl/beam/benchmarks/run_inference_benchmark.py @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Script to use run_inference from command line -Below is a complete command line for running this script -for benchmarks on dataflow +"""Script to use beam.run_inference from command line +Below is a complete command in terminal for running this script +on dataflow for benchmarks. python3 run_inference_benchemark.py \ PATH_TO_MODEL \