diff --git a/include/xnnpack.h b/include/xnnpack.h index c2adf2ee9ea..10030d11ad8 100644 --- a/include/xnnpack.h +++ b/include/xnnpack.h @@ -309,6 +309,9 @@ enum xnn_datatype { /// Quantized 2-bit signed integer with shared per-channel quantization /// parameters, but packed into 8-bit integers. xnn_datatype_qcint2 = 18, + /// Quantized 4-bit signed integer with shared per-Value quantization + /// parameters. + xnn_datatype_qint4 = 19, }; /// Define a tensor-type Value and add it to a Subgraph. diff --git a/src/datatype.c b/src/datatype.c index df09fae33d6..2cdc3e28a9d 100644 --- a/src/datatype.c +++ b/src/datatype.c @@ -22,6 +22,7 @@ bool xnn_datatype_is_real(enum xnn_datatype t) { case xnn_datatype_qcint8: case xnn_datatype_qcint32: case xnn_datatype_qcint4: + case xnn_datatype_qint4: case xnn_datatype_qcint2: case xnn_datatype_qdint8: case xnn_datatype_qduint8: @@ -48,6 +49,7 @@ bool xnn_datatype_is_integral(enum xnn_datatype t) { case xnn_datatype_qcint8: case xnn_datatype_qcint32: case xnn_datatype_qcint4: + case xnn_datatype_qint4: case xnn_datatype_qcint2: case xnn_datatype_qdint8: case xnn_datatype_qduint8: @@ -72,6 +74,7 @@ bool xnn_datatype_is_quantized(enum xnn_datatype t) { case xnn_datatype_qcint8: case xnn_datatype_qcint32: case xnn_datatype_qcint4: + case xnn_datatype_qint4: case xnn_datatype_qcint2: case xnn_datatype_qdint8: case xnn_datatype_qduint8: @@ -119,6 +122,7 @@ size_t xnn_datatype_log2_size_bits(enum xnn_datatype t) { return -1; case xnn_datatype_qcint2: return 1; + case xnn_datatype_qint4: case xnn_datatype_qcint4: case xnn_datatype_qbint4: return 2; @@ -172,6 +176,7 @@ bool xnn_datatype_is_byte_addressable(enum xnn_datatype t) { case xnn_datatype_fp16: case xnn_datatype_bf16: case xnn_datatype_qint8: + case xnn_datatype_qint4: case xnn_datatype_pqint8: case xnn_datatype_quint8: case xnn_datatype_qint32: diff --git a/src/enums/datatype-strings.c b/src/enums/datatype-strings.c index 85689e2e0f3..7882633b4d2 100644 --- a/src/enums/datatype-strings.c +++ b/src/enums/datatype-strings.c @@ -52,6 +52,8 @@ const char* xnn_datatype_to_string(enum xnn_datatype type) { return "QBINT4"; case xnn_datatype_qcint2: return "QCINT2"; + case xnn_datatype_qint4: + return "QINT4"; } XNN_UNREACHABLE; return NULL; diff --git a/src/subgraph.c b/src/subgraph.c index 0b96241cefa..02cc77585dc 100644 --- a/src/subgraph.c +++ b/src/subgraph.c @@ -3727,7 +3727,7 @@ static enum xnn_status optimize_common_subgraphs_iter( // is static. if (xnn_shape_multiply_all_dims( &node->params.static_reshape.new_shape) != 0) { - xnn_log_info( + xnn_log_debug( "Marking output of static_reshape[#%u](v%03u) as static shaped.", node->id, node->inputs[0]); subgraph->values[node->outputs[0]].shape = @@ -3796,7 +3796,7 @@ enum xnn_status xnn_subgraph_optimize_common_subgraphs( if (xnn_shape_multiply_all_dims(&value->shape) == 1) { // Get the value as a float. const float value_as_float = get_scalar_value_as_float(value); - xnn_log_info("v%03u is a constant: %e.", value->id, value_as_float); + xnn_log_debug("v%03u is a constant: %e.", value->id, value_as_float); // Mark the value accordingly. value->flags |= (value_as_float == 0.0f) ? XNN_VALUE_FLAG_IS_ZERO diff --git a/src/tensor.c b/src/tensor.c index df6de4bb4d6..19435b82789 100644 --- a/src/tensor.c +++ b/src/tensor.c @@ -61,6 +61,7 @@ static enum xnn_status check_zero_point( return xnn_status_invalid_parameter; } break; + case xnn_datatype_qint4: case xnn_datatype_qint8: if ((int32_t) (int8_t) zero_point != zero_point) { xnn_log_error( diff --git a/ynnpack/xnnpack/utils.cc b/ynnpack/xnnpack/utils.cc index fcdc4cf7ae4..5726b98b89e 100644 --- a/ynnpack/xnnpack/utils.cc +++ b/ynnpack/xnnpack/utils.cc @@ -1202,6 +1202,7 @@ ynn_type type_from_xnn(xnn_datatype type) { return ynn_type_bf16; case xnn_datatype_qduint8: return ynn_type_uint8; + case xnn_datatype_qint4: case xnn_datatype_qpint8: case xnn_datatype_pfp32: case xnn_datatype_pfp16: