From 162217c15e1e2050eddb49fb90f105772392a0e2 Mon Sep 17 00:00:00 2001 From: Shiqing Fan Date: Mon, 25 May 2026 00:15:51 -0700 Subject: [PATCH 01/59] Generalized Tensor Parallelism (GTP) init commit Co-authored-by: Jieming Zhang Signed-off-by: Shiqing Fan --- .../distributed/distributed_data_parallel.py | 150 +- .../core/distributed/finalize_model_grads.py | 31 + .../core/distributed/param_and_grad_buffer.py | 2 +- .../core/extensions/transformer_engine.py | 47 +- megatron/core/model_parallel_config.py | 10 + .../embeddings/language_model_embedding.py | 2 + megatron/core/models/hybrid/hybrid_model.py | 2 + megatron/core/optimizer/__init__.py | 46 +- megatron/core/optimizer/clip_grads.py | 55 +- megatron/core/optimizer/distrib_optimizer.py | 2 +- megatron/core/optimizer/optimizer.py | 127 +- megatron/core/parallel_state.py | 320 ++- megatron/core/process_groups_config.py | 51 +- megatron/core/ssm/mamba_mixer.py | 2 + megatron/core/tensor_parallel/layers.py | 83 +- megatron/core/transformer/attention.py | 2 + megatron/core/transformer/cuda_graphs.py | 296 ++- megatron/core/transformer/mlp.py | 3 + .../core/transformer/moe/shared_experts.py | 7 +- .../transformer/multi_token_prediction.py | 1 + megatron/core/utils.py | 5 +- megatron/experimental/__init__.py | 0 megatron/experimental/gtp/README.md | 229 ++ megatron/experimental/gtp/__init__.py | 80 + .../gtp/generalized_tensor_parallelism.py | 1839 +++++++++++++++++ megatron/training/arguments.py | 60 + megatron/training/global_vars.py | 2 + megatron/training/initialize.py | 2 + megatron/training/training.py | 37 + megatron/training/utils.py | 218 +- .../test_layer_wise_optimizer.py | 2 +- .../generalized_tensor_parallel/__init__.py | 13 + .../generalized_tensor_parallel/test_gtp.py | 1619 +++++++++++++++ .../test_tp_gtp.py | 458 ++++ 34 files changed, 5587 insertions(+), 216 deletions(-) create mode 100644 megatron/experimental/__init__.py create mode 100644 megatron/experimental/gtp/README.md create mode 100644 
megatron/experimental/gtp/__init__.py create mode 100644 megatron/experimental/gtp/generalized_tensor_parallelism.py create mode 100644 tests/unit_tests/generalized_tensor_parallel/__init__.py create mode 100644 tests/unit_tests/generalized_tensor_parallel/test_gtp.py create mode 100644 tests/unit_tests/generalized_tensor_parallel/test_tp_gtp.py diff --git a/megatron/core/distributed/distributed_data_parallel.py b/megatron/core/distributed/distributed_data_parallel.py index 2cb454a2027..bfb74d25534 100644 --- a/megatron/core/distributed/distributed_data_parallel.py +++ b/megatron/core/distributed/distributed_data_parallel.py @@ -6,6 +6,8 @@ import torch +from megatron.experimental.gtp import HAVE_GTP, GTPShardedParam + from ..config_logger import has_config_logger_enabled, log_config_to_disk from ..fp8_utils import is_float8tensor, post_all_gather_processing from ..optimizer.param_layout import FullParamLayout @@ -85,6 +87,14 @@ def __init__( self.intra_dp_cp_group = process_group_dict['intra_dp_cp_group'] self.expt_dp_group = process_group_dict['expt_dp_group'] self.intra_expt_dp_group = process_group_dict['intra_expt_dp_group'] + # GTP-aware DP subgroups. Fall back to the corresponding non-GTP group when + # the caller didn't configure GTP, so non-GTP runs work unchanged. 
+ self.intra_dp_cp_with_gtp_group = process_group_dict.get( + 'intra_dp_cp_with_gtp_group', self.intra_dp_cp_group + ) + self.intra_expt_dp_with_egtp_group = process_group_dict.get( + 'intra_expt_dp_with_egtp_group', self.intra_expt_dp_group + ) self.tp_group = process_group_dict['tp_group'] self.pp_group = process_group_dict['pp_group'] self.ep_group = process_group_dict['ep_group'] @@ -112,6 +122,7 @@ def __init__( param_to_name = {} self.params_with_grad = [] all_params = [] + gtp_params = [] for name, param in self.module.named_parameters(): if not param.requires_grad: continue @@ -122,10 +133,31 @@ def __init__( param.grad_added_to_main_grad = False param_to_name[param] = name - all_params.append(param) + # Carve out DENSE GTPShardedParam (mamba/attn, allreduce=True) only — + # they need intra_dp_cp_with_gtp_group. Routed-expert GTPShardedParam + # has allreduce=False and goes through main's expert path (which uses + # intra_expt_dp_with_egtp_group); non-GTP dense + expert both fall + # through to all_params where group_params_for_buffers splits them + # via is_expert_parallel. + is_dense_gtp = ( + HAVE_GTP + and isinstance(param, GTPShardedParam) + and getattr(param, 'allreduce', True) + ) + if is_dense_gtp: + gtp_params.append(param) + else: + all_params.append(param) # Group parameters by (param_dtype, grad_dtype, is_expert_parallel). buffer_groups = group_params_for_buffers(all_params, self.ddp_config.grad_reduce_in_fp32) + # GTP params are grouped separately — they will be routed through + # intra_dp_cp_with_gtp_group because GTP's RS already reduced over the GTP axis. + gtp_buffer_groups = ( + group_params_for_buffers(gtp_params, self.ddp_config.grad_reduce_in_fp32) + if gtp_params + else {} + ) # Auto-compute layouts when using distributed optimizer but no layout was provided. 
# This maintains backward compatibility for callers that create DDP directly @@ -151,7 +183,9 @@ def __init__( # When a full_param_layout is provided, verify that the grouping is consistent # with the layout (same buffer keys, same params per key, same param_indices). - if full_param_layout is not None: + # Skip strict equality if GTP carved params out — the caller-supplied layout + # was computed without that carve-out, so a literal == would always fail. + if full_param_layout is not None and not gtp_params: assert set(buffer_groups.keys()) == set(full_param_layout.layouts.keys()), ( f"Buffer keys from param grouping {set(buffer_groups.keys())} do not match " f"full_param_layout keys {set(full_param_layout.layouts.keys())}" @@ -174,6 +208,7 @@ def __init__( ), "Cannot average in collective when calculating per-token loss!" gradient_scaling_factor = 1.0 expert_gradient_scaling_factor = 1.0 + gtp_gradient_scaling_factor = 1.0 else: # The goal is to scale reduced gradients by 1/dp_size. # This can be achieved in two ways: @@ -198,19 +233,30 @@ def __init__( if self.ddp_config.average_in_collective: gradient_scaling_factor = 1.0 expert_gradient_scaling_factor = self.expt_dp_group.size() / self.dp_cp_group.size() + # GTP: collective averages over with_gtp group (size = dp_cp_size / ps_size). + # GTP RS already summed over ps_size ranks. To total 1/dp_cp_size scaling: + # pre_scale * (1/with_gtp_size) = 1/dp_cp_size => pre_scale = with_gtp_size / dp_cp_size. + gtp_gradient_scaling_factor = ( + self.intra_dp_cp_with_gtp_group.size() / self.dp_cp_group.size() + ) else: data_parallel_world_size = self.dp_cp_group.size() gradient_scaling_factor = 1.0 / data_parallel_world_size expert_gradient_scaling_factor = 1.0 / data_parallel_world_size + gtp_gradient_scaling_factor = 1.0 / data_parallel_world_size # Allocate buffers for each group. 
self.buffers = [] self.expert_parallel_buffers = [] + self.gtp_buffers = [] pg_collection = ProcessGroupCollection(tp=self.tp_group, dp_cp=self.dp_cp_group) for buffer_key, (params, param_indices) in buffer_groups.items(): if buffer_key.is_expert_parallel: - data_parallel_group = self.intra_expt_dp_group + # Use the with_eps group so EGTP-sharded routed experts (whose grads + # are already RS'd over the expert-GTP axis) only DP-reduce over true + # weight replicas. Falls back to intra_expt_dp_group when GTP is off. + data_parallel_group = self.intra_expt_dp_with_egtp_group scaling_factor = expert_gradient_scaling_factor else: data_parallel_group = self.intra_dp_cp_group @@ -234,10 +280,19 @@ def __init__( else: assert scaling_factor == target_gradient_scaling_factor + # If GTP carved params out of all_params, the caller-supplied (or auto-computed) + # full_param_layout.layouts[buffer_key].param_index_map will contain GTP entries + # that aren't in this buffer's `params_with_names`. _build_gbuf_range_map iterates + # param_index_map, so those stray GTP entries would surface as KeyErrors in + # DistOpt's world_param_group_map. Force buffer to compute its own layout in + # that case. param_layout = ( - full_param_layout.layouts.get(buffer_key) if full_param_layout is not None else None + full_param_layout.layouts.get(buffer_key) + if full_param_layout is not None and not gtp_params + else None ) params_with_names = [(p, param_to_name[p]) for p in params] + buffer = _ParamAndGradBuffer( self.ddp_config, buffer_key.param_dtype, @@ -257,6 +312,27 @@ def __init__( else: self.buffers.append(buffer) + # Allocate GTP buffers separately, routed through intra_dp_cp_with_gtp_group. + # GTP's RS already reduced over the GTP axis, so DDP must NOT re-reduce over GTP. + # full_param_layout is not applied to GTP buffers (GTP manages its own sharding). 
+ for buffer_key, (params, param_indices) in gtp_buffer_groups.items(): + params_with_names = [(p, param_to_name[p]) for p in params] + buffer = _ParamAndGradBuffer( + self.ddp_config, + buffer_key.param_dtype, + buffer_key.grad_dtype, + params_with_names, + self.intra_dp_cp_with_gtp_group, + self.bucket_size, + param_to_name, + gtp_gradient_scaling_factor, + param_indices, + self.ddp_config.nccl_ub, + pg_collection, + param_layout=None, + ) + self.gtp_buffers.append(buffer) + # In some scenarios, we want to put buckets from different buffers into a group so that # their communication can be aggregated. For example, when there are both fp8 buffers # and bf16 buffers in the model and vpp is enabled, each model chunk will have an fp8 @@ -280,12 +356,31 @@ def __init__( self.ddp_config.reduce_scatter_with_fp32_accumulation ), ) + self.gtp_bucket_groups = partition_buckets( + self.gtp_buffers, + force_single_bucket_group=disable_bucketing, + reduce_scatter_with_fp32_accumulation=( + self.ddp_config.reduce_scatter_with_fp32_accumulation + ), + ) + # Flat view across all three bucket-group lists; used wherever + # callers need to iterate every bucket group regardless of dense / + # expert-parallel / GTP category. The per-category lists above are + # kept for code paths that need per-category state (e.g. one + # communication_stream per category). 
+ self.all_bucket_groups = ( + self.bucket_groups + self.expert_parallel_bucket_groups + self.gtp_bucket_groups + ) if self.ddp_config.num_distributed_optimizer_instances > 1: assert ( self.ddp_config.use_distributed_optimizer ), 'Partial DistOpt cannot be used without DistOpt' - for bucket_groups in [self.bucket_groups, self.expert_parallel_bucket_groups]: + for bucket_groups in [ + self.bucket_groups, + self.expert_parallel_bucket_groups, + self.gtp_bucket_groups, + ]: communication_stream = torch.cuda.Stream(device=torch.cuda.current_device()) for bucket_group in bucket_groups: bucket_group.inter_distributed_optimizer_instance_group = ( @@ -299,7 +394,11 @@ def __init__( # layer-wise optimizer cases; the latter sets overlap_param_gather=True # without use_distributed_optimizer. if self.ddp_config.overlap_param_gather: - for bucket_groups in [self.bucket_groups, self.expert_parallel_bucket_groups]: + for bucket_groups in [ + self.bucket_groups, + self.expert_parallel_bucket_groups, + self.gtp_bucket_groups, + ]: num_bucket_groups = len(bucket_groups) for i in range(1, num_bucket_groups): bucket_groups[num_bucket_groups - i].next_param_gather_bucket_group = ( @@ -307,7 +406,11 @@ def __init__( ) # Create map from param to bucket group, used in pre_hook. - for bucket_groups in [self.bucket_groups, self.expert_parallel_bucket_groups]: + for bucket_groups in [ + self.bucket_groups, + self.expert_parallel_bucket_groups, + self.gtp_bucket_groups, + ]: for bucket_group in bucket_groups: for bucket in bucket_group.buckets: for param in bucket.params_list: @@ -444,9 +547,14 @@ def hook(*unused): if param in self.param_to_bucket_group: assert param.requires_grad if self.ddp_config.overlap_grad_reduce: - assert ( - param.grad is not None - ), 'param.grad being None is not safe when overlap_grad_reduce is True' + # GTP params may legitimately have grad=None: TE's + # wgrad_reduce_scatter returns None for async RS and writes + # the wgrad straight into param.main_grad. 
Skip the assertion + # for GTPShardedParam — otherwise it fires every iter. + if not (HAVE_GTP and isinstance(param, GTPShardedParam)): + assert ( + param.grad is not None + ), 'param.grad being None is not safe when overlap_grad_reduce is True' if param.grad is not None and ( not param.grad_added_to_main_grad or getattr(param, 'zero_out_wgrad', False) ): @@ -465,12 +573,12 @@ def no_sync(self): """ Context manager that turns off gradient synchronization. """ - for bucket_group in self.bucket_groups + self.expert_parallel_bucket_groups: + for bucket_group in self.all_bucket_groups: bucket_group.is_last_microbatch = False try: yield finally: - for bucket_group in self.bucket_groups + self.expert_parallel_bucket_groups: + for bucket_group in self.all_bucket_groups: bucket_group.is_last_microbatch = True def start_param_sync(self, *unused, force_sync: bool = False, force_dispatch: bool = False): @@ -492,7 +600,7 @@ def start_param_sync(self, *unused, force_sync: bool = False, force_dispatch: bo if self.overlap_param_gather_with_optimizer_step and not force_dispatch: return - for bucket_group in self.bucket_groups + self.expert_parallel_bucket_groups: + for bucket_group in self.all_bucket_groups: bucket_group.start_param_sync(force_sync=force_sync) if not self.ddp_config.overlap_param_gather: @@ -540,7 +648,7 @@ def start_grad_sync(self, *unused): calls. When overlap_grad_reduce is set to False, calls synchronous communication ops. """ - for bucket_group in self.bucket_groups + self.expert_parallel_bucket_groups: + for bucket_group in self.all_bucket_groups: bucket_group.start_grad_sync() def finish_grad_sync(self, force_all_reduce: Optional[bool] = False): @@ -552,17 +660,17 @@ def finish_grad_sync(self, force_all_reduce: Optional[bool] = False): calls to complete. When overlap_grad_reduce is set to False, calls synchronous communication ops. 
""" - for bucket_group in self.bucket_groups + self.expert_parallel_bucket_groups: + for bucket_group in self.all_bucket_groups: bucket_group.finish_grad_sync(force_all_reduce=force_all_reduce) def free_overlap_buffers(self): """Free overlap param-gather GPU buffers across all bucket groups.""" - for bucket_group in self.bucket_groups + self.expert_parallel_bucket_groups: + for bucket_group in self.all_bucket_groups: bucket_group.free_overlap_buffers() def scale_gradients(self, scaling_factor: float): """Scale all gradients inside the buffers by `scaling_factor`.""" - for buffer in self.buffers + self.expert_parallel_buffers: + for buffer in self.buffers + self.expert_parallel_buffers + self.gtp_buffers: buffer.scale_gradients(scaling_factor) def zero_grad_buffer(self): @@ -576,9 +684,9 @@ def zero_grad_buffer(self): # to True, and there will be a double-GA. for param in self.params_with_grad: param.grad_added_to_main_grad = False - for buffer in self.buffers + self.expert_parallel_buffers: + for buffer in self.buffers + self.expert_parallel_buffers + self.gtp_buffers: buffer.reset() - for bucket_group in self.bucket_groups + self.expert_parallel_bucket_groups: + for bucket_group in self.all_bucket_groups: bucket_group.reset() def broadcast_params(self): @@ -613,7 +721,7 @@ def offload_grad_buffers(self, synchronize: bool = True, empty_cache: bool = Tru if synchronize: torch.cuda.synchronize() - for buffer in self.buffers + self.expert_parallel_buffers: + for buffer in self.buffers + self.expert_parallel_buffers + self.gtp_buffers: buffer.offload_to_cpu(move_params=False, move_grads=True) if empty_cache: @@ -630,7 +738,7 @@ def restore_grad_buffers(self, synchronize: bool = True) -> None: Args: synchronize: Whether to call torch.cuda.synchronize() after allocation. 
""" - for buffer in self.buffers + self.expert_parallel_buffers: + for buffer in self.buffers + self.expert_parallel_buffers + self.gtp_buffers: buffer.reload_from_cpu(move_params=False, move_grads=True) if synchronize: diff --git a/megatron/core/distributed/finalize_model_grads.py b/megatron/core/distributed/finalize_model_grads.py index 7d9179d1c50..b4b16d7b139 100644 --- a/megatron/core/distributed/finalize_model_grads.py +++ b/megatron/core/distributed/finalize_model_grads.py @@ -491,6 +491,37 @@ def finalize_model_grads( pos_emb_group = parallel_state.get_position_embedding_group(check_initialized=False) dp_cp_group = parallel_state.get_data_parallel_group(with_context_parallel=True) + # Drain any in-flight GTP reduce-scatters on rs_stream before the DP gradient sync. + # Expert backward runs eagerly (not in CUDA graphs), so its GTP RS operations on + # rs_stream may still be writing to main_grad when finish_grad_sync starts the DP + # allreduce on main_stream. + if ( + config.generalized_tensor_parallel_size > 1 + or config.expert_generalized_tensor_parallel_size > 1 + ): + from megatron.experimental.gtp import ( + HAVE_GTP, + get_all_ag_streams, + get_all_rs_streams, + wait_async_comms, + ) + + if HAVE_GTP: + wait_async_comms() + for s in get_all_ag_streams(): + torch.cuda.current_stream().wait_stream(s) + for s in get_all_rs_streams(): + torch.cuda.current_stream().wait_stream(s) + + # Wait for captured bwd Phase 2 (main_grad.add_) on each CG runner's + # stream. bwd_completion_event only covers Phase 1; Phase 2 runs after + # it on runner.stream with no other sync to main_stream. + if config.generalized_tensor_parallel_size > 1: + from megatron.core.transformer.cuda_graphs import get_gtp_phase2_completion_events + + for evt in get_gtp_phase2_completion_events(): + torch.cuda.current_stream().wait_event(evt) + # All-reduce / reduce-scatter across DP replicas. 
if config.timers is not None: config.timers('all-grads-sync', log_level=1).start(barrier=config.barrier_with_L1_time) diff --git a/megatron/core/distributed/param_and_grad_buffer.py b/megatron/core/distributed/param_and_grad_buffer.py index 29f8de1d2d9..1cca26d2eb9 100644 --- a/megatron/core/distributed/param_and_grad_buffer.py +++ b/megatron/core/distributed/param_and_grad_buffer.py @@ -1588,4 +1588,4 @@ def partition_buckets( buffer.data_parallel_world_size, ) ) - return bucket_groups + return bucket_groups \ No newline at end of file diff --git a/megatron/core/extensions/transformer_engine.py b/megatron/core/extensions/transformer_engine.py index ae362a02309..03569bc29bc 100644 --- a/megatron/core/extensions/transformer_engine.py +++ b/megatron/core/extensions/transformer_engine.py @@ -61,6 +61,7 @@ is_te_min_version, is_torch_min_version, ) +from megatron.experimental.gtp import HAVE_GTP try: import transformer_engine as te @@ -695,6 +696,7 @@ def __init__( is_expert: bool = False, symmetric_ar_type: Optional[str] = None, tp_group: Optional[torch.distributed.ProcessGroup] = None, + gtp_group: Optional[torch.distributed.ProcessGroup] = None, ): if not HAVE_TE: raise ImportError( @@ -817,6 +819,10 @@ def __init__( tp_size = 1 tp_group_for_te = None + if HAVE_GTP: + self.gtp_size = get_pg_size(gtp_group) if gtp_group is not None else 1 + extra_kwargs["gtp_group"] = gtp_group if torch.distributed.is_initialized() else None + super().__init__( in_features=input_size, out_features=output_size, @@ -926,6 +932,7 @@ def __init__( skip_weight_param_allocation: bool = False, tp_comm_buffer_name: Optional[str] = None, tp_group: Optional[torch.distributed.ProcessGroup] = None, + gtp_group: Optional[torch.distributed.ProcessGroup] = None, stride: int = 1, ): if not HAVE_TE: @@ -1018,6 +1025,10 @@ def __init__( ), "Must have at least TE version 2.3 or higher to use symmetric memory all reduce" extra_kwargs["symmetric_ar_type"] = self.config.symmetric_ar_type + if HAVE_GTP: + 
self.gtp_size = get_pg_size(gtp_group) if gtp_group is not None else 1 + extra_kwargs["gtp_group"] = gtp_group if torch.distributed.is_initialized() else None + self.stride = stride super().__init__( @@ -1126,7 +1137,10 @@ def extra_repr(self) -> str: f"in_features={self.in_features}, " f"out_features={self.out_features}, " f"bias={self.use_bias}, " f"TP={self.tp_size}" + # gtp_size is only set when GTP support is available; omit the field otherwise. + # No closing ")" here: nn.Module.__repr__ already wraps extra_repr() in parentheses. + f"{', GTP=' + str(self.gtp_size) if hasattr(self, 'gtp_size') else ''}" ) def backward_dw(self): @@ -1153,6 +1167,7 @@ def __init__( skip_weight_param_allocation: bool = False, tp_comm_buffer_name: Optional[str] = None, tp_group: Optional[torch.distributed.ProcessGroup] = None, + gtp_group: Optional[torch.distributed.ProcessGroup] = None, stride: int = 1, ): if not HAVE_TE: @@ -1186,6 +1201,7 @@ def __init__( tp_comm_buffer_name=tp_comm_buffer_name, symmetric_ar_type=config.symmetric_ar_type, tp_group=tp_group, + gtp_group=gtp_group, ) # Set proper partition_stride @@ -1236,7 +1252,10 @@ def extra_repr(self) -> str: f"in_features={self.in_features}, " f"out_features={self.out_features}, " f"bias={self.use_bias}, " f"TP={self.tp_size}" + # gtp_size is only set when GTP support is available; omit the field otherwise. + # No closing ")" here: nn.Module.__repr__ already wraps extra_repr() in parentheses. + f"{', GTP=' + str(self.gtp_size) if hasattr(self, 'gtp_size') else ''}" ) def backward_dw(self): @@ -1262,6 +1281,7 @@ def __init__( is_expert: bool, tp_comm_buffer_name: Optional[str] = None, tp_group: Optional[torch.distributed.ProcessGroup] = None, + gtp_group: Optional[torch.distributed.ProcessGroup] = None, ): if not HAVE_TE: raise ImportError( @@ -1294,6 +1314,7 @@ def __init__( tp_comm_buffer_name=tp_comm_buffer_name, symmetric_ar_type=config.symmetric_ar_type, tp_group=tp_group, + gtp_group=gtp_group, ) if config.use_cpu_initialization: world_size = get_pg_size(tp_group) @@ -1340,7 +1361,10 @@ def extra_repr(self) -> str: f"in_features={self.in_features}, " f"out_features={self.out_features}, " f"bias={self.use_bias}, " f"TP={self.tp_size}" + # gtp_size is only set when GTP support is available; omit the field otherwise. + # No closing ")" here: nn.Module.__repr__ already wraps extra_repr() in parentheses. + f"{', GTP=' + str(self.gtp_size) if hasattr(self, 'gtp_size') else ''}" 
) def backward_dw(self): @@ -1737,6 +1761,7 @@ def __init__( self._tp_group = tp_group tp_size = get_pg_size(tp_group) tp_group_for_te = tp_group + gtp_group = pg_collection.expt_gtp self.explicit_expert_comm = is_expert and (tp_size > 1 or self.expert_parallel) @@ -1756,6 +1781,11 @@ def __init__( tp_size = 1 tp_group_for_te = None + if HAVE_GTP: + self.gtp_size = get_pg_size(gtp_group) if gtp_group is not None else 1 + extra_kwargs["gtp_group"] = ( + gtp_group if torch.distributed.is_initialized() else None + ) if is_te_min_version("2.14.0"): extra_kwargs["single_grouped_weight"] = getattr( config, "moe_single_grouped_weight", False @@ -2147,6 +2177,17 @@ def backward_dw(self): if self.delay_wgrad_compute: super().backward_dw() + def __repr__(self): + # gtp_size only exists when GTP support is available; omit the field otherwise. + gtp = f", GTP={self.gtp_size}" if hasattr(self, "gtp_size") else "" + return ( + f"{type(self).__name__}(per expert([" + f"in={self.in_features}, out={self.out_features}]) " + f"X num_gemms={self.num_gemms}, " + f"bias={self.use_bias}, TP={self.tp_size}" + f"{gtp})" + ) + class TEColumnParallelGroupedLinear(TEGroupedLinear): """ Wrapper for the Transformer-Engine's `GroupedLinear` layer but specialized diff --git a/megatron/core/model_parallel_config.py b/megatron/core/model_parallel_config.py index dabe0d0aced..dc2196169ee 100644 --- a/megatron/core/model_parallel_config.py +++ b/megatron/core/model_parallel_config.py @@ -20,6 +20,11 @@ class ModelParallelConfig: tensor_model_parallel_size: int = 1 """Intra-layer model parallelism. Splits tensors across GPU ranks.""" + generalized_tensor_parallel_size: int = 1 + """Generalized tensor parallelism. Shards model weights (but not activations) across GPU ranks. + Placed right after tensor parallelism in the parallelism ordering. + """ + pipeline_model_parallel_comm_backend: Optional[Literal["nccl", "ucc"]] = None """Configuring backend option of pipeline parallel communication (e.g., nccl, ucc) If None, the default backend will be used. 
@@ -72,6 +77,11 @@ class ModelParallelConfig: expert_model_parallel_size: int = 1 """Distributes Moe Experts across sub data parallel dimension.""" + expert_generalized_tensor_parallel_size: int = 1 + """Generalized tensor parallelism for expert layers. Shards expert weights (but not activations) + across GPU ranks. Independent from the decoder's generalized_tensor_parallel_size. + """ + expert_tensor_parallel_size: Optional[int] = None """Intra-layer tensor model parallelism for expert layer. Splits tensors across GPU ranks. Default is None, which will be set to the value of tensor_model_parallel_size. diff --git a/megatron/core/models/common/embeddings/language_model_embedding.py b/megatron/core/models/common/embeddings/language_model_embedding.py index 7e49ec6c02d..ff2e86b3a0d 100644 --- a/megatron/core/models/common/embeddings/language_model_embedding.py +++ b/megatron/core/models/common/embeddings/language_model_embedding.py @@ -35,6 +35,7 @@ def __init__( num_tokentypes: int = 0, scatter_to_sequence_parallel: bool = True, tp_group: Optional[torch.distributed.ProcessGroup] = None, + gtp_group: Optional[torch.distributed.ProcessGroup] = None, ): super().__init__(config=config) @@ -60,6 +61,7 @@ def __init__( reduce_scatter_embeddings=self.reduce_scatter_embeddings, config=self.config, tp_group=self.tp_group, + gtp_group=gtp_group, ) # Position embedding (serial). 
diff --git a/megatron/core/models/hybrid/hybrid_model.py b/megatron/core/models/hybrid/hybrid_model.py index c77251a5dc9..1bdd4aac709 100644 --- a/megatron/core/models/hybrid/hybrid_model.py +++ b/megatron/core/models/hybrid/hybrid_model.py @@ -218,6 +218,7 @@ def __init__( position_embedding_type=position_embedding_type, scatter_to_sequence_parallel=scatter_embedding_sequence_parallel, tp_group=self.pg_collection.tp, + gtp_group=self.pg_collection.gtp, ) # MLA (also used by DeepSeek Sparse Attention) uses its own decoupled RoPE, therefore we do @@ -299,6 +300,7 @@ def __init__( skip_weight_param_allocation=self.pre_process and self.share_embeddings_and_output_weights, tp_group=self.pg_collection.tp, + gtp_group=self.pg_collection.gtp, ) if self.pre_process or self.post_process or self.mtp_process: diff --git a/megatron/core/optimizer/__init__.py b/megatron/core/optimizer/__init__.py index ebdd42effe2..32d4db272cc 100644 --- a/megatron/core/optimizer/__init__.py +++ b/megatron/core/optimizer/__init__.py @@ -56,6 +56,7 @@ ) from megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.transformer.fsdp_dtensor_checkpoint import get_global_unique_param_name +from megatron.experimental.gtp import HAVE_GTP, GTPShardedParam from ..distributed.param_and_grad_buffer import _ParamAndGradBuffer from ..transformer.module import MegatronModule @@ -346,12 +347,13 @@ def _get_param_groups( param_override = None is_expert_parallel = not getattr(param, 'allreduce', True) + is_gtp = HAVE_GTP and isinstance(param, GTPShardedParam) # Create config_tuple that is hash-able, and has a consistent ordering of the keys. param_override_tuple: tuple[tuple[str, Any], ...] 
| None = ( param_group_override_to_tuple(param_override) ) - key = (param_override_tuple, is_expert_parallel) + key = (param_override_tuple, is_expert_parallel, is_gtp) if key not in params_map: params_map[key] = [] params_map[key].append(param) @@ -370,7 +372,7 @@ def _get_param_groups( param_groups = [] # Sort keys, None first. for key in sorted(params_key, key=lambda x: (x[0] is not None, x[0])): - param_override_tuple, is_expert_parallel = key + param_override_tuple, is_expert_parallel, is_gtp = key params = params_map[key] if key in params_map else [] if param_override_tuple is None: param_override: ParamGroupOverride = {} @@ -403,6 +405,7 @@ def _get_param_groups( param_group = { 'params': params, 'is_expert_parallel': is_expert_parallel, + 'is_gtp': is_gtp, 'default_config': uses_default_lr_schedule, **default_config, **param_override, # keep **param_override last so that users can override other fields. @@ -920,7 +923,9 @@ def get_megatron_optimizer( dp_cp_group = process_groups_dict['dp_cp_group'] intra_dp_cp_group = process_groups_dict['intra_dp_cp_group'] + intra_dp_cp_with_gtp_group = process_groups_dict['intra_dp_cp_with_gtp_group'] intra_expt_dp_group = process_groups_dict['intra_expt_dp_group'] + intra_expt_dp_with_egtp_group = process_groups_dict['intra_expt_dp_with_egtp_group'] mp_group = process_groups_dict['mp_group'] expt_tp_pp_group = process_groups_dict['expt_tp_pp_group'] intra_dp_cp_group_gloo = process_groups_dict['intra_dp_cp_group_gloo'] @@ -997,7 +1002,7 @@ def get_megatron_optimizer( model_chunk_offset=model_chunk_offset, config=config, config_overrides=config_overrides, - filter_fn=lambda g: not g['is_expert_parallel'], + filter_fn=lambda g: not g['is_expert_parallel'] and not g.get('is_gtp', False), buffer_name='buffers', ) for model_chunk in dense_model_chunks: @@ -1029,6 +1034,39 @@ def get_megatron_optimizer( ) model_chunk_offset += 1 + # GTP params: separate optimizer with with_gtp DP group. 
+ # GTP params are sharded across GTP peers; their DDP buffers use the with_gtp group. + gtp_param_groups, gtp_buffers = _get_param_groups_and_buffers( + model_chunks, + model_chunk_offset=0, + config=config, + config_overrides=config_overrides, + filter_fn=lambda g: g.get('is_gtp', False) and not g['is_expert_parallel'], + buffer_name='gtp_buffers', + ) + if dump_param_to_param_group_map is not None: + for param_group in gtp_param_groups: + for param in param_group["params"]: + param_name = get_global_unique_param_name(model_chunks, param) + param_to_param_group[param_name] = param_group_id + param_group_id += 1 + if len(gtp_param_groups) > 0: + optimizers.append( + _get_megatron_optimizer_based_on_param_groups( + config=config, + model_chunks=model_chunks, + param_groups=gtp_param_groups, + per_model_buffers=gtp_buffers, + model_parallel_group=mp_group, + data_parallel_group=intra_dp_cp_with_gtp_group, + data_parallel_group_gloo=None, + data_parallel_group_idx=model_parallel_rank, + intra_dist_opt_group=intra_dist_opt_group, + distributed_optimizer_instance_id=distributed_optimizer_instance_id, + pg_collection=pg_collection, + ) + ) + moe_param_groups, moe_buffers = _get_param_groups_and_buffers( model_chunks, model_chunk_offset=0, @@ -1057,7 +1095,7 @@ def get_megatron_optimizer( param_groups=moe_param_groups, per_model_buffers=moe_buffers, model_parallel_group=expt_tp_pp_group, - data_parallel_group=intra_expt_dp_group, + data_parallel_group=intra_expt_dp_with_egtp_group, data_parallel_group_gloo=expt_data_parallel_group_gloo, data_parallel_group_idx=expt_model_parallel_rank, intra_dist_opt_group=intra_dist_opt_group, diff --git a/megatron/core/optimizer/clip_grads.py b/megatron/core/optimizer/clip_grads.py index 3d796654db4..657b3d11d33 100644 --- a/megatron/core/optimizer/clip_grads.py +++ b/megatron/core/optimizer/clip_grads.py @@ -47,6 +47,9 @@ multi_tensor_scale_tensor_impl = None +from megatron.experimental.gtp import HAVE_GTP, GTPShardedParam + +from .. 
import parallel_state from ..tensor_parallel import param_is_not_tensor_parallel_duplicate from ..transformer.module import param_is_not_shared from ..utils import get_data_parallel_group_if_dtensor, to_local_if_dtensor @@ -59,18 +62,18 @@ def get_grad_norm_fp32( ) -> float: """Calculate the p-norm of gradients in FP32 precision. - This function is adapted from `torch.nn.utils.clip_grad.clip_grad_norm_` - and extends it with functionality to handle model-parallel parameters. - It ensures that the norm is correctly computed and reduced across - the specified process group (typically the model-parallel group for + This function is adapted from `torch.nn.utils.clip_grad.clip_grad_norm_` + and extends it with functionality to handle model-parallel parameters. + It ensures that the norm is correctly computed and reduced across + the specified process group (typically the model-parallel group for non-distributed optimizers or the entire world for distributed optimizers). Args: - grads_for_norm (Union[List[torch.Tensor], torch.Tensor]): An iterable + grads_for_norm (Union[List[torch.Tensor], torch.Tensor]): An iterable of Tensors or a single Tensor used to calculate the gradient norm. - norm_type (Union[int, float]): The type of the p-norm to use. Can be + norm_type (Union[int, float]): The type of the p-norm to use. Can be 'inf' for infinity norm. Defaults to 2. - grad_stats_parallel_group (ProcessGroup, optional): The process group + grad_stats_parallel_group (ProcessGroup, optional): The process group used for reducing gradient statistics (e.g., norms and zero counts). Returns: @@ -155,13 +158,13 @@ def clip_grad_by_total_norm_fp32( Note that the gradients are modified in-place. Args: - parameters (Union[List[torch.Tensor], torch.Tensor]): An iterable of + parameters (Union[List[torch.Tensor], torch.Tensor]): An iterable of Tensors or a single Tensor that will have gradients normalized. 
- max_norm (Union[int, float]): The maximum permissible total norm + max_norm (Union[int, float]): The maximum permissible total norm of the gradients. total_norm (float): The current total norm of the gradients. - use_decoupled_grad (bool, optional): Whether to read from the - '.decoupled_grad' attribute instead of the standard '.grad'. + use_decoupled_grad (bool, optional): Whether to read from the + '.decoupled_grad' attribute instead of the standard '.grad'. Defaults to False. """ # Grads. @@ -201,22 +204,23 @@ def count_zeros_fp32( grad_stats_parallel_group: torch.distributed.ProcessGroup, use_decoupled_grad: bool = False, tp_group: Optional[torch.distributed.ProcessGroup] = None, + use_distributed_optimizer: bool = False, ) -> float: """Counts the number of zero values in the gradients of the given parameters. - The count is performed in FP32. This method filters parameters to ensure - gradients are not double-counted by checking if the gradient is not None, - the parameter is not shared, and the parameter is not a replica due - to tensor model parallelism. It also handles parameters managed by + The count is performed in FP32. This method filters parameters to ensure + gradients are not double-counted by checking if the gradient is not None, + the parameter is not shared, and the parameter is not a replica due + to tensor model parallelism. It also handles parameters managed by Megatron FSDP specifically. Args: - parameters (Union[List[torch.Tensor], torch.Tensor]): An iterable of + parameters (Union[List[torch.Tensor], torch.Tensor]): An iterable of Tensors or a single Tensor whose gradients will be checked for zeros. - grad_stats_parallel_group (ProcessGroup): The process group used for + grad_stats_parallel_group (ProcessGroup): The process group used for reducing the zero count across distributed ranks. - use_decoupled_grad (bool, optional): If True, reads from the - '.decoupled_grad' attribute instead of the standard '.grad'. 
+ use_decoupled_grad (bool, optional): If True, reads from the + '.decoupled_grad' attribute instead of the standard '.grad'. Defaults to False. Returns: @@ -230,9 +234,11 @@ def count_zeros_fp32( # - grad should not be none # - parameter should not be shared # - should not be a replica due to tensor model parallelism + # - should not be a GTP duplicate (non-GTP params identical across GTP peers) total_num_zeros = torch.zeros(1, dtype=torch.int64, device='cuda') data_parallel_group = None use_megatron_fsdp = False + ps_rank = parallel_state.get_generalized_tensor_parallel_rank() for param in parameters: if getattr(param, "__fsdp_param__", False) and param.grad is not None: # If the parameter is managed by Megatron FSDP, we need to handle it differently. @@ -246,7 +252,14 @@ def count_zeros_fp32( grad_not_none = hasattr(param, grad_attr) and getattr(param, grad_attr) is not None is_not_shared = param_is_not_shared(param) is_not_tp_duplicate = param_is_not_tensor_parallel_duplicate(param, tp_group=tp_group) - if grad_not_none and is_not_shared and is_not_tp_duplicate: + if use_distributed_optimizer: + is_not_ps_duplicate = True + else: + is_gtp_param = getattr(param, 'is_gtp', False) or ( + HAVE_GTP and isinstance(param, GTPShardedParam) + ) + is_not_ps_duplicate = is_gtp_param or ps_rank == 0 + if grad_not_none and is_not_shared and is_not_tp_duplicate and is_not_ps_duplicate: grad_obj = getattr(param, grad_attr) data_parallel_group = get_data_parallel_group_if_dtensor(grad_obj, data_parallel_group) grad = to_local_if_dtensor(grad_obj).detach() @@ -271,4 +284,4 @@ def count_zeros_fp32( total_num_zeros = total_num_zeros.item() - return total_num_zeros \ No newline at end of file + return total_num_zeros diff --git a/megatron/core/optimizer/distrib_optimizer.py b/megatron/core/optimizer/distrib_optimizer.py index e0d3c2a54ac..832326d1f63 100644 --- a/megatron/core/optimizer/distrib_optimizer.py +++ b/megatron/core/optimizer/distrib_optimizer.py @@ -3009,4 +3009,4 @@ 
def step_with_ready_grads(self) -> bool: if timers is not None: timers('params-all-gather').stop() - return update_successful + return update_successful \ No newline at end of file diff --git a/megatron/core/optimizer/optimizer.py b/megatron/core/optimizer/optimizer.py index 9ae23bb4b7f..7a74ee35ae5 100644 --- a/megatron/core/optimizer/optimizer.py +++ b/megatron/core/optimizer/optimizer.py @@ -37,6 +37,8 @@ multi_tensor_applier = local_multi_tensor_applier multi_tensor_scale_impl = local_multi_tensor_scale +from megatron.experimental.gtp import HAVE_GTP, GTPShardedParam + from .. import parallel_state, tensor_parallel from ..config_logger import has_config_logger_enabled, log_config_to_disk from ..dist_checkpointing.mapping import ShardedStateDict @@ -138,17 +140,33 @@ def get_parameters(self) -> List[torch.nn.Parameter]: return params def get_main_grads_for_grad_norm(self) -> List[torch.Tensor]: - """Collects gradients for norm calculation, filtering duplicates. - + """ + Collects gradients for norm calculation, filtering duplicates. This method filters parameters based on whether the gradient is not None, - the parameter is not shared (to avoid double-counting gradients), and - the parameter is not a replica due to tensor model parallelism. + the parameter is not shared (to avoid double-counting gradients), + the parameter is not a replica due to tensor model parallelism, and + the parameter is not be a GTP duplicate (non-GTP params are identical across GTP peers; + only GTP rank 0 should contribute to avoid over-counting). - Returns: - List[torch.Tensor]: A list of gradient tensors filtered for norm calculation. + Returns all filtered grads as a single list (for backward compatibility). + Use get_main_grads_for_grad_norm_split() to get GTP and non-GTP grads separately. 
+ """ + non_gtp_grads, gtp_grads = self.get_main_grads_for_grad_norm_split() + return non_gtp_grads + gtp_grads + + def get_main_grads_for_grad_norm_split(self) -> Tuple[List[torch.Tensor], List[torch.Tensor]]: + """ + Get main_grads split into (non_gtp_grads, gtp_grads). + + GTP grads may need an extra GTP/EGTP reduction that differs from the + optimizer's grad_stats_parallel_group, so callers that compute norms + need them separated. """ params = self.get_parameters() - grads_for_norm = [] + non_gtp_grads = [] + gtp_grads = [] + ps_rank = parallel_state.get_generalized_tensor_parallel_rank() + eps_rank = parallel_state.get_expert_generalized_tensor_parallel_rank() for param in params: if self.config.use_precision_aware_optimizer_no_fp8_or_ds_fp8 or ( # Megatron-FSDP always uses decoupled_grad with FusedAdam. @@ -170,13 +188,36 @@ def get_main_grads_for_grad_norm(self) -> List[torch.Tensor]: grad = param.grad grad_not_none = grad is not None is_not_shared = param_is_not_shared(param) - is_not_tp_duplicate = tensor_parallel.param_is_not_tensor_parallel_duplicate( - param, getattr(self, 'tp_group', None) + + is_gtp_param = getattr(param, 'is_gtp', False) or ( + HAVE_GTP and isinstance(param, GTPShardedParam) ) - if grad_not_none and is_not_shared and is_not_tp_duplicate: - grads_for_norm.append(grad) - return grads_for_norm + # GTP params are always unique across TP ranks (tensor_model_parallel + # attribute is lost during wrap_gtp_sharded_tensor), so skip TP filter. + is_not_tp_duplicate = is_gtp_param or ( + tensor_parallel.param_is_not_tensor_parallel_duplicate( + param, getattr(self, 'tp_group', None) + ) + ) + + # GTP-duplicate filter: only needed for non-distributed optimizer. 
+ is_expert = not getattr(param, 'allreduce', True) + if hasattr(self, 'ddp_config') and self.ddp_config.use_distributed_optimizer: + is_not_ps_duplicate = True + else: + if is_expert: + is_not_ps_duplicate = is_gtp_param or eps_rank == 0 + else: + is_not_ps_duplicate = is_gtp_param or ps_rank == 0 + + if grad_not_none and is_not_shared and is_not_tp_duplicate and is_not_ps_duplicate: + if is_gtp_param: + gtp_grads.append(grad) + else: + non_gtp_grads.append(grad) + + return non_gtp_grads, gtp_grads def get_grad_stats_parallel_group(self) -> torch.distributed.ProcessGroup: """Process group for reducing gradient statistics (num_zeros & norm). @@ -208,25 +249,59 @@ def step_with_ready_grads(self) -> bool: """Step the optimizer with ready gradients, return successful.""" return True + def _compute_grad_norm_with_gtp(self, non_gtp_grads, gtp_grads): + """Compute grad norm handling GTP grads that may need extra GTP/EGTP reduction. + + For MoE optimizers, grad_stats_parallel_group = TP×EP×PP which does NOT + include EPS. MoE-GTP grads need an extra EPS reduction. + For dense-GTP optimizers, grad_stats_parallel_group = TP×PP×GTP which + already includes GTP, so no extra reduction is needed. + """ + grad_stats_group = self.get_grad_stats_parallel_group() + + if not gtp_grads: + return get_grad_norm_fp32(non_gtp_grads, grad_stats_parallel_group=grad_stats_group) + + # Check if this optimizer handles expert params that need EPS reduction. + # The model_parallel group for dense/GTP optimizers = TP×PP×GTP (includes GTP), + # but for MoE optimizers = TP×EP×PP (does NOT include EPS). + eps_world_size = parallel_state.get_expert_generalized_tensor_parallel_world_size() + is_expert_optimizer = any(not getattr(p, 'allreduce', True) for p in self.get_parameters()) + needs_eps_reduce = is_expert_optimizer and eps_world_size > 1 + + if not needs_eps_reduce: + # Dense/GTP optimizer: grad_stats_group already covers GTP. 
+ return get_grad_norm_fp32( + non_gtp_grads + gtp_grads, grad_stats_parallel_group=grad_stats_group + ) + + # MoE optimizer with EPS: compute GTP norm separately, add EPS reduction. + non_gtp_norm = get_grad_norm_fp32(non_gtp_grads, grad_stats_parallel_group=grad_stats_group) + gtp_norm = get_grad_norm_fp32(gtp_grads, grad_stats_parallel_group=grad_stats_group) + # get_grad_norm_fp32 returns a float. We need to do the EPS reduction on GPU. + gtp_norm_2 = torch.tensor([gtp_norm**2], dtype=torch.float, device='cuda') + torch.distributed.all_reduce( + gtp_norm_2, + op=torch.distributed.ReduceOp.SUM, + group=parallel_state.get_expert_generalized_tensor_parallel_group(), + ) + total_norm_2 = non_gtp_norm**2 + gtp_norm_2.item() + return total_norm_2**0.5 + @torch.no_grad() def get_grad_norm(self): """Compute and return grad norm.""" - grads_for_norm = self.get_main_grads_for_grad_norm() - total_norm = get_grad_norm_fp32( - grads_for_norm, grad_stats_parallel_group=self.get_grad_stats_parallel_group() - ) - return total_norm + non_gtp_grads, gtp_grads = self.get_main_grads_for_grad_norm_split() + return self._compute_grad_norm_with_gtp(non_gtp_grads, gtp_grads) def clip_grad_norm(self, clip_grad: float) -> float: """Compute and return grad norm, also clip grads.""" params = self.get_parameters() if params: - grads_for_norm = self.get_main_grads_for_grad_norm() + non_gtp_grads, gtp_grads = self.get_main_grads_for_grad_norm_split() else: - grads_for_norm = [] - grad_norm = get_grad_norm_fp32( - grads_for_norm, grad_stats_parallel_group=self.get_grad_stats_parallel_group() - ) + non_gtp_grads, gtp_grads = [], [] + grad_norm = self._compute_grad_norm_with_gtp(non_gtp_grads, gtp_grads) if params: clip_grad_by_total_norm_fp32( @@ -246,6 +321,7 @@ def clip_grad_norm(self, clip_grad: float) -> float: def count_zeros(self) -> float: """Count number of zeros in model's gradients.""" params = self.get_parameters() + use_dist_opt = hasattr(self, 'ddp_config') and 
self.ddp_config.use_distributed_optimizer return count_zeros_fp32( params, grad_stats_parallel_group=self.get_grad_stats_parallel_group(), @@ -256,6 +332,7 @@ def count_zeros(self) -> float: and getattr(params[0], "__fsdp_param__", False) ), tp_group=getattr(self, 'tp_group', None), + use_distributed_optimizer=use_dist_opt, ) @abstractmethod @@ -698,8 +775,12 @@ def __init__( # float16 params: if param.type() in ['torch.cuda.HalfTensor', 'torch.cuda.BFloat16Tensor']: float16_params_this_group.append(param) - # Create a copy main_param = param.detach().clone().float() + if HAVE_GTP and isinstance(param, GTPShardedParam): + main_param.is_gtp = True + else: + main_param.is_gtp = False + # Copy tensor model parallel attributes. tensor_parallel.copy_tensor_model_parallel_attributes(main_param, param) if hasattr(param, 'shared'): diff --git a/megatron/core/parallel_state.py b/megatron/core/parallel_state.py index 337485b4d12..a6b2f9e1a97 100644 --- a/megatron/core/parallel_state.py +++ b/megatron/core/parallel_state.py @@ -27,6 +27,9 @@ # Intra-layer model parallel group that the current rank belongs to. _TENSOR_MODEL_PARALLEL_GROUP = None +# Generalized tensor parallelism group that the current rank belongs to. +_GENERALIZED_TENSOR_PARALLEL_GROUP = None +_GENERALIZED_TENSOR_PARALLEL_GLOBAL_RANKS = None # Inter-layer model parallel group that the current rank belongs to. _PIPELINE_MODEL_PARALLEL_GROUP = None # Model parallel group (both intra- and pipeline) that the current rank belongs to. @@ -50,6 +53,9 @@ # _EXPERT_TENSOR denotes tensor parallelism of expert which splits tensor across the group. # _EXPERT_DATA denotes data parallelism of expert which replicates weight across the group. +# Expert generalized tensor parallelism group that current rank belongs to. +_EXPERT_GENERALIZED_TENSOR_PARALLEL_GROUP = None +_EXPERT_GENERALIZED_TENSOR_PARALLEL_GLOBAL_RANKS = None # Expert model parallel group that current rank belongs to. 
_EXPERT_MODEL_PARALLEL_GROUP = None # Expert tensor parallel group that current rank belongs to. @@ -60,6 +66,7 @@ _EXPERT_TENSOR_MODEL_PIPELINE_PARALLEL_GROUP = None # Expert data parallel group _EXPERT_DATA_PARALLEL_GROUP = None +_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP = None _EXPERT_DATA_PARALLEL_GROUP_GLOO = None _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP = None _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_GLOO = None @@ -118,6 +125,10 @@ # Hybrid context parallel groups _HYBRID_DP_CP_GROUPS = {} +# Data parallel group information with generalized tensor parallel accounted for. +_DATA_PARALLEL_GROUP_WITH_GTP = None +_DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP = None + # Data parallel group information with context parallel combined. _DATA_PARALLEL_GROUP_WITH_CP = None _DATA_PARALLEL_GROUP_WITH_CP_GLOO = None @@ -554,10 +565,12 @@ def initialize_model_parallel( hierarchical_context_parallel_sizes: Optional[List[int]] = None, hybrid_context_parallel: bool = False, expert_model_parallel_size: int = 1, + expert_generalized_tensor_parallel_size: int = 1, num_distributed_optimizer_instances: int = 1, expert_tensor_parallel_size: Optional[int] = None, nccl_communicator_config_path: Optional[str] = None, distributed_timeout_minutes: int = 30, + generalized_tensor_parallel_size: int = 1, order: str = "tp-cp-ep-dp-pp", get_embedding_ranks: Optional[Callable[[List[int], Optional[int]], List[int]]] = None, get_position_embedding_ranks: Optional[Callable[[List[int], Optional[int]], List[int]]] = None, @@ -737,6 +750,13 @@ def initialize_model_parallel( data_parallel_size: int = world_size // model_size + if (data_parallel_size * context_parallel_size) % generalized_tensor_parallel_size != 0: + raise RuntimeError( + f"data_parallel_size * context_parallel_size " + f"({data_parallel_size * context_parallel_size}) is not divisible by " + f"generalized_tensor_parallel_size ({generalized_tensor_parallel_size})" + ) + if virtual_pipeline_model_parallel_size is not None: if not 
pipeline_model_parallel_size > 1: raise RuntimeError( @@ -789,6 +809,12 @@ def initialize_model_parallel( f"world_size ({world_size}) is not divisible by expert_tensor_model_pipeline_parallel size ({expert_tensor_model_pipeline_parallel_size})" ) + if expert_data_parallel_size % expert_generalized_tensor_parallel_size != 0: + raise RuntimeError( + f"expert_data_parallel_size ({expert_data_parallel_size}) is not divisible by " + f"expert_generalized_tensor_parallel_size ({expert_generalized_tensor_parallel_size})" + ) + # TODO: support expert specific ordering expert_decoder_rank_generator = RankGenerator( tp=expert_tensor_parallel_size, @@ -833,6 +859,27 @@ def initialize_model_parallel( data_parallel_size * context_parallel_size ) // num_distributed_optimizer_instances + # Build the generalized tensor parallel groups. + # GTP overlaps with the CP-DP domain because GTP only shards weights + # while CP only shards activations — they are independent and can share ranks. + global _GENERALIZED_TENSOR_PARALLEL_GROUP + global _GENERALIZED_TENSOR_PARALLEL_GLOBAL_RANKS + assert ( + _GENERALIZED_TENSOR_PARALLEL_GROUP is None + ), "generalized tensor parallel group is already initialized" + for cp_dp_ranks in decoder_rank_generator.get_ranks('cp-dp'): + for i in range(0, len(cp_dp_ranks), generalized_tensor_parallel_size): + ps_ranks = cp_dp_ranks[i : i + generalized_tensor_parallel_size] + group = create_group( + ps_ranks, + timeout=timeout, + pg_options=get_nccl_options("ps", nccl_comm_cfgs), + group_desc="GENERALIZED_TENSOR_PARALLEL_GROUP", + ) + if rank in ps_ranks: + _GENERALIZED_TENSOR_PARALLEL_GROUP = group + _GENERALIZED_TENSOR_PARALLEL_GLOBAL_RANKS = ps_ranks + # Set NCCL_COLLNET_ENABLE to 1 to enable SHARP for the dp group. 
if sharp_enabled_group == "dp": os.environ["NCCL_COLLNET_ENABLE"] = "1" @@ -950,6 +997,49 @@ def initialize_model_parallel( _DATA_PARALLEL_GROUP_GLOO = group_gloo _DATA_PARALLEL_GLOBAL_RANKS = ranks + # Build DP groups with generalized tensor parallel accounted for. + # with_gtp DP = only ranks that share the same GTP-rank (true weight replicas). + global _DATA_PARALLEL_GROUP_WITH_GTP + global _DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP + if generalized_tensor_parallel_size > 1: + # Build rank→ps_rank mapping. + rank_to_ps_rank = {} + for cp_dp_ranks in decoder_rank_generator.get_ranks('cp-dp'): + for i in range(0, len(cp_dp_ranks), generalized_tensor_parallel_size): + ps_chunk = cp_dp_ranks[i : i + generalized_tensor_parallel_size] + for ps_rank_idx, r in enumerate(ps_chunk): + rank_to_ps_rank[r] = ps_rank_idx + + # DP-only with GTP: create one group per (dp_group, ps_rank) pair. + # All ranks must participate in every create_group call (collective). + for dp_ranks in decoder_rank_generator.get_ranks('dp'): + for ps_rank_val in range(generalized_tensor_parallel_size): + dp_ps_ranks = [r for r in dp_ranks if rank_to_ps_rank[r] == ps_rank_val] + group = create_group( + dp_ps_ranks, + timeout=timeout, + pg_options=get_nccl_options("dp_ps", nccl_comm_cfgs), + group_desc="DATA_PARALLEL_GROUP_WITH_GTP", + ) + if rank in dp_ps_ranks: + _DATA_PARALLEL_GROUP_WITH_GTP = group + + # DP-CP with GTP + for dp_cp_ranks in decoder_rank_generator.get_ranks('dp-cp'): + for ps_rank_val in range(generalized_tensor_parallel_size): + dp_cp_ps_ranks = [r for r in dp_cp_ranks if rank_to_ps_rank[r] == ps_rank_val] + group = create_group( + dp_cp_ps_ranks, + timeout=timeout, + pg_options=get_nccl_options("dp_cp_ps", nccl_comm_cfgs), + group_desc="DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP", + ) + if rank in dp_cp_ps_ranks: + _DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP = group + else: + _DATA_PARALLEL_GROUP_WITH_GTP = _DATA_PARALLEL_GROUP + _DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP = _DATA_PARALLEL_GROUP_WITH_CP 
+ # Build the context-parallel groups. global _CONTEXT_PARALLEL_GROUP global _CONTEXT_PARALLEL_GLOBAL_RANKS @@ -980,19 +1070,54 @@ def initialize_model_parallel( _HIERARCHICAL_CONTEXT_PARALLEL_GROUPS = hierarchical_groups # Build the model-parallel groups. + # Model parallel includes TP, GTP, and PP. Since GTP overlaps with CP-DP (not in the + # RankGenerator), we build model parallel groups by merging 'tp-pp' groups across GTP peers. global _MODEL_PARALLEL_GROUP global _MODEL_PARALLEL_GLOBAL_RANKS assert _MODEL_PARALLEL_GROUP is None, 'model parallel group is already initialized' - for ranks in decoder_rank_generator.get_ranks('tp-pp'): - group = create_group( - ranks, - timeout=timeout, - pg_options=get_nccl_options("mp", nccl_comm_cfgs), - group_desc="MODEL_PARALLEL_GROUP", - ) - if rank in ranks: - _MODEL_PARALLEL_GROUP = group - _MODEL_PARALLEL_GLOBAL_RANKS = ranks + if generalized_tensor_parallel_size == 1: + # No GTP — model parallel is just tp-pp + for ranks in decoder_rank_generator.get_ranks('tp-pp'): + group = create_group( + ranks, + timeout=timeout, + pg_options=get_nccl_options("mp", nccl_comm_cfgs), + group_desc="MODEL_PARALLEL_GROUP", + ) + if rank in ranks: + _MODEL_PARALLEL_GROUP = group + _MODEL_PARALLEL_GLOBAL_RANKS = ranks + else: + # With GTP: merge tp-pp groups across GTP peers. + # Build a mapping from each rank to its tp-pp group. + tp_pp_groups = decoder_rank_generator.get_ranks('tp-pp') + rank_to_tp_pp = {} + for tp_pp_ranks in tp_pp_groups: + for r in tp_pp_ranks: + rank_to_tp_pp[r] = tp_pp_ranks + + # For each set of GTP peers, union their tp-pp groups to form model parallel groups. 
+ model_parallel_groups_set = set() + for cp_dp_ranks in decoder_rank_generator.get_ranks('cp-dp'): + for i in range(0, len(cp_dp_ranks), generalized_tensor_parallel_size): + ps_ranks = cp_dp_ranks[i : i + generalized_tensor_parallel_size] + # Merge tp-pp groups of all GTP peers + mp_ranks = [] + for ps_r in ps_ranks: + mp_ranks.extend(rank_to_tp_pp[ps_r]) + mp_ranks = sorted(set(mp_ranks)) + mp_key = tuple(mp_ranks) + if mp_key not in model_parallel_groups_set: + model_parallel_groups_set.add(mp_key) + group = create_group( + list(mp_ranks), + timeout=timeout, + pg_options=get_nccl_options("mp", nccl_comm_cfgs), + group_desc="MODEL_PARALLEL_GROUP", + ) + if rank in mp_ranks: + _MODEL_PARALLEL_GROUP = group + _MODEL_PARALLEL_GLOBAL_RANKS = list(mp_ranks) # Build the tensor model-parallel groups. global _TENSOR_MODEL_PARALLEL_GROUP @@ -1167,6 +1292,25 @@ def initialize_model_parallel( _TENSOR_AND_CONTEXT_PARALLEL_GROUP = group ### Expert-related parallel groups initialization + # Build the expert generalized tensor parallel group + # Expert GTP overlaps with the expert DP domain (experts don't use CP). 
+ global _EXPERT_GENERALIZED_TENSOR_PARALLEL_GROUP, _EXPERT_GENERALIZED_TENSOR_PARALLEL_GLOBAL_RANKS + assert ( + _EXPERT_GENERALIZED_TENSOR_PARALLEL_GROUP is None + ), 'Expert generalized tensor parallel group is already initialized' + for dp_ranks in expert_decoder_rank_generator.get_ranks('dp'): + for i in range(0, len(dp_ranks), expert_generalized_tensor_parallel_size): + eps_ranks = dp_ranks[i : i + expert_generalized_tensor_parallel_size] + group = create_group( + eps_ranks, + timeout=timeout, + pg_options=get_nccl_options("expt_gtp", nccl_comm_cfgs), + group_desc="EXPERT_GENERALIZED_TENSOR_PARALLEL_GROUP", + ) + if rank in eps_ranks: + _EXPERT_GENERALIZED_TENSOR_PARALLEL_GROUP = group + _EXPERT_GENERALIZED_TENSOR_PARALLEL_GLOBAL_RANKS = eps_ranks + # Build the expert model parallel group global _EXPERT_MODEL_PARALLEL_GROUP, _EXPERT_MODEL_PARALLEL_RANKS assert _EXPERT_MODEL_PARALLEL_GROUP is None, 'Expert parallel group is already initialized' @@ -1307,6 +1451,32 @@ def initialize_model_parallel( else: _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP = _EXPERT_DATA_PARALLEL_GROUP _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_GLOO = _EXPERT_DATA_PARALLEL_GROUP_GLOO + # Build expert DP group with expert generalized tensor parallel accounted for. + global _EXPERT_DATA_PARALLEL_GROUP_WITH_GTP + if expert_generalized_tensor_parallel_size > 1: + # Build rank→expert_ps_rank mapping. + rank_to_expert_ps_rank = {} + for dp_ranks in expert_decoder_rank_generator.get_ranks('dp'): + for i in range(0, len(dp_ranks), expert_generalized_tensor_parallel_size): + eps_chunk = dp_ranks[i : i + expert_generalized_tensor_parallel_size] + for eps_rank_idx, r in enumerate(eps_chunk): + rank_to_expert_ps_rank[r] = eps_rank_idx + + # Create one group per (expert_dp_group, expert_ps_rank) pair (collective). 
+ for dp_ranks in expert_decoder_rank_generator.get_ranks('dp'): + for eps_rank_val in range(expert_generalized_tensor_parallel_size): + edp_ps_ranks = [r for r in dp_ranks if rank_to_expert_ps_rank[r] == eps_rank_val] + group = create_group( + edp_ps_ranks, + timeout=timeout, + pg_options=get_nccl_options("ep_dp_ps", nccl_comm_cfgs), + group_desc="EXPERT_DATA_PARALLEL_GROUP_WITH_GTP", + ) + if rank in edp_ps_ranks: + _EXPERT_DATA_PARALLEL_GROUP_WITH_GTP = group + else: + _EXPERT_DATA_PARALLEL_GROUP_WITH_GTP = _EXPERT_DATA_PARALLEL_GROUP + ### End of expert related parallel groups initialization # build the intra distributed optimizer instance group @@ -1455,6 +1625,40 @@ def get_tensor_model_parallel_group(check_initialized=True): return _TENSOR_MODEL_PARALLEL_GROUP +def get_generalized_tensor_parallel_group(check_initialized=True): + """Get the parameter-sharding group the caller rank belongs to.""" + if check_initialized: + assert ( + _GENERALIZED_TENSOR_PARALLEL_GROUP is not None + ), "generalized tensor parallel group is not initialized" + return _GENERALIZED_TENSOR_PARALLEL_GROUP + + +def get_generalized_tensor_parallel_world_size(): + """Return world size for the parameter-sharding group.""" + if torch.distributed.is_available() and torch.distributed.is_initialized(): + return get_generalized_tensor_parallel_group().size() + else: + return 0 + + +def get_generalized_tensor_parallel_rank(): + """Return caller's rank in the parameter-sharding group.""" + if torch.distributed.is_available() and torch.distributed.is_initialized(): + return get_generalized_tensor_parallel_group().rank() + else: + return 0 + + +def get_generalized_tensor_parallel_global_ranks(check_initialized=True): + """Get all global ranks of the parameter-sharding group that the caller rank belongs to.""" + if check_initialized: + assert ( + _GENERALIZED_TENSOR_PARALLEL_GLOBAL_RANKS is not None + ), "generalized tensor parallel group is not initialized" + return 
_GENERALIZED_TENSOR_PARALLEL_GLOBAL_RANKS + + def get_pipeline_model_parallel_group(check_initialized=True): """Get the pipeline-model-parallel group the caller rank belongs to.""" if check_initialized: @@ -1464,8 +1668,27 @@ def get_pipeline_model_parallel_group(check_initialized=True): return _PIPELINE_MODEL_PARALLEL_GROUP -def get_data_parallel_group(with_context_parallel=False, partial_data_parallel=False): - """Get the data-parallel group the caller rank belongs to.""" +def get_data_parallel_group( + with_context_parallel=False, with_gtp=False, partial_data_parallel=False +): + """Get the data-parallel group the caller rank belongs to. + + Args: + with_context_parallel: If True, include context-parallel ranks in the group. + with_gtp: If True, return only the true weight-replica ranks (exclude GTP peers). + partial_data_parallel: If True, return partial DP group (requires with_context_parallel). + """ + if with_gtp: + if with_context_parallel: + assert ( + _DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP is not None + ), "data parallel group with context parallel and generalized tensor parallel is not initialized" + return _DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP + else: + assert ( + _DATA_PARALLEL_GROUP_WITH_GTP is not None + ), "data parallel group with generalized tensor parallel is not initialized" + return _DATA_PARALLEL_GROUP_WITH_GTP if with_context_parallel: if partial_data_parallel: assert ( @@ -1788,14 +2011,18 @@ def get_pipeline_model_parallel_prev_rank(): return _PIPELINE_GLOBAL_RANKS[(rank_in_pipeline - 1) % world_size] -def get_data_parallel_world_size(with_context_parallel=False, partial_data_parallel=False): +def get_data_parallel_world_size( + with_context_parallel=False, with_gtp=False, partial_data_parallel=False +): """Return world size for the data parallel group.""" global _MPU_DATA_PARALLEL_WORLD_SIZE if _MPU_DATA_PARALLEL_WORLD_SIZE is not None: return _MPU_DATA_PARALLEL_WORLD_SIZE if torch.distributed.is_available() and 
torch.distributed.is_initialized(): return get_data_parallel_group( - with_context_parallel=with_context_parallel, partial_data_parallel=partial_data_parallel + with_context_parallel=with_context_parallel, + with_gtp=with_gtp, + partial_data_parallel=partial_data_parallel, ).size() else: return 0 @@ -1807,14 +2034,18 @@ def set_data_parallel_rank(rank): _MPU_DATA_PARALLEL_RANK = rank -def get_data_parallel_rank(with_context_parallel=False, partial_data_parallel=False): +def get_data_parallel_rank( + with_context_parallel=False, with_gtp=False, partial_data_parallel=False +): """Return caller's rank in the data-parallel group.""" global _MPU_DATA_PARALLEL_RANK if _MPU_DATA_PARALLEL_RANK is not None: return _MPU_DATA_PARALLEL_RANK if torch.distributed.is_available() and torch.distributed.is_initialized(): return get_data_parallel_group( - with_context_parallel=with_context_parallel, partial_data_parallel=partial_data_parallel + with_context_parallel=with_context_parallel, + with_gtp=with_gtp, + partial_data_parallel=partial_data_parallel, ).rank() else: return 0 @@ -1853,6 +2084,40 @@ def get_tensor_and_context_parallel_rank(): ### Expert-related parallel states functions +def get_expert_generalized_tensor_parallel_group(check_initialized=True): + """Get the expert-parameter-sharding group the caller rank belongs to.""" + if check_initialized: + assert ( + _EXPERT_GENERALIZED_TENSOR_PARALLEL_GROUP is not None + ), "expert generalized tensor parallel group is not initialized" + return _EXPERT_GENERALIZED_TENSOR_PARALLEL_GROUP + + +def get_expert_generalized_tensor_parallel_world_size(): + """Return world size for the expert-parameter-sharding group.""" + if torch.distributed.is_available() and torch.distributed.is_initialized(): + return get_expert_generalized_tensor_parallel_group().size() + else: + return 0 + + +def get_expert_generalized_tensor_parallel_rank(): + """Return caller's rank in the expert-parameter-sharding group.""" + if 
torch.distributed.is_available() and torch.distributed.is_initialized(): + return get_expert_generalized_tensor_parallel_group().rank() + else: + return 0 + + +def get_expert_generalized_tensor_parallel_global_ranks(check_initialized=True): + """Get all global ranks of the expert-parameter-sharding group that the caller rank belongs to.""" + if check_initialized: + assert ( + _EXPERT_GENERALIZED_TENSOR_PARALLEL_GLOBAL_RANKS is not None + ), "expert generalized tensor parallel group is not initialized" + return _EXPERT_GENERALIZED_TENSOR_PARALLEL_GLOBAL_RANKS + + def get_expert_model_parallel_group(check_initialized=True): """Get the expert-model-parallel group the caller rank belongs to.""" if check_initialized: @@ -1983,8 +2248,16 @@ def get_expert_tensor_model_pipeline_parallel_group(check_initialized=True): return _EXPERT_TENSOR_MODEL_PIPELINE_PARALLEL_GROUP -def get_expert_data_parallel_group(check_initialized=True, partial_expert_data_parallel=False): +def get_expert_data_parallel_group( + check_initialized=True, with_gtp=False, partial_expert_data_parallel=False +): """Get expert data parallel group.""" + if with_gtp: + if check_initialized: + assert ( + _EXPERT_DATA_PARALLEL_GROUP_WITH_GTP is not None + ), "Expert data parallel group with generalized tensor parallel is not initialized" + return _EXPERT_DATA_PARALLEL_GROUP_WITH_GTP if partial_expert_data_parallel: if check_initialized: assert ( @@ -2082,6 +2355,7 @@ def get_all_ranks(): pipeline-model-parallel and expert-model-parallel groups.""" ranks = [ get_tensor_model_parallel_rank(), + get_generalized_tensor_parallel_rank(), get_data_parallel_rank(), get_context_parallel_rank(), get_pipeline_model_parallel_rank(), @@ -2098,6 +2372,12 @@ def destroy_model_parallel(): global _TENSOR_MODEL_PARALLEL_GROUP _TENSOR_MODEL_PARALLEL_GROUP = None + global _GENERALIZED_TENSOR_PARALLEL_GROUP + _GENERALIZED_TENSOR_PARALLEL_GROUP = None + + global _GENERALIZED_TENSOR_PARALLEL_GLOBAL_RANKS + 
_GENERALIZED_TENSOR_PARALLEL_GLOBAL_RANKS = None + global _PIPELINE_MODEL_PARALLEL_GROUP _PIPELINE_MODEL_PARALLEL_GROUP = None @@ -2173,6 +2453,12 @@ def destroy_model_parallel(): _DATA_PARALLEL_GROUP_WITH_CP_GLOO = None # Destroy parallel state related to expert parallelism. + global _EXPERT_GENERALIZED_TENSOR_PARALLEL_GROUP + _EXPERT_GENERALIZED_TENSOR_PARALLEL_GROUP = None + + global _EXPERT_GENERALIZED_TENSOR_PARALLEL_GLOBAL_RANKS + _EXPERT_GENERALIZED_TENSOR_PARALLEL_GLOBAL_RANKS = None + global _EXPERT_MODEL_PARALLEL_GROUP _EXPERT_MODEL_PARALLEL_GROUP = None diff --git a/megatron/core/process_groups_config.py b/megatron/core/process_groups_config.py index 6c1e3651387..07ac28e16f1 100644 --- a/megatron/core/process_groups_config.py +++ b/megatron/core/process_groups_config.py @@ -117,6 +117,12 @@ class ProcessGroupCollection: # Separate dp_cp communicator for param all-gather (AG/RS overlap) dp_cp_ag: torch.distributed.ProcessGroup = field(init=False) + # _GENERALIZED_TENSOR_PARALLEL_GROUP + gtp: torch.distributed.ProcessGroup = field(init=False) + + # _EXPERT_GENERALIZED_TENSOR_PARALLEL_GROUP + expt_gtp: torch.distributed.ProcessGroup = field(init=False) + # MoE layers need expt_dp group for sharded state dict # we need this workaround until distributed checkpoint is refactored # to have sharded_state_dict can take the PG and pass it down @@ -247,6 +253,12 @@ def use_mpu_process_groups(cls, required_pgs: Optional[List[str]] = None): check_initialized=False, with_context_parallel=True, ), + 'gtp': partial( + parallel_state.get_generalized_tensor_parallel_group, check_initialized=False + ), + 'expt_gtp': partial( + parallel_state.get_expert_generalized_tensor_parallel_group, check_initialized=False + ), } assert all( @@ -292,7 +304,10 @@ def setup_process_groups_for_optimizer( from megatron.core.utils import get_model_config if pg_collection is None: - # Use parallel_state groups + # Use parallel_state groups. 
+ # Dense (non-GTP) params use with_gtp=False (full DP group) to maximize + # optimizer state sharding. GTP params use with_gtp=True (smaller group) + # since GTP's reduce-scatter already handled the GTP dimension. dp_group = parallel_state.get_data_parallel_group( with_context_parallel=False, partial_data_parallel=False ) @@ -302,9 +317,15 @@ def setup_process_groups_for_optimizer( intra_dp_cp_group = parallel_state.get_data_parallel_group( with_context_parallel=True, partial_data_parallel=True ) + intra_dp_cp_with_gtp_group = parallel_state.get_data_parallel_group( + with_context_parallel=True, with_gtp=True + ) expt_dp_group = parallel_state.get_expert_data_parallel_group() intra_expt_dp_group = parallel_state.get_expert_data_parallel_group( - partial_expert_data_parallel=True + partial_expert_data_parallel=True, with_gtp=True + ) + intra_expt_dp_with_egtp_group = parallel_state.get_expert_data_parallel_group( + with_gtp=True ) intra_dist_opt_group = parallel_state.get_intra_distributed_optimizer_instance_group() @@ -427,6 +448,18 @@ def setup_process_groups_for_optimizer( ) expt_tp_pp_group = pg_collection.tp_ep_pp + # 6. GTP with_gtp group (fallback to intra_dp_cp if not provided) + if hasattr(pg_collection, 'intra_dp_cp_with_ps'): + intra_dp_cp_with_gtp_group = pg_collection.intra_dp_cp_with_ps + else: + intra_dp_cp_with_gtp_group = intra_dp_cp_group + + # 7. 
EPS group (fallback to intra_expt_dp if not provided) + if hasattr(pg_collection, 'intra_expt_dp_with_eps'): + intra_expt_dp_with_egtp_group = pg_collection.intra_expt_dp_with_eps + else: + intra_expt_dp_with_egtp_group = intra_expt_dp_group + # Gloo groups - not supported when pg_collection is provided if use_gloo_process_groups: raise ValueError( @@ -440,8 +473,10 @@ def setup_process_groups_for_optimizer( 'dp_group': dp_group, 'dp_cp_group': dp_cp_group, 'intra_dp_cp_group': intra_dp_cp_group, + 'intra_dp_cp_with_gtp_group': intra_dp_cp_with_gtp_group, 'expt_dp_group': expt_dp_group, 'intra_expt_dp_group': intra_expt_dp_group, + 'intra_expt_dp_with_egtp_group': intra_expt_dp_with_egtp_group, 'mp_group': mp_group, 'expt_tp_pp_group': expt_tp_pp_group, 'inter_dist_opt_group': inter_dist_opt_group, @@ -503,6 +538,12 @@ def setup_process_groups_for_ddp( if ddp_config.use_distributed_optimizer else None ), + 'intra_dp_cp_with_gtp_group': parallel_state.get_data_parallel_group( + with_context_parallel=True, with_gtp=True + ), + 'intra_expt_dp_with_egtp_group': parallel_state.get_expert_data_parallel_group( + with_gtp=True + ), } else: # Use provided process group collection with validation and fallbacks @@ -578,6 +619,12 @@ def setup_process_groups_for_ddp( result['pp_group'] = pg_collection.pp result['ep_group'] = pg_collection.ep + # EPS group (fallback to intra_expt_dp if not provided) + if hasattr(pg_collection, 'intra_expt_dp_with_eps'): + result['intra_expt_dp_with_egtp_group'] = pg_collection.intra_expt_dp_with_eps + else: + result['intra_expt_dp_with_egtp_group'] = result['intra_expt_dp_group'] + return result diff --git a/megatron/core/ssm/mamba_mixer.py b/megatron/core/ssm/mamba_mixer.py index 58b776538c8..a1a0ddc00c4 100644 --- a/megatron/core/ssm/mamba_mixer.py +++ b/megatron/core/ssm/mamba_mixer.py @@ -258,6 +258,7 @@ def __init__( is_expert=False, tp_comm_buffer_name="fc1", tp_group=self.pg_collection.tp, + gtp_group=self.pg_collection.gtp, ) # in_proj 
packs [z, x, B, C, dt] into one ColumnParallelLinear. Each # component is independently TP-sharded but with different sizes. When @@ -392,6 +393,7 @@ def __init__( is_expert=False, tp_comm_buffer_name="fc2", tp_group=self.pg_collection.tp, + gtp_group=self.pg_collection.gtp, ) # Regarding `conv1d`.{`weight`, `bias`}, `dt_bias`, `A_log`, and `D`: these are the diff --git a/megatron/core/tensor_parallel/layers.py b/megatron/core/tensor_parallel/layers.py index 662373064cf..2b55f2fd557 100644 --- a/megatron/core/tensor_parallel/layers.py +++ b/megatron/core/tensor_parallel/layers.py @@ -29,6 +29,7 @@ make_tp_sharded_tensor_for_checkpoint, prepare_input_tensors_for_wgrad_compute, ) +from megatron.experimental.gtp import HAVE_GTP, GTPEmbeddingWeight, wrap_module_params_gtp from ..dist_checkpointing.mapping import ShardedStateDict from ..transformer.utils import make_sharded_tensors_for_checkpoint @@ -218,6 +219,7 @@ def __init__( reduce_scatter_embeddings: bool = False, config: ModelParallelConfig, tp_group: Optional[torch.distributed.ProcessGroup] = None, + gtp_group: Optional[torch.distributed.ProcessGroup] = None, ): super(VocabParallelEmbedding, self).__init__() # Keep the input dimensions. @@ -280,6 +282,20 @@ def __init__( tensor=self.weight, is_parallel=True, dim=0, stride=1 ) + self.ps_size = 1 + if gtp_group is not None and gtp_group.size() > 1: + assert HAVE_GTP, ( + "generalized_tensor_parallel_size > 1 requires megatron.experimental.gtp to import " + "successfully (it pulls in low-precision tensor primitives from " + "transformer_engine)." + ) + wrap_module_params_gtp(self, ["weight"], gtp_group) + self.ps_size = gtp_group.size() + # Nothing prefetches embedding — it is head of the UNGRAPHED + # chain in fwd, and its bwd bypasses all_gather_and_prefetch_bwd + # via GTPEmbeddingWeight.backward. + self.weight._need_weight_prefetch = False + def forward(self, input_): """Forward. 
@@ -294,12 +310,17 @@ def forward(self, input_): masked_input[input_mask] = 0 else: masked_input = input_ + + weight = self.weight + if self.ps_size > 1: + weight = GTPEmbeddingWeight.apply(self.weight) + # Get the embeddings. if self.deterministic_mode: - output_parallel = self.weight[masked_input] + output_parallel = weight[masked_input] else: # F.embedding currently has a non-deterministic backward function - output_parallel = F.embedding(masked_input, self.weight) + output_parallel = F.embedding(masked_input, weight) # Mask the output embedding. if self.tp_group.size() > 1: output_parallel[input_mask, :] = 0.0 @@ -469,6 +490,7 @@ def forward( grad_output_buffer, wgrad_deferral_limit, tp_group, + ps_size, ): """Forward.""" if gradient_accumulation_fusion and hasattr(weight, "main_grad"): @@ -476,6 +498,10 @@ def forward( else: main_grad = None ctx.save_for_backward(input, weight) + + if ps_size > 1: + weight = weight.all_gather_and_prefetch(fwd=True) + # We can't save main_grad in save_for_backward as this module would be # reused across layers like MTP logits. So, to prevent in-place modification # checks we save the tensor in ctx. 
@@ -487,6 +513,7 @@ def forward( ctx.wgrad_deferral_limit = wgrad_deferral_limit ctx.grad_output_buffer = grad_output_buffer ctx.tp_group = tp_group + ctx.ps_size = ps_size if sequence_parallel: dim_size = list(input.size()) @@ -510,6 +537,13 @@ def backward(ctx, grad_output): input, weight = ctx.saved_tensors main_grad = ctx.main_grad use_bias = ctx.use_bias + + # GTP: re-gather weight for dgrad + if ctx.ps_size > 1: + sharded_weight = weight + weight = sharded_weight.all_gather_and_prefetch_bwd() + ctx.gradient_accumulation_fusion = False + grad_output_buffer = ctx.grad_output_buffer wgrad_deferral_limit = ctx.wgrad_deferral_limit handle = None @@ -643,16 +677,31 @@ def backward(ctx, grad_output): grad_weight = grad_output.t().matmul(total_input) grad_bias = grad_output.sum(dim=0) if use_bias else None + # GTP: reduce-scatter wgrad + if ctx.ps_size > 1 and grad_weight is not None: + grad_weight = sharded_weight.wgrad_reduce_scatter(grad_weight) + if ctx.sequence_parallel: handle.wait() # Need to return None's as gradient has to flow for all the input arguments # provided during forward - return (sub_grad_input, grad_weight, grad_bias, None, None, None, None, None, None) + return ( + sub_grad_input, + grad_weight, + grad_bias, + None, + None, + None, + None, + None, + None, + None, + ) if ctx.allreduce_dgrad: handle.wait() - return grad_input, grad_weight, grad_bias, None, None, None, None, None, None + return grad_input, grad_weight, grad_bias, None, None, None, None, None, None, None def linear_with_grad_accumulation_and_async_allreduce( @@ -665,6 +714,7 @@ def linear_with_grad_accumulation_and_async_allreduce( grad_output_buffer: Optional[List[torch.Tensor]] = None, wgrad_deferral_limit: Optional[int] = 0, tp_group: Optional[torch.distributed.ProcessGroup] = None, + ps_size: int = 1, ) -> torch.Tensor: """Linear layer execution with asynchronous communication and gradient accumulation fusion in backprop. 
@@ -741,6 +791,7 @@ def linear_with_grad_accumulation_and_async_allreduce( grad_output_buffer, wgrad_deferral_limit, tp_group, + ps_size, ] if not linear_with_grad_accumulation_and_async_allreduce.warned: @@ -835,6 +886,7 @@ def __init__( tp_comm_buffer_name: Optional[str] = None, # Not used disable_grad_reduce: bool = False, tp_group: Optional[torch.distributed.ProcessGroup] = None, + gtp_group: Optional[torch.distributed.ProcessGroup] = None, ): super(ColumnParallelLinear, self).__init__() @@ -914,6 +966,16 @@ def __init__( else: self.weight = None + self.ps_size = 1 + if gtp_group is not None and gtp_group.size() > 1: + assert HAVE_GTP, ( + "generalized_tensor_parallel_size > 1 requires megatron.experimental.gtp to import " + "successfully (it pulls in low-precision tensor primitives from " + "transformer_engine)." + ) + wrap_module_params_gtp(self, ["weight"], gtp_group) + self.ps_size = gtp_group.size() + if bias: if config.use_cpu_initialization: self.bias = Parameter( @@ -1066,6 +1128,7 @@ def forward( else None ), tp_group=self.tp_group, + ps_size=self.ps_size, ) gather_output = self.gather_output @@ -1181,6 +1244,7 @@ def __init__( is_expert: bool = False, tp_comm_buffer_name: str | None = None, # Not used tp_group: Optional[torch.distributed.ProcessGroup] = None, + gtp_group: Optional[torch.distributed.ProcessGroup] = None, ): super(RowParallelLinear, self).__init__() @@ -1261,6 +1325,16 @@ def __init__( ) setattr(self.weight, "allreduce", not (self.is_expert and self.expert_parallel)) + self.ps_size = 1 + if gtp_group is not None and gtp_group.size() > 1: + assert HAVE_GTP, ( + "generalized_tensor_parallel_size > 1 requires megatron.experimental.gtp to import " + "successfully (it pulls in low-precision tensor primitives from " + "transformer_engine)." 
+ ) + wrap_module_params_gtp(self, ["weight"], gtp_group) + self.ps_size = gtp_group.size() + if bias: if config.use_cpu_initialization: self.bias = Parameter(torch.empty(self.output_size, dtype=config.params_dtype)) @@ -1333,6 +1407,7 @@ def forward(self, input_: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: sequence_parallel=False, tp_group=None, grad_output_buffer=None, + ps_size=self.ps_size, ) # All-reduce across all the partitions. diff --git a/megatron/core/transformer/attention.py b/megatron/core/transformer/attention.py index 2e9178e9754..f5121a7439e 100644 --- a/megatron/core/transformer/attention.py +++ b/megatron/core/transformer/attention.py @@ -397,6 +397,7 @@ def __init__( is_expert=False, tp_comm_buffer_name='proj', tp_group=self.pg_collection.tp, + gtp_group=self.pg_collection.gtp, ) if ( @@ -1405,6 +1406,7 @@ def __init__( is_expert=False, tp_comm_buffer_name='qkv', tp_group=self.pg_collection.tp, + gtp_group=self.pg_collection.gtp, ) # Resolve which norm class to use for Q and K. 
diff --git a/megatron/core/transformer/cuda_graphs.py b/megatron/core/transformer/cuda_graphs.py index 4b1dc3260ab..5db0f911a7a 100644 --- a/megatron/core/transformer/cuda_graphs.py +++ b/megatron/core/transformer/cuda_graphs.py @@ -20,6 +20,7 @@ import torch from torch.utils._pytree import tree_map as tree_map_pyt +from megatron.core import parallel_state from megatron.core.num_microbatches_calculator import get_num_microbatches from megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.tensor_parallel.random import ( @@ -57,6 +58,17 @@ except: HAVE_TE_GRAPHS = False +from megatron.experimental.gtp import ( + GTP_CONFIG, + HAVE_GTP, + GTPChain, + GTPShardedParam, + get_ag_stream, + get_rs_stream, + reallocate_gtp_cache_to_mempool, + wait_async_comms, +) + try: from tqdm import tqdm @@ -68,6 +80,29 @@ _IS_GRAPH_WARMUP = False logger = logging.getLogger(__name__) + +def _set_skip_fp8_weight_update_tensor(value): + """Compat shim — TE main removed FP8GlobalStateManager.set_skip_fp8_weight_update_tensor + in favor of direct access to quantization_state.skip_fp8_weight_update_tensor. + """ + qstate = FP8GlobalStateManager.quantization_state + if qstate.skip_fp8_weight_update_tensor is None: + qstate.skip_fp8_weight_update_tensor = torch.empty(1, dtype=torch.float32, device="cuda") + qstate.skip_fp8_weight_update_tensor.fill_(value) + + +_GTP_PHASE2_COMPLETION_EVENTS: List[torch.cuda.Event] = [] + + +def get_gtp_phase2_completion_events() -> List[torch.cuda.Event]: + """Return all GTP bwd Phase 2 completion events from CG runners. + + finalize_model_grads waits on these before reading main_grad, + ensuring captured main_grad.add_ on runner.stream has completed. + """ + return _GTP_PHASE2_COMPLETION_EVENTS + + # Freeze GC during capture. # TODO (@lmcafee): remove all freeze-GC code once most users are on PyTorch 2.9+. 
FREEZE_GC = os.getenv("CUDA_GRAPH_CAPTURE_FREEZE_GC") != "0" @@ -391,6 +426,17 @@ def create_cudagraphs(cls): "https://github.com/NVIDIA/TransformerEngine/blob/v2.10/transformer_engine/pytorch/utils.py#L759" # pylint: disable=line-too-long ) + if any(r[0].generalized_tensor_parallel for r in cls.cudagraph_record): + assert HAVE_GTP, ( + "generalized_tensor_parallel_size > 1 requires megatron.experimental.gtp to import " + "successfully (it pulls in low-precision tensor primitives from " + "transformer_engine)." + ) + reallocate_gtp_cache_to_mempool( + torch.cuda.current_device(), CudaGraphManager.global_mempool + ) + GTP_CONFIG.check_param_states = False + gc.collect() torch.cuda.empty_cache() @@ -510,6 +556,7 @@ def delete_cuda_graphs(): _CudagraphGlobalRecord.cudagraph_created = False _CudagraphGlobalRecord.cudagraph_record = [] _CudagraphGlobalRecord.cudagraph_inference_record = [] + _GTP_PHASE2_COMPLETION_EVENTS.clear() # TODO: Optional?: Force garbage collection to clean up memory gc.collect() @@ -561,6 +608,27 @@ class _CudagraphReplayNode(torch.autograd.Function): """Replays the runner's cudagraphs with autograd. Handles copying data into/out of the cudagraph io and fp8/fp4 if used.""" + ## Capture-time sync schemes (wait_async_comms is called INSIDE the captured + # graph so the drain ops are embedded in the graph itself, not before replay). + # + # Fwd — single-phase drain (full join before completion_event): + # runner_N.stream: GEMM ──▶ wait_async_comms ▶ _wait_side_streams ──fwd_completion_event.record + # ag_stream: AG ──────────────────────▶ ag_event.record + # main_stream: fwd_completion_event.wait ▶ [next runner] + # main_stream unblocks after ag/rs streams are fully drained, so eager + # ops that follow see completed data. 
+ # + # Bwd — phased drain (cross-graph RS overlap, see _CudaGraphRunner.backward): + # runner_N.stream: GEMM ─▶ Phase 1 (drain AG) ─▶ fence ─▶ bwd_completion_event.record ─▶ Phase 2 (wait_side_streams) ─▶ phase2_event + # ag_stream: AG ──────────────────▶ ag_event.record ▶ fence_event.record + # rs_stream: RS_issue ──▶ handle.wait ▶ rs_event.record ▶ main_grad.add_ ─────────────────────────────────────────────────────┐ + # main_stream: bwd_completion_event.wait ▶ [next runner] │ + # phase2_event.wait (in finalize_model_grads) ◀─┘ + # main_grad.add_ runs on rs_stream right after NCCL RS — concurrent with + # Phase 1 AG drain. By the time bwd_completion_event fires and the next + # runner launches, the add_ is done (no SM saturation blocking overlap). + # finalize_model_grads waits phase2_completion_event before DP grad sync. + @staticmethod def forward(ctx, runner, is_first_microbatch, *inputs): """Replay the forward graph of the passed runner.""" @@ -608,10 +676,17 @@ def forward(ctx, runner, is_first_microbatch, *inputs): # Note that FP8GlobalStateManager.is_first_fp8_module() is inacccurate as each # layer may be in its own fp8 context, when the fp8 recipe != delayed_scaling if runner.is_first_layer and (runner.fp8_param_cache_updated != is_first_microbatch): - FP8GlobalStateManager.set_skip_fp8_weight_update_tensor(not is_first_microbatch) + _set_skip_fp8_weight_update_tensor(not is_first_microbatch) runner.fp8_param_cache_updated = is_first_microbatch - runner.fwd_graph.replay() + if runner.use_stream: + runner.stream.wait_stream(torch.cuda.current_stream()) + with torch.cuda.stream(runner.stream): + runner.fwd_graph.replay() + torch.cuda.current_stream().wait_event(runner.fwd_completion_event) + else: + runner.fwd_graph.replay() + return runner.fwd_graph_output_surface @staticmethod @@ -644,7 +719,14 @@ def backward(ctx, *grads): if user_output_grad.data_ptr() != cudagraph_output_grad.data_ptr(): cudagraph_output_grad.copy_(user_output_grad) - 
runner.bwd_graph.replay() + if runner.use_stream: + runner.stream.wait_stream(torch.cuda.current_stream()) + with torch.cuda.stream(runner.stream): + runner.bwd_graph.replay() + torch.cuda.current_stream().wait_event(runner.bwd_completion_event) + else: + runner.bwd_graph.replay() + runner.status = _GraphStatus.FWD_READY # Update FP8 scale factors if needed @@ -659,10 +741,38 @@ def backward(ctx, *grads): for param, grad_added in runner.groundtruth_grad_added_to_main_grad.items(): param.grad_added_to_main_grad = grad_added + # Fire DDP grad-ready hooks for GTP params whose main_grad.add_ was + # captured in this runner's bwd_graph. DDP's autograd hook returns + # early under is_graph_capturing and doesn't re-run from Python at + # replay, so trigger it explicitly here to let DDP RS overlap with + # the rest of backward. See _compute_finalized_during_bwd_capture + # for how the set is built. + # + # Fire on rs_stream (GRAPHED chain, GTP group) — the stream that + # ran the captured main_grad.add_(wgrad_rs). Stream FIFO orders the + # hook (check_grads' grad_data.norm and DDP-RS preEvent record) + # after that write. wait_event(bwd_phase2_completion_event) is + # defensive against future Phase 2 work on other sub-streams. + # main_stream stays unblocked so the next runner can start in + # parallel. 
+ if runner.generalized_tensor_parallel and runner.finalized_during_bwd_capture: + gtp_rs_stream = get_rs_stream( + GTPChain.GRAPHED.value, parallel_state.get_generalized_tensor_parallel_group() + ) + gtp_rs_stream.wait_event(runner.bwd_phase2_completion_event) + with torch.cuda.stream(gtp_rs_stream): + for param in runner.finalized_during_bwd_capture: + hook = getattr(param, '_grad_accum_hook', None) + if hook is not None: + hook() + # Replaying the next bwd graph destroys the data held in static_grad_inputs, so clone # wgrads as autograd may launch the next graph before wgrads are accumulated dgrads = runner.static_grad_inputs[: runner.num_dgrads] - wgrads = (g.clone() for g in runner.static_grad_inputs[runner.num_dgrads :]) + wgrads = ( + g.clone() if torch.is_tensor(g) else g + for g in runner.static_grad_inputs[runner.num_dgrads :] + ) return None, None, *dgrads, *wgrads @@ -711,6 +821,14 @@ def __init__( self.fp4_runtime_enabled = None self.deallocate_pipeline_outputs = False self.num_warmup_steps = 0 + self.use_stream = False + self.generalized_tensor_parallel = False + self.fwd_side_streams = [] + self.bwd_side_streams = [] + # Populated by create_bwd_graph: GTP params whose main_grad.add_ was captured in THIS + # graph. Used in Graphed.backward's post-replay hook loop to fire DDP hooks only in the + # graph whose replay populates main_grad. + self.finalized_during_bwd_capture = [] self.grad_enabled = need_backward and torch.is_grad_enabled() self.func = super(MegatronModule, self.base_module).__call__ if func is None else func @@ -735,16 +853,99 @@ def __init__( self.fp4_enabled = self.base_module.config.fp4 is not None self.fp8_runtime_enabled = None self.fp4_runtime_enabled = None + self.generalized_tensor_parallel = ( + self.base_module.config.generalized_tensor_parallel_size > 1 + ) + + # Ensure internal warmup (inside create_fwd_graph) has >= 2 steps + # for GTP: 1st builds chain + tickets, 2nd exercises prefetch path. 
+ if self.generalized_tensor_parallel: + self.num_warmup_steps = max(self.num_warmup_steps, 2) + + if self.generalized_tensor_parallel: + self.use_stream = True + self.stream = torch.cuda.Stream() + self.fwd_completion_event = torch.cuda.Event(external=True, interprocess=True) + self.bwd_completion_event = torch.cuda.Event(external=True, interprocess=True) + # GRAPHED chain only hits dense modules (mamba/attn/moe_router), + # all sharded across PARAMETER_SHARDING_GROUP. Materialize that + # (chain, group) stream pair now so it is registered as a + # captured side stream before the first forward. + from megatron.core.parallel_state import get_generalized_tensor_parallel_group + + gtp_group = get_generalized_tensor_parallel_group() + graphed_ag = get_ag_stream(GTPChain.GRAPHED.value, gtp_group) + graphed_rs = get_rs_stream(GTPChain.GRAPHED.value, gtp_group) + self._register_side_stream(self.fwd_side_streams, graphed_ag) + self._register_side_stream(self.bwd_side_streams, graphed_ag) + self._register_side_stream(self.bwd_side_streams, graphed_rs) + # Bridges Phase 1 (AG drain on ag_stream) into runner_stream + # so bwd_completion_event records past NCCL_AG completion. + self.bwd_ag_fence_event = torch.cuda.Event() + # Records after Phase 2 (RS drain + main_grad.add_) completes + # on runner.stream. finalize_model_grads waits on this before + # reading main_grad for the DP gradient sync. 
+ self.bwd_phase2_completion_event = torch.cuda.Event( + external=True, interprocess=True + ) + _GTP_PHASE2_COMPLETION_EVENTS.append(self.bwd_phase2_completion_event) if self.fp8_enabled: self.fp8_recipe = FP8GlobalStateManager.get_fp8_recipe() - FP8GlobalStateManager.set_skip_fp8_weight_update_tensor(False) + _set_skip_fp8_weight_update_tensor(False) if self.fp4_enabled: from megatron.core.fp4_utils import get_fp4_recipe # to avoid circular import self.fp4_recipe = get_fp4_recipe(self.base_module.config) - FP8GlobalStateManager.set_skip_fp8_weight_update_tensor(False) + _set_skip_fp8_weight_update_tensor(False) + + def _register_side_stream(self, side_streams, stream): + """Register a side stream for graph capture/replay synchronization.""" + side_streams.append(stream) + + def _sync_against_side_streams(self, side_streams): + """Make registered side streams wait for the current stream. + Also injects a dummy kernel into each stream to ensure it is non-empty, + which is required for CUDA graph capture (joining an empty captured + stream is a CUDA error).""" + for s in side_streams: + s.wait_stream(torch.cuda.current_stream()) + with torch.cuda.stream(s): + torch.cuda._sleep(1) + + def _wait_side_streams(self, side_streams): + """Make the current stream wait for all registered side streams.""" + for s in side_streams: + torch.cuda.current_stream().wait_stream(s) + + def _compute_finalized_during_bwd_capture(self): + """Return GTP params whose DDP grad-ready hook fires post-replay + of THIS bwd_graph. + + A param's hook must fire in the graph that physically populates its + main_grad. Rules, given the cascade walk in wgrad_reduce_scatter + finalizes p.next_w on behalf of p: + - p.prev_w is None → p is sync-finalized in p's own graph; add p. + - p.next_w is not None → p.next_w's main_grad.add_ is captured here + via p's cascade; add p.next_w. 
(For cross-graph chain tails the + wait was captured in the producer's Phase 2, but the add lives + here regardless, bridged by external rs_event.) + """ + if not HAVE_GTP or GTPShardedParam is None: + return [] + finalized = {} # id → param + for p in self.params_to_backprop: + if not isinstance(p, GTPShardedParam): + continue + if getattr(p, "prev_w", None) is None: + for w in getattr(p, "_weights", [p]): + finalized[id(w)] = w + next_w = getattr(p, "next_w", None) + if next_w is not None: + for w in getattr(next_w, "_weights", [next_w]): + finalized[id(w)] = w + return list(finalized.values()) def __str__(self): return "%s; hid %s" % ( @@ -919,6 +1120,7 @@ def _resolve_input_buffer(ten): with ctx: # warmup again as case graph capture mode may execute a different codepath _set_warmup_start() + for _ in range(self.num_warmup_steps): with self.get_quantization_context(): @@ -943,6 +1145,10 @@ def clone_ten(ten): allow_unused=True, ) + if self.generalized_tensor_parallel: + wait_async_comms(GTPChain.GRAPHED.value) + self._sync_against_side_streams(self.bwd_side_streams) + _set_warmup_end() with self.get_quantization_context(): @@ -963,10 +1169,22 @@ def clone_ten(ten): with torch.cuda.graph( self.fwd_graph, pool=self.mempool, capture_error_mode="thread_local" ): + + self._sync_against_side_streams(self.fwd_side_streams) + fwd_graph_outputs = self.func( *self.fwd_graph_input_args, **self.fwd_graph_input_kwargs ) + if self.generalized_tensor_parallel: + wait_async_comms(GTPChain.GRAPHED.value) + + if self.fwd_side_streams: + self._wait_side_streams(self.fwd_side_streams) + + if self.use_stream: + self.fwd_completion_event.record() + # Unfreeze GC. if FREEZE_GC: gc.unfreeze() @@ -1033,6 +1251,19 @@ def create_bwd_graph(self): # to 0 when activation checkpointing is used. See [interaction with recompute]. global bwd_buffer_reuse_ref_count + # Tag cross-graph chain-tail GTP params: those whose prev_w lives in + # another runner's params_to_backprop. 
Read by TE's wgrad_reduce_scatter + # cascade and wait_async_comms to split the captured RS wait/add across + # producer and consumer graphs (avoids cross-capture cudaStreamWaitEvent + # on c10d Work.postEvent). + if self.generalized_tensor_parallel and HAVE_GTP and GTPShardedParam is not None: + pset = {id(p) for p in self.params_to_backprop} + for p in self.params_to_backprop: + if not isinstance(p, GTPShardedParam): + continue + prev_w = getattr(p, "prev_w", None) + p._is_cross_graph_tail = prev_w is not None and id(prev_w) not in pset + assert self.grad_enabled self.bwd_graph = torch.cuda.CUDAGraph() @@ -1065,11 +1296,16 @@ def create_bwd_graph(self): out_grad.requires_grad = True self.static_grad_outputs.append(out_grad) + torch.cuda.synchronize() + # Freeze GC, to speed up capture time ~15-20x. if FREEZE_GC: gc.freeze() with torch.cuda.graph(self.bwd_graph, pool=self.mempool): + + self._sync_against_side_streams(self.bwd_side_streams) + grad_inputs = torch.autograd.grad( outputs=tuple(o for o in self.fwd_graph_output_surface if o.requires_grad), inputs=tuple(i for i in self.fwd_graph_input_surface if i.requires_grad), @@ -1079,10 +1315,54 @@ def create_bwd_graph(self): allow_unused=True, ) + # GTP cross-graph RS overlap, two phases: + # Phase 1 — drain AG, fence runner_stream past ag_stream's tail, + # then record bwd_completion_event so main_stream can + # release the next runner while RS is still in flight. + # Phase 2 — drain RS wait on rs_stream. For cross-graph chain + # tails the wait is captured here, the add in the + # consumer's cascade; for within-graph tails both + # happen here (see wait_async_comms). + if self.generalized_tensor_parallel: + # Phase 1: drain AG; fence runner_stream past ag_stream so + # bwd_completion_event records AFTER NCCL_AG completion. 
+ wait_async_comms(GTPChain.GRAPHED.value, skip_rs=True) + from megatron.core.parallel_state import get_generalized_tensor_parallel_group + + gtp_group = get_generalized_tensor_parallel_group() + graphed_ag = get_ag_stream(GTPChain.GRAPHED.value, gtp_group) + self.bwd_ag_fence_event.record(graphed_ag) + torch.cuda.current_stream().wait_event(self.bwd_ag_fence_event) + + # Record completion AFTER AG drain + fence but BEFORE RS drain, + # so main_stream can trigger the next runner while RS is still + # in flight on rs_stream. + self.bwd_completion_event.record() + + # Phase 2: in-graph RS drain + finalize. + wait_async_comms(GTPChain.GRAPHED.value, finalize_after_drain=True) + + if self.bwd_side_streams: + self._wait_side_streams(self.bwd_side_streams) + + if self.generalized_tensor_parallel: + # Phase 2 + side-stream join done — record so + # finalize_model_grads can wait for main_grad.add_ completion. + self.bwd_phase2_completion_event.record() + + if self.use_stream and not self.generalized_tensor_parallel: + # Non-GTP path: record after the side-stream join. + self.bwd_completion_event.record() + # Unfreeze GC. if FREEZE_GC: gc.unfreeze() + # See _compute_finalized_during_bwd_capture for what's in this set and why. + self.finalized_during_bwd_capture = ( + self._compute_finalized_during_bwd_capture() if self.generalized_tensor_parallel else [] + ) + # Constructs a tuple suitable for returning from Graphed.backward: # Pads out the actually-needed grads with Nones in gradient slots for inputs # that don't require grad @@ -1677,8 +1957,8 @@ def __call__(self, megatron_module, args, kwargs, cache_key=None): return super(MegatronModule, megatron_module).__call__(*args, **kwargs) self.is_first_microbatch = False - # If forward only, next replay should be a forward pass as well - if is_inference_mode or not torch.is_grad_enabled(): + # If forward only, next replay should be a forward pass as well. 
+ if is_inference_mode or not torch.is_grad_enabled() or not runner.fwd_graph_recorded: runner.status = _GraphStatus.FWD_READY else: runner.status = _GraphStatus.BWD_READY diff --git a/megatron/core/transformer/mlp.py b/megatron/core/transformer/mlp.py index 46979a8ba8f..32da4c792a6 100644 --- a/megatron/core/transformer/mlp.py +++ b/megatron/core/transformer/mlp.py @@ -169,6 +169,7 @@ def __init__( input_size: Optional[int] = None, ffn_hidden_size: Optional[int] = None, tp_group: Optional[torch.distributed.ProcessGroup] = None, + gtp_group: Optional[torch.distributed.ProcessGroup] = None, ): super().__init__(config=config) @@ -217,6 +218,7 @@ def __init__( is_expert=is_expert, tp_comm_buffer_name="fc1", tp_group=tp_group, + gtp_group=gtp_group, stride=fc1_stride, ) @@ -238,6 +240,7 @@ def __init__( is_expert=is_expert, tp_comm_buffer_name="fc2", tp_group=tp_group, + gtp_group=gtp_group, ) def forward( diff --git a/megatron/core/transformer/moe/shared_experts.py b/megatron/core/transformer/moe/shared_experts.py index a565e2ec718..296a1abb76c 100644 --- a/megatron/core/transformer/moe/shared_experts.py +++ b/megatron/core/transformer/moe/shared_experts.py @@ -115,7 +115,12 @@ def __init__( config.ffn_hidden_size = config.moe_shared_expert_intermediate_size # TODO(Hepteract): pass pg_collection to MLP after refactoring MLP - super().__init__(config=config, submodules=submodules, tp_group=pg_collection.tp) + super().__init__( + config=config, + submodules=submodules, + tp_group=pg_collection.tp, + gtp_group=pg_collection.gtp, + ) self.use_shared_expert_gate = gate if self.use_shared_expert_gate: diff --git a/megatron/core/transformer/multi_token_prediction.py b/megatron/core/transformer/multi_token_prediction.py index b167f7f3127..363e7e7b4af 100755 --- a/megatron/core/transformer/multi_token_prediction.py +++ b/megatron/core/transformer/multi_token_prediction.py @@ -823,6 +823,7 @@ def __init__( is_expert=False, tp_comm_buffer_name="mtp_eh_proj", 
tp_group=pg_collection.tp if pg_collection is not None else None, + gtp_group=pg_collection.gtp, ) # Build inner layers: two possible paths diff --git a/megatron/core/utils.py b/megatron/core/utils.py index d04fd180bb2..a85c3653947 100644 --- a/megatron/core/utils.py +++ b/megatron/core/utils.py @@ -875,7 +875,10 @@ def check_param_hashes_across_dp_replicas( for params, local_param_hashes, all_gather_group in zip( [non_expert_params, expert_params], [local_non_expert_param_hashes, local_expert_param_hashes], - [parallel_state.get_data_parallel_group(), parallel_state.get_expert_data_parallel_group()], + [ + parallel_state.get_data_parallel_group(with_gtp=True), + parallel_state.get_expert_data_parallel_group(with_gtp=True), + ], ): # Collect per-parameter hashes across all ranks in group. assert len(params) == len(local_param_hashes) diff --git a/megatron/experimental/__init__.py b/megatron/experimental/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/megatron/experimental/gtp/README.md b/megatron/experimental/gtp/README.md new file mode 100644 index 00000000000..0b80d50082b --- /dev/null +++ b/megatron/experimental/gtp/README.md @@ -0,0 +1,229 @@ +# Generalized Tensor Parallelism (GTP) — Key Features + +> ⚠️ **Experimental.** GTP is an experimental feature and its API, configuration, and behavior may change in future versions without notice. + +**Scope**: this doc is a high-level summary of GTP. + +Core implementation: `megatron/experimental/gtp/generalized_tensor_parallelism.py`. The public surface is re-exported from `megatron/experimental/gtp/__init__.py`, which also owns the `HAVE_GTP` fallback used by callers that need to remain importable when GTP is unavailable. Low-precision tensor primitives (FP8 / MXFP8 / NVFP4) remain in TransformerEngine and are imported by `generalized_tensor_parallelism.py`. + +--- + +## 1. 
Fine-Grained, Per-Weight Materialization & Gradient Reduction + +Each weight is sharded 1/N across a GTP group along `out_features`, stored as a `GTPShardedParam` subclass of `nn.Parameter`. Materialization and gradient reduction are both **per-weight, per-call** — not per-model or per-module: + +- **Independent state per param**: each has its own AG state (`state`) and RS state (`rs_state`) machines, both cycling `NONE → ASYNC_WAIT → DATA_READY → NONE` and tracked separately so fwd and bwd async ops don't interfere. +- **Prefetch chain for AG** (doubly-linked `prev_w` / `next_w`): during fwd, each weight's `all_gather_and_prefetch` issues async AG for `next_w`; during bwd, `all_gather_and_prefetch_bwd` issues async AG for `prev_w`. Layer *i*'s AG overlaps with layer *i−1*'s GEMM. For an L-layer model, L−1 all-gathers are fully hidden behind compute. +- **Deferred RS finalize for wgrad**: `wgrad_reduce_scatter` on param *i* launches an **async** reduce-scatter (handle stashed in `_wgrad_rs_handle`) and returns `None` to autograd — the wgrad is NOT finalized into `main_grad` yet. Finalization is **deferred one step**: the next bwd step (param *i−1*'s `wgrad_reduce_scatter`) calls `self.next_w._wait_reduce_scatter()` + `_finalize_wgrad()`, which waits on the stashed handle, accumulates the reduced wgrad into `main_grad`, and fires the DDP `register_grad_ready` hook. The chain's head (first-in-fwd, last-in-bwd) uses a synchronous RS since nothing follows it. This one-step deferral is what lets layer *i*'s RS overlap with layer *i−1*'s bwd GEMMs. +- **Cold start only**: every weight's very first AG is synchronous (`DATA_READY_SYNC`, no prefetch has run yet); the async prefetch chain kicks in from the second forward onward. + +Contrast with FSDP: FSDP gathers at module-group granularity in full precision with PyTorch-managed lifecycle. 
GTP works at individual-weight granularity, in quantized form, with its own explicit ticket-based buffer pool and a one-step-deferred RS finalizer. + +> **FSDP can't shrink into GTP because FSDP's overlap is bucket-grained by design** — bucket granularity exists *to avoid* paying NCCL launch latency on tiny params (LayerNorm γ/β, biases, Mamba `dt_bias`/`D`/`A_log`) and *to avoid* the per-weight scheduling state that GTP relies on (per-param prefetch chain, ticket-based buffer cache, stream choreography). Removing buckets doesn't make FSDP faster; it makes FSDP into GTP, with all the engineering that entails — selective wrapping (only large GEMM weights), per-weight prefetch chain, per-param buffer ticket, and explicit AG/RS stream choreography on a side stream so external drains have something meaningful to wait on. + +## 2. CUDA Graph Compatibility + +CG compatibility is designed-in from day one, not retrofitted. The entire sync / buffer / chain architecture is shaped around making **captured fwd/bwd replays produce identical bit-for-bit behavior** — without the usual capture-vs-eager pitfalls that force other weight-sharding schemes to either disable CG or require special handling. + +- **Two chains, never cross-linked** (`GTPChain.GRAPHED` / `GTPChain.UNGRAPHED`). `prev_w` / `next_w` only connect same-chain params, so a captured traversal never reaches into eager Python and vice-versa. +- **`torch.cuda.Event(external=True)`** for `ag_event` / `rs_event` — the events survive CG capture boundaries and can be waited on from replay-time streams. +- **Idempotent ticket cache**: `GTPWeightCache.get(ticket)` keeps `slot.buf` set even after `release()`, so replays read the same buffer address as capture. `clear()` drops buffers while keeping tickets valid → supports CG re-capture with lazy re-allocation. 
+- **`reallocate_to_mempool(device, mempool)`** pre-migrates GRAPHED-chain buffers into the CG memory pool *before* capture, so no CUDA allocations happen inside the captured graph. UNGRAPHED buffers stay in regular allocator memory. +- **Lazy, one-shot chain linking**: `prefetch_initialized` is flipped during the first fwd (warmup), so the chain-construction Python side-effects never execute inside a captured graph. The link table is buffered and flushed atomically at the second forward. +- **DDP hook manual triggering**: `register_grad_accum_hook` stores the DDP hook on the param; `_CudagraphReplayNode.backward` calls it manually after replay (since `AccumulateGrad` hooks are silenced by replay). This is also how the `assert self.grad_reduce_handle is not None` failure from partial-CG + overlap-grad-reduce is resolved. +- **Drains at CG / eager boundary**: `_drain_gtp_side_streams()` before eager MoE expert compute. Inside bwd capture, two-phase drain: Phase 1 joins the within-graph cascade and records `bwd_completion_event` (next runner unblocks); Phase 2 calls `wait_async_comms(GRAPHED)` to drain the chain-tail handle and re-joins side streams (queued after the event so it doesn't delay the next runner). +- **Side-stream registration**: the `(GRAPHED, gtp_group)` ag/rs streams are materialized at runner init so `_register_side_stream` captures them before the first forward. + +## 3. Low-Precision Quantize-Then-Gather + +Wire bandwidth scales with the **quantized** size, not BF16 size — GTP composes with low-precision training rather than fighting it. + +- **FP8 / MXFP8**: quantize kernel runs per microbatch on the local shard with no GTP-group amax reduction (FP8 amax allreduce is the standard DP-group one in `reduce_and_update_fp8_tensors`, unchanged by GTP). On subsequent microbatches, `skip_weight_cast=True` reuses the quantized buffer. 
+- **NVFP4** (4-bit, block-scaled): amax reduced across the GTP group before scaling so ranks share a consistent scale for the full weight; custom `_all_gather_nvfp4` handles rowwise + columnwise views and interleaved layout. Post-processing (re-assemble interleaved data, re-pad `scale_inv`, transition to `GEMM_READY`) is deferred into `_NVFP4AllGatherAsyncHandle.wait()` so it stays off the critical path. +- **Coalesced NCCL**: `grouped_gather_along_first_dim` uses `torch.distributed._coalescing_manager` to batch E experts' AGs into a single NCCL op. `BatchedNVFP4AllGatherAsyncHandle` wraps per-expert post-processing. +- **Padding**: at construction the **full tensor** is padded along dim0 to a multiple of `pad_for_alignment × gtp_size`, then sharded equally across the group. After all-gather, the padding ends up contiguous at the tail, so stripping is a single trailing slice (`tensor[:-pad_length]`) — no per-shard reshuffle, and the design naturally supports `pad_length` large enough to span multiple ranks' shards when the unpadded dim0 is small. + +### Per-microbatch schedule + +``` +Steady-state fwd (NVFP4): + default: ──GEMM(W_0)──quant+amax(W_1)──GEMM(W_1)──quant+amax(W_2)──GEMM(W_2)──... + ag_str: [AG_issue W_1] [AG_issue W_2] + +Steady-state fwd (FP8 / MXFP8): + default: ──GEMM(W_0)────quant(W_1)─────GEMM(W_1)────quant(W_2)─────GEMM(W_2)──... + ag_str: [AG_issue W_1] [AG_issue W_2] + (no GTP-group amax allreduce) + +Steady-state bwd (all recipes): + default: ──bwd GEMMs(W_i)──... + ag_str: [AG_issue W_{i-1}] + (bwd reuses fwd's quantized buffer; no quant, no amax) +``` + +quant+amax run sequentially with surrounding compute on the default stream; only the `dist.all_gather` issue is wrapped in `with torch.cuda.stream(ag_stream)`. The NCCL kernel runs on c10d's private ncclStream and overlaps with the next GEMM until it reaches its wait. 
+ +For NVFP4 the per-microbatch prefetch cost is **two** NCCL ops on the GTP ncclStream (amax allreduce + AG) serialized on the same communicator. FP8 and MXFP8 incur only the AG; their standard DP-group amax allreduce in `reduce_and_update_fp8_tensors` is unchanged by GTP. BF16 skips quant entirely. + +### Communication volume breakdown + +Per-microbatch per-weight comm budget (assuming bf16 wgrad reduce-scatter): + +| Format | Block | Data B/elem | Scale_inv B/elem | Per-elem | Fwd AR(amax) | Fwd AG | Bwd AG | Wgrad RS (bf16) | Total B/elem | vs BF16 | +|--------|-------|-------------|------------------|----------|--------------------------------|--------|--------|-----------------|--------------|----------------| +| BF16 | n/a | 2.0000 | — | 2.0000 | — | 2.0000 | 2.0000 | 2.0000 | 6.0000 | 1.00× (baseline) | +| MXFP8 | 32 | 1.0000 | 1/32 = 0.0313 | 1.0313 | — (microscale, no global amax) | 1.0313 | 1.0313 | 2.0000 | 4.0626 | 0.68× (–32%) | +| NVFP4 | 16 | 0.5000 | 1/16 = 0.0625 | 0.5625 | ≈0 in volume (latency-bound) | 0.5625 | 0.5625 | 2.0000 | 3.1250 | 0.52× (–48%) | + +How to read the columns: +- `Per-elem` = `Data B/elem + Scale_inv B/elem` — wire cost of one quantized weight buffer (data + scale_inv together). +- `Fwd AG` and `Bwd AG` each carry the quantized buffer once, so they equal `Per-elem`. Bwd reuses fwd's `self.quantized` buffer — no re-quantize, no AR(amax). +- `Wgrad RS (bf16)` = 2.0 B/elem — gradient is reduce-scattered in bf16 regardless of weight precision. +- `Fwd AR(amax)` is a separate NCCL collective: NVFP4 needs it (one fp32 scalar per tensor → ~0 B/elem volume but a fixed launch latency); MXFP8 doesn't (microscale-only). +- `Total B/elem` = `Fwd AG + Bwd AG + Wgrad RS` — amax AR is omitted because its volume is essentially 0. 
+ +Concrete numbers for one weight of shape `[16384, 4096]` (67.1M params), per microbatch: + +| Format | Per-microbatch volume | +|--------|-----------------------| +| BF16 | 403 MB | +| MXFP8 | 273 MB (–32%) | +| NVFP4 | 210 MB (–48%) | + +Quantize-then-gather attacks AG only: AG portion shrinks ~72% from BF16 → NVFP4, but RS is untouched, so the wgrad RS becomes the dominant comm path in NVFP4 (~64% of the budget at bf16 RS, ~78% at fp32 RS). + +### Insights from the comm-volume table + +1. **AG savings ≠ end-to-end comm savings.** The headline "NVFP4 → 4× smaller wire" applies to AG only. End-to-end per-weight comm goes 6.0 → 3.125 B/elem, which is **2×, not 4×**. The other half of the savings is left on the table because RS doesn't shrink. + +2. **Wgrad RS becomes the new bottleneck under NVFP4.** RS share of the budget jumps from ~33% (BF16) to ~64% (NVFP4) at bf16 RS dtype. Future optimization should target the RS path (RS dtype reduction, coalesced gradient reduction), not more aggressive AG quantization. + +3. **Diminishing returns from precision halving.** BF16 → MXFP8 saves 1.94 B/elem (–32% of baseline); MXFP8 → NVFP4 saves only an additional 0.94 B/elem (–16% of baseline). Each step saves a smaller share of the original budget — there's no point chasing more aggressive AG quantization until RS shrinks. + +4. **AR(amax) is latency-only, not volume.** One fp32 scalar per tensor → effectively 0 B/elem in a bandwidth budget. It belongs in a launch-overhead model, not in the bytes table. + +5. **Bwd inherits AG savings for free.** The quantize + amax + cast cost is paid once per microbatch in fwd. Bwd reuses the cached `self.quantized` buffer with no re-quantize and no AR — but still gets the small-AG payload. The cost-benefit of quantize-then-gather is asymmetric: fwd pays once, both fwd-AG and bwd-AG benefit. + +**Net takeaway:** "NVFP4 → 4× smaller wire" is half-true. GTP+NVFP4 cuts AG ~4× but leaves RS untouched, so end-to-end comm is only ~2× faster. 
The next lever isn't more aggressive quantization — it's RS dtype reduction or coalesced gradient reduction. + +## 4. Buffer / Memory Management + +Two distinct pools with explicit lifecycle rules: + +- **`GTPWeightCache`** (AG/RS output buffers) — ticket-based, keyed on `(shape, dtype, fwd, expert_idx, reduce_scatter)`. Same-shape buffers across layers are shared. Tickets persistent; buffer allocated lazily on first `get()`; addresses stable across iterations for CG replay. +- **`_wgrad_buf_pool`** (UNGRAPHED wgrad input recycling) — tagged with `_from_gtp_wgrad_pool=True` at `_wgrad_pool_get`. `_wgrad_pool_put` no-ops on foreign buffers (fresh allocs from Megatron `layers.py` or aten F.embedding bwd) → caching allocator handles those. Prevents the pool from accumulating untagged buffers each iter. + +## 5. Composability with TP / SP / EP / DDP + +- **TP** (intra-layer): orthogonal axis — GTP shards `out_features` regardless of TP's parallel mode (column or row). 2D grid naturally formed via `tp_group × gtp_group`. +- **SP** (sequence-parallel): transparent — GTP operates at weight dim, SP at sequence dim. +- **EP** (MoE): `GroupedLinear` with GTP → each routed expert sharded across `EXPERT_GENERALIZED_TENSOR_PARALLEL_GROUP`, independent of EP. MoE AllToAll (HybridEP/NVLink) runs independently of GTP AG/RS (NCCL/IB). +- **DDP**: GTP bypasses autograd's grad accumulator (async RS returns `None`; `_finalize_wgrad` accumulates directly into `main_grad`). `register_grad_accum_hook` + manual invocation from `_finalize_wgrad` (eager path) and `_CudagraphReplayNode.backward` (captured path) serializes DDP RS strictly after GTP RS — critical at IB scale to avoid deadlock between DDP and GTP on the same NIC. + +## 6. Opt-in, Minimally Invasive Integration + +- Drop-in `gtp_group` kwarg on `Linear` / `LayerNormLinear` / `LayerNormMLP` / `GroupedLinear`; no framework-level refactor required. 
+- `classify_gtp_chains(model)` walks `named_parameters()` once at init and sets `chain_id` on every `GTPShardedParam` based on the current `cuda_graph_modules`. +- Turning it off is a no-op: when `gtp_group.size() == 1`, `wrap_module_params_gtp` short-circuits; when `generalized_tensor_parallel_size == 1`, the GTP path in `layers.py` is skipped entirely. +- User-tunable knobs (`GTPConfig.pad_for_alignment`, `weight_prefetch`, `check_param_states`) plus a debug-name tagger (`tag_gtp_params_with_names`) for readable link-table output. + +## 7. Overlap Design Summary + +``` +fwd: AG(W_{i+1}) ∥ GEMM(W_i) ∥ CG replay of captured layers +bwd: AG(W_{i-1}) ∥ dgrad(W_i) ∥ wgrad(W_i) ∥ RS(wgrad_i) ∥ [finalize wgrad_{i+1} + DDP hook] +``` + +At bwd step *i* the step is launching *RS of wgrad_i* while finalizing the *previous* iter's wgrad (`wgrad_{i+1}` in bwd order = the next-one-over in fwd order). That one-step deferral is what makes the RS run concurrent with the next layer's dgrad/wgrad GEMMs instead of blocking after every layer. + +Communication never blocks compute except at the very first layer of each direction (cold start) and at enforced serialization points (CG/eager drains, finalize-grads barrier). + +### 7a. wgrad-before-dgrad schedule + +Default: backward runs dgrad GEMM, then wgrad GEMM, then issues the GTP wgrad RS — the RS overlaps with the *next* layer's bwd GEMMs (the one-step deferral above). + +Opt-in via `GTPConfig.wgrad_before_dgrad = True`: backward runs wgrad GEMM first, then issues the GTP wgrad RS, then runs dgrad GEMM — the RS NCCL overlaps with the dgrad GEMM of the **same** layer, and the prev_w AG prefetch issued at the top of bwd overlaps with the wgrad GEMM. Only affects `_Linear` and `_LayerNormLinear`; `LayerNormMLP` and `GroupedLinear` keep the original schedule. + +When to enable it: GTP + no-TP. The TP comm-overlap path assumes the original dgrad-first order, so under TP > 1 the flag stays False. 
Megatron auto-sets it for the GTP+no-TP case in `validate_args`. + +## 8. Scaling + +Effective per-GPU weight size = `W / (TP × GTP)`. Example: TP=4 + GTP=8 with NVFP4 → 32× weight-memory reduction and 128× wire-bandwidth reduction vs full BF16 replication, before data parallelism. + +## 9. Usage + +GTP is enabled through two CLI flags on Megatron's training launcher; everything else (process-group construction, parameter slicing, prefetch chain wiring, optimizer routing) is automatic once the flags are set. + +### Required flags + +```bash +# Shard dense weights (attention, mamba, MLP linears) 1/N along out_features. +--generalized-tensor-parallel-size <N> + +# Shard MoE routed-expert weights 1/M along out_features. Independent from +# `--generalized-tensor-parallel-size`; can be 1 for non-MoE models. +--expert-generalized-tensor-parallel-size <M> +``` + +### High-priority streams (Blackwell and later) + +Required on GB200 / B100 so the GTP comm streams get the SM priority needed for AG/RS overlap with compute: + +```bash +--high-priority-stream-groups ep gtp expt_gtp tp +``` + +The launcher also exports `CUDA_GRAPHS_USE_NODE_PRIORITY=1` so captured CUDA graphs respect the inherited stream priority. + +### Minimal end-to-end example + +```bash +# 4 ranks, GTP=2 across out_features, no TP, BF16 weights. +torchrun --nproc-per-node 4 pretrain_gpt.py \ + --tensor-model-parallel-size 1 \ + --pipeline-model-parallel-size 1 \ + --generalized-tensor-parallel-size 2 \ + --expert-generalized-tensor-parallel-size 1 \ + --bf16 \ + --num-layers 12 --hidden-size 1024 --num-attention-heads 16 \ + --seq-length 1024 --max-position-embeddings 1024 \ + --micro-batch-size 1 --global-batch-size 4 \ + --train-iters 10 \ + --use-mcore-models \ + --transformer-impl transformer_engine \ + --tokenizer-type NullTokenizer --vocab-size 32000 \ + --data-path <data-prefix> --split 99,1,0 +``` + +At iter-0 you'll see one rank-0 log line confirming the active config: + +``` +GTP enabled. 
GTPConfig(pad_for_alignment=16, check_param_states=False, + weight_prefetch=True, async_reduction=True, wgrad_before_dgrad=True, + fp8_param_gather=False, coalesce_amax_allreduce=True) +``` + +### What the flags do under the hood + +1. `parallel_state.initialize_model_parallel(...)` builds two new groups: `_GENERALIZED_TENSOR_PARALLEL_GROUP` (size = `--generalized-tensor-parallel-size`) and `_EXPERT_GENERALIZED_TENSOR_PARALLEL_GROUP` (size = `--expert-generalized-tensor-parallel-size`), plus the corresponding DP-with-GTP carve-outs (`_DATA_PARALLEL_GROUP_WITH_GTP`, `_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP`). +2. Megatron's `extensions/transformer_engine.py` reads `pg_collection.gtp` / `pg_collection.expt_gtp` and forwards them as the `gtp_group=` kwarg to `te.Linear` / `te.LayerNormLinear` / `te.GroupedLinear`. TE's `module/base.py` calls back into `megatron.experimental.gtp` via the hook registry (`register_gtp_hooks`) to slice each weight at `reset_parameters` time. +3. DDP carves out GTP shards into a separate bucket pool (`gtp_buffer_groups`) reduced over `intra_dp_cp_with_gtp_group` rather than full DP — the wgrad RS already reduced over the GTP axis. +4. Optimizer state is sharded across the same `with_gtp` subgroup; clip-by-global-norm sums squared norms over `model_parallel × with_gtp` so the reduction count matches the actual replica count. +5. `classify_gtp_chains(model)` runs once after model build (in `training.py`'s `get_model`) and wires each `GTPShardedParam` into a `GRAPHED` or `UNGRAPHED` prefetch chain based on the active `cuda_graph_modules`. 
+ +### Tuning knobs + +Set via `from megatron.experimental.gtp import GTP_CONFIG, update_gtp_config`: + +```python +update_gtp_config( + pad_for_alignment=16, # NVFP4: 16, MXFP8: 32, BF16: any; auto-set in training.py + weight_prefetch=True, # Disable to debug the cold-start path + async_reduction=True, # Whether to perform GTP gradient reduction asynchronously + wgrad_before_dgrad=False, # Auto-set True for GTP+no-TP + fp8_param_gather=False, # Companion to Megatron's --fp8-param-gather; currently asserted off + coalesce_amax_allreduce=True, # NVFP4 only; falls back if TE lacks compute_amax_nvfp4 +) +``` + +`training.py` auto-tunes `pad_for_alignment` based on the quantization recipe (`--fp4`, `--fp8-recipe=mxfp8`, etc.) before model construction. The other knobs are usually left at defaults. diff --git a/megatron/experimental/gtp/__init__.py b/megatron/experimental/gtp/__init__.py new file mode 100644 index 00000000000..340e5b12a86 --- /dev/null +++ b/megatron/experimental/gtp/__init__.py @@ -0,0 +1,80 @@ +# Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# See LICENSE for license information. + +"""Generalized Tensor Parallelism (GTP) for Megatron Core. + +See ``README.md`` in this folder for the design overview. The whole +implementation lives in ``generalized_tensor_parallelism.py``; this +``__init__`` re-exports the public surface and owns the ``HAVE_GTP`` +capability flag (False when the implementation module fails to import, +e.g. when TransformerEngine's low-precision tensor primitives are missing). 
+""" + +try: + from megatron.experimental.gtp.generalized_tensor_parallelism import ( + GTP_CONFIG, + GTPChain, + GTPEmbeddingWeight, + GTPShardedParam, + classify_gtp_chains, + get_ag_stream, + get_ag_streams_for_chain, + get_all_ag_streams, + get_all_rs_streams, + get_rs_stream, + get_rs_streams_for_chain, + reallocate_gtp_cache_to_mempool, + set_cuda_graph_modules, + tag_gtp_params_with_names, + ) + from megatron.experimental.gtp.generalized_tensor_parallelism import ( + update_config as update_gtp_config, + ) + from megatron.experimental.gtp.generalized_tensor_parallelism import ( + wait_async_comms, + wrap_module_params_gtp, + ) + + HAVE_GTP = True +except ImportError: + GTP_CONFIG = None + GTPChain = None + GTPEmbeddingWeight = None + GTPShardedParam = None + classify_gtp_chains = None + get_ag_stream = None + get_ag_streams_for_chain = None + get_all_ag_streams = None + get_all_rs_streams = None + get_rs_stream = None + get_rs_streams_for_chain = None + reallocate_gtp_cache_to_mempool = None + set_cuda_graph_modules = None + tag_gtp_params_with_names = None + update_gtp_config = None + wait_async_comms = None + wrap_module_params_gtp = None + HAVE_GTP = False + + +__all__ = [ + "GTP_CONFIG", + "GTPChain", + "GTPEmbeddingWeight", + "GTPShardedParam", + "HAVE_GTP", + "classify_gtp_chains", + "get_ag_stream", + "get_ag_streams_for_chain", + "get_all_ag_streams", + "get_all_rs_streams", + "get_rs_stream", + "get_rs_streams_for_chain", + "reallocate_gtp_cache_to_mempool", + "set_cuda_graph_modules", + "tag_gtp_params_with_names", + "update_gtp_config", + "wait_async_comms", + "wrap_module_params_gtp", +] diff --git a/megatron/experimental/gtp/generalized_tensor_parallelism.py b/megatron/experimental/gtp/generalized_tensor_parallelism.py new file mode 100644 index 00000000000..85dd701654f --- /dev/null +++ b/megatron/experimental/gtp/generalized_tensor_parallelism.py @@ -0,0 +1,1839 @@ +# Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. 
All rights reserved. +# +# See LICENSE for license information. + +"""Generalized Tensor Parallelism (GTP). + +Shards weight tensors 1/N across a GTP process group along ``out_features`` +and materializes them on-demand via async all-gather, with a per-weight +prefetch chain + ticket-based buffer cache co-designed for CUDA graph +capture/replay. Quantized AG (FP8 / MXFP8 / NVFP4) composes with the +sharding for compounding bandwidth reduction. +""" + +import math +import re +from collections import defaultdict +from contextlib import nullcontext +from dataclasses import dataclass, field +from enum import Enum +from typing import Dict, List, Optional + +import torch + +try: + import transformer_engine as te # noqa: F401 + import transformer_engine_torch as tex + from transformer_engine.pytorch.constants import ( + MXFP8_BLOCK_SCALING_SIZE, + NVFP4_BLOCK_SCALING_SIZE, + ) + from transformer_engine.pytorch.distributed import ( + _NVFP4AllGatherAsyncHandle, + gather_along_first_dim, + reduce_scatter_along_first_dim, + ) + from transformer_engine.pytorch.module.base import get_dummy_wgrad + from transformer_engine.pytorch.quantized_tensor import QuantizedTensor + from transformer_engine.pytorch.tensor import MXFP8TensorStorage, NVFP4TensorStorage + from transformer_engine.pytorch.tensor.mxfp8_tensor import MXFP8Quantizer + from transformer_engine.pytorch.utils import ( + nvtx_range_pop, + nvtx_range_push, + round_up_to_nearest_multiple, + ) +except (ImportError, ModuleNotFoundError) as _gtp_te_import_err: + raise ImportError( + "megatron.experimental.gtp requires TransformerEngine with FP8 / MXFP8 / " + "NVFP4 tensor primitives. Original error: " + str(_gtp_te_import_err) + ) from _gtp_te_import_err + + +class GTPChain(str, Enum): + """Prefetch chain identifier for an GTPShardedParam. + + GRAPHED — fwd/bwd captured by a CUDA graph (MLM _CudaGraphRunner). 
+ UNGRAPHED — fwd/bwd runs eagerly; includes embedding/output_layer and + routed grouped experts always, plus router/shared_experts + when their scope tag is not in cuda_graph_modules. + + Chains never cross-link (prev_w/next_w stay within one chain). CG + disabled → single UNGRAPHED chain; full-iteration graph → single GRAPHED. + """ + + GRAPHED = "GTP_graphed" + UNGRAPHED = "GTP_ungraphed" + + +# Module-level cuda_graph_modules, set by the integrator at init via set_cuda_graph_modules(). +# None or empty → CG is disabled; every GTP param classifies as UNGRAPHED. +# Value is a set of scope tags; e.g. {"mamba","attn","moe_router"}. +_CUDA_GRAPH_MODULES: Optional[set] = None +# Whether shared_experts are run with overlap (cannot be captured). When True, +# shared_experts stay UNGRAPHED regardless of moe_router scope inclusion, matching +# the transformer_layer.py guard that excludes them from the captured submodules. +_MOE_SHARED_EXPERT_OVERLAP: bool = False + + +def set_cuda_graph_modules(scope, moe_shared_expert_overlap: bool = False): + """Record the active cuda_graph_modules for GTP chain classification. + + Called by MLM at init, BEFORE classify_gtp_chains(). ``scope`` may be + None, an empty iterable (CG disabled), or an iterable of scope tags. + """ + global _CUDA_GRAPH_MODULES, _MOE_SHARED_EXPERT_OVERLAP + _CUDA_GRAPH_MODULES = set(scope) if scope else None + _MOE_SHARED_EXPERT_OVERLAP = bool(moe_shared_expert_overlap) + + +def _classify_param_chain(param_name: str) -> "GTPChain": + """Classify an GTPShardedParam by name + active cuda_graph_modules. + + embedding / output_layer are always UNGRAPHED. Other kinds (mamba mixer, + self/cross_attention, shared_experts, routed experts) are GRAPHED iff + their scope tag is present in cuda_graph_modules; otherwise UNGRAPHED. + """ + n = param_name + + # Always ungraphed — embedding and output_layer live outside any CG runner. 
+ if "embedding" in n or "output_layer" in n: + return GTPChain.UNGRAPHED + + scope = _CUDA_GRAPH_MODULES + if not scope: + # CG disabled: every GTP param goes to the single UNGRAPHED chain. + return GTPChain.UNGRAPHED + + if ".mlp.shared_experts." in n: + if _MOE_SHARED_EXPERT_OVERLAP: + return GTPChain.UNGRAPHED + return GTPChain.GRAPHED if ("moe" in scope or "moe_router" in scope) else GTPChain.UNGRAPHED + + if ".mlp.experts." in n: + return GTPChain.GRAPHED if "moe" in scope else GTPChain.UNGRAPHED + + if ".self_attention." in n or ".cross_attention." in n: + return GTPChain.GRAPHED if "attn" in scope else GTPChain.UNGRAPHED + + if ".mixer." in n: + return GTPChain.GRAPHED if "mamba" in scope else GTPChain.UNGRAPHED + + return GTPChain.UNGRAPHED + + +def classify_gtp_chains(model) -> None: + """Walk model.named_parameters() and set chain_id on every GTPShardedParam. + + Call once at init, AFTER set_cuda_graph_modules() and BEFORE the first fwd + of any graphed param. Raises if an already chain-initialized param would + be reclassified into a different chain (its prev/next links are already + wired into the wrong list). + """ + conflicts = [] + for name, param in model.named_parameters(): + if not isinstance(param, GTPShardedParam): + continue + target = _classify_param_chain(name).value + if param.prefetch_initialized and param.chain_id != target: + conflicts.append((name, param.chain_id, target)) + continue + param.chain_id = target + + # Bwd-prefetch opt-out: embedding.word_embeddings.weight does not need + # an AG in the bwd pass (its wgrad is a scatter-add on sharded rows + # and its input has no dgrad). Skipping its bwd AG saves one collective. 
+ if "embedding" in name: + param._need_weight_prefetch_bwd = False + if conflicts: + raise RuntimeError( + "classify_gtp_chains: the following params were already chain-initialized " + "with a different chain_id than the classifier would assign — this means " + "their chain links are already wired into the wrong list. Move classification " + "earlier in init. Conflicts: " + + ", ".join(f"{n}: {old!r}->{new!r}" for n, old, new in conflicts[:3]) + + ("..." if len(conflicts) > 3 else "") + ) + + +class GTPWeightState(Enum): + """State of a GTPShardedParam's AG / RS lifecycle (debug / stale-read guard).""" + + NONE = "NONE" # Sharded, no pending operation + ASYNC_WAIT = "ASYNC_WAIT" # Async all-gather in progress + DATA_READY = "DATA_READY" # Async all-gather complete, result in cache + DATA_READY_SYNC = "DATA_READY_SYNC" # Sync all-gather complete, result in cache + + +# Global GTP buffer cache (persists across clear(); never set to None after creation). +_GTP_CACHE = None +_GTP_PARAMS = [] + +# Global set of GTPShardedParam with in-flight async comms (AG or RS). +_inflight_comm_params: set = set() +_AG_STREAMS: Dict[str, torch.cuda.Stream] = {} +_RS_STREAMS: Dict[str, torch.cuda.Stream] = {} + +# Wgrad input buffer pool, keyed by (shape, dtype). UNGRAPHED-only: GRAPHED +# wgrad bufs need address stability for CG replay and are not pool-recycled. +_wgrad_buf_pool: Dict[tuple, list] = {} + + +def _wgrad_pool_get(shape: tuple, dtype: torch.dtype, device) -> torch.Tensor: + """Get a pool buffer or allocate fresh. Tagged so _wgrad_pool_put accepts + only pool-owned buffers — callers that don't use _wgrad_pool_get (e.g. 
+ Megatron layers.py wgrad GEMM, aten F.embedding bwd) fall through to the + caching allocator on release.""" + key = (shape, dtype) + pool = _wgrad_buf_pool.get(key) + if pool: + buf = pool.pop() + else: + buf = torch.empty(shape, dtype=dtype, device=device, requires_grad=False) + buf._from_gtp_wgrad_pool = True + return buf + + +def _wgrad_pool_put(buf: torch.Tensor): + """Return a pool-owned buffer for reuse (no-op for untagged buffers; see + _wgrad_pool_get).""" + if not getattr(buf, "_from_gtp_wgrad_pool", False): + return + key = (tuple(buf.shape), buf.dtype) + if key not in _wgrad_buf_pool: + _wgrad_buf_pool[key] = [] + _wgrad_buf_pool[key].append(buf) + + +def _stream_key(chain_id: str, group) -> tuple: + """Key for the per-(chain, group) AG/RS stream dicts. + + Two partitioning axes: + - chain_id: captured (GRAPHED) vs eager (UNGRAPHED) ops must not share + a stream (eager ops would contaminate capture/replay state). + - group: independent NCCL communicators (e.g. GTP vs EGTP) get their + own user-level stream to avoid cross-group serialization. + """ + return (chain_id, id(group) if group is not None else 0) + + +def get_ag_stream(chain_id: str = GTPChain.GRAPHED.value, group=None) -> torch.cuda.Stream: + """Return the GTP all-gather stream for (chain_id, group). See _stream_key.""" + key = _stream_key(chain_id, group) + if key not in _AG_STREAMS: + _AG_STREAMS[key] = torch.cuda.Stream() + return _AG_STREAMS[key] + + +def get_rs_stream(chain_id: str = GTPChain.GRAPHED.value, group=None) -> torch.cuda.Stream: + """Return the GTP reduce-scatter stream for (chain_id, group). 
See _stream_key.""" + key = _stream_key(chain_id, group) + if key not in _RS_STREAMS: + _RS_STREAMS[key] = torch.cuda.Stream() + return _RS_STREAMS[key] + + +def get_all_ag_streams() -> list: + """All AG streams created so far, across chains and groups.""" + return list(_AG_STREAMS.values()) + + +def get_all_rs_streams() -> list: + """All RS streams created so far, across chains and groups.""" + return list(_RS_STREAMS.values()) + + +def get_ag_streams_for_chain(chain_id: str) -> list: + """AG streams for one chain (all groups that chain has touched).""" + return [s for k, s in _AG_STREAMS.items() if k[0] == chain_id] + + +def get_rs_streams_for_chain(chain_id: str) -> list: + """RS streams for one chain (all groups that chain has touched).""" + return [s for k, s in _RS_STREAMS.items() if k[0] == chain_id] + + +# Cached once per process: whether the TE build exposes the split-phase APIs. +_COALESCED_AMAX_TE_APIS_AVAILABLE = hasattr(tex, "compute_amax_nvfp4") and hasattr( + tex, "quantize_cast_only_nvfp4" +) + +# Tier-2: multi-tensor amax kernel fuses N per-expert (zero_amax + amax + D2D) chains +# into two multi-tensor kernel launches. Independent of Tier-1 coalesced allreduce. +_MULTI_AMAX_TE_API_AVAILABLE = hasattr(tex, "compute_multi_amax_nvfp4") + + +def _coalesced_amax_static_eligible(weights): + """Check whether the coalesced-amax path is applicable (NVFP4 only). + + Caller already gates on GTP_CONFIG.coalesce_amax_allreduce (False for + non-NVFP4). 
Here we additionally verify TE API availability, batch size, + quantizer type (must have amax reduction), and the RHT flag.""" + if not _COALESCED_AMAX_TE_APIS_AVAILABLE: + return False + if len(weights) <= 1: + return False + has_amax = [getattr(w._quantizer, "with_amax_reduction", False) for w in weights] + if not all(has_amax): + return False + has_rht = any(getattr(w._quantizer, "with_rht", False) for w in weights) + if has_rht: + return False + return True + + +def _quantize_with_coalesced_amax(weights, cast_noop_flag): + """Replace the per-weight (compute_amax + allreduce + cast) loop with: + compute_amax loop → one coalesced allreduce → cast loop. + + The caller has already gated on ``skip_weight_cast`` (see + ``_all_gather_weight``); inside this function we always do the work. + """ + group = weights[0]._quantizer.amax_reduction_group + + # Materialize padded shards once; on padded last-rank get_padded_shard() + # launches an F.pad kernel, and we'd otherwise pay it twice per expert. + padded_shards = [w.get_padded_shard() for w in weights] + + # Phase 1: per-weight local amax into each w.quantized's amax buffers. + # Keep rowwise/columnwise both populated so the group allreduce sees + # whichever the consumer GEMM will read. + for w in weights: + w._quantizer.set_usage(rowwise=True, columnwise=True) + if _MULTI_AMAX_TE_API_AVAILABLE: + # Tier-2: single multi-tensor launch writes both rowwise and columnwise + # amax directly (no per-expert D2D replicate), fusing N per-expert chains. 
+ # Reuse the _cached_quantizers list already populated by _all_gather_weight + anchor = weights[0] + quantizer_list = anchor._cached_quantizers + if quantizer_list is None: + quantizer_list = [w._quantizer for w in weights] + anchor._cached_quantizers = quantizer_list + tex.compute_multi_amax_nvfp4(padded_shards, quantizer_list, [w.quantized for w in weights]) + else: + for w, shard in zip(weights, padded_shards): + tex.compute_amax_nvfp4(tensor=shard, quantizer=w._quantizer, output=w.quantized) + + # Phase 2: one coalesced allreduce across every weight's amax tensors. + amax_tensors = [] + for w in weights: + rw = w.quantized._amax_rowwise + cw = w.quantized._amax_columnwise + if rw is not None: + amax_tensors.append(rw) + if cw is not None and (rw is None or cw.data_ptr() != rw.data_ptr()): + amax_tensors.append(cw) + torch.distributed.all_reduce_coalesced( + amax_tensors, op=torch.distributed.ReduceOp.MAX, group=group + ) + + # Phase 3: per-weight cast using the pre-reduced amax; skips the internal + # allreduce inside the quantizer. + for w, shard in zip(weights, padded_shards): + tex.quantize_cast_only_nvfp4( + tensor=shard, quantizer=w._quantizer, output=w.quantized, noop=cast_noop_flag + ) + w.did_cast_to_low_precision = True + + +@dataclass +class GTPConfig: + """Global configuration for Generalized Tensor Parallelism.""" + + pad_for_alignment: int = 16 + check_param_states: bool = False + weight_prefetch: bool = True + # When True (default), wgrad reduce-scatter for non-chain-head GTP + # params uses async_op=True; finalize (handle.wait + main_grad.add_) + # runs in the cascade walk of a later bwd call, allowing RS-compute + # overlap. When False, every wgrad RS is synchronous and finalizes + # inline, at the cost of that overlap. + async_reduction: bool = True + # When True, _Linear.backward and _LayerNormLinear.backward run wgrad + # GEMM before dgrad GEMM. 
The GTP wgrad reduce-scatter is issued between + # them so its NCCL kernel overlaps with the dgrad GEMM, and the prev_w + # AG prefetch (issued by all_gather_and_prefetch_bwd at the top of bwd) + # overlaps with wgrad GEMM. When False (default), use the original + # dgrad-first order. Only affects _Linear and _LayerNormLinear; MLP and + # GroupedLinear keep the original schedule. + wgrad_before_dgrad: bool = False + # GTP companion to Megatron --fp8-param-gather: optimizer casts FP32 master + # directly into GTPShardedParam.quantized; forward's _quantize_if_needed + # short-circuits to the cached FP8. Moves BF16->FP8 off the fwd critical path. + fp8_param_gather: bool = False + # When True and the weight list in _all_gather_weight contains >1 NVFP4 + # shards that share an amax reduction group, coalesce their per-expert + # amax allreduces into a single NCCL call. Requires TE with + # tex.compute_amax_nvfp4 / tex.quantize_cast_only_nvfp4; the eligibility + # guard in _coalesced_amax_static_eligible falls back to the per-weight + # path when either binding is missing. + coalesce_amax_allreduce: bool = True + + +GTP_CONFIG = GTPConfig() + + +def update_config(**kwargs): + """Update the global GTP configuration.""" + for key, value in kwargs.items(): + if not hasattr(GTP_CONFIG, key): + raise ValueError(f"Unknown GTP config option: {key}") + setattr(GTP_CONFIG, key, value) + + +def tag_gtp_params_with_names(model): + """Populate _debug_name on every GTPShardedParam with its full dotted parameter name. + + Call once after model construction so the linking log prints human-readable names + instead of raw tensor ids. + """ + for name, param in model.named_parameters(): + if isinstance(param, GTPShardedParam): + param._debug_name = name + + +def _gtp_slice_one_param(param, gtp_group, *, name=""): + """Pad + slice a full-size BF16 weight to this rank's GTP shard. + + Caller attaches GTP attrs (see _gtp_attach_attrs). 
When called from the + legacy post-init path under fp8_model_init, tensor may be a + QuantizedTensor — F.pad dequantizes it before slicing. + """ + gtp_size = gtp_group.size() + gtp_rank = gtp_group.rank() + tensor = param.data + + if GTP_CONFIG.pad_for_alignment > 0: + # Pad before slicing so shards stay alignment-divisible and padding + # ends up contiguous at the tail of the gathered result. + alignment = GTP_CONFIG.pad_for_alignment * gtp_size + dim0 = tensor.shape[0] + pad_length = (alignment - dim0 % alignment) % alignment + if pad_length > 0: + tensor = torch.nn.functional.pad(tensor, (0, 0, 0, pad_length)) + else: + # No-pad mode: dim-0 must divide gtp_size or AG output loses tail rows. + assert tensor.shape[0] % gtp_size == 0, ( + f"_gtp_slice_one_param: {name}.shape[0]={tensor.shape[0]} is not " + f"divisible by gtp_size={gtp_size}. Either enable padding by " + "setting GTP_CONFIG.pad_for_alignment > 0, or ensure the weight's " + "dim-0 is a multiple of the GTP group size." + ) + pad_length = 0 + + shard_size = tensor.shape[0] // gtp_size + shard = tensor[gtp_rank * shard_size : (gtp_rank + 1) * shard_size] + gtp_shard = GTPShardedParam(shard.clone()) + gtp_shard.pad_length = pad_length + return gtp_shard + + +def _gtp_attach_attrs(gtp_shard, gtp_group, *, is_grouped=False, expert_idx=0): + """Attach group / ps_size / routed-expert tags and register in _GTP_PARAMS. + + Kept separate from _gtp_slice_one_param so attrs land on the post-quantize + param (when quantize fires between slice and attach). + """ + if is_grouped: + gtp_shard.expert_idx = expert_idx + gtp_shard.is_routed_expert = True + # Default to UNGRAPHED; classify_gtp_chains() reclassifies based on the + # cuda_graph_modules at init time. 
+ gtp_shard.chain_id = GTPChain.UNGRAPHED.value + gtp_shard.group = gtp_group + gtp_shard.ps_size = gtp_group.size() + global _GTP_PARAMS + _GTP_PARAMS.append(gtp_shard) + + +def wrap_module_params_gtp(module, weight_names, gtp_group, is_grouped=None): + """Shard and re-register module params as GTPShardedParam. + + Two call paths: + 1. Megatron-style modules (ColumnParallelLinear, etc.): full post-init slice. + 2. TE modules: per-param body no-ops because the reset_parameters hook + already produced GTPShardedParam instances. + + Also stamps GTP_CONFIG.wgrad_before_dgrad onto the module so TE's + autograd backward can read it without importing GTP_CONFIG. + """ + if gtp_group.size() == 1: + return + + for idx, name in enumerate(weight_names): + param = getattr(module, name, None) + if param is None: + continue + + # TE-side hook already sliced this one. + if isinstance(param, GTPShardedParam): + continue + + # delete the original parameter, which will be replaced by an GTP sharded one + delattr(module, name) + gtp_shard = _gtp_slice_one_param(param, gtp_group, name=name) + del param + _gtp_attach_attrs(gtp_shard, gtp_group, is_grouped=bool(is_grouped), expert_idx=idx) + # register the newly sharded param back to the module + module._parameters[name] = gtp_shard + + if is_grouped: + allweights = [getattr(module, name) for name in weight_names] + allweights[0].weight_list = allweights + + # Stamp scheduling flag onto the TE module so its autograd functions can + # read it without naming GTP_CONFIG. Default is False on the TE side; we + # only override when GTP is actually active for this module. + module.wgrad_before_dgrad = GTP_CONFIG.wgrad_before_dgrad + + +def gtp_slice_in_reset_parameters(module, name, param, expert_idx=0): + """Slice + attach attrs for one param. Called between init_fn(param) and + the optional quantizer(param) in TransformerEngineBaseModule.reset_parameters. 
+ + Only fires for params in module.weight_names (the GEMM weights); + layer-norm gammas, biases, etc. are left full-size. + + Returns the new GTPShardedParam or None (GTP not active for this param). + """ + gtp_group = getattr(module, "_gtp_group", None) + if gtp_group is None or gtp_group.size() == 1: + return None + weight_names = getattr(module, "weight_names", None) + if weight_names is None or name not in weight_names: + return None + is_grouped = bool(getattr(module, "_gtp_is_grouped", False)) + gtp_shard = _gtp_slice_one_param(param, gtp_group, name=name) + _gtp_attach_attrs(gtp_shard, gtp_group, is_grouped=is_grouped, expert_idx=expert_idx) + return gtp_shard + + +def gtp_finalize_module_in_reset_parameters(module, weight_names): + """GroupedLinear-only: attach weight_list to expert 0's shard for batched + all-gather. No-op when module._gtp_is_grouped is False. + """ + if not getattr(module, "_gtp_is_grouped", False): + return + gtp_group = getattr(module, "_gtp_group", None) + if gtp_group is None or gtp_group.size() == 1: + return + allweights = [getattr(module, n) for n in weight_names] + if allweights: + allweights[0].weight_list = allweights + + +class GTPShardHandle: + """Wrapper around a ``dist`` async-work handle for a GTP AG / RS. + + Tracks the participating shards so the wait-site can transition their + ``GTPWeightState`` and so the GTP module can prune the param from + ``_inflight_comm_params`` when the collective completes. 
+ """ + + def __init__(self, handle, gtp_shards, reduce_scatter=False): + self.handle = handle + self.gtp_shards = gtp_shards + self.reduce_scatter = reduce_scatter + _inflight_comm_params.add(gtp_shards[0]) + + def wait(self): + """Wait on the underlying NCCL work and update the shards' state.""" + if self.handle is not None: + self.handle.wait() + self.handle = None # Release NCCL Work and its C++ tensor references promptly + if GTP_CONFIG.check_param_states: + for w in self.gtp_shards: + if self.reduce_scatter: + w._set_rs_state(GTPWeightState.DATA_READY) + else: + w._set_state(GTPWeightState.DATA_READY) + + _inflight_comm_params.discard(self.gtp_shards[0]) + + +class GTPShardedParam(torch.nn.Parameter): + """A weight parameter sharded 1/N across a GTP process group. + + Materialized on-demand via async all-gather and gradient-reduced via + reduce-scatter. Carries its own prefetch-chain wiring (``prev_w`` / + ``next_w``), per-chain state, AG/RS cache tickets, and the metadata the + integrator needs to drive overlap with captured compute. + """ + + # Per-chain state: each chain_id (GTPChain.GRAPHED / GTPChain.UNGRAPHED) has + # its own linked list. Chains never cross-link: prev_w/next_w only connect + # params with the same chain_id. 
+ _chain_state: Dict[str, dict] = {} + + @classmethod + def _get_chain_state(cls, chain_id: str) -> dict: + if chain_id not in cls._chain_state: + cls._chain_state[chain_id] = { + "last_weight": None, + "link_node_count": 0, + "link_table_buffer": [], + "link_table_flushed": False, + } + return cls._chain_state[chain_id] + + @classmethod + def _buffer_link_table_row( + cls, prev: "GTPShardedParam", curr: "GTPShardedParam", chain: dict + ) -> None: + """Buffer one row of the prefetch-link table (flushed atomically on the second forward pass).""" + _W = 70 + + def _layer_id(name: str) -> str: + m = re.search(r"\d+", name) + return m.group() if m else "-" + + chain["link_node_count"] += 1 + if chain["link_node_count"] == 1: + chain_id = getattr(curr, "chain_id", GTPChain.UNGRAPHED.value) + chain["link_table_buffer"].append( + f"\n[{chain_id} chain]\n{'node_id':>7} | {'layer_id':>8} |" + f" {'curr_weight_name':<{_W}} |" + f" prev_weight_name\n{'-'*7}-+-{'-'*8}-+-{'-'*_W}-+-{'-'*_W}" + ) + # Seed weight (first GTP param) as row 0 + chain["link_table_buffer"].append( + f"{'0':>7} | {_layer_id(prev._debug_name):>8} | {prev._debug_name:<{_W}} | -" + ) + chain["link_table_buffer"].append( + f"{chain['link_node_count']:>7} | {_layer_id(curr._debug_name):>8} | " + f"{curr._debug_name:<{_W}} | {prev._debug_name}" + ) + + @staticmethod + def __new__(cls, tensor, *args, **kwargs): # pylint: disable=unused-argument + requires_grad = kwargs.get("requires_grad", True) + # pylint: disable-next=unexpected-keyword-arg + return super(GTPShardedParam, cls).__new__(cls, tensor, requires_grad=requires_grad) + + def __init__(self, tensor, *args, **kwargs): + del tensor, args, kwargs + super().__init__() + + # all gather + self.state = GTPWeightState.NONE + self._ag_ticket_fwd = None + self._ag_ticket_bwd = None + self._prefetch_handle = None + self._need_weight_prefetch = True + # Per-direction prefetch opt-outs. Default True. 
The embedding weight + # never needs an AG during bwd (its wgrad is a scatter-add indexed by + # token ids, and its input is non-differentiable, so no dgrad either). + # classify_gtp_chains() sets this to False for embedding.word_embeddings.weight. + self._need_weight_prefetch_bwd = True + self.ag_event = torch.cuda.Event(external=True) + # DDP backward hook (set by register_grad_accum_hook); invoked after + # the wgrad RS accumulation completes (Graphed.backward / chain cascade). + self._grad_accum_hook = None + # Quantization + self._quantizer = None + self.did_cast_to_low_precision = False + self.quantized = None + # Prefetching linked list + self.prefetch_initialized = False + self.next_w = None + self.prev_w = None + # Chain identity (GTPChain.GRAPHED / GTPChain.UNGRAPHED). Defaults to + # UNGRAPHED as a safe fallback; classify_gtp_chains(model) walks the + # model at init time (after set_cuda_graph_modules) and reclassifies + # based on param name + active cuda_graph_modules. + self.chain_id = GTPChain.UNGRAPHED.value + # Grouped gemm + self.is_routed_expert = False + self.expert_idx = None + self.group = None + self.weight_list = None + # Reduce-scatter state (set during wgrad_reduce_scatter) + self.rs_state = GTPWeightState.NONE + self._wgrad_rs_handle = None + self.rs_event = torch.cuda.Event(external=True) + self._rs_ticket = None + # Padding + self.pad_length = 0 + # Debug + self._debug_name = "" + # Hot-path caches (populated lazily on first use). chain_id/group are + # set after __init__, so we can't resolve streams eagerly here. 
+ self._cached_ag_stream = None + self._cached_rs_stream = None + self._cached_quantizers = None + self._cached_dtypes = None + self._cached_gtp_group = None + + def setup(self, weight_quantizer=None): + """Set quantizer and create quantized shard.""" + + if self._quantizer is None: + + def _configure_quantizer(q, group): + q = q.copy() + if hasattr(q, "with_amax_reduction"): + q.with_amax_reduction = True + q.amax_reduction_group = group + q.internal = False + # MXFP8 scales must stay in compact (unswizzled) layout so that + # per-shard scale_inv can be all-gathered via byte concatenation. + # GEMM-swizzled scales from independent shards don't compose into + # a valid swizzled layout for the full tensor after AG. + q.optimize_for_gemm = not isinstance(q, MXFP8Quantizer) + return q + + weights = ( + self.weight_list + if self.is_routed_expert and self.weight_list is not None + else [self] + ) + for quantizer, weight in zip(weight_quantizer, weights): + if quantizer is None: + continue + + weight._quantizer = _configure_quantizer(quantizer, weight.group) + weight.quantized = weight._quantizer.quantize(weight.get_padded_shard()) + weight.quantized.is_routed_expert = getattr(weight, "is_routed_expert", False) + # fp8_param_gather: the init quantize above already produced a + # valid FP8 cache from the BF16 shard; flag did_cast so iter-0's + # forward _quantize_if_needed short-circuits and the redundant + # BF16->FP8 cast on iter 0 is skipped. + if GTP_CONFIG.fp8_param_gather: + weight.did_cast_to_low_precision = True + + @property + def _weights(self): + """Return the list of individual weight shards (self for non-routed, weight_list for routed).""" + weights = self.weight_list if self.is_routed_expert else [self] + # Only meaningful when _set_state is actively tracking transitions. 
+ if GTP_CONFIG.check_param_states: + assert all(w.state == weights[0].state for w in weights) + return list(weights) + + @property + def _unsharded_shape_padded(self): + """Full unsharded shape *including* the pad rows on the last rank.""" + out_shape = list(self.size()) + out_shape[0] = out_shape[0] * self.group.size() + return tuple(out_shape) + + @property + def _unsharded_shape(self): + """Full unsharded shape with the pad rows stripped (logical shape).""" + out_shape = list(self._unsharded_shape_padded) + out_shape[0] -= self.pad_length + return tuple(out_shape) + + @property + def _sharded_padded_shape(self): + """This rank's local shard shape, padding included.""" + return tuple(self.size()) + + def get_padded_shard(self): + """Return the local shard already containing its share of padding (identity).""" + return self + + def _set_state(self, new_state: GTPWeightState): + """Advance the AG state (only inspected when ``check_param_states`` is on).""" + # Only inspected when check_param_states is on; skip writes otherwise. + if not GTP_CONFIG.check_param_states: + return + self.state = new_state + + def _set_rs_state(self, new_state: GTPWeightState): + """Advance the RS state (only inspected when ``check_param_states`` is on).""" + if not GTP_CONFIG.check_param_states: + return + self.rs_state = new_state + + def _get_cache_key(self, dtype, fwd: bool, reduce_scatter: bool) -> tuple: + """Build cache key using output shape + dtype. + + Weights with matching gathered shape and dtype share a buffer. + For expert weights gathered in parallel, self.expert_idx distinguishes them so + each gets a distinct buffer, while same-indexed experts across layers share. 
+ """ + + if not isinstance(dtype, torch.dtype): + return ( + self._unsharded_shape_padded, + dtype, + fwd, + not fwd, + self.expert_idx, + reduce_scatter, + ) + return (self._unsharded_shape_padded, dtype, self.expert_idx, reduce_scatter) + + def _quantize_if_needed(self, skip_weight_cast=False, cast_noop_flag=None): + """Re-quantize sharded weight into existing buffer. Returns quantized weight or self.""" + if self._quantizer is None: + self.did_cast_to_low_precision = False + return self + + # fp8_param_gather fast-path: optimizer already filled self.quantized; + # reuse it and keep BF16->FP8 off the forward critical path. + if GTP_CONFIG.fp8_param_gather and self.did_cast_to_low_precision: + return self.quantized + + self._quantizer.set_usage(rowwise=True, columnwise=True) + if skip_weight_cast is False or cast_noop_flag is not None: + tex.quantize( + tensor=self.get_padded_shard(), + quantizer=self._quantizer, + output=self.quantized, + noop=cast_noop_flag, + ) + self.did_cast_to_low_precision = True + + return self.quantized + + def _strip_padding(self, tensor): + if self.pad_length == 0: + return tensor + + if isinstance(tensor, QuantizedTensor): + assert isinstance( + tensor, (NVFP4TensorStorage, MXFP8TensorStorage) + ), f"Unsupported quantized tensor type for GTP padding: {type(tensor)}" + + metadata = tensor.get_metadata() + if metadata.get("rowwise_data") is not None: + metadata["rowwise_data"] = metadata["rowwise_data"][: -self.pad_length] + if metadata.get("columnwise_data") is not None: + if isinstance(tensor, NVFP4TensorStorage): + # NVFP4 transposes columnwise and packs 2 values per byte + metadata["columnwise_data"] = metadata["columnwise_data"][ + ..., : -self.pad_length // 2 + ].contiguous() + else: + # MXFP8 columnwise is not transposed, strip first dim + metadata["columnwise_data"] = metadata["columnwise_data"][: -self.pad_length] + M = self._unsharded_shape[0] + if isinstance(tensor, NVFP4TensorStorage): + # NVFP4 scale_inv shapes (see 
NVFP4Quantizer.get_scale_shape): + # rowwise_scale_inv: [round_up(M, 128), round_up(ceil(K/16), 4)] + # columnwise_scale_inv: [round_up(K, 128), round_up(ceil(M/16), 4)] + # GTP shards M (dim 0 of the weight), so strip to the unpadded sizes. + if metadata.get("rowwise_scale_inv") is not None: + m_rows = round_up_to_nearest_multiple(M, 128) + metadata["rowwise_scale_inv"] = metadata["rowwise_scale_inv"][:m_rows] + if metadata.get("columnwise_scale_inv") is not None: + m_tiles = round_up_to_nearest_multiple( + math.ceil(M / NVFP4_BLOCK_SCALING_SIZE), 4 + ) + metadata["columnwise_scale_inv"] = metadata["columnwise_scale_inv"][ + :, :m_tiles + ].contiguous() + else: + # MXFP8 scale_inv shapes (see MXFP8Quantizer.get_scale_shape): + # rowwise_scale_inv: [round_up(M, 128), round_up(K//32, 4)] + # columnwise_scale_inv: [round_up(M//32, 4), round_up(K, 128)] + # GTP shards M (dim 0 of the weight), so strip to the unpadded sizes. + if metadata.get("rowwise_scale_inv") is not None: + m_rows = round_up_to_nearest_multiple(M, 128) + metadata["rowwise_scale_inv"] = metadata["rowwise_scale_inv"][:m_rows] + if metadata.get("columnwise_scale_inv") is not None: + m_tiles = round_up_to_nearest_multiple(M // MXFP8_BLOCK_SCALING_SIZE, 4) + metadata["columnwise_scale_inv"] = metadata["columnwise_scale_inv"][:m_tiles] + + return type(tensor)(**metadata, shape=self._unsharded_shape, dtype=torch.bfloat16) + + return tensor[: -self.pad_length] + + def _all_gather_weight(self, async_op, skip_weight_cast, cast_noop_flag, fwd, nvtx_label=None): + """Quantize (if needed) and all-gather weight. Returns (weight_total, handle).""" + if nvtx_label is None: + nvtx_label = ( + self._debug_name + (".fwd" if fwd else ".bwd") + (".async" if async_op else ".sync") + ) + nvtx_range_push(f"{nvtx_label}.all_gather_weight") + + weights = self._weights + + # 1. Transition state for async gathers. 
+ if GTP_CONFIG.check_param_states: + new_state = GTPWeightState.ASYNC_WAIT if async_op else GTPWeightState.DATA_READY_SYNC + for w in weights: + w._set_state(new_state) + + # 2. Prepare: quantize, set usage direction. + # Static eligibility (quantizer class, flags, amax group) is fixed + # after model construction — compute once and cache on self so the + # hot path only pays the cheap per-call skip_weight_cast check. + if GTP_CONFIG.coalesce_amax_allreduce: + static_ok = getattr(self, "_coalesced_amax_static", None) + if static_ok is None: + static_ok = _coalesced_amax_static_eligible(weights) + self._coalesced_amax_static = static_ok + # Per-call: match the skip_weight_cast gate in _quantize_if_needed + # (fire when either skip_weight_cast is False or cast_noop_flag + # was provided by the FP8/NVFP4 recipe). + use_coalesced = static_ok and not (skip_weight_cast is True and cast_noop_flag is None) + else: + use_coalesced = False + + # Quantize step: coalesced batch / fp8_param_gather cache hit (skip) / + # legacy per-weight. set_usage runs uniformly after, gated by did_cast. + fp8_pg_hit = GTP_CONFIG.fp8_param_gather and self.did_cast_to_low_precision + + if use_coalesced: + _quantize_with_coalesced_amax(weights, cast_noop_flag) + elif not fp8_pg_hit: + for w in weights: + w._quantize_if_needed(skip_weight_cast, cast_noop_flag) + + for w in weights: + if w.did_cast_to_low_precision: + w._quantizer.set_usage(rowwise=fwd, columnwise=not fwd) + + # 3. Build gather inputs. + # quantizers / dtypes / gtp_group are stable after model construction — + # cache on the anchor (self == weights[0]) to avoid rebuilding lists + # every call. w.quantized is NOT cached because it can rebind. 
+ quantizers = self._cached_quantizers + if quantizers is None: + quantizers = [w._quantizer for w in weights] + self._cached_quantizers = quantizers + if weights[0].did_cast_to_low_precision: + gather_weights = [w.quantized for w in weights] + else: + gather_weights = list(w.get_padded_shard() for w in weights) + + # 4. Cache checkout — use pooled buffers for both async and sync gathers + # to avoid allocating fresh memory each iteration. + dtypes = self._cached_dtypes + if dtypes is None: + dtypes = [q.dtype if q is not None else w.dtype for q, w in zip(quantizers, weights)] + self._cached_dtypes = dtypes + out_buffers = [] + cache = get_global_GTP_cache() + for p, dt in zip(weights, dtypes): + if fwd: + if p._ag_ticket_fwd is None: + p._ag_ticket_fwd = cache.reserve(p, dt, fwd=True) + cache.get(p._ag_ticket_fwd) + cache.release(p._ag_ticket_fwd) + out_buffers.append(cache.get(p._ag_ticket_fwd)) + else: + if p._ag_ticket_bwd is None: + p._ag_ticket_bwd = cache.reserve(p, dt, fwd=False) + out_buffers.append(cache.get(p._ag_ticket_bwd)) + + # 5. Communicate. + gtp_group = self._cached_gtp_group + if gtp_group is None: + gtp_group = weights[0].group + self._cached_gtp_group = gtp_group + if GTP_CONFIG.check_param_states and len(gather_weights) > 1: + # Debug invariant: batched AG needs distinct output buffers per expert. + assert len(set(id(b) for b in out_buffers)) == len( + out_buffers + ), "Duplicate output buffers in batched all-gather — experts need distinct cache keys" + + # ASYNC AG: wrap issue on ag_stream — ag_stream's tail then reflects + # the collective's full lifecycle (what external wait_stream(ag_stream) + # drains depend on). The explicit outer→ag_stream sync event preserves + # the upstream quantize writer edge that the bare stream context would + # drop; held on self so PyTorch's event pool can't recycle the handle + # between capture and replay. + # SYNC AG: stay on caller — output ready on return. 
+ if async_op: + outer_stream = torch.cuda.current_stream() + ag_stream = get_ag_stream(self.chain_id, gtp_group) + if getattr(self, "_ag_outer_sync_event", None) is None: + self._ag_outer_sync_event = torch.cuda.Event() + outer_sync_event = self._ag_outer_sync_event + outer_sync_event.record(outer_stream) + ag_stream.wait_event(outer_sync_event) + ag_ctx = torch.cuda.stream(ag_stream) + else: + ag_ctx = nullcontext() + + with ag_ctx: + if len(gather_weights) > 1: + nvtx_range_push(f"{nvtx_label}.batched_gtp_ag") + results, handle = grouped_gather_along_first_dim( + gather_weights, + gtp_group, + async_op=async_op, + quantizers=quantizers, + output_tensors=out_buffers, + ) + nvtx_range_pop(f"{nvtx_label}.batched_gtp_ag") + else: + nvtx_range_push(f"{nvtx_label}.gtp_ag") + weight_total, handle = gather_along_first_dim( + gather_weights[0], + gtp_group, + quantizer=quantizers[0], + async_op=async_op, + output_tensor=out_buffers[0] if out_buffers is not None else None, + ) + nvtx_range_pop(f"{nvtx_label}.gtp_ag") + results = [weight_total] + + result = results if self.is_routed_expert else results[0] + + # 6. Wrap handle. + if async_op: + handle = GTPShardHandle(handle, weights) + else: + handle = None + + nvtx_range_pop(f"{nvtx_label}.all_gather_weight") + return result, handle + + def _wait_param_gather(self): + # Enter ag_stream context so handle.wait() + ag_event.record() both + # land on ag_stream. That makes ag_event mark ag_stream's tail, which + # is what external drains via wait_stream(ag_stream) actually block on. 
+ ag_stream = self._cached_ag_stream + if ag_stream is None: + ag_stream = get_ag_stream(self.chain_id, self.group) + self._cached_ag_stream = ag_stream + with torch.cuda.stream(ag_stream): + if self._prefetch_handle is not None: + self._prefetch_handle.wait() + self._prefetch_handle = None + self.ag_event.record() + + def _all_gather_weight_on_demand(self, fwd, skip_weight_cast=False, cast_noop_flag=None): + result, _ = self._all_gather_weight( + async_op=False, + skip_weight_cast=skip_weight_cast, + cast_noop_flag=cast_noop_flag, + fwd=fwd, + ) + result = result if self.is_routed_expert else [result] + result = [self._strip_padding(r) for r in result] + result = [r.detach().requires_grad_(w.requires_grad) for r, w in zip(result, self._weights)] + return result if self.is_routed_expert else result[0] + + def _get_prefetched_weight(self, fwd, skip_weight_cast=False, cast_noop_flag=None): + # ``skip_weight_cast`` and ``cast_noop_flag`` are accepted to keep the + # signature symmetric with ``_all_gather_weight_on_demand``. + del skip_weight_cast, cast_noop_flag + # Stale-read guard: state must reflect an AG issued for this cycle; + # otherwise cache.get() would return the prior iter's AG buffer. + if GTP_CONFIG.check_param_states: + for w in self._weights: + assert w.state in ( + GTPWeightState.ASYNC_WAIT, + GTPWeightState.DATA_READY, + GTPWeightState.DATA_READY_SYNC, + ), ( + f"[GTP] _get_prefetched_weight({'fwd' if fwd else 'bwd'}) on " + f"{self._debug_name} with state={w.state!r} — no AG issued; " + "cache.get() would return stale data. Check the chain's " + "_need_weight_prefetch flag and issuer's prefetch logic." + ) + _was_drained = getattr(self, "_already_ag_drained", False) + if _was_drained: + # Producer already drained via wait_async_comms; skip the captured + # cross-graph wait (CUDA no-op anyway). Correctness is provided by + # the eager main_stream sync chain in the surrounding training loop. 
    def all_gather_and_prefetch_bwd(self, nvtx_label=None):
        """
        Backward variant: get current weight (from cache if prefetched, else
        sync gather) and async-prefetch prev_w.

        Sequence performed by this method:

        1. If prefetching is enabled and ``next_w`` exists, take the weight
           from ``_get_prefetched_weight``; otherwise gather it synchronously
           with ``_all_gather_weight_on_demand``.
        2. If ``prev_w`` exists and both of its ``_need_weight_prefetch`` /
           ``_need_weight_prefetch_bwd`` flags are set, issue an async
           all-gather for it and stash the handle on
           ``prev_w._prefetch_handle``.
        3. Optionally reset the per-weight debug state to ``NONE``.
        4. Release this weight's backward tickets back to the global cache —
           only after the ``prev_w`` prefetch has been issued.

        NOTE(review): an earlier version of this docstring described
        ``get()`` returning the buffer to the pool and a ``checkout()`` call;
        confirm against the current ``GTPWeightCache`` API before relying on
        that description.

        Args:
            nvtx_label: Optional label forwarded to the prefetch all-gather.

        Returns:
            weight_total
        """

        # Consume: a prefetched copy is only expected when there is a next_w.
        if GTP_CONFIG.weight_prefetch and self.next_w is not None:
            result = self._get_prefetched_weight(False, skip_weight_cast=True)
        else:
            result = self._all_gather_weight_on_demand(False, skip_weight_cast=True)

        if (
            GTP_CONFIG.weight_prefetch
            and self.prev_w is not None
            and self.prev_w._need_weight_prefetch
            and self.prev_w._need_weight_prefetch_bwd
        ):
            # Pre-AG work (quantize, ticket lookup) runs on caller's stream;
            # the NCCL collective itself is wrapped on ag_stream inside
            # _all_gather_weight (see the async/sync gate there for rationale).
            _, handle = self.prev_w._all_gather_weight(
                async_op=True,
                skip_weight_cast=True,
                cast_noop_flag=None,
                fwd=False,
                nvtx_label=nvtx_label,
            )
            self.prev_w._prefetch_handle = handle

        # The unsharded tensor has been returned, no pending work so reset state to NONE
        if GTP_CONFIG.check_param_states:
            for w in self._weights:
                w._set_state(GTPWeightState.NONE)

        # Same condition as the consume branch above: release only the
        # tickets that were actually read through the prefetch path.
        if GTP_CONFIG.weight_prefetch and self.next_w is not None:
            cache = get_global_GTP_cache()
            for w in self._weights:
                cache.release(w._ag_ticket_bwd)

        return result
+ _, handle = self.next_w._all_gather_weight( + async_op=True, + skip_weight_cast=skip_weight_cast, + cast_noop_flag=cast_noop_flag, + fwd=fwd, + nvtx_label=nvtx_label, + ) + self.next_w._prefetch_handle = handle + + # The unsharded tensor has been returned, no pending work so reset state to NONE + if GTP_CONFIG.check_param_states: + for w in self._weights: + w._set_state(GTPWeightState.NONE) + + # Lazy population of linked list: link previous weight to current weight + # Uses per-chain state so dense and expert chains never cross-link. + cls = type(self) + chain = cls._get_chain_state(self.chain_id) + if not self.prefetch_initialized: + last_w = chain["last_weight"] + if last_w is not None and last_w.next_w is None: + cls._buffer_link_table_row(last_w, self, chain) + last_w.next_w = self + self.prev_w = last_w + + cache = get_global_GTP_cache() + + # Set the fwd ag buffer + quantizers = [w._quantizer for w in self._weights] + dtypes = [ + q.dtype if q is not None else w.dtype for q, w in zip(quantizers, self._weights) + ] + for w, dt in zip(self._weights, dtypes): + w._ag_ticket_fwd = cache.reserve(w, dt, fwd=True) + cache.get(w._ag_ticket_fwd) + cache.release(w._ag_ticket_fwd) + + self.prefetch_initialized = True + chain["last_weight"] = self + elif not chain["link_table_flushed"] and chain["link_table_buffer"]: + # Second forward pass: flush the complete table atomically to avoid interleaving + chain["link_table_flushed"] = True + print_rank_0("\n".join(chain["link_table_buffer"]) + "\n") + + return result + + def batched_all_gather_and_prefetch(self, **kwargs): + """Batched all-gather + prefetch for expert weights. 
Wrapper around all_gather_and_prefetch.""" + assert self.is_routed_expert and self.weight_list is not None + return self.all_gather_and_prefetch(**kwargs) + + def get_wgrad_tensor(self): + """Pool-allocate a wgrad scratch tensor of unsharded shape for the bwd GEMM.""" + return _wgrad_pool_get(self._unsharded_shape, self.main_grad.dtype, self.device) + + def register_grad_accum_hook(self, grad_accum_node, hook): + """Register a DDP backward hook to be called after the wgrad RS finalize. + + For GTP params, autograd may receive None (async RS) so the normal grad + accumulator hook never fires. Instead, the integrator (Graphed.backward + for captured chains, or the eager chain-tail cascade) calls this hook + explicitly after RS wait + gradient accumulation, ensuring DDP's + register_grad_ready fires at exactly the right time. + + ``grad_accum_node`` is accepted for caller-API compatibility but the + node itself is not retained — only the hook callable. + """ + del grad_accum_node + self._grad_accum_hook = hook + + @staticmethod + def _handle_megatron_grad_accum(param): + """Handle megatron DDP and gradient accumulation fusion. + + Do NOT set param.grad before calling the hook — the hook checks + param.grad and would accumulate it into main_grad if zero_out_wgrad + is True, corrupting the gradient with a non-zero dummy. + """ + if hasattr(param, "grad_added_to_main_grad"): + param.grad_added_to_main_grad = True + dummy_grad = get_dummy_wgrad(list(param.main_grad.shape), param.dtype) + if getattr(param, "_grad_accum_hook", None) is not None: + param._grad_accum_hook() + + param._set_rs_state(GTPWeightState.NONE) + return dummy_grad + + def _wait_reduce_scatter(self, finalize_grad=False): + # Enter rs_stream context so handle.wait() + rs_event.record() land + # on rs_stream — mirrors _wait_param_gather for the RS path. 
+ # When finalize_grad=True, main_grad.add_ also runs on rs_stream + # (right after NCCL RS), so it starts during AG drain rather than + # after it — avoids SM-saturation blocking cross-graph overlap. + rs_stream = self._cached_rs_stream + if rs_stream is None: + rs_stream = get_rs_stream(self.chain_id, self.group) + self._cached_rs_stream = rs_stream + with torch.cuda.stream(rs_stream): + if self._wgrad_rs_handle is not None: + self._wgrad_rs_handle.wait() + self._wgrad_rs_handle = None + self.rs_event.record() + if finalize_grad: + cache = get_global_GTP_cache() + for w in self._weights: + w._set_rs_state(GTPWeightState.NONE) + wgrad_rs = cache.get(w._rs_ticket) + w.main_grad.add_(wgrad_rs) + cache.release(w._rs_ticket) + if hasattr(w, "grad_added_to_main_grad"): + w.grad_added_to_main_grad = True + self._already_finalized = True + # Release stashed wgrad inputs: UNGRAPHED buffers go back to the pool; + # GRAPHED just drops Python refs (addresses must stay stable for CG). + if getattr(self, "_wgrad_input_bufs", None) is not None: + if self.chain_id == GTPChain.UNGRAPHED.value: + for buf in self._wgrad_input_bufs: + _wgrad_pool_put(buf) + self._wgrad_input_bufs = None + + def _reduce_scatter(self, wgrads, async_op, nvtx_label=None): + """Reduce-scatter one or more wgrads. Returns (outputs, handle). + + Single tensor: plain reduce-scatter (no coalescing). + Multiple tensors: coalesced reduce-scatter. 
+ """ + if nvtx_label is None: + nvtx_label = self._debug_name + ".bwd" + (".async" if async_op else ".sync") + + if GTP_CONFIG.check_param_states: + new_rs_state = GTPWeightState.ASYNC_WAIT if async_op else GTPWeightState.DATA_READY_SYNC + for w in self._weights: + w._set_rs_state(new_rs_state) + + if self.pad_length > 0: + wgrads = [torch.nn.functional.pad(w, (0, 0, 0, self.pad_length)) for w in wgrads] + + if async_op: + dtypes = [w.dtype for w in wgrads] + out_buffers = [] + cache = get_global_GTP_cache() + for p, dt in zip(self._weights, dtypes): + if p._rs_ticket is None: + p._rs_ticket = cache.reserve(p, dt, fwd=False, reduce_scatter=True) + out_buffers.append(cache.get(p._rs_ticket)) + else: + out_buffers = [None] * len(wgrads) + + # ASYNC RS: wrap issue on rs_stream — rs_stream's tail then reflects + # the collective's full lifecycle (what external wait_stream(rs_stream) + # drains depend on). The explicit outer→rs_stream sync event preserves + # the wgrad-GEMM writer edge that the bare stream context would drop; + # held on self so PyTorch's event pool can't recycle the handle + # between capture and replay. Mirrors AG path. + # SYNC RS: stay on caller — output ready on return. 
+ if async_op: + outer_stream = torch.cuda.current_stream() + rs_stream = get_rs_stream(self.chain_id, self.group) + if getattr(self, "_rs_outer_sync_event", None) is None: + self._rs_outer_sync_event = torch.cuda.Event() + outer_sync_event = self._rs_outer_sync_event + outer_sync_event.record(outer_stream) + rs_stream.wait_event(outer_sync_event) + rs_ctx = torch.cuda.stream(rs_stream) + else: + rs_ctx = nullcontext() + + with rs_ctx: + if len(wgrads) == 1: + nvtx_range_push(f"{nvtx_label}.gtp_rs") + out, handle = reduce_scatter_along_first_dim( + wgrads[0], self.group, async_op=async_op, output=out_buffers[0] + ) + nvtx_range_pop(f"{nvtx_label}.gtp_rs") + return [out], handle + + outputs = [] + nvtx_range_push(f"{nvtx_label}.batched_gtp_rs") + with torch.distributed._coalescing_manager( + group=self.group, device=wgrads[0].device, async_ops=async_op + ) as cm: + for out_buffer, tensor in zip(out_buffers, wgrads): + out, _ = reduce_scatter_along_first_dim(tensor, self.group, output=out_buffer) + outputs.append(out) + nvtx_range_pop(f"{nvtx_label}.batched_gtp_rs") + + return outputs, cm if async_op else None + + def wgrad_reduce_scatter(self, wgrad, nvtx_label=None): + """Reduce-scatter wgrad(s). Sync for last weight, async+deferred for others. + + Accepts a single tensor (non-routed) or list of tensors (routed experts). + + Returns: + Single tensor or list for sync (last weight) — backward should return this. + None or tuple of Nones for async — backward should return this. + """ + batched = isinstance(wgrad, (list, tuple)) + wgrads = list(wgrad) if batched else [wgrad] + weights = self._weights + + # UNGRAPHED-chain wgrads are recycled via the standalone pool (_wgrad_pool_put). + # GRAPHED-chain wgrads cannot pool-recycle because CUDA graphs require + # stable buffer addresses across replay. 
+ poolable = self.chain_id == GTPChain.UNGRAPHED.value + + if GTP_CONFIG.async_reduction and self.prev_w is not None: + # Async reduce-scatter (not last weight — deferred finish). Pre-RS + # work on caller; NCCL wrap lives at the collective site inside + # _reduce_scatter (mirrors the AG prefetch sites). + _, rs_handle = self._reduce_scatter(wgrads, async_op=True, nvtx_label=nvtx_label) + self._wgrad_rs_handle = GTPShardHandle(rs_handle, weights, reduce_scatter=True) + # Stash wgrad input buffers — cannot recycle yet because the async RS + # kernel is still reading them on rs_stream. + self._wgrad_input_bufs = wgrads + ret = tuple([None] * len(wgrads)) if batched else None + else: + # Sync reduce-scatter — reached as the natural chain-head case, recycle immediately + wgrads, _ = self._reduce_scatter(wgrads, async_op=False, nvtx_label=nvtx_label) + torch._foreach_add_([p.main_grad for p in weights], wgrads) + result = [self._handle_megatron_grad_accum(p) for p in weights] + + if poolable: + for buf in wgrads: + _wgrad_pool_put(buf) + ret = result if batched else result[0] + + # Wait for last reduce scatter if it was async + # Currently only support reduce scattering in reverse order + if GTP_CONFIG.async_reduction and self.next_w is not None: + self.next_w._wait_reduce_scatter() + + if getattr(self.next_w, "_already_finalized", False): + self.next_w._already_finalized = False + else: + self.next_w.rs_event.wait() + cache = get_global_GTP_cache() + next_weights = self.next_w._weights + wgrads = [cache.get(w._rs_ticket) for w in next_weights] + torch._foreach_add_([w.main_grad for w in next_weights], wgrads) + for w in next_weights: + self._handle_megatron_grad_accum(w) + cache.release(w._rs_ticket) + + return ret + + def batched_wgrad_reduce_scatter(self, wgrad_list, nvtx_label=None): + """Batched version of wgrad_reduce_scatter.""" + assert self.is_routed_expert and self.weight_list is not None + return self.wgrad_reduce_scatter(wgrad_list, nvtx_label=nvtx_label) + 
+ def __torch_function__(self, func, types, args=(), kwargs=None): + """Subclass-preserving dispatch for ``detach`` (other ops fall through).""" + del types # required by protocol, unused here + if kwargs is None: + kwargs = {} + + if func is torch.Tensor.detach: + with torch._C.DisableTorchFunctionSubclass(): + # Perform the raw detach + result = func(*args, **kwargs) + # Re-wrap it in your subclass so PyTorch is happy + return result.as_subclass(type(self)) + + # 2. For everything else (add, mul, etc.), be transparent/decay. + with torch._C.DisableTorchFunctionSubclass(): + return func(*args, **kwargs) + + +def print_rank_0(message, rank=None): + """If distributed is initialized or rank is specified, print only on rank 0.""" + if rank is not None: + if rank == 0: + print(message, flush=True) + elif torch.distributed.is_initialized(): + if torch.distributed.get_rank() == 0: + print(message, flush=True) + else: + print(message, flush=True) + + +@dataclass +class _TicketSlot: + """Internal slot backing a persistent ticket in the GTP buffer cache.""" + + key: tuple # cache key (shape, dtype, ...) + param: "GTPShardedParam" # for lazy allocation metadata + dtype: object # torch.dtype or tex.DType + reduce_scatter: bool + fwd: bool + chain_id: str = GTPChain.GRAPHED.value # chain this slot belongs to + buf: Optional[torch.Tensor] = field(default=None) # None when released or after clear() + + +class GTPWeightCache: + """ + Ticket-based buffer pool for GTP all-gather / reduce-scatter buffers. + + - ``reserve(param, dtype, fwd)`` → ``ticket`` + Assigns a persistent ticket (no buffer allocated yet). + - ``get(ticket)`` → ``buffer`` + Returns the buffer, lazily allocating from pool or fresh if needed. + - ``release(ticket)`` + Returns the buffer to the pool. Ticket remains valid; next ``get()`` + will re-allocate from the pool. + - ``clear()`` + Drops all buffers and pools. Tickets remain valid; next ``get()`` + lazily allocates fresh buffers. 
class GTPWeightCache:
    """
    Ticket-based buffer pool for GTP all-gather / reduce-scatter buffers.

    - ``reserve(param, dtype, fwd)`` → ``ticket``
        Assigns a persistent ticket (no buffer allocated yet).
    - ``get(ticket)`` → ``buffer``
        Returns the buffer, lazily allocating from pool or fresh if needed.
    - ``release(ticket)``
        Makes the buffer available in the pool for other tickets with the
        same key. The ticket keeps its own reference, so a later ``get()``
        on the same ticket returns the same buffer.
    - ``clear()``
        Drops all buffers and pools. Tickets remain valid; next ``get()``
        lazily allocates fresh buffers.
    """

    # Bytes per element for known dtypes (used for logging). Add new entries
    # here when GTP starts caching buffers of additional quantized dtypes.
    # Only DType values guaranteed exposed by the TE pybind bindings — verify
    # via ``hasattr(tex.DType, ...)`` before adding speculative entries.
    _BYTES_PER_ELEMENT = {
        torch.bfloat16: 2,
        torch.float16: 2,
        torch.float32: 4,
        tex.DType.kFloat4E2M1: 0.5,
        tex.DType.kFloat8E4M3: 1,
        tex.DType.kFloat8E5M2: 1,
    }

    # Quantized-storage attributes cloned into the CUDA-graph mempool, in
    # allocation order. Any of them may be None when that usage is disabled.
    _NVFP4_STORAGE_FIELDS = (
        "_rowwise_data",
        "_columnwise_data",
        "_rowwise_scale_inv",
        "_columnwise_scale_inv",
        "_amax_columnwise",
        "_amax_rowwise",
    )
    _MXFP8_STORAGE_FIELDS = (
        "_rowwise_data",
        "_columnwise_data",
        "_rowwise_scale_inv",
        "_columnwise_scale_inv",
    )

    def __init__(self):
        self._pool: Dict[tuple, List[torch.Tensor]] = defaultdict(list)
        self._slots: Dict[int, _TicketSlot] = {}
        self._next_ticket: int = 0
        self._total_bytes: int = 0  # running total of allocated bytes
        # cache key -> (param, dtype, reduce_scatter, fwd) used to allocate it
        self.key_to_allocate_func = {}

    @staticmethod
    def _buf_bytes(shape, dtype) -> int:
        """Estimate buffer size in bytes.

        Raises:
            KeyError: If ``dtype`` has no entry in ``_BYTES_PER_ELEMENT``.
        """
        numel = 1
        for d in shape:
            numel *= d
        if dtype not in GTPWeightCache._BYTES_PER_ELEMENT:
            raise KeyError(
                f"GTPWeightCache._buf_bytes: unknown dtype {dtype!r}. "
                "Add it to GTPWeightCache._BYTES_PER_ELEMENT with its bytes-per-element."
            )
        return int(numel * GTPWeightCache._BYTES_PER_ELEMENT[dtype])

    @staticmethod
    def _clone_storage_fields(storage, field_names):
        """Clone each non-None attribute of ``storage`` in place, in the given order."""
        for field_name in field_names:
            value = getattr(storage, field_name)
            if value is not None:
                setattr(storage, field_name, torch.clone(value))

    def _allocate_buffer(
        self, param: "GTPShardedParam", dtype, reduce_scatter, fwd
    ) -> torch.Tensor:
        """Allocate a fresh buffer for ``param`` and add its size to the running total.

        Reduce-scatter buffers use the sharded padded shape, all-gather
        buffers the unsharded padded shape. A non-``torch.dtype`` ``dtype``
        means a quantized buffer built by the param's quantizer.
        """
        if reduce_scatter:
            out_shape = param._sharded_padded_shape
        else:
            out_shape = param._unsharded_shape_padded

        if not isinstance(dtype, torch.dtype):
            quantizer = param._quantizer
            assert quantizer is not None
            param._quantizer.set_usage(rowwise=fwd, columnwise=not fwd)

            buf = param._quantizer.make_empty(
                out_shape, dtype=torch.bfloat16, device=torch.cuda.current_device()
            )
        else:
            buf = torch.empty(
                out_shape, dtype=dtype, device=param.device, memory_format=torch.contiguous_format
            )

        buf_bytes = self._buf_bytes(out_shape, dtype)
        self._total_bytes += buf_bytes
        print_rank_0(
            f"[GTP Cache] +{buf_bytes / 1024**2:.1f} MB (shape={out_shape}, dtype={dtype}) "
            f"total={self._total_bytes / 1024**2:.1f} MB id: {id(buf)} fwd: {fwd}"
        )
        return buf

    def reserve(self, param: "GTPShardedParam", dtype, fwd: bool, reduce_scatter=False) -> int:
        """Assign a persistent ticket. No buffer is allocated until ``get()``."""
        key = param._get_cache_key(dtype, fwd, reduce_scatter)
        ticket = self._next_ticket
        self._next_ticket += 1

        self._slots[ticket] = _TicketSlot(
            key=key,
            param=param,
            dtype=dtype,
            reduce_scatter=reduce_scatter,
            fwd=fwd,
            chain_id=getattr(param, "chain_id", GTPChain.UNGRAPHED.value),
        )
        return ticket

    def get(self, ticket: int) -> torch.Tensor:
        """Return the buffer for *ticket*, lazily allocating if needed."""
        slot = self._slots[ticket]
        if slot.buf is None:
            pool = self._pool[slot.key]
            slot.buf = (
                pool.pop()
                if pool
                else self._allocate_buffer(
                    slot.param, slot.dtype, slot.reduce_scatter, fwd=slot.fwd
                )
            )
            # Remember how to re-create a buffer for this key; used by
            # reallocate_to_mempool().
            self.key_to_allocate_func[slot.key] = (
                slot.param,
                slot.dtype,
                slot.reduce_scatter,
                slot.fwd,
            )

        return slot.buf

    def release(self, ticket: int):
        """Return the buffer to the pool. Ticket remains valid.

        slot.buf is intentionally NOT cleared: get() must stay idempotent so that
        CUDA-graph-captured buffers keep their fixed address across replays, and
        reallocate_to_mempool() can find every dense-chain buffer.
        """
        slot = self._slots[ticket]
        if slot.buf is None:
            return
        # Use identity check — tensor == tensor returns a multi-element bool tensor
        # which crashes in a boolean context ("Boolean value of Tensor is ambiguous").
        if not any(b is slot.buf for b in self._pool.get(slot.key, [])):
            self._pool[slot.key].append(slot.buf)

    def clear(self):
        """Drop all buffers; tickets remain valid and lazily re-allocate on next get()."""
        for slot in self._slots.values():
            slot.buf = None
        self._pool.clear()
        self._total_bytes = 0

    def reallocate_to_mempool(self, device, mempool):
        """Re-allocate GRAPHED-chain ticket buffers into a CUDA graph memory pool.

        Call BEFORE graph capture so every GRAPHED-chain buffer lives in the capture
        pool and no allocations are recorded inside the graph. UNGRAPHED-chain
        buffers are left in regular memory (they are never referenced by any
        captured graph).

        Allocator redirection is always undone, even if an allocation fails.

        Args:
            device: Device passed to the allocator begin/end calls.
            mempool: Memory-pool handle the new buffers are allocated into.

        Raises:
            TypeError: If a GRAPHED-chain param holds quantized storage of an
                unsupported type.
        """
        graphed = GTPChain.GRAPHED.value

        # Identify keys that belong to the GRAPHED chain
        graphed_keys = {slot.key for slot in self._slots.values() if slot.chain_id == graphed}

        # Clone only GRAPHED-chain pool buffers into the passed in mempool
        self._total_bytes = 0
        new_pool = defaultdict(list)
        torch._C._cuda_beginAllocateCurrentThreadToPool(device, mempool)
        try:
            for key, buffers in self._pool.items():
                if key not in graphed_keys:
                    continue
                new_pool[key] = [
                    self._allocate_buffer(*self.key_to_allocate_func[key]) for _ in buffers
                ]
        finally:
            torch._C._cuda_endAllocateToPool(device, mempool)

        # Map each buffer in the old pool to its corresponding new one (GRAPHED only).
        # Keyed by id() so the lookup is a pure identity match; the old buffers
        # stay alive in self._pool for the duration of the remap.
        old_to_new_buff = {}
        for key, old_pool in self._pool.items():
            if key not in graphed_keys:
                continue
            for old_buf, new_buf in zip(old_pool, new_pool[key]):
                old_to_new_buff[id(old_buf)] = new_buf

        # Replace each GRAPHED slot's reference; keep UNGRAPHED slots unchanged
        for slot in self._slots.values():
            if slot.chain_id != graphed or slot.buf is None:
                continue
            replacement = old_to_new_buff.get(id(slot.buf))
            if replacement is not None:
                slot.buf = replacement

        # Merge: GRAPHED keys get new buffers, UNGRAPHED keys keep old ones
        for key, buffers in self._pool.items():
            if key not in graphed_keys:
                new_pool[key] = buffers
        self._pool = new_pool

        # Remap quantized params into the CG mempool — but only for params on
        # the GRAPHED chain. UNGRAPHED-chain params (embedding, output_layer,
        # and MoE paths whose scope is not captured) run eagerly and don't
        # need their quantized storage in the CG mempool.
        torch._C._cuda_beginAllocateCurrentThreadToPool(device, mempool)
        try:
            for w in _GTP_PARAMS:
                if getattr(w, "chain_id", graphed) != graphed:
                    continue
                if w.quantized is None:
                    continue
                if isinstance(w.quantized, NVFP4TensorStorage):
                    self._clone_storage_fields(w.quantized, self._NVFP4_STORAGE_FIELDS)
                elif isinstance(w.quantized, MXFP8TensorStorage):
                    self._clone_storage_fields(w.quantized, self._MXFP8_STORAGE_FIELDS)
                else:
                    raise TypeError(
                        "GTPWeightCache.reallocate_to_mempool: unsupported quantized "
                        f"storage type {type(w.quantized).__name__}"
                    )
        finally:
            torch._C._cuda_endAllocateToPool(device, mempool)
This is the producer-side hook for cross-graph AG/RS overlap: + captured cudaStreamWaitEvent on an event recorded in a different capture + session is a CUDA no-op, so consumer graphs can't safely wait on + cross-graph events. Instead, the producer drains here and flags the + param; the consumer reads the flag and skips its captured wait. + + Args: + chain_id: If specified, only drain params on this chain. + skip_rs: Drain AG only; leave RS in flight. + finalize_after_drain: After RS drain, also accumulate wgrad into + main_grad. Runs main_grad.add_ on rs_stream (right after + NCCL RS) so it starts during AG drain rather than after, + avoiding SM-saturation that blocks cross-graph overlap. + Falls back to caller-stream accumulation if no RS handle. + + Per-param side effects: + * _already_ag_drained = True (if an AG handle was drained) + * _already_finalized = True (if finalize_after_drain=True) + """ + for param in list(_inflight_comm_params): + if ( + chain_id is not None + and getattr(param, "chain_id", GTPChain.UNGRAPHED.value) != chain_id + ): + continue + had_ag = param._prefetch_handle is not None + param._wait_param_gather() + if had_ag: + param._already_ag_drained = True + if not skip_rs: + param._wait_reduce_scatter(finalize_grad=finalize_after_drain) + # Fallback inline-accumulation: only when finalize is requested, + # _wait_reduce_scatter didn't already finalize, and an RS actually + # ran for this param (rs_ticket set). Skips pure-AG prefetches in + # _inflight_comm_params (no wgrad to accumulate). 
def grouped_gather_along_first_dim(
    weights: list,
    process_group,
    async_op: bool = False,
    quantizers: list = None,
    output_tensors: list = None,
):
    """
    All-gather multiple weights in a single coalesced operation.

    Handles NVFP4 post-processing for both sync and async paths.

    Args:
        weights: Tensors (or NVFP4 storages) to gather; may be empty.
        process_group: Process group the gather runs on.
        async_op: When True, return a handle that must be waited on.
        quantizers: Optional per-weight quantizers. ``None`` (the default)
            means no quantizer for any weight.
        output_tensors: Optional per-weight pre-allocated output buffers.

    Returns:
        ``(gathered_weights, handle)``. ``handle`` is None for the sync path
        and for empty input; for the async path it is the coalescing manager,
        wrapped in ``BatchedNVFP4AllGatherAsyncHandle`` if any per-weight
        NVFP4 handle needs post-processing.
    """
    # Nothing to gather: skip the collective instead of failing on weights[0].
    if not weights:
        return [], None

    # Determine device from first weight.
    inp = weights[0]
    if isinstance(inp, NVFP4TensorStorage):
        device = (
            inp._rowwise_data.device
            if inp._rowwise_data is not None
            else inp._columnwise_data.device
        )
    else:
        device = inp.device

    weights_all = []
    weight_handles = []
    with torch.distributed._coalescing_manager(
        group=process_group, device=device, async_ops=async_op
    ) as gather_coalescing_manager:
        for i, weight in enumerate(weights):
            weight_all, weight_handle = gather_along_first_dim(
                weight,
                process_group,
                # Both optional lists default to None, so guard each lookup.
                quantizer=quantizers[i] if quantizers is not None else None,
                output_tensor=output_tensors[i] if output_tensors is not None else None,
                grouped=True,
            )
            weights_all.append(weight_all)
            weight_handles.append(weight_handle)

    if async_op:
        handle = gather_coalescing_manager
        has_nvfp4_handles = any(isinstance(wh, _NVFP4AllGatherAsyncHandle) for wh in weight_handles)
        if has_nvfp4_handles:
            # Defer NVFP4 post-processing until the batched handle is waited on.
            handle = BatchedNVFP4AllGatherAsyncHandle(weight_handles, handle)
    else:
        # Sync path: the data is ready, so post-process NVFP4 outputs now.
        for wh in weight_handles:
            if isinstance(wh, _NVFP4AllGatherAsyncHandle):
                wh.post_process_nvfp4_gather()
        handle = None

    return weights_all, handle
The +# warning fires if TE is too old to expose ``register_gtp_hooks`` — in that +# case GTP silently no-ops, which is the failure mode we want to surface. +try: + from transformer_engine.pytorch.module.base import ( # noqa: E402 + register_gtp_hooks as _te_register_gtp_hooks, + ) + + _te_register_gtp_hooks( + slice_fn=gtp_slice_in_reset_parameters, + finalize_fn=gtp_finalize_module_in_reset_parameters, + wrap_fn=wrap_module_params_gtp, + ) +except ImportError: + import warnings + + warnings.warn( + "megatron.experimental.gtp: TransformerEngine does not expose register_gtp_hooks; " + "GTP will be a no-op for te.Linear / te.LayerNormLinear / te.GroupedLinear. " + "Upgrade TransformerEngine to a build that includes the GTP hook registry.", + RuntimeWarning, + stacklevel=2, + ) diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py index f3c2ded6907..49adaaee116 100644 --- a/megatron/training/arguments.py +++ b/megatron/training/arguments.py @@ -1426,6 +1426,66 @@ def validate_args(args, defaults={}): if args.expert_model_parallel_size > 1 and 'ep_dp' not in args.high_priority_stream_groups: args.high_priority_stream_groups.append('ep_dp') + + if args.generalized_tensor_parallel_size > 1 or args.expert_generalized_tensor_parallel_size > 1: + ps_size = args.generalized_tensor_parallel_size + eps_size = args.expert_generalized_tensor_parallel_size + if get_device_arch_version() >= 10: + # Setting GTP communication groups for high priority streams for Blackwell and later + # architectures. Assigning high priority to communication streams ensures that + # communication kernels are scheduled with higher priority, minimizing the exposed + # communication when it is overlapped with other computation kernels. 
+ if 'ps' not in args.high_priority_stream_groups: + args.high_priority_stream_groups.append('ps') + warn_rank_0("Setting 'ps' group for high priority streams.") + if eps_size > 1 and 'expt_gtp' not in args.high_priority_stream_groups: + args.high_priority_stream_groups.append('expt_gtp') + warn_rank_0("Setting 'expt_gtp' group for high priority streams.") + + # Sanity check for 'CUDA_GRAPHS_USE_NODE_PRIORITY'. + if args.cuda_graph_impl != "none": + assert os.environ.get('CUDA_GRAPHS_USE_NODE_PRIORITY') == "1", \ + 'GTP requires CUDA_GRAPHS_USE_NODE_PRIORITY=1 to make sure fine-grained GTP ' \ + 'comms can be well overlapped with GEMMs when CudaGraph is enabled for ' \ + 'Blackwell and later architecture.' + + # Sanity check for 'NCCL_PROTO'. + if os.environ.get('NCCL_PROTO', '').lower() == "simple": + warn_rank_0( + "Generally GTP prefers 'NCCL_PROTO=LL128 or LL' while get 'NCCL_PROTO=simple', " + "force setting NCCL_PROTO=Simple might introduce bad perf." + ) + + # When GTP is enabled and TP is disabled, default the bwd schedule to + # wgrad-before-dgrad on _Linear / _LayerNormLinear. The GTP wgrad + # reduce-scatter then overlaps with the dgrad GEMM, and the prev_w + # AG prefetch overlaps with the wgrad GEMM. With TP enabled, the + # TP comm-overlap path assumes dgrad-first, so leave the default + # order untouched there. + if args.tensor_model_parallel_size == 1: + from megatron.experimental.gtp import HAVE_GTP, update_gtp_config + if HAVE_GTP: + update_gtp_config(wgrad_before_dgrad=True) + warn_rank_0( + "GTP+no-TP detected: setting " + "GTPConfig.wgrad_before_dgrad=True (wgrad GEMM runs before " + "dgrad GEMM so RS NCCL overlaps with dgrad)." + ) + + # Propagate --fp8-param-gather into GTPConfig: enables optimizer-side + # FP32->FP8 cast for GTP shards, so the forward skips BF16->FP8. + if getattr(args, 'fp8_param_gather', False): + assert False, 'GTP+fp8-param-gather not supported yet!' 
+ from megatron.experimental.gtp import HAVE_GTP, update_gtp_config + if HAVE_GTP: + update_gtp_config(fp8_param_gather=True) + warn_rank_0( + "GTP + --fp8-param-gather: setting " + "GTPConfig.fp8_param_gather=True (optimizer step " + "pre-quantizes GTP shards, skipping the per-forward " + "BF16->FP8 cast)." + ) + # Disable bias gelu fusion if we are disabling bias altogether if not args.add_bias_linear: args.bias_gelu_fusion = False diff --git a/megatron/training/global_vars.py b/megatron/training/global_vars.py index ec0bc532f59..d23ca169242 100644 --- a/megatron/training/global_vars.py +++ b/megatron/training/global_vars.py @@ -255,6 +255,8 @@ def _set_wandb_writer(args): wandb_kwargs['entity'] = args.wandb_entity os.makedirs(wandb_kwargs['dir'], exist_ok=True) wandb.init(**wandb_kwargs) + # Log all env vars (as a dictionary) in config.yaml + wandb.config.update({"env_vars": dict(os.environ)}) _GLOBAL_WANDB_WRITER = wandb diff --git a/megatron/training/initialize.py b/megatron/training/initialize.py index ff655502019..817d5c19f96 100644 --- a/megatron/training/initialize.py +++ b/megatron/training/initialize.py @@ -344,6 +344,8 @@ def _initialize_distributed(get_embedding_ranks, get_position_embedding_ranks, s args.virtual_pipeline_model_parallel_size, pipeline_model_parallel_comm_backend=args.pipeline_model_parallel_comm_backend, use_sharp=args.use_sharp, + generalized_tensor_parallel_size=args.generalized_tensor_parallel_size, + expert_generalized_tensor_parallel_size=args.expert_generalized_tensor_parallel_size, context_parallel_size=args.context_parallel_size, hierarchical_context_parallel_sizes=args.hierarchical_context_parallel_sizes, hybrid_context_parallel=args.hybrid_context_parallel, diff --git a/megatron/training/training.py b/megatron/training/training.py index e55272402cc..1bd67fe2cb2 100644 --- a/megatron/training/training.py +++ b/megatron/training/training.py @@ -1448,6 +1448,19 @@ def get_model(model_provider_func, 
model_type=ModelType.encoder_or_decoder, wrap # For distillation ckpts without ModelOpt state args.modelopt_enabled = True + # Configure GTP padding alignment based on quantization recipe before model construction. + from megatron.experimental.gtp import HAVE_GTP, update_gtp_config + if HAVE_GTP and ( + getattr(args, 'generalized_tensor_parallel_size', 1) > 1 + or getattr(args, 'expert_generalized_tensor_parallel_size', 1) > 1 + ): + if getattr(args, 'fp4', None) is not None: + update_gtp_config(pad_for_alignment=16) + elif getattr(args, 'fp8_recipe', None) == 'mxfp8': + update_gtp_config(pad_for_alignment=32, coalesce_amax_allreduce=False) + elif getattr(args, 'fp8', None) is not None: + update_gtp_config(pad_for_alignment=16) + # Build model. def build_model(): if ( @@ -1496,6 +1509,30 @@ def build_model(): if not isinstance(model, list): model = [model] + # Classify each GTP param into its prefetch chain (GRAPHED vs UNGRAPHED) + # from args.cuda_graph_modules + moe_shared_expert_overlap. Must run after + # model build, before the first forward (which lazily builds chain links). + from megatron.experimental.gtp import ( + GTP_CONFIG, + HAVE_GTP, + classify_gtp_chains, + set_cuda_graph_modules, + tag_gtp_params_with_names, + ) + if HAVE_GTP: + _raw_modules = getattr(args, 'cuda_graph_modules', None) or [] + _cg_modules = {getattr(s, 'name', str(s)) for s in _raw_modules} if _raw_modules else None + _mse_overlap = getattr(args, 'moe_shared_expert_overlap', False) + set_cuda_graph_modules(_cg_modules, moe_shared_expert_overlap=_mse_overlap) + for model_module in model: + tag_gtp_params_with_names(model_module) + classify_gtp_chains(model_module) + if ( + getattr(args, 'generalized_tensor_parallel_size', 1) > 1 + or getattr(args, 'expert_generalized_tensor_parallel_size', 1) > 1 + ): + print_rank_0(f"GTP enabled. {GTP_CONFIG}") + # Set tensor model parallel attributes if not set. 
# Only parameters that are already tensor model parallel have these # attributes set for them. We should make sure the default attributes diff --git a/megatron/training/utils.py b/megatron/training/utils.py index 7abb80de14f..0583aa741f5 100644 --- a/megatron/training/utils.py +++ b/megatron/training/utils.py @@ -39,13 +39,42 @@ from megatron.core.tensor_parallel import param_is_not_tensor_parallel_duplicate from megatron.core.utils import ( get_batch_on_this_cp_rank, - get_data_parallel_group_if_dtensor, to_local_if_dtensor, unwrap_model, ) from megatron.core.transformer.module import param_is_not_shared +from megatron.experimental.gtp import GTPShardedParam, HAVE_GTP + + +def _compute_norm_2(params_list): + """Compute squared L2 norm of a list of tensors. Returns a CUDA scalar.""" + if len(params_list) > 0: + dummy_overflow_buf = torch.tensor([0], dtype=torch.int, device='cuda') + norm, _ = multi_tensor_applier( + multi_tensor_l2norm, dummy_overflow_buf, [params_list], False, + ) + return norm * norm + return torch.zeros((1,), dtype=torch.float32, device='cuda') + + +def _get_param_data(param, force_create_fp32_copy, bf16): + """Extract the appropriate data tensor from a param for norm computation. + + Returns (data_tensor, is_sharded) where is_sharded indicates the param has + a sharded main_param from the distributed optimizer. + """ + if bf16: + if not force_create_fp32_copy and hasattr(param, 'main_param'): + if getattr(param, 'main_param_sharded', False): + if param.main_param is not None: + return param.main_param, True + return None, True + return param.main_param, False + return param.data.float(), False + return param.data, False + def calc_params_l2_norm(model, force_create_fp32_copy=False): """Calculate l2 norm of parameters""" @@ -55,7 +84,6 @@ def calc_params_l2_norm(model, force_create_fp32_copy=False): if getattr(args, 'use_megatron_fsdp', False): # All Megatron FSDP parameters are expected to be PyTorch DTensor. 
- # params_data is a dict of device_mesh -> list of local tensors. params = [] for model_chunk in model: model_chunk.stop_communication() @@ -69,129 +97,103 @@ def calc_params_l2_norm(model, force_create_fp32_copy=False): return calc_dtensor_params_l2_norm(params) - # Seperate moe and dense params - params_data = [] - moe_params_data = [] - sharded_params_data = [] - data_parallel_group = None + # 8 buckets: 4 categories × (non-sharded, sharded optimizer main_param). + # Each category needs different reduction groups. + params_data = [] # Dense, non-sharded + sharded_params_data = [] # Dense, sharded → reduce over dp_cp + gtp_params_data = [] # GTP, non-sharded + gtp_sharded_params_data = [] # GTP, sharded → reduce over dp_cp_with_ps + moe_params_data = [] # MoE, non-sharded + moe_sharded_params_data = [] # MoE, sharded → reduce over expert_dp + moe_gtp_params_data = [] # MoE-GTP, non-sharded + moe_gtp_sharded_params_data = [] # MoE-GTP, sharded → reduce over expert_dp_with_ps + + ps_rank = mpu.get_generalized_tensor_parallel_rank() + eps_rank = mpu.get_expert_generalized_tensor_parallel_rank() for model_chunk in model: for param in model_chunk.parameters(): - data_parallel_group = get_data_parallel_group_if_dtensor(param, data_parallel_group) - is_not_tp_duplicate = param_is_not_tensor_parallel_duplicate(param) - if not is_not_tp_duplicate: + is_gtp = HAVE_GTP and isinstance(param, GTPShardedParam) + + # Filter TP duplicates. GTP params are always unique across TP ranks + # so skip this check for them. + if not is_gtp and not param_is_not_tensor_parallel_duplicate(param): continue - assert is_not_tp_duplicate - if not getattr(param, 'allreduce', True): + is_expert = not getattr(param, 'allreduce', True) + + # Filter GTP duplicates: non-GTP params are replicated across GTP ranks. + if is_expert: + if not is_gtp and eps_rank != 0: + continue + else: + if not is_gtp and ps_rank != 0: + continue + + # Route to the correct bucket. 
+ if is_expert: assert param_is_not_shared(param) param = to_local_if_dtensor(param) - if args.bf16: - if not force_create_fp32_copy and hasattr(param, 'main_param'): - if getattr(param, 'main_param_sharded', False): - if param.main_param is not None: - sharded_params_data.append(param.main_param) - else: - moe_params_data.append(param.main_param) - else: - # Fallback to original logic of making a fp32 copy of the - # parameter if `.main_param` attribute is not available. - moe_params_data.append(param.data.float()) + data, is_sharded = _get_param_data(param, force_create_fp32_copy, args.bf16) + if data is None: + continue + if is_gtp: + (moe_gtp_sharded_params_data if is_sharded else moe_gtp_params_data).append(data) else: - moe_params_data.append(param.data) + (moe_sharded_params_data if is_sharded else moe_params_data).append(data) else: if param_is_not_shared(param): param = to_local_if_dtensor(param) - if args.bf16: - if not force_create_fp32_copy and hasattr(param, 'main_param'): - if getattr(param, 'main_param_sharded', False): - if param.main_param is not None: - sharded_params_data.append(param.main_param) - else: - params_data.append(param.main_param) - else: - # Fallback to original logic of making a fp32 copy of the - # parameter if `.main_param` attribute is not available. - params_data.append(param.data.float()) + data, is_sharded = _get_param_data(param, force_create_fp32_copy, args.bf16) + if data is None: + continue + if is_gtp: + (gtp_sharded_params_data if is_sharded else gtp_params_data).append(data) else: - params_data.append(param.data) - - # Calculate norm. - dummy_overflow_buf = torch.tensor([0], dtype=torch.int, device='cuda') - if len(params_data) > 0: - norm, _ = multi_tensor_applier( - multi_tensor_l2norm, dummy_overflow_buf, [params_data], False # no per-parameter norm. 
- ) - norm_2 = norm * norm - else: - norm_2 = torch.zeros((1,), dtype=torch.float32, device='cuda') - - if data_parallel_group is not None: - torch.distributed.all_reduce( - norm_2, op=torch.distributed.ReduceOp.SUM, group=data_parallel_group - ) - - # Add norm contribution from params with sharded main_params. These norms need to be - # accumulated across the DP group since the main parameters are sharded because - # of distributed optimizer. - if len(sharded_params_data) > 0: - dummy_overflow_buf = torch.tensor([0], dtype=torch.int, device='cuda') - sharded_norm, _ = multi_tensor_applier( - multi_tensor_l2norm, - dummy_overflow_buf, - [sharded_params_data], - False, # no per-parameter norm. - ) - sharded_norm_2 = sharded_norm * sharded_norm - else: - sharded_norm_2 = torch.zeros((1,), dtype=torch.float32, device='cuda') - # Sum over all DP groups, including CP since distributed optimizer state is - # sharded jointly over DP+CP. - torch.distributed.all_reduce( - sharded_norm_2, - op=torch.distributed.ReduceOp.SUM, - group=mpu.get_data_parallel_group(with_context_parallel=True) - ) - norm_2 += sharded_norm_2 - - # Add norm contribution from expert layers in MoEs. - if len(moe_params_data) > 0: - moe_norm, _ = multi_tensor_applier( - multi_tensor_l2norm, - dummy_overflow_buf, - [moe_params_data], - False, # no per-parameter norm. - ) - moe_norm_2 = moe_norm * moe_norm - - # Account for MoE norm even if current rank doesn't have any expert params to prevent - # hang in models with un-even numbers of MoE layers. - # See details in https://gitlab-master.nvidia.com/ADLR/megatron-lm/-/issues/409 - else: - moe_norm_2 = torch.zeros_like(norm_2) - - # Reduce norm across model parallel groups (dense and expert). - # Dense params should sum across all model-parallel GPUs (tensor + pipeline). 
+ (sharded_params_data if is_sharded else params_data).append(data) + + # --- Compute local norm^2 for each bucket --- + params_norm_2 = _compute_norm_2(params_data) + sharded_norm_2 = _compute_norm_2(sharded_params_data) + gtp_norm_2 = _compute_norm_2(gtp_params_data) + gtp_sharded_norm_2 = _compute_norm_2(gtp_sharded_params_data) + moe_norm_2 = _compute_norm_2(moe_params_data) + moe_sharded_norm_2 = _compute_norm_2(moe_sharded_params_data) + moe_gtp_norm_2 = _compute_norm_2(moe_gtp_params_data) + moe_gtp_sharded_norm_2 = _compute_norm_2(moe_gtp_sharded_params_data) + + def _sum_reduce(tensor, group): + torch.distributed.all_reduce(tensor, op=torch.distributed.ReduceOp.SUM, group=group) + + # --- Sharded optimizer DP reductions (each category uses its own group) --- + _sum_reduce(sharded_norm_2, mpu.get_data_parallel_group(with_context_parallel=True)) + _sum_reduce(gtp_sharded_norm_2, mpu.get_data_parallel_group(with_context_parallel=True, with_gtp=True)) + _sum_reduce(moe_sharded_norm_2, mpu.get_expert_data_parallel_group()) + _sum_reduce(moe_gtp_sharded_norm_2, mpu.get_expert_data_parallel_group(with_gtp=True)) + + # --- Combine dense + GTP norms --- + # model_parallel group = TP×PP×GTP, so GTP reduction is implicit. + norm_2 = params_norm_2 + sharded_norm_2 + gtp_norm_2 + gtp_sharded_norm_2 + + # --- Combine MoE + MoE-GTP norms --- + # expert_model_parallel = TP×EP×PP (does NOT include EPS), so we need + # an explicit EPS reduction for MoE-GTP before the model-parallel reduce. 
+ moe_gtp_combined_norm_2 = moe_gtp_norm_2 + moe_gtp_sharded_norm_2 + _sum_reduce(moe_gtp_combined_norm_2, mpu.get_expert_generalized_tensor_parallel_group()) + moe_total_norm_2 = moe_norm_2 + moe_sharded_norm_2 + moe_gtp_combined_norm_2 + + # --- Model-parallel reductions --- dense_reduce_group = mpu.get_model_parallel_group() - ranks_in_dense_reduce_group = torch.distributed.get_process_group_ranks(dense_reduce_group) - # Expert params should sum across all model-parallel GPUs (expert + tensor + pipeline). expert_reduce_group = mpu.get_expert_tensor_model_pipeline_parallel_group() + ranks_in_dense_reduce_group = torch.distributed.get_process_group_ranks(dense_reduce_group) ranks_in_expert_reduce_group = torch.distributed.get_process_group_ranks(expert_reduce_group) - # If dense and expert reduce groups are the same, sum then reduce. if ranks_in_dense_reduce_group == ranks_in_expert_reduce_group: - norm_2 += moe_norm_2 - torch.distributed.all_reduce( - norm_2, op=torch.distributed.ReduceOp.SUM, group=dense_reduce_group - ) - # If dense and expert reduce groups are different, reduce then sum. 
+ norm_2 += moe_total_norm_2 + _sum_reduce(norm_2, dense_reduce_group) else: - torch.distributed.all_reduce( - norm_2, op=torch.distributed.ReduceOp.SUM, group=dense_reduce_group - ) - torch.distributed.all_reduce( - moe_norm_2, op=torch.distributed.ReduceOp.SUM, group=expert_reduce_group - ) - norm_2 += moe_norm_2 + _sum_reduce(norm_2, dense_reduce_group) + _sum_reduce(moe_total_norm_2, expert_reduce_group) + norm_2 += moe_total_norm_2 return norm_2.item() ** 0.5 diff --git a/tests/unit_tests/dist_checkpointing/test_layer_wise_optimizer.py b/tests/unit_tests/dist_checkpointing/test_layer_wise_optimizer.py index 3f60658a005..40f64fac04b 100644 --- a/tests/unit_tests/dist_checkpointing/test_layer_wise_optimizer.py +++ b/tests/unit_tests/dist_checkpointing/test_layer_wise_optimizer.py @@ -130,7 +130,7 @@ def setup_method(self, method): def teardown_method(self, method): Utils.destroy_model_parallel() - def test_parameter_sharding(self): + def test_generalized_tensor_parallel(self): """Test that parameters are correctly sharded across DP ranks.""" Utils.initialize_model_parallel(1, 1) diff --git a/tests/unit_tests/generalized_tensor_parallel/__init__.py b/tests/unit_tests/generalized_tensor_parallel/__init__.py new file mode 100644 index 00000000000..e093dccdbb0 --- /dev/null +++ b/tests/unit_tests/generalized_tensor_parallel/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. + +"""GTP unit tests — launched torchrun-native (same as the rest of Megatron's unit tests). + + export TE_PATH=/path/to/TransformerEngine + export PYTHONPATH="${TE_PATH}:${PYTHONPATH}" + torchrun --nproc-per-node 4 -m pytest tests/unit_tests/generalized_tensor_parallel/ -v + +Tests use the torchrun-managed dist group (initialized once per module via +``Utils.initialize_model_parallel``) and build their own GTP subgroups with +``dist.new_group(...)``. 
Multi-GPU tests skip when the world_size requested by a +test doesn't match what torchrun launched with (all GTP multi-GPU tests need 4). +""" diff --git a/tests/unit_tests/generalized_tensor_parallel/test_gtp.py b/tests/unit_tests/generalized_tensor_parallel/test_gtp.py new file mode 100644 index 00000000000..02b86ae5666 --- /dev/null +++ b/tests/unit_tests/generalized_tensor_parallel/test_gtp.py @@ -0,0 +1,1619 @@ +# Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# See LICENSE for license information. + +"""Unit tests for Generalized Tensor Parallelism (GTP). + +Test groups +----------- +1. TestGTPWeightState – state-machine transitions (single-process) +2. TestGTPWeightCache – coat-check buffer pool (single-process) +3. TestGTPSharding – wrap_module_params_gtp: shard content + padding (multi-GPU) +4. TestWrapModuleParams – wrap_module_params_gtp: param replacement + weight_list (multi-GPU) +5. TestLinearGTP – Linear forward/backward numerical correctness (multi-GPU) +6. TestLayerNormLinearGTP – LayerNormLinear forward/backward smoke test (multi-GPU) +7. TestGroupedLinearGTP – GroupedLinear forward/backward smoke test (multi-GPU) +8. TestGTPPrefetchChain – linked-list next_w/prev_w wiring (multi-GPU) +9. TestGTPWgradRS – wgrad reduce-scatter shape + multi-layer deferred path (multi-GPU) +10. TestGTPMicrobatches – output consistency across microbatches (multi-GPU) +11. TestNVFP4LinearGTP – Linear + NVFP4 recipe: quantized shard setup, fwd/bwd (multi-GPU) +12. TestNVFP4GroupedLinearGTP – GroupedLinear + NVFP4 recipe: coalesced AG + fwd/bwd (multi-GPU) +13. TestMXFP8LinearGTP – Linear + MXFP8 recipe: quantized shard setup, fwd/bwd, padding (multi-GPU) +14. TestGTPConfig – update_config: valid/invalid keys (single-process) +15. TestGTPShardedParamProperties – shape computations, get_padded_shard, _strip_padding (single-process) +16. TestGTPCacheKey – _get_cache_key: expert vs non-expert, fwd vs bwd (single-process) +17. 
TestGTPCacheRelease – reserve/get/release pool semantics (single-process) +18. TestTagGTPParamsWithNames – _debug_name population on GTPShardedParam (single-process) +19. TestGTPGroupSizeOne – wrap_module_params_gtp no-op when gtp_group.size()==1 (single-process) +20. TestGTPPrefetchDisabled – weight_prefetch=False: single-pass forward still works (multi-GPU) +21. TestFuseWgradAccumulation – fuse_wgrad_accumulation=True: wgrad→main_grad (multi-GPU) +22. TestGTPGradAccumHook – main_grad updated after reduce-scatter backward (multi-GPU) +23. TestWaitAsyncCommsFallback – wait_async_comms(finalize_after_drain=True) inline-accumulation fallback when _wgrad_rs_handle is None (single-process) + +Run via torchrun (matches the rest of Megatron's unit tests): + + torchrun --nproc-per-node 4 -m pytest tests/unit_tests/generalized_tensor_parallel/test_gtp.py -v + +Multi-GPU tests skip when ``torch.distributed.get_world_size()`` doesn't match the required +world size (4 for everything in this file). 
+""" + +import pytest +import torch +import torch.distributed as dist +import torch.nn as nn +import transformer_engine.pytorch as te +from transformer_engine.common.recipe import NVFP4BlockScaling +from transformer_engine.pytorch import fp8_autocast, is_mxfp8_available, is_nvfp4_available +from transformer_engine.pytorch.quantization import FP8GlobalStateManager +from transformer_engine.pytorch.quantized_tensor import QuantizedTensor + +import megatron.experimental.gtp.generalized_tensor_parallelism as gtp_module +from megatron.experimental.gtp import GTPShardedParam, wrap_module_params_gtp +from megatron.experimental.gtp.generalized_tensor_parallelism import GTPWeightCache, GTPWeightState +from tests.unit_tests.test_utilities import Utils + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture(scope="module", autouse=True) +def _torchrun_dist_init(): + """Initialize the torchrun-managed dist group once per module. + + GTP tests use ``dist.new_group(...)`` to build their own GTP subgroup + within the world that torchrun set up. Each test runs on every torchrun + rank in parallel (standard Mcore convention); ``_run_distributed`` below + only skips when the required world size doesn't match what torchrun + provides. 
+ """ + Utils.initialize_model_parallel() + yield + Utils.destroy_model_parallel() + + +@pytest.fixture(autouse=True) +def reset_fp8_state(): + yield + FP8GlobalStateManager.reset() + + +@pytest.fixture(autouse=True) +def reset_gtp_globals(): + """Reset all GTP mutable class/module-level state between tests.""" + yield + GTPShardedParam._chain_state = {} + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _run_distributed(fn, required_world_size: int, *args) -> None: + """Run ``fn`` on every torchrun rank. + + ``fn(rank, world_size, port, *args)`` matches the pre-existing worker + signature; ``port`` is unused (dist already initialized by torchrun) but + kept so the worker bodies don't need editing. + """ + actual_world_size = torch.distributed.get_world_size() + if actual_world_size != required_world_size: + pytest.skip( + f"Requires world_size={required_world_size}, " + f"got {actual_world_size} (launch with torchrun --nproc-per-node={required_world_size})" + ) + fn(torch.distributed.get_rank(), actual_world_size, None, *args) + + +def _requires_multi_gpu(n: int = 4): + if torch.cuda.device_count() < n: + pytest.skip(f"Requires at least {n} CUDA devices") + + +def _requires_nvfp4(): + if not is_nvfp4_available(): + pytest.skip("NVFP4 not available (requires compute capability >= 10.0)") + + +# --------------------------------------------------------------------------- +# 1. 
GTPWeightState – state-machine transition tests +# --------------------------------------------------------------------------- + + +class TestGTPWeightState: + + @staticmethod + def _param(): + return GTPShardedParam(torch.zeros(4, 4)) + + def test_full_cycle(self): + p = self._param() + assert p.state == GTPWeightState.NONE + p._set_state(GTPWeightState.ASYNC_WAIT) + p._set_state(GTPWeightState.DATA_READY) + p._set_state(GTPWeightState.NONE) + assert p.state == GTPWeightState.NONE + + def test_sync_path_cycle(self): + """NONE → DATA_READY_SYNC → NONE (sync all-gather path).""" + p = self._param() + p._set_state(GTPWeightState.DATA_READY_SYNC) + p._set_state(GTPWeightState.NONE) + assert p.state == GTPWeightState.NONE + + def test_rs_state_full_cycle(self): + """RS state machine: NONE → ASYNC_WAIT → DATA_READY → NONE.""" + p = self._param() + assert p.rs_state == GTPWeightState.NONE + p._set_rs_state(GTPWeightState.ASYNC_WAIT) + p._set_rs_state(GTPWeightState.DATA_READY) + p._set_rs_state(GTPWeightState.NONE) + assert p.rs_state == GTPWeightState.NONE + + +# --------------------------------------------------------------------------- +# 2. 
GTPWeightCache – coat-check buffer pool tests +# --------------------------------------------------------------------------- + + +class TestGTPWeightCache: + + class _FakeGroup: + def __init__(self, size=2): + self._size = size + + def size(self): + return self._size + + def rank(self): + return 0 + + def _param(self, shape=(8, 4), gtp_size=2): + p = GTPShardedParam(torch.zeros(*shape)) + p.group = self._FakeGroup(gtp_size) + p.expert_idx = None + p.pad_length = 0 + p._quantizer = None + return p + + def test_reserve_returns_ticket(self): + cache = GTPWeightCache() + p = self._param() + ticket = cache.reserve(p, torch.bfloat16, fwd=True) + assert isinstance(ticket, int) + + def test_reserve_get_roundtrip(self): + cache = GTPWeightCache() + p = self._param() + ticket = cache.reserve(p, torch.bfloat16, fwd=True) + buf = cache.get(ticket) + assert buf is not None + # get() returns same buf on second call (buf cached in slot) + buf2 = cache.get(ticket) + assert buf2 is buf + + def test_buffer_reused_after_release(self): + cache = GTPWeightCache() + p = self._param() + t1 = cache.reserve(p, torch.bfloat16, fwd=True) + buf1 = cache.get(t1) + cache.release(t1) + # Reserve a new ticket, buf should come from pool + t2 = cache.reserve(p, torch.bfloat16, fwd=True) + buf2 = cache.get(t2) + assert buf1 is buf2, "Buffer should be reused from pool after release" + cache.release(t2) + + def test_two_simultaneous_reserves_are_distinct(self): + cache = GTPWeightCache() + p = self._param() + t1 = cache.reserve(p, torch.bfloat16, fwd=True) + buf1 = cache.get(t1) + t2 = cache.reserve(p, torch.bfloat16, fwd=True) + buf2 = cache.get(t2) + assert buf1 is not buf2, "Concurrent reserves must get distinct buffers" + + def test_tickets_are_unique(self): + """Each reserve() call returns a new unique ticket.""" + cache = GTPWeightCache() + p = self._param() + t1 = cache.reserve(p, torch.bfloat16, fwd=True) + t2 = cache.reserve(p, torch.bfloat16, fwd=True) + assert t1 != t2, "Each reserve() must 
return a unique ticket" + + def test_invalid_ticket_raises(self): + cache = GTPWeightCache() + with pytest.raises(KeyError): + cache.get(9999) + + def test_different_shapes_use_distinct_pool_slots(self): + cache = GTPWeightCache() + p1 = self._param(shape=(8, 4)) + p2 = self._param(shape=(16, 4)) + t1 = cache.reserve(p1, torch.bfloat16, fwd=True) + buf1 = cache.get(t1) + t2 = cache.reserve(p2, torch.bfloat16, fwd=True) + buf2 = cache.get(t2) + assert buf1.shape != buf2.shape + cache.release(t1) + cache.release(t2) + + def test_fwd_bwd_tickets_are_distinct(self): + """fwd=True and fwd=False reserves always receive distinct ticket IDs.""" + cache = GTPWeightCache() + p = self._param() + t_fwd = cache.reserve(p, torch.bfloat16, fwd=True) + t_bwd = cache.reserve(p, torch.bfloat16, fwd=False) + assert t_fwd != t_bwd + + +# --------------------------------------------------------------------------- +# 3. GTP weight sharding: shard content and alignment padding +# --------------------------------------------------------------------------- + + +def _worker_sharding_aligned(rank, world_size, port): + K, M = world_size * 32, 16 # K divisible by 16*world_size → no padding + full_weight = torch.arange(K * M, dtype=torch.float32).reshape(K, M).cuda() + dist.broadcast(full_weight, src=0) + + gtp_group = dist.new_group(list(range(world_size))) + mod = nn.Module() + mod.weight = nn.Parameter(full_weight.clone(), requires_grad=False) + wrap_module_params_gtp(mod, ["weight"], gtp_group) + shard = mod.weight + + rows_per_rank = K // world_size + assert shard.shape == (rows_per_rank, M), f"rank {rank}: unexpected shape {shard.shape}" + assert shard.pad_length == 0 + expected = full_weight[rank * rows_per_rank : (rank + 1) * rows_per_rank] + assert torch.allclose(shard.data, expected), f"rank {rank}: shard content mismatch" + + +def _worker_sharding_padding(rank, world_size, port): + alignment = 16 * world_size + K = alignment - 1 # deliberately unaligned + M = 16 + full_weight = 
torch.ones(K, M, dtype=torch.float32).cuda() + dist.broadcast(full_weight, src=0) + + gtp_group = dist.new_group(list(range(world_size))) + mod = nn.Module() + mod.weight = nn.Parameter(full_weight.clone(), requires_grad=False) + wrap_module_params_gtp(mod, ["weight"], gtp_group) + shard = mod.weight + + padded_K = alignment + rows_per_rank = padded_K // world_size + + if rank == world_size - 1: + assert shard.pad_length > 0 + # The shard tensor holds only the real rows; get_padded_shard() appends zero rows. + padded = shard.get_padded_shard() + assert ( + padded.shape[0] == rows_per_rank + ), f"rank {rank}: expected padded shard {rows_per_rank} rows, got {padded.shape[0]}" + n_real = K - rank * rows_per_rank + assert torch.all(padded[n_real:] == 0), "Padding rows must be zero" + else: + # pad_length is set globally on every rank's shard (slicer attaches the + # global padding amount), so we don't assert anything about it here — + # only the last rank's shard contains the actual padding rows. + assert ( + shard.shape[0] == rows_per_rank + ), f"rank {rank}: expected {rows_per_rank} rows, got {shard.shape[0]}" + + +class TestGTPSharding: + def test_aligned_shard_content(self): + _requires_multi_gpu(4) + _run_distributed(_worker_sharding_aligned, 4) + + def test_unaligned_shard_padding(self): + _requires_multi_gpu(4) + _run_distributed(_worker_sharding_padding, 4) + + +# --------------------------------------------------------------------------- +# 4. 
wrap_module_params_gtp: param replacement and GroupedLinear weight_list +# --------------------------------------------------------------------------- + + +def _worker_linear_param_replaced(rank, world_size, port): + in_f, out_f = 64, 128 + gtp_group = dist.new_group(list(range(world_size))) + layer = te.Linear( + in_features=in_f, + out_features=out_f, + bias=False, + params_dtype=torch.bfloat16, + device="cuda", + gtp_group=gtp_group, + ) + w = layer.weight + assert isinstance(w, GTPShardedParam), "weight must be GTPShardedParam" + assert w.shape == (out_f // world_size, in_f), f"unexpected shard shape {w.shape}" + assert w.group is gtp_group + + +def _worker_grouped_weight_list(rank, world_size, port): + num_gemms, in_f, out_f = 3, 32, 64 + gtp_group = dist.new_group(list(range(world_size))) + layer = te.GroupedLinear( + num_gemms=num_gemms, + in_features=in_f, + out_features=out_f, + bias=False, + params_dtype=torch.bfloat16, + device="cuda", + gtp_group=gtp_group, + ) + w0 = layer.weight0 + assert isinstance(w0, GTPShardedParam) + assert w0.weight_list is not None + assert len(w0.weight_list) == num_gemms + assert [w.expert_idx for w in w0.weight_list] == list(range(num_gemms)) + + +class TestWrapModuleParams: + def test_linear_weight_replaced(self): + _requires_multi_gpu(4) + _run_distributed(_worker_linear_param_replaced, 4) + + def test_grouped_linear_weight_list(self): + _requires_multi_gpu(4) + _run_distributed(_worker_grouped_weight_list, 4) + + +# --------------------------------------------------------------------------- +# 5. 
Linear forward/backward numerical correctness +# --------------------------------------------------------------------------- + + +def _worker_linear_correctness(rank, world_size, port): + """GTP output == (all-gathered weight) @ input, and dX matches.""" + torch.manual_seed(0) + batch, in_f, out_f = 16, 64, 128 # out_f % (16*world_size)==0 → no padding + dtype = torch.bfloat16 + gtp_group = dist.new_group(list(range(world_size))) + + layer = te.Linear( + in_features=in_f, + out_features=out_f, + bias=False, + params_dtype=dtype, + device="cuda", + gtp_group=gtp_group, + ) + + # Reconstruct full weight from shards (all-gather) + shard = layer.weight.data.clone() + all_shards = [torch.zeros_like(shard) for _ in range(world_size)] + dist.all_gather(all_shards, shard, group=gtp_group) + full_weight = torch.cat(all_shards, dim=0).float()[:out_f] # strip any padding + + # Shared input across ranks + inp = torch.randn(batch, in_f, dtype=dtype, device="cuda") + dist.broadcast(inp, src=0) + + inp_gtp = inp.clone().requires_grad_(True) + inp_ref = inp.clone().requires_grad_(True) + + # GTP forward + out_gtp = layer(inp_gtp, is_first_microbatch=True) + + # Reference forward + out_ref = inp_ref.float() @ full_weight.T + out_ref = out_ref.to(dtype) + + assert out_gtp.shape == out_ref.shape, f"Shape mismatch {out_gtp.shape} vs {out_ref.shape}" + assert torch.allclose( + out_gtp.float(), out_ref.float(), atol=0.1, rtol=0.1 + ), f"Output mismatch max_diff={(out_gtp.float()-out_ref.float()).abs().max():.4f}" + + # wgrad RS path always accumulates into main_grad; allocate before backward. 
+ layer.weight.main_grad = torch.zeros(layer.weight.shape, dtype=dtype, device="cuda") + + # Backward: compare input gradient + grad_out = torch.randn_like(out_gtp) + dist.broadcast(grad_out, src=0) + out_gtp.backward(grad_out) + out_ref.backward(grad_out.float()) + + assert inp_gtp.grad is not None + assert torch.allclose( + inp_gtp.grad.float(), inp_ref.grad.float(), atol=0.1, rtol=0.1 + ), f"dX mismatch max_diff={(inp_gtp.grad.float()-inp_ref.grad.float()).abs().max():.4f}" + + +class TestLinearGTP: + def test_forward_backward_correctness(self): + _requires_multi_gpu(4) + _run_distributed(_worker_linear_correctness, 4) + + +# --------------------------------------------------------------------------- +# 6. LayerNormLinear forward/backward smoke test +# --------------------------------------------------------------------------- + + +def _worker_layernorm_linear(rank, world_size, port): + torch.manual_seed(0) + seq, batch, in_f, out_f = 4, 2, 64, 128 + dtype = torch.bfloat16 + gtp_group = dist.new_group(list(range(world_size))) + + layer = te.LayerNormLinear( + in_features=in_f, + out_features=out_f, + bias=False, + params_dtype=dtype, + device="cuda", + gtp_group=gtp_group, + ) + assert isinstance(layer.weight, GTPShardedParam) + + inp = torch.randn(seq, batch, in_f, dtype=dtype, device="cuda", requires_grad=True) + dist.broadcast(inp, src=0) + + out = layer(inp, is_first_microbatch=True) + assert out.shape == (seq, batch, out_f), f"unexpected output shape {out.shape}" + + layer.weight.main_grad = torch.zeros(layer.weight.shape, dtype=dtype, device="cuda") + out.sum().backward() + assert inp.grad is not None and inp.grad.shape == inp.shape + + +class TestLayerNormLinearGTP: + def test_forward_backward(self): + _requires_multi_gpu(4) + _run_distributed(_worker_layernorm_linear, 4) + + +# --------------------------------------------------------------------------- +# 7. 
GroupedLinear forward/backward smoke test +# --------------------------------------------------------------------------- + + +def _worker_grouped_linear(rank, world_size, port, num_gemms): + torch.manual_seed(0) + in_f, out_f, total_tokens = 32, 64, num_gemms * 4 + dtype = torch.bfloat16 + gtp_group = dist.new_group(list(range(world_size))) + + layer = te.GroupedLinear( + num_gemms=num_gemms, + in_features=in_f, + out_features=out_f, + bias=False, + params_dtype=dtype, + device="cuda", + gtp_group=gtp_group, + ) + assert isinstance(layer.weight0, GTPShardedParam) + + m_splits = [total_tokens // num_gemms] * num_gemms + m_splits[-1] += total_tokens - sum(m_splits) + + inp = torch.randn(total_tokens, in_f, dtype=dtype, device="cuda", requires_grad=True) + dist.broadcast(inp, src=0) + + out = layer(inp, m_splits=m_splits, is_first_microbatch=True) + assert out.shape == (total_tokens, out_f), f"unexpected output shape {out.shape}" + + for i in range(num_gemms): + w = getattr(layer, f"weight{i}") + w.main_grad = torch.zeros(w.shape, dtype=dtype, device="cuda") + out.sum().backward() + assert inp.grad is not None and inp.grad.shape == inp.shape + + +class TestGroupedLinearGTP: + @pytest.mark.parametrize("num_gemms", [2, 4]) + def test_forward_backward(self, num_gemms): + _requires_multi_gpu(4) + _run_distributed(_worker_grouped_linear, 4, num_gemms) + + +# --------------------------------------------------------------------------- +# 8. 
# Prefetch chain: next_w / prev_w wiring after first forward pass
# ---------------------------------------------------------------------------


def _worker_chain_wired(rank, world_size, port):
    """Check that one forward pass over two GTP layers links their weights.

    After ``l0`` and then ``l1`` have each run once, ``l0.weight.next_w`` must
    be ``l1.weight``, ``l1.weight.prev_w`` must be ``l0.weight``, and the outer
    ends of the chain must be ``None``.
    """
    torch.manual_seed(0)
    in_f, out_f = 32, 64
    dtype = torch.bfloat16
    gtp_group = dist.new_group(list(range(world_size)))

    linear_kwargs = dict(
        in_features=in_f,
        out_features=out_f,
        bias=False,
        params_dtype=dtype,
        device="cuda",
        gtp_group=gtp_group,
    )
    first = te.Linear(**linear_kwargs)
    second = te.Linear(**linear_kwargs)

    x = torch.randn(4, in_f, dtype=dtype, device="cuda")
    dist.broadcast(x, src=0)

    # The linked list is built by the first forward pass, in call order.
    for layer in (first, second):
        layer(x, is_first_microbatch=True)

    head, tail = first.weight, second.weight
    assert head.next_w is tail, "w0.next_w should point to w1"
    assert tail.prev_w is head, "w1.prev_w should point back to w0"
    assert tail.next_w is None
    assert head.prev_w is None


def _worker_chain_async_prefetch(rank, world_size, port):
    """Run two forward passes over a two-layer GTP chain and check the output.

    The first pass builds the chain; the second pass is the one intended to use
    async prefetch.
    NOTE(review): only finiteness of the final output is asserted; the prefetch
    state of the second weight is not inspected here.
    """
    torch.manual_seed(0)
    in_f, out_f = 32, 64
    dtype = torch.bfloat16
    gtp_group = dist.new_group(list(range(world_size)))

    linear_kwargs = dict(
        in_features=in_f,
        out_features=out_f,
        bias=False,
        params_dtype=dtype,
        device="cuda",
        gtp_group=gtp_group,
    )
    first = te.Linear(**linear_kwargs)
    second = te.Linear(**linear_kwargs)

    x = torch.randn(4, in_f, dtype=dtype, device="cuda")
    dist.broadcast(x, src=0)

    passes = 2  # pass 1 wires the chain, pass 2 exercises it
    for _ in range(passes):
        out = first(x, is_first_microbatch=True) + second(x, is_first_microbatch=True)
    assert torch.isfinite(out).all(), "Non-finite output on second pass"


class TestGTPPrefetchChain:
    """4-rank launchers for the prefetch-chain workers."""

    def test_chain_wired_after_first_pass(self):
        _requires_multi_gpu(4)
        _run_distributed(_worker_chain_wired, 4)

    def test_async_prefetch_second_pass(self):
        _requires_multi_gpu(4)
        _run_distributed(_worker_chain_async_prefetch, 4)


# ---------------------------------------------------------------------------
# 9. Wgrad reduce-scatter: shape and deferred async path
# ---------------------------------------------------------------------------


def _worker_wgrad_shape(rank, world_size, port):
    """If ``weight.grad`` is populated after backward, it must match the shard shape.

    NOTE(review): when ``weight.grad`` is ``None`` after backward this worker
    asserts nothing.
    """
    torch.manual_seed(0)
    in_f, out_f = 32, 64
    dtype = torch.bfloat16
    gtp_group = dist.new_group(list(range(world_size)))

    linear = te.Linear(
        in_features=in_f,
        out_features=out_f,
        bias=False,
        params_dtype=dtype,
        device="cuda",
        gtp_group=gtp_group,
        fuse_wgrad_accumulation=False,
    )
    x = torch.randn(8, in_f, dtype=dtype, device="cuda", requires_grad=True)
    dist.broadcast(x, src=0)

    linear.weight.main_grad = torch.zeros(linear.weight.shape, dtype=dtype, device="cuda")
    loss = linear(x, is_first_microbatch=True).sum()
    loss.backward()

    w = linear.weight
    if w.grad is None:
        return
    assert w.grad.shape == w.shape, f"wgrad shape {w.grad.shape} != shard shape {w.shape}"


def _worker_multilayer_deferred_rs(rank, world_size, port):
    """Two-layer GTP: async RS deferred for layer0 (non-last), sync for layer1 (last in bwd).

    NOTE(review): the closing assertion only checks that ``main_grad`` is not
    ``None``, which the explicit allocation below already guarantees; it does
    not verify that any gradient was accumulated.
    """
    torch.manual_seed(0)
    in_f, out_f = 32, 64
    dtype = torch.bfloat16
    gtp_group = dist.new_group(list(range(world_size)))

    linear_kwargs = dict(
        in_features=in_f,
        out_features=out_f,
        bias=False,
        params_dtype=dtype,
        device="cuda",
        gtp_group=gtp_group,
    )
    first = te.Linear(**linear_kwargs)
    second = te.Linear(**linear_kwargs)

    x = torch.randn(8, in_f, dtype=dtype, device="cuda", requires_grad=True)
    dist.broadcast(x, src=0)

    # wgrad RS path always accumulates into main_grad; allocate before backward.
    for layer in (first, second):
        layer.weight.main_grad = torch.zeros(layer.weight.shape, dtype=dtype, device="cuda")

    summed = first(x, is_first_microbatch=True) + second(x, is_first_microbatch=True)
    summed.sum().backward()

    for lyr in (first, second):
        assert lyr.weight.main_grad is not None, f"No main_grad on {lyr.__class__.__name__}.weight"


class TestGTPWgradRS:
    """4-rank launchers for the wgrad reduce-scatter workers."""

    def test_wgrad_shape_matches_shard(self):
        _requires_multi_gpu(4)
        _run_distributed(_worker_wgrad_shape, 4)

    def test_multilayer_deferred_rs(self):
        _requires_multi_gpu(4)
        _run_distributed(_worker_multilayer_deferred_rs, 4)


# ---------------------------------------------------------------------------
# 10. Multiple microbatches: output must be consistent when weight unchanged
# ---------------------------------------------------------------------------


def _worker_microbatches(rank, world_size, port):
    """Same input, same weight: first- and second-microbatch outputs must agree.

    The layer is called once with ``is_first_microbatch=True`` and once with
    ``False``; the two detached outputs are compared with ``torch.allclose``.
    """
    torch.manual_seed(0)
    batch, in_f, out_f = 8, 64, 128
    dtype = torch.bfloat16
    gtp_group = dist.new_group(list(range(world_size)))

    linear = te.Linear(
        in_features=in_f,
        out_features=out_f,
        bias=False,
        params_dtype=dtype,
        device="cuda",
        gtp_group=gtp_group,
    )
    x = torch.randn(batch, in_f, dtype=dtype, device="cuda")
    dist.broadcast(x, src=0)

    # Clone the first result so the second call cannot alias it.
    first_out = linear(x, is_first_microbatch=True).detach().clone()
    # Second microbatch with same weight (skip_weight_cast=True path)
    second_out = linear(x, is_first_microbatch=False).detach()

    outputs_match = torch.allclose(first_out, second_out)
    assert (
        outputs_match
    ), f"Microbatch outputs differ; max_diff={(first_out-second_out).abs().max():.6f}"


class TestGTPMicrobatches:
    """4-rank launcher for the microbatch-consistency worker."""

    def test_consistent_across_microbatches(self):
        _requires_multi_gpu(4)
        _run_distributed(_worker_microbatches, 4)


# ---------------------------------------------------------------------------
# 11.
# NVFP4 + GTP: Linear forward/backward, quantized shard setup
# ---------------------------------------------------------------------------


def _worker_nvfp4_linear(rank, world_size, port):
    """Verify that GTP Linear correctly quantizes, all-gathers, and computes with NVFP4.

    Runs a first-microbatch forward under ``NVFP4BlockScaling``, checks that
    ``weight.quantized`` is a ``QuantizedTensor`` and the output is finite with
    shape ``(batch, out_f)``, then runs a second-microbatch forward.
    NOTE(review): no backward pass is executed here even though the launching
    test is named ``test_forward_backward`` -- confirm whether that is intended.
    """
    torch.manual_seed(0)
    dtype = torch.bfloat16
    # batch=32: NVFP4 wgrad GEMM (K=batch) requires K divisible by 32
    # out_f % (16*world_size)==0 → no padding
    batch, in_f, out_f = 32, 64, 128
    gtp_group = dist.new_group(list(range(world_size)))

    linear = te.Linear(
        in_features=in_f,
        out_features=out_f,
        bias=False,
        params_dtype=dtype,
        device="cuda",
        gtp_group=gtp_group,
    )
    x = torch.randn(batch, in_f, dtype=dtype, device="cuda", requires_grad=True)
    dist.broadcast(x, src=0)

    # Forward under NVFP4 recipe – triggers setup() and NVFP4 quantization
    nvfp4_recipe = NVFP4BlockScaling()
    with fp8_autocast(enabled=True, fp8_recipe=nvfp4_recipe):
        out = linear(x, is_first_microbatch=True)

    # After the first forward pass setup() must have created a quantized shard
    shard = linear.weight
    assert shard.quantized is not None, "NVFP4 quantized shard must be set after setup()"
    is_quantized = isinstance(shard.quantized, QuantizedTensor)
    assert (
        is_quantized
    ), f"weight.quantized should be QuantizedTensor, got {type(shard.quantized)}"

    assert out.shape == (batch, out_f), f"unexpected output shape {out.shape}"
    assert torch.isfinite(out).all(), "NVFP4 GTP output has non-finite values"

    # Second microbatch reuses cached quantized weight (skip_weight_cast path)
    with fp8_autocast(enabled=True, fp8_recipe=nvfp4_recipe):
        out2 = linear(x.detach(), is_first_microbatch=False)
    assert torch.isfinite(out2).all(), "NVFP4 GTP second-microbatch output has non-finite values"


def _worker_nvfp4_linear_unaligned(rank, world_size, port):
    """Verify NVFP4 GTP when out_features is not aligned to 16*world_size (padding path).

    ``out_f`` is divisible by 8 (NVFP4 GEMM alignment) but not by
    ``16 * world_size``, so padding is needed: the last GTP rank holds a shard
    zero-padded up to the shard size.  After all-gather, ``_strip_padding``
    removes the padded rows before the GEMM, so the output keeps the original
    ``out_f`` columns.  Only the forward pass is run.
    """
    torch.manual_seed(0)
    dtype = torch.bfloat16
    batch, in_f = 32, 64
    alignment = 16 * world_size  # 64 for world_size=4
    # Choose out_f divisible by 8 (NVFP4 GEMM constraint) but not by 64 (GTP alignment).
    # With out_f=56: pad_length=8, shard_size=16, last rank gets 8 rows padded to 16.
    out_f = alignment - 8  # 56 for world_size=4
    gtp_group = dist.new_group(list(range(world_size)))

    linear = te.Linear(
        in_features=in_f,
        out_features=out_f,
        bias=False,
        params_dtype=dtype,
        device="cuda",
        gtp_group=gtp_group,
    )
    x = torch.randn(batch, in_f, dtype=dtype, device="cuda", requires_grad=True)
    dist.broadcast(x, src=0)

    with fp8_autocast(enabled=True, fp8_recipe=NVFP4BlockScaling()):
        out = linear(x, is_first_microbatch=True)

    # After _strip_padding removes the padded rows, output has out_f (not padded) cols.
    assert out.shape == (batch, out_f), f"unexpected output shape {out.shape}"
    assert torch.isfinite(out).all(), "NVFP4 GTP (unaligned) output has non-finite values"


class TestNVFP4LinearGTP:
    """4-rank launchers for the NVFP4 Linear workers; skipped without NVFP4 support."""

    def test_forward_backward(self):
        _requires_nvfp4()
        _requires_multi_gpu(4)
        _run_distributed(_worker_nvfp4_linear, 4)

    def test_forward_unaligned_padding(self):
        _requires_nvfp4()
        _requires_multi_gpu(4)
        _run_distributed(_worker_nvfp4_linear_unaligned, 4)


# ---------------------------------------------------------------------------
# 12.
# NVFP4 + GTP: GroupedLinear forward/backward (coalesced batched all-gather)
# ---------------------------------------------------------------------------


def _worker_nvfp4_grouped_linear(rank, world_size, port, num_gemms):
    """Verify NVFP4 GTP with GroupedLinear (uses grouped_gather_along_first_dim).

    Checks output shape and finiteness, that every ``weight{i}`` is a
    ``GTPShardedParam`` whose ``quantized`` attribute is a ``QuantizedTensor``
    after the forward pass, and that backward yields an input gradient shaped
    like the input.
    """
    torch.manual_seed(0)
    dtype = torch.bfloat16
    # NVFP4 split_quantize constraints: in_f % 128 == 0, tokens_per_expert % 64 == 0
    # (Hadamard transform requirement), and K=tokens_per_expert % 32 == 0 for wgrad.
    in_f, out_f = 128, 256
    total_tokens = num_gemms * 64
    gtp_group = dist.new_group(list(range(world_size)))

    grouped = te.GroupedLinear(
        num_gemms=num_gemms,
        in_features=in_f,
        out_features=out_f,
        bias=False,
        params_dtype=dtype,
        device="cuda",
        gtp_group=gtp_group,
    )
    assert isinstance(grouped.weight0, GTPShardedParam)

    # Even split of tokens across GEMMs; any remainder goes to the last one.
    per_gemm, leftover = divmod(total_tokens, num_gemms)
    m_splits = [per_gemm] * num_gemms
    m_splits[-1] += leftover

    inp = torch.randn(total_tokens, in_f, dtype=dtype, device="cuda", requires_grad=True)
    dist.broadcast(inp, src=0)

    with fp8_autocast(enabled=True, fp8_recipe=NVFP4BlockScaling()):
        out = grouped(inp, m_splits=m_splits, is_first_microbatch=True)

    assert out.shape == (total_tokens, out_f), f"unexpected output shape {out.shape}"
    assert torch.isfinite(out).all(), "NVFP4 GroupedLinear GTP output has non-finite values"

    # All expert weight shards should be quantized after setup()
    weight_names = [f"weight{idx}" for idx in range(num_gemms)]
    for name in weight_names:
        w = getattr(grouped, name)
        assert isinstance(w, GTPShardedParam)
        assert w.quantized is not None, f"{name}.quantized not set after NVFP4 setup()"
        assert isinstance(
            w.quantized, QuantizedTensor
        ), f"{name}.quantized should be QuantizedTensor, got {type(w.quantized)}"

    # Zeroed main_grad per expert weight, allocated before backward.
    for name in weight_names:
        w = getattr(grouped, name)
        w.main_grad = torch.zeros(w.shape, dtype=dtype, device="cuda")
    out.sum().backward()
    assert inp.grad is not None and inp.grad.shape == inp.shape


class TestNVFP4GroupedLinearGTP:
    """4-rank launcher for the NVFP4 GroupedLinear worker, per ``num_gemms``."""

    @pytest.mark.parametrize("num_gemms", [2, 4])
    def test_forward_backward(self, num_gemms):
        _requires_nvfp4()
        _requires_multi_gpu(4)
        _run_distributed(_worker_nvfp4_grouped_linear, 4, num_gemms)


# ---------------------------------------------------------------------------
# 13. MXFP8 + GTP: Linear forward/backward, quantized shard setup
# ---------------------------------------------------------------------------


def _worker_mxfp8_linear(rank, world_size, port):
    """Verify that GTP Linear correctly quantizes, all-gathers, and computes with MXFP8.

    Runs forward under ``MXFP8BlockScaling``, checks the quantized shard and the
    output, runs backward and checks the input gradient, then runs a
    second-microbatch forward.
    """
    from transformer_engine.common.recipe import MXFP8BlockScaling

    torch.manual_seed(0)
    dtype = torch.bfloat16
    # batch=32: MXFP8 wgrad GEMM (K=batch) requires K divisible by MXFP8_BLOCK_SCALING_SIZE=32
    # out_f % (16*world_size)==0 → no padding
    batch, in_f, out_f = 32, 64, 128
    gtp_group = dist.new_group(list(range(world_size)))

    linear = te.Linear(
        in_features=in_f,
        out_features=out_f,
        bias=False,
        params_dtype=dtype,
        device="cuda",
        gtp_group=gtp_group,
    )
    x = torch.randn(batch, in_f, dtype=dtype, device="cuda", requires_grad=True)
    dist.broadcast(x, src=0)

    # Forward under MXFP8 recipe – triggers setup() and MXFP8 quantization
    mxfp8_recipe = MXFP8BlockScaling()
    with fp8_autocast(enabled=True, fp8_recipe=mxfp8_recipe):
        out = linear(x, is_first_microbatch=True)

    # After the first forward pass setup() must have created a quantized shard
    shard = linear.weight
    assert shard.quantized is not None, "MXFP8 quantized shard must be set after setup()"
    is_quantized = isinstance(shard.quantized, QuantizedTensor)
    assert (
        is_quantized
    ), f"weight.quantized should be QuantizedTensor, got {type(shard.quantized)}"

    assert out.shape == (batch, out_f), f"unexpected output shape {out.shape}"
    assert torch.isfinite(out).all(), "MXFP8 GTP output has non-finite values"

    # Backward should complete without error
    linear.weight.main_grad = torch.zeros(linear.weight.shape, dtype=dtype, device="cuda")
    out.sum().backward()
    assert x.grad is not None
    assert x.grad.shape == x.shape

    # Second microbatch reuses cached quantized weight (skip_weight_cast path)
    with fp8_autocast(enabled=True, fp8_recipe=mxfp8_recipe):
        out2 = linear(x.detach(), is_first_microbatch=False)
    assert torch.isfinite(out2).all(), "MXFP8 GTP second-microbatch output has non-finite values"


def _worker_mxfp8_linear_unaligned(rank, world_size, port):
    """Verify MXFP8 GTP when out_features is not aligned to 16*world_size (padding path).

    MXFP8 requires tensor dims divisible by 32, so shard_size
    (= M_padded / world_size) must be a multiple of 32; with world_size=4 that
    means M_padded % 128 == 0.  out_f=120 gives M_padded=128 and shard_size=32,
    with the last rank holding 24 real rows zero-padded to 32.  After
    all-gather, ``_strip_padding`` removes the padded rows before the GEMM, so
    the output keeps the original ``out_f`` columns.  Only forward is run.
    """
    from transformer_engine.common.recipe import MXFP8BlockScaling

    torch.manual_seed(0)
    dtype = torch.bfloat16
    batch, in_f = 32, 64
    # out_f=120: M_padded=128, shard_size=32, last rank has 24 rows padded to 32.
    # 120 is divisible by 8 (GEMM constraint), not by 64 (GTP alignment → padding needed).
    out_f = 120
    gtp_group = dist.new_group(list(range(world_size)))

    linear = te.Linear(
        in_features=in_f,
        out_features=out_f,
        bias=False,
        params_dtype=dtype,
        device="cuda",
        gtp_group=gtp_group,
    )
    x = torch.randn(batch, in_f, dtype=dtype, device="cuda", requires_grad=True)
    dist.broadcast(x, src=0)

    with fp8_autocast(enabled=True, fp8_recipe=MXFP8BlockScaling()):
        out = linear(x, is_first_microbatch=True)

    # After _strip_padding removes the padded rows, output has out_f (not padded) cols.
    assert out.shape == (batch, out_f), f"unexpected output shape {out.shape}"
    assert torch.isfinite(out).all(), "MXFP8 GTP (unaligned) output has non-finite values"


def _requires_mxfp8():
    """Skip the calling test, with the reported reason, when MXFP8 is unavailable."""
    supported, why_not = is_mxfp8_available(return_reason=True)
    if supported:
        return
    pytest.skip(f"MXFP8 not available: {why_not}")


class TestMXFP8LinearGTP:
    """4-rank launchers for the MXFP8 Linear workers; skipped without MXFP8 support."""

    def test_forward_backward(self):
        _requires_mxfp8()
        _requires_multi_gpu(4)
        _run_distributed(_worker_mxfp8_linear, 4)

    def test_forward_unaligned_padding(self):
        _requires_mxfp8()
        _requires_multi_gpu(4)
        _run_distributed(_worker_mxfp8_linear_unaligned, 4)


# ---------------------------------------------------------------------------
# 14. GTPConfig / update_config
# ---------------------------------------------------------------------------


class TestGTPConfig:
    """``update_config`` must write through to ``GTP_CONFIG`` and reject unknown keys.

    Each mutating test restores the value it found, even if its assertion fails.
    """

    def test_update_pad_for_alignment(self):
        saved = gtp_module.GTP_CONFIG.pad_for_alignment
        try:
            gtp_module.update_config(pad_for_alignment=8)
            assert gtp_module.GTP_CONFIG.pad_for_alignment == 8
        finally:
            gtp_module.update_config(pad_for_alignment=saved)

    def test_update_weight_prefetch(self):
        saved = gtp_module.GTP_CONFIG.weight_prefetch
        try:
            gtp_module.update_config(weight_prefetch=False)
            assert gtp_module.GTP_CONFIG.weight_prefetch is False
        finally:
            gtp_module.update_config(weight_prefetch=saved)

    def test_invalid_key_raises(self):
        with pytest.raises(ValueError, match="Unknown GTP config option"):
            gtp_module.update_config(nonexistent_key=123)


# ---------------------------------------------------------------------------
# 15.
# GTPShardedParam properties – shape computations and padding
# ---------------------------------------------------------------------------


class TestGTPShardedParamProperties:
    """Shape and padding helpers of ``GTPShardedParam``, driven by a fake group object."""

    class _FakeGroup:
        """Stand-in group exposing only ``size()`` and ``rank()``."""

        def __init__(self, size=4, rank=0):
            self._size, self._rank = size, rank

        def size(self):
            return self._size

        def rank(self):
            return self._rank

    def _make_param(self, shape, pad_length=0, group_size=4, group_rank=0):
        """Return a zero-filled local shard of ``shape`` with fake group metadata attached."""
        param = GTPShardedParam(torch.zeros(*shape))
        param.group = self._FakeGroup(size=group_size, rank=group_rank)
        param.pad_length = pad_length
        param.expert_idx = None
        return param

    # --- _unsharded_shape_padded ---

    def test_unsharded_shape_padded_no_padding(self):
        # 8 local rows on each of 4 ranks → 32 rows, nothing padded.
        shard = self._make_param((8, 4), pad_length=0, group_size=4, group_rank=2)
        expected = (32, 4)
        assert shard._unsharded_shape_padded == expected

    def test_unsharded_shape_padded_last_rank_with_padding(self):
        # The local shard already includes its slice of padding rows: 16 rows
        # per rank, and pad_length=1 marks one of them (on the last rank) as
        # pad, so the padded unsharded shape is 16 * 4 = 64.  pad_length is
        # global metadata carried with the same value on every rank's shard.
        shard = self._make_param((16, 32), pad_length=1, group_size=4, group_rank=3)
        expected = (64, 32)
        assert shard._unsharded_shape_padded == expected

    def test_unsharded_shape_padded_non_last_rank_with_padding(self):
        # Same global pad_length on a non-last rank → same result.
        shard = self._make_param((16, 32), pad_length=1, group_size=4, group_rank=0)
        expected = (64, 32)
        assert shard._unsharded_shape_padded == expected

    # --- _unsharded_shape ---

    def test_unsharded_shape_no_padding(self):
        shard = self._make_param((8, 4), pad_length=0, group_size=4, group_rank=0)
        expected = (32, 4)
        assert shard._unsharded_shape == expected

    def test_unsharded_shape_strips_padding(self):
        # 16 local rows × 4 ranks = 64 padded rows; minus pad_length=1 → 63.
        shard = self._make_param((16, 32), pad_length=1, group_size=4, group_rank=3)
        expected = (63, 32)
        assert shard._unsharded_shape == expected

    # --- get_padded_shard ---

    def test_get_padded_shard_identity_when_no_padding(self):
        shard = self._make_param((6, 4), pad_length=0)
        # Identity: no copy is expected when there is nothing to pad.
        assert shard.get_padded_shard() is shard

    def test_get_padded_shard_identity_non_last_rank(self):
        # pad_length > 0, but this is not the padded last rank → unchanged.
        shard = self._make_param((16, 4), pad_length=1, group_size=4, group_rank=0)
        assert shard.get_padded_shard() is shard

    def test_get_padded_shard_identity_last_rank(self):
        # Under current semantics the local shard already contains its share
        # of padding (the slicer zero-pads before slicing), so the last rank
        # gets the identity as well.
        shard = self._make_param((8, 4), pad_length=2, group_size=4, group_rank=3)
        assert shard.get_padded_shard() is shard

    # --- _strip_padding ---

    def test_strip_padding_identity_no_padding(self):
        shard = self._make_param((8, 4), pad_length=0)
        gathered = torch.randn(32, 4)
        assert shard._strip_padding(gathered) is gathered

    def test_strip_padding_plain_tensor(self):
        # Gathered weight [32, 4] with pad_length=1 → last row dropped → [31, 4].
        shard = self._make_param((7, 4), pad_length=1, group_size=4, group_rank=0)
        gathered = torch.randn(32, 4)
        stripped = shard._strip_padding(gathered)
        assert stripped.shape == (31, 4)
        assert torch.equal(stripped, gathered[:-1])

    def test_strip_padding_multi_row(self):
        # pad_length=4 drops four rows: 64 → 60.
        shard = self._make_param((12, 8), pad_length=4, group_size=4, group_rank=0)
        gathered = torch.ones(64, 8)
        stripped = shard._strip_padding(gathered)
        assert stripped.shape == (60, 8)


# ---------------------------------------------------------------------------
# 16.
# _get_cache_key – expert vs non-expert, fwd vs bwd
# ---------------------------------------------------------------------------


class TestGTPCacheKey:
    """Equality and inequality properties of ``GTPShardedParam._get_cache_key``."""

    class _FakeGroup:
        """Fixed 4-way group, rank 0."""

        def size(self):
            return 4

        def rank(self):
            return 0

    def _param(self, shape=(16, 32), expert_idx=None):
        """Return a zero-filled shard with the metadata ``_get_cache_key`` is given here."""
        param = GTPShardedParam(torch.zeros(*shape))
        param.group = self._FakeGroup()
        param.expert_idx = expert_idx
        param.pad_length = 0
        return param

    def test_non_expert_key_same_for_fwd_bwd(self):
        """Non-routed params produce the same cache key for fwd and bwd."""
        param = self._param(expert_idx=None)
        fwd_key = param._get_cache_key(torch.bfloat16, fwd=True, reduce_scatter=False)
        bwd_key = param._get_cache_key(torch.bfloat16, fwd=False, reduce_scatter=False)
        assert fwd_key == bwd_key

    def test_expert_key_differs_fwd_bwd(self):
        """For quantized (non-torch.dtype) recipes, expert fwd vs bwd keys differ."""
        param = self._param(expert_idx=0)
        # fwd/bwd are only told apart for non-torch.dtype descriptors (e.g. a
        # quantized recipe dtype); a plain string stands in for one here.
        mock_dtype = "fp8"
        fwd_key = param._get_cache_key(mock_dtype, fwd=True, reduce_scatter=False)
        bwd_key = param._get_cache_key(mock_dtype, fwd=False, reduce_scatter=False)
        assert fwd_key != bwd_key

    def test_different_expert_idx_different_keys(self):
        """Two experts with same shape but different indices get distinct keys."""
        expert0 = self._param(expert_idx=0)
        expert1 = self._param(expert_idx=1)
        key0 = expert0._get_cache_key(torch.bfloat16, fwd=True, reduce_scatter=False)
        key1 = expert1._get_cache_key(torch.bfloat16, fwd=True, reduce_scatter=False)
        assert key0 != key1

    def test_same_expert_idx_same_key(self):
        """Same-shaped experts with the same idx share a cache key (cross-layer buffer reuse)."""
        layer0_expert = self._param(expert_idx=0)
        layer1_expert = self._param(expert_idx=0)
        key_l0 = layer0_expert._get_cache_key(torch.bfloat16, fwd=True, reduce_scatter=False)
        key_l1 = layer1_expert._get_cache_key(torch.bfloat16, fwd=True, reduce_scatter=False)
        assert key_l0 == key_l1

    def test_different_dtypes_different_keys(self):
        """The dtype takes part in the key."""
        param = self._param()
        bf16_key = param._get_cache_key(torch.bfloat16, fwd=True, reduce_scatter=False)
        fp32_key = param._get_cache_key(torch.float32, fwd=True, reduce_scatter=False)
        assert bf16_key != fp32_key

    def test_rs_key_differs_from_ag_key(self):
        """reduce_scatter=True key must differ from reduce_scatter=False key."""
        param = self._param()
        ag_key = param._get_cache_key(torch.bfloat16, fwd=True, reduce_scatter=False)
        rs_key = param._get_cache_key(torch.bfloat16, fwd=True, reduce_scatter=True)
        assert ag_key != rs_key


# ---------------------------------------------------------------------------
# 17.
# GTPWeightCache.take() deferred vs get() immediate pool return
# ---------------------------------------------------------------------------


class TestGTPCacheRelease:
    """Tests for GTPWeightCache reserve/get/release semantics."""

    class _FakeGroup:
        """Fixed 2-way group, rank 0."""

        def size(self):
            return 2

        def rank(self):
            return 0

    def _param(self, shape=(8, 4)):
        """Return a zero-filled, non-expert, unpadded shard with no quantizer."""
        param = GTPShardedParam(torch.zeros(*shape))
        param.group = self._FakeGroup()
        param.expert_idx = None
        param.pad_length = 0
        param._quantizer = None
        return param

    def test_release_returns_buffer_to_pool(self):
        """release() puts the buffer back so the next reserve+get reuses it."""
        cache = GTPWeightCache()
        param = self._param()

        first_ticket = cache.reserve(param, torch.bfloat16, fwd=True)
        first_buf = cache.get(first_ticket)
        cache.release(first_ticket)

        # The next ticket for the same key should be served from the pool.
        second_ticket = cache.reserve(param, torch.bfloat16, fwd=True)
        second_buf = cache.get(second_ticket)
        assert second_buf is first_buf, "Buffer should be reused after release()"
        cache.release(second_ticket)

    def test_without_release_pool_stays_empty(self):
        """Without release(), subsequent reserves allocate fresh buffers."""
        cache = GTPWeightCache()
        param = self._param()

        held_ticket = cache.reserve(param, torch.bfloat16, fwd=True)
        held_buf = cache.get(held_ticket)
        # held_ticket is deliberately never released, so nothing is pooled.
        other_ticket = cache.reserve(param, torch.bfloat16, fwd=True)
        other_buf = cache.get(other_ticket)
        assert other_buf is not held_buf, "Without release, a fresh buffer must be allocated"

    def test_get_same_ticket_returns_same_buf(self):
        """get() is idempotent — calling it twice returns the same buffer."""
        cache = GTPWeightCache()
        ticket = cache.reserve(self._param(), torch.bfloat16, fwd=True)
        first_lookup = cache.get(ticket)
        second_lookup = cache.get(ticket)
        assert first_lookup is second_lookup
        cache.release(ticket)

    def test_release_invalid_ticket_raises(self):
        """Releasing a ticket that was never issued raises ``KeyError``."""
        cache = GTPWeightCache()
        with pytest.raises(KeyError):
            cache.release(9999)


# ---------------------------------------------------------------------------
# 18.
# tag_gtp_params_with_names – _debug_name population
# ---------------------------------------------------------------------------


class TestTagGTPParamsWithNames:
    """``tag_gtp_params_with_names`` must set ``_debug_name`` on GTP params only."""

    class _FakeGroup:
        """Single-member group, rank 0."""

        def size(self):
            return 1

        def rank(self):
            return 0

    def test_debug_name_populated_for_gtp_param(self):
        """GTPShardedParam._debug_name is set to the dotted parameter path."""
        model = nn.Linear(4, 8, bias=False)
        gtp_weight = GTPShardedParam(torch.randn(8, 4))
        gtp_weight.group = self._FakeGroup()
        # Swap the plain weight for the GTP param directly in the registry.
        model._parameters["weight"] = gtp_weight

        gtp_module.tag_gtp_params_with_names(model)
        name = gtp_weight._debug_name
        assert name == "weight", f"Expected 'weight', got '{gtp_weight._debug_name}'"

    def test_nested_module_debug_name(self):
        """Nested module produces a dotted debug name."""
        outer = nn.Sequential(nn.Linear(4, 8, bias=False))
        gtp_weight = GTPShardedParam(torch.randn(8, 4))
        gtp_weight.group = self._FakeGroup()
        inner = outer._modules["0"]
        inner._parameters["weight"] = gtp_weight

        gtp_module.tag_gtp_params_with_names(outer)
        name = gtp_weight._debug_name
        assert name == "0.weight", f"Expected '0.weight', got '{gtp_weight._debug_name}'"

    def test_non_gtp_params_are_skipped(self):
        """Plain nn.Parameter instances are silently ignored."""
        plain_model = nn.Linear(4, 8)
        # Passing means the call returned without raising.
        gtp_module.tag_gtp_params_with_names(plain_model)


# ---------------------------------------------------------------------------
# 19.
# wrap_module_params_gtp is a no-op when gtp_group.size() == 1
# ---------------------------------------------------------------------------


class TestGTPGroupSizeOne:
    """A single-member GTP group must leave module parameters untouched."""

    class _SingletonGroup:
        """Single-member group, rank 0."""

        def size(self):
            return 1

        def rank(self):
            return 0

    def test_no_sharding_when_gtp_size_one(self):
        """wrap_module_params_gtp must be a no-op for a singleton GTP group."""
        mod = nn.Linear(32, 64, bias=False)
        original_weight = mod.weight
        wrap_module_params_gtp(mod, ["weight"], self._SingletonGroup())
        assert (
            mod.weight is original_weight
        ), "gtp_group.size()==1 should leave parameters unchanged"
        assert not isinstance(mod.weight, GTPShardedParam)


# ---------------------------------------------------------------------------
# 21. weight_prefetch=False: forward still produces correct output
# ---------------------------------------------------------------------------


def _worker_prefetch_disabled(rank, world_size, port):
    """Run a two-layer GTP forward with ``weight_prefetch`` turned off.

    Asserts that the weight chain is still wired (``l0.weight.next_w`` is
    ``l1.weight``) and that the summed output is finite.  The previous
    ``weight_prefetch`` setting is captured up front and restored in
    ``finally``, so the worker leaves the global GTP config as it found it
    rather than forcing it to ``True``.
    """
    torch.manual_seed(0)
    in_f, out_f = 32, 64
    dtype = torch.bfloat16
    gtp_group = dist.new_group(list(range(world_size)))

    # Remember the value in effect so it can be put back exactly.
    previous_prefetch = gtp_module.GTP_CONFIG.weight_prefetch
    gtp_module.update_config(weight_prefetch=False)
    try:
        l0 = te.Linear(
            in_features=in_f,
            out_features=out_f,
            bias=False,
            params_dtype=dtype,
            device="cuda",
            gtp_group=gtp_group,
        )
        l1 = te.Linear(
            in_features=in_f,
            out_features=out_f,
            bias=False,
            params_dtype=dtype,
            device="cuda",
            gtp_group=gtp_group,
        )

        inp = torch.randn(4, in_f, dtype=dtype, device="cuda")
        dist.broadcast(inp, src=0)

        # Single forward pass: builds the chain; the output is checked for finiteness only.
        out = l0(inp, is_first_microbatch=True) + l1(inp, is_first_microbatch=True)

        # Chain should still be wired even with prefetch disabled
        assert l0.weight.next_w is l1.weight
        assert torch.isfinite(out).all(), "Non-finite output with prefetch disabled"
    finally:
        gtp_module.update_config(weight_prefetch=previous_prefetch)


class TestGTPPrefetchDisabled:
    """4-rank launcher for the prefetch-disabled worker."""

    def test_forward_works_without_prefetch(self):
        _requires_multi_gpu(4)
        _run_distributed(_worker_prefetch_disabled, 4)


# ---------------------------------------------------------------------------
# 22. fuse_wgrad_accumulation=True: wgrad is accumulated into main_grad
# ---------------------------------------------------------------------------


def _worker_fuse_wgrad(rank, world_size, port):
    """With ``fuse_wgrad_accumulation=True``, backward must write into ``main_grad``.

    ``main_grad`` starts as zeros with the local shard's shape; after one
    forward/backward at least one element must be non-zero.
    """
    torch.manual_seed(0)
    in_f, out_f = 32, 128  # out_f % (16*world_size)==0, no padding
    dtype = torch.bfloat16
    gtp_group = dist.new_group(list(range(world_size)))

    layer = te.Linear(
        in_features=in_f,
        out_features=out_f,
        bias=False,
        params_dtype=dtype,
        device="cuda",
        gtp_group=gtp_group,
        fuse_wgrad_accumulation=True,
    )

    # Allocate main_grad on the local shard shape
    w = layer.weight
    w.main_grad = torch.zeros(w.shape, dtype=dtype, device="cuda")

    inp = torch.randn(8, in_f, dtype=dtype, device="cuda", requires_grad=True)
    dist.broadcast(inp, src=0)

    layer(inp, is_first_microbatch=True).sum().backward()

    # With fused accumulation, wgrad was added into main_grad
    assert torch.any(
        w.main_grad != 0
    ), "main_grad should have been updated by fused wgrad accumulation"


class TestFuseWgradAccumulation:
    """4-rank launcher for the fused-wgrad-accumulation worker."""

    def test_wgrad_accumulated_into_main_grad(self):
        _requires_multi_gpu(4)
        _run_distributed(_worker_fuse_wgrad, 4)


# ---------------------------------------------------------------------------
# 23.
_grad_accum_hook is called after reduce-scatter +# --------------------------------------------------------------------------- + + +def _worker_main_grad_updated_after_bwd(rank, world_size, port): + """After backward, the wgrad RS path must have accumulated wgrad into main_grad.""" + torch.manual_seed(0) + in_f, out_f = 32, 64 + dtype = torch.bfloat16 + gtp_group = dist.new_group(list(range(world_size))) + + layer = te.Linear( + in_features=in_f, + out_features=out_f, + bias=False, + params_dtype=dtype, + device="cuda", + gtp_group=gtp_group, + ) + + # wgrad RS path always accumulates into main_grad; allocate before backward. + layer.weight.main_grad = torch.zeros(layer.weight.shape, dtype=dtype, device="cuda") + + inp = torch.randn(8, in_f, dtype=dtype, device="cuda", requires_grad=True) + dist.broadcast(inp, src=0) + layer(inp, is_first_microbatch=True).sum().backward() + + assert torch.any( + layer.weight.main_grad != 0 + ), "main_grad should have been updated after the reduce-scatter accumulation" + + +class TestGTPGradAccumHook: + def test_main_grad_updated_after_backward(self): + _requires_multi_gpu(4) + _run_distributed(_worker_main_grad_updated_after_bwd, 4) + + +# --------------------------------------------------------------------------- +# 24. wait_async_comms(finalize_after_drain=True) inline-accumulation fallback +# --------------------------------------------------------------------------- + + +class TestWaitAsyncCommsFallback: + """Exercises the inline-accumulation fallback inside + ``wait_async_comms(finalize_after_drain=True)``: when a param is in + ``_inflight_comm_params`` (async AG was issued) but its ``_wgrad_rs_handle`` + is ``None`` (no async RS handle to drain), the inner + ``_wait_reduce_scatter`` call no-ops and the outer loop must inline the + accumulation itself (main_grad.add_ + ticket release + flag set). 
+ + Production flows rarely hit this combination — chain-interior params have + both async AG and async RS, and chain-head sync RS doesn't enter + ``_inflight_comm_params`` via bwd AG. We construct the state by hand to + pin down the fallback's contract. + """ + + class _FakeGroup: + def size(self): + return 1 + + def rank(self): + return 0 + + @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA required") + def test_fallback_accumulates_when_no_rs_handle(self): + dtype = torch.bfloat16 + p = GTPShardedParam(torch.zeros(8, 4, dtype=dtype, device="cuda")) + p.group = self._FakeGroup() + p.expert_idx = None + p.pad_length = 0 + p.chain_id = gtp_module.GTPChain.UNGRAPHED.value + p._quantizer = None + p.is_routed_expert = False # ⇒ self._weights property returns [self] + p.main_grad = torch.zeros(8, 4, dtype=dtype, device="cuda") + p._prefetch_handle = None # _wait_param_gather is no-op + p._wgrad_rs_handle = None # _wait_reduce_scatter is no-op → fallback fires + p._cached_ag_stream = None + p._cached_rs_stream = None + p.ag_event = torch.cuda.Event(external=True) + p.rs_event = torch.cuda.Event(external=True) + p.rs_event.record() # so rs_event.wait() in fallback doesn't block + p._already_finalized = False + p.grad_added_to_main_grad = False + + # Place a known wgrad in the cache for the fallback to read. + cache = gtp_module.get_global_GTP_cache() + p._rs_ticket = cache.reserve(p, dtype, fwd=False, reduce_scatter=True) + cache.get(p._rs_ticket).fill_(2.0) + + # Save + replace _inflight_comm_params so we don't trip over leftover + # params from earlier tests in the loop. 
+ saved = set(gtp_module._inflight_comm_params) + gtp_module._inflight_comm_params.clear() + gtp_module._inflight_comm_params.add(p) + try: + gtp_module.wait_async_comms( + chain_id=p.chain_id, skip_rs=False, finalize_after_drain=True + ) + finally: + gtp_module._inflight_comm_params.clear() + gtp_module._inflight_comm_params.update(saved) + + torch.cuda.synchronize() + assert torch.all( + p.main_grad == 2.0 + ), f"main_grad should be 2.0 after fallback accumulation; got {p.main_grad}" + assert p._already_finalized is True, "_already_finalized must be set" + assert p.grad_added_to_main_grad is True, "grad_added_to_main_grad must be set" + + @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA required") + def test_fallback_skipped_when_already_finalized(self): + """When _already_finalized=True, the fallback must NOT re-accumulate.""" + dtype = torch.bfloat16 + p = GTPShardedParam(torch.zeros(8, 4, dtype=dtype, device="cuda")) + p.group = self._FakeGroup() + p.expert_idx = None + p.pad_length = 0 + p.chain_id = gtp_module.GTPChain.UNGRAPHED.value + p._quantizer = None + p.is_routed_expert = False # ⇒ self._weights property returns [self] + # Pre-existing main_grad with a value the fallback must NOT overwrite. + p.main_grad = torch.full((8, 4), 5.0, dtype=dtype, device="cuda") + p._prefetch_handle = None + p._wgrad_rs_handle = None + p._cached_ag_stream = None + p._cached_rs_stream = None + p.ag_event = torch.cuda.Event(external=True) + p.rs_event = torch.cuda.Event(external=True) + p.rs_event.record() + p._already_finalized = True # ← short-circuits the fallback + + # No _rs_ticket: if the fallback ran it would raise KeyError on + # cache.get(None). The skip path must not touch the cache at all.
+ p._rs_ticket = None + + saved = set(gtp_module._inflight_comm_params) + gtp_module._inflight_comm_params.clear() + gtp_module._inflight_comm_params.add(p) + try: + gtp_module.wait_async_comms( + chain_id=p.chain_id, skip_rs=False, finalize_after_drain=True + ) + finally: + gtp_module._inflight_comm_params.clear() + gtp_module._inflight_comm_params.update(saved) + + torch.cuda.synchronize() + assert torch.all( + p.main_grad == 5.0 + ), "main_grad must be untouched when _already_finalized=True" + + @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA required") + def test_fallback_skipped_for_pure_ag_param(self): + """Regression: cross-graph fwd-AG prefetch in flight + finalize_after_drain=True. + + A param can be in _inflight_comm_params because of an outstanding async + all-gather (e.g. a cross-graph forward prefetch reaching the + bwd→optimizer boundary). No reduce-scatter was ever issued for that + param, so _rs_ticket is None on every weight. Previously the fallback + called cache.get(None) and crashed with KeyError; the guard now skips + the inline accumulation entirely when no weight has an RS ticket. + """ + dtype = torch.bfloat16 + p = GTPShardedParam(torch.zeros(8, 4, dtype=dtype, device="cuda")) + p.group = self._FakeGroup() + p.expert_idx = None + p.pad_length = 0 + p.chain_id = gtp_module.GTPChain.UNGRAPHED.value + p._quantizer = None + p.is_routed_expert = False + # Pre-existing main_grad with a sentinel that must survive untouched. + p.main_grad = torch.full((8, 4), 7.0, dtype=dtype, device="cuda") + p._prefetch_handle = None + p._wgrad_rs_handle = None + p._cached_ag_stream = None + p._cached_rs_stream = None + p.ag_event = torch.cuda.Event(external=True) + p.rs_event = torch.cuda.Event(external=True) + p.rs_event.record() + p._already_finalized = False + # Critical: simulates a pure-AG prefetch — no RS ever issued, ticket is None. 
+ p._rs_ticket = None + + saved = set(gtp_module._inflight_comm_params) + gtp_module._inflight_comm_params.clear() + gtp_module._inflight_comm_params.add(p) + try: + # Must NOT raise KeyError(None) from cache.get(None). + gtp_module.wait_async_comms( + chain_id=p.chain_id, skip_rs=False, finalize_after_drain=True + ) + finally: + gtp_module._inflight_comm_params.clear() + gtp_module._inflight_comm_params.update(saved) + + torch.cuda.synchronize() + assert torch.all( + p.main_grad == 7.0 + ), "main_grad must be untouched for a pure-AG param (no wgrad to accumulate)" + assert ( + p._already_finalized is False + ), "_already_finalized must stay False — no finalize happened for a pure-AG param" diff --git a/tests/unit_tests/generalized_tensor_parallel/test_tp_gtp.py b/tests/unit_tests/generalized_tensor_parallel/test_tp_gtp.py new file mode 100644 index 00000000000..b261444d315 --- /dev/null +++ b/tests/unit_tests/generalized_tensor_parallel/test_tp_gtp.py @@ -0,0 +1,458 @@ +# Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# See LICENSE for license information. + +"""Unit tests for combined Tensor Parallelism + Generalized Tensor Parallelism (TP+GTP). + +Process group layout (world_size = tp_size × gtp_size): + + rank = gtp_rank × tp_size + tp_rank + + TP group: all ranks that share the same gtp_rank (size = tp_size) + GTP group: all ranks that share the same tp_rank (size = gtp_size) + +Test groups +----------- +1. TestTPGTPProcessGroups – verify TP/GTP group sizes and rank assignment +2. TestTPGTPColumnParallelLinear – column-parallel Linear: weight shape + fwd/bwd correctness +3. TestTPGTPRowParallelLinear – row-parallel Linear: weight shape + fwd/bwd smoke test +4. TestTPGTPLayerNormLinear – LayerNormLinear column-parallel smoke test + +Tests use (tp_size, gtp_size) = (2, 2) → world_size = 4 (runs on 4-GPU machines). 
+ +Run via torchrun (matches the rest of Megatron's unit tests): + + torchrun --nproc-per-node 4 -m pytest tests/unit_tests/generalized_tensor_parallel/test_tp_gtp.py -v + +Multi-GPU tests skip automatically when ``torch.distributed.get_world_size()`` does not match +the requested combination of tp_size × gtp_size. +""" + +import pytest +import torch +import torch.distributed as dist +import transformer_engine.pytorch as te +from transformer_engine.pytorch.quantization import FP8GlobalStateManager + +from megatron.experimental.gtp import GTPShardedParam +from tests.unit_tests.test_utilities import Utils + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture(scope="module", autouse=True) +def _torchrun_dist_init(): + """Initialize the torchrun-managed dist group once per module. + + TP+GTP tests build TP and GTP subgroups within the world torchrun set + up; ``_run_distributed`` only skips when the required world size + doesn't match what torchrun launched with. + """ + Utils.initialize_model_parallel() + yield + Utils.destroy_model_parallel() + + +@pytest.fixture(autouse=True) +def reset_fp8_state(): + yield + FP8GlobalStateManager.reset() + + +@pytest.fixture(autouse=True) +def reset_gtp_globals(): + """Reset GTP mutable class/module-level state between tests.""" + yield + GTPShardedParam._chain_state = {} + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _run_distributed(fn, required_world_size: int, *args) -> None: + """Run ``fn(rank, world_size, port, *args)`` on every torchrun rank. + + ``port`` is unused (dist already initialized by torchrun) but kept so + existing worker signatures don't need editing. 
+ """ + actual_world_size = torch.distributed.get_world_size() + if actual_world_size != required_world_size: + pytest.skip( + f"Requires world_size={required_world_size}, " + f"got {actual_world_size} (launch with torchrun --nproc-per-node={required_world_size})" + ) + fn(torch.distributed.get_rank(), actual_world_size, None, *args) + + +def _requires_multi_gpu(n: int): + if torch.cuda.device_count() < n: + pytest.skip(f"Requires at least {n} CUDA devices") + + +def _build_groups(rank: int, world_size: int, tp_size: int, gtp_size: int): + """Create TP and GTP process groups for a 2D parallelism grid. + + Layout: rank = gtp_rank × tp_size + tp_rank + TP group: contiguous block [gtp_rank*tp_size, (gtp_rank+1)*tp_size) + GTP group: strided set {tp_rank, tp_rank+tp_size, tp_rank+2*tp_size, ...} + + Every rank must call new_group for ALL groups (PyTorch distributed requirement). + + Returns: + tp_group: this rank's TP process group + gtp_group: this rank's GTP process group + tp_rank: this rank's index within its TP group + gtp_rank: this rank's index within its GTP group + """ + assert tp_size * gtp_size == world_size + tp_rank = rank % tp_size + gtp_rank = rank // tp_size + + tp_group = None + for er in range(gtp_size): + ranks = list(range(er * tp_size, (er + 1) * tp_size)) + grp = dist.new_group(ranks) + if er == gtp_rank: + tp_group = grp + + gtp_group = None + for tr in range(tp_size): + ranks = list(range(tr, world_size, tp_size)) + grp = dist.new_group(ranks) + if tr == tp_rank: + gtp_group = grp + + return tp_group, gtp_group, tp_rank, gtp_rank + + +# --------------------------------------------------------------------------- +# 1. 
TestTPGTPProcessGroups – group sizes and rank membership +# --------------------------------------------------------------------------- + + +def _worker_groups(rank, world_size, port, tp_size, gtp_size): + tp_group, gtp_group, tp_rank, gtp_rank = _build_groups(rank, world_size, tp_size, gtp_size) + + assert tp_group.size() == tp_size, f"rank {rank}: TP group size {tp_group.size()} != {tp_size}" + assert ( + gtp_group.size() == gtp_size + ), f"rank {rank}: GTP group size {gtp_group.size()} != {gtp_size}" + assert ( + dist.get_rank(tp_group) == tp_rank + ), f"rank {rank}: TP rank {dist.get_rank(tp_group)} != expected {tp_rank}" + assert ( + dist.get_rank(gtp_group) == gtp_rank + ), f"rank {rank}: GTP rank {dist.get_rank(gtp_group)} != expected {gtp_rank}" + + +class TestTPGTPProcessGroups: + @pytest.mark.parametrize("tp_size,gtp_size", [(2, 2)]) + def test_group_sizes_and_ranks(self, tp_size, gtp_size): + world_size = tp_size * gtp_size + _requires_multi_gpu(world_size) + _run_distributed(_worker_groups, world_size, tp_size, gtp_size) + + +# --------------------------------------------------------------------------- +# 2. 
TestTPGTPColumnParallelLinear +# --------------------------------------------------------------------------- + + +def _worker_column_shape(rank, world_size, port, tp_size, gtp_size): + """Column-parallel: weight shape must be [out_f/(tp_size*gtp_size), in_f].""" + tp_group, gtp_group, _, _ = _build_groups(rank, world_size, tp_size, gtp_size) + + in_f = 64 + out_f = tp_size * gtp_size * 32 # per-rank shard = 32 rows + + layer = te.Linear( + in_features=in_f, + out_features=out_f, + parallel_mode="column", + bias=False, + params_dtype=torch.bfloat16, + device="cuda", + tp_group=tp_group, + gtp_group=gtp_group, + ) + + expected_rows = out_f // (tp_size * gtp_size) + assert isinstance( + layer.weight, GTPShardedParam + ), f"rank {rank}: weight should be GTPShardedParam" + assert layer.weight.shape == ( + expected_rows, + in_f, + ), f"rank {rank}: expected ({expected_rows}, {in_f}), got {layer.weight.shape}" + + +def _worker_column_correctness(rank, world_size, port, tp_size, gtp_size): + """Column-parallel output must equal inp @ (GTP-gathered TP-local weight)^T.""" + torch.manual_seed(0) + tp_group, gtp_group, tp_rank, gtp_rank = _build_groups(rank, world_size, tp_size, gtp_size) + + batch, in_f = 16, 64 + out_f = tp_size * gtp_size * 32 # per-rank shard = 32 rows + dtype = torch.bfloat16 + + layer = te.Linear( + in_features=in_f, + out_features=out_f, + parallel_mode="column", + bias=False, + params_dtype=dtype, + device="cuda", + tp_group=tp_group, + gtp_group=gtp_group, + ) + + # All-gather GTP shards → TP-local full weight [out_f/tp_size, in_f] + shard = layer.weight.data.clone() + all_gtp_shards = [torch.zeros_like(shard) for _ in range(gtp_size)] + dist.all_gather(all_gtp_shards, shard, group=gtp_group) + tp_local_weight = torch.cat(all_gtp_shards, dim=0).float() # strip padding + tp_local_weight = tp_local_weight[: out_f // tp_size] + + # Same full input on all ranks (column-parallel: each rank processes full input) + inp = torch.randn(batch, in_f, dtype=dtype, 
device="cuda") + dist.broadcast(inp, src=0) + inp_te = inp.clone().requires_grad_(True) + + # TE forward: GTP all-gathers weight internally; no TP comm in column-parallel fwd + out = layer(inp_te, is_first_microbatch=True) + assert out.shape == ( + batch, + out_f // tp_size, + ), f"rank {rank}: output shape {out.shape} != ({batch}, {out_f // tp_size})" + + # Reference: this TP rank's output = inp @ tp_local_weight^T + ref = inp.float() @ tp_local_weight.T + ref = ref.to(dtype) + assert torch.allclose( + out.float(), ref.float(), atol=0.1, rtol=0.1 + ), f"rank {rank}: output mismatch, max_diff={(out.float() - ref.float()).abs().max():.4f}" + + # Backward: dX is all-reduced across TP group internally by TE + grad = torch.randn_like(out) + dist.broadcast(grad, src=0) + # wgrad RS path always accumulates into main_grad; allocate before backward. + layer.weight.main_grad = torch.zeros(layer.weight.shape, dtype=dtype, device="cuda") + out.backward(grad) + assert inp_te.grad is not None and inp_te.grad.shape == inp.shape + assert torch.isfinite(inp_te.grad).all(), f"rank {rank}: non-finite dX" + + +class TestTPGTPColumnParallelLinear: + @pytest.mark.parametrize("tp_size,gtp_size", [(2, 2)]) + def test_weight_shape(self, tp_size, gtp_size): + world_size = tp_size * gtp_size + _requires_multi_gpu(world_size) + _run_distributed(_worker_column_shape, world_size, tp_size, gtp_size) + + @pytest.mark.parametrize("tp_size,gtp_size", [(2, 2)]) + def test_forward_backward_correctness(self, tp_size, gtp_size): + world_size = tp_size * gtp_size + _requires_multi_gpu(world_size) + _run_distributed(_worker_column_correctness, world_size, tp_size, gtp_size) + + +# --------------------------------------------------------------------------- +# 3. 
TestTPGTPRowParallelLinear +# --------------------------------------------------------------------------- + + +def _worker_row_shape(rank, world_size, port, tp_size, gtp_size): + """Row-parallel: weight shape must be [out_f/gtp_size, in_f/tp_size].""" + tp_group, gtp_group, _, _ = _build_groups(rank, world_size, tp_size, gtp_size) + + in_f = tp_size * 64 # TE divides by tp_size → local in_f = 64 + out_f = gtp_size * 64 # GTP divides by gtp_size → local out_f = 64 + + layer = te.Linear( + in_features=in_f, + out_features=out_f, + parallel_mode="row", + bias=False, + params_dtype=torch.bfloat16, + device="cuda", + tp_group=tp_group, + gtp_group=gtp_group, + ) + + expected_shape = (out_f // gtp_size, in_f // tp_size) + assert isinstance( + layer.weight, GTPShardedParam + ), f"rank {rank}: weight should be GTPShardedParam" + assert ( + layer.weight.shape == expected_shape + ), f"rank {rank}: expected {expected_shape}, got {layer.weight.shape}" + + +def _worker_row_forward_backward(rank, world_size, port, tp_size, gtp_size): + """Row-parallel: output is all-reduced [batch, out_f]; backward produces finite dX.""" + torch.manual_seed(0) + tp_group, gtp_group, tp_rank, _ = _build_groups(rank, world_size, tp_size, gtp_size) + + batch = 16 + in_f = tp_size * 64 # full in_features + out_f = gtp_size * 64 # full out_features + dtype = torch.bfloat16 + + layer = te.Linear( + in_features=in_f, + out_features=out_f, + parallel_mode="row", + bias=False, + params_dtype=dtype, + device="cuda", + tp_group=tp_group, + gtp_group=gtp_group, + ) + + # Row-parallel: each TP rank takes the corresponding slice of in_f + full_inp = torch.randn(batch, in_f, dtype=dtype, device="cuda") + dist.broadcast(full_inp, src=0) + local_in_f = in_f // tp_size + inp = full_inp[:, tp_rank * local_in_f : (tp_rank + 1) * local_in_f] + inp = inp.clone().requires_grad_(True) + + # TE forward: GTP all-gathers weight, row-parallel all-reduces output across TP + out = layer(inp, is_first_microbatch=True) + 
assert out.shape == ( + batch, + out_f, + ), f"rank {rank}: output shape {out.shape} != ({batch}, {out_f})" + assert torch.isfinite(out).all(), f"rank {rank}: non-finite output" + + # wgrad RS path always accumulates into main_grad; allocate before backward. + layer.weight.main_grad = torch.zeros(layer.weight.shape, dtype=dtype, device="cuda") + out.sum().backward() + assert inp.grad is not None and inp.grad.shape == inp.shape + assert torch.isfinite(inp.grad).all(), f"rank {rank}: non-finite dX" + + +def _worker_row_correctness(rank, world_size, port, tp_size, gtp_size): + """Row-parallel all-reduced output must equal inp_full @ full_weight^T.""" + torch.manual_seed(0) + tp_group, gtp_group, tp_rank, gtp_rank = _build_groups(rank, world_size, tp_size, gtp_size) + + batch = 16 + in_f = tp_size * 64 + out_f = gtp_size * 64 + dtype = torch.bfloat16 + + layer = te.Linear( + in_features=in_f, + out_features=out_f, + parallel_mode="row", + bias=False, + params_dtype=dtype, + device="cuda", + tp_group=tp_group, + gtp_group=gtp_group, + ) + + # Reconstruct full weight: all-gather GTP shards → TP-local, then all-gather TP shards + shard = layer.weight.data.clone() + all_gtp_shards = [torch.zeros_like(shard) for _ in range(gtp_size)] + dist.all_gather(all_gtp_shards, shard, group=gtp_group) + tp_local_weight = torch.cat(all_gtp_shards, dim=0).float() # [out_f, in_f/tp_size] + + all_tp_weights = [torch.zeros_like(tp_local_weight) for _ in range(tp_size)] + dist.all_gather(all_tp_weights, tp_local_weight, group=tp_group) + full_weight = torch.cat(all_tp_weights, dim=1).float() # [out_f, in_f] + + # Full input (same on all ranks; we slice below to simulate row-parallel) + full_inp = torch.randn(batch, in_f, dtype=dtype, device="cuda") + dist.broadcast(full_inp, src=0) + local_in_f = in_f // tp_size + inp = full_inp[:, tp_rank * local_in_f : (tp_rank + 1) * local_in_f].clone() + inp.requires_grad_(True) + + out = layer(inp, is_first_microbatch=True) + + # Reference: full input 
@ full weight^T — all ranks should see the same output + ref = full_inp.float() @ full_weight.T + ref = ref.to(dtype) + assert torch.allclose( + out.float(), ref.float(), atol=0.1, rtol=0.1 + ), f"rank {rank}: output mismatch, max_diff={(out.float() - ref.float()).abs().max():.4f}" + + +class TestTPGTPRowParallelLinear: + @pytest.mark.parametrize("tp_size,gtp_size", [(2, 2)]) + def test_weight_shape(self, tp_size, gtp_size): + world_size = tp_size * gtp_size + _requires_multi_gpu(world_size) + _run_distributed(_worker_row_shape, world_size, tp_size, gtp_size) + + @pytest.mark.parametrize("tp_size,gtp_size", [(2, 2)]) + def test_forward_backward(self, tp_size, gtp_size): + world_size = tp_size * gtp_size + _requires_multi_gpu(world_size) + _run_distributed(_worker_row_forward_backward, world_size, tp_size, gtp_size) + + @pytest.mark.parametrize("tp_size,gtp_size", [(2, 2)]) + def test_forward_correctness(self, tp_size, gtp_size): + world_size = tp_size * gtp_size + _requires_multi_gpu(world_size) + _run_distributed(_worker_row_correctness, world_size, tp_size, gtp_size) + + +# --------------------------------------------------------------------------- +# 4. 
TestTPGTPLayerNormLinear – column-parallel smoke test +# --------------------------------------------------------------------------- + + +def _worker_layernorm_linear(rank, world_size, port, tp_size, gtp_size): + torch.manual_seed(0) + tp_group, gtp_group, _, _ = _build_groups(rank, world_size, tp_size, gtp_size) + + seq, batch = 4, 2 + in_f = 64 + out_f = tp_size * gtp_size * 32 + dtype = torch.bfloat16 + + layer = te.LayerNormLinear( + in_features=in_f, + out_features=out_f, + bias=False, + params_dtype=dtype, + parallel_mode="column", + device="cuda", + tp_group=tp_group, + gtp_group=gtp_group, + ) + assert isinstance( + layer.weight, GTPShardedParam + ), f"rank {rank}: LayerNormLinear.weight should be GTPShardedParam" + expected_rows = out_f // (tp_size * gtp_size) + assert layer.weight.shape == ( + expected_rows, + in_f, + ), f"rank {rank}: unexpected weight shape {layer.weight.shape}" + + inp = torch.randn(seq, batch, in_f, dtype=dtype, device="cuda", requires_grad=True) + dist.broadcast(inp, src=0) + + out = layer(inp, is_first_microbatch=True) + assert out.shape == (seq, batch, out_f // tp_size), f"rank {rank}: output shape {out.shape}" + assert torch.isfinite(out).all(), f"rank {rank}: non-finite output" + + # wgrad RS path always accumulates into main_grad; allocate before backward. 
+ layer.weight.main_grad = torch.zeros(layer.weight.shape, dtype=dtype, device="cuda") + out.sum().backward() + assert inp.grad is not None and inp.grad.shape == inp.shape + assert torch.isfinite(inp.grad).all(), f"rank {rank}: non-finite dX" + + +class TestTPGTPLayerNormLinear: + @pytest.mark.parametrize("tp_size,gtp_size", [(2, 2)]) + def test_forward_backward(self, tp_size, gtp_size): + world_size = tp_size * gtp_size + _requires_multi_gpu(world_size) + _run_distributed(_worker_layernorm_linear, world_size, tp_size, gtp_size) From ccfee04870f228814dde76b05b0d4a666d584c70 Mon Sep 17 00:00:00 2001 From: Shiqing Fan Date: Mon, 25 May 2026 01:29:52 -0700 Subject: [PATCH 02/59] code clean Signed-off-by: Shiqing Fan --- .../distributed/distributed_data_parallel.py | 17 ++--- .../core/distributed/param_and_grad_buffer.py | 2 +- megatron/core/optimizer/clip_grads.py | 4 +- megatron/core/optimizer/optimizer.py | 24 ++++--- megatron/core/parallel_state.py | 64 +++++++++---------- megatron/core/process_groups_config.py | 6 +- megatron/core/tensor_parallel/layers.py | 32 +++++----- megatron/core/transformer/cuda_graphs.py | 2 +- .../gtp/generalized_tensor_parallelism.py | 4 +- megatron/training/arguments.py | 6 +- megatron/training/utils/common_utils.py | 13 ++-- .../test_layer_wise_optimizer.py | 2 +- .../generalized_tensor_parallel/__init__.py | 13 ---- 13 files changed, 85 insertions(+), 104 deletions(-) diff --git a/megatron/core/distributed/distributed_data_parallel.py b/megatron/core/distributed/distributed_data_parallel.py index 4fd6edbf305..885edf3b1e6 100644 --- a/megatron/core/distributed/distributed_data_parallel.py +++ b/megatron/core/distributed/distributed_data_parallel.py @@ -232,8 +232,8 @@ def __init__( if self.ddp_config.average_in_collective: gradient_scaling_factor = 1.0 expert_gradient_scaling_factor = self.expt_dp_group.size() / self.dp_cp_group.size() - # GTP: collective averages over with_gtp group (size = dp_cp_size / ps_size). 
- # GTP RS already summed over ps_size ranks. To total 1/dp_cp_size scaling: + # GTP: collective averages over with_gtp group (size = dp_cp_size / gtp_size). + # GTP RS already summed over gtp_size ranks. To total 1/dp_cp_size scaling: # pre_scale * (1/with_gtp_size) = 1/dp_cp_size => pre_scale = with_gtp_size / dp_cp_size. gtp_gradient_scaling_factor = ( self.intra_dp_cp_with_gtp_group.size() / self.dp_cp_group.size() @@ -405,15 +405,10 @@ def __init__( ) # Create map from param to bucket group, used in pre_hook. - for bucket_groups in [ - self.bucket_groups, - self.expert_parallel_bucket_groups, - self.gtp_bucket_groups, - ]: - for bucket_group in bucket_groups: - for bucket in bucket_group.buckets: - for param in bucket.params_list: - self.param_to_bucket_group[param] = bucket_group + for bucket_group in self.all_bucket_groups: + for bucket in bucket_group.buckets: + for param in bucket.params_list: + self.param_to_bucket_group[param] = bucket_group # Delete references to weight_tensor if they exist since we don't want two parameter copies # if we re-mapped parameters (which happens when we use the distributed optimizer). 
diff --git a/megatron/core/distributed/param_and_grad_buffer.py b/megatron/core/distributed/param_and_grad_buffer.py index 14c3685277a..309ac7ef6d5 100644 --- a/megatron/core/distributed/param_and_grad_buffer.py +++ b/megatron/core/distributed/param_and_grad_buffer.py @@ -1605,4 +1605,4 @@ def partition_buckets( buffer.data_parallel_world_size, ) ) - return bucket_groups \ No newline at end of file + return bucket_groups diff --git a/megatron/core/optimizer/clip_grads.py b/megatron/core/optimizer/clip_grads.py index 657b3d11d33..07a3711cf8f 100644 --- a/megatron/core/optimizer/clip_grads.py +++ b/megatron/core/optimizer/clip_grads.py @@ -238,7 +238,7 @@ def count_zeros_fp32( total_num_zeros = torch.zeros(1, dtype=torch.int64, device='cuda') data_parallel_group = None use_megatron_fsdp = False - ps_rank = parallel_state.get_generalized_tensor_parallel_rank() + gtp_rank = parallel_state.get_generalized_tensor_parallel_rank() for param in parameters: if getattr(param, "__fsdp_param__", False) and param.grad is not None: # If the parameter is managed by Megatron FSDP, we need to handle it differently. 
@@ -258,7 +258,7 @@ def count_zeros_fp32( is_gtp_param = getattr(param, 'is_gtp', False) or ( HAVE_GTP and isinstance(param, GTPShardedParam) ) - is_not_ps_duplicate = is_gtp_param or ps_rank == 0 + is_not_ps_duplicate = is_gtp_param or gtp_rank == 0 if grad_not_none and is_not_shared and is_not_tp_duplicate and is_not_ps_duplicate: grad_obj = getattr(param, grad_attr) data_parallel_group = get_data_parallel_group_if_dtensor(grad_obj, data_parallel_group) diff --git a/megatron/core/optimizer/optimizer.py b/megatron/core/optimizer/optimizer.py index 0e5c6a587b3..9c3058654ea 100644 --- a/megatron/core/optimizer/optimizer.py +++ b/megatron/core/optimizer/optimizer.py @@ -165,8 +165,8 @@ def get_main_grads_for_grad_norm_split(self) -> Tuple[List[torch.Tensor], List[t params = self.get_parameters() non_gtp_grads = [] gtp_grads = [] - ps_rank = parallel_state.get_generalized_tensor_parallel_rank() - eps_rank = parallel_state.get_expert_generalized_tensor_parallel_rank() + gtp_rank = parallel_state.get_generalized_tensor_parallel_rank() + egtp_rank = parallel_state.get_expert_generalized_tensor_parallel_rank() for param in params: if self.config.use_precision_aware_optimizer_no_fp8_or_ds_fp8 or ( # Megatron-FSDP always uses decoupled_grad with FusedAdam. @@ -207,9 +207,9 @@ def get_main_grads_for_grad_norm_split(self) -> Tuple[List[torch.Tensor], List[t is_not_ps_duplicate = True else: if is_expert: - is_not_ps_duplicate = is_gtp_param or eps_rank == 0 + is_not_ps_duplicate = is_gtp_param or egtp_rank == 0 else: - is_not_ps_duplicate = is_gtp_param or ps_rank == 0 + is_not_ps_duplicate = is_gtp_param or gtp_rank == 0 if grad_not_none and is_not_shared and is_not_tp_duplicate and is_not_ps_duplicate: if is_gtp_param: @@ -253,7 +253,7 @@ def _compute_grad_norm_with_gtp(self, non_gtp_grads, gtp_grads): """Compute grad norm handling GTP grads that may need extra GTP/EGTP reduction. For MoE optimizers, grad_stats_parallel_group = TP×EP×PP which does NOT - include EPS. 
MoE-GTP grads need an extra EPS reduction. + include EGTP. MoE-GTP grads need an extra EGTP reduction. For dense-GTP optimizers, grad_stats_parallel_group = TP×PP×GTP which already includes GTP, so no extra reduction is needed. """ @@ -262,9 +262,9 @@ def _compute_grad_norm_with_gtp(self, non_gtp_grads, gtp_grads): if not gtp_grads: return get_grad_norm_fp32(non_gtp_grads, grad_stats_parallel_group=grad_stats_group) - # Check if this optimizer handles expert params that need EPS reduction. + # Check if this optimizer handles expert params that need EGTP reduction. # The model_parallel group for dense/GTP optimizers = TP×PP×GTP (includes GTP), - # but for MoE optimizers = TP×EP×PP (does NOT include EPS). + # but for MoE optimizers = TP×EP×PP (does NOT include EGTP). eps_world_size = parallel_state.get_expert_generalized_tensor_parallel_world_size() is_expert_optimizer = any(not getattr(p, 'allreduce', True) for p in self.get_parameters()) needs_eps_reduce = is_expert_optimizer and eps_world_size > 1 @@ -275,10 +275,10 @@ def _compute_grad_norm_with_gtp(self, non_gtp_grads, gtp_grads): non_gtp_grads + gtp_grads, grad_stats_parallel_group=grad_stats_group ) - # MoE optimizer with EPS: compute GTP norm separately, add EPS reduction. + # MoE optimizer with EGTP: compute GTP norm separately, add EGTP reduction. non_gtp_norm = get_grad_norm_fp32(non_gtp_grads, grad_stats_parallel_group=grad_stats_group) gtp_norm = get_grad_norm_fp32(gtp_grads, grad_stats_parallel_group=grad_stats_group) - # get_grad_norm_fp32 returns a float. We need to do the EPS reduction on GPU. + # get_grad_norm_fp32 returns a float. We need to do the EGTP reduction on GPU. 
gtp_norm_2 = torch.tensor([gtp_norm**2], dtype=torch.float, device='cuda') torch.distributed.all_reduce( gtp_norm_2, @@ -775,11 +775,9 @@ def __init__( # float16 params: if param.type() in ['torch.cuda.HalfTensor', 'torch.cuda.BFloat16Tensor']: float16_params_this_group.append(param) + # Create a copy main_param = param.detach().clone().float() - if HAVE_GTP and isinstance(param, GTPShardedParam): - main_param.is_gtp = True - else: - main_param.is_gtp = False + main_param.is_gtp = HAVE_GTP and isinstance(param, GTPShardedParam) # Copy tensor model parallel attributes. tensor_parallel.copy_tensor_model_parallel_attributes(main_param, param) diff --git a/megatron/core/parallel_state.py b/megatron/core/parallel_state.py index a6b2f9e1a97..ca13bfa4b8c 100644 --- a/megatron/core/parallel_state.py +++ b/megatron/core/parallel_state.py @@ -869,16 +869,16 @@ def initialize_model_parallel( ), "generalized tensor parallel group is already initialized" for cp_dp_ranks in decoder_rank_generator.get_ranks('cp-dp'): for i in range(0, len(cp_dp_ranks), generalized_tensor_parallel_size): - ps_ranks = cp_dp_ranks[i : i + generalized_tensor_parallel_size] + gtp_ranks = cp_dp_ranks[i : i + generalized_tensor_parallel_size] group = create_group( - ps_ranks, + gtp_ranks, timeout=timeout, pg_options=get_nccl_options("ps", nccl_comm_cfgs), group_desc="GENERALIZED_TENSOR_PARALLEL_GROUP", ) - if rank in ps_ranks: + if rank in gtp_ranks: _GENERALIZED_TENSOR_PARALLEL_GROUP = group - _GENERALIZED_TENSOR_PARALLEL_GLOBAL_RANKS = ps_ranks + _GENERALIZED_TENSOR_PARALLEL_GLOBAL_RANKS = gtp_ranks # Set NCCL_COLLNET_ENABLE to 1 to enable SHARP for the dp group. if sharp_enabled_group == "dp": @@ -1002,39 +1002,39 @@ def initialize_model_parallel( global _DATA_PARALLEL_GROUP_WITH_GTP global _DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP if generalized_tensor_parallel_size > 1: - # Build rank→ps_rank mapping. - rank_to_ps_rank = {} + # Build rank→gtp_rank mapping. 
+ rank_to_gtp_rank = {} for cp_dp_ranks in decoder_rank_generator.get_ranks('cp-dp'): for i in range(0, len(cp_dp_ranks), generalized_tensor_parallel_size): ps_chunk = cp_dp_ranks[i : i + generalized_tensor_parallel_size] - for ps_rank_idx, r in enumerate(ps_chunk): - rank_to_ps_rank[r] = ps_rank_idx + for gtp_rank_idx, r in enumerate(ps_chunk): + rank_to_gtp_rank[r] = gtp_rank_idx - # DP-only with GTP: create one group per (dp_group, ps_rank) pair. + # DP-only with GTP: create one group per (dp_group, gtp_rank) pair. # All ranks must participate in every create_group call (collective). for dp_ranks in decoder_rank_generator.get_ranks('dp'): - for ps_rank_val in range(generalized_tensor_parallel_size): - dp_ps_ranks = [r for r in dp_ranks if rank_to_ps_rank[r] == ps_rank_val] + for gtp_rank_val in range(generalized_tensor_parallel_size): + dp_gtp_ranks = [r for r in dp_ranks if rank_to_gtp_rank[r] == gtp_rank_val] group = create_group( - dp_ps_ranks, + dp_gtp_ranks, timeout=timeout, pg_options=get_nccl_options("dp_ps", nccl_comm_cfgs), group_desc="DATA_PARALLEL_GROUP_WITH_GTP", ) - if rank in dp_ps_ranks: + if rank in dp_gtp_ranks: _DATA_PARALLEL_GROUP_WITH_GTP = group # DP-CP with GTP for dp_cp_ranks in decoder_rank_generator.get_ranks('dp-cp'): - for ps_rank_val in range(generalized_tensor_parallel_size): - dp_cp_ps_ranks = [r for r in dp_cp_ranks if rank_to_ps_rank[r] == ps_rank_val] + for gtp_rank_val in range(generalized_tensor_parallel_size): + dp_cp_gtp_ranks = [r for r in dp_cp_ranks if rank_to_gtp_rank[r] == gtp_rank_val] group = create_group( - dp_cp_ps_ranks, + dp_cp_gtp_ranks, timeout=timeout, pg_options=get_nccl_options("dp_cp_ps", nccl_comm_cfgs), group_desc="DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP", ) - if rank in dp_cp_ps_ranks: + if rank in dp_cp_gtp_ranks: _DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP = group else: _DATA_PARALLEL_GROUP_WITH_GTP = _DATA_PARALLEL_GROUP @@ -1100,10 +1100,10 @@ def initialize_model_parallel( model_parallel_groups_set = set() for 
cp_dp_ranks in decoder_rank_generator.get_ranks('cp-dp'): for i in range(0, len(cp_dp_ranks), generalized_tensor_parallel_size): - ps_ranks = cp_dp_ranks[i : i + generalized_tensor_parallel_size] + gtp_ranks = cp_dp_ranks[i : i + generalized_tensor_parallel_size] # Merge tp-pp groups of all GTP peers mp_ranks = [] - for ps_r in ps_ranks: + for ps_r in gtp_ranks: mp_ranks.extend(rank_to_tp_pp[ps_r]) mp_ranks = sorted(set(mp_ranks)) mp_key = tuple(mp_ranks) @@ -1300,16 +1300,16 @@ def initialize_model_parallel( ), 'Expert generalized tensor parallel group is already initialized' for dp_ranks in expert_decoder_rank_generator.get_ranks('dp'): for i in range(0, len(dp_ranks), expert_generalized_tensor_parallel_size): - eps_ranks = dp_ranks[i : i + expert_generalized_tensor_parallel_size] + egtp_ranks = dp_ranks[i : i + expert_generalized_tensor_parallel_size] group = create_group( - eps_ranks, + egtp_ranks, timeout=timeout, pg_options=get_nccl_options("expt_gtp", nccl_comm_cfgs), group_desc="EXPERT_GENERALIZED_TENSOR_PARALLEL_GROUP", ) - if rank in eps_ranks: + if rank in egtp_ranks: _EXPERT_GENERALIZED_TENSOR_PARALLEL_GROUP = group - _EXPERT_GENERALIZED_TENSOR_PARALLEL_GLOBAL_RANKS = eps_ranks + _EXPERT_GENERALIZED_TENSOR_PARALLEL_GLOBAL_RANKS = egtp_ranks # Build the expert model parallel group global _EXPERT_MODEL_PARALLEL_GROUP, _EXPERT_MODEL_PARALLEL_RANKS @@ -1454,25 +1454,25 @@ def initialize_model_parallel( # Build expert DP group with expert generalized tensor parallel accounted for. global _EXPERT_DATA_PARALLEL_GROUP_WITH_GTP if expert_generalized_tensor_parallel_size > 1: - # Build rank→expert_ps_rank mapping. - rank_to_expert_ps_rank = {} + # Build rank→expert_gtp_rank mapping. 
+ rank_to_expert_gtp_rank = {} for dp_ranks in expert_decoder_rank_generator.get_ranks('dp'): for i in range(0, len(dp_ranks), expert_generalized_tensor_parallel_size): eps_chunk = dp_ranks[i : i + expert_generalized_tensor_parallel_size] - for eps_rank_idx, r in enumerate(eps_chunk): - rank_to_expert_ps_rank[r] = eps_rank_idx + for egtp_rank_idx, r in enumerate(eps_chunk): + rank_to_expert_gtp_rank[r] = egtp_rank_idx - # Create one group per (expert_dp_group, expert_ps_rank) pair (collective). + # Create one group per (expert_dp_group, expert_gtp_rank) pair (collective). for dp_ranks in expert_decoder_rank_generator.get_ranks('dp'): - for eps_rank_val in range(expert_generalized_tensor_parallel_size): - edp_ps_ranks = [r for r in dp_ranks if rank_to_expert_ps_rank[r] == eps_rank_val] + for egtp_rank_val in range(expert_generalized_tensor_parallel_size): + edp_gtp_ranks = [r for r in dp_ranks if rank_to_expert_gtp_rank[r] == egtp_rank_val] group = create_group( - edp_ps_ranks, + edp_gtp_ranks, timeout=timeout, pg_options=get_nccl_options("ep_dp_ps", nccl_comm_cfgs), group_desc="EXPERT_DATA_PARALLEL_GROUP_WITH_GTP", ) - if rank in edp_ps_ranks: + if rank in edp_gtp_ranks: _EXPERT_DATA_PARALLEL_GROUP_WITH_GTP = group else: _EXPERT_DATA_PARALLEL_GROUP_WITH_GTP = _EXPERT_DATA_PARALLEL_GROUP diff --git a/megatron/core/process_groups_config.py b/megatron/core/process_groups_config.py index 07ac28e16f1..5148bd1bffe 100644 --- a/megatron/core/process_groups_config.py +++ b/megatron/core/process_groups_config.py @@ -304,7 +304,7 @@ def setup_process_groups_for_optimizer( from megatron.core.utils import get_model_config if pg_collection is None: - # Use parallel_state groups. + # Use parallel_state groups # Dense (non-GTP) params use with_gtp=False (full DP group) to maximize # optimizer state sharding. GTP params use with_gtp=True (smaller group) # since GTP's reduce-scatter already handled the GTP dimension. 
@@ -454,7 +454,7 @@ def setup_process_groups_for_optimizer( else: intra_dp_cp_with_gtp_group = intra_dp_cp_group - # 7. EPS group (fallback to intra_expt_dp if not provided) + # 7. EGTP group (fallback to intra_expt_dp if not provided) if hasattr(pg_collection, 'intra_expt_dp_with_eps'): intra_expt_dp_with_egtp_group = pg_collection.intra_expt_dp_with_eps else: @@ -619,7 +619,7 @@ def setup_process_groups_for_ddp( result['pp_group'] = pg_collection.pp result['ep_group'] = pg_collection.ep - # EPS group (fallback to intra_expt_dp if not provided) + # EGTP group (fallback to intra_expt_dp if not provided) if hasattr(pg_collection, 'intra_expt_dp_with_eps'): result['intra_expt_dp_with_egtp_group'] = pg_collection.intra_expt_dp_with_eps else: diff --git a/megatron/core/tensor_parallel/layers.py b/megatron/core/tensor_parallel/layers.py index f30490ac4a6..91524ce1102 100644 --- a/megatron/core/tensor_parallel/layers.py +++ b/megatron/core/tensor_parallel/layers.py @@ -282,7 +282,7 @@ def __init__( tensor=self.weight, is_parallel=True, dim=0, stride=1 ) - self.ps_size = 1 + self.gtp_size = 1 if gtp_group is not None and gtp_group.size() > 1: assert HAVE_GTP, ( "generalized_tensor_parallel_size > 1 requires megatron.experimental.gtp to import " @@ -290,7 +290,7 @@ def __init__( "transformer_engine)." ) wrap_module_params_gtp(self, ["weight"], gtp_group) - self.ps_size = gtp_group.size() + self.gtp_size = gtp_group.size() # Nothing prefetches embedding — it is head of the UNGRAPHED # chain in fwd, and its bwd bypasses all_gather_and_prefetch_bwd # via GTPEmbeddingWeight.backward. @@ -312,7 +312,7 @@ def forward(self, input_): masked_input = input_ weight = self.weight - if self.ps_size > 1: + if self.gtp_size > 1: weight = GTPEmbeddingWeight.apply(self.weight) # Get the embeddings. 
@@ -490,7 +490,7 @@ def forward( grad_output_buffer, wgrad_deferral_limit, tp_group, - ps_size, + gtp_size, ): """Forward.""" if gradient_accumulation_fusion and hasattr(weight, "main_grad"): @@ -499,7 +499,7 @@ def forward( main_grad = None ctx.save_for_backward(input, weight) - if ps_size > 1: + if gtp_size > 1: weight = weight.all_gather_and_prefetch(fwd=True) # We can't save main_grad in save_for_backward as this module would be @@ -513,7 +513,7 @@ def forward( ctx.wgrad_deferral_limit = wgrad_deferral_limit ctx.grad_output_buffer = grad_output_buffer ctx.tp_group = tp_group - ctx.ps_size = ps_size + ctx.gtp_size = gtp_size if sequence_parallel: dim_size = list(input.size()) @@ -539,7 +539,7 @@ def backward(ctx, grad_output): use_bias = ctx.use_bias # GTP: re-gather weight for dgrad - if ctx.ps_size > 1: + if ctx.gtp_size > 1: sharded_weight = weight weight = sharded_weight.all_gather_and_prefetch_bwd() ctx.gradient_accumulation_fusion = False @@ -678,7 +678,7 @@ def backward(ctx, grad_output): grad_bias = grad_output.sum(dim=0) if use_bias else None # GTP: reduce-scatter wgrad - if ctx.ps_size > 1 and grad_weight is not None: + if ctx.gtp_size > 1 and grad_weight is not None: grad_weight = sharded_weight.wgrad_reduce_scatter(grad_weight) if ctx.sequence_parallel: @@ -714,7 +714,7 @@ def linear_with_grad_accumulation_and_async_allreduce( grad_output_buffer: Optional[List[torch.Tensor]] = None, wgrad_deferral_limit: Optional[int] = 0, tp_group: Optional[torch.distributed.ProcessGroup] = None, - ps_size: int = 1, + gtp_size: int = 1, ) -> torch.Tensor: """Linear layer execution with asynchronous communication and gradient accumulation fusion in backprop. 
@@ -791,7 +791,7 @@ def linear_with_grad_accumulation_and_async_allreduce( grad_output_buffer, wgrad_deferral_limit, tp_group, - ps_size, + gtp_size, ] if not linear_with_grad_accumulation_and_async_allreduce.warned: @@ -967,7 +967,7 @@ def __init__( else: self.weight = None - self.ps_size = 1 + self.gtp_size = 1 if gtp_group is not None and gtp_group.size() > 1: assert HAVE_GTP, ( "generalized_tensor_parallel_size > 1 requires megatron.experimental.gtp to import " @@ -975,7 +975,7 @@ def __init__( "transformer_engine)." ) wrap_module_params_gtp(self, ["weight"], gtp_group) - self.ps_size = gtp_group.size() + self.gtp_size = gtp_group.size() if bias: if config.use_cpu_initialization: @@ -1129,7 +1129,7 @@ def forward( else None ), tp_group=self.tp_group, - ps_size=self.ps_size, + gtp_size=self.gtp_size, ) gather_output = self.gather_output @@ -1327,7 +1327,7 @@ def __init__( ) setattr(self.weight, "allreduce", not (self.is_expert and self.expert_parallel)) - self.ps_size = 1 + self.gtp_size = 1 if gtp_group is not None and gtp_group.size() > 1: assert HAVE_GTP, ( "generalized_tensor_parallel_size > 1 requires megatron.experimental.gtp to import " @@ -1335,7 +1335,7 @@ def __init__( "transformer_engine)." ) wrap_module_params_gtp(self, ["weight"], gtp_group) - self.ps_size = gtp_group.size() + self.gtp_size = gtp_group.size() if bias: if config.use_cpu_initialization: @@ -1409,7 +1409,7 @@ def forward(self, input_: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: sequence_parallel=False, tp_group=None, grad_output_buffer=None, - ps_size=self.ps_size, + gtp_size=self.gtp_size, ) # All-reduce across all the partitions. 
diff --git a/megatron/core/transformer/cuda_graphs.py b/megatron/core/transformer/cuda_graphs.py index d698f4eafe1..806c8c6c446 100644 --- a/megatron/core/transformer/cuda_graphs.py +++ b/megatron/core/transformer/cuda_graphs.py @@ -1971,7 +1971,7 @@ def __call__(self, megatron_module, args, kwargs, cache_key=None): return super(MegatronModule, megatron_module).__call__(*args, **kwargs) self.is_first_microbatch = False - # If forward only, next replay should be a forward pass as well. + # If forward only, next replay should be a forward pass as well if is_inference_mode or not torch.is_grad_enabled() or not runner.fwd_graph_recorded: runner.status = _GraphStatus.FWD_READY else: diff --git a/megatron/experimental/gtp/generalized_tensor_parallelism.py b/megatron/experimental/gtp/generalized_tensor_parallelism.py index 85dd701654f..a15ad88f300 100644 --- a/megatron/experimental/gtp/generalized_tensor_parallelism.py +++ b/megatron/experimental/gtp/generalized_tensor_parallelism.py @@ -428,7 +428,7 @@ def _gtp_slice_one_param(param, gtp_group, *, name=""): def _gtp_attach_attrs(gtp_shard, gtp_group, *, is_grouped=False, expert_idx=0): - """Attach group / ps_size / routed-expert tags and register in _GTP_PARAMS. + """Attach group / gtp_size / routed-expert tags and register in _GTP_PARAMS. Kept separate from _gtp_slice_one_param so attrs land on the post-quantize param (when quantize fires between slice and attach). @@ -440,7 +440,7 @@ def _gtp_attach_attrs(gtp_shard, gtp_group, *, is_grouped=False, expert_idx=0): # cuda_graph_modules at init time. 
gtp_shard.chain_id = GTPChain.UNGRAPHED.value gtp_shard.group = gtp_group - gtp_shard.ps_size = gtp_group.size() + gtp_shard.gtp_size = gtp_group.size() global _GTP_PARAMS _GTP_PARAMS.append(gtp_shard) diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py index 2543a955c7c..2f0da6c9e49 100644 --- a/megatron/training/arguments.py +++ b/megatron/training/arguments.py @@ -1434,8 +1434,8 @@ def validate_args(args, defaults={}): if args.generalized_tensor_parallel_size > 1 or args.expert_generalized_tensor_parallel_size > 1: - ps_size = args.generalized_tensor_parallel_size - eps_size = args.expert_generalized_tensor_parallel_size + gtp_size = args.generalized_tensor_parallel_size + egtp_size = args.expert_generalized_tensor_parallel_size if get_device_arch_version() >= 10: # Setting GTP communication groups for high priority streams for Blackwell and later # architectures. Assigning high priority to communication streams ensures that @@ -1444,7 +1444,7 @@ def validate_args(args, defaults={}): if 'ps' not in args.high_priority_stream_groups: args.high_priority_stream_groups.append('ps') warn_rank_0("Setting 'ps' group for high priority streams.") - if eps_size > 1 and 'expt_gtp' not in args.high_priority_stream_groups: + if egtp_size > 1 and 'expt_gtp' not in args.high_priority_stream_groups: args.high_priority_stream_groups.append('expt_gtp') warn_rank_0("Setting 'expt_gtp' group for high priority streams.") diff --git a/megatron/training/utils/common_utils.py b/megatron/training/utils/common_utils.py index a6806b867c6..5dc7a677b53 100644 --- a/megatron/training/utils/common_utils.py +++ b/megatron/training/utils/common_utils.py @@ -86,6 +86,7 @@ def calc_params_l2_norm(model, force_create_fp32_copy=False): if getattr(args, 'use_megatron_fsdp', False): # All Megatron FSDP parameters are expected to be PyTorch DTensor. + # params_data is a dict of device_mesh -> list of local tensors. 
params = [] for model_chunk in model: model_chunk.stop_communication() @@ -110,8 +111,8 @@ def calc_params_l2_norm(model, force_create_fp32_copy=False): moe_gtp_params_data = [] # MoE-GTP, non-sharded moe_gtp_sharded_params_data = [] # MoE-GTP, sharded → reduce over expert_dp_with_ps - ps_rank = mpu.get_generalized_tensor_parallel_rank() - eps_rank = mpu.get_expert_generalized_tensor_parallel_rank() + gtp_rank = mpu.get_generalized_tensor_parallel_rank() + egtp_rank = mpu.get_expert_generalized_tensor_parallel_rank() for model_chunk in model: for param in model_chunk.parameters(): @@ -125,10 +126,10 @@ def calc_params_l2_norm(model, force_create_fp32_copy=False): # Filter GTP duplicates: non-GTP params are replicated across GTP ranks. if is_expert: - if not is_gtp and eps_rank != 0: + if not is_gtp and egtp_rank != 0: continue else: - if not is_gtp and ps_rank != 0: + if not is_gtp and gtp_rank != 0: continue # Route to the correct bucket. @@ -177,8 +178,8 @@ def _sum_reduce(tensor, group): norm_2 = params_norm_2 + sharded_norm_2 + gtp_norm_2 + gtp_sharded_norm_2 # --- Combine MoE + MoE-GTP norms --- - # expert_model_parallel = TP×EP×PP (does NOT include EPS), so we need - # an explicit EPS reduction for MoE-GTP before the model-parallel reduce. + # expert_model_parallel = TP×EP×PP (does NOT include EGTP), so we need + # an explicit EGTP reduction for MoE-GTP before the model-parallel reduce. 
moe_gtp_combined_norm_2 = moe_gtp_norm_2 + moe_gtp_sharded_norm_2 _sum_reduce(moe_gtp_combined_norm_2, mpu.get_expert_generalized_tensor_parallel_group()) moe_total_norm_2 = moe_norm_2 + moe_sharded_norm_2 + moe_gtp_combined_norm_2 diff --git a/tests/unit_tests/dist_checkpointing/test_layer_wise_optimizer.py b/tests/unit_tests/dist_checkpointing/test_layer_wise_optimizer.py index f1525c81fa4..42ef0a401ee 100644 --- a/tests/unit_tests/dist_checkpointing/test_layer_wise_optimizer.py +++ b/tests/unit_tests/dist_checkpointing/test_layer_wise_optimizer.py @@ -130,7 +130,7 @@ def setup_method(self, method): def teardown_method(self, method): Utils.destroy_model_parallel() - def test_generalized_tensor_parallel(self): + def test_parameter_sharding(self): """Test that parameters are correctly sharded across DP ranks.""" Utils.initialize_model_parallel(1, 1) diff --git a/tests/unit_tests/generalized_tensor_parallel/__init__.py b/tests/unit_tests/generalized_tensor_parallel/__init__.py index e093dccdbb0..e69de29bb2d 100644 --- a/tests/unit_tests/generalized_tensor_parallel/__init__.py +++ b/tests/unit_tests/generalized_tensor_parallel/__init__.py @@ -1,13 +0,0 @@ -# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. - -"""GTP unit tests — launched torchrun-native (same as the rest of Megatron's unit tests). - - export TE_PATH=/path/to/TransformerEngine - export PYTHONPATH="${TE_PATH}:${PYTHONPATH}" - torchrun --nproc-per-node 4 -m pytest tests/unit_tests/generalized_tensor_parallel/ -v - -Tests use the torchrun-managed dist group (initialized once per module via -``Utils.initialize_model_parallel``) and build their own GTP subgroups with -``dist.new_group(...)``. Multi-GPU tests skip when the world_size requested by a -test doesn't match what torchrun launched with (all GTP multi-GPU tests need 4). 
-""" From 8b4041af76534f27575c9e9139cb326f8f1c33bc Mon Sep 17 00:00:00 2001 From: Shiqing Fan Date: Mon, 25 May 2026 02:15:21 -0700 Subject: [PATCH 03/59] fix comments; defer wgrad->dgrad support in following up MR; defer nvfp4 amax reduction fusion across ExpertETP group in following up MR. Signed-off-by: Shiqing Fan --- .../distributed/distributed_data_parallel.py | 2 +- .../core/extensions/transformer_engine.py | 18 +- megatron/core/optimizer/clip_grads.py | 6 +- megatron/core/optimizer/optimizer.py | 15 +- megatron/core/process_groups_config.py | 12 +- megatron/experimental/gtp/README.md | 16 +- .../gtp/generalized_tensor_parallelism.py | 154 ++++-------------- megatron/training/arguments.py | 20 +-- megatron/training/global_vars.py | 2 - megatron/training/utils/common_utils.py | 4 +- 10 files changed, 69 insertions(+), 180 deletions(-) diff --git a/megatron/core/distributed/distributed_data_parallel.py b/megatron/core/distributed/distributed_data_parallel.py index 885edf3b1e6..2bdb306861a 100644 --- a/megatron/core/distributed/distributed_data_parallel.py +++ b/megatron/core/distributed/distributed_data_parallel.py @@ -252,7 +252,7 @@ def __init__( pg_collection = ProcessGroupCollection(tp=self.tp_group, dp_cp=self.dp_cp_group) for buffer_key, (params, param_indices) in buffer_groups.items(): if buffer_key.is_expert_parallel: - # Use the with_eps group so EGTP-sharded routed experts (whose grads + # Use the with_egtp group so EGTP-sharded routed experts (whose grads # are already RS'd over the expert-GTP axis) only DP-reduce over true # weight replicas. Falls back to intra_expt_dp_group when GTP is off. 
data_parallel_group = self.intra_expt_dp_with_egtp_group diff --git a/megatron/core/extensions/transformer_engine.py b/megatron/core/extensions/transformer_engine.py index c7f4534c1dc..7a986870a5a 100644 --- a/megatron/core/extensions/transformer_engine.py +++ b/megatron/core/extensions/transformer_engine.py @@ -1223,10 +1223,8 @@ def extra_repr(self) -> str: f"in_features={self.in_features}, " f"out_features={self.out_features}, " f"bias={self.use_bias}, " - f"TP={self.tp_size}, " - f"GTP={self.gtp_size})" - if hasattr(self, "gtp_size") - else ")" + f"TP={self.tp_size}" + + (f", GTP={self.gtp_size}" if hasattr(self, "gtp_size") else "") ) def backward_dw(self): @@ -1344,10 +1342,8 @@ def extra_repr(self) -> str: f"in_features={self.in_features}, " f"out_features={self.out_features}, " f"bias={self.use_bias}, " - f"TP={self.tp_size}, " - f"GTP={self.gtp_size})" - if hasattr(self, "gtp_size") - else ")" + f"TP={self.tp_size}" + + (f", GTP={self.gtp_size}" if hasattr(self, "gtp_size") else "") ) def backward_dw(self): @@ -1459,10 +1455,8 @@ def extra_repr(self) -> str: f"in_features={self.in_features}, " f"out_features={self.out_features}, " f"bias={self.use_bias}, " - f"TP={self.tp_size}, " - f"GTP={self.gtp_size})" - if hasattr(self, "gtp_size") - else ")" + f"TP={self.tp_size}" + + (f", GTP={self.gtp_size}" if hasattr(self, "gtp_size") else "") ) def backward_dw(self): diff --git a/megatron/core/optimizer/clip_grads.py b/megatron/core/optimizer/clip_grads.py index 07a3711cf8f..ce33cacac17 100644 --- a/megatron/core/optimizer/clip_grads.py +++ b/megatron/core/optimizer/clip_grads.py @@ -253,13 +253,13 @@ def count_zeros_fp32( is_not_shared = param_is_not_shared(param) is_not_tp_duplicate = param_is_not_tensor_parallel_duplicate(param, tp_group=tp_group) if use_distributed_optimizer: - is_not_ps_duplicate = True + is_not_gtp_duplicate = True else: is_gtp_param = getattr(param, 'is_gtp', False) or ( HAVE_GTP and isinstance(param, GTPShardedParam) ) - 
is_not_ps_duplicate = is_gtp_param or gtp_rank == 0 - if grad_not_none and is_not_shared and is_not_tp_duplicate and is_not_ps_duplicate: + is_not_gtp_duplicate = is_gtp_param or gtp_rank == 0 + if grad_not_none and is_not_shared and is_not_tp_duplicate and is_not_gtp_duplicate: grad_obj = getattr(param, grad_attr) data_parallel_group = get_data_parallel_group_if_dtensor(grad_obj, data_parallel_group) grad = to_local_if_dtensor(grad_obj).detach() diff --git a/megatron/core/optimizer/optimizer.py b/megatron/core/optimizer/optimizer.py index 9c3058654ea..7cbdd731156 100644 --- a/megatron/core/optimizer/optimizer.py +++ b/megatron/core/optimizer/optimizer.py @@ -204,14 +204,11 @@ def get_main_grads_for_grad_norm_split(self) -> Tuple[List[torch.Tensor], List[t # GTP-duplicate filter: only needed for non-distributed optimizer. is_expert = not getattr(param, 'allreduce', True) if hasattr(self, 'ddp_config') and self.ddp_config.use_distributed_optimizer: - is_not_ps_duplicate = True + is_not_gtp_duplicate = True else: - if is_expert: - is_not_ps_duplicate = is_gtp_param or egtp_rank == 0 - else: - is_not_ps_duplicate = is_gtp_param or gtp_rank == 0 + is_not_gtp_duplicate = is_gtp_param or (egtp_rank if is_expert else gtp_rank) == 0 - if grad_not_none and is_not_shared and is_not_tp_duplicate and is_not_ps_duplicate: + if grad_not_none and is_not_shared and is_not_tp_duplicate and is_not_gtp_duplicate: if is_gtp_param: gtp_grads.append(grad) else: @@ -265,11 +262,11 @@ def _compute_grad_norm_with_gtp(self, non_gtp_grads, gtp_grads): # Check if this optimizer handles expert params that need EGTP reduction. # The model_parallel group for dense/GTP optimizers = TP×PP×GTP (includes GTP), # but for MoE optimizers = TP×EP×PP (does NOT include EGTP). 
- eps_world_size = parallel_state.get_expert_generalized_tensor_parallel_world_size() + egtp_world_size = parallel_state.get_expert_generalized_tensor_parallel_world_size() is_expert_optimizer = any(not getattr(p, 'allreduce', True) for p in self.get_parameters()) - needs_eps_reduce = is_expert_optimizer and eps_world_size > 1 + needs_egtp_reduce = is_expert_optimizer and egtp_world_size > 1 - if not needs_eps_reduce: + if not needs_egtp_reduce: # Dense/GTP optimizer: grad_stats_group already covers GTP. return get_grad_norm_fp32( non_gtp_grads + gtp_grads, grad_stats_parallel_group=grad_stats_group diff --git a/megatron/core/process_groups_config.py b/megatron/core/process_groups_config.py index 5148bd1bffe..86a8967f54a 100644 --- a/megatron/core/process_groups_config.py +++ b/megatron/core/process_groups_config.py @@ -449,14 +449,14 @@ def setup_process_groups_for_optimizer( expt_tp_pp_group = pg_collection.tp_ep_pp # 6. GTP with_gtp group (fallback to intra_dp_cp if not provided) - if hasattr(pg_collection, 'intra_dp_cp_with_ps'): - intra_dp_cp_with_gtp_group = pg_collection.intra_dp_cp_with_ps + if hasattr(pg_collection, 'intra_dp_cp_with_gtp'): + intra_dp_cp_with_gtp_group = pg_collection.intra_dp_cp_with_gtp else: intra_dp_cp_with_gtp_group = intra_dp_cp_group # 7. 
EGTP group (fallback to intra_expt_dp if not provided) - if hasattr(pg_collection, 'intra_expt_dp_with_eps'): - intra_expt_dp_with_egtp_group = pg_collection.intra_expt_dp_with_eps + if hasattr(pg_collection, 'intra_expt_dp_with_egtp'): + intra_expt_dp_with_egtp_group = pg_collection.intra_expt_dp_with_egtp else: intra_expt_dp_with_egtp_group = intra_expt_dp_group @@ -620,8 +620,8 @@ def setup_process_groups_for_ddp( result['ep_group'] = pg_collection.ep # EGTP group (fallback to intra_expt_dp if not provided) - if hasattr(pg_collection, 'intra_expt_dp_with_eps'): - result['intra_expt_dp_with_egtp_group'] = pg_collection.intra_expt_dp_with_eps + if hasattr(pg_collection, 'intra_expt_dp_with_egtp'): + result['intra_expt_dp_with_egtp_group'] = pg_collection.intra_expt_dp_with_egtp else: result['intra_expt_dp_with_egtp_group'] = result['intra_expt_dp_group'] diff --git a/megatron/experimental/gtp/README.md b/megatron/experimental/gtp/README.md index 0b80d50082b..ff28f27e5de 100644 --- a/megatron/experimental/gtp/README.md +++ b/megatron/experimental/gtp/README.md @@ -138,13 +138,11 @@ At bwd step *i* the step is launching *RS of wgrad_i* while finalizing the *prev Communication never blocks compute except at the very first layer of each direction (cold start) and at enforced serialization points (CG/eager drains, finalize-grads barrier). -### 7a. wgrad-before-dgrad schedule +### 7a. wgrad-before-dgrad schedule *(deferred to a follow-up MR)* -Default: backward runs dgrad GEMM, then wgrad GEMM, then issues the GTP wgrad RS — the RS overlaps with the *next* layer's bwd GEMMs (the one-step deferral above). +Current behavior: backward always runs dgrad GEMM, then wgrad GEMM, then issues the GTP wgrad RS — the RS overlaps with the *next* layer's bwd GEMMs (the one-step deferral above). 
-Opt-in via `GTPConfig.wgrad_before_dgrad = True`: backward runs wgrad GEMM first, then issues the GTP wgrad RS, then runs dgrad GEMM — the RS NCCL overlaps with the dgrad GEMM of the **same** layer, and the prev_w AG prefetch issued at the top of bwd overlaps with the wgrad GEMM. Only affects `_Linear` and `_LayerNormLinear`; `LayerNormMLP` and `GroupedLinear` keep the original schedule. - -When to enable it: GTP + no-TP. The TP comm-overlap path assumes the original dgrad-first order, so under TP > 1 the flag stays False. Megatron auto-sets it for the GTP+no-TP case in `validate_args`. +A future MR will add an opt-in wgrad-before-dgrad schedule on `_Linear` / `_LayerNormLinear` so the GTP wgrad RS NCCL overlaps with the dgrad GEMM of the **same** layer (best for the GTP + no-TP case). Until that MR lands, attempting to set `GTPConfig.wgrad_before_dgrad = True` raises `NotImplementedError`. ## 8. Scaling @@ -199,8 +197,8 @@ At iter-0 you'll see one rank-0 log line confirming the active config: ``` GTP enabled. 
GTPConfig(pad_for_alignment=16, check_param_states=False, - weight_prefetch=True, async_reduction=True, wgrad_before_dgrad=True, - fp8_param_gather=False, coalesce_amax_allreduce=True) + weight_prefetch=True, async_reduction=True, wgrad_before_dgrad=False, + fp8_param_gather=False, coalesce_amax_allreduce=False) ``` ### What the flags do under the hood @@ -220,9 +218,9 @@ update_gtp_config( pad_for_alignment=16, # NVFP4: 16, MXFP8: 32, BF16: any; auto-set in training.py weight_prefetch=True, # Disable to debug the cold-start path async_reduction=True, # Wheter perform GTP gradient reduction asynchronously - wgrad_before_dgrad=False, # Auto-set True for GTP+no-TP + # wgrad_before_dgrad: deferred — setting True currently raises NotImplementedError fp8_param_gather=False, # Companion to Megatron's --fp8-param-gather; currently asserted off - coalesce_amax_allreduce=True, # NVFP4 only; falls back if TE lacks compute_amax_nvfp4 + # coalesce_amax_allreduce: deferred — setting True logs an info and falls back to per-weight ) ``` diff --git a/megatron/experimental/gtp/generalized_tensor_parallelism.py b/megatron/experimental/gtp/generalized_tensor_parallelism.py index a15ad88f300..fb47f1eb280 100644 --- a/megatron/experimental/gtp/generalized_tensor_parallelism.py +++ b/megatron/experimental/gtp/generalized_tensor_parallelism.py @@ -252,87 +252,12 @@ def get_rs_streams_for_chain(chain_id: str) -> list: return [s for k, s in _RS_STREAMS.items() if k[0] == chain_id] -# Cached once per process: whether the TE build exposes the split-phase APIs. -_COALESCED_AMAX_TE_APIS_AVAILABLE = hasattr(tex, "compute_amax_nvfp4") and hasattr( - tex, "quantize_cast_only_nvfp4" -) - -# Tier-2: multi-tensor amax kernel fuses N per-expert (zero_amax + amax + D2D) chains -# into two multi-tensor kernel launches. Independent of Tier-1 coalesced allreduce. 
-_MULTI_AMAX_TE_API_AVAILABLE = hasattr(tex, "compute_multi_amax_nvfp4") - - -def _coalesced_amax_static_eligible(weights): - """Check whether the coalesced-amax path is applicable (NVFP4 only). - - Caller already gates on GTP_CONFIG.coalesce_amax_allreduce (False for - non-NVFP4). Here we additionally verify TE API availability, batch size, - quantizer type (must have amax reduction), and the RHT flag.""" - if not _COALESCED_AMAX_TE_APIS_AVAILABLE: - return False - if len(weights) <= 1: - return False - has_amax = [getattr(w._quantizer, "with_amax_reduction", False) for w in weights] - if not all(has_amax): - return False - has_rht = any(getattr(w._quantizer, "with_rht", False) for w in weights) - if has_rht: - return False - return True - - -def _quantize_with_coalesced_amax(weights, cast_noop_flag): - """Replace the per-weight (compute_amax + allreduce + cast) loop with: - compute_amax loop → one coalesced allreduce → cast loop. - - The caller has already gated on ``skip_weight_cast`` (see - ``_all_gather_weight``); inside this function we always do the work. - """ - group = weights[0]._quantizer.amax_reduction_group - - # Materialize padded shards once; on padded last-rank get_padded_shard() - # launches an F.pad kernel, and we'd otherwise pay it twice per expert. - padded_shards = [w.get_padded_shard() for w in weights] - - # Phase 1: per-weight local amax into each w.quantized's amax buffers. - # Keep rowwise/columnwise both populated so the group allreduce sees - # whichever the consumer GEMM will read. - for w in weights: - w._quantizer.set_usage(rowwise=True, columnwise=True) - if _MULTI_AMAX_TE_API_AVAILABLE: - # Tier-2: single multi-tensor launch writes both rowwise and columnwise - # amax directly (no per-expert D2D replicate), fusing N per-expert chains. 
- # Reuse the _cached_quantizers list already populated by _all_gather_weight - anchor = weights[0] - quantizer_list = anchor._cached_quantizers - if quantizer_list is None: - quantizer_list = [w._quantizer for w in weights] - anchor._cached_quantizers = quantizer_list - tex.compute_multi_amax_nvfp4(padded_shards, quantizer_list, [w.quantized for w in weights]) - else: - for w, shard in zip(weights, padded_shards): - tex.compute_amax_nvfp4(tensor=shard, quantizer=w._quantizer, output=w.quantized) - - # Phase 2: one coalesced allreduce across every weight's amax tensors. - amax_tensors = [] - for w in weights: - rw = w.quantized._amax_rowwise - cw = w.quantized._amax_columnwise - if rw is not None: - amax_tensors.append(rw) - if cw is not None and (rw is None or cw.data_ptr() != rw.data_ptr()): - amax_tensors.append(cw) - torch.distributed.all_reduce_coalesced( - amax_tensors, op=torch.distributed.ReduceOp.MAX, group=group - ) - - # Phase 3: per-weight cast using the pre-reduced amax; skips the internal - # allreduce inside the quantizer. - for w, shard in zip(weights, padded_shards): - tex.quantize_cast_only_nvfp4( - tensor=shard, quantizer=w._quantizer, output=w.quantized, noop=cast_noop_flag - ) - w.did_cast_to_low_precision = True +# NOTE: Coalesced amax reduction across the GTP group is deferred to a follow-up +# MR. The TE-side split-phase APIs (`compute_amax_nvfp4`, `quantize_cast_only_nvfp4`, +# `compute_multi_amax_nvfp4`) and the Mcore-side `_quantize_with_coalesced_amax` +# helper have been removed. The `GTPConfig.coalesce_amax_allreduce` knob is kept +# as a stub: setting it to True logs an info message and falls back to the +# per-weight quantize path inside `_all_gather_weight`. @dataclass @@ -348,25 +273,21 @@ class GTPConfig: # overlap. When False, every wgrad RS is synchronous and finalizes # inline, at the cost of that overlap. 
async_reduction: bool = True - # When True, _Linear.backward and _LayerNormLinear.backward run wgrad - # GEMM before dgrad GEMM. The GTP wgrad reduce-scatter is issued between - # them so its NCCL kernel overlaps with the dgrad GEMM, and the prev_w - # AG prefetch (issued by all_gather_and_prefetch_bwd at the top of bwd) - # overlaps with wgrad GEMM. When False (default), use the original - # dgrad-first order. Only affects _Linear and _LayerNormLinear; MLP and - # GroupedLinear keep the original schedule. + # Stub field, reserved for a follow-up MR that will land the wgrad-before-dgrad + # schedule on the TE side (_Linear / _LayerNormLinear backward run wgrad GEMM + # before dgrad GEMM, so the GTP wgrad reduce-scatter overlaps with dgrad GEMM). + # Setting this to True via update_config() currently raises NotImplementedError. wgrad_before_dgrad: bool = False # GTP companion to Megatron --fp8-param-gather: optimizer casts FP32 master # directly into GTPShardedParam.quantized; forward's _quantize_if_needed # short-circuits to the cached FP8. Moves BF16->FP8 off the fwd critical path. fp8_param_gather: bool = False - # When True and the weight list in _all_gather_weight contains >1 NVFP4 - # shards that share an amax reduction group, coalesce their per-expert - # amax allreduces into a single NCCL call. Requires TE with - # tex.compute_amax_nvfp4 / tex.quantize_cast_only_nvfp4; the eligibility - # guard in _coalesced_amax_static_eligible falls back to the per-weight - # path when either binding is missing. - coalesce_amax_allreduce: bool = True + # Stub field, reserved for a follow-up MR that will re-land the coalesced + # NVFP4 amax allreduce across the GTP group (single NCCL call across all + # batched per-expert amax tensors, plus the TE split-phase compute_amax / + # quantize_cast primitives). Setting this to True via update_config() + # currently logs an info message and falls back to the per-weight path. 
+ coalesce_amax_allreduce: bool = False GTP_CONFIG = GTPConfig() @@ -374,6 +295,17 @@ class GTPConfig: def update_config(**kwargs): """Update the global GTP configuration.""" + if kwargs.get("wgrad_before_dgrad"): + raise NotImplementedError("Wgrad->Dgrad schedule to be supported later") + if kwargs.get("coalesce_amax_allreduce"): + import warnings + warnings.warn( + "GTPConfig.coalesce_amax_allreduce: coalesced amax reduction across the " + "GTP group is deferred in a followup MR; falling back to per-weight amax " + "allreduce.", + stacklevel=2, + ) + kwargs["coalesce_amax_allreduce"] = False for key, value in kwargs.items(): if not hasattr(GTP_CONFIG, key): raise ValueError(f"Unknown GTP config option: {key}") @@ -453,8 +385,6 @@ def wrap_module_params_gtp(module, weight_names, gtp_group, is_grouped=None): 2. TE modules: per-param body no-ops because the reset_parameters hook already produced GTPShardedParam instances. - Also stamps GTP_CONFIG.wgrad_before_dgrad onto the module so TE's - autograd backward can read it without importing GTP_CONFIG. """ if gtp_group.size() == 1: return @@ -480,11 +410,6 @@ def wrap_module_params_gtp(module, weight_names, gtp_group, is_grouped=None): allweights = [getattr(module, name) for name in weight_names] allweights[0].weight_list = allweights - # Stamp scheduling flag onto the TE module so its autograd functions can - # read it without naming GTP_CONFIG. Default is False on the TE side; we - # only override when GTP is actually active for this module. - module.wgrad_before_dgrad = GTP_CONFIG.wgrad_before_dgrad - def gtp_slice_in_reset_parameters(module, name, param, expert_idx=0): """Slice + attach attrs for one param. Called between init_fn(param) and @@ -857,28 +782,13 @@ def _all_gather_weight(self, async_op, skip_weight_cast, cast_noop_flag, fwd, nv w._set_state(new_state) # 2. Prepare: quantize, set usage direction. 
- # Static eligibility (quantizer class, flags, amax group) is fixed - # after model construction — compute once and cache on self so the - # hot path only pays the cheap per-call skip_weight_cast check. - if GTP_CONFIG.coalesce_amax_allreduce: - static_ok = getattr(self, "_coalesced_amax_static", None) - if static_ok is None: - static_ok = _coalesced_amax_static_eligible(weights) - self._coalesced_amax_static = static_ok - # Per-call: match the skip_weight_cast gate in _quantize_if_needed - # (fire when either skip_weight_cast is False or cast_noop_flag - # was provided by the FP8/NVFP4 recipe). - use_coalesced = static_ok and not (skip_weight_cast is True and cast_noop_flag is None) - else: - use_coalesced = False - - # Quantize step: coalesced batch / fp8_param_gather cache hit (skip) / - # legacy per-weight. set_usage runs uniformly after, gated by did_cast. + # NOTE: The coalesced amax allreduce path (gated by + # GTPConfig.coalesce_amax_allreduce) is deferred to a follow-up MR; + # always use the per-weight quantize path here. update_config() logs + # an info message when a caller tries to enable the deferred knob. fp8_pg_hit = GTP_CONFIG.fp8_param_gather and self.did_cast_to_low_precision - if use_coalesced: - _quantize_with_coalesced_amax(weights, cast_noop_flag) - elif not fp8_pg_hit: + if not fp8_pg_hit: for w in weights: w._quantize_if_needed(skip_weight_cast, cast_noop_flag) diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py index 2f0da6c9e49..7e7de73a06a 100644 --- a/megatron/training/arguments.py +++ b/megatron/training/arguments.py @@ -1462,21 +1462,13 @@ def validate_args(args, defaults={}): "force setting NCCL_PROTO=Simple might introduce bad perf." ) - # When GTP is enabled and TP is disabled, default the bwd schedule to - # wgrad-before-dgrad on _Linear / _LayerNormLinear. The GTP wgrad - # reduce-scatter then overlaps with the dgrad GEMM, and the prev_w - # AG prefetch overlaps with the wgrad GEMM. 
With TP enabled, the - # TP comm-overlap path assumes dgrad-first, so leave the default - # order untouched there. + # Under GTP+no-TP the optimal bwd schedule would be wgrad-before-dgrad + # on _Linear / _LayerNormLinear (so the GTP wgrad reduce-scatter + # overlaps with the dgrad GEMM and the prev_w AG prefetch overlaps + # with the wgrad GEMM). TE-side support for this ordering is deferred + # to a follow-up MR; until then, flag any attempt to activate it. if args.tensor_model_parallel_size == 1: - from megatron.experimental.gtp import HAVE_GTP, update_gtp_config - if HAVE_GTP: - update_gtp_config(wgrad_before_dgrad=True) - warn_rank_0( - "GTP+no-TP detected: setting " - "GTPConfig.wgrad_before_dgrad=True (wgrad GEMM runs before " - "dgrad GEMM so RS NCCL overlaps with dgrad)." - ) + raise NotImplementedError("Wgrad->Dgrad schedule to be supported later") # Propagate --fp8-param-gather into GTPConfig: enables optimizer-side # FP32->FP8 cast for GTP shards, so the forward skips BF16->FP8. 
diff --git a/megatron/training/global_vars.py b/megatron/training/global_vars.py index d23ca169242..ec0bc532f59 100644 --- a/megatron/training/global_vars.py +++ b/megatron/training/global_vars.py @@ -255,8 +255,6 @@ def _set_wandb_writer(args): wandb_kwargs['entity'] = args.wandb_entity os.makedirs(wandb_kwargs['dir'], exist_ok=True) wandb.init(**wandb_kwargs) - # Log all env vars (as a dictionary) in config.yaml - wandb.config.update({"env_vars": dict(os.environ)}) _GLOBAL_WANDB_WRITER = wandb diff --git a/megatron/training/utils/common_utils.py b/megatron/training/utils/common_utils.py index 5dc7a677b53..28c9b0146e6 100644 --- a/megatron/training/utils/common_utils.py +++ b/megatron/training/utils/common_utils.py @@ -105,11 +105,11 @@ def calc_params_l2_norm(model, force_create_fp32_copy=False): params_data = [] # Dense, non-sharded sharded_params_data = [] # Dense, sharded → reduce over dp_cp gtp_params_data = [] # GTP, non-sharded - gtp_sharded_params_data = [] # GTP, sharded → reduce over dp_cp_with_ps + gtp_sharded_params_data = [] # GTP, sharded → reduce over dp_cp_with_gtp moe_params_data = [] # MoE, non-sharded moe_sharded_params_data = [] # MoE, sharded → reduce over expert_dp moe_gtp_params_data = [] # MoE-GTP, non-sharded - moe_gtp_sharded_params_data = [] # MoE-GTP, sharded → reduce over expert_dp_with_ps + moe_gtp_sharded_params_data = [] # MoE-GTP, sharded → reduce over expert_dp_with_gtp gtp_rank = mpu.get_generalized_tensor_parallel_rank() egtp_rank = mpu.get_expert_generalized_tensor_parallel_rank() From 19035985cee1cb381246adf89d20ef611a6fe1aa Mon Sep 17 00:00:00 2001 From: Shiqing Fan Date: Tue, 26 May 2026 07:16:20 -0700 Subject: [PATCH 04/59] fix comments Signed-off-by: Shiqing Fan --- megatron/core/parallel_state.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/megatron/core/parallel_state.py b/megatron/core/parallel_state.py index ca13bfa4b8c..d0ddd2f850b 100644 --- 
a/megatron/core/parallel_state.py +++ b/megatron/core/parallel_state.py @@ -1006,8 +1006,8 @@ def initialize_model_parallel( rank_to_gtp_rank = {} for cp_dp_ranks in decoder_rank_generator.get_ranks('cp-dp'): for i in range(0, len(cp_dp_ranks), generalized_tensor_parallel_size): - ps_chunk = cp_dp_ranks[i : i + generalized_tensor_parallel_size] - for gtp_rank_idx, r in enumerate(ps_chunk): + gtp_chunk = cp_dp_ranks[i : i + generalized_tensor_parallel_size] + for gtp_rank_idx, r in enumerate(gtp_chunk): rank_to_gtp_rank[r] = gtp_rank_idx # DP-only with GTP: create one group per (dp_group, gtp_rank) pair. @@ -1103,8 +1103,8 @@ def initialize_model_parallel( gtp_ranks = cp_dp_ranks[i : i + generalized_tensor_parallel_size] # Merge tp-pp groups of all GTP peers mp_ranks = [] - for ps_r in gtp_ranks: - mp_ranks.extend(rank_to_tp_pp[ps_r]) + for gtp_r in gtp_ranks: + mp_ranks.extend(rank_to_tp_pp[gtp_r]) mp_ranks = sorted(set(mp_ranks)) mp_key = tuple(mp_ranks) if mp_key not in model_parallel_groups_set: @@ -1458,8 +1458,8 @@ def initialize_model_parallel( rank_to_expert_gtp_rank = {} for dp_ranks in expert_decoder_rank_generator.get_ranks('dp'): for i in range(0, len(dp_ranks), expert_generalized_tensor_parallel_size): - eps_chunk = dp_ranks[i : i + expert_generalized_tensor_parallel_size] - for egtp_rank_idx, r in enumerate(eps_chunk): + egtp_chunk = dp_ranks[i : i + expert_generalized_tensor_parallel_size] + for egtp_rank_idx, r in enumerate(egtp_chunk): rank_to_expert_gtp_rank[r] = egtp_rank_idx # Create one group per (expert_dp_group, expert_gtp_rank) pair (collective). 
@@ -2384,9 +2384,15 @@ def destroy_model_parallel(): global _DATA_PARALLEL_GROUP _DATA_PARALLEL_GROUP = None + global _DATA_PARALLEL_GROUP_WITH_GTP + _DATA_PARALLEL_GROUP_WITH_GTP = None + global _DATA_PARALLEL_GROUP_WITH_CP _DATA_PARALLEL_GROUP_WITH_CP = None + global _DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP + _DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP = None + global _CONTEXT_PARALLEL_GROUP _CONTEXT_PARALLEL_GROUP = None @@ -2486,6 +2492,9 @@ def destroy_model_parallel(): global _EXPERT_DATA_PARALLEL_GROUP _EXPERT_DATA_PARALLEL_GROUP = None + global _EXPERT_DATA_PARALLEL_GROUP_WITH_GTP + _EXPERT_DATA_PARALLEL_GROUP_WITH_GTP = None + global _EXPERT_DATA_PARALLEL_GROUP_GLOO if ( _EXPERT_DATA_PARALLEL_GROUP_GLOO is not None From 9eb50071743f88da20e2ea6259d97aa87a285761 Mon Sep 17 00:00:00 2001 From: Shiqing Fan Date: Tue, 26 May 2026 21:12:17 -0700 Subject: [PATCH 05/59] code clean. Signed-off-by: Shiqing Fan --- .../distributed/distributed_data_parallel.py | 8 +- .../core/distributed/finalize_model_grads.py | 12 ++- .../core/extensions/transformer_engine.py | 7 +- megatron/core/optimizer/__init__.py | 4 +- megatron/core/optimizer/clip_grads.py | 44 +++++------ megatron/core/optimizer/distrib_optimizer.py | 2 +- megatron/core/optimizer/optimizer.py | 8 +- megatron/core/tensor_parallel/layers.py | 17 +--- megatron/core/transformer/cuda_graphs.py | 10 +-- megatron/experimental/gtp/__init__.py | 79 ++++++------------- .../gtp/generalized_tensor_parallelism.py | 8 +- megatron/training/arguments.py | 17 ++-- megatron/training/training.py | 30 ++++--- megatron/training/utils/common_utils.py | 4 +- 14 files changed, 94 insertions(+), 156 deletions(-) diff --git a/megatron/core/distributed/distributed_data_parallel.py b/megatron/core/distributed/distributed_data_parallel.py index 2bdb306861a..4120c1a923b 100644 --- a/megatron/core/distributed/distributed_data_parallel.py +++ b/megatron/core/distributed/distributed_data_parallel.py @@ -6,7 +6,7 @@ import torch -from 
megatron.experimental.gtp import HAVE_GTP, GTPShardedParam +from megatron.experimental.gtp import GTPShardedParam from ..config_logger import has_config_logger_enabled, log_config_to_disk from ..optimizer.param_layout import FullParamLayout @@ -139,8 +139,7 @@ def __init__( # through to all_params where group_params_for_buffers splits them # via is_expert_parallel. is_dense_gtp = ( - HAVE_GTP - and isinstance(param, GTPShardedParam) + isinstance(param, GTPShardedParam) and getattr(param, 'allreduce', True) ) if is_dense_gtp: @@ -291,7 +290,6 @@ def __init__( else None ) params_with_names = [(p, param_to_name[p]) for p in params] - buffer = _ParamAndGradBuffer( self.ddp_config, buffer_key.param_dtype, @@ -545,7 +543,7 @@ def hook(*unused): # wgrad_reduce_scatter returns None for async RS and writes # the wgrad straight into param.main_grad. Skip the assertion # for GTPShardedParam — otherwise it fires every iter. - if not (HAVE_GTP and isinstance(param, GTPShardedParam)): + if not isinstance(param, GTPShardedParam): assert ( param.grad is not None ), 'param.grad being None is not safe when overlap_grad_reduce is True' diff --git a/megatron/core/distributed/finalize_model_grads.py b/megatron/core/distributed/finalize_model_grads.py index b4b16d7b139..b26eb8a201d 100644 --- a/megatron/core/distributed/finalize_model_grads.py +++ b/megatron/core/distributed/finalize_model_grads.py @@ -500,18 +500,16 @@ def finalize_model_grads( or config.expert_generalized_tensor_parallel_size > 1 ): from megatron.experimental.gtp import ( - HAVE_GTP, get_all_ag_streams, get_all_rs_streams, wait_async_comms, ) - if HAVE_GTP: - wait_async_comms() - for s in get_all_ag_streams(): - torch.cuda.current_stream().wait_stream(s) - for s in get_all_rs_streams(): - torch.cuda.current_stream().wait_stream(s) + wait_async_comms() + for s in get_all_ag_streams(): + torch.cuda.current_stream().wait_stream(s) + for s in get_all_rs_streams(): + torch.cuda.current_stream().wait_stream(s) # Wait for 
captured bwd Phase 2 (main_grad.add_) on each CG runner's # stream. bwd_completion_event only covers Phase 1; Phase 2 runs after diff --git a/megatron/core/extensions/transformer_engine.py b/megatron/core/extensions/transformer_engine.py index 7a986870a5a..95539352a57 100644 --- a/megatron/core/extensions/transformer_engine.py +++ b/megatron/core/extensions/transformer_engine.py @@ -62,7 +62,6 @@ is_te_min_version, is_torch_min_version, ) -from megatron.experimental.gtp import HAVE_GTP try: import transformer_engine as te @@ -894,7 +893,7 @@ def __init__( init_quant_context = _get_fp8_model_init_for_quant_params( self.te_quant_params, torch.is_grad_enabled() ) - if HAVE_GTP: + if is_te_min_version("2.15.0"): self.gtp_size = get_pg_size(gtp_group) if gtp_group is not None else 1 extra_kwargs["gtp_group"] = gtp_group if torch.distributed.is_initialized() else None with init_quant_context: @@ -1104,7 +1103,7 @@ def __init__( ), "Must have at least TE version 2.3 or higher to use symmetric memory all reduce" extra_kwargs["symmetric_ar_type"] = self.config.symmetric_ar_type - if HAVE_GTP: + if is_te_min_version("2.15.0"): self.gtp_size = get_pg_size(gtp_group) if gtp_group is not None else 1 extra_kwargs["gtp_group"] = gtp_group if torch.distributed.is_initialized() else None @@ -1878,7 +1877,7 @@ def __init__( tp_size = 1 tp_group_for_te = None - if HAVE_GTP: + if is_te_min_version("2.15.0"): self.gtp_size = get_pg_size(gtp_group) if gtp_group is not None else 1 extra_kwargs["gtp_group"] = ( gtp_group if torch.distributed.is_initialized() else None diff --git a/megatron/core/optimizer/__init__.py b/megatron/core/optimizer/__init__.py index 269496ba039..3bd4a4de60d 100644 --- a/megatron/core/optimizer/__init__.py +++ b/megatron/core/optimizer/__init__.py @@ -56,7 +56,7 @@ ) from megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.transformer.fsdp_dtensor_checkpoint import get_global_unique_param_name -from megatron.experimental.gtp 
import HAVE_GTP, GTPShardedParam +from megatron.experimental.gtp import GTPShardedParam from ..distributed.param_and_grad_buffer import _ParamAndGradBuffer from ..transformer.module import MegatronModule @@ -347,7 +347,7 @@ def _get_param_groups( param_override = None is_expert_parallel = not getattr(param, 'allreduce', True) - is_gtp = HAVE_GTP and isinstance(param, GTPShardedParam) + is_gtp = isinstance(param, GTPShardedParam) # Create config_tuple that is hash-able, and has a consistent ordering of the keys. param_override_tuple: tuple[tuple[str, Any], ...] | None = ( diff --git a/megatron/core/optimizer/clip_grads.py b/megatron/core/optimizer/clip_grads.py index ce33cacac17..d603436e38d 100644 --- a/megatron/core/optimizer/clip_grads.py +++ b/megatron/core/optimizer/clip_grads.py @@ -47,7 +47,7 @@ multi_tensor_scale_tensor_impl = None -from megatron.experimental.gtp import HAVE_GTP, GTPShardedParam +from megatron.experimental.gtp import GTPShardedParam from .. import parallel_state from ..tensor_parallel import param_is_not_tensor_parallel_duplicate @@ -62,18 +62,18 @@ def get_grad_norm_fp32( ) -> float: """Calculate the p-norm of gradients in FP32 precision. - This function is adapted from `torch.nn.utils.clip_grad.clip_grad_norm_` - and extends it with functionality to handle model-parallel parameters. - It ensures that the norm is correctly computed and reduced across - the specified process group (typically the model-parallel group for + This function is adapted from `torch.nn.utils.clip_grad.clip_grad_norm_` + and extends it with functionality to handle model-parallel parameters. + It ensures that the norm is correctly computed and reduced across + the specified process group (typically the model-parallel group for non-distributed optimizers or the entire world for distributed optimizers). 
Args: - grads_for_norm (Union[List[torch.Tensor], torch.Tensor]): An iterable + grads_for_norm (Union[List[torch.Tensor], torch.Tensor]): An iterable of Tensors or a single Tensor used to calculate the gradient norm. - norm_type (Union[int, float]): The type of the p-norm to use. Can be + norm_type (Union[int, float]): The type of the p-norm to use. Can be 'inf' for infinity norm. Defaults to 2. - grad_stats_parallel_group (ProcessGroup, optional): The process group + grad_stats_parallel_group (ProcessGroup, optional): The process group used for reducing gradient statistics (e.g., norms and zero counts). Returns: @@ -158,13 +158,13 @@ def clip_grad_by_total_norm_fp32( Note that the gradients are modified in-place. Args: - parameters (Union[List[torch.Tensor], torch.Tensor]): An iterable of + parameters (Union[List[torch.Tensor], torch.Tensor]): An iterable of Tensors or a single Tensor that will have gradients normalized. - max_norm (Union[int, float]): The maximum permissible total norm + max_norm (Union[int, float]): The maximum permissible total norm of the gradients. total_norm (float): The current total norm of the gradients. - use_decoupled_grad (bool, optional): Whether to read from the - '.decoupled_grad' attribute instead of the standard '.grad'. + use_decoupled_grad (bool, optional): Whether to read from the + '.decoupled_grad' attribute instead of the standard '.grad'. Defaults to False. """ # Grads. @@ -208,19 +208,19 @@ def count_zeros_fp32( ) -> float: """Counts the number of zero values in the gradients of the given parameters. - The count is performed in FP32. This method filters parameters to ensure - gradients are not double-counted by checking if the gradient is not None, - the parameter is not shared, and the parameter is not a replica due - to tensor model parallelism. It also handles parameters managed by + The count is performed in FP32. 
This method filters parameters to ensure + gradients are not double-counted by checking if the gradient is not None, + the parameter is not shared, and the parameter is not a replica due + to tensor model parallelism. It also handles parameters managed by Megatron FSDP specifically. Args: - parameters (Union[List[torch.Tensor], torch.Tensor]): An iterable of + parameters (Union[List[torch.Tensor], torch.Tensor]): An iterable of Tensors or a single Tensor whose gradients will be checked for zeros. - grad_stats_parallel_group (ProcessGroup): The process group used for + grad_stats_parallel_group (ProcessGroup): The process group used for reducing the zero count across distributed ranks. - use_decoupled_grad (bool, optional): If True, reads from the - '.decoupled_grad' attribute instead of the standard '.grad'. + use_decoupled_grad (bool, optional): If True, reads from the + '.decoupled_grad' attribute instead of the standard '.grad'. Defaults to False. Returns: @@ -255,9 +255,7 @@ def count_zeros_fp32( if use_distributed_optimizer: is_not_gtp_duplicate = True else: - is_gtp_param = getattr(param, 'is_gtp', False) or ( - HAVE_GTP and isinstance(param, GTPShardedParam) - ) + is_gtp_param = getattr(param, 'is_gtp', False) or isinstance(param, GTPShardedParam) is_not_gtp_duplicate = is_gtp_param or gtp_rank == 0 if grad_not_none and is_not_shared and is_not_tp_duplicate and is_not_gtp_duplicate: grad_obj = getattr(param, grad_attr) diff --git a/megatron/core/optimizer/distrib_optimizer.py b/megatron/core/optimizer/distrib_optimizer.py index 1f318a6e67d..b388161a610 100644 --- a/megatron/core/optimizer/distrib_optimizer.py +++ b/megatron/core/optimizer/distrib_optimizer.py @@ -3041,4 +3041,4 @@ def step_with_ready_grads(self) -> bool: if timers is not None and (self.ddp_config.use_megatron_fsdp or should_sync_params): timers('params-all-gather').stop() - return update_successful \ No newline at end of file + return update_successful diff --git 
a/megatron/core/optimizer/optimizer.py b/megatron/core/optimizer/optimizer.py index 7cbdd731156..0f13b6e4028 100644 --- a/megatron/core/optimizer/optimizer.py +++ b/megatron/core/optimizer/optimizer.py @@ -37,7 +37,7 @@ multi_tensor_applier = local_multi_tensor_applier multi_tensor_scale_impl = local_multi_tensor_scale -from megatron.experimental.gtp import HAVE_GTP, GTPShardedParam +from megatron.experimental.gtp import GTPShardedParam from .. import parallel_state, tensor_parallel from ..config_logger import has_config_logger_enabled, log_config_to_disk @@ -189,9 +189,7 @@ def get_main_grads_for_grad_norm_split(self) -> Tuple[List[torch.Tensor], List[t grad_not_none = grad is not None is_not_shared = param_is_not_shared(param) - is_gtp_param = getattr(param, 'is_gtp', False) or ( - HAVE_GTP and isinstance(param, GTPShardedParam) - ) + is_gtp_param = getattr(param, 'is_gtp', False) or isinstance(param, GTPShardedParam) # GTP params are always unique across TP ranks (tensor_model_parallel # attribute is lost during wrap_gtp_sharded_tensor), so skip TP filter. @@ -774,7 +772,7 @@ def __init__( float16_params_this_group.append(param) # Create a copy main_param = param.detach().clone().float() - main_param.is_gtp = HAVE_GTP and isinstance(param, GTPShardedParam) + main_param.is_gtp = isinstance(param, GTPShardedParam) # Copy tensor model parallel attributes. 
tensor_parallel.copy_tensor_model_parallel_attributes(main_param, param) diff --git a/megatron/core/tensor_parallel/layers.py b/megatron/core/tensor_parallel/layers.py index 91524ce1102..e5296d67e8b 100644 --- a/megatron/core/tensor_parallel/layers.py +++ b/megatron/core/tensor_parallel/layers.py @@ -29,7 +29,7 @@ make_tp_sharded_tensor_for_checkpoint, prepare_input_tensors_for_wgrad_compute, ) -from megatron.experimental.gtp import HAVE_GTP, GTPEmbeddingWeight, wrap_module_params_gtp +from megatron.experimental.gtp import GTPEmbeddingWeight, wrap_module_params_gtp from ..dist_checkpointing.mapping import ShardedStateDict from ..transformer.utils import make_sharded_tensors_for_checkpoint @@ -284,11 +284,6 @@ def __init__( self.gtp_size = 1 if gtp_group is not None and gtp_group.size() > 1: - assert HAVE_GTP, ( - "generalized_tensor_parallel_size > 1 requires megatron.experimental.gtp to import " - "successfully (it pulls in low-precision tensor primitives from " - "transformer_engine)." - ) wrap_module_params_gtp(self, ["weight"], gtp_group) self.gtp_size = gtp_group.size() # Nothing prefetches embedding — it is head of the UNGRAPHED @@ -969,11 +964,6 @@ def __init__( self.gtp_size = 1 if gtp_group is not None and gtp_group.size() > 1: - assert HAVE_GTP, ( - "generalized_tensor_parallel_size > 1 requires megatron.experimental.gtp to import " - "successfully (it pulls in low-precision tensor primitives from " - "transformer_engine)." - ) wrap_module_params_gtp(self, ["weight"], gtp_group) self.gtp_size = gtp_group.size() @@ -1329,11 +1319,6 @@ def __init__( self.gtp_size = 1 if gtp_group is not None and gtp_group.size() > 1: - assert HAVE_GTP, ( - "generalized_tensor_parallel_size > 1 requires megatron.experimental.gtp to import " - "successfully (it pulls in low-precision tensor primitives from " - "transformer_engine)." 
- ) wrap_module_params_gtp(self, ["weight"], gtp_group) self.gtp_size = gtp_group.size() diff --git a/megatron/core/transformer/cuda_graphs.py b/megatron/core/transformer/cuda_graphs.py index 806c8c6c446..f59768ae07d 100644 --- a/megatron/core/transformer/cuda_graphs.py +++ b/megatron/core/transformer/cuda_graphs.py @@ -60,7 +60,6 @@ from megatron.experimental.gtp import ( GTP_CONFIG, - HAVE_GTP, GTPChain, GTPShardedParam, get_ag_stream, @@ -441,11 +440,6 @@ def create_cudagraphs(cls): ) if any(r[0].generalized_tensor_parallel for r in cls.cudagraph_record): - assert HAVE_GTP, ( - "generalized_tensor_parallel_size > 1 requires megatron.experimental.gtp to import " - "successfully (it pulls in low-precision tensor primitives from " - "transformer_engine)." - ) reallocate_gtp_cache_to_mempool( torch.cuda.current_device(), CudaGraphManager.global_mempool ) @@ -946,8 +940,6 @@ def _compute_finalized_during_bwd_capture(self): wait was captured in the producer's Phase 2, but the add lives here regardless, bridged by external rs_event.) """ - if not HAVE_GTP or GTPShardedParam is None: - return [] finalized = {} # id → param for p in self.params_to_backprop: if not isinstance(p, GTPShardedParam): @@ -1270,7 +1262,7 @@ def create_bwd_graph(self): # cascade and wait_async_comms to split the captured RS wait/add across # producer and consumer graphs (avoids cross-capture cudaStreamWaitEvent # on c10d Work.postEvent). - if self.generalized_tensor_parallel and HAVE_GTP and GTPShardedParam is not None: + if self.generalized_tensor_parallel: pset = {id(p) for p in self.params_to_backprop} for p in self.params_to_backprop: if not isinstance(p, GTPShardedParam): diff --git a/megatron/experimental/gtp/__init__.py b/megatron/experimental/gtp/__init__.py index 340e5b12a86..f220afa7b5c 100644 --- a/megatron/experimental/gtp/__init__.py +++ b/megatron/experimental/gtp/__init__.py @@ -2,68 +2,41 @@ # # See LICENSE for license information. 
-"""Generalized Tensor Parallelism (GTP) for Megatron Core. +"""Generalized Tensor Parallelism (GTP) public API. -See ``README.md`` in this folder for the design overview. The whole -implementation lives in ``generalized_tensor_parallelism.py``; this -``__init__`` re-exports the public surface and owns the ``HAVE_GTP`` -capability flag (False when the implementation module fails to import, -e.g. when TransformerEngine's low-precision tensor primitives are missing). +GTP shards weight tensors 1/N across a GTP process group along ``out_features`` +and materializes them on-demand via async all-gather. The implementation lives in +``megatron.experimental.gtp.generalized_tensor_parallelism`` and depends on +TransformerEngine's FP8 / MXFP8 / NVFP4 primitives. Importing this package will +raise ``ImportError`` (with a helpful message) if TransformerEngine is missing or +too old. """ -try: - from megatron.experimental.gtp.generalized_tensor_parallelism import ( - GTP_CONFIG, - GTPChain, - GTPEmbeddingWeight, - GTPShardedParam, - classify_gtp_chains, - get_ag_stream, - get_ag_streams_for_chain, - get_all_ag_streams, - get_all_rs_streams, - get_rs_stream, - get_rs_streams_for_chain, - reallocate_gtp_cache_to_mempool, - set_cuda_graph_modules, - tag_gtp_params_with_names, - ) - from megatron.experimental.gtp.generalized_tensor_parallelism import ( - update_config as update_gtp_config, - ) - from megatron.experimental.gtp.generalized_tensor_parallelism import ( - wait_async_comms, - wrap_module_params_gtp, - ) - - HAVE_GTP = True -except ImportError: - GTP_CONFIG = None - GTPChain = None - GTPEmbeddingWeight = None - GTPShardedParam = None - classify_gtp_chains = None - get_ag_stream = None - get_ag_streams_for_chain = None - get_all_ag_streams = None - get_all_rs_streams = None - get_rs_stream = None - get_rs_streams_for_chain = None - reallocate_gtp_cache_to_mempool = None - set_cuda_graph_modules = None - tag_gtp_params_with_names = None - update_gtp_config = None - 
wait_async_comms = None - wrap_module_params_gtp = None - HAVE_GTP = False - +from megatron.experimental.gtp.generalized_tensor_parallelism import ( + GTP_CONFIG, + GTPChain, + GTPEmbeddingWeight, + GTPShardedParam, + classify_gtp_chains, + get_ag_stream, + get_ag_streams_for_chain, + get_all_ag_streams, + get_all_rs_streams, + get_rs_stream, + get_rs_streams_for_chain, + reallocate_gtp_cache_to_mempool, + set_cuda_graph_modules, + tag_gtp_params_with_names, + update_gtp_config, + wait_async_comms, + wrap_module_params_gtp, +) __all__ = [ "GTP_CONFIG", "GTPChain", "GTPEmbeddingWeight", "GTPShardedParam", - "HAVE_GTP", "classify_gtp_chains", "get_ag_stream", "get_ag_streams_for_chain", diff --git a/megatron/experimental/gtp/generalized_tensor_parallelism.py b/megatron/experimental/gtp/generalized_tensor_parallelism.py index fb47f1eb280..7e0b1b70ba6 100644 --- a/megatron/experimental/gtp/generalized_tensor_parallelism.py +++ b/megatron/experimental/gtp/generalized_tensor_parallelism.py @@ -276,7 +276,7 @@ class GTPConfig: # Stub field, reserved for a follow-up MR that will land the wgrad-before-dgrad # schedule on the TE side (_Linear / _LayerNormLinear backward run wgrad GEMM # before dgrad GEMM, so the GTP wgrad reduce-scatter overlaps with dgrad GEMM). - # Setting this to True via update_config() currently raises NotImplementedError. + # Setting this to True via update_gtp_config() currently raises NotImplementedError. wgrad_before_dgrad: bool = False # GTP companion to Megatron --fp8-param-gather: optimizer casts FP32 master # directly into GTPShardedParam.quantized; forward's _quantize_if_needed @@ -285,7 +285,7 @@ class GTPConfig: # Stub field, reserved for a follow-up MR that will re-land the coalesced # NVFP4 amax allreduce across the GTP group (single NCCL call across all # batched per-expert amax tensors, plus the TE split-phase compute_amax / - # quantize_cast primitives). Setting this to True via update_config() + # quantize_cast primitives). 
Setting this to True via update_gtp_config() # currently logs an info message and falls back to the per-weight path. coalesce_amax_allreduce: bool = False @@ -293,7 +293,7 @@ class GTPConfig: GTP_CONFIG = GTPConfig() -def update_config(**kwargs): +def update_gtp_config(**kwargs): """Update the global GTP configuration.""" if kwargs.get("wgrad_before_dgrad"): raise NotImplementedError("Wgrad->Dgrad schedule to be supported later") @@ -784,7 +784,7 @@ def _all_gather_weight(self, async_op, skip_weight_cast, cast_noop_flag, fwd, nv # 2. Prepare: quantize, set usage direction. # NOTE: The coalesced amax allreduce path (gated by # GTPConfig.coalesce_amax_allreduce) is deferred to a follow-up MR; - # always use the per-weight quantize path here. update_config() logs + # always use the per-weight quantize path here. update_gtp_config() logs # an info message when a caller tries to enable the deferred knob. fp8_pg_hit = GTP_CONFIG.fp8_param_gather and self.did_cast_to_low_precision diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py index 7e7de73a06a..9fc25abb337 100644 --- a/megatron/training/arguments.py +++ b/megatron/training/arguments.py @@ -1474,15 +1474,14 @@ def validate_args(args, defaults={}): # FP32->FP8 cast for GTP shards, so the forward skips BF16->FP8. if getattr(args, 'fp8_param_gather', False): assert False, 'GTP+fp8-param-gather not supported yet!' - from megatron.experimental.gtp import HAVE_GTP, update_gtp_config - if HAVE_GTP: - update_gtp_config(fp8_param_gather=True) - warn_rank_0( - "GTP + --fp8-param-gather: setting " - "GTPConfig.fp8_param_gather=True (optimizer step " - "pre-quantizes GTP shards, skipping the per-forward " - "BF16->FP8 cast)." 
- ) + from megatron.experimental.gtp import update_gtp_config + update_gtp_config(fp8_param_gather=True) + warn_rank_0( + "GTP + --fp8-param-gather: setting " + "GTPConfig.fp8_param_gather=True (optimizer step " + "pre-quantizes GTP shards, skipping the per-forward " + "BF16->FP8 cast)." + ) # Disable bias gelu fusion if we are disabling bias altogether if not args.add_bias_linear: diff --git a/megatron/training/training.py b/megatron/training/training.py index b77ddd24580..2d5760a6372 100644 --- a/megatron/training/training.py +++ b/megatron/training/training.py @@ -1652,8 +1652,8 @@ def get_model(model_provider_func, model_type=ModelType.encoder_or_decoder, wrap args.modelopt_enabled = True # Configure GTP padding alignment based on quantization recipe before model construction. - from megatron.experimental.gtp import HAVE_GTP, update_gtp_config - if HAVE_GTP and ( + from megatron.experimental.gtp import update_gtp_config + if ( getattr(args, 'generalized_tensor_parallel_size', 1) > 1 or getattr(args, 'expert_generalized_tensor_parallel_size', 1) > 1 ): @@ -1717,24 +1717,22 @@ def build_model(): # model build, before the first forward (which lazily builds chain links). from megatron.experimental.gtp import ( GTP_CONFIG, - HAVE_GTP, classify_gtp_chains, set_cuda_graph_modules, tag_gtp_params_with_names, ) - if HAVE_GTP: - _raw_modules = getattr(args, 'cuda_graph_modules', None) or [] - _cg_modules = {getattr(s, 'name', str(s)) for s in _raw_modules} if _raw_modules else None - _mse_overlap = getattr(args, 'moe_shared_expert_overlap', False) - set_cuda_graph_modules(_cg_modules, moe_shared_expert_overlap=_mse_overlap) - for model_module in model: - tag_gtp_params_with_names(model_module) - classify_gtp_chains(model_module) - if ( - getattr(args, 'generalized_tensor_parallel_size', 1) > 1 - or getattr(args, 'expert_generalized_tensor_parallel_size', 1) > 1 - ): - print_rank_0(f"GTP enabled. 
{GTP_CONFIG}") + _raw_modules = getattr(args, 'cuda_graph_modules', None) or [] + _cg_modules = {getattr(s, 'name', str(s)) for s in _raw_modules} if _raw_modules else None + _mse_overlap = getattr(args, 'moe_shared_expert_overlap', False) + set_cuda_graph_modules(_cg_modules, moe_shared_expert_overlap=_mse_overlap) + for model_module in model: + tag_gtp_params_with_names(model_module) + classify_gtp_chains(model_module) + if ( + getattr(args, 'generalized_tensor_parallel_size', 1) > 1 + or getattr(args, 'expert_generalized_tensor_parallel_size', 1) > 1 + ): + print_rank_0(f"GTP enabled. {GTP_CONFIG}") # Set tensor model parallel attributes if not set. # Only parameters that are already tensor model parallel have these diff --git a/megatron/training/utils/common_utils.py b/megatron/training/utils/common_utils.py index 28c9b0146e6..9bd55ff8b2e 100644 --- a/megatron/training/utils/common_utils.py +++ b/megatron/training/utils/common_utils.py @@ -47,7 +47,7 @@ from megatron.core.transformer.module import param_is_not_shared -from megatron.experimental.gtp import GTPShardedParam, HAVE_GTP +from megatron.experimental.gtp import GTPShardedParam def _compute_norm_2(params_list): @@ -116,7 +116,7 @@ def calc_params_l2_norm(model, force_create_fp32_copy=False): for model_chunk in model: for param in model_chunk.parameters(): - is_gtp = HAVE_GTP and isinstance(param, GTPShardedParam) + is_gtp = isinstance(param, GTPShardedParam) # Filter TP duplicates. GTP params are always unique across TP ranks # so skip this check for them. 
From 019536f2d949c27e550ce47df8d27d7219f0eb36 Mon Sep 17 00:00:00 2001 From: Shiqing Fan Date: Wed, 27 May 2026 00:15:01 -0700 Subject: [PATCH 06/59] Fix GTP broadcast_params + add partial DP-CP with GTP group Signed-off-by: Shiqing Fan --- .../distributed/distributed_data_parallel.py | 59 +++++++++----- .../core/extensions/transformer_engine.py | 7 +- megatron/core/parallel_state.py | 80 +++++++++++++++++-- megatron/core/process_groups_config.py | 26 ++++++ 4 files changed, 142 insertions(+), 30 deletions(-) diff --git a/megatron/core/distributed/distributed_data_parallel.py b/megatron/core/distributed/distributed_data_parallel.py index 4120c1a923b..ba6c30013de 100644 --- a/megatron/core/distributed/distributed_data_parallel.py +++ b/megatron/core/distributed/distributed_data_parallel.py @@ -86,11 +86,18 @@ def __init__( self.intra_dp_cp_group = process_group_dict['intra_dp_cp_group'] self.expt_dp_group = process_group_dict['expt_dp_group'] self.intra_expt_dp_group = process_group_dict['intra_expt_dp_group'] - # GTP-aware DP subgroups. Fall back to the corresponding non-GTP group when - # the caller didn't configure GTP, so non-GTP runs work unchanged. 
+ # GTP-aware DP subgroups (fall back to non-GTP variants when GTP is off): + # *_with_gtp_group : full cross-instance, GTP peers excluded (broadcast) + # intra_*_with_gtp_group : per-distopt-instance partial, GTP peers excluded (grad RS) + self.dp_cp_with_gtp_group = process_group_dict.get( + 'dp_cp_with_gtp_group', self.dp_cp_group + ) self.intra_dp_cp_with_gtp_group = process_group_dict.get( 'intra_dp_cp_with_gtp_group', self.intra_dp_cp_group ) + self.expt_dp_with_egtp_group = process_group_dict.get( + 'expt_dp_with_egtp_group', self.expt_dp_group + ) self.intra_expt_dp_with_egtp_group = process_group_dict.get( 'intra_expt_dp_with_egtp_group', self.intra_expt_dp_group ) @@ -132,12 +139,10 @@ def __init__( param.grad_added_to_main_grad = False param_to_name[param] = name - # Carve out DENSE GTPShardedParam (mamba/attn, allreduce=True) only — - # they need intra_dp_cp_with_gtp_group. Routed-expert GTPShardedParam - # has allreduce=False and goes through main's expert path (which uses - # intra_expt_dp_with_egtp_group); non-GTP dense + expert both fall - # through to all_params where group_params_for_buffers splits them - # via is_expert_parallel. + # Only dense GTP params (allreduce=True) carve out into their own bucket + # (they need the GTP-peer-excluded RS group). Routed-expert GTP params + # have allreduce=False and ride the expert path, which uses its own + # EGTP-peer-excluded group; non-GTP params fall through to all_params. is_dense_gtp = ( isinstance(param, GTPShardedParam) and getattr(param, 'allreduce', True) @@ -147,10 +152,9 @@ def __init__( else: all_params.append(param) - # Group parameters by (param_dtype, grad_dtype, is_expert_parallel). + # Group parameters by (param_dtype, grad_dtype, is_expert_parallel). GTP params + # are grouped into a separate set of buffers (RS group is chosen at line 328). 
buffer_groups = group_params_for_buffers(all_params, self.ddp_config.grad_reduce_in_fp32) - # GTP params are grouped separately — they will be routed through - # intra_dp_cp_with_gtp_group because GTP's RS already reduced over the GTP axis. gtp_buffer_groups = ( group_params_for_buffers(gtp_params, self.ddp_config.grad_reduce_in_fp32) if gtp_params @@ -231,9 +235,9 @@ def __init__( if self.ddp_config.average_in_collective: gradient_scaling_factor = 1.0 expert_gradient_scaling_factor = self.expt_dp_group.size() / self.dp_cp_group.size() - # GTP: collective averages over with_gtp group (size = dp_cp_size / gtp_size). - # GTP RS already summed over gtp_size ranks. To total 1/dp_cp_size scaling: - # pre_scale * (1/with_gtp_size) = 1/dp_cp_size => pre_scale = with_gtp_size / dp_cp_size. + # GTP pre-scale = (collective_size) / dp_cp_size so post-collective grad + # lands at 1/dp_cp_size. Divisor must reference the same group the RS + # fires on (line 328) — works for any collective_size. gtp_gradient_scaling_factor = ( self.intra_dp_cp_with_gtp_group.size() / self.dp_cp_group.size() ) @@ -249,11 +253,14 @@ def __init__( self.expert_parallel_buffers = [] self.gtp_buffers = [] pg_collection = ProcessGroupCollection(tp=self.tp_group, dp_cp=self.dp_cp_group) + # Grad RS for every buffer (expert / dense non-GTP here, dense GTP at line 328) + # uses a per-distopt-instance partial group. Cross-instance sync runs separately + # via inter_dist_opt_group during optim.step(); reducing cross-instance grads + # here would mix independent data slices. for buffer_key, (params, param_indices) in buffer_groups.items(): if buffer_key.is_expert_parallel: - # Use the with_egtp group so EGTP-sharded routed experts (whose grads - # are already RS'd over the expert-GTP axis) only DP-reduce over true - # weight replicas. Falls back to intra_expt_dp_group when GTP is off. 
+ # Expert branch needs the EGTP-peer filter (routed experts already + # RS'd over the EGTP axis); reduces to intra_expt_dp_group when EGTP=1. data_parallel_group = self.intra_expt_dp_with_egtp_group scaling_factor = expert_gradient_scaling_factor else: @@ -309,9 +316,9 @@ def __init__( else: self.buffers.append(buffer) - # Allocate GTP buffers separately, routed through intra_dp_cp_with_gtp_group. - # GTP's RS already reduced over the GTP axis, so DDP must NOT re-reduce over GTP. - # full_param_layout is not applied to GTP buffers (GTP manages its own sharding). + # GTP-sharded params have already been RS'd over the GTP axis by GTP itself, + # so DDP must use the GTP-peer-excluded group here. full_param_layout is not + # applied to GTP buffers (GTP manages its own sharding). for buffer_key, (params, param_indices) in gtp_buffer_groups.items(): params_with_names = [(p, param_to_name[p]) for p in params] buffer = _ParamAndGradBuffer( @@ -669,11 +676,19 @@ def broadcast_params(self): """ for param in self.module.parameters(): is_expert_parallel = not getattr(param, 'allreduce', True) + is_gtp = isinstance(param, GTPShardedParam) + # GTPShardedParam holds a unique 1/N shard per (E)GTP peer; broadcast must + # exclude those peers and reach the FULL cross-instance group (one-shot + # init/load sync, unlike the per-instance grad-RS groups above). 
if is_expert_parallel: - data_parallel_group = self.expt_dp_group + data_parallel_group = ( + self.expt_dp_with_egtp_group if is_gtp else self.expt_dp_group + ) else: - data_parallel_group = self.dp_cp_group + data_parallel_group = ( + self.dp_cp_with_gtp_group if is_gtp else self.dp_cp_group + ) torch.distributed.broadcast( param.data, src=torch.distributed.get_global_rank(data_parallel_group, 0), diff --git a/megatron/core/extensions/transformer_engine.py b/megatron/core/extensions/transformer_engine.py index 95539352a57..f566f279eda 100644 --- a/megatron/core/extensions/transformer_engine.py +++ b/megatron/core/extensions/transformer_engine.py @@ -893,7 +893,8 @@ def __init__( init_quant_context = _get_fp8_model_init_for_quant_params( self.te_quant_params, torch.is_grad_enabled() ) - if is_te_min_version("2.15.0"): + + if is_te_min_version("2.14.0"): self.gtp_size = get_pg_size(gtp_group) if gtp_group is not None else 1 extra_kwargs["gtp_group"] = gtp_group if torch.distributed.is_initialized() else None with init_quant_context: @@ -1103,7 +1104,7 @@ def __init__( ), "Must have at least TE version 2.3 or higher to use symmetric memory all reduce" extra_kwargs["symmetric_ar_type"] = self.config.symmetric_ar_type - if is_te_min_version("2.15.0"): + if is_te_min_version("2.14.0"): self.gtp_size = get_pg_size(gtp_group) if gtp_group is not None else 1 extra_kwargs["gtp_group"] = gtp_group if torch.distributed.is_initialized() else None @@ -1877,7 +1878,7 @@ def __init__( tp_size = 1 tp_group_for_te = None - if is_te_min_version("2.15.0"): + if is_te_min_version("2.14.0"): self.gtp_size = get_pg_size(gtp_group) if gtp_group is not None else 1 extra_kwargs["gtp_group"] = ( gtp_group if torch.distributed.is_initialized() else None diff --git a/megatron/core/parallel_state.py b/megatron/core/parallel_state.py index d0ddd2f850b..7ff2ae4eefe 100644 --- a/megatron/core/parallel_state.py +++ b/megatron/core/parallel_state.py @@ -138,6 +138,11 @@ 
_INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP = None _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_GLOO = None +# Partial Data parallel group information with context parallel combined and GTP peers +# excluded. Reaches only true weight-replica ranks within one distributed-optimizer instance. +_INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP = None +_INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP_GLOO = None + # combined parallel group of TP and CP _TENSOR_AND_CONTEXT_PARALLEL_GROUP = None @@ -1001,6 +1006,8 @@ def initialize_model_parallel( # with_gtp DP = only ranks that share the same GTP-rank (true weight replicas). global _DATA_PARALLEL_GROUP_WITH_GTP global _DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP + global _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP + global _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP_GLOO if generalized_tensor_parallel_size > 1: # Build rank→gtp_rank mapping. rank_to_gtp_rank = {} @@ -1018,27 +1025,78 @@ def initialize_model_parallel( group = create_group( dp_gtp_ranks, timeout=timeout, - pg_options=get_nccl_options("dp_ps", nccl_comm_cfgs), + pg_options=get_nccl_options("dp_gtp", nccl_comm_cfgs), group_desc="DATA_PARALLEL_GROUP_WITH_GTP", ) if rank in dp_gtp_ranks: _DATA_PARALLEL_GROUP_WITH_GTP = group - # DP-CP with GTP + # DP-CP with GTP. Also build the partial (per-distopt-instance) split when + # multi-instance distopt is enabled, so callers can hold one slice of the + # GTP-excluded DP-CP set without the GTP peers leaking in. 
for dp_cp_ranks in decoder_rank_generator.get_ranks('dp-cp'): for gtp_rank_val in range(generalized_tensor_parallel_size): dp_cp_gtp_ranks = [r for r in dp_cp_ranks if rank_to_gtp_rank[r] == gtp_rank_val] group = create_group( dp_cp_gtp_ranks, timeout=timeout, - pg_options=get_nccl_options("dp_cp_ps", nccl_comm_cfgs), + pg_options=get_nccl_options("dp_cp_gtp", nccl_comm_cfgs), group_desc="DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP", ) if rank in dp_cp_gtp_ranks: _DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP = group + + if num_distributed_optimizer_instances > 1: + assert ( + len(dp_cp_gtp_ranks) % num_distributed_optimizer_instances == 0 + ), ( + f"DP-CP minus GTP peers size ({len(dp_cp_gtp_ranks)}) must be " + f"divisible by num_distributed_optimizer_instances " + f"({num_distributed_optimizer_instances})" + ) + intra_partial_size = ( + len(dp_cp_gtp_ranks) // num_distributed_optimizer_instances + ) + for i in range(num_distributed_optimizer_instances): + chunk = dp_cp_gtp_ranks[ + i * intra_partial_size : (i + 1) * intra_partial_size + ] + intra_group = create_group( + chunk, + timeout=timeout, + pg_options=get_nccl_options("intra_dp_cp_gtp", nccl_comm_cfgs), + group_desc="INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP", + ) + if create_gloo_process_groups: + intra_group_gloo = create_group( + chunk, + timeout=timeout, + backend="gloo", + group_desc=( + "INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP_GLOO" + ), + ) + else: + intra_group_gloo = None + if rank in chunk: + _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP = intra_group + _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP_GLOO = ( + intra_group_gloo + ) + if num_distributed_optimizer_instances == 1: + _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP = ( + _DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP + ) + _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP_GLOO = None else: _DATA_PARALLEL_GROUP_WITH_GTP = _DATA_PARALLEL_GROUP _DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP = _DATA_PARALLEL_GROUP_WITH_CP + 
_INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP = ( + _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP + ) + _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP_GLOO = ( + _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_GLOO + ) # Build the context-parallel groups. global _CONTEXT_PARALLEL_GROUP @@ -1469,7 +1527,7 @@ def initialize_model_parallel( group = create_group( edp_gtp_ranks, timeout=timeout, - pg_options=get_nccl_options("ep_dp_ps", nccl_comm_cfgs), + pg_options=get_nccl_options("ep_dp_gtp", nccl_comm_cfgs), group_desc="EXPERT_DATA_PARALLEL_GROUP_WITH_GTP", ) if rank in edp_gtp_ranks: @@ -1680,11 +1738,17 @@ def get_data_parallel_group( """ if with_gtp: if with_context_parallel: + if partial_data_parallel: + assert ( + _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP is not None + ), "Intra partial data parallel group with CP and GTP is not initialized" + return _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP assert ( _DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP is not None - ), "data parallel group with context parallel and generalized tensor parallel is not initialized" + ), "data parallel group with CP and GTP is not initialized" return _DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP else: + assert partial_data_parallel is False, "Partial DP for Optimizer needs to include CP" assert ( _DATA_PARALLEL_GROUP_WITH_GTP is not None ), "data parallel group with generalized tensor parallel is not initialized" @@ -2393,6 +2457,12 @@ def destroy_model_parallel(): global _DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP _DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP = None + global _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP + _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP = None + + global _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP_GLOO + _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP_GLOO = None + global _CONTEXT_PARALLEL_GROUP _CONTEXT_PARALLEL_GROUP = None diff --git a/megatron/core/process_groups_config.py b/megatron/core/process_groups_config.py index 
86a8967f54a..1ce695e2e8e 100644 --- a/megatron/core/process_groups_config.py +++ b/megatron/core/process_groups_config.py @@ -318,6 +318,9 @@ def setup_process_groups_for_optimizer( with_context_parallel=True, partial_data_parallel=True ) intra_dp_cp_with_gtp_group = parallel_state.get_data_parallel_group( + with_context_parallel=True, with_gtp=True, partial_data_parallel=True + ) + dp_cp_with_gtp_group = parallel_state.get_data_parallel_group( with_context_parallel=True, with_gtp=True ) expt_dp_group = parallel_state.get_expert_data_parallel_group() @@ -327,6 +330,9 @@ def setup_process_groups_for_optimizer( intra_expt_dp_with_egtp_group = parallel_state.get_expert_data_parallel_group( with_gtp=True ) + expt_dp_with_egtp_group = parallel_state.get_expert_data_parallel_group( + with_gtp=True + ) intra_dist_opt_group = parallel_state.get_intra_distributed_optimizer_instance_group() # Gloo groups @@ -472,9 +478,11 @@ def setup_process_groups_for_optimizer( return { 'dp_group': dp_group, 'dp_cp_group': dp_cp_group, + 'dp_cp_with_gtp_group': dp_cp_with_gtp_group, 'intra_dp_cp_group': intra_dp_cp_group, 'intra_dp_cp_with_gtp_group': intra_dp_cp_with_gtp_group, 'expt_dp_group': expt_dp_group, + 'expt_dp_with_egtp_group': expt_dp_with_egtp_group, 'intra_expt_dp_group': intra_expt_dp_group, 'intra_expt_dp_with_egtp_group': intra_expt_dp_with_egtp_group, 'mp_group': mp_group, @@ -539,11 +547,17 @@ def setup_process_groups_for_ddp( else None ), 'intra_dp_cp_with_gtp_group': parallel_state.get_data_parallel_group( + with_context_parallel=True, with_gtp=True, partial_data_parallel=True + ), + 'dp_cp_with_gtp_group': parallel_state.get_data_parallel_group( with_context_parallel=True, with_gtp=True ), 'intra_expt_dp_with_egtp_group': parallel_state.get_expert_data_parallel_group( with_gtp=True ), + 'expt_dp_with_egtp_group': parallel_state.get_expert_data_parallel_group( + with_gtp=True + ), } else: # Use provided process group collection with validation and fallbacks @@ -625,6 
+639,18 @@ def setup_process_groups_for_ddp( else: result['intra_expt_dp_with_egtp_group'] = result['intra_expt_dp_group'] + # Full (cross-instance) with-GTP-excluded variants for callers that need to + # reach ALL true weight replicas (e.g., broadcast_params at init). Fall back + # to the partial variants when the full attributes aren't on pg_collection. + if hasattr(pg_collection, 'dp_cp_with_gtp'): + result['dp_cp_with_gtp_group'] = pg_collection.dp_cp_with_gtp + else: + result['dp_cp_with_gtp_group'] = result['intra_dp_cp_with_gtp_group'] + if hasattr(pg_collection, 'expt_dp_with_egtp'): + result['expt_dp_with_egtp_group'] = pg_collection.expt_dp_with_egtp + else: + result['expt_dp_with_egtp_group'] = result['intra_expt_dp_with_egtp_group'] + return result From 375f09c0520a1010d32e235878a195a9ef3f3834 Mon Sep 17 00:00:00 2001 From: Shiqing Fan Date: Wed, 27 May 2026 02:14:16 -0700 Subject: [PATCH 07/59] fix non-EGTP sharded params' reduction in moe layer; fix comments Signed-off-by: Shiqing Fan --- .../distributed/distributed_data_parallel.py | 105 ++++++++++++++---- .../core/extensions/transformer_engine.py | 7 +- megatron/core/optimizer/__init__.py | 6 +- megatron/core/optimizer/clip_grads.py | 5 + megatron/core/optimizer/optimizer.py | 1 - megatron/core/parallel_state.py | 99 ++++++++++++++++- megatron/core/process_groups_config.py | 54 +++++++-- megatron/training/arguments.py | 8 -- 8 files changed, 232 insertions(+), 53 deletions(-) diff --git a/megatron/core/distributed/distributed_data_parallel.py b/megatron/core/distributed/distributed_data_parallel.py index ba6c30013de..cda425d57b6 100644 --- a/megatron/core/distributed/distributed_data_parallel.py +++ b/megatron/core/distributed/distributed_data_parallel.py @@ -129,6 +129,7 @@ def __init__( self.params_with_grad = [] all_params = [] gtp_params = [] + egtp_params = [] for name, param in self.module.named_parameters(): if not param.requires_grad: continue @@ -139,27 +140,35 @@ def __init__( 
param.grad_added_to_main_grad = False param_to_name[param] = name - # Only dense GTP params (allreduce=True) carve out into their own bucket - # (they need the GTP-peer-excluded RS group). Routed-expert GTP params - # have allreduce=False and ride the expert path, which uses its own - # EGTP-peer-excluded group; non-GTP params fall through to all_params. - is_dense_gtp = ( - isinstance(param, GTPShardedParam) - and getattr(param, 'allreduce', True) - ) - if is_dense_gtp: + # GTPShardedParam comes in two flavors. Both need the GTP-peer-excluded RS + # group because GTP's bwd already RS'd over the (E)GTP axis: + # - dense GTP (allreduce=True ) → gtp_params → intra_dp_cp_with_gtp_group + # - expert GTP (allreduce=False) → egtp_params → intra_expt_dp_with_egtp_group + # Non-GTP expert params (biases, LayerNorms inside experts, etc.) are + # REPLICATED across EGTP peers and stay in all_params — their expert branch + # reduces over the FULL intra_expt_dp_group at line 263. + is_gtp_shard = isinstance(param, GTPShardedParam) + is_expert = not getattr(param, 'allreduce', True) + if is_gtp_shard and not is_expert: gtp_params.append(param) + elif is_gtp_shard and is_expert: + egtp_params.append(param) else: all_params.append(param) - # Group parameters by (param_dtype, grad_dtype, is_expert_parallel). GTP params - # are grouped into a separate set of buffers (RS group is chosen at line 328). + # Group parameters by (param_dtype, grad_dtype, is_expert_parallel). (E)GTP + # params are grouped into separate buffer sets (RS groups chosen below). buffer_groups = group_params_for_buffers(all_params, self.ddp_config.grad_reduce_in_fp32) gtp_buffer_groups = ( group_params_for_buffers(gtp_params, self.ddp_config.grad_reduce_in_fp32) if gtp_params else {} ) + egtp_buffer_groups = ( + group_params_for_buffers(egtp_params, self.ddp_config.grad_reduce_in_fp32) + if egtp_params + else {} + ) # Auto-compute layouts when using distributed optimizer but no layout was provided. 
# This maintains backward compatibility for callers that create DDP directly @@ -211,6 +220,7 @@ def __init__( gradient_scaling_factor = 1.0 expert_gradient_scaling_factor = 1.0 gtp_gradient_scaling_factor = 1.0 + egtp_gradient_scaling_factor = 1.0 else: # The goal is to scale reduced gradients by 1/dp_size. # This can be achieved in two ways: @@ -235,23 +245,28 @@ def __init__( if self.ddp_config.average_in_collective: gradient_scaling_factor = 1.0 expert_gradient_scaling_factor = self.expt_dp_group.size() / self.dp_cp_group.size() - # GTP pre-scale = (collective_size) / dp_cp_size so post-collective grad - # lands at 1/dp_cp_size. Divisor must reference the same group the RS - # fires on (line 328) — works for any collective_size. + # (E)GTP pre-scale = (collective_size) / dp_cp_size so post-collective + # grad lands at 1/dp_cp_size. Each divisor must reference the same group + # the RS fires on (lines 341 for GTP, 372 for EGTP). gtp_gradient_scaling_factor = ( self.intra_dp_cp_with_gtp_group.size() / self.dp_cp_group.size() ) + egtp_gradient_scaling_factor = ( + self.intra_expt_dp_with_egtp_group.size() / self.dp_cp_group.size() + ) else: data_parallel_world_size = self.dp_cp_group.size() gradient_scaling_factor = 1.0 / data_parallel_world_size expert_gradient_scaling_factor = 1.0 / data_parallel_world_size gtp_gradient_scaling_factor = 1.0 / data_parallel_world_size + egtp_gradient_scaling_factor = 1.0 / data_parallel_world_size # Allocate buffers for each group. self.buffers = [] self.expert_parallel_buffers = [] self.gtp_buffers = [] + self.egtp_buffers = [] pg_collection = ProcessGroupCollection(tp=self.tp_group, dp_cp=self.dp_cp_group) # Grad RS for every buffer (expert / dense non-GTP here, dense GTP at line 328) # uses a per-distopt-instance partial group. Cross-instance sync runs separately @@ -259,9 +274,11 @@ def __init__( # here would mix independent data slices. 
for buffer_key, (params, param_indices) in buffer_groups.items(): if buffer_key.is_expert_parallel: - # Expert branch needs the EGTP-peer filter (routed experts already - # RS'd over the EGTP axis); reduces to intra_expt_dp_group when EGTP=1. - data_parallel_group = self.intra_expt_dp_with_egtp_group + # Non-GTP expert params (biases, expert-scoped LayerNorms, etc.) are + # replicated across EGTP peers, so reduce over the FULL intra_expt_dp + # group (includes EGTP peers). EGTP-sharded routed experts are carved + # into egtp_buffer_groups below and use the EGTP-peer-excluded group. + data_parallel_group = self.intra_expt_dp_group scaling_factor = expert_gradient_scaling_factor else: data_parallel_group = self.intra_dp_cp_group @@ -337,6 +354,29 @@ def __init__( ) self.gtp_buffers.append(buffer) + # EGTP-sharded routed experts: same story as dense GTP but on the expert side — + # their grads were RS'd over the EGTP axis by GTP, so the DP reduction here must + # exclude EGTP peers (intra_expt_dp_with_egtp_group) and use the matching + # egtp_gradient_scaling_factor (numerator = collective size). Non-GTP expert + # params took the full intra_expt_dp_group branch above. + for buffer_key, (params, param_indices) in egtp_buffer_groups.items(): + params_with_names = [(p, param_to_name[p]) for p in params] + buffer = _ParamAndGradBuffer( + self.ddp_config, + buffer_key.param_dtype, + buffer_key.grad_dtype, + params_with_names, + self.intra_expt_dp_with_egtp_group, + self.bucket_size, + param_to_name, + egtp_gradient_scaling_factor, + param_indices, + self.ddp_config.nccl_ub, + pg_collection, + param_layout=None, + ) + self.egtp_buffers.append(buffer) + # In some scenarios, we want to put buckets from different buffers into a group so that # their communication can be aggregated. 
For example, when there are both fp8 buffers # and bf16 buffers in the model and vpp is enabled, each model chunk will have an fp8 @@ -367,13 +407,28 @@ def __init__( self.ddp_config.reduce_scatter_with_fp32_accumulation ), ) - # Flat view across all three bucket-group lists; used wherever + self.egtp_bucket_groups = partition_buckets( + self.egtp_buffers, + force_single_bucket_group=disable_bucketing, + reduce_scatter_with_fp32_accumulation=( + self.ddp_config.reduce_scatter_with_fp32_accumulation + ), + ) + # Flat view across all four bucket-group lists; used wherever # callers need to iterate every bucket group regardless of dense / - # expert-parallel / GTP category. The per-category lists above are + # expert-parallel / GTP / EGTP category. The per-category lists above are # kept for code paths that need per-category state (e.g. one # communication_stream per category). self.all_bucket_groups = ( - self.bucket_groups + self.expert_parallel_bucket_groups + self.gtp_bucket_groups + self.bucket_groups + + self.expert_parallel_bucket_groups + + self.gtp_bucket_groups + + self.egtp_bucket_groups + ) + # Same flat-view convenience for the underlying buffers (lifecycle ops + # like reset/offload/reload iterate over every buffer once). 
+ self.all_buffers = ( + self.buffers + self.expert_parallel_buffers + self.gtp_buffers + self.egtp_buffers ) if self.ddp_config.num_distributed_optimizer_instances > 1: @@ -384,6 +439,7 @@ def __init__( self.bucket_groups, self.expert_parallel_bucket_groups, self.gtp_bucket_groups, + self.egtp_bucket_groups, ]: communication_stream = torch.cuda.Stream(device=torch.cuda.current_device()) for bucket_group in bucket_groups: @@ -402,6 +458,7 @@ def __init__( self.bucket_groups, self.expert_parallel_bucket_groups, self.gtp_bucket_groups, + self.egtp_bucket_groups, ]: num_bucket_groups = len(bucket_groups) for i in range(1, num_bucket_groups): @@ -651,7 +708,7 @@ def free_overlap_buffers(self): def scale_gradients(self, scaling_factor: float): """Scale all gradients inside the buffers by `scaling_factor`.""" - for buffer in self.buffers + self.expert_parallel_buffers + self.gtp_buffers: + for buffer in self.all_buffers: buffer.scale_gradients(scaling_factor) def zero_grad_buffer(self): @@ -665,7 +722,7 @@ def zero_grad_buffer(self): # to True, and there will be a double-GA. for param in self.params_with_grad: param.grad_added_to_main_grad = False - for buffer in self.buffers + self.expert_parallel_buffers + self.gtp_buffers: + for buffer in self.all_buffers: buffer.reset() for bucket_group in self.all_bucket_groups: bucket_group.reset() @@ -710,7 +767,7 @@ def offload_grad_buffers(self, synchronize: bool = True, empty_cache: bool = Tru if synchronize: torch.cuda.synchronize() - for buffer in self.buffers + self.expert_parallel_buffers + self.gtp_buffers: + for buffer in self.all_buffers: buffer.offload_to_cpu(move_params=False, move_grads=True) if empty_cache: @@ -727,7 +784,7 @@ def restore_grad_buffers(self, synchronize: bool = True) -> None: Args: synchronize: Whether to call torch.cuda.synchronize() after allocation. 
""" - for buffer in self.buffers + self.expert_parallel_buffers + self.gtp_buffers: + for buffer in self.all_buffers: buffer.reload_from_cpu(move_params=False, move_grads=True) if synchronize: diff --git a/megatron/core/extensions/transformer_engine.py b/megatron/core/extensions/transformer_engine.py index f566f279eda..549b7956fd7 100644 --- a/megatron/core/extensions/transformer_engine.py +++ b/megatron/core/extensions/transformer_engine.py @@ -2283,14 +2283,13 @@ def backward_dw(self): super().backward_dw() def __repr__(self): + gtp_str = f", GTP={self.gtp_size}" if hasattr(self, "gtp_size") else "" return ( f"{type(self).__name__}(per expert([" f"in={self.in_features}, out={self.out_features}]) " f"X num_gemms={self.num_gemms}, " - f"bias={self.use_bias}, TP={self.tp_size}, " - f"GTP={self.gtp_size})" - if hasattr(self, "gtp_size") - else ")" + f"bias={self.use_bias}, TP={self.tp_size}" + f"{gtp_str})" ) class TEColumnParallelGroupedLinear(TEGroupedLinear): diff --git a/megatron/core/optimizer/__init__.py b/megatron/core/optimizer/__init__.py index 3bd4a4de60d..cd19143196f 100644 --- a/megatron/core/optimizer/__init__.py +++ b/megatron/core/optimizer/__init__.py @@ -1032,6 +1032,8 @@ def get_megatron_optimizer( mp_group = process_groups_dict['mp_group'] expt_tp_pp_group = process_groups_dict['expt_tp_pp_group'] intra_dp_cp_group_gloo = process_groups_dict['intra_dp_cp_group_gloo'] + intra_dp_cp_with_gtp_group_gloo = process_groups_dict['intra_dp_cp_with_gtp_group_gloo'] + intra_expt_dp_with_egtp_group_gloo = process_groups_dict['intra_expt_dp_with_egtp_group_gloo'] intra_expt_dp_group_gloo = process_groups_dict['intra_expt_dp_group_gloo'] intra_dist_opt_group = process_groups_dict['intra_dist_opt_group'] @@ -1169,7 +1171,7 @@ def get_megatron_optimizer( per_model_buffers=gtp_buffers, model_parallel_group=mp_group, data_parallel_group=intra_dp_cp_with_gtp_group, - data_parallel_group_gloo=None, + data_parallel_group_gloo=intra_dp_cp_with_gtp_group_gloo, 
data_parallel_group_idx=model_parallel_rank, intra_dist_opt_group=intra_dist_opt_group, distributed_optimizer_instance_id=distributed_optimizer_instance_id, @@ -1195,7 +1197,7 @@ def get_megatron_optimizer( expt_model_parallel_rank = get_pg_rank(expt_tp_pp_group) # Pass Gloo process groups into optimizer only if needed. if use_gloo_process_groups: - expt_data_parallel_group_gloo = intra_expt_dp_group_gloo + expt_data_parallel_group_gloo = intra_expt_dp_with_egtp_group_gloo else: expt_data_parallel_group_gloo = None optimizers.append( diff --git a/megatron/core/optimizer/clip_grads.py b/megatron/core/optimizer/clip_grads.py index d603436e38d..a1fb54f0205 100644 --- a/megatron/core/optimizer/clip_grads.py +++ b/megatron/core/optimizer/clip_grads.py @@ -222,6 +222,11 @@ def count_zeros_fp32( use_decoupled_grad (bool, optional): If True, reads from the '.decoupled_grad' attribute instead of the standard '.grad'. Defaults to False. + tp_group (ProcessGroup, optional): TP group for the TP-duplicate filter. + Defaults to the default TP group. + use_distributed_optimizer (bool, optional): True when params are per-rank + sharded slices (skip GTP dedup); False when replicated (keep dedup, + else zeros get counted gtp_size times). Defaults to False. Returns: float: The total number of zeros in the gradients across the process group. diff --git a/megatron/core/optimizer/optimizer.py b/megatron/core/optimizer/optimizer.py index 0f13b6e4028..31f6eb524fb 100644 --- a/megatron/core/optimizer/optimizer.py +++ b/megatron/core/optimizer/optimizer.py @@ -773,7 +773,6 @@ def __init__( # Create a copy main_param = param.detach().clone().float() main_param.is_gtp = isinstance(param, GTPShardedParam) - # Copy tensor model parallel attributes. 
tensor_parallel.copy_tensor_model_parallel_attributes(main_param, param) if hasattr(param, 'shared'): diff --git a/megatron/core/parallel_state.py b/megatron/core/parallel_state.py index 7ff2ae4eefe..ff7b912d7b0 100644 --- a/megatron/core/parallel_state.py +++ b/megatron/core/parallel_state.py @@ -71,6 +71,11 @@ _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP = None _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_GLOO = None _INTER_PARTIAL_EXPERT_DATA_PARALLEL_GROUP = None +# Partial expert DP group with EGTP peers excluded — per-distopt-instance slice +# of true expert-weight replicas. Mirrors _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP +# on the dense side. +_INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP = None +_INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP_GLOO = None # Parallel state values changed on the fly _MPU_EXPERT_MODEL_PARALLEL_WORLD_SIZE = None _MPU_EXPERT_MODEL_PARALLEL_RANK = None @@ -1511,6 +1516,8 @@ def initialize_model_parallel( _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_GLOO = _EXPERT_DATA_PARALLEL_GROUP_GLOO # Build expert DP group with expert generalized tensor parallel accounted for. global _EXPERT_DATA_PARALLEL_GROUP_WITH_GTP + global _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP + global _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP_GLOO if expert_generalized_tensor_parallel_size > 1: # Build rank→expert_gtp_rank mapping. 
rank_to_expert_gtp_rank = {} @@ -1532,8 +1539,57 @@ def initialize_model_parallel( ) if rank in edp_gtp_ranks: _EXPERT_DATA_PARALLEL_GROUP_WITH_GTP = group + + if num_distributed_optimizer_instances > 1: + assert ( + len(edp_gtp_ranks) % num_distributed_optimizer_instances == 0 + ), ( + f"Expert DP minus EGTP peers size ({len(edp_gtp_ranks)}) must be " + f"divisible by num_distributed_optimizer_instances " + f"({num_distributed_optimizer_instances})" + ) + intra_partial_size = ( + len(edp_gtp_ranks) // num_distributed_optimizer_instances + ) + for i in range(num_distributed_optimizer_instances): + chunk = edp_gtp_ranks[ + i * intra_partial_size : (i + 1) * intra_partial_size + ] + intra_group = create_group( + chunk, + timeout=timeout, + pg_options=get_nccl_options("intra_ep_dp_gtp", nccl_comm_cfgs), + group_desc="INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP", + ) + if create_gloo_process_groups: + intra_group_gloo = create_group( + chunk, + timeout=timeout, + backend="gloo", + group_desc=( + "INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP_GLOO" + ), + ) + else: + intra_group_gloo = None + if rank in chunk: + _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP = intra_group + _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP_GLOO = ( + intra_group_gloo + ) + if num_distributed_optimizer_instances == 1: + _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP = ( + _EXPERT_DATA_PARALLEL_GROUP_WITH_GTP + ) + _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP_GLOO = None else: _EXPERT_DATA_PARALLEL_GROUP_WITH_GTP = _EXPERT_DATA_PARALLEL_GROUP + _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP = ( + _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP + ) + _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP_GLOO = ( + _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_GLOO + ) ### End of expert related parallel groups initialization @@ -1769,8 +1825,25 @@ def get_data_parallel_group( return _DATA_PARALLEL_GROUP -def get_data_parallel_group_gloo(with_context_parallel=False, 
partial_data_parallel=False): +def get_data_parallel_group_gloo( + with_context_parallel=False, with_gtp=False, partial_data_parallel=False +): """Get the Gloo data-parallel group the caller rank belongs to.""" + if with_gtp: + assert with_context_parallel, "Gloo with_gtp variants only exist with CP" + if partial_data_parallel: + assert _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP_GLOO is not None, ( + "Intra partial data parallel group with context parallel and " + "generalized tensor parallel (gloo) is not initialized" + ) + return _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP_GLOO + # Full (non-partial) Gloo variant of with_gtp is not built; callers needing + # cross-instance Gloo over the GTP-excluded set can use the non-GTP variant + # since broadcasts are init-time only. + assert _DATA_PARALLEL_GROUP_WITH_CP_GLOO is not None, ( + "data parallel group-gloo with context parallel combined is not initialized" + ) + return _DATA_PARALLEL_GROUP_WITH_CP_GLOO if with_context_parallel: if partial_data_parallel: assert ( @@ -2317,6 +2390,13 @@ def get_expert_data_parallel_group( ): """Get expert data parallel group.""" if with_gtp: + if partial_expert_data_parallel: + if check_initialized: + assert _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP is not None, ( + "Intra partial expert data parallel group with generalized tensor " + "parallel is not initialized" + ) + return _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP if check_initialized: assert ( _EXPERT_DATA_PARALLEL_GROUP_WITH_GTP is not None @@ -2336,8 +2416,17 @@ def get_expert_data_parallel_group( return _EXPERT_DATA_PARALLEL_GROUP -def get_expert_data_parallel_group_gloo(partial_expert_data_parallel=False): +def get_expert_data_parallel_group_gloo(with_gtp=False, partial_expert_data_parallel=False): """Get expert data parallel group-gloo.""" + if with_gtp: + assert partial_expert_data_parallel, ( + "Gloo with_gtp variant is only built for the partial (per-distopt-instance) group" + ) + 
assert _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP_GLOO is not None, ( + "Intra partial expert data parallel group with generalized tensor parallel " + "(gloo) is not initialized" + ) + return _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP_GLOO if partial_expert_data_parallel: assert ( _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_GLOO is not None @@ -2565,6 +2654,12 @@ def destroy_model_parallel(): global _EXPERT_DATA_PARALLEL_GROUP_WITH_GTP _EXPERT_DATA_PARALLEL_GROUP_WITH_GTP = None + global _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP + _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP = None + + global _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP_GLOO + _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP_GLOO = None + global _EXPERT_DATA_PARALLEL_GROUP_GLOO if ( _EXPERT_DATA_PARALLEL_GROUP_GLOO is not None diff --git a/megatron/core/process_groups_config.py b/megatron/core/process_groups_config.py index 1ce695e2e8e..101f731cdfc 100644 --- a/megatron/core/process_groups_config.py +++ b/megatron/core/process_groups_config.py @@ -328,7 +328,7 @@ def setup_process_groups_for_optimizer( partial_expert_data_parallel=True, with_gtp=True ) intra_expt_dp_with_egtp_group = parallel_state.get_expert_data_parallel_group( - with_gtp=True + with_gtp=True, partial_expert_data_parallel=True ) expt_dp_with_egtp_group = parallel_state.get_expert_data_parallel_group( with_gtp=True @@ -340,12 +340,22 @@ def setup_process_groups_for_optimizer( intra_dp_cp_group_gloo = parallel_state.get_data_parallel_group_gloo( with_context_parallel=True, partial_data_parallel=True ) + intra_dp_cp_with_gtp_group_gloo = parallel_state.get_data_parallel_group_gloo( + with_context_parallel=True, with_gtp=True, partial_data_parallel=True + ) intra_expt_dp_group_gloo = parallel_state.get_expert_data_parallel_group_gloo( partial_expert_data_parallel=True ) + intra_expt_dp_with_egtp_group_gloo = ( + parallel_state.get_expert_data_parallel_group_gloo( + with_gtp=True, 
partial_expert_data_parallel=True + ) + ) else: intra_dp_cp_group_gloo = None + intra_dp_cp_with_gtp_group_gloo = None intra_expt_dp_group_gloo = None + intra_expt_dp_with_egtp_group_gloo = None # Model communication groups mp_group = parallel_state.get_model_parallel_group() @@ -454,17 +464,27 @@ def setup_process_groups_for_optimizer( ) expt_tp_pp_group = pg_collection.tp_ep_pp - # 6. GTP with_gtp group (fallback to intra_dp_cp if not provided) + # 6. GTP with_gtp groups — partial (per-distopt-instance) and full + # (cross-instance). Fall back to the non-GTP variants when not provided. if hasattr(pg_collection, 'intra_dp_cp_with_gtp'): intra_dp_cp_with_gtp_group = pg_collection.intra_dp_cp_with_gtp else: intra_dp_cp_with_gtp_group = intra_dp_cp_group + if hasattr(pg_collection, 'dp_cp_with_gtp'): + dp_cp_with_gtp_group = pg_collection.dp_cp_with_gtp + else: + dp_cp_with_gtp_group = dp_cp_group - # 7. EGTP group (fallback to intra_expt_dp if not provided) + # 7. EGTP groups — partial (per-distopt-instance) and full + # (cross-instance). Fall back to the non-EGTP variants when not provided. if hasattr(pg_collection, 'intra_expt_dp_with_egtp'): intra_expt_dp_with_egtp_group = pg_collection.intra_expt_dp_with_egtp else: intra_expt_dp_with_egtp_group = intra_expt_dp_group + if hasattr(pg_collection, 'expt_dp_with_egtp'): + expt_dp_with_egtp_group = pg_collection.expt_dp_with_egtp + else: + expt_dp_with_egtp_group = expt_dp_group # Gloo groups - not supported when pg_collection is provided if use_gloo_process_groups: @@ -473,7 +493,9 @@ def setup_process_groups_for_optimizer( "provided. Please set use_gloo_process_groups to False." 
) intra_dp_cp_group_gloo = None + intra_dp_cp_with_gtp_group_gloo = None intra_expt_dp_group_gloo = None + intra_expt_dp_with_egtp_group_gloo = None return { 'dp_group': dp_group, @@ -490,6 +512,8 @@ def setup_process_groups_for_optimizer( 'inter_dist_opt_group': inter_dist_opt_group, 'intra_dist_opt_group': intra_dist_opt_group, 'intra_dp_cp_group_gloo': intra_dp_cp_group_gloo, + 'intra_dp_cp_with_gtp_group_gloo': intra_dp_cp_with_gtp_group_gloo, + 'intra_expt_dp_with_egtp_group_gloo': intra_expt_dp_with_egtp_group_gloo, 'intra_expt_dp_group_gloo': intra_expt_dp_group_gloo, } @@ -530,9 +554,15 @@ def setup_process_groups_for_ddp( with_context_parallel=True, partial_data_parallel=True ), 'expt_dp_group': parallel_state.get_expert_data_parallel_group(), + 'expt_dp_with_egtp_group': parallel_state.get_expert_data_parallel_group( + with_gtp=True + ), 'intra_expt_dp_group': parallel_state.get_expert_data_parallel_group( partial_expert_data_parallel=True ), + 'intra_expt_dp_with_egtp_group': parallel_state.get_expert_data_parallel_group( + with_gtp=True, partial_expert_data_parallel=True + ), 'tp_group': parallel_state.get_tensor_model_parallel_group(), 'pp_group': parallel_state.get_pipeline_model_parallel_group(), 'ep_group': parallel_state.get_expert_model_parallel_group(), @@ -552,12 +582,6 @@ def setup_process_groups_for_ddp( 'dp_cp_with_gtp_group': parallel_state.get_data_parallel_group( with_context_parallel=True, with_gtp=True ), - 'intra_expt_dp_with_egtp_group': parallel_state.get_expert_data_parallel_group( - with_gtp=True - ), - 'expt_dp_with_egtp_group': parallel_state.get_expert_data_parallel_group( - with_gtp=True - ), } else: # Use provided process group collection with validation and fallbacks @@ -633,6 +657,12 @@ def setup_process_groups_for_ddp( result['pp_group'] = pg_collection.pp result['ep_group'] = pg_collection.ep + # GTP group (fallback to intra_dp_cp if not provided) + if hasattr(pg_collection, 'intra_dp_cp_with_gtp'): + 
result['intra_dp_cp_with_gtp_group'] = pg_collection.intra_dp_cp_with_gtp + else: + result['intra_dp_cp_with_gtp_group'] = result['intra_dp_cp_group'] + # EGTP group (fallback to intra_expt_dp if not provided) if hasattr(pg_collection, 'intra_expt_dp_with_egtp'): result['intra_expt_dp_with_egtp_group'] = pg_collection.intra_expt_dp_with_egtp @@ -641,15 +671,15 @@ def setup_process_groups_for_ddp( # Full (cross-instance) with-GTP-excluded variants for callers that need to # reach ALL true weight replicas (e.g., broadcast_params at init). Fall back - # to the partial variants when the full attributes aren't on pg_collection. + # to the corresponding non-GTP-excluded full group when not provided. if hasattr(pg_collection, 'dp_cp_with_gtp'): result['dp_cp_with_gtp_group'] = pg_collection.dp_cp_with_gtp else: - result['dp_cp_with_gtp_group'] = result['intra_dp_cp_with_gtp_group'] + result['dp_cp_with_gtp_group'] = result['dp_cp_group'] if hasattr(pg_collection, 'expt_dp_with_egtp'): result['expt_dp_with_egtp_group'] = pg_collection.expt_dp_with_egtp else: - result['expt_dp_with_egtp_group'] = result['intra_expt_dp_with_egtp_group'] + result['expt_dp_with_egtp_group'] = result['expt_dp_group'] return result diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py index 9fc25abb337..dcb69644d85 100644 --- a/megatron/training/arguments.py +++ b/megatron/training/arguments.py @@ -1462,14 +1462,6 @@ def validate_args(args, defaults={}): "force setting NCCL_PROTO=Simple might introduce bad perf." ) - # Under GTP+no-TP the optimal bwd schedule would be wgrad-before-dgrad - # on _Linear / _LayerNormLinear (so the GTP wgrad reduce-scatter - # overlaps with the dgrad GEMM and the prev_w AG prefetch overlaps - # with the wgrad GEMM). TE-side support for this ordering is deferred - # to a follow-up MR; until then, flag any attempt to activate it. 
- if args.tensor_model_parallel_size == 1: - raise NotImplementedError("Wgrad->Dgrad schedule to be supported later") - # Propagate --fp8-param-gather into GTPConfig: enables optimizer-side # FP32->FP8 cast for GTP shards, so the forward skips BF16->FP8. if getattr(args, 'fp8_param_gather', False): From 6872981ae6fed1ad09564e8f94c7db1ad6cc7bba Mon Sep 17 00:00:00 2001 From: Shiqing Fan Date: Wed, 27 May 2026 19:51:07 -0700 Subject: [PATCH 08/59] rename 'generalized_tensor_parallel_size' into 'generalized_tensor_parallel_remat_size'; fix formatting Signed-off-by: Shiqing Fan --- .../distributed/distributed_data_parallel.py | 12 +- .../core/distributed/finalize_model_grads.py | 6 +- .../core/extensions/transformer_engine.py | 9 +- megatron/core/model_parallel_config.py | 19 +- megatron/core/optimizer/clip_grads.py | 2 +- megatron/core/optimizer/optimizer.py | 8 +- megatron/core/parallel_state.py | 180 +++++++++--------- megatron/core/process_groups_config.py | 13 +- megatron/core/transformer/cuda_graphs.py | 38 ++-- megatron/experimental/gtp/README.md | 16 +- megatron/training/arguments.py | 6 +- megatron/training/initialize.py | 4 +- megatron/training/training.py | 8 +- megatron/training/utils/common_utils.py | 6 +- 14 files changed, 164 insertions(+), 163 deletions(-) diff --git a/megatron/core/distributed/distributed_data_parallel.py b/megatron/core/distributed/distributed_data_parallel.py index cda425d57b6..846c27747cb 100644 --- a/megatron/core/distributed/distributed_data_parallel.py +++ b/megatron/core/distributed/distributed_data_parallel.py @@ -89,9 +89,7 @@ def __init__( # GTP-aware DP subgroups (fall back to non-GTP variants when GTP is off): # *_with_gtp_group : full cross-instance, GTP peers excluded (broadcast) # intra_*_with_gtp_group : per-distopt-instance partial, GTP peers excluded (grad RS) - self.dp_cp_with_gtp_group = process_group_dict.get( - 'dp_cp_with_gtp_group', self.dp_cp_group - ) + self.dp_cp_with_gtp_group = 
process_group_dict.get('dp_cp_with_gtp_group', self.dp_cp_group) self.intra_dp_cp_with_gtp_group = process_group_dict.get( 'intra_dp_cp_with_gtp_group', self.intra_dp_cp_group ) @@ -739,13 +737,9 @@ def broadcast_params(self): # exclude those peers and reach the FULL cross-instance group (one-shot # init/load sync, unlike the per-instance grad-RS groups above). if is_expert_parallel: - data_parallel_group = ( - self.expt_dp_with_egtp_group if is_gtp else self.expt_dp_group - ) + data_parallel_group = self.expt_dp_with_egtp_group if is_gtp else self.expt_dp_group else: - data_parallel_group = ( - self.dp_cp_with_gtp_group if is_gtp else self.dp_cp_group - ) + data_parallel_group = self.dp_cp_with_gtp_group if is_gtp else self.dp_cp_group torch.distributed.broadcast( param.data, src=torch.distributed.get_global_rank(data_parallel_group, 0), diff --git a/megatron/core/distributed/finalize_model_grads.py b/megatron/core/distributed/finalize_model_grads.py index b26eb8a201d..1f9524df169 100644 --- a/megatron/core/distributed/finalize_model_grads.py +++ b/megatron/core/distributed/finalize_model_grads.py @@ -496,8 +496,8 @@ def finalize_model_grads( # rs_stream may still be writing to main_grad when finish_grad_sync starts the DP # allreduce on main_stream. if ( - config.generalized_tensor_parallel_size > 1 - or config.expert_generalized_tensor_parallel_size > 1 + config.generalized_tensor_parallel_remat_size > 1 + or config.expert_generalized_tensor_parallel_remat_size > 1 ): from megatron.experimental.gtp import ( get_all_ag_streams, @@ -514,7 +514,7 @@ def finalize_model_grads( # Wait for captured bwd Phase 2 (main_grad.add_) on each CG runner's # stream. bwd_completion_event only covers Phase 1; Phase 2 runs after # it on runner.stream with no other sync to main_stream. 
- if config.generalized_tensor_parallel_size > 1: + if config.generalized_tensor_parallel_remat_size > 1: from megatron.core.transformer.cuda_graphs import get_gtp_phase2_completion_events for evt in get_gtp_phase2_completion_events(): diff --git a/megatron/core/extensions/transformer_engine.py b/megatron/core/extensions/transformer_engine.py index 549b7956fd7..689465928e7 100644 --- a/megatron/core/extensions/transformer_engine.py +++ b/megatron/core/extensions/transformer_engine.py @@ -1223,8 +1223,7 @@ def extra_repr(self) -> str: f"in_features={self.in_features}, " f"out_features={self.out_features}, " f"bias={self.use_bias}, " - f"TP={self.tp_size}" - + (f", GTP={self.gtp_size}" if hasattr(self, "gtp_size") else "") + f"TP={self.tp_size}" + (f", GTP={self.gtp_size}" if hasattr(self, "gtp_size") else "") ) def backward_dw(self): @@ -1342,8 +1341,7 @@ def extra_repr(self) -> str: f"in_features={self.in_features}, " f"out_features={self.out_features}, " f"bias={self.use_bias}, " - f"TP={self.tp_size}" - + (f", GTP={self.gtp_size}" if hasattr(self, "gtp_size") else "") + f"TP={self.tp_size}" + (f", GTP={self.gtp_size}" if hasattr(self, "gtp_size") else "") ) def backward_dw(self): @@ -1455,8 +1453,7 @@ def extra_repr(self) -> str: f"in_features={self.in_features}, " f"out_features={self.out_features}, " f"bias={self.use_bias}, " - f"TP={self.tp_size}" - + (f", GTP={self.gtp_size}" if hasattr(self, "gtp_size") else "") + f"TP={self.tp_size}" + (f", GTP={self.gtp_size}" if hasattr(self, "gtp_size") else "") ) def backward_dw(self): diff --git a/megatron/core/model_parallel_config.py b/megatron/core/model_parallel_config.py index dc2196169ee..cd26d3ca029 100644 --- a/megatron/core/model_parallel_config.py +++ b/megatron/core/model_parallel_config.py @@ -20,9 +20,13 @@ class ModelParallelConfig: tensor_model_parallel_size: int = 1 """Intra-layer model parallelism. 
Splits tensors across GPU ranks.""" - generalized_tensor_parallel_size: int = 1 - """Generalized tensor parallelism. Shards model weights (but not activations) across GPU ranks. - Placed right after tensor parallelism in the parallelism ordering. + generalized_tensor_parallel_remat_size: int = 1 + """Generalized tensor parallelism with weight rematerialization. Shards model weights + across GPU ranks along ``out_features``; each weight is rematerialized independently + (per-weight, not per-layer) via async all-gather on every forward AND backward pass. + Carved out of the data-parallel axis, so increasing this size shrinks per-rank weight + memory and shrinks the outer DP that contributes the per-rank batch. Placed right + after tensor parallelism in the parallelism ordering. """ pipeline_model_parallel_comm_backend: Optional[Literal["nccl", "ucc"]] = None @@ -77,9 +81,12 @@ class ModelParallelConfig: expert_model_parallel_size: int = 1 """Distributes Moe Experts across sub data parallel dimension.""" - expert_generalized_tensor_parallel_size: int = 1 - """Generalized tensor parallelism for expert layers. Shards expert weights (but not activations) - across GPU ranks. Independent from the decoder's generalized_tensor_parallel_size. + expert_generalized_tensor_parallel_remat_size: int = 1 + """Generalized tensor parallelism with weight rematerialization, for expert layers. Shards + expert weights across GPU ranks along ``out_features``; each expert weight is + rematerialized independently (per-weight, not per-layer) via async all-gather on every + forward AND backward pass. Independent from the decoder's + ``generalized_tensor_parallel_remat_size``. 
""" expert_tensor_parallel_size: Optional[int] = None diff --git a/megatron/core/optimizer/clip_grads.py b/megatron/core/optimizer/clip_grads.py index a1fb54f0205..058fabd46a5 100644 --- a/megatron/core/optimizer/clip_grads.py +++ b/megatron/core/optimizer/clip_grads.py @@ -243,7 +243,7 @@ def count_zeros_fp32( total_num_zeros = torch.zeros(1, dtype=torch.int64, device='cuda') data_parallel_group = None use_megatron_fsdp = False - gtp_rank = parallel_state.get_generalized_tensor_parallel_rank() + gtp_rank = parallel_state.get_generalized_tensor_parallel_remat_rank() for param in parameters: if getattr(param, "__fsdp_param__", False) and param.grad is not None: # If the parameter is managed by Megatron FSDP, we need to handle it differently. diff --git a/megatron/core/optimizer/optimizer.py b/megatron/core/optimizer/optimizer.py index 31f6eb524fb..c4acb2974d3 100644 --- a/megatron/core/optimizer/optimizer.py +++ b/megatron/core/optimizer/optimizer.py @@ -165,8 +165,8 @@ def get_main_grads_for_grad_norm_split(self) -> Tuple[List[torch.Tensor], List[t params = self.get_parameters() non_gtp_grads = [] gtp_grads = [] - gtp_rank = parallel_state.get_generalized_tensor_parallel_rank() - egtp_rank = parallel_state.get_expert_generalized_tensor_parallel_rank() + gtp_rank = parallel_state.get_generalized_tensor_parallel_remat_rank() + egtp_rank = parallel_state.get_expert_generalized_tensor_parallel_remat_rank() for param in params: if self.config.use_precision_aware_optimizer_no_fp8_or_ds_fp8 or ( # Megatron-FSDP always uses decoupled_grad with FusedAdam. @@ -260,7 +260,7 @@ def _compute_grad_norm_with_gtp(self, non_gtp_grads, gtp_grads): # Check if this optimizer handles expert params that need EGTP reduction. # The model_parallel group for dense/GTP optimizers = TP×PP×GTP (includes GTP), # but for MoE optimizers = TP×EP×PP (does NOT include EGTP). 
- egtp_world_size = parallel_state.get_expert_generalized_tensor_parallel_world_size() + egtp_world_size = parallel_state.get_expert_generalized_tensor_parallel_remat_world_size() is_expert_optimizer = any(not getattr(p, 'allreduce', True) for p in self.get_parameters()) needs_egtp_reduce = is_expert_optimizer and egtp_world_size > 1 @@ -278,7 +278,7 @@ def _compute_grad_norm_with_gtp(self, non_gtp_grads, gtp_grads): torch.distributed.all_reduce( gtp_norm_2, op=torch.distributed.ReduceOp.SUM, - group=parallel_state.get_expert_generalized_tensor_parallel_group(), + group=parallel_state.get_expert_generalized_tensor_parallel_remat_group(), ) total_norm_2 = non_gtp_norm**2 + gtp_norm_2.item() return total_norm_2**0.5 diff --git a/megatron/core/parallel_state.py b/megatron/core/parallel_state.py index ff7b912d7b0..f6b1c7604db 100644 --- a/megatron/core/parallel_state.py +++ b/megatron/core/parallel_state.py @@ -28,8 +28,8 @@ # Intra-layer model parallel group that the current rank belongs to. _TENSOR_MODEL_PARALLEL_GROUP = None # Generalized tensor parallelism group that the current rank belongs to. -_GENERALIZED_TENSOR_PARALLEL_GROUP = None -_GENERALIZED_TENSOR_PARALLEL_GLOBAL_RANKS = None +_GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP = None +_GENERALIZED_TENSOR_PARALLEL_REMAT_GLOBAL_RANKS = None # Inter-layer model parallel group that the current rank belongs to. _PIPELINE_MODEL_PARALLEL_GROUP = None # Model parallel group (both intra- and pipeline) that the current rank belongs to. @@ -54,8 +54,8 @@ # _EXPERT_DATA denotes data parallelism of expert which replicates weight across the group. # Expert generalized tensor parallelism group that current rank belongs to. -_EXPERT_GENERALIZED_TENSOR_PARALLEL_GROUP = None -_EXPERT_GENERALIZED_TENSOR_PARALLEL_GLOBAL_RANKS = None +_EXPERT_GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP = None +_EXPERT_GENERALIZED_TENSOR_PARALLEL_REMAT_GLOBAL_RANKS = None # Expert model parallel group that current rank belongs to. 
_EXPERT_MODEL_PARALLEL_GROUP = None # Expert tensor parallel group that current rank belongs to. @@ -575,12 +575,12 @@ def initialize_model_parallel( hierarchical_context_parallel_sizes: Optional[List[int]] = None, hybrid_context_parallel: bool = False, expert_model_parallel_size: int = 1, - expert_generalized_tensor_parallel_size: int = 1, + gtp_remat_size: int = 1, + expert_gtp_remat_size: int = 1, num_distributed_optimizer_instances: int = 1, expert_tensor_parallel_size: Optional[int] = None, nccl_communicator_config_path: Optional[str] = None, distributed_timeout_minutes: int = 30, - generalized_tensor_parallel_size: int = 1, order: str = "tp-cp-ep-dp-pp", get_embedding_ranks: Optional[Callable[[List[int], Optional[int]], List[int]]] = None, get_position_embedding_ranks: Optional[Callable[[List[int], Optional[int]], List[int]]] = None, @@ -656,6 +656,21 @@ def initialize_model_parallel( The number of Mixture of Experts parallel GPUs in each expert parallel group. + gtp_remat_size (int, default = 1): + Generalized tensor parallelism with weight rematerialization (GTP). + Shards model weights along ``out_features`` across this many ranks; + each weight is rematerialized independently (per-weight, not per- + layer) via async all-gather on every forward AND backward pass. + Carved out of the data-parallel axis. Maps to the dataclass field + ``ModelParallelConfig.generalized_tensor_parallel_remat_size``. + + expert_gtp_remat_size (int, default = 1): + Expert-side counterpart of ``gtp_remat_size`` — shards routed-expert + weights along ``out_features`` and rematerializes per-weight on + every forward AND backward pass. Carved out of the expert data- + parallel axis. Independent from ``gtp_remat_size``. Maps to + ``ModelParallelConfig.expert_generalized_tensor_parallel_remat_size``. + num_distributed_optimizer_instances (int, default = 1): The number of distributed optimizer replicas across the data- parallel domain. 
@@ -760,11 +775,11 @@ def initialize_model_parallel( data_parallel_size: int = world_size // model_size - if (data_parallel_size * context_parallel_size) % generalized_tensor_parallel_size != 0: + if (data_parallel_size * context_parallel_size) % gtp_remat_size != 0: raise RuntimeError( f"data_parallel_size * context_parallel_size " f"({data_parallel_size * context_parallel_size}) is not divisible by " - f"generalized_tensor_parallel_size ({generalized_tensor_parallel_size})" + f"gtp_remat_size ({gtp_remat_size})" ) if virtual_pipeline_model_parallel_size is not None: @@ -819,10 +834,10 @@ def initialize_model_parallel( f"world_size ({world_size}) is not divisible by expert_tensor_model_pipeline_parallel size ({expert_tensor_model_pipeline_parallel_size})" ) - if expert_data_parallel_size % expert_generalized_tensor_parallel_size != 0: + if expert_data_parallel_size % expert_gtp_remat_size != 0: raise RuntimeError( f"expert_data_parallel_size ({expert_data_parallel_size}) is not divisible by " - f"expert_generalized_tensor_parallel_size ({expert_generalized_tensor_parallel_size})" + f"expert_gtp_remat_size ({expert_gtp_remat_size})" ) # TODO: support expert specific ordering @@ -872,23 +887,23 @@ def initialize_model_parallel( # Build the generalized tensor parallel groups. # GTP overlaps with the CP-DP domain because GTP only shards weights # while CP only shards activations — they are independent and can share ranks. 
- global _GENERALIZED_TENSOR_PARALLEL_GROUP - global _GENERALIZED_TENSOR_PARALLEL_GLOBAL_RANKS + global _GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP + global _GENERALIZED_TENSOR_PARALLEL_REMAT_GLOBAL_RANKS assert ( - _GENERALIZED_TENSOR_PARALLEL_GROUP is None + _GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP is None ), "generalized tensor parallel group is already initialized" for cp_dp_ranks in decoder_rank_generator.get_ranks('cp-dp'): - for i in range(0, len(cp_dp_ranks), generalized_tensor_parallel_size): - gtp_ranks = cp_dp_ranks[i : i + generalized_tensor_parallel_size] + for i in range(0, len(cp_dp_ranks), gtp_remat_size): + gtp_ranks = cp_dp_ranks[i : i + gtp_remat_size] group = create_group( gtp_ranks, timeout=timeout, pg_options=get_nccl_options("ps", nccl_comm_cfgs), - group_desc="GENERALIZED_TENSOR_PARALLEL_GROUP", + group_desc="GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP", ) if rank in gtp_ranks: - _GENERALIZED_TENSOR_PARALLEL_GROUP = group - _GENERALIZED_TENSOR_PARALLEL_GLOBAL_RANKS = gtp_ranks + _GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP = group + _GENERALIZED_TENSOR_PARALLEL_REMAT_GLOBAL_RANKS = gtp_ranks # Set NCCL_COLLNET_ENABLE to 1 to enable SHARP for the dp group. if sharp_enabled_group == "dp": @@ -1013,19 +1028,19 @@ def initialize_model_parallel( global _DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP global _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP global _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP_GLOO - if generalized_tensor_parallel_size > 1: + if gtp_remat_size > 1: # Build rank→gtp_rank mapping. 
rank_to_gtp_rank = {} for cp_dp_ranks in decoder_rank_generator.get_ranks('cp-dp'): - for i in range(0, len(cp_dp_ranks), generalized_tensor_parallel_size): - gtp_chunk = cp_dp_ranks[i : i + generalized_tensor_parallel_size] + for i in range(0, len(cp_dp_ranks), gtp_remat_size): + gtp_chunk = cp_dp_ranks[i : i + gtp_remat_size] for gtp_rank_idx, r in enumerate(gtp_chunk): rank_to_gtp_rank[r] = gtp_rank_idx # DP-only with GTP: create one group per (dp_group, gtp_rank) pair. # All ranks must participate in every create_group call (collective). for dp_ranks in decoder_rank_generator.get_ranks('dp'): - for gtp_rank_val in range(generalized_tensor_parallel_size): + for gtp_rank_val in range(gtp_remat_size): dp_gtp_ranks = [r for r in dp_ranks if rank_to_gtp_rank[r] == gtp_rank_val] group = create_group( dp_gtp_ranks, @@ -1040,7 +1055,7 @@ def initialize_model_parallel( # multi-instance distopt is enabled, so callers can hold one slice of the # GTP-excluded DP-CP set without the GTP peers leaking in. 
for dp_cp_ranks in decoder_rank_generator.get_ranks('dp-cp'): - for gtp_rank_val in range(generalized_tensor_parallel_size): + for gtp_rank_val in range(gtp_remat_size): dp_cp_gtp_ranks = [r for r in dp_cp_ranks if rank_to_gtp_rank[r] == gtp_rank_val] group = create_group( dp_cp_gtp_ranks, @@ -1052,16 +1067,12 @@ def initialize_model_parallel( _DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP = group if num_distributed_optimizer_instances > 1: - assert ( - len(dp_cp_gtp_ranks) % num_distributed_optimizer_instances == 0 - ), ( + assert len(dp_cp_gtp_ranks) % num_distributed_optimizer_instances == 0, ( f"DP-CP minus GTP peers size ({len(dp_cp_gtp_ranks)}) must be " f"divisible by num_distributed_optimizer_instances " f"({num_distributed_optimizer_instances})" ) - intra_partial_size = ( - len(dp_cp_gtp_ranks) // num_distributed_optimizer_instances - ) + intra_partial_size = len(dp_cp_gtp_ranks) // num_distributed_optimizer_instances for i in range(num_distributed_optimizer_instances): chunk = dp_cp_gtp_ranks[ i * intra_partial_size : (i + 1) * intra_partial_size @@ -1138,7 +1149,7 @@ def initialize_model_parallel( global _MODEL_PARALLEL_GROUP global _MODEL_PARALLEL_GLOBAL_RANKS assert _MODEL_PARALLEL_GROUP is None, 'model parallel group is already initialized' - if generalized_tensor_parallel_size == 1: + if gtp_remat_size == 1: # No GTP — model parallel is just tp-pp for ranks in decoder_rank_generator.get_ranks('tp-pp'): group = create_group( @@ -1162,8 +1173,8 @@ def initialize_model_parallel( # For each set of GTP peers, union their tp-pp groups to form model parallel groups. 
model_parallel_groups_set = set() for cp_dp_ranks in decoder_rank_generator.get_ranks('cp-dp'): - for i in range(0, len(cp_dp_ranks), generalized_tensor_parallel_size): - gtp_ranks = cp_dp_ranks[i : i + generalized_tensor_parallel_size] + for i in range(0, len(cp_dp_ranks), gtp_remat_size): + gtp_ranks = cp_dp_ranks[i : i + gtp_remat_size] # Merge tp-pp groups of all GTP peers mp_ranks = [] for gtp_r in gtp_ranks: @@ -1357,22 +1368,23 @@ def initialize_model_parallel( ### Expert-related parallel groups initialization # Build the expert generalized tensor parallel group # Expert GTP overlaps with the expert DP domain (experts don't use CP). - global _EXPERT_GENERALIZED_TENSOR_PARALLEL_GROUP, _EXPERT_GENERALIZED_TENSOR_PARALLEL_GLOBAL_RANKS + global _EXPERT_GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP + global _EXPERT_GENERALIZED_TENSOR_PARALLEL_REMAT_GLOBAL_RANKS assert ( - _EXPERT_GENERALIZED_TENSOR_PARALLEL_GROUP is None + _EXPERT_GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP is None ), 'Expert generalized tensor parallel group is already initialized' for dp_ranks in expert_decoder_rank_generator.get_ranks('dp'): - for i in range(0, len(dp_ranks), expert_generalized_tensor_parallel_size): - egtp_ranks = dp_ranks[i : i + expert_generalized_tensor_parallel_size] + for i in range(0, len(dp_ranks), expert_gtp_remat_size): + egtp_ranks = dp_ranks[i : i + expert_gtp_remat_size] group = create_group( egtp_ranks, timeout=timeout, pg_options=get_nccl_options("expt_gtp", nccl_comm_cfgs), - group_desc="EXPERT_GENERALIZED_TENSOR_PARALLEL_GROUP", + group_desc="EXPERT_GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP", ) if rank in egtp_ranks: - _EXPERT_GENERALIZED_TENSOR_PARALLEL_GROUP = group - _EXPERT_GENERALIZED_TENSOR_PARALLEL_GLOBAL_RANKS = egtp_ranks + _EXPERT_GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP = group + _EXPERT_GENERALIZED_TENSOR_PARALLEL_REMAT_GLOBAL_RANKS = egtp_ranks # Build the expert model parallel group global _EXPERT_MODEL_PARALLEL_GROUP, _EXPERT_MODEL_PARALLEL_RANKS @@ -1518,18 
+1530,18 @@ def initialize_model_parallel( global _EXPERT_DATA_PARALLEL_GROUP_WITH_GTP global _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP global _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP_GLOO - if expert_generalized_tensor_parallel_size > 1: + if expert_gtp_remat_size > 1: # Build rank→expert_gtp_rank mapping. rank_to_expert_gtp_rank = {} for dp_ranks in expert_decoder_rank_generator.get_ranks('dp'): - for i in range(0, len(dp_ranks), expert_generalized_tensor_parallel_size): - egtp_chunk = dp_ranks[i : i + expert_generalized_tensor_parallel_size] + for i in range(0, len(dp_ranks), expert_gtp_remat_size): + egtp_chunk = dp_ranks[i : i + expert_gtp_remat_size] for egtp_rank_idx, r in enumerate(egtp_chunk): rank_to_expert_gtp_rank[r] = egtp_rank_idx # Create one group per (expert_dp_group, expert_gtp_rank) pair (collective). for dp_ranks in expert_decoder_rank_generator.get_ranks('dp'): - for egtp_rank_val in range(expert_generalized_tensor_parallel_size): + for egtp_rank_val in range(expert_gtp_remat_size): edp_gtp_ranks = [r for r in dp_ranks if rank_to_expert_gtp_rank[r] == egtp_rank_val] group = create_group( edp_gtp_ranks, @@ -1541,20 +1553,14 @@ def initialize_model_parallel( _EXPERT_DATA_PARALLEL_GROUP_WITH_GTP = group if num_distributed_optimizer_instances > 1: - assert ( - len(edp_gtp_ranks) % num_distributed_optimizer_instances == 0 - ), ( + assert len(edp_gtp_ranks) % num_distributed_optimizer_instances == 0, ( f"Expert DP minus EGTP peers size ({len(edp_gtp_ranks)}) must be " f"divisible by num_distributed_optimizer_instances " f"({num_distributed_optimizer_instances})" ) - intra_partial_size = ( - len(edp_gtp_ranks) // num_distributed_optimizer_instances - ) + intra_partial_size = len(edp_gtp_ranks) // num_distributed_optimizer_instances for i in range(num_distributed_optimizer_instances): - chunk = edp_gtp_ranks[ - i * intra_partial_size : (i + 1) * intra_partial_size - ] + chunk = edp_gtp_ranks[i * intra_partial_size : (i + 1) * 
intra_partial_size] intra_group = create_group( chunk, timeout=timeout, @@ -1739,38 +1745,38 @@ def get_tensor_model_parallel_group(check_initialized=True): return _TENSOR_MODEL_PARALLEL_GROUP -def get_generalized_tensor_parallel_group(check_initialized=True): +def get_generalized_tensor_parallel_remat_group(check_initialized=True): """Get the parameter-sharding group the caller rank belongs to.""" if check_initialized: assert ( - _GENERALIZED_TENSOR_PARALLEL_GROUP is not None + _GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP is not None ), "generalized tensor parallel group is not initialized" - return _GENERALIZED_TENSOR_PARALLEL_GROUP + return _GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP -def get_generalized_tensor_parallel_world_size(): +def get_generalized_tensor_parallel_remat_world_size(): """Return world size for the parameter-sharding group.""" if torch.distributed.is_available() and torch.distributed.is_initialized(): - return get_generalized_tensor_parallel_group().size() + return get_generalized_tensor_parallel_remat_group().size() else: return 0 -def get_generalized_tensor_parallel_rank(): +def get_generalized_tensor_parallel_remat_rank(): """Return caller's rank in the parameter-sharding group.""" if torch.distributed.is_available() and torch.distributed.is_initialized(): - return get_generalized_tensor_parallel_group().rank() + return get_generalized_tensor_parallel_remat_group().rank() else: return 0 -def get_generalized_tensor_parallel_global_ranks(check_initialized=True): +def get_generalized_tensor_parallel_remat_global_ranks(check_initialized=True): """Get all global ranks of the parameter-sharding group that the caller rank belongs to.""" if check_initialized: assert ( - _GENERALIZED_TENSOR_PARALLEL_GLOBAL_RANKS is not None + _GENERALIZED_TENSOR_PARALLEL_REMAT_GLOBAL_RANKS is not None ), "generalized tensor parallel group is not initialized" - return _GENERALIZED_TENSOR_PARALLEL_GLOBAL_RANKS + return _GENERALIZED_TENSOR_PARALLEL_REMAT_GLOBAL_RANKS def 
get_pipeline_model_parallel_group(check_initialized=True): @@ -1840,9 +1846,9 @@ def get_data_parallel_group_gloo( # Full (non-partial) Gloo variant of with_gtp is not built; callers needing # cross-instance Gloo over the GTP-excluded set can use the non-GTP variant # since broadcasts are init-time only. - assert _DATA_PARALLEL_GROUP_WITH_CP_GLOO is not None, ( - "data parallel group-gloo with context parallel combined is not initialized" - ) + assert ( + _DATA_PARALLEL_GROUP_WITH_CP_GLOO is not None + ), "data parallel group-gloo with context parallel combined is not initialized" return _DATA_PARALLEL_GROUP_WITH_CP_GLOO if with_context_parallel: if partial_data_parallel: @@ -2221,38 +2227,38 @@ def get_tensor_and_context_parallel_rank(): ### Expert-related parallel states functions -def get_expert_generalized_tensor_parallel_group(check_initialized=True): +def get_expert_generalized_tensor_parallel_remat_group(check_initialized=True): """Get the expert-parameter-sharding group the caller rank belongs to.""" if check_initialized: assert ( - _EXPERT_GENERALIZED_TENSOR_PARALLEL_GROUP is not None + _EXPERT_GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP is not None ), "expert generalized tensor parallel group is not initialized" - return _EXPERT_GENERALIZED_TENSOR_PARALLEL_GROUP + return _EXPERT_GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP -def get_expert_generalized_tensor_parallel_world_size(): +def get_expert_generalized_tensor_parallel_remat_world_size(): """Return world size for the expert-parameter-sharding group.""" if torch.distributed.is_available() and torch.distributed.is_initialized(): - return get_expert_generalized_tensor_parallel_group().size() + return get_expert_generalized_tensor_parallel_remat_group().size() else: return 0 -def get_expert_generalized_tensor_parallel_rank(): +def get_expert_generalized_tensor_parallel_remat_rank(): """Return caller's rank in the expert-parameter-sharding group.""" if torch.distributed.is_available() and 
torch.distributed.is_initialized(): - return get_expert_generalized_tensor_parallel_group().rank() + return get_expert_generalized_tensor_parallel_remat_group().rank() else: return 0 -def get_expert_generalized_tensor_parallel_global_ranks(check_initialized=True): +def get_expert_generalized_tensor_parallel_remat_global_ranks(check_initialized=True): """Get all global ranks of the expert-parameter-sharding group that the caller rank belongs to.""" if check_initialized: assert ( - _EXPERT_GENERALIZED_TENSOR_PARALLEL_GLOBAL_RANKS is not None + _EXPERT_GENERALIZED_TENSOR_PARALLEL_REMAT_GLOBAL_RANKS is not None ), "expert generalized tensor parallel group is not initialized" - return _EXPERT_GENERALIZED_TENSOR_PARALLEL_GLOBAL_RANKS + return _EXPERT_GENERALIZED_TENSOR_PARALLEL_REMAT_GLOBAL_RANKS def get_expert_model_parallel_group(check_initialized=True): @@ -2419,9 +2425,9 @@ def get_expert_data_parallel_group( def get_expert_data_parallel_group_gloo(with_gtp=False, partial_expert_data_parallel=False): """Get expert data parallel group-gloo.""" if with_gtp: - assert partial_expert_data_parallel, ( - "Gloo with_gtp variant is only built for the partial (per-distopt-instance) group" - ) + assert ( + partial_expert_data_parallel + ), "Gloo with_gtp variant is only built for the partial (per-distopt-instance) group" assert _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP_GLOO is not None, ( "Intra partial expert data parallel group with generalized tensor parallel " "(gloo) is not initialized" @@ -2508,7 +2514,7 @@ def get_all_ranks(): pipeline-model-parallel and expert-model-parallel groups.""" ranks = [ get_tensor_model_parallel_rank(), - get_generalized_tensor_parallel_rank(), + get_generalized_tensor_parallel_remat_rank(), get_data_parallel_rank(), get_context_parallel_rank(), get_pipeline_model_parallel_rank(), @@ -2525,11 +2531,11 @@ def destroy_model_parallel(): global _TENSOR_MODEL_PARALLEL_GROUP _TENSOR_MODEL_PARALLEL_GROUP = None - global 
_GENERALIZED_TENSOR_PARALLEL_GROUP - _GENERALIZED_TENSOR_PARALLEL_GROUP = None + global _GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP + _GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP = None - global _GENERALIZED_TENSOR_PARALLEL_GLOBAL_RANKS - _GENERALIZED_TENSOR_PARALLEL_GLOBAL_RANKS = None + global _GENERALIZED_TENSOR_PARALLEL_REMAT_GLOBAL_RANKS + _GENERALIZED_TENSOR_PARALLEL_REMAT_GLOBAL_RANKS = None global _PIPELINE_MODEL_PARALLEL_GROUP _PIPELINE_MODEL_PARALLEL_GROUP = None @@ -2618,11 +2624,11 @@ def destroy_model_parallel(): _DATA_PARALLEL_GROUP_WITH_CP_GLOO = None # Destroy parallel state related to expert parallelism. - global _EXPERT_GENERALIZED_TENSOR_PARALLEL_GROUP - _EXPERT_GENERALIZED_TENSOR_PARALLEL_GROUP = None + global _EXPERT_GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP + _EXPERT_GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP = None - global _EXPERT_GENERALIZED_TENSOR_PARALLEL_GLOBAL_RANKS - _EXPERT_GENERALIZED_TENSOR_PARALLEL_GLOBAL_RANKS = None + global _EXPERT_GENERALIZED_TENSOR_PARALLEL_REMAT_GLOBAL_RANKS + _EXPERT_GENERALIZED_TENSOR_PARALLEL_REMAT_GLOBAL_RANKS = None global _EXPERT_MODEL_PARALLEL_GROUP _EXPERT_MODEL_PARALLEL_GROUP = None diff --git a/megatron/core/process_groups_config.py b/megatron/core/process_groups_config.py index 101f731cdfc..cf9f3442dac 100644 --- a/megatron/core/process_groups_config.py +++ b/megatron/core/process_groups_config.py @@ -117,10 +117,10 @@ class ProcessGroupCollection: # Separate dp_cp communicator for param all-gather (AG/RS overlap) dp_cp_ag: torch.distributed.ProcessGroup = field(init=False) - # _GENERALIZED_TENSOR_PARALLEL_GROUP + # _GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP gtp: torch.distributed.ProcessGroup = field(init=False) - # _EXPERT_GENERALIZED_TENSOR_PARALLEL_GROUP + # _EXPERT_GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP expt_gtp: torch.distributed.ProcessGroup = field(init=False) # MoE layers need expt_dp group for sharded state dict @@ -254,10 +254,11 @@ def use_mpu_process_groups(cls, required_pgs: Optional[List[str]] = 
None): with_context_parallel=True, ), 'gtp': partial( - parallel_state.get_generalized_tensor_parallel_group, check_initialized=False + parallel_state.get_generalized_tensor_parallel_remat_group, check_initialized=False ), 'expt_gtp': partial( - parallel_state.get_expert_generalized_tensor_parallel_group, check_initialized=False + parallel_state.get_expert_generalized_tensor_parallel_remat_group, + check_initialized=False, ), } @@ -330,9 +331,7 @@ def setup_process_groups_for_optimizer( intra_expt_dp_with_egtp_group = parallel_state.get_expert_data_parallel_group( with_gtp=True, partial_expert_data_parallel=True ) - expt_dp_with_egtp_group = parallel_state.get_expert_data_parallel_group( - with_gtp=True - ) + expt_dp_with_egtp_group = parallel_state.get_expert_data_parallel_group(with_gtp=True) intra_dist_opt_group = parallel_state.get_intra_distributed_optimizer_instance_group() # Gloo groups diff --git a/megatron/core/transformer/cuda_graphs.py b/megatron/core/transformer/cuda_graphs.py index f59768ae07d..c8c5f74dab4 100644 --- a/megatron/core/transformer/cuda_graphs.py +++ b/megatron/core/transformer/cuda_graphs.py @@ -439,7 +439,7 @@ def create_cudagraphs(cls): "https://github.com/NVIDIA/TransformerEngine/blob/v2.10/transformer_engine/pytorch/utils.py#L759" # pylint: disable=line-too-long ) - if any(r[0].generalized_tensor_parallel for r in cls.cudagraph_record): + if any(r[0].gtp_remat for r in cls.cudagraph_record): reallocate_gtp_cache_to_mempool( torch.cuda.current_device(), CudaGraphManager.global_mempool ) @@ -763,9 +763,9 @@ def backward(ctx, *grads): # defensive against future Phase 2 work on other sub-streams. # main_stream stays unblocked so the next runner can start in # parallel. 
- if runner.generalized_tensor_parallel and runner.finalized_during_bwd_capture: + if runner.gtp_remat and runner.finalized_during_bwd_capture: gtp_rs_stream = get_rs_stream( - GTPChain.GRAPHED.value, parallel_state.get_generalized_tensor_parallel_group() + GTPChain.GRAPHED.value, parallel_state.get_generalized_tensor_parallel_remat_group() ) gtp_rs_stream.wait_event(runner.bwd_phase2_completion_event) with torch.cuda.stream(gtp_rs_stream): @@ -830,7 +830,7 @@ def __init__( self.deallocate_pipeline_outputs = False self.num_warmup_steps = 0 self.use_stream = False - self.generalized_tensor_parallel = False + self.gtp_remat = False self.fwd_side_streams = [] self.bwd_side_streams = [] # Populated by create_bwd_graph: GTP params whose main_grad.add_ was captured in THIS @@ -861,16 +861,14 @@ def __init__( self.fp4_enabled = self.base_module.config.fp4 is not None self.fp8_runtime_enabled = None self.fp4_runtime_enabled = None - self.generalized_tensor_parallel = ( - self.base_module.config.generalized_tensor_parallel_size > 1 - ) + self.gtp_remat = self.base_module.config.generalized_tensor_parallel_remat_size > 1 # Ensure internal warmup (inside create_fwd_graph) has >= 2 steps # for GTP: 1st builds chain + tickets, 2nd exercises prefetch path. - if self.generalized_tensor_parallel: + if self.gtp_remat: self.num_warmup_steps = max(self.num_warmup_steps, 2) - if self.generalized_tensor_parallel: + if self.gtp_remat: self.use_stream = True self.stream = torch.cuda.Stream() self.fwd_completion_event = torch.cuda.Event(external=True, interprocess=True) @@ -879,9 +877,9 @@ def __init__( # all sharded across PARAMETER_SHARDING_GROUP. Materialize that # (chain, group) stream pair now so it is registered as a # captured side stream before the first forward. 
- from megatron.core.parallel_state import get_generalized_tensor_parallel_group + from megatron.core.parallel_state import get_generalized_tensor_parallel_remat_group - gtp_group = get_generalized_tensor_parallel_group() + gtp_group = get_generalized_tensor_parallel_remat_group() graphed_ag = get_ag_stream(GTPChain.GRAPHED.value, gtp_group) graphed_rs = get_rs_stream(GTPChain.GRAPHED.value, gtp_group) self._register_side_stream(self.fwd_side_streams, graphed_ag) @@ -1151,7 +1149,7 @@ def clone_ten(ten): allow_unused=True, ) - if self.generalized_tensor_parallel: + if self.gtp_remat: wait_async_comms(GTPChain.GRAPHED.value) self._sync_against_side_streams(self.bwd_side_streams) @@ -1182,7 +1180,7 @@ def clone_ten(ten): *self.fwd_graph_input_args, **self.fwd_graph_input_kwargs ) - if self.generalized_tensor_parallel: + if self.gtp_remat: wait_async_comms(GTPChain.GRAPHED.value) if self.fwd_side_streams: @@ -1262,7 +1260,7 @@ def create_bwd_graph(self): # cascade and wait_async_comms to split the captured RS wait/add across # producer and consumer graphs (avoids cross-capture cudaStreamWaitEvent # on c10d Work.postEvent). - if self.generalized_tensor_parallel: + if self.gtp_remat: pset = {id(p) for p in self.params_to_backprop} for p in self.params_to_backprop: if not isinstance(p, GTPShardedParam): @@ -1329,13 +1327,13 @@ def create_bwd_graph(self): # tails the wait is captured here, the add in the # consumer's cascade; for within-graph tails both # happen here (see wait_async_comms). - if self.generalized_tensor_parallel: + if self.gtp_remat: # Phase 1: drain AG; fence runner_stream past ag_stream so # bwd_completion_event records AFTER NCCL_AG completion. 
wait_async_comms(GTPChain.GRAPHED.value, skip_rs=True) - from megatron.core.parallel_state import get_generalized_tensor_parallel_group + from megatron.core.parallel_state import get_generalized_tensor_parallel_remat_group - gtp_group = get_generalized_tensor_parallel_group() + gtp_group = get_generalized_tensor_parallel_remat_group() graphed_ag = get_ag_stream(GTPChain.GRAPHED.value, gtp_group) self.bwd_ag_fence_event.record(graphed_ag) torch.cuda.current_stream().wait_event(self.bwd_ag_fence_event) @@ -1351,12 +1349,12 @@ def create_bwd_graph(self): if self.bwd_side_streams: self._wait_side_streams(self.bwd_side_streams) - if self.generalized_tensor_parallel: + if self.gtp_remat: # Phase 2 + side-stream join done — record so # finalize_model_grads can wait for main_grad.add_ completion. self.bwd_phase2_completion_event.record() - if self.use_stream and not self.generalized_tensor_parallel: + if self.use_stream and not self.gtp_remat: # Non-GTP path: record after the side-stream join. self.bwd_completion_event.record() @@ -1366,7 +1364,7 @@ def create_bwd_graph(self): # See _compute_finalized_during_bwd_capture for what's in this set and why. self.finalized_during_bwd_capture = ( - self._compute_finalized_during_bwd_capture() if self.generalized_tensor_parallel else [] + self._compute_finalized_during_bwd_capture() if self.gtp_remat else [] ) # Constructs a tuple suitable for returning from Graphed.backward: diff --git a/megatron/experimental/gtp/README.md b/megatron/experimental/gtp/README.md index ff28f27e5de..7b009fd96a2 100644 --- a/megatron/experimental/gtp/README.md +++ b/megatron/experimental/gtp/README.md @@ -117,14 +117,14 @@ Two distinct pools with explicit lifecycle rules: - **TP** (intra-layer): orthogonal axis — GTP shards `out_features` regardless of TP's parallel mode (column or row). 2D grid naturally formed via `tp_group × gtp_group`. - **SP** (sequence-parallel): transparent — GTP operates at weight dim, SP at sequence dim. 
-- **EP** (MoE): `GroupedLinear` with GTP → each routed expert sharded across `EXPERT_GENERALIZED_TENSOR_PARALLEL_GROUP`, independent of EP. MoE AllToAll (HybridEP/NVLink) runs independently of GTP AG/RS (NCCL/IB). +- **EP** (MoE): `GroupedLinear` with GTP → each routed expert sharded across `EXPERT_GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP`, independent of EP. MoE AllToAll (HybridEP/NVLink) runs independently of GTP AG/RS (NCCL/IB). - **DDP**: GTP bypasses autograd's grad accumulator (async RS returns `None`; `_finalize_wgrad` accumulates directly into `main_grad`). `register_grad_accum_hook` + manual invocation from `_finalize_wgrad` (eager path) and `_CudagraphReplayNode.backward` (captured path) serializes DDP RS strictly after GTP RS — critical at IB scale to avoid deadlock between DDP and GTP on the same NIC. ## 6. Opt-in, Minimally Invasive Integration - Drop-in `gtp_group` kwarg on `Linear` / `LayerNormLinear` / `LayerNormMLP` / `GroupedLinear`; no framework-level refactor required. - `classify_gtp_chains(model)` walks `named_parameters()` once at init and sets `chain_id` on every `GTPShardedParam` based on the current `cuda_graph_modules`. -- Turning it off is a no-op: when `gtp_group.size() == 1`, `wrap_module_params_gtp` short-circuits; when `generalized_tensor_parallel_size == 1`, the GTP path in `layers.py` is skipped entirely. +- Turning it off is a no-op: when `gtp_group.size() == 1`, `wrap_module_params_gtp` short-circuits; when `generalized_tensor_parallel_remat_size == 1`, the GTP path in `layers.py` is skipped entirely. - User-tunable knobs (`GTPConfig.pad_for_alignment`, `weight_prefetch`, `check_param_states`) plus a debug-name tagger (`tag_gtp_params_with_names`) for readable link-table output. ## 7. Overlap Design Summary @@ -156,11 +156,11 @@ GTP is enabled through two CLI flags on Megatron's training launcher; everything ```bash # Shard dense weights (attention, mamba, MLP linears) 1/N along out_features. 
---generalized-tensor-parallel-size +--generalized-tensor-parallel-remat-size # Shard MoE routed-expert weights 1/M along out_features. Independent from -# `--generalized-tensor-parallel-size`; can be 1 for non-MoE models. ---expert-generalized-tensor-parallel-size +# `--generalized-tensor-parallel-remat-size`; can be 1 for non-MoE models. +--expert-generalized-tensor-parallel-remat-size ``` ### High-priority streams (Blackwell and later) @@ -180,8 +180,8 @@ The launcher also exports `CUDA_GRAPHS_USE_NODE_PRIORITY=1` so captured CUDA gra torchrun --nproc-per-node 4 pretrain_gpt.py \ --tensor-model-parallel-size 1 \ --pipeline-model-parallel-size 1 \ - --generalized-tensor-parallel-size 2 \ - --expert-generalized-tensor-parallel-size 1 \ + --generalized-tensor-parallel-remat-size 2 \ + --expert-generalized-tensor-parallel-remat-size 1 \ --bf16 \ --num-layers 12 --hidden-size 1024 --num-attention-heads 16 \ --seq-length 1024 --max-position-embeddings 1024 \ @@ -203,7 +203,7 @@ GTP enabled. GTPConfig(pad_for_alignment=16, check_param_states=False, ### What the flags do under the hood -1. `parallel_state.initialize_model_parallel(...)` builds two new groups: `_GENERALIZED_TENSOR_PARALLEL_GROUP` (size = `--generalized-tensor-parallel-size`) and `_EXPERT_GENERALIZED_TENSOR_PARALLEL_GROUP` (size = `--expert-generalized-tensor-parallel-size`), plus the corresponding DP-with-GTP carve-outs (`_DATA_PARALLEL_GROUP_WITH_GTP`, `_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP`). +1. `parallel_state.initialize_model_parallel(...)` builds two new groups: `_GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP` (size = `--generalized-tensor-parallel-remat-size`) and `_EXPERT_GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP` (size = `--expert-generalized-tensor-parallel-remat-size`), plus the corresponding DP-with-GTP carve-outs (`_DATA_PARALLEL_GROUP_WITH_GTP`, `_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP`). 2. 
Megatron's `extensions/transformer_engine.py` reads `pg_collection.gtp` / `pg_collection.expt_gtp` and forwards them as the `gtp_group=` kwarg to `te.Linear` / `te.LayerNormLinear` / `te.GroupedLinear`. TE's `module/base.py` calls back into `megatron.experimental.gtp` via the hook registry (`register_gtp_hooks`) to slice each weight at `reset_parameters` time. 3. DDP carves out GTP shards into a separate bucket pool (`gtp_buffer_groups`) reduced over `intra_dp_cp_with_gtp_group` rather than full DP — the wgrad RS already reduced over the GTP axis. 4. Optimizer state is sharded across the same `with_gtp` subgroup; clip-by-global-norm sums squared norms over `model_parallel × with_gtp` so the reduction count matches the actual replica count. diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py index dcb69644d85..34aa693409c 100644 --- a/megatron/training/arguments.py +++ b/megatron/training/arguments.py @@ -1433,9 +1433,9 @@ def validate_args(args, defaults={}): args.high_priority_stream_groups.append('ep_dp') - if args.generalized_tensor_parallel_size > 1 or args.expert_generalized_tensor_parallel_size > 1: - gtp_size = args.generalized_tensor_parallel_size - egtp_size = args.expert_generalized_tensor_parallel_size + if args.generalized_tensor_parallel_remat_size > 1 or args.expert_generalized_tensor_parallel_remat_size > 1: + gtp_size = args.generalized_tensor_parallel_remat_size + egtp_size = args.expert_generalized_tensor_parallel_remat_size if get_device_arch_version() >= 10: # Setting GTP communication groups for high priority streams for Blackwell and later # architectures. 
Assigning high priority to communication streams ensures that diff --git a/megatron/training/initialize.py b/megatron/training/initialize.py index 817d5c19f96..fc89ba5c3f9 100644 --- a/megatron/training/initialize.py +++ b/megatron/training/initialize.py @@ -344,8 +344,8 @@ def _initialize_distributed(get_embedding_ranks, get_position_embedding_ranks, s args.virtual_pipeline_model_parallel_size, pipeline_model_parallel_comm_backend=args.pipeline_model_parallel_comm_backend, use_sharp=args.use_sharp, - generalized_tensor_parallel_size=args.generalized_tensor_parallel_size, - expert_generalized_tensor_parallel_size=args.expert_generalized_tensor_parallel_size, + gtp_remat_size=args.generalized_tensor_parallel_remat_size, + expert_gtp_remat_size=args.expert_generalized_tensor_parallel_remat_size, context_parallel_size=args.context_parallel_size, hierarchical_context_parallel_sizes=args.hierarchical_context_parallel_sizes, hybrid_context_parallel=args.hybrid_context_parallel, diff --git a/megatron/training/training.py b/megatron/training/training.py index 2d5760a6372..cbed9c59870 100644 --- a/megatron/training/training.py +++ b/megatron/training/training.py @@ -1654,8 +1654,8 @@ def get_model(model_provider_func, model_type=ModelType.encoder_or_decoder, wrap # Configure GTP padding alignment based on quantization recipe before model construction. 
from megatron.experimental.gtp import update_gtp_config if ( - getattr(args, 'generalized_tensor_parallel_size', 1) > 1 - or getattr(args, 'expert_generalized_tensor_parallel_size', 1) > 1 + getattr(args, 'generalized_tensor_parallel_remat_size', 1) > 1 + or getattr(args, 'expert_generalized_tensor_parallel_remat_size', 1) > 1 ): if getattr(args, 'fp4', None) is not None: update_gtp_config(pad_for_alignment=16) @@ -1729,8 +1729,8 @@ def build_model(): tag_gtp_params_with_names(model_module) classify_gtp_chains(model_module) if ( - getattr(args, 'generalized_tensor_parallel_size', 1) > 1 - or getattr(args, 'expert_generalized_tensor_parallel_size', 1) > 1 + getattr(args, 'generalized_tensor_parallel_remat_size', 1) > 1 + or getattr(args, 'expert_generalized_tensor_parallel_remat_size', 1) > 1 ): print_rank_0(f"GTP enabled. {GTP_CONFIG}") diff --git a/megatron/training/utils/common_utils.py b/megatron/training/utils/common_utils.py index 9bd55ff8b2e..5f4d8f41ae2 100644 --- a/megatron/training/utils/common_utils.py +++ b/megatron/training/utils/common_utils.py @@ -111,8 +111,8 @@ def calc_params_l2_norm(model, force_create_fp32_copy=False): moe_gtp_params_data = [] # MoE-GTP, non-sharded moe_gtp_sharded_params_data = [] # MoE-GTP, sharded → reduce over expert_dp_with_gtp - gtp_rank = mpu.get_generalized_tensor_parallel_rank() - egtp_rank = mpu.get_expert_generalized_tensor_parallel_rank() + gtp_rank = mpu.get_generalized_tensor_parallel_remat_rank() + egtp_rank = mpu.get_expert_generalized_tensor_parallel_remat_rank() for model_chunk in model: for param in model_chunk.parameters(): @@ -181,7 +181,7 @@ def _sum_reduce(tensor, group): # expert_model_parallel = TP×EP×PP (does NOT include EGTP), so we need # an explicit EGTP reduction for MoE-GTP before the model-parallel reduce. 
moe_gtp_combined_norm_2 = moe_gtp_norm_2 + moe_gtp_sharded_norm_2 - _sum_reduce(moe_gtp_combined_norm_2, mpu.get_expert_generalized_tensor_parallel_group()) + _sum_reduce(moe_gtp_combined_norm_2, mpu.get_expert_generalized_tensor_parallel_remat_group()) moe_total_norm_2 = moe_norm_2 + moe_sharded_norm_2 + moe_gtp_combined_norm_2 # --- Model-parallel reductions --- From 6c28127349bc3c40cdea7a04ceff24af09494be0 Mon Sep 17 00:00:00 2001 From: Shiqing Fan Date: Thu, 28 May 2026 00:34:04 -0700 Subject: [PATCH 09/59] update README Signed-off-by: Shiqing Fan --- megatron/experimental/gtp/README.md | 169 +++++++++++------- .../images/0525_gtp_mcore_te_architecture.png | Bin 0 -> 222344 bytes .../gtp/images/0527_ddp_param_bucketing.png | Bin 0 -> 196594 bytes 3 files changed, 100 insertions(+), 69 deletions(-) create mode 100644 megatron/experimental/gtp/images/0525_gtp_mcore_te_architecture.png create mode 100644 megatron/experimental/gtp/images/0527_ddp_param_bucketing.png diff --git a/megatron/experimental/gtp/README.md b/megatron/experimental/gtp/README.md index 7b009fd96a2..227eb8b1c9a 100644 --- a/megatron/experimental/gtp/README.md +++ b/megatron/experimental/gtp/README.md @@ -1,16 +1,28 @@ -# Generalized Tensor Parallelism (GTP) — Key Features +# Generalized Tensor Parallelism (GTP) > ⚠️ **Experimental.** GTP is an experimental feature and its API, configuration, and behavior may change in future versions without notice. -**Scope**: this doc is a high-level summary of GTP. +**At a glance.** GTP shards every linear weight 1/N along `out_features` across a dedicated GTP process group. The full weight is rematerialized on the fly via an asynchronous all-gather that overlaps with the previous layer's compute on every forward AND backward pass; the wgrad is reduce-scattered the same way on the way back. Effective per-GPU weight memory shrinks by `1/N`, and the design composes orthogonally with TP / SP / EP / DDP / CUDA Graphs. 
-Core implementation: `megatron/experimental/gtp/generalized_tensor_parallelism.py`. The public surface is re-exported from `megatron/experimental/gtp/__init__.py`, which also owns the `HAVE_GTP` fallback used by callers that need to remain importable when GTP is unavailable. Low-precision tensor primitives (FP8 / MXFP8 / NVFP4) remain in TransformerEngine and are imported by `generalized_tensor_parallelism.py`. +**Scope**: a high-level summary of GTP — design intent, public CLI surface, and Megatron-LM ↔ TransformerEngine integration touchpoints. + +Core implementation: `megatron/experimental/gtp/generalized_tensor_parallelism.py`. The public surface is re-exported from `megatron/experimental/gtp/__init__.py`. Low-precision tensor primitives (FP8 / MXFP8 / NVFP4) remain in TransformerEngine and are imported by `generalized_tensor_parallelism.py`. + +**Outline:** + +1. [Features](#1-features) +2. [Usage](#2-usage) +3. [Implementation details](#3-implementation-details) + - 3.1 [GTP architecture (Mcore ↔ TE integration)](#31-gtp-architecture-mcore--te-integration) + - 3.2 [DDP buckets with (E)GTP](#32-ddp-buckets-with-egtp) --- -## 1. Fine-Grained, Per-Weight Materialization & Gradient Reduction +## 1. Features -Each weight is sharded 1/N across an GTP group along `out_features`, stored as an `GTPShardedParam` subclass of `nn.Parameter`. Materialization and gradient reduction are both **per-weight, per-call** — not per-model or per-module: +### 1.1 Fine-grained, per-weight materialization & gradient reduction + +Each weight is sharded 1/N across a GTP group along `out_features`, stored as a `GTPShardedParam` subclass of `nn.Parameter`. Materialization and gradient reduction are both **per-weight, per-call** — not per-model or per-module: - **Independent state per param**: each has its own AG state (`state`) and RS state (`rs_state`) machines, both cycling `NONE → ASYNC_WAIT → DATA_READY → NONE` and tracked separately so fwd and bwd async ops don't interfere. 
- **Prefetch chain for AG** (doubly-linked `prev_w` / `next_w`): during fwd, each weight's `all_gather_and_prefetch` issues async AG for `next_w`; during bwd, `all_gather_and_prefetch_bwd` issues async AG for `prev_w`. Layer *i*'s AG overlaps with layer *i−1*'s GEMM. For an L-layer model, L−1 all-gathers are fully hidden behind compute. @@ -19,9 +31,9 @@ Each weight is sharded 1/N across an GTP group along `out_features`, stored as a Contrast with FSDP: FSDP gathers at module-group granularity in full precision with PyTorch-managed lifecycle. GTP works at individual-weight granularity, in quantized form, with its own explicit ticket-based buffer pool and a one-step-deferred RS finalizer. -> **FSDP can't shrink into GTP because FSDP's overlap is bucket-grained by design** — bucket granularity exists *to avoid* paying NCCL launch latency on tiny params (LayerNorm γ/β, biases, Mamba `dt_bias`/`D`/`A_log`) and *to avoid* the per-weight scheduling state that GTP relies on (per-param prefetch chain, ticket-based buffer cache, stream choreography). Removing buckets doesn't make FSDP faster; it makes FSDP into GTP, with all the engineering that entails — selective wrapping (only large GEMM weights), per-weight prefetch chain, per-param buffer ticket, and explicit AG/RS stream coreography on a side stream so external drains have something meaningful to wait on. +> **FSDP can't shrink into GTP because FSDP's overlap is bucket-grained by design** — bucket granularity exists *to avoid* paying NCCL launch latency on tiny params (LayerNorm γ/β, biases, Mamba `dt_bias`/`D`/`A_log`) and *to avoid* the per-weight scheduling state that GTP relies on (per-param prefetch chain, ticket-based buffer cache, stream choreography). 
Removing buckets doesn't make FSDP faster; it makes FSDP into GTP, with all the engineering that entails — selective wrapping (only large GEMM weights), per-weight prefetch chain, per-param buffer ticket, and explicit AG/RS stream choreography on a side stream so external drains have something meaningful to wait on. -## 2. CUDA Graph Compatibility +### 1.2 CUDA graph compatibility CG compatibility is designed-in from day one, not retrofitted. The entire sync / buffer / chain architecture is shaped around making **captured fwd/bwd replays produce identical bit-for-bit behavior** — without the usual capture-vs-eager pitfalls that force other weight-sharding schemes to either disable CG or require special handling. @@ -34,7 +46,7 @@ CG compatibility is designed-in from day one, not retrofitted. The entire sync / - **Drains at CG / eager boundary**: `_drain_gtp_side_streams()` before eager MoE expert compute. Inside bwd capture, two-phase drain: Phase 1 joins the within-graph cascade and records `bwd_completion_event` (next runner unblocks); Phase 2 calls `wait_async_comms(GRAPHED)` to drain the chain-tail handle and re-joins side streams (queued after the event so it doesn't delay the next runner). - **Side-stream registration**: the `(GRAPHED, gtp_group)` ag/rs streams are materialized at runner init so `_register_side_stream` captures them before the first forward. -## 3. Low-Precision Quantize-Then-Gather +### 1.3 Low-precision quantize-then-gather Wire bandwidth scales with the **quantized** size, not BF16 size — GTP composes with low-precision training rather than fighting it. @@ -43,7 +55,7 @@ Wire bandwidth scales with the **quantized** size, not BF16 size — GTP compose - **Coalesced NCCL**: `grouped_gather_along_first_dim` uses `torch.distributed._coalescing_manager` to batch E experts' AGs into a single NCCL op. `BatchedNVFP4AllGatherAsyncHandle` wraps per-expert post-processing. 
- **Padding**: at construction the **full tensor** is padded along dim0 to a multiple of `pad_for_alignment × gtp_size`, then sharded equally across the group. After all-gather, the padding ends up contiguous at the tail, so stripping is a single trailing slice (`tensor[:-pad_length]`) — no per-shard reshuffle, and the design naturally supports `pad_length` large enough to span multiple ranks' shards when the unpadded dim0 is small. -### Per-microbatch schedule +#### Per-microbatch schedule ``` Steady-state fwd (NVFP4): @@ -65,7 +77,7 @@ quant+amax run sequentially with surrounding compute on the default stream; only For NVFP4 the per-microbatch prefetch cost is **two** NCCL ops on the GTP ncclStream (amax allreduce + AG) serialized on the same communicator. FP8 and MXFP8 incur only the AG; their standard DP-group amax allreduce in `reduce_and_update_fp8_tensors` is unchanged by GTP. BF16 skips quant entirely. -### Communication volume breakdown +#### Communication volume breakdown Per-microbatch per-weight comm budget (assuming bf16 wgrad reduce-scatter): @@ -82,73 +94,30 @@ How to read the columns: - `Fwd AR(amax)` is a separate NCCL collective: NVFP4 needs it (one fp32 scalar per tensor → ~0 B/elem volume but a fixed launch latency); MXFP8 doesn't (microscale-only). - `Total B/elem` = `Fwd AG + Bwd AG + Wgrad RS` — amax AR is omitted because its volume is essentially 0. -Concrete numbers for one weight of shape `[16384, 4096]` (67.1M params), per microbatch: - -| Format | Per-microbatch volume | -|--------|-----------------------| -| BF16 | 403 MB | -| MXFP8 | 273 MB (–32%) | -| NVFP4 | 210 MB (–48%) | - Quantize-then-gather attacks AG only: AG portion shrinks ~72% from BF16 → NVFP4, but RS is untouched, so the wgrad RS becomes the dominant comm path in NVFP4 (~64% of the budget at bf16 RS, ~78% at fp32 RS). -### Insights from the comm-volume table - -1. **AG savings ≠ end-to-end comm savings.** The headline "NVFP4 → 4× smaller wire" applies to AG only. 
End-to-end per-weight comm goes 6.0 → 3.125 B/elem, which is **2×, not 4×**. The other half of the savings is left on the table because RS doesn't shrink. - -2. **Wgrad RS becomes the new bottleneck under NVFP4.** RS share of the budget jumps from ~33% (BF16) to ~64% (NVFP4) at bf16 RS dtype. Future optimization should target the RS path (RS dtype reduction, coalesced gradient reduction), not more aggressive AG quantization. - -3. **Diminishing returns from precision halving.** BF16 → MXFP8 saves 1.94 B/elem (–32% of baseline); MXFP8 → NVFP4 saves only an additional 0.94 B/elem (–16% of baseline). Each step saves a smaller share of the original budget — there's no point chasing more aggressive AG quantization until RS shrinks. - -4. **AR(amax) is latency-only, not volume.** One fp32 scalar per tensor → effectively 0 B/elem in a bandwidth budget. It belongs in a launch-overhead model, not in the bytes table. - -5. **Bwd inherits AG savings for free.** The quantize + amax + cast cost is paid once per microbatch in fwd. Bwd reuses the cached `self.quantized` buffer with no re-quantize and no AR — but still gets the small-AG payload. The cost-benefit of quantize-then-gather is asymmetric: fwd pays once, both fwd-AG and bwd-AG benefit. - -**Net takeaway:** "NVFP4 → 4× smaller wire" is half-true. GTP+NVFP4 cuts AG ~4× but leaves RS untouched, so end-to-end comm is only ~2× faster. The next lever isn't more aggressive quantization — it's RS dtype reduction or coalesced gradient reduction. - -## 4. Buffer / Memory Management - -Two distinct pools with explicit lifecycle rules: - -- **`GTPWeightCache`** (AG/RS output buffers) — ticket-based, keyed on `(shape, dtype, fwd, expert_idx, reduce_scatter)`. Same-shape buffers across layers are shared. Tickets persistent; buffer allocated lazily on first `get()`; addresses stable across iterations for CG replay. 
-- **`_wgrad_buf_pool`** (UNGRAPHED wgrad input recycling) — tagged with `_from_gtp_wgrad_pool=True` at `_wgrad_pool_get`. `_wgrad_pool_put` no-ops on foreign buffers (fresh allocs from Megatron `layers.py` or aten F.embedding bwd) → caching allocator handles those. Prevents the pool from accumulating untagged buffers each iter. - -## 5. Composability with TP / SP / EP / DDP +### 1.4 Composability with TP / SP / EP / DDP - **TP** (intra-layer): orthogonal axis — GTP shards `out_features` regardless of TP's parallel mode (column or row). 2D grid naturally formed via `tp_group × gtp_group`. - **SP** (sequence-parallel): transparent — GTP operates at weight dim, SP at sequence dim. - **EP** (MoE): `GroupedLinear` with GTP → each routed expert sharded across `EXPERT_GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP`, independent of EP. MoE AllToAll (HybridEP/NVLink) runs independently of GTP AG/RS (NCCL/IB). - **DDP**: GTP bypasses autograd's grad accumulator (async RS returns `None`; `_finalize_wgrad` accumulates directly into `main_grad`). `register_grad_accum_hook` + manual invocation from `_finalize_wgrad` (eager path) and `_CudagraphReplayNode.backward` (captured path) serializes DDP RS strictly after GTP RS — critical at IB scale to avoid deadlock between DDP and GTP on the same NIC. -## 6. Opt-in, Minimally Invasive Integration +### 1.5 Opt-in, minimally invasive integration - Drop-in `gtp_group` kwarg on `Linear` / `LayerNormLinear` / `LayerNormMLP` / `GroupedLinear`; no framework-level refactor required. +- **Per-weight opt-in.** GTP wraps only weights threaded with the `gtp_group=` kwarg — typically the heavy GEMM linears (`Linear` / `LayerNormLinear` / `LayerNormMLP` / `GroupedLinear`). Small replicated tensors (LayerNorm γ/β, biases, Mamba `dt_bias`/`A_log`/`D`/`conv1d`, MoE router, latent-proj MLPs) stay full — no NCCL launch latency for params where the all-gather wouldn't amortize. The split is visible in §3.2's *dense non-GTP* vs *dense GTP* membership. 
- `classify_gtp_chains(model)` walks `named_parameters()` once at init and sets `chain_id` on every `GTPShardedParam` based on the current `cuda_graph_modules`. - Turning it off is a no-op: when `gtp_group.size() == 1`, `wrap_module_params_gtp` short-circuits; when `generalized_tensor_parallel_remat_size == 1`, the GTP path in `layers.py` is skipped entirely. - User-tunable knobs (`GTPConfig.pad_for_alignment`, `weight_prefetch`, `check_param_states`) plus a debug-name tagger (`tag_gtp_params_with_names`) for readable link-table output. -## 7. Overlap Design Summary - -``` -fwd: AG(W_{i+1}) ∥ GEMM(W_i) ∥ CG replay of captured layers -bwd: AG(W_{i-1}) ∥ dgrad(W_i) ∥ wgrad(W_i) ∥ RS(wgrad_i) ∥ [finalize wgrad_{i+1} + DDP hook] -``` - -At bwd step *i* the step is launching *RS of wgrad_i* while finalizing the *previous* iter's wgrad (`wgrad_{i+1}` in bwd order = the next-one-over in fwd order). That one-step deferral is what makes the RS run concurrent with the next layer's dgrad/wgrad GEMMs instead of blocking after every layer. - -Communication never blocks compute except at the very first layer of each direction (cold start) and at enforced serialization points (CG/eager drains, finalize-grads barrier). - -### 7a. wgrad-before-dgrad schedule *(deferred to a follow-up MR)* - -Current behavior: backward always runs dgrad GEMM, then wgrad GEMM, then issues the GTP wgrad RS — the RS overlaps with the *next* layer's bwd GEMMs (the one-step deferral above). - -A future MR will add an opt-in wgrad-before-dgrad schedule on `_Linear` / `_LayerNormLinear` so the GTP wgrad RS NCCL overlaps with the dgrad GEMM of the **same** layer (best for the GTP + no-TP case). Until that MR lands, attempting to set `GTPConfig.wgrad_before_dgrad = True` raises `NotImplementedError`. - -## 8. Scaling +### 1.6 Scaling Effective per-GPU weight size = `W / (TP × GTP)`. 
Example: TP=4 + GTP=8 with NVFP4 → 32× weight-memory reduction and 128× wire-bandwidth reduction vs full BF16 replication, before data parallelism. -## 9. Usage +--- + +## 2. Usage GTP is enabled through two CLI flags on Megatron's training launcher; everything else (process-group construction, parameter slicing, prefetch chain wiring, optimizer routing) is automatic once the flags are set. @@ -182,6 +151,7 @@ torchrun --nproc-per-node 4 pretrain_gpt.py \ --pipeline-model-parallel-size 1 \ --generalized-tensor-parallel-remat-size 2 \ --expert-generalized-tensor-parallel-remat-size 1 \ + --high-priority-stream-groups ep gtp expt_gtp \ --bf16 \ --num-layers 12 --hidden-size 1024 --num-attention-heads 16 \ --seq-length 1024 --max-position-embeddings 1024 \ @@ -201,14 +171,6 @@ GTP enabled. GTPConfig(pad_for_alignment=16, check_param_states=False, fp8_param_gather=False, coalesce_amax_allreduce=False) ``` -### What the flags do under the hood - -1. `parallel_state.initialize_model_parallel(...)` builds two new groups: `_GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP` (size = `--generalized-tensor-parallel-remat-size`) and `_EXPERT_GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP` (size = `--expert-generalized-tensor-parallel-remat-size`), plus the corresponding DP-with-GTP carve-outs (`_DATA_PARALLEL_GROUP_WITH_GTP`, `_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP`). -2. Megatron's `extensions/transformer_engine.py` reads `pg_collection.gtp` / `pg_collection.expt_gtp` and forwards them as the `gtp_group=` kwarg to `te.Linear` / `te.LayerNormLinear` / `te.GroupedLinear`. TE's `module/base.py` calls back into `megatron.experimental.gtp` via the hook registry (`register_gtp_hooks`) to slice each weight at `reset_parameters` time. -3. DDP carves out GTP shards into a separate bucket pool (`gtp_buffer_groups`) reduced over `intra_dp_cp_with_gtp_group` rather than full DP — the wgrad RS already reduced over the GTP axis. -4. 
Optimizer state is sharded across the same `with_gtp` subgroup; clip-by-global-norm sums squared norms over `model_parallel × with_gtp` so the reduction count matches the actual replica count. -5. `classify_gtp_chains(model)` runs once after model build (in `training.py`'s `get_model`) and wires each `GTPShardedParam` into a `GRAPHED` or `UNGRAPHED` prefetch chain based on the active `cuda_graph_modules`. - ### Tuning knobs Set via `from megatron.experimental.gtp import GTP_CONFIG, update_gtp_config`: @@ -217,7 +179,7 @@ Set via `from megatron.experimental.gtp import GTP_CONFIG, update_gtp_config`: update_gtp_config( pad_for_alignment=16, # NVFP4: 16, MXFP8: 32, BF16: any; auto-set in training.py weight_prefetch=True, # Disable to debug the cold-start path - async_reduction=True, # Wheter perform GTP gradient reduction asynchronously + async_reduction=True, # Whether to perform GTP gradient reduction asynchronously # wgrad_before_dgrad: deferred — setting True currently raises NotImplementedError fp8_param_gather=False, # Companion to Megatron's --fp8-param-gather; currently asserted off # coalesce_amax_allreduce: deferred — setting True logs an info and falls back to per-weight @@ -225,3 +187,72 @@ update_gtp_config( ``` `training.py` auto-tunes `pad_for_alignment` based on the quantization recipe (`--fp4`, `--fp8-recipe=mxfp8`, etc.) before model construction. The other knobs are usually left at defaults. + +--- + +## 3. Implementation details + +### 3.1 GTP architecture (Mcore ↔ TE integration) + +![GTP / Mcore-TE integration architecture](images/0525_gtp_mcore_te_architecture.png) + +TransformerEngine owns the linear primitives (`Linear` / `LayerNormLinear` / `LayerNormMLP` / `GroupedLinear`) and the low-precision tensor types (FP8 / MXFP8 / NVFP4). Megatron-LM owns the GTP scheduling state — the prefetch chain, the ticket-based buffer cache, the per-param AG/RS state machines, the GRAPHED/UNGRAPHED chain split, and the DDP integration. 
The two are bridged by: + +1. The `gtp_group` kwarg that Mcore's `extensions/transformer_engine.py` threads into the TE constructors when `is_te_min_version("2.15.0")`. +2. The hook registry (`register_gtp_hooks`), called by TE's `module/base.py` at `reset_parameters` time to slice each weight into a `GTPShardedParam` along `out_features`. +3. The `_register_side_stream` / drain calls that synchronize TE's quantize + GEMM kernels with the side stream that owns the AG/RS NCCL ops. + +#### What the flags do under the hood + +1. `parallel_state.initialize_model_parallel(...)` builds two new groups: `_GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP` (size = `--generalized-tensor-parallel-remat-size`) and `_EXPERT_GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP` (size = `--expert-generalized-tensor-parallel-remat-size`), plus the corresponding DP-with-GTP carve-outs (`_DATA_PARALLEL_GROUP_WITH_GTP`, `_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP`). +2. Megatron's `extensions/transformer_engine.py` reads `pg_collection.gtp` / `pg_collection.expt_gtp` and forwards them as the `gtp_group=` kwarg to `te.Linear` / `te.LayerNormLinear` / `te.GroupedLinear`. TE's `module/base.py` calls back into `megatron.experimental.gtp` via the hook registry (`register_gtp_hooks`) to slice each weight at `reset_parameters` time. +3. DDP carves out GTP shards into a separate bucket pool (`gtp_buffer_groups`) reduced over `intra_dp_cp_with_gtp_group` rather than full DP — the wgrad RS already reduced over the GTP axis. See §3.2 for the full 4-bucket layout. +4. Optimizer state is sharded across the same `with_gtp` subgroup; clip-by-global-norm sums squared norms over `model_parallel × with_gtp` so the reduction count matches the actual replica count. +5. `classify_gtp_chains(model)` runs once after model build (in `training.py`'s `get_model`) and wires each `GTPShardedParam` into a `GRAPHED` or `UNGRAPHED` prefetch chain based on the active `cuda_graph_modules`. 
+ +#### Buffer / memory management + +Two distinct pools with explicit lifecycle rules: + +- **`GTPWeightCache`** (AG/RS output buffers) — ticket-based, keyed on `(shape, dtype, fwd, expert_idx, reduce_scatter)`. Same-shape buffers across layers are shared. Tickets persistent; buffer allocated lazily on first `get()`; addresses stable across iterations for CG replay. +- **`_wgrad_buf_pool`** (UNGRAPHED wgrad input recycling) — tagged with `_from_gtp_wgrad_pool=True` at `_wgrad_pool_get`. `_wgrad_pool_put` no-ops on foreign buffers (fresh allocs from Megatron `layers.py` or aten F.embedding bwd) → caching allocator handles those. Prevents the pool from accumulating untagged buffers each iter. + +#### Overlap design summary + +``` +fwd: AG(W_{i+1}) ∥ GEMM(W_i) ∥ CG replay of captured layers +bwd: AG(W_{i-1}) ∥ dgrad(W_i) → wgrad(W_i) ∥ RS(wgrad_i) ∥ [finalize wgrad_{i+1} + DDP hook] +``` + +At bwd step *i*, the schedule launches the *RS of wgrad_i* while finalizing the wgrad of the *previous* bwd step (`wgrad_{i+1}`, i.e. the next layer in fwd order). That one-step deferral is what lets the RS run concurrently with the next layer's dgrad/wgrad GEMMs instead of blocking after every layer. + +Communication never blocks compute except at the very first layer of each direction (cold start) and at enforced serialization points (CG/eager drains, finalize-grads barrier). + +##### wgrad-before-dgrad schedule *(deferred to a follow-up MR)* + +Current behavior: backward always runs dgrad GEMM, then wgrad GEMM, then issues the GTP wgrad RS — the RS overlaps with the *next* layer's bwd GEMMs (the one-step deferral above). + +A future MR will add an opt-in wgrad-before-dgrad schedule on `_Linear` / `_LayerNormLinear` so the GTP wgrad RS NCCL overlaps with the dgrad GEMM of the **same** layer (best for the GTP + no-TP case). Until that MR lands, attempting to set `GTPConfig.wgrad_before_dgrad = True` raises `NotImplementedError`.
+ +### 3.2 DDP buckets with (E)GTP + +![DDP parameter bucketing with (E)GTP](images/0527_ddp_param_bucketing.png) + +DDP carves parameters into **four buckets** based on two orthogonal axes — `is_expert_parallel` (MoE tag) × `isinstance(param, GTPShardedParam)` (GTP shard tag). Each bucket reduces over a *different* process group, because the (E)GTP wgrad RS has already reduced grads over the corresponding axis and reducing again would double-count. The diagram above shows the four buckets, their typical membership, and the reduce-scatter group each one targets. + + +**`broadcast_params`** (the post-init parameter sync) uses a parallel selection: + +| Param class       | Broadcast group                                                                       | +|-------------------|---------------------------------------------------------------------------------------| +| non-GTP, dense    | `dp_cp_group` (full DP-CP)                                                            | +| GTP, dense        | `dp_cp_with_gtp_group` (full — same-shard replicas across all distopt instances)      | +| non-GTP, expert   | `expt_dp_group` (full expert DP)                                                      | +| EGTP, expert      | `expt_dp_with_egtp_group` (full — same-shard replicas across all distopt instances)   | + +For GTP-sharded params the broadcast group encodes **two** orthogonal decisions: + +- **`_with_gtp_` in the name → excludes (E)GTP peers.** Each (E)GTP rank holds a distinct 1/N shard of the same `GTPShardedParam`. If GTP peers were in the same broadcast group, rank-0's shard would overwrite every other peer's distinct shard. The `_with_gtp_` carve-out keeps the broadcast scoped to ranks that hold the *same* shard. +- **No `intra_` prefix → cross-distopt-instance ("full") group.** Broadcast is a one-shot init/load sync, so it must reach every distopt instance to keep replicas consistent. The `intra_*` per-instance variants are reserved for grad RS, where each instance reduces its own grads independently. + +**Buffer caching.** The per-bucket buffer lists are concatenated once at init into a single flat view for fast iteration in the grad-reduction hot path.
Multi-instance distopt is supported via additional per-instance carve-outs of the with-(E)GTP groups in `parallel_state.py`. diff --git a/megatron/experimental/gtp/images/0525_gtp_mcore_te_architecture.png b/megatron/experimental/gtp/images/0525_gtp_mcore_te_architecture.png new file mode 100644 index 0000000000000000000000000000000000000000..672f44c9c14f0b793de73c35e717625a35b6fc64 GIT binary patch literal 222344 zcmd?R2_Tef-#>0EBQf?;_9ZH0-}ha13Ly-}axlXf`z}J(C@RX5N@XpCO7=A>6$;rx zvW4u_@4jb|mUEtSp8xxt^S;mjoT-`ny07iNuFv=LU9Rt4jDem8#hwFu2nYx$v@}(X z2ncrZ5)crDL3RTrC%Dzv2ndwL(Q0OBH(v*5S2zK;hzj->w}=|Ipc1#RAovkcv3G_! zqF`>o1p?*<=N3`r78cexVE|m~+96SJOg&}b?-Odk4dC0^9$Uu|?auE8*5F42m4v_R z72mg;uLcU{?xc&fhr0qz?R~LLg#-mfu$|fa;Y-9NuqBQt`~blG!12HWV`qQd%!Kfz zUe5M#Ph4YcMKluWigtGYzR(VdK)~(L-&KU6P)MKet2rQDagQa0?aUqKh#&D=fw3LT z_2*JkXM400ZU9k1-2A}yns8@FC*06d_#198d}UlsPbZi?(g)ifR|K<%xuc!E;VMX1 zBnmfCm=_ue&SdB2VV4=d*eE0t7;MMh?!1k!Taxh8w%X z?7+r80Q`V5C$yU@t}-ACu9$Vjthk7}pt^**8c@X(jY7I$R~&msNJ>In6JqN$e;%;Sm-V$6otcm6l+tt6h|wl^0?l{+^V+}WQu7ssIg$mXJ* z(SUGbCbeU8v3dlMPf#Z9&|ZK#5dk$1Ub7&55T3v#+6h!M&Tj6ma5sQSU}$F~0w~~h zM%cLu06$P(2(+^s{4lq$3UCD~B0;#XJJJ(S0K$SW;Exj$=>mMA;Ev9~UIUT>F3t|V z00a-iQ2+N753u+E&EPdTc=30@0^~5v z%HnY$ilcxXz!1aN*{KMGaSG%g!37%5|7%d3K*1264oH+69HoYEbVh)r0DO2)r~#7y zH%as_K(VbX{%?RnQv3%{h=^fVZwDwu@r3ff1{7|1E9sln=Z515z~lhf9*zPu0H7NH zE>QsuHY<0c#R0C)i!0f}<<2V@=S z7HdoHv=sk<_(g!WzXyjOI7L-NSRFL3f3`xDC4^LjRJQM!;;c8YZ%;G~g~l1RBA^la z=k*o->C}HPbk)_A)j&h{_b5yf92U=DprZRn7)%NW-W_25HG|=4?H{2qHz8Z>>g-Ty zMpz>S3qcT`7%Y0)0iJ>vXc_@Fx`;0fv##4eVV-^nJD{SH2GGFB_=mRcD7XV0ZRZ3O z+d09UL6Zpc!F3IoD=u&}coPN;^21HQ41!~vdeB2h0uH{r6JQo?56laQjjgzb;u_SalBAu(|=6^x$!(Nz;v6_ro|A&WKi@qz@(&F>279|-qflsC}-LpTEb zAr8*qk0ofE=KklLK@b$6a>UsHqQGzPa)$tk;x)ofwim*yfPY4SaE|B>`2n=QGXhvh zeor8>gLwcHxU4wL3-a2dfQSeHJ3(iJyB8X?{GAbCNCf9Se5VHh&KGwDEq+^|fw&|1 z>*o>`9W9`>3j%2icE*c<`?!KWHq0K3EC>q%h6wOCwp*+?t6?vIkP-ku*TWcB=J$s2 zbcccR`ppLs_wWM4GZ;YfJHXtWfx$4okO~sviF5^a0V7auNCXmS3kENMDGS;Hdd(3d 
zLfFbEM_XQDF+ib%Vks;NC{6H(kdP!W8?e3|7%}=*6@UhKAOD4`fi9|85i>VL3z|G!vNjID$I^J@z_P#CQL z?%LvT``zN&JA32z5#Jt^6+pG%cV@fO@2U!eyY#OEaiB5r*VtWISyfG48EE!Xux8sS z`<~r}1jK;aAiw{fcmD74IOqfX3m(T?tN(K}{-ZA^3FvBYv3KzJFAX6fArWlFe}>0_ zDT1@y!Q;SmKxT$JqVRDXPcM`Mz-R!&ZQCCPC?rq-ln8?C2E^>~G>>Ik?4_zI$O+uS zAh98VfVdmL4ya$&R5_^%G8HhSyA#kCIHUh7AO)@czhK<|v3c>Ihb0g<{d2e!2Q~rR zhkpi^B0s?re+K0W^MeCtDxlc_SjeEF2S4J1fKO?S!6IPDajOY)OA2x;i38<3*%3>P zp!o~LBQYULEL`1SZniK?PoQb;=IXxv8=w)uY_`U7Iu_+%74)}Ahcz4%>JV`MIcoVo zXFC4}$o2mkZvU)f`Oo4O_Oo656P>{k_DbLhCK$a2>_$&#JAguV#4UbOpnu>V!F`?4 zW{^ zL5UO=#&FvoA39-W`G+$z6x$N%h*qEgsp#19F?kR0LYe`81yz-a)E`L+)p|9G0X+L&|Y ze^v9R<7b?f*kM253HNu6{U3ZB(Eb6P90|NLBrb&Wb3ogWA7=-E=7BR1=EDdJFa*vU z1Abw=LkBqA9;9c$w{vg?oX&0I_j_*z+_xQu1qex5F;HNJi#c}UlEzV5x;IQ+;Z%M#ot2EfDMB3+ri!7B}m>Nj{XY6 zVu=aMV)*bBmN{@`xb6QNnfT9;c3;xa1Zm{t^uqFip+o0uAy_~k4eNIUc!p};`j2{nB9 z78}rW0CLjYfjBN2SRfFNJLK(u&b#@yPT2m=gMoh}p!Cx@@Gm@|U!DUCfv5*J3$KFz z)+{kJj?8ulIT|jY1Dxc-P{5q~bp(eOJzS6G*7G`G$cL&XS z;PdN<0Opp94`5pFkaGtgAdsML3;qq9#$od-v4<0Y800*LZ2 zLaE!S41ZTB^-mm$;Ys~RYxdu_T7N{oC_W8pM=bT1$j3{_KLdQ6#@+#Z9i$JxI|^>+ z?1_ywVb54F=)@iNxB}Kb=-UDrr$By$Gx!^DV}6Bfbps&h=NICmYxaA0^i0$ZfN%t^ z&Gy)6Klks(z@HksA`y<(4$de~w6(o6m`;xWd=u60Pmrx)KQ{S~X^8(t67j!67l`2x zfB%(42;rT-e?}z!*tx>Rj{Ye0rK~O@F8Fhb4fgElyEN|KkA(dv94)*lyVGRHYrg*~ zll_k!t^dnAh_h3^+rj@rr^^0pwbEA?$vUJM@<$-M@2G44zfsxPz8g@nD`AC%uxjcZr~NxRQyG7{9lD`@GSG+ z#3;5u`v-VD0!Rf;o}IDgJ7^&NXqJPH^iJ~#k4`Xr45r|L-$uqiyQ+T~hreTI@5CWy zn{nqYxYHMWKo0}a?Qos|XhUPt9R55Gx5xj>IQ%Jg{inc(S6w^Fa63oxzY2Xn8|irT zfyV21E+ddzhIJEvFl+xZnzmWxd#`OLn!aZ>FfrzLSxr*vuj6TZ27dxi;CACF5~tt) zh}V-59N>h35Zd7w;xPxT0O0r`y)o~?3Jc;rId!1; zn_mZB;eELuoi}W9%^$dMVB>$~wtds~z$;ux;4Mcm3}OrBQsUq9`t}0X@5=(x9S*#Z z^@j)9+qCw*Ab^Vb*S0(;2Y-rVCAV|Qeurf7%72Hl-=58%5Q^`W{l7`Fzrg!{CCOsK z+Wf9?AiWeo(htzbraoYfI=unz00JYxc`0zZyA8tceqw?Upt&l_Eh&VNn(u!AI0fG_ z#sn*P@n{9>0qJ*A;K-PFk9PF^uRBBnxjox&qF~-t+}R&yjPFkiz%KcB{J}vPJ149} z@q@2F0#)qcfRNb35q5xZfyN2~NGS&5w7?(Gg2v?}fdR;$ZI~Yrh{C@zEeXiTU(3`~ 
zR|5YBn*Ee$B8B;H`;IB@g_iB~5u9v+*LV?dg*$**(LcU@iAK7E-E61U16gOl<=>zD zCj9-I*5bI!9bpq;DO}dYcggjC$J;5G6VTsz=^uXr4kkomGv9WGQ!#h(|Lz0!2Cf0{ z?h@eZfd7U7+4)dM0s<-mEmfrxzJ$|P$wGBsvo?o3>s`5O(2~p>!@*&3&z?Uyxyzi{O^qdV^ z2_BrwNLT)>Pi;U#N2sRw(^s-I(OU&fISn7taf3s>*)Pr>COQ|&kpnuDFMV&@Jp+3tU6sOEFmqsL3UAp=6Mn-|(lwaWhJmCwi-*He}A3 zMKV^#LRet^Q^jOrqPH5~W)BGsrMW;D`oig3k!BpkDV_euCuA#O&mw`_9u#sb;z**R zdM^HLYCE0~oM@K@+cp0@61Xg3!E5kZkz_&Y@ECJt?fDA{A0`w?x@6w%P000DRRA8D zN{q8&RqtXEmHhOA}vMC~FQdVJ=W^sdI~2fm&- zaf?-rZJ1-|7QAQuz{gu!2X27NtJn`tuOD`o`zUZ>LsBv9-=ifz*|=k6bAs8g z{=7~??=zZaPbreyA3r=AidGmnqqoVV@lg_0pzuJ3B5SZEC_$lj;zG)5BIj4nhTt}- zYsVAlJh%kpE3YaaX4M*7rTxfr>!sj|nMjk~iH0aCIzBmNLphJ|&?kQDrN*?2EUhD} zqa&kp88NZ`f$dCVs)B>!QV82E%Z|#-?n%*}y%C--$*w$enrIPoy+3Ld3mp`($T-L9 z6`^nWERj-=`zdila*p1K@D0U1!X|;NZ<;Q8$i~rkJ)LiEzV*sF@*y{N+MWi$;T zH}CZbO?m(+Y^l^O;}=LLSI>L23~eMjwLNIkx)vgR(QvN^dhbh=*a(yV5X zM&Vf0m*+^XLnK3qdZQs8x#^r9^S3AHj<0F5G_!W-Ke}xx2R7u z4U0sm$VLZPZCG6&$zD&o8(M0Ye<&h@6G2BBv2P=FbR5Mc%_`AdyO144^72KgOH@tS zrqz&t#|m}$*SFbJ8)7^SE~x1hBkdC;bYWakp}{boCid6jM~Pw%hNWeUr+G&_^PdPE z<4ipy#{8u0-tC#yKIys%w~0@x6`u^od^@6|jvh>$a;bdv@`Zl`HJ|xSslqMAQ1cLF zpL9uzz?SnxOfU3bL2Sw#%Z`{OR<-USlyju%b`SX=8s)OT2iAPBC^NCnW#gSkqD-B} z1S#)o+6;)=Pg@^JMmO)*-iDQNQmFJ7i=^KqUeu}rD0C7b>iY0Ft#O!AeZ@575NJ2S;})p=e{By3JX z)@8FQLmESBVt9@<9PEyJ6?r3zmftLb#^H%z9pByDd@sFF|H+33^?BX~ntfb7Gpgcv zd34z`!o!2fnUkY{xr8-UqsuyV&kM)gNL{pLw8KbJUxpxS>Id2;)a@$Tu#l|LxBAA- z#D#pVU*60clS)dyi-xvS&iN>NF_Vo2oU44FcU+BfxURp><>jY`kz*_atWp^NBi#*~+&^)6)Qjmu1y#!rzg1x;RP3AA_BmJDA0aPWOa zLy4{$ym{_Rgz%HIGK_iGOGdrX5vUJSu?d+A%o;>jH%2yvx@!w#GhAj_gV}di?fvmPyYjTH8QojV2Ui-{4x9A^yA^cK{wOmL6qV(DRMUsGaQaQC&(3(C$o zQYagkDEQ>Qv_~9$-L>a>1|xRC!76GAic||*lfEkJ#bSFWVlL7SS5nFp#@E9OWelk^ z)e;hYt=F1jpv#1dHa%S9a*Gf`i8kA3#sOc37f!eJJN8$khHvmBa7;+_iHF?54qvbv1TqYw1R>brK zhj{MH?GiqFj&x8hPH2WeLU3}FuI7@YUd=OIy*-1Okcr)wqzuo?ThcPBM9e*3);rRx zFkTU;Tq$t9xw5_N?3_tk&esY^j~?1xif5q5p!z{Y@JpVibeF>`-KAG$YYST~EqDim z62)J&sxo!e&p%WMRG@*f%=#_?S_aTwJdlWmp?y5`F4@3U5+AiYtzr`Ua#&q>t^9?g 
zV7`ukCiSN9fER~}Xy=XDyRUk)J$S3y4A)}ufIx}$E+`691$UDvN{>F9UR`syi(A@LpiuN9x1K`vy;3B+FW=TBHx;6c5Q9FjAzrqLVdbDnCgNV-d!BgSc7nf3 z)>SFNLOGR`dOh0acBH&r4e_bGQRN^+3}RqToo7|Y6>M?5k_ers=Ac9CqpXRx4nSZ9#N>6|J7lx}Vyn!0U5 zw)R=#?jh1)rTxQUX;V}^ryz<=X)ZK9u_n9`+IfqjmU77|e&XR%rAr%~(87RHW>~QM z7vUz$>%MHH$I{cJXkUBPe^l?G@Ma!1-+i{C+_Lak1N&3<5tSM83C;-_KdOTc8}w}| zd>*=QY-rOKJvrhbX{)_FSJFD?ACDZZy%wWfc}R&VoSta7euZU4XnAu~BTGb2p_VSL zG8J`ta=O7ISma5??jdQz@p@USkE_#VUGsZ-SPLU9&v0!fPVE|0u+)~iCHaKin7yOI zJBQ&o>vQrOe4hg&rwCTqGwd={KF3TkDtru@zRSeMG#wDempxJ|$2j=%bEJ3DJ{`8D zia|ook-$jL(79dJ#8jKp@w0cEeW>iB6>XIx!sS*{v#ru9_1`xu6^aEuFzPPSQ>%!U zBi%Z!QYqlEbyNRyq|4-np4BH#R=ZkRbk5V}Al+r3#HLD{3GP0A_k^VLtfFhdxpLwO zzVsN|b!2atjdhcU$!&&wz##O9R181XlN>0DmWpx@vm9=sk{WM-8J?N)wc8wa$cU1J z(6abf!fp!k9riOM3VvYI*sQYOgDUy`a8pY0W5R|xhJaD>!&s5KY&-rLEqKGqJJHoH@)z2X)ay$H~jq zoTuhEU%lX7cc}zU&@GyKYFK?!Ge7ruxu`}!#b&^K~0!I0Xt^HE3??&j; z)jr8(M5Mi~#qN<7l{c#cG_KdGmOh=yeyH1hJf`dw6!4R}HQ*VQhB|eRiupkUy4v&0Tht|eNCb;#Ta#VwIfbgl zSou%Hgkv67M!P2tcb^UG+%hPPjiWX*U?EaBQy5vH&sUz2Qa=EFlO(lDy@2Xynqz3n zl$t;SQ^|hG+JpobS>JixvaIxvxuNOj?p6tx%{^CF{wue57h?M z851Z+T&cl&s1+|fd*W!4nRtQ=?c!4_7j|d%C0BnziRUZI=ucU}8w2fIOx=-BMLv!N zoS&oKL=11lZEzRHG@LmI7_`ts8N|89@+$?iRZ`ZQ1Bo7|$IMmaBUr;KOx}po?$ey! 
zIxxwZLF^gIy2w;?*(cvWJ3p_mwLJ4F?d*$HMDE^(F9t5BM+%pM%Sx5h03UCYBdL_* zVwR5i=O|p9n(72=^PaG^9SR<%tWWH_=Hl zF(Zx=7I?pS(O~KlUE_8v!26kgDyiN42Z7id?o=kbUkBtex4yi#4ZIoLDOiJ{@}Xo zJDH{cXTivqcp=3^DJ}Vs3Ass==~Y9Ph_HA^DP)#sW>v)@4x5R&s%UsG zoyDb5)C|RE8-2!vb8E%+=RGrmhiF>MiU?U)$;9=8PUup#bI4i_x~ou+Gjj2o3BME9 zmv63Lqi*x6H+=jtaYMSUI!WlTpqX_=p~_7|0fiy=>RE28$cC}a_x@T-)r037xJk&C zt~6BN%5W^bJVmj{s4$3hoL_}Zu=miFvlORR2Ns1An;^YAJ>ti!YAV~4Z7udH-27yh z|GIu`y>YE&<-kNL_ci57l|1;H^?_MKo*J+4*BnV1o2&NC^i0#5&kHMNf2WB~Im^y5F;C$V+ zBy#1H*@OMoHjLVqg0Qi3y2n2Ft{SFp(2no-dO2ZG)M}Yn8@s{wc9zajzJG8iTel*5 zgVeC@2qvD=Kt|L*)TC8wo`(sk1XI-rcd6U)J8#X8E!Z@5WIb8;wHaj(O?O>RsuMmR zl({+iRxy}%5?_TSv8qdAvl>zs4&tTv%C?eSWMP$ z>1lV){fygaiFw_f=VwpdjAo^Yf#;XTAG8>;HLiJ)I#HNOdgyKRNiyh(4>plC_0++s zK8uQBiZwL*E*86zGw1WpkfU4`VjkGGO4RWc)vNg`l8`o0YeU=EucZfRA~qor3<5;X zQ7)0(r6v-uJWBiu4qmNdzs*mm65ywq!?cm4!W6;p5uF}*Xs=KOYa(YkQ?pB|*=SX3 z;JDDJ+SgOdkz&Kn7i@X?6t#HJ~UV z-Vnqbz`R5iQcc~HSwwsI~(Nm!%PlKUDv&AzaqMhdn4 z2RBAd@_OJavOdqk6cwf``fAIMTGGbU-$|3d2jx~cS8|1`SN2e%MgNM^^lRr0rBff~ z>zM@=pK{gQ#fe&&A8>&yo_q-;G5Tk~&ZQhAf zqg;aBZjH})a<3%tjM@8}EViV~5{H#~P5Zm~-#?g4&Am6QC;)|u&m=ENBG82$vr!&A zcNH(O^@q}`7cZc{e(dZLA1N|1JIj!_7l9rKLXE0y z$tk@B7KcW)d8;;*<*i|pG85-zuK6_*KiE*}y2^5AP?xE4f^2OdHQoE#Q1V&TiA}}9 zJ-)*}J^PLjCfs23qvm_;LA~NjT~3p-hUnX+Wz+7l;l|`IblG}#iU!`h5MKG2v#O`j zW(@AZU6p3V>e0Ay&D5~N*wkdy>lrj;m62P4bY-bV!W!^R71Ww|+k%+MPAF#iIWC=9 zYW%3$Q|8$eAK!;;S2+e~cUkce%ZFy&_X|RcD(3V9`>3wEB+vPr-7udJ*w-!-%vI_! 
zyO=WXW%of|!R(~XGqM2r5;%E}iv@$gAX(D2M|3gnc|njHd~J*L%QfQ~Ls1zdg*}@U z;gYpU3W)r`3DrE`3Vtzuy>i0Yg&?xych9s72dsESR4CR#eZ)Hk)JJLvhXYas>w2sD zJ{RNAK}vF0{6viRD%$Yrfg*F?wZ4 zMg#IN?P{aL?_McNY1Y)I%b$lSTn6JKcEu31- zgy+)Kp^07u`z1u4bGD^~GXK#daw}dWrvo}GwTxDL_DYj!*$OMLt9&@K5pd33`Mg1q zmStvO+V1YhQ%X6N>;tc7`T_jrFcyI4zOU7dEyqyt*wI$US6 zH<$BKUesK_!8+A%r!Gt}T;nOp;!5!LWOaNH&n_SyfpU|o{RF7T59^59VJnuoLq}gU zR&@z@j&D%pTwKj-2&z8of7&|)*~{;o$8%v$JlJ*I>T*!M6ted?!O`OjoUW(B4_;h- zcRhQ~O040Y#|YKRp51jv&Qm=H!|4r&hyt@49Q@2vF^a2}YH0W6vBw|W&ve}5NlkL+ zY`1c$W+3>`m{wfc)7_ZNi;ym_H%Z#FwfigLH=3p z$I~xkcI{Cm3y2CkbCq=YY{r7`r%@%EySoIh%YMw6S>4jg8yz`@M1O2r?@d**C~=Ko zE7pD|^-MErx}M_XrMZkuz7mJl!@*Xr5f|h5>uurnUo2(xw{Gk@XF+|9V0h2))5m*y z$i~U)`YBCgBuI~P6&kz741F0U67IF;%n=#f8cRGf``ftb33v+PS8a2-=T zz$K-?1p^5D1(mZ9eG6r0(WPgHN(*M0M@iunN|7f+Ha$4dL>^jRRB(QV&n2= z^BpR_RhE%zqLB+#C7<61&^)0pobQLe5ws(QxzuM@WOoLXCzOm)pk@0l+$n$!n^xV} zQp^zU4qPd27PY7noM0OXbtQ z(Rg0o_ysj<)z+pfn7(k`F3rH0R9oK9sT_JSSA#q0+44=M3^u6q(Qd!em*&<6lo9Od zrJQ_G90ErOLo`M2?u&kJQwTd8#$tD-jD9PZqJd&`2$Fgs!A|}uvHP(4z{ir?vQSQS z_8RsJ9`k8p(q1CX#i^xZy^kmOx*I&JE4`v)xq^vg9|<{d!QK$-CG{_p?uNL;a!OZ^ zAg`@|B-yi=<@C}lr==Hll^~!Y;r#0ux3tPhi$llEY-Woeq?hGHh7R>Fyyo8P?Y%eW z%G?JH4YQG`V4aD5ep_Uo{Y{>F?z5iEr5->KcNRV;>#mZ~8;IDK{IauAU5`kCFO-v^ zM$e$8gXVCA-I1pU7Z$G4oK6l4tgh?rTVt878ct6XB;o53*7UA>LEkBpPTECAv|G8p zsj;D~+b%;t!y3_k@6v^dPTGOB$@1N048wuXLK*84iDnmyx}QC&H-DFDbM-^fra_@r zDeb-J&A=YzPl^%L<5y}^MOFtEqG@ep*&{#Pr=_cY*qV5exzCxZd4+56VFQd^i>b$J zoY>@oWnzro5@~zwT3ADO^AVaRnG5m}<_unykzWdY8SK<@@3Z7xORIU)q@&Rqh6D58v`hH(#n=1;11unEowLIRX zU_ZnoaH*oG;Qgo!S-Wnb*L%B@dl7j(_UM5+mYcJJUKRBpO~JEQHa%*CaACdS*LCz^ z5RzAmtVC4%%sA%R)~)ki8Z%QYut#;QECkC8~Gel87Dn1MEPB0Bzq>8E-9q>vTrwsX^`sve%WL*az z3CjkR?TquNmWtW(nOz6q_qiZ<={DF@r0aY?q397qWOl<1ET>ngH`-;oFQ>Y*UyGvV zqxr;H9^cK~$)9xJ%=#_&iCNLN2-f$}w;oqS_v!bZlX=%o7H0d>&0@e`Jbs<;jm@Zg ziA@Q#rT>%4mDQ0*7VYurGkFE+z7I0pc}K&q6-Ago=xAox-Hluscpui&6jq?XHufQ6E9QycN5q*vhg1`ct`v=SRljMv|!s>vl5O8b3wyKyn?Hmqg?3y259mrA6N;7Pka^EzIWgR}?O2$)XV#Z!$Lc6KiR0%WW`-smAXw=r_JFj|hRcY7h 
zLgUTnp$ZrjkFD*|PFRDb`W6oM%e<+IS@e0#^)9}$Z!Ul(>);-i5A!OT_xU7HPwEtzK^qh?3E|0|43Q4PB4awluhQ!{*7v-@f?AKVM-k4sM zQrypRA;4ra>DqFXK^WJJvGJ0E};dg-O4m~k&*cV6WbOj9O6doW9 zK85^vq3+O_SsEMPV%?FX8%lh;>JHQz#(ah$*}JGTb2MZ02#g{`8_YMQUa$wPUQCa& z6PI#CK1S;mvLqx< z6)(LXaEvN4YG&s7cr~yF;zcN7#u9sEa!)4N0|KVur51Fol@E6#|@=keb-) zaO(E>lIe)tvl*pOBeQb`@^xL}KEa!xDflLj1}#end{{fq>(wVKOJb~0r$RYTRT@Ff z5)pmBuAXoHspr8cZ6=~M&HZ=xvn=dC)+VnE=Uu!|A1zzsLY@B65-8ks6A+M;*{<S2(nGthx!=Q@6T(@Jd_~)~$pKvuW~_@`Q)99X zxis%H>)A0H_sO;ROb#P^=?Jp|fr7DwIebxpee5{iF#bJU#Zd~JVDVCpU93ZT2i zaDOt3Y}tlRHtLnj2bix`yLU`zNREs4p!oD+uFE!j5u|2aO!3%F*CNXI zWq#3)dqOrizro`=O*v=Q%xtB@VnvA(PZG&PWc0@?4Su%mW26)xn~XY}gsgxghyr&c zfs|!a%usb0d}4M^cJ{C&k`Xuqu@jho+~`Bb5UD4=SZkZ?LZy9qt#IT~-2u9wmFjDT zOJQ}&{N$$}d>W`ZoMOdm_1)+n_cH<7}*OWE~OS_g<2bu%o0qZZGrBs|e4mb2_*nX5yl#bWPGKfe0J zg;(LYFzKbK&+&31mB-#|(7ja;ZWt6}40_w)wU60)SiZ061$5)F{phQcKWGy`wd^7B zWg8D87;X~Tv*fGQbY$0*$cl7bkYH&8FM65y8Ch+;pAE$+CaFoAsbq-@U>n_@hhDY#NDu^6XfhuAP0$ zFn$E8J$<*eUeY(>iiE&tC#WaeiMobz!p;2w))`T5JTpZwv3EtLf93hg zrC0k>WNXLe6!zq!_o%G~k8%fX4uxMVpmmeaWzZ zjIB`rRcgMAYwUZcoHY-PoS+NK4JT+XebW{G#iljRYtn|NW)FEY_41)HQl`!M!}-cE zZdxzYOaxVP5!N7n#GYgcY4asManuMz4kiaIj4Ow}uRvr-#hgys2osz27zXb~nASsK67 z;hK5v&_qF$6cX56{ERaIZm0Y7OLs$~ZoFYyoA-X#N`f#Jr&ixrH}cM6me@25$Q3gs zyyk!r8}Q*r=m_~Ov5USlA1=%u%qnZONh*u2rG9?mTWZ<&nRA-!5GN?D0GKP+U-BaL z`&>Cph$N|(>iO~!Z1NnCE2j=*z&WYhWW+B$xC0g45GPQFZYSA+$zWYzu3WB2>RVc% zMzkAAsqF8~A=;CqP)dn$LhjX`oxSdy=+G6;EDB5k8x=6u-OB2t-_{)qfrhBia>Cl?B-Nlr1^~f}iHf zCdHN&1Y|#9K}mB2T+~<5tTgTiW?D2);I#DpXsyJAGdI3}#t^{n`J5Qrgxh8=J(AMb-#5Zt z#q0%P|0a|>Zv3~SKP>=m9&D4GS#l>xMo59Wqoj@npf&Yl-LDX*eEb}eo`SB#WqpE~ z>g^&+s^`Jmpu<(4@;b)%xtb1C@#p!2+iJrUmaOriGZ3zzbl$moRA=uvs3r0RVxTYH}5Vtl{*&QD-Frx>oTc<>ga;Q-OKW7Vr8F*=up zXCBTs9SktJYckW#z;B-G+*e+Z$Gcma6iwOC%rtbRzU-D+i7oOS`lar}FDp;Jdbt<6 zA7&==%|kz$|C&uWZ+*8rZ|7KQil_1N&gRTXAoWH@JC!eJ_Q}^Tq9^Z~=w>Q} z1Z!QD_Uf7~TLwHy5*j)pOV5gB$(}5$ankO-QEM;o&b-_+OD$rqTH{l<`=rreq< zje$u6DUoH$os)M{rChr6Szi^NmPs^r{7s{fQR_CzjWgB3-{XOXgXrxc%!1HJ5WTqv 
zcd^bh$c~kIjS}{N=9XJgYvw@Q-9`?KQC)P)5V+|PG4^g+tJMg}vp<9t5F)5cfffd; z5iz0V$G;ONwsPvB;dd$GZf{f7Vi|N<`yyTZu=8Ue>J&S2Rpo@ZSDPp8g-?;=(&G)y z?NNJ1Z{)nUidp_SEg6_G+bQ;;O7~&qdCN)^_gy^~M>RTPH8vr3s^It|)1{2zT|So| z&@wB4bNZ!V+d>KXwWu{)Ab{1rA)6*}`gpxOlH>$dN}?g|@uiruAUG@RG z^{kFLB=8KSySp&ST;OiTFcZ2x`L?|cRD>)a&hK~Vam7BB3=vfdB*36lwBjSVULLhirTRejwkjLUr;R4XJyP?LRehcytp0kz?~sGC zs^V+h2HHjtL~zNn3(#Cww!IQ_&hCoo;`d_9rB71YgP9yJQ7v`!O4Pks3 zu}j%z?%m^LX|LZV-#rfK`e-H^AmZ99DCPe-k4e_|`H;uj!kANP*2i<;%(q0Wnj1$P5FW8xfJ-6 ziknZvN*Ns&gE9_GwTJK0OWHk3tKXauF-Yqi{tlbSDBm-i@z53eJx;2!LSci1{ z>1DhA6dwKKH;!M-^_hETbX}d|RH@T>^oQq4=H;%3iykju$&mC=ep~M5(((vk!nXWV z_v6N3m8h}zb5Fjeia2&^M(&}1-^>~!I`(*}+d|lT+HCYe)RQmk?H5Sd&F-4;ugyFd z)afd9ewlmXwvkbmBH`rgyJ<`c!Tk*qF5No3QKjjKDy^G3mu=gS{S_rz6N*8JvI`I^ zMj_jX!=wW%?*jA%PFHa?CorcRIeDjLs^|%#?N(yC1TxjsI_M-Nvose}_EYF|Z;6Vu z&n%zKXi+BJy9Y>Xzp;y^I;Togys1N$r%z43x}9?PA)r9+xxNw3p%^CfK9ofY%8MX` z%QU8;STetK?Dabvj^xSu`?Yy8yMPgW%YDU&Jn};>T){Hd9j zZ>913R7*;6>avulgNhCfsn7%4#yyvX?9Lue9DB*Fohlj}u)01~G&UIWl^#+T#p2&W z6>R3NJgvmmc~!>r%jzhUQ5v1sdM|j(|K-g$_Z<(~(k|SyZ9I22x4#&6=0)~XnYl;C z%hPW-Pe1ggZ!wK1w>x}%4N$rdbB|waE0xo~CfkI%uwKlecFg#u&P|p*pL_J*J_`97 z%qZdzx0gxMU^kS7Z|>1z0=oi_UIwp4x$Ak|baAcg>hbR*_gQ7_Z@nh>@WL_ccdJVv zRg;ktBBp8eTsX?OI*FQ4T`3%mX3?4xwLD{2CGItz%y@n48?B$KImuSDB0AU;h>U_i zmEthnm@CfJnW4MQb%Qt@zr4x2iGZP!@4?&Ko?aH&6;9nnaaXDDVWRxH=J7NExA4Yu z^dhNI02R!24ViZZ!gI;@|#rN?|S-Fi+2)n7OydKm>R6|y35Ys zw!d9Nzzn3mHc~;Hok1kNst_}hgG2`Nwf`etseV3td(C!$R|bl*42e8 zq?3N+@pH5I#L36r>`zN>#3yoUIcV> zvsyalH6p+V(xiUw(9BE}h?E%|grynh@?GADVSOttb}Bh}<8q6>xn2$)F?3-K;Wbc& zO*XU5lKgad8)QT0o{sb#d7EKLM870p%7ojrR@l@M-8Lz>QvCjkV0(SZqX@ykjhP3# zM!Fv8@UcGGn3XZ!&5+19({zA3$77=Nszaq@`@Is>=Z+~#6(SZn|5wwdZ_4?tUvtDC zln@UvwRMh(0xe&R_U1Z5;v1urF7C!r#wbiRrLkdk{Lt}05FXCY&sgxR#c%`Yl$aSe>F9Xl3aZu|O8sk8Cg=Hj!CcE1lI$lZ=#wU!^d zzRT!?pzd%Hk(9=?k`N-sQ?yLg?kTM6Hnor34h+bSYrQkr>JVCX9cQ7m5)m%ST{|+s zd>dIIlKv7l_?Y)9?-p(eRH1~gtGgjKw^v7EkE#FGRPf;45i@G#>0xEzoEjSE<&=iR z;#(ujGkp|cE$t|)prw;J@it4*R5izwQGsbHDNQUXeA!kHy$k$q>x{Ksj7gObTECRm 
zZxhtgj$~8P8j6&oN?eV(Zn>!m*d)7-9)}p!B}D=*8ExBmq}<}fGYRb$cEy3U;$lGi z$(|yqOI_}*4v=M8G-3;Nok5#e=phizsqHZV}NVj%nDG|Tq*#y(# zlH92@O;k^R%!ou1t%pl>jpXgH%{WwRf)j5YJN2aEW#Do)+QLBEknHpIj_q*Q2!t?y zH~ahIj5ud{s;oz!nHuIPhebpxqS$#$R+cTR(1*!C-RjvlN^s^JM-mtHb3MTJ1K%DT36O=q&DTTn zE46m6k8iw-srO>SFaM6I+A6Xg0^UgckoH8}pKC8M`3vmJmsoqB2x7SmSrG#4_26hN z)txVG+U_FcSH(CqzDl9#YreO6l}@16a^y?7qQjo6Z}(pkS~C{g-)uH4!x9r2UItNP ztEPUwjJ6T!723<>6%Trv*G#mf(_ zcPy2=b-8lW06653*G7xqY;oEbOlO)qBUFO>@df01Wv+_P_dGMv7_vFG2 zZS}d4MsmZCO{qj)FUqy|9cwZ<&wi2i#m{~_@)3zx4 z?>{#hx&Ae~?ES~mOLGq&xTS{Zeev?z#r>jx$n$;8JFd46N%{-g)9hm6bZUZL-Tcv# zZzTYuSG8=-j{7FQ3hMnfaH*~6QP=nvC$X{D4O6G?7I#WP zTU@m4J0&{RS}d#wK14w02Kg^ER`$-mGwRCK5%4QGO)OR6bL;TUlVu|g)y8Q>`4(T& z1K-`%&i(A*GcMur@N2*QLi5=0(@Eo-vkzp-qT7Gdd7}ZHNn^OUr!|Lh@NzNBPSoJS zQJUqDhOE)!22BK_9W?vH9_`wrH4ydofL11ZiebLd1R5WvfDE~a8TQRcF3-9OZ zT$FZ7WvVO^@#k)TM}Oj&IBo2JQTMCMGG8f7=>jIZxM>|v(?;J=DfYk3fmxOmb;=~U zz6X+{y|d@0%QL%|PDUrr$J`BmxDl>47y95~+vgIMfft7B*I#~>>yv1&@v%yJ{p48O zJxlAz!3kXnKj=)KTXOM-de5~5c*cG*xa`W1+&%QLzEN>+N7wvod82A`=2OBUXCL+7 zDO3zLdH9vx(W+Ez{zURoBq8m%bP~S?qGZW5&r1J5nZJ4A!|Y<61M`)Sx~mC4dK^h> z?DK&vOBS>ombI?sFe+Slyx^SZxXbocOcyqqk7HtwbD@|-%5#cFfD_o3D|&@2>lk<2 zg|xyssIXD)d$*fvZVw;3)jhaym+SPElNVbl-z+&alfs|w3wnA+mcgXp4t-j=n6@-@ z_C&k0`{h~)J}0pBBkPFY`M)y@ zgz2hZHVY^)e^ire*e#J+%~_DK&P$vVv0$<(-IKe)GOo+)(pL9z>dd~kZJqoF^c=LfJN*xX$aS{d4SBr@;$6Oy`@+zB#5)4JxGSTsLNua*Ak*ptsCL-MIc! 
z)UPyk*PTqV74?0W1BN_~jTl6y-edesp?uSIU*QzDV`(+)J0Au9=o&B1xK$06|9NZq z?DFLB6s>0ca2`*eAc&vUD2K!AyB;h7XZJLVCE_I7_JQyE=8c_#Ph(5Q*>9whQ^Cy% ze4Ax`Uxq00`G%t3;T)Wb2gW4^8}>Qv$MZ;VAj8W$q^*W=)bF)nqR5r3|ISW+(f!~V zJex8+Z$Lonxpt52yJ5K%N%YT~7K7 z)&)O)<5;C>b5 zHYA6KXhJ`c5Bto%oJ*dYvA)#TMTy#~kg0>isP{Rv$n{dw@^n#n<;we~w7?se_3B?q z&&KY{3lptBp%6QXb=o)&_7t(L+gob6(VJ><{qum5as}e})w@6K)aF|Q=RdQ6suIGL zm^UR78Qv3@vOIGMG~Mot@{ZUhy}GC|hDcdwTePI?Qfdp!Ue(g?PCx#5%hC?ef1~=} zv*jRkG__`FDxCm2LwrL#X+*mB=%LWL^XTKs4?8JWd)u@o%5g&0xE+b*pCF~5y_XsD z!N+G_Gs$$enB(v*QV%U1i{Gsa z((?W)sv&o@%g7u~9aSIxgkMoP2bHkrX$9`3i%pDcnl(#FwQW{S;{4G$W}R)tG!vrhz=E^7cXz5WRZI}wFe!WDlZ~c2yfzdKhtH^ z%&lC}uJ)!z3B|}IKJ%I=xxicML-0hz+}3}zB(-r}avvYz7;Jc-ApOy1u0)R3zt#KP z2uKW(#lwl) zIXd2jTTB2^F*3~M-n@zeshoiflI21t96LU1D58CTJed1>U+rDadx5}MxrqJjM;Op{ zDFf@CwIh#Q9#`)~AqBbX{PHWu1OL49C3E*2Gn=;C_@P;wEH9V0_fH;Y=(h@)tU$AF zbB3~FoXB|;3eJrlH{qrC_C024dNRlv(-HSvSx1t#>Y{eK0e>m;i7$L;BlBSKN2%#< z_;0yBYPHN)^hQQ3%Fm>;x}&+xSeI7sB_{EibND!IADi+>zj{S>RVaifuM#izL+Qyy zu;&i`rT>hSm;bKE9@oBG&D-lt`-{2Fu3&D%PQuD`3(V*rjDaUR{D;9x-J-e#yIkYr z{Uj?F;6{y)MzVHJv;4?3Z_8p@nitFO^oLwCb~k)-?@cimOK#pn_21Rnt%tTnJc+%0 zE5gyxiS1%5@#SGsHnEjkdEvLMJ*yGb|Cii=aUbMe>3b{+KN-HgDrhdg^$!>*r+GZ3LqoglQF5~{~|BahkJaANRHnxFBtE)-HMIeWb)WA~t9bt;}oT_+7|HS5U z)}Sld!QOcKb&=D6%BwH!fj`W<1VYN>po)xygO5YFOeel;7rAuw$xvG7S6d6XObY5) z@p_I!t@$t)(5D_w@;ltD1eHZSVkzHRopAUe-FY^}pkhYyL38mVPanpdC_Yb)4Lx zvM~DN>)g4L{*J(OXiw_)Thpe_Ih!a4t)7d6p2pv5S;nP76%wfu*JT$p5eBqB;4`)x zJ~9E_rIt6-*|Tfr-xF2XQ$MmSpBr(X3!U}$6+AXN*o)Dps~ts+=l~1K(JTgVBL{EZ zkKsqIlT!KCybW!s9?}UaO!H6Lu@hlih$)<%TfvNr*eKMm zkE3R#vpT2zm=gd?C`n`t|lCiEd+C-?+T}RF(FSrieCm5(zO~B9bZyK zKl5tscFV_y0|1hJ?2AA>87E$uM{fyvmv$?zqD3Az`m1Z{&sQZnI~A*9=ch;F1js)I zb>mm#e}!7zC2D;-QT+^8#${dX$<=Z8cM{WoIP|JI-xPnhQ3 zpek}%-oo;oTKOgj@a){W*}5ptBQ^d&vFIaFkWch@T(N0X6!g?x{`1Nt=BFJs8&@1?&~3m2Jne2A^r znpWXbY8&-}jg+gIae=4}cS$Blv@aoErYMc+5JxR#FW_+1UMhO4@(|0B%=cnMr4GTY z?=5a91~+M{$k)3Fan~m5s(>)D%r~prC5Ob;C=X`!(IyrnBTql8#F)=+sT2`R-8kaj 
zPi9r7AR_%)Lyvy?vcQr4mwOWA4auROd_koN#lOa$cj~1~#3z^k5P*RK!}!z$Pm!HP z293KSFH6ij^9N-2qc>LrOju6CpHqlTNgQfsN=GnbTprs*aB|t?M@+*pm}RYE!Wf)s zQp{Y{xemWU*m?!FgB1UAX|(Sf9h9KdUD@BUJ!VcZfoih43p2PP_>YoyQLTuy}Z%7 zZ@uzhU-L^`)r?yq>${xm-^ko{>oiY#BQb_r|GiW72=7*`qqrOT%#VJ7jN`Bi%4B;3 zZwrj=w$x3?((O3gR`APGZHHxU%@F7eQ|JQ*#S-(1yx4WS|5G@9o9KQ|Bdm#Tj2k`<+&w5@ooC79kViolvDRvk762EOz3%I zwdaUT^FaDw?Xhx4en}2<%~P}93{|G?bfuOJUuQC3Fskyd=B<5!=uYN9=8nYqwkUpQ zvK-15_#;}dMRxnCysEQZWj4Cc!v$i&V9-GMz*_#KTlc2x2`FB%)4S7{!k?}0pf2MX z$;T3!bt(?*F;+Gk83j?kMQK3e3d$U}wxMiyRL%`5W=j zhNh-puGM=$>+_NOj6d7xQLhORhBydR!4@ZDrgo!y7WJWO-EXL>VMjrd+W_`9GN`X$9CT_1}{nS*aSO}rmI7x3+Q)2`6j zoJ6Gc|G*#aCNPaQVW*aY3uP*A5swKzXp@w>ZI}%;?XX*S4WM2=@+t6Iq7b_7Qy`+T z#{lQT?RT^Yb$&9PELL%-uvFiE2F4*FA9aLQTQE0GL>s;9+!$=fda^T;Tv#BVNM3=E zoy&g?i$rTnG?;NT*Jd->ERD)wo4%&MW$8SB%eTdrpL6EZeADyI)?`JGkPqrGogQj9 zI3H1c9?k^qQBym=SR+IZzi72NcY}I!A~*WyVmTsYkBCFB4y%{g3nzjX7d@ga=Wzr* zb;Q{2zMEcw!*}{lx3l&52a3vhWbQd%O63n*8b@m#LtJxQ3vSfv^4&ycFJ|@;vCV^z zMC5ZXU%$0H-?Dy(*Bhb#v2p&|I1TbHO>K}MVzsl#b4ipsKfGu_ z@TVAY`{4+Uzea{%O_x(u(v%_3a2+qC$zjIP4 zpXoZYXlj%Ky*Q6Yml~LAQ|H+%^$y!its{Q_jGq9k@CGefqMTE&YWeEhCLdF9*Up`6*P9|l6tBTBUIbJ} z3QjU+*u4^?FQ4#GSUejk)X|J_e7sFoJMwH+kr$L9$=qEUIcAMDhRrg9=I5kux_Q8l7yHOFIXhWHnHF%aqs{&pG#c`CZJImwd9AU!?2g=X$f|nlNtL4r^}_lqPQ3-{H=r!kXUvsIQ6sToL}B}z6h;$ABXC2Xf4lou z=yJFIGwtW6#A7tgx=jcbEDyO!vy#{b?ziyk>-U3pS+kFg^nJTsJ)6&$GC4{D_RE%H z>7YtF;&VfWyq-G`hd5ROe{gI+F~+=BL)Lgi60RcFj;2nAr&z@Egdxt!+(r{OL6xR$=OCAJT}1XatOT+^ujZ%Vt!Z`R-aKs>JmM4LQ5Vd=pVPhP zxp$I(M8h(dFUVlfqieRYf>c|Oec?c6*LqNmM9(y>{6b(;etdM{@SQyViTi=V$@EwU zHjO7^ibu_O@L|J_26@hF=GQ-A3VxV}W2xx#vdL^CR%;e+;*pij`@Zycp{L_HtIS#Y zA?X7AX9IKNkJ&ZkTA{NnH4K)`HOj`TCN#L&PzK5h7y5l8Oy3>P&CNIdvy=&%5 zz+An(e9oKAzV5B zl3~au?R_sEBSy-f-bau+K#{Koj^MSlI7B)8<}js{fjCbI!Z~buWBIE611erHtWtB< zC=jeL#$3me6SQ36AVq@OKg3bPO{DAHD#O|wkxv-IPNF!6e|0S7*Szixdu$Z7%CdZ& zL6C;+VJer6lU}BA{GX#pd3g+7+ym~8*Hd>8fg+IL8GuGhpzVFO5QuTx8OLUJd9qEg zV7=~>5b*kV>@$$QI~x4Z&4Av|L@s`?-$s+X2XuLkYS-Ox2EbEA1O2(nW4D$$5RZoc zj9j=@f`v|&>mWtWOsyO7^@aj^HpvN%& 
zs}rp`#kR4F3mcMVR!-Fhtf71R?CV2VMLzSI+afAi4-E%b48bG%- zV-s8TPo;9FSu$9m0d9>}{T#QxX(Z2Y>-HKm!mg0Ob|i9xPhd-$WzW-%^Bh0uZaiuKxO6 zH28m*0Phfu@&HRdU%wWOdw;njMlDB;aebz`eE8?W43=ec5P=+Cu2PUBqA(dnN-)o$ zULX8(q^p7p_y!q)zU`~1dKPuGu*)GI&+!#v1Tj8b?l8H^NS~{)HzDVfiPO+gdw^jrjL<3Hx~ige9RV2F%UDwMnK!2mGI;2k z-(7ONzwas$Zc0dzC$FVaK9UAgo=lga(Zw=PM*V-61@jcN-!wok_s9oO1B&2cOGm$; zc9)5KeKa5~H6R=4QjvZMP6o1cOg{#I8v38{XgC~Kd!3oFGiKI@vX!TSxX*^4ghopO zzd1L*BMF>#rnQs!Z9WDtGBB(p@mb%A+@@T2;!~oiM_HjCb4Q@S7S4t^xeod8khO=1lrCE|nGH8%RX|APw=N9w+i+fAo;K`E}elRZB(tu zd#V@9F)9?;oi5vx9RMw41?oQ`l&-MZf6M9&hMd^N8a<0%Rf$9rU?7L_5ipPF zz!P%DF(0$(k_jzZbyd{MHJE-hvw3fdl=yTslQKoLKfg}^=9(md{0hcX$;58vPcGt; z^xwk%f1olShVZ}6V|wQL3BE0c>L?08f}ab>prO*+=!xevV91y;(!YtOlQq2fd+m@+ zCF~+USz;MROwT#wzMdKXNMViRYP`^-anRiL$qir$lFeq1_6J_jgq1X6({Y-#hW^B~ z@tHTZYn!fgOcung8w~?g58Wi}wHnQ~xp~)R{46>A5zk13wSJeyKcpxpicJzM=pHhGR=q4M4DF93+p=%Q+A zn3a}NL3RR+pU)dX_b^4&9cUC&C%74K>DHU;Ai+t@O zld)uE*YNKj0Z@P?k3Q60L8+@&w~l|=LIF3V|C8bria zA?)XCNafAN=7Yh)_Y+0@x%PGy3lELfPt)KxmW?N$_#o$pD?M!GvmbNA_&K_&C(xtg zdko_`LTvAZby*8$WTDDCAlQeBftuNm-gVYJdpJ+~%}WH-axx^2nRA*e81kgZypJPC zEfee4`o7!dfrc@s62iS2*&{H=YXu7^LYqN50%6th&Wv(}NTT{qTMQ<;Z&Ug|h2y_& zB77~3f;M{2go$d`e2JBw6rne?c*SO2@A4ppk!?egDjW;yUf!;8EeJDTs72?^a8+zDPX6 z2Pf&_H%pR#-Yjhc-8Y>7yvb>!X#dtlLEBz2^n{JPBVkc%h9*ahNR>X5r6mA1z-DyR zA~qML(h}DH_bvSM$pNtb>ExOB-3xht7}fdb*Z(Gw|LeyU{kOs|673i%q8^&BQUnQ# zyw%=-z-y?s8GZ7C;2DTOmWKWR(MkLtQabXC!0j9v51?{y0{t&BfVA9?3#St9JlkK& z4||MNXY3!;E{ig2Ct(!zTDl(xp^qKmWs@#3pg5tcYsjc7oYDuz+;{R?;mpyTu! zfw=vmj9+((kYff2Dudr;u{)yZ19|OcbzgmbZq`Y=t1K|XfJ4du7^4Z?B9dpoyE+)? 
zDlM_HiSX-V(t$;>yJA_V+?Jg>;sv*PFSN!Gos{ZI{^DpI^Ce)T?rOQH<}C7nr?{JA z`SF0gdBDg5I4J$|o$6h>mlUEOOYqI<0W(Q=MEu0Sb6<7{j>z`Isrb}zM`vDqmkr5~ z4#f36Mq20BGC|GYfQ1are2Pel4Oj#jggp?KDmqg!o={yCE&34(OSF>8|LbZHq4C}c zy1LS6C%bc;+f(Iu3e@h)1d>t%KrEFdrJ6J{f&oIs^LRw-U5-<|_cfZ2L=B)BA+7&r z8A*c_F$D6+=R9^rr3=1_!)Nj1LH^)2odUI8K+V5+m>3ZHSzr`Jk3IUoNjl8`z5iz0 zBtrC^d=EeryQ1h{WPlUJH0xz(=@?ANIIVjB8n8%FfM|R{*m>L|F?B~B)}cTIK>?Aj zchn(3vj9pvtn21_>oE?W;XI985=jW2%rYQdP4^dDS`Qp&Yuu%7@o2=}KfA*zmD6so zPYwJg0AU$$BYN1)^Y8}<&D!s}6O1k-^rp(~1k3=6uJgC!UqO`ME{M{h=FI$)&(<(3 zF8}k6+l#n|rt#BZrJFn$TMx=$vsqx(9Gwp!OSMuf4e8XP{2$70c-WTII`R)p2FL!b6OuHJ`cK=3`(zd0PUIr zn5)&8*JiSWA<>JwFcqAVtU{~@L&Dx@vCbTGx3zHqXi`N$ zDVl;>S`CysI{6r1xntj0rjDjg`%v*sQA$#3dX&0BBw#)+w z_2JZn7b!B;q}elzaw|vT%qFGbF>Mi@KrHW^;JIzJs;mo8y=m2eV;rAn1rXM`?fY7; z?$e|9I7GibVBq^&e4C1(M?YaIwY7x*&o@kq3HdPNJk|j^L&|7RaOvJD*gw4(7|0Zn z=fd6ur=n-@J!LCJ|AoCHvNM1lRK0Z)Ws(P?dw)@xda1Vtts9Muj8?p3MH$>HAKA-~ z=4spe$9I{6`VTa!7I)?D!Ai1S60gTM9Qvk9w^>xq&~%CVEB=12zBWON{hb>x% zBCZ$098QDzT54@z6LHOIV~vPK??T3njws4sM<*f=_4T?v{m@YBB^)NsN8)OBj@qo_ z5q4r;%(Uk<78f$^WeSYh+b`%TFtxWinzjwjme78nvk2^cCCzwM=#=#pFOHLTdxxDd z7j9rNczfDf{LtD}f0O1=mPUJR*t%)d>$DQlcy5jrwA2)Uy^6dvx{WdL{xZR|w28QV zcOkI2lSU?4!Tr_<@tdcBZRX>&@sY2VYG6jOmh23$@c&!PSz#ousBW));RQ&2HpDaO z_r|gq4S}}mQsgCb>@aD$xV|itZ47(GTK?X&E^ZsdU%SZ?g*{L%<%Hy2%#{8U84+`I~){{RPhbicQDwZwOYrs-YQ z?s#N2xZ8fBY>h;Y9-H3&*>-@3v}L~hiXM7f>VjjsJ!oM3=&xDj`vZMJcyC3abA{W! 
zR36M7*E2ggqc2i{b`N8le%^h=MIvCW4?3(3#Iq9^Xxg%{8HVtd^D1QA*myzht=?4#jh!2kg=AZT$}LgmHFS3{n|gYf70$Oh*B~ma z8u3O5zLWRJtvXkUM)>I0#{FE+2O;v6ou{4IqN&8*p%uSS_=@SkjLD+opQ=H^S89IN zW0!B6MYp0r7i3jmzD3c~X_G}kFwFIMo5wuCc2G8D%j3aSBy+ccASXMG2nS?iCDlp$ zkcv1vzBQPcaGO$h75wI-?P`Tc=G>Js2M1l8$qv(jRnD88pkta*&6<`cSh%lGCw*?a zU)uRd>v+?&FICSXd9Ha+9?Z{c{xa!RIlas8jAeQAIXh^%Ct1sl3wQJzRPl}>_rGSD z*dRcv6JUM=yDa%?z-G|38p~X z&<{aRE-)vCGfySlO+HB{98tcCz2Y_Uw>MdOX50Vbz#+Ga`zF`>RV97q6#Xeh5xT?1 z`e!7vjZ2#Dm(a%#X-A&F5({ISpYIg|)nU|{04sbo`SzF}k-9ur;Sfwj-@0hK6;Feu ziGYn51)T1#!xk z5KQbqr-5)U;=|_Th$2a6efAB)aB1YWG&`e>NZEAbCufhFB=uy`2ln9h=}l3mUw5qC z{H*!~?)gxIsB{)|@c3F%?w8=Qkh_(DwMk=N{iSRMWp;S}F>$B6b^qy4%xxa(v4#0r zAUBq~t5sW4YHh)1fzXvyWb?Ix(YE?cS)LvTc2-!tt~+*n1*1pCh@+i+c50e@NVWmO zxO|#}QWNU3@!pw9}jV9L` z2pkg&jbBCLem1C^(j$YfFr4Z5@{H^5O#K#}Fzbny35NpZob~G0P$B9Qd;!?lO^xkRvEBWz**C8 z1~wiV9d_^55xP>`6;D7fjn*ahp|KMWyFtitC4qD!Pq$Ld4b>Sm%4X3q=v4mk>jhej zW_+NKE$Eysv5Yb8Nz$O*>j%|H7=#8e?rgE^5uc=2!4i&(x60|ww^w5F!jnKT>M9OC zjKdQ<`}<3`RB>X90F$6gEVhO>NNRmmV9pJP@#E=^&1^*vb)58Pw={ai*r$*!8U=B8 z?OrewddZ)om7fX=QKO>`n8-f>ff~Q_cCQI9m?V#Re;Q0-1X=H%phD{c9-)F7;=%x- zD{voC8_Fw+v!;lb?rDwa>}EGdLkZd&!O}Fwx7?Zt44~|4&|=r8MV)h(yb*GrhQKx3 z+w#RMPtg2JJaROs&dpXBI^yy_58-=X zbPY26mpRpy3K}l~#gWX|I8$X?xj&~@U1;xDx|$XP?>NNMh%poI<0o+_Nq&|wKTGx2 zY*c17CRP}s87asug!Q){*O#CjtS4sObY#- zKxluUc6?Z#>Z&H=i{!yt2s|R^QQRn5S9g_LFqw#s zX3cNqV}iC>)5mnpblt{@uM*_3kPx~MxSc#oVbIz=5xOb%j1+V3jlhAvSTh7@f~1pH z0cKOpLwLG*^g|hn5o7Yzwsu{gSQInf{dl1y_#FLYZ@Szuw_{OQ3x0x+^i|Odi z^)8RhHC8B|OtDpb_c-8D9cNjd8OHk+YRBQZ$~dm#Q&Q7bPQ6wo6qX*(tGrdmM&0W9 z>BrLiCmxl45{o7|RhGwU?)w8ejk?*koLFkt>)7Q`XlEK;bbbW|F<;y7Y_xr zQMB{4omUJY{x9wZdIf3>K`rSXy9$fb(Of2S#rYA!kjDJ=o?jCSeAAJWuEzX)&RCNrrp9M{H02KzXpRAIa1~>Wd}bp zRoi`dd1{0yFOE&Z7EA4R*T7E5ae}P(_`(&akiN3rr)NdY3CHg;!ELAqrzIec~ zP&#A)k^NvKRO6uM`)^;yNfjcw=cX$LauH2!xbCw0@^X0O(Y(1GqMzPKUVco=TH2Ks#$2b>DEH%6ln(A z(GKnnd4C6v7CberQ+@YCQ)NM>-7K*A#=hIIGF$&mM=YO*OwE+EN5M%6H!_)1=^*{O zIxJaKOMgiUZIXWxcW3E3V3a}5p6pWAz+_?arxUJpFOPnU-986fp|*MWsSX!>uSdy| 
zdf0NbwS>I!i7#bs6gGbc=@3F8VB`4FdG*z*kPPL%Kz!QAA886r&r4h0aFUA$ZrVPD zUNb{u5nm1BSqwZbF9+6yP$Z`phnKF0jQrMyc!#UYbtprbxqRBaJXpA-Pmz$Ig3D99anMy0Vu8pVwVj;A zXM*V#an1{*)tU@?;!W}AajBAXoALh&Bloefr=R|^cL3#zji70Qa&0?Wxp`~m>Q{)rT(qk z9GT0oGhQ;fmAS@Av)%xT;r>?a#&P9WZOK5jJD~RE@7!z$0t{*S=Wt{VR} zs79}FV4>I@fhV|nK+0+r)MD=8#p?Gi@0kg*x5{IRi3sZ8Xz55@v)7)&Fa!{l5p~i}&wCQE1%e@8^J)JVWJn?n~P1+}4dqQF7@K z6|0Y13yH%Q8P}?4fZ}=i`gJfm+Ml{F0H{fp@=iYVvlmjjJYh4!t2%$vMo`1*WASVD zTP37v{KbGC3Tiq^dtY5S_*bNm#)52UIOXpMv@Tfg(` z;^QxINH?bIt*bppiybp+)n9+EX);q_{X-1jF6>Hx)B;#buI|ddvXI|;c0rcKQHw9e zxp6)szb_@C3!t-iV5?JW9WJh zfs3Wp!1Qn2{DW5`+0w=~h^${WLWf#pxdYxl&`}B!eqwvxruJY`e)WaX{^s{VNw1LL(yOj zRHnL~tH+!T3vs(OQS7kTjG3FXrTd8I@2ZZ|)z$f-y{=6o3pHv0SfgfJ;C+g#<8ZV# zK=yUczCSjX6}V62`l|4>F{9v#BlYxg+{d@0JOW6|?)oQ)14YReSq@8WsufesSxe^% zUl(2GC3b9%`V92azxh+e)I!nlQMN%RB(MLguvtmL`xCGPLD3}+t#Vl2*C~vOi2wL# z{V=Hjjr7{M9KpvqpNk8ZX6fIK<>ODj0c$3om6|$>_mneV^BEx=06uzuFe<)w-0Sk!uvh;J>1OZrcRh)U zaC13loR4TibBwq7@U zJe**PTVB$7{<7*(A{~Yv{9I|}9{S$>+n=p$5-!EvX(fFh#h7af9-Jlw)wupM3kaSZ zV7`7zpJ4d)a>zfb@xVmBU7`xqHxayTiJLXNS?UvCS{DhV!ZVjkVk%oxaMedgnziz@ z=S**p!ps907t(5ON}#u9;ikPwP0(F)mXEu=5lQhg{bI3p)w!&%Bor3*z4^8;9R2cc zywVM7@A$GaJbeq34XVLaXQHxgE}uPA*gU)1pkmg}c+D%A@m#rw$9Ii?%66Y-V9=pM z5cCj5{A$l{Pb%5MlHJst?wJU1&=vR;nLc91~ zM-ty`oK;<7APyx{q$oNbbu8HEeR0*llS@izbaC54YS1jFI>72jdT0u42z1>4WI7b= zfBB_%Vy4Jhd!(lR3zA(uHB@S?1yo^`c6SqLzy`o$!O$f6JFEIn!`^ftnQOWc#04ug zqwl^kq}%z#FM3XUZ{L@ZjA7_k3$`hkRDHhJWQ7+ZBS4r@$owM?O#fV%(T}7F%eGI- z6mB;}SLsMzk$n`=U+iDnud2zxLD}u$Y)$5u=f`3_C9T>b3V%vxKIF* z%kpM9L=DU$&Icg`5FH5KRQ>qW*OoK3;iI}~lU> zQTu>ugMTzOZpUVgJ+~_UsJc?=fzO$*JY)L%vje-UIyH9BL;+yDJ=+s~3NU{c58Td0t44VrJG7>V;Jygd(1=&wEzZ!4u*(Y{EbIQ^9 zIrX>$>@mh#rZSwF8wklzs8F5|7CFZYWTw#M4tWXRGK8rA*=FP`{E3##ldY2M24)Wp z`2D>9$W*{~%L|R)cKT_}T9)lh)!p>^9L%ka>!a^6hLiKifeCQ6KxO{R|Knuw_3h}- z-9WFd{tsJp`0s1J|FQ%-ve97Z0b|^q-d-?Mi4Z-3srLj89RXEH5y&;j+S6I0_Eo{( z&~R)mU+(bxql+e__fNiEdLxa()&e&oT(28N=Q 
zLNP;<|81sX+Q6E$7(t8TyZ*ZoK#AuGUyO^j`|eE?)RFNwU~E;1+iQ0!;a1fk-KV2Pxow)}K0DFV%Mfgzd$&Ny52s!oqEN$|u{47sQ$yZ)ntRuV z{1UB6Pt2U5U$rCAKNKCC`Sd;9Q0Z=xnTc&&=Cc8&u)}3y4XS~rFcMArwrESQ~2XB`TJL<8Q{36? zNLD!LqG_y$6mr88&IMfdIe2XG#3yEk8`P|GAGOA6oTiuYcUCm5Eg`@Pd{2gi7-Tqvr_LZM%SF~q0e zoJtOT#yH>;nN5?{qlrl71RE7btq#L|@Fbf@{m4vy)GR2MWlDYuo9oD4^p0u|QcL_? z&Ku_1Si=x3^ip_+v)iZHGxJt{;mL*73hNK-o#Czs^0g^a*?nWi#oXE5(#6{SmBB3d z)x2cQe>su;d)B8w;B)7q0N{jBiC^Pl#)YuVm&7cuzkMosKcRLfxJ3iS1_Daym^&vC zvGZl3^V2!F@)eNnkPYZGyctxQ3E55XWh&q(63eR5xh5SxKBD2X!50ufQ=smzzV9Yd zWHrKCi16=8;v>qNc^Y39O2PNCfN%-<9Dpdil|X~z%&$54f**JT$SP%#u9hiahcY=) zU?klz7>0InFln8iShp~E3#5ESzz0b?_}5%)qFvY09~YI>OdY7zW4;8&Ey zc2?g}TUju7$360slPNpOP`m1Fek0uOG;sq9)b)1Y%F*FzHP|*Kk z0PZUMR)9R<=2Y>0vLY*SWVzTJOlVO9ggx`t`(7*l`5IfPq8{D4I#o_NM_UtbX1z}J z3LS&6)+tSS4?np#O=m$rAXE0nwB}Cm-=(H2o~OJ?aRd*GA#VsQe0;ouB$Gq@-kN0eiPxy&&g<;cr+uy4_4i zsK!3#JcfQ1!2Y&Z#8`pb)dS)R#asin#gC1Q4(;pk8 zqCjstsF_O*n6NW{K*73hD(X*@dLDSM*TV}^KMvb+3l6Ubx|=!v1xN(kmSh@YaI*=) zWPx!vZ9O=LW=glXygGaxlO|K* z#uH_TRFBle=ZfUtQn`z}SS0@VuOj&YDncKkjXQH^k^s)l-E2=vWEsy44)j;wM>Jx( z0IZ@a(0>!TDIvu8GWHo)qZzupVuW^e zZ?>M%Nu&erT`4!KcI%_&1%~SsZ_TXkth~Nqkll&YfTNIr(%q9Btn>1aEJ$%A#j%&W zdZQwW4Cm)efKZ>$-QJM0;2dm@zO^Hke-V@e?B6ip_|hPp@pL5=Lj7Srp)V7;Ga>G@ z!AwdCh8UvO#PPs!uNZzu6hVpEGFwJZ1yoLgN{j;Uw!+SGlhjP!;cl1b^8(a zWqFVW=r(a|iZM*ep%2UeH4*Ae&xw#cPc%=B0=xu8%}<)|7}&-CxbVQt5kO(5J?jrt z_gs#8p_B@gYX#sM1KeRYr2cXOMf>MV5K+T6#dvL}$MJmt-I*_;UuHSf3X1^N72_}- zz@lhkkwBjsN>my?jN0Hpm5SN_L>91IbjJNKGVutfkpRrHi`^}S5neDgsZWw>k`JYK zBp(?Y1;(+-P`T3@?E+Cnl5!~K=m z)qh6bjiaS#8n8tYh?rGUc}>}u1(E24?Xnm$t?}pbbvbXcu?{0{3^Oq&6sNY`xLjwD zKKB^}7+A-BeT2XM{=xmO+oJuoaM_MiISqCTfYIkdSsZbo*T->XwB0nW9^=-;zA=Y> z?AQr!LgRinmDlrA$9uuZ;{lCHk*n6ZsJl6s8SrCd) zcL%8HX^Rx)VWu_*pF<7gbj3}Xk-&IQvkQ5k`Vwnr7vS2ox?YT zHd0a3&nm&J@6a`;+u-NVVK5}he^4y$^}kTWc^%phO*jG=qb5O|i6{f|BL0!pHuwz9 z^pOrbya-llwFyCazN@wfkfm0N>gi-)rB)+B{3r@vWS@|{9mbTDLGji_qa*SqD8l;w zPz}fD{gT;ppAE)*VhsLv5)1~)RToG-2Jk^KVFV949@mN#d`rSvHUY@7!YT%82o9-F 
zcayW?v#V(^k;NKY+9!+V={64-1J|7a01YT482*pZSlsgr8_;vCnj+25Afse72aw2O zySj5GaFiZ6qTr{&w4_#iXgK#J=a%*{G_&vn)+AFGbUZtx6P|ytsXg;6okI9g`vx${ z{1O&78>|bY0XU&5wqq8^qK%$RDzx|62BG1wI>IvNLC!{Mm3dJK?JuHtu1N^?VS?ePJn6rLCJL6B0Mwia7G(}vE5!)`Ppw=!653u^LOc6|R|BsFC5}`A5$^JjmC$NFZ zBQRWF1J+b57>celh7!&A5gG_#YHnXQG~fQOp982z{Q%XQt<5R|H1U}3qBkP2N5;fK z#}K2St~-+~uhSWG#F24&5Fd@@jepH@!%IACSwxmGCh1RoS8{x77@5@5s-+)Gff&*> zYyueS@>(bOPXugJB-RXtf=0e2f0CsEMUyY14(%1dE&_E*{Hq{k<2?Ae5_uP%U6#W? zqqL22Y@yg?UG|0JKe%;I^ciRnQO%Mv-0$#RU}d0MnEC8)mU6JtW4(gyWqw}Cc-p>7 zf}w&i;z8#ysDs9u3`tv-kF=O|@FBt@Ga`p!FsTzG;!Z-Rpa}p&rCaH9%ZMw}enCYO z*7Sqneg9&{?BOm2^$?ler<=uc_93`lsridFp7p??7~qk)TKW8Otsde3%mNOZdebDT zn$b6M+Z@e2#t$ERHUzLgfW7Riln_$IG*K|14I%e{YHDZQE=7oAiIa((PCI@+3;*>5 zL*t}A^d%=Gn@ut?@&z-=R4b|{lqCM=m*;M4u}UdLLQ=R-M3CjzoVAq_BH9=2`SrWU z$=-ZXaNFCi;d-yt;9M5DLph(nRX8sm^}5t>>Q+3Cq91n0k7__B-X8u5a#&@bM7~t; zp3NC5PddDt1`?^SqUtU6eia3)5?gVKq_>wopABU0Y);Tmcbw|BIGa7hd((v3%x^`h zethhB0XL4-=J~gIOEVFQP8i*XZGyiaqZu0_#h{4+kv}x>U7!!lETJBwk~@X@kPPxm zeklyeM}_K%jCIR}2)8qWh&GcvQSE4*B2$H8D6vBT;~#LZroA2K8K`|Sq8Hexewz2C8nd&DrrvfMlXG~yzM#*c#nf&M zlg;{#4Q}6+Q@0updrcAQ2}!G}I& zM%&1>1EDM8nRz=Nhhew-Jxj%ux8U!>7FYN^HDu^?NA+Xb8-fChRUo zWO}s=hK9xAsus(mqc&~Gfi`&L!jDmX<bP_^&iKL&FMc@ z-N~&bxnN>$-$TdsX@BzkW-JkAlO`(8p&NRA*j%12YfEX`-oIX4mdJ2P-&nYmeroym zZC_VN#AtNa6}S(F+Hm7qGacK(p5RmV+{ba~oy!OsIUH_e->p%~a1~dafqQ1pbp9K9Y3{|90n*Ev=k+iV)HWNj%v{nVC{xA=Kya#0dfvi!h}&8XwqB{`u={j@&~iED;5O4JU5fL;#6 zO@dl24Yc;0B88Xy#_10U-W|@H-%3pi008uGks68(=5^A!gbPx?*kJ_{cSDn{#NYr{ z*!O$Xp+q+5!t$((6PEOTu6v`NEKbBW?pZN zSuCNJCUcc|F^N}5d1tDm=_e?L3U8NezX;^bbKe#*ys|*G)TBkXnQ1oi>8feQ^N~0B zd#_8?+K0@d=HhhMPT#ZyctZN*ndR^{3z%uAE42nZ^C;vt_&Z^IkG%8BvCqC2^FkJW zbv!ad{lQoF|0C-wz@lomwkf5QRFUo;Lb^dfx}-}$Qo3X49+1ufDN#bCyBq0{?k?$O zX#S19p7WmXzh+-vBW(75ay={VbuV!!-YiJJX)aYPzN_{HV3+lvQSq;~hXE;jpWs)JJUKEbxD7;3_Z*&*o+zawRZI$s~pZS?NENs0n-E^qxugZrLdrAg5RG>2zXq)=2?N1teCJ|h+&@( zo=|6Y1E=XoG+;xZ2VxsPTTVi82FNrSl-1ibzi9RKNQD-`Z6)qHy&>%5J+$k3(}n3B 
z{n78(mtEZfOS_4$YR^U2iVysfh0vuwx_e)ceBQk24!~_bo+D)Y?sA-``}eA=IG>jNp=hKp}d3>H0U%6H&8?*@^fz0dU) zo4za+3j$y$&AKm93Qj>sJwQ-A%PO!nMrvk%fiaGUvB88yG|sUNs8H270gnINFhLnD zwbS)+dz~9kYUAcMdrputYj0Gz*E%7%XWMS4L(LLX`s}*v)B z&&@Qx!?Epw5b5TSeyrZXJ;mgrz|Z*k!{vjHM$pxK)Qc4NEAKLK8MT*&y`&tT3H0^X zgeUSsiRN+l3F+uhGosl{(0bCD3XP>mFZ=q89WZ1yh*(Y3^SQORxl_n?xQXoz8#9W# z3fRpnCVRaiy1=+;tAH$|n0ecDHMyd9$8Xn-)}wODUYj>(pjBqo{FQhHd9DRz4|}nQ z^Nibj&BI>;VH|w5cp^~^PN7Gz8N1bM{y~EYX}rFRi5wc>utxaWTPYb@S!6_iszZ8X zacCqrD{y8cP5=&AR;U9GglGNf`+Tx3;SL$#_lZh)S6@2|xdB#~kq?9&AafKiviq(K z7EbFN;qd2l| zO?8td!ls9X~nt+kiy~*5}ktEFIIsuWi0=1qj*^lg%@!c+XkH z2E4-z7kvh?U$l+?+I1Hpy3@OM5bSM8kw0O{06M$dZiLER{s`oq(<+R7pl*6dRaI?l#(#Uf8LZanOiIR5 z+F%6$S}&J^s>%SYSzLqJwdr()MV@Yxle7e;n|>WfK7sTDifmX1fRKXg-h;{SbriXw zUy)(S7uuK=@jzffnNcPn_&=m<0H2F5*F+H9gjw|4qHoa+fYBM-q0kXH@>Z(nl~HHl z7xw{Y0*()4nlxozzSd72R&tFW7|A!OG|ytpOQx(YUljG6^$_XxHb0!&h8>FSp06tK_cUhqb=%%jkLX7#B3{p&*o4$ zl|4l2a|b!~JisHr^9%kF1aRI|UnnpKv(V1_ERn`dM#o&T%LdfFG1+83Xo_?sLdfw> zqd0zD{GI0hnv^9Gzr0aaO#TNv20nXMEPwAt0=pT@r!vJfKGIBS{{7l(_tzu*NVHya zwbxnF`Zrxd;BRNfyx`+hf$5@ft2s|qi!oA4kMmn5EGaBU2u4OCD0wtV=#v~dkxesG z^_(iGez-q@z0q7J4~JKdM>EYp{F==1&ILuk(|^4?+Q8U5dlCVEExI;JP*-Nbl@;Jo zjwNrGy0-f106v+|Sf9HLQ?4<1fxnFP6)QN4yw#3&J|p#M7w(sdSXso!#nEq?_g-x` zv&_?0C-&1HoHa;pPt*-p?6oUGlQ=L^s)*QEhtI}qm9q`u>D|0u@OrLQ+^qK3KF`PK z9gSLKJ~;ih5)C%XdQ+cgcrCqE6>g$i$ne!)!(LTP8+%+Fa?MY)4 zRFg{9aZItB0GZ42Ou>tiu+?eC?2a7F6F4{?5tVryEYgGlXZ*VHP~u?4UNrj)8uFVK zUjI2S4fwdGnak3Zn;_Y@dYEnH$WA4}1M#ma!vSI%L{mHJg4YvO&&vD z@X6luETz&|Z0!U~?ihHL#O}CapO@Hp-yzg9*Df}k+YdmnBEj9+O8EzMDfQ|)P_+Qu z{167iKh<-MYw#N7)2iOpFiT|#p?!n4-1(FogSW(^{(-izVLmTtcfLst?Kz7{cehQ{ zoaOmQfzqnC%3#j8Y()M8{^anDNc%rVVkkU(!B4W~FS8Q>X|}lLz#DE`!;ztA@&HwU z;`J>@^7_Ivnl$(+{|C+#%t)J&y;5^w!aa`UbGywe*?CYm)$bl;R>kw`MeT)-r z^2X~8!8V+c`u%>h?novd5=vfC_h$R3{)?hyIs6mf5{{I>&_#~KGLz$%UrK*IO%7!P zN6>a>28y*kxhyD})4%Rytk^G9SGt>o>22+wzMqY3mb?1adKQ5m3fY{FG|BeL9~rrd z(Lw1A*P5jS(ezXsFBpv!9Ldu}h^f|yJ!w2rrdG{f@Zv_Z{@N^#lpp7W#AddSiHSai 
z8!XpZDeDDzYvQgd!%+4ZqFHdIkH=A1_GLFJW7orGP}W|onlOPPVjVvd+su(i-wt4( z&pC|^K+5it7G;`9oSISZ%cibzzUTqz&bw7f^v3xwzrPZ_`}3P*0@HucmR;*NzuV))r z{PNu9{H__mZkyAj3o{^>dTAJFvYLogF#Z9g{`2&&FQ6cL5(nx5yARL~G6Ezx+Ed*uW<*31PeyTw=nF95vsd%nG)eEpfktf%e zJtPyE*MDReh^58`NAF`guCx%RiQp6{q~unxzX+G!Pt`-`la+G;Utx#|l!|A=*jf98)ZaLC3mLf<}Cb8T6 z_XdbVzXOMCGOt z^$<<-Xj+H(!-K!8G8mNn7)qUQ+@#VovX}MQZ?cU?72D6?UG=KPynV=u7hzbHT<%Xs zf7MC$y%uKBp3+ZJ5SMEm3Q^r+n}8;X17@THH$fcWuw4^4S42aK`SAqxr~B{@$~FFby#yzf%%Jn`ua^2ot#x;FsG)b2@Wy&!*H zGE#p0qpMQBLC{SAqud-Z5Krh~r*`A4u4Uk^MLS_|SB_@$Jsk|eYWRbv@C|PR{_(8c>F|uE( zLz(zN$H8>={Jwkon_aG4R9D#3Gu$d*N#!F(P8_%}s&%u>PCCVt(sCqRTjIr-v)@0f z-@WS;)q3ikNTyr8{H@2La9OT}_hMyXXT1J)FlJEk&dj9RrX8C$H{{34#tQF~*2kCO zXG_A@8BlYDl;7>uZ7vGEl~$?aUS(a>%;p_H@j~_!+wwz>D&w;vW8jgXQO5~ zdbB|WX4%AA;ohZ?3LEaUUrh}X>uz{#kF~N4WEzMNcLyErVIN9z!aj_y@G`AePcsEU zW!@+Bl6+o^ka43|+@a0I5Bu5kl@tp^+Jn1&1~)IF4A_Ght!eG#bg;+DwG<=>QQhsx z4d5b1rI62B+7aWFerAhT8Dk%Q64_Q{xs*?g_}n0X&AN@vFQ!ytX?MjPnD}6I(lKl(rDmk{CN39t0gkY#B?Aj`? zK+9EpgL#fRsv^Te&l?A%zaW4wf9w~K8)#CV@kH0OZC@;=O-uf8PpDZsaiB9=r2<$q=nt6|#z*WET#wM(07hs39f~}!qiU@ z1_HiqkciWZGpmOv3f{*lF`qC!d-82k18qL~4H<3gr$$n4D?S=rTH6;OU2-|Ev-xEu z)*9ww*Ihj~=OXi+MBykxMy-ux;rIIy=_Re2WJVF%8xsOKGP1}1G4y2YAa-W&PhZSgNx-JmukqhsH4N?i5|qoM$e4xfgp;n zX{PG}eALfBBt&uGY)-v<@`1wWvc%#DVQb`+WD}CO1<)wJqt|T6$|JXSFE%Mh<$(yz z&!F}VJFYX4@mCcUQSjzaI_FweoZ830=5A!4 zHXQ4Sf5(OYkygWEfQ0(BoISIQ63-Z?;WmthBufJ6FuY`trdM$fgcf+h7SAFn@q0*syChiD@86~FLD$zz6zm}13F-2>OQ^-3&;pIqGmDYVi^n zLQBY@JtIzuzdm*33mDu!Hh+TMHDZsFU!XOT1YP~vJR{#J?UcAuK1sVq@6}>>063i9 z4Yxtliw({nrE%IqSQF8Grg1ZIKad2n!GfQDvn)_-epxpXkFJIP)($S}$09d=n`2wI z1cK1+stl8k%8^(&sFbWRj!fm2 z>aKePc@eUt3Y(3jRCuXB!1Zq)Qj2O;BxC_Pfff76R^f3%4!p!D zF|MYWs;1q=%6Y6Ttul{luaa8W-qomEX~@l0T=k>nAUmn?U4uJO zc`W9hj|Fu)>-8(%lI*zM`HPC(>B9;wmw{MYC(TaxOL14A{#w5#!HQWjhxv3_PJ%rp zK~EmyP+!$J@%ly4rtx^O^&>Wjlb&XPcMv~CoN`v&LGk=hPpIHKouboHhbb+e)$IUyt2Kn1zyS_xkJ%>bR|fa zKQq$g^xaCIm{fqrt|^E|zQ$rM=8+6f$oG+OZ?|rDu*LEH;_scWS7o&ZXvF$uNU0#-`c~Ieo1Ph 
z;4zx6zM(~G$tvNU{y4LK7lewm+V?nVh+Z^e3wK=g**&i3hv=dtr`vNnJffI8 z9j*8IMpudieF@fvpTSt;bCkj?pD*9f1z@jV*$DbZ7K<>92HT|dUDNawW;^th1r@GO z4Gx-K3a+-N_QeRM<*g>u_V#{OwCvk|stBML90$)u)311`v6{b(W%Pl&|JRZ_zikl>gd&pMXT)b@yoL@g$`J^>F z*muT#2bmRgCVjRt<9e{Cezv0s(6*ft@#y7PU+75m&(f_X?#ARq3EMhdM_%>B#|ask z;`92HAE^{2U^b^Q0XZ>QXD5OtyDRV50Oqty`&D5F{ZVzA`{MGPm(|<1@_$U$e}M2c z?no6#e}V#jmSI3r`Q(dxah>aFN4#vnFA!TjwW(Y;!g_*$-;I*!Gpq1A=e6$I5cHM5 zL52C-+FkMx>Ky8yUq-68w1~biIp&OUy<}ltqcdjeYuPHVSJp!i*PpX5UB*;5D>*ht zUrU=O{i%W1FmLoLg=Ca<&xHA61CkZ&b{v68Ehlsg`9erc%=)osE=+;AsV@3!IJ)z7 z&Li}$M~{KzZLIUhPZN-yObEFWY(91*qnSi>xpMhM; zr`umCixMFz=SnkPqP?*9{(0E(?GykgZ3GGADd)bri~^uRaT1=)QG%0V4r@JTD-TR9 z6i5|$|98jgKUCR7mz|T?kS*7>SaTEGA2`x(khX$#Ywu`<(ago@FObfTe*lNR^i$^E z^7J(I^!zuh?XL-k3AdJ1)k>{GEJfQzPQMGSQM_ayu|M2YjjUhl2}Od$i*J zG6g@lxH}%|fEpppt8LK7*vDo?hiPbiG}ZXSQXRsGR3qV77@2=;q;2yLJC(nQsa)Sr z&;M=xu!xA-bD$ua-b3b;o8N(I_03Ul0-dw8SiEYL4kKp5VS*V-`i(UiP z*Of@(8J668Ui_~OpZ-)zRKijs{%>?jYT<#Psi8|)!DI3JD#zCORRd4qs`hj5{kb>h zS5sLi%krIfGY(rLCb(X$i1ga9QVV0zte4+|YoDpFPvd?jIX(B+z8RKX+ z^zzH7zBi%kX-gAhulLdLwK0%_itv`;t@N|>N(I7gXfH*(usb`N{C$s@kmtK}pKqUZ z!^vB>npGqIQzZDmws_0KuGAwseTj_XDQKD|mv(1H6z{ngwnoj~u%5&Xk+hLt`w2vB z`ijmJrE@V%C~0-SDk+o1)%aIz&rhNo-QKXf>b`yR?PTd=xPmy0d0oEN#I|oW5U0y* zb;Ia*@8Yb?Y;m!4#z3#T9n9wbAwZLfTk}1;scoE5*HAZUAB>PwHujT9G@^PSP~+kC z{&y|?<24pKO`0#>U4pQO0vr-j33Zq_OtajD3CsiEd;d?udm!GtCr}!44I zS+N6pDC6<*>N(#TiMMW*wJzmo)9v<0j)?zc^*r-yLCKPeS=j}GGYE{o>*`V&BE?FK z)=Gc{(oYm@cUK|KY2Plc>0aaeXm)AI1K|;z0-vI0@Mq(}MSTLHn>4FMaN8K{_5I>{ zl}!8E>JStSb4k+*TkoRJX1Z>#M%trp2UJ^p#1jWg#xo6lXW_o~tNBZKvps$21Gn6b>H$15DriNY6l2dz>uCxs{1yrzRR{d{r!l(cff z7tB#j373At!+I#B~HwdVS_=U|G6{d89d}(Eyv( zsRrO7%3@as6&9bgxMkXAkso+hOf4kk0kNh5-Z&uE^uB;S zOVQ-i6Eo}4C*)s`KY478X0xs5C=ds0R|K7;+8+tt`@h2^u@K~4(Y0MPuS=F)sx*Sa z+_6yt(&`ln=7eq*aYhw9Y?|(m3~?j34+o~4u^kDl0M(O?0{vv`ytjP-e0((s@_F5;!`(rg2yWigG52T)c1pxVR4F|Hzc{pY#E#{5>nRqjJ5;qq)YT z-?JS~m%JZ}#E{FAD-UDMuy%JB8d`tOG4pueexTS0dR0Sn&C#xkns7G9U+uy 
zdv)kOI{o05Vsd?e4wD1K44elEolH%*=UqEfRpCTDPBL{(d>W6R=-He%Fsf6f%sIl& z+8ksO^D&6ZL_uI4gF`f}(^(S4*L}3!SRp}tuosE5vYRpYUbVXJ4JV|2CFXsv$c0Tb z@-#)z^fKOHkn%DGp_#@2!4^&vyOt=CQ!ETBEDWX@`2;bDqU_O^>9q4A)^$?A0DjmP z0Sct%=pCc-Kg#+F!2HJDllef9@~hAq7CQg~jH8`Rh>1vvl8Lx7q9|xlNENW$hHp3O zx=(<7ijM|DiLbyu`gMvnOvx$>JK(6XLg%j{rd}6Y30b~xU5Ej&NGq8~l~c~H7zT5Z zae%Oq9qipBIX-J)pXJz}i42v^375hLv9#KYGzJrSvsR_zGEnFyT9&P!XL4XuUEh-p zOfH(YY{IEaKPYfVivK6jQ9N0PQC6pw--Ga07jSa10hVCi zut9k0-K4AouollD9wki$dLC2pkCM|E&D_~106%_gxK8!0$%a?;;QD$t^K;{k)V-1Y ztu5>yd%}lNTVdoY$7YF-ox^$u?Hp_dZ_<>T6|gTqYp6LgzixVOf)~U4IQ`p(-jk2v z-8jE)u6er8d<81tq5deddHPZ~7V+UtV%jH<)FuV+}Rd-06x3EM$UE!YB01%BF z1FRWqK#`ZA?2QL3mXu;tmfiN7^ZhA{l}Q$T(0%GnA81wY{KTsE@*J(K*POqvZkjvJ zW#YqJTXeZ3=05JNtUJnwsDDmaLt^+710MjF{7R?V=9YUxb`k+ln}AFlY9xS9&B62m zpr5)@W20s7zaci|wsM8lNc*+MMxKqCw2uWvM8DM?t8I{bA^4uxSpmd*vY!G&kqtel zW6uCbFCK5c>iI++5sp9clMH?Mhu9_H;A4O9(VfQFc=M_`!Bg;;H}0eN@I6Sih~&fb zL7dVXiLu<^R=4Wdw?~Y0 zL(ZC6QP{g4)e)sqP~TfwhsJBCQdIkNhor1G#J2H8(vS;?aZe{9aUyEYaK)^D#zxn} z_nT(Zw}^1!1*Mz_pP%9|_sTL3Yrt)h#potxid*e4K)(8zA7EsE^7vgk^QOk$D$Jt$ z{%Qem)#k8oMSqds#)|{oYdwqisq9qdkg96@*r!29aa^cI0}iU)3EAk{eSk?xK`%-M z-D|L`k5+E&q|!{o4&qNgkmQU3Y{!iYDnraRcC&krMF`4j?r>b|x^-H}{6D~UxfJ51X+=mUtz88X(x6vdM%zuZZ6v)2x2`3pBS^ATrMNqa$WKXYYhhKaIu)Q zwQi7PmArMp2udSm5pv7_?m1Q;jC<^@=TO9wioT$TSP0UpJx^S zWjpsRnjH$d?0av!C0)cQI1BHulf{}=i8{H`P$6YL^-vW_FM9Qo$Te!dTtzVIb;TC@ z%Ud9u63<*wS})ZI=L?UOR(+f1n+f{FK@g^7n#p>NI>zp_IZB5UJ{!u5Pb{9SQhd#~ z!o1ESmgJUa`vuxMR}pEnEa$`wn{E120s99h-p^)cYtGmeX0XghE1`$gc30v=J!uDz zS8d)kkJJFX!+$kSzi-%@0Gk}jW~=ZnPzCobV}9^&7uiub+$2rWTc8SM(yr=L{GM4I zdu&>UJ0bd&;_nyV=#M5!521plsLq@F<&?Fyu*-4HtIN*W!_rkV<5`U2@wbH%K#sQs zC1v;5#_L9P-|_r8vgpLmqw8DzM9QG!0Ouh7W9g62NLoLl5`z|94tPT+$K;b;7rA=_ z!JqG8i}zCW6xP!hqgkl!sC2DYs9@<%9ohA^7#*1n4rYkbtjcL;f$1~|tdMrTe`W}+ zZ$8~X2#E5^3qKHYS7UgN74qg-)CQmo(%R-sBLhww&b|rBs6~<3>FPnZqGMJdt^(c( z&>xt3LaI3w4lip?p9tOe^|)5h+i35uNC$jztkbP4#J)KWRZl6sBFnhGkxgKD6F(VHG01B%q;jQz zZ$_kiZo!Je1>?z>6?Dlj&9XlJ1&b}dfNQ-kgv0LjxHy5*n7S_L 
zV?wOQHYX<)?Q$RV@~#W75@2vq?Nu#ceqiuN%(-FC?o|(u-vS8lWiKt9A1Jqufl_WG zt4EzcT_mfXcxoLgL4@2pA#%a=55_1xxgMu+^%iq1kaKq*0Rx|Y2;+`?S}7=|7fC>2q2%b9Mr zuo-t9UB8bKbrmh~UT_J6+Ue8+mO!_%UJ4!#nvd$)JW(Ug=7+1C|G4?dlDmM0VP6d> zeN@>yWnS4W^hw{4@SP|9wqakTIF310H1gy}mi^WZk0PS`m>tcHG(N8dD^xh?UXpxJ zU(Vckb5(*@O8To8@qu2{&fDGMSFS2tgni?ZpAzCvBdJ)ukcK+VMFQG4VGu|FEe&{-LuQ zF?~EexgG&;^fa_5J*K?*Uc&p%Tsccyljd&Uk?H9)TFgw=UOA0m`KY%*$5Wt!mgnWF z!t%o*obVO+c&)iq z{iC_xCq>}4XMR(nzLvv4I=O=NLc?Xf_wAL}K|DHdC?|nGX>6?J%N44wTS?U3nl7F! z0voz$Eemxfw;gg zF2B+Tm`u^(nEJ@vpdcydM~zuq)OI%qgttq#qvCby#01OW}+Rp~` zo3qG-rcJZqn3c&O%&Jsl|M7#`R?UP3FAZTgPD zaOx4S!AKz*0&0;D@k^!Z4*6&d=&t|u+P%?a(iv0<{Icth$Y>= zLap{R{8YMbC+oGzv{b`IO7t5~`PMbVk9d!6jRVb8=!Mo~Jg> z;F9AV&y*1{1QAO1_x{hY}sXq0aDB z8r4KG96!HdbX-4ZPCoE+1Zob5dcTBy&O4c_4a-sdF#3Wj{#+BCF9)$} zQiv+EyOPbn)Yr2L@NYf_m{&>^8L})>4!z#!@2J1E0mZA4?6Ar=5jVH?H!JlTC9`Z| zt=R%l7n$_Ado7EZn}`Dz8X!P}Rd6-MY|uSvX%)*Vbhq(Z)iDY|&%McinlJ&6|CL!1 ze@B-p;Arvet1$8o&eSvhiJ3TRIgvA{k`*x`6} z=T%>YKN;6{&Do1??wtYqTC=`#d7|7#e(UbVJFKa99Q&z<5?SgGlfwc{bFGqxmvIa?w}Tt012f-x2s!*D>~whQPVFveX00;sKH>#09r>((ulbh<0%Ir9KCN&0 zCV?!b7{tdYl3;9#TS=u83il3%!SmESlx-7h2ez5QVPUwdR2w{ zbX~53wE5lu39Pp$$~&Rqcj+IzQu2t8!&q6qyuu#Le6ZaQI&&iqS7O5PmHA64c8RO4 z-dplkm6u!Tj}FJ**Y5lPr6dY6$D73861in|0`V+-;?v_m9sSF<5g~j#SaF)}3Iw;l zH>+4+DqVdtc7>nkhyG;SEhA_qH#^?8QIi7qV)3O;u3a;<>q`>~4hFKcesW^)IHpGG~E@?DQ+fp}NQFIsz$tHh+nx)Q9>~G)CmX*?1)@n5P0szGXWq*G<3r zC(cF_elkp)HqgMRzt0yNe(Jeya|tuhn6A8|zqEuo8Zx0tKMCgFYDH3xtwj?8t#`6v zdUclPTk>@?|JZ1(G$gD0e3>01U9h z@%?TZvVrQG*`USw(*-HIOc=&TI{&BGkDfo{|M;$yY^sEj&ZT3)oAG1Fmz7*ikstna z@*-KVpQwSSdaw-k6x5WGJgOA=^A7-Zy3om7&%*9ps)GOguVY}kQv(*&h>l#sbGXXsB?FKJF!o(90>?dQv1!SxH}l*1b;Bg zNKtkyV$*nq;h6k_+^fg`V5!GCs)??7x7&5QU-_4xm}D>aJ#k{x{S>Gix}OO1(Wcax zb8lu2pkA({aBdy39^dL68U|7Mp+6I)Qzd@a*z&ztGx$u#9vdZ-Yo*SX@_B;dImDxz4-c4 zM!}7rQ+0GJlX1&pKwUE8Hv8r&z~k%O?>HpLZ3L)Sq1)xChQ^V{c1z{s>=8M#2aC%yA3*L{B;UrPXz{Iz4ubi}6AM z(3m1wn>5os#7r};))h40a$qShgt_expZ>@ja@Y`QA~)DO;gmYK{ar?Pbc)O2vJi%v 
zD4k2anx>baOi@x{HC)#yVi(VMhp8TE%@0$`AQs}Z$pi1W;JhT#R=)EgZ7eDjk&N;v zp^D`GXPiY4)F^yeU>9XkEVmcOnB*-rAT`8&@+UleYbP&rU)mLjrl%r13n3E$4$!C} zk`w$u51t$5m2$KUt&3gds2|S4&*@5zFEP#^^5&_!y(4U2 zYv+cbgXRtPYF)O@+*#z8eW;Y=;M5~Z`au{Ylzx1aN(*LZdT~&j8 z>X^@0mC`p&`$r*g<6Z16amf6x$PH8m+FEf?Woh0QBEiFbfm6`qU=~|v zhaF<);F2SssE6Y^RG(PN(VlaJnX5@~V_}OUsRl(QoOhT)X2vh@`IhR#?n>jI; z!dF-=W*kEKX%RnI$drQLxu>;IlsUPtK25W~Ga1>di6c0G<7LAkgMYZb@gs1laS{Qw zvcrUt;51oYWBIa1XO1k7Bw#{3>Sl0Fx?@4S$dUc*x4yOZ$=seYz z*@5PQYWIQW?r1_QDuiKfmrV8_K~kQdit3&uWjF>q`Z~5ea)iqmz9?Y6T-tbw8V&c) zHI@q3^s9aO;iaI=z)GA%RZ${V3`0L`j9;YKYTa!rT6S}lg0LfeBO%+RRd$q&Hy3hk zX1GFM549d?oG3OBBxZ?diP?c9{%*%Vn^FYfX&T)10UN?a8zwS5+;#oyC%$xW`P3*# zB?kVAUAHfJk$I4}9WauH2t3c1{`J@;-SgI8Q&^8=BncX5e~-+=`1sAxwh&1BB1r)k z9Ma*BMV=6NJFLfKSS|Raa4;tdW0A+Hh?V>gYwvJ6V2F(8q!x)ZL+xyZPH6(&g-Zs3 zO6eL&lk&ga2Kvu|f`A3z>xhN^OT63n2@kS`4@Nw!HsHej{;(ijMC)MQ1o`20|L6Mr z7R#4zsYxNwixP3ruTvO#ZlgUOA$wK;%_Z<(H@A$U*fsoT;{abMp3(85|9KbzKYV1= zzwZ6>hL#9arO!^QkkOL5&V|^&)AOHsdT6jW0uedtm=HW#eC1F)=l{K@r4-x1{GZiy zNQUr?PVm=z$8Y~>LpCa_`2Wt4(mfI|JooGdm{OlhahERsZ&!b}i$K_qASxQcBx1Of z?0<&uPs?gP!Oc=WOi-t!FM`RcM-CD8zwT-&#W4W?)1=?a9E$x}WQ%RgD&%+fKZD?d z|C!YP8D-!>8Hhkv|K0h2zKg8VwqVDK1W=3km9+}-xupK@IJTT2O;P;)I}vN#&mvm0 z;##DCf6`KlYw+|xBl719iC9ZHQm?;g6u5+l4plV>?&RTBA^|i?s$-w)hX&*RJr%zB zC^a7Z>C{RyXd{OrWe)VJ{)oi?Je@-vVMEZSPrC0z$gNq%pMDWA!DEE~>1!~Ofb{1I z6WeP?be=t#em_aczq9cZ4mAnpHgR>}^4xWCV=3i&h0Z-93QqFh)Ae%-#VuBZeydq1 z#hji&^Y~!!NfD~06?^h3IUqaUArwR)%_aGIM{Q=^YDAY%>o||1#CB@Dy#apvN51^| z_k$FYfXruA%ue1 z@zw5>^*`PChKz7FO0_Wg@YuB4{S-Vk7BZQYG5%GoP7e|V>J!M{^$;(+|KhJN;Lqav zoQZY#%nwIjJ)BYC=IGatOGwpRmAy}*z`2?(d14r6JaVVEI|@8T+@JXhgj*PO(_d{& zYh0g{&TAH^)~0?z2i4!T!^|9M*jdh}`qzj6OT;$?*8s)=uZT@|W-$IpU(?|ITf2*| zvUgCz>R%)6^!T%S_h6R5-Y>-(RNcKbPOW%kbsz^N1g=eZ-~$JIL~i@x8={W%)Z2`) z7e*m*%ByASUhW>f+^#H22aYIl;9f|(Ad*bO;!(QB@ZWj$8$o(W@q4uGY!T8Kl-hbJ zSho7?!(h!*_cu4UmZXc72Zuh31w0^4q4V9jccPMjoc#Ma<3>Id>hu6_E(GixyD)os z-}fOF$-XF1aI2*iB& zgJ$!vKVL}jaJwZRSE$ZPX`>9YxJUbTd@uRl%Ndy%-wwkLqh^t4iG7*<$d~u<0MFGJ 
za#bdgWLS_TKG%@BT(vgK^->TyG{pTUD$LhUcHTk38c!bVE$d9e>S9u z82OSkLN=J@V#S~3)rDvP3)e+#TsBTQ8iuuuoX$mWbDprGR_b}2i)@1J9GkNuZ1fo$ z8R8)P?~h+y8e+3RCC6I2K?5ExgRThyvoB#dw2fFV@)zv4pa zDMCMRk}SPI5|IDg0mb9-jA?aX^~`I95_s0PcHJttZg&j?L6AFIxpIHUw_1z$R zgX{9o;3IoXphL56KC6O}Fy4h7x90Xd4PGB$Bh5v?U5ovd1|kU0u@5=?a6pIak1+I2 z53@T#`0aQko<}`O$x9=B7a?E&s4h!HOGI`TE%iMGu^ztZvPZ>lLR#)iKeAWcJksx_|M^{??2hCR z=KG*BJa=qq@L3j%4;-iZvl${_+zYF28SQydwEz2f5o>G%!6K@qJOt{Ot9`%==9XkJ zxTd=BNHk5B(4*})&|^?X@qX)E4-E%2uN_{c^V={O`@bG+=T-|IUJ)=>(mn>$VC zrKFuai$_>u#U2}qTMpwIW$u<=DqfyxsfDqTqJBVV_^;6av+3Ya;}wB({?aOGqy(j; z+D4A7__GuV*f!|q_?Q)cihU!=9c`!Qjpa(VYgf0?_guap4I3fQ2ggk)RJLnM>GCXL zd7OUme-xuX`>RsYmy=Tz=riH6*m|^7OB%}B23~J;z4zEVck^?;y&?T7jlrbDuTC{9 z=(PuX>j=ZxsOvxO8_z$ddL4LfdEMTxHB~1{&u);9Q1$Bz-8)0Rd!$!=@I2Xh`t3`6 z-yl;oWZ_-rs3OsI;Z22Sv4Vcxosx8Is4WxpHdKr6?ymYi2BPMDS~#0jY~1?MYi8x7 zbMrnnFrEl}Uq5iX08261JEuzXxr)+3I>V^7Ot+zRo9e3GLDKuL()76%cM0yF|F$>- zs#RSirw#>t6xGts9yK&$T2CTvMnW&euB;$THnCn`pJ6Cm@U^LcL!D=%UG58nzA)j> z9qqETw=_nQPBDrkg73%-F@BL02DO`+-qDt4yab`^kuWRz9A^+I4AB3$NMMpHpXfG@+& zM#{YcPd^Ycv&^}CnX}U~yF4#`heM1_)_m_FvY_I>-evJ*m1Un9OKKe1LOTzG^|wl& zfNk&}9pTSn@cni$n)Um4&XO&L*wYn^ZyujpF}#LfV7qKrbMV}xDKmHdXtfZ>@cwmE zY>bMMjevL?VFIgL5B^@3B1CFyfmu}H7eOIr&or3qUHY|C#IA5Ey{7lmd>Qswu^wK3 zFP7W|moMT$SE$==+c~&MU)C#S3L3;wqtmp5d#7)WsA;Q3&PjU5)MIVu*`>6_A8lvP z$+z8fbj^N;r4jvPm-zV6>Ugn=YHOZcl*X$>S&u|mq^=SbiXEW6V;Y7AVmy1w@?Y`( z*Kh%TqDG^ngJ6iwP~Jt++j1^%`gkgHW#LB^9gHO4*DPAM!*6e`CIF>NP~OSt1a1!256&1s9C0K zJzUgFXBMQHOjt24o%RFk!H)c4(D5%&tFFLnO6}gxE-uuuBOO?dbj$$FJ#0Y2dVPwG zPwGH_y8KYHYTC?h;g*uVUl$LqKJlO2GyheUb}0 z^kP?)SeM6a9{vjJfBO>|1lRXpJI8nPL89>veWIlHb0;q`m5e>zdP(nhw=g2WphVJC zSDfky$O6xGDBEUK+VS=U{CeltcGq_kVx5f*n%a7&wVut!QAOxyr>g#D`LaS#m4get z7sdQ^QM#je)3UlO3F&eARB5XGn+x~l0*z|cOju}r|UO>@dRJO2E5b7vDZF^wfAS8MOfeK@BYlKg(x08PYLcLZ}}ym;N%`*-_Qw4x*DE`m)U`%AG86i&pK9eoFTL*JSzc=_v98 zi9s6h5~7mP{EwRaM^K80k+ldtEMpO|2fH;>i1$3@UmNzb5Q>{;O6X7&fxDe33?FhJ zyxbQE04E%OC2}!Fl&|MRat(I0a@!wbb78z+l_T=RMgD*6y=71wThuii1_%~h6Wrb1 
z-Gc^q3BfJ6dvFL&La-2m!{F}j?(Q1g^=p#c``qXESM}EWhhP|6Ml($sT61p8IhtvETZ}9MW1d^m5!_@tzR}KHlchcZUBtUXR~`@YsLD zTHkt<83(^}8Ta$^U@Pvtb?ez)R+6JzGtU`0#%d&jGe^g`-m}8dlA>ADZ+mBE|8La) z|3v(lb?GDmcLJD4A0U_%(c}9*tV-wM<+} z2;t)oFZgukajX6}e~O7Bl|14laomEijNEXr>d-+V+G&9(>DxO! zCy;BY?xse2SPSD?_HJhkt$ErvtSYP!K784+XM>+_?xQ6D{)yd3VLCrYVKOh@Xgzg^ zt>F4$z3*|@YRsl1g||t1uq^V^otgC5Lqv5X1<(t~z z;oB~*VQz*i*Jw%;%n11*x*EGVh2UcR)4nP=dIFv8#HTlr!(u#WxeO56T3YDZHhlra zoQ(@S_r=l3$`OTcgtz|C6hsg%6ew&5RbEYA!D1mvzaSd6k@+2O^A5+BUot9+B=b4V zB}UJ2oIKrc>Lb%vX>G7GhA&(#h!^oZxGPM1Asp_0UmU=l*X*RpT9v@ZwufIgVdJ(F zVCz~AKl2Fuxei(76+WVO@3p;6Ah3_S&Ijz?Upsi$-@nH`x3}rkKA0n^?`gOUwmiap z!}=IwyKb8AgU1b*F6u}&q6lYjlXQ*IL}&C`%24_GAJULb8I>QrsQ8^gL{{ zA*w!col03m{Q~yFUAkc_qA3uKaa;6zk?3fAlcZ5(csN3M>Gw}s$T)ZHyx+k%}_%I~!6OZ_J@K$!_E)B-^VF1vImlM859o1GZ5h<_r3Docu2>$?UxlO z7O03^7w5d%1kKK{KUKZ=XVrqEUHY05zaY=WE8#N&`jJeVENwna%X-2id>5e8m~6 za0(nzLfPE34cb*D^s;0EnHVKnuh6Pj9Ku-!-oDtJ^J`Q)j zhAZup!izikO&dLq!qw@ruJec-vR|he);XZtZ(rG zBEt z3ZyIZ>*rMhx-hODb2Z$0zrX(cN83C6acYti+>1`z<8l%lc(0STR06&a6`Wh`5L8gXD_W$WCt`R?-+8-?wo8 zd5xfKy;#zA^+~un`~K*!H}y_uv1T42epr9Og7PZW@uK2Ch{{R2#pr}UHR+<<1}&Eb zLR;yHX4~m3Vpg`<{G*xY5v%eNj)(GAw5=1c^H{);<9o#$JR(w-wPzyeiQD%IYf0ZV zdeuv9xn!XM(Ff7ViEu$HGCiB)qa(o#f(m$kH{_dJ3>9nts5}V=xKJSClzc{j* z*!K!&$<(!E(lp0vX(`mF-;s8@2U)a1f<<)NmOyCSd)Z}W8U#N%Tv*0vMPT7wVa90Ii$`C`e^wRut<=J?7l$;ZH#cHTwXuS}cu zX-5vmq1LZ~))aD{#P6ll%9nZhIe>K!iGgL*s4dB^m8`K6=oo!oVuZZVYR<27MYpez zY?acz4Cd41T8u7E^L;?wO9A)z1-cfi_6PhQ{*hS}x&3avo5lHW_P_OcXnJrjsAbU6 z(H$&jX$rajLQG%>4#}nlO3;*4OI*^ZwbLO}6!GP>k-K@3whl%|6CDVLMMRn>usD+kU0aGimaCOH5aCHVo)+*{Y_)CMl zGBZU-QY|gAw0PKC$Xg-$N9BEk+kz`3C%-ay- z3u9YW*E{OnD5?9TVjCs}@n0)oZiUcu?n^zcm2O+#%fh2y$+Rv-8o|9v0BbLOoT6Ko zYAJ%#$TGUBDGLC?rz>Ym)v~CbHnWr-IH@Z##VM^as~LWe!Dm+0mBr8RmnfFU={2|W z^SRrxLxkwUxm=*t{Gc3SFcP=biy{bO8!-e;qK0*Yr^O;qX2}#**De4jLiC0~@KyX@%?C-XRfbNGzPULymdMXFBhMFWa{}3g~?G2J2s8 z_NR|l_ecwxBMkh^XGBFFOrRK7o4fnICsZ49*A)Aqyu1>j@vw(mb85uQ{Im@|6?;A0 z%_I2QaVMDK9H*7^wh|GIy`Os^D438xfC~Gw&bMyETF-Oh 
zI*ve4Haq0K_f_oWpu-e|o>Yb(O2}6}+bY`ny(&<40nxw%Blp&<8w`&Cvx`194`9(S zyLQNMIpPStzV{v8o;^;dOeY35$oM19xzv}|Qj=;cW>{GvS3*Qm)~&s3z#WmAQGJ#~ zm*{!I*}0VcCHaZK`Q_Izq3qvl1D9_?HQJ5D3ibTC(xV>8Kb{WP+3QkNgkIwd>KyT# zom@`35pTS)Kf{4l>+WZw_H^ z__9OZhE-5!OA`}4h@L(~3HK=a<~D3|cqgTkQSq(*{arLl`ek?cVR$~If7(+oHp1nn zK!i7LCD+cp(34btj&VYt#q`dH3(R@VPZyNgAES&^*F|ub=tat>TCx!$Pa<9A)**Nopj+{k%5>FdgT&Onsx#~|d!wIbG1-U~z zoPC7Idvu$HdPaLg*K57X(yDOR6^E)82L^_PhUMO=49dwR-JiVGvjw(@Ar#0BoTmMv zaq-^e$o#s$$dem#cN>3wH9YGv)-8+Va%Uy3Z2EUV41&p(pEg11!{w`j!(C6WZX)te zBTmw~?%o_(H#9F^*kWIX|30epqe*Q3Hl{lk;Hc~33{ zWX*P=fs2>LNXBp{t1vLIc!#LwV|FE}gA}#2KA+EDU+rfA(uma9_%=cz$Z5tX<{P;_ z+$|-I;M%=Ho#Ig(ECkAGNB7WYQO@eJtImFH|8RPQn3ur<9|nD7KVtcWW-L-0GOwH$ zbdEx&axSH9*@@Si)C!qhgqasOWG=N|5ipAP!Z_B@V4FG_EqCWf!B}5k;L&}bnSnq3 zK}9c|f0xeQV4hbR5b)F@`)hJ0><1OU^zvP*tKX#?vEL^V-yBzL!8|1|l1q%`sk&fB zPM5(YuhXTC`_Eb>-!SV4AufB?VWAv%HtdbJe5G5^VOJV|%Wjb-4RKqC+r-?Fn}#3p z0Q?&nCG=S3L$-Rj6x(Q)_}-gWE>^!uCM@<+cA&ogLfMqnz}iUzkE%~9v7xdVF7X9+ zenZHoN|V+lIdXq;`7oPD@hBk_*);c3D%2OVQ6HBEfBB4W73ovW_@E+P_3SKGKsKNy zopfT*OKe%|DWj<;yDNq#epznPK!Z6!e<(M}+2;z^|9tIC?Ttx2%CeR=3?(~Fuf~|F z7yDC`1P;AUdar){$MML}8qOv)F`LU^$=XgCw%1RzsQdVC@$i72542+!Tip+|ub(^! 
zm=o$w_vm&VFuMXI?r)=gDJAYF{Va6CSl-D*;^Un@-ev!I`GeGV^Y(amFZo452|MVu z!%sau%LLU-5=+cewmQ0TZ-J5WQN%DvpwOfuUctoE`8bRVpvErA4Ccme>?0}0N41&j zv|a=Vu79`ybjgiJDA%y+J3&&HUzs`Q1ud(-7}U#ymd+(6_7-CH{pl7tm?S0;H=hXx|s zv31kXEZOvnTw^Ti=mz3vHHi^gRlhi)S|>FpzD_42hW)Qx!yl{ zo%<`&EtV)j(usixL|XQW*YQ~Si{<0B4h*3YUb*0N-T^4)FIZxRXy`r>DB)5MP}uyd zRqdq+DH%#552S^m(3j%zdW;svR^%@jP_tiF3@kS(Bk5t&o3kEHW5rY9I*tY$M|~8h zhH?64o1O_0Mjxt9dN}@8CL3c7eWN`71n*o-P5sr*6q(yq?#BndI~f{96SFeShU zROGOKa&yU_++3NDfU!eJZXYcMKL)C)S$*$vf0kWK41lo?0E0j_yC8XIi zj(fNxs-%+(2OcDldb#EXFZD$3xdr zC`pyMmvks8KGtz%NWqILYFwtXoJReF+V@^A8( zWJF_W`<+BphS2P13O&+&%kyVYTt;lqMW*Ng;+P*b!B51`@wo-hoI0FSk3T+0&RrSM zF`&C%YM{nVU)NQw@u)T}ObSY09ubCPV1=kV;-2h#M1^$HfS1}IW7amB3)3GJLW2o+ zFv9gST8Mq!S$td(t0nQEx=1r=mS?6jWV2JS2B_yu{EI>On0Zr6A0eSYjtNgcHkI<%!wKwOfIHX`e_hu5P$ZNPouM~BT58~Cn>Q8Pn%GLGFI;_Q zJ^b}|Z^>XUD%y3UNHd1V0EVQ(WdSrhe!IMq4Q#@5zA{@qatztn z#E=z@c%LraLJmbSxF|S6j#!HN8^il)5r3{eVe|{qi=?Hc!3fC&%Ln?ZCb(O1zmBax zh4E7ixyz5wQzDq;j9@gqsW$4DDN@3mq<`N(h;cCZ+H*7j?O}?;oQG5iG+e8y{to4; z29A3?YBx6=| z^MC>xWh!;my-rdT#aTBeRDavJKvRj@+~DmdzlT?6|Bhw0vYH5pfy^K>%xSmpLp3JV z`nxlox~avW;bgfOp#U-i+9k-_2zh72<8K6+?syb>Fcai7J=$9eJ5=>)yVUjRzo=yi zcSstIAyvX2U_k5n!S#C;h;jA(S=$`w*H878jr;+10=)S_R^H!(54?(#3f0{Nn)6Zv zpk2|-;&FyDws4mY~sG{`fk;`FIstj3RCzF^iSC|ZVBvP=d ziI#6i`{fi@*SmIC-y@q;5nq&#*XQl2iwk!l=)5{Wy5WwrQ^7-nn}`tdYF_?MG3>BA%LL?>se_YJ41n3t zMeU)ciX|aeb~gX8>|yU(D)i;El45?ZI6AMT@K|?0$&y-lMbvr1(a{c(b(SZ2$1=_G z(vlt#0|_Elay9KJs`s7Q-PlP=LXGP$U#e8~2_IBHy^mTs249?vwas6zl0pbvM$@ZS z7W1-?X2>sfzLHjLoYErI)UMwoOVw8-*Gzw{cn z$RhZcf;<#!30SgyQ?IJOc+6sB7a^OV#VOXp5aTXC5tz zuCZpXLy8`YWOdEOS*qETLt4*I4j}=((qQK8a(mtJ)|dLtp9;>>-kHk$N*ObVyKla) zV_C^0B_bCm1ZlH4%55I+=cCl;fL&-75Aiaxf=80WA63rwm{)egPmYRctOy&!*3QfrQ;B+N;rC~X z>YA4ZNoX*cXg39WH0!F9?#X~xJmUmyg?cFxc;X`0g#^(?KK1@ykv+#hEijdDiO%yW z_Z}J(nN9O$hd)s#B3XPPD*#w!vv?@)`S+aiHTH_=O1G(nf{O0ZINgo74~_Joq%YcJ zd9y)fl-&CFUUXk&(*h>ntLbG%u_hrG+mM2*IzC!ysJ|(Yh|^0$R&sPvPLoGE;XNJ2 zN_Pe};l1^L1<@oqro_P83_B`gHK|vdMDm`2^I|8k&lO+SpIMc-g^z3`8J2D-MgYtl 
z8EkB>0fk0!o;Cw^*YUQ6e5vEg1xQ0lEo(pSb>0F@=QI5caYgZ)k$r@ez;`}MoAN%2 z@{}!ksEf{%NL%xIdmUBD)F0>j4s;$dhkt+-ONH#BR*m9H;}d)CY7}rF*E_vhh7J3} zWc{sc5*kfpqH8A{D$koc{jN`UJ9kCFQRnX@xoR=6W`A@-TFTaXZJu{Z+vBUgW=swJ z*8l;ezZWqqTNms;fy*a{Wf=~(%Q#%;UG)F(=!&~|E2i1F4UET2ZFdMq%nid9o1Lx^ z2>`U!tt*=a!t#28FR9o`uxq<(jcf%KYRn;wjC9(@Is{_)fe~6SG^LVHI=+~aD}HM@ zRQLVKgYHI9xbfyEVI?$z74mU!?%XpG9i3YtY|RHV&el~np0exMSnKLsyg7EUCcJsQ z+sZ0N-0FF~unjD2-mbvg9#hULS_Y?zYP*8PIrRR~X}LWLT*9NwJLuKZ*_%smJl*^~ zW+3tRJZ^g=d?eJLu2O+cIH5`S@;pM(WG(L+_)kk6Mx5Th@Nf>&J3FWXDR^@Z)v?7iyto`~kplJfsEz-?tU-5(Av~Md_NC zz9dsd^};BhZ4S+PoVRj;H9FvCslr4pKhVjc;Q<>`sCz)m<`WwkmGm_BIwMHKru5v+ z*#=ki%mOLor4wwdT<5RVz+X0l4<=KBy|*a%A>VSG;iue`iVN#E#}~`In;T2Gig0fb zR7kIU8Wq48I*W<)Bvn~P`GiXN09Z{W2S{8RvXK|)IT2Q+H4x4zx;A7`-}SXwi(*~{ zwf~OH=lC>RfdXjP4eBLC&kv<{k)K-TeF&!dzwz=}2r}Q@b5`%$r36p!GV48he?v;Y z=u^W_JF^XQbhT>KxxPfIeO%Wab2@$LrxDVDKK~Lf(WUuD(!avfu1F7*rfpE&!`5Fc zc+IOc$6qF2d~cBBsC@Ct&8?uAu)m0tpeIJ|gnLZyVly7Qs;76@Hc_AB=kB?ND&ZUx z(p0#p{vwT6+5%Pe&|J~o_+iY&>Fz06s!4CuKvv&kLC3p_afd#M)b82r`E^`J?hV@g z;s>)Fm87I{4xE2m5dPzRyO4g7<62+l1%Lv{i_n>M85Apk%gMNeHF)aUzlGUSr-%CldhRm7=tK zw5;i3A$ZmaS}LL5^xt_J+Kcu_5*u>V~H zW!j==D(y-cBAOjF1jlc80)faglS26n{zVCJga6fafzwmehSE_~0EX)COs{*-Unn`V zs?K2B2!(~?G>f$VTV;NpM!EdIdqRK?lV6is&Fu6`G4g(8-h_UV)xg#9-<~)es}i-) z{TZ%e(v>REX7aARQ~tzS|Db<;{Uh_mT4H}Oe{FBqD5aQ9CeVcS$IuV_zDj{~Rap0m zyy93|F1}+8rLXmUk@D~K1>V?+f~In3_=|xpJmBL`^4u6S_oMg=3EnJu&Tqt&rtJ1S zo8B|*had#h1*Sjo!yE&Q=3IggQEh(FGxcAePha&Pn?7)gs^q|OSRm84cv|}3J^?w< z?;>4O?!$nd$Dftt(!yBo_c#heaj5`npcdsy71r~B4HRFDmWe#ynE(0ldnxRj3YftE ze3C2pJT`UjP0C+q0_UA|D^}xJR1YDN{x=T!LS6)G4L=uU!=fMeZeD}`+i~TW1$T^b zl=X$_6Vw|n2 zenkq~{5M|zee;GI0_iWwJM^rf{I6tIaz@$z^53sYWxRrL5$LRctN!}ms{*bKXi`&( z+payg!U5JA{+%zso7K`}eMx~u;Mnf@9%>w{!gAfalc7A!eKo(B;Y;~9iheTz0CHcV zULrT%{7*mQzs1pKp)B$iKOoE}u>1~!SpS|LQ2RbZz$yZw{eMsgFdF&K=t`Bm)6+)W zH}&7o&Ca1%w%$7ZsS!ACDF-3`0#6}msN{{;P*_J&ggdWEH>nn5o<%nF#ZvtP^naWN zzT2pFfV3(gf5Jdh2A3+yymGenZTolYgPDOt%h{vKdI&&XSp@hFs*(MJ6;?(aT7E4< 
zR-QB!4Du8zl``ng9#nx$JT1~>rvSk_cCWopc?uH?Q)}mUs6K})(4}$Vka58K+OYe| z+4>PeP2UO8im(lc1O$P3>vO0HQ;FUrrHffZd&i)lJ=Vw}>lu1c-;n}+ZC3WcUKvBDFnxAGcsC~6+f3ettvi1z?2F%_wV1@eyS^MmdJ6PHVM|K zmP8HlwjQkL;xf3nA*u}x6`0ra0X+7j)0a;VKgNf(MzV+9njH_ln(Ptk&iT`$fY!`E zuRTko`?-snvq!Goit@+TUccb9^*svYoZE=gJuj*mI^?p|TV3U!NVSbU6_Pk9fS~F| zjLKupR^zu~ZrhkTsZRS=X)lVQgnl-Yn_9fsb4TD;|wzF{}Cj`nT}y%LG?#*jJwVgv0iJ=Td!nFZF&n|ASkoGz3S= zpmMsG@%*24i)=;e#ZmVC2F6o-!V|~*P1n=K?OnvGskO>cNkfC@-EwVE&~^8!vGy(RUdK?Bc*k$Lr3wKNacyN;y2F0Y$8J!{O@h@S=K zqmHO=5imnkgkBsKL6$e|G_0-B%FEAznMagz#h4qmSL%U16Ti6E(#|_*BlOkz8g%pq zqz@lh?OmOn#O#bH{?d{Vhx^mY_1w+{G;|FE0l>q?G}2I2na;8u9u>U(G(tv6*`1%{ ziH*gf5fhu^x-Z!$TZEdcCk}`1u2854cQp{nXl;eQ zj;zOB*Mg6n`g;4ywl+36Tw}%}LIr@azJSYZK61*eyyA1!8EO?3=E-0{p-A}@Bn@tE zcJ&xdRwiEk(~stxg4%LNJ+rKT+|=gL@5m^)YPT{KOmWdW)K|5&xn&tM)^HSIdpeap z{H`Ho^H2XO^kZOf zupA2uODoK(s!V)AfDi1|V^n=a#w!J~kvDi)A9i%E_!a>VH`?|i#U*9&kB^9oLP+7< z1L26Voy5gMrz6U|#BpG#ml~D&TOC5VvSO)oVSalP+Bt2c&`AxaG= z(z*ffpDsJVlz~D$IKyJ^to2&QI(pDlcB7bObAxJWV@>x3W zJ^ciSv(v(xo9nEirXng_A~EMLK!WpIWNi-VYz5)GygY`#yy~X^w>MV`)cW}7fA!_i zQ`qm~_-U)Er*EiqYZIHwPx+|6I8MjDSC6Yafp(7B8nEGL&I4DIPvSA&MG7)GL1TvW ze+~&q;uaeq0l2b32om0**$RO>f58q9_Nw}TF|`%v!k~^kez0S3njb#;A!e5xP{P%q zL!{h#G18Q2#$qvsay-DFp0QS+|8>x1w4&L0rkgy~YZ1Qs0z)f9uUt12HbyUhVgptf zmF}J-WcdE*TRLOK5gzwm;$N)BW&nb|xk>crtKpLs7S8lup0~P1u71g!|C_Dk2%((h z;+-8)I8kffkF6zTSm49=3HE7WvVZ=sG?XpRqT!)<%ZZm7*whF1QCL9zVK7Bsapfbe zjC!m+FyVno#NhJB_VTA97&(T;z`&w1ENfBLPze;AxsHxOJVJGsiRK>C0ve>>mm47D z_{UG6l}p<)mT1`oP5wPq-+j1v_w?+XJCSdJQeX5x=R9uFPaFw0!0gwVK0`34#ZSf$ ze^i~xJ*}(n-$}ad&(i)##-pe!zI-u&D3ytUktaA22BwUbdoy?E{hQZY4@FNh7N7uu zzllL}kSovC$iP$mg~0$IY1ls3+Xtu7QI%m}&OsdszMB3}pq(0?;~Kz{dWUvAlGeH4 zT`#8NnmHe_#7iRLX>8oNw!hEe^>GR{e3B-S1x7h@S*|-eDdX`W6_b0bW+hG@ZQ^LW zP;+um+3VH8BJjYpMio$3dBc#jD#f`2%}2K0ba$n9GZM$ycF_+X0s%7Z$HbtX%E$V-kAp zx09g|0bE!v^9MA_yk2B`r-TnYd1lCeCE@B!>#rRw{Umw zhWD^4QXP$e$ulee-4a9fM^nv2E)ha4yWdPVVbV-*+~o{5hd+ z#-`6hg7+EXmHCNzk)Gba=A`>6)8lvcOG=cT_|=Nz324>g(UemJuQSA!U9+w`I#e_y 
zcRmx>6^*n8&aafhtWb-5@$p)eNs!nVf6@$Jyu$55b+^{N(xy7< zYwpKNe1W^Sa}D=p8u{P}2BQtBT^64vZ=^*mhWKP{n>;Z7Q7+YPlVhIa?ede%sW6yx zxLes%NMyyndzS}wd2>6x^9_zhEFmX~V$75(KW*TNrvGx@RQ@vTbWnMoi_$)}KF0l{ zNJB|D2VbSUc1+R4$juUCP{1?l=A>*n{2RzZ;!yAi;Xt=nEQiul(oKE!_mACQ69J=G zfpJk5&n8eT;Z0J0SDUU|4>xpbVJf*p_qD@ewoplRspxf@-sM1Qy8L>%xEh-m*u0R+ z3q6$Z;1^_g9fFGHG^xsNf%X0EaW7>!J*T>Ie2bP6xtV0W!*CImc3wqGlUFA#pH3O4 z)#tW~3T1N>NV(!hwXe%Q%~+#hW##0?vn z^N2zYYDxvHvX?o=*ZfAaTP(CIKCn^}6b( z^H+s$A!NHOwrQ94ySMu#{?2tz57mdF1$pnXgV7|xfiuEr6|%%fxuRA8WC$9F4xnZtRS@_qybNCDqH!s8+85Dv?8i>WHJ`T~c@yRNh|eptebhAGtS!~fk3))Byo>K* zCZ9|T=Idw>YRC%En656)L4e zl}!SDesR9WxRLz23p~<=fxP_*Jjth4B&c^lW5afR+(~tTdXQ{Hk-Zo* zoMhDS71wkABDU-phc!0WbXUBkp8O^6tQ}OiZknEHF3cStd<;o=VTjDT5Ww!5S(Gmj z7uMzHuB6_RmlNjl{)bRcfDpomDt{1l-{7QI+gS&zFUr*EZcb&Vfv|F{{Mpf*DtJKy zrcdW@6b7G|Z*sXo!o5G!s89EtSy`aGM0pZDJqorzeGIXD z8kMX|PnJZ9WFj2HrbjCYfraBa>4@E&%kAfSI$VU_L|uBCcz0Yu`&KSNPBggh69nWB z8c+&@$`*)Ls2K_0k&ce_BC@@cPl3+5OQ-DmT59B^TXw#B8S8#pYmofjrgaWUEu#j; z_y>FK)*F7)76=FEGyJpf6J=)M;>RAr+!1wG0=Kyzd_7UFPUB~yOe1Px5tJw9)dt_) zd#}2@Fu(Q^!WEEN2_DfNy%%#^XX3a&^dp7RM%Pb6_H$|eDhLxsFLz-Y)Td*2oKctf z$bEc&Rwi#h_cXwojoueHmtPxn6JKqcZdHG-+Ez!bs7^FPd#os4hH^V*yNI}=t! 
zKG;W$xfx4nsd7M&#HO+k@`qF_3cHgUMuY=qynVYgrMWD-BQi-#7wgbxNX7==j23t} zP^iL*KtTOrovibceJ~aR(N{4&`=AyPxd|T>Y9d(jp>KI2lHLZD$&b_eWj=iv-1noB zMo{t}64=H&F&`5PPed8#NOS&bg~KKlf%y%UKrYiO zAMqUG`^&ZO649S1Gi3_fW@0}K#*VNKPuRna=kyoXA0;;MUoNYLh-$zWc1k6PM)w=V zoC`k1Po#S*GfR3qN8wW9#+xOUyOpm9(ka<~e7FFWg4*Va;n3eNZ}duysfD0OIST0# zSsW<3T?yX&fX9D$mtg9zj2Q{JYX=<~3i`ksPQPO?3DZjb*ohrOheEmzj(&f(72D)m zO6GG{bdbs>Jy1hrvloGctub_)dtY1cY~IQZRv=8RJG-1W@p^g$S#nZ8GM zadD81l-0wStUGddT7kM{GmtiQVKw)%bv429^#G~bB=HLi#7k`3{K1s^piVJ;8y zyaq-inx7JN_gr31rSd4l)quIIKVD4ogj`3;h)HPj(WcL6;722fIX(@gLdywXON z4NzD7yy61qaK2JdA|osVUj`q@%7+~io4lR!5;+ku=Nk!%q|-~MyuEuB&5I)h&vQ5d zJTX@dZsTF814MXY?hJKCYhw6OVCx!2XxUA}+6dQpX~pKFW6KQ;+V}vCuwkMF!f=Pn zS;g1r%Tq;8aVWWlnD9wFwrN#ACHzqhPHy~}QcckT4)5aWeirh7;*|)XKgHzI)GbZh z@pSJ-nWwU{a6~}~=u~|@D6nr(Qa6=D<8U1nt0(l}#PcGq-w zZrv*$YWP}apqzR+VwRUr2O*{hvh?M}8%yqv)?0J|ypG&LpZ;aS5}IKsteAvINNdB_ ztc5tlc*GIgc@Hc}_g;5USl__j9m*CVjwWUCQ1)dUpx{ZqmKq`zzqnP)G{jSt4Hosn zk>1(;DL5!1D6l@6{HlZ(%(6Rz(p;ZYI+`3Y(=KeVY$#5C2OXa=nc)CRY9yEG7R^}x z8d1i&%L{*l+qR&xZ9kYOv|Rjs=~O{|7-AsJos%;u!Y9l`dGVBr;oQC7W7C;v2$l++ zcO*Axi}txOnPqz)V(<;NJ>h)5UpAtXR!jjlgIUJT=HmWLnS`i-r(H5ijJb8p`CMP{ zDiv_gGZ-K=m)(T&eURqe6UIr0xbA)dfc#e0?y_3~8P`JqD*Ky46Jwph$W9ujhHz;} z{d~Bk4V&&+C8RoMRWY_K5&OVDY@rg@__=)eQ(rQf&sikH!E#=l!9a2fm2tA^QS46g<-mVslTat*TW_>3W@< zT6pQVl54@~X(S>Dn@wMzfLaI4o!Kuxk1I^8d7;=6S&6Q^qzuuZ%1Sz0|A`iyUETv) z?R|sdtL?`HYdQXnUi$|Vxp=p#qhAf&zVwDUyPOy2y9ZgO@%Wu%m$-vTCN%7HSVW*I4XtYqqsX| ze8PU=$rp$0@7C(@m+(U%f9TtUrAWYkw8a@B&5D0m#kP;F&XUw=lN z5KpwUq4(!k=6*-}SY@lOj(Pw&vr)JIV-kT8whL{cxmn1-T+NF4OYO((q2NX)IS(EY zU9AF^8En?`2v!v-GP#R>J6|WA2(*t{e3!{B?DbcAyn?CamkgMZS!HT1=x#*Ltt_uR zf=OQap~zOr2C|U%5G`>XROfbXE&EhUQkP<@dBIjZZJ@WFwGw=VO{%%~^a}SPt8_|5 z3N_eexYQ7PEtZL#DTCATkm<4Z+z!`l5B^xAjSG6$DKjB{Def~}@%Gz|Q!(~bs(n-j@YJXYtz7Ng49j!;nPywZ-0 zx`V|LY4nb#21_JAZ>c0L$?-i2LxUn%ow6;L)pGESb!Zqj@=vaoUfAZIcd0jdLHeq& z^R!e7Yx?fegvyYzy+61e&Zl0@NTM;Uwj;}4j*IDqB>q}*ldQ}4`Rq&1OeZ{e7T}tH zrgGwcIWXouK&ug@R%(_|rrPm?id@_$Q5+yPgc^o*s@^hjsJtR36xd?&JmdBC;29=o 
z3e9o#IBehUSxkT7A@z4BoM0<(z@kD7F(mE5U-L9{e%K0$`z&)dI`$~PHgvbLy~1BE~M~!bd$z9=uY-o@m-omyWR6_czWZuo&pv%HlzfrC+Z<>lH^%q=zcJ9nZaAR;yM}akX9pP5Hp?pAetMj zPFEkYII&2(3!m#tk~dXQFE~SnYa|Isf06^mG?ug7&fAFu^*u73W1SPa(U*Zrcpykr z^s_=9*#O`v6yh@MY7sCV%-@v5F}yml0CS3 z@y@W$uq%U7UD6EF)dqwI`0wR2mS4-LbP@!U`#0^u1lkTT#s`qIAlbJaktS-ki;RQ2 zbVLc5I>v1HB#$M1P#B@1l~Zp*8<~l_TI(?VDYqbzf<~RVYoU;F6wwe6G&JY^Ia)V@djZ;2eEEulsP9AIRs9FY$aHP=Wq^52J_5 zp~Sz0FCf#ez(N2~;}&)m6TBhsaxKzWY^&eOqIQlD+(HzQT?Cu+@K{gOV3_f6hjppb zvC76ZB1ZV>J5*?ZZG&GSn%k&&Y`Jxc!Ele$!)*dxC_9X+kdVl1O^PiR-gkddS4Ecv zInLcvu1uG+S<`-il)(meb$;_ro~|iFa9@E(dPE>`(S~mm!|PhgzU;8T_%&ULgzG*u z_70nd+m{9uZ4P8Y25pH5lo08i=)-Wdw`r?@hEmg+r-pp<#Wf;? zYXJe?5idd>Xr~Jr3@RS>3QQW;pqNoMTdA4|;JQ4|+O zLmDe$eP`KvO?PKUgxqJqz_qn(zv%Q;Zq{`GBK7HZAHQ7P9eFAx7%|Qopfe0YOpd53 z*5@ou`fIlnJB$yryhGw z`+&^b4d3!I0-R(Rv~Ex5`09$CAb1UK%fR+)@XQ(x#|h>QR|o8H^SHXc^p#0Jl&pP> zpve-S>zp54V13(~sJP>gx*fMtiv%1$b@9n-7$+f>@w39N%85cEy4MaIwgUM~z%$u8 zQz8sNmP)V=6vfK={G1V^coRma*myi-t1|HsZ}_$9o-UH~Y>qI|z89$zAE$V_VyS*f z_aJbWN{TH0X<#{LF6C|Vb_`R2$?<+Mr1nA$71(v1@7h504b7caFx1N`Y`8R9-N?@P_ByEP&b(Y3;XQ(V^vht5 zPB1Rufk<1^g`lOFKju&HU zHfKB+9cCBT071;fqHRTVz(=sk>;9jm*)I|g2xXs+k&!j(kVju%jL*UeMJzk_lDoUr z@PAVOPc`=2(PF3Y?AgDrW%~|*Ye}*HY=kWH#|AdW{OZ4g*8vY4+ne9rUWa|HHAT?b zw8Y@&_T0Zexya9l6UZ%>6U|snTeFrBN{4zQ08s*L=RwFU;;GFZMT+Au&!^P(3Ca2z zxvLkJPOZQ-CQBj}e2aMRW{l_&?0tI!Vjuzh;tnl#oF(}sq#07WK{|$b58k)W^Q`x`7K^`_VR7bgo$K1)y+7OjKGPQ= zVPM?a2mSYa>wWF}7U)s5tmggwMSUZK78p|vpJN#Vqj8y0;86>ITBRU7NSX;J?8lPZ zvC4O1LrdVOK4E`%jdb=d3T`p=ncNAZr!er>y@G2Im-*`Z&vf^AB_SOUH$ z4e>oXtdz5;9BF4q83Ufoh@^XxH-{$GeB9Si>f| z(z4og39;0{r3w*ScB}Yd<00BXs+kNDM830+!LQf~JKaB?^TMK7bpLhd)MS`{Mg!8a zf3G=-_a5eVmP&Z|!98Ka2Muz-1Z9RoNoPw8J8_z5lD&{6HUaGNv!T(Hu+qKxMWxZ< z>0XRa6Qw6bUbo4yvF|~oG2^NL5mTve?BxaQ{6>*J8y*!nGJonokW#6oAO! zSAXR72wZXxY#~IxeR`q#X}huA^7WoF8T{S5`>V!>Bk6>_hs5*t zIg{m6Z*aX{5r5BE;qF*{n&%z41ZL0Sv#Qr;%QXl-q zzDbtUT|ZqZIt^`fSJkJu?nM01y1J)9iJf3NYy{6Jh2%jjjnB!KA*GAq-_0M%MET3b zG|<@it*xmXYhCUgESBcBYGyww92!cp57s^IkawK18Jaqd@g=dQxy|Z&VhqD$x7!8! 
zXL}y)xzwbw%S6--u+0WJIoDs;n$z$Pl3I29<4J83d;T=syIrutwA(KUnz2LSC_gn9 zFboh4BPiGxTp9O-q6tjtFOm~$iD^a2O2DQG0vSPH0uIjJ9I28fTfMgb`n1|xK;?gM z|G!${?lfvJf$)Q}EX8EKQr}S&5T{86a=H&O|fF<|4gZvssXYHLk9|@rT z!U+??jvMw#{g$LgJXLphhZ81WTXoO+0k3L{UW9%b9#$O~ml0V0!&lJXT?_&-P`O7G zxY9E9e7xrl#i&MYxvxcC$DGTH0Mp|xIf3-%Cg0sPpHW~#xoM-yNJxd;lQ8id zrMMRAnO;2Pr3otscb$p2fQFr0^94@VLg9UWsC>&~CKqX*KI^uVSpIIa{+`_W=ON9> z>rD>>gYKbKE^uXCMe`!k4h?`twC`A&Vd z4}F;08+^Yzkt-kku;5_HJEv{tg<0r7B0KT`8?iNA4;8$?U!YaAHN%HYk?1<7vhY62 zi@bEB7EJLc2~EiR`-=*CU}Op5aH%A4-6mX;iw(RmF5lx}5l;E&{OBS1u<}S@cBm1o zdPtL_WLs+<#YtJ4#d|77hx$9hR!pqYv>zSf+5z9_2}&-1KLV~4HSwo3yyn^tDz)o+ zQBiQW_FH#&gkxX6Gm!Yp3dDFl(PVHYirfM7HXMHn`SB~J5=Z`a+{1P{2+6NA*J&~H zm^Fb3FZ)o*hlH#%04*>twGI>Ihj;`ouL-Z=? zif{%Lw+3lY9m*|B`7sz%Zo5QeKO^4f)=%;P*ZEPC&#hk68-z<*FCnRrgXTka2b0GN z9PIv98YTN=mCY|q19XT}Xuu=;Q259^{sz`i1 zYB4S@gJSY3)w1}zd*~r~tp+@997s=2C`c2X7FkINgR*?_%xpzxMZ~^SW@|9+cM4Bv zkE#1WrM`f`E~i^f#;{JEENr7=T)Z4# z4g(S?=HAD>=%CFhDe%Sdl%(dTHBoB?eL!wiDFlMAGu=kQXE zv~|QwPm2Yq&hy=RoS*{zu7F)=H&g4+)^+$SI(lltgd(0?!w8umk9k@w#J$(;U6>Bn z11p}$a>Lj9i97iCW)XD!dep`b?t}q2*{vNp^+e3xHDp;ST*Lm)#{`&w!L@gOFfaHN zmA(8ybgoPU!CUYf6q^!TMBZ;M)LK2E!ns0^*Et|6L8mam34zX=ZEU!r5?-0pWL^B- zSW}^v%DaciuYKs&{~BAazUQ@j|0g>1XZqF!lnF!%c9CSjHQzl)u-C1)(^$z5Wup+) z7CXH>z#wc;|d;m#cHsfMvJpCxqaFFjeWfnQpdAd1R=X9y>} zw)&b?Dh}yjS~{>I_6-M@X5ANkOx$nil65-vQLn~vc7dUQt={~5NyRLEPxoeAz6$%r zps=?QwNAiE!xA6Y{jJ)JP_?fZ&DuDwdyoahZ1xN8hrZS*a9YY~7up9#8+13))7Sfb z(QtwIv38*NMO+*vgg#7j(IlI$TlAqBhZIbcnw1|2&5G*?B4PNj#&kVGCf2sz%%)kS z@oW61qXaO?d}w^x5DJe=^GdIL1CyNaigik>^Va`Lh#RKZ0? 
zRR2?MLtLRX!@w{;Wxr6Nd^Jbv*e#C{!$I$su9c5KB6gw4RhG-pf?rF--I1_@|I?;LGvDti#Aj z*%+!>9Xd|Vyn6}aUEW)R8YEQAJ-}vy%SF1gNo#{`y6g3P^Aqb|E#MJFj-=lpLNZKJ z0$Z0PXzlXXf>l>hJgb}f8go*uRjaz~wOrvFC#eb*%3{`q){A%&!Z;JFxo2`bplY7d zCRq|?AJhKby#4DCMs(LGB4D%TYBd%w$6s*MUdh%nr%|=kbiDUG)?Kq&aOMZWTE(o% z24%vBt+q$0<)fub-n~S@V$o+qb0@qu$HOYU)9SB#qrTE1ik@IwsjYp|q>x925&w7h z`V|@UPbv_ZvkGzhkDF-31<6n{0|UJrLQ5d}(BX^uI7p{4*9r5k+|$>1?I`#%70ucj zrCWrc($}7Tdm5+EwMw-Zy+v6)SOI3KF%G=6`s`I|-Zd=Ua7zsJeoi~*E+9(~gN?cT7N%uao%eV?Hoi7m8;Q4*zYM4n(hQ=}6t>FjrwUodvmJ}=8439kPI%g}GI z%jP#GA_ek?XAsq6?wat5oy__nLWo{YIwL$4_L@hvT$lP5#!Ph>XVLJ+DXfbqc1@nh zLA4jhWk*Z&>qtHPAKyF&=hluKf1O~j?0?P>k%EU9^17eR?vw!EoCRMj&^pzIy$~3R zwt#CjR-W?-XR4kb_naQKh9guR3sG6NU4^Sw*Q2I*e?e_{If~Qzg?=>}4AK?n%NP0} zzYl$+EDL>M0UBLr2iphV1vY&jbKb-E9jDg}dY1rAC6f~xs^*(<5 z2TC@nB~(Ri_u-&*pAnLgHnyeTb?*HE@RZCix~}@eR^V(BpbZP94c3#P5h6}eKP3nD z?Tzgq6%8eVT!nIe(zA^T@LkM*{isjKVQyWE?4~XqD{=&u&%3wwL&oQ+?BGlEY zR_kU}Hu3gMXh6-zVd=5BnQVX|e@?2P6#6Upg81W5g5SA2Bql}TA&nD;b4!ErS!fra zyTBui3u@7;xF|#!mFSd|JeTglLm*++hd9`j3o`+G!vRQTPA4xF#)ZM71Xe(VXBLyh z1xD~-M;5EeB$b2HIa+*@T24B;NCgMkJWI{fiO1gGA*+Y1K(+D%x$fns_ zjJjBbw41wkohXn|h@{6ueVew_c)|EPT%pJxX}l=r5q#EIBIr{5-0)8jB_AKQIPMRo zZo}=SL4+f`0qsd9EtaJ8vKWhmZXLv+m70>rjF~aI#mQeB-1)N9+m*XJxgi9ket8qB1i9 zUYs9f&oCH(NxLL4smNRRp){RIV)LOqh7x%#nx^$)BkoVOMDWv^wmmX(cHm35airRX z*YcK(*tou#oj2-h1gPs**l%$FkMk4ptdL6k*02gPmGFYw_HSAOKv}itF#TNXeK@eL z{aEi@%iF@+e7qj&e#=`+QZ|cqrqm>pnDFz$KON1%*-1fopGlR94eFrex{=fZ=|E6K zb`K31Vz*~vYmz5=Sn<3$$aTbg^NaF64MH(?Z!bS{I2M1NLPlUj%aJ-ED*Ku6trg?P z{Hj$A2He-v99oS0{8AEBFiIX7QeyU_L(Hv!J-z-V`~VIezpnNQvxPMO9}4lB>}}!?svjl4+*q%PR1&AZ?^tK^kkmR1oK$6sI2LFss|3~=xK*g zjsya87fsA~nXXC`>RUmQ4{=n?}U|!YHHuSvD}7h@i2J#0uD&Fusy2%d}BZR zpY&@POcO1R&1Fwhetysz1-!=Jnig?Zgh975m0dCQu8*%hGox4W$eLK)g*hlSvQ&@g z%r7T%i)&Zz5@z&!C%P@m?p({qvaPNzh&jx*GlrIz*qwie;*ag}n;r&Xr7c^J_H)#| zY+P$Oq+a4Q+&;l-TU(f(Ji*cvXj#g?3G_ge~&UagGtiSm$Rxb#+5I z$45W22>p0so34slPFI%AU!SeYzCM|8baB;behN9a``8SGe>5tH?s8_XT~C|O@3TU> z2hx){S26fCamJ4(90N@FlQPij}UuwPu#WD 
znGEirt=T8)ms!MEspjnzzdHWH7O&4I+%u*b97`>}cbyQ2USvgE&m8>UvkkB)Jf(`J z2fT`10Iw|XjQ6yY%iTCCUV1dSKxs$<$M#{Q$(H%d5u~gR^fy=XzLD#fP5Pm8Sqqzh ztS8t-@RHgX(NBuA`H?_dyE*2p*{rskhrI5m(@@7^|Fm1B!NYPq5%hk3%F#}h(&^55 zVAZmW-nu29%h)KiXL_^BRj#(38*`IHakM$Jq}psPRF4LMu1U{pU%}_qWvO33e9=&K zitrNwK*8!b@*5DfrRkNcrQG;p^U+2T)}+$?+9^w~Y@qr;*ySX_S0m`-@LTj8?L8<8 zf~|ei)d+-6xAbFtRa2l;DJGxpBcBj19Srl_igeyOCdLNC08Vus4~^U`*G}oS8JwmKG+}13tT~cP=dPm$lpx9#xj$r0%MXb8|vOnh) z!a&Ky&##qm{OXW;qze#mD#G)(C~s@nCJWX5Apad?WypfCA9q!$9f*~3P%f~o&ioIC zdZ0$#Rnpl**YGU2n>eSxi&0MRI5c;|6JcK4650Hd6&fP=etXD%@7c6Btuk>qx?;3p zCvAMG>z197AxNU!nRrEZXL#F{@o^P-#*?;e^fd$8m7I>vw3-%c^x+`YM}_C>eidXz_}QSXJt=9W^@ zPH~kQ#lRhk>Fq<2=KMe|b))ncE)4gnKaLW)KqBBhZWr^4;SaO6qGII7pt4Gvbv9&fdO2 znp~w8f1hwPa(}_c_{0-o7Z?|bfM3@sALkd(R@vZJFjwVYR6+K6n-Hr`g3h4u!Zh^w zm~0(Eg8XgyvwJhm+zc=6q}w5{2*;RXVoJh6f) zntE6QRz)du?D9IT)j7QHDjTwm@cNM`+q1l9FeKo1XQEmg`9WvBhy`tMJEZogQ|FOZk zk(uQ;)$<3Ur;ynD5H9rG6)WelRgb#QhS7_+%Nkr)i8+<-W?4YGj?4$4nUfK78WX2WZhMEvUoBTD5Tho{Y<8KJ`sqe>4hQhV%?M%q2;uC_ zaa@G`qiI*H&dR-~Rb6`PtA&#*jxuxTRV=arP-R@4`>5?^8}kkC9A(8)bR>NIVLT}%9ocs8dt8LBJgMmfk;Gu3=m#5Y~5w?8wiTT4)>$)Fm5MW#~(+BTqy z_YVZ%HKl8OZ?=JFAvo)$4a&vQ$HADkxc&g>gN0AGbnv%4CsgsD1^8}-+H>9doE#pN z(#x9mVBx!n2c@d4%a1*EX}AoB$;#%G(AnCTsUGryB!_79mkt>|LpCL_J+ljBCODz# zDtw|Qf^Kd+G9t|mhd2IYJXm%Y_cVluTC)4tSzYDAOQn9|a4#HQc9sRg-D>0%xjF{1 zZd#+!sa_*7@$Tk!D}y?Myfzl@ZJ{P^8zz+45-(hkiy5K@z(#9Cs@1>sVUW|q;Rf}(|Q2q9=@R#kJS5{jBuU1Yi3Rn(yH(BgfEeDw1=Cejs zp4Qpz5?uetbNTM1a}4DEL>dDqv}c%D9%d#D90lGt-_&k1eW}r(&nca$pMf8V!8SH1kq{j;=-=Ww2T;V)NF~?Z{ z?SPP~Z4n~rS^vY=-Nuf`#Tm@X5T8jOFVdx3#e~1CfC>etLD)o>36`Xe2qbh(6?b3 zCkRr@5_;a{ACG{;UNvlHv>a_^@?&b#8(DIF#(Sqga#ViL-RH; zZ2Kj*IVR&{I9`TClTezkxftFwV^Haf6(JW4J;b^@-~`JXRJ9iLQtAsI2T%8p7XKTE z049<0CUC#&TvJs%>2Wx+m72eCxzoS4YITULSQ?MU}HO$;6};P zFU`1*ml<=yfoU1`f!o6->*XGL*xQ!sW^*_0%#eg-sSH2oKoL{%qS|qZzN{GozLO%A zfoOoq6#ZWRlBhxR;w-SHfO{0Iwb}?K?D8q?UM~Y!$Tz3LJ6t6>==qLs zzLaG|B)W+CP(mtJB}B*n0B%BhY=1l95yp4D_P2#J*_`fK1!n8+3v$)Oq$Z3|`S4MX z_mrS!``&3H*X*ib-&-!E!T5T_q5;=g3d));wNC 
zY?{6}2Cy98<8p4}0zy)AQzAXVy$7P7;3ONGpY9nz)nuHqq0+-xU^Lm+ z5Uor8S?S(X{W@GWgteEuZ^&%quBH%nQdG_TgFFrBhlmdfp{@(2GS%06#h%@VYMtc0 z-Zoh)Zy)dLQ5z!|$lc+xNaDN7RMu}zg(bANm3o9r@*l0iW8ryj{l{1K6E6Y$596Gn zA|+ydrjCO5gOpa!{gH{AR6b^iZbNa7xodONr-uE+wA0PG%Ow0uh+jXKlT0CQR-0NY zEh7gmvAG{~uO2WTD7&Fe?M;pSPqUxA${)-MKEb+TPHkcP;1<;Oq?O!FR$Me*=F|5- zM}&41P&x92$g}v^xft8&(T|B$=U+FrHff_OGmHwH?#~NPszaWuZP6*1E2mp?r6`bi zS&l^FuJW_o&$F}Gd{$y$R&}aI2keDj=am{8ked@0>%C9A+%CR%Rjkt1A6~{&@D$-^ zQPQ&z?la%`+!y=!>U@0V3Asey-YJ*c@cFj8HteEKe+m(hw4KFs%jx>De_r`H)_FGRwr{wxjF3E^^6l%3rhJg$3E=mqJk4WFynzmUdvSc;fId~F7SYgQ6vcQ7=fK6G61X|AKiQ7rcwQolXWXUdsOB=WhR0h!2!fg>*XP# zmv{w$_pq5l`{$IB`-|}xY=+KmJ>Q!D;fe49UBw45PppiV#kekeO=KxG;e1og6B@45 z?EujZU4I>+j(@Q{PanG!*3V;!P_YL{6eTpKQk3|+Z_8T?F(0oTy3~a;VL%%qjuG5N zGI%}@Kj2})5r-G|)^U!YAEZqRmWHd+0!hx8pRk>s_tvFQu{tk{T z=g%|qkD~-=)Q#mVSc#!^ycm6NAr##5HZ;UsBcmItOgX)RrU?K@tdU^_Ec?T4L&M9T z9U7eE&Q92{GG z4N&Xw4ZIC$A$@P`&XCId55Dh?RS18V54B&CQU-1qZQ~83ZSc}4uUV+PGup(FwDc}1 zgcQZ1IWR(^Xb@k+OKeIIN9knTelM6;T~=`U^2XaHV>Bt=N!9YKgFwB3XGmSKrPrH0 zg^tJDB;W>qwA8$g+oHzVd=|jrN*^euvJC%+EhNtY_r*2DNycp*V2#KiY!upFv&2Ow z<|S8wgB5(An5usmLn-LY$G{;NmgX(cB5M?NpFvPzN+(4v#!Cfk!*D5KH|xhP`^#r* zX=pdm9t()qa{y+V@x$$V?_X0V6&JRH$jL)Y^1iWSkCO4BD-hDaNonNR?Y0JOZr(tS80JFfVoO6RUOF!?dxa4?Eqb0iW zG2cc!TYvf&%NPgnEXEd3d1;iu+MHGUIKIF3mISQwHtLfIH-CRIsSNsRJL`HVW&_(NdB#ILQVZqQ=#PEQtd(-#i6YACJX#419Zvh=?Y+geYMy^-w7mwq zS=--SK&y=VR<$}>qkcdoh*C3A4c(jRZKs1VSgzWX)f2P3H;sX2zp#l)X(;)C zQc1sh?VKVZV&5q^@jKm*yA2 zoFXCgmURv=6zq%G0y3<8Uf7Wp9{%HU8qE)Zpcp`$npBNjDV81I4aX~oI?2VG^XjeG zzA#wygSchyZ=k>yb@#OiWWaOBcCc)dGFipVpq0c-=2}Dn?zZ$kkrv4^@CXf}$XsOj)a+o&qPB^&~3payfY_WuauFt(*sg`;pE67}lznJMA z?)iR{)zv-=_xNh%re3KYFS@QobB*>3*(I$(52$Vk;}!6LcMW?*UCebhsJ_N;Xc~P@ zQ(^bHE;3q`WHfUb*b~m!B(xFdEpSFAF?}XB-C4o+qS(2S+vPE!r5!S za<5SXCfVL0+z#YDc%T@L;kg_U$wdE3caz-ayM0n^+;#EJKriaO-?|H|=irm#!F7%C zGe)b2F#d7IkSy(Lr#oaf>Tm8$58dO$#JmsTWteUs@3yS#`nD+@+&%Qa?5opFCf`@m z@Yr4~H~)IR3*XwWJA1TfKqobc>MteYpecoKKEDD z(oRN$EIqaNb9SGCF<}*#LAlJd}i!UC_4o+U^zvhfb 
z74f@U*~7jma0b?obzQG6oWCo>Lb~ z^U57Wk)gGpK%I1sfM%(-$bo!;5r%dZgCnpg@}9wp;D#2%rQ*G3NAG(Eq%JaCvJz7~ z{$6a1oo|{W@4ClmZ368apr(*ZlO}2|xj<4q)!v%s?QeWkL&*Y!Kkl;uRx_^8c+_=u zXDOZIHoJB`1NG(pBg_1lMk3^7sb_a7O*?YJWQ{}ezU=<|XNKJh7jyl}l*e{^R>hE# z+bGxm$h|Lt*a);$=0GDkYYVZ&JtBz!ownfsUvAh=`zV|+8i+d@F>0!k!hx^-(_C^z z!``So-AtlxnIeHinl+ymnAeFlEhMDyW)bM7$fH!6%mAPO*bW7WXJb(0zz6`ZDMo=45 z-m7!Q_=2s>?Fol$rjtbZAGvWc>MJgJPwt~r-@~*-Iti7qyqQX~maBwtDK5=@bBV3N zg5VSK`Lw%#D>_>H0gwMc0&!o^sz@xJZ%!p%U`nNtL(NCeGPC^@eHSduw|-k|dj{+$ zm`%z)Otd!SA7=o(L?f;O?a23twaBI)%nIR}XbL?R(GGEG*GGeT4;4@?n7qnE@1P$z zQeSwZbL82-j7J!A+RB9as|A?Smg$a5jqaG&eG22zs&E*2v;s6P$bzi)r=m8!Xec=fp6;-#fD~uEKE> zJTzbIc#<5&N&#u;S=l|tBI3hbD7?^hMi+k?nEkQ;_ovUo;^(L_$sy^!;rYMT!syaW zT*jIzZ+txks{@KM)bk{F8^l`i*UrcZ#YW`RjO+^u6A&qVf zmRHYpddW@xl@hY__@FuObslAZcPhQxsOLR8bCBki4V}oDu$!jcy&Ym&Td64u5Bv#- zJOIE!cFt!ea}ftQfN=8(DjMEOJVTEJmr;2AZ{gj$F0@N-PpFK6Wed*u#L3xM4O)>9 z_>NRw$BiM~MSPZ+$EDI8hO0I9ctNRAy3F4rQ8o_l-QkQl~2ag zA@LYc1yXb5%zC=l0d8u~TK2&6wvqiZGrAG>vX%fKh>Ujl%jK&n5HjC$zD6gstF=nV z8vZB=xX}n#&ck`CbuTgPh8VbMfdE;x6{Fs?#W03F=7^Y3jYjwtJ?}++J^e%;`0_@3mo^u*<#Bd=l`$*G|xPS3IcbT&<3O zR@yi=`5M31g$Tw>xma3_y{Sa-b$Ze^pvO+9e!2EhSz>wWD*I`6?WaH ziF@D?ARs$Z&i4p{H15-vD8RL0r#vwSC-s4vmGNa zz#P|M0zjSv#$R+pg{pU>SzV_+gGSyxTO2(G6nK8FGVTKsU6<8^^tg1p+%W-td4?bIX^qtErw2-i6(57fJjco8>R|6UE4yGj zKUKG;Yi;=~MtV}&km?0&(OjiVs!hp`iyFJvFz>>vbnj9-J z6SYqh6ju>*8RmQ*I5ir6((EJvi$1S6{%WqS?AmFMV&?0!;O1)s?_ZxU&SE;lUY>-9 z@cyBZL;gX&u8o_EGg8Wi!0*-jVKUN;J z8$qI`PvjMgtmBd67B#yO0s9Zkmw@CqZkI*!PN7eNHWSSuu%Lp}D*&?{BLMkeQi2{g z)bcs3&dSJTi5mTsu#cr8JuWRFg8zJay9aM1k-4n-?n* zBX%1oT$|W@jKf!e6Flc4l!<#^SW?Skv}uojEL!$=@1GR-_C%F_F_BENFExxDb^FOb zPN^w6ZveYQ&+fC_irw@AE_Coe(ky1sC*(S#{sg+1p+mDOW-E)DgcM^u+N#qKI5#%o z4d3zrA9A;;cKoF)JwvCvR3S{C1GH*$ z4@QB~CY-|r?^yC?V1i<|1a26ZH5iR7o3|9ZeXvOyAjbWbV^ze03O0VzVJ}hPpRZp= zXJdPERq;q;>8D12!k9b~26@+-wXF@g>j2BE`Mp^%Wx78u&|mVw>`Ot%6j(j~xg_^8 zV(xOyUxvNa#WcntU3IR0 zM<2---1&{20Y6!u`qy+UJaP{-zGOMiv7B0E?haTXoX+K<+8#AWpMZdz)Zz$-w7yh` zAc1ecZw}j*lQetiAXxU>N#|?O{~6Ld|UUg 
zXP8Wi+5gukdibb1vbvLDR^$EtDDF_T&-s@fAP$6F**Fy67m~N>USx049recppT+Bw zTcl%JW1!;m6QnsVOKp4uQbnQ{W`UTRq_6=1ynQ@AovGk#YXES2Qd|I(sVqP6h&tt# z`BGu&WqN1z`}^j>kl1qb?(2!i>#nr%0hKYBX&^0Qbgo}#5a>3AVDsMa#@3>s;Spkz zlMgmW-I=Yt69?&llfqa8EI!0MiT`VA(2dHgHNX#HAWa?z3hgAG z%|(p&hPtX1q7-1NKxve92cuY(gZ0Zn9#zh|Qut&;HUNj-_P(#lo|azNgi$(Lq7}l_ zX8sY7QXz5y9>-;(iFN!cjGiWeiw_W@lS}T2ZS|oTdk9@98o>li;83Erusz6N4d^b( z_DUA);3Erks6&RScd}P`jqqxj-7G@u_6XG?7|8|`%EvvUbO|&s+(QITNBA2qf28_s z?irtPBCDM@($8C?kqIwB$ zPb2lYlPEylY<91@5--TtU+SwL&Ko!a`eRhpPPOHSd*do8VC^54)O9CSFH%d+P!a$m z>VEj3221H4!mL$yjbTc+nYtVFey= zSchh0-(MzQ>Pr^-v*Ji;*2`;)DQ=C;@)y%zpfL7a1odPwFk?7e$H>90QFM!0ce9wL zZ;kZCC)53o%KP;b=&|bH#^UIZ%J#P+7+iY*5@+ppd$#LqaCR_ju!Ab^(dgki=0KP={dI%&|QEt}ZRX4lB##emWpri6>o$kbpLAFPz^ zGPxKbLLZ}H$*4WIO$^e~?C7=RU=ubRFs)}H9CwflC;F3Q>SH`wuJuxlj1<+QV?qb4 zq~o5V{Yvlzm=J%7I!dyQchc4CZ=y(fRM7q&+n24K_Ri8o4eWw7o7jZex|;iOr`>EXrk(XpcX#`h+} z$RB^}Q^Wz3M!9%c90ZI?ib`&dLaTS{y}KpP^rBvII2P-)>MHtUf^4o)JGZ!gjDhg+ z=86(p*$e|WO~lS*Oc=L)Yy_Y6DMAk}_AeH@FQ50P>CnTj6OERal|T096U4940S9BB zoe*o6QQnyO?@rS$X`2^;CutJ@VgSDUivjp2uoz3(4md-<(*0?@KtEjM>~AG_Elk>3 zrZuC;&bO=$On?5==jeu8F!Af)x|}b5s(ifBv9%nN_r@VSdUWacY!_`~_pZDcAUcKe zr5fG%YHyAOr&1O=uMF~LRZo>7p{EQAGZPfuB3CB_m!ny3xcWP&)p)~ zfvE^#K>OB@TYt|CzU&Utu-0D>U1szBPX#?E?rxe0+Ne-N;jbdCIcWAm>`PXDHjyZI zl}7_n4At$yzMi!ocQgAccY}{uGPENZtr%ZzM}#CJ@|;B>T&(|2M)xW@!s`4ac{LSk z;IO!x>MIR6xKLv-4wL4H{@|$P zl-f?P(#FXbkNj5)P@+cUy0_;;{f$P*3uZd(B}(eo`kq9qF{}(jURa>n;zj1(=h_vB zh6jeF@;XC_{&E1oE|3ggyZl6?4)O{_1tGzyC_GY*CgqeOS;lp@!t`9-Xs9!{BWMY`H-N^EdR|`u7^_t(DedWBAR4+i2%81p>rV_f7d$&c-UU(zhZjl=IVjEF~hb?q2r8 zYxi=sH%AEz$xQrpZqNJ&Si%;$g9g+VJ3L~jk^R`2C|Sg`ti1*6r5|q1yni|Dl+;mJ zjS+dz!AtXjRNm%_Nd$6FJl;y{O}|iJ{d52zmx#L)o=@iP@-f&%xEfkE$VM zuS4Z&$Qb*KCyD-{B7x+nf|&Gf7>Di`VtmRE3phQ(?WaSOm)J&E=KDDgEK^x+g{aBT zSPk1il<=_~rla!k7m$_nc}4#Li~mkukF-#Xtw?2<|6BRz?uYNWyzb?a&4dGit1Y^vY!FSMEty)#RR*ZRVW#qy8@2T#@aPqqK8@k)*f!iiB|3= zm<*BFJtHB0-DR!e%}_VMpLM9HhDScPwTEMgXYe&~>bVfE?oJEeA&;NVyFXW&PHSt4 zS5zC8#Par#kZ~*T?h7LB;5O>e7PntcBi 
z_2j*E)8euQ)-9m&OQYeR8o0k{YBj8On&r=vg?=StmS3o6(ZVLAtuZ8KnL&C5=L~u5 zQFL+r2cvwlV>Gc9h3Qq6<|Z$r7Ym!7reU3kOW~<3okk9#l6|MmWxBBFl`|KIbog=t zE!pJght`}(s7VmOntxl4M}Hz75ERN3U+l1N)Wg{QhInWm^&e z(<**XV}!=$2ez76&1Vnky?$ruDD$1yu&hiw!ril1gZ;37ZhScx-kJ#pm}r2&*7f)2*L4 zd=s63{IvT2#|w5gD)GRe-20P2e&-zXH{E0a2LzkR7@S`i_@ z$QREvYmhryCY5yR^_o9uuUftc4w}n-T4_BQEcVdv)xyr>+VJ0w!gZja+%ya4I|8i-#hg*i$L|{ ziN~-kCZhyZ5P`*Z04N8!1tpf*VE)Sg*K7xj9@|+#yyw~m2{>-2S><8LGtp@$!>Qje zrTv~@kb069@t7Co!;X_&+n?z^nB180pvI?^&+rU>BQ9fDTC;3cK#oRFP~09%IGoio z#Jd=@hgdi6wzC zsj}o#A7Ff6-Wf($yTKb@Tk)P~IJKeC8Fh{c%MKPd));yi;$kvypRjETtkh9o-Ih(5 zQg$ceM*eF6J>nvqqx;hU)Xu=U!6!IQEay4~48_&`RT^+W==bQ<&HQz2^ z?6d$G^;`HegnwEC{?7RS(N-O#+6R@I58d~#u&vyPo&Qda0g^-`1@lwtv9UPa;ole@ z(6k`_br*QuDCiGBMBQDxr=bMiSR3X0#eb8iUz^Z@W%pzLe=dk&kB8K3;CsKd)h= zXwQB6s4^O#@GI(C|0`qv`2hzRpfOwR4B?rFNWmWCtc|6&f)y76%TLm|l``gk9_S-qQ!DqTfTa`DP zh5}7wI`5Vqz4BDJH;UveN*C^q9$MqJj01q;9BnFZThPa(rpGyf&w~ZYwwk*p~d-ep95N!iQjvZEu0{oGV~=rX^| zmVkY;PA?yHR0s2yneJ$dPEIC2n9b1~H-F!2zt^(yAk2o(czZwZLyE0oVE15I#3dFO zKdV)?$3=oR%I<8)7j7o?^Ks*(<_`Cuj_haR!k%fDMDA~K_-$vdN@=FoopOk|MLM6Ns+8Af_G2$<#%tkxoQr ztUXm1KTSvUf%~%_=KqXuEPo$B;4$;@V1WWiL1+9qE-k9?&p)7%YI^^Vt8WaiEBL++ z8#TslY`d{-HEEnQY;4=MZ8f~HZQD+x#6&~ zU_~%2@vZ|DXZZ(pzd5VtP+i^C!1s3)Gh9&dX+cM%j7GH9D7P6E=CpG(wv}yZDb>O| zrj@EW#eZ)NJaNZke87Bod%AGu7DpI#x=E|?H~M22Qd`u3(|5AuGUG=a7~w1tU$|ZQBRF6MbmlPrK?U)QtTKN3rXElOG_+qz?>qJ z1V&8*Hgk8f53>){y&qjqg4-#6kL@zhV@V?Ux?LbQ>%LC1k_R=u?oUD=cnsxq+f@<>cda_4;~X(z=!EO{MZvur6uuR28&C? 
zmnLT~;T1aW*Nt{71Go&D@46bl`VhBk`544vXsy+b6*uH$rsmAX)Cf3ffVH7TAE!%3 zEMIRjjp;8|+@WD)1YO8wK_?RE4;bP=Izd7=@)xC>~CEzqWv|>cH>4Cqq86%gQ+vab1+)dSg4AIg_G4vll9+UK&iH2!)H+f z0x?L?PYK3HBZJDvN* zILU}wKG8M9Rx*tT`WZxl3fm$-1=x%~q?flxMNVd!sHAAW_;VkvT}sxvtd{MhHij;m ziS_$M3Lk{sSCV0^bKDtKYo78V&_z<#-f@9Y%O52#S6h9WWI-q^`{x7k=_CX8tx({`GoDnc!GwsgEtE+**&OKn}}bKBPpW!$!c zK|=q=0*a+n@SEm(BmZl9=0}6#defmY+xoILTexvzzw=vRZ>lS%Vo$4}e?WC(Gx<-} z7RJReU~kNZT}{(4`+4X{rt)1p@@2;-BQv?@7@Z75S@BVH^VzwJVZCVRZYY04!pZ2O z(=qFQu&q2=x}0ujp-Jty7G{Tqc1)9|u_g& z)M%TBw6s5s;zXn>$|oUc62^CLh0Ow4&*g@w&*r)v}S8_4c+)vM@e)_I(lswP512nV9{tqA>*lu71@Ib`a0`M%OpRQ}rCA5*Dsr zwx=WzeKXkQU~**5-Gv4p+&s2lU)YNMP(L&pIzAO6uUoL?D#)%@ z;i;~$qul0Q=l)zF1!3}}aF?O&UQ0^8<|6b8!XGPxG zrdjU%i^pr6_{jacRp#;OK<4mw z&(0Pw!YIO5si@!ot^k*fO@{-b3&=pUT#pw>0Akj2N3_(@^A{f*|1GV} z7XKAI94c(cUrt#1t@v74z5lwhe~OrmGGod7MBHm{PS57$w?jtO`X=(h;~;e6Rx(0I1y{%*N#Fh z%h=zEw(7jc5J)LwFzjwMwn|*_2}zt!C#_`bqbV+TSn47=_DYerX6X4q zLMLIa6t9tcf#`GYswf?-n76Mv_#2A+Zzo{y1k;&%8fN!JtP9FXCMttg$Avx>Q0m!G z#~|}#{3FPdCK$_&1UQh#Jl{;h(8Y+MZr98^$0DNILwp#zd?Jgy6}=DR2IbR}Sgn;r zZMVPu1ye|-AJ^%OYP*2C8v+SsLERqam78Dv4$xc|=If-_3J2NAfqEWhe}t7)-=Bgyna+7; zX-^cg(e@g3Z2h{8fEooS3Mz(Tt|vi&aEuioImlDTi2z?WQ$Xxa4nXDLw?JJX+mm;L zEFk%}K?sk3A_D=W9g1Dr2eO9(*dFHvj)8cO%H45t*|6T$XrGS-SrW#!ak8XG^>Acj zG}OPKQ6mkrTX7jB2;-w39f<>ZiNw;xD1_CmF?1+5X1Zh-j_1VCkb~MR&N5^G{Q$-p zc#zT>J|Z~oy1rkSov#3$O9l)R%> zH8=kVr+Z(^-P2|M*v!nItK#bjdUWOaD5*)AU&++@Q^ij=_pw}+;;ZW<&Eoa=xq>?9 z<$joOxG852{sWeJvVv0aXjj)NC7{&J|6RaSp9k*pEY zG>wl0dTfKzvtBB*M(D?2l$#-Nv#M6+MvzMlE{DqHA(B46;JxJI=L~q(*`srwKV)b) z-X{&VY!VghChI`Je>ZScA%gPqe&cU;8K-MDA26mvmX!Av3|TzlqlpKAF}9{xqGiDK zBJOi|OpMC7y&-JEg{&?;E<5>gzQ_G9i>_aQhyfBxye(N0Qiysm_d{D%DV60ew0x@M zpyJ{KDQ9Ot-q-#d2z|7)!S&aU@yrgL;H(At=#vd)9Uqe<-urnHvJuLhsuU9)ZJI1@ z(NUSHOzI78iIHrtqz5)qQib=oi`;eZIlJq5!(wS^^XSOVZJrH^{;{=-Cov@QpJqLG z0ocy^vK%E0E_{y}mMh6*^jd+#2vw^OG-@-?2m2f!aYH$8TIn1J$r0ktiQg3=_CgGF*#?$jY6>ZgOdR%&1h zv5J&Qk^v&VB0A+&Re?Dkl~_5-N~Jm1*Bb5ZP>UWQ%GQ)t9yA)AKutKhC!*#fpHelp 
zmFt@#DnD2yQs`dD77FWHo}Rb%2W7VsDjG+~gP&*So1=BT_Uk{3c9hUo zVbtfenZ!H}4#Fqvk+MAdzq?C_!JQY_v@6zwRSaTJmjh}|L+l#Txm7OIG*Xj)rPaGW z>cbsx=@w_m2CD9#G=A9 zIlBZ~3Z&8l2kGO565ivYUEayF&Y!hn@2_Lp_B*!jk6t2lYd?B|159Fr+e>W6aZd`@ zIwFd;M8?Q|yv2ppH}0NUA$r{F{v-^F<8u7!kW4uHG^T?#cT=?FC8PZDu{gG@wch%z(%T_xGPr}BxiU|o zYBuDuzSS%4wfeZaF;~=wNheG32TF9uMCUEsX|*o-V2F2vVq~Yb`?r0xG){+z+zZlQ zl~{O*bEnBc5F!2HuENWrxCiC-!&)oNaUp-JoNe?k%ltp>`Us&VK+pxqdpoHC=A@TD zfIl7~k!cb5@nS$n;qz`)tka4}d~HCUE#w=}9r8R$Xbj++opnH+U-UxedoON(yw$@G z_ARzCcfGo<9fq;_oYH2^Xmz)*{zJ05>h=B&^Xt7k`DsZY=EtB#4eDYB3gkuC2^mNk zR4|F;`hDoWOUS|g%p-*KI&e3A)Tp49L_N=9kyY}Y!y>Uh5)oBhztIfy*NuoP zD^`N3&C`Y2SP{T^#;p0bV1|z>e(dfGYZ4|xRT76TUicx}!{qJ9?Hfyi0y>=`;dECJ z*3Twuh4`+N^98dlAG?~@hle}=tK(1_o!8YSKxiOzX=!VJ_RINPvMa15*N0~fCZ2xH zzq*km(LiFgUYrR1(sT)%c72FwRia`xDQeg8Gwv!t6My~b7M2#tyr;TvPOoPpMo0dm*btU~o*7caLn zJW{E7D}|S+0=S84kfG*Pc9{ zRE4PR$PyAErAw+4^roX$g8FJHT9pWespDQL!}EPX zT;pSk?{2L+}$-3@~liSe-3r=fH zKP%k)L^r(S)F!J}3?K7L4ztLothZ96K6upRWI)Vm{?OwYveth6c3mN;NE-aomA9~CtDn52s(jA#m zxa3bKRpskB(zJAwU^2Wz`x_vQ`Dtp!0tp0Bbxw78g7-aiKTbG1)<26{MPX>yAeC1D zY*VY?)d1a#ydnuje*DLSfa~g0qczYA)>98|Oi(nJ;Bj#Oe7)2L=y(4VDR_Rqp=7(v zA40@VT3Lpa99bP7Xxa+$XCX72%!H5V<;K9C4F_^e%-X7Fe8QN52`vW6Fp`Lg#dmn~ z)glZQ*<@j;4-@Lx5sSV1fFFKqb=@4or42%N!{m@FB-La zk?VZtP#w3=`y6Vyl0lQ|+?{XNEAfpN3_aI&-@)2#yDSuNcvnzOn$$&eMcQm#|3X18 z#y(O|(>}K_4_)3aEu8n5i&^UIF|4Gm*6S64#G=YG*!p9qaq2qETKe0HQcp0aJ5+(~ zKU~&{+XiC*>R->j_TrS>%HxrD6ibB{U{j5bRLqKT7R>!7@S-8NLiJ#_ytVYm;ans) z3`efsO>!2JJ*s_0aZyNXE=bO!Sl(DvW7TUdXX|r|Awb%h@apHC4>}WQyef_(3X8`J z&jch)6R76gDvUastX}}+zL`n49QHSJ32NW#a zk<(a?gNyER;0A_dFU5HL(b;608&7+ki}AOa-Iyf5gi`7sqqQnHDZgbOZE+{w{(zkRchmfsO_>_&U$n545U`{1PyX;uXA@-epMJesLT9}K zbwx}_49A#PJKa?_fF zC>Fg+AL6P{shHqSsGuTB6wXl9gB?sir+UNeYnyXXuSkee4IDb`0bTe`sgK4)ie~pB z>9GXLS^>*i_KVOLh2A0|BD1`x()hY? 
z5-7B^O|QDJ>aahaUcSfkG~?dG;K&T?J!whZ=ldLssIan(3Vg0a?iDK3o6+8{KRJPK zC%g}EMimRa8F_xjxRT_SQAQISaXi*JVp5_LoE|>QqaIE>qF=?u;}zPR_f+#B zuni3kN7rnMN3!+#Y2y-q`N|QEO0T=kS4eHL7beSLk+_hw|70c-oQH;r1%msf zcCrpjo8}8^p_s55;4(}07a{fm&g^tT?>`+}-$Bu>$UM6yc9gCYvbt zreOAcdRp+|r;5dWW<#Z^lKy?}*e}?;Oc7mK)M%)y>!mgy!Yr(D+cBHeD zk7L(I)yc#?U5BE222=0#rshRy>3Wa6MPvPBZ0r!(olpKIhG%02XcqjwaPKLdDA8f5 z+=5#DY-fhPlz{9{$z`_9T?o1n;EUEW4M+$u0YND(enhOD9uegr;{U?cN{ORs4RpVD04LV1< z5tI1tN#lw(wL)B$U8;SJ=S?{-zsC$vnv6CahNYV`KW#H8sQ$K%G8dvV>@Ui5+#a^T za501`O_>vH+*>QqSiFo7(fD4k-$A0yKw55TCeYhxdn-U~iD5G&o^h;t!D!rAtaY&Y zyYXFnCgJ5uIOkZ~XlrwUiro=vqmwC6^=&?D4$EsMZccFTy}cPBg~#WSt<}Np%NSHX z=7ER7+QR~Rz~M7B!ntXkr-lzh1}5!wi~whbG|mqk?nL=UGm&B*XR=sR35%s)==7Sl z1nRK6O6ZS@_o63@fmVYi9(OyQoHA%;jf0B(mMr345A(WJ1OViZZJ1v6y*ULwF;rSr zltxlgf%>~%EIOh`#?E){gP{Eq{K6HM<<&^;`7xbXm}}C>Y_6qZwt2&n+sx#e673`z>0Q z`BwJiJ%&TS?L$0CrJ(wsy`Qa*s}_#hD1pnWF^&3@j%U(CIP4f)=kF^ggpGLxHbWWe zl2>gl7kAt7IFlA4NQT~ec+>1ZCDy(mCr0zw`r4oiJghqe+Hoy5GyCO=;xzge_!JZP zWL8Gv1W~aQwI~QYk!4{SMzRRZ{OYJ_7(ly$um^`xh7d;6{~f_JYjqIi9ybH5>J+YJ zMMm{|^@(sn@^c}jX%R(IAuo^nXDk8_^94<>--)oS^e0)69tD}T7D`m0--rl_a%ZTZ ztb{!BY2Z<b*uRrK_GuqkdpPcCoTVPs+Vh ziUj?JZC~rZ=JkZy>fmTs1T+e9uUY`KIt^XU+Yj=I_6EcxyWzbT$aLGpzZkM*qJ!d@ z?II<^QP^5n4w^#tcvzVcKC-Xi?|!E-f?U_e2=m$>!dU-`(f=G5BGh42s6F~kE(k-v zDzYZ-FRU+@_3LF?iy1Dkso!PYmQpRL6;P`=8k=+f%>M3yqGtu>=w^?S;(9ouhCbgY zgl{xNd?m4PfWY_qhO(j263?L3py`k=uOaa+)NKYg+GFK^lUO<-AVNW&^sn5YEhwl; z{hv(X8PFXmPRH9~O8+W|d#0MiOfQyROroT5ZMC z1UI_8(RQ?2D3z#V!Lr0nK|6^8?iSI6N$~eiD8&Bo3?cd``MFh)9FSO#a9mH#7jxg*x zYW+XY6Wa$nd<;sE|GxO2-y#MFBE8!{?YeeKUgq1VR{M=T&Tu0c1TsE2_20Qmgz8F$ zYO1;DF+{pzw_^5>`TU<$3tlWv1hBbk1f>visxxU?Qu6=3I|dOHg$n}>+$aQV_7jly z8tdYf-9SkvOwbu6*6klx3cd^f>pP|FGg1}R|NoDM0a07`e}03ff`H3NNgf`nCK#myY+ef=rOy#(h(3r$xKJbDqcg#U+ zmewkwXk@8ApU{gd7hdj6%kM<-B*0_0T($n8Ph6R|J88^!kM~8xLAYZJxl~^5prOzz zf0Kw1Ag^F!2A$Dj=_;50=(ut##fA00JFzMpjpcYaH;BEs@(rar0!F2v7#A?fM2My^ zVL7VqODD?7BR=s`cgVrynWv)2Mk90nV53Y~|1r3`@-fkay7?CL`OstEAe*l5T>-}d 
zSL~H;cQnqq!l5o>PL3Jol#DBK6loHPcEq4p#wz@plO9)>rx;jdt!aRfMkJ{Xc(m726r}-TH zB5bnbiq#_b@(iIxR;e|4vDq2g8b|F&we&zbD7@_rkUrLJhpH4~rO{Xz&$_sFd|Y2N zU1%j$w9?s-35>}L+fP0$2ZR=<#Vh|0@2F=y*|%M#{L9J3?(^pW)~reP^}<`ZIn0!#XAdCbOwdFF$X(W1NP>elWF>S~i}!l0k3VQ3CQ z(HCe)YNd4LTlB{LDv3Tl8m!;apAyn&V%CLz6yxWsUYr$l_y@|OK>bX4PNrXS?U86H zN0q|JG9@lY(hmL8K0$F>gzVGSjhT$*^E#Be}+NHJ?3j(=i6V}`U~Ln&4_8U0t2;JY26 zg`?*bdNz=fJg)0jA1LS=K~&qEp~x19MiepHci_M7jyRH-m} zZD>k0ml8?}fp;cYI^Q@LG97z1FEF)f0@e^_qT%#+ZSaT?6K)kK8nt%X_!O_kCiwO1Cjk>9&&|4p&}=oqcdUt zqEF;I&fGDFg##fo(*0_W3+{7(`?SJcTSG|~ZOV&-CH~^8co(wwY^pM%-Y{aq`_uSo z8^EY=v&gD2{U6X2Z$5FPaa6i`zSBvK$kiR$YD%+>f}9rL~P zxL$%r+mqT8i9|Zq>jQy}*_E2JuuMyxKT>i>uycNr{f2AFg~P!#kpyzk@CQ9N&QG6egG^*30HX&JwN@Xj4iHJK2dO!{@Gki5}@WQN*je7gh&I=^xhG}4=P+=5uHgRx&DMlqz<`$;y?V6Ex2e7jT^&t^%Rjrtb>y`FkXVb(b`=f-oKg(Ptqv;T!J$r6fWB{Fv@B z9iHe;t*giayli}48SbcFf#z5KR`zr$wxgV>>cgKi5<3?byeuU{_iY>vxwm^3P|le# zX6aQtL_t$auz`p`E^b{HL3qZ0SI6LCf__4RyA^zKO|Dwv6db30DTaql9ZiQtj4^XZ z)121%meo89j0aoM z%K#Zp%N7wuurrm^i-5JPjT806QRCt;B|X%!$;tVmRR+?>m@a;?y-1Uys3p8%U>zDN zGGy&`DfP#Uw@~e~$8qZ3Des*?diyJH{Lztk&`}NYeVu?jN(w$dzohv`O;P*Ht%!D2 zgnXVkdAsRk4i*;^`}F4t_{ED?$7Hw-D4}aO>yX^iAw*4}qJqYA^ZPf>B<$KKRYF)p z;pSwbLZF7}$%v#pk1KOWQ;MihjTv~=!0ia6f=09Mk5FiF1e{2^rM-SGx`qF|6{H$a)rkpxhW2W92!XkIV zi%Z6Ad0}NF@rIrGR@mLP_Zt|<6=p|G+KD`a#EM631LSZ~oZxV|ZWrHZ6t_Akk5U_% zaVbwHbW*|X2pGaU`|SZ1Sd?yk@zj44bqgxxhG$$DXN&&1cRf4Ga`=!}W+XAzQQ^yL^9Ub+`HL!}$LK5qRik}=oArGHi41;7>J z50hiY`NW7Zfqw#$pja4kCy|(@>qi_~(qb>>*LJx|HO5L2p)J(f`mEXoGzkP;ihK!- zXlg^!(e$`vFJ>d?3ZaQw?g<2=RHrvhdGa{sV$HQ;Rtk%9_fG_$2?CZ$2L3`{PlbXE z5~onCAOfH+TChRQ){(H{LAyaiAYCyL5whdKQCi-867qdVe9kd)KSTdXESb=FadBX^ z#sSS9E_@rDbvC8I0dK5?8i;k=U?OT;7PC?1vM$cy<5t#)}L*1MKo_&6cW#VruYq>3ibEk6(ALGz%_6o)qGtC-7?u_w9lX8{tOF>0B-h#)gerJQmcmsN8vZw zSH5>>^axoY-+)8c1+ZL~Q;@a`zVGrXS|S9q=V|_mcRCf^2|_S9>jgr2Q)nt|78~a! 
zEYS<#Wp-gt*-bY?cEbK-x549sp~{$HiFWj$puM@5HRZ*;cFse^mzi}5D6jiu{oE=a z!RyM9rx?!4i=YNvn__dOh%B(9(-_7dcnX1z4fxU*04U(9EPDj^=j$B}U#I!SNs4** zyA5J>EuYiL>RYPR)NnI~Fwdj)x4GC#oCf?re6J=F#8065VroM+ca3=@)BsWA9(HW= zCN1egM^X8G-|#O&@#K10cqs4ZyST(5h1sviTDBRfoksDWNLei2?aasG;XzItzTdI2 z>G9p#nap;l)u=_o=ed=>Lza-X4;l;Hlt74O#FTw57n%}|amn072(7bghj-$KPmtg@ z?0#I25n=0F)q`2t^8xs;C3+z5e)#FY5-Y+wnFiE7vwRGF8+>r8;z=X=(cN>OnvOrC4m5#XJnupxo z)qhendMV}MmEYWv!17cx?MDW9FA^%oDzMS;KX%&QZj}8|VKYTlJ*j;|s(bL<3Beqn zitO}I%(=m)R#ZhzU2i#zuK1JmLWJ>GicC59F!qy; z%e!olT%0z2Dik|{gWCgmanmKQ))rLgNZe7v&DszLV%cd6ky5C8en$(1)kK4`_4ZXk zfuh>(kOt^>oG^?ldikkPqzRJE%DX1SyOR&NUys0Nw;xp^uyC(MGBC~)RRG(Ztf1f_ z+?tmbVQ+FauW*B01*<1}pwsLzwixyhMiY=y>l9W0?E|_)v33c*VM2C?B z9~*jvt`=odDDansYK=Iy#3xazSsk~EFJ3q1DmY8P&wiPKgCGl9>@3x7GL%}X-Tb;h z%z+(y1NMsX%fU3?5+uO99uLE7*$6z+1aw%q)gPj4Sl^zW%80$e-jpyjIxg35I zE)g^%rVBx7@PY=Lr93CYldprW@3H5@Kb_G~(Mg)NdGL50x$f&rJ*bgX9};dD2k{mH$C{u5lsE z7VBt|&Z)u;R55}ufP;}P@V62_V78~|6RO=xjZVz0*s|zB{qQo@%kjhz{E^O7X^w?(V?kcoDy! zDIN%oXX|Ql%;X2o+HZSL!a8O9=XbZwY-8AXA6CvzL}>27on;*g1zWN;!amEF8Y==o zf9_TQA)`FDewKHnvc_~UaK(NH$&U%#4n_$>*L&;LA*yMv+}0rTn>b>hfWsL%#SahG z&%d16AfE7)`duo}hoMkjnOw%y$(YhmB2E(MD$PxTA81a}qB$YJ+!pL@S$e^b%CKK{ zvG4geN|?NwuZ5HC{s^nmlC~=&?n?2Eq1NwDyP)ify=or?Y)!K0+>Je>46{Xk*fK8; z8&bW;-+Y>)`y-Nm(5;kd#j6&beRX>ih5##*L|kw$pyiR4BT^r85p7$} z&RCx(hWT2&*yl&5jvZzA;U)nmV2^=dORfjIZmV;EUH*xwOl@sU^8Cal{;*^Jz%6>m z0~Tj@WM>E$&&roO7Z`c{p3P0bSV+u2P_pRla&COP5&o5l3MSIFlRUd%MrP`gt22!+ zCmIy*idmls{ou+gIVL_jV)$o}1H-xLyCkm8inW=X*M8Kd8$ zHE!?t!)d>wydE3#Ln5Q_zR!RQdv>`EJzj^k+FTgyfSb9d!Jx*{>HJnZ?ew6~587UX za{{Fk{zQo{II%=8qsVLGA1{|+`X?q~LRw}Mpq8xGxU8laQlDXq3^>M1JZiekyOP0O zBLb%d)N4Hpmz=4XTF<{5ZANr6emlmoY29J^@awHY-6@cj@KG{?9m>;NBQBnr_OKrl ztY{)Vw=K(O1yirod#3FWYED!Y z#R7YH+Td0jFo6zCtY4``@RVDo$HV!Wn$LO-HvJwaX(p*GEbzA33`VHSzM8|t*c73z z!JTyrieobBg-66O@uzMbrqJ`3MW#}k&dsz7;e3YWPHgf*Lb0*Hyt`j*5D!aQ@ z)28+Q<$*J4Y~iY3t_<>xX#^|h?#nKE5!^=Sv2vLwl+*-+w`XBI(ojE&U8zHYSyrV9!X_-z_~(T#WbeiUNN36mc|ObB$&!xDLZU+2_m0 zdJ|dJ&azJ)*MaO=NisjIc 
z6XahtOLawG=;X`N1%Vu4OezD%cS0wQ+d;_)7PW0fdK#2q&VL0f-f(vS|i| z$9(aeEV4|K?C%L2>X($01|yF8SBl%ih05g$yOY6OR<1CuZ;?0{GAK!v-jD1h#%m{rrsB3T&CIi~zg4v6Tdx!l_CQTozT?EHk z&6M%KACVIVVj3)n;{JXv&eLxBmM!Jbhwyx=(UvpB@#c!BeqH>Aw>)Y1VzYuI$>)ZR z{-lHSLzY0=;&=tS`Eyx$-fHfEOye&!rm(^C!+eXp)KcLv#Gx^7kZ~a zy4W-S+Oh|i*=()BpkmdeS-g;2PYaCnG=&wMLs;V|26iU4?!V;0?FJ$uYq-W|=XlJ+ zktdM+ttB5K!0H`qp#MS_P&Y>xfVv{S%EpMX0NZ)+$u{NP+T+oGz0U}U1H4$qgc=FR zJjJ>=HGrQ$1**!pXHA~|=+yR%i=n`uPNu;wT1+rtx>Rk!jf-<+>aGF)k{C2R#`Y$aILAMp0MR5fV zi4#=f4#)Vc8ztDO{|sJEOEh4_K?WfT!k^jE5dKk;#iBx>tNyS_S8SC{0EL3D;$V!X zn!sdjD5<0EbK~pVOD1R}&!_f_^|Uo7zSe9-8kNtKgcoFDisNJgAaJ=Gk{Apm3bqfa zLV_Rz1&;^uutZ!TV{B7J<}hH7I#`^|Y2xbBOW@u*W&{8L7E^C+T{zra-EnFv-9qmW zPw@T@>%)@4z03eZPJFRTA1CmW(S2S5Iq#2#`h!04+p}^N?9V<5d~Y|_e63%bpy>%U zA{slFd>N*!q^j!a2amKU7K^oXn`$kR5zBj0gqMG9J(gh(H2|@?_i5pJ(NiRHqb@>} zi_32`XW3`OV`C7CbJAB5Pd*8aqH>uEC7ce}2)f_2qLYIBKojk>2g$1FVaZpA=xbvR#In|!t5ga_#8$eGk58R0t%M7>8ajU@=mckL6FOqIq29xPJ zK2NZnJHJN{B)wyXxi0k_x|vL#o#!YM9%e5&qAhrwVVw{!73^FbSlnEjGkAq$;REhI zpWrmQoecbY=J7^APy7#C-`nlYoD1J_h>_lK{?!GhpST`(V&`E8znJu>zkyR8hI$sn zyPoj_MOxC>m{bpV=#l#Jxbi0i*( zqHCOvVr;SiBCzL(8))PX*BRW$Wo4!eUZ+u`DNlc1;z9AGPm zZ05r`;yz)4_VH$N3+tjl9^!1Oez2cY9O4ctQMe&l7ky@u;D^8(mi&SeZ~4n9-&S9n zmYo!!PXG=}pb0SQL-TtL_f;)t+tp?OsCBITXkT}8bHnNA=!oLq)Db6Sv?)kodaXU! 
z*LQSzU~+HvNY#r~!{JEi7c_`n-~vNr@KorYFvm_)m}KOFIh_Kp5MfeJ;T?Dmbb>#@ zT%@ni-88`ObN}>O-Kn3|=N@9hR24~4y`0o;j&P>_h3qkNd_7cp65ol5^29_wtN>MX z9JDW{UeWsnLc=jP{P&hA_WTK!h85&TDj6M`Mrt=I0OyFX{=gQDYnC(2Xj zd%f^5W`Q~LuWqxR?y^Ad&(FGc6dY$!5+XwK(Vn$rO-sKt!Ti?$_*T2K!v6w=23s$g zPg;@|sB#JJ$)xkh>qrV_!fev+#OXuOXA`GtVZOy>BV%Dlbi_0kSgRR?EYB*1EOqHM zs+7kfS|F4lQxmd&3I(QO>s*POjh_^`UPy5%#$Q|DYXjVa*~Xd^XjR`2;ySIIP+mSu zzkc&|G(~Cx5(uWU(3~%Ino9XZT#0w2BA+v8{@j0xJ}9%sw&7+iVxBSrQQk=`>cDtE z>vb2sHW6M*a2=;nx6s= zX~u>f?s6sPQqU`<32RZQ7p=XH`a{46i=0R^|G3bwWskT>Ec6yQ6OFYgX~Dts+7jK( zb7?DTtoR=O1~Ogt|rp7pr+CglEv%yhJ-V2PL}e z>#sY|Q$tfjU`taHD9QdMw|*zPRw$J{OG|41NaZW3kOTzh;=Eh7`!}Rr0(*F_)m*+# z!{(9v23|W(@B$SYOZ1F2(fWLSt-*gYPqx59kU&#Z`@oCe|D)=wqT*WCb%P|h6I>dH zUUXUB8{a|t8H(yhrf}*-T#_3D^#Zvr2gw5Yy_Awe8P*AhGO|(`{NY7LgDHa z$*lQn$_Uz@d`Rs8Vw%jPXBr!JrGiF!F0|8A;>do(<16SOB}^bwu0@^DK(FuR&wh6B^$~v5GD*WD z-J<)_7r|}?#`gI*=^cQ5HTPgoiFTfVYDc4sM$E6+1OYk4Pu;0d&!z2l0AFuMv;%J- znmmNO1&b4fAUwXRJi4h;UV_Hx-a;@>Xf5%?7D5V(`PRq;%>@d^V*c%Mum}&lCgku_ z$m0vIXjp^!hLdW_1_Q9qlC6IQS&*-jB# zVky&Q_^)s`Bkdq3o~OO`f%lnmh;2FrWD@mYT0X&N)K;{Qf}e5IS+~AyGanvEyYv)X z{%R*E8-ovOCKc37{{YXxYKC?-$2ukoH-1dM)nY^MBOS{942Y&&2e}1<=fB}4DOgw) z%gqvbf7EVvJL4g4hJ?MppzxR*Zyr3p2;d;89oJfr2rPe4dQ44saI?{CWX(acUR6bq z(!4`t;STYXKh{#gjC&)T66E_2FH0ib#l2KWE-8KXg2>7ybIFr7FLt8es-^gOzNQvp*@;^%@Q`AwCHIe6B3xl@*( znU_YsLT+Fo(BPj6ST5NQ?N}fB1nWU&Hh}c_Or!}nuFhiZ$TYkF1}S`A8;Wp2jF8QK zzKFz-#DP$O_Wu_buBq}tJ+{Y{9mj`7IMvKqSt;?)-t|W>e?BoG%FQBn)sCM$baH8x){6~7 zU{vYpk2I@{`dx+5fAn#gJR^KyccB;SUZE-@?vJ`-+f@ity~*En{@b;BtlxHJkc$^( z>2Ru#Ttr1g^JbU_7RRzxez|~jgF_1z|2`G8fR2WRlP0SmcNf*V;PM{a)Dz7I8lQp& z{7*_}0K3x$TE~EkISfb;Tc^r>@8%(6Mj&C-(MXn_xZzhjHb**Ycbxy;WBv=HV>9tp zl?HIs-?|A)CBAUpikY0s4iyZ_$gq!Vl;!QVCzc=!e?DgOp%l^gb9Q1SJ0TBdBI5oG zuqd7Oex$fU^<-$WD$PF9!M}gr_fo<3b3{-~sQ;8-T2JEeB`Q~$@_Qoj{Z^tq@Y7+# zrwKE?uN!}%g{mj7zTN(EVltJvB`9I{kaMN;Q^oMM)#bY|US8XHHE!~5%kkf~Cs!)~hD{@>Dmv@mY`4}c|4x6yK zs_^#`kwF)C%LZIctjdarMLP!h#EJNn&?Ejr0uV6|(=M4*RLZ=tmBXE#0nt1lw9~&h 
zIQaMA!1D6yn&bZVp5WUftSMbY23Q~T31Tla0>uq??uq)1*ter`i++ZtMEPqu0DzzD zFb?o$SWC>m22MMoJ~y)fR?n4Usrax6bGzTia7B%pj9y6e=R!4nJON;?%)0eYI1_tj zqL{uab4738$A0^!grUJLIc8oZa0x=m}AeesB} z5~sp~@8eu3NbBOtzrdYFPO9|QZ**D;l) za3_me`@AT<^tz>~MJB)S=lqA*RThP>jxDC1vFZc#7&fY~*kU4~A2(jUl*s+6!i+B~ zE2C~cY-ngmUgBSpOHV(hZhj-ZNo{MBh}XCK&mtvRA!`5O?}%`KHA7dTR@+Vax;j6i zeLsq?a}h+ILK6Ck@%!DeScq@}?rB;9BlFtT;N53MFEt)}iuB9~_z1d@BouQ%3sF%; zk9O=mHX3F@7Y60nc#@B(!R>M{R8j_KWkm&R*~@qRJ0kAMyBI>GEq9i|NGllPz=G z7RAbp_o*n}RTrZb748@*fGc*Wv>+0r9-jXHD7ilDE*%J!%BbompRVuW?y{f^*?yXi z`NsMuK3~4gLdokf%o8-zX(}`6MDLL?R1pf9i`m>H>IM zeXIx~txBr6k0dZ{{2iN6!y@wNGnVurOwFh-mE>rWGnenR!X&q7;|~XAyCAHKU%3R{ zbP!OrtV1jbYd6#4J+qnF<6)C5lzeJgR45xVk1I4ovK;{Kte`GFyjRg z83RPGv;UmbQ&fnwB0xO{efa4B(cGrC@Nm?TVZ|=3HZ9!1i<@3=VJW32nj?Y7%G#Mu zWGpFA+%O71=5%j0%xyEmgeTZ(q)vj(vq&d z3w^yf#n)Aa>9xn?e^`dR3p8b)_>ZrQ@!Q|_F42A+VZ>|-m%fDtl}aa_i*3IRQJK9b z{tjcp{D0aEdyRYTan+XN0P}U7eXXnT)Fbwz3qGjM5ilj9;yb);MPk16oMYK4xQ@~d zGqeSZw!<(|HWv!h|6QYrxC`$C@YAQ?-j{If{qL}RWZ&S2gyDOV=)^j;u0n)-GqtY_ zmlb(=O25g7lGkQN9jFFsIV4nMf7ObPyT6%?B$OrjrpdrIX5*QWQV*!IvDwE=1U~>b zE|}Hg{^C8t?wX;BJmX2VZaLy_Q*Kk9Vgd!Mh-F2e~Oez;+aOt z9@Q<8#Y1D}4C6@7vD@2ZjiK?-Sv-C8Dzit|aH5x(&F+XQnBLj$MrKG2XmmG_yVs5r ztF2NMM)K5*P4m>LGIK%os>EKGmHOsuj|TLa_YhynA1V9HkRWe1Alnh(wybsHr5>25 zu#)8vE9d0m>aC=v%n!+b_@r1azv%c9fW>x;{w_Lb*WoE^(LGtUq4U^ZMyHiD=64|B zNQ9V3VZp1)n~S-eJpuQJ^s1-_!)nzU$??|1yRtxpaUl<|2yL8Dx?6)t-gR>?sgcl& zuQqGMpDx3M1D+C)#F1X==brSvWj6oa8!Sh}@d1xIT7h}tiWQ74%4%iB9o(~J_@ns( z!new$N-G9zWC?vM&S4lu3FPy$A`)DSEFN6E7gk-oj;=a$m`mAa1-CMEIzdYJ9x@9~ zB2^4gXV+q(4>y{j&yLUm3J0R>``S5`@tuVxA+(5hCqd-LYS)^cU;6AKSaJZ?{FD~t zjmtZ|W;i;t_VzOYqj`z|9%^Nz1WKEIhFqbG@uDtNlt!DO*LGDpp?$lPwy&;s)f!J1 zPbmUJLILm3w1muNjQCW3qYixW6S9E{mnuVgbBvd!1!2G$Cp1eQBYFBxR(2x~cXz`l zsV}my{;*5ud8G;!tM84L$vA-J*6n@{)?pzS6Ojut!RpOQS9~QF9UrPmObXu{|Cj_A zQ7J9RB!qi;5*Qm#i{BA^18HIOsHbQ|bQ)sxaO~-Y$)BqSDIf?Vr)YB(Sg&!?(B|0D z78AHVb*av=`ldpkaW|-Oht`@oeW{!;DlzNHN-w$YHPD2AYl+;0@Xo+ebwMp`VWX&L 
z-d<4MF`8*f`Wwp}0G34GpX0xUNr--&%MRs+vHaZ;g==gFzg?mwZH8wHsKewY^?Ggx zRlXm4wu$GABA!9S)b!^@lu?L!W)Fobc;A}VCArLz!svwS+bLN_vem=cqNyg^{0*?c zP3XI9`tln|P&JSRrcU)en)8m!G<$!W=WluBBvXp$>hH*IgQ*JMDjMb=6)-G>J!!tD zDpeJcsnwFe048)Wb2YaecS7saZVE~d&LsnfF;!_2+eVaDDP#S5tl|3u|_xn>CXR@;8ZQPCAO!k+{YfR;aK& zGWqg=TVMhDDEvSpa7nwmTIUhn`vld*NMsYE*TQ?Qw;oN~A;$XwA8wU#tg-uy$i-^q zP%EWv)}pJc^2{BKGlWF+OHaa~Ab6shP`Hd(oG0#23mDC=_zITp2WOLOvquaqEoETn zNxE*?Ztq0V^|=LFNQZsI%Xp#%;` zJsrpG^svfp%`lN_DUcta=-y;OW*?b(I#EJq&!|rfMO~7e+gJZb_l+1-fs}Wi9%+%} z!go$foNRrROpJHyy4`@+N>I`t!|BWenH)S=aw4GnoDU;D5~8Cip1g-8;0&@Ejo2NuP+F`J1GrwGFEOj^H^90l*Ef$ zNL*O3?4#L8cwX)*eI@_?*>MV$DJbkSBoHa6!jZ&NOT|}I;>eE~+zLQEGD7}~d5p4) zK@>HFM5j4c5agkTR+ww?_a+V8ulv8makPD0Dixa!*oKrZZ^35zBU7H{xGlGTY)QD8{*%Uz;lk+Tu_8)n3$YW$WzPSFj5wo~4QewY^xnb-m zMEhvo_QG@*@8;x_a=Ooc6Zl4=XH|?R$ol{vI{vgK_%J%iK*1EaF75yJNM54e@KQ^J z5c3=ydEQlfb^~=^>>0J*&})m`j+eBCk3GyyR1m&{GV=K6M#S}#vCVI^R-}v!H|%MQ zRraUvjb)G*q|H&6cG&s???NhTR~aQs9s?mc?6k&&2r1>tL*kJ76ZYbe;Mm7^ekQ&*9l(l zOOht0T`yk}?5}*BD>K#P_pUn={_c1Lu)K>tB9B_|TW9&37)usL=YB#F@)ahOc0R}q z=yYSMelzJg779}c)%50d@4#A=tgc+KFi7kk;BtQ#sY?nEAS-%vBX%vLV-hO&mz%YnR8HejXynxkCWtF9dtne*#3 zv93Q)f8je~jDotSzt7TrOif?THqzGe1q>qI86ok!Zum$Va`u6mJ6*PL)4{v)OxUfa z-foEoyeJC>v$VY7I=cyvJbalF;bbw66c65(iH~dxhnE8BGS^9g4FQ;6}(*o zV?efn{H2e3q)_Kho@-;(JP?+(s(HL{D0dUc;A9Z!<2J$Z;aY-e&q4}+VHpjmF-QgcAk$xtrjXOzdU{@`vkE;MM$&0>ZIJGZmx3{Mcp zy=-MQ3zvM9a|=-)ZGCc~|>=RL8|*d1Nt%!&1uC zbHvl^VgO(6uBWbVnP$E=e4ld_;p*e@eS^aQsd6#(@ZmVyv*Bta!X^QmeCmUH&DML; zI2MwJS-aRI4N#)CYnU?=jMiFlBJsb$&DZQJgcrV>DPw}y*{oK#7|8ow|B3&dOjcna zLo0XBZX6OYv=Fy<2M1-r*yD5#@YRwa9rBW0xSb4=t;pP;u-xcCvb+4;1I zN}jLpIuL%<@KsvbIC77r;-rdk+(m=6IfG}K>!2dJ6WQ>kqGI~pvCa0jSgn#1)2AAK zE+$S2m5p{X!G>re?iXQ!m;NpLtlDnI>FwpI)96XeY<d|JK>_E{>|}fI7T%k;Bv5>mfJlS&h@RBQuolCoV1r&$?^E^*#CK{|PK} z5kf1aKl*r6SA0!Bws6w$Jr`H8d(8loPKrp=VYt-8yT6D#@~~IYwVb&i*g21nWzQCm z^DYnYX=RaQ%>+3!NG;NEX5JfmkYwo$mI;Kyd*?EoJBpS$Xls^Q#>w0uBx=Di^X~{8 zEEz>vQ_M6(+&Tt5|5EI&iEz_ac@*9_Vz-D4b6_W$|H@$4g&QB!=ANZTo?)zBVmWbW 
zTvYIQtE%3$67RFqon(<=JYQXMgX(tF&-ITt;M5zwZ{yztnF|OOnx;(iTIjt&RRyrN z<`090m;md^78Gi|ty%LJQi!ixm>}G8v}3OM#5M8)ssFSC6YkENnb1pGls+m#5FfSvw4#0XG@( zqwpriOX;xZX-tG$$!+Rh?j?O}1#jju-+z(Qxzm%{JR2IVN?mFwq@=a?h% zXs=0t-;lYSc4ag@a6@_ZU5q?wRiQ83C{i?@eWADVk}a4ZisI_i)a;80j!LH+J&!!m z2{gJamNYhE&-pVW@S3aKX;s0WpKBf~qD{l{5KD6tv=C*kqw6`tS9ctAnarY#P~H|I zGiL!8K^R7L!StsUuhO^CUkxk7jSkC^Sxo zxT?NLXOH0c3tXDu4Y_(OIbLp#5%9h1gFEzf*Mc_=Om9jSBJ*UL0yO*G_}6|K$OXI! zVxq!Sjpttepm(`^LtC49-L7xpl@;mX)9m&gKAc0T4o`9E+$FWCaqQ0JuY|`d?*Te551lE8y$XpD83yy$P3w^C8^=${f3!PpUO0I< zQcXIG$rPaw^O21YXK^n188Fu~HO-N3w198ik)~=wr%vP9wQ-i8o~54;kln-Eqv?Nd ztrz7Di1AC&&(%=-uASRWD)lY|6k6evBBG0aR|*aJ!NDY(w*NL-wRmi}&)@^eUvcG9 zO3ghGr*QH&aKaxde0!PD5q51H%vNo? zp}{ct5y+UrvK*^Au+&`%A7pD8@7{xno_GHee_)hUdR4i|44lX5nwN>}5)qtmJCDe7oA$n&go6P?K2q$hMHX$jE$6o-{lY(bDj~#}cAY;Wl z+g}))K8zRbzTLaaB!+vx_n{nksROdHki(Z{!2_X`8m$l1L}t455N*>}UzMw$cdgdb zSC3wdSuMB`!S(BvYyNZ@<(c}3<%%z}-zP9Em*o@LJ5WceWThquYF|EcY_HzEkHJMP zO+0lf?cI2HTi2_e(2w*^?a^|FW;F0RH?w&!z(S1t^C?D!M~4hg)hMstO_4A{({W0D zS{rVRxc+J3sL#WZ*I!?>fxt2?rBB;So~sX~A_=jh8L2RPJKkS&FcnU&I0zyT!}|?X zzgf&y1=alMeVt^?i%uFS1I@7DS}F_|Z1j&QLk@HdKG*?;a_nBzgJUOlWF(jM@rMiU zfYG^m>Bl)Lil#$oerc*U$<fG!O918fAkgArL~Qauc|et^)p!8G672ewr2Bq-OP2z%$OcR;+$1=)sko%o@z)u3 zOZMZs5T6{_j*#lBpw3@I5X6>#E=CvJ=gMASkbZvlTK;^=?J8vo-vJ>#l+0I>3ZE`F@1u9 zylah`Bxeg~bQ}n)_8||(yBu!r#H}NabECXh3XyzSmP;4e8){rDWe&E{Wn6i2i1{`Ch2g`pQOw>|nmoVxY6)%vOAwQQZSy8GfN(`@&UXI2OsCJZ;+u{A1XN&1&khyn{8VZK@{-U?|T zCTGYZzr9e2Vl>ub9V+SPqOP=gm?9b-o@2d_cuM`!3_f31*KiziY-dofwOz;*k~NI~ zfCs2B`lMhvh}*iRQLL;R{PxH4@SxA;vMWh@nU@Kq;h1_(ugxuz)91@Eu~JfR-^>7M zPcz!SzG>?jYDlozmlu+-HOs*h4s_=vuBb5W^efVoZmn|q z5{V@P3ItGzBffDjQnx;aNRNPEsS=5<>RuV0>G4Afy`5YV2sd@<`C^bjT5a@%ODh9= z^Ffksqhj{zejo_9@K-?MiQo5||MOoiAVkmPyXUQ}>pqZPfk(>!nj5epg~Nf-r+m#f zk#N{7+TMWvxfuqzv4`Kf=`zi8#&T{{g0HpytnQ(Nl~7CP$*8>Sh~Bu`Jw2~(o2*ss z645At8e<$8W?XfaR{D(|f_*73O8r8FSqR%d4qz^!H_XpKtTU+WRhvcl z7;M9LPvu-#g~7#&6NISKzs>ZanIrpr4YP2nAmb@i4AjXhPj?L9rgK20vwlyy{EN-} 
zrz*VduGT(|8>*xXVDGfifMp99q}g9YuE)6SnSJkDaEQos?ollhy-kgH{|I{*r6Jy0 zK9tpd!>PD_a{M{^{e}cSA=dd%S4`RQ?H%jT(vL6338W!H@8G$PL?Cd!$-~sp-1t=v z?!<{Pd2HZvG_p2%?fw>VMmxGM8vnXf0lOIJfAQeiUxgJJ&Q80&X>w=*Bdukk7Oxo}PiCI?H{Io){d=X0 zc-k%Tu?hd{Eg=2xu&xSSV9GGRchFXDi?=J-LxXQRn@o#pY`{3LPo}5)pUBfOOrMlR zlamVSVmml^BX!7Hw|HRpdv>_u$1uU;`i?&pDDLwW1edWNgT1fmmP+tgKa5I?o2MfH zB)kBzi?efL z7HeMh(f4#u911jeAjWhZ4%u+ic!Znw3~c(n)6Kv&6uT2)!EEH|RdvFpTJt{d)Qbwc9g*H!Qe zPD$8dUuBQ3A4WQNQW3Xd68y0-v220Ze&-dE1I-uf_#TkMLFfi;@XjDmo>?I>6Q=V2GP`M+Z(!@? zuJrtERh~XoMRe!7-$0T|?|LXrMBI$mmAD%IqY_)!z>lZ*#XYjvSGNCM2WBYb=-TQF zY9C!pUyPssl96yLR<<>lawTB;o8jqBI2x1eWo|1p?oT)<4CR?c(svcw*&rIC@;!Y# zu;H488J6zzVEvH%&fPKfqDyI8aa$;rB4_7QaloNL>gk|sX8<1;;pGuiUb0G$Yi3`l z<*H5Fsu`L7dgAMIsfU9q2$vK%M-%fn`*7D*jcOlLPDV}RTw`%K|Q9^p(D z>QcvkmT1sOBFgLzf`vS*fV5!`O+~sou>MekS<9R= zMncdy&+IK5oj__;sj0TKua3Q-omF3xQGoN71S6KTfUsqmmdHav&h=x|`njSN{YF;- zNeQ|6UT&9{y@#T1%gCmm=hBZBh5~WVnW#ynk)zF1KaCxKyfS8Y_d&SQqV`12pK?nQ zXAJS|geU|VsKne1jQ`-E*T=nO3|o<|r!AxYq&{C?4;vw^RCJKOzvD!T#lc*Yijug9 zqJH44;amB$`OhMc=tMO976WvYmxH&j?y~Hw;Um;G8o4v$#WkRd{w7edze95m2=Bj! zW8f2eK*YE;m#L0PgTPWq4i4ULZ3!E7wgB=pzJXc%lYj^j@zpN0D0~47kf~UAb;|3r z992-J@UCXz^HNVc#G7G8pJ_(y4fOo;FbgMVzb;@aP64cC!uQwYj6o$jOXSlTLy#td z>weUtoQHs*^md>Xh1L&czV_*?@Uc|)iw9QWT4JsIYt5#=KyN^75!Hh8h++|!J8uCy zL0b@r*`UpnCS<#iO`o^!0r25-FIgWeqH+SHaaTQ`F(4Kns@2uMoOHnusrE1Gms_uY zMD$Ji6TtTuW~*IH2#++?P#vhr$WXsqT+!_N@EV1HD4=`q=uOS~(S7Uu!O|S`>j`q4v;bbpfW;=gtWdo?aYo;0J5YAb5KQ6tuV+d}X z(_JaZ_4Bg3^(?&=Y+K7t*Y1UEcxEIzBr_Ob0tcw!v+=lUFO&xMqkhL&kQ!q;QN=W= zPl+YJON5ckd$X1V$RE|k&kx2SEnbTYf%DNEM^Cd_AQ}BZ&4G?9swMN8gp}B~cK(J1 z|3LcooS-*sbid2?6fEgtAW4~0veHUQnPR4FbYE}I5u>XCoZ?vwRFds@&Ke@U!e$EC z7!>LSAm^T`{+Bk2AgXlZH^RGrdBdu}-rOB{|L&cF)T0JaK9O)HPn(yzEV5HCIYP*hizvoSBvrR+CChn#uQ-*O z2)mx#x8Tv2Xt(zf?8fq=1HO#?)ykhQkf<Cr3o1C2J6+Wf7KHO{! 
z&;GiYda2ZWOnkOGF+u&wH3}Ho&35~AK)o7=DQZ6 zC9h04y`cq{q!%)k+;=Ez6%Gcc$o>q6NfuK?1fj*o_Z1VMI|hzb8H6Ix*jvD2nSnZx z)f6hljUEM&=xlGKF<;E7wpg|JWJ6UILYwOH5Yv$8n5=NTcovD=f`shwt+izn^%2bY zo+QWra%G;OkXs5VT2b7cig(r;RR=L{QIn}{q{pY`)9a|Crw=9-`t=mj353R6m+tVW z&|5tv=AtmjH)2k657=zuQIReRkL8MbC|>B0S=!LCth7qYN+Nc$+V+-fc}@7#7&O~P zZojhvNmCf#&_Gg*a(p&kblM#ksvz$sMzM*-bH|lB6He$hT6&Y$T3GXVXUogyf|Zy^ zv;JBK#jFo3Q}7>77xehN;rJOYyWpF&{Ye4^r0U3(fWQv#H-EZsQ?`ju(lAIDV|-Eka;$qN-c0C|v~@Y(m9; zDvP)AuSDkBxG)HarK6IPW`0`D$O6DglDF=S!*AUhVZl_>7wHH%e5QgAcBuR)!;v~ecx1|hA)Uv7^V8~Pe;r? zux$!Xyr*LhUO<o4;^ybKvj9aUDq#8_W~e9NAOSMlb42bidS{!1*O)<70l?-3#x5 zDfPH3iTY>SZxU?K`JDq_sYVx!x|#IG59CZ?%qOF$i$b}1L@%nrUum=m3#fC9ivg*H zZ>lBEPx*&KelUr>*4~it!x}`9eu0&xX8rWZlM+|s#!eI)cf*If=Ke@3aZp*{| zHN@a!@^L)R_mSE;xUlq?6_x~6hoK7anhl#T+M*$gUes&om&qXUpL`Kp41|#Uz7jLn zeR?)Z3#wlH3=#$8{59)xj4d$|JPEs`h<-`x%lLlz93PEbYSsU80oglfn_Q{jeNHp{ zyVl>{OFz}+MLKb8-ZPI}2CH-5$6N>N(KONj14saSYm~RtW81jF#}DFXVX6HlpNCv& z?f#Eq_Gh%iHMrU0fAxU+Xi2OoTDF&hX{5UwLh6{5&O=H|OyT9Y(ZAOJg?;C*UJc4_ zPA5sU|45hcvQLFTjv<UH;gxK$V)8i-Ww4%Hu0LxN6 zR{gait`(?hw$!5boNZLd-SO_pLV}mZ@Uv%T_Y4S#a(UqB?D>g%J3*iY>kpesDy2b; zWkD$rsvk|d5I|SptUTf4*lf5|u&NSmcBi~(|7$`kw*hlsEL78P#0K86iG6H5_&;Tx z))?(pP_g4l_ZiU+2f8bWv0)zbQ1PvO0lz416 z#%xqElYT`93NNPqr8A&Io3iiii}XjTNW(qk^b8u#Wsba_deU#1*sl;OdDj%$d(+LO}$}kcfWvIL78%j(|sOi4I+zObF@s z{yZ}z`Jio11B}Eg{EviSVP0C8QiVu#ock!tsg6~hJYnS<+h<_3iAgUAMkqiadu(-n zo&oNpVsnuZo^OgX8;ohUc5|nAnloPFw9Ey1uI^ADV}yQWTlNWUY;wBO6=Y?!H=JgF zY{d(P^+9I&v=xLmp5DIJ#OrShR!>#i`;3Z4T2?+r(I*NaLhU%k0 z^wz^kcJ2L%jaI`c#kXXY=~;=)^2#tYEU5X~f99VH7ktuWsXg_ib~Z~L6)@ojC)0*g zmX61sX1pjq%vTz3d(uO;|F0y958W6uoqXGQZLB=SD9uytQym};d0C|Ie@MOwWjdo&?2g*DhE<%bX2Z$i_3(NnMOvF1D9>3O7FitY4^2K~+~7yyMp_Fd zI7oigrH}>tvPX`ZHHy9GQSbUnRg~z6+bJSO*`l?|UI;X_shnt~Y#^(TI<>?1g=?^$ z8;x8E)A+UVTpGx+SIJ_~Nyw9f4*usSssf?E%&IwW#lmi9M2dK{C#=_1G+7GuXKx1y9Ov-O zGCv#7T6g`&c!tWP^5|8r)@{_F-zl{HXbPn#{W;fV{~!T4TwOC8EtW-4X%tEK$iN3zdG7yMV& zf)2^O_sh)B4K$3*6g?yJl4;_PLK>wp3x|IWqS+3Dw5v~Uucqotev~bRXQPTF*sXrJ 
zNfLRs6v&DcX)BP)idHY($lB_%SI^!=w&E2GCq&X{FMV9$m;3gc-gO=hSP|BuR6tGA zO*5=EprQS5SB)O|8{ba=cArSJ7OJmV_|0G>6EnZj$ax}n%$QCgc48JXs<&t%neU(0 zW>-2kBg<1A{Z#b+Y+X67;tGzuG?FhJPu6PqM>HLED!G753;-Pq=ZWeQ1ZaP7GW8~6 zz5mdWl7tbvV4FNgx8c z)g7@9zKrt(gf1EDaemLZWkAgI);{8wL9n$`=(4p=({}BXb8^{AX#OtmjC5=|;gj5? z5EGpE=zSPwj8gr3NX22l;o$uhvuEya_n-w1wOj~ilw)iP+Q~FOhd?q;36EsCQ?Z1` zu=bD5%WWIzkfytKG0kY!D-rX`y|4?@r;#d^8Vhvb4w>|H6Aho4BQB96I4=n_O-;5c zR7yeyjoc9dtK=(@d|o){SDt-PM(S9g!*#yM;=i;o@Ur?yp!(o&CPep^z0F-br%LAV zl$Y`v$-J$Zq_w)j4t-C3cj@Hoy5;Q(PaL^=%I@>#5;#@Xr2_};(4Dui(l^0N z@jIpIZ^R%hcogn-z11u{D9k9-N*w)LI!{R3U*x)1$Y1qMEpuM|V4fMCK+mIxxqvu` zF_cfYbWUPeS4Sx9`8F!0`Id2>A~T_kSGEHxeX16mtkN&opZDBDML9Y4>!98`zR^qG zPe5H{wo4y=!3|*a|+)LhTE)z5Diz^TX^G&Hquid`^rumtsozI0m zvDw(Tp`VG$YJRo58T0-`5sRy9{TM#johQcp`2-IBRu7rr`XTUn*%>?~SuMvWfd^Bg z8#KDK_3U|epc5J@XYjI^VZGG=v-BhCDZt8KGg7x-(rSo~ublQG-=z!_6J<%qD1kLx z(}-o@QUVJK*fQY$5iPFAi9F}cUq_iKI>Z-1ZBVe%e@Liq(k`T$?qpc3Qsrbnmn9Eu z861*kDBV`eTdaA(uDKLy!_LD&E&UYi5;g38AO_SG96R%=c;v(=#jQin=2CoD3%P+j ziV{5&j@-}9AhhqTZa-zm^+X96sVwf&$yeM`tAg5Q(_>Jdr9d!p<8^dKdtXCe4$S2T z`-IaZZ;1IwQK=3!nBZl?YCjL`X%p?PRHDVk5uqoXw3pM~{-oa;*MqlCaV?XQ2P&E~fj+ zn}NsfnmVn`DJY*|UY_tKb+n?TrMDcbuR6F8)=Z-q_UUvNW1r)m#_Huff>!tm)FxD& zH%kdib~9}Ku=hvaQLHMo zJb^n1nk!_^cbHJJwbwr$aKTPupRepXsvk2pVeQa%op|f#VRf{RXZs@m+Iv&B^qt_yd2nLkFt&7oT{=YkJeXC)m!XL{$F5z0(?wms-<+5RD zME0i9UMgme=RzW7HU#t!SQo%)Afv=Ge%Y}tn2^Wq@`rlgJz%HqE@#*(NlbEM^_r9F z$}jz1Nw7s>V4+>k7VG*%{n&HCB;Duh0@NrPRHm^~kM}mlhgH;4a8fd)$&0L{Gj&RA z?G#aP;u3S4WlVTnleG1Gddeu03(c-rUH+m#Umz&*kk~5zWdQc9S)LiU$EN+E`0wrA z$4Wtps1SCMHE>8I0w<`fRY?P9jGDABY zeT?BtClb+7Xpz!MTOm)B9-ile9Vcm^PX0EzpY)eiOK_NyYEN5Ax8Wa?i+k5|_&MfmH~m#~>pV{=>K9VDsK(rd7DoR`V9j!1Y3L2wt_+8~n zbC3aSj_MC$%y=W*FaR2!weCjCrPqi)lU#ac74v+CUUsMR$E+w#wGX~VN+dJU>in9M zOM%gs2#r5eh`!u^q9^(1*z}1AN|5UEX&GS6?O@p@y zP0gzAAAwe9di<1n_D-rldcJyWmz6$fZ124YJn1FrvDSfl zfw{neh}V#NtKwvCoc<4RZu0Ilo34QXIwyX2@!fg7|;dm1W_gK>`E`4nYEiySqzpg1a{o+%32h+yex6 zcXtTx?(S~EwcsAQd*;r4?-x+iIkom)pK63qx}O#d{*ufT31-l5aZVbmQLI~&?-8xY 
zGvQG-5w`2kqVf2#88=F!=o7`0)?)quQFz`$1E*fGYzHHHaEXLK4!3ZumnJ0&$V{Uj zHj3~D`u0ZRZJo7dsCup2s6`h8u?XFP%nW-}w|vhVR?R#zc&#}I6gOf773$9ehe9s| zaBGPWw}DWt1M|ZX!(VbRmpg>WiL-~M9SUcZSa7fdLNJ#1)!||KF((!1e=4p%9vCVFSMpq#g<>My?){-FNuUO@v7bOBG0_H$o%;SRD&;x* z)^^(4M+tz!M^ zjq%P^A|5(vx|IrP&^vUf>P9YuhbV_JJ)lz)GEZ9m2&XgGz4tEshWu}F63ClazA{%9 zF!+U+!Ppm6_NMTu(p=BJi(PIh`)eoLe^@s&ZvfZFm<5NVJZ>CeTHOIH_cL5;?(0i- zfQ&TrPhq`m>HPeUf4zz)-e5;W@5HgQg*nfXIN-3_}hJ`1p1d1(hjzZCNF^) zXHRD;XIE#v6@Q+W+)r>vA6$`z46jFQzL?36!we+!-McjBe<*>l+&!Pmhkvhep}ala zf>T#vP>=u-gYY^}b6c}#G2EPY&>!W-Xcy+9Wk{n|gfhJJeX}`mLajerf{0)M3neTNXoWyCnbF~!xBC^M#lhSLXsVqs? z>uytt<&vNq`R`Nd>kodZ!IGzxUZa*$0HqvwJEL^{tgo$FC&;-4i64LrPh7VK{slaJfDSYhvKL_*u}f_gkNO1yc3unxl045=(j6o?)HuK6jLp ze>-9?`{tT|QkQ7JXzv;=&gp%5{VnWA=Dked_Gb;?95&-y*6D!ypdprzvnds zA$V4srdT=5NJVR{5tMtpfY8#-bx}VSEl=4|jgFD1742&Yu*@Cp3Z)MEsxbE?_O;Cr z%TN#SiENxm5Byo-8Ai=5_`LBuWU)4(l&ir%qt#b z({M}|WK58>Ub~iqdO+}qB~QI=2=bojYk~RVXzrx?-t8V#;yp8^`01B!bO*e|g-l{g z_kL#!)7}IxbF8ckm5lV^eK+FOn8UStc#6%F0Xb}&oGJ%)`%i{B=6YT)(*(F|4t#t( zXKJ!Fb+!Qz^jgBWW9Gd`Tfbf-cYf@V?_t#sZu_NKZ)wz)$=xv6Rof};x*e2bfTZD| zWwje`YgZes(!}f4d@tHvyY>vuS_4O+~LO;Q+uX zsH}ID`NyYh7QRE^`u&oNf67 zT6H_6R!Rh*Xy78~A?JlSF9$AgR1L3R0dc}$4!8s$ocxT4ql?9$lD45YO+%`x&QHVo zir;@IlZj4&uz8gQ*sK_?+mF3v$+EyQA8T=XyK~nkTn@WK8Q$&zb;cmRXDAH>zed3_ z1c{ta&I#}E3FyDqd#Snxp8>hn*b^IwyrxRdW;!8)pA1r*w1uUT6SB&JSss+pv2esg z?}NTFhC-%DD9{o_%mym8X)|(}pXbp#!I4fL6IPI0npn%%ecFVQ5p85Fgeou7xbGL5 z>*>ZaTy~kwzyp7NGC(MAU(FXB7=_bZt7S-#!Mp{9+pC*mm9sg3t(LK^C(2{nV|IFhQ$4t}jAR zwzsC{U56xE6^?w##S9vqvHQ_-+A&ii51OQ)JOQXNK%?L&YHc?Jp-~Vfz=PLK5v`* zvBW_y{w?fb`s%aAMg1+#`gtxznMj?L>rN*bmjKx5=l+Y|2OWtV1^l71c{9^7tAjI!O2y>gKxpM*cS%G>zdRO6P5sO$3e*||fY&xN6&E+Kk&qhso# z2^A7nKYn};g&*&b_3BPUV+Dc5JjUspC1om!YY2CfwSzQE6c96nNQ9pcJd4{$YDxVwc8w zQrN~FA39Ozlyi@kT0;{wl}=V++tkMvIvwOsVCY?=N!IjH3!%Cq`J&WH8Q0tT8XJ52 zUH7qfWOm0Mzucr&U$sKRD;Bm0e#kiRoZ*)*3uaASO4x@sn3H-wZ-2M}&Fa3&@5k3A zu%Wv?x2`rK{e@7964c&n;~f+U=KW>;3MMc6wUh$#PYgoXe!m!v3}fhDZCJK*RJ(b~ 
z6(t32_#6}2+Gqsxat&iR)#@xse|Ld*dXF!%^Ke&!Egz25RlFh!?>g-v)C@s0<860i zE9_2S?UwF<{rX9Euw^Z8wkCa>d;MfSe2{M@YiKZDlWqV}+qS-ljXDDT;JEWf%-Su* zCnD!oNFp0)BVSd%%#tw?Nk<^L^7Q$KtsN@zrIVS;V!8G~{~h7=hRNZwKkvVd#0#|GSGIAeH zOvb zBqfy?BGwa^-#8GkFF6!%*VT!++__kgZrxqNaE-f|aCxk{CC*S7WYVa_2(0bQlT^#b zjj=!Xrm6Go1!S`t0)X0#*wo6+`IAvZlK?C<6)WrXaOf4QVM41YKZCckH)cs5M&Q%W ze6N}$K-c?QGv|!%sgNs=26f;LZo}ZCN{!qpVr`S4b1C-r|$ZSfkB@f_* zaPpIoVlcg98a7>7>Chngi2L(!x(*jfg%W1~8!k_-C8M&XB)vERi8$KfPKlpk=p9Ws zQ0C`6N~t1JD#UFS8TJlxYE(2)vm)nKiDkh>7_zOEM`$~4Oxy%oyIM<;XGo|f3N^^frdW1VZXufrqJIx zfE~14n40FFHp9Qy!rvOGe?Gt9Jw!dHO~w6;p4Y|S(iaXD?T*=l0Q#c$ueuz&9-@%t zW$HtlIlO*QtN$ETFs8=R^(*ReoNQVLlaX11mAKkwYW({*|vT z{EqNEemI*`HonYEo44*1mzxRl=@!|@C_)?dIP!Y zO*yOdo*iZL{8lxD9i*0)PdF7dxdMDj-*jbf#1LSyJ>u5rIX}aZKR};T{B&PqXsvyf z^~F7Gdh|HKs;eC~A^u{at=7^RU{PD+c(CK4^J)-(#0@L#V8f#u?KT?#+7lt|c5;bf zV%)%?BuqE4;FlQ24umac*~Yu)4~h{S8IdNSIQ9^2LJ0g@=c#W5kLfA(rg{Jemjs}t0Q%|CHh4m|Bp3Y_lxdK~7OlZk_phT63KlK)E{wZQ#(q?8z1BGaavP?8SK&Se@XqDN1wD7SK~#0hR7kU)VN3eVC_8~X zTOs>7sw)+BFI~VvxRhW2xVqZTd$axE(Hcdz z#i=4arfV7n{Lf4D{{8>3%xRv2n7xnManzbl;@ObQuV z=@FWvLzRdU9I$aL8L-zZxwu3|`t-Ey8~u>@N_{<8@9m+NTD@Ll)xpxNINOcc7RKbs z6%ZF`Nss$_K6P2u;iX8j)m=_!ef`0M*~8%J2Z(NaZj3J7{w0%l98FO2Z-7=J>u zw+=L`Z||j}NzI=1^w@3-5NuT7zFM>FE~)_c%8HBBHeH5@&C%BrO~f4v{?GUUGTL*( z-_3m~4g0+jkEQ!n>Zq1`>d>*Pq6+1wx@50gC~uQ~EmBL6`-|zzm-(}s>B4oK5P+8F z5Zgm#kzLa9JjAsli0W`&-JczogYj(k6d(AmZvnNY)7INE&lAL_$?qVmS1$XDZ*o-3 zpZsCwtP4b!lNes&;I4hl&Y?^Da-ViMKD%{A`w9lEic0AwX2?7^ZiN~Ox zMH(B~U^~f`mVD9-lZRP6q^=*c59>0=t_e)i{fW{mVE0TW+5gW!<|~f*Mx>>Iv1~*Y zBor{FOfk9;)W-XWSewM{x?pihBmPA-UCNBg?f*kHBi5ts&b%0{C%0?|JiK~$T2ZvNGPJPkrb(C&)8ne7DQ zY}(|Kt0M~`3dY;+C}V?5{N>WqbB!SCwasIxP7E(-Un9A6U!$-5rS&K+37ovbzX*AGlxVOEH0r;z4Cn;^x5T_7qbZ*f-V6gt2mt_3 zDWZQ$>(WTGg|%pm<%S1m_LQhmKFrC?6|;(n#u8CW{ny6^+82aKf=bYVQs1v!%L{fw zHj!0Dnw%`veVNHhP6San?QCrcmzUG;WCu-NHHX|NDPRm8%ZcsmbaUnaF%N0;EQ0lk z({QE;@n0`ZXZQ2ZhgoFbx7JL#rSB$qd+6R@`-m^^j{|Zqk1vy4{NrdGLH*B{3Zucu z&9yzQ38)#jrRX=a8`O3d`&ca6_v0)DyH011xsLyZ(rpRe8to6$5~qNm`=mtBikucu 
z&&Wo}mOE{(K?2R7cQf=j>8Ca{)ic(q-0+jeLOvM{{exya7EltA#|lGkLG}?1mJ4 zE8119YyJuvt^N8oXS?)oQ=J{5{jV*x9@nvUPThU=<|XMSIBecVc>yx(nCxrKJLVOo z;crb3|I!B_Z<38#=x<()y?9?LDB(BBq@xlUvDNGy9LT4 zMeEw^N_*nDN|7+RknZ`U9$#uCI&4h-g7U<5o3Nm3HQQ2ZzwY)CX3Y}^)s-9#CdYzE zgrp2)0y-ZLsOKb06V*8|MTtznyolLT8dcHulF;_>5I4Ni9i4iF2#==IW6^!JOvu1W3g_Sl!xMAVp5$a`;yZo@=9fbU^_uCc}Pwihecw%Y|X59)#!3J?S zYjm7CVU?^wn=(dk(SYRgD{L+Wbsca@Ea&gn>H$B)>0o{d&nVr+H{lzXQbk=SS=@@2 z)>sXqhso|s^n0~ASkKm7!VIqr{6D7C*!vo5q-kqAUzw#f{)e?1z%OFro8!@LF?SUEN|53rvbHZqf7^P!oQ%lES<>%L&rBK>f|X zBiJfT*pKnvT0}tP`jrF*Iz5lUPKpY&7JL4sEv^cxWYkyLbc;WLvt#2EgieyCUHWIe zj=iTP?NpPdf`O@J3&=H{y55rg#%@5!l>V0Ya@tt?^=$$8_k(VD{f%hH3?l=$w&*u; z;6rM=JWOp(;ipAr(rsDiNSaB@3%B&Jr^b0{B!axATWNI$0K7%eOTxUQohUSNpXu@j1^M zrK76)35&*t$Ns+K`Z~f5WQu?qP=a;%^S)cpT2<&qv`OH6_lBbk-R(tS(09R^HggKV zW-*OyY;KOmtG|TrLSwK>u(H{DKQ@}Sk{9#o@4B0}-kY6nuIT0jjd}J`2leyHGktJ3 zckm6$adx))+`yBTep(5}SbhMYrarvR6v+cTjZJ;G3UTBO))(|XoI~`$4;RTd10XmZ zyw>7Q9~pF`ay0}_vOW7NOj=HIltJ`FNk1Rs3g?$Qc%@boJmcYZq@h{IsGZgWu`hoX zjZ)!b^j<;&$H!(MqFBK+s_xXEY8ZcPj!#+wT~h~=|C6~qj$f(MzY)~w7UYC{Oclt*_ChDBK;F3 zCSwX+n~W@zCSafk9&U9wu)QtB)0&L=hQB%Xg0xy$y7Uyz+M}sLW2ID!lSy67{Pl3@ zBoQxBpvo+z$2i_b`oJQW&kZ6>V^^TU&sqx*=IAL>e7Ed z%nBjo>q(p7n{Fv^SuXEQt9L#}AXRp7jQLbFSEDDNca||dfOCBKT*s4&mGMhB$GE(} zUWY%2oWV<)kbXfp_{&z-G)EzL<9wiCw|{1@PP7()J%jU`1kN{|+wYyEFRgU{g9R|O z%({){ky+4M0b~GPTB}RSl@q}O366}}xld@gWUd#wmmy9T#QhoetL6o}3A#|=j@jm4a0*!|h{CNHCCGN0Wg89@qk}dGE{ZOe9-5c3=;%vk_PevCa zm`?o1C*r-Vb|O#;WJ(~ftHFy7jX`!Qot#biRh4?E?IM(T8$;9}lM)R|H0Bd!Xpf)* zDNJar0Ar=N6m_U@R2M~jVd>=AE%^F7aPy1JY$e<%K`Ef;rUN%^(`M2AWJwy`IW?m?{znBf?9b|cDi4#Pq9r@oQ}L1q zP`r*b#F0N_b^5DCMJQ0<#AZUAtB^jEE?;^_qQi+_=V+A0*52K-ne*5(r8VKp!sLVu z6Yx94X|cv7b;HU9+TZ(tRK1_>pxa){et(Tl(n8kFcCKL1e-?x`M4U>=vp{xOOsy2$ zY3Fi>S`81*Q}9fgN}}1Ub-5ur+bPo4G-H(C;bSpJqY_~MJc+gVNlr%VnVj!y)rX)2sb5oy2XO-N|| zc@ny{dCn&Mtiv9xd!!u%>9I;h&L2{T(8i~I`1`J6@st$V5}G&(*+(TSuGMBGd5r zvNquR$?4j|V{Fqk26wIxFIHE2*+tz;4lbS42-f%mJ;F^i7Gm~VzcB;O01;vp77d(+ 
z7L+1O+)?$<)~f+bH|Ld#@cpIn$LNd(JSR(4-~Q8?yydUdImD)p#R2B9Ozm%9ebC`d zHJXk(Tk6sb!0CJx&kdWY+i_*}<@d zHo60JZRijYASa@xY|edeGc=Q1)8xr-+;8n)@DG2Nkq6Ha$ia<-;yj7FGd1Z}Sb1BaQ~p+Q(JIsBd&bM{RqktPeBrf~uW*CBpiO7=e3o2g2T7Yl(os z6IBMp!ndkJIWh0QZ4%|`M;|C=hg&a>?QX!W|HMd45A<@;f1TD$xbukCh0|-pMC#8H z;;v;lR-u>DC7}N*%U+=q%lu^2@YzY)VRcAUqGS>-w(Ocu26UQqJFDSvebA(py4L7s z=d6-6rX9IaS@x|y{nZo?)?3E6EKFDW)j}If$j|t2GeftaGQwufaLv07Y2Q^IVJWMS zglJN*oSxgINau_6lYk9ppT4Pz0ngo8&OGNzy%5fjX}K-Ae%o9dK|KAD_#KaNHaw7! zVk`BAo;9)3hGs`wy?kY;-DAUCK&wU06NVA-`Q5sZC;1Ev1#T2djz^Bbn zOEJqr{aGi!X4@m2l#=j_k#DLj2&s(gQ6K7Eb=5&jB6la^{e|ju>91aQ0~&hNy%tY9 zOk@$ary1!=Q%Am&_JP|gWXKL1?p_X)@8%jIW6$hAVQyNb?dOX>b$xC^j~k#L!|AEl z@h0RT{~%;ZOhiN#IBp%V_ZN`>vdAP}8!j3v4Knzi+e+^bvygNgdO+SsOyOE1f6& zyx_j+M%uE__falJSa+wm0mYj-31JJ>>~Iti+2ZHLCjUO~z4O|4AhaVWEPL2%fJ(g+ zqBG6#GYah6bJw%kI?B9&8^6xu!8rz)G{NXS+T|~p(lCle#9luqwKqq)E^qA7*%c%n znTEH1+3EkCSCwn0ium|uk}fb(s-v+l=GTb&_mF+Nr6I%m27|wcz!@&uf0d_BuFcKu zla!FMd;*pzVE6a=6g>1d@Wk+zJ0N&%SMXbDex2b){Pl-!!8vTyw}?oBjtu+ik(>x0 zHyk7hw&-u`WmTyTrdXAZ<_Zdcwqf{Ux_8-hL%hUnJN;_steCjWy$AdTXas=cCaiJO3B^gIXsz#pB1k#aVMYi-oDW zpbw;^;_%W1v+@qlwjiw*cN@(_ccQK^W z`wNN-#o4I2p{z@ilLqZRJgp|C8|0pK_#xs;sx+6;kfu5Kh*(zI1}Wv1qK(ZRYI6&e z%T}b}H}22tdcl6zJJ;(PBMxmAIvg}Xe)%N~eRH3pR4{s~z`=+cVJTWK+%q&pBDKKv z0gAJ;#G@4=hYS25$Uq?Aqpv)p;M`MPZG`M0X$?Rm(oRnB`)se3_cbO zhLj9((IYwGW=$XcCqRahguM$MI*I)`K7Ypx9)Kk0c@c!;U2G-RPq_f3D@uU8ToDkj zH1el@Ur<3&8a0n6)!y@ywhM)QLK$-s`GM+*dzG*rg>9Yp&_M{&u4Kvm{)N0gp?y7# z?LgPVhDg9PhwWDKabqkD0m3>?fA z3NYpi=FK!pkf}fv$I9@V{LfXp7PJWr?Sm-=(>1Ks52VpjvP*aTD^dWzkaAm$y_o+9me(9cJ|Ll5#BVfbvXltt_i_2mG z)$$AB&xSn-c3T1imvCd6(-X(kXW*RhM}0-|?KZZAG3>+11+ufm;of}9W|gd4(z69J zEpVQJp{SGwS#G4Qhti>zcjl>{#`wE**b}c3oZF@TGOfFM*hfl%?o(_Lw8jy#1`w9do-1a@4GdGWbtFK@LEbFMFE|k^Cu+)C+)A9u*2@x`$8BEr=QzR zgV6)o0ClNfYXf`uCJVQM*LbX&$k3{Qb}ZDhyfYvT{f>@*Z?1y4dC{3CHW14nZYRWJ z&V5-3qnTE3_Qj23t9fqhtXm@=5lk3lr-4T7OppItKi*c<+n42!q<|Vf{`IH*_gN9u zj5rZEQ^2{bjuKtZ2R-ElwKRMYy#>Sx=_X9eSk!6Ocy}a@72OCUY>#uZB^B24O-son 
z{@*TjU@~Gul#VmH>$)aYT9-`a)|xwLL))DCoSPTbVSDaq}N6**>Vrs-f?DbwJ61^t;a`AxUPdpmCgAgE{PpFxZzEHQG5R4s zHN?@Cu4a+vmScq#$>C^(#+pEWBvznJt*xw$u(Wo zp~BCWn?IaQ-B|MVSp4!qtoSC8CxZU&Y?*=REoilHb+{3$&IF`rOJUPLmJ?rvj-2wV z7jaljVzq{dQ7eN4fj)9BXO8f;$Lapv=LXqFtW?s{Ue72F}ijS{&ocnEi?)>2>Jv}|Sni@x( z?zzES)4_;l*YS|vzQeJ|cVo=>cZvwq_#5$7 z3OYKD0hbG2i2h-wJ;-*>$3pE-Rs}+{2CUCbeF0YlLdJUu#@jySPG-t_y0<5{Ss)Q( zlM#4f?{9>pkX#`?09M8YkR0!(0T811{SsFBR$!maL9j!5$IA;c`20c|$O@oOkxB?9 zet1KJ;GrQqpzG*Rz|m=MM_O<=nUq+k&#!Pomb2g{*;NT_Y_*z_l#;>G)zuxKtJfaY zD{bsOG?{+ectxXcF6|>8RlUf!nE2X|9dTm$cc`3b$tMqe6`Qke46Tdy-9-VcS_94V zmz*6=>XQ+jYfCQ6$aeN+x1pk8t#23)7<`4Yp!M8!fp44vi3bMf*zZkk|?Gi5#3dh?E1bmy0+;r~A zhJT;l17mxo@H@58yAg#utgi}#=0PXB)25}%t!%h+nKLcT$*Vf6GG}K5#MOjfI`^YJVHdIT-|xH+3wkMQr%EY4##=R(Y*poev!hfjp;~f z2j}U)VoH%2@)6aF7j^sr=eAJE=B05>(yqnjRs;Tv;|2Rio0T|ce}Z75C}Tp!FbD)L zJu%hLPKgX~W0;2PgbTJ{|ao9p>F;k3MF| zt83%_O&aB*r+u@jlAB~{0C4&GJ)2HyWI!o@nwEO`fkjwnK)`9xxg4)LN49nxs3%@b zq0lG-E}H`gI4W`N-Wo+L9Xm}fe6hyh$}n39G^4)fp4NK{wIdb$-~`~iB74hc9=>SN zXEt8|!ezn(+SgelRKliaXM1<{HuWzrubHbaF4^7Sx5>hTTn;aQDrUD}$Js`qFJExu zdOCFsqLI^~B2-QKzC&wsne3+09XMoy?&Ng*7cq~kThtQ)B){$U8tb^BfumwmeSB5j zUlHY}L0bP+M#cz`$^RLRUeTRdAj0V7C)=Ab(3Q;fYpe#(ZB%gRsIjii-So@CW&LS= z*@4$h&`V&0*VEUD&81!)1FLCefqLf>uS0%qK*@=iQwAK!fF`SG{)wZ2uYHYTP4?h?BpW$7)-u6k=hUoQ{|*wAytZYvg2a}YT^D6d z-i4sQYe|{BRw~hH30+s5mw-q-JS`%|JNAqtS_-4`r+sz-of1{ zg($ZNxN;YM+oz|7*EbhTE>*`Ce{OHs9`E7jm-|6e zi0%CCQ@qWe2CBwQ49~N_Z-NnQcWY=mS^#je!{OaA17bygd%Hmzmgfa#-}wB-_Nm47 z?KOL|yIXQ)x}i@0WBCrt>EII>nRqO}6*%>6&l`Yp5#s1XxYmvr*``OOih8Y9iu~A4 zBU#T^Q^h{I>o8s4mIQu6k4h)k1_^Oy-_)O4EuXXE8ZIWr-0WgSkD0EW5y5TyylmdC z5a{aBMtjIST&3m~LSg4cn*yMaI;i6taA@5PR{3MpwlYGp*BdCx;9Q&|Hd&1%{^23( zwBB}BDDoTt8Ryo%$}&ymgRz^+s8nUzKj^&->sx-|w<8K4)}7(UOCYhN-UIQRGY{1A86H)GglJ=zQe;)2GV3WdZI zk%&_-8vn295~cQo0PgcA&nIgWw;FF2`gy=R@sZ>sN326`r7gs`+BWy(IZHYwkwNTn z*Np_XE%VPKReE7gSciq02(!)r0NUv9A#>nYxBEa%?0M=a(sk|Dv=wG`k-!&ZWzSO2$c76hqt{D5jY@M3p$T10*JCO 
z>nXomeE++JFYR(zRymxoSCALcW-}uK%_iu+fuMbaaU9!q?=9|7bXLPGQG_Oau`zWymov_)zqG2qvjdGtx|&_&zmC^t zUO_u<(uT*>nCxFgV;leDL;;qciaHw|jNnu-xC@<9LFfBer@a zaP{;;e0xCaBM@zC@vr+ZYWy9Si}7!CDuixOuiME!&D=LV0o{_eHqVH3U0qfCsnMV7 zgM$E#vVLa;5t>9UmCZc;`Z9=!jOweAk}Xx2?pgMLy_{XrN1CLqtZ?+xw(Z8u(lJXRFC^_#24=zTZ&J*9!)z{$qgejkaH5rUu8`y zL{N_WZuhFL8q~wkt-#F}oYm+OSSVZ*81BX^6}9?zTz+$;^~C)>6n5#?SoLBJpDKNh zskKJ*uD1ouZ;piF(R0TVhwmo$hn&Dde+Oiz=4n_&)kkj zC8+<%s$fvG;MF=? z`%>BKl11q&*Ano9#rMDQbZ{+dUIsZcWeKr7f)U%Yl&+LGhCj!``d~A-Ho~t@eN>*mOh^EdwyK zR%4a@VV;a2B>W+x6_m*ug#uYMTyQ-$d}e-G&17OaoRB-y()de9M`zU9nt=Sf97_C} z7=x)YFrMwF`ZtdE4gXx-#fcdJA_x9tC^t21^mdDlp?x(k@4+{U_1&EQCz~pIL(kjG z3nn+9Cw_HxRmK3!et7NH{z-hWKRJ$POw0PB?@m=U0$`RjvhL)?$f!5v*rtFRGvq%q z0O5K|6OBfm<^y!S0QRKI??$83`Y_&ge#N5NUHuJruQsD4L@gOM8|cM-COg9J%sGxp z2@P<6S^Q~C)U?fMTT;ov=9$lm9HmBq6Vx7qyo3sI2coO|3uwQrWJ$V0>W8>kqVnH8 zF}ghk?!i@eCPhw=o=_bj{IT2TRCic+i~R^-iKDJHB!19kBd=`K{V)Cn))O!236FR1 zerHEzrgOw6OU=xyp}{24egp1KrxI_D(Znhfuo0P90dVs2V1LY9G)&Lz4{QbbP0Rz( zytIJ#RGjXCa!Oc66MpBJzxwZug>io+ru*;fqFwc7GmDw#OaK;x@Mt}%4{i4UGkw-$ z8HsX|P9S`wdpb5vz9OB6n~FxS{tLLs#bu`5mx5ekJZ&prasfX=gIQZqDacj`tAhe= zs9#raB)M|GJim9lt9aA#zF=PAXNZMMwcmCWA)ic2yBATntDo0j=}3pG!B{d!QlIc< zqH3gBlJy31fc7@1rU&C!V`yP)+-$%kYI`f4^*z3M@(wVG8Y4}OJStTgfpoMuLfbz* zVa)qm?W*dKwoVC$UZ014lTXc7Bm2o+CxCd z(9a~1r~r}aS>Gd-jS6CwV-<7Qjvcvc;tMetc&v?)k%d%4u?fF_x5T?*^lkkqwgtSU zFG*0EX^|hK8k_j6t6!d37hR5QWi>Yd~bZ&1D=VJ-Mo-+fw}G6sOPFWLgUK zB6(7(#wCzR8pP4vaL9@VYV z<{}myN{a4^qLZ$x&Pc=6wr|ioef=lH;gho*tkL#*q@js*cY0gR7O{lZ1+Nj*F36-0 zr_3nS#J?WrbB*n8zi53VF!ik~(!K!t$7x%5p5RvsEr`}J_2p>qBet)JNEDTglBl5dMl8ng>R zCH^pk?v}KsdBh9ZvQ(VP6;cY$LVeZ!ebiVdqePawfdvvoHjs-5ib$InP_Of!*1pQ@ zP6ZYk2Xq7w$EP3TQUfBS}QpH}D ztmouwGM$`YH(V+(Q7@{OsS|$5iH%_$-649laInE{rG+28KIhZZfYcpj2W-Zo?Yh=* zX!W5r6bPyzT?to|LZ-y-c~idO9n>VXLPC!uW&avYAH?ATy3cw5K{%?A;M$heXVYWA z1}{IxgBu~t=u$NqijAKp+xq9g(Kw9}Sn-WNwZZ{!O<$m-c8GsZ=i=V)r?8J|K2`D3 zC8i$;8|yD#@f&g88RAiZ?PZ6#HdcVkDC3;rr_yn(91g?PzQ?jj%GLxW{ufrdSD7~hmuS9Eq8k-)A0QZB9b5J$b~hu~(?c6td&7oSu0uET`qn6rJ2X7DmrtK* 
z5#xi$kM#PB&o(tiX|DKqp#cu8JHk+!vbNb4if1&6%C^u9V9Sh+0K$lRb6-VHI3%c44WNsSK$K5C2|^vF5hWZcv@HSQ z%A2XtSw`jf*2Hv@2&$IV?u=l`Yu8_;G~{8q*5qL4k|R2m1ZLTmrG&Dl80iC7H1@LN zCZt?rJxmW)*IPyis6%s(PE^2a#{UCFk4eAfuRC4~ORpDW@uP`a-g)2UOlikdp|_k8 z(6X*~GlbCg$D8ssH{6eSmK4Tv+%zos;8r?lywG^+Erg&bSf769bc|{Ox_6h?_YD3T zEm)cn6J8^xURiJN%=&qXENZ>Mf5;&kT#l^&U;!{UmYK$KPM*9vO5mg$3bq`mDB;jm z3@vjf(Yb*G1<}#`6OPcmgiW z8-R|(c$xK(K^;y|39$#We*KL_EKWq&`$)W!=k)FNySYM$PO3h~;jxSd6+Ce`Im_mA zD}>H*Y?y9Xq4HJV&$@ut##E>n5aFh?wF;F=N(o>YhaTmteILPWip*ZD8!jnrDen1n z6)|H~yM1R7kJ0;{XJP8`IRKOOqQv`yrW1_6=+(q>cObt;Et1TBvcf#8_E`;(E(EFZ zqYvmnUh+Ci+l7lg#Yw{_Dh0-SP3y!hEqI0&QWx+>N*6HfBX;Oz8NCzjCBMK)l|y>< z&1YB@V)&{_rAh84Q#+cDSbDN?V49x!n6ElIoxwpsV!9uheuKT~(eF=TBQLfq&rV`k zYtD*AwIljlZ&mmFK$?aJTs2>Q^Gn`cvdFIZ_IO|5lt{bzs?=%N)BFb_?~(SB#|Bzj zt7%gvmH)}dxO7Wh-|0QT@)@OO`eym$Dw*cL#6a67E?WVjx=iN(R^S&5M_TlD(?)bH zQKG?yeDO+x0&^>GWmtDST{-r~uC^aLZaAugLgY-bQ;lvJs$Gp z6lKX?y^$BAP3?IdoOva^s>1~VM6e9NO2b>f2JKM5qH;RXEE-;)Z^pNHyXVYSLsjis zm7HmIUv+hS34P|PlinEoi@_??D)|~)8yfh)F+8x^ zHD_hrFN@TMrQhyMas}aY5vQqS8G)+A)*cukiQvn=rb59*aHYNvIXQ))%d%)N_;ua0 zm@sLFRmPs%X%uK%4X7oa3P6gVBE*O+Bl_4)Ov7X!&C&IFE`ZR750`I@o%81=snsRr ztcKEkgiwPb?}e?W@kFqz1$$-JH}aUOkn7R=u~dqZOQM?WLPa7Yroz{8u7uMyPFS=a zP@h$ggACkGVH$CQpXxGdBq%;c6d;-;eGck5#Lv7&hmG@2Hn^5qK>rNCjlJZV}Ad{`WWf<34)HhvIwuG0JWi)ep%UWD% zuAO&64<7xR&z2igp9g|n1|Xn1NZt&SFe}xJ-J}Mn8QaBYfV%@LN~yH?eFiW~Zn?a0Av7&_2J#D>%5{tFiTu zt$zsc-hQ#@RT1bqydskQ$G7{Z67^T*k=yi2Y}SC(l2N^Gu0K1e!Uv38R}^ULo=@6+ zfDK@DKPw7c$x!cSr4yxg(b#G)_-D_x4gPZ(#tE8D3OV>af-k^b#=5SPCf}PM&qT`L zJPJg1laU0kG}*ojQb7_0Qh&?QSD!5qSMM!Y%hGS~3v5#7qjYY{2m)Tu82he z+Z^$WYZyQ=vAnv3Tm#W0JIoT|pRfl(9d2guTej)nMiT5Cy=ED^uvBUh{A;gamXjv? 
zsxn}ewjwW}vr$}anPdT0$`L%Sa6P}WPvthO&fczDzQX?~u?01Sa=Lu0B64+W+HFJ!qtaoM&dJDts zP^PZBvvrrt5p24a@A0K^LQ3?he&2H|!MvwPt9I5+w(L_^HD@tl02Ms~B>w`8r|T$O z7pJfK>=#kDb<#(`S~zpmNS~?^0w>N1%#6_oAG*t;zBOJAz?e&M=bB9pR6mPb5lDaH zDeDtH`KC;t^OA+!7=HLKjaTGwHR?4AX9(Y_tcWJed4vqdyQA_1MW-Gherx7u;9>T$ ztKQ-B2SCqpw~>iTx&inIDHTV&uN~@cTp#WuoIT2^k0hfw=-Y%m8=VNiLK-7P+U9}J zZ@?VmpAv8qdn;c<#Bh>4RwMCeuki8b4?1w;@3t7q31rRra|IAdmPg#*gxZ63EkHI* z-zDp^bd25KB5qIy=-uf+xX36iUrsf%3_xQ@g!x3#I8mGS(eT&C9bX9WH0kk742mbl z7JyS`TMqE{w4X6_tkczv0+TF}w(P4UHJtc`eY8EiQ1X>jWhDOky{+V)P-F|9#{IcQ z>yf2B_yZHZVaQwuIa$u`=6GjIb9*X134LQEwL!On6LH;ESB{83)f~=-{)YP)sn^isu`vjwy7YMAaw?)puni4O)9M zmy9cL?{&0!aSz$xMO9lL?$ubise%t^{>+(^I{j1`F=~Gj2PV13KQhcVZ@6!gfXIJC zpCmfT*~s`1S<7n?_-n*}P*50CsJy_5j{DPR7!7V%N!d0^Q=I}-|A(os4vVtuzLt`b zMmmNXKpLc^yFozdW@wO-?(Q5?T1o_@OS&0alx`U5kZ$;HpZ9&A@An_fH8a;W_c>?p zeb!og?Tv?c!n4iR7N$u*EeB;9eC|~%8(rmpBu4+GkN+7$$@aHR zLxA#ClV4n{3CsPSVPRnUaJYTdX!rW9wHmRaVC&UNz5PIjSr2o4tJ!SE2JLQIVmIG| ztfL1a(H8wkykYOfPr|bYp^rWghiD^#WMF;(rQ)+5;){7pdBOIWMQn1dWxh`jrcz&> z^oBD<2s=;oFBvIyMr^8^?bSs3?_VcTd5f{%51uaHG-J;Dn+4nierVSNhAnV7d2f7s z*$7MVg;U2zkn>(yTrO^41^h}!I=sXIY;zx3Rr~-0{so05BMzmKIU%$XF3`%R)))xi zl}RP|#s7IyK&KRPkgscu2lyco_MhFWhUZkou*)Q>76$!iAb@JfGx_ErL5}3rwHx?M zKEX(;%LnRZD#L_#+_1VcChiphrIH;^)d`jn?IB=BMvKoUrIu^z!Cl2!vnz>;6rR== z&w|Vdrkyir8Q610SEJq=Oz{xT`Wf(@RtNgLhpysK2%olv6_?J0GHD$r5Ahm?vT>!T z)UXOe*;X}2r<@J)L|D8ZIZwSzy+6?>+C*}O8wNvd@jV_D|ISsYVA0GHUN~p$?@b5B zBTP7hWP4W~q`C$HC)XX*D>}d_IE}^m_!Xk=FoG#V-jeFAVw?i`JB?6e4$pnW)~_hF zvw-=nOWBXKWVIs@;K;5qC{BrU+(BJ>iL)78gZ9ZMe2L7ha0jIqM(Czg8UI2%Uj6zy z4HAvU5SJcc+qQ?$jVYO4Z_`xyb@!hs7l5o&Mtti({S7T~fj@2kN)p>xn}j&_|2D^B z5FrBP${UBp!gEugSp54El?w9bPAcUNq)z%cM^2$SSr6H()Ew(3JAae0> zQ_wN`!2x5PK(vv$zbXrpgZx_E`1FL0@T}K!$`chsizV!HC#53)9Wkx-%Ykp%iVHm! z@;_RmFE^O>VzhT^&SsmBmR&NzK&Tnp+uq~MbiOnpQB#uO-~1BxD7gZU+K|YdEo5JtoT)e{egC&h2bjkr_FvVbhTUmx%|ZS2f7>hL z?YXOf5Vn?$Ry3QYOzo~(R{YK@CeZ#+UAD{VkkAG(26!fG&+vD~Q>X|zZRjLe#u_dS zgYoGTH#Xw!%fP1l34~8qh%Sy7!Z#gUdC3WnN&{heJCq$<92cOpU@_H! 
z$vU{$miDQySt2NSh1UMAUuze#ujNn&;x8nQ0|R@Rd3W-;BpF%E?VeloYJF&%cP!nn z^yUXE2||o1CHmlZilU8~zQ#|{OZvhrXkWB-_qmP|bW`f@_)p1CM7%^q;x}eS!JCIZ zPb{gvX5_%&4OV_g1VScIJP7AEe2Mgz;sZODUeMx30|p~_L)?I`KVj>ENObmeIw%4d z>mmdD7=Va_TOrCZuAD9ojMtgDCynP1H?-vU@8z#(FT;Ap5ZMT(KpiRU1!G4X2&D)IG5rRA2A|y+Xk^-`Orc0pHwW7s-idMSw-4FAtO`(eu4& zSM59gd*iugBsr2_mgs}e(9ogPLY|4uC`oo*_ zg&fXCTwjA*+U$kDRc=+fZYF8~?wuycMlz4{`n__A;v!HbGme_$_>cPXUkRDGs0;v9 zwYQTDtWHlV z2?g8vQ93%KhB9acsE~FY8sC!lRu$Xoehw65NJ6K|L&zhjXOU3;YX{U@854RqD6lyJ zn4!XjcJzZS$%iH-d9ai9d)eU^qmXwJ;aS`A+7;ZSGNjVt<@kCbR8QpM%%$!aX60N2 zYA58|nt-u5Omk@OY=L-45tJov$?(vWizqgyd^F|_ojA*L`gAfN7_HAQhpW8;3_y`E z5?L({MBisy$jILO21NJHox%4rGob_E7mWyXsP*I$)e@ToTbKHAXbZj|-6YlK?w{8A z8OCBD`{S9^FJR8{F@;90o=XelU}C?fIIG%2Ri>61|7>8!ONdEXE%X1dfJE|USq>^G z(W+6?+VSGru*lyo!8`9N7SsHmj}k$OS>$@XzdXn*nAPeUm!St;^F)aTRj7(KV17>O zu%~nnN>IoBj%K1PVRCz(7Y$0_7d{E_OSNQ`{}tzX&SA4^JJZpA(t_=AU6X08Ov|S#%T%gn5xDacmls8`3usuSQ_D|L8y&_mzy(Z>PN1G%6;H89!q_ zxUq6i-+LXX(%8^H2Y`hM!O72F{Z7Kiz*w2B39gjQt)gTm3+^O(S_WSqb||LH)42sB9*A zhl2+yS+hloPWlGn1&8LpByQa>4+hiMBSI&R%nv0DtP#y%#ol&=cnW&%7ca>*&#yL- zGi~#;I9}Eme4F|i_S0z|XPhn6eW`MGL}QLTRUQu+BriE`(vY8nn#}plbRJ#mtOc01~QI(r^1FHgndQicA+k9)j)MK zPK~R9+mhlae#`BK1~)$VN}A&0B4oTr!3ykY~vwC^Z6BOmUGu!FjK!vpM83L75=xV^FrruEw{H$e5Bq zk6FB-_Ae;CE!mY}qM!dy^33f000X$ZoP*3r(^q%b z>FTjsi{46ut{`u6YH>e4n!|H;$zXLnYJ%CcK!-%|Np_w>#yC>Pk|li29M$ZgUkYp? 
z0eL=pB&}>&GIk@OHN#@!kK5N(v ztJ(aK&RCSbJo^nRC)k|$=QTYhi=xPJX7~JidkIhr;0)&#yNKTDlXjwc$_nvw`e1$C zs|25PVzP?pul^?H(e8}eLx6^cjEsng5ELjb4#HJJepi#CB`XkqgzLx|Kak}U-XoRQ1cv#c%>)l~iZ|+7 zKE$(8NKG6UdBof$DQ$;I-#UDcHkcdRL`K~4im(rL_(e}31A3#5-81;A^vH~`h>{$u zbTYjQEpt}RqazTqfPMru1x9D zd2Ilx#=Iaj_0d3fu!Ig>D?$K@yDio83D0jn?NJ5HS~bbjB9=|btUI4*zKLi3Ci*8{ zS&0Id&Q$jlww0=ES&^E+Jzt>t#M6-c#6l5+LB~dg7&R0V!eMLxw2xm=9CA|7;J^S0t5feA`jhB7Je8TU5i3>@^q%Ir^ z0|+s?582`pCZ>0O1kzZWQiw#+MU9}C`WAU+IsfZrSF|Awu$3VGSd2~X=O_&lPTr~h zW{R_@3q(6ZxPB2VDlq%gU7{H}#n11LOBVw<=xQJkF7AKUD~K>Au(@Af%x%l@WCpX3 z>zN>+Rw|mdTD4?J`4MDGbQx)mDQWP3-k%cttnv4orYza2h<7xJHr0}S474EJ)yJP1 zw7Dyw*%iC%VB$^ZjV0MJa^sF>@P&b~qR4E5u&-)zEy{w=djVbIvJYR(w2OK3j(51W zkWGaeHAm3Hxplg{s-}~bni#@`Bc6xA&Q?dV2QtHrVpb9dL(kw12O`CvE&{U`CpNgx z*Bj>Yga>jrE(MK?!P#n2XS6?z_fR-KE_PHNHpkaQ9Igq>P70_+3H_q(b9lA8Q}Dhc zI>mcjQtNn>Y|qV~I&8v51PRpG$Wv zt5GI7I8}c@8K2vGG0!0w|E`WYskiC@>pRWHl#OVD98FFVDr$}hqp{SlwF(F~x%oSv z4AC*Bk`q$0A9BJ^Eko(FyHIt+!EhWULWxr!{KlRmVULNXS^hw{Gz2Evu! zLesHiQ#_KoN?u^#U2oh?)Xb7U+*28+w1oOys7g-_29PlmN9jP=rC$+q%omraJ#=}G zaVy~^+f#@#+^P%i=~%v!2k{gVE4r(Oa^?GNawit2&BqLui*34#z8oZS+qq(Hfg3$G z#BOQ|l*8TPdDI_D8%sQZn4wfPT~u+xw7)R={c&nPt}o%A-|=@xb*LlLBHeqz@vx6f zDSr68O4Tb*@OSGT34iKKHmDZDfDpO#41|T;&R44V`Y$d=dJpEK#r72!_1IH zKJx9qUjxGln!LM`x3eH8-N@vXPwo`gE}MRCroq4v_r4g|?49iRn?#P8Uz9)T{z{NQ zR)`PW`TrkAkAKEWlUfNwUum{}3rZY9TSI#`2&VJ$X>!&!GbAsh+4ygaOGLy2IsxK6 zAEJXfa!dJlV??0zQ6{=fs6~oqvLk+f8j(B6!_BU{1zn}akOnV@U+qSI=1j&QRoTB24{p_Ouw7Wf?>gDyDoY3 zqhv0uX$wmEf7TIEom(0pVHofR8+~NKRiWLYAD@2F>djtI52H=_X|u1rRra|yFxPS=yiXy${h7x^;i18 zw$TZA?vSU79TzUc5@n@P&Hy}+#y>LU?W>%sV5amM?1B9(9{e967tW!_4z;63w0J%@ z%{IQ>Al>=ot5Z;|e2$Ef+MPme{f*9~ay*xjB~TAU7>Y1et!o%;EjeS)r`e63WVBVI zUMUC!1?eD}Vg1dhUFil$-RbO;iz@rO{pT>k+qIxeiJDc%j`6#nu1ZRnm8q$z;ZlO* z(#0tsUmGcQ^b3`EJP~vJwC%e*;t)lb{7)7Kp3PU>NKfCm6;*q(&5Rz$oT7A*=xQ00 zuKV2cuK~Kzp59RT@`AvYWi&0-jtkb^u{rN0)ju65aItpEbu*llh-Cp;+u7IW-a|OZ zf;tvK_rcYC@OIWa)Ei#60;|fOZzsRa4}lZ8Cl;+t?ZoSGD4V-dtYmQ_!wrQbfwV}g 
zxbC6)bL1#Y2lf1=yA8*7gyD)Bu+!i*)l_Vnnscd01|?qma#eCOkO!lLFXJM_xU1#8VqArn)_0UWJd1lR9Qz{G-I z*J7oC3`k6;zoo``wowir69NeadN1hchFs?eJ|(Fi)!{^Ral^0m zyhSR=^mWYG0pZG}Da=y82BmCyiY%HY=Fy}$s*#hUy52{m2&3uhv*riMT#fE*{~fXG zHPk7h-NeH3W-_!xV#CuDVTsO2Ic-6)5`UepP}I>Md^41iC~83F^cJ~iJVKg@s;-xD z8(XwGee)Q1PbPGl9RiW+c#3r=^o5{=8P2I7)V&$DyX?K~yqI_W9~K}hID`LjK)B}? zN2_-T6?WyWfeEyfuwO+8yJ9yNurGAI!4jzplc77x*~Z;g(SQi7llHyo&sd3flwbJs zwpSSS+oOI{F61GOjXmUYly~&0)dJ~;bW3*36EwGaoS}-R_Tg>fJ@7VCW-&1&})FWxW76-pq|&6h5a;!LBD^BIo59VW_;Jd2vwV+4@Y@)tp0e4EZWGz7n&ZFgnW zbmHYojADt_ae^y+ij3KE)q2oh^pQnAkTnbXF^hbvLz{?m+BlP{2tw9Fa^(M3n7_|Y zYi!BB?bEU~$K!Z9od{Dp9*fV1XizOswYa-r!(+O^8;+tx^K#~mZU#gerujLj!;K~6%G?K|)jcDMZzQ{T zsO&c~_*in{!zTM`tYa4>-}NpNqW>*P`Tkxasfudk(4c6@>Wax}nWRy=ehA_pQ!gko zDRG^{17XXf1khiXw-{qee!(X8P@1la!i@i)tIU8>gvue>)$Q{#h(VgJhW3ah?00 zbrO1;QMP>4m&ES8D814LtHWa=Aca-O3$6^C%9j3;5J{ZSc$FWYAW!C~D;VW2sxn=o z@HsLKK`)K#upjhR#@&TGjwJJ*E|@@CRI7qmQxLB zILR$&7(U}6A|u>7(i}*_JB!0_*7I}N9EI5?)%b&57f}x$Qc$^EE&Y&!!;X&&^z8l7 z?rwYA_}f3;A04JTt{9sh$zk6lUA>z=<}3Hv!7lGgLN_re^sf^giqm3iYCGtt3=6bF zSfFm})TAMi5$Vh(mwDEzx%(G{Z2>d|91(B)1SV2YlkudREoMH$`5@`S9#qQGl6(gP z>KZ-}5Nsh{!7i~wRYfjYDP2bS6F1(sJS&r<^TF2|rQ^!SY4>4!vAk$f2KXrDMihei zz7k!CU}51H;T1655Kzn@X0pQeMQ`uz6qFoG8)zcJrC`Nph&Q@o^!xSP7)Dw*ESWnJ z>0xe{wT{dhEu{>lTjpr+PCnt#3YWB^Nz7KrQm_uEw7+U~No8&N+64LzL9-c?ATOni z?bS!ZJ^01rhhow6f38!1c6pr`tqNN?kSd0n#2;D_tJF3bIPppPEwPr;RwZWrsr)Vt zHcf=mPf39?SXL5aYIljZKVF>WVFS_r1koQzI{jZU5k4f*vU z)Uy2q?k#a!eB#LA%L3!h6AId}8rUk=2R7bP)JVZZ$}~a6VdP)dh%M_2j!3${%jM&- zA~>BsK7rM*y`2_ZO!yRd#=`_w&v{6GB005zHVW_={ZrLx?*k7eg5vI`XSMsG>t-o`=#c zw3nkD4Y@|Dy%VBQN(il4=avXpOq-9^xUM8v>b%R?glM$5g?OIi%jcUba#V2w} z_Hnz?c;yZxzCUsi2Jau+Cc0)RK$2)UP#n?~_<0`kzcHg8G|&VUB@bGqRw1@vEvf2s znx3sk?Og%e1lvdPTxQfBT<^f5kp8cH+`;d-=P`9F$XPO% zX4?Xkj`<&Yk%e+)7HhH4Cx|ODy>cBWhWy}owai-3yA%oo^&AtGeb#^|lP6bV|1{P* zf2%pkvA-uHZ>X_+D(#(1Ylfz<81>lG%1v9xJ8rHI`i*#Av7)jwVdW=0;}BFHlw7o$xyF1<_MgfNR27|C!>?y#^F)X2xD449?P+a4Sq z(or59ypE5@PHN<9BqO)8V{7y(znFn~VncgcaEO{b8BxYRC(z4^fBfGOFh)h}j!46A 
zOzJURw4FQxj03bxHt~Ynd^){o=ilGu(dg4b>9x4m<+u@Ey8i|_W?tIuDLbAowzBbT zF@|L$BoW`-q6g6fUm{!GK$NCSw0rh8P`R4su_BeHD9oL0E}2rABM< zx^SJ*3-7?WBvuD`i(|zga>hj9M*3o`3j^08xq&ilS#%7!NCPcKLn$-Vn-Ti4@po1d zq~IqeqyVzbO2D2Q{RM{mp!b`p-=f)|kJeQp-W;LZ$ul{ghcO*NZ=T-Bv(jxpmR8;(#q}(VDC;biTKKbH=)PzH6 zzJsRMeTN&vN_@rTsJza;&Ykdft*>(D+!S@22i47^r3A;DZ5j%$XAkO(_qXb`qlV!t zBX%TK5}%FXy+H!+Mv8bQ&OD|q4i1x4)=xh*j=oDYX^-VVtd4IHzcj&n(W4pv=>e;k zELlMgHXhaUARCgEaYiqc>J3EBC@HLWMIK7b7HX62JHZ29k0o~!D1MiI13>l^p zW^dyM1;xh3@axFPCZ)#0Jsqy;XwL-2cZB~Rx&x`>R*!0B8+_-W+g3em_ABLs3LZ{b z(3HO9IK2^BnX%0*u4Y#P7gaY*X@HqR+{eJ#>f3xyEM8R@bc?SlVLd|RNejv1ALK!& z6wC7%7)0u}3Fk|)bCuBASQOo&5tTVIJL3nnQ@z{xob42Hxab~J69GF^2ScLw<(Q{L z>*zkh{#&KYgGgU@ZLxUTaIKY*&}73n)zHO^28qX_bCfCMzT{NlfylvGg+m$wt6M!9 z)b!-{jh}B*Zb0_~&b9AYJ0X%2ieRjoWfFSAYN@fI^n{^TcIsYB2yi~9Eah>Vp@1XQ zjkif570jqp@(i2hE>pT|Cr^B}_qJMCYv)a|OjIIP z1yu$hoHNhREb7OCErowI{nBry#f~x3D9iMeZv7I@!BW3o@E12bCGiM1@uK)+Y&8`g z!Kq%hhN7#9t$$QQbz`rBCLOLhdGs{IZMBOtQG9}pVR#8yk_ zw|@X9%tXwY)E(7Nu0qh)*T>4G4&BJa%Of|#qF*9&a(0e)a(9b&N)nsRrl6$DvDh|o zEUl@l;Yozr-WYyW+^Ay=!C`&;Gl)d>DD4hQ%dgO!6_*NE$yz{Zcb@Tg;*iLm6|p&6%?J=IGTuq(7t-l(N6U~>veZfDBEdr99_gh z{xifGAS={n3r_S135~3?s_B$<__A7@ z5<2%Ic&u&y*?-2cFU3S4&!oMj`x_3MLm;lL{yfQ}ziUxtR^;{Y4xBl*k7|a=U7hCB z>Beo!4%C{o_5pHM%^zjR@rSmP!VQ;H+Khe?LB3KCWw};8AGSODg8>x7iLkn3U2U-2 zq2aZ!)J_o3dnBR@hK5+H$hOOafFnsF8`tEgYh}E=hyqOdiD;>Jw+eZ0x zC8H`lLidYA14+VWTY+{mt++dk^zCj9BRK=y5MuWW&;$dkt#ik2IgOZ8){fBqIO#W= z%raclxKOcXX#qrp>$Pm9JKIBr{4-z~q(RBfc!cbv-aNJ75vKh_ z0&p8A?*3_Ze&j3xt9Rj8K(lgJ$AMw&MfZ24s3z?etC1cLLu9dKR)9GEPZW~fM#S;sQD6CA3t2bDPiPO>6k)NAk`Fdk{zOOi; zo!xU?#QV3hSma%!CrAb{Q07q3=1@@5n!dNOvP!69Y|v_^*v3qRdTKxac48CAS!MtL zb5M+DKmJ0ef5Px^moWiAj~PFRGt;E{86FrKR`NXG#pyTCKB6{usTG?rlceAx=ZFWg zBtdABqll6>aFs^vM&4IU6n!PE|M@9M1a|;#Hicw`s|V|ghHOhW{w5=+IPu-r|C!}{F;qN4YbGKA z;(m_rfz&Qx4b0X#`a|9QW`F)T7de>-QS0Uvh;u*XjF63+n08EGtPxx{Y!n=&gDXJ) zO}Gk7xjQCb}Ahavz&`p!w23LTA4jJ`kWK zcj{mw^+`JceV@)L2ZtYrMS2IN7W<|Pe}7TKpUA`}(yL4OIkkZzfcHU%yH#?}Ht!9b 
zkhnbOzQ*$9JtYM-DX`2G2kj5KkSm7|BeSu${6Ov~*`}=J5=~aF-*EZl^gx;$?&a^B z>Jwr#gA%aTkWhIwA!Nyzjc1p{C6=^Ag_(I;uNPcgmYSR2rz3$Tn-@^NWT8;oIJ-+{ zPT}H<)#7eEz(%&&PYM<``JlO{jbXr(eE(_4SZhZ?K`E0EJ3UY7cOA1bc;=;{_-L?G z!&H_2&cHs{a0b^tk6>JjjuZDE5-v~&Nul#Og@q*k_)0g;lo6Q0;n1FUW@bjuE;FNk zO}};8c(!sGyt1#T1#I1{@z(SrRl9$cTF-Rss5rpsMQM3=-W{`3&prWMNR#WXKS zz`CKl8!WtoY6;ui%GGaL84VKI}mRS%9eQ=CjB%woNzc-^mL+kpXM|l zEeZ>=tD06HM}{l6TM2XAvJNl+Dcdu~aj)NT=$ow&J9l}WL3B)HH5_@F{lVgm=7Q%M-zf6bPFpnwa@lPQ98BN}$>={rp#<*oi=eaiku>@*i&MQ#+Uw*pZffKZx0H zqeP!{s>-%+xH@uw9>4v%>j6WAml`N^Dq#_Qdku6U&-R zJ0%TvLm^H4?!phzF2RODi8nNmZNlNX?Ks}qTi2h4O(dKlK32$yxE(+J!dL9xKWL9FueB# zl4eU0*rJsa)keneJOo-y|}%~raxJm(^PKvwu-C=@?;`6-ZN8v zpO412+U;l;ckRy|3`HY?5s{R!OH^spzG@1*lnIT-vmCOXnkrnn@(a&bL4au_76olav&7(>hlfCe4L5>)Iqh^lP=M~I-?eCM z&GVqVc6BMq0*UZcsFovk5At+F$Pxy+4wntJH%4rF-#{RfM$iow5I9vV{Z6=w@0 zQPYP?4CqcXJfqm4rp)$=)PvvvMx9k_rm|c0#~XrzX!-}S9$G@3zWV1MR7eufes>=p zalsCGyPFbwc#<>s zu754qVXXJDXb(#tT6B9Z*Hy=eVIV~Ea7|@CY(MzrM2UQ-a z&2o$;K`5i+@u?7gd=>{`qajo4WETl#iH)iDN!)}IR{po%FFN|n7W9|$Bqfy?ys`%$ zzsTHOosQT!ZpqRKJCnO|CRz6JJth+Q?ix~;x74X3Latr#LHkR#m0ahHfqgB>uu!yjlBMx6TYP=odKGua>nivMRFYmJ(S??0+_1I>!#7sF895**eDLy~6 zm_Kp1E|dAi?=55x&NSVq)hnz9NJ|SOAbBlSA5`Y@B%O^@FLIPdZVs4diIh+u&gToA zKfxaK;a5Ak^EEC=wH_a5W%u-|gs;RtYnm@K*|cPr--!g+QzsO-`|}6KI+L<0mtr3( z#wl5qhy^gqRF}NeA9ia-H~yD72RcS}l>YtC7*HkH`6i!#$N=D9e=I2T03bVfh5XAF zbHQnsmu)Ek%AO}Gc?N#5@r~tSDzbNHWj91_u@-IdO7UDSz=uGIaAne}W~4;8q<4LfV@ol~hia-@q$`dUAR*vE8cTW8rd zs=)8PzMT?t$X$Ii-l%=Z(#DrxFs0lZp+9=}>d954Ht`PGrqv6UUgO=b6H0(a&y8p- zbG8u4In{Qdlw=-sM9mz}5l-7HHcUhs6&e`p0WTkn`qlgoi~V0Y4?>Xm6m6nCx}tN8 z#}qsAkHy)LuF*~(uI>mtixEu{iHhtPpI3UGf52mluBo^Iz+veotM$^RoX>2swy|BY zz|C3ej5E3xDY@?-)|lXW`e6)BcfPBB-KVuBwK(JCJsiHdq;! 
z6l7G<3nM+BuU$^#*x5CcB$Sms6~?x${aB2id@i+Y`L4ziDV~LjeN^|l>S!v?hFooG zCv4IBI$*!WbN<&4idR$%c`X;L7w0z#b@4|FeFZfhzYn;i@TF}^O`+CnDHcU(QF>Am zthB>(pYG@L#KE5xL)=Bm-j+pM4q{aBv(4znxRVQp46rS$AeFK++FJxXY>+w_a-ojB z4SBlwEEaLE5qOQE9neQGanq{$MHmVx`^OzbL4rq{Hvo=cM<(5OK-W1sVpUib zz!sqnZ96GhBfynDRx=AGF-Kw*_vA_?*Jr*={4uYb`j2c@-eUY0j{mVI2ti#<@m+(P z+AXDi=guECe$(p*Pfdui{>3$@`aWCyT}V#IfbHJx)yJccGeawcWE6cTYF_iFM6_1g zI2t1mgS~oE9pR-uhBqAvN3Nt5C|Ri!s+4x=@Yii}O;rT!4MD$6ZdnBrWx^~p;^}_0 z-Ey0vmdisHh2G#bDm8t{hzWP&q@^i*IK$L6zuF6{qCtO18oDyHL00}u=IG4+cEe}f z<3fJS)KiBkV!){8xLp0xHw<>QF7j#e3Ww}m&QMRtky}@9ktBw6W0=oHDqPe|n9nbb zCeo$?2!m!V2RG49*AHr)Q-9|7cS>*Bc3}^T*{Dj|YmdiW@!7H+5!!mU4eyCnC&;EF zZX3U-x(>Vm*(+@;|0C~!VFv-a0{L1u+Ezt2*efgM9+@`^m5`nJNqC1q~w^AqL(o$SZsbK7p=^fwc{c2m$k7Xe9 zEBDwsO*0#cr1%ZDG<%Ur=ZefcNZqZ)ALZ#J7AaEWMvsMwPvnVyo@sw9F|C`?Xhks3 ztmt#!FV>VxTG*lX3|s2E$+Q;w&T}-|e30$)?*0F;08R6;q$eSwfU{H5h;&t|&rbvA zn`|o|%Gv~~Y)TPgZ-V`1Nzanb_)c2V7+$?E<%((4OyH_3Q*JQM$fulbW$PYJVZr#T z9F*rn3kAT#<0dt_?f5q_M}a6J%st20VSmLLt;C-LpWZ-?HHp3F%bCJ*mj{T#s;ggQ z%;pT@x|oTyQ=KBU=gg%5T81frZeZH@@t(bK$O!|WpZ4!&>WZ2+$&>jEuEItEfq;5f z&&tvf07k$9yoIL1k@1H#k^@#8bLA(1IVF4#R`&^nm?Zer07Y3n=8Kv&; zS9G(Hx8(kt0zn48GjFi;LvJVSClIS5-WOhFJ+2G1U1dEn)Q2?JV7B^)0u-c44ktGI zPwC{&*oa}lm%5%-bg-W2*N(os7k~25Hhq3@358`ZOHT0#bCJhN&OtqVNj}K*`s^g zK>UKQ*dl{nC%@?mmx4Tu6DkxQ>)yCzWjLDjxpGV}bH1x!ZJ;^+&G%?>pF~}6*C>HH zV&9MEO~NfL{cmyxRA}NDGN5_<6iSv7MU4`^u<^XN`k*c}Pw(QwwX0)vE26)J1V<~v zI%+cZwsh@VVUbkDG2UaNR&jx>TJe)(!8lBPCn?_X?1S6It58fmRtSDo&%_(dp>ekn z{BhSluLOhcSjMD2hDlx~&OoP{7gj@5tb}Z=tiMTqoV4rj+=gjVXbUT`*nPdVb5(tu zE+JyOwnBS&urgPjwFf`Bj@8Wh0QI)%zSPda-jTvh^xh*vQ)3NHs-B9j%kS-h=Z!aE z9OYc13CWnE>k4@!ERiy=x%@mm^hA3jI573NTq)*--e^WePpeC3rH_{s3h|$k<~B1~ z9fwjyKqDUh)0X*s)Dok;IqMyJ8LVL&BXiP6 z?KlnFBn~m{MSlMBQ0pC9tyRtME4yr{*WxL*YRuu!^mjZP=}sRvi$+9(U+(8 zu12JN^^NWabI|D$TiOhR81ymeH#5xmX^etst~1wcT6GDDP>si8oE5Ws@N z0B|!tE<8!)WPuPsBg0nTz;45qh2u-$o*;s-MhkM6g*hY|>%xfy-;=}`fI0#zI4Iao z#6mI)3Acl?e}#82HX-QC|IW=_sUj~NNspS8UtNSR*zwLk{gOB;#gRC!X1UX~ie0Ul 
zCyDm-O$8JrqpoWUR#-B^ZORCBPjJ52_MHn!j-w>!UnJ6!90%NeQjjSk0sY_XasWgB zHnc)+r{xfm+GjF^4D#r#@9Juf{ZStP)n#P($G&9N@k1Z7ih#3 zrt!{c#fp{j3M}g2zsZRgF=?Hfxgq7?)lP>2dOD2ji%DiBQ`oA2td`dZR4F|Sm_~J% z++XvQAh#msT{^5eo*TjAlem2k#)RJ=)y$6v z;vfXmB$AVJaJM=nRC%S6b{qRC>3zp3a=8Sqk!|L^PgAp4%SS`y%W0t`hkl|^wjn?Q zR_$g6&nktlyJ~2fGqqMv6J$?nWB7$Efqi<-HBAhw2R05 zpOjvM8&T3DNc>a^@)dam?;UbD~y{(O|(P?^#dMYaVho~nBq!Y z=+VADWw+qQpUuiziDt!5i#?^DlvVpkl5RDe8)0E-FDPXMlYJA&%_ZFDD?!A-elWUJ*H;$K$x!nHMZa$HzB zE#ZU7b>ix(#4N^fKLb>ZhHcDo7Ms4Jqz%%mt+u3UzxWS-7ozFL{o3A|?+cCaIBsO?QZ~}h=Xb?2kq6@goX2a?AABgpn?@J4n zQ@*y>x(G!RyKge-4&N4tz?qwTr8u&+(PQKt-abS5MkzC1`NW_h^KeRUKqa)NYsn@F z=v@hK4%pj`#p2OLM`CYCc@jeqV(wrhyg+4vu>r|!!3`>Uq4O(U+&h>=8i5rT@9Xl^ zu(_B5wi|px<0gjIJ`6{sDJQs5g*W!)V%;>i0;ZQa>b_br)oJfvwlF3VswD-oNQ(D$ zPY{0$){!PUZ9#lcQCO>P`Y(p z1cVa%=ar@8O^Q^+w}9ST1|*6{%ua-sf%{|Fl{q^c`8i_9+_xa|&AZ>eH32a`fJs`m zj=mXIpC49+r=cH#?x}Gm%r#qOdszvnI=nuE>haO~un-iIhgkf;J!5mA#SN{+wt9L6 z_|Ar`JGSa`n)*CZC2Yl1Fp@$tWT?|)lUtwq@kA5P_CiW@Ydn7nFDiy^t{2-uYTEs?RDZd$>7sLoASm%Hu(J)XQ5XOpYj<$a*rn{Q5+Ux zxw>&`#mdm=#{EuAXO=VS@r*me8`SzUDtLysg|fJs#F#A(D#OEl=#Ld|Ru)#|R{w%) zvks!F^OyS+m|lc+%2;k-(HsPXN%U=YMfdL$%fO##S~2U~g6If9-Gz(WsI{3bmE{u0 zw6V?reg!B#oNVG`KM*W7_d+B`Uu7M|khMf6ra?r+j|0mYA+9Z`-@8^OH z_+WzU|3}q1hS$}#UAwUwH;rvKX_CgaZ8d0Y+xCtdHMVWLvEA6Vzt#J}`+R@$uQ}4a z=AP@CW1M45fa`a%*)d&MXo6v{}P@0MS>*H2xi$BlXKb8?Kh&I*(Y2UWLy&9(YZ{0@U6>$5R$J z-*@}-RWNc{v;Cyuya^H zql~EwpZ4PG3uQ}YDJ~z#> znuQ#V2j)625`ysOBpwteNxiD*zSQ72n-_8PhhTtymu!*`o=vix!gRMzOMm|f=mWb5 z8nzWVtHKEzwy6mSo!$wPjN-Ljb3XBg@tqn2;C;qMkll(?k3L`78Czid(YJ*Y0_g?e<&?0wO16C?#5*`{FOTrqL7n|sw!~DkBJW+PY>$(y?VM!u8 z7heu#oGwZn_;+463iwclXTfGB_WG_Pjjno!+m)|z1b~y1iH-X2gDFpn~icz0x$?Xu|7?1PmhGyu&Sxn z5RC$njBQ}4_hrpo?OKZ={5u-E(;2Np;59IcD5~LDm``2+onQ0X0k@R!qQa0^nm68) z4fxrCU)eZN#}Ja?WR2U*D=oE;W}TTsB1Nn-jEfCt6g-i3YkySKoRJOzXA4D+rV)1} zN^OJfh1DpV$p+xipy8tyFw7HC$4p(U=%AIP(Lt)7)j^<;!359Sl&~6dp1fkaZQqa3 z>gFGL>}MO;f0(9Fe*gp*uYS!M5#icog!)@GX)j1p#7`vw-(osS!@?{V5ooPv2?}ux 
z5_B)EL)Ee||Nm$K^hWq;(Fw!HN`aRhf3biLig9w06#B32OZp*Z~qZB;bn;xW$bFq^XfNx~1f+ z2d9(zU)-eGZLmQ5vsybB7Cy42JYUgvfZe9LKJ#@#$8FT6#H^9}HoefOxUsJ8w|3}1 z4)MPo$;-VFE7wYR%43o_4x|YM3I?T#+il)i0&J?=m;D{4H@y6xZdr{erP_)@ktZb$ z+q~SAdb!HA1Zc|&1Me1nD7QB728hk*IXxvhd+xB|r zCObP@v1vU=ap?~Ky;0}I92)Rnc)eM-)o1~I!3n;FH+&;jnEsz9cX3KWB+BsjKw<;w z>QR@^x@pd3YH)FrJ!`7926cys|rtG~N@0 zWd3Ak7;-vbDMN*BG4NQhH7PNjl_pWs@%D8Ym5wGuONJ~R#;inE`&z%)tb|^j*5HIH zrdq}gZ_(L0j+l@-kEnl7Y(O%D2ED%+2^D*rj&l12ft$YeGkGL(V*O?|kWurpKkkfE zmdzYO`@d)*xZkH}U#AAVovA0}VNMISaBZ#{{z+g_5fc-agkHeHt#oukN>E#0%DzQN znx^o>0&ei|BDs8RPAVin(d0WK{!+7FCT|mZK5Mxsy773Ord5JHS*!_nWl_M(!MHl~ z<-zVSH9p!+rmsZa%Y?J`?NfS--6_HqAcgKVBYbp?*b!p@OeKQ#4%tascjCQ~S#2U@ z<8bcj@UP=)BT+)zxBBUfTR;r-Gnj-(1&`WDS)xcf@g!s?>9d|YY6I0^^A%>%FE}J3 zlq{lYfYpA0VtL?fjr8=6N!OAi0dTg?aj7KFla)6O~#`g*0=hgOfO(;Oa4)Im!}gHVboIVwmy_uCPzc zw9OOm(Q^o<`$s{+OpiB~B2MsyIirQqSXaO-lipH&+N*B8TxGp8lS{6lsex;9tsQ+n zljJ?VEnHr@SNQW$yGW>f%kg~=BOsl*bKp9RJyRt?;Nl^3jBw26^u_uH(2hTS!uk+L zWj(OLgLvf~b0$Xp3<`^iD40Rkpfg+JUc+|LQ%p{nzd!Rpy~$tOAq7h)m>|b#IGF5j z50N|J^16IFTS0zl;B$MlJ|1=Xp0)vP@eL|avBoL8xzrl|YH0WggV1Ta?9(V=eQw&$ zszF&^jZOW%qj zBp6)nBl$*);-`?dyh%iKn^cBIBc(EmoUBvb7seS4c!b59U7Z)F$>5H)8)t>{(UkZ} z44KgJk5Oq-;bG`r|4oPzCBgdUIhJUPfYzUugRCCEk;0BlR>`Xv8Rg~8nHwxg_JkI% z&hSd6BpED70$9%LOE1W=(&OY0nj&0~@BBe$E{)Sq#kq~2@wI7O?9x1?5?L%6Uf!PM*wL!zFtc8E!Dg1o(rS$f|RGpDe8&kIj*5R9aDtO#4m?V z>1?oN=lzHrxUMK*U`UuS5cK>KbG#NjaA%FEvV_5)6jt)hQpP${H$6Jyp2KN-8rdj| z$D5z>-li}s?L}aotzpOj#zG8bd_`?+-!We^ipubA)$#a>!~EHhnAwJ?F<%W!%spXYoxz8<|U1++xJJ!aa zZP+bV^O2A>N|ZJzYdK|{*~-1=u4$*Na#H5MDr>dChx4y(3$w}21#X0RW$=iI$vwS2 z`9tDSf%gRKKGkOX4#vRLnicV@C~FJlfY}rN_9|6q=352{mxq}9^AQJ6n?uH(rMl!A zu5upz=v(fl4_gkkP|L-l?8%B`h7Y||)v~`aK^a4oL|kqwo8p4|F5Kr-Qcl(aue2lw z9@qVt)7;FcQd^xNtmL&;yVQdJv2Sa|?#WT;iuT{1py)GM{FhZ21rM&X-rk=>XZ`S2 z5jqE1%7h@!IycT)O>fh~xTCi{8h@p1leV&utJ~HWtNN~V-+fpAm2DG=u-)N!^kq~` zIM5-ZW6WoZg*$)jdhjk@k+llcs27@ifDwPpt{;O&SkSt;L6K2#+>z7 zKLM9@Ip*bGwzW>hCv*OEC1IoH~~!?wds ztltvtx8-TkS0FU!jA~}s`P{kLTz~djC@<^ 
zPkiX%>~in&G+bfX!*F14TC3ECLBw9*adBjvAm3Y9%;2bt;Qwf&$sAvfQ|-(&uEp#Z z%BH^y`$(=NdWF;fLc-UwQtymv0vNhbA>oZS*YJNy06x7^Ilg>vbj)cVE(XOYQ7d($ z<8ThVf&NjgI+h8><)5w3@aW6x-?--KSXdnF-`NI`ji{If3=Gmk#az0a2SPJnJU2NY zQp1@TGhe<%jK8#E8itsBepHvL)O%oxi{voX;6NU<(cvAvs`3xQqjR}@^mD@ow!U`n zv(eq~`-gRzo?58i@8&vbpJ?R{n(Zz>3Slq%vKA}K1%mqp3hJwuv%ty9j}Ru_tdzAT zjL(7g8i#}hzT+G874GF;FUkrI25EmVlA3Et?5saZgR_*6 z?Kj)zaY}M)u9z71^QC`uiYJvyXz6W?7EV>Umvgr87NJ1#G3z)kC?BIdoi($(XY#_| zCwLm5G^n-KdlE|Z?Uq}#DU^+wp^T^2uu`eG>%8bSMyysl>@)ex^zm$TkKVF{JU0q3 zSlnz)8hB?%-X)NpavAP17|=aa9d`Ha0<^)fp>rUe#sOddVC23VKM4--^%Lw-*rW=h z4HR#aq%Hk?xt#O|DF=U;Diba>7%&WuEIdT@%DF*FdQOm}wC;Dra1g9H$YliYLp-`v zWQtwHMex28!Vw7#z<>5vlTCZ~;cWoc_vr8(B2Tr0-8E5S9!YFWO2Ga33*1VIz#Vm0>y&$@jGf-Y>q!`*K*nEpCEdEUWxQ3Fa??MP{haUGfptY5N-)4#it@c1z=G z(+WW{I`5gXT<3ub$GV(olkzBop+VdQg85D+*=tR%Bk~?0-f5ntnWXBJ`VJc2Y>42Z zaVoWxlRul^uf9exLRaDAFLHOdMC6}sw2WZ=Nfqf3&CS-xeVUeSM*b#eBbiK2Ir!XI zHhjgH$!p-Ie#YD66tr)A-_rk=z488`E`E0bCEm57_USX)?=19(wzygjUN?D4e+Jjj z7ub+joa|I6{XKAyAXf1Bd!EAcS}w)-E$i8XPLIHwiN_4;#9Syp)7EQ}grtUHX&zNR z3QMqt$a!e>upS_h~a?lveD;O}GjXN1gUW5hsbEhOxT^Kok z*+nQ9qr%?b5)+A%{bR;=NI)js|J>VTeXhE&rRFA)4xbg4#Ch~FXnO~NC;Wkkn4uKm zlO+7V+ySj`=$AuOQ+W~|EtZDT>=jw6DoNGFYQ3X6PZ&JLbj#Gp~}8(>(H5HhKth`pfd{rtc@=bi0z?P$?@-_$&7c3{*~ z4MVk2>rD+F5YT^gwT=AIjNJa|k8A^P#nKKk`6$ZM@B}m(#}$SEO1}h17(^yA93j_3 zYei8}5H@R-5xh$^UMjrjvg`e8q#l=4k_Zmy$Kk58LfqGv-8*fTtWQg8`19KFWK(z0 zMeiSEo!LWPBy%1I`Mcz8Ir6v{9Ctn?J(dTL7|dcSU+ipzn#8mD^5M-Yx|ddWQ|xEM zC7FzFA!)k+6{+5%VAt||H2e_R3;$zn-pr&A^?}ZZ@E`!+!2%Cp#g!cko zX6fBu@_F^w6j$cd1%1a29g+WTiCl+FB{*qQS*x^TgU(Cru1|(rvgd2B1_j0W)r@`# z`{rk30^8KL<%a}wxlW1MG% zmGijQ3sRW#v8g@oIAUt_c@jWrpWRk43AV=Tx}56@cf8v0zHvW9dD@r`34FUoYM1Of z9$v8}fVeoVy@UHM%_rC13;(O4nB$M&CKI@3JCuY`RVNg)^87q3KtTO_5bjd*^S#oY zfrL>H3=S+JAt51SFMrv^gDz@=L6xJ!{9Bo0P;XVT1}F_b6;*eH4nplwy;e^(r!LLOO5@vUK?J>cm)Au4lJA{qnEv&iB+PGT%YIrOeKrw z9p{q-#cJk(1e|4nwJI-{-jSdC(EWv^RAY8x|8U>h4khd$hMj-fCQgnOOdA3kyVt?2& zcvMjr%}1^H12=Z`FJ4J_WW~zLb=oQn-gghYUgnl`MfZ1B+TGPzICXnp*a$+YhWt=i 
zD3an#PR`bv`i8lKr0k1avwr#vl_w6jR@{zUf7Z(rpddwrp%vYsX88Nckuuv?$}V(q zS1g{}**hG|A=0-y%gv3zEwno$75gMd*h6At+q!Wu&_8pdvqS?)f~Wcu3td=$GvpR- zls~U}9HnQMQ8kHwvUu*ZxTsy#Q9oWnxhkb3FIsWi4Dl}uOj+O$U~l;lSTP-vpLmwV zbx<_aWshx_|ItqAY-$ZRU5EeO@@%CIG?S@&2&*-7uH8Chc7P2WlDETQc=YK%zE%gv z=`|^fKoT}<%Dju|9y%EnwdbM}W>4lAFz~8&6ItGPexlRJtYy+;$Hai!wpt%C2}>&+ zzk*i%9s5}9P1}P5cFEOdDCE!8U?No(=21rsi2ZD^ckIfgC$o8=PM0cW zsk&@0Rd7^}?LSf_wDBRI?FF_CS|*BfuumU6B@KD9paRhweE2*t1sYUoaGJ+r&PD@Q z*D$kp^mExlTb8xt14T!TN9EeH*A~E2EPI2vW%k#<4W&k~WIAM7b0ZLoHOruc7oNdP z&^Sv4oDMlRQQMq+|&;u79nHk-n1b8fv?bN*XN9QI1Zk+XeCYCG@#C7u6p0S#yd)A*Qn>TZ zydys9X9aR&6T8DwGj8k4XsKsGHlV2q+-~-Te%9^blRtTCd#=N$-N>2ZX`?^PK}C>p z_Q26@+~N8r2k%Kr=Tm8m6eaSMkKq$)gKKO4l|ges26WZ#3P2s1ZIwW<*soK#zxDuO z)Dl_4&sHX-90z`+3j?r=yxl_b}q9ZbMs;QP#nD z{q+@0$oNR#s~z~p6SLR_Q-;g) z6;xx#n`)-c`t|=9IBBMK*+eUPlL+A)Er|Nj3`+E2vj_{JKLu)V`Sar?lq-JS^Rm99 zKpfuf$NCiacEO3Gg_7oN+JB-OaSOkv=EmS~aHZZ2LlqSjojkCnhZV#4u-*Mt6?NW^ zEPezF{V7mnZ%e4r?C$QmjraRlgeSWg&tq6qHH1W@)h-{KsYKPoR|X+%;=Sk-bsKhE z^p?oexnRy&8i9pzf&i?|MM&XK%v0PF0M$n-V|5Y81w*&Vdgv72R>ZGmd>dZI^H%-4)t=B ziyO2_rahGL6KLNsVpq-KK6ZDX?@st@u)K8M&emP)we9J@Hv5dyd_n+#?*`b#A`+!S z`??45&ieiJX_XBW@NSqbRB$t$vV-B3qWNg9%61`CEqz_Ql(c1^^KLWOsuAhaJPQwr zQt3R`xs*&zirO%1PEdni(V4!6Iib@)A*$XEDT8#sSuA{ zBHU)Lq5Xv6E(Ci*HR%0KZ+*BYQ3`Mw)#xM*x_JETbps8XPvyMNNoO7K#vnA+q$!*PtsmjB0-agPHbCb8O_k&Y=-ebIq7-SWx%k1+VF{qzS!+hAz<@!F(xOBRrSP_K zZANxgw`(tV8-=3@ z3(7?GZGR>!pZ0w^McKUmd)h-6TjvEip9zu=dkhHbmSBL(SRAv*7$`W>Xcmn$42ayS z$mEi(+KM=|#}oPN^@ncqsecBQ0Zrg-LMF$WUbj;pUZ6mx9-O5{N-6Zq6sd&WyLA7& zspaEqzKEDLrPK09=})ALN_X7pg|1mWN^wG_@QgdONSN5zdM~*}mb5 zcJc)-;IW5L8x-Jx6xV6FjHR0;w6Z{2HUUG?5H2*nc&ghN7(eIzoY8xr8}t+L;~)kL zU1FT=rc^HoJGE&7M9ZgAwiH{s^IX9a!p`R)xWNaDWM|X^++Mgfj=$b}jexJEGCaEE zI>67b*J&HR00QuAAMC1=126x-$nvO_9!Hi?KhV`i^Rnt2pK+W-&J8nK5VKjwAx9Dx zF$pscl|k==+KpXPm^kdr@n-|bKTQ6PYcC$H+-1*TaB!B4r=}e=yaL|Lvaah$mKp$e zv6Elcj8N5-xF|Tp4$HWSB7UINM!3;zj|@q~;z?NWU6J{VvJh9RQSP>vJz6O5?#E%E zmVi2@5m6WsDaltnVR+BimuA;~>k9>=o~D+4l`3lJl&6{F>0^J@*z?6HzjcMQst1zP 
zv5+j5+xzo+e2uF*`uJ~Q=Tyr(G9T^OjKpwzx5Fo(yET&PtFz{IAg?ZOE9oH3-egdf zt)=$wD2cgi=0of?H%|2`Pe2?K5%idymQWfRRI-HvD9Q4%LM`H3tAGQp3p?0upU zp=VB+&P~G_&+z9rwSR0=u%De;N{mT1Fd{b@yi49z&^#A3yAQOF!CK>F$E%2TOCg)C*TytkfnVDH{C z|0%6+a7+O^Mv(!H(vVGK+=K;zY3%!O035A~>FJAK^|s;qpoef`xlFfXybV*-Sq@5L z90_?M=us*&xPUU%hCE9qmC{-Fup++G5urLR9idFjSOy z00R43H~e0l&413HVt~sDa`JwUr1d{$?M+S}D)Juag|D!4$ zuhSA)gtF=jOsrproADL`X3ea{N`_Wq9uW6Aj}bdPZG#5{7*%- z6oQ%{GHiP|g@Aan8MDMEhz67-vM^ojHKM3mUb|kF)pFYXBB%3Cw^MMfGPyq?xlq29 zlHrfJI1We#ct)g8;b;0m$F;7M+4KpUzg5e=58bC$`6`=dR3!QUDIXU?c+|gX6fQE& zJ1kKu0Ml`WX6l^8RBc3DzRl~NQDf%y_(o@3J;L0b2|yNC7m}kD2K4x-@CehG9SWV1 zr3(ePiOCDJq3-Iiq|u_YpE=i1zB$WQoXr*q>G=~h867Qc!@PVH*lU?mbuMc!AAQ?q zguh7Nt_64HkIvv`9LP?LqNhBrKOCP}I&+GEo8ZpmNypUeaOYu=Qq#ZmHG*+ifY3~b4sK`Z+JzJ0U5Hsh{Y!@RSS}tzXQqDJCZ<1` zj{8+NP3%``*JNXC%X`tOwdC)xL%rQB-61_M%-LZVEgdmw6OOGAu_h zD`v2;zP8*-abkbkJ;yxXSaI-bb&_}xc@5FT#9WHz333xGm6M)}cvv=HRVl(+$n+Y+=V$#X9zEZkFJqH0 z6ba;{b!QlY89;gc1o9MCEiXwp_2)u?f`gDBFD`*8^ zu0Yxmu*sf(wxT(>80|W789GRUEDgchF|jU^&U8A(oxIP5S8SkWQi4`)$c3>Tb~f1u zAP3>vlQ~Ry1+$ul4DX^5>UNj8p30rnyr@y>W}H{yH!rvRKUtcI;1cOz5UV@Ms#nJt ze~Cs4cq5K&po%fGqzZw+j$p%2*NI~}^rbfX)ChoZyxX<>Vdscfr%%rpZ8XfVlog!( ztT^>dL;9Jq$be*w*dvM>3FA~y#`h!P+3rTkHAht1^_62suLrM^=Q$EjXVuk$@2Q{t zdc=r!3s4mVz6O?0h;OHW8~w7$qb%o$G)ibO#WbGM>3zSY(BgvWS?+J?d+NinnKvdv z_g+Mb=9J7@t%_^BIpBHvM6oy@d3vLc3kdlJqXXD!a_HicxdlbTS+8&wh{vddL`3UN zhI|3y!y|k?ES*h(1u|I*koH*q>K>DrgKR|QO?9a?*hlW}g=#e%+<-)#SfU-En&~P1 zFIJBeAs2v?93G1vwVD_@V*h+x>P?0}9_OL-MEaB3^|eLnO`F=t(cAbIy4-9`gHN;R zor0djGh|TMFe z^)g7|a6VH--;u7$UGJ^+4VLpU!gG9&Etz*q%9?-HFa5^3VTV~s*RQU@(y2+xdtvis zP;=%i1?}rj;^JKieXdRV*jC6<1G8M|heBw^)nq8pgnS9QlX>Wn_ym{|j%)b9v4 zLEA8}Mp!p~ksOo~Xl^5Uhz)cBT@eh0iluPdQ3kIVI^wcI(A%-O3f^%rYhzLL;TLML zv3V(%%E6{cjK7(NBqTterpVx`8LD}!2hekc${=D(if4Y1p>z~H$rsNBI{8b*C?Dk! 
z?*JNrG`rGOOjfvjDdE*6D&l0h{4hN1Z%&k>6royhdoYI5o@A`O0osU^*PuD_3;Wwi zog70|`A@KrTVY+lqU59oYfF~QSrOQ0(tg4I^uB;9x5-L;quLh;v4#S*20+PJC1f&t zs4I;^A)6Y!I_9B7OB!nMlot!R$;lG5ql@=5FWG75zeiL%3*h^4==Uly$6B+Q_mFdl zowZS_n~s};{BF1@7NX81t_F`f5vS41pzZDq6}$+|y$Gjv?X34seqg8M^#ySkZJQSs zxQ%_SF+KHXNLZfow>{hK!1YUzA%KA2u|}!7PJ&Zfg@4hqDTNl2G`c|3jsx*=yCR8WS zPw8{jkHi}-X!+}0oC6%bZMNWkJHEb?0{^!B&<@_f`hjU)&psIUCE4xv5&RX=L2ni)|8iK(ZmNq&UH@()y-)PI3s<)XEFs)Ht2}Krjnm*4$kY-XPX&p6)Hh({6rF{ z4}hO|Id5Dcz21&IC%+C=+#eK<}&wXm_yW8Q_uT`V$c?DmN_OiqAP*2A?7ugo_1?r4`T?%nhxRnwa z4olp@W>BxW+syXVW3eF=K5)A%HDB8TH5iNmdHe8F;uK zzq_F%mcL&YoMI=ZrbB#|Bsv$_2j@XAqMnKbp2Po@mrn@(4Zr{>s>Ou{DS9FlI8c_1 z5>KS{mf0F6I<_GQyi8)XXwHRaM9Uk9EMHG|zfw*;J^aLZ!>X(_bEI~mtHLT@%EP~X zcVXHr^8Xik7SZp)k72>~XB)ovB`!Z;fs+pfL)>!02WSyKj3!E%1{KoG(~ZRQ%;vF@)C z?{vi!XsvAjdiWB+T6Z#Qany3fWc+%}JeTE!tWvb@Qi1pL`WMWNC|^}iBm)esq`Ix> zgxYjN5{=}!c|KV@H>9IrQAQB=pdb(i%G1)6~tcbQtG?{}#7x@2Q!yg+8>zUE$ zItcNF@4f2gvWTMOzhuGRPf?r+Heq}#r7C$x7}w1_)%Jy>^XqMS(ofAqyTWtkWz?Zj zQ-feKI)r=Y30WcUy8=DHsk(8mFbqXn!PpwX!bdA)&z8U_T^p~s^^P9^_;PoL3`-+yB(<3m6f zb~JnARhIiCNZfE2e#X$C(70uKfqru*&SrlL)S{%@H9QO*6oq1utZw;50?JvZ}* zKLKv(-v;fAIbtGcG8FMRSn=T(N~^4h=C)^T0uTV4gr7~{HFK}muB5UId&25T^Uw`Z zI++d4aOR1G*4h61pZ{wmqd|b#MKlu3asExhpy_!K4g9BI5Ws;j{RD8clHOEB&!^r? 
z8ePsKw(H4slEu0DTvs(EelqOux*x@N?vI9vctwp0zd(R@bjm|XNCil|3BZETWM#wk z{(C^n`v4gjP7UYR#mb8aQrs}I5J*iR3hns+;y9jgkkRX2_jBk8#U8p<_eWl4RVu=L z5ZK#iHP08c5(r^=2Hu8OF0MyP7tmo@Sg033IrU5#sRZqmJgg+M1{|gyNO5%w@l&TD zW$?e;wZA$4Poez(?ToR4Pv^?6`Jd+qd}XJsPEan6yt0!e^WBN!oI{g_ri*Y7bsrp9qQ|nBUCBjiPsv1Cmg!tS{dyR5 zxv4r)tRf1E`#<8QUnPOxv<6;`cT~X5nM1YC&vncg39?5Nvo*d*g;p7olI>%qU+c9~ zZuj0pnAHzNzfclkx0Uq%cUtEs(ns*!Ad(F>R%5ykyanAey8kA=WryIv1NAhlp2 zNL1I0;e?oeY!WSz<##-p&YXEYy{8wvs?U*-N;t!F zaA)H9Tf-kgj@R3Fig?s5ZOL-s=gfIx`mJGM`t0EU`OK*y{r>Sa^Gp8E)ubdp$YK2V zVtL?(6t?o~TJZlnCn(*JbF8;lBuLFX_t;C6u9YR$u_9iRNPy+dAzjpuW?dG``B4XZ zqDnABR-I~5P$4EC!saj!|H8!oZ%_N5Qhxr`t&Gr_12Y@!;5|K7`5{@)AhPXw{=eN%iQ8(qBNqDLZGoF+MXJk44b zi|a=Gg9Hnt;%$2hxy*S)JS*raJ#Vgo`AnR|>?c`}@y#vu!@_G=&<0L&*h8{NS4xra z%c%J^{XN4#rXmU~V#Kdsu~Zv=smeSJ_7t=czZrx1ZB5y~pLHaFU!L+6xMRs#&YvXX z)E;~@Ci!JAmj={eIQfiG3YERQ^r9J8dYpQ)xlHiE=E>~qQuHbyB>vAAArua*t@Dxq zxWea8bsi{`>`@LoFz;RByTBn`w3w3rh?#r(o%qUgk9d9XnQ7QtWWlv%dEWT_ETzw2 zpJ<%+RnjauM8?A}g_NpQt^>47n{Eg4xqgotpSCfWtuorF^CjjXjWdap%iG*_@bc=b+kv{5HbcjN+_R3!(l zoM`t7Dx#!7iTRwO^oF#UhNM&QHoTr9YZR7+5GLnhw2y4@aMcYIbiAS&o(;^tG!k3Q zR`RTjh4IS~Ht$o%Ry_6*3&TQuJ_LdWWg^zbj!b5=k8c$5?rls_;%t!QEPJ-;4kOaJrAAc3~&CAUQ>fO2u>m3^E>bP9nzpPfg} zm9G$H$|F%i`05Ei>&;x!#e~MEhX^jvy0?QOysP##W7vR*_&e&X!S=JLv6A) zJnPki)bhN8SOZ74SxhSjw}Z9J?-Q=QU%f*0h3bTq!|3;tZPL|z*2rg{{WxZ!+kn?0G3uf%>ZIG4 zY|ZzzY{|DvzQJ^0wF?JB@U`&lJAUPwhXCUS^whb~{B`=D<}Obm$fe@5|7ZcwXAk*< zv!0Wj3;_gb$ZNt4wi{mn$LBNU$oLP3SPCuw8d-q_vAu+7n-0zLc}?KLjdD7=`cBMR zw=b=t51W4E5OPjK=RVasnG4Cwd+x;ltkh2((C|%=yvWzrx!?@vSrI0ho&{m77q?}8 z6^}3DQB-#i!sWjOA8$hTapNk0d?TYp8Lr+9LGT%x8R`NSIH>u?PB?Zn&E3mu6WUIE+QK9_46 zpo_L5_|JV@rXKpmfLgpI=0c}z85kS+-8+P#nf_>N9(NH0Ha&eb+igEw$DJM_iW_~@ zhSLN3n#TH~SmQqVVu5#uz1bUI{W`&3HM$WLN!8jgg9(|-qEoZyQHI2-Z8c5 zm%z0^sHv`rz2p)NFgYWAVPDj!&LpCU<8|qcamH(vscI6&Y4V#Ck_E#l+%qd^zKFxG35G&dQ+Pgy8!C1!ok?MsNtjd$eCHfGu&IOJv^ z`QVUX`IZw{nb>a*@o45BlDO7@@imRK4FRW4q-Cxv7oVJEB#7n!!Of#{da(BVWgWWX zxDTHul@I^>+em$?R3l-#>d$Qy3&_(b4cnAKs4K7C3R>wK0ivg$EN6tfizCREqT@Wm 
z9gGmzl(CzG3_K)tK}-UakFTRGf;JXzI{N8F=SDU%NuSMofA+U1l#=zVYR4^*+@#fP zk!`y5K3Q?=N4>0O6rLQeHNU<`tx6O1*aaJ(5{#M-Uarhmne*y)#;Odt-jJ_0hVcKv z$%1Uh-BW@E*7226_+Y*^Io~f(-n1l5;>%U_q~^%`?4st1xuw-9%j2|qCvB?kYm2Mq zq0JCckDX7-GR} ze);Rkr(?;4S50cE65Il76!n5&e2Y35i#V<6cCZ*Sb+;fc<-uP@XclbUJk?v!6I%S<;p zkF)eJlik>`i;%R|sf6`84*ZNy<%--0ud3w8$%tlZ-CuaeW7{eC zFk-#7_Ti7gwoPgFdE5y2CrkP7X6x)7c{G`~)RKB@Z8z@ed3;_o@hr<-@yD>$(+Y0O ziQ|u7DltZZFm>!Atfwm{dTSwjL)K)YYc4R#qL$H?m6x-yIkVxwc!Y?0z6h;XuV zh;njrif|5R2jm8%?*nz>J6Lge8IuyfBh(@G7JP$(5`%XL1Hmb;+2Jf9*{F>X*$*~` zUHQ01&du5?@lm#3={pB6c?Ui4&t2N~cL()e22lN2ycktvW}aU-!}3%#LD0A=KBAgO zXU9raCrVlV%w}fFBsqHY;6&8z$Rb&Dm=JgJWv^OImNyI^9G+kYk!t0!%u5dyv1O-a*lESdT$!8gFLMInmHNCgbk$t2M7rB3_IEWy4NHcsnue7){L9MWxV z*-$88VX&t=vWa4lf?ZkggOS9!)f+Rj{3@ntx40hVdf8@%D*s55inWo%jEaX_)&z>f zME|^StJQ?Cdqd)ps_)>^u-!p%`!rkxHKU-8kOOI9Gg!^Vy`UoXBOj)tr50bn)8O=D zdIzR~{hJn2u7@%Fk3Ste+9%=~StD#VKqoqyof3l;jo8)QWI2L5r$I*3sFeU4bF@j@ zJVO@xT*`-y+{WqhcOhN(8|nLW-^l(H?}Mt?-HY^{-v`~0yk#C~h?-KYt@Pxc)PIhQ zMwxq_*-+)13uP`c#+ZmxfeNy}niCT>!2)xlB#5t01&CT;)l^l@E-x;cTb`b>M&Cup z$&SmS%0`dJjWdj+j*l~-%F2$%{c{nJc&bI$w4|6k$tQ;t=6(6!X(`{7EIyo@v=v@< z40zz4+qC^>tw|`FU3SDA1CxQ`n-p+^bi;nXqM1HZ@zmKQ=m?V(&%YiGu>rYZ?0~DG zi%;~C*Ho;j%%K%$j==d$(sWG&2)WmDD_S-(Bqh5}qMv2}d z)I%ttW0bO^aXM=;vAqTgb>`2%H-kvruVqb$gKHmb=Sdql9Q@WHrkXFaL@oW`m=Pey zvkV(>2J$dTEe%Cq)|K%6^3~h1kAuq_W5ddO*>v?G9%mxn{xGGhPbLli$*1H{faVAs z7*Ag(nj|u@)|H3eKhY2PL%MK(UvuLUG6IPp@~Ni><&aRyL}>cYmk{wb&r(uF8*bNO z6EpuW5s0cq7AIds9n^QmK;dEiCcwoZ@MTg=$piDH+WI}u075u*7M`KD*n%QVP=<$* z{#K@!Zr52=#oZs ziN1)3S#*tzXa-Es2U08eWY@Z$b*!^qxH&}J*XF)t4m{(WTj6t<<#s>g{cDOGyv_ox zR%x1nRwc(uCm!3@+Zo5Fiw;a#A|w>!SR_lMhYr3KbXBdy*M=03(FIl5iT!f~v@5^; zAy_-jo{L6CuC0BH@K}isrf&4Q>O`$)nHxxyzz=EQVb=RrTKqoC6Bwc3%O7;ktbijY zHWJ?-PY*u&d8ErkIOpS4J?)NQLOIp3%CMAeK7wbJUiZ9JI-`1F;hB#~5Z$(>S&0sB<5^+V`6J%m5R+o7?<1WA8Cotp?lvwSQzBZ$ts;<7u8n=$n04z z*~!U4ydbqw<#ZVE%f!xT$i0v4g(z=}TZpA95+EuIPPLwNB4i8vWQk3;u3ZsRh-8G< z+8uU`@k5ch+hZgXy`!{Ge*8pJsFUSD!6U^PzN7RXtu_oq<$`^KOb0=eNGGTu_qbpD|?H4hyHdHGSzs*TnXwl_JwOw??! 
zvBy??XXi!{3FAj75)zcqVBa<5avKr^GIBg|N%kGM`%{t+H3;*X14#2mU!UHktLA&y z&J?bZsuNcVd~nd|+I&@`YB)hC$+7AhC6CN#JG48PTLpN_eMwrelI$*a{KxhMeMPs7 zL1y}CSB|&q2&qIX{6x{V!A8rxKRzdoG@|QJ5)++uU2E8S_7pyJ{aukl2Bs_v(P~wgiBD%NDMk(8%aI75I zH}sc2;`HVcn0OTOCVtT?RX36ZRp{g+N`M~dq#WFR%ylQDS0TY`&t`VfqRus=j^7V! z6X`#;vNH5*wHx+?(_*<{6@nt+BCKU3nID6l*i3pgz~R=51?(!GEb_VW*l_ETq^*)w z#U;eJEGl5WYnRlyy6S)rhmLyLI+K=BTc{0%ZN+FO)m3;(ATgwa!Lhr@TV>|PKpGCk z<`rFI1yHFlWTr-lP4G#poq4`2ziG&I2^}HxhFd*#nemsNQ@i|- zJDO|zIDu}>HMmq+g{4|0z|})81c|1R4+y1B(q1I@xhz?L1pv4{PR`Ha1#nnfLb4#` z|3}qZM^&}`-``RKqO>60-Q5k+sq~>iy1S)QI;6Y1ySux)yG#1nc<=puf6sr8;Q)_& zpS|{4?>Xmds?|u8^zgANDJiK*I?UP_mDH4vBgOyO6Jsc-&ifFfv$EN>Ea%Q! zn{30|vVk`ak#|MPpOcqXU-Oey+%PxZAL|(}c7{UejcpB7TDsnxk3U{UomBJfw=}qF z!Sa}#IIdKN4{)ZMwVPP&AR2;rhdGilJ~T9|DkPv}WxoW6zYWn% z2p;-DkKT?GPb+<@kupt*Gn;RCJl=M8o7x;U?g{&>n!OKgUZmLQ@|!9klq@11p|P%AQ)_9r5$h~D4p&~>~!%UDI1!Enu@+q6E;8>#j=32FZYjx4k%fz z&mM3TmF=_zOXknd{5VELUtgUG;fI4KHuUwS_c>Fl-V)^>amQx6F^`zSRf^QTU>o5p z`;#6--h0&o$3LQC?2@D=Q1IXZ+pX^tpMP(W@+zU7)u#|iw(RhwHQfy27_Y^5IIy^$ zk8)93y5XuCnqJFV{%$12ROfE4#&j6@k70Bke^S<3lG~qh>jw-;L3A z7JTMVP&Ge317hG(p1Q7)cJ2ru9+1F(3qe5IAod`D!-j;Xu>KTkqILNm;~CWO*o!m3 zW0;!VM2Hd{*d;(q5=OUS7uRh|`#M2>{?s@N7KB#?<8;$F&mf%^;uDk|sYYnX_+%7r zRIM=n16Ap zkug5vwP-Qou-<=>(qaoRp=Mfy!JiGTC)^6T8=J-c$}?Wsg7_H#bzy2x*Bmigc+s!7 zJDWS~Q8@8dwhy)V$xg~E$3_kKr5|COdV$hrp&eOaK^co}oKN*)d$xgtvUWtlfB#=^ zVk`g-k^qeAf2Fgin1W}C-ESX-@XtpGwMQi;K`(4@F<@|A7+EZwSxrL#eHP z$wn;$wWP6t8vYqdg#2%MQ8$F&&U9e^V7``U`>z(nuRvf0jFZ@rVvL*8#v*gaa*T!1 z@-mU4<($2tMD(w5zG<(A7~g|OzC~q7A$j)z1#@beQ9_cHr?(ar@lSu)036tuW_-)!btAKqUPfca zjL8y~DVaF4R&e$*Fu-!*zNEN}K|U8{CP5SvgVe1|MU6ioCY}U{?*H67K=0=Tr@R9i zY}#Vy>)sgFZwJTqn{(BOb#YY%WAn1Xq;2KHYoeDi9YShzrk0 zvb4mUD)wF@`M4kb#!bvEbnsOX%m~#{wT9nEYC@_h6)*DiK2*DLwzl8o8xR7Yfdl4e zGVl?ylkbre|4qNN*LnS{@8Oc)z2Ex!d2{pFhd(c64xHOT)7jK&sjT_pZv`9wAP8b- zaM#I?3^(IJv{XA|SP;A%zS+s1l_m2wtC`|S_7S3S`>$H8WO{hY)yrgy#+$W7!35dR^- z7$ktpgHHn34`|+sylV(6-u3V?zaHNXDfI5S%m52g{8{6sPC;oAE*9ucQC8ybg~Hz{ 
z1zRd!5FgeWOyOg2)IF6d0c$QB`qgyb#n|F+9zL_ zJo$eQ1SGiD`!`)VTHmmWu@MO4%~I?qlDn>4$PTATo}nkZ|1nq%O=X=uq0GtfsA2r@ z_m)zrL3px^Qte(*C5~C%GpU6>cRga99Z99u&f12zj7&N2Y(^MPcE1nXv1~%;9PMIA#-J8AyHlW+m zz43E&KV|>?Pg}%mM2R|G;~A^;^?eH?DzJ(>zm0D-TWFPJf!S>v<3ytrmAt5e2eAvG zan2}UU%2{nj|xiH4*~k*rdo>W5$fMt>2*}tPlLpb9|0nbZLV*hs zV-UUFbD(GkAhlUTa8SH%&;fZDR`rZf`0rKa6JZ6u$70naf7gJ2p8HHdv1VJ{N-Fi$ zhxK^X{EUp8tSdeU#625Y-0)IWql`l0vPZwBw-8Of?6}^!^ycYZ$+8Zj%X_u6h4b&{ zJmc1X-%ElfRE5Bg`8Rp#RR;EEtPoyUFoRD5?k7+IOY%qa&KAK3XAM$BCBFYqA0FZV zbwThmioQFuew8bkSrGM-ArK%a93Q9uODmZ$iEh~CypdV+Je$@z@bSipU!_AqF14T8 zp=`ab%L!(DctCwhQkn_>Zo~IM#Gij13wYB+vnL^smPMM5q~E*SxcSfr68tOa1CN9_ zq7_~<&BUoc1Iy39xlh(s=ir;|t9JTUu&I#_##ZSd7vO@nv-$q~QV4uSBe;>|R<~L$ zh0MH#d=xhT;E}MfFHlqf(?oKHn_y}&PAarjIO%`)5r8Do zfNLeDGjlDzS%=ua6-^kCRN(~wmn?dN_A8(QT-OZ+BeCC<)P%{f5-RefePG;hFjbi4 zFV?4O-z*hZT^9JND#8HkWw;SB<++{I3w-NV2sb?~iZ!q{v@?UVr%jqJBmt~P^%Z6s z0-L{7wcwv}Z_Bipz~3&7b*>~mOb@_=bA)OqJ>GJPY4$j2qT4AL^FN0MaDluL-w|No zB9Q`dxgsFqG&y+Y^6NNi2vLxzx^N>YZTHVcT9V>yvU~K1N*NH?N|m-uzUX&6z!~Mi z`nDcroe&5I2zy?Zd9^ENdKQazzuljfFe<+MU)6RL{M+sn)`e|2k6E@BawiWpT(%)j zp?Bf?1x8Xn(ZAnh@xY(^l&bjnC^4uQUR7j76oi6A@{BHHuT_t#yJzmDimEo3ilgOX zC2C;Qb#)_DX^jUm)aKDQ`aK|-vhY=RWi<+vj5pw;(?kRRSoZ0+YaqNv0kiCJok+WU zElF{hICEu+5wHOE_b#j#3A{N7VckRSl$rF_#`(MD{R!=2_SmR*zS`PI?%)oW%dS#^&0aYD@xlty(=_-~h{*E~@~>jPv|F#LUg z$&%wQrL%?yd5^4@BICUau4Mt?Z2w(zZc#o5HL2Joy7V`BTc!S>sh|1MHAlDUp4s9; zzO=|mW-WY^7qQa>@7`5rb%%SO#i*B;)8!(P;Q6|TD{#DCM3e);oRFv?RIg`rNG?F5 z4t7bk&z8;Y;Ev#kgV%G)nbybgau8+ovGs)eO)~Fe*xf%}IN~2vb+n1OY`)2xEMI`$ zm*%HWWUOa^ga3B1TV*`mEacC<)o-KPMnNTuX-HD!WHU#n>h&4Uz*p>2h<_*bR$jp* z_MewCy;Nt3?iBd~?fvyWWszkYz@W#bZI8wO}d=6Z8-kK@e=Jok2JR(e!=IiP9^|c3| z?Uy*I-U-0m@B#jlVG94Cjqt5UL4@ROP%TbN4*|E|{@YL-Vm8cS)}KRew`+D0dGk2p zPITproS51B{n)Z7MDEFsn-d#ub9QUA<5WSB?0)N+y1x7NzZMXWm^2V$RFlU56j_Go zpvarq<+FH!JMeU(KtMLrBWGz$2WUm6AEUt>%eY6tL4icAqVvl|g*1xbU6E||YpQF6$3aE%U&o2mqDY1g5_#@cJnpwEfg^JJ znTl)Mxt!|z2n%X*Vbz6M+l5V(xU}+lNJ~UiWSM?Ec6YkyY$No`U@RQzHdx89ie~Sr 
z8do~@Pep3`LA7pZxEYNtO64BUxyP(3=bPkU-?jv`@E3G$;8qy?iyXD^)!Dt;m4kj# zHiS_!0NagHz0WRhYp%60Kd@W^?`wsN9dkDwl~Z-80pl{c=@wtUSb@zWic8sn^EX0yq{ONzMjduz61_yZX{QEL@Gs; zo#ljzyI>oUaJg;aO>TP}%GUPPJ62s@N7Y;N=9o)4lzGz5VGc_EW^82YE|0XnfEhrv zNoJW~zv6Y+or5QyM)es&2=<^Y_asNMF6{!%F2buxcGV!N(jnZSdh3rlE($fP04DT% zzlkivm+yr;o{%j=sKfkhc&)mOHXXZgkRg*qmrh>+TLM0fmzMwm2eL*RC}FuFl&x?ZiAs(gfcRk3S3 z&R9B|#Dq{N%U>!<(Xctjy|ztEs#S{Wtse%t0VBK3C~VvHK)1g?ul?Q&_w_h^RIjcA ztLa<}l_J>`#*}IAGHd_XUaMCBi8h|?^I*Xvt?|q$;FCyK=)f*O61JZI5Ee?xw8{02grYll)&}%8*mZ=?zv&-q0pVysD=BXlCfcF(W{33s)aoZ>FfNj@&UXT)S6=~NKq)at zXsK50rmAyE$(m#;@wx3QuY1?Tq+vbc8+Ej6vR{?7ty9L_u< zH=L8=0+}Arn=XmLcuXuRF_O2y-=KwT@0D)pJwzDt?&p?v%6^7cZ!ST8hqQs zP%flgIlhA6UMl$Sy!YmtSs~dh&*|h%+Fcl2T^tMLS)rv5G*W0wjATn?YNs8#Nr5t} z+*Yr46DBvJJq*|KR3AV67c;!*_+Ze5n=!1qxj-U8nH)S000h{4Np%$qn(WT$JDBZK z_OC|1pWGr(o=Q@C0tIS6JDvKcWugVlNE}kjRN$y#kd5AK3${Qf(?bWFzN*W+2hsM& z4)~NvTa)hBic1%0#x_@54MFj)F##g4LhZa((_NHyK|zthqUZY27}Pk+iR>1vdRgQ} z`~H&zV$ZA++V;-r&o? zp!-GnhH-&H^u1#(X3y;PY!_?P*^nbCjcbDr(D|dl4i1=+-;k71j5QQ)xPJe6?Au{r z03Sm+Ofzh7^_S`k@zTTB)xyey%_ZvIlKA1nbGw(b1OyrLm;*rz%ccCstxzZt(7*Q@ z8P!VxY7(jTzSE1CC=1VQ#Ghq(H|Q6vk_ASKM*Vej>Q5gMOc*MaoQ_Upb!S!n+tgO4 zzHWsj&b0xGt2U7v;n{Ij=zKsq0o(ydB7ZhZ41G_U_OsNt6hO1a$E@(bTIhJ-_rZ?@ zd;>;#__eLz9h3lDSaDL*1@=}-$IyaY7)(Fgu-7JPvHbfwQ#GbqsqTbyzPQoeZp>P$ ztdD5%fJsy|X8dmQB%a-3^vbGYi!dUkJgOPcT5%GVI0t8)Yb10r^blsFeHP>LW_WLm z0p--U$@T(tr2*>3!J!bnj<>2G~^!5yQyab7YedQ{OrY5bK=4 z`Tvf}>sb8S)2wS|v3*))m6z8?$7++9FGmy+tEczd{utq-mXY^T1Jv*@X%HfS(|9u~ zc??W$`tx5eb%ij01AX3ITa!_HHILbW|5Cf+*X-Frjm9g8BgF}GOV&BNu+USGZ%Ko} zhp#KEpE|3$_Gv&bj(rl$nL;GqihE&tb;c*Frq>O8upGdZo~-aJvI+Fq4z+Hd-3*7^ zjGV~OWN@jj;o?FJH3B#H5f)yaFPLdmKHDroht;PYR61=HtWVG1>rVNeJA=*X$WypO zrXRXO^@`7Hc$r4hS70SWSE$L#g>G6S%yMHGq01AR>N#(f5n0)i0;P8zdehazxf7zl zZNAFxiag^%n*`wnlymna#M!$X(ta&fqQr`wEyMe^)!C5vWox#p4rVsSxyr>x{k9g* zLSiqp`&RgIjT-#ar|tmF6q(@dKp$=N^tTn_Z8G>zX`4La@8&zzT+C>TICZOIgVS`Y zUmI;dj|*$X6yywpM> zKF2nRpTRF@Abuoqa)`DAXZY2jIF!(Y&xI+iqaT(jmH1$B5*kmOWSc#v*Oi5;t2;4D 
zNzfov41&(r+Bzy5E5sT{CAKpdiMAw<2%Td=J(@yNBzXY_%xq*dWg!|3p=?MgoF<@j z&wQ*rw^)OKjABJBs+2F5SNNw(OJe+nq{*9axN0_aW5@!ecoWbwseclONcoo1e6d#=S*w_d@jb;G< z!zzV>A#N2&RY(}d&E6gR)cv6b^V5-b;q&i7seusgD;RFihltscB&j8(;hgN>%2$bQ zEhGTnvf6sWu9su{b!3i9j-hhfzn$zR!ExK5acysfiqrn}>d8^l|L_)au;$Wfsrm3O z>u%v~dtRX4y_1kk;_>e}a~%%<+{}<}G8MN|Snm4NvnV-W=#w{13FVqHr%n+1bMo! z$dEti=pE_shqnG0bXj$9bxbX5W27W5tW?ro0S{Mhs`-l3+j-!=9N@Jt&;ZCMtVod( zaA%>^T`3=(HKcfRG_&(mc}mMX+P!$uw@OlR%FSPR`AS-j<9?VB9ghTi^MSZDl#d=DBLTK+ytWr5ri}JR%7t+& z{wu)=dY`PQ(>A)C$&-TFw8%G|vY6b*F=xnklqv$>OFB1_>|-3Om9j*NoZHwkl^pA6S17=8p; zZfxCww|t(dbWv+NQ6ZSgsX3{_GnZf73W`dL*Z_hzJ)W4bu=z%Ba~xne4i2W0CQ@hi zC>*P=`cxCT^=;idJA_xOTm~xWPeEhvPWiHL4Ph}hP<~NYQ=84L$~~~M06)Oaq4#`v zt0n*#z6s0@lxxF9Er)vty(e>Rf>TF~ZMOtd%npEu(hezx-6{{FP7>_&lobCWU!dA|Pdw=l%>9X1!-LuXQXocpuaa&xLwWU?l# zz96CX>L|E=P%7vBwdz=;Nt7QyhVAnBU~o-Xb?__15Fk18&ER;L136@Ra1yFlDFBd2 zdI#|So}0!vJHwZNpe;Tx%yBd3`paW;lgR7hdG`Fhs6ass>v8e-4H^k&(9$GDUxrMq z!kY3W3K6%L~(r*>)Wd%hzJhe zME*^7knuMZJu8=i^fA)FGM$@&RlJNIHjJ+x(b zLOW#l<}iH&0Mu_?u#I+yrTa=(W)0WF> z%PAZEzJ--Wx48G!g#*t|_mUKe44Gst?l;NyUNU;FHLTSQ4yVMkwg8|M_4);81OUjg zltuXTz2kUg7b*96;oNPS7bVx#o;hnzZ| zaCbH{Is(|*{j5iYFS9@jG6##uR}POIJjr1uWc*g~O0^AWz{>s1)Hfl*ikE4XF(c>_D%o{YwpX z8QwmJt-j@lPw<qF+R`bMBO>y2&EzdFJ3HxRvEg~;x~ zHh%9Ja)y4`{3LL&b+?K|O{y4`ZLTtXvfFyv;Bfai@OW!zPe_?Z4(VI@zr)!d5Tq5D zT>MuO3R}!Ma_ODneE~qYBueJO$l)gjN{O-GM%%qNX znR;m9arsVza&#di)yQ9zZNn%~{c7~O8w{C@lA6Ib5H%bYWmxFq&; zo^Y^k^nO;vjJZ*~yuvottbn?*scbJ##>cV`K&s9J?E6nl~v?IWN zfhw_!-7Z#3BGKj;J`<}O1R40P~us7%( zDUGD^w@X_mBsjpQ62#AASesX{ z7&Jk#|Ll0tEkN2>VFBXihFuz|pdH0q=_AbD1@Au5Dlwg^u_YsSeIbP8QGUHcxZfWz z5=m1YvxB6dR`xm@k<4q5&RI@wZAZ}0rMP<#8LlItzv36&--hnzh4^cY-|`j6-8PA z+d&F|Om1HHy$Bb}ttdMRPdiFXw}E!MIP3<=5cri%cDVl4+0VB6Y55(mE)!`&FFCdD zqPCdZ*rK`zP(VBV8)5)g@5i?zP0w!=XGJz60Qnci&c^_skzbqF-6{aKY1vP*-vC~U zJJW2w*nZlv6Z~>CB7whMgZo~G%N1bT8E+o?EWgB7_2G}wYQ!Z^8_yGCvKwF&7YmUEuSaQ)Ft&`^kuC}^#WNc=P?DXNuNqy;IJ`D`C za)!^fww+>4O@DO&We2lhS#j2`W|*6**a79h%$8${g)7Yvc^flklVLWWCsmzl_c?!8 
z?$G#}IVc`7@4Unf+J=2Z3WwhRydA&iw0q0pfyLf&E}OaVDksPRmMXc#A%M)pyCul}b(M40fv zY|nU{yqm|et9tkpSAK;F0%n&N9T~D3$Lmd^Q_t~_E9hn7D|`=0_1EmGabLbRcqR$% zS^SA|UuZL)KSmJc5FlkeeVMBBt(DPo$HclIXMD@9UC)qNt^S4zPz^}wJF|MQINo#GY!H+^drjT-=D-pFJr4ir#G6z- zppR63gzKq`Nw7@-&3(!b1)4&#^l5wlSog~a3eUm!d^?fK%G!-5a1uMVfw#`|C)|~- z=1v=qV%QnRAmxVJ0Cn)#hqnsvA3If5P&kz2_kn%A9Gp#)OYvhyZfUjCg%X;bXvSME zv8HUh+mR_p2iF}b*kuoG{`SNL(XcRpcx@*c=$M+wY z27=5N%keedDufUF#st&OTdCCjE`U(3L=p#f^w`1g0VdYyI6npJGtcLB|`-hJ`9+z z=E0j8HvEO9783ygwO(5=DrwXmI!$;mpla-V89!r~XfwDAPfTK&ojLC;bR^HXD`$>0 z#nGR-S30nsAAbS=oR?XFM}I3id4_bgxbpTGYXv$~vk4%FKL21Fb2IueCA9~aUG-_& zFyx<8ov_F{m6)Huh{D{((m~sep)+$NCQShT7xqg(Zd=a~Vn5Dk<>YmXuP>?zbj$}p zoFSGnBJ65&b0ub=$pWQz3YcdAoBkp;IdaTQJ*_d2d-qT>PqxThe$G}ou} zo&bv}0PhK_^qXkUeDi7EWgKCtS|Ghs!D^TgXCY@A8VrhRH2Sgb3e93W{;>d_qsf(U zP$~54PE!vj*POe9GbSIDGPJ&cm4XhVfTEPx^`j)m0iu_Yz+q=tUX9ZpRakGQ9EX!s zz$`&&P_L6b*LsQT;AOrGBZ=J=>4eAg5)r67kCwXaNJPESI;`tmtIuD;d@gv;ZzM#@ z$mm^HPIS6kkYw5|-QXrNWOdtu70=#me~%q7!L!xSezHZb94KL?r+k>fX8^}pJ1R7K zR&l_WJ#xU6X^V0QstXmr>r{3FL44cT4CmLK#;)sPGxf8cE28Fm<`nh)YWl}DPleRA z4YtjIy|vyx?%Pf!N3(Qj>(=Bhu(P)}wC>J{ig=rmw5meR4ti#NHe*hqIr~w{BW<8Q zmU9te?>`R=qV}#O>o4h&WN9Vu zH5x;gz5+7juu?F`kql|5>1zUlXIHmxfHhw4MV!#w)C!p;yXo9T6Dl&M0FW@v67>LE zrla>`LBDQiC#ZMZGnj>VV8N>nD_wOLC~tiS2_qO$*82W0c& z$F)FWuKi$^;KmiQV|s0zZ=RqDUvy+N0baQ9h)GQ#CG4cmD~!i;mG|JTeDMG!^9Act zz)kO0CVaWYwe_)QmBC#b4z?h`6YU4H z{uBzBP=`5MjyX>LWC(lpVQ6r3#Bi#0>Q3cjT#FO$o9L9rRy`xC$(F%Lir9`>yzBuM zj>q{r>q^3M*oGAExE73b&YS>AI2Y`wQ!ac818cdF1Z-nl5IQv5_B`T+1812(bc zS{#pDT7SNIzA7Af4g7OyfMy~3g}RoK=sT!XuxEfv=m)PU;vVB>Wx}i5gRS+B7G~89 z0rk0d5L>x94p7TLfwOY6VW63r83wuBY{!bT(hy0Ca4Pjr`||QvGoIK*Z;=x7ll_5b zbzdgzpg?BK6%J;ThNnK4wyKX}wE)ar$CRtA8Ut3{U!apFh4rn?R|dxVC#-)zuc)0+jlc{k`feh#+T!39c0B%^F^pk-uKV_2cj?y10q7#V zaR1mplPShs_VKsfxvt_-`JH2*RH|)PD&4@4dWSJwWX&_W+~*oa^rhxcRNH*5RHfs! 
z?BjkNFdF7DCsZDqCcqCqtHi_g-rf6H{V;fq=nS<0KztRa;s%U+cq-nv37!EP0It4%DxVUWn4n|68tJL zrcB`uDfL2cl3j$?&f+jh^%mZj!Dlb}cS!Fy-=F-pVK~sE#w(Z@N5Da!Y;>O^Z;3}i z_W~WV|A6ZoTRG3d>9dFy3^eC*3E|gFM|4T*g4o$jPKu^M=U7yYZVI7+YDU<(SoL^P zdvR4A(u3rH;n8gUl*who8;IwbkX)onONP?MLv~6G2lts{@#ws|q81;E3NH8{FGC6! zOj$)bELObEQ6S^Jb%govCblzgSonIMj}QgWRBeEkFJGjCMb4ksy&oDX$O6 ze&31X*>(mpnYiVGaUbrl>eb*i#vsWTDGYz4R*)WEgJkx}2o;3pJxZ&6t|Q?PoIerD zN^&;y=MuEIj467T-laAhxV>QtFmKgIgnu`@$c-hd*F1jkiWve_luAP@vWQ?nz}~Im z6aZy$1Hou>4!3Cf+7nUBH$@-T^t#5AK9=@(YMe!z@Rx9r>if#7iYKF&UOHYNhJ6@@ zYgJxFxQ`Hp4T;rj-b_1u5bCxMqp^~5Lat##me~4$`sw%W#t>M4d&z=pY!G|IFCd;5 zVecsz`%?F_yku1?)f51~w7?Oi=#_FvV>%D6WeZEYZ5|-6`EM2=Nb+$@sdkM}D2lkUx-vI|X$~2NT!jpn4jYtH zQvUBc1k-*VdrZOx^WfWWxhCeF34U)~G!rYHdG=R%^aBe(SN8;Q5aUk2Q2AlE z!o>S|>eI=j&GK8~|b3yVNOAx^XO% z)4C`)vbj_AI=Hde5dATpoPY@ETQNSWe1*|3&r{~aq&KE0Fo^nMunWqV{Jao_wzCI+ zMv!bBRIdwVY(iT&ZQNsK!cf#~Y}l~*$aBAVbQ`d$fHvlP`pjAs3sinIHfKVttWC!P@oLyv zD}2e%3p`Wm<2Njxmo)wOqi$NQS-;Qa4ii4~ITrOUBuq6TmT&vQS}ydj8NC*-9gH^w zfIhSwejgU05}n(ksn9%yZ{|Vs8!<)A)}q`NoX+-XecPenN$BVFsW(l{iZ`~kNq;zE zdc8B1cOf2{eI=mVqXu4+rw12c?~{$Uc|362g-w&Rg1_v1{Wbf#2uh_GHoeQ$C;{HmCCcpjt|p z_`F+We?o{i0$dd`c_|>}-Z=)f>(-k%g^Gp9TZeo2 zg0SVgnpAs;+IKs2bbOwy#9S+>D5ICeE2@uRx_ej{=Fd{4L8>EQr1w-aw6{41&K+L4 zPv*(6i+pb8;i{CZ&Ns7xKB=N5U%{G&zO&i~9)o{E?FgLC6Pbi1BaW~B$_G_69M-#$#2iLAQ z1Y;JsXvgDB&-UwPH9oO_rudeily+f|P%;N${O8zy+2Lfj5l$J?eqS<)wk>C2X>2Ny z$%9${fzta153~K=bAJf6;bv|-XIZ8-(?E#NNYOAr1SMw+&~j(N`)Z@z90cRTz3&y`iU=)WvK+w? 
znl34|bt{n;M7P;7dvCv|72%CR&uXy-diuA!UCsHDVPwTVp?S*i$HXv)y}A+QteIF+ z+57V+Og*(00aG>G*>E(#Y7MmaY*=qe4FU7tXbR|~p(4XDhKc91M4@WU)Gp-MLR+nJ zf8p+FQ2w-u$%HwKdF!Su(5ara(}#Nw#kGoI=c2YWKe=Yhb^+39rMGO_w*Lj^Ii9;I zbx;)3gX)0G>o?a&N5fgElA-XKgU5J8N2W{2(~AA_oiPwW0C;f~eI|D=O++7LxKUNk z3$^kCCUdJ&M8ulTzG(hCTz0JFTq~g`*K6elK zsnJ5|TyRY`8WK{%TOKHw1?aF*ZJx3Hvx`xKXoEkg8A%D~e~3rTV}9}*XCdXrzv3tr zqqVI4QYBZE?~-r~*$Fut4v>c=fH8|c;YWjt6QPM3n~Pz63eN_O29-EjCF!)q1Ot8t zf+(hFt_)d~Rsx(Anl8UAGBaZx~2!xhr7M=IGVZN+^#k~^<5PJ*sBhDf` z7*n8Repv&yZm{Y>6)8B~aa*>CiKY|2T1%0q;|W#57Eqxo8M6!K_v zxQ4WnqzZ#!Rr6P6l*^=?>SM3|j|1<6fyF`Z=4JaaV1MJPUvfG;ld+IBKalTqY0Gd5CJt^DB=iDq97T z$`mhd4uszpl88tLBR_mk+(74~-)pdVBdd>)j7^};Yp!TIX>`cRO^ z{NS55m~e2CGE}KRDV!I%@N0jQ(sh3%4+6(zC~F|` z+hQ6T{j@*DQo`C5fpXSRw)@nMYt3*Q)XBzyS4bKucF&22;ZP$^DyCSk#=_;eJ!pD} zi+~esaH&OpXJ-i?()Y&4<2Jcjk{2%QyyBhDYd-&&!>z7Of*S- zZtq&(n>4i-@OSsUN8rY`r6+;Enm>80)cc#=o65j1P02f)6`aDo!_N6xUH2jh(LF;6 zD}WlW+~xK6Rhe583xN{*Zr%E+Ob5<#!pgq_ zNOfTax56M!ig#Y&YZ3#(2W7`-XZqfSazgn0kWn8Yjo$P}V5|<*nB#zddYwzsCQ;p<+hGM#s+b12?r+2Pxqxtl~7vi)FmR{F- z{7Y8xSbo35U^qy6Hxfb$ERDhZ+JUG0KOfJiL7`YLIq6#|C7wSPnAXY8)ZLa$suEp@LWH3A7$I? 
zP=hm|a@5K|iBFRQQt|~-MI3ggjd+!MC%Th;^++nZ-l&}%UhZUPKTMfaDmI=Zdt3%p zR@re;aC>diu=*05+O>~1)Xk)WXnKvNR%fG7%@C5^`9G-oR_?>2S$rX@h0{y6cOfm7 zpG+{7J=pD1p5EN^o@ioC2s2K#9wnvRIPM{+y1jA^6YJubS4Aq;lhttVye~HqZrwDA z8|siRmQU6QvQ=;#%FfW`K%cW`v6zr=rQZj<0%twc{C(`-h z=08(UXDVA!BZaatiOz?j!q_htLn!u9Li3 zS@lXK-^N)MEc$|vRD1yvGY1f}G&_z;na+1lb5cbn&8r~Jp)Hr{R*k^_Kvq02`qrb^ z@FB5ICSRor%yI*I;R_y5+v|byA)_0mG|2wvgUoMPBRbxb?ppZ8zDS6R8MfMI7!F{k zj=!#*6*eiA@UgHD4yyt8i`>k!e}IpO6R^aj0wyf7D_G=15l}s@&vh?L z;N0yoa~3FH^!xU6JTSbc>*&&95*X%PCHR-S6ahAz;025z+r=34Xx`pSfcjAF0D@TUg%gCERh^gN z0XFm0xhmn}lh8Qw_~OVtzR>!-JJi#So_>H|sCIO2Mxn^gK#Zj@E?hey&=$<|u=#gg zQ`nJOtabs+}UNxz@KTN52KPSlK4^H^Ts5S*d2tX z7-h5&HWa!Mo?}PsaBV|U!u)xPTaW7m&%TXFW$aTd?4Y+xkr?3hJQ&BNcKAHRULz5J zKDt5ujUGbF1(NUsG3F>|XaQff!RXt7dt6gCfxK+h8CHNTxrI{vFL9vg86XaXft>pH zq;-oNix&MbmTt3HuW~pCodeh*m>=9kWM%g?f2qw-O4U#29{o9ie6B!Kbcxa`nfw*a z71nVySBV|3bWk3V%I(=_>+=G!8US=t*juQ0k1;`Qz%}!aLaY^%R1IF0Mn@5^JQZCN zfQv`LEbUq2Ql};ai!fD{SLOmQo^Oi`v|*aE7O5e&hPT<8BSZA9e?T33aMY?;gJZ(i zW1ma4dml?Aa9iz;0OFohx9(CsA*FcZ1%(jptG*u+M~NcUPxI`kOMME@;-FVu*ZLM`@OI=Wr7r`K+_aB% z-R?b|;i{l1JOT(`SE6i6IW1GwVrzKQ0lTlkdmMV3jv5jPx)((5dRwdH>){jr(!=@i z*BNxk+XzfqoRTf??s97FgCh!fBLhmt>woX8biCL(>Y;a{Z5nd9UZCkWz#Rg3vm?mO z_wbzL>^>hw)Cd;o>BO>9H}O%d%JI?^^S3bqJRS3gPkfHQU5<<}4ZZ;g8t+ezja4MC z-XDqSfmTrn^)B3$={g(2$Dz6~X>cCONVFh*v)<#Ho%VKf3`*SuxUK6A>MXJL zYk_dDTrBZgnRf;U!#>b!DF!Tr(ZcmN+fiPH3R%&yKOM%Z!C7e-EZ;c zN&W)9(VzkF!8-rYmL0cmb;OyVe)W_QK-B}vx#BH-ev|j*a7L0rV};qZtM%?EjCaL! 
zadwNS8A03nc_1pMLVY{x&bvZ@T%jx3=l^5YVHkADO&G?Z!q*zPP=mm|ylro7XJOiz z+{`tfW+jda%+U?t478>Axw(e4RPgbB3$Up~5w5g|QxM-zVrRDvrT9;EK0drrGPlOIC7zTJ|4>?sYd2tp0yF`@E2FFL4HL`D`eDpCzJ=q9w z`#gxa3zJPqwEmIA;%Qj6h$y(g@Qk4#-bToNm?nv~rlJympIjX82wHasz!oyOud;z8 zAfA}=tVzTl>OW1jkb0ChIQOC*PFui}elCxz_z6`|AP696%qhuwKltj{VQjqs@2npT z5Cmn@PzgUwhWKWgiz&U@2tyE$VB>ehqKuOX!tZ^lLD$S;#`^_I3AvKGS!|2%HiB;X zcIaCH5(R_KJ%wsa(}3?9U|T(yTczrM?6ZDK_~{1HhPs@bW(wlou_5m7?b)z>ujQ+v zCcM(&%}Ni=s&ptjnnf(`%hAE~(`BtvQe00N;BQT)jO3=(>;L@y)=8$H?k~520^o+U4xh}uHWgbc@OA=(^YW54&da=8^d1gIT*Xi ztUH^k`aDCC988uQ z5`l)3%b*jLmY)u#GRZ=fdJx0&nk5H&&7=Qcdtdn$$J4ZnhQ-3-8r%XQXmEE31PBmZ zf(CbIaY=9p4gnI}-Q6v?ECda{xXU7YHa~fu_dS2W`E;)SFtc-Av)wawS9MiQb>Fks zgFV%_umZDvg7dA3kd)Qj($w3l8+Ok8wA`V8FWMp~IOnT;UDl#I&+^k! ztpQd}^0D-y5i6o=Q!ne$!RN5*2MUcqi#lPCyW2J(z=52Q6Pg(Fr>YIUTs$dH$xbm_ zUIqrQyqgU1JKDQb|Hg0(Cn6STw}QNq*{bc?J0gCq{-uP^R@pwwFng||c_iGUqA=&? z8MqZY`e;SaI+R|q+uC_IJ=6?dTL&kjN|$ag$h7yfnu-@= zWzehv9TiY;gckg#a_jH1X!uX@A9V0F+VfKSOkdy!kCY!lpp0a7u=n<2M5V(;_NO~6 zs#TMAlO6sR&j*B9^O3mMuC8F&blx(d!ig8Fbc3x<4-eI#*;*&UpeyoU5S5gCH^$Aev!>3=BaQAQ<)sGqbBel7EsVTHpM%LQ?FnM`lmuKtgcdy_)@S|mB(Rpdht&|Pec-UWx9ai1peJ=5aC-6oIL){gT2KI zg7t^noGfKriCgxPzF!wIzg-(rEm0`u|FYklT4R!OU!pQPvf#p#(_Z`g_gHv|p8DvB z*QfiEJ7Dw%kqTM2`6mbPFPSi|b#(oyl(PpClqg!_cNFw%;WtGeYM;wCOH;r8KJ)oq zwAEtW)NFw+BIQXEx181SLMPUvp^yiyq6j@LZBMaJa0E}w{Yjn4fObWsld&)}-!{Qx z>C>-`ToYhQZVNA8S)=k>3qSf=>uZ!A3WQR(3r6x0u~4B)6hpvwDuBvQv|l~&Jk11b z?5&iKUdjYb6zVpsWi;Q`Aft)>3hSLV)gEkq+a>DFbd~it+Dgs6M!Rj{@j((|B?`sg zAHOdj{h`J^pty*x59ZSL_yIJPFffm@*;P)+?mcFv-L&N5C2?U`z_OJ&89+$D8M!2gcxf!r4I;l#D-D2TZmxh*1@JIxdk>(pB}Ra-hL3VN`b_EwLs+ zS4{IF17D!bQW+Cj!Z{ybkX2!|xyaqnu5VI3y;z9Pmh)W|&wg9@+-31GL(B8q>ol%} zg6Bsg7i3^J9548HSM>E*fmVhIDnX7p(Q{L0&arJcSs^L8uR^dT{=#n^EKKa*jAx{u zCn(mDd{5H?&t7Le@TG{s`Ow8!<3EpCey@2QNG_^U6{gD95%B&a$#-HJR{-1haH5)4ho?oQ)tWq8`6cCd9hpAI0^AttJ{e(GkOE3f}>d6ae$( z4|ixRM#UJRGSZU?R0ZlO_vv^A*4ZN8egwq>(%DiuCB8Y_#jZ^gJ9OqGXq~i_jI4;GY9Cl 
zA@^QIj^}+(87bLlr(3%JCQpS=Fx@r4UEUVAP9gw@{R;5P_t&PEDmw~4l!nO!B0YN#~zk**vSG}rZjepsaX0|s-aF^&mQmL8)gD3z{ zTf+swml7lBKEd0Oxg* zj7rVn;Obl@(J>O~5XcNBGfFj}Up;vpYb+Mn*9m>BRlN5WSCK@Ze01yAd^0OrfkRIz z;;UK@+6osjr-I2eRp1h2aqeK!&ooV}F`yWi8tBDZ8-hQ_@k%}cjjKa|e)8gs%Qhy8 zDVpw!tu8`P`)RQ2Z%G!-avixVz97@45AO;yHMo%c*J8+J70~eJIGZXS>wTiI!bKR5 zmu11T#g$NCw<3x%H!IcEXoU0r=r=uHX4|mKW5vQ4DuxBb3Yp~-RmNj9?k$nszIZdY zvENyU5Aiq<|JP}izakH~3w>y8U$@@#jLam~`B$=d^!ZedimnzNgD@?X=^t+KaUs;b zJetrFbIduKCZ=4&B(pVEREG$PKtiE+UKHx_;1E_-JY4zx4>!>X3-u?0|{p&(MqAy=F?}y|rMb_xF~q>d`IOn)lBSR}^kUpOEI-I^q+u z_gBNoR+l5P4p;St0Y63VtR0{q_dhX+J#1uLDy~Uf`cB{^MIi+KQdsll1fgSRUkI%{ ziOc2Rxj=&6j#RoIU9l)T5dF+7lC^tdGnd=}{YVI^{lu+VX`q7Y#-XXVD@ASkkWkL! zKE+5l+l47CO8{{l^Uvgbchq2g6S9$JM|pTz!m|6DSa<^LQRkm9@YohQ>9-}<1(X=L zo@ke?#Gz1_=(y|X+kQ@u8$@Q!1iCoZ;{LR(;$JE#b+sdxYxTgc2Weqsqn<|0j$OdY zzHjX*G6gqkLLDMIau1<$=I6htbM9v{JzMU-TWV1MWQF`N2%YP}CT(#NgDK;u&@+<; zYNPlk#SL5R9j<9){Bqkbc<-OiE#q{8jl=|KTe)hpEnYSIcE)ATeNGrS=5uHsY(55 z<>c*N_@$e?k@v3p-0sR6RbZK)n!7tYR`4p+tS&uW7&N;ZqE;O3V5fW|?9NG9m8~XL zk!#p1OkyIvebV_;9r3#$-?Fp=7KO|S))%d-?jq5s0ga&LJzmjMrN*}v3jUnQ{i^Pu zx%^nB&!2sZO4)>wS|c1qeBrq@mJc22)^j#W6YW=lwihIXuCmWA#bgmos6y5cQ&GtQ zywC4aP6`62vfu8q#&kV=JIYP~U1HaJuRBGsz&9AhmCN)$R^RKGqLsEefVje1I)p+TAEm1ImWGW<6Ls!Lzd(eZ zt{JL&ACIDAWsB*wUZs}aPJ3|^4895XSpunalx-dI>a1Ya)`H7MdfUma1-$N_C!xBx zIe>Xva#y}88jAYIU?8xk@u?5@0wn+D0>}|_ zHI`Ri znzK@^e9cli&njVWZLp?o|3xvSwCFZ;R-?amiT?N;Wo-XVtFk$_k~-=)YKny;FRKAj zqzpCA{NY523TG!RNg0xl`R>zvsiHz2Jt?z$$*r@We!5;Ulw<4FYtcn)-{h-Kv&dtu-@@v-v9tAdVY`Z@lsEuQJx2 z%kr&wWp6y$)`2X|Z}u*pPxurV=C)a`c^fU_UcdPm^75Lzz6mo5IQv!!N0$y54mClk zR7}j+91h=}zo=Z=i+{ZejpU3Osi=(0A{3!VzQw&}PNMkeQKVL)vZ8zO!)lSsJAb-5 zP}wwuM^;kuornif)F~aByG1P3JCST_l=R5{ZX*Y!__o*3Dim_1X)ZZ*xk&al!n`TA?L4}cU+JkFk~MQu z&RU<18MEPj-c8^Ns5WN}czKR3&74261D07Uo=Pou0j%CpxPM^|QI*hFf(?X!;^_Ib z9s)B=n4k&;rsf_Vd6ehC;pX2TB;X3}od5=9LH4o2a%g0~UEg~%wx3=PvS@#?f~5*(#VZ`& zUcdYmzTPa>6fP%dhAvGK8vX*stK-g!yIVATLPw!$Vt`Fg-PKAZM?2O`q|KkI=3b^+ 
zU_k^)DOIo>Gb6RbJ}0@G@1dC@iYK|q5^{;~bj3vy3Jv0blDNIjp!6M-Xo+69Vesc1 zFRt_*y-Icq0Oq=L&QUd-4R3|=9K}kX*pl8CJUG!v$**<4N6a_#x%U(J=8coBNJ8^j z?T5Ar5?#_vtvN@uat~CKw8cP=w*=%MR3|scVqniFIgcyhNpZ8fw>V-b`{WT_lfu-} zj$BXcKN`!niO#<>o07Id{^A@h1w-qD=1tjFFr`xUWpIf&FZtSmtmW9Sp_`~f#}3#i zanH4&c$@&=vlrJ|4 zJPa2a7iMY3SXRy_v+pd~7R4V2o&6O!{h?G->QlbeoHl@UtDghQ~_uhEjBJ{XkrY!L? zE?DYXv#hulT88X~SBm8SnkjS<${b)H89o;Czc+tVbC@+Kr0!Z=W38-E_8VfrKi}4u zUg+R)9wpy;^dwhoPdD*d+;dx-(a$X2BlpLeu@PfPoEz)xA#dAR44 z{v*~LP!;oXzJ89%!paL~%Ku7*J|u?-V$(Q)LsfKadD1Z}u{e4hqTKxY-WmEnTN)tI zxV)*8l&9C4F^kshIJS4We13RHXw%xz!V)5`eW8(p?en-F+_*P~mccqls9CDs&!SZd zismZ-(N(RkGfps@B@L1PHOGzzQ*_S3P$hFFm1x5&OoK(ZHU!wC8L_Oc*o(b#l%c4EC^*F!dR{9E!O096r6axv6%rB8+ZiAGNz5(5TFm6RWZh@oh1@R-t9)<; zOcbYb*>5!H9ytuCz+^}|Lzg$d*1Co1r;p@aie-GNo|w+*nAc!Qa)dRE0q>_=dh?tVdJsQ4I&-;oDmnW*S*kqIV zth`hzMsFI~LxCkDlAcX?TLrx?m1>w?AR6jc{~Pyu_Bk_MB@Q=s4Iw|rL5>Ma7F34O!p)L7Z7chfU(C?<3Mz zyie#ZR|>xJJXRvR24f+u%Uq70z<#8aKA+l!-J$Ae1ehF$ZH7SNxSs^&L2^p!GYIjY zYs44O+`KPDG@nFbMR4MMN9Dxv?hbgbqv&n?w1m=|cQ~STn$+f;=F{m*z=ynUujcin z3{hMYCGErf?n_xZ6|Z#PpFgC0m7v;^qMplbcD>(wD>CbZBRr$Yu*x2s#{Y~87PzrG zXru}O(Eet4F{8s&xu8%BenI2=@XX7yQcMxR<;U-rH4=#fUDrnGi}SO4EC8ZYEd(s* zc(K;}>d`Qyh{WlA*u4RjD52PgV1@0>ij{~rdmRO{YYOO;BU`2xC=Pm=yI;kE;_$Yf zxdlYUe$Vl*ie3Mz^d1IC2k~B6rk@|!_n^HuJbo1T4&TEGgse1)2H9Z1TDZD(eKn>F zO66D~Q%{u}*OSU09V~9+XOc&E4#xQ`-ci~+V>Yrb@7{!SyW=w_8{I0L3EZOOvO)bV zMO8(!fG64#!V?uBW0WJJy()f#KOWA`VY<>$1_-1u{+hPZ2oKFl&!|ifkQ0KwxO+%k%kGjwd;?UGHuG~U!9BIUm`s9&`;>d?=5boqAj8u#! z^vcKc!N+Yiy2_!OgI{;(N`I8Ru`cyNgt~0S+Cc8d3guIx^RZQVp=##IJ;G8qZKf&aBz-B;y?J>{BA~JnTjjPOFO#p?Fu)AYA%nO@^q{u!nKr1e(`} z%G)pL4YpEEOzX%}ge;meyd}E1F&kZufo)p3u-Z1_5mykqug0^4T?^-I-k&hjkSDd! 
zQEWeowCepFXcd^=Dx5!DqtyRFRZZtf8};|~y_YwiBY{Pv^o0C;MI7`AkhSMZ6N1r4bNl0hu1Iq`-3Fbm zq;A~kSg+&63D87686rqlV0jtmko6;uvt)=o_@PKj3F5q^R{&hRQSr;<<1;tD{3s1Y zd++R$>0yZSz7$-cvc19b)gf+1$jIWpx!(XFp`8olMbSHF!#_HEPC*NTWxe1ry(HF2 z$U4p>ZXNWh%Ay*N1vyBHsIAoyR%&!}p49!k4sE(j|A{%Ux*H%u%m1>kpYEaUVqHJ2 zzE?=a{T?9uA*ov~leN`X9=~TM8_@OSp^afXL= zNvFLs8FJZlQO~X^hf`C|YXx~Ltlfe@$rbCQbD26FMGIWBPtD+>LCa09w-yGtbd!fH z_Yt%eNZDC`>|bCTb=M%*-?>f#bri*`G`@O{45BK;ksg%5iGBb96AA|s){GiwQagWr zY0~`V%N4A-?|lUz`1p>HgtNS4)6eM2pMW@4Y$>`Y~{49N}zxx)imI@&6`@d4j zaD0dlO9pgqd|O?6AUtuUR>atJ|3?BG4hQ3psI-dK!q69R6YZVRWWt^PO(^^|q$Yh7XdT-i;>aj!Z0smElKHdBd;W<~ zM?}O(1g)rl-{KG*QYcv}<^saD3tuu>wjA}HHB$JHGcRrete<$P%i2CyH zP??DUpE{FuTOy#GOB~8?(x*6-_#TWT_|6}a*C>ix=Tq4BE<96C&{e%{kj#vLN7!*C zj&J@aMut34cqxT##5||H=+`iJkVp~5aQPp6>jzJfHY~3Leli~Bza}9pr4nmoqoF<+ zOfo@1!b7YefR7eM80pl;(v&q@vG&(Y$Z5zi7AZ4pSw!KHz8xRxqoMY0ua|InpJ9O7 zw~m7&5zFIZBICJ@{r_nP;@AOs0xP?GRKh8V?4eFHh~eTK8q8MC38*sb)jzZTb zOlOA`%8{6x`cR5bg{wIH9inG#WIgFdgsk|u2@y3h1^Ni4CV%@maG*C`GY}vi4d#}+ z#!K-j(-~!xW00+G5rAVJspezY_bwdoqQu<+P45_9480a1nSNDnv~(0x;kdK)Bh=nQ zSC>O0?O^C%Jex%zJS=<(#au3qH=m&FIPOOF?0CagKu#ZGneI`5wo}VhH?NGzGFEVP zr*GTPk7^s>yw=rn5Sk-aC92-QcFL_r^31>^^hzUfjK9IAA}3jOTa{eznb&EXS3p3xwh_d!n3f;NI_CQlk171PKHK zpQRPNRER4*%E*{bz=<@SDFhlGW9&rfv;2*`7l`&oYP1M2RIwf51o~^;Ki&> z&@L0bKGA=M3>TDtZ|s`(@s{XE04)ubdDqLmr)%lZ$Rr6Zn+nHjDRJq(GDbes6{HyG zwi1bCN8K=S(=)q-iM7XH0~bq;YSTrP%c+#aRp0svP5*UHeSraMaJnvNwFq{$BEOsF zj42HbjeXplg^O2EA`#yMh+C6CVx zv&xOGio#P*?Q^^8(Jpd@?+UgYWYtG%7+mcSh!j`aBfoGOlmFcS%;W+7LD@l_L2H6O zhM7zA8}-q{^-B~O@!i9dld36W7H**4_vDsIw3Omhl9h z8rqtv=teYDt}sNGTwh4^vw0W2Z^%oM@#+uUKYi}Et6kK$XrCV*HYj-Xu7jwfzuf() zUPuI!9xUjeiT|?3!THP<`R|5kj(nf{60h>Dc*}^TGt?@VJJ$xFj3R zaiK%i5#{1ePmQ7S`zO!l@h@)dvw|&8cj*fMTgbn}Z+@X^b$ax6B-xc_Db(rORj8KE z*(G%{>)CyOed`w4zY&%_G3p}uk>Y;vc!oD6GVFBF2?NS;6*#Q0%Z2nA!Es}jknqG{_#WUoOx~NaE@6MH~d>Xy8pAz zXRv2tX#QxOXfQ2mzlCi@$4`S|bNpN;9r{QfDTXE;3=^&9q1H?^@|?lIaynpQbiG zSx4^-j%zw3OIVh^5gG3zse%LYPa_D$?xM=qv~#4TFVV2})Nb-^h(1MOFgM0~Vto(g 
zxaH0m+X?ay&JONmRZX#-w(R-MT!YhqA`^i3qKf$M$CHdI6Q(TQI(iDk%!k&wV~X|DXl{F@j1 z>nq4iTd!HbG^Le?;1-BhP>^LTa2GlxUue*NDUccOOl8)%G?NNfL&U#Y;EyWATbein zQ7{XP9`scT${M>?BXl$!+nr4#jt-Z*B}or{U%{5uQl>$ezgj$e)CipiZ|gk(URjC#M&uH>dZ3(%J@$UxuR^5q5uebrvw{u!G)FD*LjAgOkLn}e=uox^dyeg*OAuobek!S;pM9zOoA zky6kn$g68Ub;ifVg+xTS4;iWEX1bHM$0SQAGTs3P`SYq@RK55cUMOMMU1Cdfv0q|% zZtZRzt|p1ryN4wrpBtuD@9f>eG3_~>ZAm!Z3xMypT>VId!+*6ogg)%RTTJxI=q7pM zmQ-Q{7};-Eue;*e9GNI)7H}BvZmE&DLV8ps^um2Ocf0)v&%}VoG+}_`)a{REscQ4) zJj7qV{bTV?wWx2|Z#i$d)t_!H2vi*thT1oJej~G|yy!Su6!&}rv>-5uldn{t{|L#$ z1j)TtRy3%l+vah7N;uTU6O<>#3koMDHgvRYaMUu(i_&NCKF^S_o|#tM0aN=7l8{t^Fy;DIp9Nh%*!GeottuXa8`T+F~)q z=^PQ*L-zP_5iv4_D92`8b4r3VM;i0&YSgXW`b~Rl@}rvb)cxxp{iexGNdrZSWfvjB zI`N0bc<1H=JW5w4_+(`J&(+M5mRYDi3FB~cU@?|PasouLnkJei{-|@rS8VN9G$Uf+ zA-W7WZoLkpXuLsG;bMGP@kyiX8lZ_ZpOSjg1jj2(e0LbGIew*dc<3vZS&Y+Rl0aWs z1fw&8-!a=gL|5J;vmQ+J?WP!^z@IAWZYhEqK1e2w(j&b`K>V>y9s=v;f%Q%eT> zUcJgxH>uSg3(NIPl&eX;$MG*Gv>+#snLYPW;!ffSRFf}-c)pKAUV_51VLs+6Tz%nN zM7iTna5CigjV(?&bSJAv33kzF-wcGENL_P&Ofq$PgO;cK?6~$_{2+5ueAC8)$b}77e4ruy{^ZKwN~w~gw~&6Xb1=}SOEst i1p)%Xvp=CbGD2m5Y`m!}*CG50f`ZID=?Y2Xp#KBVE{y7xZkzH{z9XTR_HxlPtwv(GWd_{Tp+cA}Ai)=ug@)MR92 zJ9V_xO~}Z0Xp)hEzCtO0l3w!5jbvmJ?{FF@oM+H6R}TysucX?>H(p5%UNIG32{F=N zNi`=|w6ibT6L|1Kdt!Jc)p;c()YXlEXDoW7+`-?`9fNcAat7W2#jZ{pm7JWgBI{K| zeyXs&_pLEJgS33nSQmY7CyWQsgHzB(58`5Ck{f+G9p5TBD7{hQ?7KCN_>Y1kuECo# z6bEKcnvAo*s}sg=v++g|&fD7q=ZgKg(9zq=3*(6UsUq6f*E{g%YR9}iHdnVfDi-a$ zHR49^q=F-kXpi4ZEnJ;&E}H{LiEYl0R9ze6>g=*P^nrt$FFes(l{X9gT+mM5fg9~N zi_lJJEY39mqvq}5?YlWqv_H<9G?U-Ad26$My}g0Kww>|Di|zlCL_rPXK{|`AUEUm$ zS3>iD-uC-m`C_~_*ZHq>hjYaN+q-_e0ca2Z-(?bm!}tPI-zIiqfv&)IN{I1m2xyra z0be%%s!8yw0s^)9C^)esxCNB58&Lyfv;!}S7$HayCbj>z#Ju21AvCEfVik^ zzVLK)aw0WXJ?8C&+Z1Q<16$9VU6Xp+lC~dPy1C1w9x+=N=&vMUYt^L1kPci6*L5ksEKMR1RvYkWO&p&6XAbZs<>E zEVcE#?Tr6;ETZMEdV2BY@@i zKs#VO>^w1^4j5lQ7gy|WvZBB7YNLFc9{F>%@4I2>|2eHsl9Y8Tu&EKYG{!cqBeVX8 zBsUv3-((X=$N)tf)B0b&{=XQA1Dns=u0Md`HfFN@`ipO&od1Z0_2MH0V0mbWI 
z0kkH@t0BQFBgQLB!hM^FLCnkBOJp4)k~-LS`u_?zP*>5A(Nz1c-nU)ggK9Dws{dH= zZ(aQ(|F6Ko|A{8}Maj$l4g@5&F8y{*AO-9_X{Y~#67v@to?o0KCVJ@O7+(W#Ur#_? z9P}U|dIvzj4my**@k@|usJf#4059|Rx00l{`e;uFH1O$%L*oDu6yx{BU_AiPkHa_# z@Ja%n_OG4t0&a;4;9>mu*9m9m0F1FAzbbYf-hi)75@i5-0w4dlJ>cF3h&zb}Zh&;s zS!n{TH%m8q{#|-C9asW0fnvx#W0d4nxWM+T4T;hLVXaDea{b%j$ zhC==WBl}%qfiLT~NZlw%&1kC>kWGiJ7r-arTm_+ljpje*zfMT3Q%~Q$Z#5%8@V?jC z_SFkuLBzIveUh+iY&@*{@9SLEk5(izaFADaLr4Mlal^x3AIKNu-;Wo*6QG;K!QTciNc@#Y2nfOv5GTP+EUA+LS@8w9Mp#Q+oZ{Q1(8|mZ( z=p@lVZ(k?7?~i{3DAwTyV2!c&E}r zR@>3W#`le8z*w8@er>S<5RPCxj)|agIDpk8jpgC$g+cq;`M3uF5C7yqn{75`_uDAz z$bz)1>s;&xZL_hPTZ0{Q1cSsiRQX` z-;m2c!zRCik`1Gwfl|{mQ`a~GNaqIlz+wO@=}!O)$r+VZJY6m*|qDeu9bv6R+ z;S2b*BwBtQjF7Iu59qSZNALz9D^LmNLh>;-#zP0W{(HgqzbxkeeZb{cG5#9?7s)>b zF5B1+5{4l;XWPk+E%N&Z+wnu9NMfi$az7=+{QdmV9v&pg)6h3EwFEu`T`&L=-ony= zAg;rApky<&@NXha0H$0@;|I*yMwqBe%c=s5;vWGZC$yi-_vp>`@Y#Pq8v2zY|J$G; z$-hEF{1{Ix?l>TO|3*9e@0ARaqyG;mcG>km|8?%xuM+b&GQN`kl=0Odnbf~Fx|(Ks zdVuQruFOa+NTisK!FRK}>Gc1V-QBFRVJgK83`xO1tS?674~*?Lp1{-J4+m6q^>Xy^ zcOn_w?IZmcOZy#>QC+O58~i2&F={HJo%UJ-QSM} z`0XD4ji89sKMzGTHw^q=Ba-hIcMFPaV*s`T1VHG1I9%Jl{#z-4%>n=I9KgRUbcJGu3T zgMY{1VkAP0)c;Q;*Y|ARP4K=>acl1hbjD^czs+Vd4*BPdpP08V z(93V_)}58V&=6w$>mCTu6ce=WFaUvnfT<&eDA57lK#Dbxf8y=$;IZj-km8Lb!?+dt zG}y{X`QhuVC)SfTgkKLxdjUSzNq>jqShOFh6z2jEf9r%iKnJ>#=Dc+3o1X;B|M8}%4 zDl!uPI9YeQ2HQ*__%&bPdm{HQxdo)uykD~Q#Q)FI^+=*i+Q&`AEcFL@1mfaqj z*$@z(^&f!zZ)9{!0%E55CqGVDfA`a`M7HOJufN^;oyx|G%?6}jwru=fLDu@K1sNHP zOh;YCGzj#;hWs>-E^Cd#oifjtq`xODr`LKvS&_bfnz9O+dw#X>cCMatir%{?IWLQ> z@Y7n314YFbQPu1`|aAMf{d;CT< z(ucezKG8UBcGy&t^b|z_9yq-I4d`WCt@WoF7O=`mwncM#%iJIBf36p`4?N(q-f)!( zn8H^k8_~1B)SZz7mpuGw@-RG9OneXC-p_1vUD;4V-Y=~-rwx^doMzj}|Dlrex7lsh zz=lF9;(>l(PB421sn!8OFaDo4fsDsK0*X!EK5ghV()gy+?B$ztg^R(4dTNv_8W6MLq){QYt_SIGyNp?~-W6VE`<#5VwCLl>dK zj9(>C549K+5rtK-_Ow8pcT!64TSXQhq1Zwe#&e3}-n+Yo^;B0^Bn@{#&)LLG4(8p- z2dA94UBCE#e_kul9?Hs<9yJ(4dD6Jz(}JwXp6Gyk45EtSO*=f1g4T<>x#fvYx3M)} zR4SL$`7&AUDk{>-ApS$Lo@t$jkm_mjWV@3>7IX^pnC6sQ4{nt6Fmt%;sV 
z&OfG@;M+x!IcQsWbuQ+4OCAm}#&^n;;pPqc-kolT&%xyK(S(|{_EYnkP7Y;ZQoYIBVE@&swx{%wHPWZ(lPOv8CXJT(CeL5!Y_Nta00ro84XN^}9i%f7rM%2^jr(c7M zOH)5^4%7PlAm+6c_h~J*BlYgz$PCjjy?7+9Nk4aj;dQv`OkJ{!bs8kQBsiF#=B*)f zt`&6lqjNt0#e5z6kNc~u@>(yW?zX>KWTosqI?e+L&ohnsk|Yk+89xP%a^dv%uJ3nw za8weXwv(Al%hA@HrP6{1lqd=;=yk}6M&vptzM39x`I^uA6x?K}8M?3zM@`jD@lv!jX+}v^ z89}8LPB_3}HFwK~%c&pCU}crCf@9#!B)=|K&8~)wO-M#KRl__e!v(CX@i^9-w{L@9 z+|iG@Wya=@7!+loNVL+f(2mw%Ox;B+(_!FMBD!SHToU1D%`WTf70xe?uIK?a)^_Dx z0}sT%K506CCYoJc!S;Q$(etmwYjiIwyWWkek}nDhsmBWSi=2G3Kdl1HV-K&MBX>s* z?FjAcs6?=Mn!HW8c-D}IK{XsIOr??TmXO@vd;IV+CAD#*oY-N3`eial`!u=xiBM?5 zp#^doHGBOEgThN9k}+q2#aw&@LXfSd(@Agzis~QcqZ!0pusW^RUM^c@tqm_o7^111m)EIBQtepPSh4eunl&) z^D&BjV@UJij+%=OYD9BrK$=&nAb^j&0g8&thI!uUnBu=xtH3c=_39L7RPFura<| zYjS6E{_B;C&AiQsZ>9MiY?R9mkdJc?i!tF2><+8;tE0BVoT#z*XWY%yQ#4}DD>rbR zl+&rZQ#jse*}Se9te{_3ie5cTpkOuz39)uSD!k7NkJR*gCksziqaOG;QHs^E5&O)? zq#O;<2KJ9B16otW6J;J@26#$etEGKgLK~n%Rc9h7mKfL4j43B#?DdJLUaQfJC-s17 zwpGL5V*}ao;F|`Y$6`U7_KGf&2uikfx-~O-yyhB>(7`M&=R|=AzN6Iit!mW?L%h3@ zFt}Q9Qc$%vG@fwY&ax)*T%E4JV+Pp>Gm^Pp*R^TK+f^a!bVMh4Vsws9b!K$>vOxFD z2<-@2Od7N;$1pSUMtw2UXZvSyo?eu(;BI9ysa@gK)>QGG`(cSev-*6988Bp0G2J3a zps}5EM1zrAgjHVEUI2<9HxCOM)>0T()5?fTyP*izH&jmGM)s!!-@@3`6bo7VWEZt1 zv#QxcXieF*eb#T&I|RtHzWwe@)#r<8G#a#X4c(Mq0r|Zd>7ttFn}oBc$wviIS?M#- zD?v@tpTX(7=%7=)Q%G#w5QZf?)`g}`QvrS@oonB|1d5?}9IT4S^mg2{y}6yQ+eq|@ z#EwXaFVrW=9s)`ggF;X~`CQO~(4p8WBWcJI1B7c8mi{g1YP{$as6SdfM#>%Zkd9rM zHY)wDK`9@?{)U@CP$g^E965rTvxn-r#7;*}pTkCqty0h#(^*A-AHI^d>sqIyWrX3o zgl{pZNOQqPh*xtGc0A-^M}~=6_>`uz&ST13cg$(XWZGI|ONXdSSM5W&4Q|-E9I7n_ zF(ib<+}q&^a)s7w9=ly(pMn;tXKTrUOlzFcnrGRk)LdDOy`SYokUfg%2s3?#hnmqsIgyKjg**-Dt;2wHiqmp4bDkD`!w=U-dk@~B8 zb>Pa*FoimWx;4H~agd%E#ms}1LZjwjyf|4XwpO5aAu?G!agAv?ifX<|XbHBKC%-Zn zk9^W84efPWU~re1%5X_qEnWnJCJ%7>2yfmgPz)JU0SF>x(Lh4P$}nZJY|MnlQ_1!{ zX*dyyBOL6!CGyR_le>?1p9kSy?uo&a=v4(*#r z{CES+D|ZGmZXf-MAW)=#0%Es)*1r75TZ>QLm7`V201DzYdBXyg#k_*1x8xJDQ9k{%2eYcA!HeGVyz14x-&w?VL`D?Z~N 
zoe^*l);J-}pnr{v$ojDvr6FH}^wH#4Jkq{n;;ZcIC&thob(I5ynFLI{X4UxMKH)<0YOWb+%ec)9DPX2*g ze38oaIkYZ^v^g{5{h^kfn(1OQLMk?=UlNjzu(*vgP1MOH$agla#P||Y(F&2t{zPAe zXwH5J>yp|HXK08SwuihH0{WaR@8+p>h@n}*#E)`K33jD_=Yt~qDMU&o#KN$L^$^VhOqtj3LK$o*NRwy)BfE6jwpm#)xJNh* zhrR`J*xBoOO}-jNbZ9Wog~ODOI!x|`QWwRus#d2&pTlP{Xmg=t>lh(&LEMqoArR(P z6;wzqQ%0X}isxOzKwY-nJ$;yYGUCN*qy}a8t@8#N`3gZ9=E%Tn$TmT$V40V6F_(e` znvz*YZZyqdpvVEj9C$aCLS}M~^D=jK*m5d#Z`6(2F^xSX{WB)}0c-}=4wkPRQdHT2 zOd_N=nR~!;f-)ekNm9Io)JQI%Y$TcsLf=oP)M~q+!=RgR#0kD?%RFGW@@Qwu78ii} zKwc(Fe-a*c90 z3JRmEi-O&}hfh&uZx*$7kGv2TGid3XNoAW@q|x|Jes(sM1)PUcgr*8-IUJ8C74V06dF!bc;RzZqUfKJQ^=W9vYjN>7zgV4PlfX)b$Q zpK3WTMLcP3?J~c~+H=Bd$DD*DN}sC%R>d+EWmDV}#5LHsz2fmt59sYeha>tuoe12$ z?$tXFq#gs@5iJ$`7qlG|rKJ7BVzT1EmexXH3J*B4Su!s82*U7!sDG zcgKu#iwKpyrxkz63A~h%@zF11v0!eRs>=OMg`D4V}^w}8H7m=54@5_6S zpcikK)Dx;=xRCW&ZWl@@WqXcPqflK<Aj6vnI52)Zx?6k{wyKdz9Ep1)p>`$;Lxp9;T#9 zyYV2~L5H!pGE%~brOde@4M}HjOr?~-0F!&oO_yP4Xz!UIOA97bKP22KCc1zVe;}5P zEy3GX-M#=hPL@w+WpRn}q?V;4Psx62i$cpZsz%G?xAJOxO0EtII>5qVFUFJixF_7% zwZP#-IU?NNP!Vl!?HH=IuFm0_=&QxeT&;@fJQZ5mc2UKlM8nT2@cOjcg7dItm4x9G zRaCDddqWjlaBozlyUsVXm*AxCT`FfSL>j_;amA(b0{V#>7x=@FDuJ(I@kVi3U6SE& zYVFb9B8&LN9EWT()C*$&dxT?xT zG`p7E6iC|^mbcsu_BkxZl4f3 z^|r!XUGpnunrDvsobcK01!ZXEkUf}Ix=zHUzy zyAPv7rPNlR>o?P|LcwB9_-6W&O2m%uqT^@PNY8F~DlF||USIG>*0m*j@mLA&Aj=(H zWY^i#Z^A2JrRp`PtE{+2$Fqnz`}_Onl(D8{-SF%%<-TlUJ@w0q3_2xisJ0@^XeK%x zq(ZBdAX;&_r+tr@Nl?8T1u~@#Z1UcLMi{HbmaPomgY|~R5ha+yqA;*BzI?-8tNqL@ zol7@~xAoq-I?NGQc6v(h@aI5Taf4Wz;nf6%J-q=54J>9Agyq=DjN%;6G+aXKF^MTE zMjv5_wY$n^cyWow2@73`oxKCu*^l9hMxs!%fu)t+*e~#u&oq%{m)!1aHQ7JvJKU1P z1jpHn?5JA-+rZ=qt#O0d_Eraa*-;`rN;1ZDZQrrf)3-w;oHIT~e@&lTCHz*EFaTeVPKh$nXzKb$Z!S)zTp(xqh| zeWMQX@F{(7K#C#116_Try)dzfp0cj#Xi)$Sse<1A{d#=fzZ} zNj>t1jm@YjtByu+t!6t+aylp+3AX(_ctxuUq0~%a4eb@LfR)T*EjL+LFcj3jLl%GS zjT=xvm-I7&Af7q>xN|SJpcl)|Zp%l%B*FN!QiMK0BqjLf=%%ZBrXWqO!-Ou)ekw0^?9I zls-Kc{!;ndMZ30ZChx(z=e|-)(D=wiGiKMgDAg0bk&C*}w8Ar^Sv?8`Xk2qqb-LpdF#S-Xgz~{*UGntFK3~svU#6-^Zup$CIzPxb 
zDPTzPsJF9LbrHS~Bny$Cdc)VfYc}e0wAhGB6ubnhDzE-kj5&dRAmkDGQ0$jPk=9P* zD2W-&j`|8ilzY0?V_vIymPz2E5*E2)_{tuPE>oq46Ov+^BwCM;qfUB3)i?oa>DvjP zI7#nYi!2{wxZWqBc2rW)gkma1Zl6Si4L@h{7_|7REZ5705l=pzEYpK=S5OP<1R2NN zBa1mGrg6>!GRovm+si&4U0sig<8nWJroe@Ko# z_i7$7$8Onf#|QNJ69pU>qh(JxlP}L|Pz5j(y#<1oV1ATuRp=+#-obi#K?1aq0&e5T)17Zy;IhQic1bJ-1aSLmI&wW=6T&uGO2Hci_V2jOg>Jk=1QS zsgXQ{+jT6`Ft#9cOZ)zWU8rXQ?)$67^3S&+)G8AiXx{7!iW%}yLghyl?oz4ORd^D7 z#Pi;xcwEBaBcda40uJ02nd6*EK1VSd^tmA!xf+za!@xzd%p=+*&TPN!vP#uZ$&tdy z%p<*e*LJpP$Oh+k&vvKD+1F4hRJTS$0FcI55;&x#Ksap{ro|3H2KK#6k?7>p*fW}9 zp5EG7m%Pu~p6oX3SOFsCOOJMta(x{_JBX_H4q+%pgS8F(Fg|F2+&-37sj)z~QEe!m zNPlaA$3uXy-1G^ZzBi#ommkWyEMj+%p=_6ZNqt{0m6EX5aQ<{}_KgG!aI#K{Zp`dk zvR092n*Lnrmq&)4Gr{t%y&`HGwiL){fGv-9N5(YdhF`NDP|J+W3Bx>gekIyYDB}8D z(vUE3ID0s3xf`TTfz`4StL;)+&WIDIf3RXg=>xqULhitn9hlHC%f(hc(~v|v8I9RH z6n{Xl)*4S&Qg}-<=%BYaQW-j|h{rY6fW-^@6Mdo=DKQRof+iGb-MVG-N%ko&fLs>s zr7wwAh>@5eo}tX&Ie%FFA^n8L7YwmkgF#li*=IkN+t#v!u+!%Jv*KnX)b33=?==M! zz49lrh;au`Ev_B~Rd#I+K(#51BUgjLCuwFYawZfsvx=-yN#H||sD|Q0$Uca%RJ)ys z|1$g{7>IFP^n{fxqLi3z*{-ua(4N_U>PWB z!{kUZed}_RHKZ#6+1yFD`k=XwQiWO`<}BcH1=9!kV2_;Nx2)DH0QlG#cDrI}c@3wj z9@PrTuGd|@r2a@J#W-fR1my0rtYh7I%xbCg&RSS@*qw#R8?5<)^;Mwfz9+O!$8JR0 z;AAi(`duI+W{bKFAYs2BIwUOZQFC|KDmAifXryW2cJAx0*n9ih8j;gSvxk|Lpw~uJ z8LX;&E^{Zy!&nhX_CC;h7AqcON?~LBXGv8(Y3;;4jOglRL>eRPc%N6QXHK69RkGN) zioFh%31fJBC{%66B*s_S7eTAS1{T^?JyV~={D!}TY1mqi#519oOVTVsb$T=%@@cz(C_%E@(S#NW+Pjcy&_`KspWDzH*j>F+*rqy=$ z+2)JY7}A5^j>Z>W1Vnl)W<)WXz~*|1wjS)rS%uoGT&>3SnV`sT1{-fp4oOBb*kf+4 z&T}Vc}(K?@-OtwPxJlo#D{*`jo8E^Qg zQBKt~ydsigigKd4k61&d*&2U*B*UI#p>6%PghFeL>Z;;zB`=aYGG|W_rWT962CUw+ z$fZ=C@JR;#i{xTzWY?6#s^^K!f;kh8H16xhmSQ`MZd$aY7*0v|$N0lq*odVNcNo-! 
zxeSL^N%^E|TCMf9*?>Nc&@iAWyBuZm`1nAGJz4!OS5&z%7o>v>byi%Hp?Uf4Dwo?N zXFhw!wOk;30+N$V^7TU0@GqGlFd~coWDfM1 zN3{!H&jejs1!qylqKnox&^(<~RtMD{?g`^y%8eHS0^W&8Lh0p27gZK!Fu?OhNKJ_2uwN&JgE_Z9a$M4K+*VDAbPJv1h)@@@lMw6k;)h zCV1&7@iM{aPJ*5vL`ke=8A4U`oGV43WasI$;P(k?w&||$GGoq>YM~2{tum7r+vhlk z9$ApYWLcFcQyNMs>pw~jSDbHO6$G{WW{ zJxeC%GtbX&_a&I7SUvJgp)rdKv>p%|kjzWjJviV!eYflAbMps2hVu1v_Yay{x;+_v zO-{}{&GdYQ6HrW9fRbBj^RBcFDr>$j|3P9STOw#Ls1#%G(Bm$1i{(c`0E819bgSu; zGwdRvr^#F=Js;j=FPqG|Omsj0+>JsUf}s5PX@b6(=)038rEh{tqR;&R&>V(?;ddM; zS?P30kHac3p)wkd?lBJ_G%xB6N^q#N2+JxkqOw<>2S+2d<*)5!naKW#S+j$PnakH3 zFuaTK>wD1Q{XFQ)G_pK>UH9H6Nzwgkkpcd7v5MLWmSGaRIxZrNi)rA}B=Uj#zR`4( z^=4yl-P>iQpLH+yaY{fYejas;qh~HuJMB!o-_ct;cQt0*4w~0i>MQio;-CO|cs2+4 zpnN%JHA^R;HjjAIou(-Ap-g<6YZ&L{$ty zY%FG=tXZh5}A{&?g$uQ7Adp}ha%2cq#4Y3g`fu47@*@J z2|gdruTgzVBYVD{&Y~=)MtqXcWgn*)nDZz3AK6gsm5tDEC@J*28bTELIim%OgH%Lp z;y72eyEC5UiKd-yC=3`@ow@NfYkEO}w!g>fe|_JCNWqfhk61Z0#yR z8d9O~ha)Iz0K%AuX!#801l{~z|K~jhR?h{R;^u)VlM-i?mHoDet?fOIa}#}jJOfDx zKvkIvzZ+X+%2*Bjm&Y4eWBYF7ZQ#He*bGp3#f&0|3Xa=$@~4EP7A<@T+b&>hrTAYT z@RIz4{ftZ`d;Wa@88TqIx+<41lQHZ>!nb9;U<#(==J-7!PsF#iks?34D3ac0ufFXQPZN)q&IKnL|pA|wwtiB zG~-*y(m z#YxlqzOqmSiWzHePGmx(UtH+xi$nAAIn4VdTF0)l2^>DMTB2{cu`F;1E%vKC#c^60 zA{)_f$A#jVbCF{^mn}b#19$D?sAAA{X5WyrL6$0)y>`wWNfwkX_GM5>xh+Pwv3DCo z;*l+67ldX)Bgbgd3+5`-D8M6(LH_n3du`1-qk4*3jBcxbZA^Ay$}UM!32nxYfwne( z*_U>6b;Nm!3x|fBsWw&2&_|Zlp70zMJ`~PLn{%%%DPRFjf@06EDDCbs?8y!b!gvC^QVvgG)|X9MjwE1_c(MyfU`f}W`BAP4MQi2SrIVb< z*_6V_Nk_9?*1}WqpJf{s28PF&`z#~(?9MAy)k<-878!lFE^M#`*boaKy?90NgshrE z;JCYwhcckPU6k`5`-}gSBPg>6JixW39@cMBBs>XFiYg1uK6f_MDLbUX{HGe<^Z$tA zB)Ka1(&`FGJ?sNaCSr>A{GpJ3Vb&*kMPnTosv3r0zdCe`LpN->)TTywF@t`%Jx8>S zP!+m7t(3de`*>3Da8;*ORgj{7u1xm95-a`AQ&gARi)h9$ikYq=hMYGWw16(`iesM+`vGs-GlJ6qj`!U+@l@m?mMNb<+38}t_(hiXqWPzEm0h= z3HscxFUwDZc@X@S<)Mj@=!n9}Qo9E_HtjTRw;mNd*DQ({drkg=?v1fe+v4H9SvP&V z!p+M4Un+*K?weGt@()|$QapG?A!tFJSu5yu|D83c{@I#@X8j9lR)JGY9adhm2Zauu ziT^^12G=nHLLdFsbIgEr#+=}YZ>yJX^HQ5so{W8Daybapc7}V$J-1%sPGec$apgOy 
zMT)DQN2RixkhU_06Pdy%Py#pT9)so~z(TZ94E!EC=K@cTwpfW8=ka{mD}ZYz7s5-7~?g?5d*V zvHh@X2^PL%pFDGqSt1F7nbK#8`zz0WyDHBUzf)q&@Z0E^^=A+zs22lSWs0ng zUb`f(aee&otIHe$POY$>TP^mvwuK&^E1#b(^9r@fWKF;NvgX9($8vMuqG@P$C`RCD z{iB|nlVrhbgOpcCEyIkC)`VX_>ubc>mU>?J5Hr7J^o0jO8s4vK1Fw)P=vMkniYEE- z@VA&~m7ZwDkKM?BJJBYWKK9PDyhF^^m8b1iX1Hj?E!)~-r!b7?6+?`BW6LfnrWYT+ z0Qfa31Ov17&c18h7txNi=UHMt`@Y^CiB=R`)ZmJ#)`kv|D@q zY9GjrBW~V_&mSg4OL#XSC!>3rp*j@glnQBR|t91 z<8XG#3rxhs0T)$qhvve^d!HKv13X@Vd!dJ~A%MGAP(5Wm1a=*`%iDBkjJ?aN6W@vM{7xiJ#mP- z=k#1r4RcQSfoP_9*q-+mIoFtVkCdm16r10pUU)yxdPY2uYLN^0zXR$%HxHBO!X+|G zRtZyL4lr~e6BL6-X#J-S#MH;qxO=(ZiHpb^b4x-VGgESCPDmB58nN7KcuvvK@WQuy z={A)Ccf4dd?{!I#3#*Tb9Cb}ST7kRqqI68w|5H>-^L+1ASbWjooCI!RpU&mWhchxS zpWxEG!QCr)$lsx(9V~=fw zvy=s+deq6RmWHFoWA3{(F-ow}&QviCfj4-bl36hFS;(u`Roer_AvuwyNcz!b1JXdbD)^tq!Ie=Q!2*` zI}G<-%X>=bsET(%?rF}R*0-9qWB3xhg~`<~zn;(5^1NH(_hpJ)qPXo)=!abj!K-@B zmbM5@C^EXmfGtlJX;&lAu)dzgtMem=d&*)n9(g^x%Lyu2J?;y;P@ah?H62Ml4<|k* zLDxaV(3_XwO(d6^s+`}bQ|OOaIX=9)Tjp3@?pea8ySpYm$Kkvgbr^f^mASW))n5B7 z=XOa{ySJEEHidnhV2<%qe(u??bGB@n2UT=<$5YJxhxI+TpBUyTo-t7fw10f{F0E5~ z;Yb@s^ER&g?h6U3!uLCv&_X;O)`TWoUS93f+Q(gAO}y^e+?Q@-b z(;cjKzEgBo6fe==f_If4qr5yVyh?h@%)!@tgu!(vs+L2|PTTC1FOlE>? 
zEcuvrPEzUzTOMB1;&km}8>;XUNflo5+8-E4QiU{;dbxMn^6hGZ1#J95xi1QILvCkE zUP*a#peE#n1 z7-`whe^ThV=4dDFyV@h`S5EK`dju^g;z!)VC>R=yO0w2xON6&syA686v#$0he3M4U z>AAfssl`2Oy0dbz+^>WC@M*r%U01YqsA7bL_lE?yo^Y~D)bbptSpx!RWDs8HLqL(X zS1pu14JkU|Jy>&P@oZq5gkTO*IdsUCux-6($?t9m(^xCPdo^D;dR5(c}WK{&0K!@GW2D+~`^zo(i?Gs%E zE$KI_?C*z6hetm&>Bh7co_y?eMO1DwaR%%vgV{&>}BG_pF zz-K6EUcY>?)~PG>c3Gn`#W@XXkWQl5zRHLBOy4}Mr0X-x%Ls_ab_4>g-TmNPFKcP9 ztQ>~Rx=*Z+7f#O=CF9126pz;mmbpLFe#)!S)`R=hEpq&uTbA1a-fZpu2j0?VrPgt_ z)ypBatRq|TNk9w2XBhCzqlCy;4^1STwI?s5_m+K-h(~{1{!sZ6dC6>~_xzVPmWA-0 z43rMHEDMg#oM{qTRM|ZisyY;}p30s$(vrNhw6w9{pr{olRIxrF0+p66f*IhDbpGc1 zYE0xVef}P&SDG40^&X%LLrq5P?*0u2J%yO4DJ9y_TGBDu1Qp9_~k1rx(4zmdH zm$$S7v)8}&W?^PU{HTmO`?IUY#wQE4!S9;dp7UBQQOF6W;H6>qW}3vCq#JhAi&C0& z!#*&8P!hIP_VRuLI^9lQXQ1v{=LFmYc1u>?3s8@1HO}uO%Nd^OCWdPY>F$Q*(2R9- zgH&dU4xi7~OgpcxWoVZl-SXrNRkytBJ8kZC;+;-)aX-i*xi&d0e*OoEnY}?ku}UGv4{cY~kag{?T#|77d1&%SHLliz?-~L{+Ed1-!M=>nnT5 zK0Oq92Zvjf4#oj@?8d?H)dfrW_}z(Jx%1g3eW$M$F^dRB`@f59*gte2*`mL~C6__k zqebiZd%tX{F}r(S*I$&%%ieL$e$6Y?`}F#A@2kqTqw_f_QZ{ANa>^|R7mAYJ9`2m` zd}RFVDc!q%4Gy_U0SJAHWm4vK3Z9%qK9IlCo1zBp-?9SOd$2mgs#B%*(t^ZW!TQ;Z z9`C*$y?~=E1c&#C$^+Hj9inp{#)Xml^v{YaRPW+eg}h&R7d2;3b+pihDrt|%nM(e4 zu2c2q!MumjU(cAeO+Gonafr^W7^Tr{SK}tnH91=lkDFUI-hHJlYA?5{`tEY;g=f#6 z0Z@iFY$c_e#)0KR$%|J?#Y-QbnK?)~_e!nIkEPwkwu(SaGf&8)>tiAt82g~uuUnc9 z&&sg;?JMQI5B95dZ4+7T-TAUiPEX7x%Q|n?kO0?7X^VqVS(v)eN_mCo9*I1W2bWzl z)f-mLlsQ=PrXN@wG5$3cVlR; z30EXpUwGhjz*5OuK-UI#r!_|?ghj@yP;E)Y_si;=-Sdw-tl&n*2X{5Zy&V%7B9b;Y zH5PDrQ~i}Hr9lZ~7H|8Yru;UU*tlaWrc^p##kx8f9Qn;l`n=>6gJWo#5?OlBSBHG* zJ;4cB%=~n(NAbyceb~@)8FqLsL<^dYiB&>USm& zU;Cnq5@~ntIZz-Z&6SJ!pkA)Oi_v=Jal5%BW)X66*_vU zvif64O7@k$sEAJ~vqKTN#@4E_bozb$$m+RM2d;-W+%>4tl<@qp7I)+wPUos+L4PLF zna>qX?JMOhI{HccY9s3wa0BiF4TOT0L-%OozNt5lUj|AY>lTi>$FpapW2V-$GE}E6 zuVtCGvrIQ)7Bee7HN}w@02zIqlb%tb!8V?zr4~V09iA?|0bwv6UX2iQ)AvPsw3D zBUt#F=usVU$7WS2f$O@v3014#zP=P}X2bbP^xkmQe9q=xT^s&2Nx2zeddb&J=9qx( z?UWY}BBnX-4E8ha_p;u-qvT26AvK{KWX7=E;-0Gafo-O)Z*mK?2daHU*roH!I`$i0 
zFgbeH?aa$h0TcbFf)}Qc^F#5MXGRU+ma_$B?H1mbfn#lzs@?QrNf)`4Va6Y106_8m z3PH;;6g#<)le>GaNb>5Q)K-*E;c#8Gitq81K2$p?p=+o-pfSoE7o;jJ^ctSm?EiHZ%9m_cYRP=xIF%{Z1R4e_+6R z^L2qQv!DQT)%x&Q%vm+)T95VXeyj9X6+`2-Qt5N;+TGtX1eyXhKIr&D3%4Ut3Mk zaUEb$sr0Yang%V*d=8NdSdsYH7ZQVQ79gm;EMx>0$;C~~-E?zS_e7c{Uw-H(?~{Gy z!kgCYhvyUvBP{$DkNX}w&_e&=l}_{Z)rpo^r$Q14b5KvyRpgRTK7|T#}6oRr_Y@NX4MzZewZC?62CIsQdr&JeEmvb zd+4&Uko1lu;;Z&}r~a6(XsGke{MUEl%M*6xcd3Q6-#t6F)Q(-dz~L0strU%6>!=s~ zTGc9P?QPR%c!kcR(7D{O>*3eEt#clztB*FWJUfIGh~cQmb|$1fMcIYLV*3wq0Y?|cr{{7wS6-mb zM9S+NYD!p_<8r|OgC?ZTOwJ`cgFtLnyG2Z#HIAaE_})onnqepvn7uMVOG-1Mm(^k6 zJ@uiG-q7$xh+pNla1E>xMx5VqL9)|haR8*GJ4B*nheoCg`la=*`T;-FP#y7JJy{?X z*s!}lXh@F65uowIo5==gYFSA%Hspmkp2Yqqw9wYi$AwO%gIa3eOfM|o6mp&WJn`AI}c5rYwh}7^q!5&1r9K6Y1pQ-=OUa#VT2$=z-g~j8nbp#aPzXMS?4|)CsV-^SY zAodxy?;QQ$36Q+S&}VSR?z!V?6j3d_-eZ(%*7|S8&mKC|q}v&qJ)cr{WP>YD7ALxc zOR3~3LL~rBKk|xzyz|g;@^F?f%0S%cO9j26+YVcE)k?W5@aRkAu2M0-M=X>#hf)ky zrW(nexjkp)r;mnDC>QSJ{v{YP^L9N90C46itIigHOu1QYD)Ly-p2XEEV zZ0rKwyP0h4?Q8=nw82h3LW)uBN?9Y+h!~E=KlKd^AF8is+qPUz@>&k^pqs>b4YBa* z+b?fYS~&E0c|)aBw!&k-hPzg2L81xkFVB)@!L-NV*BVg(!*07J{~K|tQc@_@BEvyp zv-pb|-m398`Cq>c%P`<>g+Z>3E>~DNnMNEiT|YE3+8^BhE4maRQMgWg@^$ zxe$jQ-TU>jkfH+YK-lm5ZBxETs(O+5Bx^Gw1+jxvjQxBUH=jO{uH+{yD|_Y60qLLD z^y6vcclOFKAa>ma4|-bh@VBmD6;SNQAEmGbQmk%#P;Cmdr4@z-q1f*}@;0p?!%m9T z4T!v@`^*Uvjr_AyAq64PSS>*AD0hQiwM?aTp+_RmeEffS`^Lb!zirzzw$U`Ulg4Ol z+qTWdMq}Hy?Z#-V#*JBhiPW||wWntgR==T)m9{q$Ldm<%Ye)5d# zID@9;Jv}FadxC=kg6Ux>?c%#ICz9zYV+22YJCJ((US1Nx@sK|Bx$x)*!0$-JlBwwt1oHG^7=6qyRED7`G}bLyXEVJ4Y9t;HPN! 
zn5T;zAOMs&g`;Q~_>~kPa&kJi!ocVBXb%%g&ju8hx6UXfKokm( zU}pa}p7`3c$IYGzL*@MX#_X;&EZ)!7l1{BVpx5!mJjoxPgf;&sZ6q+s8^!@k-Ap}5 zy-0mX{dn&lv4_|YaySO z!u@OO!GHgs^u)jO%|CudAU8NSYpuzj zSB&0`@VT}x3K222+7eQH!6q?sn?;*#{OOSo#EVB~fJjCTSQ|hf z=YSP9dkb#yzwt!sren5m^n+$>Vbuu1hc;XD$n9nhff<6amv00fCu31`M%L`m_{85d zY2W*={a^xdgMUMC0@BxwK6xJcx1xvYLmDg8qC2_1H>l315gTMuD)(r!m20zr?M=lC zBOCwFU}A!7!v+h@fcl?-ti^}!$?+~MN)cauF58!*5{xCF+hr4t}6AR;Uk5fqUWgNlJAl?VL?jUYhU@bdA`;hV1il*ZJGMkxkgQUMML>3JMBZYAQn!NpUi!`4Z4F zYNZhHCn}xSwSSfk6z+ePjUknUzYEBT|K-iF>|ZEF);6dm~5e24dD4)eUV4Vy;8AK$=MCRa3q##3s zQJ~|Y?F+m^{n9_wk!tB5t67X}caUm6w@j6cXU%>dRRc0Pv@7&$)ox>7p8aT`KKS8))9&|ZBdw7>fAu5K{mG_jJe63AjQh{zI|RD)DdrC3M9@C4SNCOn zptNfP8)@Z^BuEu~l*0-++32zXC_DU@LLh%vk4N<)#G^NfZ)WDdc2G0=9YSno3JUZ# zbgv`&2}pY$@I0PQzS$uY$m&6z%+k!u*xTQMk~g?MRdfMMtcN$^mN~-p&zuZeA{Jn@ z{?EU#Mdke4JpP3I^vkyr?66i73mDgH|78Sezj`Zo{N$&KWfYx+M8LU4&R&cgRdkJ@RY|KzGQ*caZzrE8-)6{mk;@ zg+Znb?`e+M^0x?qqI_yXEOjA5&gfIg_`5d1h2ZflNQdMA< zI+r##@W$GDnVDJMTIdIWHHAN6v|mDl1D^beAo4}@QhE*c5`PUHRN}u>?Vs)lQExe+9LIT)^mH%QJ{&yw*Pb+eY_zAZp zu6S~e$U-X~4x)hXocf+`&uLCjGDU^#Cu?CWh0e!ATrGLV5kBihuX{#}iszW^!W3(GlLOZ-1S(j)BE z_^;jt7sh|G3sSMpk(E>;>8p$DEb)L7#RHHUg=P6NM9*lz77vXqk&f zHDeL+Iq6pyx~=%!c4XHdW_F5a(-E}Rik56hvC~t25i`onzB2eJCF`t{Dl-mH^ z<-fa+zY9nJ*8jw5WN%{;G^(eYDW3O!RA3} z$Ymn_s{l7P!E}oKR$uyRER~R^hW9%+r-}A>@twrn>wctHqRY{kOZwe~p++c)j_rCI z1&o9UKcwnrdh-!A%t{yv@$?^`^)jK8;-F>>Dy6LR4PSnDN0WqyCW*k6ee!a~RA4A7 zu}1P~CpU0Jt4+{Cv(A(xsaiXQ|D8B080C@`RQq4=zxyv|P!Duu`QPp0zgk3GF#nvK zbBd?uY7y%%b22%d7nPf zK@ zUvdfW#=3{zt=pYpxw5O%Cn>?VR=w7XUhL!?aqU{;qF-=U%JXSm{1)lVzEtsRqsuob z<_qmIs5@8>^zjjdq9u_y0&gC!`PLdtvpS6=?cwg*62M#fVaj{p;mb!YLoL^jZa9k1raus4_l)AHbq< zv2>a~U0#s@@(b=Rue*>+DC5)Yx6s|R1^^Hz$1RARHfDs+Ty;AUd-S}0Ec+F4c6IUb z!hb-oO&;56H~5>sy~xi`o-Gq<1rSbLybtvU&O0Sa^AOwxfdPBv;iaDO{Vvp0Bel#xM z6#1U43^uyo5lbjfq6=SLZ2FpS++HzN*`)B!1_vAOipLcaqyU)t+ytE1;U8I&PhG}? 
znhaa1v=)Y3?QUpMc&w=&ZdNh*r3zi?%0-6QM=jlnw5nm%S)Agli*#`k-&`+Bc1k8C z^PAPTK%Mt)eLU>X60KhX$Y@}8E0xhxh~7M>7nq?+d!SP>&uKw+h@~0XAR{VJW4lj7t5p4hS<@`;uK+4qz^Ur9x7)t~ahSbtEDDnAY z_XKzvpHsecI10~~JN&N!@g^fezj^6MTCN^i}XZY>0fsmrBA7d(yA4rMR`O{*B#bgJ!?6~%|_l}RXhlg+`K z%mBFsvPe2RaV&*OKxsH8cf90YK{<=@RD&~f@?U`iW?e%z>z0u|U#vb7P9stsu$J6N>y3y%l!TY$WN4+f=ND7+6>s`2H zr~4$e8Yr08^YK~?5Zk8Zu2AvNS5B5}z>ls^2bn#dPhmd6_0Z>x8vzWgPbO)PyUR{) z3%e6ZS=XuHe6BaIgHo0=-QPr_apPnOR%E62Wn5CkM1;dX$FWuE@<$xV5+NsZWHbsr zyaR*{<+1>R$u`$}LLKfpHBHw0<5}6Ems?aaM`NeOuLhA?gFRkv)H~S#*G@;9|Fo}3 z<)h(1biGs|pMN4sxmsQn)t+$6dNfbxWPMivgwGrzLiBM8cSGRtVH8Zuq(?w-`|~GU z9zSPm5k9X|(oR!OpXc88q-;;H9PB5Po+x)^2@4>b;qM#3q}82-_{dlgGt zsjGefKssr^HZtwcW3jNUq3GdkD}^n?mw9)QzZZ1&LWy&v!E9!qFh%nT`9kd8x{%Ha zQ{hEbhRPuyTz7Pc>pil!Bqrx*qJsThJHi|N;|*s+Ch(#mrycI2>mvq#-B*}HO>^)m z^krul=_f=dsw+ix6e~LukZ6|+Lsb2C-AioF@$|T z`-iSbztHf0DCeROR>^q%qEh)q(nM-mXt#RLRmcnzeFNca&Apq|`74>eAy%pNCaAk4 zn%%4B>}92oxTk2mm6L+whx6aU?z6*1C35)Bls#@tJgsh8%}shs$s41Xg)TkbFOnos zE+vgHee$E77Lg#GVh~aXc>{*L$D_+|C2w5~F#}w=YV6w^-!CCxS0J+-lp^_Dx6VSo zz$-T>V0>Y8Ep2IdCKYN9z^0O`v*sN=E^)u~H3?RKnLCgbagQ)7@gj3lEbg?h7#$O- z$B!NCT}VCz+BnsOJA+pBr_*K<%M*^t+S|&w>gpi} zY#LMezUAnI;;AH!uULh{+f1&r@O?-$K=skI-4sEqBR);X|D-*hC)F z>qO1!aTduo`a$rB&9>sk=MzM?TWUNO=?gRbY2IcQQ-JE}`ZFEn-En9DF-QWLOi(V4 zzldRov&9IBL>5z91~;{Qx3t4iR*OC7*@|bZBNmk$-Rc7&1%*tzIJgE}i^K{2txPQa zrBHH}Cc~6`mFALplxDjy%5^2dh#3_JAI4UoSoB zuQ6Ec&7mgxFFdq@L{E+fv5OV8mqB;deaf7>F(w9)z2(3A12`DP3H=W8xJ*u_Zyn~! zoX?pOd1WDW+U%bLdLAeUkA3g9F)+q8BVFwrI?bLpdwk}$Ii<`mTvsnj4&<^*uAgY+ z-y0!Htv4JKc_duFEc>EUfzBi)ks+ie5pCrB6?GS3~lekcB zYXy1*(^JIKV7@sJmr!c3C+Z{FJQi{_n1^idSh`_vb_(h*bs#r4Ht1ApTcEm&OAP+@ ze5cf0A-Z0y+@Qm!S#1)e|g}hqJXz%wO5UTO`7T(L9g}fu?6iKi9S9b1fM? 
zZvW7mOlE6sTv#^YWK{HfhZf3a%)CzF?SQ&VC6){+ zJ>ahutSp=;4Av(VX%*@-F6Xt@?Tme$%@>Ox+IhV(L`I6BQgOY z3|+>=dI8*^3W_Y>k?B=+JIYk;{I8(m#DQ7QyH zDiVN~+^TDPxSjA53S1cp?v=CVI$J2tloGv>mO)F+anuMYCk4ZDtV>Zwp!U1H zM-5n%X7{x7J!)ohFaE#Mi+{#}R!FPA*={svR8Bc9uoPq$W9v{*kR+!UqtJ#g(dAGu z{%p&RNU6zFriveJF3^`@-k)LFBTfe2vZC$({Oqa2?FPG0uKL5?jz4T$Ea&;|RM>n~ zk8YJ$X3jVt_*>Y2B6J9Atv8siB47Q=mnvjUXi_k2S*g`4bFH)e7U_&9(baZC;tFku`9KIKw#+yMa9LQiHURBOq0rhHH| zAL9w2Kv{v`e!yv3Ii#XoLW15529nH=t1!j;T?&P~GiQlL=GY{|r!_?QO6!@Ys=tN< z8NBGagd~L*?{EQb$KYb5V0Sp?g2im*bGbTg#k^2<&GsJV5qq_6esR;d{35zSnL*RG z^Zl-Zu&2bBa;dE?(#m36N`$s{%Ii0oMHNab)Z>k6qO5P;+AZ9%rV}U01)bR23N_I( zhm%!SiWe<}pNgY#L=Uly6Tf3a3E;9>b4dI;TdS8vbx~YKV(@IXD=a5-l8juW*E=jW z*5c6V@DkW&GujrnTq;brzDYIk9)w#$O8e+rPN=s#G^&71GXFTQV!}^3%UBwDj$N!1 zp5E#f74ufDHj0{ed}Q`~Pj0_I6Q8!CUaaVX`p^{eyQI-; zNR+mkD}w0qGFfxg6o`GNwBX%Gb6c!72x$IEyQMqcie||e)U$9lo z^1JXe@HXDS-}dd_p@=bFB;fOz{^dSbD!s6+-00==h1(It`{`MvSf|HbWif?bO;Tg% z$f?A*a}VMvDfsj}3d%fj{Ilg!KFLHniySg0HiUz-C^Br^#6zBS*)@}ARk@4t+98pV zMAs69OlD%pJ%Iq4s7#*EI+SD=oZ(v>K0sLbZU|*78}U4#d0gaFTyqRZ8$pk_7XH(ObLtJnVAubb0H(f0ZlS4vrJP zvrJ!WQiUgDFWUdqaNw()ientK}Zb!9c)#x z20$xIm(HB_?&&I&9CNmKh!mz#xLQu2bsm$=Aw!owUm%~i_JHT^O$~IZ$$a&}&o7}yb_y_)ec{+^X-D<|m?d~Ecmy12lV6@5W6|vSE$RuTh zT}<>o6Hsrc8KKC3FWSl?XZJx|k8wfWo)ZfSr!f~N7_3`J~kq1-JEd731H}sH~F4nr-IAoeXMIxVbqO1hZj3y^U9g zVq6(3z}|N8+=J?RnM6}D z__!9X7Km?#ht1JRN)77p zquvRLbcy?5Ku+VZFg)g~Cx}XC3U5a`jizjOK92`Zn-QjT@jqvi#>CZhAm8kAY0-9I z73)MHEb6#i4>`)}ul_&^;%GTr3nc8z6gnH)blC3``|}6$lFB@@D7HB1?WEeQ71zP< z7*=?v)teE=zReA`2I0ezdQsd>x3kYLu2Yq044ZO7xQ#e!UKE zFxE~iodM6o$VsM4mDmx()?CH( zgmxy}=Iir8ZvCPb>=`syB1b@KizbScpRrUky%YLC3KO*h>hu08V1uJp)c@k9@I>82 z#k*=wE2N0+LMTu9>Pu64d|&G275KYQ6}ztv)?o)*uaaNV zX#CqXA6g;wIaTwp@9PDh;3nj)xzI!{aMyHkm^n>qLm}3%8B7b;4dV zZvDNC|MB>4oQWI9YH6NvmOwW$sV%}ZVZ8Jc)9LbFp1PZUR~h);_UZ>FUd9;PF2)Jf zNOyI>s~4d&-FTqx4Wi89pExjOiAp-)@SPjYvx*|#-+(2n)L~n7_}>4rUBrQ4gQaIV zk75PBuagh{gNz6o?|P>PaeZ~#pboQN(s{9Gj%?pONOA7pSU@?f76-OAg0o=d@D1if 
zo=e6O>UBV(ZvEJ!Jwkxn{Nq+4w+5_aJO}otiXs}U88Jq&3ie9klX3EHYawjbFRJ8L znc3W)w5xF{hlP@HnyjH?VGP2})f*yUo;XWZP61flJrHT0FXwb+Ff@vPAva_YXyqDh zT}|TX&&YH1cPnM-F^(r%&`ZvvF zyDB>!zGz7P9{r!@RVQj&ms|ZP#}Fu|SxgSLmapF(oEvhvW1cP2U{Z5w$2n>qg@T|3 zsN%`;f&hw$1VYVjLs8&;U$}aVS4$E_>G1H#bk^*=I~TZ^HTYe(ddtUhf*JTpXT8dp zCPf<@pYL;KEdq}5;a~&S=$~WBC3?LChXB2#XZD))_^r*JkLiFkIP>FF3nt#oR6fE{ z;AlhE&)fEB@+KmQnJm-lxE1dFb__b(pf!HJowb$sokEhT<%GRzBhpU}TL3Ye*EL-f z0avDhe3Ja}24hmH7n0+AxjDr&I!A(l&#SP3VoLEQo$hchR1Sap``fh(SM3Q1!U=O^ zHn*1`3W-Rf4h$YSOK=Rb>@yuhDcTn70!?omDBMcYH`VEQnFt%rNGXx5aRP^&i;Z>~ zr9LtH4VyrdmAwBgL;h3~P4ONi}w5fbU*zzSbS;d9%>hL_>b9&V&wE4kCzPrC@g>E8}{Vr_o~Nl{LtzpieAJ{BY^v zVskCgZGRU9Ta;^LQX(G-qhg_mFCbC-`2s$T&FqLgz3F?wBLzkr|89O{D>>-p@t~99 z@U!FjJR_HbaZe~Hjns)3Mf7JnF|IpYX-~P0{6##^43*9RrT0EE^Ovo07Pn>0w2SHP ztXm*sEkD?Ak;o6xa+>c3aqcpCPfOqaveawd8-n1VzJwo9)~CYQ}y+;XvjJo5V&cKvkf){aHc_s93!BeW{r z9>ww~+*`hM4iDI^5kF6)^O+~9p(YB4KGJLTPq7h-Guw_@$Ue2|cjCE5aUvdiT|N>^ zb~*(CPq1@kZwKaEzlxZ2b-4RsTJctVA`xxBQ8%2fbm+7L&1f=xG>7?mBVH69W1J#P zfK$$+BD}F%!t;bhX4uJ0UCaDx^LT6Ys9!5L`3{rtRe)*#`>(yI(JnX}etN%RwT z9vvfB_P#RBe5rWP)<>C=A4gwB<5+pLh}ER}RHI4}ly(Gz5mZ<3&+{IRB7^N~>+IcF zk$4$#$oi47Au^OGpq?Qv2|;$ zy}58toUuFpf$zmfx03_}4i<7-ZP-L2A+rGv?>sE}DAF)3A%PyRuLKP0Ap0=Y)QZVT zVbV!B1WnP){e+gs)vwkBs$_^CmhR*hXG|{2u5vmtD_HFQ4wZU6hPW@;m_x8X5&sT< z*XbTM5jU|I=1PeacS`~6a~9#nVqvjBQ4|gp?S})nMA+GH5vO88YZ0-`Y~`9&;a8Ff zajEw#@~MbSHlHA6&->jxXM-yZLMh_($DW@|ucxCVM;@*}m*dN2>>VIYUW0xdot70S zcS+-ytMK-a>fVh3*Cf!ANo0rXSB2B4i?MY#xp> zCyv{z$zpLmAAcEyVKIdmRp!z^mfzDsrI=VcYlvy`V7?jV_m=1V4LLEW@K#;tc1)~I zemtoqw$U6KjhxnG%H$R5MH`t{Jg~I&x5EyRI6%41ZJQgO?aj6+qSa%GFdU4gncL`g zr_)JPX*NU%xML!=PyD1^BYyk2j?$qXJ?QK%P)w$n4vJphIE(Qtsa}292f4Lqu|ynb zACi(6Y8GNq4>LopYdJR2k^>3Z6?E%hA*k$r;DXm9Zn<$SPK33fXPUJMcoF3l8e|QD zgI+eNGibV6OOnxGJakv*ba9_p zVXn%hugRVr&QK%oYmA2IcCjfi9rf$kjcY43m^*$M&a{0ruvg;B1ZwTo9abLT1XKBD z2S7_+A(wn?NM<#b&BDf$kc&iqmI&!do20MQXhQU7(P=%^D_<}CB9qN1P|W9kuij2b zIWt_R^iI*g2<}*!I>aEY+v83)=ipCfdxR<~2!KE}Vb}Gfz=7;~kDh;>>$Z@t!d3B} 
zma{7}1q1t24?l=%=f6ATNvSY6o)t=zpbA==Q>^%p%3e)j9QOLSLz)AU3Rv_eb1fsQ`~N z)IQnAMIwnli5nIoxiHN0=@r8rV%YR7ebnZ1#7L?y2!UiHFG@9wP$Dmd*aenar>>E6 zV`0J1oIzsS_y?7n|Jr<>nWtux6^6-Z+MrDLi`&JxJ?RxLH84C}H*=kdvkIMvCvt)e z$NpA4-^&O#OG4jvy;q337Y;{&5^WZX&!B7F#NcSWfSm$_o=wwCjP z!z5m9)EZ(sKtjMOk7sV;o&lLC|sL`}qhgrpJQR zdPQvh@;I|CXd;Yn{E`5=S=6+%?|on~xZl682fsiw&P1;pAiU3(rxOS91YxDB9=|>8 z8Lj6|J}NiPJr-lLSU@pZK;@(+5uq~h!Q6kd~q-r);cQ=|xVe%A9%ccB$Nyb??VadbDK%6-x z%P94kb4BM9FpNlF#!*HIvnB?m8)ck&Mx7C!4u-{$ET`_fSvctd*MdGkj&BSE6|e8g zK$;oCST$dtl|g3?S+x7B1O0tx#wCXZ1596IYNUp7heZVmNbe5o8^!V-|{fU-$i-1iZU4>o^L)T6M z^6JB?N*yQpqeD~k##B0IRO?A&*$1VSH&pxFhHm=tq+;iORooW)A09%tFrgwkiNx*x z9S%bZI-?DsW`>fYj1>7u^&vCVvctd# zz4?!WmK??gD~^yYrorMy(zVMXinr!xC*7FYY>ez_k!|yT6>JNNW-M_V7c1%OIg^bu^Jy^_A`3H|Z%{ z&o5JUWmvkMx8*`RCP51(8<7q{1gah{&txi93-ZOHsH9Mp$SA+NEm#5YI9wFzNWz60 z6MYk0H+Matvmc2`28R!qS~nSR50O~^%xE-zBT~nMi}E;(q&8Y+1ZAQF^$lo=`fpx@v?_kMP*iUZ1YCGn+aP?`y4i`fJ*R1M_T z#i#Hjrfi(R2I-PWJnAV46)p{@{RCZ}OyhtkxXJ5E6<)rH9G1q(OKq4uUL8^bk4BaoNoqtXm+~hu zdO(9dQSBu-qYkAO>auSnwmP<_Ix+<$4mc_q3+ftx{av}{^hdZO&`d_C(NE{1yp_4` zSlW(J>b3^K*J!syBI5DzC!Hh~Yyd*bj21g+KG+}vl6Y9%&fa6t@;bvI;@8xf6`3Id z-M%%U&iIFqE{a!<`L*B%O79i;zYMok`PFK=GR-&1ASa2qk7tCP z6a@zvWAg39fggBc*;46TCSNk>ofEqq8;pOEOBmvkeD!#Rwp=JHlZ>yX%e-VF!?4yi zoyZYr%23Ao-d%j0WGPuC9!IPYUCCMGJ02}_9p;B(6oUHv{cZ>YyN2GNKd9&vPt-Tq zOC#m8!4$_VkERr}T53GN@qzI2X6}ttPY*9}=^Xs#d9n-2b&1|qep&!TjA5n+{1Om)=aMbjXQqPu)m(uIqviNZ??{D`>W`^69;@Cv=!BXg`YWXhXi?cIe!=#s{f!25V4y2$x}!_1m#m%uGX;;#H?jfNT74R$@+dFo%&mR z;8~$gI}(iO&o00BR`u^spmD@vMC$MX(ywx_N4H+of}Bp*@?Y6kA+MmdQ6EZduQ;jvqv@C6x>#T`1-J z{6MC~ctOCHqBL6$NMkY7XAdaa`yl_IL26MQFprDhGvV}c`9$7?)0}7)5b#;XsNQgv zWkM*EW#ac=@!@e!1)x*qHqo@4sfOBGn`4VAA7SPhS1Xgj2%0Zlid=ES;disup}-}% zeB9#s!efCy*v!?VFc-OJnLg7*%a>2sQu5Xzy5W|2@`IqZ?+UyZw$(hHF{Zf9MFo4h zE|GB#Q;}03{ZDrw@*om>)6rT=VsL#?b++Ez+G0UTzk>jl{~G7@$k0hv6cw5?eYzJ{nPk8Z) zfd-`V-waw^LFp}a(&cAkD+)%tjl^eGIdK@}i#&;V8_^VhF@m znt@(vddnz8euwRj%H&!Z7-jlOtjIoZvV>!hB){fW_75YXR*Fes(nTGwiO+@5iDwDG 
z61(gyx+s^cW0z|cbmQ=7eUd?AS0XLL(=`g`OR zU?+)3AjjOF@<{`_>bQ4-{^8%#i!rh}1!2EJ9chb<9^RRR3OUM*xwK;k!qz5PsrCQ!K#buxKB9V1ZcN>+tbnczNCl5{#e zk#$>(J(-lx--yy*lvQsw>NF$WsgE}tFqjy`5{RCGIhQM!-orQHA5O8FU+m%@x!gj(xpN&-tM+{X1j}!Eh>{+up11p zz*30M2Q1`c)I{dI^gaq+hICvW4G3x$PcXVH@5+@jY;B;wBc5`}zZ1@eN%T`W&X2Iv zt`cep_iTBWU0m|cA_*d!IgI(WdcE9d?aREVJ%)uu;t(_5K#P@`JA7(Wua?x%xL}z> z($gSSDz;8D9}5`01`J=F{+@hgfmw2sFIJi9rz&_YJ(h{Et^|rH%aKqgZ3og`Facr1 zH=)dihAWMy!}kJb`zfM^OkSTaqw}>ULdU;53T`icHzqV!uToE$RL$guMBbd{TV%H& zJwPV+X*ikORdUE86+?f2B}bJO*s^^=^w&ht90Ap*fyk0V&@%jWt>-b2PA_Whi7ef1#LVF8!TowpoH|)mpoi-ZXXbSqy zn%%wxyZ=tb2Jf7~v<4H?5`(KpwJBHb!av#yy5Y%Nl8uF*2FnZoIwshZ6nnpMsHZ210(j(C?P+-&cFMmckG?K>SK_xE$& zGj7Kueb$JzRz7nn**VVOyNu?92hc*6*(!+SMl>@9ib$|Xl*SgVO@sPPZwQU5TcHqm zvhknal^pEHl}qV&Ivxo82_piDklCcw+sN?vC~xZR9fUnwv zzw_C=%}SF2pMYDqI#jIQaeu&(oSF9cWZS*f+B%vBMjrNTkfVzn-o0AJE^lGQA0 zoe%!9yJB{a`}w7b|FVfjC|zc4Fur&F0O%IlOC%pUMEvK~C59hO_w(;Jjb?KK)QL5< zv<;HMmf)3sFAE7R=B24UhaJ+5tsn`}Hto}RSA5`4{@6ZimW}E-$j$O(uYWL(Yv>kb zFk+fXWY?WArszqKk)b<%Zjy^9boqI>TJn6!rQyP;yHrAnxwLh3CZ3ctIs4G207$P$ zbmzIL)P1MjbTo@OBT_9zE{89nK9k4cxH80UtLz=2uIPkMhY* z*%b1mQ=I2~QtRJe!D(i^25Dv|d7z-VjKmQ0>x;r??sZ;Dm5aE**LeEKjGkFct*-*L z0BLiPu>{&&V%moykp8+h^_38JQnTgAsOO8V*llOV)qFn~GL%6V+?`Bt*VR#czefLt zo)38x+9RAjW-AkVjLU1O*-W<@Nn@bU@pcFnmBX`%K3QvNh-@HDWHOx)r?AB<2~0tp zZZ=vqKbfIZq!1i{zWz00fEF^7=2>EKFpAgs^2>HCmMoCg?rG5kolcZRVtN$1BEom6 z$8Ok1!zfMiyoWmphPRUguCLap2GRKs@#SyH1A?pq>ZS_$)USDDkiF>MQW=a@14r{& z38+vic+DjjJ+(2D8+<~n$7(4v9G-KS(KK$Av3t3?Wwi80rqOXvB@BxnAMyZ{=RW0{ z`I~;FEr$(nMVyYSId)Gt3R5Lf|F!XKLv=`{Ms4CVQMEbTv!92^^qgE2Bx~Vh9oTST zM0@O}=Hi7G#axyUhf+p$)luKhO}$R9Sva9%C@?lOC^K%SU1^nXMq2DDrsUk0Z?3xE z`lX88w*b*hIfZd^!D+s|+?xX!4BN)WtWSD)x_7paz^ZG|@uG05)v49+u3GpM0 z3GsYg7U)X~e1wMLirbb#qwUB%BGrS8K&+Dj?}cl7N0YNn0HP8KjMj_L;VZMiW<04B zm2c}PQ;ged?>C28baZxILB5|5Z-Ee1x}IHg^ouUs;9Sk;#NV3q)2=eyu`h=Jdnb8x z9!=*A&c0v-u^*_U$y!6XLCpLV(l}CME9edzD4FcAv(pXSj|Rlno=bCsxH8m>VvTrv zewC?Qo~Zt}3k{V)(RL7GNPh5(EN?E8Unpu8L!AtbhQkXg5Z6e;nk3n0h*6OSYdIiH 
zF=tLqz)!#U>w<+SRTLxaVpWPo=j5NTM-5;9w!8&6)@A%Z#+(5OMp?c2vL&{v@OC&W zP7#E`X<=Ha!nkaFYCyi&)9wnR6|c#pQYLEgr0iWrQ_5oAh?x|*YzE26db?4KvOvcS zy~#QiNfrGZMPz>4+)!GP03L|~2rF`;y<9e%kA>syfQ5u6@ucQZ)EDUn@(&>#waF9- zs2JRm5ErWvN0b=x#FAssBee=mk+Ed5f&rtG=q?ubXYJA%=oDVtWB2RZwo^5+1lE(F zvIdjyfU-rAU@;Z&4vhEF?)G|TkQ&S&(AA>3}%hz8DZD$ga-=z0|S2yDQAWKEJGyBMXf~PfI4A$RyjIt%U>D!ByZ4`H_-KA#opy(A<;rqVf9wTiZ%;D*utvc@&1)swo>( z4wAFY9UbjO|6E97WGejWexU+Yccz%s6m9y_m?Z%$&)>%(z-C&Ye;&>w`I=@R8P&`mb}mZH`*PpMhqPZ)wPKwN*8il0|K0YWKE zh`{IbOsc{!Nc8&NgXH}14pzQeNay|d_=7r`M&d6!4gW1fk|*XGu$J`a2k-X$AlMtv z0tl~~asXvL;mKNOwosC2wa)R39Pmd+J`<0l{Q-ZX0}QhCxB%D5w4TIk^eRG_ z+cYPvxT+iJ2GdD?h6}y?^$fLSDtaU!-br-`>wUHZjiwXJ<`h<(yO}llh)^ z15Y!_Qva&rzqZYMT9;8LTSR8qFzA2xri4Fa`)%aS>4-&`DP(SLjQg3_JTMEMKUxdJMj&eopY;HsVA$_WsB632PNf8*E%==_O%9Jo-^$x zH>Fmg$+L!+K&juBvR`HB9)!h;^}u1fh!x%ks=4tI#@Lq_q4-Y~byqu2nej3^SB$2sk$X6nt(QhMdJJk*%G)edei8})jG{{gh(L%;Nec+$EcS6&u|+d>bo;!8(XczRe;>@I7F`+NxK`)c^DBW&$>T^|z8q|q+Z~i24zK(Rr@U83frPH|C9g_u|_=-x&D;6#Q zKHg5rl(7$T`jwJtQ9Op#_~rTwQc(n)_}uS0Lq$M;gj`sI|C(#Qgjn?}^-~W9oY@FB z;9rweO`N(j^nw*-jmvH7->p3>czr;Bo+VXmI(@w7@i z6-OtH5_T9!P=k5Fkb<{r{+C!)>#unR9Cq~I%=_Q^RcH@r4)F*BgV3t&sFS5co_42; z?k8zE^{?-Czd?>?t!kwky8nprAwxwdwaR7lgp%w5pMu4AQCPFV8PUbsXbm=oS(<>` zi`JJGT+-v|J?*Q`yWbQbv)scpkFA)#lLkUYsq&2Yg=&3efkt9!k<2vm5S_$>r9mC7 zIh+zuKXUI)CCp{9mQZ79HO8DSQ%(&8-4(lVO3F>C7#jvWI*h7z=LcVpSMHEc#jDm4 z!l5v_?0bQ-{Xiq*mzVsEN5d+DIL<6Hg;+&&h;D&*6krtptwOgkldQ?=HvNJ3iVHvmP>Im z-aG2Ar-82ed87Y0(Cah&jUa2s>jjDO9E1yAJ{4ICA1aj$}-f)E1~asY6(UNZ$4gn@6Eq#Zo5qyt#3* zEDt2%3N0ZXN-x{}tGiPl4rPF69K8&+;5XRaaS`mxqb*^3znF_K>5yS3o) zMzartbVjqhP6iIC`2w%zSpeKI8@!mU^%o=fcbmpX7B5IO@j&1I4XnZJZam{rG|K z!hq;)Op!WkR9V+jeKClz90_lsQn`3ZQuy^YOJVDHscCk_dGL6J(gC{rqVZubKeU8Y z*g25fuKd>AqTw=vS3H`<1@r4R5US#SBw|zH0l3S8`|CX^lMc2xIyQ2@;@SN$^h+aw z@((p*G!8DV0Tw>A0E;UixBWmGz<(_#XGs6Z!MVtwPu91?2!-JT$qbvH`^A(Qyw|B3 zT2_3MvG{p+G~M`O|EFTLA`|3Y5TQi7+x|-7@79Ur`XTrb)_C8CJVu4%12(H{rJZJr z!mV=1TeK1c1tWBK)Xx@nGpu^_v|-7Qf2+U$DLOCv>A7$Vzt$naj4l*!1Q|JLVHvqF3h3!c 
zu<`@Ts{{rndBSNbhPww2ZK#F0eLZdX&#j|(O_*lOK{{@4$8uqJnYglP37 z=gFM4Qoj(G%N%R2$F&Naboyht{4O{A5@gAt)yCJTy)oR)>b`G#4*EH$zMh9#3S+Pw z+ekE8%LY<;KQ94=lL}sqby-?dju-7{IA7HI3}6cphd&7uP)<;PhTZ51A{pQ#H24vM z=#GK7^=5xT$`pU4=Ib^Tvi~cp;BTv8jL>Thu_5{l{!11IzeX<_uy*H?FQA)LnDoMn zT?<|)MG?d-qw^_1_33_0koa<8^*ca5Zfqmc#j$B? zW#-kbrG@jEhRZyoCJ~xOR)u`uppzpD$ciJ9va`xJq9LSx(M~TgIlkDP@?tj+IoV+r zS55pvtM*!s_6t0)PZG4=9bob9;D&!v?;zzt*aDQ9oOF9N&QP|={O{ODhIk&?8d0%3 z`}eW_R+sbtr;J=>)Yu%=gseJdL)1n3a7FjoCKojR&651jlP-&RvVc(!qk_PeTAJfX zZP)+4Ofa7|$!hlfLY28d@&0@8#>^jd7&pc)24rFd$vMco*hW^%!!9RmjRFAd4uNk zKe35Zcgq`&q{)kzwVDT^P+-z~Y5(*rGy&rm%Jv(R9j5!F7rKVdUqMJ^a*4TLJhR}p zV9|s1PQ2ehngVBem*SOh^j&RjsA+1shLB;k!b96Y4<2}$Xuz*8omJie)?NhmR`F%0 zM=+nY=<3jx&0ENF`dYCt28IN~@Db$~H@A&}NOmrgRTOyZ2#7#WRk+D|)U+5ogL+^M zFF*`(6ZL(zm<_}HEk@XbusziP4SoSRZAVJJ;)&XhTEb~{7* z1?Fb-|2>0n5;&tHD2KrI_7m&{vh58H#0cu`J_>K^^#UJ23fuDYyIfmP8%b#013G0j z?eDK{fM5MLlC0;7@?K=Z7?+ul(-Gy5`UJEf^yFjDcTf8^29PmN$ub_XR0+e3^fEc1 zK?HBTfdfHEa_abOHW7T_9TxOj6$i<;&BT<~jGz*C=#S=1i_q%W@_s=rQU8Y8O!oRA z8!pbu3O&#IiI!^+1;D}ZriL_q*5hfdM^$`SGd?oZWzAcb+bMeznmFNhKO-z&ag3)E z{hZcDC<2NDonE&v*e>fm2hpz(ZF~m-QMLe#voIB{N-nFErranl`m(LOmDc`j0OSA( zp%SPIoQiJZ7<>Q$lh{&8x95pTwQNqze4P32Jy>6W@09CyTgTHVSA@TSLPfAo;tB;r)ZG;H0ZR#Oeah)!UHF90F7lUj0a4C=UIdytkeBL9Dz^kwG#_0|FDt7B*H^7`y&Z3-#D1!T>B4Tc6 zvO2y>Vgg&oRhT~lssQp*(UqbKKw_d^nioz;Q8~yj0}xY+^;JQvJmr9{GXNOM z|7M0|@?Ez$w;g~CDeO(t1JEi}nV(kxv1zA%6Ze6Q`Um!j9K2lOp4F=k7%muA1m5(_^@B}*>s+f1)6NrdfQ z-#yR0xYP8u|QeGQs##j@+xw*d#VE20x-3xvdDJW_G4WJ1qd)f%^a0 zRy^;f0T7sYjRPR;JpG-z2p~GeSRN7oKXpo0tvv6b-CF?Vvs_9iOcyH)U^N+pZIF`* z7L3ute#2UQ;+2?5P5$3Qs;9B%v4_)>G3W_A9-kHw39F!g=_=G?9-+?1=tf3;8AvmE z>g8J}N+yF}ZM_NTDX8i8Oj^tUdCO$y0~VmBL2Hb&cM*hM&q>J&%8>ty;AjMD8D$w!D1 zu^!S640tSyvl|rJF4!+gOKnQrS{x>d5+uc`UdHf$D2Hd=ueR9$HrYdK;BPMN=Z6R4 zj(3188$gvrigz=NbkqMYrz$S{E5^UEfPavIo@l4=5YqoBb-??A zkr{8SI345v$Ds4(f?V|YAGhpZtdMSW5BS-Ddo2q7pGW`R6o3IX&Itk?iGW@B-;AMu z=nvpO095b~L<#u%w>koM-{*Q{XZv5T4u%>29~?t>uuPHrYok%x!LF{=Nt?yyK>&SM 
zIPKp4d`-|pxI6P7(jUOE9HilB>E!Nk?P$Dk>aq@2ytFf{c^98+ucd17y=Lnu*HLTP zN;NE-{txr31wh3q$7#hG#aYEUhL(G?i!hXoq!VMV6F+{J7b>k0z;gborQLgrdgcWB zfMq_6)nv2oZ$l4@m7AWZX7s)~+rCOCQLT!wK3l1UC{eAh49wqqw0xtjspGDbqSLIi z%(4Ex-Ho^cMK}6H$dhDL=kXaI)HENq;BhOp3B`+);LxT$I&BJ@U<&GhA2(+FGe=H< zITDkkrTgasI?p=qbw9!060e568$PV(_9JboXW+s=m^e=@NjQKBsjSMv$%$0aV)gl` zq6hx++|FUQHnu;u|7n9n@@>LxqU}sxX-E2bSV(fn0g$?n?3@*m&(D=)AxXutuG77I zE`IKV3GLO^TW)&@WbXGx-5s$;Pj9qRYFJ#IzmO?*>Q^!D{1Agw(%zCKj14KaixVsD z7*g~M5^K5u#rspr`VJOH3hV#c(>c+!c_P1^}`F_w-Q zr@f2=wju91lX@qP$-?T>JR|3RKz{1=GRKoTd`Hb+LRj)h$J z&l|1%*BeD$$P5Y&iVjK)$_y%?A=?sG;$fPx)YI`C?CZ1B1<1TTK#Z{a(Yr?Jkbxb| zZs95(0g&zfrSF zva4UrWPLlU^33)Ryz$ZczXp$qQ}f#k)C;tWw-@LalgC)-mT7+&d_>%#QL{Gf&-7Nr z1WpAw@R(|I?LQI7K92_eoKQ$ixXaV|d8n8!%SoNU0<$%gB!sH_FO`oMpz{4^z{h#u zbj6~@g$enZ$EC*Q#+CA$T!Cqau_~#*c*%wsr_xO31pJVI>*A9!%kRO$Feo6fl!%Op8D3XWw7sie;=70c%z%&6GzCBV4q10@L#I~{RQ&dw zD&*Ht1`^4*H^^8N9$ZnpVjO}smHX?JTXPEh-IDM0C94^UIzGQ;)#?vaq*BA9jwvVF zlKARXK~68ehrXHp?ac1~Y~L;P3lx2-ZIW%WZHjHGZL@Gy?b>onFx3qSe;EN9NyB{X zCs{%pAG%P`MrYW&`2I~9LArviU!=z4w_}uwVnz|2U_~`}F=)mU#uvU{x}S74b=?C$ zcDA{8HFiC|(`k5;+^gsA3AU6Ni8OHwz@&I>Ko=P*J~<=AIypv(WS^_C|P>)x>ZKe$U4F>cPf>5Ph z`(9PxYl*A|1z_VeC}d<5n&E!pv;n5+WCmVJAN$1Xl1_EXcUnzk?{QCgW$W)P;)E(E(H;ixEYK ziAsiWFs@ex)&QULfI+rXpYDRPU>#U4614NT;c2$%wi&jWIyT&(p@~$5{P{O&iGL&s z_{t#7r2pXRfM$``n9Smtk1OjWAoPpk+QcjM!wTD zu_5xLKwiXs8{oir6G4J!1A7$4QV(~xD>+js8nQW4IDaC*P^D#74d`bPL{u93Y!lh} z<}l~3)_u4a9vZkEa4Q?~{KMaIJ#iy(Gt7fnGH9qj@`7aN8Ge*0VPTq-$qN3_s_=$} zyz*Bc;hhb)%QyM7Ae@N=JML>~YT(}7KIaRNDKl^76lQ^)Cp4giwDBN-S)@FumQR*a z%9zSjY7ocZ5C(h>9Xx^>XIkIuIryG1uC3yRRDRJB@B=03#If1ERf2B-eu^rD+6oN9 zF5nHi8@nHo2e%pa%PAj&k7=e+aa4c4A&9h%?xh@ggEaWjW(hcd)eYL3oC@3z+br83 zwrg%AUTbnd2S6f@we40drl_gY`oV);Y`$=T)q_Gko0g_ebnx=HwA&w=58)@T!{+8l z9#4ylXTn8_1%AQ{vUi{w;lY|_*4m2ldp@9f!fONLS@*r(Cp}F)cQ|eIT53Wt8yUsx z_kcO9&-D_=B{8vJRbE89+|}E7Dw9WJhdD({3}9$Eg}(adp{l?(tV_zo$%i-kqo42u z`me|YHmz+JxQ7`p9};SXz*7+(n$e@Y>HP)H0U}^$I`d5uC?1F@+?WgtbK*r!dFPB$*^&m~!_LcjwFUKjA1bzOLX 
zMyaxKM@(S#zapvq5DV~z>o<-70B=BSVxBifu{tY{FxKWzDEqJmK-oY7nlX$C=q;v} zkPGhB^tds%pXyqC8JpC}@x&rXgr^~^xPa{FCy`bZ)YiZKbFZ7U<1SkUg{gCU-bfH} zjROA;{?E*UHUX}6I@M`Rnpu7a0Lp;wxcp_#?}%XOm+J4s#gv_NN3z@i!yY8 z5$0(nf_GGR{!=Q2Fvn{w&d3+#{ttjV{t80Wm>Aq{^IsTt8c4nk@;?AQ4z)Ooym7PB zic*rteJafRt*?Zzs8+@8a|OX)dWZQI*qR(`StV&@#k zi;wTsugtiW3R5Vl@C!}ADDr5U=d;5`5NSLS$-~JAmo*->NxtV>trbvogtIwGM%C#I^dQnxwwVrux{5zb^=>CUm z4yC8I4m3UnL=s1!v*lrm%`)E8vm;V^UH=tKi*5)D!c!?Z{T!IUJvmOS1rq)_y!vuXJ4C7e zHx9y=ma;bRoBDZ4$vl6`zd{%p&*zk8`dNc87A$!O;pqvdqz!nCX5fH|OIrVDuOGm& z;|1NQrFgKeYt4+E$JXB;!-V#i2MY`EU_|1&3X$L+OGvrsC$I?H0F~#=5&yqlH z-_ZjkS-8XU2#8s7^Kzut{aR`fkQiunG=x>wY+fh@`=3#{PhvbPA;p3b>JGkt?qU4p&pjT?vB2FF%eCg?U<_M~QvsN)SU~7( zRRWsbMOJ`s$fXMT)_&oGZOnnhI_ueEy)k_l;A9=*Lwq=y;?&+5x~kbOugYuTAAn(j zbO>qnvB)+IU=X{05GBzF0B&BP;wtXvu{vPD40#>wnQ&uD0RLERi--ZlOQ=b`;}*bf z1iK!idYo)Dwap-C`nx4q2<{dWV$cQ5!tK6lts#3J;{APa4nFPMtSkn@*TWWsL%{X( z()M}>xZ2oFYnAd=Kln-b%#M0HngQGx}fw)tKmmW`TrxnLXN5D7p z$C&(`$(%Yi_*2gd2?lEexp?nqJ%W5>5P`laxS8jBH+s2=klCU0C(P>a>HM5H4b|ek z8ulCD?UrRvm-cVkf+yc9>S<8I@ z@{WcPu2^h)T-;|5u>yep!&hF~OVA%)roOKlX=Ah2HFpM|G+e5w1HFu z;CUlUSDMEYYf6`*xA*tO0WzKg`cC4j{&VSum&ZFnV;`&i&BkEQCXjZ@3xHeJoPP&8 zVwBAXsLilqO6myUHX#JVdv{tY-`{j#_U{z3N8E|hJ)tTPVqHDNuJEvY&qjmLNL8Ni z+t?m)RUb?K8Qb05TzmyhR<3%*UlBmnL>NzT;O4kb6Gb&JjfgdLH#vv=JEfo{L&k^sDu|IjIfU0G1SW zR9OJL3^5JUvdh*#)~gkhZ1!&~pte~k485yIxY_l}8@H-JBK|xl9WvWa;eIjSxSw|8 zajDjVhfJDKSviH6i@5=TAeIP_&wq%fk-vs*-5q+V8*b)TOGqs*2~IDEX_hqxb=L1N z9hDkWG=shrKi$q?DZ)>Ux!D)F6F_z7UU!wmHgjUtvAE!a1FIj@0*kGz7C#&Kp4#QPHyD!McR&KB{t=JTK(n(&}QH z#BRlWL$cC>hOvbTOPXX}w>Bw!IDD)>xNdL&nB7vw=gSBX=2E>Gm{p~Sgqsiw!I;Lr zUb7!tq*BcP&J+ItAVDqxVo4xD&k=*H(MWPCP>(AW%a9cDU9)bn-X=!%L!QuA1)=P1 z`+>0`vskh-)pC_D@>$zKJ2H8ueMy&h6QcD_#~4%8sj26N{-SQ2Y`I~C&rxSn1Dok4 z8-5B+{oNaGU@C!<=|@x{3kt`fS7dM zT-kmGA!|NWhl51OB2*1mfAFz5;@5BEn|&mj^mUQyqj>?pP=YLy9uzs{Ghr-i{w0h0 zN|4evez_qe-s-C-xUl1iJev-amo+z5i(!4RK_4(WTMt`bzDb75vG@$vi}KvL5$4()2=bR(!Nj6hXMYVOJaz?O z`SmN)h#G|^M+j?jItJ{=l*+)cU}QWe!;R*mr0Tl@kRp@qA72$Y9dW+D95)3kV7bn5 
zTkf(`|Egl2qWv0lZMi}d)s2EDBj|z>^IRBPA_r9P?*dfONr0Cqf1jLap94^hB$AaG zH05G3H4K*M%xbcL>axCD;oq2Jw`=Hm{Mo*f+0M$}Z?*~SjTgr>xM&w{kB_+KG%qtv z%S|hF+r;leVGAWHEV_UxTDcf6%pq|v^4YB;;YbpDDD_jf1P<~!y$7hPAVtCnlK zzGqKQ8WS5i^V_oY)mt&XXRk8J1n!u@RLak``&|08F{P?%zH|&G<3<^o>tsvJ!z3>+ zzL#0%92eu|5;By^%L1&3CAo`0-j)xH%< zpy+*#TA~GgR&IFbDxSiO^uc^NMYG9Cnyb1f+lz!;PKm#wU7%VhUcAzJ$P%bKqvmmU z3adtpTzg(pt2~*spq8s=&WNLr1}LUiB@Lqygp zP;-f}uydeTDnp59;H^)qRZnc8-W)@()g;s-{hnyD#e7OvmC=r>SiLNij}NHSS1MvD zfi4t7jS+kZOS_ZU0n+#xvNcW14IABVV1>tjV~zA0O!DEfxMqYSRGKc?&X*Ywc)ilB zHE#|!TgdQnZfF@KrLVqc6bMh4`@NaQ0>>b4S^h;j?&B9@bO5y+Xy|XB$A8ov&0~jb zO4m%eKbOx&uxyhAsO^cHNoMUFak1?cfOK}GG=#=7L}K-KP9ZY$lw;kYnH4Y%ZJh5P z^JyP`gV^{pz7#6@OfxJ=VnKJe!ac!tAN;aR*U3mq&xAwZCV|&&z}v$5;qcp+&g3lc z=jGXj=zPN+>eT^L_;=((5rFMXtY(nmR(8BpXUPzP$MJa`P8W~#+okcCNyj-|G?9gf z>1akZcdR{N=0wxmf9I?{qU7M>m>@2<%tQ=$OP-Up{(}BU3z%nrmV;9|_+Z>BCoegg z8L&Ls_0LyDB6^1V7sDf}28@77T9)4aP%jp41m{2iC|)ZA3bsM{-?HT^22(6t0IkC* z}0m)b{?4Pb)Mer{@e!4rxg7LK&puP|EB zgZ?I1Sf`c1%$2ai%>9Df@a4j(?A>+8CbYB@UVd=2M5ARS5q5ZxK`@z)ly5Bh zIdAB6o)i;JD~zkk?7>N}*dX|L;Yw^@7!dn3PC zi#5I?sY*3%qZ0mnd#8X|s!UF+S*TnhjDSv>0cIaz7G=Uh z6=VIdeN!Gv_{qV4y(>f%Kq$rDz~3NFr`Pg!1x1tMmwL8)neD9`tQsId|C7)2gB;p8 zW@aaRAjH5Xx2cYKIOyuHC-M=}m(qQT*T;kaX$!x5M!3D-`XXWI+g7{5K3fLi2a5$o zRZH*DZ%-F~!}z>{2vMY2nQ+0F!FgqZrPtA=VxQ*g-SMstCbjw}dSpx-B^P)9Y#E0l1P#REyCHh`0D;h{TA=AhTa({r^(8IG zZ`om2j{KZFqn>VpUj>$rI_V)hHK9BkQ05)b-2RDGlS!M%L(qkjL75`tcO z-a=>jvH|UYtxi%GMExy}5#u9ZBN2RL-Rg}>?nX)E0}KQJO6DTB_V(wX^V|me-)~~* z#P75P^xU+C%hd&2pElIjO^&wXfHCm=a6<4CjiEuaFW90qy(ur~oRhWm^8nUR2EgW! 
z2P(Bu$+DgH0>7;RMzrI8T(oXB_2>YW3>n8=&lL)PWR93{@^_?P_qCqzKh3CO3T86D z;rt$rF7G}f08^Zago7{%G^tv;YU}|ni%q9Qi}hftP(ajYK!^Cpd2({Hfr*1=HUQ_3f`f5jRoxMUrL$G%Li&k@BKL&MkCC1Pw$WZuUv0} zWQdkxpVz;!02~rb#m#bvS#S64M=J4X5YyIBvQ~E?BE0Ax)`7~mpemVeXX~~xSn9dH zxv%R!wH9U$3K0C!mUDHVpYz_qf60b(oImVi)m9m-&&IHzfe`dvOKsw;&I zxXgj%xBPKFvM_V6@dH>*(gXF-52OKNFs5t^O5ff_4w8g#iMh zK$;y0vZULQz9mo2l@0s#mT|&JYH2tjvunFGE}IKJAZa7{)@Yb02|=x~LlmB2IafWK z6m3(>Pm>^x9%efHv{M2*mf3KC5=MV9N>qkYad(t?1PExD&ef4+OQ+%%Y#y4MoB4+m ziA_8=JBs-0yMFS9*@+#|+N}fet@*xwC;y8i+N5TjAr|f1<$kBzul>+uTC)@3R&QYPGm#OPPICh!;2L z#5&OG&2-OTRB2vc!f_Ydq>NaLBh&PD8ZRx2fe1o8Hk)vef`_&O2uBFhZJ2$B_NdUT z6F`0=4BXzA@MB+_D#US((}!+2zqbI~L;#HK1Dx24$_A8^m2}oU=KX!QmuD$eO8{pn zS5L^>n41ZAGun!Vp8v~ftZUjnhDkT8S5XcXUqf{!6W2abo}Hg~lg^lw-~ypWbK~RX z!H!J4*4CKa2PqPjkm1x1V@c{6l4SFB7ER_Ii8Bl)BiA8dMxI^0BR5bBmzL@0ps+yy zWPCk7^zE)4=N{y*e-hnR4{R-nfV7SwBL@>d8C8JB-6g!PHUZbp4JIhZu+CgMMOe+5#~d_)V+Kda2GPF&Vqg zrp~VwQVSo`M$-i*JX3s3I@89!?ao4ceQbJg+?!5L93T0s&-*9cbyO7O*v8FF6lQ(E zT0*y+?>=TT5g!YXeEK3N;HE6(MPFApE9YtQzfG&coy?Nok{Cbth(#|V;b#O2_x-yE zZ~cxJYXxVkt*E_`Pk4v{2Fd$00QB*>aQIR9LLx{uI{Bj;t}3G*#2UV0zOg<3-4{7a z@2-A~>?Oc4yQv@fVaI!8xgrW!U|H~JBw|g#lHAL}hjPqG*|G0!Yq1v2>ikxmSpR04 z=N!j&CnuRY@!J&4mx7v-r3`=vBD??1B+odXnAPp@n+ZhV(M!q|i$MYgfM!YMaVB4Y zmsg6h7?@%WIc>Z;>kuuFuPi7s>&#k%9$1{nxzMxN+HwNC>Qg)*Y<`7@@1st2hEAHp zZi7CW3Fo*}sX-EwR7Bflxl${hKfaHixgm0yYfhCJZvcQoCeyyU6li5*zZ*#we-ylx zzUh=WDk(ld@Q5*+q`2#lsKO(nEdEli(QFek9?hWVn(*PyW3t*#E1sNTo$%+kF0$aC zQ`FT>8QVBV7&%|VF*#L8Xz*e_ARHE+uU8K!47C?QE22WbZOFjNpgnUhNGW423$7|U^ zu91GlJw0EUw4Rao5``j1!q$c3i4)~wjAb$({kt=41QQ>kJ zQch+YXML_|2EnpfEPxgkjI3n>X{OOAC_3OroBNAALRDMa0)_FpktC!+TnJX$i8&3dn@V(I6bCaGF3wbqzkysjD zG62x_I;KxTSa{AvK9?lP>s}VMLUxi>k0-$pNREpzPLLaUO9(WDiw#EWLXDMvi<1p6&! 
zogWm@q zOav9SOPEE$8ZY-OhJ8dD0|SG?c)QaAY` z1A2K1a5Xq!l&dzMfh^`cQoy{8-?8RzUxEb{BHnHpk0mToBO;+@iT|!%(q|XkKCHai zq}%I(c{Z;*mmA;AHvvEztV*k8SF6IGgNc*D+I)?nuV`5ut+@FpI#tW8E_C)jqH6h@ zt5kOZ)ao3%tY0B&4FOz@7tmj=n3P}VE99ilMLlN=E`*I`d>t^Tv*@b#xn$OD?pQ~2 z@0|rcd2&lRk!9TWtK4)6O-T+#D-wBjUEWnc2<^T*U2){w!>S$19P&skOdn=EErj+~ z*j!Wk^TsxJ?6y{C{vbqGrJ2Su{|(nyCw+@@#4c@wqCtF#bd_!ycF$P(0zXeRUoKJP zoT=VDic>y}L8It>?|{cgplUDTQ11RSqqxvv3j7S(`IYPo$%CB%KNuVYJv{k-b6*MTol+>qz-Tce`ac0cE?Aj224D4p02ar##ID zTygAQuPo5{up_0!i@B=1*O7wqdv8p6>~`Kr(n|6>CD$H?2;*Vuw$vbI#n}l*01hW) z3lc>3is4u6;Zyi;$8&_wxsBoT%r>ee$Gw(|*`55V6ft2-X-0bbWP9j11vxpPM|jHG z!9E1A$ap9Lq|X7MDg}8tB^b2Ge(HQ)IT8)w&%_aYE!Ir3m3hR;mHmAvf|=*s86S$> zex&g6fx@vN0)*N#>Fp7B$t}hFaF5KW7K~VsuKjmu^3-H=;uR=zi)giE6%+)^iD5x* z&KxmSRh1FnB#EVhDlU-Gyl%Y+Ri){V1fgj{66J5azuz-!`S)g@$&X8*#Pg4>_a@WW zeB-_o4md=|p|zl3j`;)?nfvQ}hX!<^r5VOJt&h^bX`o-Kh|r;5h`DJm^c8fPh;}MB z!R&OK*n_$a0|^P?)oLZkC#>{yN#UJOg}AHrXH__2Z&t_U&Y4&FS7edDy2T3=enjL3 zUZ0R^1PDP|5Li&>B#^$XFP$*DvA>Jl8;iWo(vu(!IFfJR57=Z!&8J=3VToTOM5Roi zxT`iFW)42)eeE#qFk90BbCvkvw!~mQOzA5}N<#XGKug)P;{!a5mnR?S3Dy7QIy;p| zhYFpDPhw)mk{7;PUSJI6=)uGsw?$nsLRxhzreM0go&b)2U~ghxboWnRqC@DYfk41M z0z7mUJoa2iu9IEMkn=!~)fOlwC==)a@Uc#kGTjARj8Pg?Ei`9lX#eo#gK{_v^ZB&G zCc04QS|9j%{XhZmiTae#&jh-xf^8`_N$GAP_TOx&zU5VV$I??BhKobdY2@aODk=&^ z{c~A(U@!sg=lRdS`h3v4TT-}ltjuXfRJE6}LKsYHxtau%R;sBC=`RwN{yIQ&dlB;S zp=R>F`tn854GX7DD?H2ClE_Mv%Ji-6p9_UkL!K-PS6oO`3M7Y*vb|n#&nEgvkEzw{ zdS`-eV`D>gO_8Yokc;$^9=f?PQ>x{$V66Y59Ng3GE?wS0>>SuHKnc7DsQ{nipJ!`b z5dqO3O9alyv@Dd$q-NX&rR(zcJRhg)U+*O_(tnix@-o#PrW?Dw@lx=T(ZUD%O#<>C z8+=B1_I|M`FW((YXY$9-PTZ9;!^|}5A&=uWWGvBqz4?oR16gQvDyT7u_4mQ#8{c~B z3dNRRM2(^KHdQ9*zB*U(Fji^XxWwo{w^T4gmIo&tjyI4j3k&z>e75q9ZSG6@v5~{R zK7UoH_!x1iv9zW~u)2MGufZeynq^z6ZCWES@`FptX{+a&8{ZGhG28EH7lzE)Z$QT5 zIiD;S>z~!Q>ux~U_PEoJCJX6Sf&kb$LjP%NOPYm3x2rz8@7s^R{r;zeyn=&T8>ueJ z%qB{{*_}=W%~0m5efnB)g8jmr8yAN?n8FG-6{sZm;5(hCSj#MeV%^z;LU^`57o`7` zRgKI^*gnamfL^5*U(>!^>@soKaD!&`=I|rs(EW;o3F=6kuetVvg7`q 
zAdw09$rZVWrp)@oIq&|d;0QJ?h{J^{grFVvbh+OjDRpHbj}oW50pAYGxa~i&fU&|8 zMoAnlm2?t??V{EY3PPJ_2Y6goi=T2n^*$NJ->>lkKTd1ByGBO>h8*9th<571d&PW3 zBcd6&ZCB#j?TRe{i{Lw-gp+QMF&DJA>uw-`rO1+5AQeC zC(BqzMMV=sMml2uOdV)Q-oRH!AJn_I=k!Tj*!dqHm7qhDxqKC7r2EGoKU~!Dnqii9 zQI#sV5{}rl8Qq=r|D;j*$mUKs>ACQn#yV@mC`oI2!90~%X*&4ecrfv9=LGJYZE7O% zB3VRAr1t=I`TB;LsREcNr);RGox3Z1JU+p7221iPRR@Dj2&Y%wy8*vP1D3+C54kY9 z#Ie^eKQky68U&7pv(-A>anDF3P!;9@i`2V$P7WZxe{{QDs%3My`sp_La??Jn<6$hl zj>Zb&`&e&qa*Wvg`GH;8cl62M`oQp3HD53U{gs$9kPLF z)1uDwEcO?I#C1S@f^^+vKYxLaEEi@p!#n2Iar!ZD;hj@J8ZD3c2Y(c^p%%e!$BJ#op`82Bz$T+qy zWPO!!pyzU2qb&`^=f&8BQfhSNX0*R1we;945MHDYX|bO%q~~&Zej_4e=h~0~NHs|t z0bn{VlkeUio`^!xAU@wkhRGkg&aN=uH>H(NTYT+!E^%GI2Rb4P#w*cG2j_ibv5M-q zpVFKlkR*5>^ZsakwE$6T{qgy|abpK`QBq6f&CS^sBiF^jYW$(C(9scSIY}vHe>%gTnBD-W$=XO6eTxZsqAhMQBK^I~bM`-Xy zppYJnRZ{?j=g$RwZ|6=H8xaTl6YW+gGoqqD7B6;MIG9Zly!Yc4i`MN=WI$OdWRYo? zQxnBY{p7xWabmjiPWa8ba>l65azP!XiWrKxxcK3-Vq)&$+;9GvmD28+d`YPMf_x%8 zvx)IBfr;s%w0J>uHlN6mwwVR2>Mt61i#0{d?Im;>`COV!u0Pv&d_7MIEP1^jgpjC) z*!DFrI6s!R6~GkoU1Xcd2+wB!_)v4;j&HGFwbkd=!WG~&UuS|f&9Krs@Mk2uBSU1T z9*}=GaDetVZgktgeS-}lZWqt#NLIvklR-@X8}G{RP^8)+xxP^|M_pJX^82~%xcSNT z6*g}p3O=?ClVKhK>>E5dVu@51NKayEJzkG)5SQ5-JeGHwMwWK+lLj}2qs;Db4w*dq zo1}-_s~v|fYr!iBjZV+dm2W(z+wJ4(j0ut)Exi|-3E10v!{7!fPV5|=_4g|SIwI?d z_rXgTEWhWkk8=9P2tJXEnHfKWbTT&dE6wKZ0z@eie@BpS0MhM-qE=!UJyv`BWEzLdfgsKl_R2 zLGL-Zs}@M@h@H@klV`pG_EfC(P&^DdhPohg1fdDgzBk}NJe+Na9xkm13cw()`S0ks zN;=(kgp?JPxubo$+1)jk-sfyM){$o%Kfw;fWLmvgY&JrireAuMYokzZxFN1?xViCg zBe!@H*(H%Kip6EFzoT=mH&JjzsZsDoV8z#(E09^3U5Kf{FSI`0dE=J~ry~r7DS{6a z7YYe0Eruq=x|OQ2YscGc8~s!YO(sZ}jca??Jfck4)*K6p!@0xutK8uLl#2LPvCVUl zt#Xs8kEkEj;|&5gM>M%R62oUeNetzF+SG>-Ne#a#&bNlb2!8$#O@F@_(6Axl4RG#%3=VY zK4?gfR}fFcKUY1NJx^AS>lfaYxFDa(HHCe*vpsd2gsVMV3|r3*%nZ`N8cgO!i7``b z>nr?}C)dFFYea4MDwW$6l5iwSrO^hBd=NLx26+Pw2Ms~^9Eb}u6We3v&?N<$1m%=7 zsaqSb=!lGKc7}#<)fL@o>HGG*}C0KBDkL8#<}5_Yl(`BgW?b z;(>R6MI6I};;egLYvM$Bm9nNtA{Sdb%O1ST@)k-Ixpf*!js4l^Y1o#4I03K5{73RJ 
zm-W-WeYpCVYXCe_J$`53{uxX6TKimdxx$YR5q-!__@lo)WCm5Zljjh&KgoWHd(y<9 z(Tolx1{<=x0iNSB*XT>k9FQp9CuO<1s)tmn(x0LZ6F6pv4Buc5rF7olBv(1{yx!fr zsFu$jSnJD#1I#ZP<#g)(^2FVo;-axyXSKm(PE0g39p(>#&L(;SE_L;I0Xhn;2~KnA3l+vB zjX%~Q{`ujZLdi_N8x{r|rB}j~X}U?rwrgK=z6}8?k(iouxO~GG0`1OWjVn6b_Q6ib z+()-nn@9LJfw6A&KF6@4M)SubS8L~{gmKKY>r zM4VpMcqs43gV4gab1+Z2ZGuBALQG@K7*`}m`|1&am|`mz7Q+{O1?lGtoqtP>S4j(12ER$W%ITVW(kr*_2D9~h@<`(@=b2#2>r@o7r z0?@Joi?Ix8{$ETlc#fcm1pIek2s2>~#MB>qdY$%+`rZkV|8a8wyWUGO_*K(hQg02A zozzhgLH4Qf<<-33#ScFeNjk~f!N3?w-)EBh-w(BqDzy#}fw8QwB4V3+o=(s2YGU^t zc1=UPf4UU#wGorM=9+C89M1`xJrMK7Kbg2_glT-Ptca+ifh?FBJKZApWB4=b^;P0@ z?3b5rxnL^P-b;8&gL@o*ghvd>wKc2s^a!bGw#^}$Z(r6Mm)nmIV3V5N2tc-USR)2FTx|Y$UquvuaK&94b zNO1@o+&Yv-V=SM`ZEW%cs2ruI(BU=Ig>K|qW^Nk~nkyH{LzSfLL@;_>DiGkPtiTEL3o1ZB9=s;XD5cRq8@ zwfkV`-=`f#Aw6GdAWI7P_^2&b>ftdlm1$?k#@2)we)kzvV}pQGyos7*T^P=1@a%EQ z8z!A`?W(Hfivu76;6{E?*A|PKnH3ioYowe(3F2JCh8^G_w9pD5{y+wkwz9%1SAK8Nk36gRLJ~Umut+JYc^oB zv#Zh_AJhD}BT8dVtpn;mRSR?EBD9WBI})A`UGwLWUCx zb``{FU=pwZ9cu%I6UaB{y+{csi?c1Vh!LkNXXo4yb}u%Av35J8rQphWO*T&CZ$OXo zV;%b^i2t3+(*%N$yzAh2QPiYhbKLR%Zjw+#ctsp$BZ;<(s0;xHRj$BLNbO zqM&pN*@quyZugh)qC6^*GM%p4~A{L>G~gc{c|~%E!mPkrIcN z>n$q~?0?Ct;doyXKl*6gV~;yy+R+;l7(d2ePM$Tb{V12P&Q_?j>iq%)w>JBOrY-rybE`0zG)BeJ4g>H3`ZGV1q)fRxq0zJQ*RgZ2s;o}r``IG%>N zb*Zm!EYTDGBRVm1LC&*OR=QF!6>BsgPlpy|)i|3M$Jatq2_t$uo`R|lJ%FRbjVu=l zFmkB)Jk>wTc*f~)yLtuh(%NK3FDOQ_8LhmDT#q!GFNIl{t7`vs5jX!dk3MCoK-e4Z zbVhjS+d=uo=YPR~NN->fc;5Rh#)QIB5v!?XMFeEJuS&QpycMWE`*sN)sx0A+1 z$n|cFEHfL}JE_(@Y`Ca~2+aM{F(c<_amHeETcgLoWkB82e3duz{=vCz0OtDv$QWgv- zQ)$q&m5!5iN*EZ+cR9*h7qz@^hMaarlaGM1A6Adwn1?w7h<%1Rk{=` z>i5HMnsZvRb-DYABgObZnN`d!Ue?CUtW<+=GeL6=9ys8Rl}zzqhkSWX!Ug7HFIxx- z-Q^rCO2(-BKZ^%~HNUU5cRu8Q zS%e#$rOhDE&(B|@;kvP}Ie4>h5#xhVA|9FVM>hBaEGBMftP+PjM2F~6toHk3VS?(` zGq$9TO|QmAB`tnK$MEv6a2CS}$}pZg)YU|8RUathi9v+Dc!el2(g-n9B2WJig1%mo z%*x7Y8fIqZHD*Hu&P6p_9pe$GBY!KVPT0`5&6Ghq{G=>c6;hGOEN9kxlxd`B@V_S{ z8+$_I;avI^j^0k61@iQfnAum^j}==Z?x^BCe=e;I!KgM~O}GL5_Y2?w4Y6Q)J45>< 
z&NkZtkF)2W$wH-G_ANmnUsoZ;RU{jSuFv37A!DSOGfU&(_%TOde4HVjhh@K;Kl?fW zWVVC(X)Ik`@bbePqVvTr8G}NUYQBP3WLb}qdLz!zVA3X#0!XKnnQX{yl0BLrH!_^S zBa@7iy1r>NOi6UZ@j&0q)%4F~0IIejmVY+Dy>kv;FpzQ*GG5R0mG5O-Az=F$r>0aT zRHD#RF;y>&wZiGJfW5ah;U$iv_^e0aBA^hh5-QLC`;BFs0sJgV#XSl}zz_4KOpsC7 z%sVb3dLq=L#;VQtQQE*`fkv5l!sLf@(q*@o5630^MX@U1{Dd~b<1NGC1u0j|wb{IP zC^f6Rd^Vse=OaLGfyCVWKdyoY)Cjx?-2HxJavNv%j;^$)#FYms^p;*bIla)bWPRSg z2daF!8<7Iio58*P!>wjr_-A1x zdXo*QTP=?e1KjJ4^c%8k(MnPhj2M3Ra0U(7W;(sNMXDC8LV>`Erk4tPSF77COjL#e zShUxh7C{Uu)8B;*B9Czl1H-G*1bND%^i!$FgkcV<<_cyimDm7r{{x7)Ba6EB5&V+G z?vot5jZOo;K+G%cuiQw}_W`O7V$e5I1}I1Vd}AGMHyM)+=@@o1~*m$xc(+OpD=#X9jPzr@`Ef@7mr zE5l>8xukXMD6ss~4Y`L!%1MJY)XZwd|DdyD=ksXjV|tsZSq;un2a z!Y0<*@u3&`u?Ksn8)bwJiU&kq6d9NtbA2pZfQk1lQ~yhJnoG!9E`qrMzYxU{y2NuT z_Xf&uC|>sAk&~*!G&|0!BWS+xN~@KPAY0zJ+tfT#%lRtI(vZ;d_lqh9vg=@D!fiaRW_-PCY#9A^d^{UW_)1pC}aW zHs2VBTPmwm4jt~>duX3DH8?L|skWlM{nTh7_mh?c>vt=X9<=d0UcrS28*kf37FhV6 zQEdOG64Hnp3=AV{i3H=Z& zYlqFL8~G-oBY!U?#k)ix8?6a^)5>-OhyP+k)X0G-I~U!ktVzx5UbZvBLc$bRfhVw>iv0Srot=#}WX#7q z98kJW9X2L9nR!ZbGWRbm1y2AMj(z>+)ub8oiibk{`Bjq$LSimpn!kxgIv%k8?(vb2 zR^Vwx-4~G@!JHS9mI3+1MEx0)%=q>YL(V+mYb5VLI}qD(=PNd(3GeDep6_{sih~2( z#zlQVs7;4ccHxK$3i{*tK&A{V)RbLt(~oyiLVTq1*@)I1LE8L6(rJ9Xr}UKv>-fi)MVSuUW7lrgLnD_<%w zleoIdR*pelfpm6aT<_-leW2aGOS)BgM+E~K%YgFbY zpb*nqLg=|rHjGER;}w*i+*ky4-`n3{qPJLZ-@ZZvudOocX;uf9K%M90sG=IyW%H)bkux?j!wQKF9*?f^z&$ad2x^CumE}{8^6|nB;?hSUuJf16!Nbx8^(a z#YL)coSgL-oEqxK!V(C-guMUS_K872G()xqrdB5T1I--6)aEV zvuz>)cRZi=>D3hnwKY0M9zLR#Sh-ZGW_O5;178hCR8%I+zZ5}WNAxcpg@b-ZgZzp8 zB-!@eM>o*!5Fm>b7e;&y-L}bNCGLVliQFG*Ej2f3Wo+;Q(u=U1IgAZw3viI^l~D=a zC*~J;&x?AbW4mc;Y3R>ZSDx>weIo<5X0 zHFrw8yr*J{3~5_#`EFY1PBJH$owO5GWyqbAi5yocYPOE?#w#hM<9dB z_s_*SjfVRcX7Lky7%y$o@@&Fj^u1E4VMx;*0zKH`Ju=~(cUVpH+0`Ic2;Nj^`263L zFg^~`pf?;phGn_V0Y{c#Ic}OseSKk@zs|$|4ZJJv&-swDJJ9OMa&zDt9m_9MmiHvf z7=O?rW2jXL4!K;&r+GeL;(VdkhXnM&f_$|MZjG8lqm@Fa8QQcwIZtsN7$k%EiT3})5iS!<6GJZ!QP9BbfgiCD;Y} zut~euxY))3csNvq0d*Bg`=HbE^}~s%d)!cpRAK*wAHd~gc2#28U5p3`c;I@L3iPcf 
z9w=4l61ua%is$A|o*FBx>7p7IwyUMmOEORDeqZX1-Wmn+Sw;*3iMRj<)@6>sdh09j zP!em{=#kH^Zj;9yp0}&xn(e`SIedUnPAP^p*Y^Qz-deBb1#dBfC`m;nCDP;`q>V2C zSQFEPb;Tu+mT;k3=eI>N$X0wgHs>cj`2~Pvze}7kH!5)KebjKN`GNP2@OTGl)>MO_ zs+o2pbQQ~+=~_$m{(y;kB@A(*2t#OA3yts@nvGULkMEE47F|Mf4u^g#%lGL{qaimeY{t=vNIFv#F>ancD(5b`65j?`k=rHaL3y+UO|y}Z2;{kBV3B^ zT9wY`vU!wKVkvr46eevQ&x4m4FhOqqXY(%jd(%s>D&6gzy!?q3z!j^Gmwp~7@&(C} z0g1nRJ%C=)o0^5x>RJM`5B&G0_ z+dna?P3?EvK4W*m{ZiJ@TyL^Pr~G(v?pqlmrsZ$Hh@#gxXpIVTrtY^{;;JpZpnrKKKWn#ckXqzfxXX9WAq`^5h#qs_mF<=h4k+gbrQ#>r z*j(v-eN*eXcIzBJYd%E3*ZUw@2{^0=;r3ye$kTZ{?}fh7+ZY+^@9s>?`E;g7qDN9# zdJJjR-~jJa2vGA(imUG8^QIH-{oBf9ku&0ecv_7Ngc%D*K_5ISMSfL6AtRke3|T)c zS_Qnf-J71WPQ9LV=uFQ7(0Y1$R?$3D>2WCXvk5Y`Unfh^F|7mLRgVGS7_Dyaw45)c zCK8ppWpHN(5b69b55DFlna@d2BeE%*T}j6MkG-;l&Uce}vT!YqSiV=mU-y2j(;Kk5 z+E@2xQG{D9^`e`;xBR-gnt;ta0wJU+A|g^&=h7WD#Pad; z{mh94wnM2q>Ae)M%qS_5UKT?8|6g#g&mCXD4~30#p(7oEHGxxnnP=mJEh%OLFk7{~ z4S<3soi6UlEUA?n@JK!qp4v=&@{dKP01`g-omW2YP}EAL#EMxtw>3PlYw3eBhl}-* z=oUf)QJ=GR%(3OazyNDqJ>3!Km5*+M4W(uk`wHnNC^kdVT<2zTN{tN0@V;z=2h5_W zuisV#lFzu>1D42^$d7u(S#t`~J#=M|!wAQ**y43)3BE_NwbE7?%=#oIavJs5fmZ|i zh=3jvoK5?!H$gZtT`3v@b)Glx1H_P#*oB8HgyLben)@DEBl^u;-w736Yo%vIz7xux zaDV~6HUhG(sM;+~LA$bq*2|?1-VxM}+ID3}p%3k~XOHvxv~i;W*_V2^XkkKjVJY0< zqW2#=ymmyM`d^X+ROh}srkw<1W3L5+yCs0;5fO}cZZj{W_b5PZHrZgh>dX3+FGZjb z;Gr>D-;Z{J>~GT6XEs_0stuGPig*uw%!BlnCxGa4^d-yu4Xq#O=^}PS1j`loRW@sY zu`7W88&P$&-e4GrLuU!L>Qd%wpoR(ddV#r&?|4pPNEKC7c2Y~cUrPp&(8L-4T;YtyZx=xyauf;-dVwjZ|o%|pgPJQA3yEgxTqbkGzLpU7gsaMz_Oa3 zGVu?!AYs{XDqPIx_)u#kDqVqr`;ZV_xs{|851_%kPS3V4! 
zLFB&D0&!S11=ZOf4Ss&bvPJjpu#`|#SD7HWW01P9w$4!dxb@wqWAtH&zJy)9F zlIIb%#CBg%7%MKaE`3GO`fF_wS$xVbu<;co77jB)l?d?hLr+iXjg=jc-h~p1l8fo< z-X)U7)7vBIZElQJ>DsFO_Or)53z)lzgA^$6G9YPkd-@sO#SAOaJ>Llr7R*VZEL2i| zFAChIH8v8Wp}&~J|K7wXB47hg`s0y$`m=D5b!J4L9N?53C4qGkqFAm&PY{t0#b6P0 z8e2&=7*I*q3o1Y+fug*(TI|6zO7?lo=3#ZcAw&jiw$h^!3quvaxyw5J$i(d*)jxjF zolnQnY0&vCk2r?r8bq4Y%ov?`%4d(K*u&M9Yf4x=zvHJzhGZ}}kpn6!xD%02^a1X^ zIVui8n95N@dWtlPCa>pPV1p>hoLpG=!R`uSecZdtMj;bUmdOuAxX59DoKR%@qkiXr zZ^-F*wqY`-hYw+MIR0m2zjFCIi6+`KOcu)aCq9ouqE$<~^`XGIa_1{Y>)S_;x7EXc ztQCb&9f3hAO<&K6&l@eLUK+4s)endR%O?8~5!A>(L#K(-~f3xvhu{>$UqekNXWR znrtfX&Q^|6S|)D}ued=_Sg$+xc;4=t7M;S!)J}Ev1*7f}FK>}cVgH&G{>X~l>Y=&> z&K|=(lS%wEK2JY7*Sb#Mzsxkis{VSbi3GvG?tOJ3hi`vJz3XHf>g@H-vLk$HMWcv{Y_L&THwI&=P0p_aLxD#Kn@fm zMKjvp@-etVTcTrOu^*doDRM{==%ScEoNay=K$^m1VE1|SR&aP>C;7HSo1Bm^DN}Ly z6K;mt-AA}nF0Y&+Sm74TPfE~L$!XeBLW9%NLkC&9M~vxlSUk>;Z%RrUdr-h}8q7vP zusKx7hw|zXS&&4f5nl7B>$c(ge)4Ha{<2d z-hTbOjj2UbWWKQvE=-HTTS2M2l*SbB^E-iHUl}7%a4Qyrecjg4qPBNgK1T%k8BOxe z$-D>tzJt5+m?${w>Y*u)CT#g(&&_YaIxCO)>g6A&rl;js>X;j@j=K6)%Lv`Czh=R4 zQ7l}V^_mpC0B3_Q^u}Dnn}0Qci1Qwy)Gn?wTdJX1^wMscYEg!@5EB%Hk~!pBogewI zh0=^?Lc~RPE)omeuGrug;MoD3PHm#ngKN5P&xGrU|9NPrNQkokey+4Ys9i=49SQXz zS9?s*0EF9+nUwr_jn4yhqo}&>xD5|Du|~Intl@Sq!mpwW;G7nZJT4thSwQrOT}%JAJEJ$fu|43|d5~aT(n&t$n)%f>gDH&mYFlcwa9?v^_h^+~)&G7P z;2yrM`2CvRH&eL(YxnuD{Rm2VpWV>5uk4oA-CEaAwZ)2YwT@p`38SKi0j%X6HuY7h z0gv`U%Rk2MaMhJw2Jc#MEMXCQ$&#|*$2IB|_nJ52A-CgC_wc=L8I@sm?;1o7=Pts7 z`I{cPH4FubY{6%0YU(We;2Jn?Yj{`IlU+I|aFXBy%C7#HHyA8os#4T_5F>w?pVq6! 
z8wV)8a^&m6=KeP>=_1pK+2E*GEgb@tzQSMjY{w(%A~xI;H5x(_lBntLb?86airW-m z__=ho?tqhAc!?e` zmj3hS10_-IEaer+M3`F+BUwsv{eHwJOls|*)+MnP0cmNi_vHnBY+#{E)1KA{-+PmT ztr+1k1s>zVuD*8!p??lis>D_3OytncS##bovvmlotJ;y`+3fbm<*R{^(E_0Xb?aNh zx3t*N=oYCy1ZKJ*0z=_vK3dlIz?8N{XRrYYWHda`yh>Slu-o^7PXnxc-_Mw9hr(x( zyK}XYj%VlWb-$A+J1J7nd>^GKLHZ4og{4}kffLwmY#$YiU+aJ)iE`X|?i_ht9ejNK zbK;{cU^4qhQ0i^F(;StQlsNVq40c6A0CTM1{kvTh=5O8=ko#krU6Qfb*c+{XiEaPq zYW5KU77W%r+~s7wD<5`}`bU%ZHXP8D6;)pXZ?|`C^n8UE z9%97geGxn}UY+|Fs%9`pq4!vRSb~Pc|J4}fo0O7cyJt&{6OSti##~W9NqIgt31=G& z=9WY#WExGLaH!YoDa&D)CdIDmM7{86WNQ7#tNr)7aX>_52nJS@Z<^5142jl!Nqid# zrP{=MeZud_@nd4UDSidI48R!~AEc*yM8@DMMoe78R2{PV=C+b&<|_qNJeeW~cvWIR zy1-oW%O?eq8+`ksB0ovjKZyMygWqFWr{at$g*vtfItbw@p(Qi?uS?VS87`pUFXiyx z-A^AgaCJV&`eyy->FcwCgY*CArTJ>Z?=yiL)9y4dOOZTW%B+5*4LRRukL9^)+v8Tu0$2Z;OUxqH)D8jhvGk*XfbL?!WE!!t-{(^`6bK6y4?Dati{uAtO|+T4jS?W z?p1^o<=wlC&%G)4#-8Z)OIm2uhwAA3?5rlQ(n-me%HtkWTpwwyrip|kM*f1?4x}su z95iN%qR5%dKg_iGR_0A+C#mFspc*>lRE1;v;yOzkCl|DUJW2%yq!6dW{xaw!6MUAy zLz>WPhs=3Z(Nk3){c6nTo&8rhlt~2e1ibLz{uvf=e{BK_>#N?fCCsP1a#Vqio*Qyk z(84!aU}3AMS%?2~?crZ70PmJbOYv}%*%qu)tdXd!;YwvCAo-Uf-55@nO{jL3sfjl^KB#=;n z=~4JQQwFGK>r7ImvwG-ZNyXUjA8g$ya%0gNfbt9N{q+Ep2)iIZcxBNS>tH3HawpT) z_=3Fn4L|=AcnEEU@VHwbW4F6}!&!au^EaT_#czJ)#e6t^qPM+rkh+Ykva|WqRw`&YhLBIlOf~h& zdoX}Fgb_kI+&CLF_p;V_g*Ij?^3iHH?v4z8FyP+VTWx;VvEpZykZHERMD?m`zP&aH zH;o&$t;R&m+kEwaHo3Jk6YPtRr%LX~>@W_n7HhU`r)a2c>}9xEkxT#9xY zmJnYtwFv&<5KfHZ+aejZ?afst!nr!<{Nrc1gE?Lr$Y3b*A<*C=FN%FSyRb0GY@X#+ z`~d@r zTF%{FP9MbWu~o6vzRLPQf}g`4t37Ua)7c?b#l+_ug^?jm2OJEq-MbS*pUJ{gOQXQCuL zC7^%z%KT)k?Snp_I9w{b-#`!k1Q|}qD*&+!Dc(5v2g(d~tz#asNv0)ItMJz8_Q#-J6XRVTJ0m(r4FyYVEt}aegGN# zb_W|Me@R0e_P57;K_@2q2#5iq{QJF)0R%4ykU-T2EGW_8N%ZUWMrfVw0cYAhpjdL8 zu{q8gZVmnNAH*;KPJHh?7HpwZ%|Lj@olYxSuJeEqGSY0Zdf!-E+SzafTfV%_PJwEv z)jH6dz~an&UFIQE8LQjV1#(In(Dot3fiJVYFsU+Igr`}UYfFek19+HQ@reQ3Pg?!i z*y=AW)`Y+o+vh4IB4=-MG)*ZLR+L0U zP@>B4A`zi$#OH5jsY}jl4Hu{bxXM4wfuu(6&iW=S`&tX(B*m~_O$}?p#{PWpccRZM zL6M!b79(b%p!&A{9B=&6V)VYyR&T!^v0+Hhm_a$#j8l>9y9~#O;KE 
zWasiKzQn*ldcNe2LeR^r=PqKFIN%8{7%1=n(rYjysUv+!d~^YqSu$Do;UbG8=H<>W z2o-0skqn3CIs8V*VWfec#@Fi}KGZ3)W^1xmSWn}uU zF*~B2@9*@^wQo8(|L!G7KI8GO^!P4c_eGbz;$bw`JdylT!efk1Apq0};r8P8VU>m0 zWac5CEeIRKD|3{XksA$28Q6LY+07J~24QN%F@dicAfKS>lPkW8EVZntQI(`g6&!+RCk2tO9 zb5(}$0D%?kOH!rG*NI2}y(xOx{cfhWd-D@8Jy0CPRq?x~ne2>pOj4kL(DB~|8S-F7 z)VN#bTT-*O7P6RR#RkG~&MmY*wGlwSz0I_#R;T~oC3Pn;Jyxn;o86gPklM;Gutl97 zN3NAYirV+_?=-_O-k9Mo+#QE|x9XnJ0Z{}G2C+i;0+M8`q0n~i1!RA=IJG~kI&=9? z{~HFiEdw$K+NheF<8iEIuB$GuPTF)H!F?}DJ1Hbfi-j_rfy#@xgg}m)96?;%7+U4G z!U&k%2W)5KNaWRj+9rw#25^4)vzX=?Jp7syOb#uoVX!%4p0U(Ithgg*_F1g@maVgS zFYUx(JTqCt1{jBY&68W3u~vXNFw>PxcbRT z4u#G3kZ`nh>h*`Br^#BWYyXjAv87-^RJ%|MD2}t3mSXSnZG|P#a z%R)r1|7`Pb6<@dYwGR8T4fXG3`qub*!|_Dry))}v4`>ONno+#b-m(*-!Wm#145_oC zFV2|n3hht9D1N)Kz;95``kL`GU#SGmBnh{3bP+{VGaN|J3!tjq>~D$!G3_6cUS~xv zf5jL=P^Twng=-Gy({pnXLd`xFIttrQc?_+yj^ySR! z7db8yLLP~?@23*WGI->MTM0erYPhoE&W9|_5`VGamvWla6J|8zZ*XLgP zX^ZbK{>F=pV^!I48ixBZ5aRPGq3neKR9c$qMX&4Q`?p-b`;CGv=JQUIEeRm;K+-&* zoL;?+tcOjm%}Y-E z*CajY@KyCP;`x(KB{X_&n82sTAHa8^UTG*?7_s++m@HB(fdYHs zfcE5}G%M#Hbzb{G4@3lL^IjtY9i1I#muHIk1)i1qCtt%Swk4tw0!X)gNfS!kRfn`$ zgSC3y+9I@(zc8ALqrK}^wr71`{gJ9$Ckr%RkRTX6>b0W~0*dz*LD?8DR@o`R7I*zO zMo^ISX}kaCNLz+nNzUo{lB3GLWjIAlu|vem1q>vhrmgpv$euT-(1RD~i@v|zJsKHG zWu;(Ne8B5N6&{+92Rv@7>bWG5nRZGl_tzZqT8ST3N&>J(btH6Toxr1@ir?-}iaOPu$ZlcNu*s5&W!P}*j`D?*loE9) zsQ!+SIsoF>Q>Igp-Xc=a!r5)RI}W$}As&XCLY7hp|m>NCE;3%1}rT@CKmVPm)1 zXU|vWh4VYSFI@;z3!c4To-roSbg+DD4ns7pXlFLoh0r_Kl!T}xE*s3fzYDcM67$IY#~B-loBaT@0;}dv!IyRJsLSJKE=I%~ z!llj>pho619XR5O*D#(^3WVM)+4OrE+rOU+moj1R)Ue4E~{3@7edSV0cVry@+z%ii#GENZW(v6iCUN7)L6jTec-eS+rng3sf4 z`G)bZ57?2FZr)6ds0`~tLHB1Ofbdy?3JDNsf$$VQqfv>(nK1|wtFdKF%(?n(psx&@ zV-DnthzU3;$G_w%;b_ZH4dzC^GcQ@l;%|ARmk9V6_vh-5NqB*6GCv)M4UqBN=nm3f zKX-0Y14oi_o=bctj})7$B^(={@e}X-kpETlgNezX^qi~94oa5{(EK4!KJ14w+tHBv zvt8N+;E;#~Df;LQ0rAWjvC?8ZLKtGqFzXM#Jy%KT%9X6~Z0;b+jbBza*z|Zwj%I6g zIwp(V2qgG;39=?lNkZUt(V(WBF6vLwVjJ6b$GXbTfG*1qYy)jf=1dCj!u9AZ%$xsiJ%-_w zULE^(|2p<0ex+*} 
zr2rKHqpYr4J*_2EHM2l{kU@Qmo1xMQTov7}jR-=`$pkVmzMjXi3@33CLWx}O7KVBE z5Yex3(_MU8zyAylY#mvv@R0%s#u*#s>kmyal$l!g_U6TL&2wD}40E#FsX$1FBxNRu z+s4qE1{@tYo;EA=`#_^{W>aksr^TUnhnlkP&?S5{Pzs{!jB|*k*FbZSr6}?D_3IpI zVCz^Fu>uKiDkz!uGv>&3W^=|ROc!;A2l2w1kR}Ngm(JBd7Gaf&0O10n&+5GD%HYTd zlT6a^YWvx1C%YHWi6rl^77G@}Sz^9-{;S-;=gYsrRhPy{%xPc%4a}-=tCHbXV0vr~ ztuXHZ-o%rpZH+bCb>38Nm$rUsF#Gl-kN~r$Vn=!iOKpeveNlIRivNeAiMbny35=zt z0-VSf;80)wCPf{EG^KMrJ#zBO-L$oeb0^lwi(1X)7QMujMfvF#ord|kW+I(~3OXWX z#jE?YMT%Z_#)lP{h1q+HN4T?Pt4tBmY4a5Zy(3T*)_YxK$t~wTqWNe7XdO44G(XXI zPuj!#P3bFYd@?`@qN5&y^7#P}bc5e784J1S^ogZ?Q~^oL;A7BzKL(?-Kph=wglJo- z24Nlk%BXO4A#SHRa1?ocictEf0g~Y=u}Qj-Jg3Shodo2-iC7k{_P7zI{f-1l>kX^N z30D%4uoZU4IB^It1Jt~u}+PPcI4j+L$@g^tcpz_a?a;7FJoL@jm zCPdP;R^6~G=?Q|RMwmHM}=rE>_0DDVem7YJ80JtGuT4o<&eHESBz-*T;@ zSxaz|MvK_VB(WpLh{@O~=7@~VB|!e7`edESIStvoW3v`$Y=q2l89+HW&;m;pHn=Sa%`aWb6xqo%`V z>*s~~KJ0f|ojJ6#)|L>Rmx{g+usd8+QjDHs_W1+dFXKQZ*Rk?Ry92x1J?h93hqp)P zy^Wg3^5d5VWB6-lj!w2c47@ycn|fg=TQe)|(ZQJUbC*`!mNLSLB+*0{9nLVd!S5Sl zKe+y|mP&qfTgoj-JT=&v)p;wRrUt)7BK&(|!ZFHPIJVks6*8FZlOb6;klHD0dg#!=3NdcG;d5}>#fbw-# zA6tSddybq&kM!LF_7sa__zPpo%`Zb%9!|aS2H%N~2z%SGcM_s#4u5nzt+VgO2y&)W zI zK84$tF4eEuxDm=?5okxa$7WMR@vm7DMf~c)=rU5dXZD=b1fu&S-%Ggzi-FMA-riOR zfV_SDeaR)q(|G$YFAm=O_=&KpX|?c^=4!WhFh$vFSkj;pq#*F0gkAN zKaSy5j@l9}UZReAL?;fE9EHh(TFb4 zF*$Z<$AA1-7_n-}MDt|qNqyHWQh;*QD&C@ypa6*HcDE#fVhY2DiLKl-jropnh$xc4 z37n#~i*{$)5h;t#!UDkiMoz4iKKwn|uev&RB)oB0<&Vl)|KoqURYmQ?xX9+AMr5d< zFBvg8FcOsH5@oapi`nizImpIJdBF!1XLkklW&DD=gd}OGNx%P1j;V}^9Ygun0CQU? 
z7YGM@V?5{Ccao2kKTM4|mPXIplRcZ|J@0{|9>#0+XeP5%R3~4ga(^X{s8q7oVE%2fM31fUE zVR3x!3^z)kzS0D!ovRO75F}XVJJb<{rA9-5gSOU_`V|kRXO(1GA*sLrT_(@`_#S%C zSB9l%`krbLxTY*~P#=TxpQxZRqk%_@#LAZDQ>S+==HQwI!p&a{u#JwGe{hoO<7He( zzzCo38{zf|$rvL^sGy)HDiYwAfJUebILbY$0C|`;IO`q(B9-<o4)=itn!5fxq-dAS6iVXKPqn1Lnore(L$eP;G{#T#ePhL-cdV=BustPfjJK zLk>{)_F;6i7yWW0H-=LO(5iU2Sj7wWYX?@ET)aG%H*R>Pjng?4@yhil2ZW?=ye%^< zLZbq|DP`2-{wX{SZ7%?_Rcn?cs2wdY0OEU^UhN0+pX=@u2eqHq0d@xr2v-`ts68-{ zYIqO~JpTZvTfx_?YmfMEMdJ@Nj9|2FyFn75iNsJQKG0s$(}F9F`CV+##K0Xj(C#YD z?QuzpNpp(DMw3;G*r?H*5`#d3 zUY-sHfz?VF2&%DtadQksP2rL`N}Ij)-$bZ9iwfA1>g928IwrjmRH&@gRHwVbnrbUDY zaWV`3n#lTBVajqaDp_L_*mVD`liZQaH(5&b{loXIdwF3GF2F z5uq#aAaRnghi$CBx4=6Cg(mmaNb-$t0z=8nQKA;j!69wH5ms_J-)Jw7mmo33#hElJ z&5$WLxR}|t?*V2;&MP!6U-`MX^_$OrEo^r+6{n8LS4RSp2RI zI;=DA0Q|gmM8S=c1)%6K&@desj09S@d~G`0ktH16WenL;ln58{&|mN@ zG#h=~?(7pT7d_QkWiQf4LwZP4EK61;fJVVSx7Wl2@M`qS&lh74#pyCfuhGXl{MIt| zSiq*sP^RiiOYu4BK2np^t1TRiI?_V;4^xw$E{1#?wiisp#nw@$t;}3j!9Nyidg#db z+Dm4zdSPUz-+JRZ*9rgOd+&-lHhmMy-pvy*?ivk`2!7Kieb5SX!xI0vv@)R_{1xax zvz&MKLsWAJcTfA@ltGIpu=|==K4?gaqasPd`kmZ6bT?_$F^da?mWi!{_UF#Buvx6x z8`n-co^${L!0Y)OQn@>7b+;z}Byi!@>xRqfKu_4n#l@C<&ewnBT=opHD|1oN_m2%> zZ5P$)nGsr6r|GVK#4}yBZxN2$86FFT76zZb}%(6SO3E?ive{6x`l(QS|-O=RUE?snn!c|ICYj)2+khd-m zx<^sfuW5EX7ml{#CXSPbKi#H`zx2-8_aI)PpATCoAXAbY+dK9TIYX}w=DOaZ9P@0s zmufc+EcUH9`NKP5orMkdi~bR`Ip5s$57*?)9EB@ZuS*=|IyA?QBO5Nbs)71f3!qfL zB(CHG&Wzx8zrj*R(PG$CAeGu{~KiTc1piD2iZASZt$t`i;i8vH&Y_3C92yVyl z`fCW(mf3F7ngEq(H{)66vd?NX@inMWjqf&W1{^F`I(G$HBBeW22o9=l#T@>gyzyQq zZ+62MDU=0MTMQEZyb&#UtnkXy8=f#G4~n%4SEkQ`+MUx(sL~}1`cvu0fGvRpkWL}M zWO>6Hq4ScgKj1jQj{qis46-0yGoo@KA`u3>?_r#=?-(4;xnq8R`IY$1_&mNno!7Ls zk@$1iSj5M%;E2quENkGrH)ySvH|jke69;gX4lCs#U?ka;yckWp>dtKz9d8bYdV6W+ zPZ55+c~?pC>b57L(W_h4Pu|fxKip<-4_I_{w6w;ZF#G*ry!ZZ-4-f~+08$@crc5Cp zyh!9LYwDGJ2?N`vCMNuVJy;X%@b$_3Pb0sAT8azba!`m?SX5p{jJ~G-*@&o{t5EVImdXO`@SwmOSMR9RukVYS>n*{ec+^$ zEtNnb5D=X_!(mBoE3>;VBCKMTH(1ZIFEKPX#0bJldjH-#Xb?c1mc1sNWh?gM?R#rb 
zCg{^QaJkriws4805gavibY)Lrv-hRlG#YHZj-^sVBe-YJEGh~!H=~^w&;yH6AtJVO zW{a{wBCSS`-}@hKtd``7zG_^2?!0bzYtVF&GI`-+N2#L#rl}3J)3(u3U2T9D`FSCm zFg7;!t={O9$%;Kco)isK+*cuBgiu2PfN0H7=WlhdbEY8U;~-fOlu^_+Ag*aak=fr^ zEq|BrQ;SGy-_3Tb2X=KZKQ!}cKx>6EJ}>W?3{`{v3c4{@#>MBFeY?QW6lUr2lUYy! zB699t&V^)$;-51uvx2_S{s4i|d<|IuC$i4QfQ(ZIfmFTr#%QeBi$AS#do)yLNj{h$ zOnW}-SGc$)TZSy*@dH{7;osR0JU`y706y1J!K>pCLu>Am73UWN6C9r>Yo@upFfI;{{(C9)E%yzU?Y-(SuBPV1%J z|5*Qtx&41y|Ajr7EkrHA8q_3OwTG|E4{8Un@=5ndgs(iQQS6?Y9G150cOqwm;Z>W~8%`vocHd#`8n|D)Z#*2OlJB&#c6jC!znr~> zU;y-t&&aHJSAGg15W}skH4wO3bzC40T&D^z>G&^kII|>I87D(%t^}II-iYiz^V%0Q zcAM}kP&vq+2k=pt5Z=J9Losgn2tHBNA(i}MDrGZ`2*5lgf$+()LCPrxOB^O}D|Tj) zLk;Pm!YLx{Ro!B7FdiYji-D-J*Kf=-Z)M(C-Om~Wt1T}1`q{g!YL8CsSQeJ$p_sWn zeA&?q(&?#F{v?~1QwpuRukm*18$kDzU_DHzFNU2D$g~i^49+SM4J`0Inui2$AXU1K z4ok`5I~4svVq{D|A0VY>yG)y7o=);FH7zG|UIxJ+a@!(+^3l?0u8Z0i^sqk%DQBgcV9JQQ9tsZ2}{mur+~t=O3iZz~(^_jrr&lF>`w*TCuY zE%vu>CbAP<0U+KoMQE**MDVgc5`P(nF>Y<7RFHmiHk;38z2~W^NC-#2dVnod{$#xb zvlM4*hLABLNJ7)sRqSe*_d2u~xsCIZtvn}Hpyj%(FmIA4F6_#^QAOM-+@Ie7pg>s=AtQF0s zE-Jn^Ac-Ingj2u#zI@$J#O-kn5B3>eSNZjMeeF0#eEM@pvZ$M8uhn)ky3TmqPHRLC^SR&y700vdjGXb z9NcF_wBjW~1^xfTN;C4RA2S2N!-X&M4i^#1<}<&uBD|swk{F}Ttop@xTYqQOb_NfA z7^`}7Ks7qWBMzmGP%pJjshov<&HP?bMk`?sMPGM9_17m4+!Qo2HAPSvef?_)l8+q zf2YCYN|g^rICs8a?e%oTT0{Rb#=U2SbKrq9b;shZG=$! 
z?Xe}&Z4j@}p6ZGgU6%LTeU3tysG4{vHX6$dSNOZYQ7%Ial6L2sWptFaIZttPXLv)u zC)<_*k6UFF#KpePucAVQHZIS#{D^wa;e%EQ!A|9Ib?8?qH<;>aU0q#`_3f@3NMy_o ztG^ZPv0d*gl%O(qFN+wr72z-fdEF0UJa{)G1)1KW7rs$6js!_xvxsuzcEdPg=7N6}#H;w7ARI?np808m~DeNUdCGfA{TcMm>yfS@S zu(9z#VBdgfUm#()C@fIZ!{YkDZqdU2QtJF8y*)Hml+Z&-BP$5oHK(w-Y=wc>R{f1{1Yv1pw< z!M^v7S}j^ea=~%;qD5H5V>oJek8e9PaWMUeoaMDzYNU19$$YlqA&=urwF~D(d$B|_ z+7LamDc??p8d_I2dv3SukOX0JR51n^O+Y`hKd%!;387M9DdPt=o*HL>yvG&Q5CplF z1&h1Nij{*(F{*#>Lcf>0<}+|lF)x?d{Tj|`f$xh=NSv8=Y~R%{pKSOIhq$%oMOGef zbyuOcZALq+ARp3HOj4%697s->vM%@6Iuu&CTs5g$hjdRElUfzKa(gt#qAZqTq54KX zNmgL#YGeasG+r2&KO=E`j%UnMsr&ar6cU;bsMD=QX*gf2xdhcjQ!tVX-I4VYhz+(V zKVbf*p&N-3ljEeOaliRm0B+>q5`2T!CBIPnQ>)3s@Qb-VX~FJpL1t!YVs|wq4b6ms zIp0}&l&ho)(@o((QcaA?TnC5yaOho>n-@rSL9*RR)a&fhM7aoT=E7?xKS73_LJnDq z71+m;yh54JZ_`lYi5yq5m8T{?0akZq)lsUiWc*=BoX-2oz_`FgO73KERUE=>vV3I@SQ44xJYC& zcj4~QdZ-rcOu?I!s)JF5DBJ{gqhW-Kbm~1d38$gg&QE*cxuQ#n7Z8>}OL>-k`rhj; z@h=v%=QDK=R;RmMv3!+rJvWn8ew-wool?n9^Hq8mH&))5qlM_eDuv}i24ut~hm2G&oEqo!(${TsPlc~(?! 
ziJaZ=nF%yJtWXT`bF@63>S&w~#PA#&kwIPX^$&*xqV=-^Xqn=QkUU|v*LIF;x)3&K z0P5A2?`0`W2A6(twF6{#{%0q&w9EMO4G&&DYZ5)&C(mmTo0?5!4N=^CoQ$=t-F=C( zo|ws-pYshQ!}+OD*+O0Xo@z$;2Rwq8SF1x9ZKVqh|BsEVNKTiqLnI>c4t-i(SMcbo zqNcJkbk0fD@|&0dklmwbs*1LcrfF+)Td({-H?nI9PZ>`1PwJyPisF+PCvw*A+2**G znej*#bNGriYkJl&lH5b+8$2(dem+Pk=Ud=krXzm7s2)5_;#n*)UBDAa@WD(D2@YQB z-OJ9-j?!S}q7a;*(+J$|7;_%o3UQ{9Z8CPB*BaVr##71D`jo!*L^u)s+`<**`1+y@ zJD~N#PN?qjc`}99qtb~L5Qq#}prWFR)79@wVUKLN=Tt}{8~kOvv7XLqI&K`#JNWh8 z$x?;!$mvF3l0wLsR=HhTu2DQ&nOzJS^-Q_PX%kcLGbWYSk{1SpX+J6r2MP3*H3x@9 z<6CfWX2Q0-;;PG6e-GDfjpTnc=q#sgFXg?LqCnZwix;|4KgVIgJu``iZfNU*6hJRv zRrvnEpZ!{$Exkr;r1BF1l(8Xj5G9;am z@G40WBD#p9m1IIh!YzWdJ+K2dqaZ#;M+SIPjkD>vCP|+lP=Ix~q(^)u<^V>*v1nyr z`ZDp5{^dKeo998&n!SrvN0+WzwVs;rOLJi z9}F{yg90w;>&a?-A!P>bwsLzcuG7*==!Qd+fl)?mmSa8@LOE3UTLfr{=r+#?P^YaF z^&{Eq+yVeMkU#Z#v&*%*$nk~mmPo~r%T!Uf>HDcqj`yzP$7vbj;0J;sk6f^pLd$i> z-<1V}qJ?t1O|Q$Y5`%En=GGf*>T$Lf(6$$U85_q=9XVonS7LR={f%16)T& z?Rpt$f7TP4SZId5da-xs`$yWRs>cKbIItm$qz%bRVUSvDe3r5TX(c1ZmexEa_fh5_ zJ;iWJMB+h{E-%qL_Nt9_o{iwXgw!`OT6&(X)7fkpS?egqY2+s<(!i-w%XAnS^WhWn zx<99$DAq)lKz|-Q>08PHcsTxle<-OH3!B9_by~5u)^?^b6kdllC_fKg|_Tnk*)96BZ zk=#orH)E~mHXf%W|1qCq8fc0%;%X1cF;2*pfM`O z7W_%0)ErisK|NeGH%!2O4&RIki4&L_`;rDn%Kzd(c-%Wjg1z%ldiBYKE7Nf) zwCys6QoqT`$?#pLh+LV7hzOaL>N6+=hf7EgJY@c!zppN`-dAPsR-ktnHwoTXt-^^Nsa%b69)+03^Ii=Or z!~W?PWgR?q*Y>NTG_(hrQ{z=X91c$|7(lH*!J>}R36>W*Y&eDILjKz9&y|V&){Hxt z5hR3#Tz%4NPf;r#C|QSjQyeurvcAs|eIu6$o!Eb$koHus%-{#-A!BOfv(KUB6a|E= z(YKdVT#Bo~*K%1BfKBsDv&t{`wqn+;og53&>ksXdraK#ZmDAx)TK32i#vEn%v(7%~ z!5A$tk3bI)`C1bjA&PIrH@b$moUP0?dA@$aTA2}4}db zMZUDO`e{P|vN_k$IhwDiuV$JM3bWYyhV`ofHFfKO4fLLp14b(R`5kY7n2|>tAs<){ zrWW#=heuq$cARqVV|p{)@S24G=k-Ffair~IX1%N0rxq3@wqDbhA6OZRm|T|zGK*vE znM*jBmIu;7X>KgpPNLv}PTft4m&+@p8ak7FAqH(u$7NzduAo!}xC=Z9owoLNpGL>W z5At7|A~9y@3j&@%&HLHguac?sT1}F0exgE4Ksf?r0T+qAij%&lSU(qzPt1k0(bZq! 
zzUq#w#3>brC6?v$jPQ?mrE&cfEli5;3pE z=d%V|1d<}$h#ibfH)@5*lx4n5(^5TEg= zEN)A)c}1lRJ%xUbs-zcr8uXf+P+>T8ko)6)eSI*m<@7UeYP%eF-9o-G&}Y6ihXIXN zx!DYu90rx^G`{3rD1@yM|EhZWsqQ%Dg!-U_To>&nZFtG(7LpdAq1oQv?n?jpz?cS234d$6FC z8LbSM!-jG9KmGA~>WaxJGDD(?`XtPQQyT#1Kg4I;dXQ(L<2R=;)G=C<=P5`VJ7_ zg=*&}3QFKmo{@zJsE~Oa$l>}%r!}TYi8N9)U$Ci}5by3G`7gHqGor+;xV=HohA$bR$ z7R$2i%X9%$K#%nP-ubSgatlh8O?oIs=+pr9!VmAU%=t_(gWp2~I8&%qOw^m6Zzqq5 zHfcWf?9mNTFxMDDhYSn&Y6JE zPF?-AT?LEw7$i179!cs;OcYyg2-Y&f0eA_bWz@GYxlm!88BW|3J>z!8X^FfJt9QF+ zicJ{9fogj}`=b6mBR0Q3Q~S|&zZyA|SOV6kcoJ7OaD{sgg;h*qX@QS9+}oLR^swbG zt4M80>Cp}k7j;hn-2itHP?)>5V7tg3Zl+@l9U}iB8MITbK9JnkOaZY(t96f2B6~@5 z=13ruEKb0N>i(fEQ8f3+9AL_(JEb3Wb&fG)}S$bZnu6+-YB@;~5jK3;R~+wWa&OZG8nAJDzHHI2;7*TU$Cwg#c=4+1X$U9M^U%e;(y*{K*!V4@ zjm4-eOmWo-KMTORS@Jw=H6)T({;-$Q^!&pzn0jTyLyql0yF1%A%4-=Ox3#GI*2-P{f z0W+5iYPl?!?w@b7SXekYuaQO?I2?8--KBoA(WL7?r>3UHQfYX;a}kdp$uvb2&oui0 zQs97Hp95i-4pq?3Ho7E(S=P}F4tpTO%PQw5A|U~!Hd3x8NFlR6gW&tE(^LLQ+i!iFJQf{w1zeXzk#tMbLayfKURTldQAt!-Xmm?tr zQp5?23a}x_WIudhmlMi7Lm~vgmH+hElSD$WmDstBYO4YH`3i%o3*J4lD{?A7WDicg zT7^f~WZh-g16s2Y>$1!ReSM%%7%rCb+hDhXruNj#p_oXbh+u6s6^8pk(;sErKRara zH($tth8o>(JP4Q#!t`!0kA7lzJID9X;_FfQkOgOFuW-d%l-YQQn2lebZp9#fXGnUh z-NH<+KwboHq26(1t$`>HUGz2}p0&lTFM77z9=f5|X5y?oLBMJI=ByzaGYQZSOI&%jHD3~33&?f_} zqrgW7fXM4BR$#{7{zk-2%Oa-$c!ve@Ojo?_#}fN+=ZB4>TD(6rIn^}x5_+T1mL@BO zuQiLb*b)q$`VlV0s0QY}dTD(o|CuSSuG+G>vzI4H9UcZc>ACl0%_qA*o%v-gGl9mD zEB)2WOpfHk8y<@gl46Y}i4WcKFs??-Rzg~e)dhYaB0Mx(iw3-A(OcawfTAK(fx(nM z%Z6;EjAPk4abhHTZ@$|1;EhD>vHrIQF=DN!3~4JTyW{yIk#=``wVXN6fY_Vw1MZzR6kUTv{u~z%SKwEBQ)3)Mg4_lpV_(wcgc6K@;9u4ME|Ldqh|dv2 z+C3LGTj)BNS^%EYz=W?7kX5_vyE}DTNEn*=YrGC-$h1GeDBMq zWzV^AzKthn@j^cLT|q*A5d=Kf^k{jeiY^iTa14g-51$rdQ|2Q$@#~pVJau$dekYsYYThiEl_w z^C^mp2aAS$%Q-y$p!D*kbP2D=Il>100yI9I=|`yUs6*Q|k+)gAZbB#5l^CSp{rFc> z=>ZX1Co2is+cN^YA2Yp&(8i{>Hn+rNLWOPmF~n?rrjw`&xyf5@|1T^lA%j?S%zoxw zou2$4NpbLX(_aZBCm7>{0Xi^0r5aHhj1P|~t7yt+(eI7d7?<`haEvmVXv6Nsk`e^C zQCYo%*b7D}oDF^rr{s6Wq|(lW?T7bOkFletE){FXfb^CrXsa+vuk}T=Lte;un6XCZ 
z{dng%!p7kB5w7qseF~wBjg#wZ=x_m~8wd0EBqL=RVpDOU23}dq7SA+(pND@hz`59; z7n-O#`vnN8E0CXRru6w{fcjWZct(~2#2~r=o6FJdrq!VLsxS!DC1c?M@rdAuTASyA z5j6Y3Y;$eD>1P8S0i`q(#UH^C2&($?+PI3F{cz zoWAZ@;X6ueQ8}FvgmDKwB$5{;C&R_=TvmNMVp(k8XjLjv?Wdf*hC#Jo+%IYOHAW9X z&MCp@(-;FOuZr8np{ErF8^u#E3=fUOAHksyzj@|2Zgcq&3C7YB6nZRX5nAF?Q=All zhcTnnNUFS}@dD-|} z?D%|Fdu}~k-THcgcJSGS!$^ftC>%7B5w6+~^DX{Asg~RUIsJ+JzpvH^`BU zdUu6% z9r-$Q@my^rIAw8o<1+NuIx8pR?6JC;Cfw(wm@dKm7gG!uWNZw!B0nCj>gZ$%2o(rC z7c4$KT7g5{nSce#Z+?qk@C+uj3dO|IG&N9`khYLYXYX_3T#6lwuplD?{SR|aS(_sO zAYpZeD5UWH9GkU(iD2!F+ARg8}W*P!ZLXD;5q& z0<3f+Y+s*t3rc56|3e-%=6)w2R0smClT{BK^8t;&Y4hd8IG>#*`-}#kw0P=U8D@^sn{h#~oqKgaz0TNs>7Yg8!!A_pC+N9EtFioFv|`{yh*MKSn#EbjZ+!OC&mp+>M!jPGCcMfNq%^6xanf!->4DcR51D zoKxw{0vh7i{ETeXT-82S5BqeeL$9%|B&PA@4v2qQ9Nd%07CBL!?lLQmi@@#xeppHe z-Ly@Xs)`EE6!wuI2Wfa3*U|eT$Ue80PvXFN7XvUz7jt|l$=DZ-ZS(-t*yiR{F z(BR{Av<;ia;~DZuEp^RLu#hQz3MN@}t!&iU*o zsUE9o$_gn9R)~^SbKALy+?|R!C>CO|Nr$6_k_Xz^cTW zL_bC)C$zHpAVS1Rj@yq{-4=T^g66c0ZluB`9Ks7^KHIx*5O`KISeiZH!z2=Ni@T#i zevm>i7?n=B?TO(aTt=!nO=E)5SjdQRUASqFOMo%4r zk!E6IlHvRWT`Ki$j3s`FTR%D^G+JlM_iG+fir{@j^4 z&q4Rkv4|V`$kFhD8}WhQiGK*@)2Kkezn9g8Y3&Y0DRDkv1Ypu<7$?`8Lx%9vcAFjE zneGg|u|Jx7sq`zPzQzYy&eTAMr+CgNe6a&twT?Fvb3viuYiLn9hF?oCPi&?r3oyztQb}YXy#oNx>xG9Gh%7vr2`Bo=LlNVD84dgAc6g0q_N6 zNW8irW0Hp10j2Lc4ZxP&XtaX$V7R~;PbeTTTd9sdBWSXBV{oxM+qFKt?7e^Juy;(q z(E01hy6}mng!Z@ZH(faaYVC_08t}gO9Sq@I(W9G$oxb4kfSQ>&Xb*@}lgqEd*{z+* z=5ft6ipE2j`_T>BS<==Mp0f_1VpEdItWC}rhcKN zuMyOKX=ITjH3A+P{^0mA1w8{`rL@}Qd%YIcvqH~hDQ3S#X(`$$We#TEg1Amc>hjjf zJ3{KJK#{g~wW{?fNb6mfiYorQybRQofeb%n%H{Gt0n{)yPQ3ppds%y1*@NH0s`UsO z>Lv=i<45{@_)%c!SVko&`H@_?%ybo&Ww%q-gmla#H{b>j9| zZc0XWLFf0;+fDr*oW%dZpnB^)Le+6d9|HIR=5%xWW%OQe;m4A+ zW!LTSi#uUgI9N@*`7r&i;)*wWSpVG7$1+>0kSQmgvOl#7RVIy3fb}HAEqy3rZbql? 
z@Cc|maD&=jyl@Roj#qN3`&9@_9s518fKO?YOHA}iC?^BZr9hhgNrO=#6p6TDUuuie z_9&ZZAb~kfAzlys?B(%H$=iAIe$r^mg{c?2ug52TOHapcQbx7y4E1v5Afb!bOF;)K z!URIKmO6Jn15ACdDvhGI!@r|qxn7N!&%QPS!;4-Czr- zA0ssXT#Invy-OfglPVV?moxDDaVr^&@Q6VRI%17|cpysjcy%fxhi+n_$<^AX<1{pZ z0j}M}dyVe(x28uf01w{(-||d<{lznl%1-cE>Gng<4SNKX0>Y>F3&;12pC_G)M~al- zW}R_TXv33O0;?sN@l(~+MOEH;6#gQ1eOFOo4VGJ%TCUHITkl4d9^={|>j^ey{`OqR z)-iirXMPvI>ShVS$_r;njet|UV==@=C4m4G2;L9|ag#ThCVjF(BGx2-3%!AzawBjM z3gD+93=@{Xfe23+f{x^g7xII?)B1iK?{u3h?4VpCwOfW*xll@SyPH1Fm|?7(T{G(d zRrFiPpZn6J{m#?s)|;3sUdwA@JCg?cW#qPwUVDLny{KfhUT^5Mz7tQoXS!AzyXZk zVHzptN4%ki41yp!4ho=f!)4myg50(DKGg~${KJ%8|I7k%Xs?9VM9z{Aj|yqikq^!m z85m54GjIq4r=?%>(#=$tzjgggQmF!DTwfY{9oT2#5kI3>lLEG=GnJM&E9XWhFX!>3+r^{l}xS*K5y z3$d&`;KKyyWJV*5j*bdY6{EUKf0(57KWFN3F+@H{oZY@>z$PYUBqI^} z0*?3Wxp|8E75U`glp41QE2J0??0AjpV#HUin8r-}*Al09V3~!Q9-K-=CxuN&^^@D z89cJa#(cs9d4(~u=XDs&0Q;cQo0L9|NtC0N05&8$6%7pt@ zJJ3vB#5VLs&||2M%4v=3vCabuMYs4rDp*AuLf@-)en=6{Btz}``uMrC^|07lwpTZ! zb+I!-Bo+C^VSmWH6OiemIXE>Q_|=r zx1-oW+fPmO>e8H#S1A3fKI1;3V$r?W4h_*RyN8G;1(Seth<{

b2`!MA?b-Ae6>K+EX39|M)9;VtX&n{05R->JQl>B0^bTEu0-1{znWb zRWiAaRd4c-k^`eiW8J)^>0DE)?Yl6@P6^hv-FAw}N19M&s)!`aou{s|kl{WA8A86+ zW((iX%JX06dPTdFr#1!|L8+fc&(stwN+)&0OuV)n6Kw0 zHW+wu(31>qieKWIK5hEB6M*dd>Wg~mbm{&^YlNDvLv zo8?z+=2f z0Yf)G;)+T^yi}9hmT`Pv_30M(d_aABh;uK-)cy6DWL9m34de>-HC%*uWMHJC9}r%a zznkbxqjS|`ddzf%K4PG?ZbvcJ>;EQlaqE*V9dsE`d-y)8zQbKyBom#oLWc_Gl6q=okxqYdOs^`6+1yxRYj4 z8vO@;(Ege`YQ0&UpG#4X?BKP%o*^Wlm+FGg)(zA^H}RTj#1FQLVa<`1RZtME+D(Rx zcXtPW?0>CVgJYMZ`&e^)oSNz$Znu|Xy1aB%z~>IrDp6&u zl9>yTc_5yC>UjBAm+_5KKlNv@mR!XFq>4i^KzW{X{S-jz{%JP0&fCo8OpO@I zKz_(jgc+iUb`_8G1v}lXzs#C^_xqc!!sC>4d`|U*Ohhl#F1eyG63ohNr+#p;m!!Bm zzVZ0;_Vn?JOpgEEgv%DA#4E^%;@!*_+;c$puTe*#_ z`u?7tDJ0>kH0)<8k|$IyBemLavd5ge?Tp!E((a(f(NBR9O}j|v^V7z>@Kmyt;76hOI5KNS^ng5hL8@bMqP zI`Y6znd~Bd_47aKi7CW{g(%(KkpTQ@a}~r(Qev`X!h29st*s8mI7UN(7X&)%Z7NRJ zn^Z=BNTEE&$W3`%E3VH-$RqeW_wgY=EZH%2VZJ?ddkN(|n?wd^WvrSoH;mZ9i2NvOOP6Fs5}<9;fYNUi6|>*$X)jY zZQ)H=5Lw>WcPsWN(Fckmrt2XAAK7Ifpzdz+ooe^W#K8*@ze1A0wqX=@-k)kD5b6xW~8v!e(SLMv(o? zzB6Cu$g|7nwp;g^Ag>%uevxf-%WikJY^8|#rIve0a%F$1-n0 zA57YQF^A_kR?0bn@C7e2Hl!Rs@^iQjX~@pJxskqk=v?5f)mqCtn9J1W*k3dPZOdPC zQ=-G3aeFvLVx;%7aq12i>05!%rs_1|@%77b+7w^ux$CTeQXC3#8s#6)fj=O#y{jiF zM#NeT;1&gfe8i+%Hk;@7)DLh-{EWDsF!zeo-#3H>eX@)cxkYO_EJ=uFPF zmCZ8~cDh{fD=TBFGFQLiQFy!+zd^0Brhx~c?~sBEIV)>h5jBV6$)-CZdS<54ha$oY z>#hQ*5^WTVb2A44qfeB11J>`&b`4MMOIi!wL>%(qHk-M5mG4*-KY%Cewber8R*Zq5 znp#S4qyAB@%uu>jG*3OSE09JIa>EiUu>)xthXb=U*(Q_4LW4gz0lNkUS|HA@p zZB$!T6Vik6nQBdPGW{b!2XbtY^FylnSxejbV-lttMZZS{Lo=UWbMFQNqiqYVlp4p& zkyjsld!?~nPdj2GC9{2=kSNWdYz05_k{e7q>=c|85STnX;tp=d!_sakI>O#~OCVJ1 z{D0y=|Nry>uRtHL#5EhW%_BnpLgynmp+XzV7)p=7`+si(hX40%!1R`#2H=X%k{x<) zmCW`0%I-vWchS1OenJ-|2$g5YF@9pE)Vg~U6c71@sQaU~hjX-YP)Qt~qT=Ht{cjd% zB$LwVJD~v4Wr0I?i-4d3PWum~Iy*KFKd-R8?-#0~UCvi=BA=Q63j_-JQZ1D?xF=Q) zqG!600`EkQTfVm3GS5fP^~&ndCQlc#xTnoVzc1FVBjuk{^8-4x#KW-OL-#N0#8clz zuCJ&$1%mQJ4C$fK%R=i|)xaofS@!h**gm}O+9j@eG#Oksy= z-Z7n#%WEh3U!UY`if@Y!F+fmiwhjmzX$VC?mS!UcJYP`eWL0 ziBr>T|7l=&AgBvtGg{z2?&>b$t0q`VKYMzm5dnOF^t$|Nac#h}>WhY5HWFiyPG1R{ 
z(PZOu|H|Qs!g3KEAOg0tFWG*mgw-OY=oYN39wedf0Rmhg{~0wa#EXZNQsWz<)Rb@F z21A;)PBgJZa0$2TJz_}NK@9^5i;|)un$<0|W|MQcz1sNO22-bY0(CJ8RyPP`BA^O~ zl!Yk9E|T5;Td!{Ry{qSXAYBLC1hOP#HtIYhNG}@DM3s z{CnpAXT7x|8d2{0v)*OzFprwEJki~7YfXt8%VZ@%UK3^ki7 z2U>}jxKk`3#s%YGF%6Q`Rn29Yu+O-V*RPGcuQ?{m8IQBIPo9N&x%;F{Sd^_OxCMvm z-lvO#x~uhn&^^7*A*2lx3|hSb*eotY0EBYsrvLsI^i?6)*Ly#aYAoydJivGEqRCM-+4@C#2J zIX*u_bf%BcG1Aj-6dza`!TykY`vfpXSS;q*3PW04M&kEMjryh2fbtEF*h2gl*!Ml! z1`ifQz2iq2lFtZ(LCjH)8YTyYrpp+VF&gW))^@NUqkX@dv`+D9G>c$@PQ!x2_yO(x zD{w6cPd*Ksu9SMDY3ieB>Ai%7Eu$fm-XizPlFR+}3u8cIa8fW){g;hoJNNZ_Wwgg^ z-{ZCN(iA?(`*8}oxK^GZJD4}H@BSYw(4z~|ZQN~H`Fn?*pSeHUXQ)E&F?|n9thn88 zF-^7?b|;yaqlwYQ1I6Sy$v;$`YcRS_(KSfr!9-EZ#(I?a~*See*g3SQC>JE5wvo! zx1f8+C#FoQ9b8{FTs;`P8avY@MHmCT24VN%7imC%&p zd(qsvq;k=K0eea(e3F+#0_Xxo5&XxN8p`RViG-pVvQauj@(fJGUnfKnH$#NMx}0e* zN_;3C^NlzVC6!A}At*{Qjup-X#T3l26^n@+%8{=AHR6Y*OQk3qtC7~Q*Z@O*qu@HX zkcj)XnP>yRa_`)V(F96aG|tI$15XYuQu*NE;B|Xk8L*jSt2&=e*uzsMUbjWGREQlS z$QG)ClwH#ET>HxJ!@T#8FT(J?2DU+U4oBQ@wF*hS0Ag*>V)Z|TRjgQ`7ETy$EDXj} zC}7cs*#PJtYrac1Q0O5Yxon1)b~fwk)RVCB;jN;}EY9?1j9G6s9%lkqovzA|EWnE; z{;b*?#--IciKt;-yTa_+yo_gsP;o%XDg}saA8^@PdCrLfw)vtr1)!%g53qzo#|x4# zbyCb&izZ-1c~E@M36P$<`|5T^0su5G>s>c#$A9j|5WS!ENlFDeH>04V`5%Uu9#iOa}{+n(1rPW z+Bx>(<1aVja+#sri-WxS=qMU@ith^5$VLZ{{K$0gVv4=#3lT^z5p7ZfW`I5tqx}-* zYTu96gx=0}cLBsYt=zTqp`oFS0(s*9qe6Denwd2yupRB8?XH;vuh24Yeo&2teew+U z5$oBJT4h9S5SE?<_EWfV0u1ugQK+mEewO(JY#Dk(&D`z0@=MGC?QarVC*93J@c!-< zJdrsTC>}O>0cG`rzsx)Tck;1s=$P}}SqhyCRxlyGUfUB)LJH*w*i0tqo^Q}A=Njzs z14KfChd`}88-U4=UqS@8NMTIi)VLsJ2b^)C8DROCu{H-(g?jNIHfT~f`$gu3b1aFa zbgceJACzk2e6Bsum&l1Qdw=$-0k$!pJR8_b z%CgWjoEpu(bw$KIA`Dje00xyp8Sqb7}|nuHYOrJ>LxRRxpZlaQThR9JbG2q3B#95>y78H;~F1f;!dT06gb6|IM4Xg~kZJ zSNjLI`0rAfcpzIZN#e@}3ieXIn?$KV$6P%O$_i+eeIm{v>)BT?{=U)q5tEi}H z?@a%2I@v}tO7D5jOwoMcM>wkyc__~9a)`bDk`|JoDtL-J8G3Lc-exj6WA#b7xOYa1 z?t^Txj!H2@I!+urG#!=JfY@(9)7JLdRee4FDg`2E$7XT~;Dr6}5&OC{y|;fK{|vrPXz znKQQJ2C|zxRjMHR@@1&Zc^GI=J3N_Rwf5f-BVVa^jDo8sgLawO 
zWY~e4%Xx?7DgJWMZ7po6PRt8rr&LjXh;3L3Q@WZ7*o%yt*{0$b#qKInkcs$-FA?y zy4u8|?|8WU63N0qzJ5=uUYQ!wZI#J;iX%t5+4Y>cMtV5xmZ%7%_4+mxo2+|tsI%x% zyWYZYmcWrl7B@X{uakF*W`YpC5iEx?dYm?%)iD77vJ{hkTY z51$HB0V8TZU1)DR5wBw54r#m>7bAg7%X>cN7HR^k8j<%rR=c}$MtURs|LXsU9{k_? zKlwqef9gL>s+tFmtyt%RE+=9N2a=tPrdLw0pIeE}b=ZshN6vt~Nu$usbuW=aLq1UN zWqmZ>;sUDQUezi~?{>Ub<>-(q1w5y(Tmf`&Rm~g6&#{@7qln``ABOT-Utj9J9ta zN1PHCq(gJq2u(c>U5Q&J3PK5*hrM6R1s3d*tnVBjx;ANuKNX7Y>r=@SR{>?Om9gfF zJ9;!Ttj2}|>6IsUO$O`2F#cES&&U6y{`lUe{)i~XQZgbntwvz<*Rij7Xs0KpxM9P- zC~wGB7e?e;7JV*yCK&X*dLeaptPErx!jvINuwJ7mWmkyE*)GnFu30#mfB#Mro<*uD zAFNRIFZ&(ml@)Qz8D_5^e`!fs8>sH0#31iq+tTRQeEqd2g+r8NR3Z=L05_^Bs(}0l zdZnY)m>cA-vQpSPNy|f5jk1b@H#-f@y8amj6JZRf*-P7vSWK#^s*bFyE3#g7| zE<@l2kUzMb%~P>Ie1HnV0*=1+=qN}j0T0w&#QxL`s)jH~r=;GXbu*?roIuK-67g@2 z3`WrB*ar>W7j6s8zZ4-`XDNE+AH&9zN;I+GQ(iNvy%d)}F;{0TW8IzqNSdUNzu>>U zwbN_8>OFRlPhWsbO`O0E(qlB%|76Fci-|+v*)h`e35$Ip{hLd zg*&Kr8L{q2W;dJ#eJm|{{iA~8x9Aj}ZA#3TNUxliO}}9m-olgW%~83Sd^ok;H4h|Q z?%8xLk&X(JDyil7{8y4|suHJ*Hl1ysNPC9}ByFT#{+BwnPlCHCUM~o)kejxi7I!5A zRRXOf$1%N*yq-O(bJPkD_eUM8tI*yUcn3RMA{ZEJFe#_jVnxNLb9IOyg5mP5Uu04_ znPHfBfOn+vz5Ck?tGp07I!t)+MD}9~*npmWu6AU&-b89jC!1sc_6ym9Lmvm&@W30h zKDjBFn)#?A+@9b=vmQ>Nf)~jw{q|yeH8{cII`msaFoe*s{yr==WJ0T~#_*IzGc z#*%mkP3~{k=zLP~QAG~Im#lMDbsOBhme<VV5^+OpzPX?~H&=+o_ zHl$D#;4FDC15J6((BNk63C1`6=jVvZC(FyrQp&~0nHaT_EKLH(g6TJQ1I;)4IeRA= z4)@q8|4<+N>sNz2%V1=1(zaor3Cnz>z*RJ~2&qMPI;ld-)Oo4{PFDQ4PO>~N2lC`1 zX1di~IzH_f>b|6-=S2eexBROp6}YxB_@3fhe|ON1M)peZiSdv9!@U7G#!XT8;bw@g z`N>t%jq+Th>q>){G7(w}a8?Ng?*1jR(_4$1AgMG7s1{IJK3GvKjaB680YSHBWg@3R z-pkiTt$MHI3jTF{jv*t?JURct2~`%HY(rm$3mmWs;z2t2!8XAyU|FI6L#ztw6JVn4 zy8aAeO)jHz%JK2o-;{X%vl+c+hPb+rQTw&;nIAAWOd<<*U(Cm*rIBWd`yoo(JugrW zTCbv*g62kvi8=S&0fBp(cm=g+Ez4}}U*P0%@8rdQzB0l2)xQ2+^jx(@hgSpgJ0+Wf z=ZC|~GRQuta#jy=+*u7P5);Yj2+#^ikR%)*q2P@7r&YoEg0J-<9gF|U9XLc^ld2P_ z$-v2U;FI7735=Qj0cQwp><1**j~z`=-#@CAZZP;EHkv8-cviG!FT~16V3})kMt;dInke;$uK6!M6>E!gx1TTmb)(f*~h18bm#{L|L?CH 
zPxiOQnV&xyC``!f(J@n|b3GI4nC8sMOu=UAW3%-&5TAlGL_t}ntab!dW?{h+@-*v)H7kFD18dLvzivNHC zJrKCpm%vA#jK7qY2zZ4RT{2g9{H==fJ{XA~*HMInRPD#D z;mxEEoEbsk;THqf!W}M13g-$ zF?0net;Nrncc*t>k5p-?frDUbYIt+!ahSF&<&rl)0*!=EfNPuG*?*z`qMa^R$GdU# zA(58F<0gbX?=b0Y>_i5?y>04tzKo`^-^~w+vdCYt26>Jm@)SBtfKG3u%8bB4pYm_u z+CLTv+<)ee7%0iD2wcgC@twH0`)tu%W^um#vAyH^*a3@s9I?kFYj?9&`A0Qn<0uf zn`HM0f|%XBkKXP(s_l%b)X{l-_kp|8-Dzn)=y9{$^}q!Jb#J}>4;_$afqg}$En>Ir z1ENyqTU=3%H%Zt#>WE8pXYWe*&1C!H zlm7X7s?kdnP*o)v8a@OC9lA=#nY9~2BV?#X{|OWW&{04@TF~Fq``QzJ$!zeDFu-d> z00kjNN={BFeB_NrwKBALbEGRY!McprvchK?Lx^B=IP<&Scl@oy_>_lnFega9RsLwB zi>ex#Bvl|Y=hC+V)V%wuV=Mp!r1HrB*z9xBr__W{Tl1_lywMPFdOxCHR`yVBD zCgYWsBeWb1b#y@G%llqQu}R^pm^e5=^fXF}^M+-re?+KT@!68X~VbqXPTERtRfAdhJm`U6hMmRGg z!mE#^kWt1rW_VAjNNV)kdNDBwf6~&8g#HM)S`6vt z2LyA61d&5F^|!8YhYPKJrMe8=Im{IDv%U*Whp&vc`bdJA$&-l=6p*Tab%pZd*`v+~ zQT(t*tR=iWP>DrPo#)~*cX>3PkW2+H$$jsqvUc35guadeiu{WQPbNUVWA1RSWIHJQ zKqymgr-#!;^T6~R((U`s9wX|g0z|(v7&RA@9%7{ zt$f>%9!(D<8Fr!9Pr#|xK)(aDTv7#G0_ExYSK<$Ul!keZlLUT-2oaFBezM05G^|`< zaQ?E0$~XS#AXlEsitpv8E$p^7!9_yAro$`nY?+7%0^Ci4dPo-ZKHGi^6tBdt)hjS%%?aLhi4MijOWkA|r{eAlCE3EMLNpxt?SD zYJb#MLgN?8_tBO~DR?GIq>of$uA{!=us*Fk~`%2-#E6d=MPE$)UY=qL*x;>h2Z0R@s#{XKqdm?9Le9nz?QfG z(>e)f(Z*a+nbAyU?A)#)EN>SE?zFuVWh@+03#IPJHMVg&?R;_Wtq}QlvFBF(+u1@Z zzIbvK3HZNs9YI}NBG0dz-dG@#E%200e8Uyp_{VO?D75>d--Ye&Of60=33JR~vXKGE zpZLM~Go0Z{OxpLxHJVw|VYr{Ko2&KbG&1uX^gPWv^;H6t*)pN)%tbQBinIHpQznIZ zs0QquS`>N4kk7vvsov!`XPi_~EYusPE27;GO~jQV=n9JokMJUXVp8K*!@i&GJz`Q( zAEcq|`d|>jadbh%X4(486VQ_Y&90Gs@MZ0*{mo0vW+sFS)hPjok-MP4NpjiazXTtZ z?)J0je$;kLjh#fnbiZoKii<bJzz$nQDp$8;yB$j&Ewz5Y1QveO3(kXAzYp+XQvcaGpm5aCSZSA-z zGMD%y%#~gJ!I`we!i4{Xs{cbz4nH+{5s*~KTEp%xzIK-WyV1u85Fh0djF!udd};8Ga!JTZM4RyXqlHUUmE1vrsSHNJf);(p!c`Vkue1SJy(ZE zjR_*M4cuPQK#-OYdU^(}+AL~dRBF|IVc|5v)~mRtFed(w$;J|Uc(P+yu~1F?`Sd}W z&@*;U^M<7S99(!4=`{aS<4*VIP4`2wgvWGBsi5Je|7ehPlH+;t62uqYe`ib$__nRf zCM!+Cj67Y9#2HA4)l35fq9%(oqqd&y8B%$Z9AJBY2DsV!(Z=xRd@(9u3^gpd6SD;GjtfqY3Ahzjyr>b!2PH&1?3GXaVqZ zTg)e2V5q~gz~vkK0Jbe 
z99Iuup3N#{XxDB+0X?bbIz$2nl;bZFF+8tM8Am!lEs#k@Q}Ez@rfPWWUMB$pYlM8D zn$p+*wYks7(ViODVR6g|q+%IYDvSGg+#t>h?#B1}ea6M34jdY6c)aw|4XQU!2_nE2 zzcq!S`4V94gi=}C{yb~2`uJ7m-zzd)$HjJ>8&~b%JBY*iLFC zPwg4^{A6>1`0s_LIK?xXz^*31Ia|l$P)P8DuR5_S2XJ0rzWR-mG2BIzDW#oL_!96} z#}dhJBytf_k)LozS}9Dy3-|+)-SyGDAn&`tSun$q~b7~J-ibM3_w;4sFkU&>S#J_6hbd^PgeM7 zcpVX*&bYPZ4057jVRlXkYlMSx0?pWNqyimy&5ForF*wd{KL<}}B>V!9e_!$Vjy<)1 z^2F6>tU_}bc3-FDzOBfhe4x+QtfcHMe$=p2>UIZ(@UbbqI)}*!_a=~t8m=TzsvkD_ zH=6>!bc7*t+O+$|c(gBmfd~Wc`xdkmihPQ0@PMY8o6HyxVAQj!jCtk$t9yp&2|*Fy z&bX%&^?#5LXVnSU?7V+CSMMfUOhHW?n^btU@DCO+S-EUD3|VLn15p;1(0DYU|@x&V@Rg7;6>t55k>0bx!R9Y^)DF0JsADLbExmWyr}{qNJHm2 z-;7#nrapNJ=Fc~^OPdQuUpqnWVPD}CM)6+*5(A7cN2_f^?V^wM-m-hSSHa^rEDVP4 zTBYsDt$Wd&gnoMF9UKz+&1S>znA$Qd`?u)Q`fitRo<;ca)7dW^5B3&1j^{108~m;T z7_snU@%Q@%Qb{YydxKD%3}JpA*R8vhu1Bry-f{5aR@;K_!vI}*0@>>1;BfDFo~m>s zC%iD&;vOJgbq^&f_~)3Dq7_E^UOpiO60&Y1Zej-auRHlnfVspVvNCmNWBSNW3nW>c z=bQ#cMzSKsnz!x$l-%Obg&0UGS}QWXEuVGZXOXy~Nz?%e+wRx19NVl!K%C zEBw^Ad77jCpgN3zw`aJ0F)Yh|d~8%@RU7LKxqY|NSF#nmdx`yq&3<v4pbz+H|9T7aE~NlME!8)`W%01eR-*RRGEG_L4*~gC zE8CQD*rxAYuEPbHU*}4AMRK{g2+=s_5R||3y|!72O&38a>Q7dPr4+7b4>Q0fWYZ>; z+vWtM6rg_9!#ZycCt%1>R@!Oi0Aw8Oi@=uKgCG01<-|a6z`j2G*tJ4i?K${mX|8OP zZrb7B37E{`X}rhz_f$Hgk`WSej*;G>7!V8jZ&AiwSD5zmC**n_t*Yv2zCS#^(6xWE z)1${v>QMIKLty-fVAby*_-)t-?Gw6xxsD#y;Mc$l0aRlk;Vt25 zO=@Ynh%hk6tai-u)LF57HvC|QE_c|=Onn%s=06Xk^o)uQS^k`{bf+#R!k%*W#!5G$m)Y04IYJg zY;tj>vM%l(4mMRxqB6aJnhi7RkOrUM7_c!Whs=zx<}iX*q zKHgnG{(0%vp2{y*s6sWE&c@6f<`5MC+*XhEKHPoqel8e9r&+9BCj*}l1toJ_SC3zT zP@oEQ=mM3Vmp*ZCW&70>=Ch|Wd7uohz_&E2XJ6erHqwUCgtmqOf8`8La=xu0hY8k= zF5;$WEsnu+^KYqhiro2`$9q4&ge4ZK>S!~>K0lP2>-)8qo}LwE?~y_A|D~w7U$4= zxHcFEH^$$&a7>gL64|Jb9oU@LPHvh_{^YKBq?Y2=u&WlO{PxWOE-v-|a7@r^RcT4X zNw@(eME^Q%f=|N_hq-DVb_%1unN>`_)S-^w1r7K`M6(7x<&12qj0c}tM5^_}4D4-2 z$<<>rrNxbxOY|cc%|Lxv$$NZBk%0EIb%NKg6Q^mhr}6b2KqDBneeb*H%OVim!vA49 zt>wBTNt-3@VMbng6UVyHUnj{l$df#?GQ9JxDt+q@3Ry3M=Hr%K@B| z91GQVo2B33J4(9syy3$6&~i8NmE&#g$M6>FjE~kL8VkoOTsSl9pNVb;VR1WD62534 
zJDZj88%y%IYDf?9FKu#XRs>~&COOCRj-@=PnnT3(4`k!2n_x_z4_OF!pI^o(J_yq! z4dtstc0L}!i^pMnGWy|;hRJD{?WFo#3$nzAP02=@!qd%8b>;4YOK>k}eNnK17nuBo zT1U4(FUp#hy-Ik0t&BX()x?*aFRHmWX=`lbS0#~ zTv@5(OCrc}%cmAg>EnM*s#Y4Q0JG0}b9r*rsVqbjtE=siO zC$<}Fd+;)iESK^t_)F>JrOK0qV=hW@XnK)CMLZ616F{juSUb@pfjf-Oz2U~io&}e} zUu!aOAvypU+K(PQOSVk(A02mclT9N*yXHsA|Hpd<@K^iFCF6!;=AR~*%+`4{^xqAh znWINAo-nPDb*_Q1?gCIY6lDE9hjG$4eDPJW!Y8|Au*&!@s8|`;>;Y zC&&Iq0Q|A0Sjt=EH>kwg#Df3H-sNMRJu7ZRaYdX|X!=`P1-=2r1UE%SeC4|i{uhVI z*thJAH{3{LM*+u;`{i0n&8!DU;CeUr`<}GP1v+5;0#dkvxaCRrMsuU#H^p@OJCFYH zCr2qX2d1WP|8cS*MnDcG4b3}GzQ--zdkJ{>@B%B*HxL2UL1R48M%wf`BXF{wM)3Hz zC^aioAxm_2j7i@f)HDiGUexh9Bz|O(m5Upx!^_u){aP5vNPz>f0lQ-H&SpwSKD?ju z8O3xPxU%3-*4UjuoJyoZH=n`~b72+yp#mP4T))ofO*VK}5&w1-Q;lU@QyRA*eV;+E zZIWd5^eXQ98v_vteFbh=d?bJGhg+UfUS{X|{>>`Q`$zrn&K4e&+~{ZKw*X1U3**J- zpcZa~3y5q$wou7Q~PCHxBC&GYR+&>hg+^Cg=xU7Fo+r~~q z(Reb1ZI8J;F7BM!XILcAY|-A*B;YX6isCS4k@yi|tX1}1Umr@)AI!@5fB*1D!V-VG zeKfepEC$1;xCRFV37B2lD0(U5r-j`U&3-X0{!y2NDb(w*BrKj-(93-a+&{fs;11@# z2!bzMRK$L?ZO{4dO(^KgZS<0~>#imW&dTTTIl8_B*}T%+Kop_bKavqS-`8na5wN|- z8Hrc$(Nb7ENah->>P)nev)m!%r>8FH8+Qx_`fgS>?v1kILwtyyn6510evw+tmCsXx zHszACuqtyQG}n$abVYYIS#@k~2z}h;w5a{GhhhyrCHBYn4P}=9i9wy zNh4*9XR|TBh&j{x#a<3&18hm4Wk6+&z6^eONCveUo)>MHWHV|UKT`FxME5*n z7gxtLhR@AC8L3O?P22A}@OFgB@Yn}iPfPxCKd)W%c9)FV_6?_hW`~{@3R4|M-9iam zWs_`a^t?C3wG~>*4YJOy(R!M1&^LnA)?u0r-hhiU_u{0C_{B4r<}5B1e6O+x1VE@; zEQqf!w{1Gw7al)KOQNR*>6g*b0dr@$xj4Mll{bk_z!I*={R|1~Iq}w%`OjCcV9Z%K zW0**5LQH;so)`P-Ug?BDhDd>Gp#jHNT;ep=yl4J1rVQTuf?EC_zv6JnBpnHF{qq1* z@(DEdGKO8sw^df=gdO~v8=q~gVuo)5BuL?c|JDU9LH)I6{r&J!wM!l@3bQjY+Arrz zx_5tz;jiB5h?82Rkqu}*JBFf);&kNV#6OpW61$0TU0j}69(R+9wcN1&+=XS(jT*y` zMP0Bma++^mQs*6g%Ow{?j%n}J#uLa0l)zhxgx{0Ft

kn_Ee?T?$T zq^Xl2DiFOQLL|hXR^7T$sZFhM{Zz!;b^>~4`TnP2F(ydUqU2|!1w@!E77Ya`yGZ#7by?u*-B6Z|LcFS0P6R} zl;$~-Ug*Bc?{8A_DlW(oi%M|MqtL2aY8ImCX*^?)+bF6f%=)+1Dn`#7g?)QOF3LX9 zks~JNYYxYB70IKgq7veo_uorAGX!6Sl1w6%k^bqS^`ABz#Sh6*gAIL=Fp(ljvDodF z1-HM|bvTk|rJrnm5fy`YYGY}k-G}(a1k5kMr3s4vF9;NxRvxVNv796#v@n#sxDR=K z52*{rdD%jV`r4N7y5I*gedi~0VwRjB|0$gZ@IlfBASi2BI`$c@nECmO_ta@c9V9Do z%}e$L;fF|tE6^75&rdDL-=wZcDLHuHI=rJ-2jgo?DgvAef>wtEv; zU~p4RoF$OaxSWuYf7~fEp-8t#gLa=HkQ8x<_@rzeVe}@u@fz{PrlH%V&~Kje?RMR) z7Wey7eOd}cp!2Ok2v@>jj>ND07faL2B4PvR6<#sEJ|3+RF8ANPHl1s{$ueWYKn#)M zmJ5JpLgz;KWm^Q#k%yqmWGb;-lS6Y|E2575u>rCo^+pE2&>hjL{A+S+=Dk|pnJaH< z1Qb7T4ZGAugP220a2^=Eq3im*Zmk-ptUJasyCeW1E` zcFcxuf_C4M-#I&Op$uz5>DN|7>_8)Q-$u;)B*%=&9}`hORt7}9wfYUh2J@#DruVjR z>>Wfu|8TL7U+GVWrtYtIHUx0$OdA4G!FZ8&CwCXBAC4~Aw4@@g*=sLLhN38j1j|U# z;nL%xRWTi}Z}tl6L>;6YbUQuo-oY8()X&G&LWA6a_!udkJ~?$N<-BazJ>$*GY7GU# zL7pYM7`#;5HnGoQ7JKLIbNyQVifj2qG;X1`u$l#oBoGVn)DHgMsq5)ki;-e`;-hDm z8*#ZVFH$Ka8ueHD7dp(7i*V7DAXB;1mD4SV&%VBGk%gwy=E-Z+<7NEF;Ihn5V`Qn6 zp`BvD#V03amQDSUpB)1G@f46vUH343%1MK5MWJGw&jz2-nD=|E-3K3)ruqBPa6ir~ z?-3`1s%L>6_d`cd@$iIs0SjUc18!7rhLQ_#~IplxDdpgC)5pIV*Th5K<_ z)opH^qxzHc58t1$6K;-|6RzD#nsGg*$wJjHlxc0jUChbf?cN>6UBLvO5T3}Dhcqe? 
zd>Kgb#$$akL9fQDLx)?7J5u}OQBnuPjZKjNRt{d9!uv|kgaU7__ef`q*Qu{`Y}kN6 zTjCI|xUN2GM3P?$iuo_(`>*M_`N8MkKU4!(jpF;Kj)n*-Oue-i#&L1xRTg`mlp?Nd zXrJZujp%Tz@gBF>+}KFk`rEeH-oR2zCTR3;Oo52YkwR@wy4|zd*9ZjjVDPgeF9hL= z<7y8q5BbhdTkw@ipC=(HEfrs&L2r~$fCM?wMuRv){B*tmkuU!&fDY$5*a+($Gne^u zls<{j-n&dnH_+E&eqjXtR!-pzn}6G4(9Eg?PU(Fb$duEx|H;}Byj7T$sS&yogph|k zeiOzvo3Sojs=KuABaK<^3^qCY{7d}F-x4-j5%Adz$9pVk<8RYqBQPB|o!g1!ljkwk zLuT_{VbT#B>o7#kC+ehGpnHGw2Z4bh{Du8gynR)d-FLf{yLG&|n?U@#le`sK=Ja_z zqvkS&gkVeHBMu!l*D~VGH_&u6`JQlM>b$giTvk&2t;YOftLyeGun3#Aobz|#;MUju zE)V)&1+EU3E4HimYQ$%Tf18wwgKt16%Ow=Ut0|8O)Gzb7Jk0iC884%(FlMZmv%+{uP^C%boc4avDsh#moIW9Of+!!u*=UZ=B2;J0nb=FILZ?3R0tf zh%B3d@1e&n);HFFMh+&=VE>#7F|j~~KcPk|>uxq%#EJ{ub^-igkb!_vOVH>cc{ix;F3Y$5f@S|6P#|cNwe+>zh^8lqvIR zFfG$@vSLCoVe1OZKoV2jmTfUR&W_DvuQx|)`xcfoz?5DX!)DF{ti7%uf&h(9U zyx-;Z$%39nav3h#H?*n+7aZ+V*;B<+)l*HxUWQY{)7gmpn@Hz8Xf2iTjz>Swt7>U6 zBBErUO>|uK+&Sl)cJZ@f$$DA2zF3fe0?ig1c9IIu0Z8H8XeR;9-ZjUSpu7RAh zfl-M2c^n}h1tZEH{>upKJV$~AFirGQ4W8w`E^iuolwKj+ZS>j-gyTEyBn zd*TvHbKk>W;Ea{BVOs|Fqu1{?(Dv&5e&(?{_w2qlX~~n;n-7SmY>9chcDqiy?sKw9 z(0FJ9G!gm+ngk`1-i)gmnk)FBa#!;`mR-K@^*(wX*F5SO_uN-!$adR&G4(e9=Etwu!q|{|>brW}nN>;1Ii!nQ5y&I^$xcnpE_>;Ws$c4s5?tLOJY{lA zc^?8Ln^2CgcMTb(r-$KUu)ZPqSEPEuL33dz&jpHqFb>IKb}~KA*g$ZpahBp=-I+Dq zTiex6b@_F}Hdrp{t{ASEu9&Y%m2`6vUt{gx*I&`8GnS7cWdSJ{m%`2UKW9}SDGtc(~J&4%u` zdx2gvZE$OcMo6sYu1SxB&W6mzaw99L#?kRWg+q?Mm_Y4l-pM_OGRSgw6#gr!^I?Hj zKc=hH&D*`P>%SYe8z*gler}_09R2=P<4K8##(Txdp$+O!2K8DwZ+X4jYB0nEh92&z zS1UwpAZSWIVmZ(gI&Xi4aMASslZB*}zg3IXjTP7y!IxL%XlVGMa(6@cCit~I=;)B= z=Vtkpa35%_AFdCfFvBP6|Mw~Rf3ZM5ppkg3KY#@g3#4c6qt4=_Zeq}PDJwNs+7N^w z^B-KPm**os%X?e*j`j(x`mWZRa{i+wT(wH^Q?^qs_3^T*+s%gVVhJRG`2eoCPG$8i zIEBA?bNWnXqs{K#OZLVLz908qvI^-X3cME5@0QWPKn@A?Hvv1L+CRdt3+#lmtELB= ztJb2%(WRcnCz~SW!dCCUZ$6S${&63#gQwSuNn`ub`O%X=Xe6}!)1SGMwo5`|6@Ws$ zF!Eo<;o=t&+r5wA7ta4nV}lek7HMI!Y24jZJ~PH zz_sg9OZ9lzDVJb&VJUt{&{ve}E%L`LmN%A?JIt39SEf~zT6zU};o7h8;1>nZ{{!41 zdT_l0ZZQ4%0bKq6{7Huph^rYL8sJ)yQSyI(0wWMNE&%Dz8)*!nVOKf5hyT70Zy&CD z9OG$m{I3t3-vKHj^o|s((chA=Of 
z0LQoR59Yz=OKM5K3)n(``ta{Vh(HxP{;!c`Su{bfsw~UwU1S&CFLcd-LjodM3V21micu>x!#MF9_63D5#{9RI3i@z4;B3_T@4RVbclyST0BLA1r{Pi^Ydf zq7nP$!OF?Vu&RF@ZI*abu$)icJr&e1Lc~<{Z>OUJcQVCsC4Y482@dP$SW<=+V~xm} zYrlqJ1my)t?|)zbgD!gaZiDsoZ@+ndm%!DCtae%hD}7UQx)Iys53H|Egm=-Oo=uGC zS*wr==y*J2jtx;OC7^p}$~$Fudqqh-u1A@ondX|7oYtKFtE^4u%A4d}sBP8ON75Q# zx9iSwvI00IJfWSzuUe=|B^6T2YfPt4{DcVu`4tu<^oi8*tzvenJ8DOO;m`-`_Zxt* zw{5Q|Lc|t+4sTzaezPBT$El)-f$`Jv+apQ^v%oHswepp~UWnzU>6>b@OI>4Ld~5-E#`9%HmcOws`#-Lw2G zncZYY9nwO5Qpjt+cwM-Lc&Zx?V^v5h-fe+fNAwjE(hx-XXp{tkwOPl{>CUtXv^puW z;|7spr;J8QUne-TiDFpS<-QY0u?4Y*OyO$; z`EA*YTMi7d;Z~Dmy^$43ikBSdC+rbSrd>v3Hk9M+=;Qyt{ek{!T5x>Ai}rGpHU554 z6^(auv)vPx;e}Y&N0UOMtyqI8Pp+ZG-*OmLA4$I^{!|{LlTJ?OUiP9VsRCh)X^5%A zOxZF-S|mi8b`$l>KYArx3?YBaC|Lsm-7`afZ!Kudp$t@kI+-b+zSX8~W@ETp{-i7f zlY;!Bx;WnYzH+J%4}FyV$pHhBiYeW`ChlWkzcDHI!IvGY&ad)~NrK&twwp=GHx2A; z!d9J`=9cxK!qV4L9S=3)QHrZcxALwmz zAD$VfjV#6U7)dCHCPHGUd4^glGFXl2aPjfZe?6#asa{BAgj>qQU1h#laqwPNN&x5z zEY9UOE2(8Jr5ve0F_e4O>RWCdt|HalUB7^Gd4viIo25OA+vP=RR=ibPgX*_8H!R zgFT1 z16ZW9Q~>{3<2oEza$azjl}djZ-`Wn0vr=G$zLoTH1v*DVPuq+(b3~< zO4jbX9VY|Q+xG6-Q%4f>q1834G~_eFli+)Ml}Z`bDh9NVF?0;z%gsL)0WLhYHa{9% z@7S@=iIQ;B&p&jbWf*#KB&118mpYxue{n{nBLG(oknj-!u;#9<(8iTx-W_EX;49A z1H-=Z^2uvaDHv-?ERDCoKrjR{wE$Qw0d5VOf7;%hpK~a$>W2Lp_S~FI5A@tGq_sua z&VSOphp{`*UpL0ztZZY)oASIXf&F45S(^wxIv56$5AL!)Zm+hm)g0Td6Y6g^)@c2e zD5GjdF2?`yndv=w<8nAaUM>-M2fkZU_zpx*MruC{*yhp+*Q3%wRRAc6kErh;N!JPn zz{Lk19@~qHeTO34JZ1X{>5TSRalG#qy#siUzg>$R(TPg{443&)i3Qz8+dPcX4#r5v}~e719FTzz2 zx8aIsFz&5iq*FY!i4sRfsUb*)kBEnya44}z#48`AV7xed7=ZF%Vj=X8cW*#vTBH|C zdk>hD%YtFHI%M#Z;x#dV`}YK>&#OQaXOpRGmZK$DIV2r>T=}@fNB;fAhTg zo0Ef9nkS6islf&=(9uHPBG5X738Dsk3^=cKe%=&*AhCOV_GVm`92$X@eCu*SL9>5! 
z$KNUB-O`!y@R6OL@E@(O&Px%}Lb06PZ<(&c7 z0&3~k@;>;H2@6BGlI%vb;CVY)Puu8!n*PJ{Bc*wL^aIC=`~J3SYjDrPYj&jF8%k6; zWPGm}%r**oi95NbS%iFYwR!?NK%L$vL)6ry7*36Q=3}sL9626p>ySrBmpZBtM+&+3 z`TV`7iZT2trrSn@sC!(#rCuaGYNgK$9}Vafl&X00R<@{MNr;DtMTHVbM1~jXQYbGD z(?2qV`Re{2F`JhT&mpw9D!@VoCFz{PT` z6iEohI@co(k*boBqCq`&p?fI68U8>4AV>Qho?d=$oMa=2nPhM)n#!ud5?ahf+gu5d z;Jj`-<$BYyTuy0DUv{vT`jF)gI2#rJG*Dl6wtSZ`_`sW5Oc~PhOkwZRR95= zv;7sV;6& z4Zk9!0^qmQH)SpT|79G4h2jL!+xagm4I%-DxzC3p$`WK2N4*FO@;*O2>35peOaJeNnIU3g z63cU?t{xZ5MU|Or2j7qdD;L@_4V`?l6eZB6V=O0)5Q~ftRaX-6HUMV-iwh`Qv&I`f~gM324@7 z_7l@7)!bFTmuprU$EA;#x3M)>JUR;>vuyI$HJ6VHDl7rj?`_2bnN3?6DT9Y;OQS)G_6Fs`)kb!TMi!!mj;OUIKk;#Xi$CU7 znMyq#?b@<>x0HFQB_cMqP?eo8cA0uAmlWP|5x9 z$T}x83gws{z|uWSRQ|p5%Xs&wAuem?WM%wC(yg-q8)DfV8HDnQqTo>{`T6k$ABxCX zdlgTR&DyaG{rhJkrMivI$`uQ>#3z(bmO;*lPB*jNR#_)0cVn)a+tU%s>6+En1Kn<| zuZ}K_zE|s4LU_WFwWeQu^s`(1d%ehZ8=JC1j;OmCf!!t;0Opced1Sqq?a31|Lu!Je}b@+QvQ-d}2aEO(U6q z6R*Abi{C_3r~7dz!WpH;4w`&4eHzS@yz35yF@;{K>2#(XHyH`9O@HuA%##A^3w*$b zi;-bKLelwQFZIrmVYy#H18Jj246bH-Q$u1Jf-A4JJ z`X>T+o*QbtE3dEXGZdmZ-J8MbCLi2C={HO&(^C2I=767A(!z(#_TuAmhh-9BsBc-E zo>naU_{R#=nwMdP13cG65MJw{SU=frtMhZChb=pW?5^vp+8#%j$;aa?TJ=d2>0<5z zDyfxS_f4EdgRlo3-_~M@F=D0}0TiqXoBBb+LceT#Tay#}2JvFV-HFz?! 
z+v~wV{h}wp8DpL8?)E?$H>ypiEFoUVQR>{tS69~w`-gX!HE7!3y!$=2kS9;oNbHFk zM=2%1p4Lq2`&HQDd)rB3te0{KmY0T<4;GocYBx!FoNJSGI{TG=ZYMkT?e8NN zzSNU{y*y)_AuYNdB}LCJdlbYOO|>FR7+3eh;f8dir_qVw^ttp@oob<3{s=M@V2 z2@)2^xa^sdMCHW8Ne4q#_xcKcM^pcx8HZ5>zUp7^DQQBTIH3;&Q$ia~uT}As=Po7R zNTysA_25*-GJVt8h#9H4FMroCIrEC0Mix`t*YWSA6j=(V-5s6;))OCty4Mo;#N#N1 zUaJVO^>I_>_8B)nzC$hP6V034|M=(pC;O>tx~@U$ur?-42Ax7BaE?9@Idl!^Ml~B`#`}(iL;m3A|TqjWBk+dmRx>OO^D2$B?Kpp+Qg3|W% zoJ$>#p%PXmr7;H7GfSm~*fiefq?^OKdH2m)yjak+)(4ZAP;qon;&MWx&Ls1Fl4p3I zZu>wF?@WF$Nz%@eXBEG-hmfd{Tvj?{G0F+6&D?+gOi!|ZKn}#lHkz1Y-e?{&D6xj;R)skvljXSA(YXY z7e8ObpGV`;A=z`O7C)_0PtQ=+Q6c7tiDCQv*2x_iosGeF>mloKs!?(V{60KCUN8Rc zeHP^ZRFMoV3$4O+`n3U-7rD(cKk^rc($Cn!HNEze?cJ-Rb1n_eMhfZp<-9V&02`Hx zmw7yyVoEw;6|YS^Y_@9!<+B-P9M_-{XNp@N`mP%Kt3SV!8@D|3$J0dC&VHg)MG=YQ z4Y!$?-jauaE6o0>ha)AOjE7p<6^+4D^H{>tyJvwqy{WODzCP(39%ZElED*of@M%Db zRnbAa=71zJ3Hprs5SmUAVrJTITgFuDDMZ9+l73-_NvqD&#?cdTW*c6c#j(WWFxr9O@;^3r|<)Ha~`HR?9Qh*$ME#p0ga{Og65s*uQA zOEK_nptbUQ>lh_Ul{n|(a9E~*Im(xoce5d`v@Yp%zW0(?4Ny$X{JVbfuXE(YsJW$& zR%uv?`5mHNujrg|zx9{U7@>YPYpoXfCF(981I8*xcvu{bWs8T9ZjB0+n9p&*dc2g_ zVgmINA%}Tu{X6>$@*=tDJqaM0hB+10TP^3BJh0|D36IUXSi#d7$FvwH7Q1VYP@77% ztIHEM9G%1&Tg6()=aTPLMR)6-&nrl?%C zdbfSsIE`&Iw(X>`-K4Q?yRmJzvD4VLRvI+6ZS&6l&))l-d!BngXJxI4cfNCsp9VVJ zH{93ISLjs!VibN)+4Qx}<$FyHo3de1sbevMv?%6-&lz9JLSUMaA3d9T}%5tT2CmE3A!D|3@IC z1aZV!9Se?ihi5WdA1~0jGqO#|G9ggifsNIvLOn-aX^;t0KNI7zxm1-3$>DWDytiHd z5`464Lb!Ta+Wr8$C6#QZkUL_u^59UAO*WD^e33etyF0}hQ~2~ikvpvf6Ql~!dA}DI z@!hfsP=u5IvYS_5>F@#9giJ>k@P;t5DVw#Xx`)Z`wC6u!vkdN+O2C`JoXoVmnasS* z^(INDl49X5PkIg~N+XVl%1NLSVylAFMN2G03+2 zw+ja&0`o#V8O-)->PgSp>i1ov_Qd}}?*#25JI9O)?lGx;~H{CHy zGK(oZOKo)bTq1@b!2+MM_4XJ7bkpv!EY!*3FSBdmHFAPszFv#QQZPAePN&(Th?-kt zG$5|be8zP3P-?UhrSJrOixcvxkap^r^0Jgq5JzeBA0}pl5=Jx}701DFh()@-M9F7% zZ(NyTZ_H>iP^r;QBLbXv&fhCOg-#vDKvw+_J2{DRnDk=ShOK%3VgQE<<`%00B@CS# z)o?JvB8f^V|6yUR5QrJ6w{EE&$v+#{7C?iWXZ&zaGNGlx2bh>~8Ok{#)=lH%+w^U9 z^hC)KlQk`#Ky|u z_Q6USA?_cl@S=teU2)~&5x3yn2gJ6so&=8>fOCg3ZkmLf@8JDq`sf{mfQLxWdy~YC 
zr%KCkno1o9yUbY$c~7jyDniZ%yeS}XAV68X(XOlC@~e#G*vxGgNSjUn@N|~F|IDw_ zXtoxZUMl(mvc7XX;;>`1nYC5cSz=WGN!)n00DmStiu03Y3VMI#v98(S z7^+OYgn^wzJSoJjE0J_a9@mP? z@AA{E#R~4?RZ92o`1g^A(OsFbd`B>Beb6tg`M6aIcP4$o}tc7Lm!)lSq* zfCrq64o~zEyI5-D>S6F$%55!Z04`OOUN50JW6#NB+MLN*H4%&w%z|JuiMZ{$UGlPh z@(ebsL9O8m4h)yA)OUN&#(Jf;FO%=`Tcb4OnQ*K7BW@O-4_vvms^#+(i&jtH*+TkQ@MERc>+j)=!E{Z;Q4>$MVMKHZ|A z$qVl0ed4tapU_%0*~X;{<*U;6lBqm>P}$=2-F9d!v)__;yYDS7xHwXrIQzuCQ7n;a zp$(uj6yX8bRfNN@-!0$u#E8?A5vur}w&c`pxT7pxIwvvdH5(&j&sV&Lkz68?Ka;o; zwvf!6I$;U5!lxI$&o*F%5l|BL1|dY=t!8bw?h|O=W%cBsaRO|QrMinBBhfo zeoTZ+D5GV6>vai;1j>fq(`%%TktdT(10Jn<=gV4_4zD3yL5*0T;JLHor<-xw9ge=S zI4yYZohlC_phfHrf{g^(v~3=hE-y5?aw48`#mJR6lA1)QPGjC9m^{*{h^KL75lHiH z_6D;KI-XBA((`OA9r6O^FUB+hqh5S?A|~mO=!o7v^2d6e{0IZ6im-&|Bo>gehwRrp zb0fP?+pCk4?#s9IqFEQe5*yF=VK4-VVJeVr(H5e0CA zh>eMVU?;{ozZV(h;dpuW4ZS|add`vVaFKXBteSi)cC z9{{ylVESv0;m^3~kQV@a;9q|s#s;(kQ~KVHYBOL^0QRWB=iq0|_6E!|^M1Hk!$vG4 zY}|bM6X|~WQkt7_v2*!F@Q~IdxI34a_<3FBUXXdy&ZEowH6XLeZsX?x_P zWfU&e=>deF zs+oUd0Xvl2L4|x3j3)U9dgxF01S_;-Kb!y*OA~Yg$%@@yK41N}K`t0c0TUbDl}Kqn zRMNXS9nwW|3?n$ENuJ2sGz$449LZx^6L;5ix^%VdUI#+&K)g;J7oNI;+)yOLQ#O%8 z9BPcVUkzoB5tUWLfbLecHu+q` z1T|GmeMP(NnE$fVgM;k|fF|@xOPuu-X-nd34$1E2&7#xEiqi)`JSiv%0xJ~0O0o!z z&ABi&SSi4f%H@I*(ajzsDnl z-=(2w9R~{?U9$%KR25o%djuPA{TClH=3%$#MbvF9VKva!fjJ-5(_7ZP?%wO-fOTc8By^|346pVaxw zDDKFpyZ46uvsY-a>TeGuXC3b@I$&ClgoH#&#o9VAGI_^69ncG{2IXSrS*!wX0pY$n{;AKJsw~JFFT);JP7yl;Sbr-fdV`6ZV(C2u3}~= zmPiS}59@6fOD5&csLjK zI+nZE8KuAIBF>DTGRg9l^g@f3V^)hZXhEY-DuH>-?0$iLP5?SBxI{?i6Ncq}zLb7t zAFHH_@MO5Jqc8qF9#n%o0D{%+gok~Ue}v4141>W{_S5p`Uz|Tq6#sWQK}ORl1+A<| zy-8?JiPEG}h~M5tB)~LWTs&L^;?7`m`|Y~oJ9>TJ#nUlJrN-lFsEGVCw#N)RJCecL zBwGxMkgMYIxFM#9Atame8~Qnsa!b28mH)ipvGWOdfd^Dp?GGTuHkr>9WUxsQcxq3s zC7PV`cw*ET4v7uzH4I>lV$~ll!1+MnT)(Yql~F%_g$erZDD`qnFb!9k(U|<|FQ?1m z1u1_f7}^l26ZhFiy#9FU6YU(XFU;}$*=2`W0m#2n0-09bT9~l$!-(A8J3%^C*kVhh zsnNhoLo@jk?J3r%9>i_NpA-m^XYNx_2Ae{Pm+?HcYNZA^|C1YvU_HIy@b7EccRNH4 z1(lztEaamgB-sJYSHXn+OMi!l7n)_rqF; 
zU+5k|MlFx>{)}0!*%yx|rsF>|Xe^A;4^Vb9t;cU%hT~kWS5!}kXTkm*p&L(&Z3|?B zyp8!rv5jo6fj3x4I(MyS@t*yQF3U<}atZKiN9xGiCD0KVH2(Q*3Q~u^%UBc0r5IkL z@HoBmYCwE~?cy3(?_BSKsy9d^G|A-wmAp+GKppOXr;;QcBYxICYKdSZ!lbBg-678Zy_y9wEQH4jmc9!p4l(+J2H4z!q7t6dFGKk z_fK?^Ys~S!gv&06w1c}?H@IR7%MJG*Xy&YgE=i#`XCjx~uOG^860O%7uX#&<9OEIt zOmY$p%ETMLC>E<;_SPcYS7r5Y_^v`edLbePkNNl4*t$(5EDj!vbcsu2F_K8_8tz*D z(Aa!&e}2F{)8>h3Z*j`gZh*oxi=Ec(@ow?^)nJwAAJ^=fDXZh_mB%0ldxaLk#FFbkA#F<6a>+X< zxeEV(VT`^70B}35yt>qA|X1ikM6Fh~ev>Fs%c|K!ID&J7FxUl=|n=jj$x`J8lc#P z06XTW64wp&-Wt*Z%YRH`HY9Pa;2I_&B!fTPj{-`2{Tn~fptEP1{O8iSW?hv_(nsnl z-S@ZcOrCaTeyS(1*VZfnw@-F!?Q?-w4*)V0mCg2vpXWBEprFVijP}WT66B*;E^X5+ z_}V!xDDM1a6}4aCdn@}9spAfnJ%*1w?p4oEhGPqwc+sxnr;W}&525#;9n8S=EPO#1 zc%zWF(BD75Y!6@dad~dRh$G^{p$4hS8rTFH$dks8FtBRa4ZfEC*EZxk7tgA#>HAcV zgG}L!U7%nU3bg9^l0Ak|cNCnqxUkz!thlKgTrabyn4$br%(wA%pf69)7_TSGF$ zn zHf(8hSyV<9UyEkhfu>DzT4UH9-tzGBcWZ))3yLRlq7srLhMTDaUZ_#XC1P_$-nNo= ztVf?--hAIgP@TwW=LUxz5=|D72nA9KaLlM;-t1_&p5qFB3XEOe;(hURzgZw)KWd6= zj7Y79^+}F`KqK>yN~PPP#IQ2sGBJ%35SH?Y-{= zhP$Ar6AJ2T?@T7bsnH~TG_%ZDzNEI!WPxN^Diq802Vtr~ArOUnHk3s;sev2xg@`2$ z#~1qT#C2X&?=GN_1f|+YW>I~_$NSTBHc@U5bl?Z9BAU!a>AH9~z>EM`Btm{umRhq3 zvQ9ygaOf}Lzq`!LZjKjWc|Ou%$l~uA`KtW%DTBx7fC74S(R15V6l!HrYBF1_@hsx7 z;>oMFpn3A({a_DNiZJL9YIqgHfd~U@l}L))p6hz2NnZ*afXellh|c9hD`nkJ-ZEsF z`7_NvGM?N2B>rHm}jeyv`IFeAjGQNzf(9Xpr>0sKi#qVem@?d6@tL?sUS*vdD?*^rA_LIWg z&ldo4c`#STYbuvP-}iWOCBvfhu5&s2O3lN03_s|!w>98|W%9-@Gq2wp7Jjf?!`Rw* z5@N6=5K_m+%XUgNGm(9P()(FGOCv0oiLh+0GNPq?|NE2Hyj@@w9-vq&MdP)#Hl9o- z7#PI_H#0ptOo?G0V9zAeG)$9@N@FgtynXr;r+{2*O=PAEda#2Ei6IJT-V*K!y(ii^ znUMG4A<6XNFwdX}?*aMn{*=z9(Qhk^7j(`Ok0eS&U)rQygCwhZCdX=q}v;FM|nX zbOv2@U!_dUnm;D&mTFYr%TpPL4+^h~^9^oMn_c>Nah!FKGs$?RLZrnQDbhmM8bC0y4>(&_7oB!Y@~lmKrXYnF4SG`D_E5li+;YKglUGIrj{$K7i5p; zH6v`<-`8SPjvUoakZXD0AjG84WDHl;y8Q6nqE)-a7pDRDD{F$T*@}F=$i(zQjage2 zNvzvBPKxRtIufICP%0X;i0I9!zz2%~aJXtJdsnWpUESQV{x8XH=W8TQKDzviTMmAf~_>{l(Botzslp-Sc}y zjeo5xCgsAkLvHt?Ca|{oMALEDXy98Rv+pQ2lYqsRF#t^`Msd2h3t)q?m}!x&Bna$+ 
z@3wp9eR&f4rQ$B-RsfdazyXf@0srWfRD*9hp0)-wt3&yDQ;6KV7vs z=t$I`E-BAt3&OOpgPXCiDug)WD79KLw__RuIoq~tohUM2=|%c@q8*M-q{h>D<+8fo zf0xS$%BJ(fwhx*MCd<({WZR%|?gLUm)P9tW&Sg49**h<%WmhtELX+Hv)VNHeprHm?h1s9 zb#;*Wzp;7xQIkVjxXqpBDV@(0EBwBXTJhRI=88=0&r#aM%3(6-)xT=qOyhA97;mE0 zpF=AK<=Ol5-FPN4oN7s<&S@10LK?!dP$*R4Z*7j45@KGXp|dE#r5)V z)2P}7-RV3BjJ@#-`1XHa)6th!#YKRfT=lx|Sj^-1p_g~|{WlhXc@}R(DNLeTGJ-TT zs}K-Yq1(d8(_llf?Y$kBwCmyVbi&Q28cOCJV0 zBxX+K$)@LOJ1@kheS#605m1nqLIu*&>uY8<7COD;d^y&yJle4c=X!bZy+Y)Cbik0njj48wdnGJ49w-3o^I&oH-j;1{gWAjmExdZGn zy|Jg+fRhR)<<6E*5kE1>!Ji_ABqbSNS_dwPtO2;-_9t57c^# z-Lnt~Vc=Y&UKPu1#|1*R(j61RQoCwAEX>Et1e8;1V(+o>C|E$QEr5aSQ!nL1?=v z@MI;&Jac06m^uz88_Z$;V1VjJpiikxL@MYL0q!YYlSHLBJG1W{mo|?d4lq3{4_2e_ zSAtE4>tPu@Enu>h7XAxbJ`noLV1Xivqt#gg)gn~GP*^dQVYWW&28gnNy0XoujHDBZ z1U-jI%}N{@6{TBxHIYfh>DcV%zZ9kOupe`NX;_L@>XKF(Ph&%UZEffsA7I!Dp=>C{ z1-O3yk|`ti$ZP7jMG7bl+n*;KTn@7sff5R6oJBtiTN@q>{6?%p%47j;!$vX81N~(J z@skV5@8Ybfe4Hx4<1K)zHVw#y?^nll=%8bA`Jndw^P}dgszgPcMi+1rZZeqdGRUNs z*{qNlT;^Qdy|=}h6Mh&ym?;-qAZXeG=oYi%i@o#t0vz=~^JN_$?L5QK4Yx`Zi^}YP zs-VPN*2cKXAjY^>7&J8y7kR_}NX+_>Xas9F^EA_CGGwZm;c|rzM3Esypi@!!HZzJw zhp5GA>epGWeoD7JvR6<>?Z_w^F;D0L)M9}QSw7Tg-kT45FEg_Ke2_|A>ZCg^!?9JT zUc=;PStfn8JPn5t(_5calRy1Y1msB8Yn`yRTbr{u!atDQaH+4IkAJHqoE6mt?8|C~ zWvCr*^z=PD7O}uCO_Prr{^@m*+_~+D=d*B(W8=)W8GoXJ&4AHH5j5srj$yr$)nYP# z(BMMAF+o4uH_sqc^hGt2LZwWS>E}Lny!rJ;Y%q|X>td;z!V1%(?e%zcn?_Ek5rip7 zriq0bSDDE!G9~iJRN#kHvHA;=QXv<7r1TI?eDvX`Jpb-^Pc}szBxjXwSP_ENm}X49 zQ7&n@Hig1_HJR&)Mjm53XBjuFBZng&9yjSP{Sg}Cc$t8Sk_|W{vnW^V8QB$Dmp-gS zAId|&>pS{9vB)Dh+P894MqPY&cW*Y_>?U_e@#tJbbe}o8@YO)x`Fy)r#02~uc-=GB zQGQu&4jhCuchD!p#aYZ(K0DRT2AtJ`4k_!p&)yX~%Dn@!XUCze?Yi!|TNF)+bq=8j z2f_8FDm7t5J6-XCip166&Cx&E#5_WpS+>BIGPTGU|cH|75I;D!5bCnw7-}I4FMX{jhsF#vme9( z59_DLO}i)jsGioN(=B6)g{~&E7Ru0wj|9JqwD6iM)`rQP&*n7~f&jqkqZu(zqaDTe zAzSK>hHp}(GA28&>QE=av%}_!n3Q<@LS7|e$I@8s&7wbl6yb|y|l1HYXy+= zcou2hxK=^ZCXe9;aF~3Hf;(p!8?~gmg{g9Tci@c6-VM_r-LNRHQ%mWD$;i7 
zduU7tFR@q|I%eG+?lc-Ver>)^tZNt!FNJ(A7ea28QlrIOzoz+2142Y9Jp&WpLymTml20-+HaPaAiE;#bS94;vK>ZKVmI#XwD9?AU&>q@byP>dg?F&db6=LF z@81pR1W_1$N%R_a^WDGg(+=cP@i+TPfX1T;D061F#%Bpa83P@MPd@@9CE%9X4oxyz zX1=^=D5QS=t5XaIWNV~;n+ba~0Xq$YmB(K%j&_~wEP!%yuwo>?W57<(St_ON`N73{ z!R3>fbQ0@ccrFucj-ZzzJitN$`9)l$l2iWo1C(Dj7&`m^r zDeSyT6^gL@Bg-}Q#GR612!xwtVO8D-(^YLv&QPe7(!GHWjk)md+}q-rLfTT4o*FQq%XKr5&>$nSn5G?+y9Z8Xf}TcTzQ%OzHf-DH=|=s8kM z0)_l1kh#y!?N%8g{}Q&|O@FEeKgRYdAaXDn@rc+@vQT#oH_iqa9ExozeJ%33l(z&i znS9IOqXhfOgE-7Fb$a31k}2bFFYd6r@jE3qRFsmW_P0+6WC|%ep&Z(LEl8;f0BHzl zM8Zg5z-T(X-u(f=uz4>(f@p?XMMElk-NAOH zJ=$861-H)Cav6UmGx_Ri3PToiTW=hDD*CYV_U8N#Y%HN*PyHfXl%^z)BgfumK4r8+ ztJ&DyDwED#$V$&jSQJ?X_?6e34uk82b4d@MVFJP?ROriJqz)d zn>4q0&5ks1?kej8Z#h{cIyUlc`4T*BJri9!&37;@Yu^gb(i>Wa?O5{FE4xll8nRL#FLi4J3&u z%%nLrwK8t!47OwfAGJKbK#W(@ISWi@N$A}Ps_BPRNkP9mk-}`$Af3;bS)V^Bb%zvrTOUqtCbH?OQd2#F#Hn#b4TNb$>U0 zJ}SngGKAeE%Pu=!MPk&c|D{dgRAi=6&SyM>Ps}mB$O5;*ro!Rqfco-z_KLj>ZHa&B z=0i2-=o1Fs#mR_@MWFm(MaVhIS@Cj%AJ)ZyGPkv(q2j$T4Eg8}#DT#%sPpyCd?4fH z!+Mu6IyS)Zb~B=c(ZPflD-b@H-2aJ`+^9{k`U*d`;G=pc)*GT08BhLvzY9+%n4TQH zGOq%R=wxx)DxtkQUxNbll;gSVu#7o=@4dgT7u@M(1U&6?3CUx5IjZ}%JZ6yDzro`a z_3=w;x^9t$FftAvUR@2a7D&IoQUDXFB!a*DksxzBqVxGY?G#hYw^oQZjxTl`+D}gG z1jNlq$=QWdd;}?z86->Jq*wBC1d3Kj)!qzZ1rZz@@X*G=Hl42}Fn*1|B*zU@MkSYp zPmzH|qo{~_M%xFVxwIyCju0g5V3feYfU&m6EyAT8M{B}ZeBS;*rL<1{Lq5@KlFcYs z)0`xZ(r$hk7nY=vCfoI$k7NdgLjgc}1!u`pcHez=z}j4VGxHf`u}+1^^KxSo++#L7 zNte|sHk*$mKaATUihz~lEDTzlWjYnun%=Fd7rWVj;)Pu5*HD-(Huxh+r3|I*_ukZR z@j~Fw+FL$W&_|w@qvTiwaAcpX*#?u9xl^8EtXuPQ&h2v79DE)fW=t}K zNg$PI9KRCd_1*-HL8sd55AiG%)yH$U44KaY@5zJ%EnNs`G~Ev0i9mD)sTeFyM4<2f z+~_@QktUm`NZffOpwtjmCjpEI4M zpl^_V4OSc^?r1#&Y3F-s#NANo|BK7*Ew9PaLA+9-jky@beqz?YSKZbhFiM+3vLmv3*UF2m!QB`<%~ z`Ow{yzl9rtm|HbMmwsZ*amG1j73p|5>kIF2^m+Rb2%yCH*pyeI*ht2gv)WrFM|<&f zi)Sy+;X+0BM$i94|C=Ow3_H@9&wEZZ!(cE)03DyhH5XMIgafr9kPU15+CVX>SoA%WZ&i!+a`^*&~4xi6M|01xfob`ef1mVbX0 zmK)b#j+><^ML*Megx)hzPl)s3xk>I{T}U^&P)9ym!AcG%`=l8n@SN@}@mY42N6V7~h4TCZ7wBy%cMX^W|ZyASfJl7}z8XdPCyxyd9gza{$4fd_<_) 
zRGx%{Gf~x4iEiE6qa&rR6?$iV1mmwfgoq-X!SGZ@-2kt`oK`1PC-n);{W}7#?KB?r z9Iu3~8(Kg!XsD_K{^_G2aW_sTV0URsBTXl!bF*FjK3}bCxQp615iB;4D+-8>0I!T* z$ns+1!FmTfhn+_FT=w%DS?Dg=UbRY19I>Ui{woMgGK&lccb+O3_^8KN9EFc~63tgN zWcpS6yIJPT_r_E3e`5i&Q{dh>Gv9Sj6}v3(>=f2JOxWO7+MM7@XK->r>Exw9M7r0J zhcl@0nz0H>dwAW>zcU_fYIR7p$J1E@kWXnu^-GqLCeT(D$R#3bsh3lDHrY5 ztyc-V62+fFEOv7*IQp92r6|XV_JbT&OIu z5q@l+#MH6nA-Rl?sMY|VU*q{oVK1ocV~TGR$42(kl;*tMlNQ{LaqN(o{1IPw9=c_H{#tt-w^ViW=g)0a-x_&bPq{C?{E@Ub zo6ymYT0~N!@*_m20a7=yI^#W0@c1Y#tW`#TITEM-67+l0K6{kp!@t_^1x@hwXy#=R z0J1;*@oejdQx@9;4Jr4k^(Mso^OYgM(UvYY8UabW!-0%7snefh;}u7%$51elre5SQ zM;KmD8lYOPn+KJn<$jofNrSFIsCPrgn$B$pLG5+@M8DGP2@Y&o;)zFcO10jw1r4l8 z21T~;SCDdKPLKRHQEZn$#Hyf_x=M@Redp|kZ<2QBYOM;K)Sv?tjhU9Cf?AAG_LFmh z=;a)O7N9zIvt1LupW_$mok&t`V>q(5d|ym7LzSS#qm{+w*4((%K5OFv%t3K5vPL3z zt%8nNUGKJ=qXXp{d|u2@_t;UspjTv`8@I9$Rx`QN83jW|r#snc4n6(;JEyD-!dh=7 z)ceTfXf`MB-Y+w3Ej!u!>8HDH(*UNnu`-Dxfrw40nwfr;x&d5{9;*N{#+s27T3YW` zacPkHxxH&WSyqT2_t^t|wn`CM&5}bj+A~J^>x{3?*@RW9 zkM=ZUG!>8)iUhM@=GF~NWGo<;W7STbSmG4_S?i(H=m~4Tc-vkYsh`Ra$Vq_n5Pbdi z%4>}2W78W(4TDBO$J2Bg0@Z*UPSg@Y^sQ*oO07H)Q|?>7TCu5Q<|-7V^sac)uoBa+ zNyXBl@Eity%&gUC*U{H66Nxu`+O;~h$|z|OV<|2hk^VUy?jdjdP}p?|Kgg%J4YPi( z04j{{bxgFgbO_58>Lu!3BxPUA!k7LW;atb5vNg!P4~*@{QyCbJXK`V%?kl9)jH+>! zlVycdvRY*ff_n#Y+GLDP@Nz0P7$VG8|50S1Yy!n$m)4REHR@1l+g@_rI|Ahvdgi~4vD@(R~Nm08)8#=X}*kW(?HyzOM>igkQiEH z@kpD2$wEeoVcC(6&fA3}HVPRBo5$V21V@vQ8;DZ^k7sdWVY5_2TIIP(+83H>Pu1yk z7Sl%AHWm+M`-SHE&{t@5WCky_GOU1{Wys z35JB8pJfS4xSwmTM7fRH)7cgr)uR;`wotYuqFWWzjH6y?y->fZ#pmMgbLbI?Mg4(6 zicv9-g=9d^UutIGaAak~(Ao=nXilfU`HvcdLk@=l1RxA2g{9k*OCu(%nBEoLQq{Z= z=3BrP_|>FM(&@aW>U_#Cc_yRTXz2s*as! 
z@uQIC{2I7E-i!$?@!8l@zU!@Y8ovoVn_Qwuho{`~n>}J`<+o74t;p;&ZSH(9r?5r9 z^9_~FYFW)tw^>Nn^p+x{NNVeAb+PYwT-~o(BQLDU^e6{3v+rKX6!jx?_w2jya-|n= z5;OIB<}YdERy5X8#^&UM;=0G}>fWssKy*Z)#d|a|QcjE%-;5b!82%zlzETJ7?{~Hd z#n6g$)*r?-BELp9h%fb84e32yY$*TSbPlCg0)8pU#2z6!pyv6SH}y;Gi@mtPh<0Jz zB!i&jxXaNpEM;s~KY+7su#LjPqLK?$f97X)9p<_?T^oPco4_-!8-+@cgN)ddD9lJk zW>uGs%Ha5v4;3S_SD^VW@(#*!i+(u91UHj zhq&o%CY#2{Zd+Nj4T0KQ0={!xWKubzMWKiUxHChOAC`4W70H_`mcq`!+s62{3j@02 zBp3)d8x-DCT(r7(H^7JB`!72C$lq{KO+?>8@>lrwRv-4*v2rhd=r*+e0oW3y#?<{* z5&sA-r!6`_=^O*-DU^2ix75~iX+0ENR!<|Mq*x3TV}&?tcrx=VTMfX{wc>= zX!4?!PAav|Dd_$5(NFA$V++kfhaVO&X7+O`_qxD4auJK!6_`Cop{{w3_M>sD0zfPb z8IcIX$Dne)KZV>!ZI2~58c#@7b8gBIdAc5vrYc;Uh?pVi^!DP&>{bZUh=(Byr)2WW zAVPJ(n%NayF7@`Z2~Qd=5w7a#A=lHL0|NWnRnJa9V-w);r9T#Wa3GujOZ&>_TNHoA zd?6ZV_bb0e=ApMkobAj_8hzc*oPkft-H)VSyZN~Ro+|oP3n`N5fn%IzjD6xumx4R% zgf`$~LF1hL$y_qY)RN^rGkSV8%r6-UuoGiUZd(x!4ue75%CV;{u1|=A2)SW^y(_Q5 zh}Z5&?ugfL=&56E>&5Xcy~cQorP+In0ucH$+`C5;F@DVNX(d|lMQ_u-8|zRij?JYX zaiU0}Rr`n)t2flx&}PDa>v`GQLJY!kq2l9wZo~)Xem?(lOQcl~ztqty>Y6i2CW|gD zCOaTS2K>*!Oz00;(Ia!{)M;2bc0@3a-BZa00& zn5R&^A>#M4*ZUE*MYZ;!A&Z{5@NdP?1bP{994t zts(V6{FEvw3P*~QiED{&eh&`;b;cY>hIvYh(Yv&>z&7t5He@ti+P^q!PE)cKXP!}} zX&2dyhOQ*a8QQlx0|enGJYNaXXhMuS_3{2jBds2jVHr-wkM~C7mbn=FL@oY|y`MQS ztW=XWcXYjkT#sf@s<4nYx`SUg2os)A50x>bw;rO6G}_D%5OoBo!rAxi=GK%@EpJpF z67uIi}a{>NW?iJ+s?K=pE`LXr(-H)8Cm)vB2`@ zvfa~&TQR{m>pV(llj}Q4uKP9h2hv*afaxc><`DVv@m*E&X=aUe2w6%xWvQBSILc{S zL}#%n%ds)wQh8hPHb;xm=8wuViC`&52aU5&G`Vk+%-X3d6iscmu*z}X&ReQR(riT5 zYY1+2y*_Z9@HEVj$g95L5XzcmN?FekE{{Fc%I}t?KrYJ>=uDf*QZ5tHy2|oPzxcv4 zZrARN8%8}Ro86TyE;tzS-nE=2lCjTh{8<|EyF&ngb$ls<~+c+(TJWU*zJBZ{J;FIV-GDK7V6L>;|47BjvM;x}%fY9VU zZZOP#vo}zb6u>_W`=b!f&~TXz$pqDW4%;ovSMo@=N?$Kuf-;rwc)_%b)_%Df(R~wG zKTiiImgt8;**)n#n96%zdg=`HnyNQ&(mQ^SA%Rr^;hVhPyky|2gZ_;Lbn;tfK$RQM zZH&cwXAV_+_S5=&58DfzPkwn{P}9j;S`ZR8Ip(ONcbY6b$(T)x z>)>UVd)Bs`I$f~9Xd3%?EHU~^slOUIy>9z`+ zPw-IBt&*CH9ouM0E%P9}W@6rKnb}_42kZf0+3j=Ne*4%%RjjDSZaI`!XDC36O?L2R3-I`q!XnXh&ZD;?-Sc2@X2dEZis>b 
zaeI_!jLWNwHdD1C1T&!{g;D*%)Oeyd#$S(c0&IfnjCX89HM|#fjc@%BqG}~wiIqNf-`sKrvN4035@nW>`051UBDU-CI5!yJEO zGaR7|<7|YOD&1>D$m0YR{JYk804GosA&-EN-}vjTcqv#sldv~k20!d%&TF#>8CADR zr?Z27i<5qZRg87IQknZ^w+6c!B^pGYMC>QqF67VN5Qz)l?;UeB?mFBZwjX2+R-Cvh zNdq96N3md|)3%}j$ZON>tvQRY#pP;!1b4&Zgk|}&-lq3Q-58o~KNRA_QO)5^(x7*h zKHt%I5pT{q#{LMjF@?t@#|v@5v)J?eQZ@(q0StvyJV9m$yxTdBO;a41zn@G}U001~ z_;a^roQZT2oa3!J2I2~gJsebMmdBeXdCN`?tMj{OPnl38kpKCy7!U87!~NWM*sxWP zK5TO6x0ja=;HuV0nBl))i@&5WD-#`5g z!TBFvu>bhK0PouiknIZpufYyTYIwju|Ch(?e||yl)SuryzESp-v2zP?%dBm9RGC|K;+#<>9V=~Bu0G<{t&qB&1kxlFAmzG4ftCkx;}}G5lzvpI?QCRX$e-pI}Un zEiSf%a}lBp@op`6FZ?o)-+GWAHrphc9PjN?mvmzEv)Nn+kUcs4)!-BKE6rgHc9~=t z>c1B5|J(rN2V}`(m|}>N_ziouiGI;20UqY>zx=V#yb+i z>aG}Mrgp-jgqX#bW1&by6hTNtYibW&4k;yJ>$KCEFah;W#q!xXk?xNu1t#d5nsi~a zG3c9#&&8CzT$l5I6cLxaehy&~sjTvsY0T#TYPu3Ok& zhYjLW)bSG|u2>`U|H)za&qalygDLS$BSt1QS=EZ);!f+Qs{X&%iIBZG@d9#wK*2bH z^h>xAy|CyUCMA7nTxGz8_cHKnsd}B>QWU*`!DQ%%tP)Y!Qwk5!xquw~4&{;;!k|@X zn2sXsLcn31qQGx5hhHKDk-)R$0_k8Xe+cR)tI)&c-RW}L7WVD-P%N2YFcx>PlhAB_ z9G3uUYlqmHRj;2^STgcIK`w7NU}gIMuSNW?B_;^gX=U*1v$&X3B_xifE^@qP@I|4F z6-12%r|Ec_2C%BvR{;~kGf8I`vewTwZAs?OgOflMlx0M>_-Et-3mS))>GH3P3F&02 zSP@Ku>A`D!o*@W$>e~40K>&Hxz}1NgapLuPGiY6X)+m;WH}~Qk84QI;82?oC1;as= z=>Kb=q9{jUobwN$ft6u$G0b8J^|c8jcP9Ek!;W$#63i5w^J{iF!+a zEe3QGAd=Iyj%Ts>6rFyDbbb=?sNwXoYS9-5lxl(h-0=T;-2J|PA1(Xm*#txN=W@;N z;SUwV@d_&W`_%otpb$n7!0t=~Legu$QH)E<>80cxWFbqA;5TR z)_u%?!*df$%Qfy5H&EK+MVZ%LFeJdK{yRYlP?JG^EG+-$$o}(S0Rt}p>5K)z&oHGD z!GEqZaytr)asR=&|7yNO?4?LT+8*X6UBC-nAJ=9zmE`yW8KsyudI(N+Br$s;%oR(_ z?{KZu<*`br0a!x7IM*1>T>9f*G8uCjWEv zLiVCU4O0J{*NfNRD|&>74H^FUEiRD~^78-C^_EdpwQbn$BO%@09nvKo0@59l0@B^m zB_W7(OG|?yozmUi-MQ#)zMD7pe#f`R{^<`Ku-062&HK8}^EfQ|f2*54o~%Kef4KrY znRaH;=w)1fIT(K{uv-H6FfykW)#R}OjLsd#=G8vNUU9>A5e=#{-MM0`~P~s{ZakV#lxSFJw#JHbERV_zmvb`tK`Zjmq>kE z@W%hSE&luS`R|uQS>oN)3$YO1?1fGDBVBolxP|KEaDivB#S4iw!&wTpW#>7S4@`0b z7kEn7AW5u#m0H@k1JoKEt4gScVs~e zFDK7`oS?Fg`prhgUcSj8-bM3wR8Sci8CvD;+!b@(l3B!W3^Q3?eV<)0n)tg_flX?Q 
z735E2snfuOdbn~at`l~L<2dUQHE7?L{MLg@66I)%xi6NaN&Z?2E%&Gc&TTAL!_q{C z0IQ35oZL1R-r`sbt5m-!dat-Pqvyf>t`lRDDRV_v@7%n+op|yX0E(jw2EvWL=Y%&zU+=QR!%;0A zPtR`2xFFw!GX;Ibejq~pR!4<%SRN+_(&$JyzL#gqE}hNRyEU*c@njg&MNwKeBR8u& zGN-?EwrP#WSNb6{@`nx8-R&)GB@cg`9_D@XO3_C#dUdOys^~*~qifQ=F_lA5W!b^viTSCXdhTICzrIla$QAq5w@JSCF;*?B*P=>L*z^CZnWZfr~xL|G{R zfuWR$q#S5*f2IN_SJ>K0y0Io(DogX^0pH~AzwAc6yO!M!cj4OjU$+lEdp$|P#4V?Q zsdckle6@mmwPn;~t=ZuYCQzN!lR10U3Jr}!(WF+Z@Oi|DGh8l`^Faf|^K9Iih{^hc$_?Hhq17XsM55gR;_hC6*Ey(yfGI#bptErX<&L3F~N z-Vf&Jg#j<1hfECkE$3KSP`=ZLk+xt-;Uu_Z!1E8(qeMQKBBe&r+JI=X_PY<46svOJ zDUu5H?vk0*scC*b6hH=8MX6mjLv$)L8{V3u?*PuSI@WOy8f=v7G}6>GR#h(zwgAvz z`}?sEG#ji9i{1G*-w!Mo`L)NWoiwDVNC-O;SBUQF5l z&H`xs2O2xNhvT<6SzP`i<8zu)tkcOeg--AWyfba0uOi^oATAnkm@Qkw|4Oft-Rr5! zBUR!}uTsMSs*RKr^67FrBiZ7WI!>+(r%fJ@_iX7 zN0b@!d`l$V1R@0M<@Z#3BNj;rF_e;GlNveMcW%y+eo6HtDDoI`)do8LZ6Ym+9fGm}-2cfVWNaPp{aoT8S3J-v ztC8JS0PxK>sZ;ek|L~&i2O^rvk%!mO8F|-6{u!;6l5*HK0ZhzjdPDCw?AcP{K__c@ znU0eQl0jIkcErkBiuW3k4t{6Ycr1D#=pgdKQ}8>L{brwzaS2-Qrrr-X2>2-hMw+Ewyovd|af#Ge%(Btcf_aKE5ZflZi!DSfW-pw2AGFQLzg*tJ; z?T~Sy^G>hXQ;&8;y9}AyFtuu*npD*-&M~bo<&7o=A7d@xaSF%w{1Lcn4=Sou$(45* zG-woCJok}|@)b8}w(*erwAb%4PMV%zt=C$Y1}kg4HvSXgQaAy)DAXwC=l-)~ql|qtasbAgw3pWK#5~2H zG3HxUW^$Y)8ErY%2Xb0d=jN;?{1B-JJ~da1W5e!y#+F2?h&Q0L@&!H!@!djyl^A&|dY$hf z=IaZakYCkAVX1>hTD)Gd)Em4F6)Kmi@T}59$zdO(OO+A0iu5FrmxEckn#7bc1rr_X z&+(>mPHxZX9F6Y?{&Xr>WM$_?oVJ{~d%H)Wi5(%un)W?d#(K*q^Dm*zT^%le$24UK zq&`3krEE7HQvJ~%6BwMThM9osfkg3rCcLK9pt<*N{y;Q=PbXfXqBkOz$Roga$xO%H z$CCo=3*42BlrRz}*S(ANn~Yc;_I;1z#RpRW(=~VaNYb2OXh{u8*R`>4IkD z=>j!{(6L>dxVG=Ut6PqiS~K$HBu)G8TbI*Q*`3cUJ|D27?{S)S&8|HU^8b?n4{Khe;!ioZh-Ie#U86KocXiH59NZht5jjdb|=^6i^Hq zFM$JCx^F7aC_QUktAbCbdkO`duo#mtx1AQL6jN>FI>j zXR8UEQwam^+Jzcxq|R^*Eaw82pa>V)3&K%_D_u3AbJy?;HmDayFQ}W3)|F7ryV{0! 
z=x@B!B|BL1b3kM6>WCrMJ{)@&J~z;%)x$AfF!W`kAQD0BiY<_Q7`Z?xPCwWs#1=gK zP$ib=1O>YgB2}CtFibKw0Rl6Txpi^^%I9d) z0}Vd^nMjfK(MQZno0&W`)p@P9HsJ zCNj10+NIm|GW6nK=NU$r#fIm9+>Vx$)^HF1Zrlwm%+`6nI&d}-9Y7U=c>^rzPCtso zA|&C7xRGC9A4jGKRGFm#8A0SfNt&9)Dj0jPp@lukq*(B(jvf6d9N*;zmJ}OgD5EeI zxb37GVNT2ac7&sgt!HwW!9FMCIry3G=(DL*48R3?jl-{`D`Mvzws{$%WviXfQY=ye-d)fcG>Yh=+tVN&Chj zkzI3fJEQs5L_tV+sGdvB-nWPu;*k*bhkcwUO48i8A7+nMJSATZpHaR-NHsxq@UgZ( z%N78tzPt>#Uxk)b9Jav!(6*kb7GY5T6Bs-+PbI#6Q0IJ3h)OmaD1~-x_;5kfYnBN| z@)s?-zAmcZ!kzCZid5DB)P}ZBhyOsNGr3|_2&b5604vp`Ce&YkdQ6$V0Y#D6rE7KU zg*CfNR$&JoeufnvDcTqV?xRu=FOxeTC2}OgL`&nreTxXv$A8a7rNNNOn^x54=|pS0 zB>R=dm8Sm#`xu*hrjQR5W=|+gABRJyN!8xsOf>n&JL^g3_vyjRF~Z`wi+_D3I-w4` z>UWn%2&TDfq#3{ETL+X;$PB$7&QGV1-v8)p?)+QxQD`Xe{C(C%#p9+?TD2ndB;{zdAKrw?&mm$F-fl4zRyP5W&I za4ihe1z}~e--M2VmQtLa*=)W%^JPgGJ7oKiXdUj^HrzRb`Vjd*NIm~{6BMCww9oRB zRKe*#I`?3{IYb=|iywKKY5B`lHcLnpDU5l!Q|6fH@Z*+^?T<$nn}Z0>OZ@kRmoMjmr$5mrGFRD-;Y+|02>o=^bYKi*U2m&}tQkYq8uPku)|D>?yI6mL`4!>f z#5dvm?wF-er<`ejrX(2PG>k6)DmY0pFYxM5l>eym$>R%k2AJaD#G>;8E~UDK_XhyTukg(}f zR#N#eOBI0&&g1(gz3MMEz>I@&XW8+^qJ^NW5LET145)~{`u-bF~?h&HwJW(+%8&u;4Stj|W8=4w_l(tI1uB06nOD(ZgZnSY!FVrX9`kl~m6(L}9D?BIQvm6oHQ9u1L0scZI3dg6Wjxsh1#;^xtjfG(4Z+%hw~7nf;-dbCbDej4)8h{>rb+ zw1Fxfh7Ahb**O!zk(=rC>p2G6(UPR>!e^s0?!`ql3UERuACqcfaonJb)<2?=Ra&R+ zE8jRVed7YN*TcKgeUv8ZmmPTWlksmT+Bl22p%|Tzd6vhCXl?RIFRTVXwSIKm0s$9kTP$QW_ z?E3)hV|pPKK`L%E{0*E516jXP?%l0OLlX?S%N{*QY&vn>0lPv7Q$X*}%=L*HD0#Ub z^ayC(&bNCTbT$ihuczOr4Y>2?*Wd#h^9h1~RgEQJDNt1xX)q|yYN_7M^j10rRWQAFOiPsjpZX)rSbcjfm zZk7Fab?w+38OU9DtkcDfVHvRxpC6%$Qu1n`@vQMA3f*;sVeP;xncWCtk&jl+w{D%^ zVn!Ps8Ev#Hv3jmEL;?Dh-TCP=)h#_v6Y+4>paBEV_?EosB6lle--4(;oQx5zL+IZp zsKP0TWQ%BW{0Q)*OpZ=I+N5Tk##=U(nyZvb_>pvY-;q^?G>%Ro0D#D3Ew^T)Rhkojo1@&? 
z>G2{S^=LQCpOT$PMq(or4A)WUK56m1ViBUAiEr)W~j@9QV)G^*_)_urlp)c>tw&wxy_`(Cb>z0Yk%8U-!DLA=B23#fcB0m}ji!xzMRC>5P7Q6g~@ z33X`lFwOoR7W_j{u|fH(kqrDA=(9f3P!-G{gK`vCf~v9_^sq@&L|unG!bmR%iEm7}wM-6@HCncj1(+vAe zW92tr-zTxAiFY&c5pW@1&xI4yFrYRN9=X&t3IU2R{YK1u3$&vZ z*kL=9WZ}IVZ2MGNp`+zyf4J*oX`JC!6Ayd+rg*~@{&J(P#6x5K%>6d)j4|WR0LYwI z?4|-8d~gOyO27bxRwT{f@#5deS3>gvoRT9;zF~dN8Y)xS(Yd}4C(Z1K+Y!GdTk3np zix;G0a#owkTU?gtj-p&j`59n*zrS1LJ!NxQl#f={t+(U*rCxZY=0l77MvK^5Ph;b9 zfrmb=h5kkBGKXZAZ9&n2_6S-r8_R)S>IvCcAspny3h*MtQT(6np<@Hhy8^MBu|waJ zP1Gsvg5LAb6=`A2AhHb~%r%xQ7`c?ze3e2dW%?0iX z4-%9CvbnC8v@Z|v&c_?#Ex86Clp(;J>)V;4K zs&Qw+nv8ExCUmo0BEWNuKhS4Dok`6(!=%kTqR`xa8*VA=ku4F082j6k1l52q{3`^H z+}RZx7j0fA{j)=Y09> zo4-q__X~TMXj5+j3&!3w&G0Rso{#=;r1hC><5F=ch$k(Jmw_#y&2pJ)^N%1_$#tZ! z;OM1*s$#uU7bw0{gU_- z5c`riVmdctU8dL0lSYIS&(FG?le?3%Qt}Pk4rDk6iEQ@R9qW4b=eLkv$i}ynztVN5 zQ*)o@tuq_`(O^vv@p7|kKRl2r))s!ANsbQ|Yen);n>!e*^5rC8*bB48KjYsF39FMt zLa2xoQAXNirti2VMwBM1ciQfj9m6|bZh#)vpk8CZ8YF)DLS5f)x}wqrO|akOb*(Y{ zx<6)0pXFM=%0^iD+f(zqk1J+#euUw8Jk9cctleLW1MsQ)cy+_AKmO_zu9Fgsr&#;J z?V?juAON6D(aVjfw?8cjx0wNlZAGx<**y`17S&)ko<@zk{+i)%rNIxcI#s=+jjH4( zS;&sPb?d@=4iiup?p-RWJqN&HjP$YLr&x0TS)Ph#ic(LSQ5v0x$MhjG*Qtotnmm{F z3_b{{N~nmMjbN;4mKqpBhM8mG3rOrz+bg(jonMuw)S95?KX4WT z%XX^u2l>P*11YM+{c^Zj^G{VCwFy=9 zkTB6c4^MIM8=b$qtZfdxNl3cW_7ctQ9$VqZjbG1t%fz1+IqZiQ`N~}PbR1j!$aY5b zTlq!n`e8Uj?t@x<&T;2RDnh-!(5ptVYOOyiY1|i+&(+&Jah$-)$nCIj4m_TJTD%Dk zA@r_S-abgC?yLLMEmn`6c6EI`&x+79xz~7{^>Fre?NzAw(?=&8Jua_urrz_^0zn<8 z8?gfUISxZGak>E$^rMxgDB2IHtG)hIH_1&&6~S@K&P z;POiB=Z@I$_$+Yvx|Y)pJ)uWTRHN2h;Fshj3rAKCZ2fSyHB1!j?hzol|UxbE**X4J?O$HW@3JRDJ&r2#@O*p&j^qj-+C` zvs9CUE%}z)D!3hK%r)B4rLc%eRX<8BUkhv@n)}x;Hant#U}k$b;5zl~kzfxdiXEBH z7sw&0|6YQ>Ma02d(G7mUjkss$_*i#hu`+}(w4WFP)|*v!A90rMP?KCv`qW>Gb~|v} zED!d5N@A+0%Zg$OT@+4F9rzogJbs=CsKwrAI)fAuov9uQ6K{uhYAjM-H-GL^MCbhK zoF9r_wK3yZP5>((qEX;b4c3}`9i))QapH+MNM z!JXu5vwy^uHfu&0FF(_P@lABupf-Kj$7QlNs?>Z`HC`<&*%rs97W3C9Yy}v+j{Eo= z-&}#)=pO=36Uj^2wU2XBJ1`XU~!) 
zbr)5CdIri$rwi`6^5$b3EU^`}hrP^QO>w>eK7+!r&iEZTgIE zF7~igXSc+@5~9$o6&G9{&66@}G>}VXbL|RP!0=Wkx?B|z4`-9CPeL#De>oI%IH8}l zQ@GccTB_j>W`e0}F&!J1c)2=>5n>a=2Uy^hNr8Bx587#Sm?i4avkDX$_mnBk5rBVD z>?ns|8q9`!TouOp$~m3p_&-g|5A_01-#pI|`Elv|wM_^`Q!r(!i|YyOdupV&Lub`i zq(sbr*PixAn>)(NtJ_-Hq1it^(DtWsA#qUme|t(#AI;MAn%${Ra+xM+7<8pq<`O|P zWnoIXJ{OfL<}DvBb@sDZH<;N_ncyPLbr_jc>rtcvHpuuz$KmS#&H_dZUJ+ExW~mOz zI=wDXx*9i=4qJqE&QVF|UVSRWpp=ZS`COp>HFjLL#7f(P%Z;n@ZWOl`+d{dVqC(rW z!${OAGKC)#%!MT5rX$LYCr|-e3ZIO5(?w_5u)i(a^NrNTXD5hde*!=~F< zbPlVOdX;{OyAxf?_TpnXG@D5c&?K)S*RC%xZ-fbLG-@RKEKp_2QUrS{W!n(0UG8{H za3JDBv^Wv(@-kAjahp!7FEYZOr{9<0+KuV_?a@H$c~T%%b2&@nHmKK()59tx`sejf zp0KTYp^ntu-k?Ffo>7Sg_E{Rz${WN zh~ozqq?^~TFhe)e>24))fFSRM<_;iq;zb7O_ln;g!gtwCZibr#&X^&Z5-o!D&&0)@ zZ>>`~s^xV7irz}Si23oejzCREav)y0{kiC1W;2X)0aLth{%M{_0cu)LGjm9pQyc$B zUQZlLTffE<%S!Rz!-GcHEM4X#E>1C~`k0UQ3-e37b&vR{VNcx^5d@8b;OTOB6Lz)Y zDXgb7$KcwZw`zm;^)*Q_v9@zRzjA`! z@45wIWhf#jIM?F9odr0N-hK&AlMgcdpwjy$$)bT|Awm<4bz`&yrBJsLvF48fgUjav zacikGk!C5he&d(`k38EeI)YOyI+gaDI@c?5MbgFKookj)vtRVxZ&xu>-j;;TM3wXk zNhS-~?yHZ}LfC$~;SHv9M~-PVes%2RGZShM z@z}!UsO@CfApIXXS~^S#>@q~4SiXu>nCGPB6&0kCy4ih1Z7!6F+Q`TlbUWP?UTpG) z`i(ad3xn>sQ*Scg0VybV-6PgE*xo=^p59qbh^uqnUE>ZHTYUZqnopXi#72j`18ta9!M-`oe;kS)Zl&yiiZ6e1r#TK; zDhfj{xIU=Kbz`2_WrzBz;ZU68w+Z+n#ltcOp(}=J#tFu*@^mUxeW$K%@l-aSJ}!A} zF73PI-lQkux-v3dRQv7#mvhI9c?_8fjv;l6$`5!Q$I?O>a;MsK=^ss!>Fh{*=7p@w z{@w7xV$z+Ni2_&QS$p5ChZ9`n3U4r|)f)GN9_|Vhqe;V8qE4D7SILNUai=rakS)AU zf+os6Jgw}ZlAmQed+Mk2I=J32i|hjJxUJ>_tnDYb>EtW=HGsWdz}~z{2_4{7WB(IH zx}qSczODb;h7MwTEb*Y~wo>0z;S0D@?duU?%D+{Zlm7tV^2>!-hR|U`vQG1j&W{R> zKm`A#Pj|8<{)5Yp^5XvY${Q)%d{7$yhXS49%K<|rqQBpF@ItbGN~LY0p7Z?>10wk{ zPR!qDo;+H{pEdVV@+o}VJEiT?Refb%e-c6$l4OehhRp2CbyJfT6dFsJO&(haa?mg* z#H02sRh9r0ACvR`tQzy0e1TMd8kq`$rCFWYVw~uP4?001h3WD_dl~ zdiS4sQ1nU=aV?(xB%p2og2ubdf~*4hABy zGHRILaAO~_yFx&vO(PclMq5&0;!}cP5q^iwgWwTXC0EUx7;49MvhP2KfLGxWT?vZD zF&hSVAX}(LZ-UGI7XMg{54iQ97K$vF&)qqHLQ$al!(;fYg!sK4?tqILEjDvz%BaT2 
zN~7yd{(k!%pOj0hA5Tc~b7v%xF0EyX*##%1S@93B_?TR;9{Jn?zmUYBz(6f)AkhQFykK>K)biY2L_@moP14HE#rt&{v zK!pmG*RR>}1tV_t=n4LzLchjPWHNm0r}I;QuEIbfQxwHx`sq!daS$5btKWDfA;QL7 zb=7al7ba>K{+awUM@}B5T=Or3)w?g0tpss@h^KgQf0Ey)I<63S?sF6JaBtf>IKD;{ zdwl!=Bb&(T|3r1S7J9E1ghWP)#KXijjQ{u>2VwoI3(+XbTOxe3t=Fgdh5KL&L;MO@ zxOh8%{R1ijtu5%Q;UQvO{Xaw8q2?>ZS!eUB&sit>BgI-r;7>f6RDa^%eLjq;8Csx< zu+2S5W67Xgk+d!0cXvSm2=$%!*H#Z)J(6F+ zkGuj2)aJW|$H~(9f%`uTJa!!$fjy<_AO4DwcT%oqqG)y&`ehf{z-q$?``mp?klS7b zqo1egVBnzmsaGkY+w8D4?vbm2xfa4NTq%=?<1 zhMLE$@;7aDWK{gMzd~rv^Y7S`M3=DLj7IpEn>~i23qOMQWVIIk<;ge^aH^I`32;G&6EyjS&{b+WEdSz`(cYJ4A1$gl`==Oxhz!RNw37iXk_(95|1@0Rj@%$z;$fpJyF9?bpZHo`#=ezwsu0}@1}T&Y z`qJmf;dWdE6pB<=164A zE#jq-ks!OCmwEmah71}A6|=XDwHwh3&`c}4hz;^{qeZ5A#rkG-Su-v#t|9)0NPUUo zS;w(*4_kCP%T+Rg{5nuQdlfc)Bncj0pB(?FpCbDIjA->b{Ze4PqUip3m(cPy_jy!! z7V;M^weyFwTsU9JLL{YStVyA2EtH_3pfcc>5-<74%7$p9$1uq_?U^sutzl(FNrA0g zTi@f3!zTzv=XV?;Am;hUyz0qP0`7|RJCw(yRgXv)yThht^$i}ppx4v70d|<%@k)-& zZ$ar?+Rl>IQ)CnHcjH? zFz!Os+*BWxx)asP@4>OjtRpHIef{osyvq?3L$f)$2Q$n}DsbGI>M?9Vc_U(Cb!rm~~R z1|Ux|)|P%*=y0BOwPB2(>g9_%5&k z_TggpS%=3Si9@}u)G|Dqr8q;%RoJjnZILkXQ`e`?*YVatfGij(%r16y2zhFiqA>H4*_3%iDV(h@ml+ud>o20J4R+Q&8vb z$mWYvDRsiouJv!4-+;`BJZC3$KiA~+BdNeQ;-cB|U)h9x=2N3E2>qzFKGi~0{VU`x zkp@Be&lbJfX%6qZ#W(7|4KTFNP5(J>cDE#FO)S;^K87Xv1;#?DphQ*EEv43;vI!++ zrmb>kB=>nUd82Tw?0XgGXnkh+$c+gZt>1s7FzimFFSZ!Ce|uq&j|7Ml`ZcwimzlTvdOo<;R69>Xye!|$v=s8@#!9ULh+J~>C-=JvSFoSeid$YZ}i^Z0o`>^>6s zEK!NG`j%R*tnEn@h$v>$hNVE=OTzMo9N-wQVj6Vn>f4!+u$zuv-mO9$Zh+p zy#LMuNJYLrbTRgw6`pgR@q&kV0!oJ(Z^zeXdmkQ zy}WWeDzpI3b&`Y*ZloxlHHV>TZw^)3o2fNeKE9E&0|{pC;?FU@0CQ$Nm9C!#+|rkf zT<<{XhvRqvFSw7*I3ld_Nafa-MYhN zavY8>9A9Yu`!$epGL;;Oo*U|!m(YqSRJQ;YX?}-WY0_6v0bUBtN^B~fvvs=zOntv8P zlsYojEzQ_^)(?P z{5k@7cH{5Sjts{tZeyvdt7Q(E8Evn3S8%puKeR3uAxQ{j8r?1SNmH=NP<6B9H=)Bg z)(VrXeae(Vhdp&%8}|_OdBPs)(wZvvF$TCd^ocHfpVM(zCJc8^#&pj~8#l z*8O{s_6nSz)=eATZ!tk)rSk{E1e^VxdpPkp8dbMqcR5zUSf!lqueKshy!TON_@yR6 
z4o8a(Xz7#2x3>J3l5wxAO)^3!wm&_$Q8_d?AWZFZi>Km7KG6o1Yn5&4g1$AUR!kIo8U(=PbYQ0Y8%{WN5%C)oThDhLxzyg6e~0z*Q`wah53CVO|6-OA*Tc-&z$iXHvWe=b|B{CZ=Bz>#)% zd2#U`G$6M;bf}MtDop#n0o7pGaoUXg{Sk@ApM`J9&UiVtVYHZx;HHI(4xoIl=`p}) zEjU&zBsRIjgAQNF6xi9+1G2lTf|k7V*A^j9dT9>I(QbWe)IE1NknpY&okqlIZjJw+ zziLs7}y_ z4mJ;(Y#*%Dlci4E+_6~m$VEVskkJY_%!cSY?jAJ&#B9vjK}ic>S=?86KgKF~kaxG3 zo+rt~PJ+pky|zb-#DVMwrcjV}mQLGNOr~O&OCQ!)mxZcV>S-<;iNkW-=;_KsN`mdX zLuSord3WD^sPbDyAv{wEHEGx&F}J7|N*(Yng3y&aEztS3fBjm)u$cSFv9I~&Dli&7 z54VpWGx8#}Kk(jd6IlvP6gQDc7glcOSiJqcOf-cvcTwUSYsD8ud{-x$)6QvS427I) zu57w~{!*P8IUGJdpCOCW4r_o0iKkp_yd&6TOM>*}SfK|Tm-Pfnm#n5qRlLDmYPo~Z z3th4U&Xo$p{B}9rnHV%mL#EfIzHqI|(dk_LP>ri(&y(U!wlyLLwqNXiyFe@{J$CmC zt{_wro!p0;F`!`uAt!lCNDP&x@C7j&CN=XCEJCI22H|HsC22;Gp!TPtrE-cd6d~jY znSahYXnwjEjz5Kxx zdXV$Z9f(mqQc3{~NMaEWf(XuV+;8uE5`S)r1t~_#r?pb2(T)G45SD>~nh1 z#TozWBMe4YU=ep@Fc~+83NNWXh&C3bfgS!}h9H^586M7?{Ty_7Wtf1VQ_*2#mAcO# z!ko6|QTU)8XA7UbZRDQy(?yLcK$usxr^wAbo>lE*Y#ud>X)f9@iKx?G5f8*zFChGA z^Cy8sbIDMej9B59N)rbond6tPc98WA{C)fTCz$AN_9ha{l&oN)X=zXy*m;T0zm@DW z`pRZmOl~FObJZ4bZGjo^d&ah1T6A*!X!3NJ+)#!YGYxoFI&WeE0cM9KoOIeCd_t-< zIYFK$$sJO9^-T1SfB2-|-rsuGi@WEq}iZ@c1bJRp}z ztsc3?b}vn0ApoA!pECOb~vp1`1>Et39Vc?@b|hAQ}~%f4fT$A(y}AaZPgv@ucs_??VYu zjvEs#?N%C5wP8Fcu!7>=HUv7LWMv+TAO4bsI(x902jok2pYa+)|=xFgTg?UE4B0g>nLi^A6q#Y6dfchP)HTA+-!>BwYMliCx6WRlvo1zyvP-pbnoU>{UdJcn?kU{yl&#)!I3 zPmr#bxHn}4b-|U;OjVgx@&e!Da8_e|yhMV|0YPNLMJAq&{bjP}EAAOcZF4Lt^z$pn zL+hx^qzc6<*8c`-&SYq$dz1)xh}~E-%jgLX$0>m}?FTvh+e|{s1g22=@%wbWzg!$( z<K5@a!}l|7?bPOXLsTT5wTjR2;^~_VLRb%Ve0p=V>_y&@(4W*^?F^TP?o6;Jx3jM; z-mHrkxjwj-9vfd1pWy1D_#=^51&XvaIqPE5dS0VODl8TrDY3ZISQL4w&Lvf*3@A<} zX3b(}l`;h5P61?1v>#hz{FrP%o9^OrlZhu*Uw)^qjXv++ZzOyA;n= z6H>iGs_8JmCFm^Ym* zvnuDA0v-KqYsl+f3V7N5Lm`ieh5tj`TSmpTE$zaA5FilTo#5^oT!Op1Yj6ne?(Xgu z2yQ_W+&#FvyG!#evd`Y&}jaoQ#$$ndz*_besd%>2AnMVHN&FhOV?u%kHEfc>Zi;`r%WzS+(q{9#&Msk zr7eO1r(4XalciR==LYi+5LSR!uaO!jV2tzgk&)uH_cklPU}Y3B8cP#eiBP4}g#+TZ z`+zVX`L?)qQO`QFi5MWoEnQ~Q#~x|!HMM$M9)%FQPP6XhEC%$42%F<#saT05`=c!+ 
z*9crrm^ku!!K0LFyS{?#!ERj+4@#6vG8rR6HJZ{>^sTdi(#*{3+Cw}7On{Ff z?AWPxcR6MLHYh1P>3foSyW{YXY5SGAmECQ-?^mO{*Z9a<&|?>BuY9}LbFY5_j*c+l z?U@mTspEy%y2=~wUc`boriJYe9q@xdvW+bo)x`uhshj!&=}d_Oe2%Y5_<+a`4mj3! zN21`czf&hB_S^?DCXro|-4c~}4&FTVNE#v!=t4GCQbr#pkHsJBr_*IFM1PdwiGa8U zl6^;*_uW{DB!(HM^rYYQxn7yb0jiIeFk$HI^(kn4Ipwl95eI;D2ye}%(*4eTW4Y*m zOt&asnxucNQ@1CtpF9r_+%+cK-wo>1Xmz|%VT0^kH4Dko4JlkD1R3)MTEVD43=orV z$4c(SYmj~(o@2Co8NFRi0n#kN>kf9e0zoDh@(!y}&ncgruy@L{{A+K=I;-3A7TixkejSW5ZM z6k0eO{jE$F`QXSm+Wq6XRv>c|dlG;#r4&>j`Q77vOc7RdCjgJNjd09vqw_IXh*vlg z8_yzXEV)+j$5{CAp1+X0MKb-Hfngx)sorkVwL(1KlBY$bTs0HNS3P$4mXlxMyRydf zlOYK3rUommURW6e^BwLMfNLu9cdpxOfyLVits5_ym}F~H8U87j;q-F?0FIsNoIXJnP;g7kg=I`lXnWH7Uv_ zgA2a+5QfU&+9k^4sL+EA!C{O(PD!Me09T}l{DcMpArv}IBs(-2q!rJz_Pv|<6SJ3L zPz>d~$x{xc&p@P#oPyR%#84_N#%NP!uX-c%c=rq7W;ao;ZD^LuLiR}UP#z2j$M+>& z5UUHaq`dmX)6(&JO2G7Hh1(<_5#H927WSl#?pT7oe)T_vF-ZqkXH#E1^Ob2SpeaP*GKCf zj`~_6ftn8g!m@h>?@_MqtGs%$<<0z>(*m5e>C&5tY>ZJgAE(Gg;slesXD9;D)zxfe zYc%D~fmglv`lRSx&BZnU+n{o}2Xx&)KR~u#GY8;A3uT}7G>Aq`}V~fc-!1`sl zI&2&tvafu7)khN$@9xbI6EyM;z9Aw)11^c#H)8>J)QZ)R6*|p6xTTv!=~k8=mrp?# z%3s2L`GQ;~Q*Z#Z3RK+>V-tDW&UMb^l#Xl_+Cfi#GlTj=^?l;{+ilZq~ejqNYweS#3nt97GvuU{PKk^y4>HE8- znT+SYsOF{qCbs!yU^bmce4s=V5TPRfZh-ThVKE(e@+cR87Gsq@Blh7D#xLI8ce1_6 z+-$N2MFe3#Qi?J!JpDyBBB|3UmZL(Qm(RW^GWza4ivO-3vtsQI%Nl5_TAwVl$Xv`c z8H+^G0?30*LuqKR9mi?*ySlNi5Et;kB1EL9VX;|)q1*5c)vlQCSLUD|1g?$0nt!Ox znJTUq0;0w!Vu}ej`j}}vhXD_ATpSkBYskcyPwA{Xv|0Y4`9(gpe4p8oHp5M-wI}jK zgl=d!<;n#GY`x{eC5SFeZ$8&Fj^*$Q#rCq%4l2{XPnQ~ytCd%N1p-)VQJSKe?V9!h z(hZ0LFkj#NT=cGyu<#~{bj6{z#vPbpXAC^uP z=V;bmufO|P+Ls#X8m_{hVYL6NaFex6C7{T&Qgc48=WKt9yr60*5QY_dkVo9naQRb1 z(xYXK&d8$U+X0~>GtT=2! zmA60$|Jqut;o1-J>^4FU;syYtKLmvfJ!v#OtfCES;8}J^PFokdf~~MxHEh z2x;y3SmEV)H^K?vO1%a+``LPrs!Ouz_vXP(j=nEy^ zRBxc8iH4}C*aD2|)49?-qs95IJSDSJ<|Odh-q6WBrd7M$a9hYmZ#PTb&z3wbIoE=o zi>(dsMO3HFr^Cd}rhd>j9c3{YbZV*do*Ur>MP~=*V-RctUXm%FylFdOJ5y4>F2$?Q zi#>16P;N`ZgwDM!KGnOP>Ea-a!g@E6f2dlb5v)VW&F6LxBxiu}3bSD+?l{?@xAc{G(rT%+Y8{*U!6^5? 
zFzc_dNo6ZMnrM4bqTcn25pa_P-CA)5NK-e4UeM*a_vx&$f#Yd9#|T@yL(-W%|o} z9T?!@CPXhr;l|zaj&6vz&f+ayHm}c@#KF*N3c89+o&Z?#n~lNLxAy98fRbp0zp#Vs zzL1EdGYVwjW^i8Mz{-o#;U#V;*N&JO!48EI&Sd~nI=s&^IZG>dB4Z3`+vOd*l(&6^;HG~X%EIqC36;Z{I z(q$Ua%xvLw>;Vc?JB?uq3)!sy5^4c{4Q=w@KG`{WsIun4{Y?1nTYExf& zIs;*Lf+J>Wj3{XUcK%n~p3_Emxc_^Hm$ySkk20$~q=&NrtOkc$8>w!iYKJ-xXFkff zAs|KAi$Atw(Tm|aAWS7Ls;-R}`tQxPynPMGQpuy4VGI^>GH)`744X8$Kpogdhxov( zKO8Z!hF{bjm|{8_n>qo^1_5N>Q)020^7e0af$%Eqe6J!p#iB-93u)TNBnxC% z&FMG`T-SYn%Rs9CK+Tg_0Z5y7e6RJz3zz&b?sXWGMh@IDXEYRMkFLSJ`IyAa!1*{G zY$Tpz&sJLW%#j}U_^qf~+MMLQw3RKrT+3JU?mLtfAd39S((s|K6p3yr`c|gH_>4Ri zz*~j`bgg-7Mb^;|CYtdiGG^Gc&#}a!;ERK9gU|#S*_omQH4yV=C!91?l_BzAf_jeu z7`KX)JwiU1jBp5Q9*P}m)rPNmjlJ5lH{vdQ>TCws0RDn{pH%=1e`q|ix^j_eGx;Z$ z6*~k)NA`+H`U?Z>H`A*p(kW5Q43faDx;Z!?MSsAT2oO%cBAsDFMQjR=TZQU=0kUB}#|iDf*8U6r6%9C+ zF!&O8qKZclbeHdn?bQKx0JTKmU-|(+5Jdt9sg~>G-tp4iWg8ps-vXB|d#CF+vJU3s zI@RC=eBSEcLC>2( zM9okeu-Ilm@%Qk^M_|K3xDcJX?4^8F;M+qwx1{pT$-#fV<;pJ8=ov&s4E zd$>Lyjbx`O+`l>$68>wzh&H42Wn-!Oy4*|00A%k|VTHo$6cu~}zABh~S*hZ6eU;F$ zs?+9upi7pB2h1eG5Tcyuw+~H-IU{nto`C?%J>My=DWuI6j+4sg2ivT8cTXjU;i9Pl z5YB|cL_a}ecKj@$+}S3=R__~g-sT2eUEbEm5caWEP%2~M9}M0a=&Bb7TtqmRJytqs zn~=MHPoEHv;Z;&R01<`FZ4Heb*yrSlc;AkG8Euf5EJ6F@^XFtlYlX1bEr#G z_GzMksql5LAqEGt9xQknIw$-I`@!fPHxK)#7{J3#^pypg(EU)vSy_iqZQLVgkH?@( z3y`%2mJ&cCKS=Oi&?zfEM5G4Piw2beOG%a?+>qLJI6MAjj_^ms2qZb_%lBZ0FSpnT z9+Q_2tvkT{b5|vJ;4B+_UA7Pi5E0KW-Z!J*GG%$No4b>fRC#_B6vm2FR77GpAwCIu zIfSI8gktjqlxhV&VwA{}R91wLC794enUl+p%P&WxZnqaJ&i>2B7^9$@CHG>}EKX*J z9jR9N=U?~o-#8LCj6LvXHm`o>6hLdmKwGA$N~(BjTy}@b;G34y1YIIebNro3H)Ya!*^-9jaz14}t2d*mzN$y~{M%}WRKWr_J9 zX&SfKdG(Vf zm`{i~%Tu6zC@JrlPc@!8yOZcLX!+=&(|1uJdqNG#6>9XrE&ryXI4}shHxOp0b?%DO zC0;*0$Tzw|5vMxJH5z21@Fzi}NV&gDP<>G&-yCN;$&y*|{k#nnu3#$gm}pK9(-JeD zsi6W7<0k+=PgH;e|Ar}z zbsYm>z%8z~i(CH;?UT)jd&jq@%}R$3j8F^lPibp?a*^w|C#D-9_S03pIF8ZocygIR z8%Y&X>n-ebq8#V~wLe{6=-7u+u$1XCJ~~>?I8vQBujhG8VSxmi6{CG++@i^~v9oVM zF)PVVi|Dm}{L&p+7Pq5TQl;*i*FgtRj~4V_?WzB1uNYK}3ZcZ9rtpO*_g*X;KiBhc 
zVWh=)LN@fKEA;(HWRN1jArcWECmDBV;f5lI$%a@GsZ%9v#^<3MIIiv6Q$8zJ#_eeaM`+u8n7%1qxvr6tda8 z^D4OW&5oxU_r_gB7T;}NNhgbxlQ{(t&pdkQ9ldU0_ZJ!Eu!#idwI8EXEn|uE8&qQW zCeuL1%LM64oY?PL+fjWl zk-!r2>Kqi=$T!YSp;#m)K|KZW*!b!ffGbl7W$DMVG|^WAa)77OQ}e8f$Db&(LtehM zJmDGux$|;&W_EnWygxWS?kKTm`7n7<0A1h&As03|N*PRy6y2d@Q8K9N6Wb|Pr#L2yige!E30n7GpaX1znM#s8KUlpHT2G?a$pAX5BcT$cA&s~jnZJrf8vRM6r)r%+!K}6)n zp?=Bcl!#8giAglkYB!d%dDDXISPlwb?d?A}9zvn@iWCD@?CGBq zV&$l`V>hL;azyX@qS~{B2!VM4^jKa`N_O!}e<@wsM^O=(XPx#($F`#-_x}1B@7F%U zL67U0H`eJcR6pkyewA5>tGjfNhjfa{k~mBN!bprB4EI-=UN&uvf7jI8>)rlxh$WR) z1lmUSd2^iTB>7l~w@9EN;eMbbzD$XYE$0;TW2-_kl+&Qa;WMB7J9D%0X%!@~0chNn zYT>zi=|`R_{a#i3hp$NFnk%ZK##!?MP7l)q3fB0;2u#n->(r^wLt^)am(De({W7x} zFN%2JY3s1aU<=g8NjH*Q4SaP8Hz^4PnYuAK1$lyH1}tdGDq1yznfa#;^tz%%(IzxJ zws$EWFN=^^>=vo=arU^$?lAJrG}_fV38-(keI3T$)`>2V9-|mj{OP^p;&~}cvfFO-Zd)>e{)p-t z<;HrB8B~4y*c0)Lm8A`PWOs{lQ*nv)GWYmw&G8e$EN3qt8NrfBNjM?MPBgRU-+0u6ItOc?3(|`x_PZj0tU1>_q|Nbd_y6tj%cpeEPdseAT z1Gf#={6)LHUe{Gt1y75FA>vJiyDXo6C1~5&_RP>9%e_bR+!rIq<)VxF9{(;cZHSLT z5U+9MB1kT(d4}>b+puw&0DGZZxnhg+_+~L`bJ~8%OAFXsg+p4)3ouD|5h#kzdz6-L z#99hGCuPhl>ogZ^29MNPRM!#ZddxV!jxskoXVZ7Y5m?zJ(!E`^@Lr`pb)r~R{4zb=}8IdaQ7p4Bt1^|~f&Ji49ZSKYsm7;2BcR#N!zhu|qUdkwF%n1%I* zzLa*X;^JMQx$Nz6am*m*Z1#f<5fF}ccZVr)&OVpjc&E5L&3HO3#zTPJ479y}GhXGp zT!!FY*e4|9PDKg%XMz1|RlL@hw)+C~ch6gGFBuSPD8PY3#8&5HP9Pvtf&n|c+<*Ck z7U4(B|20=4c_7H|@Oo$hrT_KPe|+uofolkUCY*SsL!Lk>5+K?9_hK<~&}JV0ej20n zdW4q7{B{9k-hVYp2!b|){`<#&bR_OQm~sRe%6M7?5h4s&Js?^A^U8me0;ClJFk1aV zVW7j5wK^=S|8DG`o))M;u&n<5oc|j?P_#5JjV1o?M}RPFk2Id~Kf2HV<+aw*>KT@? 
zu%ADK`KMI=&wcQP%?bF1+4hD?J-p<9qZcARn4|JvuQ#2&G9plQSHs|c8TP+Nx#lMd zG>^dzOYtV`XP}QylKg+;(my;K^@i9`6$B1+1p4dm|6A*TJp2KI1>;`>HU(@4B4aST zaOMA{Y>XDP*{6TgaKY#w|M}`fwnd^UTPJ!li|EX4X zaKhr^uCPtbM}gsSxAe%h?(4-kDSbV3DG*4Y5X`U#O$Xop&kqI}0_s5!KBVXHy!ls_ zulBE_3&_vh5EO~%pj@4J|8A9&Eq8pS&YY;-UKlc*xH1G~Lg;lR|DQf$hzNTb8rLv( zYhbSLcs@K}UNP;Nj!zYEY{!~Vyec!1V4?@CxGzn5Et{j&a8VrJmlU>a?YH7tW|xS~ z!?{V!D=TyV>h^2b|NeBxLMb^Z-K)Fu|La4z!o7B&JKmKO&FF@u%IBX2=^1G;^MV=% zuu{>ir<+`}kB{z3KsaD3n}MbC>b8w$|J42SWTF|3xlIYBc{neym;F^a#JSG`wawoR z{pWWEEke@+#udY!;rLKawt%dEJ7gQ&ssts%D|cEyg{nrGX(1s^&JCnUPcF~*QV3<@ zszUYV!vD89^Uoe&3nI=T<#vY7NfrF=edfD687K0ce%DB$gnPu^pE0X`N3kbv^7 zt*?#n^!a@qDRy{%i?E`(^p}P|6^TKEV~hNcm80+$e9vMs$;`4Q3?IrfigAzlGa)b+ zQ03vy$GRDJ%|r$5Oy5x>dSo{iy!-#zdVDYI0`{!NSLL|N#@03WmxzHn9CV@H5Bd_z z5Q7)BRS?mW{*Og2W(;lS^shnE;X;&ydJi1vGUAGl;=UQM7|+V~FX7>M=%c2D|)GBD(0(_n*SpF!Awy4L_mtAatUs;FPV*C&0lfp}4 z4CJO>Iuew~U?U0t%O?KULVJPkZVOO}HoX31hfvBJ5A(Y9qSc6FR-%0T5xO#v5T6|P zzq~u(?^BfbS9fCcAjs(zLsXJFpJ9^XHgbMTPIEx`*)Ru|F;HT=}Gw(Y$WjS=ZLRV3+nxBm5<2b zxPY`fO>ggJ>TK80K}OyXm8i&tKCs>aIW*w;0(~N{b6xmvCQyQbjrjjv%RjqZ_v`5e zC|IylioyY6*zMY6a2@eGjxEcBTamK0z{27_;cz1QDY(H+&?)D{M^~Ll&1sjvTBO~ZR1DjbX-C&?T(9qRxS>)o#XooK{D^T(d(Nvh_0b>27LJt;=e@ zS@9e1oB)#vHNEOq8QnJr&Me5Pl zcg?P_UQA5^gRtBClU5;8>F16SF6`C|`h%%Fs>YOGg?PlXw{^f&1EjchRO@8)GFEAy zwFeVy@Vj_c>sbpm$Mgm9ka+0=6xfAk>}#BsXPR81YqN8^ic@R4IeiY#8&_W#F`Kd} zFR~xwTpVn|^@PV;=bJyYbWx9%GoGyHSK;L`RVxtv;?t>Erlb zp!ia&ooffBtU-h~Bhwb+aOO+Cme+3~{Pcq7)@rpb%sXG4l!a=Oz7BVTCjWQ#t9Q}6 zBBnG69~9b5$knUN)a$ah?sBY47Q3aWNgKbzC@%n(U{ewkI}}kQTy=;xmTgYLqqG+SGU`UJ7`f{GN#*Dz~%SRi~aucAiFut#$9G_ zDbUG()aQr(ZiNuIUB<<{!TE)3j;Q0S#RE8zQQj;BB9lY{llu5h2p&&tSEgiYFf?Cu zd#*v<{$LOO$KEs(rAmX9h2xenhbh`O0<)5>l)kKA+cQH{NXDx8FGLd<`a4 z`TDT828wl)$2zpA;0~1IxU4$_^FLtojEp>{dJQP9q76u=hI}S2U?_@2*8&^PT zR0;H?s=F;qnYbbw8)HyALk_w`+FX1erPHpFsblh*O4aHkjYiyJEp^zHW$<-qI-c%w zpX!e2T{re;rBph!x#ybJqk5_YGV?}A!!UX!J~>;GEV4@{Tj3NB%ptVkyn7Ci1*Owq z$v%joLe!Z2=BRcqbZ9JE_6TP?^LQ{cT!B*Nk>sq%_!<|p=+ z-|^qLOUYNZ2Fw-Cyu)`N#FJ&BB^T<$9eTY 
zEj^;vZr{V0LFYykOl!LLmoSe>NR8;z`f9o{q6u|;D_$r{lKDDCPr)t4V}K>Nk` zEM9W9H!j<6T}%7q4oS{z_PMNCxOVnXQmB>66f{;27Byfn2FXubUrN93T}7Si-cG;> zYBd?!DX%Q`MrfZbVHg@iGrPhhW}VxjBXCuIQ`nE%s}rwu8o3}-IfTcjP2(ZDGlW+= zduTSLfk&dF$VN}bov%%E359w|vUHmkvftb}ew5xBtrfLz+zo>J)<10?KJrA5n!uy` z%%4Q7i#8~JjOtq}LO7z{mxGki2u(o09+&4Lt8|h&5h(JHkK=3f& zo7qC*dE>`87`*`xi|Pv9$oEo%o&WI@lZwd8okM0$|Dfl*_uJc}vE=^99_%!m{9e zdLr#o}_AD$lgNBH72Z>nUMhj|*fp^6%ee?jEknvz*D>b=ZI+$SXH(fb zcn7iUvUJ2}AFtF0<4EJr2P@`l&@|GXSlkYj=UcaR4P~{fiT%+W57w6j;v;yXtrneN zx$NxRt3*WKvzCHYzFcDw%ktO(3H&S{R@hPc4l72e*_U0uQ*wd&qFw|WtQSd%gPfAM zRd<-;B?G4H^4%p?T9e<0O$})WQduMF+iwq=KN*5y76r&zY_$1=Rp*YtQbg@65)HYI)ldOxQ@A6JZ^P!7? z4Tkuv{ce)5%liz{!wN4|SikQinQ@c1afWf_D8QF~v5vs(AeO1hz>23DHvLnbhugul z`Q~(uG#{~&YRP0T?JBNf^j4x<;B0VoiuIG7R%j;cR*3efo(kX+`Ofp%n*iGz z-QvS~Rk!Ej&9VtOduzlGp+4tA^Y9gqwC1+jqgs>XlT+_eVp^)XFV9QUh(-nX4Xg%4M`D#Oina+w2RZNh# zzhvHB?0cmRILoEONCeyJ80d#K^o^q1@Jy{%$G>wwGbn~xH%rews8d-^woUqi;249z zmqshZmqJDm`K;x3jc0M>f3dlXwK;zYeSqV4As{zdV7*2wgC2m~oGLd{vOIsr-YVd^ z@R?kh_$blW=$@W<8jj}$c)mH1vvi-gF%jAy09Y*LCMS&UJ~F*&|GG7(JHI9@L#!8*;RhAWT1(9>X-&1dyWOAd2~AJ; zQ4%71WlvACo>QY@$xOdG?shp=!@)dRPm(AI44J}EEs}e*IWD5yJ$6SNQteETWnZp$ z2OWEdD;UWh=-ciBj7BpC5C@~8MVuUlTompMr3}mc8L~(2f@#pd!E-rz>F%kPNHP?> ze5UiAb&aQMHR!{4F?blZsfxmg?H%lx3I5^g`L`_~7%rYzRE0SSgVLk^tM@+Ty_pjG zl=JuNjGd;dINV-YbJbXuz0BPaKa0jJM(6n5cADaI#h=Q4_2$Z=b5>y=4x~N$tHuHt zL)qTEoR6qU@qvwGJbyn2t*i~0RR6|Yg0e?`PJN_2d&4c9{lS}wEO>+UNpS6OSH4$# z!v_JY1@rby(sLXt4&$ipqC0-9m99?tNg%-&uiZnF6v-9b0rhMSs?a|@c};du(B)$A zE{MzJ<0%7roN9vim3))>0lS_bgO}Q6CG`v@mQVZ4!g+%ys`)rOf+Qn(r;90P;t1q~o3&+esl~3bY1!3;PxYtF@(jtj)<6C2 zvboQs=l6?aG0yHfX48jaZa)ncqv)V4&^zVxhAGWl!d)N!9=xj#hYLri?_0=#Nbfi! 
zcRe_V3RvDyL!K=+_bfbia5UO6_1tPun8nfYCtq>7R>$JK`@DbmWayG;e~Zp;as;E} zoLIeylz?xYHTONB6gPB@Y(PF5we* zQIs6L@jbhLj`r>+Mo4D$qxA2S9eQB`$zP>Lmq2wI-yEq&)F9<1fcgKHg{Y?A1?hQ6 z-x+1;iRhcqa7U}IVv?%r*p+h6<~8)OU6K1U!u;}fK0$+INU)b`2uX)emZJz{j7J#$ z`XJRg+bqbL$<{)A$a>_f$O6<-!BD+cky<3H$}WLIhAu@W4=$5dBZt|KOCD)7lr|qQ zltAfkG?*ZtGR%xctFeVmsl<>iuLpYp?34hHm zwj-8GdJ)J6SyD9O4qG-1t+uFbth@@^v||#@#taboaA@hIZNCg=+!HGfCOG46)5z|B zk&~-f9VM_7+MCl$(tD_PskaK755fQBGf>VcdB<|TqFPb#^R1d3!B(S7wIEZKF3v#A zN4%M8WPJ{JS)CLP<|frr)OgX*0In6eyLvMfMKW(d-SEEa7waKB*(CFzth})#I;#7u z?`M$Itt8v!(5`=db@%N#+!3N=#y#ZO7=8Z6xIMFTY)?G zIxlughuQBYjh+1nhA>kBfuLvS=+a7J(ddDKXk84{gfn<+sQ}2<=TJh1+I;kt9W@wh zYE^57zA6~>VQS~3c5xjqs{)33!YC8zFDc6$35GAeU7beGJvJ|5FT7)^Y|b`4Di>zo z9}9-UqhTdH(W%X#cv@JK=Bh}Jsp&d|8?2W}CEFiIj)r}rQ{VXR11`^L9#vB*czmk5 zo=hk;gL70b#)d*2Lv7PqW!MThBEh>~7=+fi9Y#0_Y4c6peZ;!(5_P!3%|(C7 zY=KKETz&aS@9eF?WFkdBjdAm+)iZyQt$Jy0cG~{7W=VO&ASe6%@1o8^Ar5+0i{E`T z_8y!TK3eNHHdm2NhS42fvX8fz#6?<}>! z^2Rlnpja21U0V9!uS!uJF#H1RHuE^WV+ff++=Fs=jr4@;QA8{Jdvi>raS`cBfGc$-)Uk<-p9HH5oEq+r!ZJY=hV`)m04o z_h`mCAECA}ofoY<9d5%|oH~_!DY?7EG4?Io!{s{Z*nc8B*xJlN|ZoPzWxVu&)&1KZp zYsHc3w6=RL*J{Gug1{H8lSCctLkbgCo$sE4S4DbaM&rKx`G>6)>`JW!D!T@A5;9b{ zFj`pynh{g)`(AnUN^y1yyiUzL8S2&5mHwPs-b3ns`QP818N!Uu@+zs7&Zq?Fp3LWU z1v|{TF52&lEViG2DR$4iQX}fl^W}QbHKo5n}kM<5-VR%eesE>A!u(uSt3iYqA zdNWJ4>oAB-7Kr;RxAtdevD?b={G=D~EM@23bc1!So0G5_+tynTyK9V&w z680vYYh5HUMDX)db(Dk%NC`*aYQgSxE6yScc)w(&cPt1Fy zi}Pq~j34H-&y;5&!&CFRJgyWe?d*p9`~gJj}$@;lV4>{AMg;h&}_vBCfWx6g&4; zeb!R>XZ&h^@O53YRellpLn5#B%uq%y4Vxz3jt@txGXV(KdZKEpjlH3TptpQ%D{^#j zZnv)H`&4f$e>}htGrX)-eBJ0b+P!zWeiz947NKlk3f#8ku_8V2x9-7a^+RHSHpHBJ zsC}%nQ;W;775%%!5S?sd9`aA z)zU;c^otQ;Uk<{6iZ0QpxClc2g!X5VRNlfy%9iS{1gCAw0$*mDX7Q@ho5*784;xYp zUj**fLodeCX8NG9&y?bOe-~;WHct*{kC@uM;VZwWh5i{3?!mKC^^NPioG8AS1~Axo zF|}Yo-q;Y&IlJ$R`$0@{+0>(uS#2dY{DL6LY?=IIyn%DE_Ut^wI;*9>H;d*i+IPOS zd7GXIBW3o5+ao#x8x%M1tLHkxwU5h_S623cmL7rEHu>jUD-9m1vHB9@jfdUrerlF9 zzQ>U6pu+cSydk@bq#Zxv*GKGQTQJ|~9Ch9;9Fv>9)oeWeYPWK*bNa3}#ZPbv4t>-6 
zRUpJtwZn=^_OFUYl27pEp8F&uA7t>Rhx!@CF@Oo)H|YU59+HBxAv+%ZNkLmVvKrtoA5(6wG*`$wTeok&2Gp?~$UKtuT=p*0M zK^hF`1N5LR(mDck%oz}?JV&zyO+LPpAM31gQr;U5+qJ53grKyPA`-7Kgw>kb<2K*p z?OdpnB!r-^QB?Qdm5s~iMURrQ?KNfc-Kbf=}fd18~;+MZh05kH>61B%Lz zy16d6M^CLg2$rs2Be{z5%rHNdDy>jWokFM zDDZHS*w@wHY)e3GeLxSZ&SvMkprY9*kj*(}w9cdHTUNMtmrSv<&!V2yyFKrAoy@T| z=FJbSj`+Oi3|SX{fRGgX;7FZDov#M*87aK)v+nb-q}&|e`ry#f$%kZbhFZhs_n1Te z)oAo951&O0cXgtMzG1dv;xx(R%@A(~L=qH?S+6fjXrB%W*N?paSVe@tz-l9z5pxlZ zc3)9O*QXkTuMh^b$23gdU;%z573=bKaj&u7e0n}QnTRvKKS znya>!&WH{gbvmY&dBlEu-Rm5w%p0_sAh{66aZLY5J%&b(TyDGX!+bEhQS8|MyT1s7 ziAD{Ld4ZE8-FKCv2>})D!}v+@{I8x0!uF2|yxuSO)#Ykz*oGOUp6PCa9A+=0^bp%H zW#_A2jh$LUKdX_V^s2b3Z7Nm#ss_44x%^r?Exc4q#qy4j*G9VeaJ}wI)pC`tsCegA z)E`93;g%hDM|P~<;YH=x>2r6-?i_PD!L|esCWS7c0hsa)1Xw&iw;`6=MPKD-hL_$wgRT%iiRVSvd0VS19rHDc zQt=(!XMy`_uQqPO-w>@1E~tXtZ?~ISg1@?% z$7pG8$*+qZRKbeUki~j6@nXE|=@!2%m5f#-ql;NBp|dahri!sD3T8{NI$WResH zPl8DM*&q5sewwZsfvaS&tmB2ne8+4dJZyi6eh)e1SjNYCo0RWUYrWE#3}wGFd_bpw z&N7OXCsURY)*ROhAVWJ~nVaj4kx{h&+(@X0xQ8Ft#vr=HXXZxo&PO zQbn;_&RL%n)NQ%|_Y~NL&P`vTZOoq^tU8H28Z2Xr)2oVCVwCSh?0(@0s*9wrFCssS zbwMf{EkslXW}5ZCu;RD(eM)b{8fMakTbe?=3#W3gUoTY%Vm5%4wq)r7GK9V~s zn92Q3WKT`eb2?tuW%GT!my|&YPT^dGyq(J~LSUO?3)xW*BM?gl`EvWNT>)iKhhrC| ztdFbN5$?*MWp3uQru^TheQ7^rB2Kw@n3#6t?yold2qbeSC9RkGB}>|~?bjd~gSV-u zhD>u8*zIA%92dB)I`&53psBc=PgqY|xb5Aw9#U=3bG3>P$lI*7vsgTTe1DN+DjeVs z309qfOKSG%SEBV+{?He-p;f0k|_@!(_tFk1B^% z=rW1jRAdns%jl4_+!`n1;Cm>C{XU{CJh+ORbZn8V{vs<@VF44U6b6px2Un(hH2KS; ztjtGvV1Rv)-lbNpLeAK6% zk@RIAe4B}E?+Pu`c21`*D2b>DEaJpeD0uu{3AvCIz{X{~<}kMMwl#n&sp)N~A%q%X zxotq2%+vO)h#u*Wd^*0}xjll82*$xd8+3SNRN-F#-B=!XlT@}J$v@&lTrh<->-TL` z{y(zLIxMRGYxjyEB_%Bl(%m325`svJ64D?b5(6R(IY>81cS|S@(w#$>NcVsU3^8=f zz}YhkG8zB#?B{Sc(5> zcpSj9#<~8>}uO3k1mNWyAQwm1eqF89EXg(#=1w zGlfkWI%4Ril-k2FBOoxhp>Di@s#J(FhG10;(I}*XW1QAjG}Y|O=^sz0b*HH)Z9CE~DF<_Ge!I54p<@@y5WJwTGPiAA z@zNA@5ho_i^xAKGD=?ORn{Obniu3*<1mo560F8O+WCj=%R1(36Ks#TI;6x(!Yfr0f 
z_sDX~W~I<|?mO{z5AEIEY8Jpl?I{8CZ?vAN^*PM9Xl6*TCL3j_*EwDctNqb1m&z{!Z^K2W=uE5t`XgW>BPh_Lyvkk8V~~mWd&oE@|d{Q_I%a&B>651eyMn z3J+FNsd)1Zdzti_sg%!w6MAg^lwaXy54)(}qJ1TId0l94GVkwUQWGq*F#K#DBnJ!A zA8`59c*I;d6?)Vd`1PV5U^8!V^a(}B0QE6|!YT1jO2E#2aah=fPMZQrxaAdtDorDP)otz-)XnUjW=1%3#oC2fQ8RIX znp}Fw*?x-U#e-fZq=d%PGv_^>29l(Pth4Ve*+g4&LN0>^iE zSP(HD5{7*JkTB3_*u-Mp9>VIR{KOH5_O1LPZy3!vz0AtC@fzQ=;kR5X=KDCz6|}K( z(BM#|gTz)|AoF^(%$FxvdSsLl~*W^Bb_1oLJzM_V@ucM49W$$Xd&%NqDvh8p%IA}6PDKa+>INVOC|`64qm>`zoM{)_a8PlIdr=po3-za zVjKl@#H<7paHV^FNUue(a3qJ?05^Y-u~$PjFH?^Vu$@!u(ZVcUsWg7II5A7!e@niD zJHN#>+g+dl4LCdw2>^GJZnC*UDQcy}Rg=`ZS}iU6eE5KK6h0%D*%lv&`F0A`NSWVs zWd|DDPEqPHU%+dv1977H{76xc4un#bs`{&JG3lcBD{1C6?CtfJaTKo47!s<>s~`JC z{UmPCgjrHzAfG$;#x1%3GuYoG}0;^wkIi)lO<#NdfEvu{C7mu)B1C>P^A2% zf=5G;2GlCODIUzw?0n-_MLI|0%FmavCB0q<@{nKUdC8FdMMxBM%ul40v*?B+<-fi zoc7$D-sFbph$8!fT8A3a>g>EyNq3A`pwEOc_?!~VC@;HSuHzGo?CbAa<`brT?K)r7 z3|Z-UY8aBcLtMZ)bLwOtZ#l`EwaeZNIf#jshNzwINcCmB^WfRRB$9$#xQF+4pl7NE zD0vk9IQ3XN(>XR`tEh$?P7d*)`k3JTLF-7~jfE!mv+Esiy%Yv!qh`kvNsEzMLa``D z`WXBQ66G+9SoWt9a`IFgUl#T~`}*Im%~QNGrJ&K)j|pu$3cJ{EJGTCn?DU9Q(ho_N zakuYKjhygync?waug0`)evo-E8ycjCVg!p9Q_h80)b&Wf@~|zrEveyL+vAY+5G>-p z77gQvxUjgY2)i33jUwtfA!N8l_~1*)hYhj^doum)wM80KaO#-GS6@h-`ZXGU1$)#8 z=6M%vZQSgPsV)#cP^S|Czk5*hTz*Y-C*R;`ScfRJjUdKowaa`FZOf~#&fI{ZYVCr;#A1KInhfR~*v4@4^ApXsKHiQ^snmfJ($#I3v#2B#jouR6 z&u}U^p%LgD_Kj6R39+(6LW%I$)1|fc_;5ubx$0OWklC++kzKgrSM22qjC87iO;Pw0 zF6>WPrq3_XyfbIUq8WPEkR+4{BetgCB2cS+ixq=}U2_m*N{|>pliWW+N~w~BQTy$E zYuoF?ESNFt-|z0pjh_-;PvYru5dXytE0&+q_G+VJ|Kv}7`sr7=2>c<_7lAKF#_f(T zOl*j`6ANK@3l}e}8f3jYX zv7=t`+O|sNv?47T9*H4P-KegJ1Ke{y=Qn;TVx-Rk-J|>bULXzeQ}abU$Hqbb(E=W& zsrgIEtc>@>+zWf|+I@OCQpLVrttd@PQ)zv>;LJP(+sHDVKAVJPkCL+8s+mc24U$Mi z^VgJ6cFy5tV&Q?fAW1&D4J<`koh2w0DwpEUwB5%}-y&OpHjaN;dKSzUcmbT8q@mqL zGTe}!*m;=dU;A{ZmwcomDZo7O6(pmRGEkC+OS`4g;QN-IR--B&t9^1YLoqIS1zf@$ z=|ao?ZjN_9yEl!c2&d*SwKqy-t6mivAHiSo(T#Lb_eXK1f}gZD`{IC`!7_H_0T)n${*j9eY0*}ba@fuiPuH# zQBCTuyFs_resXAsBNc}7WV+zhclhE(yuvaB 
z7P#d9?c6?MS*daFU3z{n;-0d8Yu(@c*Bn1qI(UoKgH%JDlC)M`0LCh$S`J!FvmcUI z)^f5=`Uhs)0W9LzgmO&nSQe}CrQc*uidXHDn0zzB8AWthRFfM*Y1!G%#C&s?58Wk} zf9MtYUpf;zJw4!#$B@kPn8$I zVReDC?ZCbfcCW40`A8#OEJ`i)A~+{B&t&$kZi80aU)*QF=ZeGrpsMD7r!SSS_OJ7a zO$4Wl9FQ-8t`>|XWZ4{55rCTo(O{(aGrG|0wlw=}s9_b*xVt;te)T z$OPH36bEoul=Gnkn48U=HP!I+LQtWGjn%zNO{I(+o+$}#h$Cp^#)9;DF|`g~>k)Lo zw4pB61#!X9ZAtZjbU|R$n#U`D=7XHGtLgyzaQRXFRUL)f2+623^KPOn38-M-`<{cuE|hRbBHBq)3_r~X*+~@Aq3n(WgI`qn@cz^ETFFIkK6&~c7CARa zgGG8C`6;l;Boi?F9@g`&ua7#szqf%4eI8dAArpa(_No3jNOhg% z%Ln&3&RcIYI*U?5V^{)*R56;gdBy4Uc}q0uvuF`p<~?nM^N%jo}JA-+uAG%a)39;@aOVGgt9hhH=<&LqA0REO7 z!GxQzOCceSW2*Ab{8ou*g}5%)d-h2J4_5*tnb42%&8r3o+HyX=!L)e6s9q{NrNAY( zil}b<6T)wGo$ay_8n5R@*Qg|9`YF}OAm_{bmcPtlh{APBP>Ci9tc~KK8%nu62 zI;efjRyAAYLZAv=#S%^yv<#j__tE%;K_uiW8KoaotT4nIE;hIv1LJkI722jt`~d}& zq^WA9$~Jg7bwab$baA$-E9Q|?OOjN{^ey_kw!M&ZM>a1zXwjegekLw^IKiaB={2T{ zeY#Y9=)t6_OA5S-Hr1?B?*8xuU37}0iTH1n1la=uC@%>=)w`P8C}t{9Zn0HO+8-)X z2$;|Cnfy!a+~gA)v0`R5$42a^%zCN%;RHP|F~Ny^9(vh}_G|X$&z?rInTKi4$FeWe z8P?bQeGmrQM2SQQ4(?`)S?bldLIOBP-MSQ(P&F*MtfoV5uF#OHr@X4GHi-<=e2X+p zNi18Y{IpMf(PAdV_S(-#Ab!R9FQ%BRk21CfjUERwGj7Rh9Qlmd1@~5 zduzTkOL=Vx+hBHkwCH(-`ppKx@ld21PX|SP8BJkoox=P?tfy=^MPuI~|D_-0Tvep7 zbhnJ=x2T1GoL-R_o`7q3Y^;=)|GjcL#l^2a<_ z-aM=|+tbaCC$OkA8s2}V{;QXZVGWl}ekdtA!4jJS2iAw9dCl)~&dWw?&0%B=9A8m+ zXL&qw+RwO;abpCoJlV~icqKWx@@E>?^`S4HfpiaTm4Xw>V%kvNXTtF>_H-$#=PAh) zU@Wo3@T7OhE@7FPNUp(75mRLd67B*yxT9C%hedJf=&*isntxyjye}2#I!{RzG8B_- z6AF9X(8k2Uki6B_^OLEZB3JXNCT+}5=Xvoi5#?8F{+Zh0u z)Xdz7ClAKmI>>#e=+^u%sKix)PCIP;?E8cDZ)k1n-DtJvCad@DD5sLIjvdeo^~ty0 zNM|jS82vSy=82TrO;vt2OGPfu`6YfbuH_>-^EzUb5r%J6Z8q)&)BlvBHaC1cDAbYFrjyO?Li^-PY#1z zYrq#}?-Gt4`RYD2FYTm_MoE8>?$A)`TJ-+qT1b$WU>0lL%6%>P&|Y6yg<{NW4E3D+ zrk}wlq`l(o=*bOF(7HC;mwefyFUS4esMm7_mYd$m0iOkhf{^q7*Z7fi2C^uy>F%~ zAC5x6sPI4@mvol_&y>V!odtA`QaO7Skjk1w7L z_4a~r%2XVeTz@^4uWH-sAfU{~6rfq8Snye}N%_M7fi{&tfCM#yda~RBTIK*ut<%%7@T-dKe!vvt7t(-UmelAY z%4sl!Q(Jr!t9RXmUgOdk2q^bi(~CoGtc# 
z0%(zOe9{b&)EoKJ>x=uES4zHUX!HA^!v_wCzqT_?^uIM6}TeehhY6zsKX)w*79`yDxo-%`EHzoN~~$4Oe*q_bvEPfkT!pgLNnB z8BXuabj+=K+D}y{WOItEoiaMa`E@hcw8bAKigHlzeLPJ4{C2Ta&zvPWf#Lr|83KJr9Q}99Vt-&QH;u z&MTr*!W|nF6#gWeK$(GbVK%}DV@_aMyxN7MG1@(#?}LDUJJ9#vB+yOvTmwS6d(cyX zdtS62Fg-`cjJUh#JP>P?-^BPi6n{=JmsObYen{t4_0}-Y_GFRd7KcEkDEKP$QZBFa zppEe=xtip;6V0_bWNx!Lp65-u_M$6doG~GOzSsX=`||AioU#Qn@hXEPOE!DX)Ukcf z^U`J0GdB`JtLDQhc};8gh!fjyv5{YFMx&A?%Cdx$-6kwK{*_Uxy}-`%UazBfEg zbA7=mjj`O;emAuAmK`!piPB;isdN(&NR9dqPCDKERwsF>hB?}L#x1Ct@61Z&SE9Pa zM&;&mB?!*}XHItV3+vugxafoHdXmAH3{EB#Z}1a85}UDODA!9o_G&v~k$V0_M)$=s zw4`kO1~D=IMn{G7X+CX;GBp{WRK|FRsRU@J@D6=6%es(nXUVB;l$iIul~44xisgB> z#>%aIe8|Z6mRSX(Ls^XbtNox)dO6rYd@PRmE_%z<@Wk~4WO-4WiWKJ+NeO+q~tFcx|iTJmB$-*Vfpvawu}D-`%~)Z%_u5S zosNVL_iuO6*XM(B&t3kF{{C0y_3r;%{~!}8CtrIeHA;9+EJ8*qa*BFd zKKbG{OOU43Z#;th9VhX)niKyg?KKBk{>XkO!M2^|0&m+G(_#G|E#SYu%m4HKDj{F` zyn$4C_{4F0G~B3btk9R$iu-56P=)8qS?xxTq4sLqW5P!CKHJeXgIzw{76|$rO)oGE zeb*`41frw~zz`y0Ubz*5r2#BnjJ*ism%Pr>1?~c+tg7N-$Vlq*#wz?%oFmGF(Od;( z^$f$heUS+|Y4@I#TQsCX@BoOz(t}Yy-jBUCn6(|?djntfpantYO}v49H83#_zyme^ zcpBYa8{HgpeK)QsIoSqjIP#tbI$zv`fbx*cna0g$)LseY{Ow}$;frrfxmH#*lA3NJ z1FqeVQfI8rwpJQVV{Ze~rEZlib5MczZ>RSj_D|%)3V>irq+xByGn~yQvY(oG3~!RF z(#7C*jaB*zI&DvCrP7af=drm3_*5kA=$*EK=R zYOzK8zGc&R$4ILKa@|SrK0W3Cw9)Tu&~BbC{jNPdp>bE&_IFECBud~!@X+bTe^S!p z=o{(~p55i_a^!Zj!D%VGHg9m5fybN2REMlZH`7WgzA^_@Br!m*el>`Bmj2M5G{5+? 
zOw`3cz3g&cCN%u^ah>gHF>ZH7?D^lTgc8h*sQM3S?W^pKn}2o#O;e#Q=5M#pCgUd@ zmt&l6*5jhZ`3}x=AX&+GhN<%vsEKabG6xW0rQI7I9X0WR4Gt}3C(Vlv;a|d&J=ir3 zp4!bgB!b(F%|E~k=_bszp8VrbzeO75ixe6qw3;61Dp9wLyyX~6t5Hl7Dd*dJHQb(|#E%N6}X^KN1hVo-gk+0^5Ux*R0`bjF3fW!2ABM%ihOkTMj@p zG2u>_SiCy>NX=5w^BY}Iq}G@P-I2Ql(#HOr|BJKo@9)-e8TloN*W}K?Wa5lX{ z@udXcXw~teo(zXhkq^1?Ebwgxsy9o9wf6>eCk7x`^}6~-p|mL+ z(5Y=&aVSTZc9CQEQGRg%$dzgK^8(=5372QWG?@7Xevuk_O4d$$D8oW?&rV~oO_ur*|%cu zJ?e^K4b_`OsC*sT{W5u^BDJU~5qDf(*x2P(F(53OY)qt4^;+L>_Tk7k&KMiLS&m zif(mzx?CGa1Lh!pz+R4X54~c8y|^9#ES{Am33R!%hNkt<^f#sRx_|if z*?{G|Fcg{1XFUH`;4C12hlAf2Wm_N-&GKL+ljAPsC}KPLvP^k!;ICi}mK7@`6#8e* zXGlY1-)^b-JSUP$^p_I*xnAoppda_yXLOkU5rbHOjQ51FA863e4QYY%JpO8j1Q}%!z-0z>Hg*jONEbEKU~9v-4h;>lNiW zm0rhj|4`}N-Ngg#94`T8uvzS1YO;2K>$!ynbeZD;+_rzKEMc2Xd9F__>yFsG13VL* zgf@2Y0gG4{TlqCtebUg8tXV=--^4l27C#3a%y|Q_Y8SZ7bjF>=h5LV+rrE2-I_j_C z^;A7GN*vLcp*N7mI_nRHXkcauNE?jAq!zsC+a>O4rRs?Y}yqYf0%>J4KBCBig z5j@A$$y_;hINDViA-L4Sb9s|8>5;dQt^>7wMI9G zOe?a;z-_<>efi$36?-nIJ4GGlA7FI}I zZ87&j=LQ&(MfUH~X{aN3NLQ72*Ko?bVaGjXvLDLl{9o_#_&`cvqhh6RD8F`)rgyvF z-1z-gQfjr$HLXhvNT=`-rl9i4chbRLXEZ<$T|vC2yVcg5YjOO6I^1(QZbR46MEXM2 zK3=R+j))UM?+q5N)EHV`Za0(YbN^RuAMpRe_FqNp-wX5G8_daXQ<1i9aSZnX_fiag z)o$$6J2;--P-Pv346jS{lpVW~?L|C+RfI20%va9LY zzbaIR0b`EbKO`Rs5D5VFx-_VG$MrwzxnCf2&f5Qcdp;cC`tp=}Q$z9k4ol=`hB@`( zfANX_c}W8?Ny9iy&}Zu2wyF>^$fj8w7gI+lngLN~&gv`x2g3?@)gp%9 zhi(CVE*9Qef<+m}FQ7!li};d|kjOt%N$Zykm99;&1%lQ`=V|I&Z|T1HygugTNi^F2 zs%yj{d7N))ZS+wQeL?b4gZ{KAbIQ68!XY>*>3-Pn@td9fA)5+0wk4K#>Ki1Cb@>4{ zVKxNrWHjjGcQs+beRtCkbKgOD$n)^KyVHEdB7kW_`f_+z5Kr2>=kr}RE{GVN{=)nd zjeqd6&wjJuFzWPf?WX>uy0Y?&vcj4oU!=1lt-TR3J=!G3Z2?9~{6AYo57 zJ^gCr;p+My?u+eQVfJ|6%L|~8j{aQ;rOdlIjDNjd#cq-$r%=S^aTdp&nd46@a)5x#&h$hRpZ*xEs2$wV~B*E;$m>bNS`zl0;orIM`(t1ZE*$)*+N9=tF=20j`aL#*oAWhX?CGTP?%9KlWB%m9 z>H?OHlZlG=dq2WnGu|fpJM}z8M(JfGj_ejb`SE5=%;V}i_K3LKNV+Vun5Ln)TKrym z;Sk7IJ>fsh4h!9D?IVD5kfSJ%SX4R&21V7Nqk@ec?_?UpTy4Ult`?=+Gn2?v!G|{bIk{4c^f^^Hacu=wtYf< z!m4+@#5a9_lZ%9@pGfURL@PpZj?1tEzcb{7b0W 
zW}y!8y+kTp0-?sEo?jKI2$&V&r*mWR^frIVeuyc7D`lpytJPF2TZ`_#J}oR3uz+(Ov@glgmzWw%moIPu4<6A$2D0Zk_H#8>VPY)9v+gd< zgX4?I#`Xz6fH9ZE%@>8-dbUf8wq{LBbU*qHPb7*wO7N~M3H)s(4HZ21%1>Epa!9Uh z&V#g9^{Gj&{1tunE_4%z=hY(wkmwv$E5`vHb*FZf-)9?E>z{X{;FF^JN9o>I5uij#~lxMwEv7q_O_f=5%N=U!zB(1ZEfF~lO{!8c0}MzI@4-bI|Mj6AHD;A`ig=~r`jyd(SfDsA-c zHAf8OZyuAfYouL01fwv2`)Z-T5=yCb&}7)M>pmx{W`ERE{*+B|DtC#u`u#RoKJyD} zb7t`Z@B)>YtnT;cS!m4o9QQf7?)`Ug&YjNv#9Rh7;pR6uA);0XfM>^c43Kn^7<}J_ z)!JTjoZ9lw8uoq>_esDz)6J@v%9cczvbG+!#z0 zZaT0exhJJ6=l$e|W5-I;9`Gqs5Em_EY1bfloEIUabgZHQK@D&4 z<#i~G7bO1))!>5A!iKdR{Jh7L6rxhQl{FxgZZ|^P<1^zBZK6Zi4LT&!rvcmNvBT^N zL?azpOAQY#QQ8|-@*pQ0D@fk>x>zo)HUCzGz;nxEd;Su4d-}LN)7#A7YgsxOI9Ro0 z$e({%(DDRGcjV^}C-K&V1-R)0p}W_+v7fs54n7cRGdR;(&L!>;5YgAg5WW4Q)k2>5 zEpUC{qAs<^Fz2-kOb34ND|`UFtp@KkVcWPv&cr zpqY3g5B0ElE_LaS;1=43_mdj7UPTK!o=sGHsCSO(9j{Qjlg$Y&lFN%38Lcc=`2Z?h zB#&w~VHVGyIVj=WcW8c=Qj*2u3&*p<`?4n_ZZ>Rsq-Jc=`phAdqp_LEN&}dI^=M@Y z1Ckrh(mRNX&=Lt$xuvGm>UW=jc!C zFo(vr!nPFCRg{*?*I`O4As;LsyukyCk&jO#rv`n*G|>;$i2J0E9%Y^2)@eRU(A-zsxCHn{47O4>X!=;P&%vY7aU< z_+xzDHd~deMtl21k}G=hzTZ{|!>x!T_|$=Sb10EGp#(>A{aQgK=r>*cN{1>=#9?@) zIKYeGe0}lJk|7!UQri2X*va1Ku=2ao%SL1{hM>O?r(C~JAyUB~!=k4+RX$4v+r0RD z38qpgv}ybH8>*FcW00GVJld9NwZ`k4Z}!$k!M=hq*58l$v{qd)j6!aoiE6kDdg!8p za3%d2Ls?{^vl%egYRLNiRRkbeQ8a!O7vP4RzGaJYPxaW?okn-e~__IqPH zER7+O)}tg@}1fmYco+r|nb z6-c#jMU^$veG}@F{o^=cWigMn-t$NqJgC6#sgjF%r(f7OAjDdJ(|i(N$U~N}0tAvB zy5^MIO86)fG*jB$kXRvD5lNN_DmoOn;kazB9QRi@i?Y47JWr`<%D(6~ehP3hsx<3t z`DC-5KFk-X<^3Y?-=sV{&}h8*!+SweBFw>enM?F>zNLN&@W|VGEz}<*wV~n58uoc| zX!iJLWPwC7PW7ix&}}nsJItOVd!)+AreZF3t*@h(J}IXb2G{?s|5prtEJQtDvIi?p zciO9QsqYLXGTp!V`F=RXWR-+@X^X21Xl&9n6}-;roUX^-{-MEQcy&n9{R|(y3(zIw zYWFSl6Y$+|3T*Xlx6H09RL3k#t9J6J5N6&pJ2q{5?Vmx4q5ANsxS3wTa z#Qkm3-3!LJq&Mq8f>lZL+_<`~-CJ2w`-XGs?pe!OvPEEX^x4jc8ZXPM2wcXC57$y~ z$n^UXYulg|PBMAO#@n`=sq?jpf7{J=M}|BO0c{YM>sP(P@%^gGH^7Ius=ip)n2Y74 zI@Vh31ij7cqh92l0Jm)nVpT;|MgHoi6r;6e0%0afl7Z(cp;>{Z@&jTQn&ovW?~;@- z`$Gy|QHa7Rtor!Tj6PUWt1_1AkyU*wAta-(i>n@OS_1;-gv`cLfI$Wx 
zkFEg(AMzF1(I7?A*kAF^_{;=AD;L~mjH#}ab@|AU3;MYHqk@|^7PWV{QNV6eg`-GerC3pkp4?-?E)5e9dm9x8F z){?ybzV6V~a-Z69Ed=F zi}3x-cB1fqhjT`aJ7ro!z*r&+pclSerke$hH6ZS4VPz1rXyR^><>FmR)OOQ30i=8G z#o_2Pp^H|^K$T%bpLaA;`ws7RH9Sd<823-jwUo{OO2l?`r#PFYG%E>PbAy*}5<01lyKU*J(;9(SZ*m~vQX_A>bAR{%AXXi+c2nksCQ zz(>!@)~qhk*2AZZDH$l_hR?hKBr>%_@e7_>RpC*$*{JWbyyZ?#N_T9Tru{8jG#<1^Sj-$HqTt+>g*r>)4U8yS@1- z(LY&BPw-*9sr_X}jmxu4p+L$(xxDS_6E%Vh*3^FTt?FDB*g$+B8wEbq(Yp{-Ir30O z=;MXzC&TZ%n|&NuqNFc(CCPjPOBiUP#Y?ULvr`E;QY74jYNzpuH15Eds}VW35_LugJh-yS28vm zQH+a^rz`1d&p5h@rJ~^;@X8hE0U}aovL4>0wmo1sB(Qf7l6@nh-BXZB6|>J$eEV}S zl~%I4smx7= zc_J*g7cbpS>T@&m`h#A;iVsuM-d8tb!LR`F?_OP1HQ@Z_i%)giUm^ROR(j-UBXkq_ z$W!OZgrGv>ny+1$pPNgbTKh+0NYnCdtVBGpRxNl%CYlXt2p3>Q_->idGiMus@z)^P?`pygGgt1}v_tAypkOiM~dIUni>?vo{+I)B#{ z6W%3fXb__^X76TgyM&FHrL-ra!6U>|3$~19jo3?4pC~x3Lw1ta!p4A_>R| zn{$Lh)_GBe%d=zM@omp$AZ{ST6Gwb{R(2RO(lhYM zkd{;X%`2VUa$pP_$e<6~)pyHo*4LjgCT6*c9up^ak3Q(hGu104xe`~cvF68(ygFK> zRBoQml;m_D)*PaizHln7xZ@|@x_VlDsD`^dw$G|#sPoM{fZO! zzLT-3S9{LN-FF+tm^MzwcmIV-oVF_nkjJEO-{-lZ&DI{c}jFxFGn8W)ZYbziWiu zf<~$Z-7)WKxacuVZ|)o?`KUQu`*X-($o#*uYoIcty1R{QD=~@tMXp!mc;|!(_)_~* z)fIRTkF1{*{(&Z(a#8DD*tMTLxWDrIpO=qeUsL`vPBFL44G?rqtoTBCh?}(+&75yc z(J)L>$E6+3ASwS8;_C-2<5dd%COxCtkm!lk7&;alAq1D)xW(4JGpz~cnM0w^WriC2 z#%P;83d=vrcA@<0gkkddw3J*sJRrW8cZ`KV#qIQeinj$}} zqg_n_@y~Ay*p4yForz`Uh)a2t(KJf};gT12e=^72aqV!|(gryH*z@CSv4Y4ZbM zOi(BM7uaVegGszDk624w6>wpMCei-9`VB^&ZeH`KoZc8v8QG#1W`41JszdkUKG&~3 z@q^}9D?LM6#HJH$Dlo2iaARW*^1$~i+fCOddqgwiPt?}iIDj!d;PX2`$aP+CVge`? 
zie7FY`O|DS=DLmM6(a`0+f+vLZo0M>xPqMbPFcFBome_F?I!$+$J6COe9vg6SAp{{ z@x1FT@^-W(q7|*Cb*-qa34&fbh^GkfOaXg)isL=sN*37sgl5$}vk{(Wxur4-pAqh_ ztKCUxYS%`}3?aDTgzjIw%C-;mKYYRNUXy=nLMsrG|4&^&Bh{&nt5GLQp&zbp(#`u+ zKgFc3&C53k#t(OO`*_vH0gMe+9-}(Y{9$+sFlY=O|qGi0`0}LU8)4SQ=l0X73VQ5!uGW&oO{E@?x z>T2~*wd%gIwhMAcO!(E2RUF&^r-ODuAHLU(vb`Kx?v*7AHbRmaqHZMO1|yjdS4M<; zAMZ(X@RFr9)PreBf9n<4l83yO97dH3_apgGOu zwgfQ6Lu9vphdl2Jak$C+b+}c&U|%Yg9uBs{iik0=aV6)qxn@BQKD#tFATQvOQbAIF zlqFf1UE9M>=@x*P(`x}>-lOA+5>`GM>X(KpBrhLhioAZoNX|km&a1)2mt!qsj`Qg6 z%)40!;ei^RNR_R^ky|C@hL67=nzK&5`=*{g4OMD4-J4S++)K3!T{3rjJxpRK&4gkO zG*03!DmAVrFOeX{Q|+3^)hYg>wxOR{=dl_>N#Tu*Mm7P7PU^!mcIzTi<T47ZJ7I=wcO0<=EO~bZbDc9I3+Oery}Pt zX8DhU5Psr!t$uXQElMsildSuJ;N9S{$k(Zsic#q;oFWSiq|TI92v zCRSU85B!_Uey9h|`R9&qF+bqBwm*sSM=BVa-PD&HRzcnN=k22N4QtL@U1r)oz<1ik z(-@py0RfQ1ZZ;0c9XpOp`S*`S>OKCp(;C1bWFGf?t`XnqE<7h;z)Dw~;-k1s_L5Ve*F?0Bq56dNjCo7JJ5u1b?PoRc<=|_t5w_z2+egPFLjgB1dUe9h#7ysu z{sz&VtH-Fyet)(5OzCEniZ{<7-j<~VHrMQK2J!S~a#nr3-$m&cgP{#uI< zgJ?va9^u_{d+&(`F8RKbIZF_N>2z0@EFsYZa*dt=!&%p7XUxOkGt<7~G?)Dc)|>YG zF1-Y-;xicfSR!soi0Bt?Ht(b;7+cW7mi4a3?Z1LkRKLKlP8af9=e-pTdyoM)4@`c- zALFzCgp~J$Taxt|ia`WZors;OhQ45b`s%v=p7bVF$L#JV&8`aY_LYV68r+^gwOwe6 zHb}5QusHsE=!f406X0ODSaa>Xu*S+_(LhvJ1>N}+)*2Y(vARy(^)7XQjOVoEhLq3M zXY12A^5M_hlX%Gq`BOXdmc^;W`JoDe9ZTlegtUazusQu|!>>bEbF7|+<0Q6oK+7G~ zWC;woOaOsT_Yz1Emd9C_G~V_X_HN$b^bYx&SV5@@p&eJD>4--Bcly3t!?^4&; zO-Z=Z+fGV!#VSg19!aE*X(5Hb_r(4-1L8}$q{0At@c^@D)@Y(Xdj_d)Fn@voOtF?z;gawt@T9m^5(?-x+-X}|z zZY^p*^1$g7e)?!d3#_J<@&+p67+td30Gh{lUv%>E`%rTpfGZULaX)XfO^Ph9K^VEI zPXQsR4qycm%K9|nV00zT`altjuS71uwtN-KtT!}a2^$&h);eP&8G8VoZ}n=2v7u}L zgSr389+9_Mo#GQ<6I#>v6)a%Jj@jHC!vNykH7aliLSHG zX_9}n-MQ?g+0@D3GqPkii(gd-T4;8N@Gt9U5IUZOROx(Nomzt(h8oSl zI{oNc4;NO8uWy{-fgOO2#JFw#<35XZh~%cm%V#|{VMwe^{7X3xWs_C@^16Z)y~Y=2m=WU8C+Oy*a-0@w%Ie6UAaM-~aPb|?FHefzmynO+?H z8o&=?uN@jysM^1e3uR>AqAa6iOJR_~2+3`$gj<#x!brI# zGn5b`WSKA&lC9CAl9DNfn28BPmWc?-HufcqCEGB+Yx-5s-Sc{$*ZJejIe(pVoq3&e 
zUElBL{rP;^XbRl-J~)2FZl;f}1&CjB$Hz$#<9LO^s9;b^NIKSEeE94u+n#4mF!8XRaSs<(jv}X)fSa5@lVl^0OC*kcu17LZ@!8)ZB@O)Q!ca zj>P<5-6W9fCGmi0dTB6p09cIVI&)0oE3XzW?lPt3t2ls)(*1j%zLA#-SX^#ha~;9Ob_{pG z%`>4DDZ&x>1*rEi@*QM=h4@(xNpVFPnrZcbz{SSFT&cXdVH-t_jit`e1=POFyDp{X zlox&gVpv{l7=5X3Y$Hg<0+d6il4ox>|C0#~9qh6r`}%H2e7@_$6t)@VsT3|rc>BQ; zHF#D0KtK}u(3UH$x31tTPixHS^b1Hk3`!hlJST)+-Y4~!5OrLY1+)z#Fegiam&rCN zlhL`Iz^MV2x~fuGmBE*NJt1Q#9ef216bvc4oCZ`R4ZY~hAM)ZsSj8~Bd}aPAC#%o# z>3M!doe7{GXAbl3+Q7Amz9%hss5lqw2a1S? zc;%uGn=Ay-uy`8Z<~W;?q+NoH8`o}3bq&HIX;stRJ4nqpFL!=Xxnp-NweR&`&~($S z3m$&)i0suo*;jly2zx$>%6%7%h0_EX!8kGAIrz6^BhGwDgahM0x6MXt_pOS z9$t)#hruM?2?7n$#^bfpnYbdVYTdcA6_T8+@=J0VnbPxkeuMFNA+d80|ZT z>R=oWcqXW|L8DVVVb>;vr<^#w)5&i04_WB;kW%2VF_@EBj9Way{ zwuQ}NLTlbjHD<%Mk0^$#CIrmMgSmtt^)!B{W}pE~C~6m|&bw@ko}Gc)J3gG&b^ho) zq{s5`*$C|?CY{~E%3C2}V%9D)rWL#;jW1I@pFIai482JMKXm8G&0OachUk?ZX^Z8^ zHo<&jzOVILzKS$>zMe{(X4$zIDYzym8>`o+Ll%j>p@JD(HcnY2E8vHuE=VmqGaCVP z%-A==wzEzo)_X4ImZrZo+GdQv2b#bAOu@)3M27-ZsPyLGYO!ONCLxO~O znw2R@vf!Spo;L?3@9Ji?sm*3EV|=sw9o#FyjLiEJ?dIKa0uZp(L|rpbH&^j_`=nSRw2mbG+yP0WC( z-{7kJ-2wvH2ajaEVJhecxkmy>U55lztD<%;uePZ-c8@gTBi@>~RZ898XQ z2Ysljv?bDUf69lSVtWys<)z_B|U&n=eWajvJ(&RXS8==cc3@21)=i(#B8C5qScd_x7F`hQc*!Z$rQE zV)malpp?YG&-g0I=s%VNYY}=|#4Tvhago`qbONL@fc|mMVaAx=Ih&HI7v@V>Av-MZ zt>`%P&P5+Z=r(I^>;o&A$3Aw&E*H8KpZx}^F@#qdASlA89Qi6+rgJWls zQ5yN~{4F`Zr1ic5wXbg9y19L=EDe#Z}}mGHJGB`M9=TT*uwy{myNGldlqz%(v z$_8tO2=7*K<5fTD`ujE~JCWuQb-q$vW_@x04=TB7? 
zPZ{p_on?88QUrD1kq+|I(gYpbs-QOzN+x0JI9CEB1uy0zr1H7|`tBXtTl^#>VfbJ@ z)c9C!uY2D*e>77RnrieFpqJ}O`tdT#7nisQwsFU7Dltnl2*-v)KScAn)eT8B$8bAm6a%}3oO32O11r6hOj+f!aIKp$ zCv0=phw@}?Y1pexbLsuf7}48;443;C5>?$jzjhMlg& ziVpQ)a;n}4XRMUdYt^+*vECJ}oCd-i|0G!7qE!h`uu^QauL;iN9YM;kHc7pkrJDV% zq7sLKI(ScZPr=7~FruHmtMudJ=smim5ERqYLjt;*5}Gfx+tnXQ3|C%qHa8ZqNmG4O ziL{()ce=3D*u}ew*}dOSKLIICcE~X%Ok&PHNO%F*Tzki=Q3J z2{!cC=1~ayAXhaL?hW%t^Qx!#!r0X=T0vJa{kMYhXxuEr;^3;J9%eMyQXeoj|*2lF4ye30wP|U zKJMmb{qxcO*yV6>>sbA96_G3Q?n`=(FT@DNlW?COH#Ly;ka*YV8`FkU=v(1>X|drpgFw5k+jFcX?4H_ao?T5va&wdsca6 z+MfzSn0Vd30M2k&{bgfTWa=LEA9?W^;Q#;t literal 0 HcmV?d00001 From 02f76a76be66dbdbefd6ae28adfcf97cc415e191 Mon Sep 17 00:00:00 2001 From: Shiqing Fan Date: Thu, 28 May 2026 01:42:55 -0700 Subject: [PATCH 10/59] fix comments Signed-off-by: Shiqing Fan --- .../distributed/distributed_data_parallel.py | 8 +-- megatron/core/optimizer/__init__.py | 62 ++++++++++++++++--- megatron/core/optimizer/clip_grads.py | 20 ++++-- megatron/core/optimizer/optimizer.py | 5 +- megatron/core/tensor_parallel/layers.py | 9 ++- megatron/core/transformer/cuda_graphs.py | 24 +++---- megatron/experimental/gtp/__init__.py | 57 +++++++++-------- .../gtp/generalized_tensor_parallelism.py | 4 ++ megatron/training/initialize.py | 12 ++++ megatron/training/utils/common_utils.py | 3 +- 10 files changed, 140 insertions(+), 64 deletions(-) diff --git a/megatron/core/distributed/distributed_data_parallel.py b/megatron/core/distributed/distributed_data_parallel.py index 846c27747cb..9a40ae88ddd 100644 --- a/megatron/core/distributed/distributed_data_parallel.py +++ b/megatron/core/distributed/distributed_data_parallel.py @@ -6,8 +6,6 @@ import torch -from megatron.experimental.gtp import GTPShardedParam - from ..config_logger import has_config_logger_enabled, log_config_to_disk from ..optimizer.param_layout import FullParamLayout from ..process_groups_config import ProcessGroupCollection @@ -145,7 +143,7 @@ def __init__( # Non-GTP expert params (biases, 
LayerNorms inside experts, etc.) are # REPLICATED across EGTP peers and stay in all_params — their expert branch # reduces over the FULL intra_expt_dp_group at line 263. - is_gtp_shard = isinstance(param, GTPShardedParam) + is_gtp_shard = getattr(param, 'is_gtp', False) is_expert = not getattr(param, 'allreduce', True) if is_gtp_shard and not is_expert: gtp_params.append(param) @@ -605,7 +603,7 @@ def hook(*unused): # wgrad_reduce_scatter returns None for async RS and writes # the wgrad straight into param.main_grad. Skip the assertion # for GTPShardedParam — otherwise it fires every iter. - if not isinstance(param, GTPShardedParam): + if not getattr(param, 'is_gtp', False): assert ( param.grad is not None ), 'param.grad being None is not safe when overlap_grad_reduce is True' @@ -731,7 +729,7 @@ def broadcast_params(self): """ for param in self.module.parameters(): is_expert_parallel = not getattr(param, 'allreduce', True) - is_gtp = isinstance(param, GTPShardedParam) + is_gtp = getattr(param, 'is_gtp', False) # GTPShardedParam holds a unique 1/N shard per (E)GTP peer; broadcast must # exclude those peers and reach the FULL cross-instance group (one-shot diff --git a/megatron/core/optimizer/__init__.py b/megatron/core/optimizer/__init__.py index cd19143196f..e862d67728c 100644 --- a/megatron/core/optimizer/__init__.py +++ b/megatron/core/optimizer/__init__.py @@ -56,7 +56,6 @@ ) from megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.transformer.fsdp_dtensor_checkpoint import get_global_unique_param_name -from megatron.experimental.gtp import GTPShardedParam from ..distributed.param_and_grad_buffer import _ParamAndGradBuffer from ..transformer.module import MegatronModule @@ -347,7 +346,7 @@ def _get_param_groups( param_override = None is_expert_parallel = not getattr(param, 'allreduce', True) - is_gtp = isinstance(param, GTPShardedParam) + is_gtp = getattr(param, 'is_gtp', False) # Create config_tuple that is hash-able, and has a 
consistent ordering of the keys. param_override_tuple: tuple[tuple[str, Any], ...] | None = ( @@ -1179,35 +1178,38 @@ def get_megatron_optimizer( ) ) - moe_param_groups, moe_buffers = _get_param_groups_and_buffers( + # Expert non-GTP params: reduce over the FULL intra_expt_dp_group (includes + # EGTP peers), because their wgrad has NOT been pre-reduced over the EGTP + # axis. Backed by expert_parallel_buffers in DDP. + expert_param_groups, expert_buffers = _get_param_groups_and_buffers( model_chunks, model_chunk_offset=0, config=config, config_overrides=config_overrides, - filter_fn=lambda g: g['is_expert_parallel'], + filter_fn=lambda g: g['is_expert_parallel'] and not g.get('is_gtp', False), buffer_name='expert_parallel_buffers', ) if dump_param_to_param_group_map is not None: - for param_group in moe_param_groups: + for param_group in expert_param_groups: for param in param_group["params"]: param_name = get_global_unique_param_name(model_chunks, param) param_to_param_group[param_name] = param_group_id param_group_id += 1 - if len(moe_param_groups) > 0: + if len(expert_param_groups) > 0: expt_model_parallel_rank = get_pg_rank(expt_tp_pp_group) # Pass Gloo process groups into optimizer only if needed. 
if use_gloo_process_groups: - expt_data_parallel_group_gloo = intra_expt_dp_with_egtp_group_gloo + expt_data_parallel_group_gloo = intra_expt_dp_group_gloo else: expt_data_parallel_group_gloo = None optimizers.append( _get_megatron_optimizer_based_on_param_groups( config=config, model_chunks=model_chunks, - param_groups=moe_param_groups, - per_model_buffers=moe_buffers, + param_groups=expert_param_groups, + per_model_buffers=expert_buffers, model_parallel_group=expt_tp_pp_group, - data_parallel_group=intra_expt_dp_with_egtp_group, + data_parallel_group=intra_expt_dp_group, data_parallel_group_gloo=expt_data_parallel_group_gloo, data_parallel_group_idx=expt_model_parallel_rank, intra_dist_opt_group=intra_dist_opt_group, @@ -1216,6 +1218,46 @@ def get_megatron_optimizer( ) ) + # EGTP params: reduce over the with_egtp carve-out (excludes EGTP peers), + # because the EGTP wgrad RS has already reduced grads over the EGTP axis. + # Backed by egtp_buffers in DDP. + egtp_param_groups, egtp_buffers = _get_param_groups_and_buffers( + model_chunks, + model_chunk_offset=0, + config=config, + config_overrides=config_overrides, + filter_fn=lambda g: g.get('is_gtp', False) and g['is_expert_parallel'], + buffer_name='egtp_buffers', + ) + if dump_param_to_param_group_map is not None: + for param_group in egtp_param_groups: + for param in param_group["params"]: + param_name = get_global_unique_param_name(model_chunks, param) + param_to_param_group[param_name] = param_group_id + param_group_id += 1 + if len(egtp_param_groups) > 0: + expt_model_parallel_rank = get_pg_rank(expt_tp_pp_group) + # Pass Gloo process groups into optimizer only if needed. 
+ if use_gloo_process_groups: + egtp_data_parallel_group_gloo = intra_expt_dp_with_egtp_group_gloo + else: + egtp_data_parallel_group_gloo = None + optimizers.append( + _get_megatron_optimizer_based_on_param_groups( + config=config, + model_chunks=model_chunks, + param_groups=egtp_param_groups, + per_model_buffers=egtp_buffers, + model_parallel_group=expt_tp_pp_group, + data_parallel_group=intra_expt_dp_with_egtp_group, + data_parallel_group_gloo=egtp_data_parallel_group_gloo, + data_parallel_group_idx=expt_model_parallel_rank, + intra_dist_opt_group=intra_dist_opt_group, + distributed_optimizer_instance_id=distributed_optimizer_instance_id, + pg_collection=pg_collection, + ) + ) + if dump_param_to_param_group_map is not None: torch.distributed.checkpoint.save( state_dict=param_to_param_group, checkpoint_id=dump_param_to_param_group_map diff --git a/megatron/core/optimizer/clip_grads.py b/megatron/core/optimizer/clip_grads.py index 058fabd46a5..92a255374b8 100644 --- a/megatron/core/optimizer/clip_grads.py +++ b/megatron/core/optimizer/clip_grads.py @@ -47,8 +47,6 @@ multi_tensor_scale_tensor_impl = None -from megatron.experimental.gtp import GTPShardedParam - from .. import parallel_state from ..tensor_parallel import param_is_not_tensor_parallel_duplicate from ..transformer.module import param_is_not_shared @@ -244,6 +242,7 @@ def count_zeros_fp32( data_parallel_group = None use_megatron_fsdp = False gtp_rank = parallel_state.get_generalized_tensor_parallel_remat_rank() + egtp_rank = parallel_state.get_expert_generalized_tensor_parallel_remat_rank() for param in parameters: if getattr(param, "__fsdp_param__", False) and param.grad is not None: # If the parameter is managed by Megatron FSDP, we need to handle it differently. 
@@ -256,12 +255,23 @@ def count_zeros_fp32( grad_attr = "decoupled_grad" if use_decoupled_grad else "grad" grad_not_none = hasattr(param, grad_attr) and getattr(param, grad_attr) is not None is_not_shared = param_is_not_shared(param) - is_not_tp_duplicate = param_is_not_tensor_parallel_duplicate(param, tp_group=tp_group) + + is_gtp_param = getattr(param, 'is_gtp', False) + is_expert = not getattr(param, 'allreduce', True) + + # GTP params lose the tensor_model_parallel attribute during sharding, + # so they're always unique across TP ranks — skip the TP-duplicate filter. + is_not_tp_duplicate = is_gtp_param or param_is_not_tensor_parallel_duplicate( + param, tp_group=tp_group + ) + + # GTP-duplicate filter: only needed for non-distributed optimizer. + # Expert params are replicated across the EGTP axis (not the GTP axis), + # so use egtp_rank for expert dedup and gtp_rank for dense dedup. if use_distributed_optimizer: is_not_gtp_duplicate = True else: - is_gtp_param = getattr(param, 'is_gtp', False) or isinstance(param, GTPShardedParam) - is_not_gtp_duplicate = is_gtp_param or gtp_rank == 0 + is_not_gtp_duplicate = is_gtp_param or (egtp_rank if is_expert else gtp_rank) == 0 if grad_not_none and is_not_shared and is_not_tp_duplicate and is_not_gtp_duplicate: grad_obj = getattr(param, grad_attr) data_parallel_group = get_data_parallel_group_if_dtensor(grad_obj, data_parallel_group) diff --git a/megatron/core/optimizer/optimizer.py b/megatron/core/optimizer/optimizer.py index c4acb2974d3..96ddba35e55 100644 --- a/megatron/core/optimizer/optimizer.py +++ b/megatron/core/optimizer/optimizer.py @@ -37,7 +37,6 @@ multi_tensor_applier = local_multi_tensor_applier multi_tensor_scale_impl = local_multi_tensor_scale -from megatron.experimental.gtp import GTPShardedParam from .. 
import parallel_state, tensor_parallel from ..config_logger import has_config_logger_enabled, log_config_to_disk @@ -189,7 +188,7 @@ def get_main_grads_for_grad_norm_split(self) -> Tuple[List[torch.Tensor], List[t grad_not_none = grad is not None is_not_shared = param_is_not_shared(param) - is_gtp_param = getattr(param, 'is_gtp', False) or isinstance(param, GTPShardedParam) + is_gtp_param = getattr(param, 'is_gtp', False) # GTP params are always unique across TP ranks (tensor_model_parallel # attribute is lost during wrap_gtp_sharded_tensor), so skip TP filter. @@ -772,7 +771,7 @@ def __init__( float16_params_this_group.append(param) # Create a copy main_param = param.detach().clone().float() - main_param.is_gtp = isinstance(param, GTPShardedParam) + main_param.is_gtp = getattr(param, 'is_gtp', False) # Copy tensor model parallel attributes. tensor_parallel.copy_tensor_model_parallel_attributes(main_param, param) if hasattr(param, 'shared'): diff --git a/megatron/core/tensor_parallel/layers.py b/megatron/core/tensor_parallel/layers.py index e5296d67e8b..03ccd7b653e 100644 --- a/megatron/core/tensor_parallel/layers.py +++ b/megatron/core/tensor_parallel/layers.py @@ -29,7 +29,6 @@ make_tp_sharded_tensor_for_checkpoint, prepare_input_tensors_for_wgrad_compute, ) -from megatron.experimental.gtp import GTPEmbeddingWeight, wrap_module_params_gtp from ..dist_checkpointing.mapping import ShardedStateDict from ..transformer.utils import make_sharded_tensors_for_checkpoint @@ -284,6 +283,8 @@ def __init__( self.gtp_size = 1 if gtp_group is not None and gtp_group.size() > 1: + from megatron.experimental.gtp import wrap_module_params_gtp + wrap_module_params_gtp(self, ["weight"], gtp_group) self.gtp_size = gtp_group.size() # Nothing prefetches embedding — it is head of the UNGRAPHED @@ -308,6 +309,8 @@ def forward(self, input_): weight = self.weight if self.gtp_size > 1: + from megatron.experimental.gtp import GTPEmbeddingWeight + weight = 
GTPEmbeddingWeight.apply(self.weight) # Get the embeddings. @@ -964,6 +967,8 @@ def __init__( self.gtp_size = 1 if gtp_group is not None and gtp_group.size() > 1: + from megatron.experimental.gtp import wrap_module_params_gtp + wrap_module_params_gtp(self, ["weight"], gtp_group) self.gtp_size = gtp_group.size() @@ -1319,6 +1324,8 @@ def __init__( self.gtp_size = 1 if gtp_group is not None and gtp_group.size() > 1: + from megatron.experimental.gtp import wrap_module_params_gtp + wrap_module_params_gtp(self, ["weight"], gtp_group) self.gtp_size = gtp_group.size() diff --git a/megatron/core/transformer/cuda_graphs.py b/megatron/core/transformer/cuda_graphs.py index c8c5f74dab4..6deabe8e092 100644 --- a/megatron/core/transformer/cuda_graphs.py +++ b/megatron/core/transformer/cuda_graphs.py @@ -58,15 +58,17 @@ except: HAVE_TE_GRAPHS = False -from megatron.experimental.gtp import ( - GTP_CONFIG, - GTPChain, - GTPShardedParam, - get_ag_stream, - get_rs_stream, - reallocate_gtp_cache_to_mempool, - wait_async_comms, -) +from megatron.experimental.gtp import HAVE_GTP + +if HAVE_GTP: + from megatron.experimental.gtp import ( + GTP_CONFIG, + GTPChain, + get_ag_stream, + get_rs_stream, + reallocate_gtp_cache_to_mempool, + wait_async_comms, + ) try: from tqdm import tqdm @@ -940,7 +942,7 @@ def _compute_finalized_during_bwd_capture(self): """ finalized = {} # id → param for p in self.params_to_backprop: - if not isinstance(p, GTPShardedParam): + if not getattr(p, 'is_gtp', False): continue if getattr(p, "prev_w", None) is None: for w in getattr(p, "_weights", [p]): @@ -1263,7 +1265,7 @@ def create_bwd_graph(self): if self.gtp_remat: pset = {id(p) for p in self.params_to_backprop} for p in self.params_to_backprop: - if not isinstance(p, GTPShardedParam): + if not getattr(p, 'is_gtp', False): continue prev_w = getattr(p, "prev_w", None) p._is_cross_graph_tail = prev_w is not None and id(prev_w) not in pset diff --git a/megatron/experimental/gtp/__init__.py 
b/megatron/experimental/gtp/__init__.py index f220afa7b5c..7c33d02f5ee 100644 --- a/megatron/experimental/gtp/__init__.py +++ b/megatron/experimental/gtp/__init__.py @@ -5,45 +5,48 @@ """Generalized Tensor Parallelism (GTP) public API. GTP shards weight tensors 1/N across a GTP process group along ``out_features`` -and materializes them on-demand via async all-gather. The implementation lives in -``megatron.experimental.gtp.generalized_tensor_parallelism`` and depends on -TransformerEngine's FP8 / MXFP8 / NVFP4 primitives. Importing this package will -raise ``ImportError`` (with a helpful message) if TransformerEngine is missing or -too old. +and materializes them on-demand via async all-gather. The implementation lives +in ``megatron.experimental.gtp.generalized_tensor_parallelism`` and depends on +TransformerEngine's FP8 / MXFP8 / NVFP4 primitives. + +If TransformerEngine is missing or too old, the inner import fails and the +package exposes only ``HAVE_GTP = False``. No core module imports GTP symbols +unconditionally at module load time. 
""" -from megatron.experimental.gtp.generalized_tensor_parallelism import ( - GTP_CONFIG, - GTPChain, - GTPEmbeddingWeight, - GTPShardedParam, - classify_gtp_chains, - get_ag_stream, - get_ag_streams_for_chain, - get_all_ag_streams, - get_all_rs_streams, - get_rs_stream, - get_rs_streams_for_chain, - reallocate_gtp_cache_to_mempool, - set_cuda_graph_modules, - tag_gtp_params_with_names, - update_gtp_config, - wait_async_comms, - wrap_module_params_gtp, -) +try: + from megatron.experimental.gtp.generalized_tensor_parallelism import ( + GTP_CONFIG, + GTPChain, + GTPEmbeddingWeight, + classify_gtp_chains, + get_ag_stream, + get_all_ag_streams, + get_all_rs_streams, + get_rs_stream, + reallocate_gtp_cache_to_mempool, + set_cuda_graph_modules, + tag_gtp_params_with_names, + update_gtp_config, + wait_async_comms, + wrap_module_params_gtp, + ) + + HAVE_GTP = True +except ImportError: + HAVE_GTP = False + __all__ = [ + "HAVE_GTP", "GTP_CONFIG", "GTPChain", "GTPEmbeddingWeight", - "GTPShardedParam", "classify_gtp_chains", "get_ag_stream", - "get_ag_streams_for_chain", "get_all_ag_streams", "get_all_rs_streams", "get_rs_stream", - "get_rs_streams_for_chain", "reallocate_gtp_cache_to_mempool", "set_cuda_graph_modules", "tag_gtp_params_with_names", diff --git a/megatron/experimental/gtp/generalized_tensor_parallelism.py b/megatron/experimental/gtp/generalized_tensor_parallelism.py index 7e0b1b70ba6..487b3d96c31 100644 --- a/megatron/experimental/gtp/generalized_tensor_parallelism.py +++ b/megatron/experimental/gtp/generalized_tensor_parallelism.py @@ -538,6 +538,10 @@ def __init__(self, tensor, *args, **kwargs): del tensor, args, kwargs super().__init__() + # Canonical flag — also set on distopt's main_param copy so both kinds + # of param can be classified via a single attribute check. 
+ self.is_gtp = True + # all gather self.state = GTPWeightState.NONE self._ag_ticket_fwd = None diff --git a/megatron/training/initialize.py b/megatron/training/initialize.py index fc89ba5c3f9..92b52392627 100644 --- a/megatron/training/initialize.py +++ b/megatron/training/initialize.py @@ -338,6 +338,18 @@ def _initialize_distributed(get_embedding_ranks, get_position_embedding_ranks, s if mpu.model_parallel_is_initialized(): print("model parallel is already initialized") else: + if ( + args.generalized_tensor_parallel_remat_size > 1 + or args.expert_generalized_tensor_parallel_remat_size > 1 + ): + from megatron.experimental.gtp import HAVE_GTP + + assert HAVE_GTP, ( + "GTP requires TransformerEngine >= 2.15.0. " + "Install TransformerEngine, or set both " + "--generalized-tensor-parallel-remat-size and " + "--expert-generalized-tensor-parallel-remat-size to 1." + ) mpu.initialize_model_parallel( args.tensor_model_parallel_size, args.pipeline_model_parallel_size, diff --git a/megatron/training/utils/common_utils.py b/megatron/training/utils/common_utils.py index 5f4d8f41ae2..89fc85d034c 100644 --- a/megatron/training/utils/common_utils.py +++ b/megatron/training/utils/common_utils.py @@ -47,7 +47,6 @@ from megatron.core.transformer.module import param_is_not_shared -from megatron.experimental.gtp import GTPShardedParam def _compute_norm_2(params_list): @@ -116,7 +115,7 @@ def calc_params_l2_norm(model, force_create_fp32_copy=False): for model_chunk in model: for param in model_chunk.parameters(): - is_gtp = isinstance(param, GTPShardedParam) + is_gtp = getattr(param, 'is_gtp', False) # Filter TP duplicates. GTP params are always unique across TP ranks # so skip this check for them. 
From 476aa059fd5a30d7f6a08222064d876193f74b80 Mon Sep 17 00:00:00 2001 From: Shiqing Fan Date: Thu, 28 May 2026 05:37:21 -0700 Subject: [PATCH 11/59] Fix EGTP correctness on cudagraph bwd capture + main_param dedup Signed-off-by: Shiqing Fan --- megatron/core/optimizer/optimizer.py | 2 + megatron/core/transformer/cuda_graphs.py | 63 +++++++++++++++++------- 2 files changed, 48 insertions(+), 17 deletions(-) diff --git a/megatron/core/optimizer/optimizer.py b/megatron/core/optimizer/optimizer.py index 96ddba35e55..99dc78d9b76 100644 --- a/megatron/core/optimizer/optimizer.py +++ b/megatron/core/optimizer/optimizer.py @@ -772,6 +772,8 @@ def __init__( # Create a copy main_param = param.detach().clone().float() main_param.is_gtp = getattr(param, 'is_gtp', False) + # Mirror expert-tag for GTP-aware dedup (egtp_rank vs gtp_rank). + main_param.allreduce = getattr(param, 'allreduce', True) # Copy tensor model parallel attributes. tensor_parallel.copy_tensor_model_parallel_attributes(main_param, param) if hasattr(param, 'shared'): diff --git a/megatron/core/transformer/cuda_graphs.py b/megatron/core/transformer/cuda_graphs.py index 6deabe8e092..dba4b1c12d9 100644 --- a/megatron/core/transformer/cuda_graphs.py +++ b/megatron/core/transformer/cuda_graphs.py @@ -766,15 +766,23 @@ def backward(ctx, *grads): # main_stream stays unblocked so the next runner can start in # parallel. if runner.gtp_remat and runner.finalized_during_bwd_capture: - gtp_rs_stream = get_rs_stream( - GTPChain.GRAPHED.value, parallel_state.get_generalized_tensor_parallel_remat_group() - ) - gtp_rs_stream.wait_event(runner.bwd_phase2_completion_event) - with torch.cuda.stream(gtp_rs_stream): - for param in runner.finalized_during_bwd_capture: - hook = getattr(param, '_grad_accum_hook', None) - if hook is not None: - hook() + # Partition by (chain, group): dense vs EGTP use different NCCL + # comms / rs_streams. 
Fire each hook on the rs_stream that ran + # its captured wgrad-RS so FIFO orders DDP-RS after that write. + dense_group = parallel_state.get_generalized_tensor_parallel_remat_group() + expert_group = parallel_state.get_expert_generalized_tensor_parallel_remat_group() + params_by_group = defaultdict(list) + for param in runner.finalized_during_bwd_capture: + is_expert = not getattr(param, 'allreduce', True) + params_by_group[expert_group if is_expert else dense_group].append(param) + for group, params in params_by_group.items(): + gtp_rs_stream = get_rs_stream(GTPChain.GRAPHED.value, group) + gtp_rs_stream.wait_event(runner.bwd_phase2_completion_event) + with torch.cuda.stream(gtp_rs_stream): + for param in params: + hook = getattr(param, '_grad_accum_hook', None) + if hook is not None: + hook() # Replaying the next bwd graph destroys the data held in static_grad_inputs, so clone # wgrads as autograd may launch the next graph before wgrads are accumulated @@ -875,11 +883,14 @@ def __init__( self.stream = torch.cuda.Stream() self.fwd_completion_event = torch.cuda.Event(external=True, interprocess=True) self.bwd_completion_event = torch.cuda.Event(external=True, interprocess=True) - # GRAPHED chain only hits dense modules (mamba/attn/moe_router), - # all sharded across PARAMETER_SHARDING_GROUP. Materialize that - # (chain, group) stream pair now so it is registered as a - # captured side stream before the first forward. - from megatron.core.parallel_state import get_generalized_tensor_parallel_remat_group + # Register (chain, group) side streams before the first forward. + # Dense for mamba/attn/shared_experts; expert (below) for routed + # experts captured when "moe" is in cuda_graph_modules. 
+ from megatron.core.parallel_state import ( + get_expert_generalized_tensor_parallel_remat_group, + get_expert_generalized_tensor_parallel_remat_world_size, + get_generalized_tensor_parallel_remat_group, + ) gtp_group = get_generalized_tensor_parallel_remat_group() graphed_ag = get_ag_stream(GTPChain.GRAPHED.value, gtp_group) @@ -887,6 +898,15 @@ def __init__( self._register_side_stream(self.fwd_side_streams, graphed_ag) self._register_side_stream(self.bwd_side_streams, graphed_ag) self._register_side_stream(self.bwd_side_streams, graphed_rs) + # EGTP streams: required so _wait/_sync_side_streams drain EGTP + # NCCL into runner_stream before bwd_completion_event fires. + if get_expert_generalized_tensor_parallel_remat_world_size() > 1: + egtp_group = get_expert_generalized_tensor_parallel_remat_group() + egtp_graphed_ag = get_ag_stream(GTPChain.GRAPHED.value, egtp_group) + egtp_graphed_rs = get_rs_stream(GTPChain.GRAPHED.value, egtp_group) + self._register_side_stream(self.fwd_side_streams, egtp_graphed_ag) + self._register_side_stream(self.bwd_side_streams, egtp_graphed_ag) + self._register_side_stream(self.bwd_side_streams, egtp_graphed_rs) # Bridges Phase 1 (AG drain on ag_stream) into runner_stream # so bwd_completion_event records past NCCL_AG completion. self.bwd_ag_fence_event = torch.cuda.Event() @@ -1330,15 +1350,24 @@ def create_bwd_graph(self): # consumer's cascade; for within-graph tails both # happen here (see wait_async_comms). if self.gtp_remat: - # Phase 1: drain AG; fence runner_stream past ag_stream so - # bwd_completion_event records AFTER NCCL_AG completion. + # Phase 1: drain AG; fence runner_stream past dense + EGTP AG + # so bwd_completion_event records AFTER NCCL_AG completion. 
wait_async_comms(GTPChain.GRAPHED.value, skip_rs=True) - from megatron.core.parallel_state import get_generalized_tensor_parallel_remat_group + from megatron.core.parallel_state import ( + get_expert_generalized_tensor_parallel_remat_group, + get_expert_generalized_tensor_parallel_remat_world_size, + get_generalized_tensor_parallel_remat_group, + ) gtp_group = get_generalized_tensor_parallel_remat_group() graphed_ag = get_ag_stream(GTPChain.GRAPHED.value, gtp_group) self.bwd_ag_fence_event.record(graphed_ag) torch.cuda.current_stream().wait_event(self.bwd_ag_fence_event) + if get_expert_generalized_tensor_parallel_remat_world_size() > 1: + egtp_group = get_expert_generalized_tensor_parallel_remat_group() + egtp_graphed_ag = get_ag_stream(GTPChain.GRAPHED.value, egtp_group) + self.bwd_ag_fence_event.record(egtp_graphed_ag) + torch.cuda.current_stream().wait_event(self.bwd_ag_fence_event) # Record completion AFTER AG drain + fence but BEFORE RS drain, # so main_stream can trigger the next runner while RS is still From cdf5d3597e7e152fe0ab3d858bb1fd68ed4c52ba Mon Sep 17 00:00:00 2001 From: Shiqing Fan Date: Thu, 28 May 2026 21:20:49 -0700 Subject: [PATCH 12/59] fix comments Signed-off-by: Shiqing Fan --- .../distributed/distributed_data_parallel.py | 19 +++-- megatron/core/optimizer/__init__.py | 13 ++-- megatron/core/parallel_state.py | 72 +------------------ megatron/core/process_groups_config.py | 16 +---- megatron/training/training.py | 30 ++++---- 5 files changed, 35 insertions(+), 115 deletions(-) diff --git a/megatron/core/distributed/distributed_data_parallel.py b/megatron/core/distributed/distributed_data_parallel.py index 9a40ae88ddd..cb1c1ec3b74 100644 --- a/megatron/core/distributed/distributed_data_parallel.py +++ b/megatron/core/distributed/distributed_data_parallel.py @@ -190,14 +190,19 @@ def __init__( # When a full_param_layout is provided, verify that the grouping is consistent # with the layout (same buffer keys, same params per key, same 
param_indices). - # Skip strict equality if GTP carved params out — the caller-supplied layout - # was computed without that carve-out, so a literal == would always fail. - if full_param_layout is not None and not gtp_params: - assert set(buffer_groups.keys()) == set(full_param_layout.layouts.keys()), ( - f"Buffer keys from param grouping {set(buffer_groups.keys())} do not match " - f"full_param_layout keys {set(full_param_layout.layouts.keys())}" - ) + # (E)GTP shares a BufferKey with non-GTP params of the same dtype, so keys that + # also appear in the gtp/egtp groups diverge from the caller's (non-carved) + # layout — skip those, and skip the exact key-set check when any carve-out ran. + if full_param_layout is not None: + carved_keys = set(gtp_buffer_groups.keys()) | set(egtp_buffer_groups.keys()) + if not carved_keys: + assert set(buffer_groups.keys()) == set(full_param_layout.layouts.keys()), ( + f"Buffer keys from param grouping {set(buffer_groups.keys())} do not match " + f"full_param_layout keys {set(full_param_layout.layouts.keys())}" + ) for buffer_key, (params, param_indices) in buffer_groups.items(): + if buffer_key in carved_keys: + continue layout = full_param_layout.layouts[buffer_key] assert set(params) == set( layout.param_index_map.keys() diff --git a/megatron/core/optimizer/__init__.py b/megatron/core/optimizer/__init__.py index e862d67728c..0877e989118 100644 --- a/megatron/core/optimizer/__init__.py +++ b/megatron/core/optimizer/__init__.py @@ -1031,8 +1031,6 @@ def get_megatron_optimizer( mp_group = process_groups_dict['mp_group'] expt_tp_pp_group = process_groups_dict['expt_tp_pp_group'] intra_dp_cp_group_gloo = process_groups_dict['intra_dp_cp_group_gloo'] - intra_dp_cp_with_gtp_group_gloo = process_groups_dict['intra_dp_cp_with_gtp_group_gloo'] - intra_expt_dp_with_egtp_group_gloo = process_groups_dict['intra_expt_dp_with_egtp_group_gloo'] intra_expt_dp_group_gloo = process_groups_dict['intra_expt_dp_group_gloo'] intra_dist_opt_group = 
process_groups_dict['intra_dist_opt_group'] @@ -1170,7 +1168,8 @@ def get_megatron_optimizer( per_model_buffers=gtp_buffers, model_parallel_group=mp_group, data_parallel_group=intra_dp_cp_with_gtp_group, - data_parallel_group_gloo=intra_dp_cp_with_gtp_group_gloo, + # GTP does not support the Gloo optimizer-state paths yet. + data_parallel_group_gloo=None, data_parallel_group_idx=model_parallel_rank, intra_dist_opt_group=intra_dist_opt_group, distributed_optimizer_instance_id=distributed_optimizer_instance_id, @@ -1237,11 +1236,6 @@ def get_megatron_optimizer( param_group_id += 1 if len(egtp_param_groups) > 0: expt_model_parallel_rank = get_pg_rank(expt_tp_pp_group) - # Pass Gloo process groups into optimizer only if needed. - if use_gloo_process_groups: - egtp_data_parallel_group_gloo = intra_expt_dp_with_egtp_group_gloo - else: - egtp_data_parallel_group_gloo = None optimizers.append( _get_megatron_optimizer_based_on_param_groups( config=config, @@ -1250,7 +1244,8 @@ def get_megatron_optimizer( per_model_buffers=egtp_buffers, model_parallel_group=expt_tp_pp_group, data_parallel_group=intra_expt_dp_with_egtp_group, - data_parallel_group_gloo=egtp_data_parallel_group_gloo, + # EGTP does not support the Gloo optimizer-state paths yet. + data_parallel_group_gloo=None, data_parallel_group_idx=expt_model_parallel_rank, intra_dist_opt_group=intra_dist_opt_group, distributed_optimizer_instance_id=distributed_optimizer_instance_id, diff --git a/megatron/core/parallel_state.py b/megatron/core/parallel_state.py index f6b1c7604db..551eca72115 100644 --- a/megatron/core/parallel_state.py +++ b/megatron/core/parallel_state.py @@ -75,7 +75,6 @@ # of true expert-weight replicas. Mirrors _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP # on the dense side. 
_INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP = None -_INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP_GLOO = None # Parallel state values changed on the fly _MPU_EXPERT_MODEL_PARALLEL_WORLD_SIZE = None _MPU_EXPERT_MODEL_PARALLEL_RANK = None @@ -146,7 +145,6 @@ # Partial Data parallel group information with context parallel combined and GTP peers # excluded. Reaches only true weight-replica ranks within one distributed-optimizer instance. _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP = None -_INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP_GLOO = None # combined parallel group of TP and CP _TENSOR_AND_CONTEXT_PARALLEL_GROUP = None @@ -1027,7 +1025,6 @@ def initialize_model_parallel( global _DATA_PARALLEL_GROUP_WITH_GTP global _DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP global _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP - global _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP_GLOO if gtp_remat_size > 1: # Build rank→gtp_rank mapping. rank_to_gtp_rank = {} @@ -1083,36 +1080,18 @@ def initialize_model_parallel( pg_options=get_nccl_options("intra_dp_cp_gtp", nccl_comm_cfgs), group_desc="INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP", ) - if create_gloo_process_groups: - intra_group_gloo = create_group( - chunk, - timeout=timeout, - backend="gloo", - group_desc=( - "INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP_GLOO" - ), - ) - else: - intra_group_gloo = None if rank in chunk: _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP = intra_group - _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP_GLOO = ( - intra_group_gloo - ) if num_distributed_optimizer_instances == 1: _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP = ( _DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP ) - _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP_GLOO = None else: _DATA_PARALLEL_GROUP_WITH_GTP = _DATA_PARALLEL_GROUP _DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP = _DATA_PARALLEL_GROUP_WITH_CP _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP = ( 
_INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP ) - _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP_GLOO = ( - _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_GLOO - ) # Build the context-parallel groups. global _CONTEXT_PARALLEL_GROUP @@ -1529,7 +1508,6 @@ def initialize_model_parallel( # Build expert DP group with expert generalized tensor parallel accounted for. global _EXPERT_DATA_PARALLEL_GROUP_WITH_GTP global _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP - global _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP_GLOO if expert_gtp_remat_size > 1: # Build rank→expert_gtp_rank mapping. rank_to_expert_gtp_rank = {} @@ -1567,35 +1545,17 @@ def initialize_model_parallel( pg_options=get_nccl_options("intra_ep_dp_gtp", nccl_comm_cfgs), group_desc="INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP", ) - if create_gloo_process_groups: - intra_group_gloo = create_group( - chunk, - timeout=timeout, - backend="gloo", - group_desc=( - "INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP_GLOO" - ), - ) - else: - intra_group_gloo = None if rank in chunk: _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP = intra_group - _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP_GLOO = ( - intra_group_gloo - ) if num_distributed_optimizer_instances == 1: _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP = ( _EXPERT_DATA_PARALLEL_GROUP_WITH_GTP ) - _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP_GLOO = None else: _EXPERT_DATA_PARALLEL_GROUP_WITH_GTP = _EXPERT_DATA_PARALLEL_GROUP _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP = ( _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP ) - _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP_GLOO = ( - _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_GLOO - ) ### End of expert related parallel groups initialization @@ -1835,21 +1795,7 @@ def get_data_parallel_group_gloo( with_context_parallel=False, with_gtp=False, partial_data_parallel=False ): """Get the Gloo data-parallel group the caller rank belongs to.""" - if with_gtp: - assert 
with_context_parallel, "Gloo with_gtp variants only exist with CP" - if partial_data_parallel: - assert _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP_GLOO is not None, ( - "Intra partial data parallel group with context parallel and " - "generalized tensor parallel (gloo) is not initialized" - ) - return _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP_GLOO - # Full (non-partial) Gloo variant of with_gtp is not built; callers needing - # cross-instance Gloo over the GTP-excluded set can use the non-GTP variant - # since broadcasts are init-time only. - assert ( - _DATA_PARALLEL_GROUP_WITH_CP_GLOO is not None - ), "data parallel group-gloo with context parallel combined is not initialized" - return _DATA_PARALLEL_GROUP_WITH_CP_GLOO + assert not with_gtp, "GTP does not support Gloo data-parallel groups" if with_context_parallel: if partial_data_parallel: assert ( @@ -2424,15 +2370,7 @@ def get_expert_data_parallel_group( def get_expert_data_parallel_group_gloo(with_gtp=False, partial_expert_data_parallel=False): """Get expert data parallel group-gloo.""" - if with_gtp: - assert ( - partial_expert_data_parallel - ), "Gloo with_gtp variant is only built for the partial (per-distopt-instance) group" - assert _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP_GLOO is not None, ( - "Intra partial expert data parallel group with generalized tensor parallel " - "(gloo) is not initialized" - ) - return _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP_GLOO + assert not with_gtp, "EGTP does not support Gloo expert-data-parallel groups" if partial_expert_data_parallel: assert ( _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_GLOO is not None @@ -2555,9 +2493,6 @@ def destroy_model_parallel(): global _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP = None - global _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP_GLOO - _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP_GLOO = None - global _CONTEXT_PARALLEL_GROUP 
_CONTEXT_PARALLEL_GROUP = None @@ -2663,9 +2598,6 @@ def destroy_model_parallel(): global _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP = None - global _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP_GLOO - _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP_GLOO = None - global _EXPERT_DATA_PARALLEL_GROUP_GLOO if ( _EXPERT_DATA_PARALLEL_GROUP_GLOO is not None diff --git a/megatron/core/process_groups_config.py b/megatron/core/process_groups_config.py index cf9f3442dac..4f806ec7371 100644 --- a/megatron/core/process_groups_config.py +++ b/megatron/core/process_groups_config.py @@ -326,7 +326,7 @@ def setup_process_groups_for_optimizer( ) expt_dp_group = parallel_state.get_expert_data_parallel_group() intra_expt_dp_group = parallel_state.get_expert_data_parallel_group( - partial_expert_data_parallel=True, with_gtp=True + partial_expert_data_parallel=True ) intra_expt_dp_with_egtp_group = parallel_state.get_expert_data_parallel_group( with_gtp=True, partial_expert_data_parallel=True @@ -339,22 +339,12 @@ def setup_process_groups_for_optimizer( intra_dp_cp_group_gloo = parallel_state.get_data_parallel_group_gloo( with_context_parallel=True, partial_data_parallel=True ) - intra_dp_cp_with_gtp_group_gloo = parallel_state.get_data_parallel_group_gloo( - with_context_parallel=True, with_gtp=True, partial_data_parallel=True - ) intra_expt_dp_group_gloo = parallel_state.get_expert_data_parallel_group_gloo( partial_expert_data_parallel=True ) - intra_expt_dp_with_egtp_group_gloo = ( - parallel_state.get_expert_data_parallel_group_gloo( - with_gtp=True, partial_expert_data_parallel=True - ) - ) else: intra_dp_cp_group_gloo = None - intra_dp_cp_with_gtp_group_gloo = None intra_expt_dp_group_gloo = None - intra_expt_dp_with_egtp_group_gloo = None # Model communication groups mp_group = parallel_state.get_model_parallel_group() @@ -492,9 +482,7 @@ def setup_process_groups_for_optimizer( "provided. 
Please set use_gloo_process_groups to False." ) intra_dp_cp_group_gloo = None - intra_dp_cp_with_gtp_group_gloo = None intra_expt_dp_group_gloo = None - intra_expt_dp_with_egtp_group_gloo = None return { 'dp_group': dp_group, @@ -511,8 +499,6 @@ def setup_process_groups_for_optimizer( 'inter_dist_opt_group': inter_dist_opt_group, 'intra_dist_opt_group': intra_dist_opt_group, 'intra_dp_cp_group_gloo': intra_dp_cp_group_gloo, - 'intra_dp_cp_with_gtp_group_gloo': intra_dp_cp_with_gtp_group_gloo, - 'intra_expt_dp_with_egtp_group_gloo': intra_expt_dp_with_egtp_group_gloo, 'intra_expt_dp_group_gloo': intra_expt_dp_group_gloo, } diff --git a/megatron/training/training.py b/megatron/training/training.py index cbed9c59870..d245a81d8aa 100644 --- a/megatron/training/training.py +++ b/megatron/training/training.py @@ -1652,11 +1652,12 @@ def get_model(model_provider_func, model_type=ModelType.encoder_or_decoder, wrap args.modelopt_enabled = True # Configure GTP padding alignment based on quantization recipe before model construction. - from megatron.experimental.gtp import update_gtp_config if ( getattr(args, 'generalized_tensor_parallel_remat_size', 1) > 1 or getattr(args, 'expert_generalized_tensor_parallel_remat_size', 1) > 1 ): + from megatron.experimental.gtp import update_gtp_config + if getattr(args, 'fp4', None) is not None: update_gtp_config(pad_for_alignment=16) elif getattr(args, 'fp8_recipe', None) == 'mxfp8': @@ -1715,23 +1716,24 @@ def build_model(): # Classify each GTP param into its prefetch chain (GRAPHED vs UNGRAPHED) # from args.cuda_graph_modules + moe_shared_expert_overlap. Must run after # model build, before the first forward (which lazily builds chain links). 
- from megatron.experimental.gtp import ( - GTP_CONFIG, - classify_gtp_chains, - set_cuda_graph_modules, - tag_gtp_params_with_names, - ) - _raw_modules = getattr(args, 'cuda_graph_modules', None) or [] - _cg_modules = {getattr(s, 'name', str(s)) for s in _raw_modules} if _raw_modules else None - _mse_overlap = getattr(args, 'moe_shared_expert_overlap', False) - set_cuda_graph_modules(_cg_modules, moe_shared_expert_overlap=_mse_overlap) - for model_module in model: - tag_gtp_params_with_names(model_module) - classify_gtp_chains(model_module) if ( getattr(args, 'generalized_tensor_parallel_remat_size', 1) > 1 or getattr(args, 'expert_generalized_tensor_parallel_remat_size', 1) > 1 ): + from megatron.experimental.gtp import ( + GTP_CONFIG, + classify_gtp_chains, + set_cuda_graph_modules, + tag_gtp_params_with_names, + ) + + _raw_modules = getattr(args, 'cuda_graph_modules', None) or [] + _cg_modules = {getattr(s, 'name', str(s)) for s in _raw_modules} if _raw_modules else None + _mse_overlap = getattr(args, 'moe_shared_expert_overlap', False) + set_cuda_graph_modules(_cg_modules, moe_shared_expert_overlap=_mse_overlap) + for model_module in model: + tag_gtp_params_with_names(model_module) + classify_gtp_chains(model_module) print_rank_0(f"GTP enabled. {GTP_CONFIG}") # Set tensor model parallel attributes if not set. 
From 3c8429162044f146b858a3e625a27e85641d0de4 Mon Sep 17 00:00:00 2001 From: Shiqing Fan Date: Mon, 1 Jun 2026 05:12:18 -0700 Subject: [PATCH 13/59] Only batch with _foreach_add_ when finalizing multiple (routed) weight Signed-off-by: Shiqing Fan --- .../gtp/generalized_tensor_parallelism.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/megatron/experimental/gtp/generalized_tensor_parallelism.py b/megatron/experimental/gtp/generalized_tensor_parallelism.py index 487b3d96c31..043247329bf 100644 --- a/megatron/experimental/gtp/generalized_tensor_parallelism.py +++ b/megatron/experimental/gtp/generalized_tensor_parallelism.py @@ -1263,7 +1263,12 @@ def wgrad_reduce_scatter(self, wgrad, nvtx_label=None): else: # Sync reduce-scatter — reached as the natural chain-head case, recycle immediately wgrads, _ = self._reduce_scatter(wgrads, async_op=False, nvtx_label=nvtx_label) - torch._foreach_add_([p.main_grad for p in weights], wgrads) + nvtx_range_push(f"{nvtx_label}.gtp_wgrad_accum") + if len(weights) == 1: + weights[0].main_grad.add_(wgrads[0]) + else: + torch._foreach_add_([p.main_grad for p in weights], wgrads) + nvtx_range_pop(f"{nvtx_label}.gtp_wgrad_accum") result = [self._handle_megatron_grad_accum(p) for p in weights] if poolable: @@ -1283,7 +1288,13 @@ def wgrad_reduce_scatter(self, wgrad, nvtx_label=None): cache = get_global_GTP_cache() next_weights = self.next_w._weights wgrads = [cache.get(w._rs_ticket) for w in next_weights] - torch._foreach_add_([w.main_grad for w in next_weights], wgrads) + nvtx_range_push(f"{self.next_w._debug_name}.gtp_wgrad_accum_deferred") + # Only batch with _foreach_add_ when finalizing multiple (routed) weights. 
+ if len(next_weights) == 1: + next_weights[0].main_grad.add_(wgrads[0]) + else: + torch._foreach_add_([w.main_grad for w in next_weights], wgrads) + nvtx_range_pop(f"{self.next_w._debug_name}.gtp_wgrad_accum_deferred") for w in next_weights: self._handle_megatron_grad_accum(w) cache.release(w._rs_ticket) From 392816ab18d598fa3eadcb4140138b054067bf3e Mon Sep 17 00:00:00 2001 From: Shiqing Fan Date: Mon, 1 Jun 2026 21:10:36 -0700 Subject: [PATCH 14/59] gtp+gmm-fusion: support offloading(moe-act-input) Signed-off-by: Shiqing Fan --- .../experimental/gtp/generalized_tensor_parallelism.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/megatron/experimental/gtp/generalized_tensor_parallelism.py b/megatron/experimental/gtp/generalized_tensor_parallelism.py index 043247329bf..2ff6c2a2744 100644 --- a/megatron/experimental/gtp/generalized_tensor_parallelism.py +++ b/megatron/experimental/gtp/generalized_tensor_parallelism.py @@ -1306,6 +1306,15 @@ def batched_wgrad_reduce_scatter(self, wgrad_list, nvtx_label=None): assert self.is_routed_expert and self.weight_list is not None return self.wgrad_reduce_scatter(wgrad_list, nvtx_label=nvtx_label) + def get_data_tensors(self): + """Expose self as the lone data tensor for TE's offload-marking interface. + + TE's ``mark_activation_offload`` treats any non-plain tensor as a storage + wrapper and calls ``get_data_tensors()`` on it; a sharded param has no inner + buffers, so it is its own data tensor. 
+ """ + return (self,) + def __torch_function__(self, func, types, args=(), kwargs=None): """Subclass-preserving dispatch for ``detach`` (other ops fall through).""" del types # required by protocol, unused here From fc570d014b9f772fa2e797d008d40753c6aa03c0 Mon Sep 17 00:00:00 2001 From: Shiqing Fan Date: Tue, 2 Jun 2026 23:18:27 -0700 Subject: [PATCH 15/59] GTP + full-iter CG Signed-off-by: Shiqing Fan --- .../gtp/generalized_tensor_parallelism.py | 62 +++++++++++-------- megatron/training/training.py | 8 ++- 2 files changed, 43 insertions(+), 27 deletions(-) diff --git a/megatron/experimental/gtp/generalized_tensor_parallelism.py b/megatron/experimental/gtp/generalized_tensor_parallelism.py index 2ff6c2a2744..7aab8492191 100644 --- a/megatron/experimental/gtp/generalized_tensor_parallelism.py +++ b/megatron/experimental/gtp/generalized_tensor_parallelism.py @@ -53,55 +53,65 @@ class GTPChain(str, Enum): """Prefetch chain identifier for an GTPShardedParam. GRAPHED — fwd/bwd captured by a CUDA graph (MLM _CudaGraphRunner). - UNGRAPHED — fwd/bwd runs eagerly; includes embedding/output_layer and - routed grouped experts always, plus router/shared_experts - when their scope tag is not in cuda_graph_modules. + UNGRAPHED — fwd/bwd runs eagerly. - Chains never cross-link (prev_w/next_w stay within one chain). CG - disabled → single UNGRAPHED chain; full-iteration graph → single GRAPHED. + Chains never cross-link (prev_w/next_w stay within one chain). See + _classify_param_chain for the GRAPHED/UNGRAPHED rule. """ GRAPHED = "GTP_graphed" UNGRAPHED = "GTP_ungraphed" -# Module-level cuda_graph_modules, set by the integrator at init via set_cuda_graph_modules(). -# None or empty → CG is disabled; every GTP param classifies as UNGRAPHED. -# Value is a set of scope tags; e.g. {"mamba","attn","moe_router"}. -_CUDA_GRAPH_MODULES: Optional[set] = None -# Whether shared_experts are run with overlap (cannot be captured). 
When True, -# shared_experts stay UNGRAPHED regardless of moe_router scope inclusion, matching -# the transformer_layer.py guard that excludes them from the captured submodules. -_MOE_SHARED_EXPERT_OVERLAP: bool = False +# Active cuda_graph config, set by the integrator via set_cuda_graph_modules() before +# classify_gtp_chains(); consumed by _classify_param_chain. +_CUDA_GRAPH_MODULES: Optional[set] = None # scope tags, e.g. {"mamba","attn","moe_router"} +_MOE_SHARED_EXPERT_OVERLAP: bool = False # overlapped shared_experts can't be captured -> UNGRAPHED +_FULL_ITERATION: bool = False # whole step in one graph -> every param GRAPHED +# Empty cuda_graph_modules under per-layer CG = "graph every layer" == all tags present. +_ALL_LAYER_SCOPE_TAGS = frozenset({"mamba", "attn", "moe", "moe_router"}) -def set_cuda_graph_modules(scope, moe_shared_expert_overlap: bool = False): - """Record the active cuda_graph_modules for GTP chain classification. +def set_cuda_graph_modules( + scope, moe_shared_expert_overlap: bool = False, cuda_graph_impl: str = "none" +): + """Record the active cuda_graph config for GTP chain classification. - Called by MLM at init, BEFORE classify_gtp_chains(). ``scope`` may be - None, an empty iterable (CG disabled), or an iterable of scope tags. + Called by MLM at init, before classify_gtp_chains(). ``cuda_graph_impl`` + disambiguates the empty-``scope`` cases: + - "none" -> CG disabled; all params UNGRAPHED. + - "full_iteration" -> whole step in one graph; all params GRAPHED. + - "local"/"transformer_engine" + empty scope -> graph every layer. 
""" - global _CUDA_GRAPH_MODULES, _MOE_SHARED_EXPERT_OVERLAP - _CUDA_GRAPH_MODULES = set(scope) if scope else None + global _CUDA_GRAPH_MODULES, _MOE_SHARED_EXPERT_OVERLAP, _FULL_ITERATION _MOE_SHARED_EXPERT_OVERLAP = bool(moe_shared_expert_overlap) + _FULL_ITERATION = cuda_graph_impl == "full_iteration" + if _FULL_ITERATION: + _CUDA_GRAPH_MODULES = None # scope unused + elif cuda_graph_impl != "none" and not scope: + _CUDA_GRAPH_MODULES = set(_ALL_LAYER_SCOPE_TAGS) # graph every layer + else: + _CUDA_GRAPH_MODULES = set(scope) if scope else None def _classify_param_chain(param_name: str) -> "GTPChain": - """Classify an GTPShardedParam by name + active cuda_graph_modules. + """Map a GTPShardedParam name + active cuda_graph config to its chain. - embedding / output_layer are always UNGRAPHED. Other kinds (mamba mixer, - self/cross_attention, shared_experts, routed experts) are GRAPHED iff - their scope tag is present in cuda_graph_modules; otherwise UNGRAPHED. + Full-iteration -> GRAPHED. Otherwise embedding/output_layer are UNGRAPHED, and + each layer kind (mixer, attention, shared/routed experts) is GRAPHED iff its + scope tag is in cuda_graph_modules. """ n = param_name - # Always ungraphed — embedding and output_layer live outside any CG runner. + if _FULL_ITERATION: + return GTPChain.GRAPHED + + # embedding/output_layer live outside any per-layer CG runner. if "embedding" in n or "output_layer" in n: return GTPChain.UNGRAPHED scope = _CUDA_GRAPH_MODULES - if not scope: - # CG disabled: every GTP param goes to the single UNGRAPHED chain. + if not scope: # CG disabled return GTPChain.UNGRAPHED if ".mlp.shared_experts." 
in n: diff --git a/megatron/training/training.py b/megatron/training/training.py index 0688176baeb..f4848fab60b 100644 --- a/megatron/training/training.py +++ b/megatron/training/training.py @@ -1730,7 +1730,13 @@ def build_model(): _raw_modules = getattr(args, 'cuda_graph_modules', None) or [] _cg_modules = {getattr(s, 'name', str(s)) for s in _raw_modules} if _raw_modules else None _mse_overlap = getattr(args, 'moe_shared_expert_overlap', False) - set_cuda_graph_modules(_cg_modules, moe_shared_expert_overlap=_mse_overlap) + # cuda_graph_impl lets the classifier tell "CG disabled" from "full-iteration / + # graph-every-layer" — both have empty cuda_graph_modules. + set_cuda_graph_modules( + _cg_modules, + moe_shared_expert_overlap=_mse_overlap, + cuda_graph_impl=getattr(args, 'cuda_graph_impl', 'none'), + ) for model_module in model: tag_gtp_params_with_names(model_module) classify_gtp_chains(model_module) From 23ed3ba21662d170b3bdacea432cde78ef0254e1 Mon Sep 17 00:00:00 2001 From: Shiqing Fan Date: Wed, 3 Jun 2026 21:27:54 -0700 Subject: [PATCH 16/59] [feat]GTP: prefetch recompute-forward weight gathers via a separate chain; update README Signed-off-by: Shiqing Fan --- megatron/experimental/gtp/README.md | 34 ++++- .../gtp/generalized_tensor_parallelism.py | 127 ++++++++++++++++-- 2 files changed, 150 insertions(+), 11 deletions(-) diff --git a/megatron/experimental/gtp/README.md b/megatron/experimental/gtp/README.md index 227eb8b1c9a..034d7a8b48e 100644 --- a/megatron/experimental/gtp/README.md +++ b/megatron/experimental/gtp/README.md @@ -25,7 +25,7 @@ Core implementation: `megatron/experimental/gtp/generalized_tensor_parallelism.p Each weight is sharded 1/N across a GTP group along `out_features`, stored as a `GTPShardedParam` subclass of `nn.Parameter`. 
Materialization and gradient reduction are both **per-weight, per-call** — not per-model or per-module: - **Independent state per param**: each has its own AG state (`state`) and RS state (`rs_state`) machines, both cycling `NONE → ASYNC_WAIT → DATA_READY → NONE` and tracked separately so fwd and bwd async ops don't interfere. -- **Prefetch chain for AG** (doubly-linked `prev_w` / `next_w`): during fwd, each weight's `all_gather_and_prefetch` issues async AG for `next_w`; during bwd, `all_gather_and_prefetch_bwd` issues async AG for `prev_w`. Layer *i*'s AG overlaps with layer *i−1*'s GEMM. For an L-layer model, L−1 all-gathers are fully hidden behind compute. +- **Prefetch chain for AG** (doubly-linked `prev_w` / `next_w`): during fwd, each weight's `all_gather_and_prefetch` issues async AG for `next_w`; during bwd, `all_gather_and_prefetch_bwd` issues async AG for `prev_w`. Layer *i*'s AG overlaps with layer *i−1*'s GEMM. For an L-layer model, L−1 all-gathers are fully hidden behind compute. When activation recompute is enabled, a **third** chain prefetches the recompute-forward gathers during backward — see §3.1 *Recompute-forward prefetch chain*. - **Deferred RS finalize for wgrad**: `wgrad_reduce_scatter` on param *i* launches an **async** reduce-scatter (handle stashed in `_wgrad_rs_handle`) and returns `None` to autograd — the wgrad is NOT finalized into `main_grad` yet. Finalization is **deferred one step**: the next bwd step (param *i−1*'s `wgrad_reduce_scatter`) calls `self.next_w._wait_reduce_scatter()` + `_finalize_wgrad()`, which waits on the stashed handle, accumulates the reduced wgrad into `main_grad`, and fires the DDP `register_grad_ready` hook. The chain's head (first-in-fwd, last-in-bwd) uses a synchronous RS since nothing follows it. This one-step deferral is what lets layer *i*'s RS overlap with layer *i−1*'s bwd GEMMs. 
- **Cold start only**: every weight's very first AG is synchronous (`DATA_READY_SYNC`, no prefetch has run yet); the async prefetch chain kicks in from the second forward onward. @@ -224,6 +224,16 @@ fwd: AG(W_{i+1}) ∥ GEMM(W_i) ∥ CG replay of ca bwd: AG(W_{i-1}) ∥ dgrad(W_i) → wgrad(W_i) ∥ RS(wgrad_i) ∥ [finalize wgrad_{i+1} + DDP hook] ``` +GTP runs up to **three** independent prefetch chains, all following one rule — *prefetch the weight the next consume will need*: + +| # | when | consume | prefetch (overlap) | AG direction | slot | +|---|------|---------|--------------------|--------------|------| +| 1 | fwd | weight `i` | `next_w` = i+1 ‖ `GEMM_i` | rowwise (`fwd=True`) | `_prefetch_handle` | +| 2 | bwd dgrad | weight `i` | `prev_w` = i−1 ‖ `Dgrad_i` | columnwise (`fwd=False`) | `_prefetch_handle` | +| 3 | bwd recompute | weight `i` | `_recompute_next` = i+1 ‖ `recompute_GEMM_i` | rowwise (`fwd=True`) | `_recompute_prefetch_handle` (separate) | + +Chain 3 exists only when activation recompute is on. It mirrors chain 1 (rowwise, prefetch `next`) but runs *during* backward, so it overlaps chain 2 in time on the same weight — hence its **own** slot. fwd (1) and bwd-dgrad (2) never overlap in time, so they safely share `_prefetch_handle`. See *Recompute-forward prefetch chain* below. + At bwd step *i* the step is launching *RS of wgrad_i* while finalizing the *previous* iter's wgrad (`wgrad_{i+1}` in bwd order = the next-one-over in fwd order). That one-step deferral is what makes the RS run concurrent with the next layer's dgrad/wgrad GEMMs instead of blocking after every layer. Communication never blocks compute except at the very first layer of each direction (cold start) and at enforced serialization points (CG/eager drains, finalize-grads barrier). 
@@ -234,6 +244,28 @@ Current behavior: backward always runs dgrad GEMM, then wgrad GEMM, then issues A future MR will add an opt-in wgrad-before-dgrad schedule on `_Linear` / `_LayerNormLinear` so the GTP wgrad RS NCCL overlaps with the dgrad GEMM of the **same** layer (best for the GTP + no-TP case). Until that MR lands, attempting to set `GTPConfig.wgrad_before_dgrad = True` raises `NotImplementedError`. +##### Recompute-forward prefetch chain *(GTP + activation recompute)* + +When a GTP-sharded module is in `--recompute-modules` (e.g. `shared_experts`), its forward is **re-run during backward** to regenerate activations. That recompute-forward must all-gather each weight **rowwise** again — a *third* gather lifecycle, concurrent with the in-flight **columnwise** dgrad gather of the *same* weight. Since both share one `GTPShardedParam`, the recompute path gets its **own** prefetch slot (`_recompute_prefetch_handle` / `_recompute_ag_event`, reusing the `_ag_ticket_fwd` rowwise buffer) so it never clobbers the dgrad lifecycle's `state` / `_prefetch_handle` / `ag_event`. + +The recompute weights form a **separate** linked list (`_recompute_next`), **self-populated** on the first backward from the weights actually re-gathered while `in_fp8_activation_recompute_phase()` is true — membership is *observed*, not configured (no tagging, so it tracks exactly what each checkpointed module re-gathers). Each recompute-forward consume prefetches the next recompute weight, so every gather **except the global-first** overlaps preceding recompute / dgrad / wgrad compute: + +``` +recompute-fwd of shared_experts (per layer: GEMM fc1 → SReLU → GEMM fc2, then dgrad+wgrad) + + Before (on-demand): + default: AG(fc1)─GEMM fc1─SReLU─AG(fc2)─GEMM fc2─dgrad─wgrad─... every AG exposed + After (recompute chain): + default: GEMM fc1─SReLU─GEMM fc2─dgrad─wgrad─GEMM fc1'─... 
back-to-back + ag_str: AG(fc1) [AG fc2] [AG fc1' (next layer)] only AG(fc1) exposed +``` + +`AG(fc2)` is issued at `fc1`'s consume (overlaps GEMM fc1 + SReLU); `AG(fc1')` for the next layer is issued at `fc2`'s consume, so it overlaps the **whole** layer's `dgrad + wgrad` window. The cross-layer link is what hides every region head except the very first. + +Under **full-iteration CUDA graphs** the recompute-forward is captured; `wait_async_comms(GRAPHED)` drains the recompute handle too (sets `_recompute_already_drained`) so the captured consumer skips its cross-graph wait — the same producer-drain pattern as the fwd/bwd chains. + +> **When *not* to recompute a GTP weight.** Recompute on a GTP-sharded weight adds this extra rowwise gather. For MLP-like blocks at short context (`SeqLen ≤ 2 × HiddenSize`), GTP-sharding the weight saves *more* memory than recomputing its activations, so the better trade is to keep such modules GTP-sharded and **out** of `--recompute-modules` (offload their activations if needed) — avoiding the third gather entirely. Build the recompute chain only for modules that genuinely need both. + ### 3.2 DDP buckets with (E)GTP ![DDP parameter bucketing with (E)GTP](images/0527_ddp_param_bucketing.png) diff --git a/megatron/experimental/gtp/generalized_tensor_parallelism.py b/megatron/experimental/gtp/generalized_tensor_parallelism.py index 7aab8492191..a3f01f644b0 100644 --- a/megatron/experimental/gtp/generalized_tensor_parallelism.py +++ b/megatron/experimental/gtp/generalized_tensor_parallelism.py @@ -31,6 +31,7 @@ from transformer_engine.pytorch.distributed import ( _NVFP4AllGatherAsyncHandle, gather_along_first_dim, + in_fp8_activation_recompute_phase, reduce_scatter_along_first_dim, ) from transformer_engine.pytorch.module.base import get_dummy_wgrad @@ -499,6 +500,10 @@ class GTPShardedParam(torch.nn.Parameter): # params with the same chain_id. 
_chain_state: Dict[str, dict] = {} + # Per-chain cursor for the recompute-forward prefetch chain (see the _recompute_* + # slot on GTPShardedParam). Keyed by chain_id like _chain_state. + _recompute_chain_state: Dict[str, dict] = {} + @classmethod def _get_chain_state(cls, chain_id: str) -> dict: if chain_id not in cls._chain_state: @@ -510,6 +515,12 @@ def _get_chain_state(cls, chain_id: str) -> dict: } return cls._chain_state[chain_id] + @classmethod + def _get_recompute_chain_state(cls, chain_id: str) -> dict: + if chain_id not in cls._recompute_chain_state: + cls._recompute_chain_state[chain_id] = {"last_weight": None} + return cls._recompute_chain_state[chain_id] + @classmethod def _buffer_link_table_row( cls, prev: "GTPShardedParam", curr: "GTPShardedParam", chain: dict @@ -575,6 +586,18 @@ def __init__(self, tensor, *args, **kwargs): self.prefetch_initialized = False self.next_w = None self.prev_w = None + # Recompute-forward prefetch chain: a SEPARATE chain (own slot) linking + # the weights re-gathered rowwise during an activation-recompute forward + # in backward. Kept distinct from state/_prefetch_handle/ag_event above so + # it never clobbers the concurrent columnwise dgrad lifecycle of the same + # weight. Self-populates lazily from the first backward's recompute-fwd + # gathers (see all_gather_and_prefetch). + self._recompute_initialized = False + self._recompute_next = None + self._recompute_prev = None + self._recompute_prefetch_handle = None + self._recompute_ag_event = torch.cuda.Event(external=True) + self._recompute_already_drained = False # Chain identity (GTPChain.GRAPHED / GTPChain.UNGRAPHED). Defaults to # UNGRAPHED as a safe fallback; classify_gtp_chains(model) walks the # model at init time (after set_cuda_graph_modules) and reclassifies @@ -789,8 +812,11 @@ def _all_gather_weight(self, async_op, skip_weight_cast, cast_noop_flag, fwd, nv weights = self._weights - # 1. Transition state for async gathers. 
- if GTP_CONFIG.check_param_states: + # 1. Transition state for async gathers. Skip during a recompute-forward: + # it gathers this weight rowwise (into _ag_ticket_fwd) while a bwd-chain + # prefetch may hold an in-flight columnwise AG state on the same weight + # (separate _ag_ticket_bwd) — clobbering it would break the dgrad consume. + if GTP_CONFIG.check_param_states and not in_fp8_activation_recompute_phase(): new_state = GTPWeightState.ASYNC_WAIT if async_op else GTPWeightState.DATA_READY_SYNC for w in weights: w._set_state(new_state) @@ -974,6 +1000,50 @@ def _get_prefetched_weight(self, fwd, skip_weight_cast=False, cast_noop_flag=Non result = [r.detach().requires_grad_(w.requires_grad) for r, w in zip(result, self._weights)] return result if self.is_routed_expert else result[0] + def _wait_recompute_param_gather(self): + # Recompute-chain analogue of _wait_param_gather, on the _recompute_* slot. + ag_stream = self._cached_ag_stream + if ag_stream is None: + ag_stream = get_ag_stream(self.chain_id, self.group) + self._cached_ag_stream = ag_stream + with torch.cuda.stream(ag_stream): + if self._recompute_prefetch_handle is not None: + self._recompute_prefetch_handle.wait() + self._recompute_prefetch_handle = None + self._recompute_ag_event.record() + + def _recompute_prefetch_next(self, target, nvtx_label=None): + # Issue target's rowwise (fwd) AG into its recompute slot. _all_gather_weight + # skips the AG-state transition under recompute, so the dgrad `state` of + # target is untouched; result lands in target._ag_ticket_fwd. + _, handle = target._all_gather_weight( + async_op=True, + skip_weight_cast=True, + cast_noop_flag=None, + fwd=True, + nvtx_label=nvtx_label, + ) + target._recompute_prefetch_handle = handle + + def _get_recompute_prefetched_weight(self): + # Recompute-chain analogue of _get_prefetched_weight (state-neutral; reads the + # rowwise _ag_ticket_fwd via the _recompute_* slot). 
+ if self._recompute_already_drained: + # Producer already drained via wait_async_comms (CG capture); skip the + # captured cross-graph wait (CUDA no-op anyway). + self._recompute_already_drained = False + else: + self._wait_recompute_param_gather() + self._recompute_ag_event.wait() + + result = [] + cache = get_global_GTP_cache() + for w in self._weights: + result.append(cache.get(w._ag_ticket_fwd)) + result = [self._strip_padding(r) for r in result] + result = [r.detach().requires_grad_(w.requires_grad) for r, w in zip(result, self._weights)] + return result if self.is_routed_expert else result[0] + def all_gather_and_prefetch_bwd(self, nvtx_label=None): """ Backward variant: get current weight (from cache if prefetched, else @@ -1041,14 +1111,31 @@ def all_gather_and_prefetch( Returns: weight_total """ - if GTP_CONFIG.weight_prefetch and self.prev_w is not None: + # During an activation-recompute forward (runs in backward), route consume + + # prefetch through the recompute-forward chain on its own _recompute_* slot + # (see __init__) instead of the fwd/bwd chains; lazy-built below. + in_recompute = in_fp8_activation_recompute_phase() + use_recompute_chain = in_recompute and GTP_CONFIG.weight_prefetch + + # Consume current weight. + if use_recompute_chain and self._recompute_prev is not None: + result = self._get_recompute_prefetched_weight() + elif not in_recompute and GTP_CONFIG.weight_prefetch and self.prev_w is not None: result = self._get_prefetched_weight(True, skip_weight_cast, cast_noop_flag) else: + # On-demand: chain head (fwd or recompute global-first) or first-iter build. result = self._all_gather_weight_on_demand(True, skip_weight_cast, cast_noop_flag) - # Prefetch next weight + # Prefetch next weight on the matching chain. 
if ( - GTP_CONFIG.weight_prefetch + use_recompute_chain + and self._recompute_next is not None + and self._recompute_next._need_weight_prefetch + ): + self._recompute_prefetch_next(self._recompute_next, nvtx_label=nvtx_label) + elif ( + not in_recompute + and GTP_CONFIG.weight_prefetch and self.next_w is not None and self.next_w._need_weight_prefetch ): @@ -1063,14 +1150,29 @@ def all_gather_and_prefetch( ) self.next_w._prefetch_handle = handle - # The unsharded tensor has been returned, no pending work so reset state to NONE - if GTP_CONFIG.check_param_states: + # The unsharded tensor has been returned, no pending work so reset state to NONE. + # Skip during recompute: a bwd-chain prefetch may hold an in-flight AG state on + # this weight that its later dgrad consume still needs. + if GTP_CONFIG.check_param_states and not in_recompute: for w in self._weights: w._set_state(GTPWeightState.NONE) - # Lazy population of linked list: link previous weight to current weight - # Uses per-chain state so dense and expert chains never cross-link. cls = type(self) + + # Lazy-build the recompute-forward prefetch chain (first backward, in + # recompute order). Consume/prefetch above used the prior iteration's links, + # so the first backward runs on-demand while these links are established. + if in_recompute and not self._recompute_initialized: + rchain = cls._get_recompute_chain_state(self.chain_id) + last_r = rchain["last_weight"] + if last_r is not None and last_r._recompute_next is None: + last_r._recompute_next = self + self._recompute_prev = last_r + self._recompute_initialized = True + rchain["last_weight"] = self + + # Lazy population of the fwd/bwd linked list: link previous weight to current. + # Uses per-chain state so dense and expert chains never cross-link. 
chain = cls._get_chain_state(self.chain_id) if not self.prefetch_initialized: last_w = chain["last_weight"] @@ -1442,7 +1544,7 @@ def _allocate_buffer( self._total_bytes += buf_bytes print_rank_0( f"[GTP Cache] +{buf_bytes / 1024**2:.1f} MB (shape={out_shape}, dtype={dtype}) " - f"total={self._total_bytes / 1024**2:.1f} MB id: {id(buf)} fwd: {fwd}" + f"total={self._total_bytes / 1024**2:.1f} MB param: {param._debug_name} fwd: {fwd}" ) return buf @@ -1634,6 +1736,11 @@ def wait_async_comms( param._wait_param_gather() if had_ag: param._already_ag_drained = True + # Recompute-forward chain: drain its separate in-flight rowwise AG so the + # captured recompute consumer skips its cross-graph wait (full-iteration CG). + if param._recompute_prefetch_handle is not None: + param._wait_recompute_param_gather() + param._recompute_already_drained = True if not skip_rs: param._wait_reduce_scatter(finalize_grad=finalize_after_drain) # Fallback inline-accumulation: only when finalize is requested, From ecf2dd16c63960df0722456b2a802afac0680b4a Mon Sep 17 00:00:00 2001 From: Shiqing Fan Date: Thu, 4 Jun 2026 00:33:56 -0700 Subject: [PATCH 17/59] GTP: allocate GRAPHED buffers into CG mempool at creation; fix comments Signed-off-by: Shiqing Fan --- megatron/core/transformer/cuda_graphs.py | 36 ++-- .../core/transformer/transformer_config.py | 21 +++ megatron/experimental/gtp/README.md | 6 +- megatron/experimental/gtp/__init__.py | 4 +- .../gtp/generalized_tensor_parallelism.py | 156 ++++++------------ 5 files changed, 96 insertions(+), 127 deletions(-) diff --git a/megatron/core/transformer/cuda_graphs.py b/megatron/core/transformer/cuda_graphs.py index f72518f13fc..8f047397306 100644 --- a/megatron/core/transformer/cuda_graphs.py +++ b/megatron/core/transformer/cuda_graphs.py @@ -66,7 +66,7 @@ GTPChain, get_ag_stream, get_rs_stream, - reallocate_gtp_cache_to_mempool, + set_cuda_graph_mempool, wait_async_comms, ) @@ -442,9 +442,6 @@ def create_cudagraphs(cls): ) if any(r[0].gtp_remat 
for r in cls.cudagraph_record): - reallocate_gtp_cache_to_mempool( - torch.cuda.current_device(), CudaGraphManager.global_mempool - ) GTP_CONFIG.check_param_states = False gc.collect() @@ -892,21 +889,13 @@ def __init__( get_generalized_tensor_parallel_remat_group, ) - gtp_group = get_generalized_tensor_parallel_remat_group() - graphed_ag = get_ag_stream(GTPChain.GRAPHED.value, gtp_group) - graphed_rs = get_rs_stream(GTPChain.GRAPHED.value, gtp_group) - self._register_side_stream(self.fwd_side_streams, graphed_ag) - self._register_side_stream(self.bwd_side_streams, graphed_ag) - self._register_side_stream(self.bwd_side_streams, graphed_rs) + self._register_gtp_side_streams(get_generalized_tensor_parallel_remat_group()) # EGTP streams: required so _wait/_sync_side_streams drain EGTP # NCCL into runner_stream before bwd_completion_event fires. if get_expert_generalized_tensor_parallel_remat_world_size() > 1: - egtp_group = get_expert_generalized_tensor_parallel_remat_group() - egtp_graphed_ag = get_ag_stream(GTPChain.GRAPHED.value, egtp_group) - egtp_graphed_rs = get_rs_stream(GTPChain.GRAPHED.value, egtp_group) - self._register_side_stream(self.fwd_side_streams, egtp_graphed_ag) - self._register_side_stream(self.bwd_side_streams, egtp_graphed_ag) - self._register_side_stream(self.bwd_side_streams, egtp_graphed_rs) + self._register_gtp_side_streams( + get_expert_generalized_tensor_parallel_remat_group() + ) # Bridges Phase 1 (AG drain on ag_stream) into runner_stream # so bwd_completion_event records past NCCL_AG completion. 
self.bwd_ag_fence_event = torch.cuda.Event() @@ -928,9 +917,14 @@ def __init__( self.fp4_recipe = get_fp4_recipe(self.base_module.config) _set_skip_fp8_weight_update_tensor(False) - def _register_side_stream(self, side_streams, stream): - """Register a side stream for graph capture/replay synchronization.""" - side_streams.append(stream) + def _register_gtp_side_streams(self, group): + """Register a GTP (chain, group)'s GRAPHED AG/RS side streams for capture/replay sync: the + AG stream on both fwd and bwd, the RS stream on bwd only.""" + ag = get_ag_stream(GTPChain.GRAPHED.value, group) + rs = get_rs_stream(GTPChain.GRAPHED.value, group) + self.fwd_side_streams.append(ag) + self.bwd_side_streams.append(ag) + self.bwd_side_streams.append(rs) def _sync_against_side_streams(self, side_streams): """Make registered side streams wait for the current stream. @@ -1809,6 +1803,10 @@ def wrapped_func(*args, eager=False, cache_key=None, **kwargs): self.reuse_cudagraphs = self.pg_collection.pp.size() == 1 if CudaGraphManager.global_mempool is None: CudaGraphManager.global_mempool = torch.cuda.graph_pool_handle() + # Register the pool so GTP allocates GRAPHED-chain buffers + quantized + # storage directly into it (created before the first graphed forward). + if HAVE_GTP: + set_cuda_graph_mempool(torch.cuda.current_device(), CudaGraphManager.global_mempool) # Cudagraph stream capture requires no operations on the default stream prior to the # capture, so change to a side stream. torch.cuda.set_stream(torch.cuda.Stream()) diff --git a/megatron/core/transformer/transformer_config.py b/megatron/core/transformer/transformer_config.py index b99913f5057..800807a7dcd 100644 --- a/megatron/core/transformer/transformer_config.py +++ b/megatron/core/transformer/transformer_config.py @@ -2348,6 +2348,27 @@ def _scope_to_str(s): "moe_input_jitter_eps is not supported with graphed moe recomputation." 
) + if ( + self.generalized_tensor_parallel_remat_size > 1 + and self.cuda_graph_impl == "local" + and (self.fp8 is not None or self.fp4 is not None) + and self.moe_shared_expert_intermediate_size is not None + and not self.moe_shared_expert_overlap + and ( + full_cudagraph + or CudaGraphModule.moe in self.cuda_graph_modules + or CudaGraphModule.moe_router in self.cuda_graph_modules + ) + ): + assert "shared_experts" not in self.recompute_modules, ( + "GTP + local CUDA graphs that capture shared_experts " + "(moe_router/moe scope) cannot recompute it under fp8/fp4: " + "te_checkpoint requires .backward(), but the local fwd-graph " + "warmup uses .grad(). Drop 'shared_experts' from " + "--recompute-modules (GTP-shard + offload instead), or use " + "--cuda-graph-impl full_iteration." + ) + if self.fine_grained_activation_offloading: assert self.cuda_graph_impl in ("local", "transformer_engine", "full_iteration"), ( "fine-grained activation offloading is only supported with " diff --git a/megatron/experimental/gtp/README.md b/megatron/experimental/gtp/README.md index 034d7a8b48e..da29435e2fa 100644 --- a/megatron/experimental/gtp/README.md +++ b/megatron/experimental/gtp/README.md @@ -40,11 +40,11 @@ CG compatibility is designed-in from day one, not retrofitted. The entire sync / - **Two chains, never cross-linked** (`GTPChain.GRAPHED` / `GTPChain.UNGRAPHED`). `prev_w` / `next_w` only connect same-chain params, so a captured traversal never reaches into eager Python and vice-versa. - **`torch.cuda.Event(external=True)`** for `ag_event` / `rs_event` — the events survive CG capture boundaries and can be waited on from replay-time streams. - **Idempotent ticket cache**: `GTPWeightCache.get(ticket)` keeps `slot.buf` set even after `release()`, so replays read the same buffer address as capture. `clear()` drops buffers while keeping tickets valid → supports CG re-capture with lazy re-allocation. 
-- **`reallocate_to_mempool(device, mempool)`** pre-migrates GRAPHED-chain buffers into the CG memory pool *before* capture, so no CUDA allocations happen inside the captured graph. UNGRAPHED buffers stay in regular allocator memory. +- **Allocate-in-pool at creation** (`set_cuda_graph_mempool` + `_graphed_alloc`): GRAPHED-chain AG/RS buffers and quantized weight storage are allocated **directly into the CG memory pool** at first creation (during warmup, before capture), so no CUDA allocations happen inside the captured graph — and no post-hoc reallocation/clone is needed. UNGRAPHED buffers stay in regular allocator memory. - **Lazy, one-shot chain linking**: `prefetch_initialized` is flipped during the first fwd (warmup), so the chain-construction Python side-effects never execute inside a captured graph. The link table is buffered and flushed atomically at the second forward. - **DDP hook manual triggering**: `register_grad_accum_hook` stores the DDP hook on the param; `_CudagraphReplayNode.backward` calls it manually after replay (since `AccumulateGrad` hooks are silenced by replay). This is also how the `assert self.grad_reduce_handle is not None` failure from partial-CG + overlap-grad-reduce is resolved. - **Drains at CG / eager boundary**: `_drain_gtp_side_streams()` before eager MoE expert compute. Inside bwd capture, two-phase drain: Phase 1 joins the within-graph cascade and records `bwd_completion_event` (next runner unblocks); Phase 2 calls `wait_async_comms(GRAPHED)` to drain the chain-tail handle and re-joins side streams (queued after the event so it doesn't delay the next runner). -- **Side-stream registration**: the `(GRAPHED, gtp_group)` ag/rs streams are materialized at runner init so `_register_side_stream` captures them before the first forward. +- **Side-stream registration**: the `(GRAPHED, gtp_group)` ag/rs streams are materialized at runner init (`_register_gtp_side_streams`) so they are captured before the first forward. 
### 1.3 Low-precision quantize-then-gather @@ -200,7 +200,7 @@ TransformerEngine owns the linear primitives (`Linear` / `LayerNormLinear` / `La 1. The `gtp_group` kwarg that Mcore's `extensions/transformer_engine.py` threads into the TE constructors when `is_te_min_version("2.15.0")`. 2. The hook registry (`register_gtp_hooks`), called by TE's `module/base.py` at `reset_parameters` time to slice each weight into a `GTPShardedParam` along `out_features`. -3. The `_register_side_stream` / drain calls that synchronize TE's quantize + GEMM kernels with the side stream that owns the AG/RS NCCL ops. +3. The `_register_gtp_side_streams` / drain calls that synchronize TE's quantize + GEMM kernels with the side stream that owns the AG/RS NCCL ops. #### What the flags do under the hood diff --git a/megatron/experimental/gtp/__init__.py b/megatron/experimental/gtp/__init__.py index 7c33d02f5ee..e90935b5a2f 100644 --- a/megatron/experimental/gtp/__init__.py +++ b/megatron/experimental/gtp/__init__.py @@ -24,7 +24,7 @@ get_all_ag_streams, get_all_rs_streams, get_rs_stream, - reallocate_gtp_cache_to_mempool, + set_cuda_graph_mempool, set_cuda_graph_modules, tag_gtp_params_with_names, update_gtp_config, @@ -47,7 +47,7 @@ "get_all_ag_streams", "get_all_rs_streams", "get_rs_stream", - "reallocate_gtp_cache_to_mempool", + "set_cuda_graph_mempool", "set_cuda_graph_modules", "tag_gtp_params_with_names", "update_gtp_config", diff --git a/megatron/experimental/gtp/generalized_tensor_parallelism.py b/megatron/experimental/gtp/generalized_tensor_parallelism.py index a3f01f644b0..9d9e0122a83 100644 --- a/megatron/experimental/gtp/generalized_tensor_parallelism.py +++ b/megatron/experimental/gtp/generalized_tensor_parallelism.py @@ -13,8 +13,9 @@ import math import re +import warnings from collections import defaultdict -from contextlib import nullcontext +from contextlib import contextmanager, nullcontext from dataclasses import dataclass, field from enum import Enum from typing import 
Dict, List, Optional @@ -309,7 +310,6 @@ def update_gtp_config(**kwargs): if kwargs.get("wgrad_before_dgrad"): raise NotImplementedError("Wgrad->Dgrad schedule to be supported later") if kwargs.get("coalesce_amax_allreduce"): - import warnings warnings.warn( "GTPConfig.coalesce_amax_allreduce: coalesced amax reduction across the " "GTP group is deferred in a followup MR; falling back to per-weight amax " @@ -653,7 +653,10 @@ def _configure_quantizer(q, group): continue weight._quantizer = _configure_quantizer(quantizer, weight.group) - weight.quantized = weight._quantizer.quantize(weight.get_padded_shard()) + # This init quantize is the only allocation of the quantized storage + # (re-quantize writes in place), so route it via _graphed_alloc. + with _graphed_alloc(getattr(weight, "chain_id", GTPChain.UNGRAPHED.value)): + weight.quantized = weight._quantizer.quantize(weight.get_padded_shard()) weight.quantized.is_routed_expert = getattr(weight, "is_routed_expert", False) # fp8_param_gather: the init quantize above already produced a # valid FP8 cache from the BF16 shard; flag did_cast so iter-0's @@ -1470,6 +1473,36 @@ class _TicketSlot: buf: Optional[torch.Tensor] = field(default=None) # None when released or after clear() +# CUDA-graph memory pool for routing GRAPHED-chain allocations (AG/RS buffers and +# quantized weight storage) into the capture pool *at creation time*, so no post-hoc +# reallocation is needed. Registered by the integrator (set_cuda_graph_mempool) after +# the pool is created and before the first graphed forward; stays None when CG is off, +# in which case _graphed_alloc is a no-op and allocations use regular memory. 
+_CG_MEMPOOL_DEVICE = None +_CG_MEMPOOL = None + + +def set_cuda_graph_mempool(device, mempool): + """Register the CUDA-graph memory pool for GRAPHED-chain GTP allocations.""" + global _CG_MEMPOOL_DEVICE, _CG_MEMPOOL + _CG_MEMPOOL_DEVICE = device + _CG_MEMPOOL = mempool + + +@contextmanager +def _graphed_alloc(chain_id): + """Route allocations in this block into the registered CG mempool when ``chain_id`` + is GRAPHED and a pool is registered; otherwise a no-op (regular allocator).""" + if _CG_MEMPOOL is not None and chain_id == GTPChain.GRAPHED.value: + torch._C._cuda_beginAllocateCurrentThreadToPool(_CG_MEMPOOL_DEVICE, _CG_MEMPOOL) + try: + yield + finally: + torch._C._cuda_endAllocateToPool(_CG_MEMPOOL_DEVICE, _CG_MEMPOOL) + else: + yield + + class GTPWeightCache: """ Ticket-based buffer pool for GTP all-gather / reduce-scatter buffers. @@ -1527,18 +1560,23 @@ def _allocate_buffer( else: out_shape = param._unsharded_shape_padded - if not isinstance(dtype, torch.dtype): - quantizer = param._quantizer - assert quantizer is not None - param._quantizer.set_usage(rowwise=fwd, columnwise=not fwd) + # Route GRAPHED-chain buffers into the CG mempool at creation (see _graphed_alloc). 
+ with _graphed_alloc(getattr(param, "chain_id", GTPChain.UNGRAPHED.value)): + if not isinstance(dtype, torch.dtype): + quantizer = param._quantizer + assert quantizer is not None + param._quantizer.set_usage(rowwise=fwd, columnwise=not fwd) - buf = param._quantizer.make_empty( - out_shape, dtype=torch.bfloat16, device=torch.cuda.current_device() - ) - else: - buf = torch.empty( - out_shape, dtype=dtype, device=param.device, memory_format=torch.contiguous_format - ) + buf = param._quantizer.make_empty( + out_shape, dtype=torch.bfloat16, device=torch.cuda.current_device() + ) + else: + buf = torch.empty( + out_shape, + dtype=dtype, + device=param.device, + memory_format=torch.contiguous_format, + ) buf_bytes = self._buf_bytes(out_shape, dtype) self._total_bytes += buf_bytes @@ -1589,8 +1627,7 @@ def release(self, ticket: int): """Return the buffer to the pool. Ticket remains valid. slot.buf is intentionally NOT cleared: get() must stay idempotent so that - CUDA-graph-captured buffers keep their fixed address across replays, and - reallocate_to_mempool() can find every dense-chain buffer. + CUDA-graph-captured buffers keep their fixed address across replays. """ slot = self._slots[ticket] if slot.buf is None: @@ -1607,85 +1644,6 @@ def clear(self): self._pool.clear() self._total_bytes = 0 - def reallocate_to_mempool(self, device, mempool): - """Re-allocate GRAPHED-chain ticket buffers into a CUDA graph memory pool. - - Call BEFORE graph capture so every GRAPHED-chain buffer lives in the capture - pool and no allocations are recorded inside the graph. UNGRAPHED-chain - buffers are left in regular memory (they are never referenced by any - captured graph). 
- """ - - # Identify keys that belong to the GRAPHED chain - graphed_keys = set() - for slot in self._slots.values(): - if slot.chain_id == GTPChain.GRAPHED.value: - graphed_keys.add(slot.key) - - # Clone only GRAPHED-chain pool buffers into the passed in mempool - self._total_bytes = 0 - new_pool = defaultdict(list) - torch._C._cuda_beginAllocateCurrentThreadToPool(device, mempool) - for key, buffers in self._pool.items(): - if key not in graphed_keys: - continue - new_buffers = [] - for _ in range(len(buffers)): - buf = self._allocate_buffer(*self.key_to_allocate_func[key]) - new_buffers.append(buf) - new_pool[key] = new_buffers - torch._C._cuda_endAllocateToPool(device, mempool) - - # Map each buffer in the old pool to its corresponding new one (GRAPHED only) - old_to_new_buff = {} - for key, old_pool in self._pool.items(): - if key not in graphed_keys: - continue - new = new_pool[key] - for old_buf, new_buf in zip(old_pool, new): - old_to_new_buff[old_buf] = new_buf - - # Replace each GRAPHED slot's reference; keep UNGRAPHED slots unchanged - for slot in self._slots.values(): - if ( - slot.chain_id == GTPChain.GRAPHED.value - and slot.buf is not None - and slot.buf in old_to_new_buff - ): - slot.buf = old_to_new_buff[slot.buf] - - # Merge: GRAPHED keys get new buffers, UNGRAPHED keys keep old ones - for key, buffers in self._pool.items(): - if key not in graphed_keys: - new_pool[key] = buffers - self._pool = new_pool - - # Remap quantized params into the CG mempool — but only for params on - # the GRAPHED chain. UNGRAPHED-chain params (embedding, output_layer, - # and MoE paths whose scope is not captured) run eagerly and don't - # need their quantized storage in the CG mempool. 
- torch._C._cuda_beginAllocateCurrentThreadToPool(device, mempool) - for w in _GTP_PARAMS: - if getattr(w, "chain_id", GTPChain.GRAPHED.value) != GTPChain.GRAPHED.value: - continue - if w.quantized is None: - continue - if isinstance(w.quantized, NVFP4TensorStorage): - w.quantized._rowwise_data = torch.clone(w.quantized._rowwise_data) - w.quantized._columnwise_data = torch.clone(w.quantized._columnwise_data) - w.quantized._rowwise_scale_inv = torch.clone(w.quantized._rowwise_scale_inv) - w.quantized._columnwise_scale_inv = torch.clone(w.quantized._columnwise_scale_inv) - w.quantized._amax_columnwise = torch.clone(w.quantized._amax_columnwise) - w.quantized._amax_rowwise = torch.clone(w.quantized._amax_rowwise) - elif isinstance(w.quantized, MXFP8TensorStorage): - w.quantized._rowwise_data = torch.clone(w.quantized._rowwise_data) - w.quantized._columnwise_data = torch.clone(w.quantized._columnwise_data) - w.quantized._rowwise_scale_inv = torch.clone(w.quantized._rowwise_scale_inv) - w.quantized._columnwise_scale_inv = torch.clone(w.quantized._columnwise_scale_inv) - else: - assert False - torch._C._cuda_endAllocateToPool(device, mempool) - def get_global_GTP_cache() -> GTPWeightCache: """Get or lazily create the global cache instance.""" @@ -1695,12 +1653,6 @@ def get_global_GTP_cache() -> GTPWeightCache: return _GTP_CACHE -def reallocate_gtp_cache_to_mempool(device, mempool): - """Re-allocate all GTP cache buffers into a CUDA graph memory pool.""" - if _GTP_CACHE is not None: - _GTP_CACHE.reallocate_to_mempool(device, mempool) - - def wait_async_comms( chain_id: str = None, skip_rs: bool = False, finalize_after_drain: bool = False ): @@ -1879,8 +1831,6 @@ def backward(ctx, grad_output): wrap_fn=wrap_module_params_gtp, ) except ImportError: - import warnings - warnings.warn( "megatron.experimental.gtp: TransformerEngine does not expose register_gtp_hooks; " "GTP will be a no-op for te.Linear / te.LayerNormLinear / te.GroupedLinear. 
" From 1163b4adb3dec33e1901b1602f753cbee2b1aa00 Mon Sep 17 00:00:00 2001 From: Shiqing Fan Date: Thu, 4 Jun 2026 02:17:37 -0700 Subject: [PATCH 18/59] fix comments Signed-off-by: Shiqing Fan --- megatron/core/tensor_parallel/layers.py | 8 ++++++++ megatron/core/transformer/mlp.py | 16 ++++++++++++++++ megatron/training/arguments.py | 14 ++++++++++++++ 3 files changed, 38 insertions(+) diff --git a/megatron/core/tensor_parallel/layers.py b/megatron/core/tensor_parallel/layers.py index 03ccd7b653e..9146e492b5d 100644 --- a/megatron/core/tensor_parallel/layers.py +++ b/megatron/core/tensor_parallel/layers.py @@ -411,6 +411,7 @@ def linear_with_frozen_weight( tp_group: Optional[torch.distributed.ProcessGroup], grad_output_buffer: Optional[List[torch.Tensor]] = None, wgrad_deferral_limit: None = None, + gtp_size: int = 1, ) -> torch.Tensor: """Linear layer execution with weight.requires_grad == False. @@ -447,6 +448,10 @@ def linear_with_frozen_weight( wgrad_deferral_limit (int optional): dummy argument, used to keep the API unified between all forward implementation functions. + + gtp_size (int): GTP shard count. When > 1 the weight is GTP-sharded and must be + all-gathered to its full shape before the matmul, mirroring the trainable path. + Defaults to 1 (no-op) for the common non-GTP / non-sharded case. 
""" assert grad_output_buffer is None, ( @@ -467,6 +472,9 @@ def linear_with_frozen_weight( else: input = input + if gtp_size > 1: + weight = weight.all_gather_and_prefetch(fwd=True) + args = [input, weight, bias, allreduce_dgrad, tp_group] return LinearWithFrozenWeight.apply(*args) diff --git a/megatron/core/transformer/mlp.py b/megatron/core/transformer/mlp.py index 1522418c813..4556dc6c9e2 100644 --- a/megatron/core/transformer/mlp.py +++ b/megatron/core/transformer/mlp.py @@ -394,10 +394,26 @@ def as_mlp_submodule( assert hasattr( pg_collection, 'tp' ), 'TP process group is required for MLP in TransformerLayer' + + # GTP sharding of the dense MLP: forward gtp_group so fc1/fc2 shard their + # weights (mirroring attention / shared_experts). Only the non-fused MLP path + # honors GTP — the TE op-fused variants (TEFusedMLP / *WithGroupedLinear, which + # define _make_fused_impl) build their GEMMs straight from the weight tensors + # and never all-gather GTP shards, so a sharded weight would silently produce + # wrong output. Fail fast on that combination instead. + gtp_group = getattr(pg_collection, 'gtp', None) + if hasattr(cls, '_make_fused_impl'): + assert gtp_group is None or gtp_group.size() == 1, ( + f"{cls.__name__}: GTP sharding of the dense MLP is not supported with the " + "TE fused MLP / GroupedLinear path (_make_fused_impl ignores GTP shards). " + "Use the non-fused MLP submodule, or do not enable GTP for dense MLP layers." + ) + gtp_group = None return cls( config=config, submodules=submodules, tp_group=pg_collection.tp, + gtp_group=gtp_group, is_expert=is_expert, input_size=input_size, ffn_hidden_size=ffn_hidden_size, diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py index 8453e404a31..01445666d24 100644 --- a/megatron/training/arguments.py +++ b/megatron/training/arguments.py @@ -1462,6 +1462,20 @@ def validate_args(args, defaults={}): "force setting NCCL_PROTO=Simple might introduce bad perf." 
) + assert args.ckpt_format in ('torch', 'torch_dist'), ( + f"GTP supports only --ckpt-format 'torch' (legacy) or 'torch_dist', got " + f"'{args.ckpt_format}'." + ) + assert not ( + getattr(args, 'dist_ckpt_optim_fully_reshardable', False) + and getattr(args, 'distrib_optim_fully_reshardable_mem_efficient', False) + ), ( + "GTP does not support the distributed-optimizer fully-reshardable + " + "mem-efficient checkpoint mode. Disable " + "--distrib-optim-fully-reshardable-mem-efficient (or " + "--dist-ckpt-optim-fully-reshardable)." + ) + # Propagate --fp8-param-gather into GTPConfig: enables optimizer-side # FP32->FP8 cast for GTP shards, so the forward skips BF16->FP8. if getattr(args, 'fp8_param_gather', False): From 5dc04230893e7419b605e563c8b424edd0f2e0f0 Mon Sep 17 00:00:00 2001 From: Shiqing Fan Date: Thu, 4 Jun 2026 03:58:03 -0700 Subject: [PATCH 19/59] fix onlince checks: copyright, intallation test, build. Signed-off-by: Shiqing Fan --- megatron/core/transformer/cuda_graphs.py | 19 ++++++++++++++++++- megatron/experimental/__init__.py | 1 + megatron/experimental/gtp/__init__.py | 4 +--- .../gtp/generalized_tensor_parallelism.py | 4 +--- pyproject.toml | 5 +++++ .../generalized_tensor_parallel/__init__.py | 1 + 6 files changed, 27 insertions(+), 7 deletions(-) diff --git a/megatron/core/transformer/cuda_graphs.py b/megatron/core/transformer/cuda_graphs.py index 8f047397306..ff0f7deb27f 100644 --- a/megatron/core/transformer/cuda_graphs.py +++ b/megatron/core/transformer/cuda_graphs.py @@ -58,7 +58,12 @@ except: HAVE_TE_GRAPHS = False -from megatron.experimental.gtp import HAVE_GTP +try: + from megatron.experimental.gtp import HAVE_GTP +except ImportError: + # megatron.experimental is not shipped with the megatron.core wheel; treat + # GTP as unavailable when the package is absent. 
+ HAVE_GTP = False if HAVE_GTP: from megatron.experimental.gtp import ( @@ -69,6 +74,16 @@ set_cuda_graph_mempool, wait_async_comms, ) +else: + # Placeholders so static analysis does not flag these GTP-only symbols as + # possibly-used-before-assignment; every use site is guarded by HAVE_GTP / + # gtp_remat at runtime. + GTP_CONFIG = None + GTPChain = None + get_ag_stream = None + get_rs_stream = None + set_cuda_graph_mempool = None + wait_async_comms = None try: from tqdm import tqdm @@ -615,6 +630,7 @@ class _CudagraphReplayNode(torch.autograd.Function): """Replays the runner's cudagraphs with autograd. Handles copying data into/out of the cudagraph io and fp8/fp4 if used.""" + # pylint: disable=line-too-long ## Capture-time sync schemes (wait_async_comms is called INSIDE the captured # graph so the drain ops are embedded in the graph itself, not before replay). # @@ -635,6 +651,7 @@ class _CudagraphReplayNode(torch.autograd.Function): # Phase 1 AG drain. By the time bwd_completion_event fires and the next # runner launches, the add_ is done (no SM saturation blocking overlap). # finalize_model_grads waits phase2_completion_event before DP grad sync. + # pylint: enable=line-too-long @staticmethod def forward(ctx, runner, is_first_microbatch, *inputs): diff --git a/megatron/experimental/__init__.py b/megatron/experimental/__init__.py index e69de29bb2d..bf496b2c297 100644 --- a/megatron/experimental/__init__.py +++ b/megatron/experimental/__init__.py @@ -0,0 +1 @@ +# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. \ No newline at end of file diff --git a/megatron/experimental/gtp/__init__.py b/megatron/experimental/gtp/__init__.py index e90935b5a2f..2d3eb0abb9d 100644 --- a/megatron/experimental/gtp/__init__.py +++ b/megatron/experimental/gtp/__init__.py @@ -1,6 +1,4 @@ -# Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# See LICENSE for license information. 
+# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. """Generalized Tensor Parallelism (GTP) public API. diff --git a/megatron/experimental/gtp/generalized_tensor_parallelism.py b/megatron/experimental/gtp/generalized_tensor_parallelism.py index 9d9e0122a83..f16de0a245b 100644 --- a/megatron/experimental/gtp/generalized_tensor_parallelism.py +++ b/megatron/experimental/gtp/generalized_tensor_parallelism.py @@ -1,6 +1,4 @@ -# Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# See LICENSE for license information. +# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. """Generalized Tensor Parallelism (GTP). diff --git a/pyproject.toml b/pyproject.toml index 77ea81bf124..aa01d0ab4d9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,6 +13,11 @@ include = [ "megatron.core.*", "megatron.training", "megatron.training.*", + "megatron.experimental", + "megatron.experimental.*", +] +exclude = [ + "megatron.experimental.gtp.images", # documentation diagrams, not runtime code ] [tool.setuptools.dynamic] diff --git a/tests/unit_tests/generalized_tensor_parallel/__init__.py b/tests/unit_tests/generalized_tensor_parallel/__init__.py index e69de29bb2d..b5dff7b5663 100644 --- a/tests/unit_tests/generalized_tensor_parallel/__init__.py +++ b/tests/unit_tests/generalized_tensor_parallel/__init__.py @@ -0,0 +1 @@ +# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. From ab869b92e1c68058f3825baeb003f5ef5029d17e Mon Sep 17 00:00:00 2001 From: Shiqing Fan Date: Fri, 5 Jun 2026 00:23:47 -0700 Subject: [PATCH 20/59] fix te min version required for GTP. 
Signed-off-by: Shiqing Fan --- .../core/extensions/transformer_engine.py | 24 ++++++++++++++++--- .../gtp/generalized_tensor_parallelism.py | 14 +++++++++++ megatron/training/initialize.py | 6 ++--- 3 files changed, 38 insertions(+), 6 deletions(-) diff --git a/megatron/core/extensions/transformer_engine.py b/megatron/core/extensions/transformer_engine.py index 1823d271dec..f17d369b525 100644 --- a/megatron/core/extensions/transformer_engine.py +++ b/megatron/core/extensions/transformer_engine.py @@ -896,7 +896,13 @@ def __init__( self.te_quant_params, torch.is_grad_enabled() ) - if is_te_min_version("2.14.0"): + if gtp_group is not None and gtp_group.size() > 1: + from megatron.experimental.gtp import HAVE_GTP + + assert HAVE_GTP, ( + "GTP requires TransformerEngine >= 2.17. " + "Set MEGATRON_GTP_FORCE_ENABLE=1 to bypass for custom TE builds." + ) self.gtp_size = get_pg_size(gtp_group) if gtp_group is not None else 1 extra_kwargs["gtp_group"] = gtp_group if torch.distributed.is_initialized() else None with init_quant_context: @@ -1106,7 +1112,13 @@ def __init__( ), "Must have at least TE version 2.3 or higher to use symmetric memory all reduce" extra_kwargs["symmetric_ar_type"] = self.config.symmetric_ar_type - if is_te_min_version("2.14.0"): + if gtp_group is not None and gtp_group.size() > 1: + from megatron.experimental.gtp import HAVE_GTP + + assert HAVE_GTP, ( + "GTP requires TransformerEngine >= 2.17. " + "Set MEGATRON_GTP_FORCE_ENABLE=1 to bypass for custom TE builds." + ) self.gtp_size = get_pg_size(gtp_group) if gtp_group is not None else 1 extra_kwargs["gtp_group"] = gtp_group if torch.distributed.is_initialized() else None @@ -2006,7 +2018,13 @@ def __init__( tp_size = 1 tp_group_for_te = None - if is_te_min_version("2.14.0"): + if gtp_group is not None and gtp_group.size() > 1: + from megatron.experimental.gtp import HAVE_GTP + + assert HAVE_GTP, ( + "GTP requires TransformerEngine >= 2.17. 
" + "Set MEGATRON_GTP_FORCE_ENABLE=1 to bypass for custom TE builds." + ) self.gtp_size = get_pg_size(gtp_group) if gtp_group is not None else 1 extra_kwargs["gtp_group"] = ( gtp_group if torch.distributed.is_initialized() else None diff --git a/megatron/experimental/gtp/generalized_tensor_parallelism.py b/megatron/experimental/gtp/generalized_tensor_parallelism.py index f16de0a245b..acf76319da9 100644 --- a/megatron/experimental/gtp/generalized_tensor_parallelism.py +++ b/megatron/experimental/gtp/generalized_tensor_parallelism.py @@ -10,6 +10,7 @@ """ import math +import os import re import warnings from collections import defaultdict @@ -19,9 +20,21 @@ from typing import Dict, List, Optional import torch +from packaging.version import Version + +_GTP_TE_MIN_VERSION = Version("2.17") try: import transformer_engine as te # noqa: F401 + + _te_version = Version(te.__version__) + if _te_version < _GTP_TE_MIN_VERSION and not os.environ.get("MEGATRON_GTP_FORCE_ENABLE"): + raise ImportError( + f"megatron.experimental.gtp requires TransformerEngine >= {_GTP_TE_MIN_VERSION} " + f"(found {_te_version}). Set MEGATRON_GTP_FORCE_ENABLE=1 to bypass this check " + "when using a custom TE build that includes the GTP hook registry." + ) + import transformer_engine_torch as tex from transformer_engine.pytorch.constants import ( MXFP8_BLOCK_SCALING_SIZE, @@ -1832,6 +1845,7 @@ def backward(ctx, grad_output): warnings.warn( "megatron.experimental.gtp: TransformerEngine does not expose register_gtp_hooks; " "GTP will be a no-op for te.Linear / te.LayerNormLinear / te.GroupedLinear. " + "GTP requires TransformerEngine >= 2.17 (planned release). 
" "Upgrade TransformerEngine to a build that includes the GTP hook registry.", RuntimeWarning, stacklevel=2, diff --git a/megatron/training/initialize.py b/megatron/training/initialize.py index 92b52392627..8f233eb6adb 100644 --- a/megatron/training/initialize.py +++ b/megatron/training/initialize.py @@ -345,9 +345,9 @@ def _initialize_distributed(get_embedding_ranks, get_position_embedding_ranks, s from megatron.experimental.gtp import HAVE_GTP assert HAVE_GTP, ( - "GTP requires TransformerEngine >= 2.15.0. " - "Install TransformerEngine, or set both " - "--generalized-tensor-parallel-remat-size and " + "GTP requires TransformerEngine >= 2.17. " + "Set MEGATRON_GTP_FORCE_ENABLE=1 to bypass for custom TE builds, " + "or set both --generalized-tensor-parallel-remat-size and " "--expert-generalized-tensor-parallel-remat-size to 1." ) mpu.initialize_model_parallel( From 45a604f8d0bb35c3dc5c4927d1ef698b1ced2123 Mon Sep 17 00:00:00 2001 From: Shiqing Fan Date: Fri, 5 Jun 2026 01:39:51 -0700 Subject: [PATCH 21/59] fix online UTs; fix comments. Signed-off-by: Shiqing Fan --- megatron/core/process_groups_config.py | 7 +++++++ megatron/experimental/__init__.py | 2 +- .../experimental/gtp/generalized_tensor_parallelism.py | 2 +- megatron/training/arguments.py | 2 +- 4 files changed, 10 insertions(+), 3 deletions(-) diff --git a/megatron/core/process_groups_config.py b/megatron/core/process_groups_config.py index 4f806ec7371..6ca97357e48 100644 --- a/megatron/core/process_groups_config.py +++ b/megatron/core/process_groups_config.py @@ -152,6 +152,13 @@ def __init__(self, **kwargs): else: raise ValueError(f"Unknown attribute: {key}") + def __getattr__(self, name: str): + # Return None for any declared field that was not set during partial construction + # (e.g. when use_mpu_process_groups is called with a subset of required_pgs). 
+ if name in {f.name for f in fields(self.__class__)}: + return None + raise AttributeError(f"'ProcessGroupCollection' object has no attribute '{name}'") + def __repr__(self): """Return a concise representation showing which process groups exist and their sizes.""" active_pgs = [] diff --git a/megatron/experimental/__init__.py b/megatron/experimental/__init__.py index bf496b2c297..b5dff7b5663 100644 --- a/megatron/experimental/__init__.py +++ b/megatron/experimental/__init__.py @@ -1 +1 @@ -# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. \ No newline at end of file +# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. diff --git a/megatron/experimental/gtp/generalized_tensor_parallelism.py b/megatron/experimental/gtp/generalized_tensor_parallelism.py index acf76319da9..afad2977ce0 100644 --- a/megatron/experimental/gtp/generalized_tensor_parallelism.py +++ b/megatron/experimental/gtp/generalized_tensor_parallelism.py @@ -63,7 +63,7 @@ class GTPChain(str, Enum): - """Prefetch chain identifier for an GTPShardedParam. + """Prefetch chain identifier for a GTPShardedParam. GRAPHED — fwd/bwd captured by a CUDA graph (MLM _CudaGraphRunner). UNGRAPHED — fwd/bwd runs eagerly. diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py index 01445666d24..c5f20a89657 100644 --- a/megatron/training/arguments.py +++ b/megatron/training/arguments.py @@ -1479,8 +1479,8 @@ def validate_args(args, defaults={}): # Propagate --fp8-param-gather into GTPConfig: enables optimizer-side # FP32->FP8 cast for GTP shards, so the forward skips BF16->FP8. if getattr(args, 'fp8_param_gather', False): - assert False, 'GTP+fp8-param-gather not supported yet!'
from megatron.experimental.gtp import update_gtp_config + update_gtp_config(fp8_param_gather=True) warn_rank_0( "GTP + --fp8-param-gather: setting " From 87a50cd9c8401ac4933d706567d76f0ec15d444a Mon Sep 17 00:00:00 2001 From: Shiqing Fan Date: Fri, 5 Jun 2026 05:39:54 -0700 Subject: [PATCH 22/59] fix UTs Signed-off-by: Shiqing Fan --- megatron/core/parallel_state.py | 12 ++++++---- .../core/tensor_parallel/inference_layers.py | 4 ++++ megatron/experimental/gtp/__init__.py | 2 ++ .../generalized_tensor_parallel/test_gtp.py | 24 ++++++++++++------- .../test_tp_gtp.py | 6 +++++ .../models/test_hybrid_moe_model.py | 2 ++ 6 files changed, 37 insertions(+), 13 deletions(-) diff --git a/megatron/core/parallel_state.py b/megatron/core/parallel_state.py index 551eca72115..0f8f275347c 100644 --- a/megatron/core/parallel_state.py +++ b/megatron/core/parallel_state.py @@ -1717,7 +1717,8 @@ def get_generalized_tensor_parallel_remat_group(check_initialized=True): def get_generalized_tensor_parallel_remat_world_size(): """Return world size for the parameter-sharding group.""" if torch.distributed.is_available() and torch.distributed.is_initialized(): - return get_generalized_tensor_parallel_remat_group().size() + group = get_generalized_tensor_parallel_remat_group(check_initialized=False) + return group.size() if group is not None else 0 else: return 0 @@ -1725,7 +1726,8 @@ def get_generalized_tensor_parallel_remat_world_size(): def get_generalized_tensor_parallel_remat_rank(): """Return caller's rank in the parameter-sharding group.""" if torch.distributed.is_available() and torch.distributed.is_initialized(): - return get_generalized_tensor_parallel_remat_group().rank() + group = get_generalized_tensor_parallel_remat_group(check_initialized=False) + return group.rank() if group is not None else 0 else: return 0 @@ -2185,7 +2187,8 @@ def get_expert_generalized_tensor_parallel_remat_group(check_initialized=True): def get_expert_generalized_tensor_parallel_remat_world_size(): 
"""Return world size for the expert-parameter-sharding group.""" if torch.distributed.is_available() and torch.distributed.is_initialized(): - return get_expert_generalized_tensor_parallel_remat_group().size() + group = get_expert_generalized_tensor_parallel_remat_group(check_initialized=False) + return group.size() if group is not None else 0 else: return 0 @@ -2193,7 +2196,8 @@ def get_expert_generalized_tensor_parallel_remat_world_size(): def get_expert_generalized_tensor_parallel_remat_rank(): """Return caller's rank in the expert-parameter-sharding group.""" if torch.distributed.is_available() and torch.distributed.is_initialized(): - return get_expert_generalized_tensor_parallel_remat_group().rank() + group = get_expert_generalized_tensor_parallel_remat_group(check_initialized=False) + return group.rank() if group is not None else 0 else: return 0 diff --git a/megatron/core/tensor_parallel/inference_layers.py b/megatron/core/tensor_parallel/inference_layers.py index 2adefc58634..8da5c3fbeb6 100644 --- a/megatron/core/tensor_parallel/inference_layers.py +++ b/megatron/core/tensor_parallel/inference_layers.py @@ -83,6 +83,7 @@ def __init__( is_expert: bool = False, symmetric_ar_type: Optional[str] = None, tp_group: Optional[torch.distributed.ProcessGroup] = None, + gtp_group: Optional[torch.distributed.ProcessGroup] = None, name: str | None = None, ): assert HAVE_TE, "--transformer-impl=inference_optimized requires transformer engine" @@ -131,6 +132,7 @@ def __init__( skip_weight_param_allocation: bool = False, tp_comm_buffer_name: Optional[str] = None, tp_group: Optional[torch.distributed.ProcessGroup] = None, + gtp_group: Optional[torch.distributed.ProcessGroup] = None, name: str | None = None, ): assert HAVE_TE, "--transformer-impl=inference_optimized requires transformer engine" @@ -260,6 +262,7 @@ def __init__( skip_weight_param_allocation: bool = False, tp_comm_buffer_name: Optional[str] = None, tp_group: Optional[torch.distributed.ProcessGroup] = None, + 
gtp_group: Optional[torch.distributed.ProcessGroup] = None, name: str | None = None, ): assert HAVE_TE, "--transformer-impl=inference_optimized requires transformer engine" @@ -358,6 +361,7 @@ def __init__( is_expert: bool, tp_comm_buffer_name: Optional[str] = None, tp_group: Optional[torch.distributed.ProcessGroup] = None, + gtp_group: Optional[torch.distributed.ProcessGroup] = None, name: str | None = None, ): assert HAVE_TE, "--transformer-impl=inference_optimized requires transformer engine" diff --git a/megatron/experimental/gtp/__init__.py b/megatron/experimental/gtp/__init__.py index 2d3eb0abb9d..8fb8a307275 100644 --- a/megatron/experimental/gtp/__init__.py +++ b/megatron/experimental/gtp/__init__.py @@ -17,6 +17,7 @@ GTP_CONFIG, GTPChain, GTPEmbeddingWeight, + GTPShardedParam, classify_gtp_chains, get_ag_stream, get_all_ag_streams, @@ -40,6 +41,7 @@ "GTP_CONFIG", "GTPChain", "GTPEmbeddingWeight", + "GTPShardedParam", "classify_gtp_chains", "get_ag_stream", "get_all_ag_streams", diff --git a/tests/unit_tests/generalized_tensor_parallel/test_gtp.py b/tests/unit_tests/generalized_tensor_parallel/test_gtp.py index 02b86ae5666..2ede51b480d 100644 --- a/tests/unit_tests/generalized_tensor_parallel/test_gtp.py +++ b/tests/unit_tests/generalized_tensor_parallel/test_gtp.py @@ -19,7 +19,7 @@ 11. TestNVFP4LinearGTP – Linear + NVFP4 recipe: quantized shard setup, fwd/bwd (multi-GPU) 12. TestNVFP4GroupedLinearGTP – GroupedLinear + NVFP4 recipe: coalesced AG + fwd/bwd (multi-GPU) 13. TestMXFP8LinearGTP – Linear + MXFP8 recipe: quantized shard setup, fwd/bwd, padding (multi-GPU) -14. TestGTPConfig – update_config: valid/invalid keys (single-process) +14. TestGTPConfig – update_gtp_config: valid/invalid keys (single-process) 15. TestGTPShardedParamProperties – shape computations, get_padded_shard, _strip_padding (single-process) 16. TestGTPCacheKey – _get_cache_key: expert vs non-expert, fwd vs bwd (single-process) 17. 
TestGTPCacheRelease – reserve/get/release pool semantics (single-process) @@ -42,6 +42,12 @@ import torch import torch.distributed as dist import torch.nn as nn + +from megatron.experimental.gtp import HAVE_GTP + +if not HAVE_GTP: + pytest.skip("GTP requires TransformerEngine >= 2.17", allow_module_level=True) + import transformer_engine.pytorch as te from transformer_engine.common.recipe import NVFP4BlockScaling from transformer_engine.pytorch import fp8_autocast, is_mxfp8_available, is_nvfp4_available @@ -991,7 +997,7 @@ def test_forward_unaligned_padding(self): # --------------------------------------------------------------------------- -# 14. GTPConfig / update_config +# 14. GTPConfig / update_gtp_config # --------------------------------------------------------------------------- @@ -1000,22 +1006,22 @@ class TestGTPConfig: def test_update_pad_for_alignment(self): original = gtp_module.GTP_CONFIG.pad_for_alignment try: - gtp_module.update_config(pad_for_alignment=8) + gtp_module.update_gtp_config(pad_for_alignment=8) assert gtp_module.GTP_CONFIG.pad_for_alignment == 8 finally: - gtp_module.update_config(pad_for_alignment=original) + gtp_module.update_gtp_config(pad_for_alignment=original) def test_update_weight_prefetch(self): original = gtp_module.GTP_CONFIG.weight_prefetch try: - gtp_module.update_config(weight_prefetch=False) + gtp_module.update_gtp_config(weight_prefetch=False) assert gtp_module.GTP_CONFIG.weight_prefetch is False finally: - gtp_module.update_config(weight_prefetch=original) + gtp_module.update_gtp_config(weight_prefetch=original) def test_invalid_key_raises(self): with pytest.raises(ValueError, match="Unknown GTP config option"): - gtp_module.update_config(nonexistent_key=123) + gtp_module.update_gtp_config(nonexistent_key=123) # --------------------------------------------------------------------------- @@ -1333,7 +1339,7 @@ def _worker_prefetch_disabled(rank, world_size, port): dtype = torch.bfloat16 gtp_group = 
dist.new_group(list(range(world_size))) - gtp_module.update_config(weight_prefetch=False) + gtp_module.update_gtp_config(weight_prefetch=False) try: l0 = te.Linear( in_features=in_f, @@ -1362,7 +1368,7 @@ def _worker_prefetch_disabled(rank, world_size, port): assert l0.weight.next_w is l1.weight assert torch.isfinite(out).all(), "Non-finite output with prefetch disabled" finally: - gtp_module.update_config(weight_prefetch=True) + gtp_module.update_gtp_config(weight_prefetch=True) class TestGTPPrefetchDisabled: diff --git a/tests/unit_tests/generalized_tensor_parallel/test_tp_gtp.py b/tests/unit_tests/generalized_tensor_parallel/test_tp_gtp.py index b261444d315..88d17328944 100644 --- a/tests/unit_tests/generalized_tensor_parallel/test_tp_gtp.py +++ b/tests/unit_tests/generalized_tensor_parallel/test_tp_gtp.py @@ -31,6 +31,12 @@ import pytest import torch import torch.distributed as dist + +from megatron.experimental.gtp import HAVE_GTP + +if not HAVE_GTP: + pytest.skip("GTP requires TransformerEngine >= 2.17", allow_module_level=True) + import transformer_engine.pytorch as te from transformer_engine.pytorch.quantization import FP8GlobalStateManager diff --git a/tests/unit_tests/models/test_hybrid_moe_model.py b/tests/unit_tests/models/test_hybrid_moe_model.py index 3f85a52489f..f21ba8aac10 100644 --- a/tests/unit_tests/models/test_hybrid_moe_model.py +++ b/tests/unit_tests/models/test_hybrid_moe_model.py @@ -95,6 +95,7 @@ "ep_overlap_early_attn_memory_release": False, "experimental_attention_variant": None, "expert_model_parallel_size": 4, + "expert_generalized_tensor_parallel_remat_size": 1, "expert_tensor_parallel_size": 1, "external_cuda_graph": False, "ffn_hidden_size": 1856, @@ -122,6 +123,7 @@ "fused_residual_rmsnorm": False, "fused_single_qkv_rope": False, "gated_linear_unit": False, + "generalized_tensor_parallel_remat_size": 1, "glu_linear_offset": 0.0, "grad_scale_func": None, "grad_sync_func": None, From 107c0772b8020ed670daade3637eb477138da350 Mon Sep 
17 00:00:00 2001 From: Shiqing Fan Date: Fri, 5 Jun 2026 19:11:58 -0700 Subject: [PATCH 23/59] fix UTs Signed-off-by: Shiqing Fan --- megatron/core/process_groups_config.py | 55 +++++++++---------- tests/unit_tests/test_optimizer.py | 1 + .../unit_tests/test_process_groups_config.py | 6 +- 3 files changed, 30 insertions(+), 32 deletions(-) diff --git a/megatron/core/process_groups_config.py b/megatron/core/process_groups_config.py index 6ca97357e48..85c230ce280 100644 --- a/megatron/core/process_groups_config.py +++ b/megatron/core/process_groups_config.py @@ -163,15 +163,16 @@ def __repr__(self): """Return a concise representation showing which process groups exist and their sizes.""" active_pgs = [] for field_info in fields(self): - if hasattr(self, field_info.name): - pg = getattr(self, field_info.name) - if pg is None: - active_pgs.append(f"{field_info.name}(None)") - elif isinstance(pg, list): - sizes = [g.size() for g in pg] - active_pgs.append(f"{field_info.name}({sizes})") - else: - active_pgs.append(f"{field_info.name}({pg.size()})") + if field_info.name not in vars(self): + continue + pg = getattr(self, field_info.name) + if pg is None: + continue + elif isinstance(pg, list): + sizes = [g.size() for g in pg] + active_pgs.append(f"{field_info.name}({sizes})") + else: + active_pgs.append(f"{field_info.name}({pg.size()})") return ( f"ProcessGroupCollection({', '.join(active_pgs)})" if active_pgs @@ -371,9 +372,10 @@ def setup_process_groups_for_optimizer( else: # Use provided process group collection with validation and fallbacks + pg_set = vars(pg_collection) # 1. dp group - this is always required - if not hasattr(pg_collection, 'dp'): + if 'dp' not in pg_set: raise ValueError("dp process group is required but not provided in pg_collection") dp_group = pg_collection.dp @@ -393,7 +395,7 @@ def setup_process_groups_for_optimizer( ) # 3. 
Handle expert data parallel group - if not hasattr(pg_collection, 'expt_dp'): + if 'expt_dp' not in pg_set: raise ValueError( "expt_dp process group is required but not provided in pg_collection. " "Please explicitly set it to None if you don't need it." @@ -414,10 +416,10 @@ def setup_process_groups_for_optimizer( else: # With multiple optimizer instances, both groups must be provided if not ( - hasattr(pg_collection, 'intra_dp_cp') - and hasattr(pg_collection, 'intra_expt_dp') - and hasattr(pg_collection, 'inter_dist_opt') - and hasattr(pg_collection, 'intra_dist_opt') + 'intra_dp_cp' in pg_set + and 'intra_expt_dp' in pg_set + and 'inter_dist_opt' in pg_set + and 'intra_dist_opt' in pg_set ): raise ValueError( "intra_dp_cp, intra_expt_dp, inter_dist_opt, and intra_dist_opt " @@ -429,7 +431,7 @@ def setup_process_groups_for_optimizer( inter_dist_opt_group = pg_collection.inter_dist_opt if ddp_config.use_distributed_optimizer: - if not hasattr(pg_collection, 'intra_dist_opt'): + if 'intra_dist_opt' not in pg_set: raise ValueError( "intra_dist_opt process group is required but not provided in " "pg_collection. Please explicitly set it to None if you don't need it." @@ -445,7 +447,7 @@ def setup_process_groups_for_optimizer( intra_dist_opt_group = None # 5. Model communication groups - if not hasattr(pg_collection, 'mp'): + if 'mp' not in pg_set: raise ValueError( "mp process group is required but not provided in pg_collection. " "Please explicitly set it to None if you don't need it." @@ -453,7 +455,7 @@ def setup_process_groups_for_optimizer( mp_group = pg_collection.mp # Expert tensor-model-pipeline group for MoE - if not hasattr(pg_collection, 'tp_ep_pp'): + if 'tp_ep_pp' not in pg_set: raise ValueError( "tp_ep_pp process group is required but not provided in pg_collection. " "Please explicitly set it to None if you don't need it." 
@@ -578,9 +580,10 @@ def setup_process_groups_for_ddp( else: # Use provided process group collection with validation and fallbacks result = {} + pg_set = vars(pg_collection) # 1. dp group - this is always required - if not hasattr(pg_collection, 'dp'): + if 'dp' not in pg_set: raise ValueError("dp process group is required but not provided in pg_collection") result['dp_group'] = pg_collection.dp @@ -621,9 +624,9 @@ def setup_process_groups_for_ddp( else: # With multiple optimizer instances, groups must be provided if not ( - hasattr(pg_collection, 'intra_dp_cp') - and hasattr(pg_collection, 'intra_expt_dp') - and hasattr(pg_collection, 'inter_dist_opt') + 'intra_dp_cp' in pg_set + and 'intra_expt_dp' in pg_set + and 'inter_dist_opt' in pg_set ): raise ValueError( "intra_dp_cp, intra_expt_dp, and inter_dist_opt " @@ -635,13 +638,7 @@ def setup_process_groups_for_ddp( result['inter_dist_opt_group'] = pg_collection.inter_dist_opt # 5. Model parallel groups (DDP-specific: tp, pp, ep instead of mp, expt_tp_pp) - if not all( - [ - hasattr(pg_collection, 'tp'), - hasattr(pg_collection, 'pp'), - hasattr(pg_collection, 'ep'), - ] - ): + if not all(['tp' in pg_set, 'pp' in pg_set, 'ep' in pg_set]): raise ValueError( "tp, pp and ep process groups are required but not provided in pg_collection" ) diff --git a/tests/unit_tests/test_optimizer.py b/tests/unit_tests/test_optimizer.py index e40f0e7c12b..17daf5cb882 100644 --- a/tests/unit_tests/test_optimizer.py +++ b/tests/unit_tests/test_optimizer.py @@ -82,6 +82,7 @@ def test_get_param_groups_no_overrides(mock_get_world_size): assert pg0.keys() == { 'params', 'is_expert_parallel', + 'is_gtp', 'default_config', 'wd_mult', 'lr_mult', diff --git a/tests/unit_tests/test_process_groups_config.py b/tests/unit_tests/test_process_groups_config.py index b49962b1a5a..a61936bd132 100644 --- a/tests/unit_tests/test_process_groups_config.py +++ b/tests/unit_tests/test_process_groups_config.py @@ -29,7 +29,7 @@ def 
test_transformer_process_groups(self, mocker): # Test attribute existence assert hasattr(model_pgs, 'tp') assert hasattr(model_pgs, 'pp') - assert not hasattr(model_pgs, 'cp') # Not set yet + assert model_pgs.cp is None # Not set yet def test_grad_comm_process_groups(self, mocker): """Test basic functionality of ProcessGroupCollection.""" @@ -47,7 +47,7 @@ def test_grad_comm_process_groups(self, mocker): # Test attribute existence assert hasattr(grad_pgs, 'dp') - assert not hasattr(grad_pgs, 'dp_cp') # Not set yet + assert grad_pgs.dp_cp is None # Not set yet def test_hierarchical_context_parallel_groups(self, mocker): """Test setting and accessing the hierarchical context parallel list.""" @@ -129,7 +129,7 @@ def test_default_initialization(self): assert hasattr(model_pgs, 'tp') assert hasattr(model_pgs, 'pp') assert hasattr(model_pgs, 'cp') - assert not hasattr(model_pgs, 'dp') + assert model_pgs.dp is None # Not requested, so not set # Test that an error is raised if an invalid process group is requested with pytest.raises(ValueError, match=r"Invalid process groups requested"): From 9dca05a9a89b819521d0f2477857f6d5ac913d49 Mon Sep 17 00:00:00 2001 From: Shiqing Fan Date: Fri, 5 Jun 2026 20:55:30 -0700 Subject: [PATCH 24/59] Fix GTP DDP bucket alignment for distributed optimizer; add corresponding UT Signed-off-by: Shiqing Fan --- .../distributed/distributed_data_parallel.py | 29 +++++++- megatron/experimental/gtp/README.md | 2 +- .../generalized_tensor_parallel/test_gtp.py | 73 +++++++++++++++++++ 3 files changed, 101 insertions(+), 3 deletions(-) diff --git a/megatron/core/distributed/distributed_data_parallel.py b/megatron/core/distributed/distributed_data_parallel.py index d67124efe8a..069490ce8d3 100644 --- a/megatron/core/distributed/distributed_data_parallel.py +++ b/megatron/core/distributed/distributed_data_parallel.py @@ -339,6 +339,19 @@ def __init__( # applied to GTP buffers (GTP manages its own sharding). 
for buffer_key, (params, param_indices) in gtp_buffer_groups.items(): params_with_names = [(p, param_to_name[p]) for p in params] + if self.ddp_config.use_distributed_optimizer: + # Pad bucket ends to intra_dp_cp_with_gtp_group.size() for dist-opt alignment. + from ..optimizer.distrib_optimizer import DistributedOptimizer + + gtp_layout = DistributedOptimizer._compute_per_buffer_param_layout( + params, + self.bucket_size, + self.intra_dp_cp_with_gtp_group.size(), + self.ddp_config, + param_indices, + ) + else: + gtp_layout = None buffer = _ParamAndGradBuffer( self.ddp_config, buffer_key.param_dtype, @@ -351,7 +364,7 @@ def __init__( param_indices, self.ddp_config.nccl_ub, pg_collection, - param_layout=None, + param_layout=gtp_layout, ) self.gtp_buffers.append(buffer) @@ -362,6 +375,18 @@ def __init__( # params took the full intra_expt_dp_group branch above. for buffer_key, (params, param_indices) in egtp_buffer_groups.items(): params_with_names = [(p, param_to_name[p]) for p in params] + if self.ddp_config.use_distributed_optimizer: + from ..optimizer.distrib_optimizer import DistributedOptimizer + + egtp_layout = DistributedOptimizer._compute_per_buffer_param_layout( + params, + self.bucket_size, + self.intra_expt_dp_with_egtp_group.size(), + self.ddp_config, + param_indices, + ) + else: + egtp_layout = None buffer = _ParamAndGradBuffer( self.ddp_config, buffer_key.param_dtype, @@ -374,7 +399,7 @@ def __init__( param_indices, self.ddp_config.nccl_ub, pg_collection, - param_layout=None, + param_layout=egtp_layout, ) self.egtp_buffers.append(buffer) diff --git a/megatron/experimental/gtp/README.md b/megatron/experimental/gtp/README.md index da29435e2fa..d27c3783b78 100644 --- a/megatron/experimental/gtp/README.md +++ b/megatron/experimental/gtp/README.md @@ -134,7 +134,7 @@ GTP is enabled through two CLI flags on Megatron's training launcher; everything ### High-priority streams (Blackwell and later) -Required on GB200 / B100 so the GTP comm streams get the SM 
priority needed for AG/RS overlap with compute: +Required on GB200 / GB300 so the GTP comm streams get the SM priority needed for AG/RS overlap with compute: ```bash --high-priority-stream-groups ep gtp expt_gtp tp diff --git a/tests/unit_tests/generalized_tensor_parallel/test_gtp.py b/tests/unit_tests/generalized_tensor_parallel/test_gtp.py index 2ede51b480d..0d75efffd16 100644 --- a/tests/unit_tests/generalized_tensor_parallel/test_gtp.py +++ b/tests/unit_tests/generalized_tensor_parallel/test_gtp.py @@ -1623,3 +1623,76 @@ def test_fallback_skipped_for_pure_ag_param(self): assert ( p._already_finalized is False ), "_already_finalized must stay False — no finalize happened for a pure-AG param" + + +# --------------------------------------------------------------------------- +# 24. GTP DDP bucket alignment: distributed optimizer bucket-end assertion +# --------------------------------------------------------------------------- + + +def _worker_gtp_ddp_bucket_alignment(rank, world_size, port): + """GTP buffers must use padded bucket layout when use_distributed_optimizer=True. + + Without the fix (param_layout=None for GTP buffers in DDP), bucket ends are + not padded to be divisible by intra_dp_cp_with_gtp_group.size(), violating the + assertion at param_and_grad_buffer.py:1427. + + Trigger conditions: + GTP=2, DP=4 → intra_dp_cp_with_gtp_group.size() = 2 + pad_for_alignment=0 → shard of [out=2,in=3] weight = [1,3] = 3 elements (odd) + Two GTP params (total 6 = 2*3; 6%2==0 passes the per-buffer total check) + bucket_size=3 → first param alone fills bucket-0; end=3, 3%2≠0 → AssertionError + """ + from megatron.core import parallel_state as ps + from megatron.core.distributed import DistributedDataParallel, DistributedDataParallelConfig + from megatron.core.transformer.transformer_config import TransformerConfig + + # The module fixture initialized model_parallel without GTP; re-init with GTP=2. 
+ ps.destroy_model_parallel() + ps.initialize_model_parallel( + tensor_model_parallel_size=1, + pipeline_model_parallel_size=1, + gtp_remat_size=2, + ) + + orig_pad = gtp_module.GTP_CONFIG.pad_for_alignment + gtp_module.GTP_CONFIG.pad_for_alignment = 0 + try: + gtp_group = ps.get_generalized_tensor_parallel_remat_group() + + class _TwoLayerModel(torch.nn.Module): + def __init__(self): + super().__init__() + self.fc0 = te.Linear(3, 2, bias=False, device="cuda") + self.fc1 = te.Linear(3, 2, bias=False, device="cuda") + + model = _TwoLayerModel() + wrap_module_params_gtp(model.fc0, ["weight"], gtp_group) + wrap_module_params_gtp(model.fc1, ["weight"], gtp_group) + + config = TransformerConfig( + num_attention_heads=1, + num_layers=1, + hidden_size=4, + tensor_model_parallel_size=1, + ) + ddp_config = DistributedDataParallelConfig( + use_distributed_optimizer=True, + overlap_grad_reduce=True, + bucket_size=3, + ) + + # Without the fix this raises AssertionError at param_and_grad_buffer.py:1427: + # assert end_index % self.data_parallel_world_size == 0 + DistributedDataParallel(config, ddp_config, model) + finally: + gtp_module.GTP_CONFIG.pad_for_alignment = orig_pad + ps.destroy_model_parallel() + ps.initialize_model_parallel() # restore default for remaining tests + + +class TestGTPDDPBucketAlignment: + def test_gtp_buffers_use_padded_layout_with_distributed_optimizer(self): + """GTP DDP buffer creation must not violate bucket-end alignment for dist-opt.""" + _requires_multi_gpu(4) + _run_distributed(_worker_gtp_ddp_bucket_alignment, 4) From 0906db0c4dc0885edb5e807437cbfbb50a9336b6 Mon Sep 17 00:00:00 2001 From: Shiqing Fan Date: Sat, 6 Jun 2026 05:00:10 -0700 Subject: [PATCH 25/59] fix formating Signed-off-by: Shiqing Fan --- .../generalized_tensor_parallel/test_gtp.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/tests/unit_tests/generalized_tensor_parallel/test_gtp.py b/tests/unit_tests/generalized_tensor_parallel/test_gtp.py 
index 0d75efffd16..101609ee87c 100644 --- a/tests/unit_tests/generalized_tensor_parallel/test_gtp.py +++ b/tests/unit_tests/generalized_tensor_parallel/test_gtp.py @@ -1650,9 +1650,7 @@ def _worker_gtp_ddp_bucket_alignment(rank, world_size, port): # The module fixture initialized model_parallel without GTP; re-init with GTP=2. ps.destroy_model_parallel() ps.initialize_model_parallel( - tensor_model_parallel_size=1, - pipeline_model_parallel_size=1, - gtp_remat_size=2, + tensor_model_parallel_size=1, pipeline_model_parallel_size=1, gtp_remat_size=2 ) orig_pad = gtp_module.GTP_CONFIG.pad_for_alignment @@ -1671,15 +1669,10 @@ def __init__(self): wrap_module_params_gtp(model.fc1, ["weight"], gtp_group) config = TransformerConfig( - num_attention_heads=1, - num_layers=1, - hidden_size=4, - tensor_model_parallel_size=1, + num_attention_heads=1, num_layers=1, hidden_size=4, tensor_model_parallel_size=1 ) ddp_config = DistributedDataParallelConfig( - use_distributed_optimizer=True, - overlap_grad_reduce=True, - bucket_size=3, + use_distributed_optimizer=True, overlap_grad_reduce=True, bucket_size=3 ) # Without the fix this raises AssertionError at param_and_grad_buffer.py:1427: From 6cdfe5d24826a41e779172d91e147b497d14d983 Mon Sep 17 00:00:00 2001 From: Shiqing Fan Date: Sat, 6 Jun 2026 06:12:19 -0700 Subject: [PATCH 26/59] fix regular ddp buffer bucket misalignment when GTP params are present Signed-off-by: Shiqing Fan --- .../distributed/distributed_data_parallel.py | 27 +++--- .../generalized_tensor_parallel/test_gtp.py | 88 ++++++++++++++++--- 2 files changed, 92 insertions(+), 23 deletions(-) diff --git a/megatron/core/distributed/distributed_data_parallel.py b/megatron/core/distributed/distributed_data_parallel.py index 069490ce8d3..9208b9a0245 100644 --- a/megatron/core/distributed/distributed_data_parallel.py +++ b/megatron/core/distributed/distributed_data_parallel.py @@ -303,17 +303,22 @@ def __init__( else: assert scaling_factor == target_gradient_scaling_factor 
- # If GTP carved params out of all_params, the caller-supplied (or auto-computed) - # full_param_layout.layouts[buffer_key].param_index_map will contain GTP entries - # that aren't in this buffer's `params_with_names`. _build_gbuf_range_map iterates - # param_index_map, so those stray GTP entries would surface as KeyErrors in - # DistOpt's world_param_group_map. Force buffer to compute its own layout in - # that case. - param_layout = ( - full_param_layout.layouts.get(buffer_key) - if full_param_layout is not None and not gtp_params - else None - ) + # With GTP: full_param_layout contains stray GTP entries not in this buffer, + # so recompute a fresh padded layout to avoid KeyErrors and bucket misalignment. + if full_param_layout is not None and not gtp_params: + param_layout = full_param_layout.layouts.get(buffer_key) + elif self.ddp_config.use_distributed_optimizer: + from ..optimizer.distrib_optimizer import DistributedOptimizer + + param_layout = DistributedOptimizer._compute_per_buffer_param_layout( + params, + self.bucket_size, + data_parallel_group.size(), + self.ddp_config, + param_indices, + ) + else: + param_layout = None params_with_names = [(p, param_to_name[p]) for p in params] buffer = _ParamAndGradBuffer( self.ddp_config, diff --git a/tests/unit_tests/generalized_tensor_parallel/test_gtp.py b/tests/unit_tests/generalized_tensor_parallel/test_gtp.py index 101609ee87c..3e86bbc0005 100644 --- a/tests/unit_tests/generalized_tensor_parallel/test_gtp.py +++ b/tests/unit_tests/generalized_tensor_parallel/test_gtp.py @@ -29,6 +29,7 @@ 21. TestFuseWgradAccumulation – fuse_wgrad_accumulation=True: wgrad→main_grad (multi-GPU) 22. TestGTPGradAccumHook – main_grad updated after reduce-scatter backward (multi-GPU) 23. TestWaitAsyncCommsFallback – wait_async_comms(finalize_after_drain=True) inline-accumulation fallback when _wgrad_rs_handle is None (single-process) +24. 
TestGTPDDPBucketAlignment – GTP and regular DDP buffer bucket ends padded for dist-opt alignment (multi-GPU) Run via torchrun (matches the rest of Megatron's unit tests): @@ -1631,17 +1632,18 @@ def test_fallback_skipped_for_pure_ag_param(self): def _worker_gtp_ddp_bucket_alignment(rank, world_size, port): - """GTP buffers must use padded bucket layout when use_distributed_optimizer=True. - - Without the fix (param_layout=None for GTP buffers in DDP), bucket ends are - not padded to be divisible by intra_dp_cp_with_gtp_group.size(), violating the - assertion at param_and_grad_buffer.py:1427. - - Trigger conditions: - GTP=2, DP=4 → intra_dp_cp_with_gtp_group.size() = 2 - pad_for_alignment=0 → shard of [out=2,in=3] weight = [1,3] = 3 elements (odd) - Two GTP params (total 6 = 2*3; 6%2==0 passes the per-buffer total check) - bucket_size=3 → first param alone fills bucket-0; end=3, 3%2≠0 → AssertionError + """GTP param buffers in DDP must use padded bucket layout with use_distributed_optimizer=True. + + Bug: DDP used param_layout=None for GTP buffers, falling through to + _compute_default_per_buffer_param_layout, which packs params without padding bucket ends. + The distributed optimizer requires every bucket end to be divisible by + intra_dp_cp_with_gtp_group.size() (asserted at param_and_grad_buffer.py:1427). 
+ + Trigger: + GTP=2, DP=4 → intra_dp_cp_with_gtp_group.size()=2 + pad_for_alignment=0, weight [out=2,in=3] → GTP shard=[1,3]=3 elements (odd) + Two GTP params: total=6, 6%2==0 (total check passes); bucket_size=3 forces + bucket-0 to contain only the first param, end=3, 3%2≠0 → AssertionError """ from megatron.core import parallel_state as ps from megatron.core.distributed import DistributedDataParallel, DistributedDataParallelConfig @@ -1684,8 +1686,70 @@ def __init__(self): ps.initialize_model_parallel() # restore default for remaining tests +def _worker_regular_buffer_padded_when_gtp_params_present(rank, world_size, port): + """Regular (non-GTP) param buffers in DDP must also use padded layout when GTP is active. + + Bug: when gtp_params is non-empty, full_param_layout.layouts contains stale GTP entries + that don't belong to the regular buffer, causing KeyErrors in DistOpt's param map. + DDP avoided this by forcing param_layout=None for regular buffers, but that falls through + to _compute_default_per_buffer_param_layout, which produces unpadded bucket ends, again + violating param_and_grad_buffer.py:1427 (end_index % data_parallel_world_size == 0). 
+ + Trigger: + GTP=2, DP=4 → intra_dp_cp_group.size()=4 (regular params reduce over the full DP group) + bias=True → each bias has 2 elements (not divisible by 4) + Two layers: total regular numel=4, 4%4==0 (total check passes); bucket_size=2 forces + bucket-0 to contain only the first bias, end=2, 2%4≠0 → AssertionError + """ + from megatron.core import parallel_state as ps + from megatron.core.distributed import DistributedDataParallel, DistributedDataParallelConfig + from megatron.core.transformer.transformer_config import TransformerConfig + + ps.destroy_model_parallel() + ps.initialize_model_parallel( + tensor_model_parallel_size=1, pipeline_model_parallel_size=1, gtp_remat_size=2 + ) + + orig_pad = gtp_module.GTP_CONFIG.pad_for_alignment + gtp_module.GTP_CONFIG.pad_for_alignment = 0 + try: + gtp_group = ps.get_generalized_tensor_parallel_remat_group() + + class _TwoLayerModelWithBias(torch.nn.Module): + def __init__(self): + super().__init__() + # bias=True: weight → GTPShardedParam (gtp_buffer), bias → regular param + self.fc0 = te.Linear(3, 2, bias=True, device="cuda") + self.fc1 = te.Linear(3, 2, bias=True, device="cuda") + + model = _TwoLayerModelWithBias() + wrap_module_params_gtp(model.fc0, ["weight"], gtp_group) + wrap_module_params_gtp(model.fc1, ["weight"], gtp_group) + + config = TransformerConfig( + num_attention_heads=1, num_layers=1, hidden_size=4, tensor_model_parallel_size=1 + ) + # bucket_size=2: each 2-element bias fills one bucket in the regular buffer. + # Without the fix: regular buffer uses param_layout=None → bucket-0 ends at 2, + # 2 % intra_dp_cp_group.size()(=4) != 0 → AssertionError at line 1427. 
+ ddp_config = DistributedDataParallelConfig( + use_distributed_optimizer=True, overlap_grad_reduce=True, bucket_size=2 + ) + + DistributedDataParallel(config, ddp_config, model) + finally: + gtp_module.GTP_CONFIG.pad_for_alignment = orig_pad + ps.destroy_model_parallel() + ps.initialize_model_parallel() + + class TestGTPDDPBucketAlignment: def test_gtp_buffers_use_padded_layout_with_distributed_optimizer(self): - """GTP DDP buffer creation must not violate bucket-end alignment for dist-opt.""" + """GTP buffer bucket ends must be padded to intra_dp_cp_with_gtp_group.size().""" _requires_multi_gpu(4) _run_distributed(_worker_gtp_ddp_bucket_alignment, 4) + + def test_regular_buffers_use_padded_layout_when_gtp_params_present(self): + """Regular buf bucket ends must be padded even when gtp_params forces layout recompute.""" + _requires_multi_gpu(4) + _run_distributed(_worker_regular_buffer_padded_when_gtp_params_present, 4) From 4d1e2eb03d684197c45a77a17767ed9be8469f47 Mon Sep 17 00:00:00 2001 From: Shiqing Fan Date: Sun, 7 Jun 2026 22:01:26 -0700 Subject: [PATCH 27/59] add integration test for {mamba,attn,moe}+gtp; polish existing gtp and tp+gtp UTs. 
Signed-off-by: Shiqing Fan --- megatron/training/arguments.py | 6 +- .../gtp_test_utils.py | 101 +++ .../test_attention_gtp.py | 244 ++++++++ .../generalized_tensor_parallel/test_gtp.py | 579 ++++-------------- .../test_mamba_gtp.py | 261 ++++++++ .../test_moe_egtp.py | 289 +++++++++ .../test_tp_gtp.py | 207 +------ 7 files changed, 1051 insertions(+), 636 deletions(-) create mode 100644 tests/unit_tests/generalized_tensor_parallel/gtp_test_utils.py create mode 100644 tests/unit_tests/generalized_tensor_parallel/test_attention_gtp.py create mode 100644 tests/unit_tests/generalized_tensor_parallel/test_mamba_gtp.py create mode 100644 tests/unit_tests/generalized_tensor_parallel/test_moe_egtp.py diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py index c5f20a89657..69edc880a23 100644 --- a/megatron/training/arguments.py +++ b/megatron/training/arguments.py @@ -1441,9 +1441,9 @@ def validate_args(args, defaults={}): # architectures. Assigning high priority to communication streams ensures that # communication kernels are scheduled with higher priority, minimizing the exposed # communication when it is overlapped with other computation kernels. 
- if 'ps' not in args.high_priority_stream_groups: - args.high_priority_stream_groups.append('ps') - warn_rank_0("Setting 'ps' group for high priority streams.") + if 'gtp' not in args.high_priority_stream_groups: + args.high_priority_stream_groups.append('gtp') + warn_rank_0("Setting 'gtp' group for high priority streams.") if egtp_size > 1 and 'expt_gtp' not in args.high_priority_stream_groups: args.high_priority_stream_groups.append('expt_gtp') warn_rank_0("Setting 'expt_gtp' group for high priority streams.") diff --git a/tests/unit_tests/generalized_tensor_parallel/gtp_test_utils.py b/tests/unit_tests/generalized_tensor_parallel/gtp_test_utils.py new file mode 100644 index 00000000000..7919ee7c420 --- /dev/null +++ b/tests/unit_tests/generalized_tensor_parallel/gtp_test_utils.py @@ -0,0 +1,101 @@ +# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +"""Shared fixtures and helpers for all GTP unit tests. +""" + +import pytest +import torch +import transformer_engine.pytorch as te +from transformer_engine.pytorch import is_mxfp8_available, is_nvfp4_available +from transformer_engine.pytorch.quantization import FP8GlobalStateManager + +from megatron.experimental.gtp import GTPShardedParam +from tests.unit_tests.test_utilities import Utils + +# --------------------------------------------------------------------------- +# Fixtures (import into each test module so pytest discovers them) +# --------------------------------------------------------------------------- + + +@pytest.fixture(scope="module", autouse=True) +def _torchrun_dist_init(): + """Initialize the torchrun-managed dist group once per module.""" + Utils.initialize_model_parallel() + yield + Utils.destroy_model_parallel() + + +@pytest.fixture(autouse=True) +def reset_fp8_state(): + yield + FP8GlobalStateManager.reset() + + +@pytest.fixture(autouse=True) +def reset_gtp_globals(): + """Reset GTP mutable class-level state between tests.""" + yield + GTPShardedParam._chain_state = 
{} + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _run_distributed(fn, required_world_size: int, *args) -> None: + """Run ``fn(rank, world_size, port, *args)`` on every torchrun rank. + + ``port`` is unused (dist already initialized by torchrun) but kept so + worker signatures don't need editing. + """ + actual_world_size = torch.distributed.get_world_size() + if actual_world_size != required_world_size: + pytest.skip( + f"Requires world_size={required_world_size}, " + f"got {actual_world_size} (launch with torchrun --nproc-per-node={required_world_size})" + ) + fn(torch.distributed.get_rank(), actual_world_size, None, *args) + + +def _requires_multi_gpu(n: int = 4): + if torch.cuda.device_count() < n: + pytest.skip(f"Requires at least {n} CUDA devices") + + +def _requires_mxfp8(): + available, reason = is_mxfp8_available(return_reason=True) + if not available: + pytest.skip(f"MXFP8 not available: {reason}") + + +def _requires_nvfp4(): + if not is_nvfp4_available(): + pytest.skip("NVFP4 not available (requires compute capability >= 10.0)") + + +def _make_gtp_linear(in_f, out_f, gtp_group, dtype=torch.bfloat16, **kwargs): + """Construct a bias-free GTP-sharded te.Linear on CUDA.""" + return te.Linear( + in_features=in_f, + out_features=out_f, + bias=False, + params_dtype=dtype, + device="cuda", + gtp_group=gtp_group, + **kwargs, + ) + + +def _make_gtp_grouped_linear(num_gemms, in_f, out_f, gtp_group, dtype=torch.bfloat16, **kwargs): + """Construct a bias-free GTP-sharded te.GroupedLinear on CUDA.""" + return te.GroupedLinear( + num_gemms=num_gemms, + in_features=in_f, + out_features=out_f, + bias=False, + params_dtype=dtype, + device="cuda", + gtp_group=gtp_group, + **kwargs, + ) diff --git a/tests/unit_tests/generalized_tensor_parallel/test_attention_gtp.py b/tests/unit_tests/generalized_tensor_parallel/test_attention_gtp.py new 
file mode 100644 index 00000000000..d7ef79048a2 --- /dev/null +++ b/tests/unit_tests/generalized_tensor_parallel/test_attention_gtp.py @@ -0,0 +1,244 @@ +# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +"""Integration tests for GTP + Attention (TransformerLayer) correctness. + +Test groups +----------- +TestAttentionGTPCorrectness - GTP TransformerLayer loss trajectory matches baseline (no-GTP) + over 10 training steps using MXFP8 and Nemotron3-Super proxy + hyperparameters. +""" + +import pytest +import torch +import torch.distributed as dist + +from megatron.experimental.gtp import HAVE_GTP + +if not HAVE_GTP: + pytest.skip("GTP requires TransformerEngine >= 2.17", allow_module_level=True) + +from transformer_engine.pytorch import fp8_autocast + +from megatron.experimental.gtp import GTPShardedParam +from tests.unit_tests.generalized_tensor_parallel.gtp_test_utils import ( + _requires_mxfp8, + _run_distributed, + _torchrun_dist_init, + reset_fp8_state, + reset_gtp_globals, +) + +# --------------------------------------------------------------------------- +# Attention GTP correctness: per-step loss trajectory baseline vs GTP=4 +# --------------------------------------------------------------------------- + + +def _worker_attention_gtp_correctness(rank, world_size, port): + """Verify GTP TransformerLayer produces the same per-step loss as a no-GTP baseline. + + Phase 1 — GTP=1, DP=4: + All 4 ranks hold the full model and process identical inputs. Gradients + are identical across ranks (no all-reduce needed). Weight update: + param.data -= lr * param.grad + + Phase 2 — GTP=4, DP=1: + All linear weights (QKV proj, output proj, MLP fc1/fc2) sharded across + 4 ranks. 
After backward, wgrad reduce-scatter sums each shard's wgrad: + main_grad[rank_i] = gtp_size * dW[shard_i] + The optimizer divides by gtp_size to recover the per-element gradient: + param.data -= (lr / gtp_size) * param.main_grad + + Both phases use identical initial weights (synced from rank 0 in Phase 1, + restored as shards in Phase 2) and identical step-by-step inputs. + + Nemotron3-Super proxy hyperparameters: + hidden=4096, num_heads=32 (head_dim=128), ffn_hidden_size=16384 (=4xhidden) + MXFP8 alignment with GTP=4: + QKV shard: 3x4096/4=3072, 3072%32=0 ✓; proj shard: 4096/4=1024, 1024%32=0 ✓ + fc1 shard: 16384/4=4096, 4096%32=0 ✓; fc2 shard: 4096/4=1024, 1024%32=0 ✓ + """ + from transformer_engine.common.recipe import MXFP8BlockScaling + from transformer_engine.pytorch.quantization import FP8GlobalStateManager + + from megatron.core import parallel_state as ps + from megatron.core.models.gpt.gpt_layer_specs import get_gpt_layer_with_transformer_engine_spec + from megatron.core.process_groups_config import ProcessGroupCollection + from megatron.core.tensor_parallel.random import model_parallel_cuda_manual_seed + from megatron.core.transformer.transformer_config import TransformerConfig + + HIDDEN = 4096 + NUM_HEADS = 32 # head_dim = HIDDEN / NUM_HEADS = 128 + FFN_HIDDEN = 16384 # = 4 x HIDDEN (default GPT FFN ratio) + NUM_LAYERS = 2 + SEQ = 32 + BATCH = 1 + LR = 0.01 + STEPS = 10 + dtype = torch.bfloat16 + recipe = MXFP8BlockScaling() + + def make_config(): + return TransformerConfig( + num_attention_heads=NUM_HEADS, + num_layers=NUM_LAYERS, + hidden_size=HIDDEN, + ffn_hidden_size=FFN_HIDDEN, + add_bias_linear=False, + params_dtype=dtype, + hidden_dropout=0.0, + attention_dropout=0.0, + bias_dropout_fusion=False, + fp8='e4m3', + tensor_model_parallel_size=1, + pipeline_model_parallel_size=1, + ) + + def make_transformer_stack(config, pg_collection): + spec = get_gpt_layer_with_transformer_engine_spec() + return torch.nn.ModuleList([ + spec.module(config, 
spec.submodules, layer_number=i + 1, pg_collection=pg_collection) + for i in range(NUM_LAYERS) + ]) + + def run_step(layers, x): + with fp8_autocast(enabled=True, fp8_recipe=recipe): + for layer in layers: + x, _ = layer(x, attention_mask=None) + return x.mean() + + # ------------------------------------------------------------------------- + # Phase 1: Baseline — GTP=1 (DP=4) + # ------------------------------------------------------------------------- + ps.destroy_model_parallel() + ps.initialize_model_parallel( + tensor_model_parallel_size=1, pipeline_model_parallel_size=1, gtp_remat_size=1 + ) + model_parallel_cuda_manual_seed(42) + + pg_collection = ProcessGroupCollection.use_mpu_process_groups(required_pgs=['tp', 'cp', 'gtp']) + config = make_config() + layers = make_transformer_stack(config, pg_collection) + for layer in layers: + layer.cuda() + + # Verify baseline has no GTP sharding (gtp_remat_size=1 should leave plain parameters). + assert not any( + isinstance(p, GTPShardedParam) for p in layers.parameters() + ), "Baseline GTP=1 stack should have no GTPShardedParam" + + # Synchronize weights from rank 0 across all DP ranks. + for p in layers.parameters(): + dist.broadcast(p.data, src=0) + + # Save initial weights; will be used to initialize the GTP model identically. 
+ saved_weights = {n: p.data.clone() for n, p in layers.named_parameters()} + + baseline_losses = [] + for step in range(STEPS): + torch.manual_seed(step) + x = torch.randn(SEQ, BATCH, HIDDEN, dtype=dtype, device='cuda') + dist.broadcast(x, src=0) + + loss = run_step(layers, x) + if rank == 0: + baseline_losses.append(loss.item()) + + loss.backward() + with torch.no_grad(): + for p in layers.parameters(): + if p.grad is not None: + p.data.sub_(LR * p.grad) + p.grad.zero_() + + ps.destroy_model_parallel() + GTPShardedParam._chain_state = {} + FP8GlobalStateManager.reset() + + # ------------------------------------------------------------------------- + # Phase 2: GTP=4 (DP=1) + # ------------------------------------------------------------------------- + ps.initialize_model_parallel( + tensor_model_parallel_size=1, pipeline_model_parallel_size=1, gtp_remat_size=4 + ) + model_parallel_cuda_manual_seed(42) + + pg_collection = ProcessGroupCollection.use_mpu_process_groups(required_pgs=['tp', 'cp', 'gtp']) + config = make_config() + layers_gtp = make_transformer_stack(config, pg_collection) + for layer in layers_gtp: + layer.cuda() + + gtp_group = ps.get_generalized_tensor_parallel_remat_group() + gtp_size = gtp_group.size() + gtp_rank = gtp_group.rank() + + # Verify GTP is truly active: linear weights must be GTPShardedParam instances. + gtp_params = [p for p in layers_gtp.parameters() if isinstance(p, GTPShardedParam)] + assert len(gtp_params) > 0, ( + "GTP is not active: no GTPShardedParam found in GTP=4 transformer stack" + ) + + # Restore initial weights: GTP params get the matching shard, others get the full tensor. + for name, p in layers_gtp.named_parameters(): + full = saved_weights[name] + if isinstance(p, GTPShardedParam): + shard_size = p.shape[0] + p.data.copy_(full[gtp_rank * shard_size: (gtp_rank + 1) * shard_size]) + else: + p.data.copy_(full) + + # Pre-allocate main_grad for GTP params (required before the first backward). 
+ for p in layers_gtp.parameters(): + if isinstance(p, GTPShardedParam): + p.main_grad = torch.zeros(p.shape, dtype=dtype, device='cuda') + + gtp_losses = [] + for step in range(STEPS): + for p in layers_gtp.parameters(): + if isinstance(p, GTPShardedParam): + p.main_grad.zero_() + + torch.manual_seed(step) + x = torch.randn(SEQ, BATCH, HIDDEN, dtype=dtype, device='cuda') + dist.broadcast(x, src=0) + + loss = run_step(layers_gtp, x) + if rank == 0: + gtp_losses.append(loss.item()) + + loss.backward() + + # After RS, main_grad = gtp_size * dW_shard. Divide by gtp_size to match baseline. + with torch.no_grad(): + for p in layers_gtp.parameters(): + if isinstance(p, GTPShardedParam): + p.data.sub_((LR / gtp_size) * p.main_grad) + elif p.grad is not None: + p.data.sub_(LR * p.grad) + p.grad.zero_() + + ps.destroy_model_parallel() + ps.initialize_model_parallel() + GTPShardedParam._chain_state = {} + + # ------------------------------------------------------------------------- + # Compare per-step loss trajectories on rank 0 + # ------------------------------------------------------------------------- + if rank == 0: + assert len(baseline_losses) == STEPS + assert len(gtp_losses) == STEPS + for step, (lb, lg) in enumerate(zip(baseline_losses, gtp_losses)): + print(f"Step {step:2d}: baseline={lb:.6f} gtp={lg:.6f}", flush=True) + torch.testing.assert_close( + torch.tensor(gtp_losses), torch.tensor(baseline_losses), atol=1e-5, rtol=1e-5 + ) + + +class TestAttentionGTPCorrectness: + def test_attention_gtp_loss_trajectory_matches_baseline(self): + """GTP TransformerLayer per-step losses must match no-GTP baseline (atol=1e-5, rtol=1e-5; MXFP8, Nemotron3-Super proxy).""" + _requires_mxfp8() + if torch.cuda.device_count() < 4: + pytest.skip("Requires at least 4 CUDA devices") + _run_distributed(_worker_attention_gtp_correctness, 4) diff --git a/tests/unit_tests/generalized_tensor_parallel/test_gtp.py b/tests/unit_tests/generalized_tensor_parallel/test_gtp.py index 
3e86bbc0005..76b871c1d42 100644 --- a/tests/unit_tests/generalized_tensor_parallel/test_gtp.py +++ b/tests/unit_tests/generalized_tensor_parallel/test_gtp.py @@ -1,39 +1,32 @@ -# Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# See LICENSE for license information. +# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. """Unit tests for Generalized Tensor Parallelism (GTP). Test groups ----------- -1. TestGTPWeightState – state-machine transitions (single-process) -2. TestGTPWeightCache – coat-check buffer pool (single-process) -3. TestGTPSharding – wrap_module_params_gtp: shard content + padding (multi-GPU) -4. TestWrapModuleParams – wrap_module_params_gtp: param replacement + weight_list (multi-GPU) -5. TestLinearGTP – Linear forward/backward numerical correctness (multi-GPU) -6. TestLayerNormLinearGTP – LayerNormLinear forward/backward smoke test (multi-GPU) -7. TestGroupedLinearGTP – GroupedLinear forward/backward smoke test (multi-GPU) -8. TestGTPPrefetchChain – linked-list next_w/prev_w wiring (multi-GPU) -9. TestGTPWgradRS – wgrad reduce-scatter shape + multi-layer deferred path (multi-GPU) -10. TestGTPMicrobatches – output consistency across microbatches (multi-GPU) -11. TestNVFP4LinearGTP – Linear + NVFP4 recipe: quantized shard setup, fwd/bwd (multi-GPU) -12. TestNVFP4GroupedLinearGTP – GroupedLinear + NVFP4 recipe: coalesced AG + fwd/bwd (multi-GPU) -13. TestMXFP8LinearGTP – Linear + MXFP8 recipe: quantized shard setup, fwd/bwd, padding (multi-GPU) -14. TestGTPConfig – update_gtp_config: valid/invalid keys (single-process) -15. TestGTPShardedParamProperties – shape computations, get_padded_shard, _strip_padding (single-process) -16. TestGTPCacheKey – _get_cache_key: expert vs non-expert, fwd vs bwd (single-process) -17. TestGTPCacheRelease – reserve/get/release pool semantics (single-process) -18. TestTagGTPParamsWithNames – _debug_name population on GTPShardedParam (single-process) -19. 
TestGTPGroupSizeOne – wrap_module_params_gtp no-op when gtp_group.size()==1 (single-process) -20. TestGTPPrefetchDisabled – weight_prefetch=False: single-pass forward still works (multi-GPU) -21. TestFuseWgradAccumulation – fuse_wgrad_accumulation=True: wgrad→main_grad (multi-GPU) -22. TestGTPGradAccumHook – main_grad updated after reduce-scatter backward (multi-GPU) -23. TestWaitAsyncCommsFallback – wait_async_comms(finalize_after_drain=True) inline-accumulation fallback when _wgrad_rs_handle is None (single-process) -24. TestGTPDDPBucketAlignment – GTP and regular DDP buffer bucket ends padded for dist-opt alignment (multi-GPU) - -Run via torchrun (matches the rest of Megatron's unit tests): - - torchrun --nproc-per-node 4 -m pytest tests/unit_tests/generalized_tensor_parallel/test_gtp.py -v +1. TestGTPWeightState - state-machine transitions (single-process) +2. TestGTPWeightCache - coat-check buffer pool + reserve/get/release semantics (single-process) +3. TestGTPSharding - wrap_module_params_gtp: shard content + padding (multi-GPU) +4. TestWrapModuleParams - wrap_module_params_gtp: param replacement + weight_list (multi-GPU) +5. TestLinearGTP - Linear forward/backward numerical correctness (multi-GPU) +6. TestLayerNormLinearGTP - LayerNormLinear forward/backward smoke test (multi-GPU) +7. TestGroupedLinearGTP - GroupedLinear forward/backward smoke test (multi-GPU) +8. TestGTPPrefetchChain - linked-list next_w/prev_w wiring (multi-GPU) +9. TestGTPWgradRS - wgrad reduce-scatter shape + multi-layer deferred path (multi-GPU) +10. TestGTPMicrobatches - output consistency across microbatches (multi-GPU) +11. TestNVFP4LinearGTP - Linear + NVFP4 recipe: quantized shard setup, fwd/bwd (multi-GPU) +12. TestNVFP4GroupedLinearGTP - GroupedLinear + NVFP4 recipe: coalesced AG + fwd/bwd (multi-GPU) +13. TestMXFP8LinearGTP - Linear + MXFP8 recipe: quantized shard setup, fwd/bwd, padding (multi-GPU) +14. 
TestGTPConfig - update_gtp_config: valid/invalid keys (single-process) +15. TestGTPShardedParamProperties - shape computations, get_padded_shard, _strip_padding (single-process) +16. TestGTPCacheKey - _get_cache_key: expert vs non-expert, fwd vs bwd (single-process) +17. TestTagGTPParamsWithNames - _debug_name population on GTPShardedParam (single-process) +18. TestGTPGroupSizeOne - wrap_module_params_gtp no-op when gtp_group.size()==1 (single-process) +19. TestGTPPrefetchDisabled - weight_prefetch=False: single-pass forward still works (multi-GPU) +20. TestFuseWgradAccumulation - fuse_wgrad_accumulation=True: wgrad→main_grad (multi-GPU) +21. TestGTPGradAccumHook - main_grad updated after reduce-scatter backward (multi-GPU) +22. TestWaitAsyncCommsFallback - wait_async_comms(finalize_after_drain=True) inline-accumulation fallback when _wgrad_rs_handle is None (single-process) +23. TestGTPDDPBucketAlignment - GTP and regular DDP buffer bucket ends padded for dist-opt alignment (multi-GPU) Multi-GPU tests skip when ``torch.distributed.get_world_size()`` doesn't match the required world size (4 for everything in this file). 
@@ -51,81 +44,41 @@ import transformer_engine.pytorch as te from transformer_engine.common.recipe import NVFP4BlockScaling -from transformer_engine.pytorch import fp8_autocast, is_mxfp8_available, is_nvfp4_available -from transformer_engine.pytorch.quantization import FP8GlobalStateManager +from transformer_engine.pytorch import fp8_autocast from transformer_engine.pytorch.quantized_tensor import QuantizedTensor import megatron.experimental.gtp.generalized_tensor_parallelism as gtp_module from megatron.experimental.gtp import GTPShardedParam, wrap_module_params_gtp from megatron.experimental.gtp.generalized_tensor_parallelism import GTPWeightCache, GTPWeightState -from tests.unit_tests.test_utilities import Utils - -# --------------------------------------------------------------------------- -# Fixtures -# --------------------------------------------------------------------------- +from tests.unit_tests.generalized_tensor_parallel.gtp_test_utils import ( + _make_gtp_grouped_linear, + _make_gtp_linear, + _requires_multi_gpu, + _requires_mxfp8, + _requires_nvfp4, + _run_distributed, + _torchrun_dist_init, + reset_fp8_state, + reset_gtp_globals, +) -@pytest.fixture(scope="module", autouse=True) -def _torchrun_dist_init(): - """Initialize the torchrun-managed dist group once per module. - - GTP tests use ``dist.new_group(...)`` to build their own GTP subgroup - within the world that torchrun set up. Each test runs on every torchrun - rank in parallel (standard Mcore convention); ``_run_distributed`` below - only skips when the required world size doesn't match what torchrun - provides. 
- """ - Utils.initialize_model_parallel() - yield - Utils.destroy_model_parallel() +class _FakeGroup: + """Minimal mock for a dist process group — used in single-process unit tests.""" + def __init__(self, size=1, rank=0): + self._size = size + self._rank = rank -@pytest.fixture(autouse=True) -def reset_fp8_state(): - yield - FP8GlobalStateManager.reset() + def size(self): + return self._size - -@pytest.fixture(autouse=True) -def reset_gtp_globals(): - """Reset all GTP mutable class/module-level state between tests.""" - yield - GTPShardedParam._chain_state = {} + def rank(self): + return self._rank # --------------------------------------------------------------------------- -# Helpers -# --------------------------------------------------------------------------- - - -def _run_distributed(fn, required_world_size: int, *args) -> None: - """Run ``fn`` on every torchrun rank. - - ``fn(rank, world_size, port, *args)`` matches the pre-existing worker - signature; ``port`` is unused (dist already initialized by torchrun) but - kept so the worker bodies don't need editing. - """ - actual_world_size = torch.distributed.get_world_size() - if actual_world_size != required_world_size: - pytest.skip( - f"Requires world_size={required_world_size}, " - f"got {actual_world_size} (launch with torchrun --nproc-per-node={required_world_size})" - ) - fn(torch.distributed.get_rank(), actual_world_size, None, *args) - - -def _requires_multi_gpu(n: int = 4): - if torch.cuda.device_count() < n: - pytest.skip(f"Requires at least {n} CUDA devices") - - -def _requires_nvfp4(): - if not is_nvfp4_available(): - pytest.skip("NVFP4 not available (requires compute capability >= 10.0)") - - -# --------------------------------------------------------------------------- -# 1. GTPWeightState – state-machine transition tests +# 1. 
GTPWeightState - state-machine transition tests # --------------------------------------------------------------------------- @@ -161,25 +114,15 @@ def test_rs_state_full_cycle(self): # --------------------------------------------------------------------------- -# 2. GTPWeightCache – coat-check buffer pool tests +# 2. GTPWeightCache - coat-check buffer pool tests # --------------------------------------------------------------------------- class TestGTPWeightCache: - class _FakeGroup: - def __init__(self, size=2): - self._size = size - - def size(self): - return self._size - - def rank(self): - return 0 - def _param(self, shape=(8, 4), gtp_size=2): p = GTPShardedParam(torch.zeros(*shape)) - p.group = self._FakeGroup(gtp_size) + p.group = _FakeGroup(size=gtp_size) p.expert_idx = None p.pad_length = 0 p._quantizer = None @@ -247,6 +190,22 @@ def test_different_shapes_use_distinct_pool_slots(self): cache.release(t1) cache.release(t2) + def test_without_release_pool_stays_empty(self): + """Without release(), subsequent reserves allocate fresh buffers.""" + cache = GTPWeightCache() + p = self._param() + t1 = cache.reserve(p, torch.bfloat16, fwd=True) + buf1 = cache.get(t1) + # Do NOT release t1 — pool stays empty + t2 = cache.reserve(p, torch.bfloat16, fwd=True) + buf2 = cache.get(t2) + assert buf2 is not buf1, "Without release, a fresh buffer must be allocated" + + def test_release_invalid_ticket_raises(self): + cache = GTPWeightCache() + with pytest.raises(KeyError): + cache.release(9999) + def test_fwd_bwd_tickets_are_distinct(self): """fwd=True and fwd=False reserves always receive distinct ticket IDs.""" cache = GTPWeightCache() @@ -331,14 +290,7 @@ def test_unaligned_shard_padding(self): def _worker_linear_param_replaced(rank, world_size, port): in_f, out_f = 64, 128 gtp_group = dist.new_group(list(range(world_size))) - layer = te.Linear( - in_features=in_f, - out_features=out_f, - bias=False, - params_dtype=torch.bfloat16, - device="cuda", - gtp_group=gtp_group, 
- ) + layer = _make_gtp_linear(in_f, out_f, gtp_group) w = layer.weight assert isinstance(w, GTPShardedParam), "weight must be GTPShardedParam" assert w.shape == (out_f // world_size, in_f), f"unexpected shard shape {w.shape}" @@ -348,15 +300,7 @@ def _worker_linear_param_replaced(rank, world_size, port): def _worker_grouped_weight_list(rank, world_size, port): num_gemms, in_f, out_f = 3, 32, 64 gtp_group = dist.new_group(list(range(world_size))) - layer = te.GroupedLinear( - num_gemms=num_gemms, - in_features=in_f, - out_features=out_f, - bias=False, - params_dtype=torch.bfloat16, - device="cuda", - gtp_group=gtp_group, - ) + layer = _make_gtp_grouped_linear(num_gemms, in_f, out_f, gtp_group) w0 = layer.weight0 assert isinstance(w0, GTPShardedParam) assert w0.weight_list is not None @@ -386,14 +330,7 @@ def _worker_linear_correctness(rank, world_size, port): dtype = torch.bfloat16 gtp_group = dist.new_group(list(range(world_size))) - layer = te.Linear( - in_features=in_f, - out_features=out_f, - bias=False, - params_dtype=dtype, - device="cuda", - gtp_group=gtp_group, - ) + layer = _make_gtp_linear(in_f, out_f, gtp_group, dtype) # Reconstruct full weight from shards (all-gather) shard = layer.weight.data.clone() @@ -417,7 +354,7 @@ def _worker_linear_correctness(rank, world_size, port): assert out_gtp.shape == out_ref.shape, f"Shape mismatch {out_gtp.shape} vs {out_ref.shape}" assert torch.allclose( - out_gtp.float(), out_ref.float(), atol=0.1, rtol=0.1 + out_gtp.float(), out_ref.float(), atol=1e-5, rtol=1e-5 ), f"Output mismatch max_diff={(out_gtp.float()-out_ref.float()).abs().max():.4f}" # wgrad RS path always accumulates into main_grad; allocate before backward. 
@@ -431,7 +368,7 @@ def _worker_linear_correctness(rank, world_size, port): assert inp_gtp.grad is not None assert torch.allclose( - inp_gtp.grad.float(), inp_ref.grad.float(), atol=0.1, rtol=0.1 + inp_gtp.grad.float(), inp_ref.grad.float(), atol=1e-5, rtol=1e-5 ), f"dX mismatch max_diff={(inp_gtp.grad.float()-inp_ref.grad.float()).abs().max():.4f}" @@ -490,15 +427,7 @@ def _worker_grouped_linear(rank, world_size, port, num_gemms): dtype = torch.bfloat16 gtp_group = dist.new_group(list(range(world_size))) - layer = te.GroupedLinear( - num_gemms=num_gemms, - in_features=in_f, - out_features=out_f, - bias=False, - params_dtype=dtype, - device="cuda", - gtp_group=gtp_group, - ) + layer = _make_gtp_grouped_linear(num_gemms, in_f, out_f, gtp_group, dtype) assert isinstance(layer.weight0, GTPShardedParam) m_splits = [total_tokens // num_gemms] * num_gemms @@ -535,22 +464,8 @@ def _worker_chain_wired(rank, world_size, port): dtype = torch.bfloat16 gtp_group = dist.new_group(list(range(world_size))) - l0 = te.Linear( - in_features=in_f, - out_features=out_f, - bias=False, - params_dtype=dtype, - device="cuda", - gtp_group=gtp_group, - ) - l1 = te.Linear( - in_features=in_f, - out_features=out_f, - bias=False, - params_dtype=dtype, - device="cuda", - gtp_group=gtp_group, - ) + l0 = _make_gtp_linear(in_f, out_f, gtp_group, dtype) + l1 = _make_gtp_linear(in_f, out_f, gtp_group, dtype) inp = torch.randn(4, in_f, dtype=dtype, device="cuda") dist.broadcast(inp, src=0) @@ -573,22 +488,8 @@ def _worker_chain_async_prefetch(rank, world_size, port): dtype = torch.bfloat16 gtp_group = dist.new_group(list(range(world_size))) - l0 = te.Linear( - in_features=in_f, - out_features=out_f, - bias=False, - params_dtype=dtype, - device="cuda", - gtp_group=gtp_group, - ) - l1 = te.Linear( - in_features=in_f, - out_features=out_f, - bias=False, - params_dtype=dtype, - device="cuda", - gtp_group=gtp_group, - ) + l0 = _make_gtp_linear(in_f, out_f, gtp_group, dtype) + l1 = _make_gtp_linear(in_f, 
out_f, gtp_group, dtype) inp = torch.randn(4, in_f, dtype=dtype, device="cuda") dist.broadcast(inp, src=0) @@ -621,15 +522,7 @@ def _worker_wgrad_shape(rank, world_size, port): dtype = torch.bfloat16 gtp_group = dist.new_group(list(range(world_size))) - layer = te.Linear( - in_features=in_f, - out_features=out_f, - bias=False, - params_dtype=dtype, - device="cuda", - gtp_group=gtp_group, - fuse_wgrad_accumulation=False, - ) + layer = _make_gtp_linear(in_f, out_f, gtp_group, dtype, fuse_wgrad_accumulation=False) inp = torch.randn(8, in_f, dtype=dtype, device="cuda", requires_grad=True) dist.broadcast(inp, src=0) @@ -648,22 +541,8 @@ def _worker_multilayer_deferred_rs(rank, world_size, port): dtype = torch.bfloat16 gtp_group = dist.new_group(list(range(world_size))) - l0 = te.Linear( - in_features=in_f, - out_features=out_f, - bias=False, - params_dtype=dtype, - device="cuda", - gtp_group=gtp_group, - ) - l1 = te.Linear( - in_features=in_f, - out_features=out_f, - bias=False, - params_dtype=dtype, - device="cuda", - gtp_group=gtp_group, - ) + l0 = _make_gtp_linear(in_f, out_f, gtp_group, dtype) + l1 = _make_gtp_linear(in_f, out_f, gtp_group, dtype) inp = torch.randn(8, in_f, dtype=dtype, device="cuda", requires_grad=True) dist.broadcast(inp, src=0) @@ -702,14 +581,7 @@ def _worker_microbatches(rank, world_size, port): dtype = torch.bfloat16 gtp_group = dist.new_group(list(range(world_size))) - layer = te.Linear( - in_features=in_f, - out_features=out_f, - bias=False, - params_dtype=dtype, - device="cuda", - gtp_group=gtp_group, - ) + layer = _make_gtp_linear(in_f, out_f, gtp_group, dtype) inp = torch.randn(batch, in_f, dtype=dtype, device="cuda") dist.broadcast(inp, src=0) @@ -743,18 +615,11 @@ def _worker_nvfp4_linear(rank, world_size, port): dtype = torch.bfloat16 gtp_group = dist.new_group(list(range(world_size))) - layer = te.Linear( - in_features=in_f, - out_features=out_f, - bias=False, - params_dtype=dtype, - device="cuda", - gtp_group=gtp_group, - ) + layer = 
_make_gtp_linear(in_f, out_f, gtp_group, dtype) inp = torch.randn(batch, in_f, dtype=dtype, device="cuda", requires_grad=True) dist.broadcast(inp, src=0) - # Forward under NVFP4 recipe – triggers setup() and NVFP4 quantization + # Forward under NVFP4 recipe - triggers setup() and NVFP4 quantization recipe = NVFP4BlockScaling() with fp8_autocast(enabled=True, fp8_recipe=recipe): out = layer(inp, is_first_microbatch=True) @@ -794,14 +659,7 @@ def _worker_nvfp4_linear_unaligned(rank, world_size, port): dtype = torch.bfloat16 gtp_group = dist.new_group(list(range(world_size))) - layer = te.Linear( - in_features=in_f, - out_features=out_f, - bias=False, - params_dtype=dtype, - device="cuda", - gtp_group=gtp_group, - ) + layer = _make_gtp_linear(in_f, out_f, gtp_group, dtype) inp = torch.randn(batch, in_f, dtype=dtype, device="cuda", requires_grad=True) dist.broadcast(inp, src=0) @@ -839,15 +697,7 @@ def _worker_nvfp4_grouped_linear(rank, world_size, port, num_gemms): dtype = torch.bfloat16 gtp_group = dist.new_group(list(range(world_size))) - layer = te.GroupedLinear( - num_gemms=num_gemms, - in_features=in_f, - out_features=out_f, - bias=False, - params_dtype=dtype, - device="cuda", - gtp_group=gtp_group, - ) + layer = _make_gtp_grouped_linear(num_gemms, in_f, out_f, gtp_group, dtype) assert isinstance(layer.weight0, GTPShardedParam) m_splits = [total_tokens // num_gemms] * num_gemms @@ -902,18 +752,11 @@ def _worker_mxfp8_linear(rank, world_size, port): dtype = torch.bfloat16 gtp_group = dist.new_group(list(range(world_size))) - layer = te.Linear( - in_features=in_f, - out_features=out_f, - bias=False, - params_dtype=dtype, - device="cuda", - gtp_group=gtp_group, - ) + layer = _make_gtp_linear(in_f, out_f, gtp_group, dtype) inp = torch.randn(batch, in_f, dtype=dtype, device="cuda", requires_grad=True) dist.broadcast(inp, src=0) - # Forward under MXFP8 recipe – triggers setup() and MXFP8 quantization + # Forward under MXFP8 recipe - triggers setup() and MXFP8 
quantization recipe = MXFP8BlockScaling() with fp8_autocast(enabled=True, fp8_recipe=recipe): out = layer(inp, is_first_microbatch=True) @@ -960,14 +803,7 @@ def _worker_mxfp8_linear_unaligned(rank, world_size, port): dtype = torch.bfloat16 gtp_group = dist.new_group(list(range(world_size))) - layer = te.Linear( - in_features=in_f, - out_features=out_f, - bias=False, - params_dtype=dtype, - device="cuda", - gtp_group=gtp_group, - ) + layer = _make_gtp_linear(in_f, out_f, gtp_group, dtype) inp = torch.randn(batch, in_f, dtype=dtype, device="cuda", requires_grad=True) dist.broadcast(inp, src=0) @@ -979,12 +815,6 @@ def _worker_mxfp8_linear_unaligned(rank, world_size, port): assert torch.isfinite(out).all(), "MXFP8 GTP (unaligned) output has non-finite values" -def _requires_mxfp8(): - available, reason = is_mxfp8_available(return_reason=True) - if not available: - pytest.skip(f"MXFP8 not available: {reason}") - - class TestMXFP8LinearGTP: def test_forward_backward(self): _requires_mxfp8() @@ -1026,26 +856,15 @@ def test_invalid_key_raises(self): # --------------------------------------------------------------------------- -# 15. GTPShardedParam properties – shape computations and padding +# 15. 
GTPShardedParam properties - shape computations and padding # --------------------------------------------------------------------------- class TestGTPShardedParamProperties: - class _FakeGroup: - def __init__(self, size=4, rank=0): - self._size = size - self._rank = rank - - def size(self): - return self._size - - def rank(self): - return self._rank - def _make_param(self, shape, pad_length=0, group_size=4, group_rank=0): p = GTPShardedParam(torch.zeros(*shape)) - p.group = self._FakeGroup(size=group_size, rank=group_rank) + p.group = _FakeGroup(size=group_size, rank=group_rank) p.pad_length = pad_length p.expert_idx = None return p @@ -1086,7 +905,7 @@ def test_unsharded_shape_strips_padding(self): def test_get_padded_shard_identity_when_no_padding(self): p = self._make_param((6, 4), pad_length=0) result = p.get_padded_shard() - assert result is p # identity – no copy needed + assert result is p # identity - no copy needed def test_get_padded_shard_identity_non_last_rank(self): # pad_length > 0 but not the padded last rank → no padding added @@ -1125,22 +944,15 @@ def test_strip_padding_multi_row(self): # --------------------------------------------------------------------------- -# 16. _get_cache_key – expert vs non-expert, fwd vs bwd +# 16. _get_cache_key - expert vs non-expert, fwd vs bwd # --------------------------------------------------------------------------- class TestGTPCacheKey: - class _FakeGroup: - def size(self): - return 4 - - def rank(self): - return 0 - def _param(self, shape=(16, 32), expert_idx=None): p = GTPShardedParam(torch.zeros(*shape)) - p.group = self._FakeGroup() + p.group = _FakeGroup(size=4) p.expert_idx = expert_idx p.pad_length = 0 return p @@ -1193,70 +1005,7 @@ def test_rs_key_differs_from_ag_key(self): # --------------------------------------------------------------------------- -# 17. 
GTPWeightCache.take() deferred vs get() immediate pool return -# --------------------------------------------------------------------------- - - -class TestGTPCacheRelease: - """Tests for GTPWeightCache reserve/get/release semantics.""" - - class _FakeGroup: - def size(self): - return 2 - - def rank(self): - return 0 - - def _param(self, shape=(8, 4)): - p = GTPShardedParam(torch.zeros(*shape)) - p.group = self._FakeGroup() - p.expert_idx = None - p.pad_length = 0 - p._quantizer = None - return p - - def test_release_returns_buffer_to_pool(self): - """release() puts the buffer back so the next reserve+get reuses it.""" - cache = GTPWeightCache() - p = self._param() - t1 = cache.reserve(p, torch.bfloat16, fwd=True) - buf1 = cache.get(t1) - cache.release(t1) - # New ticket should pop buf1 from pool - t2 = cache.reserve(p, torch.bfloat16, fwd=True) - buf2 = cache.get(t2) - assert buf2 is buf1, "Buffer should be reused after release()" - cache.release(t2) - - def test_without_release_pool_stays_empty(self): - """Without release(), subsequent reserves allocate fresh buffers.""" - cache = GTPWeightCache() - p = self._param() - t1 = cache.reserve(p, torch.bfloat16, fwd=True) - buf1 = cache.get(t1) - # Do NOT release t1 — pool stays empty - t2 = cache.reserve(p, torch.bfloat16, fwd=True) - buf2 = cache.get(t2) - assert buf2 is not buf1, "Without release, a fresh buffer must be allocated" - - def test_get_same_ticket_returns_same_buf(self): - """get() is idempotent — calling it twice returns the same buffer.""" - cache = GTPWeightCache() - p = self._param() - t = cache.reserve(p, torch.bfloat16, fwd=True) - buf_a = cache.get(t) - buf_b = cache.get(t) - assert buf_a is buf_b - cache.release(t) - - def test_release_invalid_ticket_raises(self): - cache = GTPWeightCache() - with pytest.raises(KeyError): - cache.release(9999) - - -# --------------------------------------------------------------------------- -# 18. tag_gtp_params_with_names – _debug_name population +# 17. 
tag_gtp_params_with_names - _debug_name population # --------------------------------------------------------------------------- @@ -1264,14 +1013,6 @@ class TestTagGTPParamsWithNames: def test_debug_name_populated_for_gtp_param(self): """GTPShardedParam._debug_name is set to the dotted parameter path.""" - - class _FakeGroup: - def size(self): - return 1 - - def rank(self): - return 0 - model = nn.Linear(4, 8, bias=False) w = GTPShardedParam(torch.randn(8, 4)) w.group = _FakeGroup() @@ -1282,14 +1023,6 @@ def rank(self): def test_nested_module_debug_name(self): """Nested module produces a dotted debug name.""" - - class _FakeGroup: - def size(self): - return 1 - - def rank(self): - return 0 - outer = nn.Sequential(nn.Linear(4, 8, bias=False)) w = GTPShardedParam(torch.randn(8, 4)) w.group = _FakeGroup() @@ -1305,24 +1038,17 @@ def test_non_gtp_params_are_skipped(self): # --------------------------------------------------------------------------- -# 19. wrap_module_params_gtp is a no-op when gtp_group.size() == 1 +# 18. wrap_module_params_gtp is a no-op when gtp_group.size() == 1 # --------------------------------------------------------------------------- class TestGTPGroupSizeOne: - class _SingletonGroup: - def size(self): - return 1 - - def rank(self): - return 0 - def test_no_sharding_when_gtp_size_one(self): """wrap_module_params_gtp must be a no-op for a singleton GTP group.""" mod = nn.Linear(32, 64, bias=False) original_weight = mod.weight - wrap_module_params_gtp(mod, ["weight"], self._SingletonGroup()) + wrap_module_params_gtp(mod, ["weight"], _FakeGroup()) assert ( mod.weight is original_weight ), "gtp_group.size()==1 should leave parameters unchanged" @@ -1330,7 +1056,7 @@ def test_no_sharding_when_gtp_size_one(self): # --------------------------------------------------------------------------- -# 21. weight_prefetch=False: forward still produces correct output +# 19. 
weight_prefetch=False: forward still produces correct output # --------------------------------------------------------------------------- @@ -1342,22 +1068,8 @@ def _worker_prefetch_disabled(rank, world_size, port): gtp_module.update_gtp_config(weight_prefetch=False) try: - l0 = te.Linear( - in_features=in_f, - out_features=out_f, - bias=False, - params_dtype=dtype, - device="cuda", - gtp_group=gtp_group, - ) - l1 = te.Linear( - in_features=in_f, - out_features=out_f, - bias=False, - params_dtype=dtype, - device="cuda", - gtp_group=gtp_group, - ) + l0 = _make_gtp_linear(in_f, out_f, gtp_group, dtype) + l1 = _make_gtp_linear(in_f, out_f, gtp_group, dtype) inp = torch.randn(4, in_f, dtype=dtype, device="cuda") dist.broadcast(inp, src=0) @@ -1379,7 +1091,7 @@ def test_forward_works_without_prefetch(self): # --------------------------------------------------------------------------- -# 22. fuse_wgrad_accumulation=True: wgrad is accumulated into main_grad +# 20. fuse_wgrad_accumulation=True: wgrad is accumulated into main_grad # --------------------------------------------------------------------------- @@ -1389,15 +1101,7 @@ def _worker_fuse_wgrad(rank, world_size, port): dtype = torch.bfloat16 gtp_group = dist.new_group(list(range(world_size))) - layer = te.Linear( - in_features=in_f, - out_features=out_f, - bias=False, - params_dtype=dtype, - device="cuda", - gtp_group=gtp_group, - fuse_wgrad_accumulation=True, - ) + layer = _make_gtp_linear(in_f, out_f, gtp_group, dtype, fuse_wgrad_accumulation=True) # Allocate main_grad on the local shard shape w = layer.weight @@ -1421,7 +1125,7 @@ def test_wgrad_accumulated_into_main_grad(self): # --------------------------------------------------------------------------- -# 23. _grad_accum_hook is called after reduce-scatter +# 21. 
_grad_accum_hook is called after reduce-scatter # --------------------------------------------------------------------------- @@ -1432,14 +1136,7 @@ def _worker_main_grad_updated_after_bwd(rank, world_size, port): dtype = torch.bfloat16 gtp_group = dist.new_group(list(range(world_size))) - layer = te.Linear( - in_features=in_f, - out_features=out_f, - bias=False, - params_dtype=dtype, - device="cuda", - gtp_group=gtp_group, - ) + layer = _make_gtp_linear(in_f, out_f, gtp_group, dtype) # wgrad RS path always accumulates into main_grad; allocate before backward. layer.weight.main_grad = torch.zeros(layer.weight.shape, dtype=dtype, device="cuda") @@ -1460,7 +1157,7 @@ def test_main_grad_updated_after_backward(self): # --------------------------------------------------------------------------- -# 24. wait_async_comms(finalize_after_drain=True) inline-accumulation fallback +# 22. wait_async_comms(finalize_after_drain=True) inline-accumulation fallback # --------------------------------------------------------------------------- @@ -1478,33 +1175,33 @@ class TestWaitAsyncCommsFallback: pin down the fallback's contract. 
""" - class _FakeGroup: - def size(self): - return 1 - - def rank(self): - return 0 - - @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA required") - def test_fallback_accumulates_when_no_rs_handle(self): + @staticmethod + def _make_inflight_param(main_grad_fill=0.0, already_finalized=False): + """Build a minimal GTPShardedParam wired for wait_async_comms testing.""" dtype = torch.bfloat16 p = GTPShardedParam(torch.zeros(8, 4, dtype=dtype, device="cuda")) - p.group = self._FakeGroup() + p.group = _FakeGroup() p.expert_idx = None p.pad_length = 0 p.chain_id = gtp_module.GTPChain.UNGRAPHED.value p._quantizer = None p.is_routed_expert = False # ⇒ self._weights property returns [self] - p.main_grad = torch.zeros(8, 4, dtype=dtype, device="cuda") - p._prefetch_handle = None # _wait_param_gather is no-op - p._wgrad_rs_handle = None # _wait_reduce_scatter is no-op → fallback fires + p.main_grad = torch.full((8, 4), main_grad_fill, dtype=dtype, device="cuda") + p._prefetch_handle = None # _wait_param_gather is no-op + p._wgrad_rs_handle = None # _wait_reduce_scatter is no-op → fallback fires p._cached_ag_stream = None p._cached_rs_stream = None p.ag_event = torch.cuda.Event(external=True) p.rs_event = torch.cuda.Event(external=True) - p.rs_event.record() # so rs_event.wait() in fallback doesn't block - p._already_finalized = False + p.rs_event.record() # so rs_event.wait() in fallback doesn't block + p._already_finalized = already_finalized p.grad_added_to_main_grad = False + return p + + @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA required") + def test_fallback_accumulates_when_no_rs_handle(self): + dtype = torch.bfloat16 + p = self._make_inflight_param(main_grad_fill=0.0) # Place a known wgrad in the cache for the fallback to read. 
cache = gtp_module.get_global_GTP_cache() @@ -1534,27 +1231,8 @@ def test_fallback_accumulates_when_no_rs_handle(self): @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA required") def test_fallback_skipped_when_already_finalized(self): """When _already_finalized=True, the fallback must NOT re-accumulate.""" - dtype = torch.bfloat16 - p = GTPShardedParam(torch.zeros(8, 4, dtype=dtype, device="cuda")) - p.group = self._FakeGroup() - p.expert_idx = None - p.pad_length = 0 - p.chain_id = gtp_module.GTPChain.UNGRAPHED.value - p._quantizer = None - p.is_routed_expert = False # ⇒ self._weights property returns [self] - # Pre-existing main_grad with a value the fallback must NOT overwrite. - p.main_grad = torch.full((8, 4), 5.0, dtype=dtype, device="cuda") - p._prefetch_handle = None - p._wgrad_rs_handle = None - p._cached_ag_stream = None - p._cached_rs_stream = None - p.ag_event = torch.cuda.Event(external=True) - p.rs_event = torch.cuda.Event(external=True) - p.rs_event.record() - p._already_finalized = True # ← short-circuits the fallback - - # No _rs_ticket: if the fallback ran it would AttributeError on - # cache.get(None). The skip path must not touch the cache at all. + p = self._make_inflight_param(main_grad_fill=5.0, already_finalized=True) + # No _rs_ticket: if the fallback ran it would AttributeError on cache.get(None). p._rs_ticket = None saved = set(gtp_module._inflight_comm_params) @@ -1584,24 +1262,7 @@ def test_fallback_skipped_for_pure_ag_param(self): called cache.get(None) and crashed with KeyError; the guard now skips the inline accumulation entirely when no weight has an RS ticket. """ - dtype = torch.bfloat16 - p = GTPShardedParam(torch.zeros(8, 4, dtype=dtype, device="cuda")) - p.group = self._FakeGroup() - p.expert_idx = None - p.pad_length = 0 - p.chain_id = gtp_module.GTPChain.UNGRAPHED.value - p._quantizer = None - p.is_routed_expert = False - # Pre-existing main_grad with a sentinel that must survive untouched. 
- p.main_grad = torch.full((8, 4), 7.0, dtype=dtype, device="cuda") - p._prefetch_handle = None - p._wgrad_rs_handle = None - p._cached_ag_stream = None - p._cached_rs_stream = None - p.ag_event = torch.cuda.Event(external=True) - p.rs_event = torch.cuda.Event(external=True) - p.rs_event.record() - p._already_finalized = False + p = self._make_inflight_param(main_grad_fill=7.0) # Critical: simulates a pure-AG prefetch — no RS ever issued, ticket is None. p._rs_ticket = None @@ -1627,7 +1288,7 @@ def test_fallback_skipped_for_pure_ag_param(self): # --------------------------------------------------------------------------- -# 24. GTP DDP bucket alignment: distributed optimizer bucket-end assertion +# 23. GTP DDP bucket alignment: distributed optimizer bucket-end assertion # --------------------------------------------------------------------------- diff --git a/tests/unit_tests/generalized_tensor_parallel/test_mamba_gtp.py b/tests/unit_tests/generalized_tensor_parallel/test_mamba_gtp.py new file mode 100644 index 00000000000..27a58ccec5f --- /dev/null +++ b/tests/unit_tests/generalized_tensor_parallel/test_mamba_gtp.py @@ -0,0 +1,261 @@ +# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +"""Integration tests for GTP + Mamba correctness. + +Test groups +----------- +TestMambaGTPCorrectness - GTP Mamba loss trajectory matches baseline (no-GTP) over 10 + training steps using MXFP8 and Nemotron3-Super Mamba hyperparameters. 
+""" + +import pytest +import torch +import torch.distributed as dist + +from megatron.experimental.gtp import HAVE_GTP + +if not HAVE_GTP: + pytest.skip("GTP requires TransformerEngine >= 2.17", allow_module_level=True) + +from transformer_engine.pytorch import fp8_autocast + +from megatron.experimental.gtp import GTPShardedParam +from tests.unit_tests.generalized_tensor_parallel.gtp_test_utils import ( + _requires_mxfp8, + _run_distributed, + _torchrun_dist_init, + reset_fp8_state, + reset_gtp_globals, +) + +# --------------------------------------------------------------------------- +# Mamba GTP correctness: per-step loss trajectory baseline vs GTP=4 +# --------------------------------------------------------------------------- + + +def _worker_mamba_gtp_correctness(rank, world_size, port): + """Verify GTP Mamba produces the same per-step loss as a no-GTP baseline. + + Phase 1 — GTP=1, DP=4: + All 4 ranks hold the full model and process identical inputs. Gradients + are identical across ranks (no all-reduce needed). Weight update: + param.data -= lr * param.grad + + Phase 2 — GTP=4, DP=1: + Weights sharded across 4 ranks. After backward, wgrad reduce-scatter + sums each shard's identical wgrad over all ranks, so: + main_grad[rank_i] = gtp_size * dW[shard_i] + The optimizer divides by gtp_size to recover the per-element gradient: + param.data -= (lr / gtp_size) * param.main_grad + + Both phases use identical initial weights (synced from rank 0 in phase 1, + restored as shards in phase 2) and identical step-by-step inputs. The + per-step loss trajectories must agree within atol=1e-5, rtol=1e-5. 
+ """ + from transformer_engine.common.recipe import MXFP8BlockScaling + from transformer_engine.pytorch.quantization import FP8GlobalStateManager + + from megatron.core import parallel_state as ps + from megatron.core.extensions.transformer_engine import ( + TELayerNormColumnParallelLinear, + TERowParallelLinear, + ) + from megatron.core.fusions.fused_bias_dropout import get_bias_dropout_add + from megatron.core.process_groups_config import ProcessGroupCollection + from megatron.core.ssm.mamba_layer import MambaLayer, MambaLayerSubmodules + from megatron.core.ssm.mamba_mixer import MambaMixer, MambaMixerSubmodules + from megatron.core.tensor_parallel.random import model_parallel_cuda_manual_seed + from megatron.core.transformer.spec_utils import ModuleSpec + from megatron.core.transformer.transformer_config import TransformerConfig + + # Nemotron3-Super Proxy Mamba hyperparameters. + # in_proj_out = 2*8192 + 2*8*128 + 128 = 18560; 18560/4 = 4640, 4640%16 = 0 (MXFP8-aligned). + HIDDEN = 4096 + NHEADS = 128 # mamba_num_heads; d_inner = nheads * headdim = 128 * 64 = 8192 + NGROUPS = 8 # mamba_num_groups (default) + D_STATE = 128 # mamba_state_dim (default) + NUM_LAYERS = 2 + SEQ = 32 + BATCH = 1 + LR = 0.01 + STEPS = 10 + dtype = torch.bfloat16 + recipe = MXFP8BlockScaling() + + def make_config(): + return TransformerConfig( + num_attention_heads=32, + num_layers=NUM_LAYERS, + hidden_size=HIDDEN, + mamba_num_heads=NHEADS, + mamba_head_dim=64, + mamba_state_dim=D_STATE, + mamba_num_groups=NGROUPS, + use_mamba_mem_eff_path=True, + params_dtype=dtype, + hidden_dropout=0.0, + bias_dropout_fusion=False, + fp8='e4m3', + tensor_model_parallel_size=1, + pipeline_model_parallel_size=1, + ) + + def make_mamba_stack(config, pg_collection): + submodules = MambaLayerSubmodules( + mixer=ModuleSpec( + module=MambaMixer, + submodules=MambaMixerSubmodules( + in_proj=TELayerNormColumnParallelLinear, + out_proj=TERowParallelLinear, + ), + ), + mamba_bda=get_bias_dropout_add, + ) + 
return torch.nn.ModuleList( + [ + MambaLayer(config, submodules, layer_number=i + 1, pg_collection=pg_collection) + for i in range(NUM_LAYERS) + ] + ) + + def run_step(layers, x): + with fp8_autocast(enabled=True, fp8_recipe=recipe): + for layer in layers: + x = layer(x) + return x.mean() + + # ------------------------------------------------------------------------- + # Phase 1: Baseline — GTP=1 (DP=4) + # ------------------------------------------------------------------------- + ps.destroy_model_parallel() + ps.initialize_model_parallel( + tensor_model_parallel_size=1, pipeline_model_parallel_size=1, gtp_remat_size=1 + ) + model_parallel_cuda_manual_seed(42) + + pg_collection = ProcessGroupCollection.use_mpu_process_groups(required_pgs=['tp', 'cp', 'gtp']) + config = make_config() + layers = make_mamba_stack(config, pg_collection) + for layer in layers: + layer.cuda() + + # Verify baseline has no GTP sharding (gtp_remat_size=1 should leave plain parameters). + assert not any( + isinstance(p, GTPShardedParam) for p in layers.parameters() + ), "Baseline GTP=1 stack should have no GTPShardedParam" + + # Synchronize weights from rank 0 across all DP ranks. + for p in layers.parameters(): + dist.broadcast(p.data, src=0) + + # Save initial weights; will be used to initialize the GTP model identically. 
+ saved_weights = {n: p.data.clone() for n, p in layers.named_parameters()} + + baseline_losses = [] + for step in range(STEPS): + torch.manual_seed(step) + x = torch.randn(SEQ, BATCH, HIDDEN, dtype=dtype, device='cuda') + dist.broadcast(x, src=0) + + loss = run_step(layers, x) + if rank == 0: + baseline_losses.append(loss.item()) + + loss.backward() + with torch.no_grad(): + for p in layers.parameters(): + if p.grad is not None: + p.data.sub_(LR * p.grad) + p.grad.zero_() + + ps.destroy_model_parallel() + GTPShardedParam._chain_state = {} + FP8GlobalStateManager.reset() + + # ------------------------------------------------------------------------- + # Phase 2: GTP=4 (DP=1) + # ------------------------------------------------------------------------- + ps.initialize_model_parallel( + tensor_model_parallel_size=1, pipeline_model_parallel_size=1, gtp_remat_size=4 + ) + model_parallel_cuda_manual_seed(42) + + pg_collection = ProcessGroupCollection.use_mpu_process_groups(required_pgs=['tp', 'cp', 'gtp']) + config = make_config() + layers_gtp = make_mamba_stack(config, pg_collection) + for layer in layers_gtp: + layer.cuda() + + gtp_group = ps.get_generalized_tensor_parallel_remat_group() + gtp_size = gtp_group.size() + gtp_rank = gtp_group.rank() + + # Verify GTP is truly active: at least one param must be a GTPShardedParam. + gtp_params = [p for p in layers_gtp.parameters() if isinstance(p, GTPShardedParam)] + assert len(gtp_params) > 0, "GTP is not active: no GTPShardedParam found in GTP=4 Mamba stack" + + # Restore initial weights: GTP params get the matching shard, others get the full tensor. + for name, p in layers_gtp.named_parameters(): + full = saved_weights[name] + if isinstance(p, GTPShardedParam): + shard_size = p.shape[0] + p.data.copy_(full[gtp_rank * shard_size: (gtp_rank + 1) * shard_size]) + else: + p.data.copy_(full) + + # Pre-allocate main_grad for GTP params (required before the first backward). 
+ for p in layers_gtp.parameters(): + if isinstance(p, GTPShardedParam): + p.main_grad = torch.zeros(p.shape, dtype=dtype, device='cuda') + + gtp_losses = [] + for step in range(STEPS): + for p in layers_gtp.parameters(): + if isinstance(p, GTPShardedParam): + p.main_grad.zero_() + + torch.manual_seed(step) + x = torch.randn(SEQ, BATCH, HIDDEN, dtype=dtype, device='cuda') + dist.broadcast(x, src=0) + + loss = run_step(layers_gtp, x) + if rank == 0: + gtp_losses.append(loss.item()) + + loss.backward() + + # After RS, main_grad = gtp_size * dW_shard (sum over ranks, all ranks hold the same + # full wgrad after all-gathering the weight in fwd). Divide by gtp_size so the weight + # update is equivalent to the baseline. + with torch.no_grad(): + for p in layers_gtp.parameters(): + if isinstance(p, GTPShardedParam): + p.data.sub_((LR / gtp_size) * p.main_grad) + elif p.grad is not None: + p.data.sub_(LR * p.grad) + p.grad.zero_() + + ps.destroy_model_parallel() + ps.initialize_model_parallel() + GTPShardedParam._chain_state = {} + + # ------------------------------------------------------------------------- + # Compare per-step loss trajectories on rank 0 + # ------------------------------------------------------------------------- + if rank == 0: + assert len(baseline_losses) == STEPS + assert len(gtp_losses) == STEPS + for step, (lb, lg) in enumerate(zip(baseline_losses, gtp_losses)): + print(f"Step {step:2d}: baseline={lb:.6f} gtp={lg:.6f}", flush=True) + torch.testing.assert_close( + torch.tensor(gtp_losses), torch.tensor(baseline_losses), atol=1e-5, rtol=1e-5 + ) + + +class TestMambaGTPCorrectness: + def test_mamba_gtp_loss_trajectory_matches_baseline(self): + """GTP Mamba per-step losses must match no-GTP baseline (atol=1e-5, rtol=1e-5; MXFP8, Nemotron3-Super).""" + _requires_mxfp8() + if torch.cuda.device_count() < 4: + pytest.skip("Requires at least 4 CUDA devices") + _run_distributed(_worker_mamba_gtp_correctness, 4) diff --git 
a/tests/unit_tests/generalized_tensor_parallel/test_moe_egtp.py b/tests/unit_tests/generalized_tensor_parallel/test_moe_egtp.py new file mode 100644 index 00000000000..c5046ef9351 --- /dev/null +++ b/tests/unit_tests/generalized_tensor_parallel/test_moe_egtp.py @@ -0,0 +1,289 @@ +# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +"""Integration tests for EGTP + MoE correctness. + +Test groups +----------- +TestMoEEGTPCorrectness - EGTP MoE loss trajectory matches baseline (no-EGTP) over 10 + training steps using MXFP8 and Nemotron3-Super MoE hyperparameters. +""" + +import pytest +import torch +import torch.distributed as dist + +from megatron.experimental.gtp import HAVE_GTP + +if not HAVE_GTP: + pytest.skip("GTP requires TransformerEngine >= 2.17", allow_module_level=True) + +from transformer_engine.pytorch import fp8_autocast + +from megatron.core.transformer.moe.moe_utils import get_default_pg_collection +from megatron.experimental.gtp import GTPShardedParam +from tests.unit_tests.generalized_tensor_parallel.gtp_test_utils import ( + _requires_mxfp8, + _run_distributed, + _torchrun_dist_init, + reset_fp8_state, + reset_gtp_globals, +) + +# --------------------------------------------------------------------------- +# MoE EGTP correctness: per-step loss trajectory EP=4 baseline vs EP=2+EGTP=2 +# --------------------------------------------------------------------------- + + +def _worker_moe_egtp_correctness(rank, world_size, port): + """Verify EP=2+EGTP=2 MoE produces the same per-step loss as an EP=4 no-EGTP baseline. + + Phase 1 — EP=4, EGTP=1: + All 4 ranks form one EP group; each rank holds 2 full expert weights (8 total). + All ranks receive the same MoE-layer input; alltoall dispatch routes each token + to its assigned expert rank, so each rank computes a different token subset. + Gradients are local to each expert's rank. 
Weight update: + param.data -= lr * param.grad + + Phase 2 — EP=2, EGTP=2: + Two EP groups of 2 ranks, each EGTP-sharded over 2 ranks. Expert weights + are sharded along dim 0 within each EGTP group (shard = full_dim0 / egtp_size). + After backward, wgrad reduce-scatter sums each shard's identical wgrad: + main_grad[rank_i] = egtp_size * dW[shard_i] + The optimizer divides by egtp_size: + param.data -= (lr / egtp_size) * param.main_grad + + Weight sharing (test-only): + To ensure both phases start from identical expert weights, an all-gather + collects the full 8-expert table from the EP=4 group (where each rank holds + only 2 experts) onto every rank. Phase 2 then slices each rank's local + experts and EGTP shard from that global table. + + Nemotron3-Super Proxy MoE hyperparameters (scaled for unit-test speed): + hidden=4096, ffn_hidden_size=2688, num_experts=8, topk=2 + MXFP8 alignment with EGTP=2: + 2688/2=1344, 1344%16=0 (fc1 shard); 4096/2=2048, 2048%16=0 (fc2 shard) + """ + from transformer_engine.common.recipe import MXFP8BlockScaling + from transformer_engine.pytorch.quantization import FP8GlobalStateManager + + from megatron.core import parallel_state as ps + from megatron.core.models.gpt.moe_module_specs import get_moe_module_spec + from megatron.core.process_groups_config import ProcessGroupCollection + from megatron.core.tensor_parallel.random import model_parallel_cuda_manual_seed + from megatron.core.transformer.transformer_config import TransformerConfig + + # Nemotron3-Super MoE hyperparameters (num_experts scaled from 512 to 8 for test speed). 
+ HIDDEN = 4096 + FFN_HIDDEN = 2688 + NUM_EXPERTS = 8 + TOPK = 2 + SEQ = 32 + BATCH = 1 + LR = 0.01 + STEPS = 10 + dtype = torch.bfloat16 + recipe = MXFP8BlockScaling() + + def make_config(): + return TransformerConfig( + num_attention_heads=32, + num_layers=1, + hidden_size=HIDDEN, + num_moe_experts=NUM_EXPERTS, + moe_router_topk=TOPK, + moe_ffn_hidden_size=FFN_HIDDEN, + moe_grouped_gemm=True, + moe_token_dispatcher_type="alltoall", + moe_aux_loss_coeff=0.0, + add_bias_linear=False, + params_dtype=dtype, + hidden_dropout=0.0, + bias_dropout_fusion=False, + fp8='e4m3', + tensor_model_parallel_size=1, + pipeline_model_parallel_size=1, + ) + + def make_moe_layer(config, pg_collection): + moe_spec = get_moe_module_spec( + use_te=True, num_experts=NUM_EXPERTS, moe_grouped_gemm=True + ) + return moe_spec(config, layer_number=1, pg_collection=pg_collection) + + def run_step(layer, x): + with fp8_autocast(enabled=True, fp8_recipe=recipe): + output, _ = layer(x) + return output.mean() + + # ------------------------------------------------------------------------- + # Phase 1: Baseline — EP=4, EGTP=1 (DP=1) + # ------------------------------------------------------------------------- + ps.destroy_model_parallel() + ps.initialize_model_parallel( + tensor_model_parallel_size=1, + pipeline_model_parallel_size=1, + expert_model_parallel_size=4, + expert_gtp_remat_size=1, + ) + model_parallel_cuda_manual_seed(42) + + pg_collection = ProcessGroupCollection.use_mpu_process_groups(required_pgs=['ep']) + ep_group = pg_collection.ep + num_local_experts_baseline = NUM_EXPERTS // 4 # = 2 + + config = make_config() + layer = make_moe_layer(config, None) # MoELayer uses get_default_pg_collection() + layer.cuda() + + # Verify baseline has no GTP sharding (EGTP=1 should leave plain parameters). 
+ assert not any( + isinstance(p, GTPShardedParam) for p in layer.parameters() + ), "Baseline EP=4 layer should have no GTPShardedParam (EGTP=1)" + + # Synchronize non-expert weights from rank 0; expert weights are rank-local. + for name, p in layer.named_parameters(): + if 'linear_fc1.weight' not in name and 'linear_fc2.weight' not in name: + dist.broadcast(p.data, src=0) + + # Collect the full expert weight table so Phase 2 can restore identical init weights. + # EP=4: each rank holds 2 experts; all-gather gives every rank the complete [8, dim, ...] table. + local_fc1 = torch.stack([ + dict(layer.named_parameters())[f'experts.linear_fc1.weight{i}'].data + for i in range(num_local_experts_baseline) + ]) # [2, FFN_HIDDEN, HIDDEN] + global_fc1 = torch.zeros(NUM_EXPERTS, FFN_HIDDEN, HIDDEN, dtype=dtype, device='cuda') + dist.all_gather_into_tensor(global_fc1, local_fc1, group=ep_group) + + local_fc2 = torch.stack([ + dict(layer.named_parameters())[f'experts.linear_fc2.weight{i}'].data + for i in range(num_local_experts_baseline) + ]) # [2, HIDDEN, FFN_HIDDEN] + global_fc2 = torch.zeros(NUM_EXPERTS, HIDDEN, FFN_HIDDEN, dtype=dtype, device='cuda') + dist.all_gather_into_tensor(global_fc2, local_fc2, group=ep_group) + + # Save non-expert param values (router, norms, etc.) from rank 0. 
+ non_expert_weights = {} + for name, p in layer.named_parameters(): + if 'linear_fc1.weight' not in name and 'linear_fc2.weight' not in name: + non_expert_weights[name] = p.data.clone() + + baseline_losses = [] + for step in range(STEPS): + torch.manual_seed(step) + x = torch.randn(SEQ, BATCH, HIDDEN, dtype=dtype, device='cuda') + dist.broadcast(x, src=0) + + loss = run_step(layer, x) + if rank == 0: + baseline_losses.append(loss.item()) + + loss.backward() + with torch.no_grad(): + for p in layer.parameters(): + if p.grad is not None: + p.data.sub_(LR * p.grad) + p.grad.zero_() + + ps.destroy_model_parallel() + GTPShardedParam._chain_state = {} + FP8GlobalStateManager.reset() + + # ------------------------------------------------------------------------- + # Phase 2: EP=2, EGTP=2 (DP=1 effective) + # ------------------------------------------------------------------------- + ps.initialize_model_parallel( + tensor_model_parallel_size=1, + pipeline_model_parallel_size=1, + expert_model_parallel_size=2, + expert_gtp_remat_size=2, + ) + model_parallel_cuda_manual_seed(42) + + pg_collection = ProcessGroupCollection.use_mpu_process_groups(required_pgs=['expt_gtp']) + egtp_group = pg_collection.expt_gtp + egtp_size = egtp_group.size() + egtp_rank = egtp_group.rank() + ep_rank_egtp = dist.get_rank(ps.get_expert_model_parallel_group()) + num_local_experts_egtp = NUM_EXPERTS // 2 # = 4 + + config = make_config() + # Build full pg_collection for MoELayer: default groups + expt_gtp for EGTP sharding. + moe_pg = get_default_pg_collection() + moe_pg.expt_gtp = egtp_group + layer_egtp = make_moe_layer(config, moe_pg) + layer_egtp.cuda() + + # Verify EGTP is truly active: expert weight params must be GTPShardedParam instances. + egtp_params = [p for p in layer_egtp.parameters() if isinstance(p, GTPShardedParam)] + assert len(egtp_params) > 0, "EGTP is not active: no GTPShardedParam found in EP=2+EGTP=2 layer" + + # Restore weights from saved global tables. 
+ # Expert local index j → global expert id = ep_rank_egtp * num_local_experts_egtp + j. + fc1_shard = FFN_HIDDEN // egtp_size # 2688/2 = 1344 + fc2_shard = HIDDEN // egtp_size # 4096/2 = 2048 + for name, p in layer_egtp.named_parameters(): + if 'linear_fc1.weight' in name: + j = int(name.rsplit('weight', 1)[1]) + gid = ep_rank_egtp * num_local_experts_egtp + j + p.data.copy_(global_fc1[gid, egtp_rank * fc1_shard: (egtp_rank + 1) * fc1_shard]) + elif 'linear_fc2.weight' in name: + j = int(name.rsplit('weight', 1)[1]) + gid = ep_rank_egtp * num_local_experts_egtp + j + p.data.copy_(global_fc2[gid, egtp_rank * fc2_shard: (egtp_rank + 1) * fc2_shard]) + elif name in non_expert_weights: + p.data.copy_(non_expert_weights[name]) + + # Pre-allocate main_grad for EGTP params (required before the first backward). + for p in layer_egtp.parameters(): + if isinstance(p, GTPShardedParam): + p.main_grad = torch.zeros(p.shape, dtype=dtype, device='cuda') + + egtp_losses = [] + for step in range(STEPS): + for p in layer_egtp.parameters(): + if isinstance(p, GTPShardedParam): + p.main_grad.zero_() + + torch.manual_seed(step) + x = torch.randn(SEQ, BATCH, HIDDEN, dtype=dtype, device='cuda') + dist.broadcast(x, src=0) + + loss = run_step(layer_egtp, x) + if rank == 0: + egtp_losses.append(loss.item()) + + loss.backward() + + # After RS, main_grad = egtp_size * dW_shard. Divide by egtp_size to match baseline. 
+ with torch.no_grad(): + for p in layer_egtp.parameters(): + if isinstance(p, GTPShardedParam): + p.data.sub_((LR / egtp_size) * p.main_grad) + elif p.grad is not None: + p.data.sub_(LR * p.grad) + p.grad.zero_() + + ps.destroy_model_parallel() + ps.initialize_model_parallel() + GTPShardedParam._chain_state = {} + + # ------------------------------------------------------------------------- + # Compare per-step loss trajectories on rank 0 + # ------------------------------------------------------------------------- + if rank == 0: + assert len(baseline_losses) == STEPS + assert len(egtp_losses) == STEPS + for step, (lb, le) in enumerate(zip(baseline_losses, egtp_losses)): + print(f"Step {step:2d}: baseline={lb:.6f} egtp={le:.6f}", flush=True) + torch.testing.assert_close( + torch.tensor(egtp_losses), torch.tensor(baseline_losses), atol=1e-5, rtol=1e-5 + ) + + +class TestMoEEGTPCorrectness: + def test_moe_egtp_loss_trajectory_matches_baseline(self): + """EP=2+EGTP=2 MoE per-step losses must match EP=4 baseline: atol=1e-5, rtol=1e-5; MXFP8""" + _requires_mxfp8() + if torch.cuda.device_count() < 4: + pytest.skip("Requires at least 4 CUDA devices") + _run_distributed(_worker_moe_egtp_correctness, 4) diff --git a/tests/unit_tests/generalized_tensor_parallel/test_tp_gtp.py b/tests/unit_tests/generalized_tensor_parallel/test_tp_gtp.py index 88d17328944..38e24710cee 100644 --- a/tests/unit_tests/generalized_tensor_parallel/test_tp_gtp.py +++ b/tests/unit_tests/generalized_tensor_parallel/test_tp_gtp.py @@ -1,31 +1,25 @@ -# Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# See LICENSE for license information. +# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. """Unit tests for combined Tensor Parallelism + Generalized Tensor Parallelism (TP+GTP). 
-Process group layout (world_size = tp_size × gtp_size): +Process group layout (world_size = tp_size x gtp_size): - rank = gtp_rank × tp_size + tp_rank + rank = gtp_rank x tp_size + tp_rank TP group: all ranks that share the same gtp_rank (size = tp_size) GTP group: all ranks that share the same tp_rank (size = gtp_size) Test groups ----------- -1. TestTPGTPProcessGroups – verify TP/GTP group sizes and rank assignment -2. TestTPGTPColumnParallelLinear – column-parallel Linear: weight shape + fwd/bwd correctness -3. TestTPGTPRowParallelLinear – row-parallel Linear: weight shape + fwd/bwd smoke test -4. TestTPGTPLayerNormLinear – LayerNormLinear column-parallel smoke test +1. TestTPGTPProcessGroups - verify TP/GTP group sizes and rank assignment +2. TestTPGTPColumnParallelLinear - column-parallel Linear: fwd/bwd correctness (weight shape verified inline) +3. TestTPGTPRowParallelLinear - row-parallel Linear: fwd/bwd smoke test + numerical correctness +4. TestTPGTPLayerNormLinear - LayerNormLinear column-parallel smoke test Tests use (tp_size, gtp_size) = (2, 2) → world_size = 4 (runs on 4-GPU machines). -Run via torchrun (matches the rest of Megatron's unit tests): - - torchrun --nproc-per-node 4 -m pytest tests/unit_tests/generalized_tensor_parallel/test_tp_gtp.py -v - Multi-GPU tests skip automatically when ``torch.distributed.get_world_size()`` does not match -the requested combination of tp_size × gtp_size. +the requested combination of tp_size x gtp_size. 
""" import pytest @@ -38,71 +32,22 @@ pytest.skip("GTP requires TransformerEngine >= 2.17", allow_module_level=True) import transformer_engine.pytorch as te -from transformer_engine.pytorch.quantization import FP8GlobalStateManager from megatron.experimental.gtp import GTPShardedParam -from tests.unit_tests.test_utilities import Utils - -# --------------------------------------------------------------------------- -# Fixtures -# --------------------------------------------------------------------------- - - -@pytest.fixture(scope="module", autouse=True) -def _torchrun_dist_init(): - """Initialize the torchrun-managed dist group once per module. - - TP+GTP tests build TP and GTP subgroups within the world torchrun set - up; ``_run_distributed`` only skips when the required world size - doesn't match what torchrun launched with. - """ - Utils.initialize_model_parallel() - yield - Utils.destroy_model_parallel() - - -@pytest.fixture(autouse=True) -def reset_fp8_state(): - yield - FP8GlobalStateManager.reset() - - -@pytest.fixture(autouse=True) -def reset_gtp_globals(): - """Reset GTP mutable class/module-level state between tests.""" - yield - GTPShardedParam._chain_state = {} - - -# --------------------------------------------------------------------------- -# Helpers -# --------------------------------------------------------------------------- - - -def _run_distributed(fn, required_world_size: int, *args) -> None: - """Run ``fn(rank, world_size, port, *args)`` on every torchrun rank. - - ``port`` is unused (dist already initialized by torchrun) but kept so - existing worker signatures don't need editing. 
- """ - actual_world_size = torch.distributed.get_world_size() - if actual_world_size != required_world_size: - pytest.skip( - f"Requires world_size={required_world_size}, " - f"got {actual_world_size} (launch with torchrun --nproc-per-node={required_world_size})" - ) - fn(torch.distributed.get_rank(), actual_world_size, None, *args) - - -def _requires_multi_gpu(n: int): - if torch.cuda.device_count() < n: - pytest.skip(f"Requires at least {n} CUDA devices") +from tests.unit_tests.generalized_tensor_parallel.gtp_test_utils import ( + _make_gtp_linear, + _requires_multi_gpu, + _run_distributed, + _torchrun_dist_init, + reset_fp8_state, + reset_gtp_globals, +) def _build_groups(rank: int, world_size: int, tp_size: int, gtp_size: int): """Create TP and GTP process groups for a 2D parallelism grid. - Layout: rank = gtp_rank × tp_size + tp_rank + Layout: rank = gtp_rank x tp_size + tp_rank TP group: contiguous block [gtp_rank*tp_size, (gtp_rank+1)*tp_size) GTP group: strided set {tp_rank, tp_rank+tp_size, tp_rank+2*tp_size, ...} @@ -136,7 +81,7 @@ def _build_groups(rank: int, world_size: int, tp_size: int, gtp_size: int): # --------------------------------------------------------------------------- -# 1. TestTPGTPProcessGroups – group sizes and rank membership +# 1. 
TestTPGTPProcessGroups - group sizes and rank membership # --------------------------------------------------------------------------- @@ -168,34 +113,6 @@ def test_group_sizes_and_ranks(self, tp_size, gtp_size): # --------------------------------------------------------------------------- -def _worker_column_shape(rank, world_size, port, tp_size, gtp_size): - """Column-parallel: weight shape must be [out_f/(tp_size*gtp_size), in_f].""" - tp_group, gtp_group, _, _ = _build_groups(rank, world_size, tp_size, gtp_size) - - in_f = 64 - out_f = tp_size * gtp_size * 32 # per-rank shard = 32 rows - - layer = te.Linear( - in_features=in_f, - out_features=out_f, - parallel_mode="column", - bias=False, - params_dtype=torch.bfloat16, - device="cuda", - tp_group=tp_group, - gtp_group=gtp_group, - ) - - expected_rows = out_f // (tp_size * gtp_size) - assert isinstance( - layer.weight, GTPShardedParam - ), f"rank {rank}: weight should be GTPShardedParam" - assert layer.weight.shape == ( - expected_rows, - in_f, - ), f"rank {rank}: expected ({expected_rows}, {in_f}), got {layer.weight.shape}" - - def _worker_column_correctness(rank, world_size, port, tp_size, gtp_size): """Column-parallel output must equal inp @ (GTP-gathered TP-local weight)^T.""" torch.manual_seed(0) @@ -205,16 +122,7 @@ def _worker_column_correctness(rank, world_size, port, tp_size, gtp_size): out_f = tp_size * gtp_size * 32 # per-rank shard = 32 rows dtype = torch.bfloat16 - layer = te.Linear( - in_features=in_f, - out_features=out_f, - parallel_mode="column", - bias=False, - params_dtype=dtype, - device="cuda", - tp_group=tp_group, - gtp_group=gtp_group, - ) + layer = _make_gtp_linear(in_f, out_f, gtp_group, dtype, parallel_mode="column", tp_group=tp_group) # All-gather GTP shards → TP-local full weight [out_f/tp_size, in_f] shard = layer.weight.data.clone() @@ -239,7 +147,7 @@ def _worker_column_correctness(rank, world_size, port, tp_size, gtp_size): ref = inp.float() @ tp_local_weight.T ref = ref.to(dtype) 
assert torch.allclose( - out.float(), ref.float(), atol=0.1, rtol=0.1 + out.float(), ref.float(), atol=1e-2, rtol=1e-2 ), f"rank {rank}: output mismatch, max_diff={(out.float() - ref.float()).abs().max():.4f}" # Backward: dX is all-reduced across TP group internally by TE @@ -253,12 +161,6 @@ def _worker_column_correctness(rank, world_size, port, tp_size, gtp_size): class TestTPGTPColumnParallelLinear: - @pytest.mark.parametrize("tp_size,gtp_size", [(2, 2)]) - def test_weight_shape(self, tp_size, gtp_size): - world_size = tp_size * gtp_size - _requires_multi_gpu(world_size) - _run_distributed(_worker_column_shape, world_size, tp_size, gtp_size) - @pytest.mark.parametrize("tp_size,gtp_size", [(2, 2)]) def test_forward_backward_correctness(self, tp_size, gtp_size): world_size = tp_size * gtp_size @@ -271,23 +173,17 @@ def test_forward_backward_correctness(self, tp_size, gtp_size): # --------------------------------------------------------------------------- -def _worker_row_shape(rank, world_size, port, tp_size, gtp_size): - """Row-parallel: weight shape must be [out_f/gtp_size, in_f/tp_size].""" - tp_group, gtp_group, _, _ = _build_groups(rank, world_size, tp_size, gtp_size) +def _worker_row_forward_backward(rank, world_size, port, tp_size, gtp_size): + """Row-parallel: weight shape verified; output is all-reduced [batch, out_f]; backward produces finite dX.""" + torch.manual_seed(0) + tp_group, gtp_group, tp_rank, _ = _build_groups(rank, world_size, tp_size, gtp_size) - in_f = tp_size * 64 # TE divides by tp_size → local in_f = 64 - out_f = gtp_size * 64 # GTP divides by gtp_size → local out_f = 64 + batch = 16 + in_f = tp_size * 64 # full in_features + out_f = gtp_size * 64 # full out_features + dtype = torch.bfloat16 - layer = te.Linear( - in_features=in_f, - out_features=out_f, - parallel_mode="row", - bias=False, - params_dtype=torch.bfloat16, - device="cuda", - tp_group=tp_group, - gtp_group=gtp_group, - ) + layer = _make_gtp_linear(in_f, out_f, gtp_group, 
dtype, parallel_mode="row", tp_group=tp_group) expected_shape = (out_f // gtp_size, in_f // tp_size) assert isinstance( @@ -297,28 +193,6 @@ def _worker_row_shape(rank, world_size, port, tp_size, gtp_size): layer.weight.shape == expected_shape ), f"rank {rank}: expected {expected_shape}, got {layer.weight.shape}" - -def _worker_row_forward_backward(rank, world_size, port, tp_size, gtp_size): - """Row-parallel: output is all-reduced [batch, out_f]; backward produces finite dX.""" - torch.manual_seed(0) - tp_group, gtp_group, tp_rank, _ = _build_groups(rank, world_size, tp_size, gtp_size) - - batch = 16 - in_f = tp_size * 64 # full in_features - out_f = gtp_size * 64 # full out_features - dtype = torch.bfloat16 - - layer = te.Linear( - in_features=in_f, - out_features=out_f, - parallel_mode="row", - bias=False, - params_dtype=dtype, - device="cuda", - tp_group=tp_group, - gtp_group=gtp_group, - ) - # Row-parallel: each TP rank takes the corresponding slice of in_f full_inp = torch.randn(batch, in_f, dtype=dtype, device="cuda") dist.broadcast(full_inp, src=0) @@ -344,23 +218,14 @@ def _worker_row_forward_backward(rank, world_size, port, tp_size, gtp_size): def _worker_row_correctness(rank, world_size, port, tp_size, gtp_size): """Row-parallel all-reduced output must equal inp_full @ full_weight^T.""" torch.manual_seed(0) - tp_group, gtp_group, tp_rank, gtp_rank = _build_groups(rank, world_size, tp_size, gtp_size) + tp_group, gtp_group, tp_rank, _ = _build_groups(rank, world_size, tp_size, gtp_size) batch = 16 in_f = tp_size * 64 out_f = gtp_size * 64 dtype = torch.bfloat16 - layer = te.Linear( - in_features=in_f, - out_features=out_f, - parallel_mode="row", - bias=False, - params_dtype=dtype, - device="cuda", - tp_group=tp_group, - gtp_group=gtp_group, - ) + layer = _make_gtp_linear(in_f, out_f, gtp_group, dtype, parallel_mode="row", tp_group=tp_group) # Reconstruct full weight: all-gather GTP shards → TP-local, then all-gather TP shards shard = 
layer.weight.data.clone() @@ -385,17 +250,11 @@ def _worker_row_correctness(rank, world_size, port, tp_size, gtp_size): ref = full_inp.float() @ full_weight.T ref = ref.to(dtype) assert torch.allclose( - out.float(), ref.float(), atol=0.1, rtol=0.1 + out.float(), ref.float(), atol=2e-2, rtol=1e-2 ), f"rank {rank}: output mismatch, max_diff={(out.float() - ref.float()).abs().max():.4f}" class TestTPGTPRowParallelLinear: - @pytest.mark.parametrize("tp_size,gtp_size", [(2, 2)]) - def test_weight_shape(self, tp_size, gtp_size): - world_size = tp_size * gtp_size - _requires_multi_gpu(world_size) - _run_distributed(_worker_row_shape, world_size, tp_size, gtp_size) - @pytest.mark.parametrize("tp_size,gtp_size", [(2, 2)]) def test_forward_backward(self, tp_size, gtp_size): world_size = tp_size * gtp_size @@ -410,7 +269,7 @@ def test_forward_correctness(self, tp_size, gtp_size): # --------------------------------------------------------------------------- -# 4. TestTPGTPLayerNormLinear – column-parallel smoke test +# 4. 
TestTPGTPLayerNormLinear - column-parallel smoke test # --------------------------------------------------------------------------- From 46952174bb695af4308e133bbd640ff8bab9190d Mon Sep 17 00:00:00 2001 From: Shiqing Fan Date: Wed, 10 Jun 2026 01:56:25 -0700 Subject: [PATCH 28/59] fix comments from Jimmy and Deepak Signed-off-by: Shiqing Fan --- .../core/distributed/finalize_model_grads.py | 26 +------ megatron/core/transformer/cuda_graphs.py | 69 +++++++------------ megatron/experimental/gtp/README.md | 2 +- megatron/experimental/gtp/__init__.py | 6 +- .../gtp/generalized_tensor_parallelism.py | 34 ++++----- 5 files changed, 48 insertions(+), 89 deletions(-) diff --git a/megatron/core/distributed/finalize_model_grads.py b/megatron/core/distributed/finalize_model_grads.py index 408b29c463d..a0fc914466d 100644 --- a/megatron/core/distributed/finalize_model_grads.py +++ b/megatron/core/distributed/finalize_model_grads.py @@ -491,34 +491,14 @@ def finalize_model_grads( pos_emb_group = parallel_state.get_position_embedding_group(check_initialized=False) dp_cp_group = parallel_state.get_data_parallel_group(with_context_parallel=True) - # Drain any in-flight GTP reduce-scatters on rs_stream before the DP gradient sync. - # Expert backward runs eagerly (not in CUDA graphs), so its GTP RS operations on - # rs_stream may still be writing to main_grad when finish_grad_sync starts the DP - # allreduce on main_stream. + # Fence the current stream against all GTP backward grad work before the DP gradient sync. 
if ( config.generalized_tensor_parallel_remat_size > 1 or config.expert_generalized_tensor_parallel_remat_size > 1 ): - from megatron.experimental.gtp import ( - get_all_ag_streams, - get_all_rs_streams, - wait_async_comms, - ) - - wait_async_comms() - for s in get_all_ag_streams(): - torch.cuda.current_stream().wait_stream(s) - for s in get_all_rs_streams(): - torch.cuda.current_stream().wait_stream(s) - - # Wait for captured bwd Phase 2 (main_grad.add_) on each CG runner's - # stream. bwd_completion_event only covers Phase 1; Phase 2 runs after - # it on runner.stream with no other sync to main_stream. - if config.generalized_tensor_parallel_remat_size > 1: - from megatron.core.transformer.cuda_graphs import get_gtp_phase2_completion_events + from megatron.experimental.gtp import wait_gtp_grads_on_current_stream - for evt in get_gtp_phase2_completion_events(): - torch.cuda.current_stream().wait_event(evt) + wait_gtp_grads_on_current_stream() # All-reduce / reduce-scatter across DP replicas. if config.timers is not None: diff --git a/megatron/core/transformer/cuda_graphs.py b/megatron/core/transformer/cuda_graphs.py index ff0f7deb27f..eed65288f20 100644 --- a/megatron/core/transformer/cuda_graphs.py +++ b/megatron/core/transformer/cuda_graphs.py @@ -765,32 +765,12 @@ def backward(ctx, *grads): for param, grad_added in runner.groundtruth_grad_added_to_main_grad.items(): param.grad_added_to_main_grad = grad_added - # Fire DDP grad-ready hooks for GTP params whose main_grad.add_ was - # captured in this runner's bwd_graph. DDP's autograd hook returns - # early under is_graph_capturing and doesn't re-run from Python at - # replay, so trigger it explicitly here to let DDP RS overlap with - # the rest of backward. See _compute_finalized_during_bwd_capture - # for how the set is built. - # - # Fire on rs_stream (GRAPHED chain, GTP group) — the stream that - # ran the captured main_grad.add_(wgrad_rs). 
Stream FIFO orders the - # hook (check_grads' grad_data.norm and DDP-RS preEvent record) - # after that write. wait_event(bwd_phase2_completion_event) is - # defensive against future Phase 2 work on other sub-streams. - # main_stream stays unblocked so the next runner can start in - # parallel. - if runner.gtp_remat and runner.finalized_during_bwd_capture: - # Partition by (chain, group): dense vs EGTP use different NCCL - # comms / rs_streams. Fire each hook on the rs_stream that ran - # its captured wgrad-RS so FIFO orders DDP-RS after that write. - dense_group = parallel_state.get_generalized_tensor_parallel_remat_group() - expert_group = parallel_state.get_expert_generalized_tensor_parallel_remat_group() - params_by_group = defaultdict(list) - for param in runner.finalized_during_bwd_capture: - is_expert = not getattr(param, 'allreduce', True) - params_by_group[expert_group if is_expert else dense_group].append(param) - for group, params in params_by_group.items(): - gtp_rs_stream = get_rs_stream(GTPChain.GRAPHED.value, group) + # DDP's grad-ready hook is silenced during capture and not re-fired at replay, so fire it + # here to let DDP RS overlap backward. Fire on each param's rs_stream (the one that ran its + # captured main_grad.add_) so FIFO orders DDP-RS after that write; wait_event guards + # cross-substream Phase 2. Plan precomputed in create_bwd_graph; main_stream stays free. + if runner.gtp_remat: + for gtp_rs_stream, params in runner._gtp_finalize_hook_plan: gtp_rs_stream.wait_event(runner.bwd_phase2_completion_event) with torch.cuda.stream(gtp_rs_stream): for param in params: @@ -861,6 +841,8 @@ def __init__( # graph. Used in Graphed.backward's post-replay hook loop to fire DDP hooks only in the # graph whose replay populates main_grad. self.finalized_during_bwd_capture = [] + # (rs_stream, params) DDP grad-ready hook plan; built in create_bwd_graph. 
+ self._gtp_finalize_hook_plan = [] self.grad_enabled = need_backward and torch.is_grad_enabled() self.func = super(MegatronModule, self.base_module).__call__ if func is None else func @@ -887,12 +869,11 @@ def __init__( self.fp4_runtime_enabled = None self.gtp_remat = self.base_module.config.generalized_tensor_parallel_remat_size > 1 - # Ensure internal warmup (inside create_fwd_graph) has >= 2 steps - # for GTP: 1st builds chain + tickets, 2nd exercises prefetch path. if self.gtp_remat: + # Ensure internal warmup (inside create_fwd_graph) has >= 2 steps + # for GTP: 1st builds chain + tickets, 2nd exercises prefetch path. self.num_warmup_steps = max(self.num_warmup_steps, 2) - if self.gtp_remat: self.use_stream = True self.stream = torch.cuda.Stream() self.fwd_completion_event = torch.cuda.Event(external=True, interprocess=True) @@ -1290,19 +1271,6 @@ def create_bwd_graph(self): # to 0 when activation checkpointing is used. See [interaction with recompute]. global bwd_buffer_reuse_ref_count - # Tag cross-graph chain-tail GTP params: those whose prev_w lives in - # another runner's params_to_backprop. Read by TE's wgrad_reduce_scatter - # cascade and wait_async_comms to split the captured RS wait/add across - # producer and consumer graphs (avoids cross-capture cudaStreamWaitEvent - # on c10d Work.postEvent). - if self.gtp_remat: - pset = {id(p) for p in self.params_to_backprop} - for p in self.params_to_backprop: - if not getattr(p, 'is_gtp', False): - continue - prev_w = getattr(p, "prev_w", None) - p._is_cross_graph_tail = prev_w is not None and id(prev_w) not in pset - assert self.grad_enabled self.bwd_graph = torch.cuda.CUDAGraph() @@ -1335,8 +1303,6 @@ def create_bwd_graph(self): out_grad.requires_grad = True self.static_grad_outputs.append(out_grad) - torch.cuda.synchronize() - # Freeze GC, to speed up capture time ~15-20x. 
if FREEZE_GC: gc.freeze() @@ -1411,6 +1377,21 @@ def create_bwd_graph(self): self._compute_finalized_during_bwd_capture() if self.gtp_remat else [] ) + # Precompute the (rs_stream, params) DDP grad-ready hook plan once — it's + # replay-invariant — so Graphed.backward avoids per-replay group lookups. + self._gtp_finalize_hook_plan = [] + if self.gtp_remat and self.finalized_during_bwd_capture: + dense_group = parallel_state.get_generalized_tensor_parallel_remat_group() + expert_group = parallel_state.get_expert_generalized_tensor_parallel_remat_group() + params_by_group = defaultdict(list) + for param in self.finalized_during_bwd_capture: + is_expert = not getattr(param, 'allreduce', True) + params_by_group[expert_group if is_expert else dense_group].append(param) + self._gtp_finalize_hook_plan = [ + (get_rs_stream(GTPChain.GRAPHED.value, group), params) + for group, params in params_by_group.items() + ] + # Constructs a tuple suitable for returning from Graphed.backward: # Pads out the actually-needed grads with Nones in gradient slots for inputs # that don't require grad diff --git a/megatron/experimental/gtp/README.md b/megatron/experimental/gtp/README.md index d27c3783b78..d7806c9d749 100644 --- a/megatron/experimental/gtp/README.md +++ b/megatron/experimental/gtp/README.md @@ -198,7 +198,7 @@ update_gtp_config( TransformerEngine owns the linear primitives (`Linear` / `LayerNormLinear` / `LayerNormMLP` / `GroupedLinear`) and the low-precision tensor types (FP8 / MXFP8 / NVFP4). Megatron-LM owns the GTP scheduling state — the prefetch chain, the ticket-based buffer cache, the per-param AG/RS state machines, the GRAPHED/UNGRAPHED chain split, and the DDP integration. The two are bridged by: -1. The `gtp_group` kwarg that Mcore's `extensions/transformer_engine.py` threads into the TE constructors when `is_te_min_version("2.15.0")`. +1. 
The `gtp_group` kwarg that Mcore's `extensions/transformer_engine.py` threads into the TE constructors when `is_te_min_version("2.17.0")`. 2. The hook registry (`register_gtp_hooks`), called by TE's `module/base.py` at `reset_parameters` time to slice each weight into a `GTPShardedParam` along `out_features`. 3. The `_register_gtp_side_streams` / drain calls that synchronize TE's quantize + GEMM kernels with the side stream that owns the AG/RS NCCL ops. diff --git a/megatron/experimental/gtp/__init__.py b/megatron/experimental/gtp/__init__.py index 8fb8a307275..9798bb514e2 100644 --- a/megatron/experimental/gtp/__init__.py +++ b/megatron/experimental/gtp/__init__.py @@ -20,14 +20,13 @@ GTPShardedParam, classify_gtp_chains, get_ag_stream, - get_all_ag_streams, - get_all_rs_streams, get_rs_stream, set_cuda_graph_mempool, set_cuda_graph_modules, tag_gtp_params_with_names, update_gtp_config, wait_async_comms, + wait_gtp_grads_on_current_stream, wrap_module_params_gtp, ) @@ -44,13 +43,12 @@ "GTPShardedParam", "classify_gtp_chains", "get_ag_stream", - "get_all_ag_streams", - "get_all_rs_streams", "get_rs_stream", "set_cuda_graph_mempool", "set_cuda_graph_modules", "tag_gtp_params_with_names", "update_gtp_config", "wait_async_comms", + "wait_gtp_grads_on_current_stream", "wrap_module_params_gtp", ] diff --git a/megatron/experimental/gtp/generalized_tensor_parallelism.py b/megatron/experimental/gtp/generalized_tensor_parallelism.py index afad2977ce0..6b41b7fc6e9 100644 --- a/megatron/experimental/gtp/generalized_tensor_parallelism.py +++ b/megatron/experimental/gtp/generalized_tensor_parallelism.py @@ -255,24 +255,24 @@ def get_rs_stream(chain_id: str = GTPChain.GRAPHED.value, group=None) -> torch.c return _RS_STREAMS[key] -def get_all_ag_streams() -> list: - """All AG streams created so far, across chains and groups.""" - return list(_AG_STREAMS.values()) +def wait_gtp_grads_on_current_stream() -> None: + """Fence the current stream against all GTP backward grad work 
before the DP gradient sync. - -def get_all_rs_streams() -> list: - """All RS streams created so far, across chains and groups.""" - return list(_RS_STREAMS.values()) - - -def get_ag_streams_for_chain(chain_id: str) -> list: - """AG streams for one chain (all groups that chain has touched).""" - return [s for k, s in _AG_STREAMS.items() if k[0] == chain_id] - - -def get_rs_streams_for_chain(chain_id: str) -> list: - """RS streams for one chain (all groups that chain has touched).""" - return [s for k, s in _RS_STREAMS.items() if k[0] == chain_id] + Drains in-flight AG/RS on the side streams (eager expert backward may still be writing + main_grad) and waits on each CUDA-graph runner's captured dense-GTP bwd Phase 2 + (main_grad.add_) completion event. No-op when GTP is inactive (empty streams / events). + """ + wait_async_comms() + cur = torch.cuda.current_stream() + for s in _AG_STREAMS.values(): + cur.wait_stream(s) + for s in _RS_STREAMS.values(): + cur.wait_stream(s) + # Local import: cuda_graphs imports this module, so a module-level import would be circular. 
+ from megatron.core.transformer.cuda_graphs import get_gtp_phase2_completion_events + + for evt in get_gtp_phase2_completion_events(): + cur.wait_event(evt) # NOTE: Coalesced amax reduction across the GTP group is deferred to a follow-up From 3725f512324e6d24f21414f9b6683e5c70a70b03 Mon Sep 17 00:00:00 2001 From: Shiqing Fan Date: Thu, 11 Jun 2026 10:08:43 -0700 Subject: [PATCH 29/59] feat: make (E)GTP a first-class orthogonal parallelism axis - Build GTP/EGTP as orthogonal sub-axes of the rank grid (world_size = TP*GTP*CP*DP; expert grid = ETP*EP*PP*EGTP*expert_dp) in parallel_state - Keep DDP and the distributed optimizer GTP-agnostic - Compute grad-norm / clip over the full-world grad-stats group - Add end-to-end correctness tests - Update the DDP x (E)GTP interaction in README Signed-off-by: Shiqing Fan --- .../distributed/distributed_data_parallel.py | 229 ++------ .../core/distributed/finalize_model_grads.py | 53 ++ megatron/core/optimizer/__init__.py | 105 +--- megatron/core/optimizer/distrib_optimizer.py | 17 + megatron/core/optimizer/optimizer.py | 94 +-- megatron/core/parallel_state.py | 393 +++++++------ megatron/experimental/gtp/README.md | 69 ++- .../gtp/images/0527_ddp_param_bucketing.png | Bin 196594 -> 0 bytes .../0611_ddp_egtp_orthogonal_bucketing.png | Bin 0 -> 187192 bytes megatron/training/initialize.py | 2 + megatron/training/utils/common_utils.py | 5 +- .../test_attention_gtp.py | 24 +- .../generalized_tensor_parallel/test_gtp.py | 6 +- .../test_gtp_grad_correctness.py | 541 ++++++++++++++++++ .../test_gtp_loss_correctness.py | 197 +++++++ .../test_mamba_gtp.py | 11 +- .../test_moe_egtp.py | 30 +- .../test_tp_gtp.py | 4 +- 18 files changed, 1222 insertions(+), 558 deletions(-) delete mode 100644 megatron/experimental/gtp/images/0527_ddp_param_bucketing.png create mode 100644 megatron/experimental/gtp/images/0611_ddp_egtp_orthogonal_bucketing.png create mode 100644 tests/unit_tests/generalized_tensor_parallel/test_gtp_grad_correctness.py 
create mode 100644 tests/unit_tests/generalized_tensor_parallel/test_gtp_loss_correctness.py diff --git a/megatron/core/distributed/distributed_data_parallel.py b/megatron/core/distributed/distributed_data_parallel.py index 9208b9a0245..bb0b06b7d1b 100644 --- a/megatron/core/distributed/distributed_data_parallel.py +++ b/megatron/core/distributed/distributed_data_parallel.py @@ -97,6 +97,23 @@ def __init__( self.intra_expt_dp_with_egtp_group = process_group_dict.get( 'intra_expt_dp_with_egtp_group', self.intra_expt_dp_group ) + # DDP treats GTP shards as ordinary params reduced over the replicate (gtp/egtp- + # EXCLUDED) DP group — the *_with_gtp groups, which alias the regular DP groups when GTP + # is inactive — with the standard 1/full scaling and no gtp-specific buffers or factor. + # The gtp axis is completed elsewhere: GTP shards by their reduce-scatter sum; replicated + # (non-GTP) params by a SUM all-reduce in finalize_model_grads. This is correct because + # the non-averaged scaling is 1/full = 1/(replicate*gtp), so SUM-over-replicate (DDP) + + # SUM-over-gtp (RS or finalize) * (1/full) = mean over the full DP*GTP domain. + # GTP is "active" when those replicate groups are strictly smaller than the full DP groups. + gtp_active = ( + self.dp_cp_with_gtp_group.size() != self.dp_cp_group.size() + or self.expt_dp_with_egtp_group.size() != self.expt_dp_group.size() + ) + if gtp_active and self.ddp_config.average_in_collective: + raise NotImplementedError( + "Orthogonal GTP currently supports average_in_collective=False (the default); " + "averaged collectives would need per-buffer 1/gtp scaling." 
+ ) self.tp_group = process_group_dict['tp_group'] self.pp_group = process_group_dict['pp_group'] self.ep_group = process_group_dict['ep_group'] @@ -124,8 +141,6 @@ def __init__( param_to_name = {} self.params_with_grad = [] all_params = [] - gtp_params = [] - egtp_params = [] for name, param in self.module.named_parameters(): if not param.requires_grad: continue @@ -136,35 +151,13 @@ def __init__( param.grad_added_to_main_grad = False param_to_name[param] = name - # GTPShardedParam comes in two flavors. Both need the GTP-peer-excluded RS - # group because GTP's bwd already RS'd over the (E)GTP axis: - # - dense GTP (allreduce=True ) → gtp_params → intra_dp_cp_with_gtp_group - # - expert GTP (allreduce=False) → egtp_params → intra_expt_dp_with_egtp_group - # Non-GTP expert params (biases, LayerNorms inside experts, etc.) are - # REPLICATED across EGTP peers and stay in all_params — their expert branch - # reduces over the FULL intra_expt_dp_group at line 263. - is_gtp_shard = getattr(param, 'is_gtp', False) - is_expert = not getattr(param, 'allreduce', True) - if is_gtp_shard and not is_expert: - gtp_params.append(param) - elif is_gtp_shard and is_expert: - egtp_params.append(param) - else: - all_params.append(param) + # GTP shards own their 1/gtp via the GTP reduce-scatter; DDP reduces every param + # (incl. GTP/EGTP shards) over the gtp/egtp-EXCLUDED replicate group like ordinary + # params. Dense-vs-expert grouping happens below via buffer_key.is_expert_parallel. + all_params.append(param) - # Group parameters by (param_dtype, grad_dtype, is_expert_parallel). (E)GTP - # params are grouped into separate buffer sets (RS groups chosen below). + # Group parameters by (param_dtype, grad_dtype, is_expert_parallel). 
buffer_groups = group_params_for_buffers(all_params, self.ddp_config.grad_reduce_in_fp32) - gtp_buffer_groups = ( - group_params_for_buffers(gtp_params, self.ddp_config.grad_reduce_in_fp32) - if gtp_params - else {} - ) - egtp_buffer_groups = ( - group_params_for_buffers(egtp_params, self.ddp_config.grad_reduce_in_fp32) - if egtp_params - else {} - ) # Auto-compute layouts when using distributed optimizer but no layout was provided. # This maintains backward compatibility for callers that create DDP directly @@ -180,29 +173,27 @@ def __init__( ) from ..optimizer.distrib_optimizer import DistributedOptimizer + # Buffers reduce/shard over the gtp/egtp-EXCLUDED replicate group, so the layout + # padding uses the replicate group sizes (these alias the full DP groups when GTP is + # inactive). + dp_layout_size = self.intra_dp_cp_with_gtp_group.size() + edp_layout_size = self.intra_expt_dp_with_egtp_group.size() full_param_layout = DistributedOptimizer.compute_full_param_layout( all_params, self.bucket_size, - self.intra_dp_cp_group.size(), + dp_layout_size, self.ddp_config, - expert_data_parallel_world_size=self.intra_expt_dp_group.size(), + expert_data_parallel_world_size=edp_layout_size, ) # When a full_param_layout is provided, verify that the grouping is consistent # with the layout (same buffer keys, same params per key, same param_indices). - # (E)GTP shares a BufferKey with non-GTP params of the same dtype, so keys that - # also appear in the gtp/egtp groups diverge from the caller's (non-carved) - # layout — skip those, and skip the exact key-set check when any carve-out ran. 
if full_param_layout is not None: - carved_keys = set(gtp_buffer_groups.keys()) | set(egtp_buffer_groups.keys()) - if not carved_keys: - assert set(buffer_groups.keys()) == set(full_param_layout.layouts.keys()), ( - f"Buffer keys from param grouping {set(buffer_groups.keys())} do not match " - f"full_param_layout keys {set(full_param_layout.layouts.keys())}" - ) + assert set(buffer_groups.keys()) == set(full_param_layout.layouts.keys()), ( + f"Buffer keys from param grouping {set(buffer_groups.keys())} do not match " + f"full_param_layout keys {set(full_param_layout.layouts.keys())}" + ) for buffer_key, (params, param_indices) in buffer_groups.items(): - if buffer_key in carved_keys: - continue layout = full_param_layout.layouts[buffer_key] assert set(params) == set( layout.param_index_map.keys() @@ -220,8 +211,6 @@ def __init__( ), "Cannot average in collective when calculating per-token loss!" gradient_scaling_factor = 1.0 expert_gradient_scaling_factor = 1.0 - gtp_gradient_scaling_factor = 1.0 - egtp_gradient_scaling_factor = 1.0 else: # The goal is to scale reduced gradients by 1/dp_size. # This can be achieved in two ways: @@ -246,43 +235,31 @@ def __init__( if self.ddp_config.average_in_collective: gradient_scaling_factor = 1.0 expert_gradient_scaling_factor = self.expt_dp_group.size() / self.dp_cp_group.size() - # (E)GTP pre-scale = (collective_size) / dp_cp_size so post-collective - # grad lands at 1/dp_cp_size. Each divisor must reference the same group - # the RS fires on (lines 341 for GTP, 372 for EGTP). 
- gtp_gradient_scaling_factor = ( - self.intra_dp_cp_with_gtp_group.size() / self.dp_cp_group.size() - ) - egtp_gradient_scaling_factor = ( - self.intra_expt_dp_with_egtp_group.size() / self.dp_cp_group.size() - ) else: data_parallel_world_size = self.dp_cp_group.size() gradient_scaling_factor = 1.0 / data_parallel_world_size expert_gradient_scaling_factor = 1.0 / data_parallel_world_size - gtp_gradient_scaling_factor = 1.0 / data_parallel_world_size - egtp_gradient_scaling_factor = 1.0 / data_parallel_world_size # Allocate buffers for each group. self.buffers = [] self.expert_parallel_buffers = [] - self.gtp_buffers = [] - self.egtp_buffers = [] pg_collection = ProcessGroupCollection(tp=self.tp_group, dp_cp=self.dp_cp_group) - # Grad RS for every buffer (expert / dense non-GTP here, dense GTP at line 328) - # uses a per-distopt-instance partial group. Cross-instance sync runs separately + # Grad RS uses a per-distopt-instance partial group. Cross-instance sync runs separately # via inter_dist_opt_group during optim.step(); reducing cross-instance grads # here would mix independent data slices. for buffer_key, (params, param_indices) in buffer_groups.items(): if buffer_key.is_expert_parallel: - # Non-GTP expert params (biases, expert-scoped LayerNorms, etc.) are - # replicated across EGTP peers, so reduce over the FULL intra_expt_dp - # group (includes EGTP peers). EGTP-sharded routed experts are carved - # into egtp_buffer_groups below and use the EGTP-peer-excluded group. - data_parallel_group = self.intra_expt_dp_group + # Every expert param (incl. EGTP shards) reduces over the EGTP-peer-EXCLUDED + # replicate group; the egtp axis is handled by GTP's reduce-scatter (shards) or + # the finalize all-reduce (replicated params). Aliases the full expert-DP group + # when EGTP is inactive. 
+ data_parallel_group = self.intra_expt_dp_with_egtp_group scaling_factor = expert_gradient_scaling_factor else: - data_parallel_group = self.intra_dp_cp_group + # Dense params (incl. GTP shards) reduce over the gtp-EXCLUDED replicate group + # (aliases the full DP group when GTP is inactive). + data_parallel_group = self.intra_dp_cp_with_gtp_group scaling_factor = gradient_scaling_factor if not config.calculate_per_token_loss: @@ -303,9 +280,10 @@ def __init__( else: assert scaling_factor == target_gradient_scaling_factor - # With GTP: full_param_layout contains stray GTP entries not in this buffer, - # so recompute a fresh padded layout to avoid KeyErrors and bucket misalignment. - if full_param_layout is not None and not gtp_params: + # With GTP active, a caller-provided full_param_layout is sized for the FULL (gtp- + # inclusive) DP group, but the merged buffer reduces/shards over the REPLICATE group, + # so ignore it and recompute the layout with the replicate-group size below. + if full_param_layout is not None and not gtp_active: param_layout = full_param_layout.layouts.get(buffer_key) elif self.ddp_config.use_distributed_optimizer: from ..optimizer.distrib_optimizer import DistributedOptimizer @@ -339,75 +317,6 @@ def __init__( else: self.buffers.append(buffer) - # GTP-sharded params have already been RS'd over the GTP axis by GTP itself, - # so DDP must use the GTP-peer-excluded group here. full_param_layout is not - # applied to GTP buffers (GTP manages its own sharding). - for buffer_key, (params, param_indices) in gtp_buffer_groups.items(): - params_with_names = [(p, param_to_name[p]) for p in params] - if self.ddp_config.use_distributed_optimizer: - # Pad bucket ends to intra_dp_cp_with_gtp_group.size() for dist-opt alignment. 
- from ..optimizer.distrib_optimizer import DistributedOptimizer - - gtp_layout = DistributedOptimizer._compute_per_buffer_param_layout( - params, - self.bucket_size, - self.intra_dp_cp_with_gtp_group.size(), - self.ddp_config, - param_indices, - ) - else: - gtp_layout = None - buffer = _ParamAndGradBuffer( - self.ddp_config, - buffer_key.param_dtype, - buffer_key.grad_dtype, - params_with_names, - self.intra_dp_cp_with_gtp_group, - self.bucket_size, - param_to_name, - gtp_gradient_scaling_factor, - param_indices, - self.ddp_config.nccl_ub, - pg_collection, - param_layout=gtp_layout, - ) - self.gtp_buffers.append(buffer) - - # EGTP-sharded routed experts: same story as dense GTP but on the expert side — - # their grads were RS'd over the EGTP axis by GTP, so the DP reduction here must - # exclude EGTP peers (intra_expt_dp_with_egtp_group) and use the matching - # egtp_gradient_scaling_factor (numerator = collective size). Non-GTP expert - # params took the full intra_expt_dp_group branch above. - for buffer_key, (params, param_indices) in egtp_buffer_groups.items(): - params_with_names = [(p, param_to_name[p]) for p in params] - if self.ddp_config.use_distributed_optimizer: - from ..optimizer.distrib_optimizer import DistributedOptimizer - - egtp_layout = DistributedOptimizer._compute_per_buffer_param_layout( - params, - self.bucket_size, - self.intra_expt_dp_with_egtp_group.size(), - self.ddp_config, - param_indices, - ) - else: - egtp_layout = None - buffer = _ParamAndGradBuffer( - self.ddp_config, - buffer_key.param_dtype, - buffer_key.grad_dtype, - params_with_names, - self.intra_expt_dp_with_egtp_group, - self.bucket_size, - param_to_name, - egtp_gradient_scaling_factor, - param_indices, - self.ddp_config.nccl_ub, - pg_collection, - param_layout=egtp_layout, - ) - self.egtp_buffers.append(buffer) - # In some scenarios, we want to put buckets from different buffers into a group so that # their communication can be aggregated. 
For example, when there are both fp8 buffers # and bf16 buffers in the model and vpp is enabled, each model chunk will have an fp8 @@ -431,47 +340,20 @@ def __init__( self.ddp_config.reduce_scatter_with_fp32_accumulation ), ) - self.gtp_bucket_groups = partition_buckets( - self.gtp_buffers, - force_single_bucket_group=disable_bucketing, - reduce_scatter_with_fp32_accumulation=( - self.ddp_config.reduce_scatter_with_fp32_accumulation - ), - ) - self.egtp_bucket_groups = partition_buckets( - self.egtp_buffers, - force_single_bucket_group=disable_bucketing, - reduce_scatter_with_fp32_accumulation=( - self.ddp_config.reduce_scatter_with_fp32_accumulation - ), - ) - # Flat view across all four bucket-group lists; used wherever - # callers need to iterate every bucket group regardless of dense / - # expert-parallel / GTP / EGTP category. The per-category lists above are - # kept for code paths that need per-category state (e.g. one + # Flat view across the bucket-group lists; used wherever callers need to iterate every + # bucket group regardless of dense / expert-parallel category. The per-category lists + # above are kept for code paths that need per-category state (e.g. one # communication_stream per category). - self.all_bucket_groups = ( - self.bucket_groups - + self.expert_parallel_bucket_groups - + self.gtp_bucket_groups - + self.egtp_bucket_groups - ) + self.all_bucket_groups = self.bucket_groups + self.expert_parallel_bucket_groups # Same flat-view convenience for the underlying buffers (lifecycle ops # like reset/offload/reload iterate over every buffer once). 
- self.all_buffers = ( - self.buffers + self.expert_parallel_buffers + self.gtp_buffers + self.egtp_buffers - ) + self.all_buffers = self.buffers + self.expert_parallel_buffers if self.ddp_config.num_distributed_optimizer_instances > 1: assert ( self.ddp_config.use_distributed_optimizer ), 'Partial DistOpt cannot be used without DistOpt' - for bucket_groups in [ - self.bucket_groups, - self.expert_parallel_bucket_groups, - self.gtp_bucket_groups, - self.egtp_bucket_groups, - ]: + for bucket_groups in [self.bucket_groups, self.expert_parallel_bucket_groups]: communication_stream = torch.cuda.Stream(device=torch.cuda.current_device()) for bucket_group in bucket_groups: bucket_group.inter_distributed_optimizer_instance_group = ( @@ -485,12 +367,7 @@ def __init__( # layer-wise optimizer cases; the latter sets overlap_param_gather=True # without use_distributed_optimizer. if self.ddp_config.overlap_param_gather: - for bucket_groups in [ - self.bucket_groups, - self.expert_parallel_bucket_groups, - self.gtp_bucket_groups, - self.egtp_bucket_groups, - ]: + for bucket_groups in [self.bucket_groups, self.expert_parallel_bucket_groups]: num_bucket_groups = len(bucket_groups) for i in range(1, num_bucket_groups): bucket_groups[num_bucket_groups - i].next_param_gather_bucket_group = ( diff --git a/megatron/core/distributed/finalize_model_grads.py b/megatron/core/distributed/finalize_model_grads.py index a0fc914466d..9fcfcc01d58 100644 --- a/megatron/core/distributed/finalize_model_grads.py +++ b/megatron/core/distributed/finalize_model_grads.py @@ -442,6 +442,56 @@ def _allreduce_non_tensor_model_parallel_grads( _allreduce_layernorm_grads = _allreduce_non_tensor_model_parallel_grads +def _allreduce_replicated_grads_over_gtp_group(model: List[torch.nn.Module]): + """SUM NON-GTP (replicated) param grads over the gtp / egtp group. + + DDP reduces every param over the gtp-EXCLUDED replicate DP group with the standard 1/full + (= 1/(replicate*gtp)) scaling. 
The gtp axis is completed elsewhere: + GTP-sharded weights by their reduce-scatter SUM; the replicated (non-GTP) params here, by a + SUM all-reduce over the gtp (dense) / egtp (expert) group. SUM (not AVG) because DDP already + applied 1/full — summing the gtp copies of the partially-reduced grad yields the full sum, + times 1/full = the mean over the full DP*GTP domain. Mirror of + :func:`_allreduce_non_tensor_model_parallel_grads`, over the gtp axis. No-op when GTP is + inactive (gtp/egtp group size <= 1). + """ + gtp_group = parallel_state.get_generalized_tensor_parallel_remat_group(check_initialized=False) + egtp_group = parallel_state.get_expert_generalized_tensor_parallel_remat_group( + check_initialized=False + ) + + dense_params, dense_grads = [], [] + expert_params, expert_grads = [], [] + for model_chunk in model: + for name, param in get_attr_wrapped_model(model_chunk, 'named_parameters')(): + if not param.requires_grad or getattr(param, 'is_gtp', False): + continue # GTP-sharded params: their gtp axis is handled by the GTP reduce-scatter SUM.
+ grad_attr = _get_main_grad_attr(param) + grad = getattr(param, grad_attr, None) + if grad is None: + continue + grad = _unshard_if_dtensor(grad) + if getattr(param, 'allreduce', True): + dense_params.append(param) + dense_grads.append(grad.data) + else: + expert_params.append(param) + expert_grads.append(grad.data) + + for params, grads, group in ( + (dense_params, dense_grads, gtp_group), + (expert_params, expert_grads, egtp_group), + ): + if not grads or group is None or group.size() <= 1: + continue + coalesced = _flatten_dense_tensors(grads) + torch.distributed.all_reduce(coalesced, op=torch.distributed.ReduceOp.SUM, group=group) + for param, buf, synced in zip(params, grads, _unflatten_dense_tensors(coalesced, grads)): + buf.copy_(synced) + grad_attr = _get_main_grad_attr(param) + orig_grad = getattr(param, grad_attr) + setattr(param, grad_attr, _reshard_if_dtensor(buf, orig_grad)) + + def finalize_model_grads( model: List[torch.nn.Module], num_tokens: Optional[torch.Tensor] = None, @@ -526,6 +576,9 @@ def finalize_model_grads( barrier=config.barrier_with_L1_time ) _allreduce_non_tensor_model_parallel_grads(model, config, tp_group) + # Complete the gtp-axis reduction for replicated (non-GTP) params, whose DDP reduction + # covered only the gtp-EXCLUDED replicate DP group (no-op when GTP is inactive). 
+ _allreduce_replicated_grads_over_gtp_group(model) if config.timers is not None: config.timers('non-tensor-parallel-grads-all-reduce').stop() diff --git a/megatron/core/optimizer/__init__.py b/megatron/core/optimizer/__init__.py index 0877e989118..d3bc283dd76 100644 --- a/megatron/core/optimizer/__init__.py +++ b/megatron/core/optimizer/__init__.py @@ -1034,6 +1034,20 @@ def get_megatron_optimizer( intra_expt_dp_group_gloo = process_groups_dict['intra_expt_dp_group_gloo'] intra_dist_opt_group = process_groups_dict['intra_dist_opt_group'] + # GTP (Generalized Tensor Parallelism, world = TP*GTP*CP*DP): GTP/EGTP params shard their + # optimizer state over the same replicate (gtp/egtp-EXCLUDED) DP group as non-GTP params (DDP + # merged them into the dense/expert buffers), so they fold into the main / expert optimizers. + # The *_with_gtp replicate groups alias the full DP groups when GTP is inactive, so the main / + # expert dist-opts can always shard over them. GTP is "active" when those replicate groups are + # strictly smaller than the full DP groups (it does not support the Gloo optimizer-state path). 
+ gtp_active = ( + intra_dp_cp_with_gtp_group.size() != intra_dp_cp_group.size() + or intra_expt_dp_with_egtp_group.size() != intra_expt_dp_group.size() + ) + main_dp_group = intra_dp_cp_with_gtp_group + main_dp_group_gloo = None if gtp_active else intra_dp_cp_group_gloo + main_expt_dp_group = intra_expt_dp_with_egtp_group + model_parallel_rank = get_pg_rank(mp_group) if get_pg_size(dp_cp_group) > get_pg_size(intra_dp_cp_group): @@ -1111,7 +1125,7 @@ def get_megatron_optimizer( model_chunk_offset=model_chunk_offset, config=config, config_overrides=config_overrides, - filter_fn=lambda g: not g['is_expert_parallel'] and not g.get('is_gtp', False), + filter_fn=lambda g: not g['is_expert_parallel'], buffer_name='buffers', ) for model_chunk in dense_model_chunks: @@ -1133,8 +1147,8 @@ def get_megatron_optimizer( param_groups=param_groups, per_model_buffers=buffers, model_parallel_group=mp_group, - data_parallel_group=intra_dp_cp_group, - data_parallel_group_gloo=intra_dp_cp_group_gloo, + data_parallel_group=main_dp_group, + data_parallel_group_gloo=main_dp_group_gloo, data_parallel_group_idx=model_parallel_rank, intra_dist_opt_group=intra_dist_opt_group, distributed_optimizer_instance_id=distributed_optimizer_instance_id, @@ -1143,49 +1157,15 @@ def get_megatron_optimizer( ) model_chunk_offset += 1 - # GTP params: separate optimizer with with_gtp DP group. - # GTP params are sharded across GTP peers; their DDP buffers use the with_gtp group. 
- gtp_param_groups, gtp_buffers = _get_param_groups_and_buffers( - model_chunks, - model_chunk_offset=0, - config=config, - config_overrides=config_overrides, - filter_fn=lambda g: g.get('is_gtp', False) and not g['is_expert_parallel'], - buffer_name='gtp_buffers', - ) - if dump_param_to_param_group_map is not None: - for param_group in gtp_param_groups: - for param in param_group["params"]: - param_name = get_global_unique_param_name(model_chunks, param) - param_to_param_group[param_name] = param_group_id - param_group_id += 1 - if len(gtp_param_groups) > 0: - optimizers.append( - _get_megatron_optimizer_based_on_param_groups( - config=config, - model_chunks=model_chunks, - param_groups=gtp_param_groups, - per_model_buffers=gtp_buffers, - model_parallel_group=mp_group, - data_parallel_group=intra_dp_cp_with_gtp_group, - # GTP does not support the Gloo optimizer-state paths yet. - data_parallel_group_gloo=None, - data_parallel_group_idx=model_parallel_rank, - intra_dist_opt_group=intra_dist_opt_group, - distributed_optimizer_instance_id=distributed_optimizer_instance_id, - pg_collection=pg_collection, - ) - ) - - # Expert non-GTP params: reduce over the FULL intra_expt_dp_group (includes - # EGTP peers), because their wgrad has NOT been pre-reduced over the EGTP - # axis. Backed by expert_parallel_buffers in DDP. + # Expert params (incl. EGTP shards): reduce over the egtp-EXCLUDED replicate group + # (intra_expt_dp_with_egtp_group, which aliases the full expert-DP group when EGTP is + # inactive). Backed by expert_parallel_buffers in DDP. 
expert_param_groups, expert_buffers = _get_param_groups_and_buffers( model_chunks, model_chunk_offset=0, config=config, config_overrides=config_overrides, - filter_fn=lambda g: g['is_expert_parallel'] and not g.get('is_gtp', False), + filter_fn=lambda g: g['is_expert_parallel'], buffer_name='expert_parallel_buffers', ) if dump_param_to_param_group_map is not None: @@ -1196,8 +1176,9 @@ def get_megatron_optimizer( param_group_id += 1 if len(expert_param_groups) > 0: expt_model_parallel_rank = get_pg_rank(expt_tp_pp_group) - # Pass Gloo process groups into optimizer only if needed. - if use_gloo_process_groups: + # Pass Gloo process groups into optimizer only if needed. GTP shards over the + # egtp-EXCLUDED replicate group (no Gloo path for it yet), matching the dense side. + if use_gloo_process_groups and not gtp_active: expt_data_parallel_group_gloo = intra_expt_dp_group_gloo else: expt_data_parallel_group_gloo = None @@ -1208,7 +1189,7 @@ def get_megatron_optimizer( param_groups=expert_param_groups, per_model_buffers=expert_buffers, model_parallel_group=expt_tp_pp_group, - data_parallel_group=intra_expt_dp_group, + data_parallel_group=main_expt_dp_group, data_parallel_group_gloo=expt_data_parallel_group_gloo, data_parallel_group_idx=expt_model_parallel_rank, intra_dist_opt_group=intra_dist_opt_group, @@ -1217,42 +1198,6 @@ def get_megatron_optimizer( ) ) - # EGTP params: reduce over the with_egtp carve-out (excludes EGTP peers), - # because the EGTP wgrad RS has already reduced grads over the EGTP axis. - # Backed by egtp_buffers in DDP. 
- egtp_param_groups, egtp_buffers = _get_param_groups_and_buffers( - model_chunks, - model_chunk_offset=0, - config=config, - config_overrides=config_overrides, - filter_fn=lambda g: g.get('is_gtp', False) and g['is_expert_parallel'], - buffer_name='egtp_buffers', - ) - if dump_param_to_param_group_map is not None: - for param_group in egtp_param_groups: - for param in param_group["params"]: - param_name = get_global_unique_param_name(model_chunks, param) - param_to_param_group[param_name] = param_group_id - param_group_id += 1 - if len(egtp_param_groups) > 0: - expt_model_parallel_rank = get_pg_rank(expt_tp_pp_group) - optimizers.append( - _get_megatron_optimizer_based_on_param_groups( - config=config, - model_chunks=model_chunks, - param_groups=egtp_param_groups, - per_model_buffers=egtp_buffers, - model_parallel_group=expt_tp_pp_group, - data_parallel_group=intra_expt_dp_with_egtp_group, - # EGTP does not support the Gloo optimizer-state paths yet. - data_parallel_group_gloo=None, - data_parallel_group_idx=expt_model_parallel_rank, - intra_dist_opt_group=intra_dist_opt_group, - distributed_optimizer_instance_id=distributed_optimizer_instance_id, - pg_collection=pg_collection, - ) - ) - if dump_param_to_param_group_map is not None: torch.distributed.checkpoint.save( state_dict=param_to_param_group, checkpoint_id=dump_param_to_param_group_map diff --git a/megatron/core/optimizer/distrib_optimizer.py b/megatron/core/optimizer/distrib_optimizer.py index b388161a610..56ef0514332 100644 --- a/megatron/core/optimizer/distrib_optimizer.py +++ b/megatron/core/optimizer/distrib_optimizer.py @@ -394,6 +394,11 @@ def _build_model_and_main_param_groups( ) if hasattr(model_param, 'shared'): shard_model_param.shared = model_param.shared + for _gtp_attr in ('is_gtp', 'allreduce'): + if hasattr(model_param, _gtp_attr): + setattr( + shard_model_param, _gtp_attr, getattr(model_param, _gtp_attr) + ) # Generate main param. 
if not config.use_precision_aware_optimizer_no_fp8_or_ds_fp8: @@ -426,6 +431,15 @@ def _build_model_and_main_param_groups( ) if hasattr(model_param, 'shared'): shard_main_param.shared = model_param.shared + # Propagate GTP/expert tags so get_main_grads_for_grad_norm (which reads + # the master shard params) can classify them. Without is_gtp, GTP shards + # are mis-seen as replicated non-GTP params and dropped by the gtp-rank + # dedup, under-counting the grad-norm by ~1/gtp. + for _gtp_attr in ('is_gtp', 'allreduce'): + if hasattr(model_param, _gtp_attr): + setattr( + shard_main_param, _gtp_attr, getattr(model_param, _gtp_attr) + ) else: # When using precision-aware optimizer, main params are held by FusedAdam. shard_main_param = None @@ -449,6 +463,9 @@ def _build_model_and_main_param_groups( ) if hasattr(model_param, 'shared'): shard_model_param.shared = model_param.shared + for _gtp_attr in ('is_gtp', 'allreduce'): + if hasattr(model_param, _gtp_attr): + setattr(shard_model_param, _gtp_attr, getattr(model_param, _gtp_attr)) else: raise TypeError( diff --git a/megatron/core/optimizer/optimizer.py b/megatron/core/optimizer/optimizer.py index f37fcddf1f7..0c07d45fdb6 100644 --- a/megatron/core/optimizer/optimizer.py +++ b/megatron/core/optimizer/optimizer.py @@ -144,26 +144,16 @@ def get_main_grads_for_grad_norm(self) -> List[torch.Tensor]: This method filters parameters based on whether the gradient is not None, the parameter is not shared (to avoid double-counting gradients), the parameter is not a replica due to tensor model parallelism, and - the parameter is not be a GTP duplicate (non-GTP params are identical across GTP peers; - only GTP rank 0 should contribute to avoid over-counting). + the parameter is not a GTP/EGTP duplicate. - Returns all filtered grads as a single list (for backward compatibility). - Use get_main_grads_for_grad_norm_split() to get GTP and non-GTP grads separately. 
- """ - non_gtp_grads, gtp_grads = self.get_main_grads_for_grad_norm_split() - return non_gtp_grads + gtp_grads - - def get_main_grads_for_grad_norm_split(self) -> Tuple[List[torch.Tensor], List[torch.Tensor]]: - """ - Get main_grads split into (non_gtp_grads, gtp_grads). - - GTP grads may need an extra GTP/EGTP reduction that differs from the - optimizer's grad_stats_parallel_group, so callers that compute norms - need them separated. + The dist-opt shards non-GTP (replicated) params over the gtp/egtp-EXCLUDED replicate + group while the grad_stats group (intra_dist_opt_group) spans the full world *including* + the gtp/egtp axis. So non-GTP params are replicated across that axis and only rank 0 of + it should contribute, else the norm is inflated by gtp (dense) / egtp (expert). + GTP-sharded params hold unique shards across gtp/egtp and are kept on every rank. """ params = self.get_parameters() - non_gtp_grads = [] - gtp_grads = [] + grads_for_norm = [] gtp_rank = parallel_state.get_generalized_tensor_parallel_remat_rank() egtp_rank = parallel_state.get_expert_generalized_tensor_parallel_remat_rank() for param in params: @@ -198,20 +188,16 @@ def get_main_grads_for_grad_norm_split(self) -> Tuple[List[torch.Tensor], List[t ) ) - # GTP-duplicate filter: only needed for non-distributed optimizer. + # GTP/EGTP-duplicate filter: keep gtp/egtp-sharded params on every rank; count + # replicated (non-GTP) params only once (rank 0 of the gtp/egtp axis). When GTP is + # inactive gtp_rank/egtp_rank are 0, so this keeps every param. 
is_expert = not getattr(param, 'allreduce', True) - if hasattr(self, 'ddp_config') and self.ddp_config.use_distributed_optimizer: - is_not_gtp_duplicate = True - else: - is_not_gtp_duplicate = is_gtp_param or (egtp_rank if is_expert else gtp_rank) == 0 + is_not_gtp_duplicate = is_gtp_param or (egtp_rank if is_expert else gtp_rank) == 0 if grad_not_none and is_not_shared and is_not_tp_duplicate and is_not_gtp_duplicate: - if is_gtp_param: - gtp_grads.append(grad) - else: - non_gtp_grads.append(grad) + grads_for_norm.append(grad) - return non_gtp_grads, gtp_grads + return grads_for_norm def get_grad_stats_parallel_group(self) -> torch.distributed.ProcessGroup: """Process group for reducing gradient statistics (num_zeros & norm). @@ -243,59 +229,21 @@ def step_with_ready_grads(self) -> bool: """Step the optimizer with ready gradients, return successful.""" return True - def _compute_grad_norm_with_gtp(self, non_gtp_grads, gtp_grads): - """Compute grad norm handling GTP grads that may need extra GTP/EGTP reduction. - - For MoE optimizers, grad_stats_parallel_group = TP×EP×PP which does NOT - include EGTP. MoE-GTP grads need an extra EGTP reduction. - For dense-GTP optimizers, grad_stats_parallel_group = TP×PP×GTP which - already includes GTP, so no extra reduction is needed. - """ - grad_stats_group = self.get_grad_stats_parallel_group() - - if not gtp_grads: - return get_grad_norm_fp32(non_gtp_grads, grad_stats_parallel_group=grad_stats_group) - - # Check if this optimizer handles expert params that need EGTP reduction. - # The model_parallel group for dense/GTP optimizers = TP×PP×GTP (includes GTP), - # but for MoE optimizers = TP×EP×PP (does NOT include EGTP). 
- egtp_world_size = parallel_state.get_expert_generalized_tensor_parallel_remat_world_size() - is_expert_optimizer = any(not getattr(p, 'allreduce', True) for p in self.get_parameters()) - needs_egtp_reduce = is_expert_optimizer and egtp_world_size > 1 - - if not needs_egtp_reduce: - # Dense/GTP optimizer: grad_stats_group already covers GTP. - return get_grad_norm_fp32( - non_gtp_grads + gtp_grads, grad_stats_parallel_group=grad_stats_group - ) - - # MoE optimizer with EGTP: compute GTP norm separately, add EGTP reduction. - non_gtp_norm = get_grad_norm_fp32(non_gtp_grads, grad_stats_parallel_group=grad_stats_group) - gtp_norm = get_grad_norm_fp32(gtp_grads, grad_stats_parallel_group=grad_stats_group) - # get_grad_norm_fp32 returns a float. We need to do the EGTP reduction on GPU. - gtp_norm_2 = torch.tensor([gtp_norm**2], dtype=torch.float, device='cuda') - torch.distributed.all_reduce( - gtp_norm_2, - op=torch.distributed.ReduceOp.SUM, - group=parallel_state.get_expert_generalized_tensor_parallel_remat_group(), - ) - total_norm_2 = non_gtp_norm**2 + gtp_norm_2.item() - return total_norm_2**0.5 - @torch.no_grad() def get_grad_norm(self): """Compute and return grad norm.""" - non_gtp_grads, gtp_grads = self.get_main_grads_for_grad_norm_split() - return self._compute_grad_norm_with_gtp(non_gtp_grads, gtp_grads) + grads_for_norm = self.get_main_grads_for_grad_norm() + return get_grad_norm_fp32( + grads_for_norm, grad_stats_parallel_group=self.get_grad_stats_parallel_group() + ) def clip_grad_norm(self, clip_grad: float) -> float: """Compute and return grad norm, also clip grads.""" params = self.get_parameters() - if params: - non_gtp_grads, gtp_grads = self.get_main_grads_for_grad_norm_split() - else: - non_gtp_grads, gtp_grads = [], [] - grad_norm = self._compute_grad_norm_with_gtp(non_gtp_grads, gtp_grads) + grads_for_norm = self.get_main_grads_for_grad_norm() if params else [] + grad_norm = get_grad_norm_fp32( + grads_for_norm, 
grad_stats_parallel_group=self.get_grad_stats_parallel_group() + ) if params: clip_grad_by_total_norm_fp32( diff --git a/megatron/core/parallel_state.py b/megatron/core/parallel_state.py index 0f8f275347c..ad708c76340 100644 --- a/megatron/core/parallel_state.py +++ b/megatron/core/parallel_state.py @@ -466,7 +466,15 @@ class RankGenerator(object): """A class for generating rank groups for different modes of parallelism.""" def __init__( - self, tp: int, ep: int, dp: int, pp: int, cp: int, order: str, rank_offset: int = 0 + self, + tp: int, + ep: int, + dp: int, + pp: int, + cp: int, + order: str, + rank_offset: int = 0, + gtp: int = 1, ) -> None: assert ( ep == 1 or cp == 1 @@ -478,8 +486,13 @@ def __init__( self.dp = dp self.pp = pp self.cp = cp + # gtp shards weights 1/gtp (ZeRO-3) AND carries distinct data per rank (like DP), + # so it is a genuine factor of world_size. ``gtp=1`` (the default) makes it a + # size-1 identity dim, so world_size and all rank groups are unchanged for + # non-GTP callers. + self.gtp = gtp self.rank_offset = rank_offset - self.world_size = tp * dp * pp * cp * ep + self.world_size = tp * dp * pp * cp * ep * gtp self.name_to_size = { "tp": self.tp, @@ -487,6 +500,7 @@ def __init__( "dp": self.dp, "ep": self.ep, "cp": self.cp, + "gtp": self.gtp, } self.order = order order = order.lower() @@ -539,6 +553,25 @@ def get_ranks(self, token): rank_group[i] += self.rank_offset return ranks + def get_gtp_ranks(self, gtp_size: int): + """Get the generalized-tensor-parallel (GTP) weight-sharding groups. + + GTP is ZeRO-3-style weight sharding: a weight is sharded ``1/gtp_size`` across the + group and its gradient is reduce-scattered over that same group. The shard groups + are simply ``get_ranks('gtp')`` (which returns singleton groups when + ``gtp_size == 1``). + + Args: + gtp_size (int): number of weight shards per GTP group (``gtp_remat_size``). + + Returns: + List[List[int]]: the GTP groups, each a list of ``gtp_size`` global ranks. 
+ """ + assert ( + self.gtp == gtp_size + ), f"gtp axis size ({self.gtp}) != requested gtp_size ({gtp_size})" + return self.get_ranks('gtp') + def default_embedding_ranks(pp_ranks): """Return the default ranks that constitute the stages on which the word embeddings live. @@ -658,15 +691,15 @@ def initialize_model_parallel( Generalized tensor parallelism with weight rematerialization (GTP). Shards model weights along ``out_features`` across this many ranks; each weight is rematerialized independently (per-weight, not per- - layer) via async all-gather on every forward AND backward pass. - Carved out of the data-parallel axis. Maps to the dataclass field - ``ModelParallelConfig.generalized_tensor_parallel_remat_size``. + layer) via async all-gather on every forward AND backward pass. A + first-class orthogonal axis (world_size = TP*GTP*CP*DP). Maps to the + dataclass field ``ModelParallelConfig.generalized_tensor_parallel_remat_size``. expert_gtp_remat_size (int, default = 1): Expert-side counterpart of ``gtp_remat_size`` — shards routed-expert weights along ``out_features`` and rematerializes per-weight on - every forward AND backward pass. Carved out of the expert data- - parallel axis. Independent from ``gtp_remat_size``. Maps to + every forward AND backward pass. A first-class orthogonal axis on the + expert grid. Independent from ``gtp_remat_size``. Maps to ``ModelParallelConfig.expert_generalized_tensor_parallel_remat_size``. num_distributed_optimizer_instances (int, default = 1): @@ -766,20 +799,20 @@ def initialize_model_parallel( local_world_size if local_world_size is not None else torch.distributed.get_world_size() ) - model_size = tensor_model_parallel_size * pipeline_model_parallel_size * context_parallel_size + # gtp counts toward model_size (it consumes its own ranks and carries distinct data), + # so data_parallel_size becomes the gtp-EXCLUDED replicate degree. 
+ model_size = ( + tensor_model_parallel_size + * pipeline_model_parallel_size + * context_parallel_size + * gtp_remat_size + ) if world_size % model_size != 0: raise RuntimeError(f"world_size ({world_size}) is not divisible by {model_size}") data_parallel_size: int = world_size // model_size - if (data_parallel_size * context_parallel_size) % gtp_remat_size != 0: - raise RuntimeError( - f"data_parallel_size * context_parallel_size " - f"({data_parallel_size * context_parallel_size}) is not divisible by " - f"gtp_remat_size ({gtp_remat_size})" - ) - if virtual_pipeline_model_parallel_size is not None: if not pipeline_model_parallel_size > 1: raise RuntimeError( @@ -810,21 +843,43 @@ def initialize_model_parallel( for pg_name in high_priority_stream_groups: overwrite_nccl_comm_cfgs(nccl_comm_cfgs, pg_name, ("is_high_priority_stream", True)) + # GTP is a real RankGenerator axis: inject 'gtp' into the order. Position controls NCCL + # locality (leftmost token = smallest stride = most adjacent ranks): + # - dense/decoder: inject after 'tp' → 'tp-gtp-cp-ep-dp-pp' (GTP gets local placement). + # - expert: inject after 'ep' → 'tp-cp-ep-gtp-dp-pp' so EP keeps the more-local placement + # than EGTP (the MoE EP all-to-all is the heavier expert-side collective). + # When gtp/egtp size is 1 the injected axis is a no-op (singleton groups). 
+ def _inject_gtp(order_str: str, after: str = "tp") -> str: + toks = order_str.split("-") + if "gtp" in toks: + return order_str + anchor = after if after in toks else "tp" + pos = (toks.index(anchor) + 1) if anchor in toks else 0 + toks.insert(pos, "gtp") + return "-".join(toks) + + decoder_order = _inject_gtp(order, after="tp") + decoder_rank_generator = RankGenerator( tp=tensor_model_parallel_size, ep=1, dp=data_parallel_size, pp=pipeline_model_parallel_size, cp=context_parallel_size, - order=order, + order=decoder_order, rank_offset=rank_offset, + gtp=gtp_remat_size, ) # Build expert rank generator if expert_tensor_parallel_size is None: expert_tensor_parallel_size = tensor_model_parallel_size + # EGTP is a world-size factor for the expert grid too (mirrors gtp on the dense grid). expert_tensor_model_pipeline_parallel_size = ( - expert_tensor_parallel_size * expert_model_parallel_size * pipeline_model_parallel_size + expert_tensor_parallel_size + * expert_model_parallel_size + * pipeline_model_parallel_size + * expert_gtp_remat_size ) expert_data_parallel_size = world_size // expert_tensor_model_pipeline_parallel_size if world_size % expert_tensor_model_pipeline_parallel_size != 0: @@ -832,21 +887,18 @@ def initialize_model_parallel( f"world_size ({world_size}) is not divisible by expert_tensor_model_pipeline_parallel size ({expert_tensor_model_pipeline_parallel_size})" ) - if expert_data_parallel_size % expert_gtp_remat_size != 0: - raise RuntimeError( - f"expert_data_parallel_size ({expert_data_parallel_size}) is not divisible by " - f"expert_gtp_remat_size ({expert_gtp_remat_size})" - ) - - # TODO: support expert specific ordering + # Expert grid: inject gtp AFTER 'ep' so EP outranks EGTP for NCCL locality (heavy MoE + # all-to-all stays on the more-adjacent ranks; EGTP AG/RS takes the outer placement). 
+ expert_order = _inject_gtp(order, after="ep") expert_decoder_rank_generator = RankGenerator( tp=expert_tensor_parallel_size, ep=expert_model_parallel_size, dp=expert_data_parallel_size, pp=pipeline_model_parallel_size, cp=1, - order=order, + order=expert_order, rank_offset=rank_offset, + gtp=expert_gtp_remat_size, ) assert ( @@ -890,18 +942,22 @@ def initialize_model_parallel( assert ( _GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP is None ), "generalized tensor parallel group is already initialized" - for cp_dp_ranks in decoder_rank_generator.get_ranks('cp-dp'): - for i in range(0, len(cp_dp_ranks), gtp_remat_size): - gtp_ranks = cp_dp_ranks[i : i + gtp_remat_size] - group = create_group( - gtp_ranks, - timeout=timeout, - pg_options=get_nccl_options("ps", nccl_comm_cfgs), - group_desc="GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP", - ) - if rank in gtp_ranks: - _GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP = group - _GENERALIZED_TENSOR_PARALLEL_REMAT_GLOBAL_RANKS = gtp_ranks + for gtp_ranks in decoder_rank_generator.get_gtp_ranks(gtp_remat_size): + group = create_group( + gtp_ranks, + timeout=timeout, + pg_options=get_nccl_options("ps", nccl_comm_cfgs), + group_desc="GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP", + ) + if rank in gtp_ranks: + _GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP = group + _GENERALIZED_TENSOR_PARALLEL_REMAT_GLOBAL_RANKS = gtp_ranks + + # Tokens for the FULL (gtp-inclusive) data-parallel domain. gtp is factored out of the + # generator's 'dp' axis, so the full data domain spans gtp explicitly ('gtp-dp'). The + # replicate (gtp-excluded) groups are the _*_WITH_GTP variants below. + dp_full_token = "gtp-dp" + dp_cp_full_token = "gtp-dp-cp" # Set NCCL_COLLNET_ENABLE to 1 to enable SHARP for the dp group. if sharp_enabled_group == "dp": @@ -912,7 +968,7 @@ def initialize_model_parallel( # is eligible for using the NCCL COLLNET feature. 
# Therefore, dp-cp group, which potentially requires SHARP-enablement, # need to be created before all the other groups - for ranks_with_cp in decoder_rank_generator.get_ranks('dp-cp'): + for ranks_with_cp in decoder_rank_generator.get_ranks(dp_cp_full_token): group_with_cp = create_group( ranks_with_cp, timeout=timeout, @@ -1002,7 +1058,7 @@ def initialize_model_parallel( ) # TODO: Are gloo groups needed for hybrid cp? - for ranks in decoder_rank_generator.get_ranks('dp'): + for ranks in decoder_rank_generator.get_ranks(dp_full_token): group = create_group( ranks, timeout=timeout, @@ -1026,67 +1082,51 @@ def initialize_model_parallel( global _DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP global _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP if gtp_remat_size > 1: - # Build rank→gtp_rank mapping. - rank_to_gtp_rank = {} - for cp_dp_ranks in decoder_rank_generator.get_ranks('cp-dp'): - for i in range(0, len(cp_dp_ranks), gtp_remat_size): - gtp_chunk = cp_dp_ranks[i : i + gtp_remat_size] - for gtp_rank_idx, r in enumerate(gtp_chunk): - rank_to_gtp_rank[r] = gtp_rank_idx - - # DP-only with GTP: create one group per (dp_group, gtp_rank) pair. - # All ranks must participate in every create_group call (collective). + # The replicate (gtp-excluded) DP groups ARE get_ranks('dp') / get_ranks('dp-cp') by + # construction (gtp is its own axis). Every rank iterates all groups so each create_group + # collective is entered by all ranks. 
for dp_ranks in decoder_rank_generator.get_ranks('dp'): - for gtp_rank_val in range(gtp_remat_size): - dp_gtp_ranks = [r for r in dp_ranks if rank_to_gtp_rank[r] == gtp_rank_val] - group = create_group( - dp_gtp_ranks, - timeout=timeout, - pg_options=get_nccl_options("dp_gtp", nccl_comm_cfgs), - group_desc="DATA_PARALLEL_GROUP_WITH_GTP", - ) - if rank in dp_gtp_ranks: - _DATA_PARALLEL_GROUP_WITH_GTP = group + group = create_group( + dp_ranks, + timeout=timeout, + pg_options=get_nccl_options("dp_gtp", nccl_comm_cfgs), + group_desc="DATA_PARALLEL_GROUP_WITH_GTP", + ) + if rank in dp_ranks: + _DATA_PARALLEL_GROUP_WITH_GTP = group - # DP-CP with GTP. Also build the partial (per-distopt-instance) split when - # multi-instance distopt is enabled, so callers can hold one slice of the - # GTP-excluded DP-CP set without the GTP peers leaking in. for dp_cp_ranks in decoder_rank_generator.get_ranks('dp-cp'): - for gtp_rank_val in range(gtp_remat_size): - dp_cp_gtp_ranks = [r for r in dp_cp_ranks if rank_to_gtp_rank[r] == gtp_rank_val] - group = create_group( - dp_cp_gtp_ranks, - timeout=timeout, - pg_options=get_nccl_options("dp_cp_gtp", nccl_comm_cfgs), - group_desc="DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP", + group = create_group( + dp_cp_ranks, + timeout=timeout, + pg_options=get_nccl_options("dp_cp_gtp", nccl_comm_cfgs), + group_desc="DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP", + ) + if rank in dp_cp_ranks: + _DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP = group + + if num_distributed_optimizer_instances > 1: + assert len(dp_cp_ranks) % num_distributed_optimizer_instances == 0, ( + f"replicate DP-CP size ({len(dp_cp_ranks)}) must be divisible by " + f"num_distributed_optimizer_instances ({num_distributed_optimizer_instances})" ) - if rank in dp_cp_gtp_ranks: - _DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP = group - - if num_distributed_optimizer_instances > 1: - assert len(dp_cp_gtp_ranks) % num_distributed_optimizer_instances == 0, ( - f"DP-CP minus GTP peers size ({len(dp_cp_gtp_ranks)}) must 
be " - f"divisible by num_distributed_optimizer_instances " - f"({num_distributed_optimizer_instances})" + intra_partial_size = len(dp_cp_ranks) // num_distributed_optimizer_instances + for i in range(num_distributed_optimizer_instances): + chunk = dp_cp_ranks[i * intra_partial_size : (i + 1) * intra_partial_size] + intra_group = create_group( + chunk, + timeout=timeout, + pg_options=get_nccl_options("intra_dp_cp_gtp", nccl_comm_cfgs), + group_desc="INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP", ) - intra_partial_size = len(dp_cp_gtp_ranks) // num_distributed_optimizer_instances - for i in range(num_distributed_optimizer_instances): - chunk = dp_cp_gtp_ranks[ - i * intra_partial_size : (i + 1) * intra_partial_size - ] - intra_group = create_group( - chunk, - timeout=timeout, - pg_options=get_nccl_options("intra_dp_cp_gtp", nccl_comm_cfgs), - group_desc="INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP", - ) - if rank in chunk: - _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP = intra_group + if rank in chunk: + _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP = intra_group if num_distributed_optimizer_instances == 1: _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP = ( _DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP ) else: + # GTP inactive (gtp_remat_size == 1): the replicate groups alias the regular DP groups. _DATA_PARALLEL_GROUP_WITH_GTP = _DATA_PARALLEL_GROUP _DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP = _DATA_PARALLEL_GROUP_WITH_CP _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP = ( @@ -1151,26 +1191,24 @@ def initialize_model_parallel( # For each set of GTP peers, union their tp-pp groups to form model parallel groups. 
model_parallel_groups_set = set() - for cp_dp_ranks in decoder_rank_generator.get_ranks('cp-dp'): - for i in range(0, len(cp_dp_ranks), gtp_remat_size): - gtp_ranks = cp_dp_ranks[i : i + gtp_remat_size] - # Merge tp-pp groups of all GTP peers - mp_ranks = [] - for gtp_r in gtp_ranks: - mp_ranks.extend(rank_to_tp_pp[gtp_r]) - mp_ranks = sorted(set(mp_ranks)) - mp_key = tuple(mp_ranks) - if mp_key not in model_parallel_groups_set: - model_parallel_groups_set.add(mp_key) - group = create_group( - list(mp_ranks), - timeout=timeout, - pg_options=get_nccl_options("mp", nccl_comm_cfgs), - group_desc="MODEL_PARALLEL_GROUP", - ) - if rank in mp_ranks: - _MODEL_PARALLEL_GROUP = group - _MODEL_PARALLEL_GLOBAL_RANKS = list(mp_ranks) + for gtp_ranks in decoder_rank_generator.get_gtp_ranks(gtp_remat_size): + # Merge tp-pp groups of all GTP peers + mp_ranks = [] + for gtp_r in gtp_ranks: + mp_ranks.extend(rank_to_tp_pp[gtp_r]) + mp_ranks = sorted(set(mp_ranks)) + mp_key = tuple(mp_ranks) + if mp_key not in model_parallel_groups_set: + model_parallel_groups_set.add(mp_key) + group = create_group( + list(mp_ranks), + timeout=timeout, + pg_options=get_nccl_options("mp", nccl_comm_cfgs), + group_desc="MODEL_PARALLEL_GROUP", + ) + if rank in mp_ranks: + _MODEL_PARALLEL_GROUP = group + _MODEL_PARALLEL_GLOBAL_RANKS = list(mp_ranks) # Build the tensor model-parallel groups. 
global _TENSOR_MODEL_PARALLEL_GROUP @@ -1352,18 +1390,18 @@ def initialize_model_parallel( assert ( _EXPERT_GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP is None ), 'Expert generalized tensor parallel group is already initialized' - for dp_ranks in expert_decoder_rank_generator.get_ranks('dp'): - for i in range(0, len(dp_ranks), expert_gtp_remat_size): - egtp_ranks = dp_ranks[i : i + expert_gtp_remat_size] - group = create_group( - egtp_ranks, - timeout=timeout, - pg_options=get_nccl_options("expt_gtp", nccl_comm_cfgs), - group_desc="EXPERT_GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP", - ) - if rank in egtp_ranks: - _EXPERT_GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP = group - _EXPERT_GENERALIZED_TENSOR_PARALLEL_REMAT_GLOBAL_RANKS = egtp_ranks + # EGTP shard groups are get_ranks('gtp') on the expert generator (singletons when + # expert_gtp_remat_size == 1). See RankGenerator.get_gtp_ranks. + for egtp_ranks in expert_decoder_rank_generator.get_gtp_ranks(expert_gtp_remat_size): + group = create_group( + egtp_ranks, + timeout=timeout, + pg_options=get_nccl_options("expt_gtp", nccl_comm_cfgs), + group_desc="EXPERT_GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP", + ) + if rank in egtp_ranks: + _EXPERT_GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP = group + _EXPERT_GENERALIZED_TENSOR_PARALLEL_REMAT_GLOBAL_RANKS = egtp_ranks # Build the expert model parallel group global _EXPERT_MODEL_PARALLEL_GROUP, _EXPERT_MODEL_PARALLEL_RANKS @@ -1449,7 +1487,17 @@ def initialize_model_parallel( expert_data_parallel_size // num_distributed_optimizer_instances ) - for ranks in expert_decoder_rank_generator.get_ranks('dp'): + # FULL (egtp-inclusive) expert data-parallel token (mirrors dp_full_token). Expert + # generator has cp=1, so the expert data domain spans gtp explicitly ('gtp-dp'). 
+ expert_dp_full_token = "gtp-dp" + assert not ( + (gtp_remat_size > 1 or expert_gtp_remat_size > 1) + and num_distributed_optimizer_instances > 1 + ), ( + "GTP with num_distributed_optimizer_instances > 1 is not yet supported " + "(partial-distopt sharding of the data domain needs gtp-aware sizing)." + ) + for ranks in expert_decoder_rank_generator.get_ranks(expert_dp_full_token): group = create_group( ranks, timeout=timeout, @@ -1509,49 +1557,19 @@ def initialize_model_parallel( global _EXPERT_DATA_PARALLEL_GROUP_WITH_GTP global _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP if expert_gtp_remat_size > 1: - # Build rank→expert_gtp_rank mapping. - rank_to_expert_gtp_rank = {} - for dp_ranks in expert_decoder_rank_generator.get_ranks('dp'): - for i in range(0, len(dp_ranks), expert_gtp_remat_size): - egtp_chunk = dp_ranks[i : i + expert_gtp_remat_size] - for egtp_rank_idx, r in enumerate(egtp_chunk): - rank_to_expert_gtp_rank[r] = egtp_rank_idx - - # Create one group per (expert_dp_group, expert_gtp_rank) pair (collective). + # The replicate (egtp-excluded) expert-DP groups ARE get_ranks('dp') (egtp is its own axis). 
for dp_ranks in expert_decoder_rank_generator.get_ranks('dp'): - for egtp_rank_val in range(expert_gtp_remat_size): - edp_gtp_ranks = [r for r in dp_ranks if rank_to_expert_gtp_rank[r] == egtp_rank_val] - group = create_group( - edp_gtp_ranks, - timeout=timeout, - pg_options=get_nccl_options("ep_dp_gtp", nccl_comm_cfgs), - group_desc="EXPERT_DATA_PARALLEL_GROUP_WITH_GTP", - ) - if rank in edp_gtp_ranks: - _EXPERT_DATA_PARALLEL_GROUP_WITH_GTP = group - - if num_distributed_optimizer_instances > 1: - assert len(edp_gtp_ranks) % num_distributed_optimizer_instances == 0, ( - f"Expert DP minus EGTP peers size ({len(edp_gtp_ranks)}) must be " - f"divisible by num_distributed_optimizer_instances " - f"({num_distributed_optimizer_instances})" - ) - intra_partial_size = len(edp_gtp_ranks) // num_distributed_optimizer_instances - for i in range(num_distributed_optimizer_instances): - chunk = edp_gtp_ranks[i * intra_partial_size : (i + 1) * intra_partial_size] - intra_group = create_group( - chunk, - timeout=timeout, - pg_options=get_nccl_options("intra_ep_dp_gtp", nccl_comm_cfgs), - group_desc="INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP", - ) - if rank in chunk: - _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP = intra_group - if num_distributed_optimizer_instances == 1: - _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP = ( - _EXPERT_DATA_PARALLEL_GROUP_WITH_GTP + group = create_group( + dp_ranks, + timeout=timeout, + pg_options=get_nccl_options("ep_dp_gtp", nccl_comm_cfgs), + group_desc="EXPERT_DATA_PARALLEL_GROUP_WITH_GTP", ) + if rank in dp_ranks: + _EXPERT_DATA_PARALLEL_GROUP_WITH_GTP = group + _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP = _EXPERT_DATA_PARALLEL_GROUP_WITH_GTP else: + # EGTP inactive: the replicate group aliases the regular expert-DP group. 
_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP = _EXPERT_DATA_PARALLEL_GROUP _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP = ( _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP @@ -1565,21 +1583,40 @@ def initialize_model_parallel( _INTRA_DISTRIBUTED_OPTIMIZER_INSTANCE_GROUP is None ), "Intra distributed optimizer instance group is already initialized" - model_parallel_group_id = 0 - intra_dist_opt_ranks = [] - for ranks in expert_decoder_rank_generator.get_ranks('tp-ep-pp'): - model_parallel_group_id += 1 - intra_dist_opt_ranks.extend(ranks) - if model_parallel_group_id % intra_partial_expert_data_parallel_size == 0: - intra_dist_opt_instance_group = create_group( - intra_dist_opt_ranks, - timeout=timeout, - pg_options=get_nccl_options("intra_dist_opt_instance", nccl_comm_cfgs), - group_desc="INTRA_DISTRIBUTED_OPTIMIZER_INSTANCE_GROUP", - ) - if rank in intra_dist_opt_ranks: - _INTRA_DISTRIBUTED_OPTIMIZER_INSTANCE_GROUP = intra_dist_opt_instance_group - intra_dist_opt_ranks = [] + if gtp_remat_size > 1 or expert_gtp_remat_size > 1: + # GTP requires num_distributed_optimizer_instances == 1 (asserted above), so the dist-opt + # grad-stats group (used only for grad-norm + num_zeros reductions) must span the ENTIRE + # world. The per-instance accumulation below would NOT: gtp/egtp are factored out of + # expert_data_parallel_size (via expert_gtp_remat_size), so the expert-generator groups omit the + # gtp/egtp axes — under-counting the grad-norm for gtp/egtp-sharded params. Build one + # full-world group from all tp-ep-pp groups instead (get_ranks already applies rank_offset). 
+ all_ranks = sorted( + r for ranks in expert_decoder_rank_generator.get_ranks('tp-ep-pp') for r in ranks + ) + intra_dist_opt_instance_group = create_group( + all_ranks, + timeout=timeout, + pg_options=get_nccl_options("intra_dist_opt_instance", nccl_comm_cfgs), + group_desc="INTRA_DISTRIBUTED_OPTIMIZER_INSTANCE_GROUP", + ) + if rank in all_ranks: + _INTRA_DISTRIBUTED_OPTIMIZER_INSTANCE_GROUP = intra_dist_opt_instance_group + else: + model_parallel_group_id = 0 + intra_dist_opt_ranks = [] + for ranks in expert_decoder_rank_generator.get_ranks('tp-ep-pp'): + model_parallel_group_id += 1 + intra_dist_opt_ranks.extend(ranks) + if model_parallel_group_id % intra_partial_expert_data_parallel_size == 0: + intra_dist_opt_instance_group = create_group( + intra_dist_opt_ranks, + timeout=timeout, + pg_options=get_nccl_options("intra_dist_opt_instance", nccl_comm_cfgs), + group_desc="INTRA_DISTRIBUTED_OPTIMIZER_INSTANCE_GROUP", + ) + if rank in intra_dist_opt_ranks: + _INTRA_DISTRIBUTED_OPTIMIZER_INSTANCE_GROUP = intra_dist_opt_instance_group + intra_dist_opt_ranks = [] # Initialize global memory buffer # This isn't really "parallel state" but there isn't another good place to diff --git a/megatron/experimental/gtp/README.md b/megatron/experimental/gtp/README.md index d7806c9d749..c235c864007 100644 --- a/megatron/experimental/gtp/README.md +++ b/megatron/experimental/gtp/README.md @@ -204,10 +204,10 @@ TransformerEngine owns the linear primitives (`Linear` / `LayerNormLinear` / `La #### What the flags do under the hood -1. `parallel_state.initialize_model_parallel(...)` builds two new groups: `_GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP` (size = `--generalized-tensor-parallel-remat-size`) and `_EXPERT_GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP` (size = `--expert-generalized-tensor-parallel-remat-size`), plus the corresponding DP-with-GTP carve-outs (`_DATA_PARALLEL_GROUP_WITH_GTP`, `_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP`). +1. 
`parallel_state.initialize_model_parallel(...)` treats GTP/EGTP as **first-class orthogonal axes** (`world_size = TP*GTP*CP*DP*PP`, and the expert grid `= ETP*EP*PP*EGTP*expert_dp`). It builds the shard groups `_GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP` (size = `--generalized-tensor-parallel-remat-size`) and `_EXPERT_GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP` (size = `--expert-generalized-tensor-parallel-remat-size`), plus the gtp/egtp-EXCLUDED replicate DP groups (`_DATA_PARALLEL_GROUP_WITH_GTP`, `_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP`) that DDP and the optimizer shard over. These `*_with_gtp` groups alias the regular DP groups when GTP is inactive (remat size 1). 2. Megatron's `extensions/transformer_engine.py` reads `pg_collection.gtp` / `pg_collection.expt_gtp` and forwards them as the `gtp_group=` kwarg to `te.Linear` / `te.LayerNormLinear` / `te.GroupedLinear`. TE's `module/base.py` calls back into `megatron.experimental.gtp` via the hook registry (`register_gtp_hooks`) to slice each weight at `reset_parameters` time. -3. DDP carves out GTP shards into a separate bucket pool (`gtp_buffer_groups`) reduced over `intra_dp_cp_with_gtp_group` rather than full DP — the wgrad RS already reduced over the GTP axis. See §3.2 for the full 4-bucket layout. -4. Optimizer state is sharded across the same `with_gtp` subgroup; clip-by-global-norm sums squared norms over `model_parallel × with_gtp` so the reduction count matches the actual replica count. +3. DDP treats GTP shards as ordinary params: they go into the same dense / expert buffers as everything else, reduced over the gtp/egtp-EXCLUDED replicate group (`intra_dp_cp_with_gtp_group` / `intra_expt_dp_with_egtp_group`) with the standard `1/full` scaling. The gtp axis is completed elsewhere — GTP shards by their reduce-scatter sum, replicated (non-GTP) params by a SUM all-reduce in `finalize_model_grads`. See §3.2. +4. 
Optimizer state is sharded over the same replicate group; clip-by-global-norm reduces squared norms over the dist-opt grad-stats group, which spans the full world (including the gtp/egtp axis), with replicated non-GTP params counted once per gtp/egtp axis to avoid over-counting. 5. `classify_gtp_chains(model)` runs once after model build (in `training.py`'s `get_model`) and wires each `GTPShardedParam` into a `GRAPHED` or `UNGRAPHED` prefetch chain based on the active `cuda_graph_modules`. #### Buffer / memory management @@ -268,23 +268,60 @@ Under **full-iteration CUDA graphs** the recompute-forward is captured; `wait_as ### 3.2 DDP buckets with (E)GTP -![DDP parameter bucketing with (E)GTP](images/0527_ddp_param_bucketing.png) +![DDP + (E)GTP interaction with the distributed optimizer](images/0611_ddp_egtp_orthogonal_bucketing.png) -DDP carves parameters into **four buckets** based on two orthogonal axes — `is_expert_parallel` (MoE tag) × `isinstance(param, GTPShardedParam)` (GTP shard tag). Each bucket reduces over a *different* process group, because the (E)GTP wgrad RS has already reduced grads over the corresponding axis and reducing again would double-count. The diagram above shows the four buckets, their typical membership, and the reduce-scatter group each one targets. + +**(E)GTP is *very loosely coupled* to DDP and the distributed optimizer — they stay almost entirely GTP-agnostic.** GTP is just another sub-axis of the rank grid (`world = TP×GTP×CP×DP×PP`); a GTP-sharded weight rides the *exact same* code path as an ordinary param. There are **no** GTP/EGTP-specific buffers, optimizers, gradient-scaling factors, or bucket groups. The entire DDP/DistOpt stack touches GTP in only **two** narrow places: -**`broadcast_params`** (the post-init parameter sync) uses a parallel selection: +1. **finalize SUM all-reduce** (`_allreduce_replicated_grads_over_gtp_group`) — completes the gtp axis for *replicated* (non-GTP) params; a no-op when GTP is inactive. +2. 
**`is_gtp` / `allreduce` tags** propagated onto the optimizer's master shards — consumed only by the grad-norm dedup filter. -| Param class | Broadcast group | -|-------------------|---------------------------------------------------------------------------------------| -| non-GTP, dense | `dp_cp_group` (full DP-CP) | -| GTP, dense | `dp_cp_with_gtp_group` (full — includes GTP peers across distopt instances) | -| non-GTP, expert | `expt_dp_group` (full expert DP) | -| EGTP, expert | `expt_dp_with_egtp_group` (full — includes EGTP peers across distopt instances) | +Everything else — bucketing, the reduce-scatter/all-reduce schedule and its overlap, master-state sharding, grad clipping, the checkpoint format — is unchanged and unaware of GTP. -For GTP-sharded params the broadcast group encodes **two** orthogonal decisions: +**Why this matters:** -- **`_with_gtp_` in the name → excludes (E)GTP peers.** Each (E)GTP rank holds a distinct 1/N shard of the same `GTPShardedParam`. If GTP peers were in the same broadcast group, rank-0's shard would overwrite every other peer's distinct shard. The `_with_gtp_` carve-out keeps the broadcast scoped to ranks that hold the *same* shard. -- **No `intra_` prefix → cross-distopt-instance ("full") group.** Broadcast is a one-shot init/load sync, so it must reach every distopt instance to keep replicas consistent. The `intra_*` per-instance variants are reserved for grad RS, where each instance reduces its own grads independently. +- **Free reuse of a mature stack.** GTP inherits DDP's bucketing + comm/compute overlap, the distributed optimizer's fp32-master + Adam-moment sharding, grad-norm/clip, and the existing checkpoint format — no parallel re-implementation to write or maintain (contrast FSDP, which replaces all of these). +- **Orthogonal composability.** Because GTP is a rank-grid sub-axis cut like TP (along `out_features`), it composes with TP/EP/CP/PP and the DistOpt the same way TP does — no special nesting logic. 
+- **Zero-cost when off.** With GTP disabled the `*_with_gtp` groups alias the regular DP groups and both hooks become no-ops, so non-GTP runs hit byte-identical behavior — GTP can be toggled without forking the DDP/optimizer code paths. +- **Small, auditable surface.** Two hooks is the whole integration contract, which is what makes the correctness argument below tractable. -**Buffer caching.** The per-bucket buffer lists are concatenated once at init into a single flat view for fast iteration in the grad-reduction hot path. Multi-instance distopt is supported via additional per-instance carve-outs of the with-(E)GTP groups in `parallel_state.py`. +DDP groups parameters into **two buffers** by `is_expert_parallel` (MoE tag) — a dense buffer and an expert buffer. GTP/EGTP shards are **merged into** these buffers like ordinary params (no separate GTP/EGTP buckets): they reduce over the gtp/egtp-EXCLUDED replicate group (`intra_dp_cp_with_gtp_group` for dense, `intra_expt_dp_with_egtp_group` for expert) with the standard `1/full = 1/(replicate*gtp)` scaling. + +Why this is correct — the gtp axis is completed in two complementary ways, so it is summed exactly once: + +- **GTP-sharded weights**: each rank already holds the gtp-summed shard via the (E)GTP wgrad reduce-scatter, then DDP sums over the replicate group → `sum-over-(gtp×replicate) / full = mean`. +- **Replicated (non-GTP) params** (LayerNorm γ/β, biases, router, …): DDP sums only over the replicate group, leaving them `1/gtp` short; `finalize_model_grads._allreduce_replicated_grads_over_gtp_group` then does a SUM all-reduce over the gtp (dense) / egtp (expert) group to recover the full mean. SUM (not AVG) because the `1/full` DDP scaling has already been applied. 
+ +**`broadcast_params`** (the one-shot init/load param sync) selects the group by `is_gtp`: GTP shards broadcast over the gtp-excluded `*_with_gtp` group (`dp_cp_with_gtp_group` / `expt_dp_with_egtp_group`), everything else over the regular DP group (`dp_cp_group` / `expt_dp_group`). Excluding (E)GTP peers is essential — each peer holds a distinct 1/N shard of the same `GTPShardedParam`, so a shared group would let rank-0's shard clobber the others. The non-`intra_` ("full") groups are used here so the sync reaches every distopt instance. + +**Buffer caching.** The per-buffer lists are concatenated once at init into a single flat view for fast iteration in the grad-reduction hot path. + +> **Single distopt instance with GTP.** GTP currently requires `num_distributed_optimizer_instances == 1` (asserted in `parallel_state.py`): partial-distopt sharding of the data domain would need gtp-aware sizing. The dist-opt grad-stats group is therefore the full world. + +## 4. Testing + +**Whenever you add or change a GTP/EGTP feature, run the GTP unit-test suite below as a sanity check before opening a PR.** These tests exercise the full TE↔Mcore path (weight gather/RS, DDP, distributed optimizer, finalize, grad-norm) and catch silent-correctness regressions that don't surface as crashes. + +```bash +# 4 GPUs; uses the custom TransformerEngine and force-enables GTP. +export MEGATRON_GTP_FORCE_ENABLE=1 +export TE_PATH=/path/to/TransformerEngine # the GTP-enabled TE build +export PYTHONPATH="${TE_PATH}:${PYTHONPATH}" +torchrun --nproc-per-node 4 -m pytest tests/unit_tests/generalized_tensor_parallel/ -v +``` + +| Test file | What it guards | +|-----------|----------------| +| `test_gtp.py` | Core GTP shard/gather + DDP bucket alignment. | +| `test_attention_gtp.py` | GTP on attention linears, loss parity vs no-GTP. | +| `test_mamba_gtp.py` | GTP on Mamba projection weights. | +| `test_tp_gtp.py` | GTP composed with tensor parallelism (`tp_group × gtp_group`). 
| +| `test_moe_egtp.py` | EGTP on MoE routed-expert weights. | +| `test_gtp_loss_correctness.py` | End-to-end: GTP per-step loss trajectory matches a no-GTP baseline. | +| `test_gtp_grad_correctness.py` | Gradient + dist-opt + grad-norm numeric parity vs a DP baseline at replicate (DP) > 1. | + +All tests require ≥ 4 GPUs and the GTP-enabled TransformerEngine; they self-skip when those are unavailable. A green run (skips for unmet hardware/config are acceptable) is the minimum bar for any GTP change. diff --git a/megatron/experimental/gtp/images/0527_ddp_param_bucketing.png b/megatron/experimental/gtp/images/0527_ddp_param_bucketing.png deleted file mode 100644 index e6780834fd033a873171dcadad9183d46923fca2..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 196594 zcmeFZ2Ut_vwgyTVE{y7xZkzH{z9XTR_HxlPtwv(GWd_{Tp+cA}Ai)=ug@)MR92 zJ9V_xO~}Z0Xp)hEzCtO0l3w!5jbvmJ?{FF@oM+H6R}TysucX?>H(p5%UNIG32{F=N zNi`=|w6ibT6L|1Kdt!Jc)p;c()YXlEXDoW7+`-?`9fNcAat7W2#jZ{pm7JWgBI{K| zeyXs&_pLEJgS33nSQmY7CyWQsgHzB(58`5Ck{f+G9p5TBD7{hQ?7KCN_>Y1kuECo# z6bEKcnvAo*s}sg=v++g|&fD7q=ZgKg(9zq=3*(6UsUq6f*E{g%YR9}iHdnVfDi-a$ zHR49^q=F-kXpi4ZEnJ;&E}H{LiEYl0R9ze6>g=*P^nrt$FFes(l{X9gT+mM5fg9~N zi_lJJEY39mqvq}5?YlWqv_H<9G?U-Ad26$My}g0Kww>|Di|zlCL_rPXK{|`AUEUm$ zS3>iD-uC-m`C_~_*ZHq>hjYaN+q-_e0ca2Z-(?bm!}tPI-zIiqfv&)IN{I1m2xyra z0be%%s!8yw0s^)9C^)esxCNB58&Lyfv;!}S7$HayCbj>z#Ju21AvCEfVik^ zzVLK)aw0WXJ?8C&+Z1Q<16$9VU6Xp+lC~dPy1C1w9x+=N=&vMUYt^L1kPci6*L5ksEKMR1RvYkWO&p&6XAbZs<>E zEVcE#?Tr6;ETZMEdV2BY@@i zKs#VO>^w1^4j5lQ7gy|WvZBB7YNLFc9{F>%@4I2>|2eHsl9Y8Tu&EKYG{!cqBeVX8 zBsUv3-((X=$N)tf)B0b&{=XQA1Dns=u0Md`HfFN@`ipO&od1Z0_2MH0V0mbWI z0kkH@t0BQFBgQLB!hM^FLCnkBOJp4)k~-LS`u_?zP*>5A(Nz1c-nU)ggK9Dws{dH= zZ(aQ(|F6Ko|A{8}Maj$l4g@5&F8y{*AO-9_X{Y~#67v@to?o0KCVJ@O7+(W#Ur#_? 
z9P}U|dIvzj4my**@k@|usJf#4059|Rx00l{`e;uFH1O$%L*oDu6yx{BU_AiPkHa_# z@Ja%n_OG4t0&a;4;9>mu*9m9m0F1FAzbbYf-hi)75@i5-0w4dlJ>cF3h&zb}Zh&;s zS!n{TH%m8q{#|-C9asW0fnvx#W0d4nxWM+T4T;hLVXaDea{b%j$ zhC==WBl}%qfiLT~NZlw%&1kC>kWGiJ7r-arTm_+ljpje*zfMT3Q%~Q$Z#5%8@V?jC z_SFkuLBzIveUh+iY&@*{@9SLEk5(izaFADaLr4Mlal^x3AIKNu-;Wo*6QG;K!QTciNc@#Y2nfOv5GTP+EUA+LS@8w9Mp#Q+oZ{Q1(8|mZ( z=p@lVZ(k?7?~i{3DAwTyV2!c&E}r zR@>3W#`le8z*w8@er>S<5RPCxj)|agIDpk8jpgC$g+cq;`M3uF5C7yqn{75`_uDAz z$bz)1>s;&xZL_hPTZ0{Q1cSsiRQX` z-;m2c!zRCik`1Gwfl|{mQ`a~GNaqIlz+wO@=}!O)$r+VZJY6m*|qDeu9bv6R+ z;S2b*BwBtQjF7Iu59qSZNALz9D^LmNLh>;-#zP0W{(HgqzbxkeeZb{cG5#9?7s)>b zF5B1+5{4l;XWPk+E%N&Z+wnu9NMfi$az7=+{QdmV9v&pg)6h3EwFEu`T`&L=-ony= zAg;rApky<&@NXha0H$0@;|I*yMwqBe%c=s5;vWGZC$yi-_vp>`@Y#Pq8v2zY|J$G; z$-hEF{1{Ix?l>TO|3*9e@0ARaqyG;mcG>km|8?%xuM+b&GQN`kl=0Odnbf~Fx|(Ks zdVuQruFOa+NTisK!FRK}>Gc1V-QBFRVJgK83`xO1tS?674~*?Lp1{-J4+m6q^>Xy^ zcOn_w?IZmcOZy#>QC+O58~i2&F={HJo%UJ-QSM} z`0XD4ji89sKMzGTHw^q=Ba-hIcMFPaV*s`T1VHG1I9%Jl{#z-4%>n=I9KgRUbcJGu3T zgMY{1VkAP0)c;Q;*Y|ARP4K=>acl1hbjD^czs+Vd4*BPdpP08V z(93V_)}58V&=6w$>mCTu6ce=WFaUvnfT<&eDA57lK#Dbxf8y=$;IZj-km8Lb!?+dt zG}y{X`QhuVC)SfTgkKLxdjUSzNq>jqShOFh6z2jEf9r%iKnJ>#=Dc+3o1X;B|M8}%4 zDl!uPI9YeQ2HQ*__%&bPdm{HQxdo)uykD~Q#Q)FI^+=*i+Q&`AEcFL@1mfaqj z*$@z(^&f!zZ)9{!0%E55CqGVDfA`a`M7HOJufN^;oyx|G%?6}jwru=fLDu@K1sNHP zOh;YCGzj#;hWs>-E^Cd#oifjtq`xODr`LKvS&_bfnz9O+dw#X>cCMatir%{?IWLQ> z@Y7n314YFbQPu1`|aAMf{d;CT< z(ucezKG8UBcGy&t^b|z_9yq-I4d`WCt@WoF7O=`mwncM#%iJIBf36p`4?N(q-f)!( zn8H^k8_~1B)SZz7mpuGw@-RG9OneXC-p_1vUD;4V-Y=~-rwx^doMzj}|Dlrex7lsh zz=lF9;(>l(PB421sn!8OFaDo4fsDsK0*X!EK5ghV()gy+?B$ztg^R(4dTNv_8W6MLq){QYt_SIGyNp?~-W6VE`<#5VwCLl>dK zj9(>C549K+5rtK-_Ow8pcT!64TSXQhq1Zwe#&e3}-n+Yo^;B0^Bn@{#&)LLG4(8p- z2dA94UBCE#e_kul9?Hs<9yJ(4dD6Jz(}JwXp6Gyk45EtSO*=f1g4T<>x#fvYx3M)} zR4SL$`7&AUDk{>-ApS$Lo@t$jkm_mjWV@3>7IX^pnC6sQ4{nt6Fmt%;sV z&OfG@;M+x!IcQsWbuQ+4OCAm}#&^n;;pPqc-kolT&%xyK(S(|{_EYnkP7Y;ZQoYIBVE@&swx{%wHPWZ(lPOv8CXJT(CeL5!Y_Nta00ro84XN^}9i%f7rM%2^jr(c7M 
zOH)5^4%7PlAm+6c_h~J*BlYgz$PCjjy?7+9Nk4aj;dQv`OkJ{!bs8kQBsiF#=B*)f zt`&6lqjNt0#e5z6kNc~u@>(yW?zX>KWTosqI?e+L&ohnsk|Yk+89xP%a^dv%uJ3nw za8weXwv(Al%hA@HrP6{1lqd=;=yk}6M&vptzM39x`I^uA6x?K}8M?3zM@`jD@lv!jX+}v^ z89}8LPB_3}HFwK~%c&pCU}crCf@9#!B)=|K&8~)wO-M#KRl__e!v(CX@i^9-w{L@9 z+|iG@Wya=@7!+loNVL+f(2mw%Ox;B+(_!FMBD!SHToU1D%`WTf70xe?uIK?a)^_Dx z0}sT%K506CCYoJc!S;Q$(etmwYjiIwyWWkek}nDhsmBWSi=2G3Kdl1HV-K&MBX>s* z?FjAcs6?=Mn!HW8c-D}IK{XsIOr??TmXO@vd;IV+CAD#*oY-N3`eial`!u=xiBM?5 zp#^doHGBOEgThN9k}+q2#aw&@LXfSd(@Agzis~QcqZ!0pusW^RUM^c@tqm_o7^111m)EIBQtepPSh4eunl&) z^D&BjV@UJij+%=OYD9BrK$=&nAb^j&0g8&thI!uUnBu=xtH3c=_39L7RPFura<| zYjS6E{_B;C&AiQsZ>9MiY?R9mkdJc?i!tF2><+8;tE0BVoT#z*XWY%yQ#4}DD>rbR zl+&rZQ#jse*}Se9te{_3ie5cTpkOuz39)uSD!k7NkJR*gCksziqaOG;QHs^E5&O)? zq#O;<2KJ9B16otW6J;J@26#$etEGKgLK~n%Rc9h7mKfL4j43B#?DdJLUaQfJC-s17 zwpGL5V*}ao;F|`Y$6`U7_KGf&2uikfx-~O-yyhB>(7`M&=R|=AzN6Iit!mW?L%h3@ zFt}Q9Qc$%vG@fwY&ax)*T%E4JV+Pp>Gm^Pp*R^TK+f^a!bVMh4Vsws9b!K$>vOxFD z2<-@2Od7N;$1pSUMtw2UXZvSyo?eu(;BI9ysa@gK)>QGG`(cSev-*6988Bp0G2J3a zps}5EM1zrAgjHVEUI2<9HxCOM)>0T()5?fTyP*izH&jmGM)s!!-@@3`6bo7VWEZt1 zv#QxcXieF*eb#T&I|RtHzWwe@)#r<8G#a#X4c(Mq0r|Zd>7ttFn}oBc$wviIS?M#- zD?v@tpTX(7=%7=)Q%G#w5QZf?)`g}`QvrS@oonB|1d5?}9IT4S^mg2{y}6yQ+eq|@ z#EwXaFVrW=9s)`ggF;X~`CQO~(4p8WBWcJI1B7c8mi{g1YP{$as6SdfM#>%Zkd9rM zHY)wDK`9@?{)U@CP$g^E965rTvxn-r#7;*}pTkCqty0h#(^*A-AHI^d>sqIyWrX3o zgl{pZNOQqPh*xtGc0A-^M}~=6_>`uz&ST13cg$(XWZGI|ONXdSSM5W&4Q|-E9I7n_ zF(ib<+}q&^a)s7w9=ly(pMn;tXKTrUOlzFcnrGRk)LdDOy`SYokUfg%2s3?#hnmqsIgyKjg**-Dt;2wHiqmp4bDkD`!w=U-dk@~B8 zb>Pa*FoimWx;4H~agd%E#ms}1LZjwjyf|4XwpO5aAu?G!agAv?ifX<|XbHBKC%-Zn zk9^W84efPWU~re1%5X_qEnWnJCJ%7>2yfmgPz)JU0SF>x(Lh4P$}nZJY|MnlQ_1!{ zX*dyyBOL6!CGyR_le>?1p9kSy?uo&a=v4(*#r z{CES+D|ZGmZXf-MAW)=#0%Es)*1r75TZ>QLm7`V201DzYdBXyg#k_*1x8xJDQ9k{%2eYcA!HeGVyz14x-&w?VL`D?Z~N zoe^*l);J-}pnr{v$ojDvr6FH}^wH#4Jkq{n;;ZcIC&thob(I5ynFLI{X4UxMKH)<0YOWb+%ec)9DPX2*g ze38oaIkYZ^v^g{5{h^kfn(1OQLMk?=UlNjzu(*vgP1MOH$agla#P||Y(F&2t{zPAe 
zXwH5J>yp|HXK08SwuihH0{WaR@8+p>h@n}*#E)`K33jD_=Yt~qDMU&o#KN$L^$^VhOqtj3LK$o*NRwy)BfE6jwpm#)xJNh* zhrR`J*xBoOO}-jNbZ9Wog~ODOI!x|`QWwRus#d2&pTlP{Xmg=t>lh(&LEMqoArR(P z6;wzqQ%0X}isxOzKwY-nJ$;yYGUCN*qy}a8t@8#N`3gZ9=E%Tn$TmT$V40V6F_(e` znvz*YZZyqdpvVEj9C$aCLS}M~^D=jK*m5d#Z`6(2F^xSX{WB)}0c-}=4wkPRQdHT2 zOd_N=nR~!;f-)ekNm9Io)JQI%Y$TcsLf=oP)M~q+!=RgR#0kD?%RFGW@@Qwu78ii} zKwc(Fe-a*c90 z3JRmEi-O&}hfh&uZx*$7kGv2TGid3XNoAW@q|x|Jes(sM1)PUcgr*8-IUJ8C74V06dF!bc;RzZqUfKJQ^=W9vYjN>7zgV4PlfX)b$Q zpK3WTMLcP3?J~c~+H=Bd$DD*DN}sC%R>d+EWmDV}#5LHsz2fmt59sYeha>tuoe12$ z?$tXFq#gs@5iJ$`7qlG|rKJ7BVzT1EmexXH3J*B4Su!s82*U7!sDG zcgKu#iwKpyrxkz63A~h%@zF11v0!eRs>=OMg`D4V}^w}8H7m=54@5_6S zpcikK)Dx;=xRCW&ZWl@@WqXcPqflK<Aj6vnI52)Zx?6k{wyKdz9Ep1)p>`$;Lxp9;T#9 zyYV2~L5H!pGE%~brOde@4M}HjOr?~-0F!&oO_yP4Xz!UIOA97bKP22KCc1zVe;}5P zEy3GX-M#=hPL@w+WpRn}q?V;4Psx62i$cpZsz%G?xAJOxO0EtII>5qVFUFJixF_7% zwZP#-IU?NNP!Vl!?HH=IuFm0_=&QxeT&;@fJQZ5mc2UKlM8nT2@cOjcg7dItm4x9G zRaCDddqWjlaBozlyUsVXm*AxCT`FfSL>j_;amA(b0{V#>7x=@FDuJ(I@kVi3U6SE& zYVFb9B8&LN9EWT()C*$&dxT?xT zG`p7E6iC|^mbcsu_BkxZl4f3 z^|r!XUGpnunrDvsobcK01!ZXEkUf}Ix=zHUzy zyAPv7rPNlR>o?P|LcwB9_-6W&O2m%uqT^@PNY8F~DlF||USIG>*0m*j@mLA&Aj=(H zWY^i#Z^A2JrRp`PtE{+2$Fqnz`}_Onl(D8{-SF%%<-TlUJ@w0q3_2xisJ0@^XeK%x zq(ZBdAX;&_r+tr@Nl?8T1u~@#Z1UcLMi{HbmaPomgY|~R5ha+yqA;*BzI?-8tNqL@ zol7@~xAoq-I?NGQc6v(h@aI5Taf4Wz;nf6%J-q=54J>9Agyq=DjN%;6G+aXKF^MTE zMjv5_wY$n^cyWow2@73`oxKCu*^l9hMxs!%fu)t+*e~#u&oq%{m)!1aHQ7JvJKU1P z1jpHn?5JA-+rZ=qt#O0d_Eraa*-;`rN;1ZDZQrrf)3-w;oHIT~e@&lTCHz*EFaTeVPKh$nXzKb$Z!S)zTp(xqh| zeWMQX@F{(7K#C#116_Try)dzfp0cj#Xi)$Sse<1A{d#=fzZ} zNj>t1jm@YjtByu+t!6t+aylp+3AX(_ctxuUq0~%a4eb@LfR)T*EjL+LFcj3jLl%GS zjT=xvm-I7&Af7q>xN|SJpcl)|Zp%l%B*FN!QiMK0BqjLf=%%ZBrXWqO!-Ou)ekw0^?9I zls-Kc{!;ndMZ30ZChx(z=e|-)(D=wiGiKMgDAg0bk&C*}w8Ar^Sv?8`Xk2qqb-LpdF#S-Xgz~{*UGntFK3~svU#6-^Zup$CIzPxb zDPTzPsJF9LbrHS~Bny$Cdc)VfYc}e0wAhGB6ubnhDzE-kj5&dRAmkDGQ0$jPk=9P* zD2W-&j`|8ilzY0?V_vIymPz2E5*E2)_{tuPE>oq46Ov+^BwCM;qfUB3)i?oa>DvjP 
zI7#nYi!2{wxZWqBc2rW)gkma1Zl6Si4L@h{7_|7REZ5705l=pzEYpK=S5OP<1R2NN zBa1mGrg6>!GRovm+si&4U0sig<8nWJroe@Ko# z_i7$7$8Onf#|QNJ69pU>qh(JxlP}L|Pz5j(y#<1oV1ATuRp=+#-obi#K?1aq0&e5T)17Zy;IhQic1bJ-1aSLmI&wW=6T&uGO2Hci_V2jOg>Jk=1QS zsgXQ{+jT6`Ft#9cOZ)zWU8rXQ?)$67^3S&+)G8AiXx{7!iW%}yLghyl?oz4ORd^D7 z#Pi;xcwEBaBcda40uJ02nd6*EK1VSd^tmA!xf+za!@xzd%p=+*&TPN!vP#uZ$&tdy z%p<*e*LJpP$Oh+k&vvKD+1F4hRJTS$0FcI55;&x#Ksap{ro|3H2KK#6k?7>p*fW}9 zp5EG7m%Pu~p6oX3SOFsCOOJMta(x{_JBX_H4q+%pgS8F(Fg|F2+&-37sj)z~QEe!m zNPlaA$3uXy-1G^ZzBi#ommkWyEMj+%p=_6ZNqt{0m6EX5aQ<{}_KgG!aI#K{Zp`dk zvR092n*Lnrmq&)4Gr{t%y&`HGwiL){fGv-9N5(YdhF`NDP|J+W3Bx>gekIyYDB}8D z(vUE3ID0s3xf`TTfz`4StL;)+&WIDIf3RXg=>xqULhitn9hlHC%f(hc(~v|v8I9RH z6n{Xl)*4S&Qg}-<=%BYaQW-j|h{rY6fW-^@6Mdo=DKQRof+iGb-MVG-N%ko&fLs>s zr7wwAh>@5eo}tX&Ie%FFA^n8L7YwmkgF#li*=IkN+t#v!u+!%Jv*KnX)b33=?==M! zz49lrh;au`Ev_B~Rd#I+K(#51BUgjLCuwFYawZfsvx=-yN#H||sD|Q0$Uca%RJ)ys z|1$g{7>IFP^n{fxqLi3z*{-ua(4N_U>PWB z!{kUZed}_RHKZ#6+1yFD`k=XwQiWO`<}BcH1=9!kV2_;Nx2)DH0QlG#cDrI}c@3wj z9@PrTuGd|@r2a@J#W-fR1my0rtYh7I%xbCg&RSS@*qw#R8?5<)^;Mwfz9+O!$8JR0 z;AAi(`duI+W{bKFAYs2BIwUOZQFC|KDmAifXryW2cJAx0*n9ih8j;gSvxk|Lpw~uJ z8LX;&E^{Zy!&nhX_CC;h7AqcON?~LBXGv8(Y3;;4jOglRL>eRPc%N6QXHK69RkGN) zioFh%31fJBC{%66B*s_S7eTAS1{T^?JyV~={D!}TY1mqi#519oOVTVsb$T=%@@cz(C_%E@(S#NW+Pjcy&_`KspWDzH*j>F+*rqy=$ z+2)JY7}A5^j>Z>W1Vnl)W<)WXz~*|1wjS)rS%uoGT&>3SnV`sT1{-fp4oOBb*kf+4 z&T}Vc}(K?@-OtwPxJlo#D{*`jo8E^Qg zQBKt~ydsigigKd4k61&d*&2U*B*UI#p>6%PghFeL>Z;;zB`=aYGG|W_rWT962CUw+ z$fZ=C@JR;#i{xTzWY?6#s^^K!f;kh8H16xhmSQ`MZd$aY7*0v|$N0lq*odVNcNo-! 
zxeSL^N%^E|TCMf9*?>Nc&@iAWyBuZm`1nAGJz4!OS5&z%7o>v>byi%Hp?Uf4Dwo?N zXFhw!wOk;30+N$V^7TU0@GqGlFd~coWDfM1 zN3{!H&jejs1!qylqKnox&^(<~RtMD{?g`^y%8eHS0^W&8Lh0p27gZK!Fu?OhNKJ_2uwN&JgE_Z9a$M4K+*VDAbPJv1h)@@@lMw6k;)h zCV1&7@iM{aPJ*5vL`ke=8A4U`oGV43WasI$;P(k?w&||$GGoq>YM~2{tum7r+vhlk z9$ApYWLcFcQyNMs>pw~jSDbHO6$G{WW{ zJxeC%GtbX&_a&I7SUvJgp)rdKv>p%|kjzWjJviV!eYflAbMps2hVu1v_Yay{x;+_v zO-{}{&GdYQ6HrW9fRbBj^RBcFDr>$j|3P9STOw#Ls1#%G(Bm$1i{(c`0E819bgSu; zGwdRvr^#F=Js;j=FPqG|Omsj0+>JsUf}s5PX@b6(=)038rEh{tqR;&R&>V(?;ddM; zS?P30kHac3p)wkd?lBJ_G%xB6N^q#N2+JxkqOw<>2S+2d<*)5!naKW#S+j$PnakH3 zFuaTK>wD1Q{XFQ)G_pK>UH9H6Nzwgkkpcd7v5MLWmSGaRIxZrNi)rA}B=Uj#zR`4( z^=4yl-P>iQpLH+yaY{fYejas;qh~HuJMB!o-_ct;cQt0*4w~0i>MQio;-CO|cs2+4 zpnN%JHA^R;HjjAIou(-Ap-g<6YZ&L{$ty zY%FG=tXZh5}A{&?g$uQ7Adp}ha%2cq#4Y3g`fu47@*@J z2|gdruTgzVBYVD{&Y~=)MtqXcWgn*)nDZz3AK6gsm5tDEC@J*28bTELIim%OgH%Lp z;y72eyEC5UiKd-yC=3`@ow@NfYkEO}w!g>fe|_JCNWqfhk61Z0#yR z8d9O~ha)Iz0K%AuX!#801l{~z|K~jhR?h{R;^u)VlM-i?mHoDet?fOIa}#}jJOfDx zKvkIvzZ+X+%2*Bjm&Y4eWBYF7ZQ#He*bGp3#f&0|3Xa=$@~4EP7A<@T+b&>hrTAYT z@RIz4{ftZ`d;Wa@88TqIx+<41lQHZ>!nb9;U<#(==J-7!PsF#iks?34D3ac0ufFXQPZN)q&IKnL|pA|wwtiB zG~-*y(m z#YxlqzOqmSiWzHePGmx(UtH+xi$nAAIn4VdTF0)l2^>DMTB2{cu`F;1E%vKC#c^60 zA{)_f$A#jVbCF{^mn}b#19$D?sAAA{X5WyrL6$0)y>`wWNfwkX_GM5>xh+Pwv3DCo z;*l+67ldX)Bgbgd3+5`-D8M6(LH_n3du`1-qk4*3jBcxbZA^Ay$}UM!32nxYfwne( z*_U>6b;Nm!3x|fBsWw&2&_|Zlp70zMJ`~PLn{%%%DPRFjf@06EDDCbs?8y!b!gvC^QVvgG)|X9MjwE1_c(MyfU`f}W`BAP4MQi2SrIVb< z*_6V_Nk_9?*1}WqpJf{s28PF&`z#~(?9MAy)k<-878!lFE^M#`*boaKy?90NgshrE z;JCYwhcckPU6k`5`-}gSBPg>6JixW39@cMBBs>XFiYg1uK6f_MDLbUX{HGe<^Z$tA zB)Ka1(&`FGJ?sNaCSr>A{GpJ3Vb&*kMPnTosv3r0zdCe`LpN->)TTywF@t`%Jx8>S zP!+m7t(3de`*>3Da8;*ORgj{7u1xm95-a`AQ&gARi)h9$ikYq=hMYGWw16(`iesM+`vGs-GlJ6qj`!U+@l@m?mMNb<+38}t_(hiXqWPzEm0h= z3HscxFUwDZc@X@S<)Mj@=!n9}Qo9E_HtjTRw;mNd*DQ({drkg=?v1fe+v4H9SvP&V z!p+M4Un+*K?weGt@()|$QapG?A!tFJSu5yu|D83c{@I#@X8j9lR)JGY9adhm2Zauu ziT^^12G=nHLLdFsbIgEr#+=}YZ>yJX^HQ5so{W8Daybapc7}V$J-1%sPGec$apgOy 
zMT)DQN2RixkhU_06Pdy%Py#pT9)so~z(TZ94E!EC=K@cTwpfW8=ka{mD}ZYz7s5-7~?g?5d*V zvHh@X2^PL%pFDGqSt1F7nbK#8`zz0WyDHBUzf)q&@Z0E^^=A+zs22lSWs0ng zUb`f(aee&otIHe$POY$>TP^mvwuK&^E1#b(^9r@fWKF;NvgX9($8vMuqG@P$C`RCD z{iB|nlVrhbgOpcCEyIkC)`VX_>ubc>mU>?J5Hr7J^o0jO8s4vK1Fw)P=vMkniYEE- z@VA&~m7ZwDkKM?BJJBYWKK9PDyhF^^m8b1iX1Hj?E!)~-r!b7?6+?`BW6LfnrWYT+ z0Qfa31Ov17&c18h7txNi=UHMt`@Y^CiB=R`)ZmJ#)`kv|D@q zY9GjrBW~V_&mSg4OL#XSC!>3rp*j@glnQBR|t91 z<8XG#3rxhs0T)$qhvve^d!HKv13X@Vd!dJ~A%MGAP(5Wm1a=*`%iDBkjJ?aN6W@vM{7xiJ#mP- z=k#1r4RcQSfoP_9*q-+mIoFtVkCdm16r10pUU)yxdPY2uYLN^0zXR$%HxHBO!X+|G zRtZyL4lr~e6BL6-X#J-S#MH;qxO=(ZiHpb^b4x-VGgESCPDmB58nN7KcuvvK@WQuy z={A)Ccf4dd?{!I#3#*Tb9Cb}ST7kRqqI68w|5H>-^L+1ASbWjooCI!RpU&mWhchxS zpWxEG!QCr)$lsx(9V~=fw zvy=s+deq6RmWHFoWA3{(F-ow}&QviCfj4-bl36hFS;(u`Roer_AvuwyNcz!b1JXdbD)^tq!Ie=Q!2*` zI}G<-%X>=bsET(%?rF}R*0-9qWB3xhg~`<~zn;(5^1NH(_hpJ)qPXo)=!abj!K-@B zmbM5@C^EXmfGtlJX;&lAu)dzgtMem=d&*)n9(g^x%Lyu2J?;y;P@ah?H62Ml4<|k* zLDxaV(3_XwO(d6^s+`}bQ|OOaIX=9)Tjp3@?pea8ySpYm$Kkvgbr^f^mASW))n5B7 z=XOa{ySJEEHidnhV2<%qe(u??bGB@n2UT=<$5YJxhxI+TpBUyTo-t7fw10f{F0E5~ z;Yb@s^ER&g?h6U3!uLCv&_X;O)`TWoUS93f+Q(gAO}y^e+?Q@-b z(;cjKzEgBo6fe==f_If4qr5yVyh?h@%)!@tgu!(vs+L2|PTTC1FOlE>? 
zEcuvrPEzUzTOMB1;&km}8>;XUNflo5+8-E4QiU{;dbxMn^6hGZ1#J95xi1QILvCkE zUP*a#peE#n1 z7-`whe^ThV=4dDFyV@h`S5EK`dju^g;z!)VC>R=yO0w2xON6&syA686v#$0he3M4U z>AAfssl`2Oy0dbz+^>WC@M*r%U01YqsA7bL_lE?yo^Y~D)bbptSpx!RWDs8HLqL(X zS1pu14JkU|Jy>&P@oZq5gkTO*IdsUCux-6($?t9m(^xCPdo^D;dR5(c}WK{&0K!@GW2D+~`^zo(i?Gs%E zE$KI_?C*z6hetm&>Bh7co_y?eMO1DwaR%%vgV{&>}BG_pF zz-K6EUcY>?)~PG>c3Gn`#W@XXkWQl5zRHLBOy4}Mr0X-x%Ls_ab_4>g-TmNPFKcP9 ztQ>~Rx=*Z+7f#O=CF9126pz;mmbpLFe#)!S)`R=hEpq&uTbA1a-fZpu2j0?VrPgt_ z)ypBatRq|TNk9w2XBhCzqlCy;4^1STwI?s5_m+K-h(~{1{!sZ6dC6>~_xzVPmWA-0 z43rMHEDMg#oM{qTRM|ZisyY;}p30s$(vrNhw6w9{pr{olRIxrF0+p66f*IhDbpGc1 zYE0xVef}P&SDG40^&X%LLrq5P?*0u2J%yO4DJ9y_TGBDu1Qp9_~k1rx(4zmdH zm$$S7v)8}&W?^PU{HTmO`?IUY#wQE4!S9;dp7UBQQOF6W;H6>qW}3vCq#JhAi&C0& z!#*&8P!hIP_VRuLI^9lQXQ1v{=LFmYc1u>?3s8@1HO}uO%Nd^OCWdPY>F$Q*(2R9- zgH&dU4xi7~OgpcxWoVZl-SXrNRkytBJ8kZC;+;-)aX-i*xi&d0e*OoEnY}?ku}UGv4{cY~kag{?T#|77d1&%SHLliz?-~L{+Ed1-!M=>nnT5 zK0Oq92Zvjf4#oj@?8d?H)dfrW_}z(Jx%1g3eW$M$F^dRB`@f59*gte2*`mL~C6__k zqebiZd%tX{F}r(S*I$&%%ieL$e$6Y?`}F#A@2kqTqw_f_QZ{ANa>^|R7mAYJ9`2m` zd}RFVDc!q%4Gy_U0SJAHWm4vK3Z9%qK9IlCo1zBp-?9SOd$2mgs#B%*(t^ZW!TQ;Z z9`C*$y?~=E1c&#C$^+Hj9inp{#)Xml^v{YaRPW+eg}h&R7d2;3b+pihDrt|%nM(e4 zu2c2q!MumjU(cAeO+Gonafr^W7^Tr{SK}tnH91=lkDFUI-hHJlYA?5{`tEY;g=f#6 z0Z@iFY$c_e#)0KR$%|J?#Y-QbnK?)~_e!nIkEPwkwu(SaGf&8)>tiAt82g~uuUnc9 z&&sg;?JMQI5B95dZ4+7T-TAUiPEX7x%Q|n?kO0?7X^VqVS(v)eN_mCo9*I1W2bWzl z)f-mLlsQ=PrXN@wG5$3cVlR; z30EXpUwGhjz*5OuK-UI#r!_|?ghj@yP;E)Y_si;=-Sdw-tl&n*2X{5Zy&V%7B9b;Y zH5PDrQ~i}Hr9lZ~7H|8Yru;UU*tlaWrc^p##kx8f9Qn;l`n=>6gJWo#5?OlBSBHG* zJ;4cB%=~n(NAbyceb~@)8FqLsL<^dYiB&>USm& zU;Cnq5@~ntIZz-Z&6SJ!pkA)Oi_v=Jal5%BW)X66*_vU zvif64O7@k$sEAJ~vqKTN#@4E_bozb$$m+RM2d;-W+%>4tl<@qp7I)+wPUos+L4PLF zna>qX?JMOhI{HccY9s3wa0BiF4TOT0L-%OozNt5lUj|AY>lTi>$FpapW2V-$GE}E6 zuVtCGvrIQ)7Bee7HN}w@02zIqlb%tb!8V?zr4~V09iA?|0bwv6UX2iQ)AvPsw3D zBUt#F=usVU$7WS2f$O@v3014#zP=P}X2bbP^xkmQe9q=xT^s&2Nx2zeddb&J=9qx( z?UWY}BBnX-4E8ha_p;u-qvT26AvK{KWX7=E;-0Gafo-O)Z*mK?2daHU*roH!I`$i0 
zFgbeH?aa$h0TcbFf)}Qc^F#5MXGRU+ma_$B?H1mbfn#lzs@?QrNf)`4Va6Y106_8m z3PH;;6g#<)le>GaNb>5Q)K-*E;c#8Gitq81K2$p?p=+o-pfSoE7o;jJ^ctSm?EiHZ%9m_cYRP=xIF%{Z1R4e_+6R z^L2qQv!DQT)%x&Q%vm+)T95VXeyj9X6+`2-Qt5N;+TGtX1eyXhKIr&D3%4Ut3Mk zaUEb$sr0Yang%V*d=8NdSdsYH7ZQVQ79gm;EMx>0$;C~~-E?zS_e7c{Uw-H(?~{Gy z!kgCYhvyUvBP{$DkNX}w&_e&=l}_{Z)rpo^r$Q14b5KvyRpgRTK7|T#}6oRr_Y@NX4MzZewZC?62CIsQdr&JeEmvb zd+4&Uko1lu;;Z&}r~a6(XsGke{MUEl%M*6xcd3Q6-#t6F)Q(-dz~L0strU%6>!=s~ zTGc9P?QPR%c!kcR(7D{O>*3eEt#clztB*FWJUfIGh~cQmb|$1fMcIYLV*3wq0Y?|cr{{7wS6-mb zM9S+NYD!p_<8r|OgC?ZTOwJ`cgFtLnyG2Z#HIAaE_})onnqepvn7uMVOG-1Mm(^k6 zJ@uiG-q7$xh+pNla1E>xMx5VqL9)|haR8*GJ4B*nheoCg`la=*`T;-FP#y7JJy{?X z*s!}lXh@F65uowIo5==gYFSA%Hspmkp2Yqqw9wYi$AwO%gIa3eOfM|o6mp&WJn`AI}c5rYwh}7^q!5&1r9K6Y1pQ-=OUa#VT2$=z-g~j8nbp#aPzXMS?4|)CsV-^SY zAodxy?;QQ$36Q+S&}VSR?z!V?6j3d_-eZ(%*7|S8&mKC|q}v&qJ)cr{WP>YD7ALxc zOR3~3LL~rBKk|xzyz|g;@^F?f%0S%cO9j26+YVcE)k?W5@aRkAu2M0-M=X>#hf)ky zrW(nexjkp)r;mnDC>QSJ{v{YP^L9N90C46itIigHOu1QYD)Ly-p2XEEV zZ0rKwyP0h4?Q8=nw82h3LW)uBN?9Y+h!~E=KlKd^AF8is+qPUz@>&k^pqs>b4YBa* z+b?fYS~&E0c|)aBw!&k-hPzg2L81xkFVB)@!L-NV*BVg(!*07J{~K|tQc@_@BEvyp zv-pb|-m398`Cq>c%P`<>g+Z>3E>~DNnMNEiT|YE3+8^BhE4maRQMgWg@^$ zxe$jQ-TU>jkfH+YK-lm5ZBxETs(O+5Bx^Gw1+jxvjQxBUH=jO{uH+{yD|_Y60qLLD z^y6vcclOFKAa>ma4|-bh@VBmD6;SNQAEmGbQmk%#P;Cmdr4@z-q1f*}@;0p?!%m9T z4T!v@`^*Uvjr_AyAq64PSS>*AD0hQiwM?aTp+_RmeEffS`^Lb!zirzzw$U`Ulg4Ol z+qTWdMq}Hy?Z#-V#*JBhiPW||wWntgR==T)m9{q$Ldm<%Ye)5d# zID@9;Jv}FadxC=kg6Ux>?c%#ICz9zYV+22YJCJ((US1Nx@sK|Bx$x)*!0$-JlBwwt1oHG^7=6qyRED7`G}bLyXEVJ4Y9t;HPN! 
zn5T;zAOMs&g`;Q~_>~kPa&kJi!ocVBXb%%g&ju8hx6UXfKokm( zU}pa}p7`3c$IYGzL*@MX#_X;&EZ)!7l1{BVpx5!mJjoxPgf;&sZ6q+s8^!@k-Ap}5 zy-0mX{dn&lv4_|YaySO z!u@OO!GHgs^u)jO%|CudAU8NSYpuzj zSB&0`@VT}x3K222+7eQH!6q?sn?;*#{OOSo#EVB~fJjCTSQ|hf z=YSP9dkb#yzwt!sren5m^n+$>Vbuu1hc;XD$n9nhff<6amv00fCu31`M%L`m_{85d zY2W*={a^xdgMUMC0@BxwK6xJcx1xvYLmDg8qC2_1H>l315gTMuD)(r!m20zr?M=lC zBOCwFU}A!7!v+h@fcl?-ti^}!$?+~MN)cauF58!*5{xCF+hr4t}6AR;Uk5fqUWgNlJAl?VL?jUYhU@bdA`;hV1il*ZJGMkxkgQUMML>3JMBZYAQn!NpUi!`4Z4F zYNZhHCn}xSwSSfk6z+ePjUknUzYEBT|K-iF>|ZEF);6dm~5e24dD4)eUV4Vy;8AK$=MCRa3q##3s zQJ~|Y?F+m^{n9_wk!tB5t67X}caUm6w@j6cXU%>dRRc0Pv@7&$)ox>7p8aT`KKS8))9&|ZBdw7>fAu5K{mG_jJe63AjQh{zI|RD)DdrC3M9@C4SNCOn zptNfP8)@Z^BuEu~l*0-++32zXC_DU@LLh%vk4N<)#G^NfZ)WDdc2G0=9YSno3JUZ# zbgv`&2}pY$@I0PQzS$uY$m&6z%+k!u*xTQMk~g?MRdfMMtcN$^mN~-p&zuZeA{Jn@ z{?EU#Mdke4JpP3I^vkyr?66i73mDgH|78Sezj`Zo{N$&KWfYx+M8LU4&R&cgRdkJ@RY|KzGQ*caZzrE8-)6{mk;@ zg+Znb?`e+M^0x?qqI_yXEOjA5&gfIg_`5d1h2ZflNQdMA< zI+r##@W$GDnVDJMTIdIWHHAN6v|mDl1D^beAo4}@QhE*c5`PUHRN}u>?Vs)lQExe+9LIT)^mH%QJ{&yw*Pb+eY_zAZp zu6S~e$U-X~4x)hXocf+`&uLCjGDU^#Cu?CWh0e!ATrGLV5kBihuX{#}iszW^!W3(GlLOZ-1S(j)BE z_^;jt7sh|G3sSMpk(E>;>8p$DEb)L7#RHHUg=P6NM9*lz77vXqk&f zHDeL+Iq6pyx~=%!c4XHdW_F5a(-E}Rik56hvC~t25i`onzB2eJCF`t{Dl-mH^ z<-fa+zY9nJ*8jw5WN%{;G^(eYDW3O!RA3} z$Ymn_s{l7P!E}oKR$uyRER~R^hW9%+r-}A>@twrn>wctHqRY{kOZwe~p++c)j_rCI z1&o9UKcwnrdh-!A%t{yv@$?^`^)jK8;-F>>Dy6LR4PSnDN0WqyCW*k6ee!a~RA4A7 zu}1P~CpU0Jt4+{Cv(A(xsaiXQ|D8B080C@`RQq4=zxyv|P!Duu`QPp0zgk3GF#nvK zbBd?uY7y%%b22%d7nPf zK@ zUvdfW#=3{zt=pYpxw5O%Cn>?VR=w7XUhL!?aqU{;qF-=U%JXSm{1)lVzEtsRqsuob z<_qmIs5@8>^zjjdq9u_y0&gC!`PLdtvpS6=?cwg*62M#fVaj{p;mb!YLoL^jZa9k1raus4_l)AHbq< zv2>a~U0#s@@(b=Rue*>+DC5)Yx6s|R1^^Hz$1RARHfDs+Ty;AUd-S}0Ec+F4c6IUb z!hb-oO&;56H~5>sy~xi`o-Gq<1rSbLybtvU&O0Sa^AOwxfdPBv;iaDO{Vvp0Bel#xM z6#1U43^uyo5lbjfq6=SLZ2FpS++HzN*`)B!1_vAOipLcaqyU)t+ytE1;U8I&PhG}? 
znhaa1v=)Y3?QUpMc&w=&ZdNh*r3zi?%0-6QM=jlnw5nm%S)Agli*#`k-&`+Bc1k8C z^PAPTK%Mt)eLU>X60KhX$Y@}8E0xhxh~7M>7nq?+d!SP>&uKw+h@~0XAR{VJW4lj7t5p4hS<@`;uK+4qz^Ur9x7)t~ahSbtEDDnAY z_XKzvpHsecI10~~JN&N!@g^fezj^6MTCN^i}XZY>0fsmrBA7d(yA4rMR`O{*B#bgJ!?6~%|_l}RXhlg+`K z%mBFsvPe2RaV&*OKxsH8cf90YK{<=@RD&~f@?U`iW?e%z>z0u|U#vb7P9stsu$J6N>y3y%l!TY$WN4+f=ND7+6>s`2H zr~4$e8Yr08^YK~?5Zk8Zu2AvNS5B5}z>ls^2bn#dPhmd6_0Z>x8vzWgPbO)PyUR{) z3%e6ZS=XuHe6BaIgHo0=-QPr_apPnOR%E62Wn5CkM1;dX$FWuE@<$xV5+NsZWHbsr zyaR*{<+1>R$u`$}LLKfpHBHw0<5}6Ems?aaM`NeOuLhA?gFRkv)H~S#*G@;9|Fo}3 z<)h(1biGs|pMN4sxmsQn)t+$6dNfbxWPMivgwGrzLiBM8cSGRtVH8Zuq(?w-`|~GU z9zSPm5k9X|(oR!OpXc88q-;;H9PB5Po+x)^2@4>b;qM#3q}82-_{dlgGt zsjGefKssr^HZtwcW3jNUq3GdkD}^n?mw9)QzZZ1&LWy&v!E9!qFh%nT`9kd8x{%Ha zQ{hEbhRPuyTz7Pc>pil!Bqrx*qJsThJHi|N;|*s+Ch(#mrycI2>mvq#-B*}HO>^)m z^krul=_f=dsw+ix6e~LukZ6|+Lsb2C-AioF@$|T z`-iSbztHf0DCeROR>^q%qEh)q(nM-mXt#RLRmcnzeFNca&Apq|`74>eAy%pNCaAk4 zn%%4B>}92oxTk2mm6L+whx6aU?z6*1C35)Bls#@tJgsh8%}shs$s41Xg)TkbFOnos zE+vgHee$E77Lg#GVh~aXc>{*L$D_+|C2w5~F#}w=YV6w^-!CCxS0J+-lp^_Dx6VSo zz$-T>V0>Y8Ep2IdCKYN9z^0O`v*sN=E^)u~H3?RKnLCgbagQ)7@gj3lEbg?h7#$O- z$B!NCT}VCz+BnsOJA+pBr_*K<%M*^t+S|&w>gpi} zY#LMezUAnI;;AH!uULh{+f1&r@O?-$K=skI-4sEqBR);X|D-*hC)F z>qO1!aTduo`a$rB&9>sk=MzM?TWUNO=?gRbY2IcQQ-JE}`ZFEn-En9DF-QWLOi(V4 zzldRov&9IBL>5z91~;{Qx3t4iR*OC7*@|bZBNmk$-Rc7&1%*tzIJgE}i^K{2txPQa zrBHH}Cc~6`mFALplxDjy%5^2dh#3_JAI4UoSoB zuQ6Ec&7mgxFFdq@L{E+fv5OV8mqB;deaf7>F(w9)z2(3A12`DP3H=W8xJ*u_Zyn~! zoX?pOd1WDW+U%bLdLAeUkA3g9F)+q8BVFwrI?bLpdwk}$Ii<`mTvsnj4&<^*uAgY+ z-y0!Htv4JKc_duFEc>EUfzBi)ks+ie5pCrB6?GS3~lekcB zYXy1*(^JIKV7@sJmr!c3C+Z{FJQi{_n1^idSh`_vb_(h*bs#r4Ht1ApTcEm&OAP+@ ze5cf0A-Z0y+@Qm!S#1)e|g}hqJXz%wO5UTO`7T(L9g}fu?6iKi9S9b1fM? 
zZvW7mOlE6sTv#^YWK{HfhZf3a%)CzF?SQ&VC6){+ zJ>ahutSp=;4Av(VX%*@-F6Xt@?Tme$%@>Ox+IhV(L`I6BQgOY z3|+>=dI8*^3W_Y>k?B=+JIYk;{I8(m#DQ7QyH zDiVN~+^TDPxSjA53S1cp?v=CVI$J2tloGv>mO)F+anuMYCk4ZDtV>Zwp!U1H zM-5n%X7{x7J!)ohFaE#Mi+{#}R!FPA*={svR8Bc9uoPq$W9v{*kR+!UqtJ#g(dAGu z{%p&RNU6zFriveJF3^`@-k)LFBTfe2vZC$({Oqa2?FPG0uKL5?jz4T$Ea&;|RM>n~ zk8YJ$X3jVt_*>Y2B6J9Atv8siB47Q=mnvjUXi_k2S*g`4bFH)e7U_&9(baZC;tFku`9KIKw#+yMa9LQiHURBOq0rhHH| zAL9w2Kv{v`e!yv3Ii#XoLW15529nH=t1!j;T?&P~GiQlL=GY{|r!_?QO6!@Ys=tN< z8NBGagd~L*?{EQb$KYb5V0Sp?g2im*bGbTg#k^2<&GsJV5qq_6esR;d{35zSnL*RG z^Zl-Zu&2bBa;dE?(#m36N`$s{%Ii0oMHNab)Z>k6qO5P;+AZ9%rV}U01)bR23N_I( zhm%!SiWe<}pNgY#L=Uly6Tf3a3E;9>b4dI;TdS8vbx~YKV(@IXD=a5-l8juW*E=jW z*5c6V@DkW&GujrnTq;brzDYIk9)w#$O8e+rPN=s#G^&71GXFTQV!}^3%UBwDj$N!1 zp5E#f74ufDHj0{ed}Q`~Pj0_I6Q8!CUaaVX`p^{eyQI-; zNR+mkD}w0qGFfxg6o`GNwBX%Gb6c!72x$IEyQMqcie||e)U$9lo z^1JXe@HXDS-}dd_p@=bFB;fOz{^dSbD!s6+-00==h1(It`{`MvSf|HbWif?bO;Tg% z$f?A*a}VMvDfsj}3d%fj{Ilg!KFLHniySg0HiUz-C^Br^#6zBS*)@}ARk@4t+98pV zMAs69OlD%pJ%Iq4s7#*EI+SD=oZ(v>K0sLbZU|*78}U4#d0gaFTyqRZ8$pk_7XH(ObLtJnVAubb0H(f0ZlS4vrJP zvrJ!WQiUgDFWUdqaNw()ientK}Zb!9c)#x z20$xIm(HB_?&&I&9CNmKh!mz#xLQu2bsm$=Aw!owUm%~i_JHT^O$~IZ$$a&}&o7}yb_y_)ec{+^X-D<|m?d~Ecmy12lV6@5W6|vSE$RuTh zT}<>o6Hsrc8KKC3FWSl?XZJx|k8wfWo)ZfSr!f~N7_3`J~kq1-JEd731H}sH~F4nr-IAoeXMIxVbqO1hZj3y^U9g zVq6(3z}|N8+=J?RnM6}D z__!9X7Km?#ht1JRN)77p zquvRLbcy?5Ku+VZFg)g~Cx}XC3U5a`jizjOK92`Zn-QjT@jqvi#>CZhAm8kAY0-9I z73)MHEb6#i4>`)}ul_&^;%GTr3nc8z6gnH)blC3``|}6$lFB@@D7HB1?WEeQ71zP< z7*=?v)teE=zReA`2I0ezdQsd>x3kYLu2Yq044ZO7xQ#e!UKE zFxE~iodM6o$VsM4mDmx()?CH( zgmxy}=Iir8ZvCPb>=`syB1b@KizbScpRrUky%YLC3KO*h>hu08V1uJp)c@k9@I>82 z#k*=wE2N0+LMTu9>Pu64d|&G275KYQ6}ztv)?o)*uaaNV zX#CqXA6g;wIaTwp@9PDh;3nj)xzI!{aMyHkm^n>qLm}3%8B7b;4dV zZvDNC|MB>4oQWI9YH6NvmOwW$sV%}ZVZ8Jc)9LbFp1PZUR~h);_UZ>FUd9;PF2)Jf zNOyI>s~4d&-FTqx4Wi89pExjOiAp-)@SPjYvx*|#-+(2n)L~n7_}>4rUBrQ4gQaIV zk75PBuagh{gNz6o?|P>PaeZ~#pboQN(s{9Gj%?pONOA7pSU@?f76-OAg0o=d@D1if 
zo=e6O>UBV(ZvEJ!Jwkxn{Nq+4w+5_aJO}otiXs}U88Jq&3ie9klX3EHYawjbFRJ8L znc3W)w5xF{hlP@HnyjH?VGP2})f*yUo;XWZP61flJrHT0FXwb+Ff@vPAva_YXyqDh zT}|TX&&YH1cPnM-F^(r%&`ZvvF zyDB>!zGz7P9{r!@RVQj&ms|ZP#}Fu|SxgSLmapF(oEvhvW1cP2U{Z5w$2n>qg@T|3 zsN%`;f&hw$1VYVjLs8&;U$}aVS4$E_>G1H#bk^*=I~TZ^HTYe(ddtUhf*JTpXT8dp zCPf<@pYL;KEdq}5;a~&S=$~WBC3?LChXB2#XZD))_^r*JkLiFkIP>FF3nt#oR6fE{ z;AlhE&)fEB@+KmQnJm-lxE1dFb__b(pf!HJowb$sokEhT<%GRzBhpU}TL3Ye*EL-f z0avDhe3Ja}24hmH7n0+AxjDr&I!A(l&#SP3VoLEQo$hchR1Sap``fh(SM3Q1!U=O^ zHn*1`3W-Rf4h$YSOK=Rb>@yuhDcTn70!?omDBMcYH`VEQnFt%rNGXx5aRP^&i;Z>~ zr9LtH4VyrdmAwBgL;h3~P4ONi}w5fbU*zzSbS;d9%>hL_>b9&V&wE4kCzPrC@g>E8}{Vr_o~Nl{LtzpieAJ{BY^v zVskCgZGRU9Ta;^LQX(G-qhg_mFCbC-`2s$T&FqLgz3F?wBLzkr|89O{D>>-p@t~99 z@U!FjJR_HbaZe~Hjns)3Mf7JnF|IpYX-~P0{6##^43*9RrT0EE^Ovo07Pn>0w2SHP ztXm*sEkD?Ak;o6xa+>c3aqcpCPfOqaveawd8-n1VzJwo9)~CYQ}y+;XvjJo5V&cKvkf){aHc_s93!BeW{r z9>ww~+*`hM4iDI^5kF6)^O+~9p(YB4KGJLTPq7h-Guw_@$Ue2|cjCE5aUvdiT|N>^ zb~*(CPq1@kZwKaEzlxZ2b-4RsTJctVA`xxBQ8%2fbm+7L&1f=xG>7?mBVH69W1J#P zfK$$+BD}F%!t;bhX4uJ0UCaDx^LT6Ys9!5L`3{rtRe)*#`>(yI(JnX}etN%RwT z9vvfB_P#RBe5rWP)<>C=A4gwB<5+pLh}ER}RHI4}ly(Gz5mZ<3&+{IRB7^N~>+IcF zk$4$#$oi47Au^OGpq?Qv2|;$ zy}58toUuFpf$zmfx03_}4i<7-ZP-L2A+rGv?>sE}DAF)3A%PyRuLKP0Ap0=Y)QZVT zVbV!B1WnP){e+gs)vwkBs$_^CmhR*hXG|{2u5vmtD_HFQ4wZU6hPW@;m_x8X5&sT< z*XbTM5jU|I=1PeacS`~6a~9#nVqvjBQ4|gp?S})nMA+GH5vO88YZ0-`Y~`9&;a8Ff zajEw#@~MbSHlHA6&->jxXM-yZLMh_($DW@|ucxCVM;@*}m*dN2>>VIYUW0xdot70S zcS+-ytMK-a>fVh3*Cf!ANo0rXSB2B4i?MY#xp> zCyv{z$zpLmAAcEyVKIdmRp!z^mfzDsrI=VcYlvy`V7?jV_m=1V4LLEW@K#;tc1)~I zemtoqw$U6KjhxnG%H$R5MH`t{Jg~I&x5EyRI6%41ZJQgO?aj6+qSa%GFdU4gncL`g zr_)JPX*NU%xML!=PyD1^BYyk2j?$qXJ?QK%P)w$n4vJphIE(Qtsa}292f4Lqu|ynb zACi(6Y8GNq4>LopYdJR2k^>3Z6?E%hA*k$r;DXm9Zn<$SPK33fXPUJMcoF3l8e|QD zgI+eNGibV6OOnxGJakv*ba9_p zVXn%hugRVr&QK%oYmA2IcCjfi9rf$kjcY43m^*$M&a{0ruvg;B1ZwTo9abLT1XKBD z2S7_+A(wn?NM<#b&BDf$kc&iqmI&!do20MQXhQU7(P=%^D_<}CB9qN1P|W9kuij2b zIWt_R^iI*g2<}*!I>aEY+v83)=ipCfdxR<~2!KE}Vb}Gfz=7;~kDh;>>$Z@t!d3B} 
zma{7}1q1t24?l=%=f6ATNvSY6o)t=zpbA==Q>^%p%3e)j9QOLSLz)AU3Rv_eb1fsQ`~N z)IQnAMIwnli5nIoxiHN0=@r8rV%YR7ebnZ1#7L?y2!UiHFG@9wP$Dmd*aenar>>E6 zV`0J1oIzsS_y?7n|Jr<>nWtux6^6-Z+MrDLi`&JxJ?RxLH84C}H*=kdvkIMvCvt)e z$NpA4-^&O#OG4jvy;q337Y;{&5^WZX&!B7F#NcSWfSm$_o=wwCjP z!z5m9)EZ(sKtjMOk7sV;o&lLC|sL`}qhgrpJQR zdPQvh@;I|CXd;Yn{E`5=S=6+%?|on~xZl682fsiw&P1;pAiU3(rxOS91YxDB9=|>8 z8Lj6|J}NiPJr-lLSU@pZK;@(+5uq~h!Q6kd~q-r);cQ=|xVe%A9%ccB$Nyb??VadbDK%6-x z%P94kb4BM9FpNlF#!*HIvnB?m8)ck&Mx7C!4u-{$ET`_fSvctd*MdGkj&BSE6|e8g zK$;oCST$dtl|g3?S+x7B1O0tx#wCXZ1596IYNUp7heZVmNbe5o8^!V-|{fU-$i-1iZU4>o^L)T6M z^6JB?N*yQpqeD~k##B0IRO?A&*$1VSH&pxFhHm=tq+;iORooW)A09%tFrgwkiNx*x z9S%bZI-?DsW`>fYj1>7u^&vCVvctd# zz4?!WmK??gD~^yYrorMy(zVMXinr!xC*7FYY>ez_k!|yT6>JNNW-M_V7c1%OIg^bu^Jy^_A`3H|Z%{ z&o5JUWmvkMx8*`RCP51(8<7q{1gah{&txi93-ZOHsH9Mp$SA+NEm#5YI9wFzNWz60 z6MYk0H+Matvmc2`28R!qS~nSR50O~^%xE-zBT~nMi}E;(q&8Y+1ZAQF^$lo=`fpx@v?_kMP*iUZ1YCGn+aP?`y4i`fJ*R1M_T z#i#Hjrfi(R2I-PWJnAV46)p{@{RCZ}OyhtkxXJ5E6<)rH9G1q(OKq4uUL8^bk4BaoNoqtXm+~hu zdO(9dQSBu-qYkAO>auSnwmP<_Ix+<$4mc_q3+ftx{av}{^hdZO&`d_C(NE{1yp_4` zSlW(J>b3^K*J!syBI5DzC!Hh~Yyd*bj21g+KG+}vl6Y9%&fa6t@;bvI;@8xf6`3Id z-M%%U&iIFqE{a!<`L*B%O79i;zYMok`PFK=GR-&1ASa2qk7tCP z6a@zvWAg39fggBc*;46TCSNk>ofEqq8;pOEOBmvkeD!#Rwp=JHlZ>yX%e-VF!?4yi zoyZYr%23Ao-d%j0WGPuC9!IPYUCCMGJ02}_9p;B(6oUHv{cZ>YyN2GNKd9&vPt-Tq zOC#m8!4$_VkERr}T53GN@qzI2X6}ttPY*9}=^Xs#d9n-2b&1|qep&!TjA5n+{1Om)=aMbjXQqPu)m(uIqviNZ??{D`>W`^69;@Cv=!BXg`YWXhXi?cIe!=#s{f!25V4y2$x}!_1m#m%uGX;;#H?jfNT74R$@+dFo%&mR z;8~$gI}(iO&o00BR`u^spmD@vMC$MX(ywx_N4H+of}Bp*@?Y6kA+MmdQ6EZduQ;jvqv@C6x>#T`1-J z{6MC~ctOCHqBL6$NMkY7XAdaa`yl_IL26MQFprDhGvV}c`9$7?)0}7)5b#;XsNQgv zWkM*EW#ac=@!@e!1)x*qHqo@4sfOBGn`4VAA7SPhS1Xgj2%0Zlid=ES;disup}-}% zeB9#s!efCy*v!?VFc-OJnLg7*%a>2sQu5Xzy5W|2@`IqZ?+UyZw$(hHF{Zf9MFo4h zE|GB#Q;}03{ZDrw@*om>)6rT=VsL#?b++Ez+G0UTzk>jl{~G7@$k0hv6cw5?eYzJ{nPk8Z) zfd-`V-waw^LFp}a(&cAkD+)%tjl^eGIdK@}i#&;V8_^VhF@m znt@(vddnz8euwRj%H&!Z7-jlOtjIoZvV>!hB){fW_75YXR*Fes(nTGwiO+@5iDwDG 
z61(gyx+s^cW0z|cbmQ=7eUd?AS0XLL(=`g`OR zU?+)3AjjOF@<{`_>bQ4-{^8%#i!rh}1!2EJ9chb<9^RRR3OUM*xwK;k!qz5PsrCQ!K#buxKB9V1ZcN>+tbnczNCl5{#e zk#$>(J(-lx--yy*lvQsw>NF$WsgE}tFqjy`5{RCGIhQM!-orQHA5O8FU+m%@x!gj(xpN&-tM+{X1j}!Eh>{+up11p zz*30M2Q1`c)I{dI^gaq+hICvW4G3x$PcXVH@5+@jY;B;wBc5`}zZ1@eN%T`W&X2Iv zt`cep_iTBWU0m|cA_*d!IgI(WdcE9d?aREVJ%)uu;t(_5K#P@`JA7(Wua?x%xL}z> z($gSSDz;8D9}5`01`J=F{+@hgfmw2sFIJi9rz&_YJ(h{Et^|rH%aKqgZ3og`Facr1 zH=)dihAWMy!}kJb`zfM^OkSTaqw}>ULdU;53T`icHzqV!uToE$RL$guMBbd{TV%H& zJwPV+X*ikORdUE86+?f2B}bJO*s^^=^w&ht90Ap*fyk0V&@%jWt>-b2PA_Whi7ef1#LVF8!TowpoH|)mpoi-ZXXbSqy zn%%wxyZ=tb2Jf7~v<4H?5`(KpwJBHb!av#yy5Y%Nl8uF*2FnZoIwshZ6nnpMsHZ210(j(C?P+-&cFMmckG?K>SK_xE$& zGj7Kueb$JzRz7nn**VVOyNu?92hc*6*(!+SMl>@9ib$|Xl*SgVO@sPPZwQU5TcHqm zvhknal^pEHl}qV&Ivxo82_piDklCcw+sN?vC~xZR9fUnwv zzw_C=%}SF2pMYDqI#jIQaeu&(oSF9cWZS*f+B%vBMjrNTkfVzn-o0AJE^lGQA0 zoe%!9yJB{a`}w7b|FVfjC|zc4Fur&F0O%IlOC%pUMEvK~C59hO_w(;Jjb?KK)QL5< zv<;HMmf)3sFAE7R=B24UhaJ+5tsn`}Hto}RSA5`4{@6ZimW}E-$j$O(uYWL(Yv>kb zFk+fXWY?WArszqKk)b<%Zjy^9boqI>TJn6!rQyP;yHrAnxwLh3CZ3ctIs4G207$P$ zbmzIL)P1MjbTo@OBT_9zE{89nK9k4cxH80UtLz=2uIPkMhY* z*%b1mQ=I2~QtRJe!D(i^25Dv|d7z-VjKmQ0>x;r??sZ;Dm5aE**LeEKjGkFct*-*L z0BLiPu>{&&V%moykp8+h^_38JQnTgAsOO8V*llOV)qFn~GL%6V+?`Bt*VR#czefLt zo)38x+9RAjW-AkVjLU1O*-W<@Nn@bU@pcFnmBX`%K3QvNh-@HDWHOx)r?AB<2~0tp zZZ=vqKbfIZq!1i{zWz00fEF^7=2>EKFpAgs^2>HCmMoCg?rG5kolcZRVtN$1BEom6 z$8Ok1!zfMiyoWmphPRUguCLap2GRKs@#SyH1A?pq>ZS_$)USDDkiF>MQW=a@14r{& z38+vic+DjjJ+(2D8+<~n$7(4v9G-KS(KK$Av3t3?Wwi80rqOXvB@BxnAMyZ{=RW0{ z`I~;FEr$(nMVyYSId)Gt3R5Lf|F!XKLv=`{Ms4CVQMEbTv!92^^qgE2Bx~Vh9oTST zM0@O}=Hi7G#axyUhf+p$)luKhO}$R9Sva9%C@?lOC^K%SU1^nXMq2DDrsUk0Z?3xE z`lX88w*b*hIfZd^!D+s|+?xX!4BN)WtWSD)x_7paz^ZG|@uG05)v49+u3GpM0 z3GsYg7U)X~e1wMLirbb#qwUB%BGrS8K&+Dj?}cl7N0YNn0HP8KjMj_L;VZMiW<04B zm2c}PQ;ged?>C28baZxILB5|5Z-Ee1x}IHg^ouUs;9Sk;#NV3q)2=eyu`h=Jdnb8x z9!=*A&c0v-u^*_U$y!6XLCpLV(l}CME9edzD4FcAv(pXSj|Rlno=bCsxH8m>VvTrv zewC?Qo~Zt}3k{V)(RL7GNPh5(EN?E8Unpu8L!AtbhQkXg5Z6e;nk3n0h*6OSYdIiH 
zF=tLqz)!#U>w<+SRTLxaVpWPo=j5NTM-5;9w!8&6)@A%Z#+(5OMp?c2vL&{v@OC&W zP7#E`X<=Ha!nkaFYCyi&)9wnR6|c#pQYLEgr0iWrQ_5oAh?x|*YzE26db?4KvOvcS zy~#QiNfrGZMPz>4+)!GP03L|~2rF`;y<9e%kA>syfQ5u6@ucQZ)EDUn@(&>#waF9- zs2JRm5ErWvN0b=x#FAssBee=mk+Ed5f&rtG=q?ubXYJA%=oDVtWB2RZwo^5+1lE(F zvIdjyfU-rAU@;Z&4vhEF?)G|TkQ&S&(AA>3}%hz8DZD$ga-=z0|S2yDQAWKEJGyBMXf~PfI4A$RyjIt%U>D!ByZ4`H_-KA#opy(A<;rqVf9wTiZ%;D*utvc@&1)swo>( z4wAFY9UbjO|6E97WGejWexU+Yccz%s6m9y_m?Z%$&)>%(z-C&Ye;&>w`I=@R8P&`mb}mZH`*PpMhqPZ)wPKwN*8il0|K0YWKE zh`{IbOsc{!Nc8&NgXH}14pzQeNay|d_=7r`M&d6!4gW1fk|*XGu$J`a2k-X$AlMtv z0tl~~asXvL;mKNOwosC2wa)R39Pmd+J`<0l{Q-ZX0}QhCxB%D5w4TIk^eRG_ z+cYPvxT+iJ2GdD?h6}y?^$fLSDtaU!-br-`>wUHZjiwXJ<`h<(yO}llh)^ z15Y!_Qva&rzqZYMT9;8LTSR8qFzA2xri4Fa`)%aS>4-&`DP(SLjQg3_JTMEMKUxdJMj&eopY;HsVA$_WsB632PNf8*E%==_O%9Jo-^$x zH>Fmg$+L!+K&juBvR`HB9)!h;^}u1fh!x%ks=4tI#@Lq_q4-Y~byqu2nej3^SB$2sk$X6nt(QhMdJJk*%G)edei8})jG{{gh(L%;Nec+$EcS6&u|+d>bo;!8(XczRe;>@I7F`+NxK`)c^DBW&$>T^|z8q|q+Z~i24zK(Rr@U83frPH|C9g_u|_=-x&D;6#Q zKHg5rl(7$T`jwJtQ9Op#_~rTwQc(n)_}uS0Lq$M;gj`sI|C(#Qgjn?}^-~W9oY@FB z;9rweO`N(j^nw*-jmvH7->p3>czr;Bo+VXmI(@w7@i z6-OtH5_T9!P=k5Fkb<{r{+C!)>#unR9Cq~I%=_Q^RcH@r4)F*BgV3t&sFS5co_42; z?k8zE^{?-Czd?>?t!kwky8nprAwxwdwaR7lgp%w5pMu4AQCPFV8PUbsXbm=oS(<>` zi`JJGT+-v|J?*Q`yWbQbv)scpkFA)#lLkUYsq&2Yg=&3efkt9!k<2vm5S_$>r9mC7 zIh+zuKXUI)CCp{9mQZ79HO8DSQ%(&8-4(lVO3F>C7#jvWI*h7z=LcVpSMHEc#jDm4 z!l5v_?0bQ-{Xiq*mzVsEN5d+DIL<6Hg;+&&h;D&*6krtptwOgkldQ?=HvNJ3iVHvmP>Im z-aG2Ar-82ed87Y0(Cah&jUa2s>jjDO9E1yAJ{4ICA1aj$}-f)E1~asY6(UNZ$4gn@6Eq#Zo5qyt#3* zEDt2%3N0ZXN-x{}tGiPl4rPF69K8&+;5XRaaS`mxqb*^3znF_K>5yS3o) zMzartbVjqhP6iIC`2w%zSpeKI8@!mU^%o=fcbmpX7B5IO@j&1I4XnZJZam{rG|K z!hq;)Op!WkR9V+jeKClz90_lsQn`3ZQuy^YOJVDHscCk_dGL6J(gC{rqVZubKeU8Y z*g25fuKd>AqTw=vS3H`<1@r4R5US#SBw|zH0l3S8`|CX^lMc2xIyQ2@;@SN$^h+aw z@((p*G!8DV0Tw>A0E;UixBWmGz<(_#XGs6Z!MVtwPu91?2!-JT$qbvH`^A(Qyw|B3 zT2_3MvG{p+G~M`O|EFTLA`|3Y5TQi7+x|-7@79Ur`XTrb)_C8CJVu4%12(H{rJZJr z!mV=1TeK1c1tWBK)Xx@nGpu^_v|-7Qf2+U$DLOCv>A7$Vzt$naj4l*!1Q|JLVHvqF3h3!c 
zu<`@Ts{{rndBSNbhPww2ZK#F0eLZdX&#j|(O_*lOK{{@4$8uqJnYglP37 z=gFM4Qoj(G%N%R2$F&Naboyht{4O{A5@gAt)yCJTy)oR)>b`G#4*EH$zMh9#3S+Pw z+ekE8%LY<;KQ94=lL}sqby-?dju-7{IA7HI3}6cphd&7uP)<;PhTZ51A{pQ#H24vM z=#GK7^=5xT$`pU4=Ib^Tvi~cp;BTv8jL>Thu_5{l{!11IzeX<_uy*H?FQA)LnDoMn zT?<|)MG?d-qw^_1_33_0koa<8^*ca5Zfqmc#j$B? zW#-kbrG@jEhRZyoCJ~xOR)u`uppzpD$ciJ9va`xJq9LSx(M~TgIlkDP@?tj+IoV+r zS55pvtM*!s_6t0)PZG4=9bob9;D&!v?;zzt*aDQ9oOF9N&QP|={O{ODhIk&?8d0%3 z`}eW_R+sbtr;J=>)Yu%=gseJdL)1n3a7FjoCKojR&651jlP-&RvVc(!qk_PeTAJfX zZP)+4Ofa7|$!hlfLY28d@&0@8#>^jd7&pc)24rFd$vMco*hW^%!!9RmjRFAd4uNk zKe35Zcgq`&q{)kzwVDT^P+-z~Y5(*rGy&rm%Jv(R9j5!F7rKVdUqMJ^a*4TLJhR}p zV9|s1PQ2ehngVBem*SOh^j&RjsA+1shLB;k!b96Y4<2}$Xuz*8omJie)?NhmR`F%0 zM=+nY=<3jx&0ENF`dYCt28IN~@Db$~H@A&}NOmrgRTOyZ2#7#WRk+D|)U+5ogL+^M zFF*`(6ZL(zm<_}HEk@XbusziP4SoSRZAVJJ;)&XhTEb~{7* z1?Fb-|2>0n5;&tHD2KrI_7m&{vh58H#0cu`J_>K^^#UJ23fuDYyIfmP8%b#013G0j z?eDK{fM5MLlC0;7@?K=Z7?+ul(-Gy5`UJEf^yFjDcTf8^29PmN$ub_XR0+e3^fEc1 zK?HBTfdfHEa_abOHW7T_9TxOj6$i<;&BT<~jGz*C=#S=1i_q%W@_s=rQU8Y8O!oRA z8!pbu3O&#IiI!^+1;D}ZriL_q*5hfdM^$`SGd?oZWzAcb+bMeznmFNhKO-z&ag3)E z{hZcDC<2NDonE&v*e>fm2hpz(ZF~m-QMLe#voIB{N-nFErranl`m(LOmDc`j0OSA( zp%SPIoQiJZ7<>Q$lh{&8x95pTwQNqze4P32Jy>6W@09CyTgTHVSA@TSLPfAo;tB;r)ZG;H0ZR#Oeah)!UHF90F7lUj0a4C=UIdytkeBL9Dz^kwG#_0|FDt7B*H^7`y&Z3-#D1!T>B4Tc6 zvO2y>Vgg&oRhT~lssQp*(UqbKKw_d^nioz;Q8~yj0}xY+^;JQvJmr9{GXNOM z|7M0|@?Ez$w;g~CDeO(t1JEi}nV(kxv1zA%6Ze6Q`Um!j9K2lOp4F=k7%muA1m5(_^@B}*>s+f1)6NrdfQ z-#yR0xYP8u|QeGQs##j@+xw*d#VE20x-3xvdDJW_G4WJ1qd)f%^a0 zRy^;f0T7sYjRPR;JpG-z2p~GeSRN7oKXpo0tvv6b-CF?Vvs_9iOcyH)U^N+pZIF`* z7L3ute#2UQ;+2?5P5$3Qs;9B%v4_)>G3W_A9-kHw39F!g=_=G?9-+?1=tf3;8AvmE z>g8J}N+yF}ZM_NTDX8i8Oj^tUdCO$y0~VmBL2Hb&cM*hM&q>J&%8>ty;AjMD8D$w!D1 zu^!S640tSyvl|rJF4!+gOKnQrS{x>d5+uc`UdHf$D2Hd=ueR9$HrYdK;BPMN=Z6R4 zj(3188$gvrigz=NbkqMYrz$S{E5^UEfPavIo@l4=5YqoBb-??A zkr{8SI345v$Ds4(f?V|YAGhpZtdMSW5BS-Ddo2q7pGW`R6o3IX&Itk?iGW@B-;AMu z=nvpO095b~L<#u%w>koM-{*Q{XZv5T4u%>29~?t>uuPHrYok%x!LF{=Nt?yyK>&SM 
zIPKp4d`-|pxI6P7(jUOE9HilB>E!Nk?P$Dk>aq@2ytFf{c^98+ucd17y=Lnu*HLTP zN;NE-{txr31wh3q$7#hG#aYEUhL(G?i!hXoq!VMV6F+{J7b>k0z;gborQLgrdgcWB zfMq_6)nv2oZ$l4@m7AWZX7s)~+rCOCQLT!wK3l1UC{eAh49wqqw0xtjspGDbqSLIi z%(4Ex-Ho^cMK}6H$dhDL=kXaI)HENq;BhOp3B`+);LxT$I&BJ@U<&GhA2(+FGe=H< zITDkkrTgasI?p=qbw9!060e568$PV(_9JboXW+s=m^e=@NjQKBsjSMv$%$0aV)gl` zq6hx++|FUQHnu;u|7n9n@@>LxqU}sxX-E2bSV(fn0g$?n?3@*m&(D=)AxXutuG77I zE`IKV3GLO^TW)&@WbXGx-5s$;Pj9qRYFJ#IzmO?*>Q^!D{1Agw(%zCKj14KaixVsD z7*g~M5^K5u#rspr`VJOH3hV#c(>c+!c_P1^}`F_w-Q zr@f2=wju91lX@qP$-?T>JR|3RKz{1=GRKoTd`Hb+LRj)h$J z&l|1%*BeD$$P5Y&iVjK)$_y%?A=?sG;$fPx)YI`C?CZ1B1<1TTK#Z{a(Yr?Jkbxb| zZs95(0g&zfrSF zva4UrWPLlU^33)Ryz$ZczXp$qQ}f#k)C;tWw-@LalgC)-mT7+&d_>%#QL{Gf&-7Nr z1WpAw@R(|I?LQI7K92_eoKQ$ixXaV|d8n8!%SoNU0<$%gB!sH_FO`oMpz{4^z{h#u zbj6~@g$enZ$EC*Q#+CA$T!Cqau_~#*c*%wsr_xO31pJVI>*A9!%kRO$Feo6fl!%Op8D3XWw7sie;=70c%z%&6GzCBV4q10@L#I~{RQ&dw zD&*Ht1`^4*H^^8N9$ZnpVjO}smHX?JTXPEh-IDM0C94^UIzGQ;)#?vaq*BA9jwvVF zlKARXK~68ehrXHp?ac1~Y~L;P3lx2-ZIW%WZHjHGZL@Gy?b>onFx3qSe;EN9NyB{X zCs{%pAG%P`MrYW&`2I~9LArviU!=z4w_}uwVnz|2U_~`}F=)mU#uvU{x}S74b=?C$ zcDA{8HFiC|(`k5;+^gsA3AU6Ni8OHwz@&I>Ko=P*J~<=AIypv(WS^_C|P>)x>ZKe$U4F>cPf>5Ph z`(9PxYl*A|1z_VeC}d<5n&E!pv;n5+WCmVJAN$1Xl1_EXcUnzk?{QCgW$W)P;)E(E(H;ixEYK ziAsiWFs@ex)&QULfI+rXpYDRPU>#U4614NT;c2$%wi&jWIyT&(p@~$5{P{O&iGL&s z_{t#7r2pXRfM$``n9Smtk1OjWAoPpk+QcjM!wTD zu_5xLKwiXs8{oir6G4J!1A7$4QV(~xD>+js8nQW4IDaC*P^D#74d`bPL{u93Y!lh} z<}l~3)_u4a9vZkEa4Q?~{KMaIJ#iy(Gt7fnGH9qj@`7aN8Ge*0VPTq-$qN3_s_=$} zyz*Bc;hhb)%QyM7Ae@N=JML>~YT(}7KIaRNDKl^76lQ^)Cp4giwDBN-S)@FumQR*a z%9zSjY7ocZ5C(h>9Xx^>XIkIuIryG1uC3yRRDRJB@B=03#If1ERf2B-eu^rD+6oN9 zF5nHi8@nHo2e%pa%PAj&k7=e+aa4c4A&9h%?xh@ggEaWjW(hcd)eYL3oC@3z+br83 zwrg%AUTbnd2S6f@we40drl_gY`oV);Y`$=T)q_Gko0g_ebnx=HwA&w=58)@T!{+8l z9#4ylXTn8_1%AQ{vUi{w;lY|_*4m2ldp@9f!fONLS@*r(Cp}F)cQ|eIT53Wt8yUsx z_kcO9&-D_=B{8vJRbE89+|}E7Dw9WJhdD({3}9$Eg}(adp{l?(tV_zo$%i-kqo42u z`me|YHmz+JxQ7`p9};SXz*7+(n$e@Y>HP)H0U}^$I`d5uC?1F@+?WgtbK*r!dFPB$*^&m~!_LcjwFUKjA1bzOLX 
zMyaxKM@(S#zapvq5DV~z>o<-70B=BSVxBifu{tY{FxKWzDEqJmK-oY7nlX$C=q;v} zkPGhB^tds%pXyqC8JpC}@x&rXgr^~^xPa{FCy`bZ)YiZKbFZ7U<1SkUg{gCU-bfH} zjROA;{?E*UHUX}6I@M`Rnpu7a0Lp;wxcp_#?}%XOm+J4s#gv_NN3z@i!yY8 z5$0(nf_GGR{!=Q2Fvn{w&d3+#{ttjV{t80Wm>Aq{^IsTt8c4nk@;?AQ4z)Ooym7PB zic*rteJafRt*?Zzs8+@8a|OX)dWZQI*qR(`StV&@#k zi;wTsugtiW3R5Vl@C!}ADDr5U=d;5`5NSLS$-~JAmo*->NxtV>trbvogtIwGM%C#I^dQnxwwVrux{5zb^=>CUm z4yC8I4m3UnL=s1!v*lrm%`)E8vm;V^UH=tKi*5)D!c!?Z{T!IUJvmOS1rq)_y!vuXJ4C7e zHx9y=ma;bRoBDZ4$vl6`zd{%p&*zk8`dNc87A$!O;pqvdqz!nCX5fH|OIrVDuOGm& z;|1NQrFgKeYt4+E$JXB;!-V#i2MY`EU_|1&3X$L+OGvrsC$I?H0F~#=5&yqlH z-_ZjkS-8XU2#8s7^Kzut{aR`fkQiunG=x>wY+fh@`=3#{PhvbPA;p3b>JGkt?qU4p&pjT?vB2FF%eCg?U<_M~QvsN)SU~7( zRRWsbMOJ`s$fXMT)_&oGZOnnhI_ueEy)k_l;A9=*Lwq=y;?&+5x~kbOugYuTAAn(j zbO>qnvB)+IU=X{05GBzF0B&BP;wtXvu{vPD40#>wnQ&uD0RLERi--ZlOQ=b`;}*bf z1iK!idYo)Dwap-C`nx4q2<{dWV$cQ5!tK6lts#3J;{APa4nFPMtSkn@*TWWsL%{X( z()M}>xZ2oFYnAd=Kln-b%#M0HngQGx}fw)tKmmW`TrxnLXN5D7p z$C&(`$(%Yi_*2gd2?lEexp?nqJ%W5>5P`laxS8jBH+s2=klCU0C(P>a>HM5H4b|ek z8ulCD?UrRvm-cVkf+yc9>S<8I@ z@{WcPu2^h)T-;|5u>yep!&hF~OVA%)roOKlX=Ah2HFpM|G+e5w1HFu z;CUlUSDMEYYf6`*xA*tO0WzKg`cC4j{&VSum&ZFnV;`&i&BkEQCXjZ@3xHeJoPP&8 zVwBAXsLilqO6myUHX#JVdv{tY-`{j#_U{z3N8E|hJ)tTPVqHDNuJEvY&qjmLNL8Ni z+t?m)RUb?K8Qb05TzmyhR<3%*UlBmnL>NzT;O4kb6Gb&JjfgdLH#vv=JEfo{L&k^sDu|IjIfU0G1SW zR9OJL3^5JUvdh*#)~gkhZ1!&~pte~k485yIxY_l}8@H-JBK|xl9WvWa;eIjSxSw|8 zajDjVhfJDKSviH6i@5=TAeIP_&wq%fk-vs*-5q+V8*b)TOGqs*2~IDEX_hqxb=L1N z9hDkWG=shrKi$q?DZ)>Ux!D)F6F_z7UU!wmHgjUtvAE!a1FIj@0*kGz7C#&Kp4#QPHyD!McR&KB{t=JTK(n(&}QH z#BRlWL$cC>hOvbTOPXX}w>Bw!IDD)>xNdL&nB7vw=gSBX=2E>Gm{p~Sgqsiw!I;Lr zUb7!tq*BcP&J+ItAVDqxVo4xD&k=*H(MWPCP>(AW%a9cDU9)bn-X=!%L!QuA1)=P1 z`+>0`vskh-)pC_D@>$zKJ2H8ueMy&h6QcD_#~4%8sj26N{-SQ2Y`I~C&rxSn1Dok4 z8-5B+{oNaGU@C!<=|@x{3kt`fS7dM zT-kmGA!|NWhl51OB2*1mfAFz5;@5BEn|&mj^mUQyqj>?pP=YLy9uzs{Ghr-i{w0h0 zN|4evez_qe-s-C-xUl1iJev-amo+z5i(!4RK_4(WTMt`bzDb75vG@$vi}KvL5$4()2=bR(!Nj6hXMYVOJaz?O z`SmN)h#G|^M+j?jItJ{=l*+)cU}QWe!;R*mr0Tl@kRp@qA72$Y9dW+D95)3kV7bn5 
zTkf(`|Egl2qWv0lZMi}d)s2EDBj|z>^IRBPA_r9P?*dfONr0Cqf1jLap94^hB$AaG zH05G3H4K*M%xbcL>axCD;oq2Jw`=Hm{Mo*f+0M$}Z?*~SjTgr>xM&w{kB_+KG%qtv z%S|hF+r;leVGAWHEV_UxTDcf6%pq|v^4YB;;YbpDDD_jf1P<~!y$7hPAVtCnlK zzGqKQ8WS5i^V_oY)mt&XXRk8J1n!u@RLak``&|08F{P?%zH|&G<3<^o>tsvJ!z3>+ zzL#0%92eu|5;By^%L1&3CAo`0-j)xH%< zpy+*#TA~GgR&IFbDxSiO^uc^NMYG9Cnyb1f+lz!;PKm#wU7%VhUcAzJ$P%bKqvmmU z3adtpTzg(pt2~*spq8s=&WNLr1}LUiB@Lqygp zP;-f}uydeTDnp59;H^)qRZnc8-W)@()g;s-{hnyD#e7OvmC=r>SiLNij}NHSS1MvD zfi4t7jS+kZOS_ZU0n+#xvNcW14IABVV1>tjV~zA0O!DEfxMqYSRGKc?&X*Ywc)ilB zHE#|!TgdQnZfF@KrLVqc6bMh4`@NaQ0>>b4S^h;j?&B9@bO5y+Xy|XB$A8ov&0~jb zO4m%eKbOx&uxyhAsO^cHNoMUFak1?cfOK}GG=#=7L}K-KP9ZY$lw;kYnH4Y%ZJh5P z^JyP`gV^{pz7#6@OfxJ=VnKJe!ac!tAN;aR*U3mq&xAwZCV|&&z}v$5;qcp+&g3lc z=jGXj=zPN+>eT^L_;=((5rFMXtY(nmR(8BpXUPzP$MJa`P8W~#+okcCNyj-|G?9gf z>1akZcdR{N=0wxmf9I?{qU7M>m>@2<%tQ=$OP-Up{(}BU3z%nrmV;9|_+Z>BCoegg z8L&Ls_0LyDB6^1V7sDf}28@77T9)4aP%jp41m{2iC|)ZA3bsM{-?HT^22(6t0IkC* z}0m)b{?4Pb)Mer{@e!4rxg7LK&puP|EB zgZ?I1Sf`c1%$2ai%>9Df@a4j(?A>+8CbYB@UVd=2M5ARS5q5ZxK`@z)ly5Bh zIdAB6o)i;JD~zkk?7>N}*dX|L;Yw^@7!dn3PC zi#5I?sY*3%qZ0mnd#8X|s!UF+S*TnhjDSv>0cIaz7G=Uh z6=VIdeN!Gv_{qV4y(>f%Kq$rDz~3NFr`Pg!1x1tMmwL8)neD9`tQsId|C7)2gB;p8 zW@aaRAjH5Xx2cYKIOyuHC-M=}m(qQT*T;kaX$!x5M!3D-`XXWI+g7{5K3fLi2a5$o zRZH*DZ%-F~!}z>{2vMY2nQ+0F!FgqZrPtA=VxQ*g-SMstCbjw}dSpx-B^P)9Y#E0l1P#REyCHh`0D;h{TA=AhTa({r^(8IG zZ`om2j{KZFqn>VpUj>$rI_V)hHK9BkQ05)b-2RDGlS!M%L(qkjL75`tcO z-a=>jvH|UYtxi%GMExy}5#u9ZBN2RL-Rg}>?nX)E0}KQJO6DTB_V(wX^V|me-)~~* z#P75P^xU+C%hd&2pElIjO^&wXfHCm=a6<4CjiEuaFW90qy(ur~oRhWm^8nUR2EgW! 
z2P(Bu$+DgH0>7;RMzrI8T(oXB_2>YW3>n8=&lL)PWR93{@^_?P_qCqzKh3CO3T86D z;rt$rF7G}f08^Zago7{%G^tv;YU}|ni%q9Qi}hftP(ajYK!^Cpd2({Hfr*1=HUQ_3f`f5jRoxMUrL$G%Li&k@BKL&MkCC1Pw$WZuUv0} zWQdkxpVz;!02~rb#m#bvS#S64M=J4X5YyIBvQ~E?BE0Ax)`7~mpemVeXX~~xSn9dH zxv%R!wH9U$3K0C!mUDHVpYz_qf60b(oImVi)m9m-&&IHzfe`dvOKsw;&I zxXgj%xBPKFvM_V6@dH>*(gXF-52OKNFs5t^O5ff_4w8g#iMh zK$;y0vZULQz9mo2l@0s#mT|&JYH2tjvunFGE}IKJAZa7{)@Yb02|=x~LlmB2IafWK z6m3(>Pm>^x9%efHv{M2*mf3KC5=MV9N>qkYad(t?1PExD&ef4+OQ+%%Y#y4MoB4+m ziA_8=JBs-0yMFS9*@+#|+N}fet@*xwC;y8i+N5TjAr|f1<$kBzul>+uTC)@3R&QYPGm#OPPICh!;2L z#5&OG&2-OTRB2vc!f_Ydq>NaLBh&PD8ZRx2fe1o8Hk)vef`_&O2uBFhZJ2$B_NdUT z6F`0=4BXzA@MB+_D#US((}!+2zqbI~L;#HK1Dx24$_A8^m2}oU=KX!QmuD$eO8{pn zS5L^>n41ZAGun!Vp8v~ftZUjnhDkT8S5XcXUqf{!6W2abo}Hg~lg^lw-~ypWbK~RX z!H!J4*4CKa2PqPjkm1x1V@c{6l4SFB7ER_Ii8Bl)BiA8dMxI^0BR5bBmzL@0ps+yy zWPCk7^zE)4=N{y*e-hnR4{R-nfV7SwBL@>d8C8JB-6g!PHUZbp4JIhZu+CgMMOe+5#~d_)V+Kda2GPF&Vqg zrp~VwQVSo`M$-i*JX3s3I@89!?ao4ceQbJg+?!5L93T0s&-*9cbyO7O*v8FF6lQ(E zT0*y+?>=TT5g!YXeEK3N;HE6(MPFApE9YtQzfG&coy?Nok{Cbth(#|V;b#O2_x-yE zZ~cxJYXxVkt*E_`Pk4v{2Fd$00QB*>aQIR9LLx{uI{Bj;t}3G*#2UV0zOg<3-4{7a z@2-A~>?Oc4yQv@fVaI!8xgrW!U|H~JBw|g#lHAL}hjPqG*|G0!Yq1v2>ikxmSpR04 z=N!j&CnuRY@!J&4mx7v-r3`=vBD??1B+odXnAPp@n+ZhV(M!q|i$MYgfM!YMaVB4Y zmsg6h7?@%WIc>Z;>kuuFuPi7s>&#k%9$1{nxzMxN+HwNC>Qg)*Y<`7@@1st2hEAHp zZi7CW3Fo*}sX-EwR7Bflxl${hKfaHixgm0yYfhCJZvcQoCeyyU6li5*zZ*#we-ylx zzUh=WDk(ld@Q5*+q`2#lsKO(nEdEli(QFek9?hWVn(*PyW3t*#E1sNTo$%+kF0$aC zQ`FT>8QVBV7&%|VF*#L8Xz*e_ARHE+uU8K!47C?QE22WbZOFjNpgnUhNGW423$7|U^ zu91GlJw0EUw4Rao5``j1!q$c3i4)~wjAb$({kt=41QQ>kJ zQch+YXML_|2EnpfEPxgkjI3n>X{OOAC_3OroBNAALRDMa0)_FpktC!+TnJX$i8&3dn@V(I6bCaGF3wbqzkysjD zG62x_I;KxTSa{AvK9?lP>s}VMLUxi>k0-$pNREpzPLLaUO9(WDiw#EWLXDMvi<1p6&! 
zogWm@q zOav9SOPEE$8ZY-OhJ8dD0|SG?c)QaAY` z1A2K1a5Xq!l&dzMfh^`cQoy{8-?8RzUxEb{BHnHpk0mToBO;+@iT|!%(q|XkKCHai zq}%I(c{Z;*mmA;AHvvEztV*k8SF6IGgNc*D+I)?nuV`5ut+@FpI#tW8E_C)jqH6h@ zt5kOZ)ao3%tY0B&4FOz@7tmj=n3P}VE99ilMLlN=E`*I`d>t^Tv*@b#xn$OD?pQ~2 z@0|rcd2&lRk!9TWtK4)6O-T+#D-wBjUEWnc2<^T*U2){w!>S$19P&skOdn=EErj+~ z*j!Wk^TsxJ?6y{C{vbqGrJ2Su{|(nyCw+@@#4c@wqCtF#bd_!ycF$P(0zXeRUoKJP zoT=VDic>y}L8It>?|{cgplUDTQ11RSqqxvv3j7S(`IYPo$%CB%KNuVYJv{k-b6*MTol+>qz-Tce`ac0cE?Aj224D4p02ar##ID zTygAQuPo5{up_0!i@B=1*O7wqdv8p6>~`Kr(n|6>CD$H?2;*Vuw$vbI#n}l*01hW) z3lc>3is4u6;Zyi;$8&_wxsBoT%r>ee$Gw(|*`55V6ft2-X-0bbWP9j11vxpPM|jHG z!9E1A$ap9Lq|X7MDg}8tB^b2Ge(HQ)IT8)w&%_aYE!Ir3m3hR;mHmAvf|=*s86S$> zex&g6fx@vN0)*N#>Fp7B$t}hFaF5KW7K~VsuKjmu^3-H=;uR=zi)giE6%+)^iD5x* z&KxmSRh1FnB#EVhDlU-Gyl%Y+Ri){V1fgj{66J5azuz-!`S)g@$&X8*#Pg4>_a@WW zeB-_o4md=|p|zl3j`;)?nfvQ}hX!<^r5VOJt&h^bX`o-Kh|r;5h`DJm^c8fPh;}MB z!R&OK*n_$a0|^P?)oLZkC#>{yN#UJOg}AHrXH__2Z&t_U&Y4&FS7edDy2T3=enjL3 zUZ0R^1PDP|5Li&>B#^$XFP$*DvA>Jl8;iWo(vu(!IFfJR57=Z!&8J=3VToTOM5Roi zxT`iFW)42)eeE#qFk90BbCvkvw!~mQOzA5}N<#XGKug)P;{!a5mnR?S3Dy7QIy;p| zhYFpDPhw)mk{7;PUSJI6=)uGsw?$nsLRxhzreM0go&b)2U~ghxboWnRqC@DYfk41M z0z7mUJoa2iu9IEMkn=!~)fOlwC==)a@Uc#kGTjARj8Pg?Ei`9lX#eo#gK{_v^ZB&G zCc04QS|9j%{XhZmiTae#&jh-xf^8`_N$GAP_TOx&zU5VV$I??BhKobdY2@aODk=&^ z{c~A(U@!sg=lRdS`h3v4TT-}ltjuXfRJE6}LKsYHxtau%R;sBC=`RwN{yIQ&dlB;S zp=R>F`tn854GX7DD?H2ClE_Mv%Ji-6p9_UkL!K-PS6oO`3M7Y*vb|n#&nEgvkEzw{ zdS`-eV`D>gO_8Yokc;$^9=f?PQ>x{$V66Y59Ng3GE?wS0>>SuHKnc7DsQ{nipJ!`b z5dqO3O9alyv@Dd$q-NX&rR(zcJRhg)U+*O_(tnix@-o#PrW?Dw@lx=T(ZUD%O#<>C z8+=B1_I|M`FW((YXY$9-PTZ9;!^|}5A&=uWWGvBqz4?oR16gQvDyT7u_4mQ#8{c~B z3dNRRM2(^KHdQ9*zB*U(Fji^XxWwo{w^T4gmIo&tjyI4j3k&z>e75q9ZSG6@v5~{R zK7UoH_!x1iv9zW~u)2MGufZeynq^z6ZCWES@`FptX{+a&8{ZGhG28EH7lzE)Z$QT5 zIiD;S>z~!Q>ux~U_PEoJCJX6Sf&kb$LjP%NOPYm3x2rz8@7s^R{r;zeyn=&T8>ueJ z%qB{{*_}=W%~0m5efnB)g8jmr8yAN?n8FG-6{sZm;5(hCSj#MeV%^z;LU^`57o`7` zRgKI^*gnamfL^5*U(>!^>@soKaD!&`=I|rs(EW;o3F=6kuetVvg7`q 
zAdw09$rZVWrp)@oIq&|d;0QJ?h{J^{grFVvbh+OjDRpHbj}oW50pAYGxa~i&fU&|8 zMoAnlm2?t??V{EY3PPJ_2Y6goi=T2n^*$NJ->>lkKTd1ByGBO>h8*9th<571d&PW3 zBcd6&ZCB#j?TRe{i{Lw-gp+QMF&DJA>uw-`rO1+5AQeC zC(BqzMMV=sMml2uOdV)Q-oRH!AJn_I=k!Tj*!dqHm7qhDxqKC7r2EGoKU~!Dnqii9 zQI#sV5{}rl8Qq=r|D;j*$mUKs>ACQn#yV@mC`oI2!90~%X*&4ecrfv9=LGJYZE7O% zB3VRAr1t=I`TB;LsREcNr);RGox3Z1JU+p7221iPRR@Dj2&Y%wy8*vP1D3+C54kY9 z#Ie^eKQky68U&7pv(-A>anDF3P!;9@i`2V$P7WZxe{{QDs%3My`sp_La??Jn<6$hl zj>Zb&`&e&qa*Wvg`GH;8cl62M`oQp3HD53U{gs$9kPLF z)1uDwEcO?I#C1S@f^^+vKYxLaEEi@p!#n2Iar!ZD;hj@J8ZD3c2Y(c^p%%e!$BJ#op`82Bz$T+qy zWPO!!pyzU2qb&`^=f&8BQfhSNX0*R1we;945MHDYX|bO%q~~&Zej_4e=h~0~NHs|t z0bn{VlkeUio`^!xAU@wkhRGkg&aN=uH>H(NTYT+!E^%GI2Rb4P#w*cG2j_ibv5M-q zpVFKlkR*5>^ZsakwE$6T{qgy|abpK`QBq6f&CS^sBiF^jYW$(C(9scSIY}vHe>%gTnBD-W$=XO6eTxZsqAhMQBK^I~bM`-Xy zppYJnRZ{?j=g$RwZ|6=H8xaTl6YW+gGoqqD7B6;MIG9Zly!Yc4i`MN=WI$OdWRYo? zQxnBY{p7xWabmjiPWa8ba>l65azP!XiWrKxxcK3-Vq)&$+;9GvmD28+d`YPMf_x%8 zvx)IBfr;s%w0J>uHlN6mwwVR2>Mt61i#0{d?Im;>`COV!u0Pv&d_7MIEP1^jgpjC) z*!DFrI6s!R6~GkoU1Xcd2+wB!_)v4;j&HGFwbkd=!WG~&UuS|f&9Krs@Mk2uBSU1T z9*}=GaDetVZgktgeS-}lZWqt#NLIvklR-@X8}G{RP^8)+xxP^|M_pJX^82~%xcSNT z6*g}p3O=?ClVKhK>>E5dVu@51NKayEJzkG)5SQ5-JeGHwMwWK+lLj}2qs;Db4w*dq zo1}-_s~v|fYr!iBjZV+dm2W(z+wJ4(j0ut)Exi|-3E10v!{7!fPV5|=_4g|SIwI?d z_rXgTEWhWkk8=9P2tJXEnHfKWbTT&dE6wKZ0z@eie@BpS0MhM-qE=!UJyv`BWEzLdfgsKl_R2 zLGL-Zs}@M@h@H@klV`pG_EfC(P&^DdhPohg1fdDgzBk}NJe+Na9xkm13cw()`S0ks zN;=(kgp?JPxubo$+1)jk-sfyM){$o%Kfw;fWLmvgY&JrireAuMYokzZxFN1?xViCg zBe!@H*(H%Kip6EFzoT=mH&JjzsZsDoV8z#(E09^3U5Kf{FSI`0dE=J~ry~r7DS{6a z7YYe0Eruq=x|OQ2YscGc8~s!YO(sZ}jca??Jfck4)*K6p!@0xutK8uLl#2LPvCVUl zt#Xs8kEkEj;|&5gM>M%R62oUeNetzF+SG>-Ne#a#&bNlb2!8$#O@F@_(6Axl4RG#%3=VY zK4?gfR}fFcKUY1NJx^AS>lfaYxFDa(HHCe*vpsd2gsVMV3|r3*%nZ`N8cgO!i7``b z>nr?}C)dFFYea4MDwW$6l5iwSrO^hBd=NLx26+Pw2Ms~^9Eb}u6We3v&?N<$1m%=7 zsaqSb=!lGKc7}#<)fL@o>HGG*}C0KBDkL8#<}5_Yl(`BgW?b z;(>R6MI6I};;egLYvM$Bm9nNtA{Sdb%O1ST@)k-Ixpf*!js4l^Y1o#4I03K5{73RJ 
zm-W-WeYpCVYXCe_J$`53{uxX6TKimdxx$YR5q-!__@lo)WCm5Zljjh&KgoWHd(y<9 z(Tolx1{<=x0iNSB*XT>k9FQp9CuO<1s)tmn(x0LZ6F6pv4Buc5rF7olBv(1{yx!fr zsFu$jSnJD#1I#ZP<#g)(^2FVo;-axyXSKm(PE0g39p(>#&L(;SE_L;I0Xhn;2~KnA3l+vB zjX%~Q{`ujZLdi_N8x{r|rB}j~X}U?rwrgK=z6}8?k(iouxO~GG0`1OWjVn6b_Q6ib z+()-nn@9LJfw6A&KF6@4M)SubS8L~{gmKKY>r zM4VpMcqs43gV4gab1+Z2ZGuBALQG@K7*`}m`|1&am|`mz7Q+{O1?lGtoqtP>S4j(12ER$W%ITVW(kr*_2D9~h@<`(@=b2#2>r@o7r z0?@Joi?Ix8{$ETlc#fcm1pIek2s2>~#MB>qdY$%+`rZkV|8a8wyWUGO_*K(hQg02A zozzhgLH4Qf<<-33#ScFeNjk~f!N3?w-)EBh-w(BqDzy#}fw8QwB4V3+o=(s2YGU^t zc1=UPf4UU#wGorM=9+C89M1`xJrMK7Kbg2_glT-Ptca+ifh?FBJKZApWB4=b^;P0@ z?3b5rxnL^P-b;8&gL@o*ghvd>wKc2s^a!bGw#^}$Z(r6Mm)nmIV3V5N2tc-USR)2FTx|Y$UquvuaK&94b zNO1@o+&Yv-V=SM`ZEW%cs2ruI(BU=Ig>K|qW^Nk~nkyH{LzSfLL@;_>DiGkPtiTEL3o1ZB9=s;XD5cRq8@ zwfkV`-=`f#Aw6GdAWI7P_^2&b>ftdlm1$?k#@2)we)kzvV}pQGyos7*T^P=1@a%EQ z8z!A`?W(Hfivu76;6{E?*A|PKnH3ioYowe(3F2JCh8^G_w9pD5{y+wkwz9%1SAK8Nk36gRLJ~Umut+JYc^oB zv#Zh_AJhD}BT8dVtpn;mRSR?EBD9WBI})A`UGwLWUCx zb``{FU=pwZ9cu%I6UaB{y+{csi?c1Vh!LkNXXo4yb}u%Av35J8rQphWO*T&CZ$OXo zV;%b^i2t3+(*%N$yzAh2QPiYhbKLR%Zjw+#ctsp$BZ;<(s0;xHRj$BLNbO zqM&pN*@quyZugh)qC6^*GM%p4~A{L>G~gc{c|~%E!mPkrIcN z>n$q~?0?Ct;doyXKl*6gV~;yy+R+;l7(d2ePM$Tb{V12P&Q_?j>iq%)w>JBOrY-rybE`0zG)BeJ4g>H3`ZGV1q)fRxq0zJQ*RgZ2s;o}r``IG%>N zb*Zm!EYTDGBRVm1LC&*OR=QF!6>BsgPlpy|)i|3M$Jatq2_t$uo`R|lJ%FRbjVu=l zFmkB)Jk>wTc*f~)yLtuh(%NK3FDOQ_8LhmDT#q!GFNIl{t7`vs5jX!dk3MCoK-e4Z zbVhjS+d=uo=YPR~NN->fc;5Rh#)QIB5v!?XMFeEJuS&QpycMWE`*sN)sx0A+1 z$n|cFEHfL}JE_(@Y`Ca~2+aM{F(c<_amHeETcgLoWkB82e3duz{=vCz0OtDv$QWgv- zQ)$q&m5!5iN*EZ+cR9*h7qz@^hMaarlaGM1A6Adwn1?w7h<%1Rk{=` z>i5HMnsZvRb-DYABgObZnN`d!Ue?CUtW<+=GeL6=9ys8Rl}zzqhkSWX!Ug7HFIxx- z-Q^rCO2(-BKZ^%~HNUU5cRu8Q zS%e#$rOhDE&(B|@;kvP}Ie4>h5#xhVA|9FVM>hBaEGBMftP+PjM2F~6toHk3VS?(` zGq$9TO|QmAB`tnK$MEv6a2CS}$}pZg)YU|8RUathi9v+Dc!el2(g-n9B2WJig1%mo z%*x7Y8fIqZHD*Hu&P6p_9pe$GBY!KVPT0`5&6Ghq{G=>c6;hGOEN9kxlxd`B@V_S{ z8+$_I;avI^j^0k61@iQfnAum^j}==Z?x^BCe=e;I!KgM~O}GL5_Y2?w4Y6Q)J45>< 
z&NkZtkF)2W$wH-G_ANmnUsoZ;RU{jSuFv37A!DSOGfU&(_%TOde4HVjhh@K;Kl?fW zWVVC(X)Ik`@bbePqVvTr8G}NUYQBP3WLb}qdLz!zVA3X#0!XKnnQX{yl0BLrH!_^S zBa@7iy1r>NOi6UZ@j&0q)%4F~0IIejmVY+Dy>kv;FpzQ*GG5R0mG5O-Az=F$r>0aT zRHD#RF;y>&wZiGJfW5ah;U$iv_^e0aBA^hh5-QLC`;BFs0sJgV#XSl}zz_4KOpsC7 z%sVb3dLq=L#;VQtQQE*`fkv5l!sLf@(q*@o5630^MX@U1{Dd~b<1NGC1u0j|wb{IP zC^f6Rd^Vse=OaLGfyCVWKdyoY)Cjx?-2HxJavNv%j;^$)#FYms^p;*bIla)bWPRSg z2daF!8<7Iio58*P!>wjr_-A1x zdXo*QTP=?e1KjJ4^c%8k(MnPhj2M3Ra0U(7W;(sNMXDC8LV>`Erk4tPSF77COjL#e zShUxh7C{Uu)8B;*B9Czl1H-G*1bND%^i!$FgkcV<<_cyimDm7r{{x7)Ba6EB5&V+G z?vot5jZOo;K+G%cuiQw}_W`O7V$e5I1}I1Vd}AGMHyM)+=@@o1~*m$xc(+OpD=#X9jPzr@`Ef@7mr zE5l>8xukXMD6ss~4Y`L!%1MJY)XZwd|DdyD=ksXjV|tsZSq;un2a z!Y0<*@u3&`u?Ksn8)bwJiU&kq6d9NtbA2pZfQk1lQ~yhJnoG!9E`qrMzYxU{y2NuT z_Xf&uC|>sAk&~*!G&|0!BWS+xN~@KPAY0zJ+tfT#%lRtI(vZ;d_lqh9vg=@D!fiaRW_-PCY#9A^d^{UW_)1pC}aW zHs2VBTPmwm4jt~>duX3DH8?L|skWlM{nTh7_mh?c>vt=X9<=d0UcrS28*kf37FhV6 zQEdOG64Hnp3=AV{i3H=Z& zYlqFL8~G-oBY!U?#k)ix8?6a^)5>-OhyP+k)X0G-I~U!ktVzx5UbZvBLc$bRfhVw>iv0Srot=#}WX#7q z98kJW9X2L9nR!ZbGWRbm1y2AMj(z>+)ub8oiibk{`Bjq$LSimpn!kxgIv%k8?(vb2 zR^Vwx-4~G@!JHS9mI3+1MEx0)%=q>YL(V+mYb5VLI}qD(=PNd(3GeDep6_{sih~2( z#zlQVs7;4ccHxK$3i{*tK&A{V)RbLt(~oyiLVTq1*@)I1LE8L6(rJ9Xr}UKv>-fi)MVSuUW7lrgLnD_<%w zleoIdR*pelfpm6aT<_-leW2aGOS)BgM+E~K%YgFbY zpb*nqLg=|rHjGER;}w*i+*ky4-`n3{qPJLZ-@ZZvudOocX;uf9K%M90sG=IyW%H)bkux?j!wQKF9*?f^z&$ad2x^CumE}{8^6|nB;?hSUuJf16!Nbx8^(a z#YL)coSgL-oEqxK!V(C-guMUS_K872G()xqrdB5T1I--6)aEV zvuz>)cRZi=>D3hnwKY0M9zLR#Sh-ZGW_O5;178hCR8%I+zZ5}WNAxcpg@b-ZgZzp8 zB-!@eM>o*!5Fm>b7e;&y-L}bNCGLVliQFG*Ej2f3Wo+;Q(u=U1IgAZw3viI^l~D=a zC*~J;&x?AbW4mc;Y3R>ZSDx>weIo<5X0 zHFrw8yr*J{3~5_#`EFY1PBJH$owO5GWyqbAi5yocYPOE?#w#hM<9dB z_s_*SjfVRcX7Lky7%y$o@@&Fj^u1E4VMx;*0zKH`Ju=~(cUVpH+0`Ic2;Nj^`263L zFg^~`pf?;phGn_V0Y{c#Ic}OseSKk@zs|$|4ZJJv&-swDJJ9OMa&zDt9m_9MmiHvf z7=O?rW2jXL4!K;&r+GeL;(VdkhXnM&f_$|MZjG8lqm@Fa8QQcwIZtsN7$k%EiT3})5iS!<6GJZ!QP9BbfgiCD;Y} zut~euxY))3csNvq0d*Bg`=HbE^}~s%d)!cpRAK*wAHd~gc2#28U5p3`c;I@L3iPcf 
z9w=4l61ua%is$A|o*FBx>7p7IwyUMmOEORDeqZX1-Wmn+Sw;*3iMRj<)@6>sdh09j zP!em{=#kH^Zj;9yp0}&xn(e`SIedUnPAP^p*Y^Qz-deBb1#dBfC`m;nCDP;`q>V2C zSQFEPb;Tu+mT;k3=eI>N$X0wgHs>cj`2~Pvze}7kH!5)KebjKN`GNP2@OTGl)>MO_ zs+o2pbQQ~+=~_$m{(y;kB@A(*2t#OA3yts@nvGULkMEE47F|Mf4u^g#%lGL{qaimeY{t=vNIFv#F>ancD(5b`65j?`k=rHaL3y+UO|y}Z2;{kBV3B^ zT9wY`vU!wKVkvr46eevQ&x4m4FhOqqXY(%jd(%s>D&6gzy!?q3z!j^Gmwp~7@&(C} z0g1nRJ%C=)o0^5x>RJM`5B&G0_ z+dna?P3?EvK4W*m{ZiJ@TyL^Pr~G(v?pqlmrsZ$Hh@#gxXpIVTrtY^{;;JpZpnrKKKWn#ckXqzfxXX9WAq`^5h#qs_mF<=h4k+gbrQ#>r z*j(v-eN*eXcIzBJYd%E3*ZUw@2{^0=;r3ye$kTZ{?}fh7+ZY+^@9s>?`E;g7qDN9# zdJJjR-~jJa2vGA(imUG8^QIH-{oBf9ku&0ecv_7Ngc%D*K_5ISMSfL6AtRke3|T)c zS_Qnf-J71WPQ9LV=uFQ7(0Y1$R?$3D>2WCXvk5Y`Unfh^F|7mLRgVGS7_Dyaw45)c zCK8ppWpHN(5b69b55DFlna@d2BeE%*T}j6MkG-;l&Uce}vT!YqSiV=mU-y2j(;Kk5 z+E@2xQG{D9^`e`;xBR-gnt;ta0wJU+A|g^&=h7WD#Pad; z{mh94wnM2q>Ae)M%qS_5UKT?8|6g#g&mCXD4~30#p(7oEHGxxnnP=mJEh%OLFk7{~ z4S<3soi6UlEUA?n@JK!qp4v=&@{dKP01`g-omW2YP}EAL#EMxtw>3PlYw3eBhl}-* z=oUf)QJ=GR%(3OazyNDqJ>3!Km5*+M4W(uk`wHnNC^kdVT<2zTN{tN0@V;z=2h5_W zuisV#lFzu>1D42^$d7u(S#t`~J#=M|!wAQ**y43)3BE_NwbE7?%=#oIavJs5fmZ|i zh=3jvoK5?!H$gZtT`3v@b)Glx1H_P#*oB8HgyLben)@DEBl^u;-w736Yo%vIz7xux zaDV~6HUhG(sM;+~LA$bq*2|?1-VxM}+ID3}p%3k~XOHvxv~i;W*_V2^XkkKjVJY0< zqW2#=ymmyM`d^X+ROh}srkw<1W3L5+yCs0;5fO}cZZj{W_b5PZHrZgh>dX3+FGZjb z;Gr>D-;Z{J>~GT6XEs_0stuGPig*uw%!BlnCxGa4^d-yu4Xq#O=^}PS1j`loRW@sY zu`7W88&P$&-e4GrLuU!L>Qd%wpoR(ddV#r&?|4pPNEKC7c2Y~cUrPp&(8L-4T;YtyZx=xyauf;-dVwjZ|o%|pgPJQA3yEgxTqbkGzLpU7gsaMz_Oa3 zGVu?!AYs{XDqPIx_)u#kDqVqr`;ZV_xs{|851_%kPS3V4! 
zLFB&D0&!S11=ZOf4Ss&bvPJjpu#`|#SD7HWW01P9w$4!dxb@wqWAtH&zJy)9F zlIIb%#CBg%7%MKaE`3GO`fF_wS$xVbu<;co77jB)l?d?hLr+iXjg=jc-h~p1l8fo< z-X)U7)7vBIZElQJ>DsFO_Or)53z)lzgA^$6G9YPkd-@sO#SAOaJ>Llr7R*VZEL2i| zFAChIH8v8Wp}&~J|K7wXB47hg`s0y$`m=D5b!J4L9N?53C4qGkqFAm&PY{t0#b6P0 z8e2&=7*I*q3o1Y+fug*(TI|6zO7?lo=3#ZcAw&jiw$h^!3quvaxyw5J$i(d*)jxjF zolnQnY0&vCk2r?r8bq4Y%ov?`%4d(K*u&M9Yf4x=zvHJzhGZ}}kpn6!xD%02^a1X^ zIVui8n95N@dWtlPCa>pPV1p>hoLpG=!R`uSecZdtMj;bUmdOuAxX59DoKR%@qkiXr zZ^-F*wqY`-hYw+MIR0m2zjFCIi6+`KOcu)aCq9ouqE$<~^`XGIa_1{Y>)S_;x7EXc ztQCb&9f3hAO<&K6&l@eLUK+4s)endR%O?8~5!A>(L#K(-~f3xvhu{>$UqekNXWR znrtfX&Q^|6S|)D}ued=_Sg$+xc;4=t7M;S!)J}Ev1*7f}FK>}cVgH&G{>X~l>Y=&> z&K|=(lS%wEK2JY7*Sb#Mzsxkis{VSbi3GvG?tOJ3hi`vJz3XHf>g@H-vLk$HMWcv{Y_L&THwI&=P0p_aLxD#Kn@fm zMKjvp@-etVTcTrOu^*doDRM{==%ScEoNay=K$^m1VE1|SR&aP>C;7HSo1Bm^DN}Ly z6K;mt-AA}nF0Y&+Sm74TPfE~L$!XeBLW9%NLkC&9M~vxlSUk>;Z%RrUdr-h}8q7vP zusKx7hw|zXS&&4f5nl7B>$c(ge)4Ha{<2d z-hTbOjj2UbWWKQvE=-HTTS2M2l*SbB^E-iHUl}7%a4Qyrecjg4qPBNgK1T%k8BOxe z$-D>tzJt5+m?${w>Y*u)CT#g(&&_YaIxCO)>g6A&rl;js>X;j@j=K6)%Lv`Czh=R4 zQ7l}V^_mpC0B3_Q^u}Dnn}0Qci1Qwy)Gn?wTdJX1^wMscYEg!@5EB%Hk~!pBogewI zh0=^?Lc~RPE)omeuGrug;MoD3PHm#ngKN5P&xGrU|9NPrNQkokey+4Ys9i=49SQXz zS9?s*0EF9+nUwr_jn4yhqo}&>xD5|Du|~Intl@Sq!mpwW;G7nZJT4thSwQrOT}%JAJEJ$fu|43|d5~aT(n&t$n)%f>gDH&mYFlcwa9?v^_h^+~)&G7P z;2yrM`2CvRH&eL(YxnuD{Rm2VpWV>5uk4oA-CEaAwZ)2YwT@p`38SKi0j%X6HuY7h z0gv`U%Rk2MaMhJw2Jc#MEMXCQ$&#|*$2IB|_nJ52A-CgC_wc=L8I@sm?;1o7=Pts7 z`I{cPH4FubY{6%0YU(We;2Jn?Yj{`IlU+I|aFXBy%C7#HHyA8os#4T_5F>w?pVq6! 
z8wV)8a^&m6=KeP>=_1pK+2E*GEgb@tzQSMjY{w(%A~xI;H5x(_lBntLb?86airW-m z__=ho?tqhAc!?e` zmj3hS10_-IEaer+M3`F+BUwsv{eHwJOls|*)+MnP0cmNi_vHnBY+#{E)1KA{-+PmT ztr+1k1s>zVuD*8!p??lis>D_3OytncS##bovvmlotJ;y`+3fbm<*R{^(E_0Xb?aNh zx3t*N=oYCy1ZKJ*0z=_vK3dlIz?8N{XRrYYWHda`yh>Slu-o^7PXnxc-_Mw9hr(x( zyK}XYj%VlWb-$A+J1J7nd>^GKLHZ4og{4}kffLwmY#$YiU+aJ)iE`X|?i_ht9ejNK zbK;{cU^4qhQ0i^F(;StQlsNVq40c6A0CTM1{kvTh=5O8=ko#krU6Qfb*c+{XiEaPq zYW5KU77W%r+~s7wD<5`}`bU%ZHXP8D6;)pXZ?|`C^n8UE z9%97geGxn}UY+|Fs%9`pq4!vRSb~Pc|J4}fo0O7cyJt&{6OSti##~W9NqIgt31=G& z=9WY#WExGLaH!YoDa&D)CdIDmM7{86WNQ7#tNr)7aX>_52nJS@Z<^5142jl!Nqid# zrP{=MeZud_@nd4UDSidI48R!~AEc*yM8@DMMoe78R2{PV=C+b&<|_qNJeeW~cvWIR zy1-oW%O?eq8+`ksB0ovjKZyMygWqFWr{at$g*vtfItbw@p(Qi?uS?VS87`pUFXiyx z-A^AgaCJV&`eyy->FcwCgY*CArTJ>Z?=yiL)9y4dOOZTW%B+5*4LRRukL9^)+v8Tu0$2Z;OUxqH)D8jhvGk*XfbL?!WE!!t-{(^`6bK6y4?Dati{uAtO|+T4jS?W z?p1^o<=wlC&%G)4#-8Z)OIm2uhwAA3?5rlQ(n-me%HtkWTpwwyrip|kM*f1?4x}su z95iN%qR5%dKg_iGR_0A+C#mFspc*>lRE1;v;yOzkCl|DUJW2%yq!6dW{xaw!6MUAy zLz>WPhs=3Z(Nk3){c6nTo&8rhlt~2e1ibLz{uvf=e{BK_>#N?fCCsP1a#Vqio*Qyk z(84!aU}3AMS%?2~?crZ70PmJbOYv}%*%qu)tdXd!;YwvCAo-Uf-55@nO{jL3sfjl^KB#=;n z=~4JQQwFGK>r7ImvwG-ZNyXUjA8g$ya%0gNfbt9N{q+Ep2)iIZcxBNS>tH3HawpT) z_=3Fn4L|=AcnEEU@VHwbW4F6}!&!au^EaT_#czJ)#e6t^qPM+rkh+Ykva|WqRw`&YhLBIlOf~h& zdoX}Fgb_kI+&CLF_p;V_g*Ij?^3iHH?v4z8FyP+VTWx;VvEpZykZHERMD?m`zP&aH zH;o&$t;R&m+kEwaHo3Jk6YPtRr%LX~>@W_n7HhU`r)a2c>}9xEkxT#9xY zmJnYtwFv&<5KfHZ+aejZ?afst!nr!<{Nrc1gE?Lr$Y3b*A<*C=FN%FSyRb0GY@X#+ z`~d@r zTF%{FP9MbWu~o6vzRLPQf}g`4t37Ua)7c?b#l+_ug^?jm2OJEq-MbS*pUJ{gOQXQCuL zC7^%z%KT)k?Snp_I9w{b-#`!k1Q|}qD*&+!Dc(5v2g(d~tz#asNv0)ItMJz8_Q#-J6XRVTJ0m(r4FyYVEt}aegGN# zb_W|Me@R0e_P57;K_@2q2#5iq{QJF)0R%4ykU-T2EGW_8N%ZUWMrfVw0cYAhpjdL8 zu{q8gZVmnNAH*;KPJHh?7HpwZ%|Lj@olYxSuJeEqGSY0Zdf!-E+SzafTfV%_PJwEv z)jH6dz~an&UFIQE8LQjV1#(In(Dot3fiJVYFsU+Igr`}UYfFek19+HQ@reQ3Pg?!i z*y=AW)`Y+o+vh4IB4=-MG)*ZLR+L0U zP@>B4A`zi$#OH5jsY}jl4Hu{bxXM4wfuu(6&iW=S`&tX(B*m~_O$}?p#{PWpccRZM zL6M!b79(b%p!&A{9B=&6V)VYyR&T!^v0+Hhm_a$#j8l>9y9~#O;KE 
zWasiKzQn*ldcNe2LeR^r=PqKFIN%8{7%1=n(rYjysUv+!d~^YqSu$Do;UbG8=H<>W z2o-0skqn3CIs8V*VWfec#@Fi}KGZ3)W^1xmSWn}uU zF*~B2@9*@^wQo8(|L!G7KI8GO^!P4c_eGbz;$bw`JdylT!efk1Apq0};r8P8VU>m0 zWac5CEeIRKD|3{XksA$28Q6LY+07J~24QN%F@dicAfKS>lPkW8EVZntQI(`g6&!+RCk2tO9 zb5(}$0D%?kOH!rG*NI2}y(xOx{cfhWd-D@8Jy0CPRq?x~ne2>pOj4kL(DB~|8S-F7 z)VN#bTT-*O7P6RR#RkG~&MmY*wGlwSz0I_#R;T~oC3Pn;Jyxn;o86gPklM;Gutl97 zN3NAYirV+_?=-_O-k9Mo+#QE|x9XnJ0Z{}G2C+i;0+M8`q0n~i1!RA=IJG~kI&=9? z{~HFiEdw$K+NheF<8iEIuB$GuPTF)H!F?}DJ1Hbfi-j_rfy#@xgg}m)96?;%7+U4G z!U&k%2W)5KNaWRj+9rw#25^4)vzX=?Jp7syOb#uoVX!%4p0U(Ithgg*_F1g@maVgS zFYUx(JTqCt1{jBY&68W3u~vXNFw>PxcbRT z4u#G3kZ`nh>h*`Br^#BWYyXjAv87-^RJ%|MD2}t3mSXSnZG|P#a z%R)r1|7`Pb6<@dYwGR8T4fXG3`qub*!|_Dry))}v4`>ONno+#b-m(*-!Wm#145_oC zFV2|n3hht9D1N)Kz;95``kL`GU#SGmBnh{3bP+{VGaN|J3!tjq>~D$!G3_6cUS~xv zf5jL=P^Twng=-Gy({pnXLd`xFIttrQc?_+yj^ySR! z7db8yLLP~?@23*WGI->MTM0erYPhoE&W9|_5`VGamvWla6J|8zZ*XLgP zX^ZbK{>F=pV^!I48ixBZ5aRPGq3neKR9c$qMX&4Q`?p-b`;CGv=JQUIEeRm;K+-&* zoL;?+tcOjm%}Y-E z*CajY@KyCP;`x(KB{X_&n82sTAHa8^UTG*?7_s++m@HB(fdYHs zfcE5}G%M#Hbzb{G4@3lL^IjtY9i1I#muHIk1)i1qCtt%Swk4tw0!X)gNfS!kRfn`$ zgSC3y+9I@(zc8ALqrK}^wr71`{gJ9$Ckr%RkRTX6>b0W~0*dz*LD?8DR@o`R7I*zO zMo^ISX}kaCNLz+nNzUo{lB3GLWjIAlu|vem1q>vhrmgpv$euT-(1RD~i@v|zJsKHG zWu;(Ne8B5N6&{+92Rv@7>bWG5nRZGl_tzZqT8ST3N&>J(btH6Toxr1@ir?-}iaOPu$ZlcNu*s5&W!P}*j`D?*loE9) zsQ!+SIsoF>Q>Igp-Xc=a!r5)RI}W$}As&XCLY7hp|m>NCE;3%1}rT@CKmVPm)1 zXU|vWh4VYSFI@;z3!c4To-roSbg+DD4ns7pXlFLoh0r_Kl!T}xE*s3fzYDcM67$IY#~B-loBaT@0;}dv!IyRJsLSJKE=I%~ z!llj>pho619XR5O*D#(^3WVM)+4OrE+rOU+moj1R)Ue4E~{3@7edSV0cVry@+z%ii#GENZW(v6iCUN7)L6jTec-eS+rng3sf4 z`G)bZ57?2FZr)6ds0`~tLHB1Ofbdy?3JDNsf$$VQqfv>(nK1|wtFdKF%(?n(psx&@ zV-DnthzU3;$G_w%;b_ZH4dzC^GcQ@l;%|ARmk9V6_vh-5NqB*6GCv)M4UqBN=nm3f zKX-0Y14oi_o=bctj})7$B^(={@e}X-kpETlgNezX^qi~94oa5{(EK4!KJ14w+tHBv zvt8N+;E;#~Df;LQ0rAWjvC?8ZLKtGqFzXM#Jy%KT%9X6~Z0;b+jbBza*z|Zwj%I6g zIwp(V2qgG;39=?lNkZUt(V(WBF6vLwVjJ6b$GXbTfG*1qYy)jf=1dCj!u9AZ%$xsiJ%-_w zULE^(|2p<0ex+*} 
zr2rKHqpYr4J*_2EHM2l{kU@Qmo1xMQTov7}jR-=`$pkVmzMjXi3@33CLWx}O7KVBE z5Yex3(_MU8zyAylY#mvv@R0%s#u*#s>kmyal$l!g_U6TL&2wD}40E#FsX$1FBxNRu z+s4qE1{@tYo;EA=`#_^{W>aksr^TUnhnlkP&?S5{Pzs{!jB|*k*FbZSr6}?D_3IpI zVCz^Fu>uKiDkz!uGv>&3W^=|ROc!;A2l2w1kR}Ngm(JBd7Gaf&0O10n&+5GD%HYTd zlT6a^YWvx1C%YHWi6rl^77G@}Sz^9-{;S-;=gYsrRhPy{%xPc%4a}-=tCHbXV0vr~ ztuXHZ-o%rpZH+bCb>38Nm$rUsF#Gl-kN~r$Vn=!iOKpeveNlIRivNeAiMbny35=zt z0-VSf;80)wCPf{EG^KMrJ#zBO-L$oeb0^lwi(1X)7QMujMfvF#ord|kW+I(~3OXWX z#jE?YMT%Z_#)lP{h1q+HN4T?Pt4tBmY4a5Zy(3T*)_YxK$t~wTqWNe7XdO44G(XXI zPuj!#P3bFYd@?`@qN5&y^7#P}bc5e784J1S^ogZ?Q~^oL;A7BzKL(?-Kph=wglJo- z24Nlk%BXO4A#SHRa1?ocictEf0g~Y=u}Qj-Jg3Shodo2-iC7k{_P7zI{f-1l>kX^N z30D%4uoZU4IB^It1Jt~u}+PPcI4j+L$@g^tcpz_a?a;7FJoL@jm zCPdP;R^6~G=?Q|RMwmHM}=rE>_0DDVem7YJ80JtGuT4o<&eHESBz-*T;@ zSxaz|MvK_VB(WpLh{@O~=7@~VB|!e7`edESIStvoW3v`$Y=q2l89+HW&;m;pHn=Sa%`aWb6xqo%`V z>*s~~KJ0f|ojJ6#)|L>Rmx{g+usd8+QjDHs_W1+dFXKQZ*Rk?Ry92x1J?h93hqp)P zy^Wg3^5d5VWB6-lj!w2c47@ycn|fg=TQe)|(ZQJUbC*`!mNLSLB+*0{9nLVd!S5Sl zKe+y|mP&qfTgoj-JT=&v)p;wRrUt)7BK&(|!ZFHPIJVks6*8FZlOb6;klHD0dg#!=3NdcG;d5}>#fbw-# zA6tSddybq&kM!LF_7sa__zPpo%`Zb%9!|aS2H%N~2z%SGcM_s#4u5nzt+VgO2y&)W zI zK84$tF4eEuxDm=?5okxa$7WMR@vm7DMf~c)=rU5dXZD=b1fu&S-%Ggzi-FMA-riOR zfV_SDeaR)q(|G$YFAm=O_=&KpX|?c^=4!WhFh$vFSkj;pq#*F0gkAN zKaSy5j@l9}UZReAL?;fE9EHh(TFb4 zF*$Z<$AA1-7_n-}MDt|qNqyHWQh;*QD&C@ypa6*HcDE#fVhY2DiLKl-jropnh$xc4 z37n#~i*{$)5h;t#!UDkiMoz4iKKwn|uev&RB)oB0<&Vl)|KoqURYmQ?xX9+AMr5d< zFBvg8FcOsH5@oapi`nizImpIJdBF!1XLkklW&DD=gd}OGNx%P1j;V}^9Ygun0CQU? 
z7YGM@V?5{Ccao2kKTM4|mPXIplRcZ|J@0{|9>#0+XeP5%R3~4ga(^X{s8q7oVE%2fM31fUE zVR3x!3^z)kzS0D!ovRO75F}XVJJb<{rA9-5gSOU_`V|kRXO(1GA*sLrT_(@`_#S%C zSB9l%`krbLxTY*~P#=TxpQxZRqk%_@#LAZDQ>S+==HQwI!p&a{u#JwGe{hoO<7He( zzzCo38{zf|$rvL^sGy)HDiYwAfJUebILbY$0C|`;IO`q(B9-<o4)=itn!5fxq-dAS6iVXKPqn1Lnore(L$eP;G{#T#ePhL-cdV=BustPfjJK zLk>{)_F;6i7yWW0H-=LO(5iU2Sj7wWYX?@ET)aG%H*R>Pjng?4@yhil2ZW?=ye%^< zLZbq|DP`2-{wX{SZ7%?_Rcn?cs2wdY0OEU^UhN0+pX=@u2eqHq0d@xr2v-`ts68-{ zYIqO~JpTZvTfx_?YmfMEMdJ@Nj9|2FyFn75iNsJQKG0s$(}F9F`CV+##K0Xj(C#YD z?QuzpNpp(DMw3;G*r?H*5`#d3 zUY-sHfz?VF2&%DtadQksP2rL`N}Ij)-$bZ9iwfA1>g928IwrjmRH&@gRHwVbnrbUDY zaWV`3n#lTBVajqaDp_L_*mVD`liZQaH(5&b{loXIdwF3GF2F z5uq#aAaRnghi$CBx4=6Cg(mmaNb-$t0z=8nQKA;j!69wH5ms_J-)Jw7mmo33#hElJ z&5$WLxR}|t?*V2;&MP!6U-`MX^_$OrEo^r+6{n8LS4RSp2RI zI;=DA0Q|gmM8S=c1)%6K&@desj09S@d~G`0ktH16WenL;ln58{&|mN@ zG#h=~?(7pT7d_QkWiQf4LwZP4EK61;fJVVSx7Wl2@M`qS&lh74#pyCfuhGXl{MIt| zSiq*sP^RiiOYu4BK2np^t1TRiI?_V;4^xw$E{1#?wiisp#nw@$t;}3j!9Nyidg#db z+Dm4zdSPUz-+JRZ*9rgOd+&-lHhmMy-pvy*?ivk`2!7Kieb5SX!xI0vv@)R_{1xax zvz&MKLsWAJcTfA@ltGIpu=|==K4?gaqasPd`kmZ6bT?_$F^da?mWi!{_UF#Buvx6x z8`n-co^${L!0Y)OQn@>7b+;z}Byi!@>xRqfKu_4n#l@C<&ewnBT=opHD|1oN_m2%> zZ5P$)nGsr6r|GVK#4}yBZxN2$86FFT76zZb}%(6SO3E?ive{6x`l(QS|-O=RUE?snn!c|ICYj)2+khd-m zx<^sfuW5EX7ml{#CXSPbKi#H`zx2-8_aI)PpATCoAXAbY+dK9TIYX}w=DOaZ9P@0s zmufc+EcUH9`NKP5orMkdi~bR`Ip5s$57*?)9EB@ZuS*=|IyA?QBO5Nbs)71f3!qfL zB(CHG&Wzx8zrj*R(PG$CAeGu{~KiTc1piD2iZASZt$t`i;i8vH&Y_3C92yVyl z`fCW(mf3F7ngEq(H{)66vd?NX@inMWjqf&W1{^F`I(G$HBBeW22o9=l#T@>gyzyQq zZ+62MDU=0MTMQEZyb&#UtnkXy8=f#G4~n%4SEkQ`+MUx(sL~}1`cvu0fGvRpkWL}M zWO>6Hq4ScgKj1jQj{qis46-0yGoo@KA`u3>?_r#=?-(4;xnq8R`IY$1_&mNno!7Ls zk@$1iSj5M%;E2quENkGrH)ySvH|jke69;gX4lCs#U?ka;yckWp>dtKz9d8bYdV6W+ zPZ55+c~?pC>b57L(W_h4Pu|fxKip<-4_I_{w6w;ZF#G*ry!ZZ-4-f~+08$@crc5Cp zyh!9LYwDGJ2?N`vCMNuVJy;X%@b$_3Pb0sAT8azba!`m?SX5p{jJ~G-*@&o{t5EVImdXO`@SwmOSMR9RukVYS>n*{ec+^$ zEtNnb5D=X_!(mBoE3>;VBCKMTH(1ZIFEKPX#0bJldjH-#Xb?c1mc1sNWh?gM?R#rb 
zCg{^QaJkriws4805gavibY)Lrv-hRlG#YHZj-^sVBe-YJEGh~!H=~^w&;yH6AtJVO zW{a{wBCSS`-}@hKtd``7zG_^2?!0bzYtVF&GI`-+N2#L#rl}3J)3(u3U2T9D`FSCm zFg7;!t={O9$%;Kco)isK+*cuBgiu2PfN0H7=WlhdbEY8U;~-fOlu^_+Ag*aak=fr^ zEq|BrQ;SGy-_3Tb2X=KZKQ!}cKx>6EJ}>W?3{`{v3c4{@#>MBFeY?QW6lUr2lUYy! zB699t&V^)$;-51uvx2_S{s4i|d<|IuC$i4QfQ(ZIfmFTr#%QeBi$AS#do)yLNj{h$ zOnW}-SGc$)TZSy*@dH{7;osR0JU`y706y1J!K>pCLu>Am73UWN6C9r>Yo@upFfI;{{(C9)E%yzU?Y-(SuBPV1%J z|5*Qtx&41y|Ajr7EkrHA8q_3OwTG|E4{8Un@=5ndgs(iQQS6?Y9G150cOqwm;Z>W~8%`vocHd#`8n|D)Z#*2OlJB&#c6jC!znr~> zU;y-t&&aHJSAGg15W}skH4wO3bzC40T&D^z>G&^kII|>I87D(%t^}II-iYiz^V%0Q zcAM}kP&vq+2k=pt5Z=J9Losgn2tHBNA(i}MDrGZ`2*5lgf$+()LCPrxOB^O}D|Tj) zLk;Pm!YLx{Ro!B7FdiYji-D-J*Kf=-Z)M(C-Om~Wt1T}1`q{g!YL8CsSQeJ$p_sWn zeA&?q(&?#F{v?~1QwpuRukm*18$kDzU_DHzFNU2D$g~i^49+SM4J`0Inui2$AXU1K z4ok`5I~4svVq{D|A0VY>yG)y7o=);FH7zG|UIxJ+a@!(+^3l?0u8Z0i^sqk%DQBgcV9JQQ9tsZ2}{mur+~t=O3iZz~(^_jrr&lF>`w*TCuY zE%vu>CbAP<0U+KoMQE**MDVgc5`P(nF>Y<7RFHmiHk;38z2~W^NC-#2dVnod{$#xb zvlM4*hLABLNJ7)sRqSe*_d2u~xsCIZtvn}Hpyj%(FmIA4F6_#^QAOM-+@Ie7pg>s=AtQF0s zE-Jn^Ac-Ingj2u#zI@$J#O-kn5B3>eSNZjMeeF0#eEM@pvZ$M8uhn)ky3TmqPHRLC^SR&y700vdjGXb z9NcF_wBjW~1^xfTN;C4RA2S2N!-X&M4i^#1<}<&uBD|swk{F}Ttop@xTYqQOb_NfA z7^`}7Ks7qWBMzmGP%pJjshov<&HP?bMk`?sMPGM9_17m4+!Qo2HAPSvef?_)l8+q zf2YCYN|g^rICs8a?e%oTT0{Rb#=U2SbKrq9b;shZG=$! 
z?Xe}&Z4j@}p6ZGgU6%LTeU3tysG4{vHX6$dSNOZYQ7%Ial6L2sWptFaIZttPXLv)u zC)<_*k6UFF#KpePucAVQHZIS#{D^wa;e%EQ!A|9Ib?8?qH<;>aU0q#`_3f@3NMy_o ztG^ZPv0d*gl%O(qFN+wr72z-fdEF0UJa{)G1)1KW7rs$6js!_xvxsuzcEdPg=7N6}#H;w7ARI?np808m~DeNUdCGfA{TcMm>yfS@S zu(9z#VBdgfUm#()C@fIZ!{YkDZqdU2QtJF8y*)Hml+Z&-BP$5oHK(w-Y=wc>R{f1{1Yv1pw< z!M^v7S}j^ea=~%;qD5H5V>oJek8e9PaWMUeoaMDzYNU19$$YlqA&=urwF~D(d$B|_ z+7LamDc??p8d_I2dv3SukOX0JR51n^O+Y`hKd%!;387M9DdPt=o*HL>yvG&Q5CplF z1&h1Nij{*(F{*#>Lcf>0<}+|lF)x?d{Tj|`f$xh=NSv8=Y~R%{pKSOIhq$%oMOGef zbyuOcZALq+ARp3HOj4%697s->vM%@6Iuu&CTs5g$hjdRElUfzKa(gt#qAZqTq54KX zNmgL#YGeasG+r2&KO=E`j%UnMsr&ar6cU;bsMD=QX*gf2xdhcjQ!tVX-I4VYhz+(V zKVbf*p&N-3ljEeOaliRm0B+>q5`2T!CBIPnQ>)3s@Qb-VX~FJpL1t!YVs|wq4b6ms zIp0}&l&ho)(@o((QcaA?TnC5yaOho>n-@rSL9*RR)a&fhM7aoT=E7?xKS73_LJnDq z71+m;yh54JZ_`lYi5yq5m8T{?0akZq)lsUiWc*=BoX-2oz_`FgO73KERUE=>vV3I@SQ44xJYC& zcj4~QdZ-rcOu?I!s)JF5DBJ{gqhW-Kbm~1d38$gg&QE*cxuQ#n7Z8>}OL>-k`rhj; z@h=v%=QDK=R;RmMv3!+rJvWn8ew-wool?n9^Hq8mH&))5qlM_eDuv}i24ut~hm2G&oEqo!(${TsPlc~(?! 
ziJaZ=nF%yJtWXT`bF@63>S&w~#PA#&kwIPX^$&*xqV=-^Xqn=QkUU|v*LIF;x)3&K z0P5A2?`0`W2A6(twF6{#{%0q&w9EMO4G&&DYZ5)&C(mmTo0?5!4N=^CoQ$=t-F=C( zo|ws-pYshQ!}+OD*+O0Xo@z$;2Rwq8SF1x9ZKVqh|BsEVNKTiqLnI>c4t-i(SMcbo zqNcJkbk0fD@|&0dklmwbs*1LcrfF+)Td({-H?nI9PZ>`1PwJyPisF+PCvw*A+2**G znej*#bNGriYkJl&lH5b+8$2(dem+Pk=Ud=krXzm7s2)5_;#n*)UBDAa@WD(D2@YQB z-OJ9-j?!S}q7a;*(+J$|7;_%o3UQ{9Z8CPB*BaVr##71D`jo!*L^u)s+`<**`1+y@ zJD~N#PN?qjc`}99qtb~L5Qq#}prWFR)79@wVUKLN=Tt}{8~kOvv7XLqI&K`#JNWh8 z$x?;!$mvF3l0wLsR=HhTu2DQ&nOzJS^-Q_PX%kcLGbWYSk{1SpX+J6r2MP3*H3x@9 z<6CfWX2Q0-;;PG6e-GDfjpTnc=q#sgFXg?LqCnZwix;|4KgVIgJu``iZfNU*6hJRv zRrvnEpZ!{$Exkr;r1BF1l(8Xj5G9;am z@G40WBD#p9m1IIh!YzWdJ+K2dqaZ#;M+SIPjkD>vCP|+lP=Ix~q(^)u<^V>*v1nyr z`ZDp5{^dKeo998&n!SrvN0+WzwVs;rOLJi z9}F{yg90w;>&a?-A!P>bwsLzcuG7*==!Qd+fl)?mmSa8@LOE3UTLfr{=r+#?P^YaF z^&{Eq+yVeMkU#Z#v&*%*$nk~mmPo~r%T!Uf>HDcqj`yzP$7vbj;0J;sk6f^pLd$i> z-<1V}qJ?t1O|Q$Y5`%En=GGf*>T$Lf(6$$U85_q=9XVonS7LR={f%16)T& z?Rpt$f7TP4SZId5da-xs`$yWRs>cKbIItm$qz%bRVUSvDe3r5TX(c1ZmexEa_fh5_ zJ;iWJMB+h{E-%qL_Nt9_o{iwXgw!`OT6&(X)7fkpS?egqY2+s<(!i-w%XAnS^WhWn zx<99$DAq)lKz|-Q>08PHcsTxle<-OH3!B9_by~5u)^?^b6kdllC_fKg|_Tnk*)96BZ zk=#orH)E~mHXf%W|1qCq8fc0%;%X1cF;2*pfM`O z7W_%0)ErisK|NeGH%!2O4&RIki4&L_`;rDn%Kzd(c-%Wjg1z%ldiBYKE7Nf) zwCys6QoqT`$?#pLh+LV7hzOaL>N6+=hf7EgJY@c!zppN`-dAPsR-ktnHwoTXt-^^Nsa%b69)+03^Ii=Or z!~W?PWgR?q*Y>NTG_(hrQ{z=X91c$|7(lH*!J>}R36>W*Y&eDILjKz9&y|V&){Hxt z5hR3#Tz%4NPf;r#C|QSjQyeurvcAs|eIu6$o!Eb$koHus%-{#-A!BOfv(KUB6a|E= z(YKdVT#Bo~*K%1BfKBsDv&t{`wqn+;og53&>ksXdraK#ZmDAx)TK32i#vEn%v(7%~ z!5A$tk3bI)`C1bjA&PIrH@b$moUP0?dA@$aTA2}4}db zMZUDO`e{P|vN_k$IhwDiuV$JM3bWYyhV`ofHFfKO4fLLp14b(R`5kY7n2|>tAs<){ zrWW#=heuq$cARqVV|p{)@S24G=k-Ffair~IX1%N0rxq3@wqDbhA6OZRm|T|zGK*vE znM*jBmIu;7X>KgpPNLv}PTft4m&+@p8ak7FAqH(u$7NzduAo!}xC=Z9owoLNpGL>W z5At7|A~9y@3j&@%&HLHguac?sT1}F0exgE4Ksf?r0T+qAij%&lSU(qzPt1k0(bZq! 
zzUq#w#3>brC6?v$jPQ?mrE&cfEli5;3pE z=d%V|1d<}$h#ibfH)@5*lx4n5(^5TEg= zEN)A)c}1lRJ%xUbs-zcr8uXf+P+>T8ko)6)eSI*m<@7UeYP%eF-9o-G&}Y6ihXIXN zx!DYu90rx^G`{3rD1@yM|EhZWsqQ%Dg!-U_To>&nZFtG(7LpdAq1oQv?n?jpz?cS234d$6FC z8LbSM!-jG9KmGA~>WaxJGDD(?`XtPQQyT#1Kg4I;dXQ(L<2R=;)G=C<=P5`VJ7_ zg=*&}3QFKmo{@zJsE~Oa$l>}%r!}TYi8N9)U$Ci}5by3G`7gHqGor+;xV=HohA$bR$ z7R$2i%X9%$K#%nP-ubSgatlh8O?oIs=+pr9!VmAU%=t_(gWp2~I8&%qOw^m6Zzqq5 zHfcWf?9mNTFxMDDhYSn&Y6JE zPF?-AT?LEw7$i179!cs;OcYyg2-Y&f0eA_bWz@GYxlm!88BW|3J>z!8X^FfJt9QF+ zicJ{9fogj}`=b6mBR0Q3Q~S|&zZyA|SOV6kcoJ7OaD{sgg;h*qX@QS9+}oLR^swbG zt4M80>Cp}k7j;hn-2itHP?)>5V7tg3Zl+@l9U}iB8MITbK9JnkOaZY(t96f2B6~@5 z=13ruEKb0N>i(fEQ8f3+9AL_(JEb3Wb&fG)}S$bZnu6+-YB@;~5jK3;R~+wWa&OZG8nAJDzHHI2;7*TU$Cwg#c=4+1X$U9M^U%e;(y*{K*!V4@ zjm4-eOmWo-KMTORS@Jw=H6)T({;-$Q^!&pzn0jTyLyql0yF1%A%4-=Ox3#GI*2-P{f z0W+5iYPl?!?w@b7SXekYuaQO?I2?8--KBoA(WL7?r>3UHQfYX;a}kdp$uvb2&oui0 zQs97Hp95i-4pq?3Ho7E(S=P}F4tpTO%PQw5A|U~!Hd3x8NFlR6gW&tE(^LLQ+i!iFJQf{w1zeXzk#tMbLayfKURTldQAt!-Xmm?tr zQp5?23a}x_WIudhmlMi7Lm~vgmH+hElSD$WmDstBYO4YH`3i%o3*J4lD{?A7WDicg zT7^f~WZh-g16s2Y>$1!ReSM%%7%rCb+hDhXruNj#p_oXbh+u6s6^8pk(;sErKRara zH($tth8o>(JP4Q#!t`!0kA7lzJID9X;_FfQkOgOFuW-d%l-YQQn2lebZp9#fXGnUh z-NH<+KwboHq26(1t$`>HUGz2}p0&lTFM77z9=f5|X5y?oLBMJI=ByzaGYQZSOI&%jHD3~33&?f_} zqrgW7fXM4BR$#{7{zk-2%Oa-$c!ve@Ojo?_#}fN+=ZB4>TD(6rIn^}x5_+T1mL@BO zuQiLb*b)q$`VlV0s0QY}dTD(o|CuSSuG+G>vzI4H9UcZc>ACl0%_qA*o%v-gGl9mD zEB)2WOpfHk8y<@gl46Y}i4WcKFs??-Rzg~e)dhYaB0Mx(iw3-A(OcawfTAK(fx(nM z%Z6;EjAPk4abhHTZ@$|1;EhD>vHrIQF=DN!3~4JTyW{yIk#=``wVXN6fY_Vw1MZzR6kUTv{u~z%SKwEBQ)3)Mg4_lpV_(wcgc6K@;9u4ME|Ldqh|dv2 z+C3LGTj)BNS^%EYz=W?7kX5_vyE}DTNEn*=YrGC-$h1GeDBMq zWzV^AzKthn@j^cLT|q*A5d=Kf^k{jeiY^iTa14g-51$rdQ|2Q$@#~pVJau$dekYsYYThiEl_w z^C^mp2aAS$%Q-y$p!D*kbP2D=Il>100yI9I=|`yUs6*Q|k+)gAZbB#5l^CSp{rFc> z=>ZX1Co2is+cN^YA2Yp&(8i{>Hn+rNLWOPmF~n?rrjw`&xyf5@|1T^lA%j?S%zoxw zou2$4NpbLX(_aZBCm7>{0Xi^0r5aHhj1P|~t7yt+(eI7d7?<`haEvmVXv6Nsk`e^C zQCYo%*b7D}oDF^rr{s6Wq|(lW?T7bOkFletE){FXfb^CrXsa+vuk}T=Lte;un6XCZ 
z{dng%!p7kB5w7qseF~wBjg#wZ=x_m~8wd0EBqL=RVpDOU23}dq7SA+(pND@hz`59; z7n-O#`vnN8E0CXRru6w{fcjWZct(~2#2~r=o6FJdrq!VLsxS!DC1c?M@rdAuTASyA z5j6Y3Y;$eD>1P8S0i`q(#UH^C2&($?+PI3F{cz zoWAZ@;X6ueQ8}FvgmDKwB$5{;C&R_=TvmNMVp(k8XjLjv?Wdf*hC#Jo+%IYOHAW9X z&MCp@(-;FOuZr8np{ErF8^u#E3=fUOAHksyzj@|2Zgcq&3C7YB6nZRX5nAF?Q=All zhcTnnNUFS}@dD-|} z?D%|Fdu}~k-THcgcJSGS!$^ftC>%7B5w6+~^DX{Asg~RUIsJ+JzpvH^`BU zdUu6% z9r-$Q@my^rIAw8o<1+NuIx8pR?6JC;Cfw(wm@dKm7gG!uWNZw!B0nCj>gZ$%2o(rC z7c4$KT7g5{nSce#Z+?qk@C+uj3dO|IG&N9`khYLYXYX_3T#6lwuplD?{SR|aS(_sO zAYpZeD5UWH9GkU(iD2!F+ARg8}W*P!ZLXD;5q& z0<3f+Y+s*t3rc56|3e-%=6)w2R0smClT{BK^8t;&Y4hd8IG>#*`-}#kw0P=U8D@^sn{h#~oqKgaz0TNs>7Yg8!!A_pC+N9EtFioFv|`{yh*MKSn#EbjZ+!OC&mp+>M!jPGCcMfNq%^6xanf!->4DcR51D zoKxw{0vh7i{ETeXT-82S5BqeeL$9%|B&PA@4v2qQ9Nd%07CBL!?lLQmi@@#xeppHe z-Ly@Xs)`EE6!wuI2Wfa3*U|eT$Ue80PvXFN7XvUz7jt|l$=DZ-ZS(-t*yiR{F z(BR{Av<;ia;~DZuEp^RLu#hQz3MN@}t!&iU*o zsUE9o$_gn9R)~^SbKALy+?|R!C>CO|Nr$6_k_Xz^cTW zL_bC)C$zHpAVS1Rj@yq{-4=T^g66c0ZluB`9Ks7^KHIx*5O`KISeiZH!z2=Ni@T#i zevm>i7?n=B?TO(aTt=!nO=E)5SjdQRUASqFOMo%4r zk!E6IlHvRWT`Ki$j3s`FTR%D^G+JlM_iG+fir{@j^4 z&q4Rkv4|V`$kFhD8}WhQiGK*@)2Kkezn9g8Y3&Y0DRDkv1Ypu<7$?`8Lx%9vcAFjE zneGg|u|Jx7sq`zPzQzYy&eTAMr+CgNe6a&twT?Fvb3viuYiLn9hF?oCPi&?r3oyztQb}YXy#oNx>xG9Gh%7vr2`Bo=LlNVD84dgAc6g0q_N6 zNW8irW0Hp10j2Lc4ZxP&XtaX$V7R~;PbeTTTd9sdBWSXBV{oxM+qFKt?7e^Juy;(q z(E01hy6}mng!Z@ZH(faaYVC_08t}gO9Sq@I(W9G$oxb4kfSQ>&Xb*@}lgqEd*{z+* z=5ft6ipE2j`_T>BS<==Mp0f_1VpEdItWC}rhcKN zuMyOKX=ITjH3A+P{^0mA1w8{`rL@}Qd%YIcvqH~hDQ3S#X(`$$We#TEg1Amc>hjjf zJ3{KJK#{g~wW{?fNb6mfiYorQybRQofeb%n%H{Gt0n{)yPQ3ppds%y1*@NH0s`UsO z>Lv=i<45{@_)%c!SVko&`H@_?%ybo&Ww%q-gmla#H{b>j9| zZc0XWLFf0;+fDr*oW%dZpnB^)Le+6d9|HIR=5%xWW%OQe;m4A+ zW!LTSi#uUgI9N@*`7r&i;)*wWSpVG7$1+>0kSQmgvOl#7RVIy3fb}HAEqy3rZbql? 
z@Cc|maD&=jyl@Roj#qN3`&9@_9s518fKO?YOHA}iC?^BZr9hhgNrO=#6p6TDUuuie z_9&ZZAb~kfAzlys?B(%H$=iAIe$r^mg{c?2ug52TOHapcQbx7y4E1v5Afb!bOF;)K z!URIKmO6Jn15ACdDvhGI!@r|qxn7N!&%QPS!;4-Czr- zA0ssXT#Invy-OfglPVV?moxDDaVr^&@Q6VRI%17|cpysjcy%fxhi+n_$<^AX<1{pZ z0j}M}dyVe(x28uf01w{(-||d<{lznl%1-cE>Gng<4SNKX0>Y>F3&;12pC_G)M~al- zW}R_TXv33O0;?sN@l(~+MOEH;6#gQ1eOFOo4VGJ%TCUHITkl4d9^={|>j^ey{`OqR z)-iirXMPvI>ShVS$_r;njet|UV==@=C4m4G2;L9|ag#ThCVjF(BGx2-3%!AzawBjM z3gD+93=@{Xfe23+f{x^g7xII?)B1iK?{u3h?4VpCwOfW*xll@SyPH1Fm|?7(T{G(d zRrFiPpZn6J{m#?s)|;3sUdwA@JCg?cW#qPwUVDLny{KfhUT^5Mz7tQoXS!AzyXZk zVHzptN4%ki41yp!4ho=f!)4myg50(DKGg~${KJ%8|I7k%Xs?9VM9z{Aj|yqikq^!m z85m54GjIq4r=?%>(#=$tzjgggQmF!DTwfY{9oT2#5kI3>lLEG=GnJM&E9XWhFX!>3+r^{l}xS*K5y z3$d&`;KKyyWJV*5j*bdY6{EUKf0(57KWFN3F+@H{oZY@>z$PYUBqI^} z0*?3Wxp|8E75U`glp41QE2J0??0AjpV#HUin8r-}*Al09V3~!Q9-K-=CxuN&^^@D z89cJa#(cs9d4(~u=XDs&0Q;cQo0L9|NtC0N05&8$6%7pt@ zJJ3vB#5VLs&||2M%4v=3vCabuMYs4rDp*AuLf@-)en=6{Btz}``uMrC^|07lwpTZ! zb+I!-Bo+C^VSmWH6OiemIXE>Q_|=r zx1-oW+fPmO>e8H#S1A3fKI1;3V$r?W4h_*RyN8G;1(Seth<{

b2`!MA?b-Ae6>K+EX39|M)9;VtX&n{05R->JQl>B0^bTEu0-1{znWb zRWiAaRd4c-k^`eiW8J)^>0DE)?Yl6@P6^hv-FAw}N19M&s)!`aou{s|kl{WA8A86+ zW((iX%JX06dPTdFr#1!|L8+fc&(stwN+)&0OuV)n6Kw0 zHW+wu(31>qieKWIK5hEB6M*dd>Wg~mbm{&^YlNDvLv zo8?z+=2f z0Yf)G;)+T^yi}9hmT`Pv_30M(d_aABh;uK-)cy6DWL9m34de>-HC%*uWMHJC9}r%a zznkbxqjS|`ddzf%K4PG?ZbvcJ>;EQlaqE*V9dsE`d-y)8zQbKyBom#oLWc_Gl6q=okxqYdOs^`6+1yxRYj4 z8vO@;(Ege`YQ0&UpG#4X?BKP%o*^Wlm+FGg)(zA^H}RTj#1FQLVa<`1RZtME+D(Rx zcXtPW?0>CVgJYMZ`&e^)oSNz$Znu|Xy1aB%z~>IrDp6&u zl9>yTc_5yC>UjBAm+_5KKlNv@mR!XFq>4i^KzW{X{S-jz{%JP0&fCo8OpO@I zKz_(jgc+iUb`_8G1v}lXzs#C^_xqc!!sC>4d`|U*Ohhl#F1eyG63ohNr+#p;m!!Bm zzVZ0;_Vn?JOpgEEgv%DA#4E^%;@!*_+;c$puTe*#_ z`u?7tDJ0>kH0)<8k|$IyBemLavd5ge?Tp!E((a(f(NBR9O}j|v^V7z>@Kmyt;76hOI5KNS^ng5hL8@bMqP zI`Y6znd~Bd_47aKi7CW{g(%(KkpTQ@a}~r(Qev`X!h29st*s8mI7UN(7X&)%Z7NRJ zn^Z=BNTEE&$W3`%E3VH-$RqeW_wgY=EZH%2VZJ?ddkN(|n?wd^WvrSoH;mZ9i2NvOOP6Fs5}<9;fYNUi6|>*$X)jY zZQ)H=5Lw>WcPsWN(Fckmrt2XAAK7Ifpzdz+ooe^W#K8*@ze1A0wqX=@-k)kD5b6xW~8v!e(SLMv(o? zzB6Cu$g|7nwp;g^Ag>%uevxf-%WikJY^8|#rIve0a%F$1-n0 zA57YQF^A_kR?0bn@C7e2Hl!Rs@^iQjX~@pJxskqk=v?5f)mqCtn9J1W*k3dPZOdPC zQ=-G3aeFvLVx;%7aq12i>05!%rs_1|@%77b+7w^ux$CTeQXC3#8s#6)fj=O#y{jiF zM#NeT;1&gfe8i+%Hk;@7)DLh-{EWDsF!zeo-#3H>eX@)cxkYO_EJ=uFPF zmCZ8~cDh{fD=TBFGFQLiQFy!+zd^0Brhx~c?~sBEIV)>h5jBV6$)-CZdS<54ha$oY z>#hQ*5^WTVb2A44qfeB11J>`&b`4MMOIi!wL>%(qHk-M5mG4*-KY%Cewber8R*Zq5 znp#S4qyAB@%uu>jG*3OSE09JIa>EiUu>)xthXb=U*(Q_4LW4gz0lNkUS|HA@p zZB$!T6Vik6nQBdPGW{b!2XbtY^FylnSxejbV-lttMZZS{Lo=UWbMFQNqiqYVlp4p& zkyjsld!?~nPdj2GC9{2=kSNWdYz05_k{e7q>=c|85STnX;tp=d!_sakI>O#~OCVJ1 z{D0y=|Nry>uRtHL#5EhW%_BnpLgynmp+XzV7)p=7`+si(hX40%!1R`#2H=X%k{x<) zmCW`0%I-vWchS1OenJ-|2$g5YF@9pE)Vg~U6c71@sQaU~hjX-YP)Qt~qT=Ht{cjd% zB$LwVJD~v4Wr0I?i-4d3PWum~Iy*KFKd-R8?-#0~UCvi=BA=Q63j_-JQZ1D?xF=Q) zqG!600`EkQTfVm3GS5fP^~&ndCQlc#xTnoVzc1FVBjuk{^8-4x#KW-OL-#N0#8clz zuCJ&$1%mQJ4C$fK%R=i|)xaofS@!h**gm}O+9j@eG#Oksy= z-Z7n#%WEh3U!UY`if@Y!F+fmiwhjmzX$VC?mS!UcJYP`eWL0 ziBr>T|7l=&AgBvtGg{z2?&>b$t0q`VKYMzm5dnOF^t$|Nac#h}>WhY5HWFiyPG1R{ 
z(PZOu|H|Qs!g3KEAOg0tFWG*mgw-OY=oYN39wedf0Rmhg{~0wa#EXZNQsWz<)Rb@F z21A;)PBgJZa0$2TJz_}NK@9^5i;|)un$<0|W|MQcz1sNO22-bY0(CJ8RyPP`BA^O~ zl!Yk9E|T5;Td!{Ry{qSXAYBLC1hOP#HtIYhNG}@DM3s z{CnpAXT7x|8d2{0v)*OzFprwEJki~7YfXt8%VZ@%UK3^ki7 z2U>}jxKk`3#s%YGF%6Q`Rn29Yu+O-V*RPGcuQ?{m8IQBIPo9N&x%;F{Sd^_OxCMvm z-lvO#x~uhn&^^7*A*2lx3|hSb*eotY0EBYsrvLsI^i?6)*Ly#aYAoydJivGEqRCM-+4@C#2J zIX*u_bf%BcG1Aj-6dza`!TykY`vfpXSS;q*3PW04M&kEMjryh2fbtEF*h2gl*!Ml! z1`ifQz2iq2lFtZ(LCjH)8YTyYrpp+VF&gW))^@NUqkX@dv`+D9G>c$@PQ!x2_yO(x zD{w6cPd*Ksu9SMDY3ieB>Ai%7Eu$fm-XizPlFR+}3u8cIa8fW){g;hoJNNZ_Wwgg^ z-{ZCN(iA?(`*8}oxK^GZJD4}H@BSYw(4z~|ZQN~H`Fn?*pSeHUXQ)E&F?|n9thn88 zF-^7?b|;yaqlwYQ1I6Sy$v;$`YcRS_(KSfr!9-EZ#(I?a~*See*g3SQC>JE5wvo! zx1f8+C#FoQ9b8{FTs;`P8avY@MHmCT24VN%7imC%&p zd(qsvq;k=K0eea(e3F+#0_Xxo5&XxN8p`RViG-pVvQauj@(fJGUnfKnH$#NMx}0e* zN_;3C^NlzVC6!A}At*{Qjup-X#T3l26^n@+%8{=AHR6Y*OQk3qtC7~Q*Z@O*qu@HX zkcj)XnP>yRa_`)V(F96aG|tI$15XYuQu*NE;B|Xk8L*jSt2&=e*uzsMUbjWGREQlS z$QG)ClwH#ET>HxJ!@T#8FT(J?2DU+U4oBQ@wF*hS0Ag*>V)Z|TRjgQ`7ETy$EDXj} zC}7cs*#PJtYrac1Q0O5Yxon1)b~fwk)RVCB;jN;}EY9?1j9G6s9%lkqovzA|EWnE; z{;b*?#--IciKt;-yTa_+yo_gsP;o%XDg}saA8^@PdCrLfw)vtr1)!%g53qzo#|x4# zbyCb&izZ-1c~E@M36P$<`|5T^0su5G>s>c#$A9j|5WS!ENlFDeH>04V`5%Uu9#iOa}{+n(1rPW z+Bx>(<1aVja+#sri-WxS=qMU@ith^5$VLZ{{K$0gVv4=#3lT^z5p7ZfW`I5tqx}-* zYTu96gx=0}cLBsYt=zTqp`oFS0(s*9qe6Denwd2yupRB8?XH;vuh24Yeo&2teew+U z5$oBJT4h9S5SE?<_EWfV0u1ugQK+mEewO(JY#Dk(&D`z0@=MGC?QarVC*93J@c!-< zJdrsTC>}O>0cG`rzsx)Tck;1s=$P}}SqhyCRxlyGUfUB)LJH*w*i0tqo^Q}A=Njzs z14KfChd`}88-U4=UqS@8NMTIi)VLsJ2b^)C8DROCu{H-(g?jNIHfT~f`$gu3b1aFa zbgceJACzk2e6Bsum&l1Qdw=$-0k$!pJR8_b z%CgWjoEpu(bw$KIA`Dje00xyp8Sqb7}|nuHYOrJ>LxRRxpZlaQThR9JbG2q3B#95>y78H;~F1f;!dT06gb6|IM4Xg~kZJ zSNjLI`0rAfcpzIZN#e@}3ieXIn?$KV$6P%O$_i+eeIm{v>)BT?{=U)q5tEi}H z?@a%2I@v}tO7D5jOwoMcM>wkyc__~9a)`bDk`|JoDtL-J8G3Lc-exj6WA#b7xOYa1 z?t^Txj!H2@I!+urG#!=JfY@(9)7JLdRee4FDg`2E$7XT~;Dr6}5&OC{y|;fK{|vrPXz znKQQJ2C|zxRjMHR@@1&Zc^GI=J3N_Rwf5f-BVVa^jDo8sgLawO 
zWY~e4%Xx?7DgJWMZ7po6PRt8rr&LjXh;3L3Q@WZ7*o%yt*{0$b#qKInkcs$-FA?y zy4u8|?|8WU63N0qzJ5=uUYQ!wZI#J;iX%t5+4Y>cMtV5xmZ%7%_4+mxo2+|tsI%x% zyWYZYmcWrl7B@X{uakF*W`YpC5iEx?dYm?%)iD77vJ{hkTY z51$HB0V8TZU1)DR5wBw54r#m>7bAg7%X>cN7HR^k8j<%rR=c}$MtURs|LXsU9{k_? zKlwqef9gL>s+tFmtyt%RE+=9N2a=tPrdLw0pIeE}b=ZshN6vt~Nu$usbuW=aLq1UN zWqmZ>;sUDQUezi~?{>Ub<>-(q1w5y(Tmf`&Rm~g6&#{@7qln``ABOT-Utj9J9ta zN1PHCq(gJq2u(c>U5Q&J3PK5*hrM6R1s3d*tnVBjx;ANuKNX7Y>r=@SR{>?Om9gfF zJ9;!Ttj2}|>6IsUO$O`2F#cES&&U6y{`lUe{)i~XQZgbntwvz<*Rij7Xs0KpxM9P- zC~wGB7e?e;7JV*yCK&X*dLeaptPErx!jvINuwJ7mWmkyE*)GnFu30#mfB#Mro<*uD zAFNRIFZ&(ml@)Qz8D_5^e`!fs8>sH0#31iq+tTRQeEqd2g+r8NR3Z=L05_^Bs(}0l zdZnY)m>cA-vQpSPNy|f5jk1b@H#-f@y8amj6JZRf*-P7vSWK#^s*bFyE3#g7| zE<@l2kUzMb%~P>Ie1HnV0*=1+=qN}j0T0w&#QxL`s)jH~r=;GXbu*?roIuK-67g@2 z3`WrB*ar>W7j6s8zZ4-`XDNE+AH&9zN;I+GQ(iNvy%d)}F;{0TW8IzqNSdUNzu>>U zwbN_8>OFRlPhWsbO`O0E(qlB%|76Fci-|+v*)h`e35$Ip{hLd zg*&Kr8L{q2W;dJ#eJm|{{iA~8x9Aj}ZA#3TNUxliO}}9m-olgW%~83Sd^ok;H4h|Q z?%8xLk&X(JDyil7{8y4|suHJ*Hl1ysNPC9}ByFT#{+BwnPlCHCUM~o)kejxi7I!5A zRRXOf$1%N*yq-O(bJPkD_eUM8tI*yUcn3RMA{ZEJFe#_jVnxNLb9IOyg5mP5Uu04_ znPHfBfOn+vz5Ck?tGp07I!t)+MD}9~*npmWu6AU&-b89jC!1sc_6ym9Lmvm&@W30h zKDjBFn)#?A+@9b=vmQ>Nf)~jw{q|yeH8{cII`msaFoe*s{yr==WJ0T~#_*IzGc z#*%mkP3~{k=zLP~QAG~Im#lMDbsOBhme<VV5^+OpzPX?~H&=+o_ zHl$D#;4FDC15J6((BNk63C1`6=jVvZC(FyrQp&~0nHaT_EKLH(g6TJQ1I;)4IeRA= z4)@q8|4<+N>sNz2%V1=1(zaor3Cnz>z*RJ~2&qMPI;ld-)Oo4{PFDQ4PO>~N2lC`1 zX1di~IzH_f>b|6-=S2eexBROp6}YxB_@3fhe|ON1M)peZiSdv9!@U7G#!XT8;bw@g z`N>t%jq+Th>q>){G7(w}a8?Ng?*1jR(_4$1AgMG7s1{IJK3GvKjaB680YSHBWg@3R z-pkiTt$MHI3jTF{jv*t?JURct2~`%HY(rm$3mmWs;z2t2!8XAyU|FI6L#ztw6JVn4 zy8aAeO)jHz%JK2o-;{X%vl+c+hPb+rQTw&;nIAAWOd<<*U(Cm*rIBWd`yoo(JugrW zTCbv*g62kvi8=S&0fBp(cm=g+Ez4}}U*P0%@8rdQzB0l2)xQ2+^jx(@hgSpgJ0+Wf z=ZC|~GRQuta#jy=+*u7P5);Yj2+#^ikR%)*q2P@7r&YoEg0J-<9gF|U9XLc^ld2P_ z$-v2U;FI7735=Qj0cQwp><1**j~z`=-#@CAZZP;EHkv8-cviG!FT~16V3})kMt;dInke;$uK6!M6>E!gx1TTmb)(f*~h18bm#{L|L?CH 
zPxiOQnV&xyC``!f(J@n|b3GI4nC8sMOu=UAW3%-&5TAlGL_t}ntab!dW?{h+@-*v)H7kFD18dLvzivNHC zJrKCpm%vA#jK7qY2zZ4RT{2g9{H==fJ{XA~*HMInRPD#D z;mxEEoEbsk;THqf!W}M13g-$ zF?0net;Nrncc*t>k5p-?frDUbYIt+!ahSF&<&rl)0*!=EfNPuG*?*z`qMa^R$GdU# zA(58F<0gbX?=b0Y>_i5?y>04tzKo`^-^~w+vdCYt26>Jm@)SBtfKG3u%8bB4pYm_u z+CLTv+<)ee7%0iD2wcgC@twH0`)tu%W^um#vAyH^*a3@s9I?kFYj?9&`A0Qn<0uf zn`HM0f|%XBkKXP(s_l%b)X{l-_kp|8-Dzn)=y9{$^}q!Jb#J}>4;_$afqg}$En>Ir z1ENyqTU=3%H%Zt#>WE8pXYWe*&1C!H zlm7X7s?kdnP*o)v8a@OC9lA=#nY9~2BV?#X{|OWW&{04@TF~Fq``QzJ$!zeDFu-d> z00kjNN={BFeB_NrwKBALbEGRY!McprvchK?Lx^B=IP<&Scl@oy_>_lnFega9RsLwB zi>ex#Bvl|Y=hC+V)V%wuV=Mp!r1HrB*z9xBr__W{Tl1_lywMPFdOxCHR`yVBD zCgYWsBeWb1b#y@G%llqQu}R^pm^e5=^fXF}^M+-re?+KT@!68X~VbqXPTERtRfAdhJm`U6hMmRGg z!mE#^kWt1rW_VAjNNV)kdNDBwf6~&8g#HM)S`6vt z2LyA61d&5F^|!8YhYPKJrMe8=Im{IDv%U*Whp&vc`bdJA$&-l=6p*Tab%pZd*`v+~ zQT(t*tR=iWP>DrPo#)~*cX>3PkW2+H$$jsqvUc35guadeiu{WQPbNUVWA1RSWIHJQ zKqymgr-#!;^T6~R((U`s9wX|g0z|(v7&RA@9%7{ zt$f>%9!(D<8Fr!9Pr#|xK)(aDTv7#G0_ExYSK<$Ul!keZlLUT-2oaFBezM05G^|`< zaQ?E0$~XS#AXlEsitpv8E$p^7!9_yAro$`nY?+7%0^Ci4dPo-ZKHGi^6tBdt)hjS%%?aLhi4MijOWkA|r{eAlCE3EMLNpxt?SD zYJb#MLgN?8_tBO~DR?GIq>of$uA{!=us*Fk~`%2-#E6d=MPE$)UY=qL*x;>h2Z0R@s#{XKqdm?9Le9nz?QfG z(>e)f(Z*a+nbAyU?A)#)EN>SE?zFuVWh@+03#IPJHMVg&?R;_Wtq}QlvFBF(+u1@Z zzIbvK3HZNs9YI}NBG0dz-dG@#E%200e8Uyp_{VO?D75>d--Ye&Of60=33JR~vXKGE zpZLM~Go0Z{OxpLxHJVw|VYr{Ko2&KbG&1uX^gPWv^;H6t*)pN)%tbQBinIHpQznIZ zs0QquS`>N4kk7vvsov!`XPi_~EYusPE27;GO~jQV=n9JokMJUXVp8K*!@i&GJz`Q( zAEcq|`d|>jadbh%X4(486VQ_Y&90Gs@MZ0*{mo0vW+sFS)hPjok-MP4NpjiazXTtZ z?)J0je$;kLjh#fnbiZoKii<bJzz$nQDp$8;yB$j&Ewz5Y1QveO3(kXAzYp+XQvcaGpm5aCSZSA-z zGMD%y%#~gJ!I`we!i4{Xs{cbz4nH+{5s*~KTEp%xzIK-WyV1u85Fh0djF!udd};8Ga!JTZM4RyXqlHUUmE1vrsSHNJf);(p!c`Vkue1SJy(ZE zjR_*M4cuPQK#-OYdU^(}+AL~dRBF|IVc|5v)~mRtFed(w$;J|Uc(P+yu~1F?`Sd}W z&@*;U^M<7S99(!4=`{aS<4*VIP4`2wgvWGBsi5Je|7ehPlH+;t62uqYe`ib$__nRf zCM!+Cj67Y9#2HA4)l35fq9%(oqqd&y8B%$Z9AJBY2DsV!(Z=xRd@(9u3^gpd6SD;GjtfqY3Ahzjyr>b!2PH&1?3GXaVqZ zTg)e2V5q~gz~vkK0Jbe 
z99Iuup3N#{XxDB+0X?bbIz$2nl;bZFF+8tM8Am!lEs#k@Q}Ez@rfPWWUMB$pYlM8D zn$p+*wYks7(ViODVR6g|q+%IYDvSGg+#t>h?#B1}ea6M34jdY6c)aw|4XQU!2_nE2 zzcq!S`4V94gi=}C{yb~2`uJ7m-zzd)$HjJ>8&~b%JBY*iLFC zPwg4^{A6>1`0s_LIK?xXz^*31Ia|l$P)P8DuR5_S2XJ0rzWR-mG2BIzDW#oL_!96} z#}dhJBytf_k)LozS}9Dy3-|+)-SyGDAn&`tSun$q~b7~J-ibM3_w;4sFkU&>S#J_6hbd^PgeM7 zcpVX*&bYPZ4057jVRlXkYlMSx0?pWNqyimy&5ForF*wd{KL<}}B>V!9e_!$Vjy<)1 z^2F6>tU_}bc3-FDzOBfhe4x+QtfcHMe$=p2>UIZ(@UbbqI)}*!_a=~t8m=TzsvkD_ zH=6>!bc7*t+O+$|c(gBmfd~Wc`xdkmihPQ0@PMY8o6HyxVAQj!jCtk$t9yp&2|*Fy z&bX%&^?#5LXVnSU?7V+CSMMfUOhHW?n^btU@DCO+S-EUD3|VLn15p;1(0DYU|@x&V@Rg7;6>t55k>0bx!R9Y^)DF0JsADLbExmWyr}{qNJHm2 z-;7#nrapNJ=Fc~^OPdQuUpqnWVPD}CM)6+*5(A7cN2_f^?V^wM-m-hSSHa^rEDVP4 zTBYsDt$Wd&gnoMF9UKz+&1S>znA$Qd`?u)Q`fitRo<;ca)7dW^5B3&1j^{108~m;T z7_snU@%Q@%Qb{YydxKD%3}JpA*R8vhu1Bry-f{5aR@;K_!vI}*0@>>1;BfDFo~m>s zC%iD&;vOJgbq^&f_~)3Dq7_E^UOpiO60&Y1Zej-auRHlnfVspVvNCmNWBSNW3nW>c z=bQ#cMzSKsnz!x$l-%Obg&0UGS}QWXEuVGZXOXy~Nz?%e+wRx19NVl!K%C zEBw^Ad77jCpgN3zw`aJ0F)Yh|d~8%@RU7LKxqY|NSF#nmdx`yq&3<v4pbz+H|9T7aE~NlME!8)`W%01eR-*RRGEG_L4*~gC zE8CQD*rxAYuEPbHU*}4AMRK{g2+=s_5R||3y|!72O&38a>Q7dPr4+7b4>Q0fWYZ>; z+vWtM6rg_9!#ZycCt%1>R@!Oi0Aw8Oi@=uKgCG01<-|a6z`j2G*tJ4i?K${mX|8OP zZrb7B37E{`X}rhz_f$Hgk`WSej*;G>7!V8jZ&AiwSD5zmC**n_t*Yv2zCS#^(6xWE z)1${v>QMIKLty-fVAby*_-)t-?Gw6xxsD#y;Mc$l0aRlk;Vt25 zO=@Ynh%hk6tai-u)LF57HvC|QE_c|=Onn%s=06Xk^o)uQS^k`{bf+#R!k%*W#!5G$m)Y04IYJg zY;tj>vM%l(4mMRxqB6aJnhi7RkOrUM7_c!Whs=zx<}iX*q zKHgnG{(0%vp2{y*s6sWE&c@6f<`5MC+*XhEKHPoqel8e9r&+9BCj*}l1toJ_SC3zT zP@oEQ=mM3Vmp*ZCW&70>=Ch|Wd7uohz_&E2XJ6erHqwUCgtmqOf8`8La=xu0hY8k= zF5;$WEsnu+^KYqhiro2`$9q4&ge4ZK>S!~>K0lP2>-)8qo}LwE?~y_A|D~w7U$4= zxHcFEH^$$&a7>gL64|Jb9oU@LPHvh_{^YKBq?Y2=u&WlO{PxWOE-v-|a7@r^RcT4X zNw@(eME^Q%f=|N_hq-DVb_%1unN>`_)S-^w1r7K`M6(7x<&12qj0c}tM5^_}4D4-2 z$<<>rrNxbxOY|cc%|Lxv$$NZBk%0EIb%NKg6Q^mhr}6b2KqDBneeb*H%OVim!vA49 zt>wBTNt-3@VMbng6UVyHUnj{l$df#?GQ9JxDt+q@3Ry3M=Hr%K@B| z91GQVo2B33J4(9syy3$6&~i8NmE&#g$M6>FjE~kL8VkoOTsSl9pNVb;VR1WD62534 
zJDZj88%y%IYDf?9FKu#XRs>~&COOCRj-@=PnnT3(4`k!2n_x_z4_OF!pI^o(J_yq! z4dtstc0L}!i^pMnGWy|;hRJD{?WFo#3$nzAP02=@!qd%8b>;4YOK>k}eNnK17nuBo zT1U4(FUp#hy-Ik0t&BX()x?*aFRHmWX=`lbS0#~ zTv@5(OCrc}%cmAg>EnM*s#Y4Q0JG0}b9r*rsVqbjtE=siO zC$<}Fd+;)iESK^t_)F>JrOK0qV=hW@XnK)CMLZ616F{juSUb@pfjf-Oz2U~io&}e} zUu!aOAvypU+K(PQOSVk(A02mclT9N*yXHsA|Hpd<@K^iFCF6!;=AR~*%+`4{^xqAh znWINAo-nPDb*_Q1?gCIY6lDE9hjG$4eDPJW!Y8|Au*&!@s8|`;>;Y zC&&Iq0Q|A0Sjt=EH>kwg#Df3H-sNMRJu7ZRaYdX|X!=`P1-=2r1UE%SeC4|i{uhVI z*thJAH{3{LM*+u;`{i0n&8!DU;CeUr`<}GP1v+5;0#dkvxaCRrMsuU#H^p@OJCFYH zCr2qX2d1WP|8cS*MnDcG4b3}GzQ--zdkJ{>@B%B*HxL2UL1R48M%wf`BXF{wM)3Hz zC^aioAxm_2j7i@f)HDiGUexh9Bz|O(m5Upx!^_u){aP5vNPz>f0lQ-H&SpwSKD?ju z8O3xPxU%3-*4UjuoJyoZH=n`~b72+yp#mP4T))ofO*VK}5&w1-Q;lU@QyRA*eV;+E zZIWd5^eXQ98v_vteFbh=d?bJGhg+UfUS{X|{>>`Q`$zrn&K4e&+~{ZKw*X1U3**J- zpcZa~3y5q$wou7Q~PCHxBC&GYR+&>hg+^Cg=xU7Fo+r~~q z(Reb1ZI8J;F7BM!XILcAY|-A*B;YX6isCS4k@yi|tX1}1Umr@)AI!@5fB*1D!V-VG zeKfepEC$1;xCRFV37B2lD0(U5r-j`U&3-X0{!y2NDb(w*BrKj-(93-a+&{fs;11@# z2!bzMRK$L?ZO{4dO(^KgZS<0~>#imW&dTTTIl8_B*}T%+Kop_bKavqS-`8na5wN|- z8Hrc$(Nb7ENah->>P)nev)m!%r>8FH8+Qx_`fgS>?v1kILwtyyn6510evw+tmCsXx zHszACuqtyQG}n$abVYYIS#@k~2z}h;w5a{GhhhyrCHBYn4P}=9i9wy zNh4*9XR|TBh&j{x#a<3&18hm4Wk6+&z6^eONCveUo)>MHWHV|UKT`FxME5*n z7gxtLhR@AC8L3O?P22A}@OFgB@Yn}iPfPxCKd)W%c9)FV_6?_hW`~{@3R4|M-9iam zWs_`a^t?C3wG~>*4YJOy(R!M1&^LnA)?u0r-hhiU_u{0C_{B4r<}5B1e6O+x1VE@; zEQqf!w{1Gw7al)KOQNR*>6g*b0dr@$xj4Mll{bk_z!I*={R|1~Iq}w%`OjCcV9Z%K zW0**5LQH;so)`P-Ug?BDhDd>Gp#jHNT;ep=yl4J1rVQTuf?EC_zv6JnBpnHF{qq1* z@(DEdGKO8sw^df=gdO~v8=q~gVuo)5BuL?c|JDU9LH)I6{r&J!wM!l@3bQjY+Arrz zx_5tz;jiB5h?82Rkqu}*JBFf);&kNV#6OpW61$0TU0j}69(R+9wcN1&+=XS(jT*y` zMP0Bma++^mQs*6g%Ow{?j%n}J#uLa0l)zhxgx{0Ft

kn_Ee?T?$T zq^Xl2DiFOQLL|hXR^7T$sZFhM{Zz!;b^>~4`TnP2F(ydUqU2|!1w@!E77Ya`yGZ#7by?u*-B6Z|LcFS0P6R} zl;$~-Ug*Bc?{8A_DlW(oi%M|MqtL2aY8ImCX*^?)+bF6f%=)+1Dn`#7g?)QOF3LX9 zks~JNYYxYB70IKgq7veo_uorAGX!6Sl1w6%k^bqS^`ABz#Sh6*gAIL=Fp(ljvDodF z1-HM|bvTk|rJrnm5fy`YYGY}k-G}(a1k5kMr3s4vF9;NxRvxVNv796#v@n#sxDR=K z52*{rdD%jV`r4N7y5I*gedi~0VwRjB|0$gZ@IlfBASi2BI`$c@nECmO_ta@c9V9Do z%}e$L;fF|tE6^75&rdDL-=wZcDLHuHI=rJ-2jgo?DgvAef>wtEv; zU~p4RoF$OaxSWuYf7~fEp-8t#gLa=HkQ8x<_@rzeVe}@u@fz{PrlH%V&~Kje?RMR) z7Wey7eOd}cp!2Ok2v@>jj>ND07faL2B4PvR6<#sEJ|3+RF8ANPHl1s{$ueWYKn#)M zmJ5JpLgz;KWm^Q#k%yqmWGb;-lS6Y|E2575u>rCo^+pE2&>hjL{A+S+=Dk|pnJaH< z1Qb7T4ZGAugP220a2^=Eq3im*Zmk-ptUJasyCeW1E` zcFcxuf_C4M-#I&Op$uz5>DN|7>_8)Q-$u;)B*%=&9}`hORt7}9wfYUh2J@#DruVjR z>>Wfu|8TL7U+GVWrtYtIHUx0$OdA4G!FZ8&CwCXBAC4~Aw4@@g*=sLLhN38j1j|U# z;nL%xRWTi}Z}tl6L>;6YbUQuo-oY8()X&G&LWA6a_!udkJ~?$N<-BazJ>$*GY7GU# zL7pYM7`#;5HnGoQ7JKLIbNyQVifj2qG;X1`u$l#oBoGVn)DHgMsq5)ki;-e`;-hDm z8*#ZVFH$Ka8ueHD7dp(7i*V7DAXB;1mD4SV&%VBGk%gwy=E-Z+<7NEF;Ihn5V`Qn6 zp`BvD#V03amQDSUpB)1G@f46vUH343%1MK5MWJGw&jz2-nD=|E-3K3)ruqBPa6ir~ z?-3`1s%L>6_d`cd@$iIs0SjUc18!7rhLQ_#~IplxDdpgC)5pIV*Th5K<_ z)opH^qxzHc58t1$6K;-|6RzD#nsGg*$wJjHlxc0jUChbf?cN>6UBLvO5T3}Dhcqe? 
zd>Kgb#$$akL9fQDLx)?7J5u}OQBnuPjZKjNRt{d9!uv|kgaU7__ef`q*Qu{`Y}kN6 zTjCI|xUN2GM3P?$iuo_(`>*M_`N8MkKU4!(jpF;Kj)n*-Oue-i#&L1xRTg`mlp?Nd zXrJZujp%Tz@gBF>+}KFk`rEeH-oR2zCTR3;Oo52YkwR@wy4|zd*9ZjjVDPgeF9hL= z<7y8q5BbhdTkw@ipC=(HEfrs&L2r~$fCM?wMuRv){B*tmkuU!&fDY$5*a+($Gne^u zls<{j-n&dnH_+E&eqjXtR!-pzn}6G4(9Eg?PU(Fb$duEx|H;}Byj7T$sS&yogph|k zeiOzvo3Sojs=KuABaK<^3^qCY{7d}F-x4-j5%Adz$9pVk<8RYqBQPB|o!g1!ljkwk zLuT_{VbT#B>o7#kC+ehGpnHGw2Z4bh{Du8gynR)d-FLf{yLG&|n?U@#le`sK=Ja_z zqvkS&gkVeHBMu!l*D~VGH_&u6`JQlM>b$giTvk&2t;YOftLyeGun3#Aobz|#;MUju zE)V)&1+EU3E4HimYQ$%Tf18wwgKt16%Ow=Ut0|8O)Gzb7Jk0iC884%(FlMZmv%+{uP^C%boc4avDsh#moIW9Of+!!u*=UZ=B2;J0nb=FILZ?3R0tf zh%B3d@1e&n);HFFMh+&=VE>#7F|j~~KcPk|>uxq%#EJ{ub^-igkb!_vOVH>cc{ix;F3Y$5f@S|6P#|cNwe+>zh^8lqvIR zFfG$@vSLCoVe1OZKoV2jmTfUR&W_DvuQx|)`xcfoz?5DX!)DF{ti7%uf&h(9U zyx-;Z$%39nav3h#H?*n+7aZ+V*;B<+)l*HxUWQY{)7gmpn@Hz8Xf2iTjz>Swt7>U6 zBBErUO>|uK+&Sl)cJZ@f$$DA2zF3fe0?ig1c9IIu0Z8H8XeR;9-ZjUSpu7RAh zfl-M2c^n}h1tZEH{>upKJV$~AFirGQ4W8w`E^iuolwKj+ZS>j-gyTEyBn zd*TvHbKk>W;Ea{BVOs|Fqu1{?(Dv&5e&(?{_w2qlX~~n;n-7SmY>9chcDqiy?sKw9 z(0FJ9G!gm+ngk`1-i)gmnk)FBa#!;`mR-K@^*(wX*F5SO_uN-!$adR&G4(e9=Etwu!q|{|>brW}nN>;1Ii!nQ5y&I^$xcnpE_>;Ws$c4s5?tLOJY{lA zc^?8Ln^2CgcMTb(r-$KUu)ZPqSEPEuL33dz&jpHqFb>IKb}~KA*g$ZpahBp=-I+Dq zTiex6b@_F}Hdrp{t{ASEu9&Y%m2`6vUt{gx*I&`8GnS7cWdSJ{m%`2UKW9}SDGtc(~J&4%u` zdx2gvZE$OcMo6sYu1SxB&W6mzaw99L#?kRWg+q?Mm_Y4l-pM_OGRSgw6#gr!^I?Hj zKc=hH&D*`P>%SYe8z*gler}_09R2=P<4K8##(Txdp$+O!2K8DwZ+X4jYB0nEh92&z zS1UwpAZSWIVmZ(gI&Xi4aMASslZB*}zg3IXjTP7y!IxL%XlVGMa(6@cCit~I=;)B= z=Vtkpa35%_AFdCfFvBP6|Mw~Rf3ZM5ppkg3KY#@g3#4c6qt4=_Zeq}PDJwNs+7N^w z^B-KPm**os%X?e*j`j(x`mWZRa{i+wT(wH^Q?^qs_3^T*+s%gVVhJRG`2eoCPG$8i zIEBA?bNWnXqs{K#OZLVLz908qvI^-X3cME5@0QWPKn@A?Hvv1L+CRdt3+#lmtELB= ztJb2%(WRcnCz~SW!dCCUZ$6S${&63#gQwSuNn`ub`O%X=Xe6}!)1SGMwo5`|6@Ws$ zF!Eo<;o=t&+r5wA7ta4nV}lek7HMI!Y24jZJ~PH zz_sg9OZ9lzDVJb&VJUt{&{ve}E%L`LmN%A?JIt39SEf~zT6zU};o7h8;1>nZ{{!41 zdT_l0ZZQ4%0bKq6{7Huph^rYL8sJ)yQSyI(0wWMNE&%Dz8)*!nVOKf5hyT70Zy&CD z9OG$m{I3t3-vKHj^o|s((chA=Of 
z0LQoR59Yz=OKM5K3)n(``ta{Vh(HxP{;!c`Su{bfsw~UwU1S&CFLcd-LjodM3V21micu>x!#MF9_63D5#{9RI3i@z4;B3_T@4RVbclyST0BLA1r{Pi^Ydf zq7nP$!OF?Vu&RF@ZI*abu$)icJr&e1Lc~<{Z>OUJcQVCsC4Y482@dP$SW<=+V~xm} zYrlqJ1my)t?|)zbgD!gaZiDsoZ@+ndm%!DCtae%hD}7UQx)Iys53H|Egm=-Oo=uGC zS*wr==y*J2jtx;OC7^p}$~$Fudqqh-u1A@ondX|7oYtKFtE^4u%A4d}sBP8ON75Q# zx9iSwvI00IJfWSzuUe=|B^6T2YfPt4{DcVu`4tu<^oi8*tzvenJ8DOO;m`-`_Zxt* zw{5Q|Lc|t+4sTzaezPBT$El)-f$`Jv+apQ^v%oHswepp~UWnzU>6>b@OI>4Ld~5-E#`9%HmcOws`#-Lw2G zncZYY9nwO5Qpjt+cwM-Lc&Zx?V^v5h-fe+fNAwjE(hx-XXp{tkwOPl{>CUtXv^puW z;|7spr;J8QUne-TiDFpS<-QY0u?4Y*OyO$; z`EA*YTMi7d;Z~Dmy^$43ikBSdC+rbSrd>v3Hk9M+=;Qyt{ek{!T5x>Ai}rGpHU554 z6^(auv)vPx;e}Y&N0UOMtyqI8Pp+ZG-*OmLA4$I^{!|{LlTJ?OUiP9VsRCh)X^5%A zOxZF-S|mi8b`$l>KYArx3?YBaC|Lsm-7`afZ!Kudp$t@kI+-b+zSX8~W@ETp{-i7f zlY;!Bx;WnYzH+J%4}FyV$pHhBiYeW`ChlWkzcDHI!IvGY&ad)~NrK&twwp=GHx2A; z!d9J`=9cxK!qV4L9S=3)QHrZcxALwmz zAD$VfjV#6U7)dCHCPHGUd4^glGFXl2aPjfZe?6#asa{BAgj>qQU1h#laqwPNN&x5z zEY9UOE2(8Jr5ve0F_e4O>RWCdt|HalUB7^Gd4viIo25OA+vP=RR=ibPgX*_8H!R zgFT1 z16ZW9Q~>{3<2oEza$azjl}djZ-`Wn0vr=G$zLoTH1v*DVPuq+(b3~< zO4jbX9VY|Q+xG6-Q%4f>q1834G~_eFli+)Ml}Z`bDh9NVF?0;z%gsL)0WLhYHa{9% z@7S@=iIQ;B&p&jbWf*#KB&118mpYxue{n{nBLG(oknj-!u;#9<(8iTx-W_EX;49A z1H-=Z^2uvaDHv-?ERDCoKrjR{wE$Qw0d5VOf7;%hpK~a$>W2Lp_S~FI5A@tGq_sua z&VSOphp{`*UpL0ztZZY)oASIXf&F45S(^wxIv56$5AL!)Zm+hm)g0Td6Y6g^)@c2e zD5GjdF2?`yndv=w<8nAaUM>-M2fkZU_zpx*MruC{*yhp+*Q3%wRRAc6kErh;N!JPn zz{Lk19@~qHeTO34JZ1X{>5TSRalG#qy#siUzg>$R(TPg{443&)i3Qz8+dPcX4#r5v}~e719FTzz2 zx8aIsFz&5iq*FY!i4sRfsUb*)kBEnya44}z#48`AV7xed7=ZF%Vj=X8cW*#vTBH|C zdk>hD%YtFHI%M#Z;x#dV`}YK>&#OQaXOpRGmZK$DIV2r>T=}@fNB;fAhTg zo0Ef9nkS6islf&=(9uHPBG5X738Dsk3^=cKe%=&*AhCOV_GVm`92$X@eCu*SL9>5! 
z$KNUB-O`!y@R6OL@E@(O&Px%}Lb06PZ<(&c7 z0&3~k@;>;H2@6BGlI%vb;CVY)Puu8!n*PJ{Bc*wL^aIC=`~J3SYjDrPYj&jF8%k6; zWPGm}%r**oi95NbS%iFYwR!?NK%L$vL)6ry7*36Q=3}sL9626p>ySrBmpZBtM+&+3 z`TV`7iZT2trrSn@sC!(#rCuaGYNgK$9}Vafl&X00R<@{MNr;DtMTHVbM1~jXQYbGD z(?2qV`Re{2F`JhT&mpw9D!@VoCFz{PT` z6iEohI@co(k*boBqCq`&p?fI68U8>4AV>Qho?d=$oMa=2nPhM)n#!ud5?ahf+gu5d z;Jj`-<$BYyTuy0DUv{vT`jF)gI2#rJG*Dl6wtSZ`_`sW5Oc~PhOkwZRR95= zv;7sV;6& z4Zk9!0^qmQH)SpT|79G4h2jL!+xagm4I%-DxzC3p$`WK2N4*FO@;*O2>35peOaJeNnIU3g z63cU?t{xZ5MU|Or2j7qdD;L@_4V`?l6eZB6V=O0)5Q~ftRaX-6HUMV-iwh`Qv&I`f~gM324@7 z_7l@7)!bFTmuprU$EA;#x3M)>JUR;>vuyI$HJ6VHDl7rj?`_2bnN3?6DT9Y;OQS)G_6Fs`)kb!TMi!!mj;OUIKk;#Xi$CU7 znMyq#?b@<>x0HFQB_cMqP?eo8cA0uAmlWP|5x9 z$T}x83gws{z|uWSRQ|p5%Xs&wAuem?WM%wC(yg-q8)DfV8HDnQqTo>{`T6k$ABxCX zdlgTR&DyaG{rhJkrMivI$`uQ>#3z(bmO;*lPB*jNR#_)0cVn)a+tU%s>6+En1Kn<| zuZ}K_zE|s4LU_WFwWeQu^s`(1d%ehZ8=JC1j;OmCf!!t;0Opced1Sqq?a31|Lu!Je}b@+QvQ-d}2aEO(U6q z6R*Abi{C_3r~7dz!WpH;4w`&4eHzS@yz35yF@;{K>2#(XHyH`9O@HuA%##A^3w*$b zi;-bKLelwQFZIrmVYy#H18Jj246bH-Q$u1Jf-A4JJ z`X>T+o*QbtE3dEXGZdmZ-J8MbCLi2C={HO&(^C2I=767A(!z(#_TuAmhh-9BsBc-E zo>naU_{R#=nwMdP13cG65MJw{SU=frtMhZChb=pW?5^vp+8#%j$;aa?TJ=d2>0<5z zDyfxS_f4EdgRlo3-_~M@F=D0}0TiqXoBBb+LceT#Tay#}2JvFV-HFz?! 
z+v~wV{h}wp8DpL8?)E?$H>ypiEFoUVQR>{tS69~w`-gX!HE7!3y!$=2kS9;oNbHFk zM=2%1p4Lq2`&HQDd)rB3te0{KmY0T<4;GocYBx!FoNJSGI{TG=ZYMkT?e8NN zzSNU{y*y)_AuYNdB}LCJdlbYOO|>FR7+3eh;f8dir_qVw^ttp@oob<3{s=M@V2 z2@)2^xa^sdMCHW8Ne4q#_xcKcM^pcx8HZ5>zUp7^DQQBTIH3;&Q$ia~uT}As=Po7R zNTysA_25*-GJVt8h#9H4FMroCIrEC0Mix`t*YWSA6j=(V-5s6;))OCty4Mo;#N#N1 zUaJVO^>I_>_8B)nzC$hP6V034|M=(pC;O>tx~@U$ur?-42Ax7BaE?9@Idl!^Ml~B`#`}(iL;m3A|TqjWBk+dmRx>OO^D2$B?Kpp+Qg3|W% zoJ$>#p%PXmr7;H7GfSm~*fiefq?^OKdH2m)yjak+)(4ZAP;qon;&MWx&Ls1Fl4p3I zZu>wF?@WF$Nz%@eXBEG-hmfd{Tvj?{G0F+6&D?+gOi!|ZKn}#lHkz1Y-e?{&D6xj;R)skvljXSA(YXY z7e8ObpGV`;A=z`O7C)_0PtQ=+Q6c7tiDCQv*2x_iosGeF>mloKs!?(V{60KCUN8Rc zeHP^ZRFMoV3$4O+`n3U-7rD(cKk^rc($Cn!HNEze?cJ-Rb1n_eMhfZp<-9V&02`Hx zmw7yyVoEw;6|YS^Y_@9!<+B-P9M_-{XNp@N`mP%Kt3SV!8@D|3$J0dC&VHg)MG=YQ z4Y!$?-jauaE6o0>ha)AOjE7p<6^+4D^H{>tyJvwqy{WODzCP(39%ZElED*of@M%Db zRnbAa=71zJ3Hprs5SmUAVrJTITgFuDDMZ9+l73-_NvqD&#?cdTW*c6c#j(WWFxr9O@;^3r|<)Ha~`HR?9Qh*$ME#p0ga{Og65s*uQA zOEK_nptbUQ>lh_Ul{n|(a9E~*Im(xoce5d`v@Yp%zW0(?4Ny$X{JVbfuXE(YsJW$& zR%uv?`5mHNujrg|zx9{U7@>YPYpoXfCF(981I8*xcvu{bWs8T9ZjB0+n9p&*dc2g_ zVgmINA%}Tu{X6>$@*=tDJqaM0hB+10TP^3BJh0|D36IUXSi#d7$FvwH7Q1VYP@77% ztIHEM9G%1&Tg6()=aTPLMR)6-&nrl?%C zdbfSsIE`&Iw(X>`-K4Q?yRmJzvD4VLRvI+6ZS&6l&))l-d!BngXJxI4cfNCsp9VVJ zH{93ISLjs!VibN)+4Qx}<$FyHo3de1sbevMv?%6-&lz9JLSUMaA3d9T}%5tT2CmE3A!D|3@IC z1aZV!9Se?ihi5WdA1~0jGqO#|G9ggifsNIvLOn-aX^;t0KNI7zxm1-3$>DWDytiHd z5`464Lb!Ta+Wr8$C6#QZkUL_u^59UAO*WD^e33etyF0}hQ~2~ikvpvf6Ql~!dA}DI z@!hfsP=u5IvYS_5>F@#9giJ>k@P;t5DVw#Xx`)Z`wC6u!vkdN+O2C`JoXoVmnasS* z^(INDl49X5PkIg~N+XVl%1NLSVylAFMN2G03+2 zw+ja&0`o#V8O-)->PgSp>i1ov_Qd}}?*#25JI9O)?lGx;~H{CHy zGK(oZOKo)bTq1@b!2+MM_4XJ7bkpv!EY!*3FSBdmHFAPszFv#QQZPAePN&(Th?-kt zG$5|be8zP3P-?UhrSJrOixcvxkap^r^0Jgq5JzeBA0}pl5=Jx}701DFh()@-M9F7% zZ(NyTZ_H>iP^r;QBLbXv&fhCOg-#vDKvw+_J2{DRnDk=ShOK%3VgQE<<`%00B@CS# z)o?JvB8f^V|6yUR5QrJ6w{EE&$v+#{7C?iWXZ&zaGNGlx2bh>~8Ok{#)=lH%+w^U9 z^hC)KlQk`#Ky|u z_Q6USA?_cl@S=teU2)~&5x3yn2gJ6so&=8>fOCg3ZkmLf@8JDq`sf{mfQLxWdy~YC 
zr%KCkno1o9yUbY$c~7jyDniZ%yeS}XAV68X(XOlC@~e#G*vxGgNSjUn@N|~F|IDw_ zXtoxZUMl(mvc7XX;;>`1nYC5cSz=WGN!)n00DmStiu03Y3VMI#v98(S z7^+OYgn^wzJSoJjE0J_a9@mP? z@AA{E#R~4?RZ92o`1g^A(OsFbd`B>Beb6tg`M6aIcP4$o}tc7Lm!)lSq* zfCrq64o~zEyI5-D>S6F$%55!Z04`OOUN50JW6#NB+MLN*H4%&w%z|JuiMZ{$UGlPh z@(ebsL9O8m4h)yA)OUN&#(Jf;FO%=`Tcb4OnQ*K7BW@O-4_vvms^#+(i&jtH*+TkQ@MERc>+j)=!E{Z;Q4>$MVMKHZ|A z$qVl0ed4tapU_%0*~X;{<*U;6lBqm>P}$=2-F9d!v)__;yYDS7xHwXrIQzuCQ7n;a zp$(uj6yX8bRfNN@-!0$u#E8?A5vur}w&c`pxT7pxIwvvdH5(&j&sV&Lkz68?Ka;o; zwvf!6I$;U5!lxI$&o*F%5l|BL1|dY=t!8bw?h|O=W%cBsaRO|QrMinBBhfo zeoTZ+D5GV6>vai;1j>fq(`%%TktdT(10Jn<=gV4_4zD3yL5*0T;JLHor<-xw9ge=S zI4yYZohlC_phfHrf{g^(v~3=hE-y5?aw48`#mJR6lA1)QPGjC9m^{*{h^KL75lHiH z_6D;KI-XBA((`OA9r6O^FUB+hqh5S?A|~mO=!o7v^2d6e{0IZ6im-&|Bo>gehwRrp zb0fP?+pCk4?#s9IqFEQe5*yF=VK4-VVJeVr(H5e0CA zh>eMVU?;{ozZV(h;dpuW4ZS|add`vVaFKXBteSi)cC z9{{ylVESv0;m^3~kQV@a;9q|s#s;(kQ~KVHYBOL^0QRWB=iq0|_6E!|^M1Hk!$vG4 zY}|bM6X|~WQkt7_v2*!F@Q~IdxI34a_<3FBUXXdy&ZEowH6XLeZsX?x_P zWfU&e=>deF zs+oUd0Xvl2L4|x3j3)U9dgxF01S_;-Kb!y*OA~Yg$%@@yK41N}K`t0c0TUbDl}Kqn zRMNXS9nwW|3?n$ENuJ2sGz$449LZx^6L;5ix^%VdUI#+&K)g;J7oNI;+)yOLQ#O%8 z9BPcVUkzoB5tUWLfbLecHu+q` z1T|GmeMP(NnE$fVgM;k|fF|@xOPuu-X-nd34$1E2&7#xEiqi)`JSiv%0xJ~0O0o!z z&ABi&SSi4f%H@I*(ajzsDnl z-=(2w9R~{?U9$%KR25o%djuPA{TClH=3%$#MbvF9VKva!fjJ-5(_7ZP?%wO-fOTc8By^|346pVaxw zDDKFpyZ46uvsY-a>TeGuXC3b@I$&ClgoH#&#o9VAGI_^69ncG{2IXSrS*!wX0pY$n{;AKJsw~JFFT);JP7yl;Sbr-fdV`6ZV(C2u3}~= zmPiS}59@6fOD5&csLjK zI+nZE8KuAIBF>DTGRg9l^g@f3V^)hZXhEY-DuH>-?0$iLP5?SBxI{?i6Ncq}zLb7t zAFHH_@MO5Jqc8qF9#n%o0D{%+gok~Ue}v4141>W{_S5p`Uz|Tq6#sWQK}ORl1+A<| zy-8?JiPEG}h~M5tB)~LWTs&L^;?7`m`|Y~oJ9>TJ#nUlJrN-lFsEGVCw#N)RJCecL zBwGxMkgMYIxFM#9Atame8~Qnsa!b28mH)ipvGWOdfd^Dp?GGTuHkr>9WUxsQcxq3s zC7PV`cw*ET4v7uzH4I>lV$~ll!1+MnT)(Yql~F%_g$erZDD`qnFb!9k(U|<|FQ?1m z1u1_f7}^l26ZhFiy#9FU6YU(XFU;}$*=2`W0m#2n0-09bT9~l$!-(A8J3%^C*kVhh zsnNhoLo@jk?J3r%9>i_NpA-m^XYNx_2Ae{Pm+?HcYNZA^|C1YvU_HIy@b7EccRNH4 z1(lztEaamgB-sJYSHXn+OMi!l7n)_rqF; 
zU+5k|MlFx>{)}0!*%yx|rsF>|Xe^A;4^Vb9t;cU%hT~kWS5!}kXTkm*p&L(&Z3|?B zyp8!rv5jo6fj3x4I(MyS@t*yQF3U<}atZKiN9xGiCD0KVH2(Q*3Q~u^%UBc0r5IkL z@HoBmYCwE~?cy3(?_BSKsy9d^G|A-wmAp+GKppOXr;;QcBYxICYKdSZ!lbBg-678Zy_y9wEQH4jmc9!p4l(+J2H4z!q7t6dFGKk z_fK?^Ys~S!gv&06w1c}?H@IR7%MJG*Xy&YgE=i#`XCjx~uOG^860O%7uX#&<9OEIt zOmY$p%ETMLC>E<;_SPcYS7r5Y_^v`edLbePkNNl4*t$(5EDj!vbcsu2F_K8_8tz*D z(Aa!&e}2F{)8>h3Z*j`gZh*oxi=Ec(@ow?^)nJwAAJ^=fDXZh_mB%0ldxaLk#FFbkA#F<6a>+X< zxeEV(VT`^70B}35yt>qA|X1ikM6Fh~ev>Fs%c|K!ID&J7FxUl=|n=jj$x`J8lc#P z06XTW64wp&-Wt*Z%YRH`HY9Pa;2I_&B!fTPj{-`2{Tn~fptEP1{O8iSW?hv_(nsnl z-S@ZcOrCaTeyS(1*VZfnw@-F!?Q?-w4*)V0mCg2vpXWBEprFVijP}WT66B*;E^X5+ z_}V!xDDM1a6}4aCdn@}9spAfnJ%*1w?p4oEhGPqwc+sxnr;W}&525#;9n8S=EPO#1 zc%zWF(BD75Y!6@dad~dRh$G^{p$4hS8rTFH$dks8FtBRa4ZfEC*EZxk7tgA#>HAcV zgG}L!U7%nU3bg9^l0Ak|cNCnqxUkz!thlKgTrabyn4$br%(wA%pf69)7_TSGF$ zn zHf(8hSyV<9UyEkhfu>DzT4UH9-tzGBcWZ))3yLRlq7srLhMTDaUZ_#XC1P_$-nNo= ztVf?--hAIgP@TwW=LUxz5=|D72nA9KaLlM;-t1_&p5qFB3XEOe;(hURzgZw)KWd6= zj7Y79^+}F`KqK>yN~PPP#IQ2sGBJ%35SH?Y-{= zhP$Ar6AJ2T?@T7bsnH~TG_%ZDzNEI!WPxN^Diq802Vtr~ArOUnHk3s;sev2xg@`2$ z#~1qT#C2X&?=GN_1f|+YW>I~_$NSTBHc@U5bl?Z9BAU!a>AH9~z>EM`Btm{umRhq3 zvQ9ygaOf}Lzq`!LZjKjWc|Ou%$l~uA`KtW%DTBx7fC74S(R15V6l!HrYBF1_@hsx7 z;>oMFpn3A({a_DNiZJL9YIqgHfd~U@l}L))p6hz2NnZ*afXellh|c9hD`nkJ-ZEsF z`7_NvGM?N2B>rHm}jeyv`IFeAjGQNzf(9Xpr>0sKi#qVem@?d6@tL?sUS*vdD?*^rA_LIWg z&ldo4c`#STYbuvP-}iWOCBvfhu5&s2O3lN03_s|!w>98|W%9-@Gq2wp7Jjf?!`Rw* z5@N6=5K_m+%XUgNGm(9P()(FGOCv0oiLh+0GNPq?|NE2Hyj@@w9-vq&MdP)#Hl9o- z7#PI_H#0ptOo?G0V9zAeG)$9@N@FgtynXr;r+{2*O=PAEda#2Ei6IJT-V*K!y(ii^ znUMG4A<6XNFwdX}?*aMn{*=z9(Qhk^7j(`Ok0eS&U)rQygCwhZCdX=q}v;FM|nX zbOv2@U!_dUnm;D&mTFYr%TpPL4+^h~^9^oMn_c>Nah!FKGs$?RLZrnQDbhmM8bC0y4>(&_7oB!Y@~lmKrXYnF4SG`D_E5li+;YKglUGIrj{$K7i5p; zH6v`<-`8SPjvUoakZXD0AjG84WDHl;y8Q6nqE)-a7pDRDD{F$T*@}F=$i(zQjage2 zNvzvBPKxRtIufICP%0X;i0I9!zz2%~aJXtJdsnWpUESQV{x8XH=W8TQKDzviTMmAf~_>{l(Botzslp-Sc}y zjeo5xCgsAkLvHt?Ca|{oMALEDXy98Rv+pQ2lYqsRF#t^`Msd2h3t)q?m}!x&Bna$+ 
z@3wp9eR&f4rQ$B-RsfdazyXf@0srWfRD*9hp0)-wt3&yDQ;6KV7vs z=t$I`E-BAt3&OOpgPXCiDug)WD79KLw__RuIoq~tohUM2=|%c@q8*M-q{h>D<+8fo zf0xS$%BJ(fwhx*MCd<({WZR%|?gLUm)P9tW&Sg49**h<%WmhtELX+Hv)VNHeprHm?h1s9 zb#;*Wzp;7xQIkVjxXqpBDV@(0EBwBXTJhRI=88=0&r#aM%3(6-)xT=qOyhA97;mE0 zpF=AK<=Ol5-FPN4oN7s<&S@10LK?!dP$*R4Z*7j45@KGXp|dE#r5)V z)2P}7-RV3BjJ@#-`1XHa)6th!#YKRfT=lx|Sj^-1p_g~|{WlhXc@}R(DNLeTGJ-TT zs}K-Yq1(d8(_llf?Y$kBwCmyVbi&Q28cOCJV0 zBxX+K$)@LOJ1@kheS#605m1nqLIu*&>uY8<7COD;d^y&yJle4c=X!bZy+Y)Cbik0njj48wdnGJ49w-3o^I&oH-j;1{gWAjmExdZGn zy|Jg+fRhR)<<6E*5kE1>!Ji_ABqbSNS_dwPtO2;-_9t57c^# z-Lnt~Vc=Y&UKPu1#|1*R(j61RQoCwAEX>Et1e8;1V(+o>C|E$QEr5aSQ!nL1?=v z@MI;&Jac06m^uz88_Z$;V1VjJpiikxL@MYL0q!YYlSHLBJG1W{mo|?d4lq3{4_2e_ zSAtE4>tPu@Enu>h7XAxbJ`noLV1Xivqt#gg)gn~GP*^dQVYWW&28gnNy0XoujHDBZ z1U-jI%}N{@6{TBxHIYfh>DcV%zZ9kOupe`NX;_L@>XKF(Ph&%UZEffsA7I!Dp=>C{ z1-O3yk|`ti$ZP7jMG7bl+n*;KTn@7sff5R6oJBtiTN@q>{6?%p%47j;!$vX81N~(J z@skV5@8Ybfe4Hx4<1K)zHVw#y?^nll=%8bA`Jndw^P}dgszgPcMi+1rZZeqdGRUNs z*{qNlT;^Qdy|=}h6Mh&ym?;-qAZXeG=oYi%i@o#t0vz=~^JN_$?L5QK4Yx`Zi^}YP zs-VPN*2cKXAjY^>7&J8y7kR_}NX+_>Xas9F^EA_CGGwZm;c|rzM3Esypi@!!HZzJw zhp5GA>epGWeoD7JvR6<>?Z_w^F;D0L)M9}QSw7Tg-kT45FEg_Ke2_|A>ZCg^!?9JT zUc=;PStfn8JPn5t(_5calRy1Y1msB8Yn`yRTbr{u!atDQaH+4IkAJHqoE6mt?8|C~ zWvCr*^z=PD7O}uCO_Prr{^@m*+_~+D=d*B(W8=)W8GoXJ&4AHH5j5srj$yr$)nYP# z(BMMAF+o4uH_sqc^hGt2LZwWS>E}Lny!rJ;Y%q|X>td;z!V1%(?e%zcn?_Ek5rip7 zriq0bSDDE!G9~iJRN#kHvHA;=QXv<7r1TI?eDvX`Jpb-^Pc}szBxjXwSP_ENm}X49 zQ7&n@Hig1_HJR&)Mjm53XBjuFBZng&9yjSP{Sg}Cc$t8Sk_|W{vnW^V8QB$Dmp-gS zAId|&>pS{9vB)Dh+P894MqPY&cW*Y_>?U_e@#tJbbe}o8@YO)x`Fy)r#02~uc-=GB zQGQu&4jhCuchD!p#aYZ(K0DRT2AtJ`4k_!p&)yX~%Dn@!XUCze?Yi!|TNF)+bq=8j z2f_8FDm7t5J6-XCip166&Cx&E#5_WpS+>BIGPTGU|cH|75I;D!5bCnw7-}I4FMX{jhsF#vme9( z59_DLO}i)jsGioN(=B6)g{~&E7Ru0wj|9JqwD6iM)`rQP&*n7~f&jqkqZu(zqaDTe zAzSK>hHp}(GA28&>QE=av%}_!n3Q<@LS7|e$I@8s&7wbl6yb|y|l1HYXy+= zcou2hxK=^ZCXe9;aF~3Hf;(p!8?~gmg{g9Tci@c6-VM_r-LNRHQ%mWD$;i7 
zduU7tFR@q|I%eG+?lc-Ver>)^tZNt!FNJ(A7ea28QlrIOzoz+2142Y9Jp&WpLymTml20-+HaPaAiE;#bS94;vK>ZKVmI#XwD9?AU&>q@byP>dg?F&db6=LF z@81pR1W_1$N%R_a^WDGg(+=cP@i+TPfX1T;D061F#%Bpa83P@MPd@@9CE%9X4oxyz zX1=^=D5QS=t5XaIWNV~;n+ba~0Xq$YmB(K%j&_~wEP!%yuwo>?W57<(St_ON`N73{ z!R3>fbQ0@ccrFucj-ZzzJitN$`9)l$l2iWo1C(Dj7&`m^r zDeSyT6^gL@Bg-}Q#GR612!xwtVO8D-(^YLv&QPe7(!GHWjk)md+}q-rLfTT4o*FQq%XKr5&>$nSn5G?+y9Z8Xf}TcTzQ%OzHf-DH=|=s8kM z0)_l1kh#y!?N%8g{}Q&|O@FEeKgRYdAaXDn@rc+@vQT#oH_iqa9ExozeJ%33l(z&i znS9IOqXhfOgE-7Fb$a31k}2bFFYd6r@jE3qRFsmW_P0+6WC|%ep&Z(LEl8;f0BHzl zM8Zg5z-T(X-u(f=uz4>(f@p?XMMElk-NAOH zJ=$861-H)Cav6UmGx_Ri3PToiTW=hDD*CYV_U8N#Y%HN*PyHfXl%^z)BgfumK4r8+ ztJ&DyDwED#$V$&jSQJ?X_?6e34uk82b4d@MVFJP?ROriJqz)d zn>4q0&5ks1?kej8Z#h{cIyUlc`4T*BJri9!&37;@Yu^gb(i>Wa?O5{FE4xll8nRL#FLi4J3&u z%%nLrwK8t!47OwfAGJKbK#W(@ISWi@N$A}Ps_BPRNkP9mk-}`$Af3;bS)V^Bb%zvrTOUqtCbH?OQd2#F#Hn#b4TNb$>U0 zJ}SngGKAeE%Pu=!MPk&c|D{dgRAi=6&SyM>Ps}mB$O5;*ro!Rqfco-z_KLj>ZHa&B z=0i2-=o1Fs#mR_@MWFm(MaVhIS@Cj%AJ)ZyGPkv(q2j$T4Eg8}#DT#%sPpyCd?4fH z!+Mu6IyS)Zb~B=c(ZPflD-b@H-2aJ`+^9{k`U*d`;G=pc)*GT08BhLvzY9+%n4TQH zGOq%R=wxx)DxtkQUxNbll;gSVu#7o=@4dgT7u@M(1U&6?3CUx5IjZ}%JZ6yDzro`a z_3=w;x^9t$FftAvUR@2a7D&IoQUDXFB!a*DksxzBqVxGY?G#hYw^oQZjxTl`+D}gG z1jNlq$=QWdd;}?z86->Jq*wBC1d3Kj)!qzZ1rZz@@X*G=Hl42}Fn*1|B*zU@MkSYp zPmzH|qo{~_M%xFVxwIyCju0g5V3feYfU&m6EyAT8M{B}ZeBS;*rL<1{Lq5@KlFcYs z)0`xZ(r$hk7nY=vCfoI$k7NdgLjgc}1!u`pcHez=z}j4VGxHf`u}+1^^KxSo++#L7 zNte|sHk*$mKaATUihz~lEDTzlWjYnun%=Fd7rWVj;)Pu5*HD-(Huxh+r3|I*_ukZR z@j~Fw+FL$W&_|w@qvTiwaAcpX*#?u9xl^8EtXuPQ&h2v79DE)fW=t}K zNg$PI9KRCd_1*-HL8sd55AiG%)yH$U44KaY@5zJ%EnNs`G~Ev0i9mD)sTeFyM4<2f z+~_@QktUm`NZffOpwtjmCjpEI4M zpl^_V4OSc^?r1#&Y3F-s#NANo|BK7*Ew9PaLA+9-jky@beqz?YSKZbhFiM+3vLmv3*UF2m!QB`<%~ z`Ow{yzl9rtm|HbMmwsZ*amG1j73p|5>kIF2^m+Rb2%yCH*pyeI*ht2gv)WrFM|<&f zi)Sy+;X+0BM$i94|C=Ow3_H@9&wEZZ!(cE)03DyhH5XMIgafr9kPU15+CVX>SoA%WZ&i!+a`^*&~4xi6M|01xfob`ef1mVbX0 zmK)b#j+><^ML*Megx)hzPl)s3xk>I{T}U^&P)9ym!AcG%`=l8n@SN@}@mY42N6V7~h4TCZ7wBy%cMX^W|ZyASfJl7}z8XdPCyxyd9gza{$4fd_<_) 
zRGx%{Gf~x4iEiE6qa&rR6?$iV1mmwfgoq-X!SGZ@-2kt`oK`1PC-n);{W}7#?KB?r z9Iu3~8(Kg!XsD_K{^_G2aW_sTV0URsBTXl!bF*FjK3}bCxQp615iB;4D+-8>0I!T* z$ns+1!FmTfhn+_FT=w%DS?Dg=UbRY19I>Ui{woMgGK&lccb+O3_^8KN9EFc~63tgN zWcpS6yIJPT_r_E3e`5i&Q{dh>Gv9Sj6}v3(>=f2JOxWO7+MM7@XK->r>Exw9M7r0J zhcl@0nz0H>dwAW>zcU_fYIR7p$J1E@kWXnu^-GqLCeT(D$R#3bsh3lDHrY5 ztyc-V62+fFEOv7*IQp92r6|XV_JbT&OIu z5q@l+#MH6nA-Rl?sMY|VU*q{oVK1ocV~TGR$42(kl;*tMlNQ{LaqN(o{1IPw9=c_H{#tt-w^ViW=g)0a-x_&bPq{C?{E@Ub zo6ymYT0~N!@*_m20a7=yI^#W0@c1Y#tW`#TITEM-67+l0K6{kp!@t_^1x@hwXy#=R z0J1;*@oejdQx@9;4Jr4k^(Mso^OYgM(UvYY8UabW!-0%7snefh;}u7%$51elre5SQ zM;KmD8lYOPn+KJn<$jofNrSFIsCPrgn$B$pLG5+@M8DGP2@Y&o;)zFcO10jw1r4l8 z21T~;SCDdKPLKRHQEZn$#Hyf_x=M@Redp|kZ<2QBYOM;K)Sv?tjhU9Cf?AAG_LFmh z=;a)O7N9zIvt1LupW_$mok&t`V>q(5d|ym7LzSS#qm{+w*4((%K5OFv%t3K5vPL3z zt%8nNUGKJ=qXXp{d|u2@_t;UspjTv`8@I9$Rx`QN83jW|r#snc4n6(;JEyD-!dh=7 z)ceTfXf`MB-Y+w3Ej!u!>8HDH(*UNnu`-Dxfrw40nwfr;x&d5{9;*N{#+s27T3YW` zacPkHxxH&WSyqT2_t^t|wn`CM&5}bj+A~J^>x{3?*@RW9 zkM=ZUG!>8)iUhM@=GF~NWGo<;W7STbSmG4_S?i(H=m~4Tc-vkYsh`Ra$Vq_n5Pbdi z%4>}2W78W(4TDBO$J2Bg0@Z*UPSg@Y^sQ*oO07H)Q|?>7TCu5Q<|-7V^sac)uoBa+ zNyXBl@Eity%&gUC*U{H66Nxu`+O;~h$|z|OV<|2hk^VUy?jdjdP}p?|Kgg%J4YPi( z04j{{bxgFgbO_58>Lu!3BxPUA!k7LW;atb5vNg!P4~*@{QyCbJXK`V%?kl9)jH+>! zlVycdvRY*ff_n#Y+GLDP@Nz0P7$VG8|50S1Yy!n$m)4REHR@1l+g@_rI|Ahvdgi~4vD@(R~Nm08)8#=X}*kW(?HyzOM>igkQiEH z@kpD2$wEeoVcC(6&fA3}HVPRBo5$V21V@vQ8;DZ^k7sdWVY5_2TIIP(+83H>Pu1yk z7Sl%AHWm+M`-SHE&{t@5WCky_GOU1{Wys z35JB8pJfS4xSwmTM7fRH)7cgr)uR;`wotYuqFWWzjH6y?y->fZ#pmMgbLbI?Mg4(6 zicv9-g=9d^UutIGaAak~(Ao=nXilfU`HvcdLk@=l1RxA2g{9k*OCu(%nBEoLQq{Z= z=3BrP_|>FM(&@aW>U_#Cc_yRTXz2s*as! 
z@uQIC{2I7E-i!$?@!8l@zU!@Y8ovoVn_Qwuho{`~n>}J`<+o74t;p;&ZSH(9r?5r9 z^9_~FYFW)tw^>Nn^p+x{NNVeAb+PYwT-~o(BQLDU^e6{3v+rKX6!jx?_w2jya-|n= z5;OIB<}YdERy5X8#^&UM;=0G}>fWssKy*Z)#d|a|QcjE%-;5b!82%zlzETJ7?{~Hd z#n6g$)*r?-BELp9h%fb84e32yY$*TSbPlCg0)8pU#2z6!pyv6SH}y;Gi@mtPh<0Jz zB!i&jxXaNpEM;s~KY+7su#LjPqLK?$f97X)9p<_?T^oPco4_-!8-+@cgN)ddD9lJk zW>uGs%Ha5v4;3S_SD^VW@(#*!i+(u91UHj zhq&o%CY#2{Zd+Nj4T0KQ0={!xWKubzMWKiUxHChOAC`4W70H_`mcq`!+s62{3j@02 zBp3)d8x-DCT(r7(H^7JB`!72C$lq{KO+?>8@>lrwRv-4*v2rhd=r*+e0oW3y#?<{* z5&sA-r!6`_=^O*-DU^2ix75~iX+0ENR!<|Mq*x3TV}&?tcrx=VTMfX{wc>= zX!4?!PAav|Dd_$5(NFA$V++kfhaVO&X7+O`_qxD4auJK!6_`Cop{{w3_M>sD0zfPb z8IcIX$Dne)KZV>!ZI2~58c#@7b8gBIdAc5vrYc;Uh?pVi^!DP&>{bZUh=(Byr)2WW zAVPJ(n%NayF7@`Z2~Qd=5w7a#A=lHL0|NWnRnJa9V-w);r9T#Wa3GujOZ&>_TNHoA zd?6ZV_bb0e=ApMkobAj_8hzc*oPkft-H)VSyZN~Ro+|oP3n`N5fn%IzjD6xumx4R% zgf`$~LF1hL$y_qY)RN^rGkSV8%r6-UuoGiUZd(x!4ue75%CV;{u1|=A2)SW^y(_Q5 zh}Z5&?ugfL=&56E>&5Xcy~cQorP+In0ucH$+`C5;F@DVNX(d|lMQ_u-8|zRij?JYX zaiU0}Rr`n)t2flx&}PDa>v`GQLJY!kq2l9wZo~)Xem?(lOQcl~ztqty>Y6i2CW|gD zCOaTS2K>*!Oz00;(Ia!{)M;2bc0@3a-BZa00& zn5R&^A>#M4*ZUE*MYZ;!A&Z{5@NdP?1bP{994t zts(V6{FEvw3P*~QiED{&eh&`;b;cY>hIvYh(Yv&>z&7t5He@ti+P^q!PE)cKXP!}} zX&2dyhOQ*a8QQlx0|enGJYNaXXhMuS_3{2jBds2jVHr-wkM~C7mbn=FL@oY|y`MQS ztW=XWcXYjkT#sf@s<4nYx`SUg2os)A50x>bw;rO6G}_D%5OoBo!rAxi=GK%@EpJpF z67uIi}a{>NW?iJ+s?K=pE`LXr(-H)8Cm)vB2`@ zvfa~&TQR{m>pV(llj}Q4uKP9h2hv*afaxc><`DVv@m*E&X=aUe2w6%xWvQBSILc{S zL}#%n%ds)wQh8hPHb;xm=8wuViC`&52aU5&G`Vk+%-X3d6iscmu*z}X&ReQR(riT5 zYY1+2y*_Z9@HEVj$g95L5XzcmN?FekE{{Fc%I}t?KrYJ>=uDf*QZ5tHy2|oPzxcv4 zZrARN8%8}Ro86TyE;tzS-nE=2lCjTh{8<|EyF&ngb$ls<~+c+(TJWU*zJBZ{J;FIV-GDK7V6L>;|47BjvM;x}%fY9VU zZZOP#vo}zb6u>_W`=b!f&~TXz$pqDW4%;ovSMo@=N?$Kuf-;rwc)_%b)_%Df(R~wG zKTiiImgt8;**)n#n96%zdg=`HnyNQ&(mQ^SA%Rr^;hVhPyky|2gZ_;Lbn;tfK$RQM zZH&cwXAV_+_S5=&58DfzPkwn{P}9j;S`ZR8Ip(ONcbY6b$(T)x z>)>UVd)Bs`I$f~9Xd3%?EHU~^slOUIy>9z`+ zPw-IBt&*CH9ouM0E%P9}W@6rKnb}_42kZf0+3j=Ne*4%%RjjDSZaI`!XDC36O?L2R3-I`q!XnXh&ZD;?-Sc2@X2dEZis>b 
zaeI_!jLWNwHdD1C1T&!{g;D*%)Oeyd#$S(c0&IfnjCX89HM|#fjc@%BqG}~wiIqNf-`sKrvN4035@nW>`051UBDU-CI5!yJEO zGaR7|<7|YOD&1>D$m0YR{JYk804GosA&-EN-}vjTcqv#sldv~k20!d%&TF#>8CADR zr?Z27i<5qZRg87IQknZ^w+6c!B^pGYMC>QqF67VN5Qz)l?;UeB?mFBZwjX2+R-Cvh zNdq96N3md|)3%}j$ZON>tvQRY#pP;!1b4&Zgk|}&-lq3Q-58o~KNRA_QO)5^(x7*h zKHt%I5pT{q#{LMjF@?t@#|v@5v)J?eQZ@(q0StvyJV9m$yxTdBO;a41zn@G}U001~ z_;a^roQZT2oa3!J2I2~gJsebMmdBeXdCN`?tMj{OPnl38kpKCy7!U87!~NWM*sxWP zK5TO6x0ja=;HuV0nBl))i@&5WD-#`5g z!TBFvu>bhK0PouiknIZpufYyTYIwju|Ch(?e||yl)SuryzESp-v2zP?%dBm9RGC|K;+#<>9V=~Bu0G<{t&qB&1kxlFAmzG4ftCkx;}}G5lzvpI?QCRX$e-pI}Un zEiSf%a}lBp@op`6FZ?o)-+GWAHrphc9PjN?mvmzEv)Nn+kUcs4)!-BKE6rgHc9~=t z>c1B5|J(rN2V}`(m|}>N_ziouiGI;20UqY>zx=V#yb+i z>aG}Mrgp-jgqX#bW1&by6hTNtYibW&4k;yJ>$KCEFah;W#q!xXk?xNu1t#d5nsi~a zG3c9#&&8CzT$l5I6cLxaehy&~sjTvsY0T#TYPu3Ok& zhYjLW)bSG|u2>`U|H)za&qalygDLS$BSt1QS=EZ);!f+Qs{X&%iIBZG@d9#wK*2bH z^h>xAy|CyUCMA7nTxGz8_cHKnsd}B>QWU*`!DQ%%tP)Y!Qwk5!xquw~4&{;;!k|@X zn2sXsLcn31qQGx5hhHKDk-)R$0_k8Xe+cR)tI)&c-RW}L7WVD-P%N2YFcx>PlhAB_ z9G3uUYlqmHRj;2^STgcIK`w7NU}gIMuSNW?B_;^gX=U*1v$&X3B_xifE^@qP@I|4F z6-12%r|Ec_2C%BvR{;~kGf8I`vewTwZAs?OgOflMlx0M>_-Et-3mS))>GH3P3F&02 zSP@Ku>A`D!o*@W$>e~40K>&Hxz}1NgapLuPGiY6X)+m;WH}~Qk84QI;82?oC1;as= z=>Kb=q9{jUobwN$ft6u$G0b8J^|c8jcP9Ek!;W$#63i5w^J{iF!+a zEe3QGAd=Iyj%Ts>6rFyDbbb=?sNwXoYS9-5lxl(h-0=T;-2J|PA1(Xm*#txN=W@;N z;SUwV@d_&W`_%otpb$n7!0t=~Legu$QH)E<>80cxWFbqA;5TR z)_u%?!*df$%Qfy5H&EK+MVZ%LFeJdK{yRYlP?JG^EG+-$$o}(S0Rt}p>5K)z&oHGD z!GEqZaytr)asR=&|7yNO?4?LT+8*X6UBC-nAJ=9zmE`yW8KsyudI(N+Br$s;%oR(_ z?{KZu<*`br0a!x7IM*1>T>9f*G8uCjWEv zLiVCU4O0J{*NfNRD|&>74H^FUEiRD~^78-C^_EdpwQbn$BO%@09nvKo0@59l0@B^m zB_W7(OG|?yozmUi-MQ#)zMD7pe#f`R{^<`Ku-062&HK8}^EfQ|f2*54o~%Kef4KrY znRaH;=w)1fIT(K{uv-H6FfykW)#R}OjLsd#=G8vNUU9>A5e=#{-MM0`~P~s{ZakV#lxSFJw#JHbERV_zmvb`tK`Zjmq>kE z@W%hSE&luS`R|uQS>oN)3$YO1?1fGDBVBolxP|KEaDivB#S4iw!&wTpW#>7S4@`0b z7kEn7AW5u#m0H@k1JoKEt4gScVs~e zFDK7`oS?Fg`prhgUcSj8-bM3wR8Sci8CvD;+!b@(l3B!W3^Q3?eV<)0n)tg_flX?Q 
z735E2snfuOdbn~at`l~L<2dUQHE7?L{MLg@66I)%xi6NaN&Z?2E%&Gc&TTAL!_q{C z0IQ35oZL1R-r`sbt5m-!dat-Pqvyf>t`lRDDRV_v@7%n+op|yX0E(jw2EvWL=Y%&zU+=QR!%;0A zPtR`2xFFw!GX;Ibejq~pR!4<%SRN+_(&$JyzL#gqE}hNRyEU*c@njg&MNwKeBR8u& zGN-?EwrP#WSNb6{@`nx8-R&)GB@cg`9_D@XO3_C#dUdOys^~*~qifQ=F_lA5W!b^viTSCXdhTICzrIla$QAq5w@JSCF;*?B*P=>L*z^CZnWZfr~xL|G{R zfuWR$q#S5*f2IN_SJ>K0y0Io(DogX^0pH~AzwAc6yO!M!cj4OjU$+lEdp$|P#4V?Q zsdckle6@mmwPn;~t=ZuYCQzN!lR10U3Jr}!(WF+Z@Oi|DGh8l`^Faf|^K9Iih{^hc$_?Hhq17XsM55gR;_hC6*Ey(yfGI#bptErX<&L3F~N z-Vf&Jg#j<1hfECkE$3KSP`=ZLk+xt-;Uu_Z!1E8(qeMQKBBe&r+JI=X_PY<46svOJ zDUu5H?vk0*scC*b6hH=8MX6mjLv$)L8{V3u?*PuSI@WOy8f=v7G}6>GR#h(zwgAvz z`}?sEG#ji9i{1G*-w!Mo`L)NWoiwDVNC-O;SBUQF5l z&H`xs2O2xNhvT<6SzP`i<8zu)tkcOeg--AWyfba0uOi^oATAnkm@Qkw|4Oft-Rr5! zBUR!}uTsMSs*RKr^67FrBiZ7WI!>+(r%fJ@_iX7 zN0b@!d`l$V1R@0M<@Z#3BNj;rF_e;GlNveMcW%y+eo6HtDDoI`)do8LZ6Ym+9fGm}-2cfVWNaPp{aoT8S3J-v ztC8JS0PxK>sZ;ek|L~&i2O^rvk%!mO8F|-6{u!;6l5*HK0ZhzjdPDCw?AcP{K__c@ znU0eQl0jIkcErkBiuW3k4t{6Ycr1D#=pgdKQ}8>L{brwzaS2-Qrrr-X2>2-hMw+Ewyovd|af#Ge%(Btcf_aKE5ZflZi!DSfW-pw2AGFQLzg*tJ; z?T~Sy^G>hXQ;&8;y9}AyFtuu*npD*-&M~bo<&7o=A7d@xaSF%w{1Lcn4=Sou$(45* zG-woCJok}|@)b8}w(*erwAb%4PMV%zt=C$Y1}kg4HvSXgQaAy)DAXwC=l-)~ql|qtasbAgw3pWK#5~2H zG3HxUW^$Y)8ErY%2Xb0d=jN;?{1B-JJ~da1W5e!y#+F2?h&Q0L@&!H!@!djyl^A&|dY$hf z=IaZakYCkAVX1>hTD)Gd)Em4F6)Kmi@T}59$zdO(OO+A0iu5FrmxEckn#7bc1rr_X z&+(>mPHxZX9F6Y?{&Xr>WM$_?oVJ{~d%H)Wi5(%un)W?d#(K*q^Dm*zT^%le$24UK zq&`3krEE7HQvJ~%6BwMThM9osfkg3rCcLK9pt<*N{y;Q=PbXfXqBkOz$Roga$xO%H z$CCo=3*42BlrRz}*S(ANn~Yc;_I;1z#RpRW(=~VaNYb2OXh{u8*R`>4IkD z=>j!{(6L>dxVG=Ut6PqiS~K$HBu)G8TbI*Q*`3cUJ|D27?{S)S&8|HU^8b?n4{Khe;!ioZh-Ie#U86KocXiH59NZht5jjdb|=^6i^Hq zFM$JCx^F7aC_QUktAbCbdkO`duo#mtx1AQL6jN>FI>j zXR8UEQwam^+Jzcxq|R^*Eaw82pa>V)3&K%_D_u3AbJy?;HmDayFQ}W3)|F7ryV{0! 
z=x@B!B|BL1b3kM6>WCrMJ{)@&J~z;%)x$AfF!W`kAQD0BiY<_Q7`Z?xPCwWs#1=gK zP$ib=1O>YgB2}CtFibKw0Rl6Txpi^^%I9d) z0}Vd^nMjfK(MQZno0&W`)p@P9HsJ zCNj10+NIm|GW6nK=NU$r#fIm9+>Vx$)^HF1Zrlwm%+`6nI&d}-9Y7U=c>^rzPCtso zA|&C7xRGC9A4jGKRGFm#8A0SfNt&9)Dj0jPp@lukq*(B(jvf6d9N*;zmJ}OgD5EeI zxb37GVNT2ac7&sgt!HwW!9FMCIry3G=(DL*48R3?jl-{`D`Mvzws{$%WviXfQY=ye-d)fcG>Yh=+tVN&Chj zkzI3fJEQs5L_tV+sGdvB-nWPu;*k*bhkcwUO48i8A7+nMJSATZpHaR-NHsxq@UgZ( z%N78tzPt>#Uxk)b9Jav!(6*kb7GY5T6Bs-+PbI#6Q0IJ3h)OmaD1~-x_;5kfYnBN| z@)s?-zAmcZ!kzCZid5DB)P}ZBhyOsNGr3|_2&b5604vp`Ce&YkdQ6$V0Y#D6rE7KU zg*CfNR$&JoeufnvDcTqV?xRu=FOxeTC2}OgL`&nreTxXv$A8a7rNNNOn^x54=|pS0 zB>R=dm8Sm#`xu*hrjQR5W=|+gABRJyN!8xsOf>n&JL^g3_vyjRF~Z`wi+_D3I-w4` z>UWn%2&TDfq#3{ETL+X;$PB$7&QGV1-v8)p?)+QxQD`Xe{C(C%#p9+?TD2ndB;{zdAKrw?&mm$F-fl4zRyP5W&I za4ihe1z}~e--M2VmQtLa*=)W%^JPgGJ7oKiXdUj^HrzRb`Vjd*NIm~{6BMCww9oRB zRKe*#I`?3{IYb=|iywKKY5B`lHcLnpDU5l!Q|6fH@Z*+^?T<$nn}Z0>OZ@kRmoMjmr$5mrGFRD-;Y+|02>o=^bYKi*U2m&}tQkYq8uPku)|D>?yI6mL`4!>f z#5dvm?wF-er<`ejrX(2PG>k6)DmY0pFYxM5l>eym$>R%k2AJaD#G>;8E~UDK_XhyTukg(}f zR#N#eOBI0&&g1(gz3MMEz>I@&XW8+^qJ^NW5LET145)~{`u-bF~?h&HwJW(+%8&u;4Stj|W8=4w_l(tI1uB06nOD(ZgZnSY!FVrX9`kl~m6(L}9D?BIQvm6oHQ9u1L0scZI3dg6Wjxsh1#;^xtjfG(4Z+%hw~7nf;-dbCbDej4)8h{>rb+ zw1Fxfh7Ahb**O!zk(=rC>p2G6(UPR>!e^s0?!`ql3UERuACqcfaonJb)<2?=Ra&R+ zE8jRVed7YN*TcKgeUv8ZmmPTWlksmT+Bl22p%|Tzd6vhCXl?RIFRTVXwSIKm0s$9kTP$QW_ z?E3)hV|pPKK`L%E{0*E516jXP?%l0OLlX?S%N{*QY&vn>0lPv7Q$X*}%=L*HD0#Ub z^ayC(&bNCTbT$ihuczOr4Y>2?*Wd#h^9h1~RgEQJDNt1xX)q|yYN_7M^j10rRWQAFOiPsjpZX)rSbcjfm zZk7Fab?w+38OU9DtkcDfVHvRxpC6%$Qu1n`@vQMA3f*;sVeP;xncWCtk&jl+w{D%^ zVn!Ps8Ev#Hv3jmEL;?Dh-TCP=)h#_v6Y+4>paBEV_?EosB6lle--4(;oQx5zL+IZp zsKP0TWQ%BW{0Q)*OpZ=I+N5Tk##=U(nyZvb_>pvY-;q^?G>%Ro0D#D3Ew^T)Rhkojo1@&? 
z>G2{S^=LQCpOT$PMq(or4A)WUK56m1ViBUAiEr)W~j@9QV)G^*_)_urlp)c>tw&wxy_`(Cb>z0Yk%8U-!DLA=B23#fcB0m}ji!xzMRC>5P7Q6g~@ z33X`lFwOoR7W_j{u|fH(kqrDA=(9f3P!-G{gK`vCf~v9_^sq@&L|unG!bmR%iEm7}wM-6@HCncj1(+vAe zW92tr-zTxAiFY&c5pW@1&xI4yFrYRN9=X&t3IU2R{YK1u3$&vZ z*kL=9WZ}IVZ2MGNp`+zyf4J*oX`JC!6Ayd+rg*~@{&J(P#6x5K%>6d)j4|WR0LYwI z?4|-8d~gOyO27bxRwT{f@#5deS3>gvoRT9;zF~dN8Y)xS(Yd}4C(Z1K+Y!GdTk3np zix;G0a#owkTU?gtj-p&j`59n*zrS1LJ!NxQl#f={t+(U*rCxZY=0l77MvK^5Ph;b9 zfrmb=h5kkBGKXZAZ9&n2_6S-r8_R)S>IvCcAspny3h*MtQT(6np<@Hhy8^MBu|waJ zP1Gsvg5LAb6=`A2AhHb~%r%xQ7`c?ze3e2dW%?0iX z4-%9CvbnC8v@Z|v&c_?#Ex86Clp(;J>)V;4K zs&Qw+nv8ExCUmo0BEWNuKhS4Dok`6(!=%kTqR`xa8*VA=ku4F082j6k1l52q{3`^H z+}RZx7j0fA{j)=Y09> zo4-q__X~TMXj5+j3&!3w&G0Rso{#=;r1hC><5F=ch$k(Jmw_#y&2pJ)^N%1_$#tZ! z;OM1*s$#uU7bw0{gU_- z5c`riVmdctU8dL0lSYIS&(FG?le?3%Qt}Pk4rDk6iEQ@R9qW4b=eLkv$i}ynztVN5 zQ*)o@tuq_`(O^vv@p7|kKRl2r))s!ANsbQ|Yen);n>!e*^5rC8*bB48KjYsF39FMt zLa2xoQAXNirti2VMwBM1ciQfj9m6|bZh#)vpk8CZ8YF)DLS5f)x}wqrO|akOb*(Y{ zx<6)0pXFM=%0^iD+f(zqk1J+#euUw8Jk9cctleLW1MsQ)cy+_AKmO_zu9Fgsr&#;J z?V?juAON6D(aVjfw?8cjx0wNlZAGx<**y`17S&)ko<@zk{+i)%rNIxcI#s=+jjH4( zS;&sPb?d@=4iiup?p-RWJqN&HjP$YLr&x0TS)Ph#ic(LSQ5v0x$MhjG*Qtotnmm{F z3_b{{N~nmMjbN;4mKqpBhM8mG3rOrz+bg(jonMuw)S95?KX4WT z%XX^u2l>P*11YM+{c^Zj^G{VCwFy=9 zkTB6c4^MIM8=b$qtZfdxNl3cW_7ctQ9$VqZjbG1t%fz1+IqZiQ`N~}PbR1j!$aY5b zTlq!n`e8Uj?t@x<&T;2RDnh-!(5ptVYOOyiY1|i+&(+&Jah$-)$nCIj4m_TJTD%Dk zA@r_S-abgC?yLLMEmn`6c6EI`&x+79xz~7{^>Fre?NzAw(?=&8Jua_urrz_^0zn<8 z8?gfUISxZGak>E$^rMxgDB2IHtG)hIH_1&&6~S@K&P z;POiB=Z@I$_$+Yvx|Y)pJ)uWTRHN2h;Fshj3rAKCZ2fSyHB1!j?hzol|UxbE**X4J?O$HW@3JRDJ&r2#@O*p&j^qj-+C` zvs9CUE%}z)D!3hK%r)B4rLc%eRX<8BUkhv@n)}x;Hant#U}k$b;5zl~kzfxdiXEBH z7sw&0|6YQ>Ma02d(G7mUjkss$_*i#hu`+}(w4WFP)|*v!A90rMP?KCv`qW>Gb~|v} zED!d5N@A+0%Zg$OT@+4F9rzogJbs=CsKwrAI)fAuov9uQ6K{uhYAjM-H-GL^MCbhK zoF9r_wK3yZP5>((qEX;b4c3}`9i))QapH+MNM z!JXu5vwy^uHfu&0FF(_P@lABupf-Kj$7QlNs?>Z`HC`<&*%rs97W3C9Yy}v+j{Eo= z-&}#)=pO=36Uj^2wU2XBJ1`XU~!) 
zbr)5CdIri$rwi`6^5$b3EU^`}hrP^QO>w>eK7+!r&iEZTgIE zF7~igXSc+@5~9$o6&G9{&66@}G>}VXbL|RP!0=Wkx?B|z4`-9CPeL#De>oI%IH8}l zQ@GccTB_j>W`e0}F&!J1c)2=>5n>a=2Uy^hNr8Bx587#Sm?i4avkDX$_mnBk5rBVD z>?ns|8q9`!TouOp$~m3p_&-g|5A_01-#pI|`Elv|wM_^`Q!r(!i|YyOdupV&Lub`i zq(sbr*PixAn>)(NtJ_-Hq1it^(DtWsA#qUme|t(#AI;MAn%${Ra+xM+7<8pq<`O|P zWnoIXJ{OfL<}DvBb@sDZH<;N_ncyPLbr_jc>rtcvHpuuz$KmS#&H_dZUJ+ExW~mOz zI=wDXx*9i=4qJqE&QVF|UVSRWpp=ZS`COp>HFjLL#7f(P%Z;n@ZWOl`+d{dVqC(rW z!${OAGKC)#%!MT5rX$LYCr|-e3ZIO5(?w_5u)i(a^NrNTXD5hde*!=~F< zbPlVOdX;{OyAxf?_TpnXG@D5c&?K)S*RC%xZ-fbLG-@RKEKp_2QUrS{W!n(0UG8{H za3JDBv^Wv(@-kAjahp!7FEYZOr{9<0+KuV_?a@H$c~T%%b2&@nHmKK()59tx`sejf zp0KTYp^ntu-k?Ffo>7Sg_E{Rz${WN zh~ozqq?^~TFhe)e>24))fFSRM<_;iq;zb7O_ln;g!gtwCZibr#&X^&Z5-o!D&&0)@ zZ>>`~s^xV7irz}Si23oejzCREav)y0{kiC1W;2X)0aLth{%M{_0cu)LGjm9pQyc$B zUQZlLTffE<%S!Rz!-GcHEM4X#E>1C~`k0UQ3-e37b&vR{VNcx^5d@8b;OTOB6Lz)Y zDXgb7$KcwZw`zm;^)*Q_v9@zRzjA`! z@45wIWhf#jIM?F9odr0N-hK&AlMgcdpwjy$$)bT|Awm<4bz`&yrBJsLvF48fgUjav zacikGk!C5he&d(`k38EeI)YOyI+gaDI@c?5MbgFKookj)vtRVxZ&xu>-j;;TM3wXk zNhS-~?yHZ}LfC$~;SHv9M~-PVes%2RGZShM z@z}!UsO@CfApIXXS~^S#>@q~4SiXu>nCGPB6&0kCy4ih1Z7!6F+Q`TlbUWP?UTpG) z`i(ad3xn>sQ*Scg0VybV-6PgE*xo=^p59qbh^uqnUE>ZHTYUZqnopXi#72j`18ta9!M-`oe;kS)Zl&yiiZ6e1r#TK; zDhfj{xIU=Kbz`2_WrzBz;ZU68w+Z+n#ltcOp(}=J#tFu*@^mUxeW$K%@l-aSJ}!A} zF73PI-lQkux-v3dRQv7#mvhI9c?_8fjv;l6$`5!Q$I?O>a;MsK=^ss!>Fh{*=7p@w z{@w7xV$z+Ni2_&QS$p5ChZ9`n3U4r|)f)GN9_|Vhqe;V8qE4D7SILNUai=rakS)AU zf+os6Jgw}ZlAmQed+Mk2I=J32i|hjJxUJ>_tnDYb>EtW=HGsWdz}~z{2_4{7WB(IH zx}qSczODb;h7MwTEb*Y~wo>0z;S0D@?duU?%D+{Zlm7tV^2>!-hR|U`vQG1j&W{R> zKm`A#Pj|8<{)5Yp^5XvY${Q)%d{7$yhXS49%K<|rqQBpF@ItbGN~LY0p7Z?>10wk{ zPR!qDo;+H{pEdVV@+o}VJEiT?Refb%e-c6$l4OehhRp2CbyJfT6dFsJO&(haa?mg* z#H02sRh9r0ACvR`tQzy0e1TMd8kq`$rCFWYVw~uP4?001h3WD_dl~ zdiS4sQ1nU=aV?(xB%p2og2ubdf~*4hABy zGHRILaAO~_yFx&vO(PclMq5&0;!}cP5q^iwgWwTXC0EUx7;49MvhP2KfLGxWT?vZD zF&hSVAX}(LZ-UGI7XMg{54iQ97K$vF&)qqHLQ$al!(;fYg!sK4?tqILEjDvz%BaT2 
zN~7yd{(k!%pOj0hA5Tc~b7v%xF0EyX*##%1S@93B_?TR;9{Jn?zmUYBz(6f)AkhQFykK>K)biY2L_@moP14HE#rt&{v zK!pmG*RR>}1tV_t=n4LzLchjPWHNm0r}I;QuEIbfQxwHx`sq!daS$5btKWDfA;QL7 zb=7al7ba>K{+awUM@}B5T=Or3)w?g0tpss@h^KgQf0Ey)I<63S?sF6JaBtf>IKD;{ zdwl!=Bb&(T|3r1S7J9E1ghWP)#KXijjQ{u>2VwoI3(+XbTOxe3t=Fgdh5KL&L;MO@ zxOh8%{R1ijtu5%Q;UQvO{Xaw8q2?>ZS!eUB&sit>BgI-r;7>f6RDa^%eLjq;8Csx< zu+2S5W67Xgk+d!0cXvSm2=$%!*H#Z)J(6F+ zkGuj2)aJW|$H~(9f%`uTJa!!$fjy<_AO4DwcT%oqqG)y&`ehf{z-q$?``mp?klS7b zqo1egVBnzmsaGkY+w8D4?vbm2xfa4NTq%=?<1 zhMLE$@;7aDWK{gMzd~rv^Y7S`M3=DLj7IpEn>~i23qOMQWVIIk<;ge^aH^I`32;G&6EyjS&{b+WEdSz`(cYJ4A1$gl`==Oxhz!RNw37iXk_(95|1@0Rj@%$z;$fpJyF9?bpZHo`#=ezwsu0}@1}T&Y z`qJmf;dWdE6pB<=164A zE#jq-ks!OCmwEmah71}A6|=XDwHwh3&`c}4hz;^{qeZ5A#rkG-Su-v#t|9)0NPUUo zS;w(*4_kCP%T+Rg{5nuQdlfc)Bncj0pB(?FpCbDIjA->b{Ze4PqUip3m(cPy_jy!! z7V;M^weyFwTsU9JLL{YStVyA2EtH_3pfcc>5-<74%7$p9$1uq_?U^sutzl(FNrA0g zTi@f3!zTzv=XV?;Am;hUyz0qP0`7|RJCw(yRgXv)yThht^$i}ppx4v70d|<%@k)-& zZ$ar?+Rl>IQ)CnHcjH? zFz!Os+*BWxx)asP@4>OjtRpHIef{osyvq?3L$f)$2Q$n}DsbGI>M?9Vc_U(Cb!rm~~R z1|Ux|)|P%*=y0BOwPB2(>g9_%5&k z_TggpS%=3Si9@}u)G|Dqr8q;%RoJjnZILkXQ`e`?*YVatfGij(%r16y2zhFiqA>H4*_3%iDV(h@ml+ud>o20J4R+Q&8vb z$mWYvDRsiouJv!4-+;`BJZC3$KiA~+BdNeQ;-cB|U)h9x=2N3E2>qzFKGi~0{VU`x zkp@Be&lbJfX%6qZ#W(7|4KTFNP5(J>cDE#FO)S;^K87Xv1;#?DphQ*EEv43;vI!++ zrmb>kB=>nUd82Tw?0XgGXnkh+$c+gZt>1s7FzimFFSZ!Ce|uq&j|7Ml`ZcwimzlTvdOo<;R69>Xye!|$v=s8@#!9ULh+J~>C-=JvSFoSeid$YZ}i^Z0o`>^>6s zEK!NG`j%R*tnEn@h$v>$hNVE=OTzMo9N-wQVj6Vn>f4!+u$zuv-mO9$Zh+p zy#LMuNJYLrbTRgw6`pgR@q&kV0!oJ(Z^zeXdmkQ zy}WWeDzpI3b&`Y*ZloxlHHV>TZw^)3o2fNeKE9E&0|{pC;?FU@0CQ$Nm9C!#+|rkf zT<<{XhvRqvFSw7*I3ld_Nafa-MYhN zavY8>9A9Yu`!$epGL;;Oo*U|!m(YqSRJQ;YX?}-WY0_6v0bUBtN^B~fvvs=zOntv8P zlsYojEzQ_^)(?P z{5k@7cH{5Sjts{tZeyvdt7Q(E8Evn3S8%puKeR3uAxQ{j8r?1SNmH=NP<6B9H=)Bg z)(VrXeae(Vhdp&%8}|_OdBPs)(wZvvF$TCd^ocHfpVM(zCJc8^#&pj~8#l z*8O{s_6nSz)=eATZ!tk)rSk{E1e^VxdpPkp8dbMqcR5zUSf!lqueKshy!TON_@yR6 
z4o8a(Xz7#2x3>J3l5wxAO)^3!wm&_$Q8_d?AWZFZi>Km7KG6o1Yn5&4g1$AUR!kIo8U(=PbYQ0Y8%{WN5%C)oThDhLxzyg6e~0z*Q`wah53CVO|6-OA*Tc-&z$iXHvWe=b|B{CZ=Bz>#)% zd2#U`G$6M;bf}MtDop#n0o7pGaoUXg{Sk@ApM`J9&UiVtVYHZx;HHI(4xoIl=`p}) zEjU&zBsRIjgAQNF6xi9+1G2lTf|k7V*A^j9dT9>I(QbWe)IE1NknpY&okqlIZjJw+ zziLs7}y_ z4mJ;(Y#*%Dlci4E+_6~m$VEVskkJY_%!cSY?jAJ&#B9vjK}ic>S=?86KgKF~kaxG3 zo+rt~PJ+pky|zb-#DVMwrcjV}mQLGNOr~O&OCQ!)mxZcV>S-<;iNkW-=;_KsN`mdX zLuSord3WD^sPbDyAv{wEHEGx&F}J7|N*(Yng3y&aEztS3fBjm)u$cSFv9I~&Dli&7 z54VpWGx8#}Kk(jd6IlvP6gQDc7glcOSiJqcOf-cvcTwUSYsD8ud{-x$)6QvS427I) zu57w~{!*P8IUGJdpCOCW4r_o0iKkp_yd&6TOM>*}SfK|Tm-Pfnm#n5qRlLDmYPo~Z z3th4U&Xo$p{B}9rnHV%mL#EfIzHqI|(dk_LP>ri(&y(U!wlyLLwqNXiyFe@{J$CmC zt{_wro!p0;F`!`uAt!lCNDP&x@C7j&CN=XCEJCI22H|HsC22;Gp!TPtrE-cd6d~jY znSahYXnwjEjz5Kxx zdXV$Z9f(mqQc3{~NMaEWf(XuV+;8uE5`S)r1t~_#r?pb2(T)G45SD>~nh1 z#TozWBMe4YU=ep@Fc~+83NNWXh&C3bfgS!}h9H^586M7?{Ty_7Wtf1VQ_*2#mAcO# z!ko6|QTU)8XA7UbZRDQy(?yLcK$usxr^wAbo>lE*Y#ud>X)f9@iKx?G5f8*zFChGA z^Cy8sbIDMej9B59N)rbond6tPc98WA{C)fTCz$AN_9ha{l&oN)X=zXy*m;T0zm@DW z`pRZmOl~FObJZ4bZGjo^d&ah1T6A*!X!3NJ+)#!YGYxoFI&WeE0cM9KoOIeCd_t-< zIYFK$$sJO9^-T1SfB2-|-rsuGi@WEq}iZ@c1bJRp}z ztsc3?b}vn0ApoA!pECOb~vp1`1>Et39Vc?@b|hAQ}~%f4fT$A(y}AaZPgv@ucs_??VYu zjvEs#?N%C5wP8Fcu!7>=HUv7LWMv+TAO4bsI(x902jok2pYa+)|=xFgTg?UE4B0g>nLi^A6q#Y6dfchP)HTA+-!>BwYMliCx6WRlvo1zyvP-pbnoU>{UdJcn?kU{yl&#)!I3 zPmr#bxHn}4b-|U;OjVgx@&e!Da8_e|yhMV|0YPNLMJAq&{bjP}EAAOcZF4Lt^z$pn zL+hx^qzc6<*8c`-&SYq$dz1)xh}~E-%jgLX$0>m}?FTvh+e|{s1g22=@%wbWzg!$( z<K5@a!}l|7?bPOXLsTT5wTjR2;^~_VLRb%Ve0p=V>_y&@(4W*^?F^TP?o6;Jx3jM; z-mHrkxjwj-9vfd1pWy1D_#=^51&XvaIqPE5dS0VODl8TrDY3ZISQL4w&Lvf*3@A<} zX3b(}l`;h5P61?1v>#hz{FrP%o9^OrlZhu*Uw)^qjXv++ZzOyA;n= z6H>iGs_8JmCFm^Ym* zvnuDA0v-KqYsl+f3V7N5Lm`ieh5tj`TSmpTE$zaA5FilTo#5^oT!Op1Yj6ne?(Xgu z2yQ_W+&#FvyG!#evd`Y&}jaoQ#$$ndz*_besd%>2AnMVHN&FhOV?u%kHEfc>Zi;`r%WzS+(q{9#&Msk zr7eO1r(4XalciR==LYi+5LSR!uaO!jV2tzgk&)uH_cklPU}Y3B8cP#eiBP4}g#+TZ z`+zVX`L?)qQO`QFi5MWoEnQ~Q#~x|!HMM$M9)%FQPP6XhEC%$42%F<#saT05`=c!+ 
z*9crrm^ku!!K0LFyS{?#!ERj+4@#6vG8rR6HJZ{>^sTdi(#*{3+Cw}7On{Ff z?AWPxcR6MLHYh1P>3foSyW{YXY5SGAmECQ-?^mO{*Z9a<&|?>BuY9}LbFY5_j*c+l z?U@mTspEy%y2=~wUc`boriJYe9q@xdvW+bo)x`uhshj!&=}d_Oe2%Y5_<+a`4mj3! zN21`czf&hB_S^?DCXro|-4c~}4&FTVNE#v!=t4GCQbr#pkHsJBr_*IFM1PdwiGa8U zl6^;*_uW{DB!(HM^rYYQxn7yb0jiIeFk$HI^(kn4Ipwl95eI;D2ye}%(*4eTW4Y*m zOt&asnxucNQ@1CtpF9r_+%+cK-wo>1Xmz|%VT0^kH4Dko4JlkD1R3)MTEVD43=orV z$4c(SYmj~(o@2Co8NFRi0n#kN>kf9e0zoDh@(!y}&ncgruy@L{{A+K=I;-3A7TixkejSW5ZM z6k0eO{jE$F`QXSm+Wq6XRv>c|dlG;#r4&>j`Q77vOc7RdCjgJNjd09vqw_IXh*vlg z8_yzXEV)+j$5{CAp1+X0MKb-Hfngx)sorkVwL(1KlBY$bTs0HNS3P$4mXlxMyRydf zlOYK3rUommURW6e^BwLMfNLu9cdpxOfyLVits5_ym}F~H8U87j;q-F?0FIsNoIXJnP;g7kg=I`lXnWH7Uv_ zgA2a+5QfU&+9k^4sL+EA!C{O(PD!Me09T}l{DcMpArv}IBs(-2q!rJz_Pv|<6SJ3L zPz>d~$x{xc&p@P#oPyR%#84_N#%NP!uX-c%c=rq7W;ao;ZD^LuLiR}UP#z2j$M+>& z5UUHaq`dmX)6(&JO2G7Hh1(<_5#H927WSl#?pT7oe)T_vF-ZqkXH#E1^Ob2SpeaP*GKCf zj`~_6ftn8g!m@h>?@_MqtGs%$<<0z>(*m5e>C&5tY>ZJgAE(Gg;slesXD9;D)zxfe zYc%D~fmglv`lRSx&BZnU+n{o}2Xx&)KR~u#GY8;A3uT}7G>Aq`}V~fc-!1`sl zI&2&tvafu7)khN$@9xbI6EyM;z9Aw)11^c#H)8>J)QZ)R6*|p6xTTv!=~k8=mrp?# z%3s2L`GQ;~Q*Z#Z3RK+>V-tDW&UMb^l#Xl_+Cfi#GlTj=^?l;{+ilZq~ejqNYweS#3nt97GvuU{PKk^y4>HE8- znT+SYsOF{qCbs!yU^bmce4s=V5TPRfZh-ThVKE(e@+cR87Gsq@Blh7D#xLI8ce1_6 z+-$N2MFe3#Qi?J!JpDyBBB|3UmZL(Qm(RW^GWza4ivO-3vtsQI%Nl5_TAwVl$Xv`c z8H+^G0?30*LuqKR9mi?*ySlNi5Et;kB1EL9VX;|)q1*5c)vlQCSLUD|1g?$0nt!Ox znJTUq0;0w!Vu}ej`j}}vhXD_ATpSkBYskcyPwA{Xv|0Y4`9(gpe4p8oHp5M-wI}jK zgl=d!<;n#GY`x{eC5SFeZ$8&Fj^*$Q#rCq%4l2{XPnQ~ytCd%N1p-)VQJSKe?V9!h z(hZ0LFkj#NT=cGyu<#~{bj6{z#vPbpXAC^uP z=V;bmufO|P+Ls#X8m_{hVYL6NaFex6C7{T&Qgc48=WKt9yr60*5QY_dkVo9naQRb1 z(xYXK&d8$U+X0~>GtT=2! zmA60$|Jqut;o1-J>^4FU;syYtKLmvfJ!v#OtfCES;8}J^PFokdf~~MxHEh z2x;y3SmEV)H^K?vO1%a+``LPrs!Ouz_vXP(j=nEy^ zRBxc8iH4}C*aD2|)49?-qs95IJSDSJ<|Odh-q6WBrd7M$a9hYmZ#PTb&z3wbIoE=o zi>(dsMO3HFr^Cd}rhd>j9c3{YbZV*do*Ur>MP~=*V-RctUXm%FylFdOJ5y4>F2$?Q zi#>16P;N`ZgwDM!KGnOP>Ea-a!g@E6f2dlb5v)VW&F6LxBxiu}3bSD+?l{?@xAc{G(rT%+Y8{*U!6^5? 
zFzc_dNo6ZMnrM4bqTcn25pa_P-CA)5NK-e4UeM*a_vx&$f#Yd9#|T@yL(-W%|o} z9T?!@CPXhr;l|zaj&6vz&f+ayHm}c@#KF*N3c89+o&Z?#n~lNLxAy98fRbp0zp#Vs zzL1EdGYVwjW^i8Mz{-o#;U#V;*N&JO!48EI&Sd~nI=s&^IZG>dB4Z3`+vOd*l(&6^;HG~X%EIqC36;Z{I z(q$Ua%xvLw>;Vc?JB?uq3)!sy5^4c{4Q=w@KG`{WsIun4{Y?1nTYExf& zIs;*Lf+J>Wj3{XUcK%n~p3_Emxc_^Hm$ySkk20$~q=&NrtOkc$8>w!iYKJ-xXFkff zAs|KAi$Atw(Tm|aAWS7Ls;-R}`tQxPynPMGQpuy4VGI^>GH)`744X8$Kpogdhxov( zKO8Z!hF{bjm|{8_n>qo^1_5N>Q)020^7e0af$%Eqe6J!p#iB-93u)TNBnxC% z&FMG`T-SYn%Rs9CK+Tg_0Z5y7e6RJz3zz&b?sXWGMh@IDXEYRMkFLSJ`IyAa!1*{G zY$Tpz&sJLW%#j}U_^qf~+MMLQw3RKrT+3JU?mLtfAd39S((s|K6p3yr`c|gH_>4Ri zz*~j`bgg-7Mb^;|CYtdiGG^Gc&#}a!;ERK9gU|#S*_omQH4yV=C!91?l_BzAf_jeu z7`KX)JwiU1jBp5Q9*P}m)rPNmjlJ5lH{vdQ>TCws0RDn{pH%=1e`q|ix^j_eGx;Z$ z6*~k)NA`+H`U?Z>H`A*p(kW5Q43faDx;Z!?MSsAT2oO%cBAsDFMQjR=TZQU=0kUB}#|iDf*8U6r6%9C+ zF!&O8qKZclbeHdn?bQKx0JTKmU-|(+5Jdt9sg~>G-tp4iWg8ps-vXB|d#CF+vJU3s zI@RC=eBSEcLC>2( zM9okeu-Ilm@%Qk^M_|K3xDcJX?4^8F;M+qwx1{pT$-#fV<;pJ8=ov&s4E zd$>Lyjbx`O+`l>$68>wzh&H42Wn-!Oy4*|00A%k|VTHo$6cu~}zABh~S*hZ6eU;F$ zs?+9upi7pB2h1eG5Tcyuw+~H-IU{nto`C?%J>My=DWuI6j+4sg2ivT8cTXjU;i9Pl z5YB|cL_a}ecKj@$+}S3=R__~g-sT2eUEbEm5caWEP%2~M9}M0a=&Bb7TtqmRJytqs zn~=MHPoEHv;Z;&R01<`FZ4Heb*yrSlc;AkG8Euf5EJ6F@^XFtlYlX1bEr#G z_GzMksql5LAqEGt9xQknIw$-I`@!fPHxK)#7{J3#^pypg(EU)vSy_iqZQLVgkH?@( z3y`%2mJ&cCKS=Oi&?zfEM5G4Piw2beOG%a?+>qLJI6MAjj_^ms2qZb_%lBZ0FSpnT z9+Q_2tvkT{b5|vJ;4B+_UA7Pi5E0KW-Z!J*GG%$No4b>fRC#_B6vm2FR77GpAwCIu zIfSI8gktjqlxhV&VwA{}R91wLC794enUl+p%P&WxZnqaJ&i>2B7^9$@CHG>}EKX*J z9jR9N=U?~o-#8LCj6LvXHm`o>6hLdmKwGA$N~(BjTy}@b;G34y1YIIebNro3H)Ya!*^-9jaz14}t2d*mzN$y~{M%}WRKWr_J9 zX&SfKdG(Vf zm`{i~%Tu6zC@JrlPc@!8yOZcLX!+=&(|1uJdqNG#6>9XrE&ryXI4}shHxOp0b?%DO zC0;*0$Tzw|5vMxJH5z21@Fzi}NV&gDP<>G&-yCN;$&y*|{k#nnu3#$gm}pK9(-JeD zsi6W7<0k+=PgH;e|Ar}z zbsYm>z%8z~i(CH;?UT)jd&jq@%}R$3j8F^lPibp?a*^w|C#D-9_S03pIF8ZocygIR z8%Y&X>n-ebq8#V~wLe{6=-7u+u$1XCJ~~>?I8vQBujhG8VSxmi6{CG++@i^~v9oVM zF)PVVi|Dm}{L&p+7Pq5TQl;*i*FgtRj~4V_?WzB1uNYK}3ZcZ9rtpO*_g*X;KiBhc 
zVWh=)LN@fKEA;(HWRN1jArcWECmDBV;f5lI$%a@GsZ%9v#^<3MIIiv6Q$8zJ#_eeaM`+u8n7%1qxvr6tda8 z^D4OW&5oxU_r_gB7T;}NNhgbxlQ{(t&pdkQ9ldU0_ZJ!Eu!#idwI8EXEn|uE8&qQW zCeuL1%LM64oY?PL+fjWl zk-!r2>Kqi=$T!YSp;#m)K|KZW*!b!ffGbl7W$DMVG|^WAa)77OQ}e8f$Db&(LtehM zJmDGux$|;&W_EnWygxWS?kKTm`7n7<0A1h&As03|N*PRy6y2d@Q8K9N6Wb|Pr#L2yige!E30n7GpaX1znM#s8KUlpHT2G?a$pAX5BcT$cA&s~jnZJrf8vRM6r)r%+!K}6)n zp?=Bcl!#8giAglkYB!d%dDDXISPlwb?d?A}9zvn@iWCD@?CGBq zV&$l`V>hL;azyX@qS~{B2!VM4^jKa`N_O!}e<@wsM^O=(XPx#($F`#-_x}1B@7F%U zL67U0H`eJcR6pkyewA5>tGjfNhjfa{k~mBN!bprB4EI-=UN&uvf7jI8>)rlxh$WR) z1lmUSd2^iTB>7l~w@9EN;eMbbzD$XYE$0;TW2-_kl+&Qa;WMB7J9D%0X%!@~0chNn zYT>zi=|`R_{a#i3hp$NFnk%ZK##!?MP7l)q3fB0;2u#n->(r^wLt^)am(De({W7x} zFN%2JY3s1aU<=g8NjH*Q4SaP8Hz^4PnYuAK1$lyH1}tdGDq1yznfa#;^tz%%(IzxJ zws$EWFN=^^>=vo=arU^$?lAJrG}_fV38-(keI3T$)`>2V9-|mj{OP^p;&~}cvfFO-Zd)>e{)p-t z<;HrB8B~4y*c0)Lm8A`PWOs{lQ*nv)GWYmw&G8e$EN3qt8NrfBNjM?MPBgRU-+0u6ItOc?3(|`x_PZj0tU1>_q|Nbd_y6tj%cpeEPdseAT z1Gf#={6)LHUe{Gt1y75FA>vJiyDXo6C1~5&_RP>9%e_bR+!rIq<)VxF9{(;cZHSLT z5U+9MB1kT(d4}>b+puw&0DGZZxnhg+_+~L`bJ~8%OAFXsg+p4)3ouD|5h#kzdz6-L z#99hGCuPhl>ogZ^29MNPRM!#ZddxV!jxskoXVZ7Y5m?zJ(!E`^@Lr`pb)r~R{4zb=}8IdaQ7p4Bt1^|~f&Ji49ZSKYsm7;2BcR#N!zhu|qUdkwF%n1%I* zzLa*X;^JMQx$Nz6am*m*Z1#f<5fF}ccZVr)&OVpjc&E5L&3HO3#zTPJ479y}GhXGp zT!!FY*e4|9PDKg%XMz1|RlL@hw)+C~ch6gGFBuSPD8PY3#8&5HP9Pvtf&n|c+<*Ck z7U4(B|20=4c_7H|@Oo$hrT_KPe|+uofolkUCY*SsL!Lk>5+K?9_hK<~&}JV0ej20n zdW4q7{B{9k-hVYp2!b|){`<#&bR_OQm~sRe%6M7?5h4s&Js?^A^U8me0;ClJFk1aV zVW7j5wK^=S|8DG`o))M;u&n<5oc|j?P_#5JjV1o?M}RPFk2Id~Kf2HV<+aw*>KT@? 
zu%ADK`KMI=&wcQP%?bF1+4hD?J-p<9qZcARn4|JvuQ#2&G9plQSHs|c8TP+Nx#lMd zG>^dzOYtV`XP}QylKg+;(my;K^@i9`6$B1+1p4dm|6A*TJp2KI1>;`>HU(@4B4aST zaOMA{Y>XDP*{6TgaKY#w|M}`fwnd^UTPJ!li|EX4X zaKhr^uCPtbM}gsSxAe%h?(4-kDSbV3DG*4Y5X`U#O$Xop&kqI}0_s5!KBVXHy!ls_ zulBE_3&_vh5EO~%pj@4J|8A9&Eq8pS&YY;-UKlc*xH1G~Lg;lR|DQf$hzNTb8rLv( zYhbSLcs@K}UNP;Nj!zYEY{!~Vyec!1V4?@CxGzn5Et{j&a8VrJmlU>a?YH7tW|xS~ z!?{V!D=TyV>h^2b|NeBxLMb^Z-K)Fu|La4z!o7B&JKmKO&FF@u%IBX2=^1G;^MV=% zuu{>ir<+`}kB{z3KsaD3n}MbC>b8w$|J42SWTF|3xlIYBc{neym;F^a#JSG`wawoR z{pWWEEke@+#udY!;rLKawt%dEJ7gQ&ssts%D|cEyg{nrGX(1s^&JCnUPcF~*QV3<@ zszUYV!vD89^Uoe&3nI=T<#vY7NfrF=edfD687K0ce%DB$gnPu^pE0X`N3kbv^7 zt*?#n^!a@qDRy{%i?E`(^p}P|6^TKEV~hNcm80+$e9vMs$;`4Q3?IrfigAzlGa)b+ zQ03vy$GRDJ%|r$5Oy5x>dSo{iy!-#zdVDYI0`{!NSLL|N#@03WmxzHn9CV@H5Bd_z z5Q7)BRS?mW{*Og2W(;lS^shnE;X;&ydJi1vGUAGl;=UQM7|+V~FX7>M=%c2D|)GBD(0(_n*SpF!Awy4L_mtAatUs;FPV*C&0lfp}4 z4CJO>Iuew~U?U0t%O?KULVJPkZVOO}HoX31hfvBJ5A(Y9qSc6FR-%0T5xO#v5T6|P zzq~u(?^BfbS9fCcAjs(zLsXJFpJ9^XHgbMTPIEx`*)Ru|F;HT=}Gw(Y$WjS=ZLRV3+nxBm5<2b zxPY`fO>ggJ>TK80K}OyXm8i&tKCs>aIW*w;0(~N{b6xmvCQyQbjrjjv%RjqZ_v`5e zC|IylioyY6*zMY6a2@eGjxEcBTamK0z{27_;cz1QDY(H+&?)D{M^~Ll&1sjvTBO~ZR1DjbX-C&?T(9qRxS>)o#XooK{D^T(d(Nvh_0b>27LJt;=e@ zS@9e1oB)#vHNEOq8QnJr&Me5Pl zcg?P_UQA5^gRtBClU5;8>F16SF6`C|`h%%Fs>YOGg?PlXw{^f&1EjchRO@8)GFEAy zwFeVy@Vj_c>sbpm$Mgm9ka+0=6xfAk>}#BsXPR81YqN8^ic@R4IeiY#8&_W#F`Kd} zFR~xwTpVn|^@PV;=bJyYbWx9%GoGyHSK;L`RVxtv;?t>Erlb zp!ia&ooffBtU-h~Bhwb+aOO+Cme+3~{Pcq7)@rpb%sXG4l!a=Oz7BVTCjWQ#t9Q}6 zBBnG69~9b5$knUN)a$ah?sBY47Q3aWNgKbzC@%n(U{ewkI}}kQTy=;xmTgYLqqG+SGU`UJ7`f{GN#*Dz~%SRi~aucAiFut#$9G_ zDbUG()aQr(ZiNuIUB<<{!TE)3j;Q0S#RE8zQQj;BB9lY{llu5h2p&&tSEgiYFf?Cu zd#*v<{$LOO$KEs(rAmX9h2xenhbh`O0<)5>l)kKA+cQH{NXDx8FGLd<`a4 z`TDT828wl)$2zpA;0~1IxU4$_^FLtojEp>{dJQP9q76u=hI}S2U?_@2*8&^PT zR0;H?s=F;qnYbbw8)HyALk_w`+FX1erPHpFsblh*O4aHkjYiyJEp^zHW$<-qI-c%w zpX!e2T{re;rBph!x#ybJqk5_YGV?}A!!UX!J~>;GEV4@{Tj3NB%ptVkyn7Ci1*Owq z$v%joLe!Z2=BRcqbZ9JE_6TP?^LQ{cT!B*Nk>sq%_!<|p=+ z-|^qLOUYNZ2Fw-Cyu)`N#FJ&BB^T<$9eTY 
zEj^;vZr{V0LFYykOl!LLmoSe>NR8;z`f9o{q6u|;D_$r{lKDDCPr)t4V}K>Nk` zEM9W9H!j<6T}%7q4oS{z_PMNCxOVnXQmB>66f{;27Byfn2FXubUrN93T}7Si-cG;> zYBd?!DX%Q`MrfZbVHg@iGrPhhW}VxjBXCuIQ`nE%s}rwu8o3}-IfTcjP2(ZDGlW+= zduTSLfk&dF$VN}bov%%E359w|vUHmkvftb}ew5xBtrfLz+zo>J)<10?KJrA5n!uy` z%%4Q7i#8~JjOtq}LO7z{mxGki2u(o09+&4Lt8|h&5h(JHkK=3f& zo7qC*dE>`87`*`xi|Pv9$oEo%o&WI@lZwd8okM0$|Dfl*_uJc}vE=^99_%!m{9e zdLr#o}_AD$lgNBH72Z>nUMhj|*fp^6%ee?jEknvz*D>b=ZI+$SXH(fb zcn7iUvUJ2}AFtF0<4EJr2P@`l&@|GXSlkYj=UcaR4P~{fiT%+W57w6j;v;yXtrneN zx$NxRt3*WKvzCHYzFcDw%ktO(3H&S{R@hPc4l72e*_U0uQ*wd&qFw|WtQSd%gPfAM zRd<-;B?G4H^4%p?T9e<0O$})WQduMF+iwq=KN*5y76r&zY_$1=Rp*YtQbg@65)HYI)ldOxQ@A6JZ^P!7? z4Tkuv{ce)5%liz{!wN4|SikQinQ@c1afWf_D8QF~v5vs(AeO1hz>23DHvLnbhugul z`Q~(uG#{~&YRP0T?JBNf^j4x<;B0VoiuIG7R%j;cR*3efo(kX+`Ofp%n*iGz z-QvS~Rk!Ej&9VtOduzlGp+4tA^Y9gqwC1+jqgs>XlT+_eVp^)XFV9QUh(-nX4Xg%4M`D#Oina+w2RZNh# zzhvHB?0cmRILoEONCeyJ80d#K^o^q1@Jy{%$G>wwGbn~xH%rews8d-^woUqi;249z zmqshZmqJDm`K;x3jc0M>f3dlXwK;zYeSqV4As{zdV7*2wgC2m~oGLd{vOIsr-YVd^ z@R?kh_$blW=$@W<8jj}$c)mH1vvi-gF%jAy09Y*LCMS&UJ~F*&|GG7(JHI9@L#!8*;RhAWT1(9>X-&1dyWOAd2~AJ; zQ4%71WlvACo>QY@$xOdG?shp=!@)dRPm(AI44J}EEs}e*IWD5yJ$6SNQteETWnZp$ z2OWEdD;UWh=-ciBj7BpC5C@~8MVuUlTompMr3}mc8L~(2f@#pd!E-rz>F%kPNHP?> ze5UiAb&aQMHR!{4F?blZsfxmg?H%lx3I5^g`L`_~7%rYzRE0SSgVLk^tM@+Ty_pjG zl=JuNjGd;dINV-YbJbXuz0BPaKa0jJM(6n5cADaI#h=Q4_2$Z=b5>y=4x~N$tHuHt zL)qTEoR6qU@qvwGJbyn2t*i~0RR6|Yg0e?`PJN_2d&4c9{lS}wEO>+UNpS6OSH4$# z!v_JY1@rby(sLXt4&$ipqC0-9m99?tNg%-&uiZnF6v-9b0rhMSs?a|@c};du(B)$A zE{MzJ<0%7roN9vim3))>0lS_bgO}Q6CG`v@mQVZ4!g+%ys`)rOf+Qn(r;90P;t1q~o3&+esl~3bY1!3;PxYtF@(jtj)<6C2 zvboQs=l6?aG0yHfX48jaZa)ncqv)V4&^zVxhAGWl!d)N!9=xj#hYLri?_0=#Nbfi! 
zcRe_V3RvDyL!K=+_bfbia5UO6_1tPun8nfYCtq>7R>$JK`@DbmWayG;e~Zp;as;E} zoLIeylz?xYHTONB6gPB@Y(PF5we* zQIs6L@jbhLj`r>+Mo4D$qxA2S9eQB`$zP>Lmq2wI-yEq&)F9<1fcgKHg{Y?A1?hQ6 z-x+1;iRhcqa7U}IVv?%r*p+h6<~8)OU6K1U!u;}fK0$+INU)b`2uX)emZJz{j7J#$ z`XJRg+bqbL$<{)A$a>_f$O6<-!BD+cky<3H$}WLIhAu@W4=$5dBZt|KOCD)7lr|qQ zltAfkG?*ZtGR%xctFeVmsl<>iuLpYp?34hHm zwj-8GdJ)J6SyD9O4qG-1t+uFbth@@^v||#@#taboaA@hIZNCg=+!HGfCOG46)5z|B zk&~-f9VM_7+MCl$(tD_PskaK755fQBGf>VcdB<|TqFPb#^R1d3!B(S7wIEZKF3v#A zN4%M8WPJ{JS)CLP<|frr)OgX*0In6eyLvMfMKW(d-SEEa7waKB*(CFzth})#I;#7u z?`M$Itt8v!(5`=db@%N#+!3N=#y#ZO7=8Z6xIMFTY)?G zIxlughuQBYjh+1nhA>kBfuLvS=+a7J(ddDKXk84{gfn<+sQ}2<=TJh1+I;kt9W@wh zYE^57zA6~>VQS~3c5xjqs{)33!YC8zFDc6$35GAeU7beGJvJ|5FT7)^Y|b`4Di>zo z9}9-UqhTdH(W%X#cv@JK=Bh}Jsp&d|8?2W}CEFiIj)r}rQ{VXR11`^L9#vB*czmk5 zo=hk;gL70b#)d*2Lv7PqW!MThBEh>~7=+fi9Y#0_Y4c6peZ;!(5_P!3%|(C7 zY=KKETz&aS@9eF?WFkdBjdAm+)iZyQt$Jy0cG~{7W=VO&ASe6%@1o8^Ar5+0i{E`T z_8y!TK3eNHHdm2NhS42fvX8fz#6?<}>! z^2Rlnpja21U0V9!uS!uJF#H1RHuE^WV+ff++=Fs=jr4@;QA8{Jdvi>raS`cBfGc$-)Uk<-p9HH5oEq+r!ZJY=hV`)m04o z_h`mCAECA}ofoY<9d5%|oH~_!DY?7EG4?Io!{s{Z*nc8B*xJlN|ZoPzWxVu&)&1KZp zYsHc3w6=RL*J{Gug1{H8lSCctLkbgCo$sE4S4DbaM&rKx`G>6)>`JW!D!T@A5;9b{ zFj`pynh{g)`(AnUN^y1yyiUzL8S2&5mHwPs-b3ns`QP818N!Uu@+zs7&Zq?Fp3LWU z1v|{TF52&lEViG2DR$4iQX}fl^W}QbHKo5n}kM<5-VR%eesE>A!u(uSt3iYqA zdNWJ4>oAB-7Kr;RxAtdevD?b={G=D~EM@23bc1!So0G5_+tynTyK9V&w z680vYYh5HUMDX)db(Dk%NC`*aYQgSxE6yScc)w(&cPt1Fy zi}Pq~j34H-&y;5&!&CFRJgyWe?d*p9`~gJj}$@;lV4>{AMg;h&}_vBCfWx6g&4; zeb!R>XZ&h^@O53YRellpLn5#B%uq%y4Vxz3jt@txGXV(KdZKEpjlH3TptpQ%D{^#j zZnv)H`&4f$e>}htGrX)-eBJ0b+P!zWeiz947NKlk3f#8ku_8V2x9-7a^+RHSHpHBJ zsC}%nQ;W;775%%!5S?sd9`aA z)zU;c^otQ;Uk<{6iZ0QpxClc2g!X5VRNlfy%9iS{1gCAw0$*mDX7Q@ho5*784;xYp zUj**fLodeCX8NG9&y?bOe-~;WHct*{kC@uM;VZwWh5i{3?!mKC^^NPioG8AS1~Axo zF|}Yo-q;Y&IlJ$R`$0@{+0>(uS#2dY{DL6LY?=IIyn%DE_Ut^wI;*9>H;d*i+IPOS zd7GXIBW3o5+ao#x8x%M1tLHkxwU5h_S623cmL7rEHu>jUD-9m1vHB9@jfdUrerlF9 zzQ>U6pu+cSydk@bq#Zxv*GKGQTQJ|~9Ch9;9Fv>9)oeWeYPWK*bNa3}#ZPbv4t>-6 
zRUpJtwZn=^_OFUYl27pEp8F&uA7t>Rhx!@CF@Oo)H|YU59+HBxAv+%ZNkLmVvKrtoA5(6wG*`$wTeok&2Gp?~$UKtuT=p*0M zK^hF`1N5LR(mDck%oz}?JV&zyO+LPpAM31gQr;U5+qJ53grKyPA`-7Kgw>kb<2K*p z?OdpnB!r-^QB?Qdm5s~iMURrQ?KNfc-Kbf=}fd18~;+MZh05kH>61B%Lz zy16d6M^CLg2$rs2Be{z5%rHNdDy>jWokFM zDDZHS*w@wHY)e3GeLxSZ&SvMkprY9*kj*(}w9cdHTUNMtmrSv<&!V2yyFKrAoy@T| z=FJbSj`+Oi3|SX{fRGgX;7FZDov#M*87aK)v+nb-q}&|e`ry#f$%kZbhFZhs_n1Te z)oAo951&O0cXgtMzG1dv;xx(R%@A(~L=qH?S+6fjXrB%W*N?paSVe@tz-l9z5pxlZ zc3)9O*QXkTuMh^b$23gdU;%z573=bKaj&u7e0n}QnTRvKKS znya>!&WH{gbvmY&dBlEu-Rm5w%p0_sAh{66aZLY5J%&b(TyDGX!+bEhQS8|MyT1s7 ziAD{Ld4ZE8-FKCv2>})D!}v+@{I8x0!uF2|yxuSO)#Ykz*oGOUp6PCa9A+=0^bp%H zW#_A2jh$LUKdX_V^s2b3Z7Nm#ss_44x%^r?Exc4q#qy4j*G9VeaJ}wI)pC`tsCegA z)E`93;g%hDM|P~<;YH=x>2r6-?i_PD!L|esCWS7c0hsa)1Xw&iw;`6=MPKD-hL_$wgRT%iiRVSvd0VS19rHDc zQt=(!XMy`_uQqPO-w>@1E~tXtZ?~ISg1@?% z$7pG8$*+qZRKbeUki~j6@nXE|=@!2%m5f#-ql;NBp|dahri!sD3T8{NI$WResH zPl8DM*&q5sewwZsfvaS&tmB2ne8+4dJZyi6eh)e1SjNYCo0RWUYrWE#3}wGFd_bpw z&N7OXCsURY)*ROhAVWJ~nVaj4kx{h&+(@X0xQ8Ft#vr=HXXZxo&PO zQbn;_&RL%n)NQ%|_Y~NL&P`vTZOoq^tU8H28Z2Xr)2oVCVwCSh?0(@0s*9wrFCssS zbwMf{EkslXW}5ZCu;RD(eM)b{8fMakTbe?=3#W3gUoTY%Vm5%4wq)r7GK9V~s zn92Q3WKT`eb2?tuW%GT!my|&YPT^dGyq(J~LSUO?3)xW*BM?gl`EvWNT>)iKhhrC| ztdFbN5$?*MWp3uQru^TheQ7^rB2Kw@n3#6t?yold2qbeSC9RkGB}>|~?bjd~gSV-u zhD>u8*zIA%92dB)I`&53psBc=PgqY|xb5Aw9#U=3bG3>P$lI*7vsgTTe1DN+DjeVs z309qfOKSG%SEBV+{?He-p;f0k|_@!(_tFk1B^% z=rW1jRAdns%jl4_+!`n1;Cm>C{XU{CJh+ORbZn8V{vs<@VF44U6b6px2Un(hH2KS; ztjtGvV1Rv)-lbNpLeAK6% zk@RIAe4B}E?+Pu`c21`*D2b>DEaJpeD0uu{3AvCIz{X{~<}kMMwl#n&sp)N~A%q%X zxotq2%+vO)h#u*Wd^*0}xjll82*$xd8+3SNRN-F#-B=!XlT@}J$v@&lTrh<->-TL` z{y(zLIxMRGYxjyEB_%Bl(%m325`svJ64D?b5(6R(IY>81cS|S@(w#$>NcVsU3^8=f zz}YhkG8zB#?B{Sc(5> zcpSj9#<~8>}uO3k1mNWyAQwm1eqF89EXg(#=1w zGlfkWI%4Ril-k2FBOoxhp>Di@s#J(FhG10;(I}*XW1QAjG}Y|O=^sz0b*HH)Z9CE~DF<_Ge!I54p<@@y5WJwTGPiAA z@zNA@5ho_i^xAKGD=?ORn{Obniu3*<1mo560F8O+WCj=%R1(36Ks#TI;6x(!Yfr0f 
z_sDX~W~I<|?mO{z5AEIEY8Jpl?I{8CZ?vAN^*PM9Xl6*TCL3j_*EwDctNqb1m&z{!Z^K2W=uE5t`XgW>BPh_Lyvkk8V~~mWd&oE@|d{Q_I%a&B>651eyMn z3J+FNsd)1Zdzti_sg%!w6MAg^lwaXy54)(}qJ1TId0l94GVkwUQWGq*F#K#DBnJ!A zA8`59c*I;d6?)Vd`1PV5U^8!V^a(}B0QE6|!YT1jO2E#2aah=fPMZQrxaAdtDorDP)otz-)XnUjW=1%3#oC2fQ8RIX znp}Fw*?x-U#e-fZq=d%PGv_^>29l(Pth4Ve*+g4&LN0>^iE zSP(HD5{7*JkTB3_*u-Mp9>VIR{KOH5_O1LPZy3!vz0AtC@fzQ=;kR5X=KDCz6|}K( z(BM#|gTz)|AoF^(%$FxvdSsLl~*W^Bb_1oLJzM_V@ucM49W$$Xd&%NqDvh8p%IA}6PDKa+>INVOC|`64qm>`zoM{)_a8PlIdr=po3-za zVjKl@#H<7paHV^FNUue(a3qJ?05^Y-u~$PjFH?^Vu$@!u(ZVcUsWg7II5A7!e@niD zJHN#>+g+dl4LCdw2>^GJZnC*UDQcy}Rg=`ZS}iU6eE5KK6h0%D*%lv&`F0A`NSWVs zWd|DDPEqPHU%+dv1977H{76xc4un#bs`{&JG3lcBD{1C6?CtfJaTKo47!s<>s~`JC z{UmPCgjrHzAfG$;#x1%3GuYoG}0;^wkIi)lO<#NdfEvu{C7mu)B1C>P^A2% zf=5G;2GlCODIUzw?0n-_MLI|0%FmavCB0q<@{nKUdC8FdMMxBM%ul40v*?B+<-fi zoc7$D-sFbph$8!fT8A3a>g>EyNq3A`pwEOc_?!~VC@;HSuHzGo?CbAa<`brT?K)r7 z3|Z-UY8aBcLtMZ)bLwOtZ#l`EwaeZNIf#jshNzwINcCmB^WfRRB$9$#xQF+4pl7NE zD0vk9IQ3XN(>XR`tEh$?P7d*)`k3JTLF-7~jfE!mv+Esiy%Yv!qh`kvNsEzMLa``D z`WXBQ66G+9SoWt9a`IFgUl#T~`}*Im%~QNGrJ&K)j|pu$3cJ{EJGTCn?DU9Q(ho_N zakuYKjhygync?waug0`)evo-E8ycjCVg!p9Q_h80)b&Wf@~|zrEveyL+vAY+5G>-p z77gQvxUjgY2)i33jUwtfA!N8l_~1*)hYhj^doum)wM80KaO#-GS6@h-`ZXGU1$)#8 z=6M%vZQSgPsV)#cP^S|Czk5*hTz*Y-C*R;`ScfRJjUdKowaa`FZOf~#&fI{ZYVCr;#A1KInhfR~*v4@4^ApXsKHiQ^snmfJ($#I3v#2B#jouR6 z&u}U^p%LgD_Kj6R39+(6LW%I$)1|fc_;5ubx$0OWklC++kzKgrSM22qjC87iO;Pw0 zF6>WPrq3_XyfbIUq8WPEkR+4{BetgCB2cS+ixq=}U2_m*N{|>pliWW+N~w~BQTy$E zYuoF?ESNFt-|z0pjh_-;PvYru5dXytE0&+q_G+VJ|Kv}7`sr7=2>c<_7lAKF#_f(T zOl*j`6ANK@3l}e}8f3jYX zv7=t`+O|sNv?47T9*H4P-KegJ1Ke{y=Qn;TVx-Rk-J|>bULXzeQ}abU$Hqbb(E=W& zsrgIEtc>@>+zWf|+I@OCQpLVrttd@PQ)zv>;LJP(+sHDVKAVJPkCL+8s+mc24U$Mi z^VgJ6cFy5tV&Q?fAW1&D4J<`koh2w0DwpEUwB5%}-y&OpHjaN;dKSzUcmbT8q@mqL zGTe}!*m;=dU;A{ZmwcomDZo7O6(pmRGEkC+OS`4g;QN-IR--B&t9^1YLoqIS1zf@$ z=|ao?ZjN_9yEl!c2&d*SwKqy-t6mivAHiSo(T#Lb_eXK1f}gZD`{IC`!7_H_0T)n${*j9eY0*}ba@fuiPuH# zQBCTuyFs_resXAsBNc}7WV+zhclhE(yuvaB 
z7P#d9?c6?MS*daFU3z{n;-0d8Yu(@c*Bn1qI(UoKgH%JDlC)M`0LCh$S`J!FvmcUI z)^f5=`Uhs)0W9LzgmO&nSQe}CrQc*uidXHDn0zzB8AWthRFfM*Y1!G%#C&s?58Wk} zf9MtYUpf;zJw4!#$B@kPn8$I zVReDC?ZCbfcCW40`A8#OEJ`i)A~+{B&t&$kZi80aU)*QF=ZeGrpsMD7r!SSS_OJ7a zO$4Wl9FQ-8t`>|XWZ4{55rCTo(O{(aGrG|0wlw=}s9_b*xVt;te)T z$OPH36bEoul=Gnkn48U=HP!I+LQtWGjn%zNO{I(+o+$}#h$Cp^#)9;DF|`g~>k)Lo zw4pB61#!X9ZAtZjbU|R$n#U`D=7XHGtLgyzaQRXFRUL)f2+623^KPOn38-M-`<{cuE|hRbBHBq)3_r~X*+~@Aq3n(WgI`qn@cz^ETFFIkK6&~c7CARa zgGG8C`6;l;Boi?F9@g`&ua7#szqf%4eI8dAArpa(_No3jNOhg% z%Ln&3&RcIYI*U?5V^{)*R56;gdBy4Uc}q0uvuF`p<~?nM^N%jo}JA-+uAG%a)39;@aOVGgt9hhH=<&LqA0REO7 z!GxQzOCceSW2*Ab{8ou*g}5%)d-h2J4_5*tnb42%&8r3o+HyX=!L)e6s9q{NrNAY( zil}b<6T)wGo$ay_8n5R@*Qg|9`YF}OAm_{bmcPtlh{APBP>Ci9tc~KK8%nu62 zI;efjRyAAYLZAv=#S%^yv<#j__tE%;K_uiW8KoaotT4nIE;hIv1LJkI722jt`~d}& zq^WA9$~Jg7bwab$baA$-E9Q|?OOjN{^ey_kw!M&ZM>a1zXwjegekLw^IKiaB={2T{ zeY#Y9=)t6_OA5S-Hr1?B?*8xuU37}0iTH1n1la=uC@%>=)w`P8C}t{9Zn0HO+8-)X z2$;|Cnfy!a+~gA)v0`R5$42a^%zCN%;RHP|F~Ny^9(vh}_G|X$&z?rInTKi4$FeWe z8P?bQeGmrQM2SQQ4(?`)S?bldLIOBP-MSQ(P&F*MtfoV5uF#OHr@X4GHi-<=e2X+p zNi18Y{IpMf(PAdV_S(-#Ab!R9FQ%BRk21CfjUERwGj7Rh9Qlmd1@~5 zduzTkOL=Vx+hBHkwCH(-`ppKx@ld21PX|SP8BJkoox=P?tfy=^MPuI~|D_-0Tvep7 zbhnJ=x2T1GoL-R_o`7q3Y^;=)|GjcL#l^2a<_ z-aM=|+tbaCC$OkA8s2}V{;QXZVGWl}ekdtA!4jJS2iAw9dCl)~&dWw?&0%B=9A8m+ zXL&qw+RwO;abpCoJlV~icqKWx@@E>?^`S4HfpiaTm4Xw>V%kvNXTtF>_H-$#=PAh) zU@Wo3@T7OhE@7FPNUp(75mRLd67B*yxT9C%hedJf=&*isntxyjye}2#I!{RzG8B_- z6AF9X(8k2Uki6B_^OLEZB3JXNCT+}5=Xvoi5#?8F{+Zh0u z)Xdz7ClAKmI>>#e=+^u%sKix)PCIP;?E8cDZ)k1n-DtJvCad@DD5sLIjvdeo^~ty0 zNM|jS82vSy=82TrO;vt2OGPfu`6YfbuH_>-^EzUb5r%J6Z8q)&)BlvBHaC1cDAbYFrjyO?Li^-PY#1z zYrq#}?-Gt4`RYD2FYTm_MoE8>?$A)`TJ-+qT1b$WU>0lL%6%>P&|Y6yg<{NW4E3D+ zrk}wlq`l(o=*bOF(7HC;mwefyFUS4esMm7_mYd$m0iOkhf{^q7*Z7fi2C^uy>F%~ zAC5x6sPI4@mvol_&y>V!odtA`QaO7Skjk1w7L z_4a~r%2XVeTz@^4uWH-sAfU{~6rfq8Snye}N%_M7fi{&tfCM#yda~RBTIK*ut<%%7@T-dKe!vvt7t(-UmelAY z%4sl!Q(Jr!t9RXmUgOdk2q^bi(~CoGtc# 
z0%(zOe9{b&)EoKJ>x=uES4zHUX!HA^!v_wCzqT_?^uIM6}TeehhY6zsKX)w*79`yDxo-%`EHzoN~~$4Oe*q_bvEPfkT!pgLNnB z8BXuabj+=K+D}y{WOItEoiaMa`E@hcw8bAKigHlzeLPJ4{C2Ta&zvPWf#Lr|83KJr9Q}99Vt-&QH;u z&MTr*!W|nF6#gWeK$(GbVK%}DV@_aMyxN7MG1@(#?}LDUJJ9#vB+yOvTmwS6d(cyX zdtS62Fg-`cjJUh#JP>P?-^BPi6n{=JmsObYen{t4_0}-Y_GFRd7KcEkDEKP$QZBFa zppEe=xtip;6V0_bWNx!Lp65-u_M$6doG~GOzSsX=`||AioU#Qn@hXEPOE!DX)Ukcf z^U`J0GdB`JtLDQhc};8gh!fjyv5{YFMx&A?%Cdx$-6kwK{*_Uxy}-`%UazBfEg zbA7=mjj`O;emAuAmK`!piPB;isdN(&NR9dqPCDKERwsF>hB?}L#x1Ct@61Z&SE9Pa zM&;&mB?!*}XHItV3+vugxafoHdXmAH3{EB#Z}1a85}UDODA!9o_G&v~k$V0_M)$=s zw4`kO1~D=IMn{G7X+CX;GBp{WRK|FRsRU@J@D6=6%es(nXUVB;l$iIul~44xisgB> z#>%aIe8|Z6mRSX(Ls^XbtNox)dO6rYd@PRmE_%z<@Wk~4WO-4WiWKJ+NeO+q~tFcx|iTJmB$-*Vfpvawu}D-`%~)Z%_u5S zosNVL_iuO6*XM(B&t3kF{{C0y_3r;%{~!}8CtrIeHA;9+EJ8*qa*BFd zKKbG{OOU43Z#;th9VhX)niKyg?KKBk{>XkO!M2^|0&m+G(_#G|E#SYu%m4HKDj{F` zyn$4C_{4F0G~B3btk9R$iu-56P=)8qS?xxTq4sLqW5P!CKHJeXgIzw{76|$rO)oGE zeb*`41frw~zz`y0Ubz*5r2#BnjJ*ism%Pr>1?~c+tg7N-$Vlq*#wz?%oFmGF(Od;( z^$f$heUS+|Y4@I#TQsCX@BoOz(t}Yy-jBUCn6(|?djntfpantYO}v49H83#_zyme^ zcpBYa8{HgpeK)QsIoSqjIP#tbI$zv`fbx*cna0g$)LseY{Ow}$;frrfxmH#*lA3NJ z1FqeVQfI8rwpJQVV{Ze~rEZlib5MczZ>RSj_D|%)3V>irq+xByGn~yQvY(oG3~!RF z(#7C*jaB*zI&DvCrP7af=drm3_*5kA=$*EK=R zYOzK8zGc&R$4ILKa@|SrK0W3Cw9)Tu&~BbC{jNPdp>bE&_IFECBud~!@X+bTe^S!p z=o{(~p55i_a^!Zj!D%VGHg9m5fybN2REMlZH`7WgzA^_@Br!m*el>`Bmj2M5G{5+? 
zOw`3cz3g&cCN%u^ah>gHF>ZH7?D^lTgc8h*sQM3S?W^pKn}2o#O;e#Q=5M#pCgUd@ zmt&l6*5jhZ`3}x=AX&+GhN<%vsEKabG6xW0rQI7I9X0WR4Gt}3C(Vlv;a|d&J=ir3 zp4!bgB!b(F%|E~k=_bszp8VrbzeO75ixe6qw3;61Dp9wLyyX~6t5Hl7Dd*dJHQb(|#E%N6}X^KN1hVo-gk+0^5Ux*R0`bjF3fW!2ABM%ihOkTMj@p zG2u>_SiCy>NX=5w^BY}Iq}G@P-I2Ql(#HOr|BJKo@9)-e8TloN*W}K?Wa5lX{ z@udXcXw~teo(zXhkq^1?Ebwgxsy9o9wf6>eCk7x`^}6~-p|mL+ z(5Y=&aVSTZc9CQEQGRg%$dzgK^8(=5372QWG?@7Xevuk_O4d$$D8oW?&rV~oO_ur*|%cu zJ?e^K4b_`OsC*sT{W5u^BDJU~5qDf(*x2P(F(53OY)qt4^;+L>_Tk7k&KMiLS&m zif(mzx?CGa1Lh!pz+R4X54~c8y|^9#ES{Am33R!%hNkt<^f#sRx_|if z*?{G|Fcg{1XFUH`;4C12hlAf2Wm_N-&GKL+ljAPsC}KPLvP^k!;ICi}mK7@`6#8e* zXGlY1-)^b-JSUP$^p_I*xnAoppda_yXLOkU5rbHOjQ51FA863e4QYY%JpO8j1Q}%!z-0z>Hg*jONEbEKU~9v-4h;>lNiW zm0rhj|4`}N-Ngg#94`T8uvzS1YO;2K>$!ynbeZD;+_rzKEMc2Xd9F__>yFsG13VL* zgf@2Y0gG4{TlqCtebUg8tXV=--^4l27C#3a%y|Q_Y8SZ7bjF>=h5LV+rrE2-I_j_C z^;A7GN*vLcp*N7mI_nRHXkcauNE?jAq!zsC+a>O4rRs?Y}yqYf0%>J4KBCBig z5j@A$$y_;hINDViA-L4Sb9s|8>5;dQt^>7wMI9G zOe?a;z-_<>efi$36?-nIJ4GGlA7FI}I zZ87&j=LQ&(MfUH~X{aN3NLQ72*Ko?bVaGjXvLDLl{9o_#_&`cvqhh6RD8F`)rgyvF z-1z-gQfjr$HLXhvNT=`-rl9i4chbRLXEZ<$T|vC2yVcg5YjOO6I^1(QZbR46MEXM2 zK3=R+j))UM?+q5N)EHV`Za0(YbN^RuAMpRe_FqNp-wX5G8_daXQ<1i9aSZnX_fiag z)o$$6J2;--P-Pv346jS{lpVW~?L|C+RfI20%va9LY zzbaIR0b`EbKO`Rs5D5VFx-_VG$MrwzxnCf2&f5Qcdp;cC`tp=}Q$z9k4ol=`hB@`( zfANX_c}W8?Ny9iy&}Zu2wyF>^$fj8w7gI+lngLN~&gv`x2g3?@)gp%9 zhi(CVE*9Qef<+m}FQ7!li};d|kjOt%N$Zykm99;&1%lQ`=V|I&Z|T1HygugTNi^F2 zs%yj{d7N))ZS+wQeL?b4gZ{KAbIQ68!XY>*>3-Pn@td9fA)5+0wk4K#>Ki1Cb@>4{ zVKxNrWHjjGcQs+beRtCkbKgOD$n)^KyVHEdB7kW_`f_+z5Kr2>=kr}RE{GVN{=)nd zjeqd6&wjJuFzWPf?WX>uy0Y?&vcj4oU!=1lt-TR3J=!G3Z2?9~{6AYo57 zJ^gCr;p+My?u+eQVfJ|6%L|~8j{aQ;rOdlIjDNjd#cq-$r%=S^aTdp&nd46@a)5x#&h$hRpZ*xEs2$wV~B*E;$m>bNS`zl0;orIM`(t1ZE*$)*+N9=tF=20j`aL#*oAWhX?CGTP?%9KlWB%m9 z>H?OHlZlG=dq2WnGu|fpJM}z8M(JfGj_ejb`SE5=%;V}i_K3LKNV+Vun5Ln)TKrym z;Sk7IJ>fsh4h!9D?IVD5kfSJ%SX4R&21V7Nqk@ec?_?UpTy4Ult`?=+Gn2?v!G|{bIk{4c^f^^Hacu=wtYf< z!m4+@#5a9_lZ%9@pGfURL@PpZj?1tEzcb{7b0W 
zW}y!8y+kTp0-?sEo?jKI2$&V&r*mWR^frIVeuyc7D`lpytJPF2TZ`_#J}oR3uz+(Ov@glgmzWw%moIPu4<6A$2D0Zk_H#8>VPY)9v+gd< zgX4?I#`Xz6fH9ZE%@>8-dbUf8wq{LBbU*qHPb7*wO7N~M3H)s(4HZ21%1>Epa!9Uh z&V#g9^{Gj&{1tunE_4%z=hY(wkmwv$E5`vHb*FZf-)9?E>z{X{;FF^JN9o>I5uij#~lxMwEv7q_O_f=5%N=U!zB(1ZEfF~lO{!8c0}MzI@4-bI|Mj6AHD;A`ig=~r`jyd(SfDsA-c zHAf8OZyuAfYouL01fwv2`)Z-T5=yCb&}7)M>pmx{W`ERE{*+B|DtC#u`u#RoKJyD} zb7t`Z@B)>YtnT;cS!m4o9QQf7?)`Ug&YjNv#9Rh7;pR6uA);0XfM>^c43Kn^7<}J_ z)!JTjoZ9lw8uoq>_esDz)6J@v%9cczvbG+!#z0 zZaT0exhJJ6=l$e|W5-I;9`Gqs5Em_EY1bfloEIUabgZHQK@D&4 z<#i~G7bO1))!>5A!iKdR{Jh7L6rxhQl{FxgZZ|^P<1^zBZK6Zi4LT&!rvcmNvBT^N zL?azpOAQY#QQ8|-@*pQ0D@fk>x>zo)HUCzGz;nxEd;Su4d-}LN)7#A7YgsxOI9Ro0 z$e({%(DDRGcjV^}C-K&V1-R)0p}W_+v7fs54n7cRGdR;(&L!>;5YgAg5WW4Q)k2>5 zEpUC{qAs<^Fz2-kOb34ND|`UFtp@KkVcWPv&cr zpqY3g5B0ElE_LaS;1=43_mdj7UPTK!o=sGHsCSO(9j{Qjlg$Y&lFN%38Lcc=`2Z?h zB#&w~VHVGyIVj=WcW8c=Qj*2u3&*p<`?4n_ZZ>Rsq-Jc=`phAdqp_LEN&}dI^=M@Y z1Ckrh(mRNX&=Lt$xuvGm>UW=jc!C zFo(vr!nPFCRg{*?*I`O4As;LsyukyCk&jO#rv`n*G|>;$i2J0E9%Y^2)@eRU(A-zsxCHn{47O4>X!=;P&%vY7aU< z_+xzDHd~deMtl21k}G=hzTZ{|!>x!T_|$=Sb10EGp#(>A{aQgK=r>*cN{1>=#9?@) zIKYeGe0}lJk|7!UQri2X*va1Ku=2ao%SL1{hM>O?r(C~JAyUB~!=k4+RX$4v+r0RD z38qpgv}ybH8>*FcW00GVJld9NwZ`k4Z}!$k!M=hq*58l$v{qd)j6!aoiE6kDdg!8p za3%d2Ls?{^vl%egYRLNiRRkbeQ8a!O7vP4RzGaJYPxaW?okn-e~__IqPH zER7+O)}tg@}1fmYco+r|nb z6-c#jMU^$veG}@F{o^=cWigMn-t$NqJgC6#sgjF%r(f7OAjDdJ(|i(N$U~N}0tAvB zy5^MIO86)fG*jB$kXRvD5lNN_DmoOn;kazB9QRi@i?Y47JWr`<%D(6~ehP3hsx<3t z`DC-5KFk-X<^3Y?-=sV{&}h8*!+SweBFw>enM?F>zNLN&@W|VGEz}<*wV~n58uoc| zX!iJLWPwC7PW7ix&}}nsJItOVd!)+AreZF3t*@h(J}IXb2G{?s|5prtEJQtDvIi?p zciO9QsqYLXGTp!V`F=RXWR-+@X^X21Xl&9n6}-;roUX^-{-MEQcy&n9{R|(y3(zIw zYWFSl6Y$+|3T*Xlx6H09RL3k#t9J6J5N6&pJ2q{5?Vmx4q5ANsxS3wTa z#Qkm3-3!LJq&Mq8f>lZL+_<`~-CJ2w`-XGs?pe!OvPEEX^x4jc8ZXPM2wcXC57$y~ z$n^UXYulg|PBMAO#@n`=sq?jpf7{J=M}|BO0c{YM>sP(P@%^gGH^7Ius=ip)n2Y74 zI@Vh31ij7cqh92l0Jm)nVpT;|MgHoi6r;6e0%0afl7Z(cp;>{Z@&jTQn&ovW?~;@- z`$Gy|QHa7Rtor!Tj6PUWt1_1AkyU*wAta-(i>n@OS_1;-gv`cLfI$Wx 
zkFEg(AMzF1(I7?A*kAF^_{;=AD;L~mjH#}ab@|AU3;MYHqk@|^7PWV{QNV6eg`-GerC3pkp4?-?E)5e9dm9x8F z){?ybzV6V~a-Z69Ed=F zi}3x-cB1fqhjT`aJ7ro!z*r&+pclSerke$hH6ZS4VPz1rXyR^><>FmR)OOQ30i=8G z#o_2Pp^H|^K$T%bpLaA;`ws7RH9Sd<823-jwUo{OO2l?`r#PFYG%E>PbAy*}5<01lyKU*J(;9(SZ*m~vQX_A>bAR{%AXXi+c2nksCQ zz(>!@)~qhk*2AZZDH$l_hR?hKBr>%_@e7_>RpC*$*{JWbyyZ?#N_T9Tru{8jG#<1^Sj-$HqTt+>g*r>)4U8yS@1- z(LY&BPw-*9sr_X}jmxu4p+L$(xxDS_6E%Vh*3^FTt?FDB*g$+B8wEbq(Yp{-Ir30O z=;MXzC&TZ%n|&NuqNFc(CCPjPOBiUP#Y?ULvr`E;QY74jYNzpuH15Eds}VW35_LugJh-yS28vm zQH+a^rz`1d&p5h@rJ~^;@X8hE0U}aovL4>0wmo1sB(Qf7l6@nh-BXZB6|>J$eEV}S zl~%I4smx7= zc_J*g7cbpS>T@&m`h#A;iVsuM-d8tb!LR`F?_OP1HQ@Z_i%)giUm^ROR(j-UBXkq_ z$W!OZgrGv>ny+1$pPNgbTKh+0NYnCdtVBGpRxNl%CYlXt2p3>Q_->idGiMus@z)^P?`pygGgt1}v_tAypkOiM~dIUni>?vo{+I)B#{ z6W%3fXb__^X76TgyM&FHrL-ra!6U>|3$~19jo3?4pC~x3Lw1ta!p4A_>R| zn{$Lh)_GBe%d=zM@omp$AZ{ST6Gwb{R(2RO(lhYM zkd{;X%`2VUa$pP_$e<6~)pyHo*4LjgCT6*c9up^ak3Q(hGu104xe`~cvF68(ygFK> zRBoQml;m_D)*PaizHln7xZ@|@x_VlDsD`^dw$G|#sPoM{fZO! zzLT-3S9{LN-FF+tm^MzwcmIV-oVF_nkjJEO-{-lZ&DI{c}jFxFGn8W)ZYbziWiu zf<~$Z-7)WKxacuVZ|)o?`KUQu`*X-($o#*uYoIcty1R{QD=~@tMXp!mc;|!(_)_~* z)fIRTkF1{*{(&Z(a#8DD*tMTLxWDrIpO=qeUsL`vPBFL44G?rqtoTBCh?}(+&75yc z(J)L>$E6+3ASwS8;_C-2<5dd%COxCtkm!lk7&;alAq1D)xW(4JGpz~cnM0w^WriC2 z#%P;83d=vrcA@<0gkkddw3J*sJRrW8cZ`KV#qIQeinj$}} zqg_n_@y~Ay*p4yForz`Uh)a2t(KJf};gT12e=^72aqV!|(gryH*z@CSv4Y4ZbM zOi(BM7uaVegGszDk624w6>wpMCei-9`VB^&ZeH`KoZc8v8QG#1W`41JszdkUKG&~3 z@q^}9D?LM6#HJH$Dlo2iaARW*^1$~i+fCOddqgwiPt?}iIDj!d;PX2`$aP+CVge`? 
zie7FY`O|DS=DLmM6(a`0+f+vLZo0M>xPqMbPFcFBome_F?I!$+$J6COe9vg6SAp{{ z@x1FT@^-W(q7|*Cb*-qa34&fbh^GkfOaXg)isL=sN*37sgl5$}vk{(Wxur4-pAqh_ ztKCUxYS%`}3?aDTgzjIw%C-;mKYYRNUXy=nLMsrG|4&^&Bh{&nt5GLQp&zbp(#`u+ zKgFc3&C53k#t(OO`*_vH0gMe+9-}(Y{9$+sFlY=O|qGi0`0}LU8)4SQ=l0X73VQ5!uGW&oO{E@?x z>T2~*wd%gIwhMAcO!(E2RUF&^r-ODuAHLU(vb`Kx?v*7AHbRmaqHZMO1|yjdS4M<; zAMZ(X@RFr9)PreBf9n<4l83yO97dH3_apgGOu zwgfQ6Lu9vphdl2Jak$C+b+}c&U|%Yg9uBs{iik0=aV6)qxn@BQKD#tFATQvOQbAIF zlqFf1UE9M>=@x*P(`x}>-lOA+5>`GM>X(KpBrhLhioAZoNX|km&a1)2mt!qsj`Qg6 z%)40!;ei^RNR_R^ky|C@hL67=nzK&5`=*{g4OMD4-J4S++)K3!T{3rjJxpRK&4gkO zG*03!DmAVrFOeX{Q|+3^)hYg>wxOR{=dl_>N#Tu*Mm7P7PU^!mcIzTi<T47ZJ7I=wcO0<=EO~bZbDc9I3+Oery}Pt zX8DhU5Psr!t$uXQElMsildSuJ;N9S{$k(Zsic#q;oFWSiq|TI92v zCRSU85B!_Uey9h|`R9&qF+bqBwm*sSM=BVa-PD&HRzcnN=k22N4QtL@U1r)oz<1ik z(-@py0RfQ1ZZ;0c9XpOp`S*`S>OKCp(;C1bWFGf?t`XnqE<7h;z)Dw~;-k1s_L5Ve*F?0Bq56dNjCo7JJ5u1b?PoRc<=|_t5w_z2+egPFLjgB1dUe9h#7ysu z{sz&VtH-Fyet)(5OzCEniZ{<7-j<~VHrMQK2J!S~a#nr3-$m&cgP{#uI< zgJ?va9^u_{d+&(`F8RKbIZF_N>2z0@EFsYZa*dt=!&%p7XUxOkGt<7~G?)Dc)|>YG zF1-Y-;xicfSR!soi0Bt?Ht(b;7+cW7mi4a3?Z1LkRKLKlP8af9=e-pTdyoM)4@`c- zALFzCgp~J$Taxt|ia`WZors;OhQ45b`s%v=p7bVF$L#JV&8`aY_LYV68r+^gwOwe6 zHb}5QusHsE=!f406X0ODSaa>Xu*S+_(LhvJ1>N}+)*2Y(vARy(^)7XQjOVoEhLq3M zXY12A^5M_hlX%Gq`BOXdmc^;W`JoDe9ZTlegtUazusQu|!>>bEbF7|+<0Q6oK+7G~ zWC;woOaOsT_Yz1Emd9C_G~V_X_HN$b^bYx&SV5@@p&eJD>4--Bcly3t!?^4&; zO-Z=Z+fGV!#VSg19!aE*X(5Hb_r(4-1L8}$q{0At@c^@D)@Y(Xdj_d)Fn@voOtF?z;gawt@T9m^5(?-x+-X}|z zZY^p*^1$g7e)?!d3#_J<@&+p67+td30Gh{lUv%>E`%rTpfGZULaX)XfO^Ph9K^VEI zPXQsR4qycm%K9|nV00zT`altjuS71uwtN-KtT!}a2^$&h);eP&8G8VoZ}n=2v7u}L zgSr389+9_Mo#GQ<6I#>v6)a%Jj@jHC!vNykH7aliLSHG zX_9}n-MQ?g+0@D3GqPkii(gd-T4;8N@Gt9U5IUZOROx(Nomzt(h8oSl zI{oNc4;NO8uWy{-fgOO2#JFw#<35XZh~%cm%V#|{VMwe^{7X3xWs_C@^16Z)y~Y=2m=WU8C+Oy*a-0@w%Ie6UAaM-~aPb|?FHefzmynO+?H z8o&=?uN@jysM^1e3uR>AqAa6iOJR_~2+3`$gj<#x!brI# zGn5b`WSKA&lC9CAl9DNfn28BPmWc?-HufcqCEGB+Yx-5s-Sc{$*ZJejIe(pVoq3&e 
zUElBL{rP;^XbRl-J~)2FZl;f}1&CjB$Hz$#<9LO^s9;b^NIKSEeE94u+n#4mF!8XRaSs<(jv}X)fSa5@lVl^0OC*kcu17LZ@!8)ZB@O)Q!ca zj>P<5-6W9fCGmi0dTB6p09cIVI&)0oE3XzW?lPt3t2ls)(*1j%zLA#-SX^#ha~;9Ob_{pG z%`>4DDZ&x>1*rEi@*QM=h4@(xNpVFPnrZcbz{SSFT&cXdVH-t_jit`e1=POFyDp{X zlox&gVpv{l7=5X3Y$Hg<0+d6il4ox>|C0#~9qh6r`}%H2e7@_$6t)@VsT3|rc>BQ; zHF#D0KtK}u(3UH$x31tTPixHS^b1Hk3`!hlJST)+-Y4~!5OrLY1+)z#Fegiam&rCN zlhL`Iz^MV2x~fuGmBE*NJt1Q#9ef216bvc4oCZ`R4ZY~hAM)ZsSj8~Bd}aPAC#%o# z>3M!doe7{GXAbl3+Q7Amz9%hss5lqw2a1S? zc;%uGn=Ay-uy`8Z<~W;?q+NoH8`o}3bq&HIX;stRJ4nqpFL!=Xxnp-NweR&`&~($S z3m$&)i0suo*;jly2zx$>%6%7%h0_EX!8kGAIrz6^BhGwDgahM0x6MXt_pOS z9$t)#hruM?2?7n$#^bfpnYbdVYTdcA6_T8+@=J0VnbPxkeuMFNA+d80|ZT z>R=oWcqXW|L8DVVVb>;vr<^#w)5&i04_WB;kW%2VF_@EBj9Way{ zwuQ}NLTlbjHD<%Mk0^$#CIrmMgSmtt^)!B{W}pE~C~6m|&bw@ko}Gc)J3gG&b^ho) zq{s5`*$C|?CY{~E%3C2}V%9D)rWL#;jW1I@pFIai482JMKXm8G&0OachUk?ZX^Z8^ zHo<&jzOVILzKS$>zMe{(X4$zIDYzym8>`o+Ll%j>p@JD(HcnY2E8vHuE=VmqGaCVP z%-A==wzEzo)_X4ImZrZo+GdQv2b#bAOu@)3M27-ZsPyLGYO!ONCLxO~O znw2R@vf!Spo;L?3@9Ji?sm*3EV|=sw9o#FyjLiEJ?dIKa0uZp(L|rpbH&^j_`=nSRw2mbG+yP0WC( z-{7kJ-2wvH2ajaEVJhecxkmy>U55lztD<%;uePZ-c8@gTBi@>~RZ898XQ z2Ysljv?bDUf69lSVtWys<)z_B|U&n=eWajvJ(&RXS8==cc3@21)=i(#B8C5qScd_x7F`hQc*!Z$rQE zV)malpp?YG&-g0I=s%VNYY}=|#4Tvhago`qbONL@fc|mMVaAx=Ih&HI7v@V>Av-MZ zt>`%P&P5+Z=r(I^>;o&A$3Aw&E*H8KpZx}^F@#qdASlA89Qi6+rgJWls zQ5yN~{4F`Zr1ic5wXbg9y19L=EDe#Z}}mGHJGB`M9=TT*uwy{myNGldlqz%(v z$_8tO2=7*K<5fTD`ujE~JCWuQb-q$vW_@x04=TB7? 
zPZ{p_on?88QUrD1kq+|I(gYpbs-QOzN+x0JI9CEB1uy0zr1H7|`tBXtTl^#>VfbJ@ z)c9C!uY2D*e>77RnrieFpqJ}O`tdT#7nisQwsFU7Dltnl2*-v)KScAn)eT8B$8bAm6a%}3oO32O11r6hOj+f!aIKp$ zCv0=phw@}?Y1pexbLsuf7}48;443;C5>?$jzjhMlg& ziVpQ)a;n}4XRMUdYt^+*vECJ}oCd-i|0G!7qE!h`uu^QauL;iN9YM;kHc7pkrJDV% zq7sLKI(ScZPr=7~FruHmtMudJ=smim5ERqYLjt;*5}Gfx+tnXQ3|C%qHa8ZqNmG4O ziL{()ce=3D*u}ew*}dOSKLIICcE~X%Ok&PHNO%F*Tzki=Q3J z2{!cC=1~ayAXhaL?hW%t^Qx!#!r0X=T0vJa{kMYhXxuEr;^3;J9%eMyQXeoj|*2lF4ye30wP|U zKJMmb{qxcO*yV6>>sbA96_G3Q?n`=(FT@DNlW?COH#Ly;ka*YV8`FkU=v(1>X|drpgFw5k+jFcX?4H_ao?T5va&wdsca6 z+MfzSn0Vd30M2k&{bgfTWa=LEA9?W^;Q#;t diff --git a/megatron/experimental/gtp/images/0611_ddp_egtp_orthogonal_bucketing.png b/megatron/experimental/gtp/images/0611_ddp_egtp_orthogonal_bucketing.png new file mode 100644 index 0000000000000000000000000000000000000000..2d311138e8d9dad68b2b05ab0213c86a715f9e46 GIT binary patch literal 187192 zcmeEu1z1(vwlFE(C0$Zdn+EAdX_b)Pz$PR%9ny_bA}AQ?W>u_+0rEqWv z2B;SRNb-$mc5rYi#!j-@PBw04mevqB1|I2?Ck7r_29P8JHwgBZN7~d9Z0-QI0e)DT zGVsVSaC2+lx}zYd3D&W`=B^{du4t$t@8|@8fgZMC8^}o?HLy8^oeKs%kL?Vnjhnm! 
z*v>*3Y6`IiUNdz&c}Y-!?*xyj`w4&x#4m6HF?To{{TL}X2nM)edFOOEE?}~-379)u znnD~;Up@gjL7~=8mUce^O`x{65EG{#I)WV>psqi5GlN>6e)H+5cED%-@DdDg!vt*o zGgQmc)XCzcuOQ#)^kBUeAeQD9XCrfgPFrliXPr+0juv23sO!o5ry#H?*v`q)1tJZ# zhB};16zuE-g?;DGt9-WB4p1mC+BuS*G@SnzMtNz7HH;Kzt9&{n1Gn5i|L*s-a)8*L ze(1k~&dJgVSl(mux`3^nPgj~*mPP)i8Zcu9X&we4fJ;C!GRHrG83WIjj!vp}09S$- zWVsmxK@38?z_T?J>IkuR2cArz&UV%ifVMC`KYNiMY-7j3BLzU5EPw$)rT`KLOJipz zpd&X33aq^iz;F)0v!fFjh6R`aJJ{S7>gZ%?axy_jC-*Z}aDY17n!<*V;R2AlT39+k zZrFiMVAFR6m>+;yIN4aAHUko9eN5J4g7a|6@yZGa0bR_Xwod2Za>sv8;iufs#eequ zbZppgU~5ZrTL6~{KmlOJJW?(Y2Vjz5Ysu3F8%tAD*i5B>PlQ^XuoTeW7HWG8JYzl> zu9Gjb1OGwC`k!#FBn+J`AmkuN2tWj| z6Aafe>&wZWNWk~8U?{({DS++xw*%}=fby2M=D?E~*u)8@Jh*|C<8cE6S^i#^IYD9C6h_t?=TzYF?-K`siwD;J zql%T0logbd2H-y`KtL+^r3GcB0JHZqOF07i8xW(Lu<>MgVekIC^&}Ty%8pn4R8b0@ z{yfJlJZD{gP?UUsrly2(i|kL_BCD+?d;R7Q>|*I?c%mN-VVcg``q(=>RZM>szTuM< zJfZy$h7;f$L2fA?ewZ=-$@u>He8YFf6n`teF+OuNeslXyA?Hj3;I{(aJm+{qb=99W2(*v@)}_1)R1!5JMYSl;030UO?#Q zd)NsU-4YUH5ClFBh{oCg(O9SB&=50(1&A8~K(L!7(Er~ihQHe+2ubn_9J9~K6#jSp z{ogs1JRrf7QP1g7F78vm;`!%e z_~)bl{|;W_5;~2!eBbw<8ohIBnuqsSYWlR_pQ5Hs0eOar^f@*C10x(~Ku?@rb|6jU z1WOnGf_ZLeD4#LZk30i{0^;&8*} zcFth@i#*3I{mYQP48IgVU}S$0n*GB-((xwwG<^W$%|C_1eih#T>v7o6JF2ry)ERSF z+B!La4NdI~P3#O^EuAb3Ph33a?`iJW0G!*&gsGpxV2FI!d1Y~#Ee`6Cf;5$@QRZb1q>0aTCdwx(Y z|D!A+%l`-Z;%xKycehbIXN6IJZ6^8IJyixQB{=LrNdv>N0kHu)o|z|@ha+iv zT$Xn1(VUd%oS35TS_SCydxOM$5;g@|z(8gi3>2DigJAmRL?IoQT<|jp!I%sNWrl$P z?@CJGJ5HFUItjo!fNiaQ4!`~@227SqhEG=DS32tZcIoe>qj&`Wn=aB>(ZipjqaZ)~ z+p@=1IsmEvfx~@Tf%69*_i5{&?s9+6O#j`~8Q zIu7P^A2Zf6np#u1b!h^k<&;cj)o19;Va&$F}PKajy68+2rrxdP9D8y}$cD|2F6QR4e`D zeILXBzx(}z4Lh-C|2<3gH*vpBxlSFXbIJu`3Y@DsP5(e0oGhT`P#~Qs`x{j1w~ZDo z%X_Xe9AaxK2|LOI)QuVgu|}(3?Wexi`2Oq>|6!od5$f!4e0&4waTcO?0y~&PfZZvq z7WypG3Y+=~}+)?nB{pL1CLq&jbiDfg+L@i(H)DL4H_ zsg3^|(dLv6KhWm%EB>=+!~2V5_;1=v=C7#JPU-LiZBD=9KZ-Vxf4{x?zSa4^v^QrV zs`Iot3p4y@(dOT8Z~kT4{KDRx?au$RX!Gy4H~%tieqnFUcEc3m~Oo1(3((JF-CT3YLt59ee{WTYyzi@Cd=SE&%MLQUciK z0NV_Z4Fn9!jGdNUoSalYt*kgXdwZ5f|9-gG6lw#@rX3ISL+KE3DE=G}$jbs5+3#n1 
zLCip1^l@JPxSR;q`n@(2NZi8?b)G92F@rh)lRC?EzzzzZ)qlbsj~l<^HvQkv_5*pe ze{@RkXG{ElABOz>i3(sHPA+fwlg`ic^8H*v`pX0Kyr(kwr{vv$n-d%WRtL=I`BVvv z+1XE8K+Gn<**=z&-0gA@0i=Z|AMiy6b#p?WWZ#C{p@i2F;d{x2B#Bo zIu2+4aKQdI<^(Vt%OI>La1h_w?j*HrdwhoZB=h@S4o+&6e?atI>ds^VWCc0Chyn_e zTF9xC!*a*K&*RC17>KaLoiz z=k&YZr$Ym=We{*(!fD;o@zpZtFWUg{!oKaq^Ez)*k4@U?h-ddA9a|+Qi<6auSRbz? z?C{OWa(%zx=zqUt{2!I?{!jXBf3}at$9L*`oh!}zrRDl})JE|D9=qHh+!S`)_QQ>B z=TjQTt!H<{oiv=j0GtQ_G8nLn$F4Rvgu}t%!d;b-yy*r%ua9CxC`;m9WW}+5 zS>t(5=)(ta=$AE-B=HJq@2m0BT!Nq1-mV=Yl2wqHH|9juyh(ToEfj(O&X zWM;JelGj(-aH)4BuwMzpho#bS(E^hfCpHp=*OpA+x4}+E_cM0)-ROACg$O18*qRrB zw#%yU?b&?ryQWbLzYu+7oAh@hfnoY9cdIe#Un-%RydiWFCN+M<7~yv^??^_RnF>B1 z%OAg{2C+jrh-aQbORw%t6C$Hw>t!n%*|?fp71arYXN)OZ5!}Y*JzHWycf~sk|1i4}OeRVrfZ_*$+Y4Dv< z#g2}C1T#vfW+BWJJ!j&=bvqWbj@U3kzo9Y+?r)>QN7SGN_}Ye^j(gJyDKMvZb`3HN!?TcGNp;3J@e5$jXo>yg>>7We~zb z0$*lTl8%~8DbIwalMO*e*Tr-T%^ckfbS$9^f~1~&zMecX<=ujLT#Nop4&}GoY;SXX zdg)tvFVltJ)+*4OYR|rwrEn!fQISmeHD}V-BK^nHJ~9!X?*jDmbBL(ve4w}~M;*pJ zKbeH;l3kzHgb_~K7;)5CtTk@PD}`O3y0~E6b8+~EC8EQ{!y7B<-E$oys1`#Qksr_~ z5P}-%av#Hsa|bDC2BMqKqI=+m+NgzT4)XXVSC|k62~j!?7Iw3kKpdi-x)}~wCWFE2 z^NF2KYb`~hq&tJ0BDokgI{YD3p4`!kDV}Je_Io|AM<0{Ec3Clfe+B7Ewg>q`^IJnhZX->pkgBzg4^=LyN^JPr8yf-wRk1Qx zO~!FQ$hS=!QiGO0v@K=!Y^h{E#j@HPyHJ#=QgNTr(3?Zw)1^d8-P>#9qwh{Vb$h0PWw?_K{n8@Ud3+Y?--+h&TcE{F0Yzx(a zrTS{yD=%`x3TbfAq5cEnb}}A?thJ@#8c)i$vQcwa_WP8(Pki)BN0)|C+N7!??uthY z`IvG`GD>|kaPf{p*g%^SEY4Tl$Y;Hk&7I zf0Wd3-VNy*`8~s1ZR|ebQqSt68_;eZwQf|U$0tIL+8r)HK`%L zvDnY;U?c-}*~jc;y;+kOO$09L(c=3?7t&{1?8x{^1w_T>ZW|VYk!*!Eo731#?0Ee7 zo;bo!>FWr9$vdSB@-&_v5VNrma~101AX7s6yJJ6zO}`bqM#1N67EYy2^UW$8Md+BbbX`OFrgPLiwt z$o-N#`lx%Piu}kSB^#7hhYlnGJiPRw&+;0szFvEnSxbf@tKl?4`# zDWIxS;ZmzP8ml zRy!DRK4}m8x-4%Pa*G0uwgn_oA?@v>G8Byi`8XPvCnXfM!IP&pGSk`>t#}8^xJ(+Z zIb68dI&Zevv{AnG^>EcTm{qiVDokp9@ombth<5(#235MspBG=}Gk?w_R@PdZt$wJw zD?i*x+B+y4_u-BbOUH7^1LxspvJzLe9-k%p@pd}skd7mnG zhb~;PW0rTvptbm-G0p06YdACZxkzl;Zp_j*36~d_>)sKVD$zZxci9tKQzbfjr;*Yr 
zP;I>yv@TxE$M-_Fe)2}y=s>O&e$dXhpv1>Srl$xSX!xOE(GUsL9Q1`nk3upQH1))Rxj_Vwjf2dBj^T=%j`7-cc*wB=)H(SG z3+5$}Q?zMt6g4+K+ofoIm!VqyHGO{btKyQUEDuQmo6InDM_+btHd_f|@ToeC+djed z0QVJAXF>`z^Qvp5!Y!Y}j8$r(HT73WcD zS}dLRQX1Q?E+?`plX!e^wbZfP_-@-#LqyBv$)&utFlVv`O8KCAAw!-cjCIl9 z@>!=kNmMU&NZ*iuutCKasi?6<<=CXYwnbg0u`m%u`6UjO6g0S5lY7nmaBZm)N!DoM zeRE&0qy9KMxv+sC)xo!n8)yOik(%jBv96L!o0qxAbO_wsL|EE6BmrXBOW-;^@lp+aFVD;Qahpg7;uzed`T9dKnaN>FfY zcc{Ju-=%)Re%^(m8uDsc7+ZTw-5`s%bH`cy-+){FNQgVe??47tTRF>LY z3n;3o6u6P@x|a6$;JrDs!*3X*iTDt4)nLWcm_K-tMs0X4J^y`%p}25&NfB69UUbNN z*$lxf)V2KseO@X8V=_GP>@p_%js}f?%J{clajgS%41w$jeF;SCXb+o=-d;yfT%R^fyE@y3wT z$0i~*j=EX~Kfl;bUh$u^DyH1_-Z4$TBDS}mWI|XqLbT4(@N$>Nl(vc-+|y6(vo3B# z7CIF{o&JERcWLDgVONIOOfNP2W2yV)m+gJO+Gr&z=8%}88syel@m>tWEzUW#my3H8 zIDOE@>i9`;=a4=ItyJ1uVk!+gRZ*K%)_~?Ur8kWg?ZrdlQHxb~(XT8_#JL|QKEPWu z?yqc7Jy~hF_G(Hm)b5>7c9&~{4<|_6(eX&WuPUG)d7tY-{cw8Md!fXN?($13=yi_B z?dhr&f=fEHUe|b7b@@=-Y$K<4GKM8S>b4V#_=mbNy7AMDm`3a@tx?n&zDdQ=8D%DD zM}hdG+>evZat}VNFrP#g3#h2y4zXWeOb8BHZ;gLwkU)e3-)?bhWj5c60tr3@j2oFn z(`V=xNR8@`5u-2M&jgL~c*un+rsQz{sR>4VBxwEy346#WJ#!N!XL*cD<(0>~k@_=h z;`kVvm-E-7N5PMZeeb$z7Z#PetmK=rl*OkGx0ko1O3Kd161TL>^%WiAdL14>TZ3xZ zI@IZiW-7rX-BwxYv6>C<-n9j!wSWS-51?)#d{4cVC%wd%DlRLA3e2}2HE+nT-zk1x zm#V2e`z$J$_qncq8JAn$TXQaA`iq9l5uq;;V4d_FkQ_OJn?hs$vyWR;jnPi1=Jwmjt% zflKqd&yhT(FAhBe&)JtPQ-K)8a$;)xQt{*CM<(icisb8`%>V*Yr%3d*XbKZ5V5d;69l{WcP0F^j zsgF^&qC;*(ZZCD&XpP-z9&UMyJIYLQ!O1B6?aheDOz%OR&Ks`btIDQQmg2jlt?;)h z?$v!M6{#?zYgeVitls@%+7p$kPQuaIT~{KjfId&>zhJU~0ag(U7wq)%g* z(&kd0sHjZQvoMI3OewmdFf8NjmJ`8O^d`EuIn*~y;Pv@$)!?WNEgy>3U8_$>6N5?b z2eKaZ1Momcm6OUA|7~?{y7#vfHq(@e(`;($-^FVdP{^uj=OaJ8l`3-<3&0P9)(+3? 
zX+8;de*VmXMmXRdyP|!g6~~C;$d^Y1L&AFlFJ}$O%DyXO*6 z7#6zP*-Vqt5mIP`kc?1Jb1?eg>!(~13szHFm8B625dSAU=(6k31?%pZHO2MqaILHlDfb4Qhd!z+O6~ zkNJX2`Fs7Zx`HY)5K)jwmFCf<@d7C)M-Dh}F9@J!%yEh!N@mgJ!3}wS>kriLhdk?z z-kA2mY+fB67v0s%)q(gw2!lra-OSqr8vv_i=+H)oU-P`v1cK@DFo+Z<6 zFNzUwNBQH(ri!W=zaV$H7*`-*QzXCE4}JT(`jv?WKYeI=IvrBk!N#x{k^eM45#H1J zP4WwvZAZ0`rCek*HWeCdMj5xJ09*GC*9nS^(3E{#{~mM>wy&-pgtz(@UiXA0;HGyx zXLh^L99%dK&8oe(Ei&ZQWPiC0r+lwuVDHtW$WwrY%S-e}`-7U%_tv1^S6#)o_$l9S zQs|0TI%3p?+_u}N#*oROi0m|p9-nmmOwd)sIr@C9hlHzO+?yuQoOXPld?{NpoHg<_ z-A6!e*QqUO*7<88Oo_aCvaB@XvI-a8W8Zi1PHYFhnx5i-l+ohby(lmtO$fLnKQ1X_ zUrlv>x8QKXW0F{^BO-3yyz!l6+G|6G%cCFm_IvK)u(BjqLrN0VkUCOELxN)NVi>Md z5;bYba|u0h)UUV`gkXQz@8N5wbhIS8H_GO?TkG*0nN;AJE`eP1x)z>SVdg3pvZUn& zpBj!pHBbKAA8)V?XMRa5Cg#Dj&5kaj%EA&iXWD>tme>LiRUz%RcPqGu&38 z0)u%!-CeApaj@#NiQ8qIR?XYJx$u5+;!b3&>cvjgp6u5bgD$(?q#Yt(W@atJc>mdR z7tw3SBv`=py4NygGHc(#e0Gp0UcbYZ<5EYWycUbUo~-wLKF9i++HU2&`ATqj{l%S4 z67f4;=ww9|!%=WiPH*>B>gV1Qxlwq?by&yaMe4W2`S|6hEggxi&6lrjrmJ7i z8MX^ru}rB2zJ&7+OmQb^0hip!)(jCqtvZ-o-o|OfUt?Mqc%LFsd4=%EyHXTdUYY5L z%@pOKIcTSYyr6tEBm9s7BvvA?TgR%cwmN#33$xkBXDxDT8AC=JHRbJ$#;bP8ki=2< z{INq_&XtXrsH%RYZfb+km=(1b`Q3I|UcrYOwQ|UW4%;RKBasZ>Ub~l@;~}i2g}FCV zfS;&{AcYeYTxD_%(`o#)0bl6VJL=lvnJmfnbPE;Jf6%%)6V-)T3`8mNE%+`Pj*^;o ze1II*qP}kvmN2DcRyvPc?*#-h)_Gi~7Nm{Rx&9I`yG-v#uO~x$w_{eDGcU%V(2B&= zXGq z9$ez0YT)TXnaIGnr=n(?T&}jOf8=(-3~2yVK`km^j_I}6=JIyrDN0Ur4&knOJN&Nw z9Cd8M(&N~)gJGM)q}9jN_b-_G9NOu|+I)?E{z3TeBXK3>{CI`&qy?WX%D_0C1vz&? 
zmuDi|Z&)0eB%hgC)nJrI^fjVBER-S#Wxyqix~&?E(pPkcOS*QpEyZkf@?EOspo@&E z#eTB4JvW=fe__>`T$G^N&S0b2l+Z?+LhXw_0cyq0CDl87dgZ<<)r%Z;D8ZmoI*J3r zO=%(bD%bY37TWhIHNu~H1QT5IaEJB?+XGPvs(iFZ15%Q{@Y$p?W-RNz+$7J!RROjY zU?!?;5%0cN0a7;NtIRCO9*2Ib-+J?l+sT*m-jaP{13jzauj0H#AfcpPZkQyc)`v3i zx+U(Ttz^G+wuqFpc5P8CmWP|Y%Y2Kry@R!g?aoeu>_%<+WP*=Mrb}~SF45FO%22f_Vmsw7MtD=X-+ z6ydcl)t5V?Lk&Vx8^%ogZsdwHg)d3D5g&>5En}obrmcH;JayxHI=tc9{b38q0Jpfx z-A@ZcYj0yq!c}a#M8E)xNasJ!01tOm&nNwRn|e0?C~) z;mQoCtINtN4&Kes+iP?clV}WWuM9;CNA}UySi*TU-L8H}pP~@th|)4+tFvMVrnOl0 zYGYnW>z!#V`P^9}M(H*>zualcHlVwP{|SQGUbg5lsz2*fFIokK)(oc*i>xt%N4L62 zqjtBmG>a{W^0Qnc(RZ9CDWeF8V?vbNwSz{3gCybsIaRP?+RK$L0aZ&ZVkNxNt~5Zo z1-9wy>$cdt>*s6k;%)p|w!|OLLhY+#Y;68|v!RC(VY=?E)JE*bDlWHuzRe@7tX803 z-)MapEAGMyenv=&Egk)p{TnA$OWNyObqdI_tHB};?)Yf_%ew8txQN<2=uQ*OmyOqx zRPhMu+pq{?T$x!4<6^(?=PxO)Dt_ij5xtKr4-H(#{B*Rq!EC$fW`TlAkUUs1TRuC! zSdJUKnBFS2SThV%3X!Pzv(;&Ro7N<@CT@ zP8v4GzHwa_<_i`m0~=O%BE{=%nJ7q#2I7@;UE;Uh(3Cnj^BU@8cH$*!s4ge<7ZZWE zcyC_G)y-8adV1rDz^{F!>Fw+5qa_G^nw!g0@~ zMX^XjXDfmXmutOZ4!KWc61&Eh?}=$PXY1Kh`PpS+yQ9*2<_=73ZSCObKE#O>9}^X1 zTdXm8Jd91bFClmxjbJ@sVVo;w$w$`flI;*dLCApjg!=W%@76*bAd@~ojF*V9Prqn? 
z_EV1143phaeQQPIbf1Q#ctCYhq0#CG&abil=36*D*p0*Y%0J}hX)j8cse9ob;2)q8 z-t*MuoxJW}5u{Xe6(uZ%c*qS8_{^-!RQCd2#g1)0-4gX#_rQ=>`l8sDz~IiYIU9T_ zZXOMxZzVCG>#~F$Jg1d8dGZKWeCkxhTn#hdtyN6MU2ar)&wNb#z@lUOmiNl&QRXnM zx4LOFv1fSckvj=1IIc8EqK!mP^}cTCu47Y5V30+h^tY{0^Um8#XoPJG(4nI)53xq{ zu*m~Aw$iz#(V>*t+)3)e>85SBz2bnLxPz7bJo?@$RO?&&U%(8+OHZ&xyNh`465y%D z3tmgJ%1~bX;yxB?<%eCd(T=M+CxN7{j*0yb|yaYD23gTkZjuhbwyFcsSVNY0H)bj-^O zJU8#!2<+vO&Uv@0(T+^FnTbAa5@%b#tm-8^iray{h_R7PTP|V<~*S!c6S(8-20Bhrzbf8XKDRP$soF)~PPpZeo(_1_x`D z4suMlC-w1O*Db?`t3@T;vNYm?I*UdIOW7s{9-jEDS z4!Vapjn$Db_uZzA`FL zl#wIB*V?N|cU88R%5gX^^b^%3FNl_B71noQ zN_zoBKa|zz1Qix8yeAV4J$%hBW;3`F_vNbe9-F^Zz_uT%{Rn4ZsyseNfQIr^@_LnU zfrxdM^}$=m`6HBfd-~C8y2dU2j6QgC>7Jbz7y;!q)ugES<^iG zLe(8xrFePNN$?ZTQHss0d;~?iwWVy$uCCRoXpy+CR)*Y$j2!+2CJhgwLooRz#% zMAzDV(C4p7s>{;EUZ)kocx;D4Zxszk;4Iv(<3eG`&Mw#Zj`9nPa6l7Ul7s@Jo4nk8R)XF}(6orabJcZ8tXXt27TW`_!{n0{5ncodurcgYz%r%$AmERWA7U|y_eP-0dg_$O%nmX}|*&6E(WeOme&S+-`GMo{?G%eLT&UY9*MTA5Keb|O zl0#_CC#nX&SvtLSyB&ja<5OIg{Y2ZnquSo;j^<(OH;KFpt!q8D#(QR1&r-H5*xlfl zl*a`7wm63*r9aNgm`7A6=okowfodg;nN2pwUpTvKQP?2jjIvJj;mvqmb4z+xq2Ml>v6SxR;ipuIpJWpz_@s2jhOUfKsBE+kIEmm%8>lJEEWL*|!7k z-4>5zpu?U?T(s#i#cf`{r=y0xu|uy~z7M#k-Uu8puT%|+E7oPAJW-#BT~~*@ToLAK z9=P1XHerG%7Ptze5Rn`%HsZKycL)M`z`Ev|A+K%X;;()cpq&7JgmT%eZvmQL)RdO- z$3`=e#Www8kOhOW#c^Hy4pa*OSA#+hthYbjH%M+%uFNp{;eiN=hr2vo-26`9ZR(C} z(X?Ts#PXUwS=Y?QIBTCVec3+A@!+I1Vge9<4P`i6*+$eBAV&yCx8p9t=HK+v!3tz(j6Dj%M9&)DnmG_r|7aBQHHvDvlxCFd5%VS4Z3gshjflug zbDuuY7kI)I_`Zhh*$$KRzsv|P7%@Raa3pFOc`2=UbX%(i0nrURX%&_8vd9d5MX<>S zVO-JC+Q~pJRL3i&>^fU9=Dh^A6jn{~Q9>bE_v^%Oa%bJV$2%tQLN}@7h$2|hvAKC~ z3irOa{3>R2NmA8t#o%;<@JBnNL-Ya3g(rNzaZd?w{l?_?2?{%z9Aaqh=SwzyiCBkn zM(&TcMIQ2cSSQ}Q7gffLF+8+YHGc1E@kV)`8bWpynZ$Oum>B19ld~I9VhlwQhWHXq zpnO@S*~c5aBU7?fTeem}a%P1EHuJOk-gXI;o)<^oH z+qVK;$vVv%8qHU$fmCx|REp8aOEDuSW?@x^+M39eceO?VDXKgDdkG#F`#>F%w+zIN z?qp5Sz(?L<5L+k9Lq;tf%|>2)#X=k9QBAnB<%8mBPo3bNwL6y6K7LV0%{8pL7G?YP zC2xO(R^;b?%UVN&67AJH43aN>sa@_=ySqHSvKj1JIfBA~RKA|zo`$Av;z}NquS%74 
zjZ&9ij?g*ph(uE%A19NBeQY1LSEeI~d|=bMd!rG{Wo?+d?Ze{A+?<(yB%@LEj@$ht%YvY1 zH!`>@%NLp6uD$F*)r95W(Zsp@wnkz@hOr2kV{sg=gi$ZrmXGQ6G_Q1$up!V0C*Cac z{A^N4oCs0`IXVo%q_ZZgCH6LhTtpsW(Af z&B7Un?kEp*-IhA;g&=E&N4aI=S0p4XX$}iAv&*HmQs#QbkL;MRyXKdnw3c)WCLo}= zhrC4D_N5^Q+_TO7d#}bHlFcJX9gYbj-=|e`Ss94NHslH>&G8vQBC4}9y~ED=z-PDb zrXZ0}c>OBkcwA+E3r2SO%?28QioLL^Afxw>hrDKp>)O%5BzPv=`%!#AhO$>?f`w>} ztRe-YYGwQW?(Vt?tN02#_J}iC_|AP-&BO;!Fp|A*FyKjbj0Lu#jZ+Op2Qz_^R$9=n zJTMRSoL_Hv-j3?Xx(!|o69cpB)cW)b&*5c$Ly{S>ixPz=cje|sYZen&yG{GWRvZDp z70(!zB(o?IPx9i|sE>LN*8{IOu(%UpudvnG;uA_Wl!@}&B+gasy0Os`p22X z^M(v-ud#}0`Uz?=ib@trch1hUFJWm8JE~!S zT}Xpid2z(9cycwrEJ!{GT*{$rl`=N+9k=MC4HEvesj9b4=XVq>hbk#Zbz4+=0?kL5 zXk-6i+$3Z!c&B~OCF5nhe+9PLbXQKl>ND@VvLcBhh1o&=k@}hq#WA9hgTuBs#3=2z z>6gr~6JEs|ed*r*8jS*C{)A$qk88i*O*GjQXILw(NwJKwssKcv3=JD?gQtM2=cSuo@yvI3yz?9JH&~s>6|ntyu|1*@*+=G_xR;lQ%0D zRE(otx>0JJ){aghmD6hq!jnU46Td%Dhj3BlEu0o@;L=S3AQd~Vekogx>w5HoZCVSZ z$Ak1;{nTAu8)>RXO0TsWsDkb8M--2;NgQsIpkj`oW(|Lxg9}GA9bbDp8`<1L5;Y1x zRL~ZtUN~;STO|tTk$M=t`icBn6OG6lNp^?wET}6fM&;)^e$#qJgG?Eb4qeHrsc=wO zVVK7z))xM((N3*Q`En(m)e8tX66AwUNZisct`FM!OzDG19FH4`49E5b%!6o)tMUPE zZVl?`Yj+398MzkQJw%YWe+S-@&8JP~N?*#ftnqsRHh~GU{j^VGN!DyFb^2dmk?{B> zBSfSfdPAmm9-}eRF}_j>XW7!I*I9LRMmPJO%-utg9qD{XoeM&Bp6{z!sqPiKysXZ$ zhw_wd4{iva;jsQW6L}!LZr@A4K{^T?8!>A}EY)plu)~1Iy60`ztN4{0J5iskXD~H_ ziF&zLcvv(^AEiLXl5kito8Ph>sVQOJbMrxN%@Y+1oR|?#dF4S=_;t?YAr^r+lN|$2 zxS$k-p{9WteE4as?(yu*GoPdoWiJ`9?{*sc$J!FHoUXoyH*!6FiW-B8Zt6`y)o(Oa z0`ayD4Kk^_qx5_8qX$rR!f;N{Gg<)Pjnoui2Q0~(KyTSPf?}C+{ALS4@m2%gPkhADySO08VvmhQ^6`Z(EDXk&feWww@8Y}`IqrRP0VKnC& zDU2~+j`1X|ZNHl84rgEh&X=j%N#V`b%?KAyYxBn ztGtYQNYEeqB#g*auBcG$f=sF0sDxEDuXs+NtVn1ks8J0NbHZsEVR|o5D>tlVt0iN& zVLBJC@w(v;8!$i9tK*R}a8ucQyB+5zOVNP1Q9Vk9up}J}-mmK&-0Jtfj=c{jvg~ZM z?q$@wN*6VNT&*+dJH29Elk#YwF8a49ZpIw!NGRNjf_vTLFZ6mG`u3H)jEdY+6!*YJ4|Fh7VC{*d{Eu8wkFwlKI#_eX7 zRl)-SgkKT2jZg|aX6`624+#wqC3wQa$aF|{5<87ZO;7Z;9E{pIIE?se#v{|J78qY{>g~PUMxTUSoju9A#DUYVD17*KEy+YyuQFtLI z#K*IA>f>Boj$x`DFoNDKr-)wz+q?M}cz z>&{9d<>))oW=(^J7ZA{u=wsqX5%pjN_+_! 
z-u2#P-ins!>hK0EERUf3gKh2&40x~C5^ZT(2iq^0S( zj0HBoDF5oEuAxg=TzjHjAX#&~eFced);*hyDxg)1^Q~*sg7i0 zF_6fAwcEe+$;`MIJHRw&CCXJq-+M;}PTn`w*YDb^n(Jb^H>z$2;q;Fju*VDDzEyR5 z`dNhcAr-~CKD`|qUdO@WOhyC_8+aUUX>+VNCd1HphZ%lNfnuq#{Oz_cQ&ppy*zKoQ zX?UpNz<`M3>0W#YOb7^XvCdT}A6GZ~#h}CIk0jrW(iPAI%9sViecxFASTdhz0j%Jb zj7R!g(c_m>*>F4Df2fSk@kK?tX{Y>PHy1>-44%Dgb%LPgrKCh$`fs()H7n8*ts}d) zX^^#A_Xkp<44s|dotw^0BP6Guv$Akso-09Tu>g9zn|Lka+lIl0@DB!QR<-ONPHzfO zYfD~VoKV>9`mw^FnFq;9?i8urB~*p(gRMnQL-)8f-^JT_!aWQN=o|m<5vGW*ZBaNNKG<3_JaOMSL0!^Rfe5RCe>9S1fz{*GIf~m!Wy*u0= z2A4l&{`P%mXACIBk(}O<`NfdJ&ks;-kN19cE%2DMX`&rg+;$;U_*OViay$LrLzALsf%O6AOa}%-tsL9xkFUuA2OG-~a}Y5E_NgPf1L@qO+Ksq=>V) z?QZt@IkLS^zQ-iHrtz9JlC0LZ7xO3#0;&FO;6R1Y@P;yBaS!jF$SrCwyJj8PsQK*? zt`1{}j{V{n&}$PB zkg1!m$^2HASV5U#0&sXD!fz4f?3l$5%MdGs)3>2AlhL7=mLjDZRE1tiw96`O=!Qa) zjEY1{_Zn=yCnYQ=*Mf@_^pP9f>!e-Vt!=?}a$e&HM6d%KYWyNb-;zL*KOkeC5iSK# zapTi+z41w7kk-9GYl1ufB=YdlKAQhavCkiJ6?SjU9;W~PG)Ss@svDJW722ype=+n* z!d;e=kChbpfL#%gi?AK6KywL!7SR+*>M2b*rL8h>E+j(oo4bTb_RH>{&z2ai`05V_ zQX;_+q3&J|?a^w7xP_;>%Dh1(^_?fJdvRC6B$W#|gM#Vi%kZ%2)_C5J^ARj_gS*0v z+D^&X(xnbwC0tq7Nc6XAl#v3)4Y(I5Sz?dPi5RzC#wT|4Xf{zZ%ZkhBI%IVsYv7R& z>EnqjM^ks;*RI@wP(`>Sk8V~ZCia~uD6{=?fxd7`nP0w6Ze~QUYQ$|TvD;mj~;HhP_l3|gS_TzSez5E z69+OYmfyY1Q7y-#Pt3CBY{a~lW0r*_JEd)3{A^mApNhFgp8u%p-Hr(}r;|P(HMI%V z$QZg|+U3$i7H5?1y0PsQiRw%8N}gkcTAp>cwLHh9m4h#BZg5-pT`42$+3hEa>ximH z-zJ%yx_j06;~KrZjOm53#wJz6L=sJR)tKF4-Fp4cggmCZM1*)ixU)*{s5${V`7@VV z-^g7yL zkDBOh?@sVP_G4xg1G{Ngg^rsiTdRg~KaN{8JF?D>;g^*QUWkfN-A}jNU%!DHwRR_z z#VGm3_P3Ho%;ceAaS?tFFhfYt`;?a&ciJ5^7Ah}JK4f?p*iMMOa@TBpJ8&xN8KTK@ zBrAu#)77q~DPHQi+c)>J9%M=m)Gb^QVxrviM%23|#kF$RYx}XyBb90Dr#vD73m$sX z_|sjXcyi;lJY=tgQQMiGKXk@E3Vlb#6gKGS=9lw814pr`Ft0M8e%_vauU&J%M4dVo zxy*qkTUN!oIuo^2I;)TNu4pM%HF}6!pAQ=vUz7<^!M7SxE-tnuHG}u}1B=RreCmMQ ziF;!MlNjFZeF82jlcy86ESFi)afGRfh&Z(`?h=Y8h}G5=AH5SIPLya*O1QqSp@Uqq zo#>1iQKK$J^G0V;Pm=f6gB3=!m!0?5!QHiJ(~o5Bu|opu9G(c%l^c}UkJnv#XMZWY z`%S*eVH(?N$#gA2Dw$L(%HY%J+gqHfE-ddQ^-_A{BljNamud1Au!>IMqU0Wxvax7A 
z(4J{=6}xz?(fESu9BbxGbPt1Z*<<~P{tyKPGP$nK_bNjDI|b(R7(S0egPSmwXPTbA zt&{nPc&TAW&%5{iYeMOZuUJs5`L24uM|X?`(LGI=utgesKRi{~Y=lA?j_MOg!RSG7 zcYptaioAhnEDQ3&)V@Kc_tid@^n1%C8RQUh(`3um7Rsy_1-0y)&c;cFnW%kx%F&hw z2<~_p$UF8vhyslmq!G5RwSf8GwB#I&H z<1#a&Q;52}B%KCPwCY?CZ!oh*r~peuHs;mjb=s#wv>BxDOR{PTnAzq%Z{Dmh0$(o; z!Jy^9AbxQGI1GpgP47As2Ac>l;8`GvmxQ=j*>?`#KYWMta%|=KS5&(9SX{AMw*


rahWqW!BK=xjy}8$@jAH zdX3nL*HgwT11tJG+lW#+knO>5gOz+5iNup!9}-jt87k@3@oIU}wGK8-DQyxcA#;rK zy20TuSsU^#xgznB@>U`?d&ODau+|&1-psAbeDPG;e0_vle4;R^B4E`i1AJF7S+Sza zgECIInJ8sBoK?&|n^kOdgBTHijwaelp$;^}?lDZ1QrgRRY2_O+;)CaP;@ZNv#J(J6 zaGHC2D9>@02xS`@n5ssNDR8N`9r~%8F|P`S95Alk^SQ3HzfQ#Q!W_^(F^xUVjoWXn zORv0gG?1;jAoMvkqG@PZ3+Wy03-FDvb=A!H!Q+~?&GfOcraCJFmweilQp$T&gooeG z!R0HrHfI`?B4^XyCKE5>%ObzDi)_1}mo4uYb1O#4)a*&Bbk z$YUy($az0A3PpbfEtkxF2aa)kPdtw4L-Pw-wBF~u^m2mn&?-~0vfz4tekze72uO_e zt=>cO;_Wf0!8+H}y}|%Kq4v~@kc!d@lK4&T$~FwOX?KU^fq)H35v^<2xQ$B-9NsGH zZI!rLre-q{sh8tj9epYNG~1z%`02)gSxP!#p1!6|`Q1j=@~^K*dY<;BUG34A#!s1Y z=zPi!s4SCwCzqmcBbG^}L-z?hI*%&$30^N<&E~$$PbnaIAQ-Vy#9*s47fw3vk;w zG%^ezl3wsxB*$rNP)=z$Tx!C6?5zQ!T3!;61&4aFJD}Q^U%JEN2$`**{B{*Z^6|HK zR+sLwDtBXML)r#Lp!c2P%kO`TW;*gEM-m8q_{D2vC*Sg&aLLmTN4Tp`FuCBs25;l{ zp2|0&y2E+Zt32`)3Eo)I42!rdRFgb;dyKJZM{1L{_dcnY@i=~$5elI8lBfI=?_uL%uZOk5xadg$Qna_;!CZic9s)uS@gTPt-HDGfWL?v6f+@fKNkvAR(5m+Q8lP2aLmlIu6nqpg0_(8To7)qT6E^PBrGSbN{-D4cpnh54kgXa#UI)4@vZn=; z$HO#>z-iekGzyUKE1D&i%zgYtsZ0Nds<)16JLuN73x#6Ey*LC3?(QC3THLiz+@0bM zA-K0_k>ak!9a^M7gA^|g!Tk%*InTGw`)`u9ev+9zv+sS)z3o!Lv~sBJx?siFNjo9h z_UwLOOgkC3Xy^@xa8fGt$mJvhkY2kWPDA1DL+eEhT>@jk>d&94%kWdO--+V3E~hr; zD?0ob=p!iPUI;<)^JQa^kMjxrsg3Gd2_Bj^+99CL9!tqNb|C63KJ2@ET7~hFyOjXO zEVlzo2|Sv&*LZ{c#PT(QdV|(TtP=m$+efyIHdNYc$F0X#e{ zaaal8Yu5$Wi)@A!C3a_qGA`A*qR|BsNIYtpej$+H(sIot&)jmtevZHJHT}`Z;546$ zd(W&8zLrYZ?VAF_Ue~du`(RI!@xC0s7PF^pUFKRX{EjvisNd^oHL+BY`tA!81VisS z!8QhQ2q^?LcrjATiIXb$i>sr1rDp@dYAMHH091LlQxT%4qQyFh`1;6+1EA>*+zV1W z*YbJ4T5T4fYKoHAM4r30M=Ky28+uBqK-W)0bk5^6bwmOH1(?6vz%`5E)TkBD z#NWY+TLJsmQFG~|)-9&OjA;g$iV-L#CiCb@$a@{L7$RB=pbyfA8kg@7Gk~wJsEQl3 z;W2cp=cy`^MYl%fvayg5Bw~n8}F+<`4_H=$deleH#2k zHc#8??itT}SVwI;ohTLfUg#?43qt&(AaJYf@R9l4fHkW=n?Rb) zsh*1KX}(R>_^lL~;Oc=a%#vnX`(Zi25;RL|C3x;4qs9}rDFBh>K$ju-^{i=Ojy{yr z>%)?iE0Q%$xt;5koCAq)(cKl!9dd$Qd}{xY*_d{~6ZPms2%+5YT9sPnFGX2Qf~s=K z7V@1HfM_G`<+-n~k#kEiwb|+EvwD3dS&uSDt@U+P)Z%_Q6evj-yRDJ*?w~5DZFyX6 zG7}w=jZOPwL^?Y(<#Ii+`es+Lo5n(o*>5j#mP7G4X 
zkdMryHgw_oR=adp{$IbrUXLyXM762mnA7ZwY5%OG?*(QaZutFErqfu*;bb;S@QhIR zL(=VhJQT$_xpSccnBfuHDG&?3HxMi%Ji>dH?vvGum{mN=n%2Af2J( z2mv4mU6n{Pxygq^+RwXT@Uc<2%hnTbz-j? zHl$C&-5MtILmm7#!~_eoM4YJFS2Nj%I8((gD1>foBt_A|YWw?Z!m}s}KP;z-I6P8^ zJ9e5qd*Fa05!a4r+ZhsBXM_&WBrkdP2jTfL#mW-Yi{ zNl54(zF|d}wJT&V%aXNZ3kV=3DJ-Y1pgWHz-zl@w@l%r+i7gR$NSR0j^1z?4lQ!HZ zkufcQ<&$=`S+0CU2R3YL=skJ^O#=TaFQB+Jd3It{j6f&j5fMg8Xw1&Y65uL^aYpI2 zL;5X_KE@8JQyP%kB=F!>eNaCXWiZc?D`QRt+>*R9&aUIWU9Gx!uX7P@%f$#BHCxRY zx?Syc^rin~4V#_#JFN(0XRUa4F+pa}Uo=I;w)<(8`9x5qYV_MW63FPNtPC1gR1XZP z3pRQ8hQony>}xg1Mr;p9KN0oZuis^2eKhE2yJ2}h>`HyifbZEdYCZFZCplutPxFxg zyrJ6oku@F7Zl08bbRd1*@@cU}RsMvca<|icMO%;CFzU!_~%jKhf6I+4X< z>DV0T4lO?UM{QTg7%Xbk+F(V*Y1PWwJeijCiQ@1a=XVN+_+x=+myc3DNM$xBg9??n;xCH3Fp#)APbB*ETYoAAKd1 z(;pD017Gb?ZmP|0l-|)HuMAF`dFmLz8kSLp$!Ke0iZaFjjW5`&+dgaRgm7ieNxw=a z*-3Yk{ypZq*L3iRnNRyHAwqS=b0eg8>vIel)&n7LSB+p=;?lIBN%c6ZE3X)F=gOKd zZJSCR{YHI{gOqvAqNwW0$A`#|RC~Wcea7PNnC`S>b6;N^ylC=|TjhMSKYwRIl*FuM ztCkq*wsIdEj51SfL!MR)Y;NDcr^i2Fc%OlS%hD4_qJqQ7VlnOpx*$M1tFE-4k`Qep zP01iW&S9d(ir(}fo>@ldkZGMZJI|ol^L`#=u$FfSDa2$s`vT2nOVDIU;T?VC=@@0} zv+v&mk)(3z=@au@yR>Qce9tbB12|b4AeD?cMn|2i^aZ;fQLhzz!C2~!E&oA}44=48 zL@T3c?|8(T`5ddAy;D#EOML1i7PPIvUxGyOT0R>_erDMqq_wj_1 zQU=9rHx4vh@#l)B|9??M2lUxfotc7 z3k=&0_1U#6{sa;l;>dSjioZ@+7Tj)tn*8Ne`N`j-bE}KGUSJ1ma8v8TirY;zk6_!I zO)iBBT6_D76GX zNU$YBITdHMqPpYn1m)8S$C>m8V-bLkD<-Z5tiG!~>Xr~bam!clxR@4%Q;bE%DtlZU zY_g4TyJ2nn-*5tBzJ8g`;+U_b(GgUkSs6^E%B^O~0!<8`)JlJ(?SWkN8#xEFgbKWd z5sjhCLo!>iQJxr)fI`FE_&p#{@bk=1SHVTgxESSu&|fOFTu6apNG2?2nV@EgYD+R2 z)uw-F&U^f|dhxC(YC{(;Yo|Uw$GxX=L4a-bd_7rGyYje}`VAuosz$uU2}y2%jRxY- zSDU^IvyPxqyobbH|7I(M@4aI!`5pT4xpE!SgFx^hx?Xwx)!eNeQX(S!Ftr%^mwnbG zcEp#Q$Jg@;VFP4kW3ZprJ*xHD!Z)gssVtsSs$SfX{qfx3-WMpl~l4pX^mo57vFW?2C*<|$P6 zgAO7@+EL-$yk&lyTUbFs{tprA$Q?_)xU2;_?vdp?d=Xxnbk&0j8~x(-;~!VXY69gk zPE4(C4}BRl_9*Trsk+b?15=4o&(DSmmOb_>#r`RW+)tbhM^3iK zSbCO^cR3x{BE7o95gk^NdJ7GaAj?~N6{9&hrXRHC(wc? 
zr$%RJ_}}qSt^z_YdQTK%`-iWPnxwOr9kef{fbl++93)#GQ`FtwOxuJtW7r*q)_r`lYMu34!#CV#Q}jt%80RiAt%`)yQm!FKp$U#2vUW7oYxdXk{LFhR6SU_U z<*&*;8uDZ#6qfajR``W`IsaSnBPY`8+Y!Sn?#Cme1w=Rb+Myk);bSLm4;y!#(1P8; z<#3flX?HV_59X$Ev?astVHeXg2BD%BLL+dn$@*@-+dWMDroQYp|IB=5+%|=s-FE`6f3_u#8S+gH>(H~OMAzNGi{MYP0$TW3ppz8vJbdh%1TEf*a+&(x zFrBVH#8s#tg2R=4q(ug(6}+oA6-t3b?n=$Dynhy*{uTLY1VxHJbH4sYDbMrGzk<16 zs0wr4?g~96p?VK88m(F>ZtT@Ufui0$woy=!p42rmnt7wsJBhBMyt$6?X!D(p#l=Ky zS@)ZDA@cdjTt0F$S}+;P zG$5h#wXOrY*=oiIg7sG&C;MA$mYPnQe<+rkFmY%wmOIl2jW<~v{y?9&$tq4USNq6s zVbHXA*`|vdV@HZoG{1+m0WSqZ(|W1Ae$vOJF^rP^DwQWzb7TdXNMiYwxm<0$UWi;qFr0<-@IssnAUm#SRka@iJKO7Ur$)ZGhSecY0ru~GbrY)h})O=!;x?L09HPq%C#FM2Y-A2#4h!klt`tuO@rx=r~QK9 zS}tw#X-9eJ<7};{Nw3?EeYh_d>1h3zABf=KcdcRT%F>-fk4F`HCaITWdAcRQt6EdV zKpbCUIk=G9i&l8VD#PHG_o+6Qa`0^4f+$C-#%!CI-Zp|0Mw;_yq|r){Fx@!#;TO&( zM*R%DUY&%B^j-Tan#RveVM#g-ESw*JxXNF8N>uDmPkx>z2@(ph^TAmzoe^7P- zxcFj1%v>)dL+hupmX?y0)nX9dmSF<1DE>OD54RG5Nn5&=2wFeT)gzOUJHu@`0>#bE zEqaTY(x=q6WO|KhximJal^R8yKzhzZa6qCOQ|Q_C%ewY^oL=Qlj7Nmsg*R$U5H0l8 z7`(WbEW?S)3*-9CNXN-I?T%=;Z5Xim8lGNC(byq^gdo2e2=%OY;i@Y*=DGsuya<2V18g^WP!!a%4&Td~_PPC3H$lUIfmfH%o&1=s&} z{Y7szfwGFm;aH*Sgbtv3C!Q4$z|?jYa_qimn6`x)vDl>j z2-FZp#Xw1ZU=1dGsA^+77|E%T8uB!9p|aa~U;#J+@DBEQZ+2q}|Hvua?Xu+@)PWn? 
zNgUmFL{8Nn$Mww(eHNkuyNG=f4m?GMjkjafmw`8=s1R-MVn&L$I^NDRft{}|0e9l# z<>v7j`)iS)n=jMzxeF%TfYj|W!4v(PKjlihy^z?Aka$AmdsVJWjirrd!johrU(yV3 zp7G@1;@Ml2mTmsSFLV!z<>2`B!~x%Yo0FtkGOTj(0q@3xerw|P;$lj{tLdFhz`(%U z<4=aykLMs(vPEbtQy&SoeDMO-!VTwoHIjRRGXSrXQ|Q1RNpF2QT*l(|-2xioJXXhS zn|iJ_${pYePPY1X`v&^RPP49Qbt|>SDnZN|YdYa(srgNikiJ6$TlHN?Y~(9-92SGL zBo65m|&=r?}d>Ls&W-!~>PHxij(<<+8NiS6*83O;c?VL{^aEnbA6{23UX3 ze&!lJ??M?95fFi=zb({J9eE?u&V%NGue<0RwuJJxaUCS#^^Sc(2OU3Hp=g%jFfTcq z;N+gl2O*BjV|*xM)V8K^D%+KOEBCDSnv6fsjP~(g&HxHhwJ|02yn;4vUqN!x_?Tqp zfp%2|)5O_7!VuCb3PnrY8Jm)IOWkyJsg!e;_f+JBw$Bw+@hdTlSv71oF)Ot2YLI@b z?@#07#F6SPPF3Q2Z|IOuzl2`kgR7j8+5+(+o#Vv9(&LizCz}PuOCEf{meH#DT3=8E zd%g&Vc~xBl2%53tAAOFCU;y~=Ap6dkVM1qNffVo07{(y1$dVk|~SLB-)Lq3MO!Q=V1=<&IDoO@d} zUzUVRY)EW{h5FBl_*8l_qO@#=L)pk5)Ck(wFh zU&+LS0|B}Dup*+?%nVaWGQG$~?$MR#bGuONtym-~f@WZ9 zS~^~;K-d|P6_W&1etB4JFzWf`-#Fl&=s_MLxN^=*77wOJhjzO?T^|x@MB*fkQ*5Wr zM&v`UKAZ<(4}H?7Nz5W;fjp(iUMKTVVFX$|hpFs{&(5pX({H_%NXfquVjfr*Ci+Kq z+Q7tv+O3$Al|F;%MlVc@_~=hG19#Ah>zigL#$0#W@YqV)5vRyn;zR7I`pc{6*Ztvh zU#Hb>QQ_D$Dzj~huI1ZxHzyV}wFi~gW9-gNjbCxUX`qr2Ry4QQ4nCj;7$vK2SB9qdiqX(n|4S_~SW z=H~L1T_Wv_GrJ|UEsQ_vL_eb9nDe?RYv@qcreezQ$(YW^KBO;Cis+&ld~f2|lLir0 zqGS0FS=L>qBtb_ddRxkG`tbD?aiwa zK%eV-CGr=~{wXMcT_}(yzXb*#&?Ufyu?FzySktxsI9#g0zK{Sf_%8l!)EE;eVO#WG zoc*-P8N@FkgLZaz6K{3|lM|H2xTDN}KB*~rFn+JIYI{cEYK$>p>0R3{WBxu+Vzhr+ zN0?q&G|mAp=%05qMu&v&cvF7Iaz*#yK@9Th<>HOWdojV*AA_zmGGA~APbEweEsYA; zSau!zT{WI}q?-mVC7Lt7e{^GIos2|~nQ$CIdt-(Vj-ngs zOb{5_VY~sUO-acVJ7-J_q8S5ymQx|iC?_CS4q}JCN=P#38k;np47V#4#6wb{>7fIj zp0g-G64d-d>1x@;JdJJqVA&AfhQjmTi5|Hmpza)Kfb@ukZMcY((p>?;oDSq>d2d97 z!g1?rPZ>_(FV^vSAReKh3_N8L@+k}e@#b2xaA~STCLH` zrc~%O5^)ukxWAb~8ps#G(Ff55c0Z<_$&>sx@;DxL4$qjMeHzd{l96&sY94*cL$^(5=1gT*QBcUnFuz7otn!ZG?BF~38?&mhaN>N99 z_PBd$HiNXN2d-)ffA(@{)$PZ%rJv6I7_6X%U$>dr7UW5}PgFnmP{4jWma=DIAlmC^4qkTU!s?@W&0SWstGu z-{l;{cY%%)3Ix9DI}EBF9-=R9>uhz;i5(Euppwb|l6e!3``32yXdrN3ncWfqm(6UN z4i!d#Q&>b;!Ma8p9=ZXD;$t#!Js`<)votC6IY$1sSVX{;Y?@<-4*|+o-rQq&W{6Ewoimt-2eb?o?H(582(5z%!^9jJ`> 
z-B8|B!%_A6UC3gq?4@BpDz@IQ9`rdj)lysvtq_YZt(%hH>~)J z$)Wh9lm|+VoJk}gOtD%@_yb2_R<3tOfcK)U2GaL|dZa~1aaok)W3yuIN7ivE+W~~u zq}0O)x{@=31+gYxUmM+O!b!)V2^uUN4(2Y-tstqPAe!wUyrCf2{U&UjT%?Cq=`giV z%m~$0e-xj4LD4|@1xT8}e_Y_uhuCH^z(glM(C^x8^eow>xaB?%8YcmnWVt{{0l*Por)=Yx3bq-me)G1&qfREJq9YO@ zeiW-dFttu(Qlcx80-qA>)ikrFu7m0Y8?ova50tEUVGu|>e6)%z2IFU(r31e)b}B2w z1qW}Z7b+%PG#vO8N@4X_g~-x$f4cs5U?rRXSq5+81NXqIfnlUP`FOKD9Cq5n#GwJz zPuRrMCMW5#v)rftai3CP!NF|XCsh}7$g&%1Q4`MLNlIVIaX`0JxYqqx(nenX>~>FK zu$G$aYk!*-#fPVV;&#yun$^r}_{u9eSb4u*i&$2@Dxir`6ByxQ_~g0oj5<&(r*tk{ z{VeWj4cq4I)IU!5IrUJqs2zV z-Gw7BzMD!W;fCETv}!$X4roD&fUvDm;vp$n37yUvL0L=8%aQW1&RG|wP?xR!fYn}jA>K1GbcdCV|v_~v?!!8s!rmE zXdeSh+t}>0&d}nmE!D_mz?f(5wnYFudeyjKf--w=7;1&$72_=_JGmLy&EIW`W!0%^ zxwbmO1#fi=@4V1QG+%0-z+)Ye5JsH9gNoZVB_O`yPw5~XTO~YoG!XV*0e)OHfQ{=S z4=Iaw{%7fUTWk?a!H9b=Lgtj7jXJNChZU3H#Ulw@YsFi5e54Cp-PN}n-E1?s(dxW& zbZ91S4PS3fBbKK83Yb=>8-!u;oexK`56ujUG$)`;e|%4kK;GAy4l8G=>fNx6O5o~T zD}jNQUjQlcjWD=3@{It6=y&`KOv;smSml787t!*x1^--9G4QRO2Fe0hcElk!K)=s- z+WSmRcnb~M6aFC6DNF}p?oS9pY>@*qP8dF)H?LbRfRQfm+rie+C*{f;LqlP<&5bkU z{BgmFZ?dGUo6@j1IrCg|FHFTqKDA8gi z{v5{2%HNMwM|yM6CYI4B(QK*(fNeYsyDZ9=vUOk4geD~3Et;`l{AM_dIWwY~vvYoX z{rIBH#oJm#&QSX(gpw<_OF80cT5{npK>k(FEo00( z#j?XmPE-b_YD+Gz5saEY35t!=@yn_7D?-?3vdwTN1Np?u9K{9A%hAW|`$Wo7oDal@ z4XrKOA-Imrje`ilfI&l)qt(q)Ok$03rJwUQd$pI(N}ejX@5^xq#v2+=`4OiY@CWMZ+&v`R{3+q{&4BcY`Z9jl5cb|`PS~=AOriPsg8<=eF==hZ z6`5NPm$s9`B|yrs!twP8_J`lp{y}}djkemxy|vmQ2+pEkwVuYd_D3TpP#OR+@{oUW zDy6>HCuqRIA_tS@}Kng>i+H_8dmpXh{0*A{Xn=5AZIaxTQL|>iIm+>S-+q;Od z3($XO#3O^pjSq~oH}6qzY1;O9WPYL$^sV8VF+TV%yUFa;5>uo=?WvP9X+dtLJPRoV z?j@iyaQ{uNt3Dm5b6BsFImW;xSA?Xe~VztR70-Sg}Qq)k?xrvH`$FZ59kulw8hjTvX{mD?8vF19#0gwzssnpxNM%$J3x zGi~3K{}!y>;OKBSoOjN33&zgq)1!^>8MO++1MHJV`jVBg`GA&Ud`q1)&)O3I!zqQ> zP@~=cFQ-HsdWlA0_RfL}tuC!#yAAM9Jq32ginB!r`g+L)z`@F<0W>A|wPu~irOIlL za+1mEx(hC?u7KBZqlxnr#`540QW%uB4CL+NdsJd)0*E=xVS-g=q)=o`&SeW63ak-Wt9 zR4*J;KP(t&mrE7hRib4#LeJ!8+`2rzlO&@&66n0%dg4xH_-?#D@~As*`%sgLEFhu| 
zIq(;CpIv^-50p@qGM=@w!sWc{8bDom(IL-BrsGnC$Aq~=(B}#>F-9?8$K-T24Dd z10{nuL6izJ=QEsob=!*pZ6K^MqpUk7!$9I)Auxq9j}F6HPFlWjfnujT!qeNRETx%% z{sMBV0hDNy;tc9!0tPoMtN#N&AdA$#5t^a_)_A~SOH6BU|C5J;l{)vR!-$s~*^Wu3 zPsXXFnp&b&iQX9BEW_QpwyH-ixv^B-x*JGE!?F8&7+oa^| z5sYjwOP@1iyOA!>xANS70B*m7VN|mOO35NPxLlFvUb{l>Izu~+15?Mo0E%5R_-HA$ zAYP!EK_`}~ZRmTPoM@awS+~uq+TCAK3&)}{G_kkvXC|uP?uNx)6Bh12b#($E?$Wq@$Bx;A{H;f z+jfLl1@s3U`9R(!6BFJNLPt|~h}&>~QNXb{?k&MNIueI)qc-)Q2uRI+t~*0hHrTh!3MubGJ(B-Rz!{JScX<Zv?RwMbK$W-daOcub+npDGE}w=h$Qaprt;YzRAVqyEC=0G#0<__ z(y?R>aBiyi%rl%&J!qN>$#)aW-O{y?RLu;2bUv>Rf#JHuW#~FUN-eyyu%W@dW~;j^ zwzxIUhbs~SdAqZ12N|F+n%1q?VUU=7dSQg#qs^%r^>}{RH%#7&gebqKXVmfaci!P5 zA?uX$eGAmf$|$fcDOpBr8z8X^*={^FSodu0T5~OyF`V5A{J6w3vl}=SG_V z?zk?FrYN9Er*VwSITncNi43RO1rbBHpnyG<)$jjC@jnaD|9aN~^lu23$bZ|Jb|5;3 zgtHYHbIBT>&2)YFSsF(E$GADTqofxI5&>Q2&c!TUzq|vMj!FZm{{| zSA!ESOt?dWx&e3>Q@>Js9?ARDpJi1*JceSlVQT_A78UxS?pDXMk}QVRUQ;RB&HKn|se0Y8 z9}2c@;5myNa!4q2QWtR)bNK~Rwk(TEHA$cZ#X}g^E%5ihkE|x84zK4>YmG(0l4-;6uRpz~E6b+7BN=$S zzzJc&6qMnkJr-Nyv2GPyX)LbqMG~ z=0Zryq)MbKz9D1xgdh@ea3#s&$Mk#~=?T@#(K4~G36?idN-Bh$OXJ+w5+|}jH$O~` zeh5g$$Gv5Ipu2ufxE|?f-N3)ziTK5SMK(ro!OR@gO-F^H9`!crA^#l8`t}#;!}t;A zAI40HVJ@}TK+|JR_xKi7?KeBqICpvzUyvHVXK{kqLVbwaS25-?jjp>tn>O!Kn9WLU z{12c*oAB^d%=3&PQDc#dTtKJ5uCw~^qP8Yp$3gcFd+X0xR}<7|@xrjScJh>Dp=m0% z7^WRq1s<*^mfmR$WLUp#Q#!%|A`3zk-hkS*6#J@KLq}A1`1SZp*d*WK&BT9((z?bE zy{QxpvvPMTbDo$;nwXN+>()lvK}Mo1rBa1hUsU{9S84iAOZ(-(h!W=%t9|<`b2}FN zNFshn07~Ewwv&-OE)%7@=(K$7w|4oefH$XD>-V<=B?KidErJi?**K2+$~WP$Pxpz% zVwg?n1bK`j3;)@Qs^7d)`Enz7TL(E`fGeIYpQ}J|ggww!S97hg^241N;rU6LVM2#8 zctynGk_?~xkN^*^xGVVKfcBtulFJ*>j3iJn)>hv8JIJ@qz|f)E;iM|2x1WSP z2RROxVtsTWswW+@vy6PC!VN-bXJ7)0*hMm2YcNTqOR!sJBT5jt3DxDREGrN1ofzG9 zQ!QpbFQed>(T%WrxvW0pal6kDkzIAajLbIATE@F9SlD`Sy`c3ZFTz$>3^=aVsrtgu zka@4UGdaz3CI6N{))_xWA?84JU!AiCl^j`@-BVeK8xT_>%GHuz0=w$TDm4TinER%k zekJ#J@m|%}eQ>^RJPfIA?vv(^*mSEdQn#y}X^WVM65BUxzRUJ8@wt%nnogZ4%EHab zv!ZafotD(qJWKdrc7~#K&*G^oNs-kJGiR*hQ^-A1A_qqQ17pJ{+1ER>YUIzKg0PZg zY&4UDL(X|3=uk$ 
zumbq|va7L+HE*1Rmv+rtESbL)NgCdQqqE`o;JZCsbg$u%EzS1g0pqi`q9FPf!IehyNPM5q!oX!&cV?r8sB?Dn(=3DqI7kF;0eaBaz}+Wj%qD!cSY&nn1IM86Q~q zQB7VGB|bo%rtIH%?6qXRiYPh0#3TBY8U!b1xzv^G42`aKJp68GfN^!){4p!>rjNZ! z3XAFO8f?nRme#e*F8k!~Cdg2C@>vJ0H{{t~x%ZP)%;KJ2j-M3b(+-A7RWH6yO?_YG z*5^qvj=zCC!>i7?Cm0reH@ww2fNRm~)os;Odd6dD2}wxGJZ%`jS{cu(g!s}<_DZoI zA`K4Fug24bNEH&Sv#lm;VYv&!$Yh1Cb@|`kf;o{Bm&n^d z^%#1{3KD5u4eCsiLOI#XA=JC9c=b6cR&26q+|LxYb88SNFYfU&Gn`6SG-oLOmP`km zBW_j&)>Kh~Zp~@0S;Hng+teD^H;C0s`q?$QxnC9ZPw#m|5yo>YxIL3bP|Rn3MOzU( zv}f={GvDHIp3}jaKC%b^sIh(dE9Nb=uPAo+T`vD)YuvsD|6#cIU)+s%kk5k|Po+>2 z<<5~5XJLlgU|rpLBhP?Y&XtCx6?V9^s`ZlCooUi8EVTKYbiX@_3rOgnqbHX)^nY@w z*%gyJi?eG_$;R&Z?sWLGcz;57qz1h*t4gr4IDm!nlT+}gsfMCF4Jx%N_F0l1}m(k1=)5Z6@jqmz}@ zP2)Sv=g0o|x4~-GfB9)+V)ya?U$iB<@ad+=oFqriDC8VUd5Pgw(vkyUbfc;c!8UYd zun#$Fu|B`94=m7*UeEXLLC)v#r|7ouZc%8PC+Wa$XjeF1zxdn3Jo5!-*I`g%xW9zZ z(GuY*5zwAF$(NdHg?HX!B{2Rfy{vcE&&y+bHe0u9kHg}i`zxJA05tr>%g`9zk;+lg|(uI zd~b~!ujWy6%}`H&C^b48Uc2bn?{4Tp+4D83gqIcDz(>R#?(xsLw^-x{enKn`;{tt- zO7)LRE24Y>$zik9d{cAQ6ey4Tz)4X8aK zph#lXazaJjq#r+KJu+2E^PBNW4Lw9QminxojuH0d?;Sd%Yw<)$>irh8M1_s)aWZF^ z=FL}`v=XpsY}b0mchhB?QSGB)R7|#S0FOMjl5ZBvJ(d35*G6BTQ<;nY^ ze7DRum~?y; zsT#74NAmXN3{)Lr<0;VsXaX&9#fX!VhBvKM1!;x^+JJ@#C#>S03``9EUv z&61eqG}RR+-3DHNGK(e_sBYQH#CEwG&x+stTGC`hGme~j+5emw^TgFuyTdA1ew1Zr zG#IvGoW0aAPU=btY+o-)*3cMEPKu4)bMoI@Ov#Wms}J&e4`&sQKkNc{lUX& zm$&k!?U-Qsjg;Gd;ssz@vYu^c6N}=!qQ& zZCGif7JwHj$4vOdax6rorr9_h5-dT5uMqTH^bo`X;s6!MCd^Sk;M$O3u8X{hK4@6+MZ$y|FI0~65Y{e z6|5$TJi|TjHKF;~=pwj7i*<79u8lgKbm$cxAty9PJLz1xW9bl?!SJ8yM`uR93-dfv zKt{;3laryQV-o`Mu$5F8(Ch+{pa<5)B{!+bRgJ0Y1FO;PTG2=k zg`LBWoJtgr6aHC&i&Oz^l`te{mz}Y#_0?|*RxY=+bYj*KV)$1FImzkS7S|1j8W8yT z4wHt46ULzfUxFPik ztiE8xBKPkVW4@2o*8mmr@0^_q=$(IejC1lx_sY+H5N#Jx8cnH=5U-Oy1i(pm^tsnh zMAp?Rc`ftJ)Sta)Tg5k{)0nGg*&K-Z?Xe`Lv7#7JurQ)2PV&1a8!804I>hrwq>i)p zfTQtCX*w=LvBl4VYwqDRIiV=6PhufqgvZgG_aZ8@+i;F#_)`B>@ZVm$ioU6%p0G!S z-l-iev%ZcKOrZiweHQH?tKiXXOfPgg1`&}O?%}owU-%kqM*=?20F|>oZF2f|j!>et(B_5s9Vp_Jybd 
zyhAJlruM)L;UYld5sSY;ESuW?-w6+s326j`XwM*%McP9(2yqyZQA_nlx;w`6kDffV z_e&~d&hDgvJToZ;tlU8cuzMf*H!Jsd`Mi%Tg^Zni(1%`7sJ`56;CmC75 zPuBY)hfWsTn`tzNPU)3v*2f7S$Hp8Dv3nRyQT3A|UaR%{PJO6Zqh?=@#_q_&kvL#V z#Js9sWN)LS^Q|kv9iKQhx|Z(*U(?_7oD^4YbIz-o;G8ev?(in81L)Pr>%M(X`pjE%LS+pasPOo8yF%Q|EP(F&Otzo z#d3wnpvQZb)M~xm>6l)RMLDULd{ICc<&6?}a!HHj#ds8?(+RVA2gpV>?@T7vv8FxQ zS{`k<#GudmROJ^cQFmb57D7|oxU4Y0CEk_i?Hzm_@ao$S9?X*E7hDX@I%6|kkgW_j z%TlBaNJLEW1f=q0R5&fmH{)|09wIc?qI~^wb{5}UN*|2UeXdN_s4i(I^NuMw$S&6H z&4jwrW+!}of<4rZ6&WA6pNc!enTNBkG~7RL>X$=QoC8l25RsItbF8B}EN~>0O+;^& z#%oHddA8JlbG;+0UHoFju+Y_tzG)c_@%GD`3TOhqHwvJrhI`qNlOe@hYR9w7!OOMq zM!5g{(y5S|OUkOpSuIeic=^8Ps{0qGqOGR^J9zihBY8=U`%Tj!L-<*kiX#p!oa+5s ze|nCjwX$(G*n)$Puy8|)0kf(e^rVq*x#S{ZcH&(aIk6v;(h8a(1G{Ppt9v3dkBOqX znZN(VzLBA3r0q$R+SwND#bHgnT6n2bh`rP)t}+ksUfPYu#z{uh+%l|APQ$4+cs-i1 zn@gb*0R*zq@f)jvYlCnR2>zMi{!6L*n@knP|LH;eMG(oO_5b%A8bgFT zFQp^L3S<92kc9#^LSDoF{wO>e?%pJWq@?iTi1mLO8N(^zf5QLqX$t9oxeh1MpHf^V z79=Fw;{Q6s1o(WuFpZFtye6HoMA0O*{Bl0~$m~J+1%F+grIoQyd(KfXA2s#;$HHkl z+L`|}9VREmMkNZMr%9}?af_4$Ono<4^&=?AE!Lwj)orHgDQpfHAiTz+{ zY?$!lOzsO=ebZM#o>caoDISxh9%ipRu5_)yp*GLGnfxZ4@+HSrMZ84R2o=jet@NeRYQ$)cZ z^46fq8ZLLVG5bi_W&D^Xe*f!8rivIhjyUR+Zn#cYBEzJZa4L z^fzyBVS5rpwR@!{o|8Q@Z1B?`u|dH~nhz&jBI1A)O!-zP*y-h3c-ytL-8M zHQ$|B$xQ`>tU+#kC;qYPh~Aoj1&swYI+q7rNn0Hc{XX}3#^v+lpE)PbX1^S|UU3Thzsoz)x3&1k%T7*IsCqu`j4v1o9#^-b3k8vjAYU^jkPE}d zfu{5i0btwl)CG~i^5d1&Dp23FGN@=JVSmo2Qz0r7Eo|!Jg4aW|c;ctXt8eetYGmfc z^DGmtCJ(e#!rr~_kmvnSO(3;yj>U~sFRauwxAcPjFcKny8Nn=I4loZ`04xHQkh$~x z#_jiUni;GNR__w#r#P|KJNdQfRrWNBe$sxRy_PA;e0HnzmL$~}-0OwPBvO~rorr@# zmCsW^Hcx9exHu|mZe-eKkhaoPDd5-X*nf=g*F-+&rMPgGh>$0*EB4(wU>+;sj-<}> zk9L{Uw`Y?aZR|G=4efyVouk7co`{>JrA`75(cjZSp8b$5%BYShk-Yy96Ag09LZr;fNAp-2in{QZ?JPA<&z~7TWtLD=Bw?lbw9Rn zMp4|4x)Eblm7&t^U&lKr;y%UtQ2u9r$M^2*OI-$@-A^0hn()Uoj#=>&xY83vGmeDE zJB?CQvjt+y_I%t4BfCflU>ZUz|F1`@qnd^UA1+@ua4$wue4y`N3;5` zH5NFp)GU6p=(gmv`1R3omGR%TNL`jDn!`pULA zgJoMZSa1t&!9BQJfZ#f~6M{Po?(PuW-Q696TY%uMgS)$&N%r2){cwN4)bv~3C0)I$ 
zmQ&3?r%EFUFKi>?eMzp1wWHbmW8W#fbmhx!L!2&fQj_&@3H= z5tRu~I9ng%>sEWmPujR`zV533cFl#`H#$8q^3K_zc<%Wwrtb|8fy;V4pj2E`k=j6B zr+lN%c;tfb;XPE|_uPc9+-MlJ=RMW<7dHjwOt*p|U3h9DR)%$`a*k)pKW4AGUh23B z8?)7QFmJ^dV(|r76*!-A`~}N_^**UTbJQeG!c?xz^Aw=6GSDAGu+E2ZUIz$^rqQk9 z1w4ugFMI1L_(ThQmgPW_|xLDIZ_joUiYi6__Lb!=$3P9UY1`YjAmTZ=G z^V+VH2GIHZDe5V`ywt6V=x?`5>@A0pD(d`a6~Ds#{wu4UY7K7AM)i2j?U1X3{jDov zbl@Quj4KS7bfT9IhM=Nmzg)lHnaZAZWPJWt3jn;@e1{iyRyN8b_{+MUQ2;M|Ch|7m+{skuoNpsWz*+rzTcJKk zcz-Ddj*7HmSq0;R{a^0DqzS>uSBQ8@QbHI zb3nU7bEuW89|YODZ3a{t|4h;!CKaA)1Ue-Wnh2#>Ww7He(_gR&(N-%wbxMsV#WNkS zkD9I2P}4WL(>YJu`PuxPXNiUeDfS7*4Zd};!8`qOq@2no^X?{b$ol1vGS8;*bQU!! zl+oAeF~erTi=OyIdPzZNy>m5WChkhPUAn{@rSH=||1y$a!4Ny!zFpS&ljk!pjJQ6UtI)PQJXxsnsc`IGG#2#M2v9{RugMT=4LG_t{8fh)w%{oDc_8Y<6ruGX88P&tVl8@m6GS zYR-;Ud+W((N*BY@&4OiC+uJ}&0R3EsBd|TrkhO7H^@8f~NSy`7$r=2F`SIV+oh`Xi zQg4o0jK6q;bHG4Dk~K5}s3WfxT2!wan|yZ;<}NFPg`>u@t%q*L;3i^hm{l9DC0a$Y zB6FNjOq#CU{cPelTk#+e4K7JjI!)8k-4PDt6t zMjpL3G?sd_j{LEm@f~xn-%;c_KuNQSPl#DTAYQ=Zjd9G%S#N+8g2(+aN2R~A%}dy}?Qa@z&GfXGo^pwXwv33o`&vQf96hH6*g#fS&u_%`IyxvYyiJ?^G+ z140rIy0@nfFDN7-CgCT?MiGDynr8+@2|xGIsUEMzY94xC;)qQ?__-yH{n%mP07%$%6LI>BLddM-PU`->`|ebbX?HgM3@49(R|kw2Zn4^!h{}a=iQItz}MD3 z+^(y{eb2*nFE-srYRb((Bw7<-@4=Rn+C7fbZRqtu@3qIYW-%vqzZf}<$X&b9VX>bB6Roh9Sj9Ku7~3{Aj(Z1x7ZL3|_&(89WJZtF`eMwi)uB+V{E0 z`ZhFFfB}TJht8P8#C^TO_qW&TRY041`TEggYiGnqSDOVr{-#BDCXXMf(#rMosqZMg`f_L?c@S7H_L0HfUJ+1Sr||bkc^GV`K{INq+xywTeI6bL{ikOuP>tO zag$rxTs%Dt_MF}C%i^GFVu^0+x75$?j_l6J`0i4E4#B#fiC4^LO;oSgTF(>5PPdCv zSa^5<9VcuXW_7oW&&?>AZBH!GRXXC0PqSHGUfx@Aaii4?HYmmM68b)DUbh27H_KxR z$>K@8F&N~3z%K4$!aBX-aO(W0%LG1aBLa1Ef=&-GVo}SGiS;&U3b~B4RDeAq#);v>bOg+e7eiqQvX=I>}(0A zpxOf-uzn@pW_(3-Gjvy{{hc2lO=FTTp`$WDhf)fYX5%h%nN|-nJ75y6SbNe&usd|P zuOqUCf-Gc4B)_UVZQiF=?>2~Hq&%h9E_uW z9|`hZZmm>mAB59gX1AFt&quDA6$JR|$Z2czjPlp`kgN#_NiDtFR(CxlT(y}?W9WMHS+je*iLoCxt(Y6Uz%M@z zpf&4>J9&b%b1bx&lvO&a?bguF&VjNod|zg9QP(KfzO75W{yr7y}3a> zyLc@4COnRV2u$0$-{|(C@7DKTF9Gr>Em$(O`VxCRFs*Bjs2OVFX{!pL 
z;Vg6`3Dnqew#L{M<00YCUt2A;Wzy|GrDJ%u2zlCm$@MPR^H54%%!EU^SZOnf4}X6g zIxnxGesjro06i9emdma8Xj0~uZMSECeV){(v%08}NyLj3QqAci0>F@(wl_tXFSnli za=8!1nEwhA85xK(8P<50c z!8KZ;-OV&Eoe4`{_*=FRk851@?%1|9n%DWb;AH+Z?Q-Ey=KZ>q0+Wyb1pofW-Ojh` zTkt0JjMnz9I9nB82@A^TYaP*pR;s%SX&C~PhFb5(8}nu|Jbsk(Ds*|(O){r|&6U46 zkg4^_2;1a-Lbu^|p)$d1n1bHt@ld^&N&lV6bcPaFN7`kwHGS7?s=AysANm>fkAFc> z;M$LZfwSB0_m}eMSt@zy0wr8MvFgqT{JgT4E$`hWbyu0*71KF&BXIE3EXc<`WMn6MmbRCtMek47)n#W9 z=daloGog+GDS6%{`-emmqz$kUWH#%y$ZBw7LZ5=wTs2d~nDNS7VfknC8cp9=?}{wF zyv>jS!-_6T;Yj)Qg6 z@A#Y72=?`-+98dVia;Sfct_=`@CRRa;UQTKrlGeQ%h+A3RmvsCJ)A;mB>^*l!?lI_ z@V8x45*!H7RLEJLrpn;+!IsB9Hhhfrdbs+`WS%LBPiT$?4*Yk?A*QuQ`9RPJrUYv7 zewJ&>grHE?nf4Xbtzc(2V!cg0Ce`DpNa|v0W3PYeBG3BESd7zh>b_#1-b>_4X*WrKJ#*X(WaBn4cI_%7gRcvt7pOqFstW`wWkv zb4@4n7rdK+Vv-V1eFbd^MT^7$+vmG8rAO<>TWX+NmF>Jw?u&z?!$Eep%M4|}^R$O; zWnNJvMK8{8sT^^n{Xe+@a0lUrrkE^dQ)P@q{C2*?9nbP@j(g)-kyiJ+%mU(y8PIX> z*A0|NW@{bh`DGiGplZb;cDW}vQ1htw#imH&p1#^dE%*h&vD`tS+k`=&_Cc>V+PaU5 z)uQ?CXA9eGeTiBp6>=zn{4bmWjDr_ z@T%FN2hpe0 zv%L$avv=1{*RGCjEkdl{yIx@^PjJ*Wb6`+E@%r}m_8nY?WalJjxoZ~E9&sKgtsIsP zD8$Vi76i-Tml3toZ&&RE$w}er0ppZ;P1pHlh=?TK%r;=fm~vh{?#r)8TA$I~&&Tf% z^#@O9PzCd+`iB1Kx4-4d5pdD+CCsaMIOpqozXTOUW=mVDb>)@?to1CYSLC*aU7L+D zYnLb&($oRT$^q39hK7I0RA+sgg?!#s5{2IH9xY70_euC@tu=YKG$?@n7mQ-+TQ`j)ZrL5 z&hS2pHLo;C(tp>#oJLG@?`!aFIAfc=0mbBg=rVIZel8Dsu#8;Cp5ZoY0x9F5- zgw72_$GUbgn=z3qU*u)1&9^%VpQjA+7w)5~@}0{0V<kx#*utHmLC;`bhC1x4i}a_0g9;>AVl(!l5Dv|9Gf(48-G^`L zt%%$+Aqa@;BY>;aSBCh?-6Q2&i`lx}w3Ovat)>~iW?15h@=0duj$^YaRQ-2FQqRRg zF)tgM_N}cg!DbUYk3;TWdC=jh!hTnJZih$ax=QMNWs5_OFzSIo?1wwNuM0ZtT9oY; ztBJFfrd%e>r>C2K0XLv~A!TEAV?@>aZs7ZAx#KBs<6*G&(9BDSa;bRvo;XhH@uY5- z?l@ewRsj%NN#eu#gV*g6@6r0RDn47!3<@EU#E*MHchn_ppMIa$nYo*-dlJQv2_lUw zk{r~H*Kf4_p+Y~lKc!{y2T)C>eJoa{NgVJ<=Qe@zdK=qk_r7vHT5Qa}3esOuSq4w% zYJNbsPq^EXc6aIHqoa!sF!sw+J56&suFy8$qCh&>rf1$8*l?$Nxwr17pJ`jGzp>GJ z9)C_q&jf{ayOQa8t<5UfBGP8{Am4hC#f}#=tTO|nc`5%{~||I zXzTq*h1|-$E>z;nm*5UsBA>b^=nAo)#@wkYw>kcizaK?2^REaaUrGX zHrikxX09Mcxg*eLKc-7Uo;VIJ!79IfT 
zp4?RFlwicxOB{Q)yl3jI+I}oSl+L9gxg;AAL3#Yv`@ZzG<-fu z>NVEhvrS06B&e9Z|Lr(;Mu}w{d-ZWEX?G2>5!jmEXeJhsttY~^zb=zx6rQP^5E0|3 zsrlCEQEUSEAv*03)dI;E5v=3X8m=w@ylA0#kY`svAI0 z5_R+;*K0-Lvm7L6`zRK55edOO-J0R8mv~poW8!o~188Yl4Rr`;TIc!2MlA2>nUl*V z!A}B7vsJ4(+NKL5j}z8Ps2~uDP#!av&sM$^LcHgceW1%55t8TGz&!)i$^0XK2hPa} z@N7NxgD&?sA~tW+*Ol2CdokORA(}G6eqT`*Hj@u>=>my4X1`D+&6MC4!OeGE9yo^< zb9ck8@192-B{J%|E{RuN5xAEj_jrL0UWoTG*9n4G5ZmA8 zkGE6qpJ(#D?dV>}n)sab45@ZG7L6;}oY>Gu+;h5b zo0Tay)-3=W@;$PN>6BiBZxIkV34Kmi<&g0?a&|i|SRFP^0*!|jx5QIkbu}3HbL3Ep z1Mt~xgpUO^CAoC$p^=4LV7x9SLPD#eE$C%p(}OVm0}MlJgxM(%IY}m$A93$mBDSF@ zY3FTsIFheg=4cG~XA%V86_0c)B{;C(7qSH$DR#L>Y;}z$5v$cZm5NuAqZsQi`|w@m z9n74s>FJ;_ak|(_9h_z_L`)khwUk!u$QPc$2|&Y5Pf%G{r~GlT>@g02HZ6}{{ZdI) zm6E>1m*;KoYhjDlkTZH{57v1R&|E3t!g<|BKp{UY-c)J(q#dmS_}6yplc7Cax%W2K zf00T=r8NSEqffICUR=>F43dkfz)YXiZ723u=oFSQ zyBC^G!u($5nHXK%Mk?1F3lPd$(9V78u$9+AJ%fo5?IR07NBxr=xCou<0q#SS}{3KjQ)r! z!ttv|mY@}&8A;bOdSOE&Um=mM(^3#(lxFj9@^?_aicNP^Qpbf*?@;kY91YDky1G*} zRi4mbc8Y4e(ge8^LUO?upOq6$_`ptf)yg(WV%sr(2JO?w&2KF01l~8hh0;7irkhJJ zK{k8iY)FkWSyewO(?-MGjd1TytJm|AH{4UReqUsK zdU#bBKJ`+VQw}_ut(F0wa3!T2Qi=_Z?9Qqy-Io`mUE4Ks@^BJRMT_5WW}!z>w|o>K zPmrc?e@HV16(t5R&-Al`I$yu82D&=BHJ6NEz{2~PwkS#GJ^GgJ!hM%_4Am+Sx+9`c zk>z932qvnK6on=v>eut`Mox`7^4I+Gug|^o;vRc+aCaoC-YZ{Ft`%r*2U+btSLoIA zew5RLd1}DdX?1T*vyabisgNB+?Mtub{MY8s>fNJl%H7+0hX1G8+WEyvT-@?kn9hd1 zdOxP1)oiIbpfO*oTg3QiA0S-;+z`C|aGOQD&e3=f1>nv#8n{SNwEHpMJ_)*54?>$Q zw-xd?!tw!frMa!#I^B2R9px*8I@~U%8$5th;a8-@#H}^WJMpXfeoo6$Pw(Z2$}g8p z*bIvOY~Cxf_v76}6QYvEiKg()ke3vy6$7XR(|nd{)BUUzszhAxSD3~eia{c|UB=5W z_&htZb85R%U*)tLl+|cnRj8x#L`>bWw01|Dz4t$E@=g7O!~L`UtYS7ASZ;nOh6Es9 z^T@91y$~sGP|wOABJK)disH*W%dp49LkR)jAs#C<&NrHqp_ID#Hp0~`>H+B}bk^JB z3)2Z<>N5hg^$7&c5^_ce?engK5$6hpfHD0Xq{-u4meH|sajd7MXR9<$+sf4^peMR7 zA+f9ba~Z&LK3(8t5N>jFc`;GvB*lO6xkcY-{;JlLyD!7#)mV!=9~iY< z*e9PUzDWD>c6^S&7y2#UX@SV;`X)V*{N*-Te`ZXf8|EBCsm1}y#9vqBgpT~SkPcI*g^ie&?=yzL9ZvxKyE&O)L5muom9O zqQU#%F3~FGx|!SrUX9Fd>x@5WeqK^z@_HZ_T67) 
zbH}QgUZ=f=1~(M=LJg|-n`P(NNRYQmrP4}qlUXz6CK{C%iF9hG3jWJR&DZg$>q_kj zVv;%{%$0AU>RHF-Nqim9#OsCq$^NG|%^T>NiN3aqA3SaaBCJEISK>(&&nrbC-5-}E z4}Bu`%9-&&GNYz9LfJ6y+O^%9@32c_ege;b$mqFq+@$wa0IyuH4+7N%`!nXI8Dav@ z8B)W?!AiIo(XZrN)#=_lBU3^(balz){0~%tn;t#KO?cO{e03#b9h%J;7s6}L*<2~| zr%TB;>8TYM9T&6jExPRpw@SpGeV0}z8uf{43?4~Gv*ijJUwyT~%~fjFr$5BZ1IZ)r z-KvQ_C`9c0t5~BQH`3kPpF(l5MuJcU#1}Osh;ecSNxU;f{NT^0wJ!bBgI26#_`stT z0`JXgpCha3viUA9_`EjbL$WlAmZLx`F4U7dQcl?J4g_kfzOTNE3 zjPP#1>=yfNbmI2L%TqFK2ct3##DuD_*3g$S%tu|9EaVWcCAeaWEHwxQ2I(#>xlqRS z8EUX1rNXzrhdq~y?V|*u60aqD3XN5nY2EBV2d>G{{QkzyVIDq8GRiVKv=Fw1>c63$ z?X~NXjH~}RpmciuA-i}wN4~UP`7r)oi0p(RrhC`@GtxJ1rbr0FRJyZ!ZWRo*qRo**UE*`CWshQH{7S2x8m}SuE!$+#G8r`eQ)&K^#_)P1_Xq_Y-dq_ zX2C}c2}q3X3r9zmt{#?t^^VFCQ9^lRlVII#uwPGxiVF(D=8C1Y>cFFU`B~1aWZhfq z$N&T3L$7ClE76h7^;W^c-*{hYXHiE6;pS;En=|RXD1w9|Acc(2c{z8Hx_gcjYOLV+ z$iYRk3+rv`b(PN?h322-y=$F_A_N!cI|?ZKSMyv*e+NhgZ#+D}JpqsP{FQ_4Of^!% zV>+Oe((j1S8EJSl(7RvKF6mz4Q8&O9SE&-dgihq{xBO9wE*cW+q32(ApcZzF9uk`Z z0zSj=vaz`e1M-imv~6G3UN6{gb`|*MF29$biPGSxpA%QV@7lUQ*JopQJ`L{E#Q=+) zBphxhE?w}R58}_8%3WfWm=n~CiMyH>7^(cfH4>)vvU=49tbAUC#r|HzUm} z2YfbTVlmx$+p80pqF;L_PUtNK^HDji0M4&R`)f`Uu$S<8xB8x<8mph&=A5x`aH?Kz zDqUfs#Xk=3s@2|>#bh|Wd+88QNNY(~g|wEY;C&`)6asj^?u{q2S1skK$yRboR_27K z)w2DTB6xoxquHYNxVR+iPmSYWTafj35LSDiN8JNvpNTuwcLPvGdW=`) zjOUbA7uJ2E)V%7#FS_iiXFTGb=x%IN1BtAC{QWl?+!#7r_F!-0s0yA>_7zIsVP6p> zL)0AwOZMj$^v6NrVqU)y&a~ZfF-`pC4;{j#pZ9QW|14+{(AYc}xYCPec0E}WRcbur zdtJ>8IlY7KRT3=#s-LbPX+!Qg?G}Z#vTL&x?^Qf9rQdEF+Y7mnu~c@Vt}fRbZtctJ z;W8>m`=`u2eMjBFcp)J4mVi+LviN#B@&J2OE|;LbSm^s{lkKdMb3ddHDav|85W%Qp z#5^O`-TXC5Fxj-O=iC4tQ;lys!@=X+WGVmi-TCnt%FwZr5aB>Ge{KzQa^pr&1Hh_>*A8^f1goVik8(x z&{_=NR{DB-Gy0xfY|#NCWfkRaSm?h{h=HWF`Qe~G6HTmyuP07|p4p~c*lxzfSfd;# zCL)X0)^kMyuw{JfO9Sc|$P_gt(UFmP;OcZd8o2===TDQxO3%G~wEzZ|Eln&k!2pb$ zZHy25sp9BVlCTLp*(?;oQ8=%*eT{L9sgTVy)|gECF1%lTTXSS&mGZ5>jIAdA4K);V2rU6tgZx)XpTqrS)lHKB(8w8$NdSi2AgL^ zDPScD2Heg4Onl|OuIuGKGJ_Rb0qPZn_pgg_Qrv|4q4$@3{S-^fWI0~~5T2K0t)+a6 zKXIp-un8_&gT#X%gNzR(U^L`2U##+&Wi0-<2SsjQbO53I}dx1(n 
z!Pp;XsG;*FiJ@1lM+8;ncZEi7*V$vL;fu#+E}Qdxc``ycd9j|HUOlR7Gx(W_;r78^ z2M*ts4?Ken9vkwT3c4okRS)=^ON?N6p0F%_@McMXQIk2shRKa`?a>`Z-R&(0yf2@~ zd|mu6MJhaSg@!Qsz4~uH_2}gpk!C;NTVd-8HJKn%2l`uX3;^mfUSoC(M85Z- z-zvEcTb&4B=NvvPin5T1k=NpkU)iqSsl%mhMHy3~0!*CT9PiDS@0CsGxYy_FWK49C zgEirvyXt(l{y1Ew`=K<~SMcU0Adq6iBvv=9{uB5)FZabja!e*8SAJ70$02c)oz0nQ zYiw0e!5SePV#nzfzv^Hmt1V9z6ryGqAX*MRN&p4guQsO~P628q7r9<$+^W$s(@~!M9vO; zhRbd*jd{3Iaf6kj1(-)KUb6egtGK@Ps)W90wx;y-Av^FdZZ@NXAL0>#2hmN-LF!Bj zl80Ec^hvxCzo9=lt0n-+-zwzGQJ8A*J-sa-bNI*mk{{0UB6x+PJk;-K*6;i3PmeLx z2SXd(e+t0Nng7UC5!@iIFdeY28dUvLFh!yz)sebO{L0n7fh0?V1d|Jyqkc_&g}@}9 z$}>);Pn)ey(xq(M(U+eXcbr+SZu&E=E>z4x^8Pj3JH2PrI;rS92I>Q^!nk@GoV*#a z?@J(I5Py(PkSF6I1O(4|i_P4sqoh31g!yX>Eq&_bn~8mCN7|jWgLr0{T-_z*#LpjT z=M#mtUlyjzs*|U2wdel~o@P%g@OAzF%f)iQzG^J^g6GfybONoCHD}>X{{`6nB(DIG zsKC&Pi|cH3nFN$kGTv0bqx?a#waPj7UdsL16jdp;4~)r$>@?;u{}|1KYSuOLT9tPV zu|vH>14Fk%w?n_fuu7S$qQwy0!tPC& zC{p-2an7Ctst4^;dP7H$`RM#O!2%);ZTGRwn6Im>0*N3Fd+lVb>y7B4GUrX?|kF)UR12U+h{PrNnp}`pO zVd%f$tk~k%^4RLw`q<`N8B_9M$3EJ$Ob<5~_&ahi=8TSVM0;oso)8I!WeRk> zyJbuqQvMbfmcFash|{5qsiE5v!=JZO+P(yZoMt~ItkAo!i8`0F^Zsl*X!x&kBZ99Dv)hi>i(R#0N(jNDt-+CYUb|za>FPLzgn+-Vr-K0aKmB|o*TLU$;E;k zRv*^}C`LBy%*CV4WxfUtwXST(O5*pEKpIDUF!f@y=IHT<{Ls^!j`-j;>T0J; z`&Dnd(W1FxG@|#+++Ua;JJc@nKS{G=Kig|UM5U|v)+)8{&6IZ@{2(b4Ui=5c@DdcE zM;T_AMW-cb`>**$$CE#wD^jhV#Ml*m?ng~aMfn4d#<~()vTrd}EmBGMj0{N~^wMm) zL5AIIpE)`@%k|&h4Q^g|4Tt%k?YT)%t$N)*Zv3iDqYnDaU$AxHz0lwiA4GYT%Dtn! 
zIr5Irar3^b5O&}OV;lo4p~PmwVtj4>x_i3}w3?j?N*A%p4?FHr2kvn0$byB#m-*kP zH>#c1bBX8)ji?hei;}#1U^C!N+n_Jws)TcZW-u>k@Tf>ad`*Vq7T)UvylX|J+uWIw z{mXQy|9I5F(Z_G|KWpnT#z8S(ZUH9pBB$MLqtUdG9;bZqAX>u4LAM769LdS>d#4m> zV-x-?fQj`w70MKiK9eiTi=B{m1P=yL;brW*f@~G_oD#Io1G8B zwXlYQP}{Un&q;3>hS4?N9Q0f?UMReU|AHS43M7&DDw?lnwU$8J@FjGBR2Ut6VS}>G zS;WtN0HyQ+F5Y1r>eV&Uw0xBB@Q7n`nRBibL`k0*KBK#<0Xs5JS z7(ZJvVEn+na6*`R_TO6Y{V2pX&5D9OxsYArK$2nDs|Oh;A>T$tOqA^>BzM)RQV(V2 zAz)Z)(f{_M7(1yhiX1d{WyA2b+pHJI4aw5G)P&T{|qW?6!N z6!m1?O{M3R;LS(ny6%eUBwNB6UnECe=|NoGgNEm+V0TQD^;RP41gIN+VPiuAx5%0r1a&VKqT5!6_& zRthYgIS%}vb-$sP-~iI`*~^KXUVa&1DAQlu8WTGcBhoq`P>|{G==Xl#W#zr-KR+W% zXI4#+go*Bu?`0umJXWo);8>*?Qr`bh$R1^ca<){9DpM`|5S?mx0(0r)%RndB>Gr~+ zr4~_DITSOUQ#|u8k@7?I7rv!)&ZAS0`TI%22&J^aeJx=W!8anM2g7z2I?W%%#RIW2 z>kC3+FdHICB@%?PQTW{I2^j|rhyhZCY)`;ot5JgFYdqc-#gll%qgj8Tz(YA`D$E71t{ znVePPmNSm|SGxl**@Z7#PXV@eKHydc=@4HQLV5K>Y2PL%yaQD-vK)&4v|xe2>7b{^ zcPyM@R@_<=C8iN>E874&?m&7v9iN;(DB(?(;B$}fhY9vYkD!d{U88OARrq+5Fw9Pj1ET8(mkLmhlq@KfVW5R*#a?c6Wk{AN z)o#mj((c|rpppO42$h$=^akWP&YkFqNx>2vq<+BwzDkuD5*n6zxmvnBN zxlB2T&F`4@=O)*NMb@$kEF31iuqU>rV_=R}Su@llH_jD>K_pCk@uCE0dU35|WNHem zw(%hnNL^^~N#8W1fZ$>NXe!49S4Uz@4iDtBx2sKN?n+SQ`ts2h5j$N%@;`|&7`qSF zMaLGJ2slPk4RDULP-1nEgXYQsi68C`g`(sQ6}`^(zu?nhhPp&S)WfmO+|eHX># zXU=THUIlWIL*0HLO-cP?KSzeGqTUke2~F`Y4Bwmwqx<$muHWwZ_ny_y44jugZ}2&9 z#XqkWMIxfkATlLhZdg`#N{GUf04lW-nOZiXo}P}Y$65-0nsC`~e@)|au^GXzKRK>y zkNy*!?emsO>~X?crc#U|@O)BFsLaYtv){b#PA~rTt75rYB@Hicn)}Ux$cEQ_ro|s* zuGLKspeUhtv?#n*iyc|#^I3aPF9-xWe7(1i{c56AMnFXzBltuEC|8RzGBTp!=d z2M-m(1tFv7CF{ADDz=Kc9>Kl2?3`1yL*!IIvvEiqlmg@g`kCun~WuunyTmx#oV2)z*RgKNO3oO z`&{^0psPf(#1?!xb;x?8J^*JDg^LWx7-X6EOD-(w!oRBr_V8##OLpy+n^Via08Dwa z5f&&s9lt#4_wJz|H`-pLDQaEjU8w7EeLR8aYZzAco+9uuo#X>=@n6T7lDW-&Lu~sQ zY3#bgW_Dowjv_tC;RE&PzzgP{6F#-HSq6al9d=?uI1@qFcM zgSpDl3V6Kj=4Wc^ne*=Vs)JR>$wF4+K^nf7!5ETiD#YQN7@;@C$@}IFZx!kH!eqUp zm0bx}DFp5qBEF;=0w~X1v)<@0tgZgN5pBL@sy$kyQ{uR~1xGIHZu^f%bG62jz2BXO zW65r|BbcWj_p=r7@bJjg;U)>=WZKgM8pP3og5ih@h9<$j%oH&hxUTm&poUCOdufZf 
z>5q<|{EDC`#}9YHncX(UBgnX35Rio7`Lvuie?d|*zdfIKf1!n{gK{|jibqmm8X#`* zZpvo2DIBEfMkG{$siv;))#yI>iCS2|<4*N<&~X|R;V-%&9SSx;(GNsIqSNHEKZkZ! z^$|u#ObBb(5DPCYn?Sw45fhN%;tRiV{%CjBtam;B#FFrPg$HdYYDp0nXl>YoQWe4I z!@A`jYkQi)%eqCw8vU)lagm<++s6)<2vV8DGx!jy#yrzc9ipYR;Jk3QQaf3AaZN@> zCSR}9eM;bdvq5MIY)DecIt~biqR_kj%WJP!W6X)~89a`&^6JetYX{ep_D0zU8$R!a z={9Xrg*_J5?(S{X+X{k@pPi3p!_v5Higg3@Vn1o7O?Y_+6z2a}BH%bieDB#Y5e)~vBA-FAN_ z1L2;pUIMg88&kowDs~#d2O%WhIq$FMJ}_4LUblcoma8SxY;-C*Gu#r#{Rz5wx9h!9 zfSboU^oNf*;t@Dg;n?&C;Dbc<;`Mym%z}uPg$wld+XblW0!q#-=PQ&ZQC+n>BM>$N zPz6g|ktZQjyq*q=>GWsuB49QL);m3B;7B}$g}|fd{JPOy!1x9WND=Wpg5?f_9;ifh zut6YTN?s7d9hOI;`)2H$nvRPFa(Rs1@XGx7_A{e0(PU&gDQ0n~87G$D4Tcid3xir* zk#G3>O)2=Whn#gi+ek}G=jDopl(8DjmMAs2Fbk?X@$&+t##Z93oPAftd?=pi){kKQSn6H)RgAwz03M7R$k z;74DD?5cz<=i_5_bG6lOvLfTJ2KyF89UVe} z>)&kO1uSi`dL0L*oKC*WX7Oth)xu3^oILDj`;;hT3*Nk3PtMdBKC}obsi`F`A9nAh zSNmaVlnQ$dMfBcIiL_9{sD8FN- z;Q@TdMK98K>S6f~PRJmbE1c@URfjP~y1sgiqFm<5EP-^pGMKuZ{1`!HW0)Q5noAf= zcZQ!LOCsHJva+g=B1(hhXrqShxe8B%aB5`=O_x3XP{w7iO9rfHhTud+c#WY~luWO_ z7XC^*hS}Y-_5lPh2L_!Uj?sOFU10yZWdhX4F)>BRB|K!TWJ-T1tB^3bRNedW#CrKns`Nz+MMU8n#m4o>bj_fFCVqUb$hQqy`H7IOaKJzjk%9VOc?dXMo2b3C zP$z`i^zXkgrH?aR)=RV4s5ZXEWLlmSP$0u2vf0698;HPRIy9gnvB5VFZ@ciLCiuY; zH7p@6?6Han+tHG8iguxqU^ze$)v`djtU2Q@Aq*LzBNdZhXhA+9KT+avGPXkb3=0jr zWvpw|8U?w}<+WUX2fKI+$eURe1#$Evc6c1T#6BS3Ur!cZoaJ8jtG-sgW^o9$pOF~^ zcVvYA0P6@9L7GkGgI+a+TLK|0s)9%6YwvsSA~wuuX)+q)50lV`W|Yi?cqPd*P%y|> z;}2RyIN#om=znM>0wc_62U(41svksI1-AnqEYXGoUCiyj;t>WV$LHqMagA>hY;R>Q zLR(c2!EgbCYqcZ!)9dv)sCNABe=JEW#^x_!3nyHX#h8APWEGV*m@hh#TcwU*HBWVgwPpq2q=8?)#9YrmoQF851u@A+GAIygimfaDWE4sPQo@}H z2U5&mgecPo7VDfqZD$%k1XSoMFP;XvNZWs%Q2u&qSOaG%1Z1y3z~6=xvfK(cJ`CltcTV1%yFs3=NLPejQ-=qPU@DVY~aU;3Xf`cM<^!GT%M z(9jS@#m&|B-SnJ)GoPl5`;9YK2D8uw{Ser*?qM6Mi$&|-*{6?gWx(vKjE!^`PUO+2 zCkzXtR3MeC7r*-MwO@DXpd;gutnwBKHSmfCr_|& zs-$lwUT!}M2Jfj`)~e_AxWAQHQpgz{f-b~1@$*i+0u0_)o}Ju%H|=riRpZ~dKYvws zOri>Y)vyw)My{TS4bG5=E?#*NS@ARy+}7-+-jiLHYa3_IF-Wk2!KmBZQ+txFB3Kw1 
zdDRdhtwLubDGz+yr^gnkQH{^@n!q#pd2gRzXgL)+l#Nedb2>-U^RpdC0TLeeQE6OA zGAu?hvxq5ae`Bt(4~+%~rpC^YQM_lMPNGnjlZFY6{^IRfDNb#EWD@6xV03@dB7Jp- za?RpZG==`MNjbGADRE?;*ObfBm-)g?sxxK>6z)xk%S!03ykmv+9-X*JZxYw$6kakc z#R^1kgvo19TR~iL3`=5E+eZus=&2`E;0+^J`tJ;$886`x)aTcWz7`s}2IOAp#VR6O zo=}zPZ~I#@F0zkb*wrMrF$#%*i#?q_;GlI~BED!b?7SbKG#f;MsD9_l9fXkdqoV3T zGV;?_6FP_)K9@T$up7 zDHOS(^B82Go}T>?pxLyS(Pi*uUoX6Ns^-HMNW-f2<~qTc3!r;%E4?8H5EyKRhp)*b zaqHxEPQGEi=Ijj)SRcqJ&*-$$#vAM*#umTcYqdbf=kk;K`ZLCe`4 z82wZ9`PHq@g(yD|do|-EBcuUFP0HDbgfH8xyn)-?_akwi&DNruqe}D6)g*EfQpmj@ zpQVCZs!3oQF5PK<;C6X5QLwND+OImq+=iO9kL3*d+&f;{7CSvzBXA}OjK3;AmJfs$ z{jU~)NfNcF_4`gm&3I{0LU5hUWG?g6T4U37AF(jF(zYN3a}=BYjb@}H+4-b)s4a%( zk|n4vfIQR)TaStV_qWGL3smOnHrSWLP+_n`HQXnz6lv z)c+Co)^SxeUD)sqf|PV4a0rnGMWstpK)Umg(%o?25F!H7C5@y=cb9Z4Dcv9qhla!9 z-JtjLJ~M0H$Q~VfyonY(vQksqU4u zHxz}NOUN2pi$~(ivW$DjrR1T42OQ4f<^M3MD84>6yqvagy?Gz>u-ng`e5Ij#1=S#@ zQZnt_UfYz0Qii!JV5H7?hxb6SWOm9rRAti+%~~Hs^%` z^uwYhAxO_|e4Xeqf^{{G7$9{ir}^O)=Ta2B<}b+k85hcho*QY~qovo5y`Fw9 zm3?&9)O%XVkYKw|MQVNYC77DUu>bmdI%)k|3#xQd5k(7x#0}z4|p5+lKPVByF_1e@y-rjs=Od080nzl zSo3Um(i(41=Iy4(Jyrjwya}o~qc$XnY*@0oP3IFJ#B%oGS#%1m)vS z&16_heP>c6vR?zwZkrp3YTUYZ|={x_z+GP)X=_Btb!9Rg=HEiInVwm!M6e zqLf+wIS@&LVk)?N(AAI8XN}${hnyVQj2H>*>rncha1nE0#+bQ&n0L z;)Gsf^@HZh1+zOzt87?~$Eb^vY!2kjgbv@a`Q>jiL@-PVj$ZhKpBQWY69(v6Jx+=E zMTgrHVG?t6GGsa?quy2rw$FqFX^M55-v_RZOvQ4pQ|ggm#|$fUtg<_Byi%y)=ZItJ zCO5`st(_*(4*Vv48;S3iS*ppgik3Kh9DmH5#L>YCh?x30NU90 z=O^<5e~ERkrsK68@1{4avJ5-ZLL~4{GV24Na zL$CfTdmA2;5c*+>U}-^II*G0vHj;RzAWf8FJG(`&r#p8P)z^a%9xY3aB3KS9_EV8{ zg-o+^81Fo?@`__26~I@aF2LQ)#ft3V%DP;B{MKWy_qr8@ri_hAcNrcj-8tQlXUk)Y+xn6#xJ8nml_~Pqo*&PDw>q z;RaBOG2Wkc5danK49uHq8e`g3;J@-^2o;UJYDbIPa@OemZnh&^#ja6*-)>;WuCY2L zATFIqp_X|KuTttZMhvb}*t9F#F855)C8XO_0MRp_q0@k)E8|YrIS{5XKnF^UdBpP~ ztFvmxrMX+J%^daTLwWxrT%TlHVrj4Lnw?X9;-lJr;%~BrUILB%j5lWi&On)D$Uzcq zq2$QQc!%q(Fp~Pm47$--^6l&d5Di``{9o>gBr1hc#Fsw%tz7VR=68MaJz=J{TquwH zRYLwr&|PDJ5%YyLvBy(|jQ)<0Ka1avx9$TEyz3nUix?)%G4X0w0SzO<_J?Uv46r*9 
zUxubiF63dNsXC|z6&I^Fd%or=Q73DUBq0heG3BAiTP_0S0@^t8!5r-9pt6`<}VhXaXq@r zRrXw|?YSRZB7#oQXPpF)F`TYDjM5gx~PXVtaQl@SX<_KmO;KGwCHgN=Rx z@X+)BJM+PG!Y7F2Fad|PTH(x@lU8sgtcLZtqT~&rAnSLh!T!v7Z zl^E)B0bL@;tFBfByxHQdltRcf^!I@&OOR$(5T_ic`w zXWNv_Mdrn$+})4;&oYmv);WMlb@b8S0>a(z}gq&~c!3HRT# zerfa-^M9-2(F$8O#k70wXQLh`i&FIS|FSB6v`YPW&roaQ^q*Om&_+?CYq9=2L;u=P z{}b^xqPdgCr;~JmX=B<$*8;!{|83-CQIJ#}Y@Mt+M^`tHhv3W&a6KGgd6B_tudfL{vb+0d)ObW$`F@#Ti;oY!}F55Rjw@d z-;9~)5bjlCjNH~l#VIZPxgQ>I4%(yG^_M99j{w`1kR)eG^&DuDr)Y; zNfOPJ%YTP)O8*mr@x1cw-!93e_6L;b$#}5DF+=Ad7di3~-kVAnRfnz3oJY}4v_P> z?n%5ia==K-4-6QGbD_jU`Z>N;L}_@h?by+=HEm@b|7RRTW$5wjim`DMU9O9K#WQp6 zlrtx))(g-%nlwD7@){S6%9Rhnaz#cO<$rVN{K=8;x?1e`K@{_DDGOSnu)Y_X;B}^4 z3GtLWA+u_eXA$?`hPX(y)y`bm@z?yEF-0fh0UA!iv!ao4U!D=VOJDWMm7P`v>KGN0 z^`D0f@BQqnPv;4&KB!xx=MV!K@+Q{Hx}Q+_7fr+*kH`^xc5F7iafvX!@u&gpMS{uF z+Pl(xv&P-@=Sj=me*JgmLQSoNL_#BRrb&CJ^Lmr8FZH=>Q-&SkL62_uqrfW42WVzR z@&j(bRq0$E;Yjfr3K8D~iQ@~qo-+a~BPEkA6+?iiVV7wi;?${t!8zA*Cop%fW$I1i zO(_seA9Z>}>9$sHInB{%{`TFohfx{5%4^Jk8P}l;Xs%4U1)4}^XaeR!uL~{>Qk1ET$d?P$ z2$r3o2=6?$sY@K3DAT#y4pq_rvE9*yS+I$|>CuJqH+xgTrI=WYQSXyEg<7(| zVe+%qv#jBC0f+GWsoTFCHortje^z0f$6byvH{3CfmR6* zh0WfJt_|*gxAc7rl2La^NgHqV2_v)J(Wz}#bfs@KWer~0D|`57r(ujD8z+H29Mf8%Mh zzS|7|(;ApvQI);3MCF|LHGGc~e!-RLdUbPw1f_I5Lxd>Mh!IH@hY7LSv4Ink>_L|5 zh>16&@peaCX3zg@RWsk9qad$LqveL8d?Dt(+w;*-BP$GlYYI;+Rm-)8>b1(MXBU_M zu^=2Msj2FKz#62o?ZvaMN~ar)3myeJ7Sh%oEG)!Jhb$^3T_HC;Q+qSDUPzb;{Y8@& ze}?dXR*OgeE3Y%#Fgf~dtL0NN2#-F<;QJy)V|}yzc0&mdZR-wAutO<24ZZtP++b3p zQUW`;;LC~-a)Vbwe0~lT6Ok@nvbRPNWvTK<* z5S#&NRG$f%FG+|LxdyfwYbVQ$i!uWWj89^!MLoA)3P|L^xhh4?VxsGFw%KiFf$%3= zq|mG9iWokp;gM!%;A5W*pmARh++Vqb7u z&AZLKl{g5lH`#(4JkU>14##ovbRf@{xbh^6eQ-b~qZWd18|r$%rd8Kjolzr|h5eY@ zHh$r9>DX-WLQ}3t5ipf_p=}81vtL%9xDq%U&+HP}7lhgb#Sm{ykAnjp3S);rbU-gL zZ8w`84`|dX=tvR0X~%96(0w^z%ZsVy6bR{1WwSw#vqgE@bQqxr*|LR=f1WiR2(>qz zPQ}fg3d-+KBE^Y1fF?g>r1p+tySSl}u01(m5U8@)g?B1Tg_q)FLfG{TbF|Xi(~8FK0TznL zA2Fr(dyT(hKL7OtuSV{!Ml>6Q!?XA;ppSxDn_xFWlrHS@AGS?*Mn1|rS8O8W3q$G 
zWxc?T`Q!GkIb|z+XCYG`-DL45OL{WWnW*?PXe-~1xau0=3M>=!+$IP}V}wQz6y;l- zF7Ug266D&D9E!g6<0Sb`{p~_oq=3n$ao0Pc(xGEfb7?x@alu00;#=aP>A0X4xL0VZ zuYpo~He2YEo2;7-=~ht^>7Uz{`6#UL^*V!?_69EC`2nWu(DEG~MZ9 zHIBT*cU!P!CmwdWmZ66j8*U!1@A)c;NJhf#9S58DO+467#1%$Nc1c^|%3AOuIGp#l zHVaw>7ON987!&m%4T5LAqFas#nbC5eLP5Fm0lZB$;0nvc)szFM>wY5mmd!Uc3|-(S zM>p|mn6JV7t#7+Zzian_`K|3$hS>Eqa;?RY)7E5j>thq?^l?SsWC9xjj;WpW;B=yr zM)0Kj>byWw68Xxaz#J&f`j_E#6YqDWG+XO>?|jE>uhIM-%MvK)+pCl+rL3V(FS-Y! zuBj7WaMS^x?*g+eo4lfUc(}b_9@TV=Fl~w;GjPQH9LJIy`iLk+q*F+fXbS#C zTs73mej1J(u%$>I!d*Tc!m~iP!T_KqrChD$POai||7n$tVNPU>YAxR|+7gP>u%2U; z4Ufb0rYu2So>rWX;U%%oef=WD>wAPO#fGKkktW!S`r5K`l6gLl=-_uLd7Bs1wF9?% zDi?}?1%_G^u-ke6ke@n@T8#)rQ8bu@ zP%o6fN2H%eeuC_powqG~(JzE}Dyi&)gkPtVz0%sR-;=GsZf9I;1m)KY3K~}Woc=_+ zlG~ekwbqbA?Q|3v{3&5EZzLfxqh(eIe%`d}rhPG@l24|37P@l~Z(Yl~3Lf%oOL1JsSxvGcgR(sdfv zQ^kqE)aYII0^BW^+iypkS3R=)oDUr2$w}D;CAL?mg%FHG$uEB#`GmZMT)*%7B*DT2TSXO|ZB9D+}esSV-ced!`(2e`tpqB&1F5%L$iPm>P6H+mgFw$}{m;Gs`@ zY!PG!-7Q0)z7!G4OfQxQ?|}_c)CQEuPZB%BQJ0bzVU$@m5YT&z(-I3*ca9|0T&4+wZ$|>m z;i|Gzt03E7fwExcE&(OuzSutIPb}6K3zmybxe>kOn+9yHir13q-l37MjbFK(#gubi zyt1|lq;GQ9_PqA#%+FR>u4v4u%csD`bve+k`Rxz@DM*l|eyU~kh@IDcfi7RE6x^x| zT&@($`7}H>FMMay%+qSH*a`j<4PB!Zz!^H93sD*?e zBd%PB-RThr8O|iVVdJ za>K|WSNP`j<91~V8PB%6aL|*RqP3bkW(Utj-cKI&AC6#wxV<2gwhdsv) zbkHkGfHe!a$MG0=w6O!1xL%&)gc+S1aH8!trU<&6SF5;R+xD?kU8c{LDy`QBWi}Zb zEmm2E?bhphCoY(cL=1n|Y4&<$IhB{Is~<6GuPLgCySV}`eCLXN<9?!E$UbWu;&?a5F=(zvKd#lMahIT)_n_3^=Q-Qf)XpP5ht~P^gvZ|X z#iPOjPOWOsA5=oS(C@D6uRre~fbCX2Bz0jl2$_WY@H%eL zd-G%LRfBi%`J#Qpo-#StZSWj_x(OT6^Ipru4yoKIoeYh9f#q%ht#zglUBku^5@1m zX#79lI?vWfKB%wnakT2{i<3{OG?mk+vC+*=ep^=@Y@^DMynhC+Mmo|_9f3Z7?jwk# zpU20c%&!|lf04r!(A_4dS@|mF#xC;fj;?#kw_8*Owmkc9v--^N%<@RW7&@cPX z7$OQ!Bre{#@-0jfLX?^~)o+Oy*vbjV=QTsJYtxy;^_l$8EbGz;;CVP+8@9Jc zKOj%{*!9O@F0nY|ajhA~cC_PkSIrvEg7OB$-qx%Kn+VW0rkX)NNph)>M>CNj6o^)f?p z;E6_a92P?j!?jVIOWYfg!AUiu3X9XS`6TukyxHBdwR!KJ_+i*UhtXo;2P<8n*GXt- zi?TBnp7i<8lt)6Lnld~3ijKO;_Vp%2%8txB-tYingzqnRR?U(pks@AUMLLHCO~?Hm 
zP6aHfIpyq+2O?W9$~})svZ!olnx^aHS=3!hpxZ^EJpC7qpto+yOq;`m%P1wlr98V& z#%?daQ3jx4o_E%YxP}oNiYvp9OHoY;h0D6DhrU)P>e6U6?1D*0wB{Ux^M*lQvK!6E zyC&|T(8VL_{cuhCJaKD6qvb5u!gnhsmDe1$$7Z~OrbSX6G+_U^wr=^?gU*3sR2M=c zV?j|HcIBaE$`o`;Z{-`7zhSKfk7Nu>Q_bwr^fjTWyn?DN<|TikJdaWPRa5D*Jvzi5 z6ZCy!pj69gZUdKkTEYgw+pc{(J!~+3*#Qa!Cc!j(C0n1%P7JdoF}~WmX(P>rrXjL- z6bg7PUf4xH@S0x@RV#{OM%!1`$AX01tpx$yXxxZK~T(1lHmH_MDJ$2(k`bGX_11OyGX8hTrx9Nc9jCg zIu1i!#FZz3#Pc(e9vR_&rKyEvoQ%zJAu)^~7{9AElO%t#^%KiA&C)iIfxBg9;3$qP zQ{mLtFqOVA-NPH54m4K$1?71i*|_X*#)1s5i9Uhf9Y_=bj2xbdQ_p&Wyk@j9a>;$8 zFQ&#_`hOUl_KOOo<#!TNTE0?P{?`|9v5@0CD!$YFdUZ(#w6A(=Cu?_W@UBZ%rP<3> zoq}tvb8I+aeITJXMiMKR?PhzbaCPB)bvenmd}jWe8KJ+&o2MMgGhZZ4G;tXCbdoNy zNC$qjC-#NbgZk?5AE2^Q=+gCp3q~g&lm3eFGAKOh%`jhn;WacP=ic@- zPdjiQ(hop&?}+IOQ14Ku+$1U%@_jynwQ>;blV}TQklalbRTzvs-)T0KMP39;H>dA< zYy2Uu7(&fbme?5yqec>a`EPDLGqhM!CM!J8S&#`a9gs*Gjg^Fej5G1ndT$32kpOL|XW3HTAZ z{p5I|;*x=7uq(!|g19z|a>SZ~bL8B0-=GN!)y!H-;_o+$&o>$F`JS9O5nyfN6?`(6=*MINP zVW*%3FI0h{PYVX4Ha&+Mc&?5#=dK}5h3x{Kn)4p5kQ3&|L@~1yQwwm3hI6AAe!Qqz z@;e&@cCRsuqgyexs;!C*r%kbT2}hc?gel`=3DJi(;MWJ0g{r=uw~2d=kr6(9UCw|l z2U)Gq3#^4n+JG52HD;@=ryBbZ3&9yNTBqE0T2tB&M6O}d%QyYet951p)rLY4SW@l# zTk40$W8<>&pzMjju1cK=mi-&`qHO5MwY1yfEv5G1hzj3Yn-JB}FefIBM$hwAu@C+Nw#PL~6{j9NduRuQxMYIg%TI`PNPmne2t1rBD%&0t z2@mxu%T05HZKk3IxPEp=ixf5nCEk_=@(+B#e!Tv{A9sMyyo`fsMOQ4tYoGngBD{IR z04=R_iru2!6_q|!u+-y#Do*1{1Xv&6?+m(z|6n5}#tPIBc&W_VY5rZH8~!Se70-#{ z=AzIp`eW&;3TSptJZ4B22j8&$V^_YCn4PD=(ZYlBHIJ$RF6%SB*Xd=4SHJ-swtpSV zAi!u|xCYv5RMk@3en>nc;02 zLz&Hbn4j)!Yrs8%#bOR@4uC7&w>E2bVHt&58A>ODBe7bi5#BIY&vk6O48erq>@KyV z>(bjPJ-3pVmU>{fxpmX?YJIj?h!4L;^WN+}jWp$`q!P^Sw@X%)T^BpuzWXrw+!-Ca zKswVk<)pq{LrZWTPprjhofzP*c~OL4>?Yh-y;+~Q5Q{WruvSyH;J&FodcOT-zgZz} zcBfPj>=B%F?(?{oPwLnHkefqI(0S8|+p@SEQ-q%PmFxKO^_vXV1D7!BqnTCD*KqOV zjixCB-*j)DwSlXO0@lB979$Xll07 z^L!(g#xl@n{R)oB>Eb>3kkg@sw_dr~aSvH1H0cb|AZWG>$f%U0VGMTc3d+4aW-rm@ z78!Zwn5J%|=gnj(J5JvlH-$B5Sx@&D8@Qv$iw~z9NPx=r)3eL9(mjJew^W2{Wuve^ z!3Qo~&0*{h;FM)`zoMof_060M?y)64t9?3EM5X3Z+piQ#?;*P3pI%(r>U$jEZ4N8M 
z1-akq@^77HN)8z7vsJdsdD9Py-4uyx^R#S|e?Fpm2Ed+v|PFq|33w zeR@?JLNizI1s85+l;g^8*otC;g6y}~^hvqv3qG!&58FX-Vir?O&obm#+?5APa<^Ba z9vS%SA?%h-VULRq`kO9S0>9r@t>7}mNyb4$a2O5)Ln3>@tqO7EB28?I5cvD+EcUIR z+qvO}7b8XIcMk3KYV(8GX;pCkN7Y@A(s19{Zqg4jAJy+TB;N_e=O-J11AY7=6e4-H z7to8pHGM<0?8T9KSpvE4iw(#EkjFnXYdlN!PwEAqomgF7*YcQk-n7r|gvqSVO*$eb zKOFtD(d121EOf$7dHbX0dC5cE-+SFyt|h6rA&1~Hh{WJXkE?;8cg&q1y}8AT4}Pwj zzjVE&Cvr$TSe-9igQJSm8cDt-{OW6K8vNoCrRU(l(!OEmJ>5ZmrFwFOR>w`9T|O1F zA#7l3MBovbbB;2uZ5t@$H@%j>Ym>Kmr2cIE(KhrD%NOuByE(FP;6+H@mgupMOD}S> zU(4<{-^eCX4X9mcg$K1q2k)~#PE;VNF?G&(VRyllVv?61xj@0ZRO4}W{r&q#scGd1 zd{GXE*7sv_im3T6-KJURqB4V^PwpGsuY4A2(NMi;BqeW4VQg&&?CMMI$a za}Ty@8Io??b1|-sNYe$~F5y;*B$N6@8(*UwxHS2#XZAH0vcKm~)mu3}uM8a6m5)yv zYGUMD*Va=E&#ey7?Eqd+qJ>cZ!xs-^$qwePCfiG2Q3v7W$~IR>De|yf_Nbk*DSt=q zTSN+1y*13GWVrR)RIf*>p{VTg`0^p2c|s|x1U#`$LkTe^DR}dKlrFb2-c4miw7vs7 z*Z!sG?A2ERwLai$^Ub5>I6hh&EsbxAsBQOZ_UeDbcdE;l|}gzaf&)DJ?3)3@gGhKx3w z%=X{6b?X3Y51%@fsWczvMiTEBZP~Jys2emFpliabz=()W^I>MG7W!eZYHasu>cp#;3&&Yggvq!fm=U)|G~b0Cg&uPsPW^;y;V<;G zd&xZ_fz4FcHKT{vJchL}d^X`@=sG5_$Gz+b;h*-UqCN;#{-*uU3the2K2H)edFWV#eEK2!5~}clhe0P9)atXh%>{3$eA01j zx;z>`x}Nm~wiWq$y>oJ8Lbiz)n1uCvI4MWvTFc9q4yKIYIxE4dV2t zr9YpcX($w>FqPY=KOJU_6C;{>O5sme6mht~!66HpG|~G$?tlF`T`3);PKrvwTILnx zm+5C51Es=ic14sh8w-lUhqaa{p~9|5`0PaX&$M!mY#N2d^si*V z<2yL5*c^Cnuv*#5J#vPL1Nu--ZIdJ1&4E{bz!X&3!9^j`60xt7quM3%Fgk@nO@9d$ z34Me=cTToZZT_(PldrBg`mNM`z?Fy;nW^HSnOl$lVET35Eh6zS{#Y6JhSPH5_?5G8 zj%uz`ZqCumKkt$uZ$ zq?DTnU&Duy5aWbG!E)M}=A56btUi}M9??h)#FbOm#GQ4IR7-UExL2=e{kgSjE7Q(4 zw=iC*;b{TZVz9(Gqf^DlS&2wuwUZta(@}eM5i}~gw|w74wlK^ZavLDkb(IY{*-v0k z-h;Zdv*+6z+nMG*Gn|iqzN*hrXn7EHfFHh`28Ml|qGB3n>SkIVUxzjjVZGFi)wL|q zk!6}?8n24w&h)Ty>0Ff$XNbIwcs5&$aSbXQH4W(o4RCv2$I5KcFqD;K+41CB$y&!* zFoyDZ`mEp3hBLym8Ynl<$*)R2d2PFD!$*)Wk7oOvy7`21i~+yF14?=z8qMU3@=K`A z?E>IjYFrG^Q_IFogi=ftVt_FsYsP4_Psu=kQ3n9%X(Mr5FE&#unauune(YoY`7CNP z$LBTAbX7NHk1tm01S$%~FkB~vi@{s=vSMx(q&TS>%IkwpseaYZyhZ5{iT>KR-Qbvk z^M8E-tK}Lm4rz-#F72lvlr0maB2@s97QSucT2@USbwr%p?rCc0@qLCQ2OcpIv2`TkY 
zr+-3)lh`tlxIw+^9uf-h?^iSmBH)b`wrP8WtWgi^i!gH0kP!VJS}T@H!$T`4Qn+p1 zp!8~~u6fq_2q-wu`npK}UU#1a_zf|JWX8=1a%bd_ERJ>y8KJuG>x$p1AE(?ZSgBkW zKVF?qR!l zH{;sE_0^G<$XO8tMSFZs_%ZNj)wsMXQ1m+{ z$57HAWPFo|>@#K*k@d&$eIFHG3c`D>%A$9;bUp9&2KnwGMxUBbxbGhX>(p4VU4y2@ zJu;U|c`mLgKi-b+0nyC;0~d6#)6o_|ul~1X&W|J`=}({tF6C`IuOvV15BUV8)R!abF zVutQ-5516Ya6RCx#i62?pQU2;rgSKQ+{|;91W!-%IbF}M>IrJdYYg%H#=Pzh2V7BX zlRt6NMxySp6DAoRL-80QM#Kz@b@)63c8yia$|Ue>bVTHNs$EYn5NpFjv;?<~_q{it@1Pj<3sJr+IZiD*LCJqi*JMMVgFq5jHFO)={D zOXp(BG{pEgL_}&(UIYd6|AMa1fB7%ydOr?wp=N0;sevch7h#qsi0GwR7I|ITdTBJ( z-jnX7zwdj9QGfJpAGQ)JzTPz@93^nz{niPY4>lh;rZ@9Iyfi@^XHM12z^&0nt!s+- z5#J5BPYssqbY-@)vy;<%_xy{PJ#lw^R$HA~hog|xU)zsvgbqODm%j!SruTkC;C>Ec z(``}Vw(Y;|3m8)remhe;|9V*PhH2WnRDsLB-8kt*qJ0^vbxP@S{V^%(15{q=jvxdG zh=%T|I(zQq2Aj?|gmaxBjv|h=t!x)c6l}wxN;CW8Vpn>666|A0i6mPRPz8Z=FWc+r z0LjOa@H824LlM@k_S4$=R~mN^2=jEeoJ=*5i{8or2FPGpis^T(gZ|Fiz(ph8{|@?U z*!Q$4$h>a*3}8I5{U?~yIU$14t|Kic=@%z+a`5LtfO2zZT$psuorvGzjr9Kl{;zn) z?Nd;NO10hcdwsXH1H#pTjJe6SzQ>;7Nb50btJB2Lf6E@3;&5qF_~EK+vo(65`G zB|pYST2~jkikohPhnsi&m)`FYtfpGM6#UWLI-Avj5pheVzQN^u%d%JL-cxrLTwyW) zEy+eRKS7nL{9$k&aTf?XF;e7qK|4Np-n*Hy?7;3jcy4^&S66dl3CU;eZ%}Tx;aFi9 zMp+Wo_^Cc0083~@2v|Yu_d}1@zdI|&0q1oQ9){oJg}TxUzL&JMb#36^Ww{3z*|<>~ zLlm;ddDwH;Ly#XkBYaqETjjMfQ>r+LM(27UHXCzsnp1QwY%?-{wINbrQ$@k!yt(Ti zs-7;lhxz(3cimg(Ztk*S1PvKU)W82kyt6m8pk1zQ8t_e+=JBWD>+inqS~K@7S- z?xDQF|MbVS64$*8-}aoFM{D4+Py)Tq6mL(YlVxf5MI#@AEyK)mF^maQaqwlBm?hY)|Fu2AX@x6;<;SU3Yh{RuQ znAFoYbfx^Vb0d~Xyd!JuAAdU5x-UMTudkvkofpWiigYXv-1>JrY26#kx`Q`fwB9dF zsFF)-tGFaJpb~re66Jd##ca>Jmy1^6|PURx~k-OeVojyo)6$E%khY zPTa(I*}6x@195kb0nH+(rm_aFbaYTHU4VO%BmGa|H1`De#SFIC)+ClN5>9tebZA^; z9nkD=xy@;r7h|0LW}9ohi0j?$nU3rJrbhVhUx~fIN22dwrK9*CpOTuW6A6PaJfRt{ zdYa$0gZnN!-7zlm0)`k1QBrM$kx;W;#ApgI?f6{>XN>O_R4o7hIJIOp(x(47wIU;C zWJXl?*u}OFz3rnEsNCY3arnV3wzajCUJ%f7pxgud-K~~ylwaPod}XpZUSBSyhr0(K zGg!27LgCNRdN*=($Sne#`eM(H#J1CdAn+WaVR?<{PKyed+1cKRqSeF)7U#<~}i3 zJo2(@G5u{eN9)?#TDA0qDgI7mCZu$`l9(TOn&zqOCZ`w0)Xv%b0nC20l#s&jVe6;?tvpvS81*K 
zRYe-YqztVci+B|7HC4ZejU#RH!TWPI)Y^*mr z46uBm(iMoCNgsJnz!jqs^r-HPE;s}0cL=Tz`_0aSB-n)WK9!lUQ}x=&d9KMd)q2Ik z{QH*+-UxVexfm!*pV7=L_OJC?UMzwNh{DQ1J3{1oH7u^mu z&55XX{DbSQM+b~YZ-l^0;yCe<rCeEt0{y~=3m;LWf+?qTAL3H6cSwtH@;st^YTi|remu;9g_BOY zOM1XNCiVF^xMym*vA+52NarVbQ`1ic1utPqGyLTwj(F zUVha0V+PE~k>V!o$Bhlj#ZWKJe{z|Z3mpt*GBmKA&tAQ|n`8jmSr5fVScW6Lo1F{5 z-%EApvUTZz?~g!CT*&ZOcn8e5RFI@H?E2nE;eXj6qV*DO8?jEF(_z(P0LGtnPD8t# zX33KPeeH=>yxEIC2PmdT_mF4yx6@VZ**A_iva=6$I{uV&SR+=z84G+6={tP#x3hq@M-v~&d5fjis$-0(MBoxFARGK0Mlb? zuj(43qgc5nsLQ>J)4I`cynNl}WrZGReh=cVc>h|Vn7hKMuc~64VJGZ^?NcbIQ|tX4+(#Qh86zd_Acw_ zZ^5MN9k~jNGwBMWA+hU2wSwm4Nr$_2%@XB;2HlCsGZet{^Yf*A`w;(Rk)Mj8@FzY$ zw2A3<5Ye^~8Pi0fQ42Ia&I<*8`M{I^e2O#;LL6UNu=|<0u0UeSBasPN5Mt&A(bAB4 zk;Jn@nDqQoI0?};|AzsE%P7RI*m73BG(*-buvZePAKRFAPc4fs?42SYJ}fBF1GPBu z8*s4ZHVmenw(LOp!yj0O^{ zAVv}Be}+tKB`ei2)@AKotMFHAmt>!X9%Sx6~fB{J`@}7$IM% zAiMKyOt!o~aZR*n$ILc&@4vo)K!!$JdyHMTN)$Jcg&O}eCe0~J%Z?6H9R=d{>gQJH z7CI4sDIoIeHET-B>G%}S{!Enn5x7@7pp(>5iohR4r=Vy1 z<5oW9$aejHzYV)L$N#zx{KsjPba+z&bahJNs>lA{bbt}?6qzAssoN&@U#8LD%K!6& zDg{)JEUxhgwFvWWn8(nYV9RP4xu|j)4G7=kfyK*}zEy(Vznld_bSC7CArDD0%dKs| z&&Flj&hu}2ocJ9PESR*p%6)pq9zD(tg<`6Z{g|}0U)ncAY_)H*C*?1#$Pl<3ZCWd# zBq<{jqV{h}9M4^Ld(F8k-AVj1L_9J|gNs#y5-Z_QBHoLEztt;k<6BI9l@s%M^AapJ z$QJuQ^f!P2iRypCJbTBk3J>}JVOPCvA42_3`Dbk-^yhFasyiB09n~k(TvYdS1xq(FUJeJj`_G}#7^X6i}f|7y2IzQ=vPJ zxs5!NUVjwJ-;@q7Eb)#lrRB*!gvZ2rCz3%))#;sfjRFm?*U(#(pZ)OH#sbeUkru6X z8Z9U4RxCeF7^B++0X68pHpVx@@P{UpB)GSJ<}fip$Wr@mI0JLdw&$Pow0kzC=OdcC z`KjJ*t)0JaeH(3W-U@_aUrHGu_Kp6&n05CeeWd?M8!9jxB}=#rROJC%OkuPNHb#dC zxkf+`tDp+a**(*xAbrzc>S&MuLuOqg69p)#_jZGDzf0+;3KcxSc`Tf$pR&)MNbQv} z2Z1xuceHi;g(E$F5JdUlFX1ZL>UnxuK+5(9GN?FB5<2{)NsG-wl?=iiVNl^7{6VGJ zs>`&3$KYNK;sj8l{;)x-i&*LfA*fCjIS1{5W>b0BGS#xL`sb@_aPBNTZPeR;UnJ&B zd5c(XdfSdIo#SA{8ZU)j=3_UxD3{Z7v^VxueUj*Y5CA7a?^OM55#E~Z^j19j>}Eny zI;;Nqu6{y;nGZl>1ZK4@o+v=7imolNiUSO**yP0r8l|QM=2p(Ajx!6Drg$COKO(;P zvpS9MO^4<8wiHLO$>~?L+|T_eH*}i&0<=_!=iV`%+AprWe>`I4W_-J&o62h#<&0qs 
z`NnSu!S;7R*-2m}amm$fdaM1m{$7XgWul=)0nEk!F!IrngH#-L+^E!=?}$%@L2gwr zmZeS0=Fqro$b5-Up|z*Lf9xy~jUn5;AMw7hOn@(Gh)TL%Gt(_Uf+oSigsBx*T2nrG ze8`FkV*QVm132HJZK40Mv}SmBR~O9M%)HbS zcmD%ypDkg^)Q#BpQ2wdn>iw+#&&2{!TqvLZuqZ9V?%bDPk+Uby=!k`mBtCSA5I2SA z|98oNJ}5!ZwdC)9W6+-zEy?ETaokf5-eGUQ`H*i__yk@^vU;y`;J8orj(9G1`G}Rd!7E zH%WKg^DP-jY9uEG99Liid|sNtvms0xR^706}9^Ag3cSgH&cBJ$wm3{Tythc0Q9~!~m$TJSXg&x=5>L z>$$2QyRletnDrKu`O17WG&xU2>OE$Yu!Pikc*-$ewGq*4Yl>C{o}Zew_{_Syqyv0) zmXz4G`SlK^2?F6o)`=j#ttQAFH9V;p-PGb^JT0${I%M!X#dLEsfk*fEQ%Sng}rJ8px z5$>y(J84c){C9tHx)nd4s#HEb-VffKZWyUuU9b2IhO@L0yjM`dmL)37YG}RND`xk+ zvPS6WKmak0@}EbX(`;N^&gc&&>q9iZr~y3L7yx0Y5XTXEF1!kc!T>Vs8Squ9hr3IZ zMe258T0MwP1E1Y8KhIeoMUY3=Br^84!2!VWOagdDocgE0tXT%g0o?+4NQnTXlkW+@ z)}C!4p?aNmw%ZTW)^$kdAj7-YjoUTf4Dd9%$=NeoFFA|iwyUQad+sA zN4NTjV*nT}jN0#kTRbzW(cx*kDGNg61 z-WbVXxbNmABj>RSo&0>&3AbGGHk$$bNmOakC|Y0>_^DYwOp2XYPUc6}*sbVEM9|)u zQ7?P!GwlNKV9%c#ZC>CuSD&jnvyVPS>gCKilF_c=NF;oBlL~Dm%)(Ea{Pr&dFx7g| zL^Frp{q&CQ^VZXNtE%IhXLS#36_95@?g~Fx{+5!ljAF8GWkl5T*v*snVHSo4uPc9J zi|e~IeX-WGUxt0B_>K4DAnP^yTXr#EC%`dzrJ@=ihq4@1J|sE4`n8xe41CyB5P^(kCf5 z*YhC1+%y?EPDv*cvlzVGcB=5W!%P{$<~NYe^Ba8~fVI2(+QcO^--ybl#5UL4+RmI` zX1#Y0QubSY7dfv!kz4CeXzM&}vUQ?_h=AJ0I*H(l5C=ursfx>};u-FC_-}oJHb8rs z(A#z2&u=1-?}sa6laM`CAnl3+=hqAJ(RqzVOe^4VVXsCsVA#C~FjR8^T?at+tl!j( z0!)>307tYfTib@Llyroqpdw4b@&ZvWW+1^ z698~#iQPlWe?2-kH|Gcdb7d#_vP?wK13@@n^JJewF?(zeSj zbDubCG7#c-peSYGu!~8BkIhGoJEZC?TOTbBW-H}+x~is1wYeHEr?qukPrgtksBS*< z?H*z03cVq9E=P-+KV#b7je!xfbuaKE@EA2Pw8X`HKInAd>!Ax=#AI#?U+|H~$4F5x zR*&!wYOX3CP=7Ks3PvnGaa-}O|KgD64WM)jSRDd!hIdQ`aUbrrOQ$JyoQ65T*jNkU zKm`NUR~>D*o(}`aK0I5jW;ycQ*A)+jpL4{WQ(0rvQu> zIY~Z&S#99BX`i|Axa~gBL7ddx?RZ&Ntz1H9duM3x!xl@+$;afMwKC2vH2(P0maIYG z4~t5am0u_1D-c-DLN;)17UPm19a?{e*weC=6?R1%0aB|BuXTj;#)wf|28^8BY+c_< zmwAf=EV@mxUxsEkNTP39?K7@v;n_x!=>`M|$aWBtY2lWAX71klAUTN$nrC z>7x1b`3bNdf$n_i2*-H(6W0>u=X|-?F`guT3?uYKbM#TvU)|Xn8#@3T+zADGvf+{D z(DU}_Ep^SZ|3$p63;=g-_+Yy@J8T!>QzifdQ-5`SF!!w47yGUt$lNxxO=@(&il<0E 
z=OU`N$=iT6db!?6F>$#)*E5RGdj)3p@4eWaG)Rsn$N_m@D7A6llxWq`>l43#!uvel zniZ;-lXiGrj0!y=&H|rzI$^)O9tRl85uFzJj$7iBlU>=O&dCRU(7c1^5^b?Oy=ym| z;=O%1?yk}uw%L4GQhd)L$CdmWXdVYlhKA; zgjC#~HLGms8RhLief@_f`DNJQZ~v$n+$iInIT(dI3R8H{l9Z~bhf*ke|02Tu>jkw0 z4l>+&iW!s+kuc#!9Vz-HD;p%*En!ghwoTqI1cB$ua}m(5N_SS zHm!#-z?npUARW;Z5$nh#0bDcG>eQS#c>wNM1mm}y3efc=0)yu=-R!FW2`TFrUqGvN zf5exCMk;JMQ+9|W4AU6EW6@)@?nf%6hy&^Tk>oCuo49c33`82q?xA!Bnkq`X9;F~e zCqvMCk<(y)EKe4{aWhVK@mF}8cWgy5+ z$0s!naqqW`# z8 z(eCq6Ar=nH--sq_dJFGJ1!wd;OJ$9Qsv{%VMqY8mGE!nj{YrJtsS5f_`!_gd*`b#t z2)@+ZC_%geK%h5uzh@N5x+d2?*cnQRM|h+Cu8}2l6(S6NldL+vmnBD=Dv_*|2*<=pyWrT|XrSuW^lCzuzcqUl=T7O53HZHzw zUd7hpy%{Yrgt;1afSkS``_QK~@6RZ0EdS9Jc$l7@+^e-{136ww*Fa;YU9fE~P0jea zVw!40=^RCc9*EUnYrwYFiNb=TPa2CCY8VOHR@`RlLAiO4R-{^-RQ}u#@YH-RkS}|y z!?Kz4B7BTm-#Zq&UnbkpX8J6O+O_yah7&(w+8B}Y{2^QZ3S%J!p^ywMG15N2m$)zIMlRw% zl4IZgh(3*KXZ$Qdo<0~W+^LVIS?d}AtF>?3gd0m1E8`{F_(ER$6(<*p5k+fEo7KWy z$3`jm+f&+>OlP4-rm;O=H~*qcIZ63XrPM2{gn3$3#j}JfK#dMFNeceXsup^pw+AiW z1Zjky8Vz@scGIDCgfO=8_y~ z$rLAYlTS%{feT(A3Us9__}@x&~4%S!4{@?D6UQdO9GL8U)3F=549AI_WbbXpd}f@ z_nM`h;Vzu`AE2o1GhevZw3-Bn}B`cQUQj0|J4ElJ|=kV%bAHbfN4>?i{;aKJ2}ERZ8cL0baHWBkJ!q(q-oMO>pzoczGOOvkY&w?;kAI=li|UQ2e0EvO9Y)2D`xpGg!;>wKzwKMh(`E63?(USVl5AYf)pjGf{u#WgGcozJ*YKIa};9UuXk&qodGp$nN3Yf^C*jz67wuFA@t%|YV zD=cVQ(%CeLCFa61po-#zXy{}yZlnaE_Au}(o{&$a_AL%_05er8>$9(p5$Je!*}6jP zlh-HvVqTQ%JP{7P)o<|}SVbC5)}T9dH>~;J)PKB(dGr$94}GE?UvqKNuSWd&(A~*B zz>EUpw+q^d#5Gk4;0X)LI;>lepRcpougmSF|C}_5rELHHqh_Hiif_h#5SoQ^njtX&_SKnAyeZ_o_W9#6iHBYQKX#RYdwrO zx-@0d{Z|l4P+N2G#%X#!nEOU?8euPsWv`)2pVUK4)XUc@x9Maj;Pd$2B%>=1&I{_1 zh=|pbFWIKq7yJr6I(;4p=K6}LIC5C3PmV)2Z+~rz`WXI z!6mhjZ=r$GVXWxy5%s7)1PU2EiNNPgpZj}%(PlvBNhnmx3BEzbS~LG83O7KwYwgp8 z{8b36^1C$u07ux%TRM>R8o53#l|KXaOB3L-GSzl$l+0-w{A5wmXvzD5LFnBvtntXq zzeEv^b=RXM`1zlcDn=YfjU$54;ZqSL>ITIbc6yy!y?hSmYd6YTsxj8;(;YP;?I|#^ zt3WH+;wV|4O5E=q8muvmCVG!QlW#1Irqb$F+V3nIZha4-CH>{SoMqeMAgia=N)ajA zLx;?lL`fQ`Nr&E@UUm9a9HchAv4zrFzp+(&QRVlu?fz(=7t1OldzE}fS3;nYJ6rx& 
zK!_t&_R@RC=@`;a!SSzew;tf%uE!h>scQr)8Y|^Ca48nWFwhbYffnPTJB=aM1Gk_| zltk2Dk`xKvLZxb@T1T~Sg^2fp&o%^23yEKa7S8UT4d3T5j6?qOAY=B!{to7h6p>gQV9oAr!Jolsl}q+S9)M`8w$#bccn%p}EMVbQ_kTm0H> zQri!nay8k*7)U|in5=>eIDhEw*vi1gO5M%jR6g`~{NO99bL&eZzpd~Afhi$9x{EKK zOkhRimC3+Y3A0fp^2?YX-tWLucm$g>Ch6}PT2mi-&tOrT#r}Xl0PEW8+`)Fa&DG&=qb;YME--(@lY!i?Me@8B zPx+8w%S_l}G*!TOK#TlRBhI#A?X_mTZ8`2Z0#BkT5K8qma*4DQT_Ph;jG1eh~6be)@vf}XB8rZ+R-q<6EZ)0YC)wlnxA54 z0A#u9lq_f!fd%J99k?-&puoLB`M0{IhGwthKoavhMQr)dZ9M+~Ff+Yf8$)^{d!W|R z`OT2WG>DL2Q-C_+(wAvFl>gfaD}E2VX$%5E9p7CF%=LavF}d^<@-5Mf^gArXNQjid zlvYS+*=-}TOYAf(m~R0athlq5aTv2mw6+jnAxplI6k^{_IuMApaP*dj|95qskj^;z zFj4<#9edrAxD3U}4nT1K^)?Y5AWYM)7)C1L431THL`5q)!%jts*@8kQhK_elU#X*t0D%LBOcFqaQB@@Sh&dk=h zBakO8`l+&SPaj|>=lHK48sS;MSj(MIVW)i`MQV6MCG*#NzI%48=nK=ZY=Uo(Q_N?| zfeLE^1V9-Pf5H89(MYj8YX({K7<;r|e;dWtL(br$?B)*DFo3?Djf7W=Jv(g?z>;D(JS)36W}bD>pVJsy^d8P zz_)@uC9DR-{m^r(7_Rrbt5}7WG4K8^*UPfu_#J@jqTsT>6UTcSCFOP7jwC48&1s%g zg*J}W=1&~XvSn$dH*lD1YrgU{#Tw>=?V`=Vp?-${f)>A2aM)DR0P_p^N8d{1ks_i6 zJ@;sw4(CoJ8bob7ccGX2x#jMLB*v<5t;jycsam!}>Rwb%CHE)0F?{=$;+s9Sk$k=s zuOO)Dz7iNho?u=#cZ2?#*6akBCVO5Vb#8WBQWsyh#EV1r@|YRl#07%TXI}Hs+7rX(0AdnVcDm>}3 zR(-tX2o_+P#-E^0tv_%wwpnn?yf8wL3S06A;qS3?l1I7;DRAqJp8)mVD1G18KRn~n z9u@qp0PPM=@3e;rz5XJ{W}yyeP||v1N0**T=R9rI(A^s{@A0?s%>g ztP_1bV!F9l;-2+*E%`VH!G+|KiEu&?gT}4UI4PM{QK8Q5)qwltS-Q9wpZU47+V9 zaXjraOHa2ERyzBbDcu8D&ImMrX5(l~fbP1FXrG|b)I@8TrQVR##+4M&pB!Fg*XLyT z*y(_r?B1LPnJUQ?Ptw9DmxM_t+}w$=WnHPnAuK}lk)7+kseAd%4u0XX*j>K{s{Yt1 zxTguS`69sGiQtwDkF2q`!vEZ><1!z8QGKX47RvXryxs>jJJP#{#_I<|!ncgses~|e z?m3R10(h3IhyZzJ)bQ0P5F<8=D#9E7$;*jK8Cpe<9OIw5aBAkb^mU{HUofd0veqm^ zzA#n9oJg-0!=yRqxb?u&H-AY0M}dbD8PPBF!HDgC?q;+XdXr0)(J1y-EUV`wVJRJL zJ8eho#I44Z{&$Sx7HUZWJP10r^M7jr76U>uYm$s@8mV;n(%6|K8qg=j;LXo8DJXIu{*6o2?#ipap12#FT#+hOc+gxLNeU#!VaYnMFd?t_5`(KADov_Iz|yt?B6Yy8}Q*Evlc`7 zF)I9z!6iE}6ue-)1ODbiJlG)H&7BTEi$*Z#nBd6^p}Lc^5lrN-8C-EhsOmy23k?-w zRkh((m|FE2G{3htlxTASFSe`pEUDLth(R_5IyO0auv`m&f<=4=}5Yn>u51q;b 
zN!4TQ0DAr}s`$|4JJCt(OQRP%?*bjfT3epFI^nlcGQni4B$S;kM!)LlacmN4j3ugZCszRg`Z{CYy{|~Yz$sWp&ljzbTSOH=e}HH$0Q*1M z)lUJ)=3v`&e2DJLJ~KS?1maM#;~PZc6(r%F8$>b&0G{5y@RVY63KUMp@5%)`;9KJWoYkEbzKh5! zvK;==!QSX-m#mv=^T0Fj4tRV-WIWJrJgpEOb?n=V_*-;FFnaRiNhNRFMRX^kUf*AM zA_lmhID_k=y~dx1jTwYN-tfsULAc3HN(uJfXZ#8cwt+^&PSER>Z#_xc^Esi+@f$_k;czpVK%+)uhradviktT~0XMU@S3&OLS zG{^?6g{;v9Y7Z(ROCKLI7~u*(t5JYn3U@pos$eus0fi6?Z{Q z0|h}p3{)|)hLdqLm1syh2Goe^{3^9Zn!h3PlA_&IpsMMsh3I=8a$Mu;4`jYE2sN@o zwUGPstG6h`IfL+n`RZd+$dtGmuLU~)gEJ|PM(q`i+`3YjxzJ&*UVKm4NB=Kd1BsoM+{H7o zh(qS;lsbjzhEqvx)u>?z=0lbr;t0kt|Sd=@f%@UKOF?MiIhJbK};eEs5Ey{ zDZuQuv3+%|1IUxkBB7nn6%n+(0pF}=y?~Fp8ADg9_!N2yY{}s8ltjzT{_c7e`o!w{ zaU^#j>g8TG8e{cEJX=rgJHdn-b0q!Po_P^3ksiKViZxu1rMC<<@h#)_l00zOJ*)$i zcvP?iq;7`!1}YGqN?XDn7+4GqjQiOY4(fORbtcUB&|sf@|I=r6buAM8@&>G-_(dm& z5xcinnZOBy?V_5N6g`zNZh2+r&z=yeJCB&056Ma3W zyAXU%W>AdI=wT4JNVoqHfxOcz5Vk0uWddD==8zm5ASSm$zUg8!o@E=40gJz9a3;Q< z=vIW6XXz*>S6en8=Kj=22bs1<*(dM!wx$p9gmSWjGGQB%661RFqTjk-clLQ2hYR)I zowhNq&&Nfx;V#<>$=&Bff*2=;z(eDVY4=V@YL{ak7T7p~*I4-SUq%@C`rM6~4|9AV zZBjF%3gNElu=|l?p$7ZtQy0Kr_V9DB-;KnaYa>*UO%S$&HV@yH*>@e)u#xED>@sL)S!-uKWSQoecj${YkaLfQ80AsS6stmdByGL+UmA@kA9 z0zzvV?NA@oKY4d}_p*6J*Sb#3Om5!q`pBxKl?DhB-E^*?7o;>xFLd>$*s9{Mcgf8g zqs{@EA*>gp=efZC^HG2VH2KMo&~K0)6fef@nV>`()wO|Sgo~zd-ty;Cv4;kZ@+GOr zKKOQFV+Y9%GpZ&Y*j)`H4CcZcmhu9oouEo`^j-dz6Z_8TDyd(u@NNy)_JE0(TLQ-! 
zdx!YCc-m<69o+_4d!xI~NX>T*{?tWwK!SV%MMk5d(R6&EyC^ycUhNia_br4AkH$;< z4Manz!gw|0XCMTavz|P^;(VT<*g9>Q-C!jym*R&tm^TCfp=W)-KJ5J<9Sp-lP%i-; zR4;d6y|i9l)u)g3i#u2*+qU?9E5_Fk5)XLncC6?(1}j@@#NqLPrZ{@FSKuHh4-*7o zxq*GuC2i4!+Ym&JjNa-U7}`fkh)gVJwOC`PPe7y$M#tfq-vH&eOuYp2gJd|@z%H%dz=k|v=!kqi#g*l4birrWs`)|ysgThE(64HTs%guF4YW~)g z1by^PG_Gq8(r!cPmP0vR{($~^@6$p6uJQ7FqkRHxyIa0UsmSi4!2pBY8Yu^_O{78T z+!}L-U4Q)JjLApy!y5O$H9TH$$n-=Hj-d_x(0HuE_Z|F5W=6>`j-`Yy#zZ>}8D#9- z*Zi-rt+*B}yKa9W7o72S7jf~U%9cMd#h-jYDL{MH)5z)**SLD;4J=i(;N@v1grLL1 zI(rBH#j5{Lj`FN}VMhe;Xu$wJSclhFnb&N64CN(x>j&aiBO=n&(R?X z=ZiBTa;fJ%V`ftyJ?lT4(x5di1aSy{$hiIN=X(k$0RO*!(o8Rk7!~+>FHwtD1uM9c z|GR{W+W>97en{VNKl2?1=*2&n?f=P(0EH_2vnCToef%y4-LEK-p>G}){-ME55D$+P z?jz>kl2$R{zj%PqhZq&h9D4O1^!r@pbhU@|!GG)C6bUi@Q7)_fFNfm$bU9P|V`Is$ zz4|a9tpW%UscRwkeU6*Rdz>krQ!!gjheA;NF3$jlb%O7!zRW{Vz~*}+%2>{B%R5r0?I-Qb_<)hqa$HbwI$|HK$j1q*jI zapY-s@vi}aN^Pl?I=0?0qQ(P1g@`w=h+aKMci=ewqokY)xSp>R+wv@8 z*jX$7Jo!qc<3TIf$GLj(%G55Wrt4=}XrwIBt2`6n+dk0iS~B(6^1`IGfAy5vh!wiG ze;yX3M<=gh2S){e_OVJ0vH?j2PHq{i^W3%fA9Q>F`dj_&@~SQ(V^(mcacbNVW-G8& z)5`o0!oJ3Cv3n`B;kKtxx4foBZ|joq-QNOFt}max*x|Xl|044IzZ#bm2(E<6gy=ct z;p(z>`F|-4|1}>#JW2a9PcVb7A@31O3*rB&;?Ipmn5-6=nSOYR)E089Cq4}@q z!k9PKJq%bfvg#~j8nG5AZT|P%B~$Hjn7>iC&r(NLhw-3FWG2KZuxAgzndY@j`jYRF3~Db zpG5Q$&>SM$8ggqg|6QOsKsd~;w&=bqZ4<$UBK{Nh_tVeiczIe!+fM5n32HTpO&ff+mZ~QjNzbpbzmW%}Bt7#r!#748J;{+c4vlO^HsAlS) z#zXK@H(SFYr#l3!_|*slw^P&bW*QMF8ErY@KXDO* z49wGZ&wubl)QFA?7jCHK18aw4qJg!_$!I-`FHtc{s1)(qlZcrEcxy zDN{tsDBw0ijZAoC>0bKzw8#C_@1io>F2%Z80@_GB+gRg-yPMnE{H1jJ_b)_Y|E&e= z8_jG^XTnqf;j&9WZA;qYgJw%R^;fQQ%u~p7d$D^{qkkUIOyqEQm%DOFS7#MBa)xwU z=%}IsZR*f^L-W}_{DpCGwlM}GBUsW=@R*!OpO}oNU2iu*J3%LBwX0|?Mvps9=a9ZY zY}Q4(eBW?}!d*b|EH+0TS_C*Wk|K;*pjxd|8L~Y)OOzYoPTlt|Qi_dR?}llA91x*d z1a);VEXx&NHNd|kqHz5LWAi;L{ny-)uN4=<&`?-5dPDTG+uyf(7UJX&r}jvOTg(wR zKP;bksC^xG=*AA*gVURXiL&ZQg=V2iw>Lljb!zbRsn`C&3m0bDmPzZrN|hB;do(={ zS&HBFR0!5db!CcT!9A?miN5f9l)806N)d`Fh7PE2TGNX+(>1_4g%*_;h_&@;?f?2-`JI&p9KOICgszSrVHjH``sA}{`waUw3>ZJwSr`IMUW79} 
zWMy0=Lz)qUiD*uHLk)}x=78KF48hmrKstuzCQ}tBE)b_fugFx+KbgDYw}Q}^1iT}Q zhN8?qyFs6;FdgD^$H;oV#q?MuynNiH%k>BJ;QZphfEY@^Ed+!ynYOibXIKNh+T2&d zcA$Gk&#dr{vTFmI8K?hqrC~tB z&F)j7%cBZ{$`twE?@L(-c}Jvi*p%p$A7$!XGp@&n9%hOtudNN{<~O~ixRbe5YRxsQ zy6HlJvN$IW8J~U-?Ed<5V08wrehzYMSn|6_=H<(tXPb!3+m~QRPW7Xts%^U|9fd?~ z^7A|KpU)8<&$qaXc;h&o+}vN5*@t|zf^DpwFt=-D0_+LH@=?8dQ2KQDmgDvXMfrC( zQ;TjnO2>M?2|8Nk_!!G34p(KaeVnT|tSdR?2RBr_c}2ebXBkWJ?(@Y?=py{6PkrKN z4h?!VoBP*!9F>Gb%cC0L&xxFM(ayhJ{V!7sizq5gf{b*t&aOsk0q+JUMui@LJ_+$< zG4pOrg)lLG39l=0goi;IebfK#r{ki8p=ra3&t;WDKDuz2aC-32EQxFqe>tIL_~5@= zuQCQp2v6it(;_2F-pNN-yawgbbjL?eeHzFbI5W;OOPI6Hn{ZZfZwti(TaXwSV0I-&+h_O+i5Pb!U@Y(8#(v8uX1N%bOxr~QA$i2kRHn}o=D ztB<@Ni;BfvTQ>X5Z4~_K5UhOYm4WQJ@iQb)&VAp)_r@fw-qVz~%Blj*(!yF*wHl=X zlT)312F@&C=?AMmp(8Ad9I)}CxU!Lo9aw%tJ!+R}urJ5tdn}$zm{XT4Ij z!C@|jgoNH#o9d(jAH1Ph(~~ScSMl4xF5BKr%VgqA{9apDjq>hoZ0iI_ z78J7GgHTgu@?B7IA^#*a;)t%<(!FScaF|PPaFebcs5UZ$PZe0O%mPrG9K97Z5WTkM zD1XaGHQVmw?&AZfLzt~O)-`gMO5JiXW4_L)_bLhd>*UGn?j1>&r+rB~9{sNK4=e08 zu*8!SX)sB)#W9`D*x8D}{O6I2$)SI<_oV)_#5|9+vDSQG0E|ZR!p$idfWQwOoC6 zbE6vPtW}v;x(~=+Vb{);iRxqcMd!T{I#w2<>R|U|4k|3L@%bqB{fD;5B#Ux8AL%i( z>P6Fd^sCzFatkd4-3kJqH8$l*lm9o&D#in@q*&gDi1*GP4{-5m^_QEBYw&T7=+NQSZJlnq@1N+rL#Fewc5(3b&hnm zw04^5ULD>zFP)bgN?d0)x+BE_j+O4I(7+d-d5d5fVzfJ^&P zd8;Z_OUOrE0-b@%IigYQN(~4sPni-Z>diT8cAp5Vm_UrY*S4Q7 zF)Xox7t&fGWaas&=mSrssr$JCYa>$nEqPnqQ61%s9v$Y2-L&hwVuN{Ekb?w-iOwPmxGFfY{d*4QNA&V26wgl=8>CS!-a26TfFs;n^#P%L{T}N z2k*bp?)}0vCCCS_FnFO-T>1I#L&0Z>sa73Ff<&|8fhO(ZSRZ?Nav2v<)?OC+iTslb@<^CgXkMY<1 z9(>W*QFe;FnxkYs&FK30O~_AXvbcKTxK`=CLafKEiNEs7IaUy4i05_w24DTJ(gSrr zQ;z&gI$*yFWx$3~T5A@!AG2CGjhjj?xVDg2ZsYs5vk$4o4ZTq<{+)wtDjy&&(*BwP z^rA5xPPIyJGED=kTE)VKtt`Y^){7MU{X0I!6T{$?s=)z*h4Sb;!=|2#Bz>)7lR_xE zm@c?qDuv?Ir>G$8$NG;?p=pgX1R`@p*@d)2Gerx^`;YW|qPF}S;#tSEK;zKK6@x~` z-51Buc{%SfMGAe&c}-tBh1~4wt@L|bF->$|$@}-IMxJK0%J-7o14glytIlIu(&}6+!R;QkW`l59v7SG^>9)7RgH|YU68F%%35?{yG zBWUv(N}*ZBfhHZTO^j_%MA$Xy`&jAkVN~3{bD0fxlHa*~*HXH+Ho>?e(*?tfa&vF^ 
zumM_5zCgn-bpgO^?|Y^Akfest*>8t`XygQ0FN}Co^oxUwI=B^D;?LYJfA!B;CAiCY znMbULtjA_@U1nuL(K#t{ByW+<=SXxuHGqlM(}! zCwn}~O$@FJMT9joJ&nrZ=cJGQAEf&L`oe@51*wk+FR@qfhlO+Q!U;otp4(2Vv~|im z&z+q9b&mPZ-nY@%EDc&^q|WW(`~@qYm`5e`y>>buEzYh#>%!i)+Fj&eIimAzV7^+Yd!O#K`|0ctHn@2Tn<=TrYXx>we9&;*@7Rx8g1u#??XyFXHzi23 z^;t{Fu*2Z;y9@LC0ka*W(d6%ZB(#TU{JKSl6&b#BFdtYwwiwph#3b+SZmJ7r*;0G4TCQ#N))uHm^Gy6&XZJy#-L5ZRSd0Mr&Ya*P_rzm$_vD$25 z`6QeR^%xJVEt2o$y+&29;aNYF(L!)^RjYhj?OUt^6@nNGGcaQ|$j6fy9gc_MSd@65 z=rjvpK6^ZBcp=G+Uf)W#1Jr5D)+_>di|Mo8n^UYQhE0teSQj$AXp~}EyODV6<5i6l z!g(mX59C#f4hO{w)s%hB?tryM{wd9>Ps7;kn-kQiZlk1&sH zl@djfEABlV&1PF~y+#eMGxjZyibYTv3d&S{;ognu@V#2T*G&nG)ulP8RlmYXPQ) zEfWrqF*C*)_wDcYMePr+wmZqgAhhg+v^c-^VHUc^4?(8iWBiFuaxKU?ep|cLsgzY$ zmpt5~A{pCBzq+72PSh1|Jk~dAg}O z-|n4a)K-w@9!3|g=F6^3RFe3>I7T_F>g8>60T$0(sS#15+iIdCYz?8|Mohu;cw8Fl zvCDK?f0KD3c-ooz(dz@LXt2N$W3SDX(3ECCb@ipC#2_)=)6F@y$*KTFUhh=B%<=xQ zbKypYkEkop0i)TA)~#hFu#IM;M`UJ2wLf^-nY9j~megCkSn!fz^iq4=c42rD4XQ&ERs>|CBB z(<^#Jx4#F0Y-AUaTML!;vf^TsYKgOplLei2DELwrzn2aVG$Cry$#6U%B%9Q4biG1; zGCVXClfF=!WG1orE|O^41)a6lYTDi+SG`b2({g&2nnlMu)v~i3081&`-SbAxAJ+t2 zsCHjpUJE)5uwn?)Gs*tqh1AB?E4*C;v zppgQ%5-jEYH*K;TAe#)CN)j0L^o0Et)Y#^CP1KTJBSgdskKk$E^n=+PB@|?Gp?~t2 zs5B9O)Vd%cm(uSEctpm%D8i70IDS$KvKXY{L0u?6XO0?Q?sZAlR$aR@K125(BX)P` zcqG~X$=obGMX!365Pe)AmyAlpHIQ|GxOEef0ZDlb;pg7g?4bWz)dR=lFj25JmmQ4H ztVK;Lg@VVF_+Ei`LZC(8ed{sUZ}UO`=4NuC%zDrg|^rF3)=MRMYI>O z?g^*I5GZvjC%hZftZ?i5Nm2=8z4+aul0tDOmtv{u(iVHI;}eIQ!H~!5?xfLZE3=!> zS>U3(g&Wx6LJ@o#iLT@6di+~0O2ArZuKm2sak^jfcJ92f7v$iZ=CF?G*e$GAFq%xc z@&HSX(F1_4UPy9kAxG`Wav2;nEKslUn@y8}OSxN{{AM-UQZc__fbSF-6dhvQtmXwD zUZVl$y3b{eW4?Exd3IoR+>azlObgxFVz|1J)bA6B7No0R8B@QSZA@Quo}uXc_7i!C zLzw3y`Z+|Qztas!i$l|1uD-h{2AND0=npC(9B-({?^J#H zIemp-*DAqjJD+QGDy>;z9@@oJlU{52JBDKPpy1lp1bn|6LQ-xt(m2GFRIz10(h>yb z-(k^Vbi!wJTr2m1iQD?vmn;+E4?F~v#XX^Vi$U%#-I7fUssesOIE8>gl07YUCu?&w zuRqR?(`S+4u?7F7CzK1v6V~6evYpKTh5M>eoKn{Jl5ZaMo7S2n zES|qE#P*O_xR1ySUj+4P&tgd7!)eu9Q@2B4{>0@nQ{Rxvk(1JzNYImM^u3COKLMKY 
z5uQtPFqK>CS@E$K<4!sxO{IhY7URNQjcmyTxvcbrmvuO&M$H4qzh-xtFFCOWuhC7K zHXW8}B*Wa4Dsi%Z5?8H2JNarg7+IQsq!L4f&E9(Hf>KTf_>TM@6YGVNJb%-BA6fJc z^fkZcX3!wrpoKb&wo%Vq@hC*`dHG~5=7`=8wgiP|Ph*d~W-M>GVcd0Fd+V5~HE-LP zpIi;A44bdAps>)e^DHQlP5@k#I#hPvCqceO=1JYM>WB<$Sq^ROK*(N|=^?MspG9-8 z&eODl0og+>4xP910_DxR^js@6(Ld8~+*yoOn8Ld?n}OZ$c^0YB@F3@7QTj4I=ebMw4+a@g*9odG9d;sZ z9Sjz|U!AXeN3V>4d{US-N-{t^znO3rT_32J2SW^wC!6~!CO@gaE??j&Af@mg5&G1S zZ=lJjF@R}qb1wReL1TFv0Tk#7@|O9C#3QOzwV65$dK&gzC6v#a-B93stiWzU@KV zzQcUzB)VR*PKyP`bm0F)IPHXtKP^+@0=deM+#96Pe^^C0UmVu1>+{U;qGD13kA$lz zd=Ol7k7IG(Kh}L}mV5jL`wlxBKOdkv4Pe?$2yn-DTgSeZ?X4@VGyIt?nyNN@zJ5Jt zw8Jk%iX*RM-rZiDdkHCMV5+eeph3I)6MpP+`h}|{>I%6d)eZ0{79R9WN8Ljig^Gi| zA5JnG9a&thoSS3_@NNZ*;cF7|3Wc%BP-y2gnwI~`9u8WPj~NzGxiGOWOot1NZB<=pW zyMuyC4)4>6EePwVNyHZX?*I;661Rh~l1i_u%jgk1pri5~Mt{;BJmVL-p@p3+Ph!b1 zGRGw`-qzI*7#R~8p96ru0>y(<8X})m{j-hq0mYMk4tdnMx%J08Dz&;2)Nqy4F@y`@ zy07%)5WR#r;Sp0Ig2_+0P{cQ7%H1~uh^H2~xEQAr*Gaw;J}l|dn=9Y+CS=cTY_Ptj z31rQNu*47eA+x`ap+lEs7<=UE?|aiXn#wTfF1>pFQN1wtkeENeJ}pD|irnnh?dtyQ zmZa8XJM|%KsF?e$j^l>n?#xrWI26ss6(fOY{%4VODoKD0;(}!8`EGNDPrM?0%#F8W zVgf=BKSl9$$#8*J45`d@^mnrA$L};wZQi2pib8iW-lFmKXFfcqY?@s6D3Vz+p&6A? z^y^#~%|G+v?H;4;276f|9gkV#dVQjiecbh{oK%?2Up}bzk~F)T$6p`eV$$-gD5Ox2 z@4Zu?tvpO(N{1{C%{eVyDF~%~_0){EwJa@VZm~Mpkz`v7ZyHqHJ>RfzzXub_iw#kn zBK2ZZ;pFwL_=mZ)Brgd0fGe`TmvMyLvV)R3~mWda39U$NbO`IwMB3Mvj%!5ct?1DW zdcFR2|4sq2GL6Ho^7wsRFp$nUVJs#5kSgu0AYoatGxo`RADM~fu&`OiT8+k?pH+

E8?*C3Ex;wAsqXUv)LGkIb8 z>rA=X4a~@!E94*5Kc zUzdn=Ji0WMsnWyIk=SJl2psWVtx>cegn=UbT5W)Q+DhQjn8-aW%#WTXDO|_o(5kFS zmUg|LEX2P<8Yc#%0B;luIf?TO&Jvi9bf~GY#S##Yskv+}3pT785p$uLXAmDb0s+TkLRYucSJ3T#XM zS94;$9VbC2dhGfZUZPcBzptooVhUH*WX586A0Q@%S3xfA>KVN6x2~!!e*o8VOjwi@ zS|{CSzt9Dgty*SH)6X?BJbj`)7Cn2(1h}V6!*qP7_H*XL^?iBY;1>51RoK%KUp)on zkmw7?h|Z@l6}$C?%%e*`3B=Mz-8!va;in+jmV)xxy+l#fXe+Ho;qt>ZQ)1a!Iccuu z2GR)QAm=)n=R=!JO8ghcZqpidseItx(|)8~qB`Y(MFrsC0E$n`FTsW;}Akf4BkN=(n_l z0VBg=?F9n`YwjHSR5XN3Qti_oNro)ZZJb`k6Zm<%Q(}A^`HANfjT!M=^lb&N9~v^E zUI5R<;pZAdzOi2Gj)3{4q6=@?us%V5#c1j} zd4lT7zrFyCr;D{==wW39+fxZLjr$~#rr;C*T&+d9toF?;9oxl#;bzT3@gQan-(ZY> zy2I(N^PX2P>O_OP`jv2Tk{n;lQ`gq}%Wyg}+poBXp8LDOMwh8K#hn0#g?d48ve$y| z^~l^C4#ryF4fu8Mg53}~9rvobX5r)oFBLHmF(>@npGFd|cvKWEERGyA?-a)ni$9!< z(=TJtB<>*I1m2;FQ3pfY@<*E{a+N%V z0d(edqcTT=GvO&*O_8Kqv@Ok^-ft4GLT{vFG72KJ2HM>WTeX(((VsxtMK{YwHvX5( zZLuFn0QdTzgl!6mN9Z^(gwc^(M~4!ox7|sPm$MF?mJTO24r0@`=FT2l$TlOz(+Pwl z^+%o>r(U{e(D$Xg2f8QL!;!rC@C2cA3KK;r!1(J>@5c$f)5Y7kz1}srm}SJ zP>#bVku%_qh1t{V_iYPne|fat$x`XdT%2JE9aTPT`NXn1FJx%`YLcm_$vBGK3LW7a zi2UmQAjrywR9jYfz0FA(5nMK<(cFam`mBe{UC((!f)StZKKr++|M3;iNcu@OFfg3& z5Oe&qU6X+8e%(>%ULPRq>iafxHz!}w9c+)d-wV75Dw%D5GFX1-cR(OvJ#eNRmo!F! 
zd6Y(QQUKyxRgHiZ5DFcBNi4+*Oz#$}BVi}MW%YQHh1!>HTYVDw9IRJ8%nDY-9f*xK z+qIb9q!@0IV8}Au#;G&v)lhhRi2r04YB{h4U^i3~vG_%vntP6&n-r;PFD0^Rwr+18 zBRQiC;a9}vw5paQ$*dI@m=jw<1~u_{sggSh!qAgYD zlfvsYts@2N|AN9-d)zoz>yk?;0>@wHe{QdzkGxXtvKG#-sGYnFpWwMH^UOSoo`WMS zux>Z}pkQ8xjaREbTEdj$4q4j+lz7bxYcD;(YeJvDz93^}=Md1}KPoNfc$69eB|2N@ zSX-6ZL|xkSrTR=wpL768H0SfD5qG`SXquEHthkw5feY2r;B>W`WC7@!I#g0}girJG zj+pK0EDL&NDrU3+;Z^pYJ)`-&0i3H`$IKI^q5GWk!SmKt^X6)&+Os9rXeq{=?IHds zCbmsRGk?~%lE`icqx#dW)RC~Q9k{_iSbN~1O84xFRWI7x%S*vT&I<&Ce-#BVR^Dgq zN5LgFF9sj3%sp^30a3C2%Xev5!-&3e1G2uodANY367F(1O6&gUqEiw2>{zysiysoq zDzJwegNDaOS~2Fr=}G&Xr9(QM$hic7E7W@4bp|72t$c_h%;rgN>5OStTeb=)IyUm137pj_tfz9mk6*6tg)($cX-k#4ow;R*E{E65Ob?X?=D z#e@KM6)8RX34oR*j~5#|frm@ADf=FtD`#6vra`DrHpI!lr~QMlCH~9sSLGMEJCd8B zq4?JhNAPQdeZ)H-oS*ia4F~flJ{FLsev;6C_3Lf-Zg+{^`q+Mas;!z>Dg1d^j<7F@q|Nc6{ahdtL zoJ-&}a&2JQaX^;YtniFR9*HWJM-jIN$r!;hbC9|~a)WKv9X!vq*VRaDOne%%0Z~!f zBKp(FwFa$%$6|P1=*SDkVh!7_-`iyUuCDqk5V_Xjl=40#o?lH;`y~+0NHqP?CDTu4%`d30&O0-$s@MyI6 z^rv~&@q5I90^p=vgm9q!PHA5A9W775vq+foq|)Mu30A~6q4k=4H> zp;?fPAd%G=%p%#q$P+@a7k@e77#zV|8jy^Ccj^$w4Mh}S-^a=FvvDueY?U717NtbYuZ zlv%VAFdKLgf#aPx>_Ch`P4Dex8|$!;$|1?9R5i%dWE(Szcx(aV7yA+gSo~7UG$kw)C1jk;Cy(Sk$LDn0q%Y898{@VlDK&#sOons~&Z)%UlAq7mAo5qBS z&YCtNiOJC(PE*U)@yxb#0`MEl2CK3%A+DX*^F<#hI$I6SdS((i z5MjULaoK3`&cs^tUDJmFAqL-5f23}Jqqi&s$i^oGD_mh4NHp_!(gH#s5eX6w-TGgH zfa%{e-uUU?g>D^*-&_z;v4g5CN^T110|6@1R0^(sLH=lkbg|Qs(Cd1}!j?-i2Fx#X zhhGKmjfZe!!l0!h+<-}el1F3P||OY7}& z8EfQp`8g+qIU?_ z5%yEHsu5|pWYMibLz zh^G4N*E00v+p>KOf)RyX@oHU4`hgML@bX+ZN{-ppScl_C&kTrvAD2`X$ZLcG`( zsc$V^8=P8CV&+}=@{@Yh}AE_joN~QG4X<lif4C3se&@i7zy^q$!S8%cNz1Ng(aZYBMNLiR;m>r@yHsfB3QUB$T((;fgLNL%cYg zeqYq8*I6-h$m;aYp!zcADk>yq$kK{bNWpoEqk%Lfvz`uSNmr6KL0VhJun5lP92uMe zv-T7Gor>A25*Pr%wI1ek+<_UIAey=GFzc`(i(LJE--?p>rHDTlJamy;RjI!E+?#=w}F*Uz@pnse*WB zDz{&7UU3UhOtk7@wY;kkNNbXiG#Au2U$ldZCYZJ=1RtrQ+qJvIahpO~l6%K}RSxm& zE1Id|wdj`$RAqM|vXYQgY|p^5$>}=Xl?!+A8MU8p6u!_M(?1<@U?!}sOySj0o5q$u 
zM#fv1^Lb3|M>IYp`Yg)JGhZd&R=qJb7i7K#Fwd$l3#b=cHN@5)-Yg*EjQw}y~(V|TX^0bN#Z2C%%zx*=t zGP*YoM3!ilcHW@!LrxjggQ=9ST<`b;v^C#GkOF!ip@w*V7WTnh7u;64<(4OVwP`w@ zk-(4p!6`Z`6zRdR>m+$Qy^oG z#LVYao`{9&Htqiv&Mqy zw9tVb4~@DCgi}YK<{0?3`OFsQEF@arFLgB?&3%uR7lMA)%EEkvNXC~rsGho0rlQ*A zM$-ulW9A&_!UC5>?JhPvuI>wrNX-TNAfJTGE6eMRA84qIBfj5c{ zgJrLxgjvXFaKycfPfSZ3Oy%o`<~8ILt*>yRy)9T0TN8D=R0#;7Z8&l%>lYb9dgosn zt)?M0ic*1$b%-Tak|ueg@eQK?QP=!Uh^^?1MPKblj3&`1#3mR2)AWY1y4l};$utCW zSRGdws5vTSHK#r%x*}qTT#FR97|<;`?08z2+Ke1?yq$E&eKA!V8c){!i8%S_ZnEvk zi+`}{v+iFF$NvP(K{$S=$lo2*e&i!ioUGY*>P&;hXy87B8~mN=A5oNCePnxJ2_8?r zXNb!?cTJ*-7N#Q-NfIY_dBoB`qHT)JU!^5fG*qufamCwjL^861T-{cO*8EN-jtd9J zWPuVuu{x-!e-uQM*lr-PJb`_B2fF1mVE%(`$#f)-qBC{avaZ&)LbI;M=JcWo6**j8 zK`7I{bQ-Adv4GXq*FI1Ld|>$}$~}+NU&N|CmLX=gVq%%%^FPQ4^RNHJo*$Sb`=wZl zhwFz>u3EK3y53gS89#_>aw@>|9h+A49>yZdQZdf8bW8)Uyr z@W4bGUa$5?6{*<#H+Q_>qTKs4xh^@l!rp2l?34Ky_81ympKBSKUZVb^c8C%{aOHX5 z-aVe!a6Jf*IIpwU{Ql`z0ebGe5#FD7M^Ok%q?}TJw~W>1lrH5%mmF7iZ^d^QNQ9J% zkQCr}xJsM+a2itmE0I5~TK1zW|FBD-YIv9~c{n3fsT1>98Fg?`t06Z=FlC~h5`U_-FGuc?Xlgo;fA9O7&yAHYsS^+xfo}JuT-s_PE~Z9iMt^T>t_t9@7@28vG{E>@?QDo(l_Q+ z@64F}6Q=%u%j$gDeoMcKI#SjA|4fGes|=BMzuWQ7#}f2iv~%KrkAr{TTbP0X_TC{& zZG1w>MS|P-3+4E0tbTu>Qo#!7R@9V)U0s?Rp;Yj{uRsqgASnBz*Pq(KfC@AJKs^6m z3qtzaDzwO2lm2;6JF&3dz4%WxGT;1G@@cjf&-{<-FOwccL-XI3X2$(K&d_E%WBKpD zU^)jvgi^JC{`5VUfT`Hd+qnL+68@G9gbCjC{+t2Cg$W*IalQU|XaA?`GVjR^1YsHq zU)A5OE5_z}{I6E|{8i~v!@<5E`z*Waf6Ux}Do-Ad^7{BYOclrdF7-{~|F`$uD5GW> zRcy~P-$wp_kDLES!bq!+uK%ah|0PNW{^`4BSv4p|J^ue5LjT^LsEFT6!3gyq^Qy_% zmGyrmi2pfJ;9__QG77bu^efA@7y1Q_T?{+X4jj=?k2y4m?(0E=3lW^ zCzc|DP|E4ofQgmCg3qNW)@A};zonZ0kzyLd`kP)8;a?I>&h$Q4Gtyd6Z|o?n5Q@L5 zP6$7|(`T-PywBS^_b6=LUvlX8D7-H781M-i8?+_?|-*h9LH#2{l9Y;Av6M@IA=LRG;9fFZ3GCe2^+L%HA zP-$2}X?49szv&XVi=sXz5`XTKG}2#vC0&mE#-D`|Ob`m=v%GWRV6v*cc0BN}8^Dsr zgEi`9wEiqY_gT_aO~ zVvJ_!D=`_1f`*3v-}7HdkuPcylgX#XZhaty?`@*~Ey$!+Y~cHIydTzO-|)BvQd5Ti zf$hMGAqT*(&CH+JwhzH%k|qj(Jc{0M7tV^qBx5W!biD2Y-<@9{&i z`ngd>4`06>0cka-X6nZnB*V;o-n%&`Y5#o#Hp=QS@jsX8pMHh?>xa&+e#V=a6}?Ao 
z3$;?_G|Yt`N53VN&M#6u_880;xMGW|R|1L$q_;C9v$_Pydh$NB?EiyvbpExB$P>NGm$)Yv=Ng+d{YU zgmtiQ9M#FneN{y)46&uEZt;)Zq49f$9|Y7BabB*F3z@QsPasAp&4J~z5PR12)k9(= z;m3&K$HD{9_)U(L=0W?D=~wehj}#E^dki#2{Qpe$8eVr@~GG}s!-FR>0lfc zYuQbBjl3WcGWczV5|qqMO4Gr4{wCfzI88po_z^(*w$qK z+>P)s<5pY$%KaRGW$GqJ=205`uRfd_&v)A@xxYIjfiB*Zz6J)lq`GZaBsIQ_{+ynq z`JT<|Ogvqwc@W*Tfk(XuHs^0g<(B`+RSUPxjSv&w`IR>`&#UGzW_afMZ+QXazta~4 ziPKDLzZ!Z>tC@jO(4H;t{RT4G2R=JPhVkdpSYrJ6*XK>$-9R;JuEj$Z&$XF;@M*=` zOyk?U+xlc&%OM6EUrP1sMug}=xX;S|aUh|RcLs|^HJ}cMTSvLVJg1i!uq_Hoq?)>PDCNS z>C&}Q0c!)D4bq!r9!df!rE zp=_c+g|uYRf8%W|$yf2F?TMEY&{v7c%-5iyK05;mrKPD*bf1fVrnL4xB8nr()vQsxZPt_s*tAXDFXHu5{N0JTH~M-te;ViZdW^5#exl&kk*6bC(x+mPVkvP_=I!EuT zq+*PYjNsGHIpOC6;;O2W4w#gjZzy&IM`3k?;dX@FE@~RSxFSAe!qj+y)W5>i1{PB7 zz|yK5HcWD32d{u<97jMJg^&#-!7>#-Wrmq?Q5=Sbg68O#xLX&=Crz*VJl(RF<@fN< zc~sDKidDz_Syfba8vXFYVufo3*B-)DI9Qu1x1g`Z&s@_>=NgMP^Yvv~lfcstDQKGWQ3!PX^QaUMNPl#t$(}nsBB#5IEV;-u`=YV?CMV4+ z8R`!Mwz`bRpp$ff&rHB;@utQ-y%>lrDW&k}_#=r8%kET~2d~y^Jbtjbgu9=&kefW3 z>Un70>Mb#uq#MyWx2&pyAIj8YA@!9HKfxK!Y!6o}G4l!QCcR>KS2r2n!wIbIw;!B^ zGv87Q{oK0}a+P{a=capI>7auUbp|)^u{|P%d;CSZ>t`<0)1g?ikj+goA@+#w-KdRA zS$V{Fjkw~HdUl?|;M*IzS48`WYs5L=n;0)3lq5b94DTD0JZarwTA8Af|HwsS!W62V zFLeIvVZ!_5;@yJkx9{EOJ%E(JBGxoWs*{pB_;Jg0(E$f)FtXTzHr*Q2u8ubS>(iTn zRLo6RrJYDvLMI;r9y-02mKFQ2I`*TQH-%TaOY3+JyCEkog#HCIdnZRnu1-u0f|-tq zulg|E+!W2rl}ioj6^?@99gq(TSi^Pg5hDUBm=TJ^eKY1<7N?Yrc8T( zQKLZn$)$j#cBQMTnmX0_R>o9%B|Y)S(9=vVH|PdTW0LMiUA|JOSKe}Cq1omJdjFM% z^Y}bz(&*QN3w6AyfQe1pRla47oYT!}tW34X52))LbK47YtTCra7{4~!t|v(iZI&V~ zRlndtxv_hz64zcRDy@xDbQZf^F(9{4X%o|Roq4V?ux_{TMmeU_qv}fU4nlD385^1I zd9W2XMF6RN%f=R61yf&T9e&+ZxFo?ViqNp5c~@X;Y=w=Fousnt;qs|;TT_eh{WGv) zld>^*Ekr%fs|_h|4*j8mFKb*glV@JQIy&ICysPzjI4KQL4dO*?Wk))_dwMeo|9;2`&E9h1lv>F1 zfuL$&3Z40Pi@eBq`icmC`5E?mZ}r)oisdg-nBjS(9}ZTEpRj0NUMmR?+MDs$aN2)) z6$-a@*!7BdUcoj~j=t*z0iC1vuI^qSZ#Hg^aW_6;%zjENEz2==yJt^WQFs1XSFp3{ zBoA~sp9SKbF2hLod2o?)-!OuhSy@18UyXlY@x~h+r%iu@KXy50$$72^lh5V*x8SW4 z9L!g0`!+5639Zb+qiq83T~{A&5;JmFINJiJC)zNVbR+iNax3|E(1O#J)o9|+aau^J 
zD%wx&_4YPx+M^oktSKv#yD*vT8;%N4XXr|$K4h~hc*u~Ea;I9}e6f=5krbkzij4p2 zY_0m4BZ*}e^boxviGpuB5fteqjWyO*VelnHXY7uo;Fl7n+OW6!qimHxw)c0#!yxYp zCYBhR_9wo`l}aVdt*x~4@s^2;``EO+0|+r|yhJELQ!S`i9FxetCKP!PQ?)6T+q?V) z8QYkyzmU4#0aPhb|FQTnF=QoyK+jYDh}gM7rT-37l%(-MJMGbBAEVmxW}<9ej4IuG zi*VQbLatgj?Qp9Nb7esG(;cqoCSS(F*@qkOUL@GO*59oQlO)64+)s9));!jh8Q^Xa zM)Kf&IRf8VoQ>YOgPrXY5ctjKmdVfOCmKB3mqL_mN{R`3dU8LXiC2`F0ghBlx)V#V zAJV}i;gweI!L8Gyg@(#bzvst`yL=AYL%gQj%giV3y#qr{?3=-RE4x+e$>1?Wk4O=R z1KF57CVccWd2Ni8>F```GYkQ2{p#-7YTE($wG3fvzwnKTJlXlos0)8?&a?<3Za+p2 z+ni_n92-w($J^^Bn{%yBu}6UOfuBvF%HvL<@Zj*2k58gm!bOENz4u5CJ8J>>A)GwB^H`E)*pp!U^<;7F30fyY`sVT9!pbUv~t`IN#d z!5Qf}01)kS>L!aIKUy!@`l`etj6rxChcKV>lj%i9cBya1{v-8)SgGLku)z<&YTE(p zYQ$sgtd(F)uaLARr1A=JesXbUH`%EqkaMcY-F-N*kQM7~V$duykk0zDRny+a&pq#G z^uoCR%X{^#H@>p}Gxou>CYKXXa>JQN4%hVbr?cxW)V$I^q|zzOn$RVwY? zx*T|(@X*<~1n-7Nrym`_~$+l zBXxIo=SrlkY+Z+JxQ%GytiJYIF41?1m4hF4$gErM#iV3{f@wdH!2}aCGKqR==Zi6$ z9f{3rcvM)f0=8pJDQUd!lE+K0@76w!Wn|H8Vr`19Yfr)Ie{cSE0}!Dn8jS0)WJrt5%x-41&jn@ z0lSz5^Si1fu0=(Q8MxP1eL3sDN(fCY@bVRy<5QI38o%Z1)S{nU7eUokR`p+|29>ya zO(BnA&RdG)!bb>rtFp|$td&ZiXB@gX42~=DHXYH`a9^JSt0YFW=tc%y(Zr2HJy~M& z4XCi!+$I~n`yf(H!Vw1(x8eJ_+Q%$Vhj=ZhyMF?k$I#m(0jn5!{T8oq5+OH@pNmzC z<(eH@S{&AS+%2@s-jm=|%<=%wD??2%^BFNzE?OC$oom zO-trSb$qUMIMPJx#c_9#VpZzZurGi~EtdLc4&uCD3nIgV<#Ugy6Pu5a3E9{OFhT zNdIFOmml02N${c=@w_H#gGI&WF{eA?HwR)qFABxpYTo2^jm|6Dbz*XN^>%GzT20*+ z)yFZasWGje5e(5y%_Bj|Us0TSau~3Eld*ljnpl zBByLJ%d}jyUmKPM*{`i5c8%5QkH4jZjA8g8><3;Dpj1fVdhIBd02QJYH8(X?t2pT{pEGoTr`fCzt#fq)MhAm`=9AOxe*fFuP(V%?|P#6PhAPe&)x?yC6&gO+H^2kJ(?9@SRDwn znzlMe@;M(A&a1OttFvmyMTdFPKS?1{?=Q?DK0X`0DU(zccl``n-_F!ReG`rukOi>WBN_ew%`w;o=Q%W9b(J%Jp82)yXUPzkcw z{DNt}4GLQCsa7QPZev)7sLaVOZGKg>Go0@veQgl2Hk~V%_D)k-FkXv@VX<6=O*R9r z?F&Bl;_X>}a2&;JEtFbrgpC2~+9F~X0gER!*-6`d6K~l%2lzufsEciF&iPeR~h}N|#mqb$|83?*0rg>puXPvX{L36=DQvEh3=(2zVDz?b?=UDv)f!#Ng$c5kDPd+wDB4X}5|E^LzW zU*vC|uE;K(xM?zMm*R;54%`EpLTopGVKbM!%a;ev$fVL&8N}s1YGa(-Y)D1gT-3P_ zKl;Y3$)fMQ{xQrsDHcgHYkrFJJ$Bs*Dm#FbF%V3%g4fMQQ7aX9*r?hwKrTZc*oahp 
zD?snm6aOTO_<);<%dNuFvAp+A)b}P_jV!Kp7lLtKQb@_pc&!K5QMi1u#bY2yh!>|A zB?VV7XcaHp7-4_oSx&}_ds0$BHcEf3>$_0l#|mf65eijT?;ITn;eB-1ldvZ$BVouV zdwj6FgV+2R&mHdPk9M8)Z1Sbo-ghVXMo)DKSDKI5aYtQ@FWTd%07ZRtXx~C@;@vag z&vKaAol#pHKb~c66zCpLr-(Lmw~=^hLO1WFCBxw2DO0nB3cu-T(Ug9>++gtSkX<-} zde!SdzZ)u;*`{A7do+4;S}|!ZyQzC!UrK4_aJ^bjeJhnVk^is~s@ySSc~+=lxQCmqV<8Ali%MT_J&VD5^~C9-(HHMU(tW zooN9Hud_Jn$1Afu*M4EDx~H2RJ*>E{bJlihs=kP*I0j{l`?o=S)BEVeC~9hqYv1b% zl%rPMNEcAwjUzAH3?~UTkQt_52~Y7XwwWfDo6VOpC~ux+E9blN-jttTLEn#HItfH# zE{{?%r_?5d?XnL%rC+c_GXp=;VVIrBQiZ38X5i7%J9(9}HmwX&)z*3TagpwLAS4^c zj(89#eN{c}ra@@6TsSiJ6;>-{>0tbLL>M^h2to0|vfyH%vtI1=UGr1H4ETuaVTRRZ zdEc=Sfb%`A#YMWr5e~1sd?uk8 zbjfFN_p|z7vEoNX-N>7EQ?ApwP)HD$QP@x8T;_d{63WFn;nm z&T-|1GbE(C*(%^m?(n>xyR%>Myc)PNX2L0>7`qRE>{pwD6;cJ1)qlC$e&W?D)5UFt z&%niXWt}zh9Eg^uZ*`eHN=qiF&Nx`=P+wtj zd=yl#&{3h4PqG~*lW6a1llmoy-632l)kOgDP?4v5D11@YWbY9`Wv}_KFCa6_Jp;9y zFWP$Fu7V8(pwtV9%V-67yb-$#i|d2 z&aZ0tJPa~AMMNdx2L}) z+ThZfRhmyi7uQ8%JMyH08_nJU`KxY}uOz4If-_u2OhbrS%c+5G8H*|OzPZ4wMi0;f z6)9kXD!^H1?-UqIS#m$oS9BtYn3_NdI<3l&V;nNiv4?-;EBbkew&=ASPtCTHND)PS zc$$Z3pMELvn7Js3=B3gZd3|+C|E<(#MVgEyYMH?1_T2)(Hp1)w@CC-odZgzkD)tG<=_P6chh(VN&^)-C-(vNH)Sf3q-aO#oyQbpXpI}ZB2Am1 z2=P=w7t5vSjRpy!tcTCfn9gSowF8-WHsU0>JCvUZrZcIezWqqKrutSch>lwQk)(Pq zYl?@-zlib~j*IvDceo0dY>|%gI0fi4!U@yvCL}MLF`axy#Phm?q})P2X8z(*i^rtm zA{F@Z!e{&3WwQ@#Dg59jU@)a0z?F8mhKGnJ%%^7_$j1_!GMki15 zzOdPY_rt4VoYU!jwSmwIVY;N6e091F_3|e*#ezPa4M>^owRL)fQ5oN1KA9BG?Q@}X zUeImdIWoV!N2&y!Vcqv06G`{$G#?rPhnYmx6xQ8eem{h!y~^i*p{#5`w+1I%#)R1v z(lksSR^@Jv`l|VU&>N$eC$YB`Z@9K5?{bo0)ZSTbu|XFH6_`qF>|mBkpgar^Q1#Z3 z{uv#uB?3k_lHc$<2U93blW>tvtD_P!@3_}hPXnjd(|t=*1bgHgNTnzMwIzIGY|Wn< zU7hfT2kEWl6qcv`NRe2ws`;kxrk>tY_tV5)>m#N$1y7%pn^yeQ$y!g0dfHbikGH`% zw2{-qZ$-Cf4pR`}d@i3Y-wE0FFAzXFvcqT^eLH!}3-?gtd$9$~n8Mj`S9?Q=6)e;q zK*W)qA1lGM-fsnBte<6Jb&^Zyw7Q;d9dcK%h6g0F8_hf#xK0z2V9HODyhn9f9ARtd z#>^0IG!?uV01YLzXHsNhBJ@iQe_6u1`1T zO!z%gnB%S2czLVI7H>7yh1VBC!=fV_=>Xo!JiI|2uU_J%D{l1U$z#kRX#2u!tqq{#dbh%on+Dp(kX6Zif+WyJg`yI%|>%ErBvK^$IR!A1a 
zjY!<5MzGw@EI!ZCdQIbmR+2o`p{kG<<8R?DA|�Lk+7J{zY?YOpnUIwfxa+U1}#0 zUv?O<=CEB76T1riqTyG!7||6wCK(^La+K!bTypFBa)?kA>I11KhFkcsf8VWFjNllD(XIsEdWhe>0fp_En)AOS0r6Y&$H4guG0qf89- zI9;xZPRWjqV#1BmE%i4??*1gIL|YIlxq@6?>FSeY+FLBHO0!*f-fXV!h_Ys2O%U_u zXuj1sC!l;b8d>7dg2b9atAkZDulLH&?$cK51)B}zRoT?O_zIOZ$+FuCVzzB|vLOQK ziSg$%a9`wjCLydUQQtwBEPU}NA^Jciw$=)IS(mVR;u`s3dRM#)18sMMb z=LEvv!nXUCdYxhz#Aton*%@<&zYlEGD)vungT!MP-L}P3FMSYI63vMyFiuWT;IfP9 zREfBIzla56?yB+5ZKoAIN4}C(J^SEm;7YyJDR_!wcD(%Xldn$w67X~X;yRry(&fDC z1ULg*+pn3eo&()IA{F}%2fbuTuJ7i~5|6cc%6Dj|4I1Hdey$g+n}@(Av3MP)f?o{sNKRbO-}jH1P9HstfZs=*jDbi^iu; zA*(n)fCIbVWE-|&P*V0hhEv+!P=u6@@yVyBXF|g%`owuB&llJ2jpDm{kbemVJRlfE z@;hwT4t-}q`cD4PMZ5fwh|{{!Xb?)K z$$3tEA87#EUTPbdq#NM3F~NgG_w&;?df*05DwY%BM6etz)~N6-$=-E`!pCkJ$|v^p zIB_!1bKoo!fH&8qsGs@}ok##?UpGQ>D!6hq+8!Y(x1KZcCF=ASb5u)rj`8;Kzz{ll zTQ;xT0ad?g{LJ;)DF!63u;&37C*@PYO+qm)D=x(L>p7}bdIcL9lxJp~FFT!2;K7OmLbp6ydS6#ot zZ;-$9(HOQadKdgT7&f?0A2-=?wn*=?8XfuZ-ZH-H2Cw+GU1SKa?j?D*1?Zo)(eWFY z34a_hDZCJAt)g}~OmeZ9ESxm(_r4XdgYX!~3zNej*VwohBZB;jZ?v#Lg1%+~iq98o z`(bOp^Su=@#Sh;c%-E}v7YxpZFlgn+{ny6lmU}P-ODh)H)k~|osF2X$E7&7Xk$XOW z#f`M+c>!=Xd~nG59@m56w7A&@dgdKT^-=m0cCp+u$!DuBe!vLm&JX~5)M0cx$21Is z6H@&y0gmR=Qg7Wlg7xE=NR65tbrIW`92uwBs(+fhGqsjj=1s`U3J0jPZq{*^TVRAc zy%4IWorC`ZPW$F>yE1qPfBDMCleVwrc>ju=fLXV2pODwN(QBzY^}zN=hBh+Fg+HFI z>7XAEt@@!7?!jC`isjNaC+@3=mk1jzI0a#m_cd-O=cdZSR0LB(yNi>l`*DKf1IK@3V@KK}YDa z-OqTc8?rI;x6W+q3a1MOri>9^?~NCvZ2uTzWm8MwUYTC%hnnEOe_RtBgBNO|@YI5( zx&4?EWagGqa9N?yK9mE_Wx-viDuy|L1Qah)-(Z5OpK}Bqbh=mfqjha3IUxxbv@71c z-wvxlyVk7sh$eyU5O{Zfg+5}vs2F>D#%?|FdQF?~z<&hl_R>Ud+Fs=NQLyfGuPQE* zzh=>~WBd4{o!=3_PIUK&F{(Z`sQn(#0nPi>uN<~Hr@%|p5`L`dtU_*m`a@R- z2o*#E40EbWpfyVURB5`WbiK~(un%UU&wD1p6K-F^(D|n0LlYC8NA{`%;`YuPn{`{c zdnCM7QqA&Z`%g_efl9VdN=FZ9#f)!(X$01|;|kIZEQD3pwV}1+oI{vPPuACGV$~3abtWWG;O{Cm0!CbooWSwtz$yDYf2v zn%$S`=(X3F{^B{-MTJono1bV;opyPxJ@eN{($klvz!?aVHT^R&$eZ1V?xr z`vkG5_deIVcNu{dr1s3kI$IOncepI=;z6s8lWRpi~qP?)pFPJj%e$Nnt 
zC_Q1jH^l&cjGyZ6d%ZlGpwj+EQQEuI_sYTEGg(&~z?<~>XKDGoS+Zs{^WUuQnRygZCdr$752P47;u;OGeT zA=E>ty#cUEJ=PkvT@ zHF^z#h~A=&88t+k=n$^W| z?+<0Y9$w(!@}98&_Z)#+FO(i_^~6`EckueBXJ_c$b6ik$7Eugwr*j^XMnXaw(`K*= z$vh^{`HS2THN6y{qsIu2K44873S^DIuq5G5`#N(%P=NCNvMV_MHKD=KsTlNO(zOWF zu0$?ITB1TM61d|}pIYzE-unLan)OLp?zbT$i;3zUgFh;S(kxUXB68IpBYDLf_hZsj zW!41E4?q^Jm52)7&8qxYty3lEY1v5Pv@OS@^)YxsIunc6Mys-^SMGF!6P1MPcAzm0 zKWV3IBp8LfM7l@pCe>MVAR@;(OI{5iEbDk!o1bEixJa1lmI}a#%Y^~i5q{VL8;KU{%x;X*8cCE}$?=}0N<7jr9Ge55W^ zVWLG@OoV+RLu?lRHkFp6cHbllBcHvZ2VIz&n&(|LDJt79<>pA2d}RKZpH(t70qwz4 zACU<`VLtTG{b3oX9~ZZ0fsiPN^upr^RKGlLuh~H<%FZ|t$uc`;W;n1j0J6;quVe@K zH`G8JHegQna@#>ppwL%?ws(!VzF9c?ocl;OQu^pF7-VVpI3ICq6;NKFi`#S`t_OaL ztYcst`UNGU$AH$hi?VN&-;z1!CwFT#ixTY#8zjL0#jt6%eAkJbLK({5Jgpl6?Y#c6 zzv3x(ljq}jQbWVD_%Nw&UmWH|wtGOm|5OS?e@KT{A zU3T(9h${{S;7l1dlh84jsW*6Ip)SL&myU%vz!mkLdleY)&E_$2&0Z-~eB9@3i|%?C zS_LpIFDU=@zP7;czNk-jR;XTecKX_AAwe`DV58n59LlOy7WYcE1QnfiT%ZF=bUN&c zhpKUMO&mvw2W7$Z82t!;NcQf~9SwibF@xXxZ-kEYvgmQ;b)rY*PvecZiPgo91$Belg%Rr_dq3yymSLaH?t=13Fg=0Qa;@_UlT znGC9zgmQQ4)2bR>fL9t7K$}GTQQ@_Gn**m}ghLNe;zp9^wQTk1TbT>MBfk_bn}-?} zFRY1uHPsxjrWSDTi3oeY$LW<5_brT^&!uXgy*`w~zL&9azOd*I4+^Yq-o=5beCz{zWC6VC4oyQ~$OU%$>cM%r)x2tF`j9U2V7Z~~Y=2A{k>)q7R|1*(CRHe2 z9D%99R#jo%{-6|RarPJj{Ut+iF7%z{xI}O*G=q&-A9pgS?9~Zw} z-=A$zMuDK~b&ljbM~73Uib6y%@d+n8OLILeu9^7aV3C{m5t=B~WZM*t;S^D(i&dJl&SE_YaMa<92t<`xw5pQ>e zYBi57LZ}NpjV9JvvMH z+g=PNzB@na+X;?-4Hl&QDc#3(7Tz-RPyYRWNZ;Kd5$mTh#|`~=gL*dfsl3c`j=Qg0 z{5OMrs_e#Dufv)<#8eGQ`W_%YogR08INfj4|J;NiToC^2u|^MQ?F9PW>2laODkiIBL>Nrg{sgBU%cNZ!83_k87+aivJY4U#`Y52c!FzKc(RhBg zGEDN{-0nabJIGtS);+(eG%NQo zr(62-WUQzE_|*=V#5$`@EK#3&^I}0{<$(q9sZ%zJL~mJ%QEw$1uxpyA>DFd1nBtlte4Rab#~V_>SXoU=jQW-YXXNF%N6z0 zPU3i_(*V==eBbwueL`h_fKD^X!8%`?&v2(Vct!ukKd}-KhLN!PXn`eYxvQ4ez-M7G z%k!+0M4NXyBLCvXIh+G1BNwn5BC5cPoy&vlKz!p^aaAs>mn6HMOeFTG)EwOO=$d}= ztRu+!vc}JJnM(wHLo*a{^u{D(QxilmT5O(Xg8(kD<&OJIg~DCW>K|u=a1?zYog%OvSvhE^>d=o4?babu0J3A z{@~fe{2TbLr(>4tu}p&NP9naVZp;2%&RRSQXrOB3g=UIb+lReqxZNd3WJLDBr&%`> 
zvif1o-_4Z9-uf3V-EVLp{}9*|VU`NfA-I!Pq0bW*}f`r-lPljEj6J$B#E993)2)jS@3QtQBsc+>kRl zUtjuo?uwuZ$Mo*@<-nl1ckpJeb~nCUXS+W1VVmdSgCJB-b$f#kB|< zhB<%7D7%d*b=+OYbzILGMte(sCr-F6fJ3OvAjL<-B^BLAvJ_9ULm}HIrw`*I)J?ve z%KT+w_{hUQ0DVb*i7rY{Tb}+aFk46A{AF@h4?JhnYV*ZR@o3>Shi1l9)>fMX2}4ZK zF^4bmlx3XrfkV~ny-m&6{}wcQplY_za1~xqhr1aLfncOdA7qqi#@noLgA zabRW+gCwfNjQnaRWngw%|Mw~4sQTN)KL6!ab6Jmzd=4;7Hu0qprlmMP0zvvxXO|r2 zLSdfdnnI-SH0NPEn=H~b^;gT5pPQchVIXU3d9E{3StbSYSY{?*Fi6iEg*kTUdi2T; z^M~m*8ITCn60lX9eNKD5Da0s|+*an}RJ#Y`4x>{iS?ivi0R;#qfy+1ph%X|)z_gG~ zp4|76Tomh8v`)fbWolhg)m=DmAu)$b#>G=8^Fhv>v5G~8i~VI@YmV42YO&GSJiYgp z;BV3Ogwm6C>eJxdp|+A}xW*C(x>w$35Tj>TTLz_@z)(Bk4&l{O<;EDTr;$Pm)5$D! z?q@*R2edQ&a^H__qBTF@b^Mbl*Ay*5#YRhg$@F?W6{y27_|5eC%Hdu!-AZtOHSl8#XrS@los1 z0uWESe)FCPZ0>lKRaF6jqX~cOmL0eJl@Bv({F$$jDq(ceoMj+3TjR|E)6^xpepNKG z>Yr^IOm2ZlmH>vpP(-pzd?#0h7OG*DD5C0ZG1gsaTI8?ncj>Pd_m3F=j|-63FEwIB zD>++#8-%40nMyRZ&+=b2g?cfe*P>fXP>ADY63vxU7c8KsBq(Prca8w5?Q`#_SI;OH zotGYjxO=d90RZ}W2UpQ+DmLZL>EPv!pXd!f*VN#rs$T)I*K-6>?24FT8|wB-;eJ<< zfKm&W8$oS8Nt9^gLD)cD?)#&P#lPVC^CR2n(NbQlj~C|)*Z`?do@cW&d?8!OdGe47 zsvq~rtE4}*%I@71^WI9)%`ZSF3?4qZ-)i^ILg@7ZpiHepB<4iJc>+oO@|iUCJ>8`j zL>-{CK4s7V(cq^^9%5SJ`#PO{9moPF11G zv3uD0YG+s1M%3C>{!upA(+8*h>-^a_SO9M$evsT_%|=Uai_Ms#Gz@WS;UBX$C4zw8;oQnhVlLke%t3mt_9W8PdlO1r zy1X3zmq#sRRB5%26Q^V7EWX!ZJnVZhyG3V^>nohBSohIM_lI;qyu1-*Upn?xDGiU=Bw0AA?4YG$GEOqgj{8ld8^%d;<=%k*kQ} zEax*uQmTR#IZs*p-8AmE$FPAMqM{kZ+Bwcr#4dYZ2c*?0 z)nw?Lj1VG`JMf!cu2r}uXANf^9>!H9my}5GI4=`1O^ZSkb@F|mM2{+>BLl96Ag&F7HW-t!l9F`QA{~eM> z{)ZoVSj%kE?PT`8r&P9Vt};tNc0mHTE-+6dYfz}od$5I1^{eH5p~i#gJJIzy2oo|2 z{hK@hmJQ=P2Vm$eWw7wv%rGGX%%W9x#Y*$+sUDrRJ>Asog-@*vtWYdl&WY|ly^nVF zFO{Ectn|2~w1nc5IN#ub3Bm!rIK~Rp=kTN$grmW)*S|1U_KwAcsCzBgm{p%Gg5%T; zxuYTI$ENX&kIxUGZ0<2LPN)*|ykCqTGHzkVWd_ zUFJM?+{LxMN9Sf;O!@7xDf%B|?RE0w7(EIc*d6vRc#X%he&4yTL@u;(RBuTXZkf|w ziz_H=KC>e3zwzY@imR~aH{V5n`m(4|`f+e(Sf^ZHp~3Yy*mHkI1Gg}$toKMMmifgr z*Yp@i>NKeInWksxgh0`z&TkOZ7A;#Qf<>ilT`` zZ^`61zQT>_x7_^D%SZs4tGpTc+HVJ8K+mh2RHCb6N2PC(dZhLi<#Viy{cP^V&R^3x 
z+MZ*8p%t4gA`CdcN_&?7%WP2NWYfWAr#77`n$*`L;McKPkspz-^afm~MMwS0d*L~3 zV4}gpuKQcKKh5*8Qvs_X0ej=Nu=Qc|H=PkKPWj_q8vTV2l=tTd@avH{(B;*<;P-8**B4i@cx0t^#wDfWD{Xh5ci_23hpl zaI8YRZ_|j=dak5pLF{itrSiG=-o>yn9>31DnhFl9-kHyVq|5m0J2iqQe|2uv#Y-Ex z-xA~qZ%(j1qDpq&bEFIX?I>m9a~|cJOWl(tp;7y#pTk+=PYeO6G-B6hy@Xzj?LUux zc_e=Od0sro!3rww4g9;@VCSEK4m4(SO2mJ5K573;JEPz_aURRXu1fq`(xMr-Ki>O1 zD7Qg4az>DHTlFxP1m%BqEn}%>7b{n(kfT^(BM5u&wS!Pl@bfM{pNmCP)#N66kUva$ z?9Dsf0Ea!phR+S=Ow~W%l_oI`Fhqg}&iZltCOvRtki(F(3SqK}{i?lb&fHCQaGBKrJK-SHa*F;foNo>T^L>;)MKW{I$6#}od@r!g| z96K{jQ9S0gZ&6o~BisIWOh{UJJ_|waQ#76@opk#73S5Ph?MWI2nAVuKx)B zJ+#3qo!q9sQY~GMwBc4M2QAx-PY^w~U;Wjx?%jPS#4~km!z+DJwut5^5ce@ATWu`K zdw8jQvAAJ7`z=M4H>$LBuJSFI2%J6i*&ytj_huC+Du`1c@B=|;2@qy2sb|4N4RULI z5UYzgymC`b*e#m!ljD5e9;r9@vFL&EL>n?*-^Wtx^x;sPF)Ov0G_$2MYZx7p1-eK^~B)xz4Rq2=a0n4S0I_}?=aTM*xw<*-+9|sVX z56L{+FOjQg>4{1ehyEY338~;?Ak8)XluKz`ARnf+6{Y>SpFTILq z=|j1BE}YMEh7NR-r}c*ya2tOEpWOpHJupItO8_QbKV+Jq_6mM$#=4FYcqKzi<iat(cj}AJ4j`Q``OkClI8=YU%~U^4 zm;R2z&lN_~@|)^Rs-F*toY$B?h)m(<88#8?E(yB!|FX~R(t?Ra31z1|1gX*TmJUAE zie(Ve-GWn-nfKw2!Fy~u@(6drhB}B)hHWQN@Z)51jIp8j z2;u8@0*&D7{=%9yFajZ8+)PYxF2oR^6GTH5itZSbAahCZ8@{t7dy6pV^;Eom`TPY= zSMnWByRqhtcAT&6eqNNY@a?(>FZl7^{B!BT83t=nSW|Zx*h1VJ2s^PStSuHa(|1+D zAK=aQZ6Q-e%1O+kg}k;U{bw`&UYz7d?4(igv>lSU7|3r&c~@rnpj4K_{zM)z!Aa_b z<~WkMK-IR(TmcSYBG8;gXs2=5CK>2D48ifc2P>tKB1SdP$zT6qH0Jw}s)4Y|LYQq7 zC!rCJUk(+meVOO*q%~Klgal1dOR1aO$GR7H?Y+N{L$?B@WBrQBxJfk$pr)qoGz4i& zhd88Er@nnDuZy|Zb0Nn_oZ8VOj>DC?*wI17XP+k+@Rvo$Y2Dk*w>z~TEG7>O)Z5lk_r9SJGZ(L@OoDJfY3g% z!fpAwX6=Gs3$Tp%yomVge5Tb%#}jmy-EnBeaRc z9>ROR9E|+K3nqI#uyMd-P-9qa+k1s@%{?6IKwkPq={#W7Q_cPc2Ke(C^De)QU7esiGjO$-lh)Ezb+c3su` zxLx&L@|Q8WiX1Ku*sM3CWa&S=TyvhUcC}ns#wDHl&v)1YhL0=isFWeh(*_3w({--L z!2Bn_zUdB_Yd!RxG;FNiIWJll_8Jffm=Vt4Kfsy`_iHx_1ZZegHK!^HF#OWzU^cvb z*{2k~M_dGCio?l=q0QZc56u(kp1NbE@*j}ie>Pw5=Y3eH2`PX1__mcbg_1 z*ZFM(A*mndS7gu-qeb^!(jEc%wFFhe&8ZL?O`a~B)|Z;q#COy;)7tgDt|LXzJDx8^ zTHL&@+H<&7KXUryuxPkb)86MJ1zi1muLD;;@-Hw=0ph^Yjqcg3NrMTMOOowWaj{0t 
z`V7|Jo0<5&u*Kk%jqS*l3g{2pJ)ln>d9-YHk7(mz{zd!= zVJPfWaCoT*;>Yh|QB&33C__FPu6$p&bcGQRkQ84giyC7SA3&VdIxe^BaesZ|>K;}B zx%Bkd>??H{xj$NpSLGZY^XLJi<|6@zm7%xR?67G5mS~#0f3#`dx`TBZekT&sP_8T0 zP^R0Mk9;>G2`P1`yRI#&VAUobdDn_@zu6JVC)IxHzifx&-DQ7QDI5S81_cV@A99Ib z9q7|zyzBN^#XNuz>Gaq24M{S4{&BylY*zY*T0x^S=K1^M$}H@vGcJZDW>^y8#EUD> zxN(G&xsh95R$sI%m1l6rv+wDzAFfy79QA4o@zV9qSA+K*-282}&9n7q(0!>sJDgOI zEXRQIWLWg^%q>V3kiR{Kl z!mzuxgYjmKxH?)osXLS&)1Ui?k}5&{`uFlk)W1`C1kir_>c7S@hH1GeCmp__+MjmY zq!!aKg~v=N-YwRF`}>+@be5&pnQy8a<Rvidr^I-A_W!g1m%1Q)-r5ebXMLqcZA#{YsIMbm@p=pL33r3& z78o}@O%+K{8J^7^gF8>iyqD_T&UgHtsJ%*huKv@sl^fksAhhY$F=g}WZ?Bdh_H{S; zK&gj0)#D}p=I~J04k?xNRcF&iGf8fTnxe%WCB@6$09H$tt8MEMt2m4t+B0udxG3`* z4}E0#`!ifVWZ!%XdU7R}dg@h(yDiTeFF(4MZxr&dQz~}kk`BTG;L?=2n$kO8+QCO& zG$izH@-obNWjrF+p^xMZgmTt%z3c(|VYkig_T$y)I}04=wgO_JqA0iCYl7h@n=@NzO7L-_NsRefmCR%IAuNOt?eZ4uG3@&NV&LYDe`=|h*m2u#{=B{WlTSUAg!L5TnPK^fM0ZIzm^rrUy@ z2+dgX^hiQd=<7@JCj->8(MOuVHVQ_Gw^@l{;o(wMW^gfUqauxLnYFmXfJU-eTP?2T zU4QkL?wr{HNz?J%DyHXd`a=e98p{3=UsxVO(p9vteMdz{Z&tX6`SS;}dj(t#!7#$- zz~BA4_erUm7r#yndvPYRm%T|v<#|dZ}_0C@| z_8H|F50=-HmZ;>Y;aBN#)t3@v|*o&G<|!x@FJq zOM#%L7C5|3i(-q=p)TU-(g8Y4+u`nR(r#yTwijo=FYtF^QZExvN$POg#B>`_*D}$> zeA*XP!SV5YLC>${89-6*FfN>hn!@s)gV;E$xk=r)|26ZpETdja9XB)R9yC+*gSPTC ziBH44lxxT#tA2+bLsgq$4E`P3>Y~5LGJ7u4-q(gr^J;bJG1D@X6&|FRy~Oj>1o_Ah z2f*`<+3I{seC+hQa%x_q6`(;=v*wj|RP02(qJppJurA_vy7v8~i_oy}FKLV^3RS+nRRsFm(T^^2F&JD>9t)C9JL2V6)mzL9*>n;KALMV!yWCT;Jo0~ z7<9d(lr==B;JVgfl+x>QjywrK7GWavzt&Bt>W3BL0NpYGY7)?%(o;kNTqVHK92Q0{ z2sLYYOIR}g*SwfCvA(LJEVe;2{HkO}S?`a~p2ugxMVp;{3Hort%zO&433nd_6alxfKrwppz6{{lW^cvI86 zfkVe`aKFp_Ak<6@cA594x0EG|Bh=Tfc8$WCMHn+E+K*6Uz$awJQ@QO zkLM4v^V?GNJ&Q~144(`hrz)2k*C+$dKcAD8%Pd{2B|X|-yZoP#mw4$T-Q%C6R_y^~ zk4hR>axe$O6pU}n_C7o|ud^K1+gKvFtphbmdaU1Msh+`J9H3(t83sv`vdvg*gMWfv zS?eEZEnByd`eOf*m396=cN?gF z)M|I!qM4;%u1?Fea?S5vws|c}YrVShVtVfJf|s^TBKEO>Mtbs@oR8ZCK5NG+{Nt1! 
z@-+8)U_Ezd&}ORrxbtp44%}loz*3jBC`_8p6O97he!Q|}`5`9O$N`NIRM_yy(o&ma zB{)xzt})x5WTjdcS$)jvc6QI0x$u+yMza^eaA{_nJQ>hb3%tDr`O4d05VW_#Y~nQy z+FV6DHdJHvY^JPMpuR#SlJ;r5nlhL~*Cqvy6&MxrtX4*OiO+cJe0HC8k2j`HLcOzu#AiP%_AT;u^VTp}t6V_VHRC zwGXBrm=dfy6T_FRJLEfF>!$C*!f2$t568+mc+y+n)f;0~R!{?P6Xgv$8o3cF@&{Yv zIH>ETdHr+qE1aI?c0$Ko#L_W?zd=sNlN%#(mWF;!Hx8)tV*VpL^JMlXBec_A&E`#; zU>rJ6r>9?xI-WyKV~j3$4VTFuInM)}r;+LI!KoB=z8vWJsZp|Qz{5 zwlyUIZo3cm!L2_@dXY!&BCL5pf$OLD8d-%tJsVbWN(l(`-AoQSr4uk~zjWOzWHNtg zJ3{~h3&4erIiZ7g<#FG-3O@NpK!l%ZqL1e3&-TnmL?TUl9(6|mERnSD!_IbI$!)Pe zD-MX4ri`F7N1nCjEsoaSfDm2>xF{DxH~C45nh3YxRsS=a=vTm^DCU({M~@K98%a{A z)x10X`4{$$0xi6aE7jqX;|OXeyW{~dDAl8IZ?By&{5*wA1gzDmZt5$a0lJP4k#zPt zp(gZrjYD|;Aw%>F9k*?i@%}H9BlBFp~aJ*WZ!itITm}Yb-<55byE^e{(|z=P1FGfzQYK^0AUs0P?neG zWq;3hXU%R`n9@K0?MQ76vi>v_y3_VvUgWsL6eoKD6De}0;0>=>6OVmz!|CtP$jZJp z;y`SS(T|WvWQq;{HS&~9*7DzvlE$qtD7_vOKhXp!ag6Y$3z!5=X-ZXl+g~OJ zwDHV~o>LgWT#Tax4J!oQd5s$d){S!%G5h)hv@Z#hJK7C!OFgjB@D9MwTF2*Eo4of0 z%E7+?7J@r0qvxd|a(h1|Wmqn+SJ|Vfx-**bz{Lj;ef6;Nw<#~pGk-G?LwY^#VVg`Y zsJ_FGms*b-hNaL}(AUJINmMTkdMLJ9<(XL^Qs}?3;z8`J_dl_23;j)L4g31~U8daX zj3$nAY9{7l)q5-^F8&?i)9lZW{x$r2;HogZQu8{cVc8OSx>ovrq%6G()_4g?D_mW1 z-k-ZTdicMtSa`L(GH)bZQo?dKSAFZ~O#TEGh?6~^p_sdVJR+afi~<>%B8A?h3fazZ z@}X!QSd!}dJN}kGZf9Pw2c0#tST}ZUjv|o~4A@>Ex5nGTuDfxohO1m;n(}N4KU?{c-2G!i}v4vJmDu?UJZQYeL9b zpQ6_cJE-SkfCF|#3>Zas&-#UElAW@?DDSh9UX_Aen7;Rv)0>bEFZD%~YkZ+% zk>o7>i7v&akDbrZGYxcWOXGzFPVRf!PeQBta1k|}DHw+m)Hig3_Gju$7_sYc&4!h^ zNOd#?5l6>Gzt#XWR$8r2y~TCC{WvuQ^TU1lCS1?erFZCCgp=q3F3{aI$gloBQ9ewi zC*}lj1)?vw!L@Jlm*W6DTaH75)4m$t>vN}9QD^-zMT7=C|nJc6`rN< zo^(|H*!6JiB%h^|khv#ZJ2y2dhdzbCr|8XBS-~7%VS`L{2Y~rP#RjMzR%BG4Z-Cc= zRCs{b(PNC@|Fi(qr z?g9|;ZWh?|Z@>h=#gVit#hYcpr?Lw|=<*3SOK^7lp}t50ra*>MYXrW<-P zxkj)1GIhnK*ITzTt^xRP=+`YcR9_;12l&d*pC(b+aW&Z+^E!$=d;OZb9&wTOjc}Lb z*(IdK1d=gdZMAh7Ri{oV<$KsS?6^4NB>3UKcK;diYA<5L*V;f-JOCx_Pkckfcfa&D zCw}^D**1y}_x(BuM!o9P*d#H+d)q-~F+C%u0bJ6-zYNEFO~&pnhIR+yn%sUD9?%DO zMrfh8i#g_U2MVcvO!s#aL@9zRA8R zURlWc?;3&K?M@wdK*MAB z#@)UT?I%kwdbw^LMg*;nSd 
z&=Rj!Axw7DYCpj9nVT*gxAn0lwp@>s+#PT|@f!pcs4w-Q^W0$0(-+M1HGcCK_I}uU zetUu@@KWW=ihD*Bh7>3R!FCLOn+*y#!kVgY+Ar?$s-kJEZ%0_ipD?L=gi9O>a^Glk z{V?W@bB(TfFW1{OZp^1MM2l+M9uijI5!RBAx8ngHg2|9W%r`= z^txe#>EGSi=kW&EjuB4Cx`8i?plVIPHWj*YtjxX5YVooh+WKV_6|r9QU=atD&6k~O?!Uk3Jv5o;SWN!UblSm`gi@mW50M9rW@;A~ z#bU+rQ04qNsldGzz@OloEViO1t9DY3z>0qkPi+pGk8cVn?XepZ%p_lwq|@K&kIcIo zP|Q$v;vTNu>Pf979)>K-tkMaZscp7rFjr+SF>8DF?e{Ks@ZD8fjn!F%lblV16JUX4 zTk}_Zlh4UQfOXUGwfp3bq7+=#SL1+n_Lw8odGWhr*F(bF=1obgKVrm>n2+J0-`}~p z2_-WzIQn*cYgFEpGIN>cw@EqPQ}&bM`GI)_Z;fr}epM1Zzl)^L8t|WL$+~%r2vg;M zFRfmlqTK*i&OKtWAo)w-{A6)v1V$Ba{pn4IVueiMPq_wm3h-CWu5$b&&qeSVVYbB? z1Z4gL;Jz^e1FK&-_TtQ%S++?NH&5!1=})cnx-~IFia)voyeKb|N(mGH85TWI7s^S@ z<~CW$uaROVB;}O*^}Q0*@hf2Y5k2;!jp=A7GY5aDIcV5a69nnhgp~|F>rD14zO=E2_VW(#lJVj+N!EPoRthk&vk5dr_+1;`wF;P@C!-c* zu@{L2R-X17kza;6>EHC4Hi7TRXRAjFQp`p25FZ*-n9c(>$$(xa13QC1Qx3HkyxslZ zU*PO*rDpvsi-F;x4{>DR1K;U)is=9^pQEA~JpgCpZf7cW6N@TnvoBSx^3%X0#>If2 zL{WrlgvrD__ueEBuVx{}GpN`BP5WmR$6zc<@My{1c#l8*et58&G>{SVXYPdeCPA*6 zj;#KW_4AFmUipXc+{O4IW;bc{fROSCb7I(geZ!yMUJTEnX9k!Al?flFc)#LRfP?%WW4)eaQz{0AMo)zGzS<6CzlTKRo5vGc_^*e@BQuL30OF1G_l9S++{VZaQ3xp0}%c4EzPwk zbR}T@%G#Bpr}O8%^A64GJ}WvO8h4{SX(jX7d8w)R{93u^tBkS0>q@=R4;^by^e6EC zIqsYSnq{r88biUI$_7s63-d>s_{-I|mgq`E?k$O%HRAwNJf*i1QrR~1&P>S~ydqwh zYeMV@)b*)!`XBG>?@n0W|M*sCS>n}{1UhK?E0^V*4WV#7#+kB#57S&TUS(&GVC#P( zyTI}d2n#$V2RX&wg{&PmJ~S}{30z_^XX39jx1ICLnva9kgi&=Gt@l|hDSufX_&E0d z4>@Z^gu60gxI3+o<)_$ZucSi}WoRY3t7x@{?Bkq)>JB$UoMz|ad2Iz2hx_R&?fnF5Y4}Uu_HZ#Jb~xSDoi=Ebn`6CA5Oi zAV5IIH#?mEdZ1ylmoj}%JO2N3vOE~;l0cAYKU3j)na!O3!(i=E!m+N zpx?U>4;F>!J+7L4Y$WKS11~#obM(ZVxh2YM74ees$w^1k;v9My_EEip(w`>#>*zso z1}eA}a4VKsJYe^-++WwE>C+_tshEbiGjYW0it>P}s0uz%B5d+kv- z9*`O%4(FS;pN zy|u;hY&Y>6+&AZs8#sX-#TlI<&2vI`7HOjJaV^LMf{%QHy5CywI==R_w4^x&Y?TEq zD9SX=-!J;RnLiL#o_i;rvhALM8^S`)_o$xrKl`vWjfS}G3SO3pIXfNz530&d>#Cul zbTVDD2WC%9yPDuipQRlCXXX;Z5J=TY1&dNM;CZBTGkI}EB9i64c%5xFDuwT5ru2r|K}m&WNci_oBhblUNpAPkT^+3{8fB>CGW zocAX%4F3pUNPv!1&(9-rmCk4w7dyh+2xrFDSy#Mfk 
zlpqplTEmC#eF?aq!M(@U1wOica(d{*FrFkV4Ec2 zGlSN<)~Qx0^YAu!F7Gq4rFb^i`;9KSmTm7C@8b;Uq2-4g+&xyF=`Pv=83IQ;j#mAn zW=GOx@}a?z>QU#vTR!anG_1hQ-e$$LXBu#8E8nf~@G00+XzqrJ-Vw$<(Q;Lla<$n+ zYYzu>G<7Tw@#jiS`Rq~6_C&->?or1I*1SgUyYP`mi3g?Kazj>lt;FDv(cvFWpU%#x z57fMVpW6?I(KME8^D=hNAo||Q7EMHi;Cvda5TVy)?WUL9wAt%JA)wJSSkDOXiTF2I zUv`>|(yt%9RBh*O{?xrsO=ED;DD1EtA>jNB;(|>r&(Z;dE7{9*QSmTZ4Gt}L5uTr89_n@17BhSEzYS-uJnZUN z0M^|-l!SH`KZL!}SGWpBRi8V0%Lm()@Fr%4zZTl98h;k8v~iC_gbjvtjP__+wAzmh z6&*Cq@+TH5cXfh!vSjb84{_Ea-sCmgxBh*I33CdqsS!c?0u%dtsK4GUmXqfF#`n3I0G#%5zy&p8PpVlfT zqh|NxWk@vN8Zzi+%&LE}dt1dS|g81=0(ge&=GwrDxRbL$+E$ucPmX>UCN!OoW9*t%Q=D^w}x2PBeedjMI zLpHy9Gl?DzMHF$54w<#e0Ed&&l^#$*3E+Nox~#uEpLwIBX221!v4RaZF&1GUg+z=2 zi7*6NLC|*ZRRH2Ck{WmyBW%C0WLZ@h>--8a>T=3uZPuUT#C2{Y10E6;Vdy%gfGXsO zNK?_vMp&sy`oa$tqDn2lf9zmGdPVYkqCLS4LZu$Jau(+#< z+osO7&X^6FC1nHZl2GHIN^S7_cR{Tt8459*IT+eMXB$QEEnX-%thay5a9`WEl<+>T z4mE52q#2MWq##;;oBnB0xIQAc2x@D$TNOhvBdLI-pgR`9fMH|Q(O)B)E#V#00G|~< zBB{jv>(f5js28P8*0+wDn4KuiC&dcyX;=>I+3z0?b{HIS!y5QE*#8UmVmiFXt6Zp} z*Ci~iy6oraX{}yWn}nC=?1@l;hNGzE$(b`vA#b|m=iSO0LJqzasQ*;w`;4zcE&p~@ z(m+-Jg1hAf9&JPIo8LLhz9j@D(6_~8v3nD;SXogZ5-gWFzK*(x=_GqIbDt^=$x2%; z1lz+o)DPS~uf!{Ay38kSkDP@3pBBJ*klEt)pxjV8-!3nAnB(lTc+JucJ9#a(2>O`l zbT{d#Jy&IYCCK}%w7E~_ex)F zjh?@9+n$TaiCQ2P03kS2$AS68xA%6@Xm+As3bw|>W(3+@f9_CgdPrpe51O3QR+F^& z`8RjA=w%7Tu6y)jH{oOnB)*SYBY>rsRCJ^K!M&w;xM)MhvnXoCMr`K z(Dz}!2X)n_pvlkoiAf3l1oX%|@I=A6@79PI4I2EN!yeLeT}&o|bw z-iw$Y=!~|KO`pa8%;UGgQ9>P#($2+i>yLu=Z28=1^mcV|XsW!6mOoD5@!H!MwEC2* z5kipVS!{+#2?8hYotP|Eis+*D$jlv<{vXQT0xFJe2^$3lcY-^?HMq-=U_pYrYk&|O zg1hSkhX4VB1-B4vaEAnU9o&Mu4R3PJJ@@AP?_Y1Nw}#c-!}L_w?%uO&@2dK~DtqhG zTL-b)A-gOem+fu+3b{pIE~{5P{ggI+WLOdv?eqF&?f&7ApynwmBXQ}s7kVnay&AaJ z_b^1iC4W`<+!q@^!X5aNvWNfAC;R`mQz*Zrp!UCju5}wA5ax_Us9Z(yN{9jT)-`TV zR37wJ!Iqk*83Fbuit|M#9#v|`b@2?P+Lfnm(LNfT20%ZPgG1O8no&KBFyRYOJ$2k@ zIF)hWq%z+FlQdUXdMxXT8For#tB6>YBvn~_U&wL0$sH*^1eWPkTa*;_wmePb4OPE* zBzr>!bcNKqM-P$;>mF0HmScSx3Uh?_eo{3v?1J;Ufaa~UfW)v&5Pu&zr?}erYj?Kt 
zdw#FzF~+QhU{>S8vn%U<)-8A&l>ehPQf~}ux;|(T*o$2ac?=Tah}5OfrFclQfqOxs zv2nS>61pZBxc0K~gta^8L^R0rOf}s9_k`pGctd0A30rtHIPoKQ#K4~*1p-e{B|6z| zkVYYkQ2`r*r<>f8q=8KTdka;HpyJJUoqrSZv*+Ih+~~GKiNh+LHCD0!CL7AcKE{mz&S@m{3N$MixzY?*Vqyp zz}#Ht`flgq?Ra2P4g_Zk`vEl&*jWsBLj(t#i*(My-DD*luj$bH(zTOw+7T|1_vclt z9reIrOrf!x67h6&y$u2w4x4rYbibJ*a3R8N!wEfx0=V(Xtp#~uYU5!|{Zh?)$u+a9 z*uEkh0U~iaBYuGU%OD{@G)~yyU{f9d#7r$7^5rEe%E@Lw_f0VGtR7$z_+;e3a4((tgS-jEaXlEAe?JH)uC;m!eECdlb> zfnHYr@DD`nIkkMIwlqaHaBT>x)DOs9VyMKwb%MJ|1#;3+@MiIZ+tr{uVA5S+myUe6 zyyNOH$3|rLg<~{@>w_RNbvyMLv`6*^^>!M`0aBo>BgBzaYkX}tiWL{}jQ~IA<?RSPorj1kpv>(Ky zbtxh}opE2RVaYt6H>E;*V~Ft%e*g2&KWPjUSh_I*(yWG^>R<$UJnOwo2;P=|39$72 z;~m{FpK73;nI&lq;2+OYs50P5*pSlSLqwqhvd}ttl=0i&qzk)Iur|4osoq&v+;uAi z=;bXFvn0QYIYO_lB2yjLc^YtX|B>>s*gxlse6sodwcm9wkeO;!QpM1gpns!;oJl!L z1m+F;M^eXxqz#`F?W7ADmQLQZ^+B!Y@1D>x5m*@Xgn(u9$m_oZ1xcj%nmpoJL0>#* zTx}{C#g)N{&oDPNvJTzTbsn_b?UnGBYlaKS<8!D*+ao2a)BEXvR=JHX@cV^@L4@*$ z)?|DqzhXuWd}RP8m9^m&-4h9g@W0(?Pz}C7XcSen1R0YL|#9&!LxQ1hDZG4ff!J`S?Bt1t^bnucjIt} zZ90Z*v}J#H0*FIC{DsPe>V?{cI!JCp3`Pz3=<0WQ22jw!`nrj%?!=&+82UyFzGCUE zUt1u3>+ws%Z@|A?55L_H|Ga`kp8>v4{^f<`LLQSFSFBGY?@zkBd>|z&S)*5lRBcAL z2OpHo+#f{rZj8?+@`_znvRVZE?YRB-)`JuI*8t2ViHS+_$oKLKBEu?6`YHGHfe%5C zMxgO9zZDJ@VcVHv*3IW@ROeaJH>B-I|5f83b$;)Ip6HQ(4bT6p{;C1mtvq`=o+k85T+XqzsT`_Z6-|3sK`i&DUsg{+}Z%XAtjtjTdXIC zF6NRNNp<$$sP}(@0e|mrK&Fs)NP_A)GK-HO(+=!Oa|kdFUexpdB|fMH3m{w5J8f~U z<3xw=R_gcLiwz5wFmaip*#5PQ|2w$#d+QWo0cRv|7JWu3LA2=iA5lS>;qV4E|7u(Z zWib2`{u24o2Wjb0rFj9VwoE-mg&(D?3~v#DEZ6Utg%!gHeFe)>mF6Wv(n1{@?G1wd zE1;vIFVKGUukgX|5CSZSaAiu0Z`#TibTI-Px2C>X z@p&{F{J6)k=Vkd-^~F_g?^VyA5|Ft}%l>zN8VgAxosk;u+g>KJyQ>ScMp$k&rPOXe zywf{G86M$C!ruYCe!!qjThB_n&rxPyzQ$=T1T@6yVLqgBIl%z?@JHd?yzzj0?4hmU zwU-gDXFc-5-kZ7|VoQAU?H=vFmRogRL>OVG?)}IdL6wk291SXRxWgaXd9R>2d=x zn}zmY=o*nvpsn;E2p14w~%KN7hp@P!K9Q@zE}H>VTSMZQ}_iXtE1R;5jaP< zc+;D|0esoD$Wxs*+ach;85(%0#&jd(1{veuE0o~m2zeqcZ^k$Kgtoh~&tLm~oX3md zvTUMAZ!z$(i88^j<#O8ZPH31ZajUmnoqctd$Kp(BnNUh6?98{YTMU=1!*Ax|_*wFE 
zzi{dG09D81tOQB%O~Aum%0)WjZpXLD%VP&`F<0v!O-GCmvwm)@WI%H;`1;Ie=%cG9 zr1fGLZH&>gP}$I$40`>g#z9ZC_nbJ52uojm8LKlvN^bK&B}Oq`>3EO-VdmUXzMBjS zDK9hnr|#P~*)pn9EkuWxOE`yuO5Yt8N*`m_id}$vZd8mor{`}+==RAPVY#@zmuor` zZC3Yo6H3x<{bV0ll%RVPaz(K@=B1ITEv}2oL4DRLE&d{3It_kW*zhuTQiDW%0iNzx z93nn>b%t(_MhkA^-<*gq(wKQ$EG7E}PNZdnP^t`CjnAvD0zNnm zQtG=Yol9Gn4bWZ8HgO6~I!3ITXy1Jw@E`*ft7Nww9mQ&Z!6q2cf`os30oVTHFM#F{ z)HnEaC~@e_=h7{&c{Ol@omimm(WGXqWpyjvEN`28(!Z?X?CZW_ZdA-X(hfIYKyt|HGOE=+HImd)2e+qGpw~Pyj;7g-JkLmcG-(0TVO!ac`|+7 z15VnYWYOKMdp+>z{Nl^g`A1l3fW%)xFq96?n!HNk>r`;= zALh>&!nkcl-NkSb)7W)sIoqMNn}gRcZTN4td0u71A+w!yNa4IrP?w<($a@GLd%gPf zaHwL`b8#EM302wg;vDtiQRY6WHAzVlZvkrqv$;Qkw1%~9t!hJCFZ-MQK6iI} zX*sfID4sImKS-xM&vzG|zOEX!h^a~+JZh?pWJ%DO_&$a`+}IHw$~Ar#^8O+Q-Ly!! zq*us_q)k*jCExdDT3k#}V7HAGUnR~N*xxEE^uI*rF`JUkUOjY%72oX!iLv(%`*CC# z|J`;v(u1I;0c?u(UoG=ETdueAdJ?+RX$U)?gC`}FcAuP$wb~v8;Agk z*-H05Sw(lMjBz71o)S{N^&C2ISO{2-(8uf2oD^wSyjq&ys$rtiDCWj%X1>sG?9u6J zYcjn;7(CyOa4}tl&+gmso;?hDMA?in&%uyI-%f0|Iv;&U zhu_8S9volqgmeEmS7iX4Z?kIA-$6n{4)r|Ukg+Sz zf;?1in}6l`j8x7QJ@EDtitDK`0zQ^Y;PpJK*yir^!GF{Y5W9=WH`z#eHd{|@%tBR* zQ<(K5Eq@L;h;M|7bA9A7;xc|&=g*?CmBxCOyU?xZtFs<{9f{hnYZ1&e0St79f_U!g z1lpdOXw_;kd~4M!K`#4#G*I!=s+NEgY0UHSijI?^$jFmG9u>C(x~IN+@S9Y_wI|5pcq*rsu)p%B=f1o&+k{KQ#4=s*VIQkEXwy{gx>%1U)}mqnt15>fJSvnT&7F*fo9DVz3Z6R|NDWSqD}K~IZ+M-Dwbo#MdTb>*8uUs8fixK9 zvE!YhK|V65h)l1>GHlG`E(9Vcin`T}f;pS&P|j$lT{-PV?YsNZOOmAtvC&~3{1T&+ zcC^v^+p+uwztesXOUb1aN8WO&RY2nReYXgIC!;ZH!I*81)Me2R!#E7Adk<5KZ8ARG zm=t#Wk9nZ(dz|*7id|W0>t7dnQp4C}IS>MPc!n5(<-1r_WCzju{qlZkVFDfjwV_)8 z6Vi4l?!j7@Fot_-v(~}8Pu$v?1o`Is^wBcZ;y z1Hr^KjflN3@cTC7569sBX_ z=Yf0%_0a1Bhl#(CtM>xAgwWlAM{1cMIQ39yI%X zg1@=!DI`_~o7y45*sTyi=Hj(Yq!iUed0GPEHlX^3*Jx*B$S??}Z6$5fX`*X1Q-%7_ zb?0M*Th6B$2iB9<>^J3lH`wm`Jq9OpV6zy9t&L!6JF{H#dUW-AWvw^Fa#K;Cl`&6) zI_gZXUS%(i;X8}ab^v@|?Svn9FG9T9HSe^}1#U>Zn1_sCt~eYvW1ItWZpxh5?0cRs zqsd`Y6FS9qhI$Uwa&qNNwR%F$zl1eOHWj~ca=eO=JXonPX{PNDnKY=ZH@9?_R@{p& zEVw$ZFLZrURxgj8)j!e%J6JAnpH{R;7~P9xf`>*gU2S4&j8Y(|nAAeAB1)&r7{l6b 
z)uA@4N|Ufez@_{RDL<|q2%IsBiX{i`jyBaw7mH)5wYq$B?4Fyzjo|rcmKMu-KE+ai zYX3cmP{Qhp;w+z8k}1plLH$<8=xvR9brTDK@*@#Cma~k{UBO=B@wrOqsZ>NVR+zRe z9|QX~9u$!l!gTfgE~;9^p9zs%@b>MSj6y4c88P21 zpm)#1$saBoZYKlgh89=9pB$jzV}#gW8s#bhgO;tU7PVbJ0dW zi;&9=4@~vi(mcZDKvH_(c!&w)`G^EzDX2?LFv~!(41rDS@G3e@w#O6VQQSowdrdEb0)%R9 zj?a3B3)2$oAr&HBr*N~~cQ{EDL)GZ5A1!B+JN1+X(x!a!ad%|d8oz@@qOgFRCF>Fl z^T&hFRXjhdq8|w(?QE??-fzwpzZ)nNN-LQE{?v9edlc)Hc$$J4j4MYH7COm2R5OOR zpW}JvY%l+|&U{^<+=w&T=VT3~YH#@Wl=-ZYW#V)+zRSpiow%@_nT|OQ;Da@ldT2SA zw1*!JuaIHC{4K5$&>M{v3+*;k{c5Uxv2od8l2nM1_O37qs>b;!b81+X>_&tgQ~M!B zB)Ipp#MDzsTczNEkKQ*^RNr0w>XW#o+7Ny z>pjm*vT1e$y=l12egxWTzR;2(CxMNr(Yw@_3;{GvS4^)zN**bC=KNAe#@Hq|{Ma#w zZRVtM5hae0tHHA@pHRXze2Z-F#`+f@Fr|Ja4h^2w0Jb zeY$v@MO|V&R=cl|QRftOhLm(=G!tKLa&N`R=kjgFX-e!}&ugZya>`bIln=)=CF2ll z!sr#y!NwSB;RDivsoiGnLdTG=p7wc?9JNpKdY9~@!B5$R(H4%FZ2`@7qy?ym*NBv-bjnFXL z>Sx=%zqajsQ(=K0ONBDV<*96{;6(}j5UL&iQu|kBjpZSfb$ejIbzLk4pZaIz^KZXa z8%)R^mQ-=|GXzhN`)>$ac!1c zE{EIgEewCA@@598Ti&DynvDBb(!K+&(lsAkMHT0a`Cub_G|@`v)L>FU`{r(X3>^>( zz_~|rHcYatsGFU3G&WiNN+Y2eh&|Avf4cMLpjGxIz8n}>O~fVZoBkw~9qOW{Me6PG ze)0Ke>q(YCc;}8@4%8xB!1>dPYdobeBWw^?6Duy+z8_Oeq`yA6d$yD*ncc%Ubx-Gp zhmKWY)TEr|x9Z_dZlsU(9j6XBiDSh(W&d84i8r*n>jg$t0*%41`6d|2dR*D{Sk92n zoV7CNcN}h%o%iR zRcSrffg+JQZzT$T`P?eL(W2C14PfDx%6Vy=0``sKC$W6SdxU+j8zpmb7-aVt}RdQN-BB`K<{cN{9FS}H`HA9zuOq!~17G2Jb2^Qbm?HZ4xj zsF`qtJ`64N2zLUr{ic5po_7`7j(359e$X_9_Fewta{Q;F%S?CQ^u(~Lh@%XgefUI1 zMb;D5t7qGc`8n7jYlx(&etS1H8^HT2tR1=Kibzjoi=<;GEo` z1!Q>LWTw{Gl34BJ#cb?Cnrxek z>6_OsMggrJ2yW!HA;9;Qm!8Z!E!gcGFRAb|#MV+)6U$wDbmg+l+yI*u1 z)#gE;c6?U1yH(ByVh4HMe8(qbruWn>Zu9zZn%|+S_)O8Y)JiDn^QpP`ew1pIh$=*& zHR2lfVg0zHacrA3BpaG8R0S9ZU`sSpTTk2Rep`j5Z5o_$SFObTR}q>tZF}RrlFa>ZH=FLr>Tf5JJ=w{ zf{x)=%(SXg#f(p^G{$6BfD{~+&xlT*159_mNkg;x_1Jtp+beQ)NOSmPTig%t3Jib- z+vT{!UwVoz8t{0}Fz?M(g`h`~4Ci9sP(Au!kF|a7_CY_lC%UVKcGu$tNw2hQOp)ZH zAgA$-7OE*k`?NCyNJJ#}=rO^UM+WAVD>~)IFV)N5#CQfut8%$q7=myNvo_hx5+WVJy!1ZFgr>E<4zI`fKZ;q`!?JPM1H-e;Hu zF5r9lHdDAmo);@vT|%-e?DMOgQK6=Gmc8p+cN-$88(EA`EjK1_x>0#aekd{g6ctF5 
z$6!LYOKn`647c^}X}2H#@KQB8I_={u+H!;&{!DA!WIRyk)IKJS80mrC~=Z4f~TiT zEZb}Rw$l3QFZkE|TCSUqaG(cNNGzrP17-%2F3#NW}~QMzuUUbN7~ zF>9@TI#AkIJ8$^3&vKTiuVKZOGVM>5H#WAAVmH7Fx-)Xc3DXWaB{8!7*kA46?f$Nz z(+G*G$K9EL<|djP>uv+z0y_n^)>p+XPYC!p{DKW>p~C+rG}CYmB=_W`c`jpux$@Bj z6;KX%YUP@vQXz$t8250aemO(?o@TD`&1Y(Vs4)ak$eKK*Ht;5^?K#u;y>WdQflsB^ zb33@KU8ViCUr(A=n)1m)2}f-lZya{_jV!+v| z%6w`aA7-#F3W6BIAzt1`Ll`#%!R^ zN?nI362&Lqe)?uxZaJY>De{A&x8DZbKX%k?@@pd$OUQ}}KZ0D)*wD7<0`-C72jOEu z`gVJq{v#tu><)GOdFO6kKL7IeBi}Jc3a8Y5mI-5(G=!Km8^Z5Jrx`0}xff+M;ca5) zb-&h7s2Qz(_WFiSeS46n_OeUcwk5w}sK9>>{bQIjOL1_JKiAKBJL@R;6+b!?V+N89 zTK1aTYR{Ds=y#Z_7i42UK9sO0zQ&gv*{U$sfmE$3R;j#xl25QPlF6;JJ(8Z!gwvVS zDA{2iQ>84ami!v%2B|L44Cd3JbZNf|l89qip;pjhX3)JbeQh|nvAH$*{RTf4YlBN!ShGS8j`H@-}m^l<4SWKW?u137mxah+j) z;(N`*5(BPts^$-NPX|}EUY5e}>`gk^59#%h3^{xdHLHRS)n0jWGf$#d%+O&4TQ5m8 zLABFoXClux$}-$CK@zF2tzd=(XRv4dd!w+fAC9nDMPZ& z5La!7ixpy&IYRj(Pb)8v?cD@ys78A+h@JTiHTx9BPLp%P?P$&qIEUApVvrcpE=guR z@WU^T4t=C2;C*O?=vJ~YkIsI2#6|NXiFBkZH|vFr)R1+a^`=df;*ScQd8f_}k`cVh z%fs5&Oi?(}DfVkbew86ZON@tRq4zm8h>KG1@nQhtu1m`{Q?Q zaj$-`X~BX7Y!TR06SdI>k0;GvuWXis;6PuuV`z~bZ$e&Hi?1Z#OOhJ%-k!Qo1jl8% z(d3V!URSGlHNq-@LROdNER39)%yTQWHfE3-<4j)jlY{y{mT>f(!vH{sZCohUj@-uekdgPu*YK+W$ zCji%Lm-a&Cb=8SJP*yI%q}LrK6}!M7^=sK$Xd@PrJD56>uu`*kLgaIVEAL=wI-m1~ zZ0zi%Z!T)Mhgl>B;*tRql@*HMZOCB3k;-K#7X0uMiPa^Ov2>-&Ggd_kyh%nSEhfK5 zdY_|TJpL;}EQafBU#iG)QSp)&FQEoGWKQlOY!vThM-Yin8u^T z9&`TSc6RV%#EK+mdRHsBvABV+?{QZ-d$vZkFPYcNKWkGng>o<}1@cU>9U>9wphkCU z9m?QzSZegPpc@Hz82Rw}Isy^xXk0C8Xid0!^pWiOULHa*gt>UkI2aL)Hl|ao=G*t< zNn><2w!%(3OXM7Dn^iM!-fN_=vJ>5wHze24`{VuuNk4x`C(4C;#wHH^>7=( zsa~_f=2D1icdU3!X^5UW+>SwD&wQk;BHkD5%evQ-NR3o>ePlLqVkfFD%T&b-3;9&A zMYN_$f+ddUZ5lSBmnQ+G2@Ype@h;Qr`dRuyU)n;`^hj7CP1!a(bWvJQV8r!EAB#jL zY{hz3A(AII$beCd$D;0C^)5d&-DB|kjWPgWWY*Uxkh=qCALhSjRrNBY8L~_wg9Xk$ zwEx1tbgV%en-J&~8W8(QSp6KNZS*v8>zT{7Ms?BJO_9QL`{R7{qft)jMn)O?A-Jx| zb#dh570OWP1n1dv!wvvm;MzvdEv#_TTG~ylgcPMeli_Hy`0l;w^rHgg!%Qz4)p40C zAj0!Xu&Z}kp3n?%fg!>S^OTpJA|>tt+_^q`q#8*-;4udEEHfIKm+nnLj!=-LwwF| 
zc&AQ8$yClJ7+sFSX2<~8ZF9;Deq^U_{ zT4c!pEqZ%Ro8jC$=Ih5@iY58e;o5*uW%@21lj;qNK3>ZL4nEc^d{*5~@P(RgW>PMK zbxN;qjYCfQEnbC@Kh=&@%i|PM+i80`>XK|QHOOYU-`-mUA0^#e!eO676qPBFyG88D zsNPyqti$2UZa@EpXq@Iu_6qrms^F5y`m-riFUfkpn_e)3rSnZ9XUHXsuKrY_fAvJr zgir!f^1Am~mwZWGWqfEaT2faDhGJD~tL1KG-j@`Evzrh^XuoKrE=qPFT(TtEgkdY+ zOeW4WZ835=K_kBGo3Z0O0a$Tnp?H(_q5Db-owXmTj9HcwCO87Dt^mv=!5ROMUBqu?=bYTOWjjaI7@sr zJJ_nvu`4R)t|fO-Ot`unWBl1)|q)g8El^c)4Zc(OL@G!G+;1ke-(ASWFjgq9c z4~qD~@bJh#F)pA^n_~%qw^C7?(jR@h@32yY6+S-fcX#PWUU-U0E9OcZgyK^B5`xP4 z+p7wzen6+^*ly0u#ZQ^PHIh6tUC?DfYq?}|Kvj^*YIH7n)t)xkLMQ?`Yh3w&>H2=9 zI*i8BsbaA(iaB40rHqAo5F&-T72{cjA7K2=Gm=-aut?lC~U8mzuP$@ioCuR#J!>r|PH6S6}#nhFV;}$P!<=18_ex?-&Ei z_xtfoC+X+Cf(7D3ZKP? zvPiw&(^`APy`2cuZCA>a9p^=ei*n3Yepwb^VC2%8YC!Ia3*`|x@yI_&h2aik@F@)e zC-jxj44vmnb2&`aIO6B+eEQXQVzWD4Ia(zpPQ6hGR&&IYR=#RF7|_N&&E1fyZ|0Jl zb)&!-e6&5DqDg<7HCPB`!v8$*GbSYsvDhB33(RTy#+rLpHv)&`1#>@=lot72v$?DO19c`5m2_iw+w#Ec~;=BY|Eo$q)CAQ&icqPESlN zu?=2=lShn7hT3auW=e= z7Jmge9EW;d;K=ctOp&v~7;sR^nC2QoQZ}}c_9z5mNd>gIRPnUCaz13?V7r{LW~2&R z_;E#$JSlyKMUT)|t%O{5t2gg?a^jT}>7!sX%v;*ow66XrU3`(}$W)!4=?0hJx}oN` zj@GNP+ME1U2ibCT0rd_G`T<*M9=*1U;FU0-2(ALK`P(7uFTflB%b~_??QjE8*ve;$ zaJB)bLr3T((~FPTNyFDhc%Fo{fYn->D){0b7VA#j?5R(sU`bb)FXJn)63wr=-#n5= zMU%^X>~&BvB9D;9%&&Ux=O*f_alvV9t2P|*XJ*Xa`-nPl^^R2{H3|#%j>S*!Rf{XK zH+|>43I|IN3PdH-G{#4A%p90MgDv($VUFbNX&AYAYE}DVbG#Jp#y31WoZ<+>oX%&7 zbfZ2Z*{~uoTA?QZ-2~?n6vp^ftdVR{PTlp_j$^e!(65^F3RbA-nFFxN^S6`eOVVCd zCLyYMm$#G01T;Oa?jhIxuZ<5>JKkK2!D{5lO9V((mrs9{$Vcb<#}|+qAC70<>vMQG zTVS~5A~JHzY*11TKI>ycP!=MUzInb3A#3m7u;-N-k_!bE)_$1Q_mfFTqJUXJLTP2P zA2FOXWMwJ+8yt!ffUJ1-C9vefTE9tII=fC2j4gL(p;@Ufhx~yZ%*!W9O4^v3{fb2n z+|t4F>=Av+y7#uV*YENZw~yHDca3r-?lmJ?8yv>qGk6V8(fh?_Yyy2cKL4{`CXx6E zgLWl@P3I87y0(&}o7|tlP-fLEVX&3SAt1c z0mj?05rsc1D%gn*8f9vT}q*~XFC=57RX*>0- zyqM1V8YD;{bta1h6DY=H0B*^-Y%RaDc$wukQFW#@B&XepMpV_%0oc{JWi|HSHzbCg zCxKqmR5wZ!+7+Tl4VL=JqB>7=AL6>4zbQS z_z;*b5+2h(etlGTE?@jJo4we5R~^Cl#~0l(>!*hDld$L%IGn~c^jK|YtWrB) z>Ewk4dj0h+D8+6H23{N8#xkIdwK~Cx!xs 
z?1+)*2Q*n}3G7Yn0Q4|}2Jr=I5OmG--0E)Y)bsRhmxF_*kJ#2yDY}3IsDH6-v?-b@ zK`++=%1*iJVEQ;dg4}Np2kJNYe4|dNXlnsb7)?Iaxb(%tNxp6ShArqg_NtK*g@Ytx ze{(SDt{|=(_3Tax+-Pi3U^}#N`knMW2(_5Ko>K~;otM+M~paq4ri6LDxr53y1P8dwVhhgg28#72Ekk|;i)!#Mr= zlm$JD3pK4(R~kwi@i(jjzGLC|#1AC7hId4B4=zhFB;2oV4be0-QcKM-eDNv_4F{o!qHIfRM1yTVH zH5imK*-*fOjpnm1Yj=!(tQF~`J<-&(^qiSUX5FZyIA#-B9or&)w|t}$_KnbwS}KL? zVyEA(6^eMNXSP%%o^(?W4Ouxv8*o=drA<9y;Y7fCIIxh%bsAHgs`B}`!110clWm>$ zD8a-}5?t)pEmV;sb7mr7Uc`~k!Szt;(H&5-4HQ*zvV=6qxc38#k zbUFz!UdL|3M|kXQvLeX9msPp~@OiY?-@&;T4;LV>j28cRaJlkLy41d)+cGRi;f-9@ z36fnzGiI))DvHjRNJXfmOVHKZiqp19hXuIn_f(tw)EcIr(L}>6_94y;SS5mdqviVZw-vi;SP_87%mu3`oy6D3*~;ESvEKj z=3(0CS$=<`nawwbGc!G9r4h!KHv()&v=3RlCEwn=)or$jg5vWm&Ez@$09uS1&{UCl z^HZCwH4E87FBF6;hZ5hO$Ws*iQQo_MBxNhMYkSj0H@}xf|FZ9R9b`RD$AYsjpdCufW8Vv+f7j@a*fEVk=%0^?h=b}PyZ{4_oV zxB{#hVnIw8RrI^J%CX#u&6w53kXhe`je$J6J0tzdWn~j?qrRUo5T(p^^aIqwNGzZR z%4=R;pQjNnYwV$%ube-KQ#sGXrHYFz;hOh+0u!7p9FpsKre#8qxI4ir1Sj~*>qPAY zjzbmb${nyF_GV*aJ05=qeeXo~<_JjVgK_bbLdp27YCEOKu)6*wRbV|bwOO_*iXZc- z5n3X>?&C!Ucc+ttYS~SmfrYA0`KPWL1jZFRvb;3Vkly~YB_F{HFv+@bP)vl)K%SflCG^MIJ>^+U*M5SAY{ zJWQO#Kt$G;Y|G` zsrdeWpC@tC_nBiha}&B_#0!ciCpEgbn->jnc z>VhqTWxwpduIBF&qbqkh@7QQ+2B}RSDB*M{W)0Sij5aIeDZ4C*^x2A^TM!sD_;TZc z)HnMYY=OE!K>g_4Zp+i1c|~)*l<|{0EH&%faF(k}r`GXTV_TZ1ID{dcof)Hx618|7 zmn9qbW-yFuZFkk4I{RtAiJ(2Z_vXNys@Ysll*nJJn;d zz3iy~S3wq*<)a}DxJRURnu1@YVD0`lzKAOors>52nA&cMXsgxz;9P09I z!=&u@MKXnHasouQmTYD+(zw+l`d#^P&OMh7aK6jkZ;S(f=w_X7{Ze#$ET27?->~hs zUn*#V7s~;?@edVBuKlm}Edl6eT? 
zIJ1gnYowOA&71OR{thO&`Xc7Chlj<|MU_CXhVJBNI$;dH_%h{H`d#(tE!}d3e>%A~ zg@k5k@NOyy#O(BCKh>Y9r>@C^a!N+QU*}b$%P>C{@kTpZDlqr zt(H)`_B7XaY9oDgFT)Ru!;yCm>E}?%t#J|QL&IgFf<gqW~Qx|kIdt{%gvxMc~pb3<_;cO}GI%$a_4 z!^`S~Jj8NgQG;Jet1`TyXh@Sf{adFab|P41n{dUVsoAdKMNeOEed1R{vPx4I%YXxK zqH_+jyzO`Nv-a>Sg=d~ehGq=g+?IB+ha??e23wSLFGDU?_rF+>{=}bn0Fi&8 z{)TZmAR(6r`><8NU;IbFpL?hNz&AA{kEbb2a)l?3{tuRV-sU_oBKDY<$v!wvZG;A?nbk)4E_R?{0%GlbGwv4 z(kR;*hnf}>OD{%?*&p27p+!>tC%Wf<$3cr>@cG5z5rc)2ekU-XN*8CC{qv2}x(H|X z-y%a{)BgXPvJgh|j@^>t_|Gd_vr3_`njdV9weCNmq^vKov*w;NjZk*lb^g^xntvli z{<$rU{+PKrUy9YcA8PEdO&TigrzT6cB^-9HEWFjxh zi^be7-as$p!$+#&6!x`23&kdj>ubYK_|Wdf&_XpcQh9 z$E*GYi%@&HW`k;>-?+d(F1)nHXKE$FHu)E_GRD^Qc)L4$W^~oKSZCSU%OF7X>+OoY zoC@<6QSVi-3l<5I&Oxm*lFrsR&@Z^mm*QIe=HZi`0gU}j)L-*{NJ^ys$;_tZDkc1b z>ezUfYD|a%5y}V3zrl$Axu^#d)|^-U`8!+4)r4d}b%guDtmi}z$M6Sg^}9VU>MV^H zk|chFttkNgi@89ZdRQfPC zV?`SLC$8`h6`>e>xR2al0uXldcVRtpw`8|$w_>+SALK6DIUU8SQ#sD8&%{JnCh zLIKA!SP|@Y?`64ud z7K7#sl>+ZCMhzlTeGkT!8m|hgUEU4nmBSNmJvL=ShFS+V_`Jm4L}xkrUARohefCvd z?{WXRUoj|Q!Giq0;h!JpzxPK=%BdjXCH1H)-6Tqath!!6T6bzHtlanjOY~5M$c?V2d zD=BaYdk_9?J^}!M56ORy3x6NipaFhA0|!6!uMp#AlGLNT{=}A)n$v&Iq$NW6=kT9> z1IgcC`H8IQL`O@z?lL=H@iDI9zs3O-HnP6^7Yq>hoJ3MZ#>Bl$I5+qpb-wS<;k^eA z`90x3qJQrNkpM6<0v4P46#YM}fFURHd*S~g5rYnT1GSv0h+V&D{o6=Z^at9H|HlN; zpu&tBIij$@;QwKhAj(~Vhh=}=6iV2&9;-A0DStEQ{rURmem%j%tlu?|Yw$zlpGy97 z+{taKag6=@l%sSxQS|>R_&+lQGw+#UL83**Fj#g^2|7lL8@?ULe>wt05$*rH{h3)> z_h!!^zJHA%=W8EWMJMhV$@iJ~yw$M!OM^}4J4#&I%?$`14PWAPqn28#HP+VFy6#$& zB0k)nS(#Df#Rq`}eSS<)Cy8-S)hz$;m5ZlJHMNM2YCU*+_2j+Z=6U)hOT?YWW*p`h zY(Bz3!9Geeh$5ar!2Q-e{u)faCq&lWpqJm#Q2{AbbL(VWT_b4}M?a#B|EAg;u(^xp z#kir7XW7c&1*=!3-F&gXxiBMVvY9HQNIVoexKZHQ)Kz(_+Jo+Qo{=T&y1zeAiz6(Y1V*yTMTI>t^Qy(bg6jm zv-9Z6vU;U@N$&DPp&6E5{adRsG5V1#QK=bw!x+Inc&kKlj&xe8Y*}))1@lo#asN&J zd-}uQl}EojEYhZw>NJ)vSh1xk%;2mf&$@!ytH7BrSA03^lydmBc1VRCJUg1dI?hKI z&KCSG%sOB1Q~ZK_dZPijuh_7Xoj*k6;_bGZe(Es)U3jy3nFpIT!jx80Qy*uyxs9%YGo#*n(7VmI^&iJUI)m*8q*x!FpW zaINX7?_v8iPH66J%r-jFg;{>Y>#qA}Lq5mb$=ZhoEBlqGnAOq=>SuawqaoiZLr~^l 
zE;eaAgXJQ=kdni~URZ`bNz`D=F{P)39@Uu!a#nV#WiE>iM*_gl#fKNL+EkoWp#FN+ zt2mLk0OvP?XmtIUNesh8{|{Sd9oAOUE&5^wO0iPhp+KQH6xZTjptu((RwTFtf>S6E z+}$0D6ennKcXxLWe)E3keCM8X?|J5rnwd3g%`eqBys1BiE>RV!7xmbp z2-9w*Np5E-la7DI+lWx^WkaFbY-gY}D{rAP6n>mf{}HZPH=1^>A~oZMImdjpV`7<6 zwT3()8^bD4?^|S%`dLe(NJ9MOj7{H-vg_S`NM>DT!+S=G{5Q#>F2!YfEmClN5zV6O z&)5v_=6%@QvoC1bk}|cB;|s`Z!$F^+>E-cIVnx6zDeM5TEh^w`g@$> zWOz$%{A37J2G=s=c#+isQh2}jMqNNL_N8&r%U-T>TIO|+aFn%Lm9|JS=cdtol~rq8 z*;!o+?B40V7SBA?n#XqjW03s&shMffm*;>Yn>ASIW(=gScjBm$K8F8LA*OOHWICiJ0hv3{TlYR;>@WW13582enRc=E(cTH$R4w>8;d6 zX&EUzz8N7&gyx=ZQE~&@H9Tf3j9B2ArvN>J_zCnZph=g+QhlZi=bAM*e)$&E^hT?! zPxd)-xkA%jT{vF%-i;ED;(K9ril<5CJAXo4avBwWKR#w*Y|l|>Gj&~BJ;7BVFRB$6 zZ=<;3IF>WS`Kz@GYQR@R8BCKO_^+5o+Q9EuQaY#B(Ni@!6NX{s;Zbj*1j# z?g8f|sx@u!R{LxU#M2-Oi&+TZ^aO}Zyq@2qMZ|vgM!m6W${!tL)s%%!R3B5`m)z}7 z7II_$-G?QLdBt(81q;STVnlqooWf9=0B!7JnAH4eAbzzxd4oL-!K+w_W{czLo6Wb7 z$;Q0rgJXy1+niFlZRceRieoYCP#kizbYFR{T?N2VcX3l!2hygs2!R3_VhwpftHe7e zJ%u;`;w)BU({6azDOg1r@K&U>7~s??-&V8$sIYfXAwNS@*8LNwj#~Sw!cX+YPB~6Q ziqriAVhUaQ=lQ?thkZ6JW9)~>MET0GeTUd~-p-q)XMB69e;aJqr4`PKq=!i@1^iBe zBS>h<#9?=ET9FH{_HCXjAs-y@*%HZ%UD5L=K`x|1OVG}_VSUMVC_h&Y!>%R3yI6ks z^fM(C1+@SIHTMpnogr)pLB-+qT2Z`HO`qU^3~_m-=y;vcL*G`q0`w@m&%ut< zhu@~<@f(hpas!Md=@h7%_{al$$$;o(6zoLtd)ewW0?uTG3D9LnweSruHPo z=gv&Rcna^Pjd-87#gFC+z)$BJP`rz*wV4uKJ*-cU-C#stCytcZzW>KT*i%P@=pJyv^6U^ddN;RBX*Aj=@BtpO!)ezCxj2&m%R;EV zomqMO;OIT(SefCd54Q}s70A);uiK|{q1P$>-nYwzmBv9bfrITC67hAO(Qk=4Wba)_ zne9v3kgK5;dJxTtduA2=4s!OqN<}grDP0~b7UrM3f^{&}GE2Afi=Jxx7eiuR=W4Uj zi;SrfqjXD;Z;S(IuchbQ5Bdw~TuLmQQ!b-9yy^GHpDD!~*)b-QPp-Pq{@SfF)iAI` zVXutRV!rR<*l&)DUe9EG%KzHYIg#VSMUI&(;BlX)NGz=GQ>-ZiC!dtR=m&8!e9~lz zMU)c3MUJFwE6+LP1%H-rKNc*iceYwIIC!~8r5a)}YW!w60t;b+t;&ifJe{KR=};l&UFOk>E5yKf3_Nn|N>I3`G;C13ZtOi*&q zMSe?UN+13XHyR+1AQ>g<-~fR~Oly(yuo_{-QkG~%4Xi$Wka+LS^~%wEHyY7A(xvP^%Pj?%%~>7TE$z#sO+kNmoj zW&e@2>pn7i)tWax7&03#iH@fb|3+gwowxeqQanDdr={8{_BwqB9A+ZRPLew+R*7H<4`erc8uxsm=Kt4|kO^IvT;RCiyO7uf3 zw%JHt&;kq@Yh4L#Tv+f%&4scWDzyCq6{yRmdg1gwWtd34MDQ&+%NZ;5ZKWiRt?mmpF_(Le 
zfKpmHV0{L466-`&%6`Zrh*Pxh*YjrJJ%5CA?r&qE*JSxxg^6(HOQE&6s^^&>sz$5n z2k(W%yd;70?me;Z3xtMKPQjc8`8(KZ54-mhMJK16E<0xBXuCda;H|#<44_t1&I-oz zUU<`6vCHQN*)%?iVf8t6oTd5OuOGX**lY~t=5GvM5(qO@7`ULr(QrR0rd$W?oJ#-E zu${@-Ume35=uB1C!}1;!vYc3p?F_)WxY?5cg+sM`yJsYj_5$F(zhPY z)XSIr3EiPX{FBrDYPzAPUDW3O9kn{@X4RZ!-;Gb;aA@FF2ZNTm#q#~-BCQN~Pw4Cx z>XItAxN}@bjo`BZ<%+`1!h`DMSXu%8(VcReh02*R?^X$7AL9SCOwF42M64|B)xfC> zTOFtrF>nyE{DoP6u^N&MHI67l#-APxs?vS`HW{iEsfuP9;!6gV7p>ittnk@?NSlK$ z|5B_T=RwY#?hS6>z?UzB&(Pbiu?&*#=knO zmZIcRd`hNZhEZBeu!u$d{Q)30ui*eucp338&83}q>t)!+9mcMCur9BansBMP?83=- zVJdQ|O}W4$zt>GTXD4}-s%gmJ>2>O0rhG7TETDx_w(V?J&d!U>i_ny8%(UqJ;}yo; zpoKO?&e>bT`P|FxVKyb*M`%@T2mjb}CEhXZJNU{mLNch&J)bC!Mb$6^xWUM%wIL`I z*2gO)FXyN7##+3p%gEqrXG)Ke?%;?b1H3;!J!VNp*z6I?*RJ9qXCW zA30kOj`7>$@4@mJ`W|(NfhgF&4Yut;alLWob;0ijiby*WzGy%yS^uH5+5QBeznM+U z_=nr4$7D3KWKZT`26(YtInR^AEZV!=C%iQ z#W1-UD9-mY;S{238Nf%mWQb)(R#Aasv>Gur!C{Ls$pr3!Uk~}AuvXrAX;GAq z_SbC2tOrqpt#sa+H>u~ibidj^imH9V=oIMGh;&;`MESitSQEy233^N}h#!#WH;d(r zm@b|FI>F>O9B49k8DC9;=0=arEvSPo|B)L`Gp2>FU+iOQZJ>krv=v?CLe5{mCe4)T z<@2J~^E~##JwkiyjMZ{iRCN8ssX(yokoZ3g*;JAaVe8V^p?C~4-g0GuG)kuO*(MoJ zR^f`-krojPd;aS&O^D)Ibz4Jj=P`*sxNGU>Q2Vn+(^;%xE}XEW*}_akx|*pUwii;< zSa;fNXQVS<9qEpypBmeC?1|<)KO!wJp*ZEG=6t#!S7VR}C3jEbHgC!E5vXhH_vd)K z`KC~;ETDEE>IS}Eqf6(!!dH94XN%%5=KZ!?OG&5eMf;PVW&c)y)>!7Ok%xrH&~@~V z`Y9Yx{3(c{ZfDi~TB$=G%&t~1jpQeqT-|V{e-HkH)AnNmH?C^sVN*!Jw7>_+4|)m} zq-O#qvKnfB;xG?0YK1C#+X2I_w2XX|9Dj_K$8Wm5&-4wsJYnEY+b(yz_L(eIH!=|g z93oxwQ7;P4qv%(M;NMjzjmhvf;jD^+C?vQCrnrh*!zlcjTasdjbet>(Oc^Qh?6S@x zSv_8Ek1oSb+?Wh`R5(8&(Ow3Oy?8u-`GE0=ci(W@7C7L0q>+TQu48c{JkW4NJ0G8; zU-lJn*L0)>w;CjJncc*n35*uECZ;NsMc>x!zPoT&uMombLBFfCP>$CuH((?RmP3U* z7AL>87IpoCm`@db6Q{e{kcOycF_G7g_T~+*5Lr>l)^8Kj;5}Fo#h6Ip&JiA_kJX1W zdy8bTM}k}=kFOO*@`>W7yhK>HCyOk0LIICfQ*DND7>(|XpETn5xfAtTOq0CUqjbAo zu7DC9+>rM!d2uH`yX0c~A zpW|M<`Vj%l2`4(UxcL&dsdBlK2uHLfGHZz!0mp6MKJ&R~`snnl(1&yMzyR6VYCVJK zj>Maus{PqqKU8St5YJPMo$@h?Wj?lW_T-B7@jZ_$bK(VeszgaNJcovcgf`cp4FX_)vLQb4J_tmdWhVspHuue4h4T7t 
zm_7E#(DSbNvdTLy@vRiE6bEifhQ5>9>7wMdYw4?k>^^tPIe*jEPHH0n9>F-sl^s+N zIZVfJS+S9~aOKgr<0e3FM`%8VYyue;K*e}~UB(2}k=tP;9~7>L-l^?j#RzD#du&d<^228a57sev*F*b z8J-(f9)lKS4mMq&pP})NZ*0v}JMWT0bFV@)B4INa$FDl}{G6$XBBCh{+;ckVTo9fw z-5`UyG;F?gZVO5CoL|!i-rb-Ej6R{>?yFnvVaUcF5Q~p-+~rz_07D|lI#pl$BQHdh zDl^V45i(~mp#1h`LYqwHYRQSGNbfi|e*E~Z7PBRjC2?!`>5oIt9j^HvhfezAWH&MI zS4yOFjo8&QEM8sCJN6|j$VRI4$61rrT!TSn-7KSKpNN)QfTno%HJ#ONd-l^n(nZ$I zCHvzN=5BT*|9jmzxOohDgZv>50iPE6NxTX>P>WYYD!JSGX$=vu0iD+R>2|uzA{X<4 zPQI)4c7*}X@gj`_?T5GZKHYlVqWCX)ZV>p98bqOch%W@nWdj;a zbSw+pzZJPCR%X*Hl-+UtmB?7V&>nIcITgtccQEpc1UrUm_>;{cQtg?X%U(>s$`FO? zZCAukLQggJia#+~ju*v$t+=xXStDV4CD$l^mtwPfBrr6+;3QmM$6Z)kgK|+QohS9P z+BuWGVJ*({TifzvD`OxNDf zyPQFu)+F(M>&RYIv%}f( zBQ-cydIIjg_llu3wyE~O`LzQ1ojVBow}4OE6&h0SDRNn=KDyakrGsZy3b2-r+YX1% zO6;pj(YQ)xVZp0$tq#6*uKj1$RA6HaeqMMr4c!0if#J}hivJ%PY)483SF?~~YoZK& z5kHC%yK&jrd`(-WlT(hUu_dVEB9ZO1Pyp*t4;~_LEhBcZ^DJv*;KT_^XY315LRwE{ z9>TMu5+n92cx7s~9`{=4hxdbl$K`Z=EiV0bF5@-6dButNWLlHcv~71%cIUC5mX-P%mMZ)W;l$`?|IZ zUvy0(<}?KJCZJG`?%sI}Xm+0&G*Pjz-`%H%>bv;@1vA1u7e5B)u29quh7R6_L3fgJ3^Jd=;7qnm3kI3b+7)3ix z0)%Z!6BZ~^5sI%Ik+1!Z<$cS4RtJaX$t7lxUq9r;E&nww=w z7^8m^vPBsLh^8k$DKM0Tkgg^b-@vG8QRNc<3I~=xrnM9wMMGj%@$tfOmz1r`ZvMi# zlrjax*k2qD3MZ3{^9GBR;jG0Vm+v&mNH;`qZ^wz-B6T%nz4wu$@R_`JtaRI}mzTGD zjA_oXy!2E=5X^5cWmH04$*U2O5*>8c@1Jv$xXV}dNq9<20fhgW4Y<5oBbgOr_p!TZ zXzZ{gFvsZf$GZ{l(v9J3FVL7aXG;&kV>8vlBJ`%>dZe3zSO2?@$=hVMH#>&9tz2A!$ltGG6&P+C5w zQHV!TTjFA~ymiBGZ1B-~An>Go1^S4_1MNCyY{AI!atZ&pEg;+o9=K)_j2W~h zOOqXCGz2(-5+m=kaU)-t2<+0( zLJ#_H84V5;#bbm1zAvZB_{quLa6-Unlpmg^xh~o2?QQgIAa`64j-(93=a;O5YOOlt=WTGT#M*hQnIxAy#az>^q_TPlxh>lA z?#~s(QpL5KhHFHSat{yDp9qUbjYIF={YWJ^&R4yBJx~aLtK&%1+CzN15DC-nh%h=& zB*3mQ?2(0O&M6MHbsJLKTUTKW#WSY^3O4B80i}4mA0D6L{f7L5n8!K&y_Xu~tEu`v zdNCQk>BaC5hrv8HOa-S|%TA!TG56=i*y#>lF2gp!sucV|i>vI|Q6|P+KEr%4s(4l} zQmjcv$w=dNP{W^GyJMSemii>2`C$dylU4DcjypTiabfYH!s@@;f`TXGf*Ytnp{^$19JD9w_BZj444C`qN?);(fwDK$@Pj0 zPL67r({`Bo91Z^P&cF z#PuWVn@TM9_AG`r89!V4t$wa~7em!Lv-4Uu5M$~}FB9OHpmmm$HiO=(^VnYb2Tk(x 
ziAI(IS`r*j-Y4^Iu}sxP8$r&Dj54OO8x z<@Vu5UM?e#wuUxR0$ICIBPZC?4<$76V zkbx_rbc={IgUiwQm3PL`r=trTX;jj)B^&LUF_c|{k2SkKG?pL6urPsERf=RBQUxk$ z+uVb^mijM|CCY-6r}J9VU{p$MlKTVMoU1luJ{>Gx`mDGslDSQtv6E6 z1sbTsS#*knAUd%$UKAv`$W)=!;MV+*3aU{NoOV5YQ1} z;B$oI_R#@M@>nqhw6AcsTt+|rxve(ZjpXwsH4sk;O{g8P-!|G()?09Xd;LXkaJKmT z_d62TvyKnpLMi-;RT9TbuauLeA^e*35h+Y)9nAM~o4nGUcfBSuhB&E2r65BVBqK6r zrviBJcLhYFCAd2=(R~d?`Hb9lDSy(G)AmS4tz|AdW>QerD65^7)4g4fbK}~rwzUIg z#Wdt5mhm$058lZJ48a!NWjj|6AOsc6az2z;GSl9mob?H$J5+!bN|*?R8a;#waGDQ= z2xyLh6%zQc?DNe|5kSZR4liw0Y8rRnciv^6zV+U=U1wDd%>_>b{5XtjNjHg%44pQm zh-3)Z$xqvi=p%tfFl}<%fZ))?&6jnU&c$=m{ui%F-gB^8HRl9&Ohjqh(N=U=Vy7tz zgCNDxSn>x?|Bg#OK4rH}=~D{tz8E(MP=|3A-eq&4m1r{B;uC>?Vzlvyw{0n0EFHA7@Ckh5nsK$HtCAFI}p~=65-qP z$1gS8Bs4J*I2^iI2uYUdqs;Jarx_p|=b%qh8K>z9uYklN$ zE$?`P(O?KAKPD0-ig=dHMIkt|_>yoF!-*L%Nq0dZ5d|T26j+I_hRzK`b=-Y+Q zLR#a}{jek#6=iMCM8LZUH#hI9kc!F;SOi}`4DTf)-HUNmx{|F{fsSyZ+@Z~vztF@%qV_4SZ!WzIe6+!1OQdBp(-3Ah|1lj9{oMKBz2QI}V{)*$ZB}iU zB4Sc7B6`L90vKsHr`OO6T15d%?P9NDiQ}xEZY+GZ)89%Ta@!1sFxX?0undt;p}#b= z{F2a9hdPMD+9Qyz0vfKR+!e!8H~SVVopciKcxY9ijp+7}LK|@&WJXg?icV4D;~^io z!qI)1H3CHo@J_{fQCb%^`}0oQN8SAi`^sJC_%mQ6AV3NigY8I>q6cm-`>Ya3k5q7T zx6K$UOEchhn|_?PTFPb~T%{Kr6}2DT+1cHfClgn9I3o{A#HOU|B$rX7I?_Ck7m57l zq1$-S-0;H+5j- zdprWSMp^^A`>QaF)%T!*z#oa|4Cz3%b0-{RPNy3gwMr9mD>!BAsfnfe3UF8bF+T#` z;>)eM>^0Vgr@%%P?R9{~N`#!KUsdlp(&@ST#WNoThX2wu1D3)HifX~fpa(T>=6XB# zj6ePYQj-M_6p_Q@T3!47VC-cNQBVv`IIT%7X)MPL`}xV!t-_PRs|`G`fbs2n-imt% zd+2!#wu<=#@BSzNP{bf(@dTrYl<|l)Q8tK~CKmDS^l%rn>KkotmAhMGIfVxH;zF%R zm-;`VIf^Qe2VbRH`t%RrL?E5AgC(E(xrmJMeV^L>H9X#oE}Fu;}9z>lUYsDu0&izN7%~Jvba>jtFgRdd1$o&c^gP_t{SY*1pLO z0lzAculaeWw$6l`vgIHIdllX;a0KB$?uRZmJTj_ObI_8AMuK@!6xd~<_Wdv}_+;=T zGy#Lxo0Zpz)~Zv|E6{_7jG%7vG&X`(grd;0!v32I$ixqibVZcGNgdjgb`cr;AmBc1 z%Po{FSW(^y*stt|s!=i!S0al8n_r7RHps4`aAorT!0FzVa%ak+{Gey#8%-HIk-qup zCEM$Mw7hZ!yHq~4_@Rk>VeUfjbS&g^6n=5ZYCiJu6PuutWixy?nLU1Y59?OkQJva# z2b)#@gF*~^%$8Fr^jKbH;jBiMB(sd2FAsYs<4U7C+(=IAU<4l<4GTMVk&!56HwNn# z0WM|=(+oOB3-9=B%Wr?m&)0`=+Q~mT$~U*pzO=lF-2FY 
z;Dx*sBf@B^H=?Aiv({%J+=dG{G*F#Etz53(dy7T!fB%z6W-h3&X6=x1Z8MNUmqCf2p z`EsXCvdLMUrXn?QBUGkVm(J0CqCba&78B6dyNP>E0*p7khgdEP(z&#hKi_Y*2{IrG z%9Ja$FY_USR4da)e8c|1ZPvFL;JjT*biw zcrhLZ#Dqj@V>BYfVBdd#Lt~CE@G0ou|NSec7|%Z$UWa=8XQ`j3 zXxev}|J4rTUCDOjrSzKR@|pjvRFDl()9K&MG!BuXD)uEs?r6F`oH?zE{~Jd9-!D#v zaQXQ^+B}zlE2b=E81cze4Z$tZA7$t1Z1|vod&mBNw6=FO(sntw5LZ@v%R%=&>XTnw?Dun>RsQ|anzqVOuTG= z(S*yUh(p`e?*6|gW1F*Aj(a~D{sPd*U zMm)Khx6?d$d4u3p`9HeMKb`Am4cgBVU($vY;!vX3!G7nyvQ*!_#k<*YINnOWMZ)b8 z`=4L?xn9SlLYB}P{`w{g@rIdL%n@S~{V(Y!1w~W?#2H!NHjuc>Iv#d)yl}nSqEY&k zR&TwMm=!s~?O9r2VodZOWq-cIp5wE=EI#-BuOybF#^U+?-?ji$>$(Rs%i+Urz|ye3tTt`69L?`6h%y9l2$&x_zU)9%`j)R*3u*_Yjy+n4WkWSI375@22Ha^oM_ z!o%y}+DA;&kE}18a1^wdL&vI_&4Y#$^Em>(C-OgDJ3sPQ2&Ml2bc4fxy8jVPqS{RB z?Lz)>RAI?C*PFaAcHi1D^}EG*WJxU!09)P|>Aur_|4p&|M{Tb3$YmA5?L2|69E)eS zC_K*$hYiMEUAh6=%qdGnzMs@r8>#~#PF`!-+xjFAy#L!Ph(>FCQx1N0VfBx6Z{fz$ASJ+o< z=Ch&tkh!l2QY7xs*q#TDa1DZR`m){MFFn0_OZz{h@Lw`9P8Dk(EcQA|fHZuDib6G zL1BkQyjPl<|H~EOa+@h%^_3>*An1%Fv$0-C4@GyKXwt3wziRtGm7_7L1XXsDR8`Qo zhWs!-b}r$WN?uG>Um{{eK~zLd1qtI_>9zXRm$-e((O3i|nSmm=!hsFCPu)sMg1jF! zz5da0)HvETJ4ScT&omyUdwg_9Fx$w1a{cRAMc}FF{_O$UXMlYqkOfjDEe}*H&d<$v3+l; z_pygZ?e+cOQKQjT6L+eB$kWp;iOyNiyF1|etl$#g?J2}2U#~K8J6M|B{G^3#yJ|x! zFo^PZ-C#wV`i}QsKDw1~1IY7D$eXUzFsCaiy&SffT`eO}E+X*Cdxr-(2W{+D3s}Rn z4Lmz`6Gii;N^!MtEDV?g1@$iGuEr`O*|_OxPEqjD0b8_WpuLhHelymqZ{<~EB8pXs~$wK7~m+Ji3$K$ z(9kIuFvb=PP3<9}g2!*WY*b5hH(i&#!|!Js&nbwxo{^Lw!a^&l0t$yP+4@8t8-Nv8 zmB+=rxW`C+jm?WNMrOK=P$Kh_w5QN@7?9hht?gmG*KkTeIr%2EseTKoEof+I3_-I3&zE}F13E;=RXNgSx|8kEK2(%@iKI48k+l2rMqXa9Mq78 zRXgPJ)1^e{b^_q6? 
zv7YV?i)$pa-XFF0yCv)T4-u&q4A)sApvSw!tvWVnD~6BgMxMe}Uo2~Uu5z1Vr0~-l zk6H`J&|VO@BnL=NpgB_E7<>Px5rHQT^Wdp7HF+OM_Q>e=Lkx>xK!RjPbpi1~C*H1) zTH_>ukX5(DC985vzsT6-zLxpkB3@lps*{h^OjBu^(UazK`}k0-&`IN|oWTqG z%m?(59sCo>JYu~%*x4cM@8nDoQ>t6IdY7S>V#?y%8a6C>M63fzsTq4&qX6FB$sk9MbiwUqbjks{=3^$e zl}Q6Tn{%{zvs_Y=q~^vA)Or z-2prl>wB zj~aEC340+b0t2GDe)!k!lcoPWcXm~V+n?QF$5gwgd(0GJpod<7uA@nQjWegvecYh= zU6=+5GbB0M#LDxfub8=!IJ4=RgsQ&& zXrm^<;{GiGfv|g(Wj4j!sr9V7q4y?v86)EW9wY584ZFVbs_RQx{}VdNf!oM@zVngK z{`NXkdrh?9AbP|5-Kg(DTiYVomd$`bVhDSykY${}VqKHmvs-)HMAMy%qtbE70ioyC zK+}p9FVMf`Mk-U}r$3%82KKVsD9c3dW%|V>8$_Y>28@v!JKkWz>pCGZmwhYRH`j2# zXmz{lhN1a*A-@j1Opz&ZKE!)+u?d$d9{7{rx%MQeD@s3lOI}iWc70`9<+-!o5&dLg z1XQy}%3GNd5Wk8^Ok-x4GzNn9sxYtAqG1r!n4Pn(tjh z_sM)z2axCUWkmV&^~sl2?L235FCHjC*3o>CA(~OnmTcNvR$^P?hG-f`@|oMNzLdZ# z6k$a1+Hc!m=5mG(@qUNxDA)W-HP~ZFr$>629EDIPF~k*@yS?q>QY2l{b%PGUwc)N& zF)s$)Kp4+&=Og4lm*svZ=R10vbrqJ*b0`cWrYdIB0}Ym`kCa;Wt~!jAE>dXuWNT4J ze9}+BgrWEaZ-A71DN} zG}X3&*kX3G6l5JtSxYB`V&9%PmF)P(OVVI&_``D@Cg(xePI7Km;PUNR&AY@5?s{%7 zS@3f^?G`p0M8vFYZ0F2l*hUZil*W$pdtuaHD!Z-%>>n4*ceOtkB`itZGdHD7)>~B| zCWxXr=N~t@ckt*zaV!s?ZJu2OOKyub$&YzF;7`J0y}Pu{h~@0k9`Zu17!?P8?tcB+ z<>&JH`On^OjC#$=Dpowqzt-?v8hVS|R%feY{=x_BbQLH_fbV=HOoNnf{mf`R65I9* zc^^VK_6hr{t9J|JJ5-yK#~G2Dyru6Fwf?vJQ#rXycn;=%CahPrRInnJfzpO0H$;e9 z$-Y%L_j*6^PhYp{{naK4h4=Zsm-)?<*11~5JRH4M;YzW8aqjDvi|lvz7?v-W5V0}v zjyyAg#fUMZp6F9Y_z<5`JBQiP8R>7c1rm7J#ppl{xE}w2!|`m>oxh`~FeRKk{MZt4 z->8-lqwSlJ_!su9r-h}5K)c|MLoEYPJ%tM`@H__p%flq^M+Wa`6t8N>cQ0Vk=Xc!B z-4-KfKygqTLK}RxInEbH4S~a1M)`LL*Dg z$ElbtKl(t8=vr9rHs?07YD9e^}ng9OJMchwT3#$ zl3S~d?f{NV5pl1t7LXP3$*$%FS5JA<3p;mYpL|HqHaO~wz|6PWJKE@2(Qq9zzPqR7 zWKy+;&7H)C+bDX)<;)?XN}JiDvK7GoK!S+s?v-*yrj_LFFTLHbH)nj8v6IMZ^$mb_ z^Z0F1R-+l;tpH7&7Wi57fWxdb7?qtq?dV4-z(#smx}{enf|Fzt_qgkKZ^cwjBaY|k z*5dSb+U}>j#MSOy)rTgYFRewCPVd*lEzWpPf9Y9N5gae=yXJfvKiwm=1Wb1cUu+LD zfJMfJtXx7As$HE`tDl_8f8H*;#d9eO8tx1|s1aS_sLic47j9!#A3S903;$3EaxApc zU9y%FA}ZA}9aA|A>-t^ACm*Ep-U2F7WqK?IF=KIb>oshu__d>w+A;=8(l@9HJA$l= 
z==fi9M%uD%YStQ6IE96NyN);vd6l5-bgVggBmB0X(SWtAVeqsiHo@mk0J8W z4D02?ZzqTK7rD?}Z2V^eP#q#IM*fB=f&E!8<~6HavD`Uv@E{_aFR3=}N{Pg4`{|al zYSx``XtggE#z)j62-sn3#)hR!@OU@PTVTGZAmuE%0{bn%RPf6O67hxN3%ZD{QPG#k zZFElK@O=FUqW;h@LTl zxjnE-{p;K+V$QQ+4n8h)>R6Pw7(zm+ylv8ipYJYryOM*HMeFh|zRfwEw%)`or@uxP z4O|1-W%m^jw?t3^&qf-oN}hTmeBZp7f=Of1Fw`5^Ui+b)`VusqV;vPJWq#TpiS1d| zZo4(~+5>j)5V@bUEmmAyTw=?%mX-t=rRF|8I?3BS)KDR~U9h+;0OTgmZQlf94tUuG zsdavj2fB*{?Gf1aR5g~i`z8rP%nRi{q8L?18pLFGWRe~MQ z$D{|qsN?3l6S#_O;^Ug>Eq7BEIk!yL%5dMQ3-_2nM?FJ(>_TL-@+M%zgPIKVrG$z6 zf!Z3XX{tONH`nW~%Pf2$Kk3kZO|X7Px9r)lwV*k~(Gx}>-*0jD!HM_OkAZ!2hJXEn zV`NNZDuPFz2kR@AO7VrcfrLoegSHV38a(E4CjYq=0L0;~@XKTa^R8Qg@g7H(pWL9{ zR9Z$lJ+=g+ggf|gJmao8r(*@-_S+*uQigyO4OjxO@y#!>0e7ASTRT;@1DQoGXdV}+ zx&r{qmg-mTWxHQKe4-eSP`g$XiWJ^q*BFyrC?n~xXmED0ZF=~EuLbyM_JawS!#9cQZe&x>FM3)db^nBb%sit# z-a@RdQ#w_tF06oaoRSFmIpfM{H~yoo^J?(hwbC@UT%xj9HN9EOMmp-Vy{HAU=R4mz zNlt!_uv~?x8_*50@Nph7?gND93rqUK3X?kYDjFLK$vY{`@G^k+f?Po%#js2tzOM;D z0sU8a*mlLoi1y$Mpx!L`Plk6f_QMFR}qt#vH$V5-X_nZjl(%D<+(#F z=+&1DuEG}PON*8Ynb)9~Clxsj)=UwGn#4(V))X%0%cp=4b8nEoXQbb7s)+1DZG&NM zmN}{DK5d;FC@%=-#-W$~Aku4$ab8MJO7Y|P)&3M|3xN;kx3;8sKw?;WtEI?iw#qz1 z<`E+8Wc_gmDl%)a4Zr5^ca!;I&yN#%<`xEz4$euDF~4$N|Ng;@D%gSRw107WW%oWs z_LqVF<0Lh`q$^wF3)D=~TbX~KNIYOK)DMQk%ikp!u+$!Dth9nb$TE$I&;8w4=;gA< z{nPNHncoo1J!9chx}NUFgiT;K66O`#C;ENlpxH^`lZ59{S2ZP`RAIC>m`o_j>d(S? 
zQjwzO*VHg}vG!4S<`k}PA|5aJjj+xdm#-9u_XFyq5alREvb(aI&#p%2?%6MTMDi*^ zTwhU~?w2N)PZOxn%+eGTjB7L36Y;32CM#QRBGGx%cHrxxD|dRI>qC9!L@iOm(s!nv zwE(s^-lW*vURT~nD!aauD7%?*+{vGJo$ltWUI0}ewcEbOUl5(5V|*~~ltvdCnEP|T zYeWa187B58^j!4YUHRVz2{}jOA|6*cGa?oEk`WG8NY{+9Oy&k| znbp<&`lf&d{$@_R9w-(=>7$Li+<5w;&oFPEh&a(;Vtb4i$g%Rg!HqVj(|{XE{@wHd z79;b0*ipa_0GhX;joa;@XtR*X{H2=FK8ok5_F__Z^6Z5A*CH`N zty~_uVbwojUlY%@^lapPpD>hslk%f=4Bby#J1ur}c-A12D^~jzA*-`kfna>T>^hQp zPyMPzXy@lQf%ZaP!ZY$=WI(E!pWJq^$?CPu4z&fVOX9*gW+e;7&>!FG9QlRGB^DpA zXIDvikB{e-#NRMepk=yEx{_pp+M;$8&O&O$INbbo2MVjR_4y9lw{B zj(3e8)o{HN|0FhkXi6}?L~K*OX>-UZo)O#vDV3M#+VP|jjmiyP7dT%O9G8BchcTYuk(z!&O3U5WbbDlVApsKABXgM{cJ`vMkl%VRq=^!AZD_qAaz^~ z58PX!qVqK=P-X%0d@oap>DzBX*-HJXji}r-_rvo?IqP zNo}5mZXDqO^7k&2?cWR#J-+^+yk>mM-Oear88Wb&aBgV5V@@_17~1<+#O`zC$P46| z6|m`A;*(_OwK9{HkcEtxVL@rnkjbVWqo(|UVhgauk|)_Aif>!(dq{|9==AkqDtuX= zAonQv?7}JS)q7+=P7%t>c5K^S_-H*nqR`7%uPY&rCzh1?D%l~7+$)-5{41#bFkn9v z0YMOtfh6;VysDCd82C8eoTm9nq9(CSKTj^?QTf|6#>m8;rR!8oS3nT8ss znLm{{z->r7FiTESP`ve?9{~(AVe&i^KhWPMsL~#znkrG%5e52^Qj>6ze|M0W3_k0n z@7AyPs4h(X#y7AQN~?GDI=fY9qg&w`?)GgT=m0}S@PDjmZAw`Lr0?>W@4Wfa$st3| zR6Oy}a$@aTZq4}g<Faznt4oBj~Q|pJ(T)w zhW3j2VxlJ$55cS(P++C3=dycz?NE|{KNu9xb^pQj7Ya(jsZ+~u(_C17=Whqs2M!(g z30FdWS5oD3a}AlW-k3_r^j9Mz7fFgtwntrWWr1hQ&mQe=sv^=gvPlfw99Ofk%5zC0 zA#_~o@%aX&sh`o^ZdhAdLhSy2s%T!w^+9p4?~ z-4x2?$NLZ_oWdnZCrzp6@os1JC3RU~fzD6wEFr~6wQRc{5C&CJ0qFWGrV>A21~$if z<$V02&16+Ql{6@Tf8i6~uEu_x5I35g;$Sc%^IG7bcgp*X9A#c0U7`CU#OH=DhDP?_ z@tpj#>USheaxc@Ia&jQa>avkoOyqU^c6U!zX6vvN{U-?W=A=m#7lK4A>syDUTM&w= zG|+{B2KDxn1p7ndVmUwF3;!TNOebgecWWY92aqxJrJ+fH-TSH=GzmhmCO>A{;qhKD z85Za$TA^W|hq$!_hcM|gWq@+JL*BqhlD_D^d%R$%xl;0=iH^OPAjPNZQwa|WWMvH{ z)#0(PnjxJE>w9k7Wl2*oK~w1ynZp-0$NCD?UGY7jRm_<&{u#Qji6Yz4e4j;H zskGL|8T7nGNS?&9>xhbwaf~uOrlR67LtV5VwZ6*L!$#;AB30>tq9OjHfE7b>oZNX} z%V@1xmYZ({Oc-&e%dko&qnYY=#LG5qsqD8igp=J>8C^*pi_LL*%K_QLB`l0>G>9b_ z&(U*^bW(eeo|98)h|$S0_U76OoUtjbHAtDh@#U<1jAA0M(PI$*(H%Jeikn5Q`zA9c z|8v{y7+WXj4eHYBdc6ne#LWqQSL*sT!isMeeTJt`I6i#8GgJ|-J2||8y|B(;L@rNx 
zo!on^r}sWTRe+SBpDx#xkblbSAXph6Q+WUzc5a7-S<%6;rf)z~83w9i^&c7yB|&99 zglf_X2Izjzd#zWOV33EVzK$#!d|w0iG=8$x1Vs| zJYH&3LPK4sXd@3a0sO_sxaETtLfq9kw;h_4TH|E=pgC!I|22Kn1<86m8P-7! zYOXnI%t1$}cHF+IFZJx!A1~7EuV0X26zPcdG8KoR*0=0+m+>KR@h(-_ZmD-ZTjsv? zke$5lmbborkyQ^I3Hk0Y+iZr2lts+<6op6hg zoqi#N$DCq9Q0D#l9pLXyVwnuxn|hAT?!w!HvTVw|2W6@yLHZ6K`(Ky zlZP*&3_{2nO3urubAbUb%lEjn`AvMT{oyPJO?VXJ*y|V1sKUop6p&g^nNasg9anJ` z@HzcY0VT~ns%>qfk6i@yPns-oL9P%Up;^ge6~ut|Hy(7A9| z`?&23KBlM)yoL}wLieQ$S4Wh2=RG*I6@ds&91%^<)M*Hld@TB57R_bbd->Z%BX`wr zj%B2`M<6y~QuB1K1Hq==#(I;tnUqhpnl0jDqkv?zp@!3)+B2&cs5d~{+g%jdc6S-& zbm~HCLz97%I`8f##;F7M{LC?`gQW!iJnXwIo+m|oxh10BB*N#IiK#lbv)#dCH--Cn zWkA@E)nG8Z4830$VAh5tMfE`3EFF zmP~FsC7lnu^XA&##TC~bLqoeUeFyEA7fGaJN4XD(UqLe-MpSa2y*szy*<59+mf%^r z&r@9KaS%Cn99&chD>25pc#&jKZekcWO-MTV(|p1#H9GW{s`Sz#e5=&1+eX}%Y#Wn~ z9Fp^15$GJUCf}>6zIBw1|11Lb1ox=PIbu>IA7OB<=&3(VJ$14h3ek-)funGR1jI^8 zIBUY^8OC0Ic`;VMNxOmWOfj=^KZCU7C{7?Qw_pnxtzs0hYRB%N+JSBPBVD-{oJ zUNyd&=8a*qwe>7yMSCZ5nDq_a;15vHpGRZ|ooj4|L&58%NmS z!k$WH2K#n~jb-DB*~W3_kHky=)dlA?x0<%)%J=a1s#dJ_LI|49E)o2~oFrxPD-=~R0X1rhyL;exQX%tE9c zil$#Vg6`zpMXSoPUTcV!mi5@f!S{E~WqvF@yv6S^KG+{pP8lf0688mW4a_Dyujgs* zWKPc}z7d)6ktab3!j4X3wS83}{hw~~ATwIDW0yQOOG^Ew6$$1`<3gYc(9gOwdUf4| zvUv8m4AC}ONpv9d`73nJwJeOHApj6fL4Cic&swg*LhHCFpAcD_GqnoFMKQty4hGo_GRN=v06)yz?-Z?nb` zuDI}|LtM1`WAR}qhN!RDMt@q>Z}-_nFXOD~RyKpy5-LN2_up_>QW$zf$?`~bmS${Q zpBbd&C1g{K(RJNJ8?hTfLgYt8ul@7i^z-tMGvL3+2y3+?#VCw2$kfT(9|panJRVU! 
zm#{h2^yD&k2{T7xJ%~>gU~az>uXO=i0nZXgrmaT#?#H@epQ5>DI3|9}8`-3Gb?m5K zb8_r#=wz1Pg-Vh!L=gr>sdvXE`P#oGo*m!JI9U>w z=0Y8lF@i9v=3QgFpR2PEmnF&7R8NMS#@a4YVUiJD^KeedZg$kOFunm+Q+$IgdE&@L zg7gsz8x5t68D>a&&lf+TyUS5v-Qq+~dS+%dH%zhcR~GmNxYFVK)uVZ8yrxrmRQ(@4 zo(nJRnU0+mjDfG-m^Up*e8&6~R&7(9dghC4|KmfQ2^9<1?`S_lqZJ6qh*>|HS~+rh zI0&7yW?i{m&@OZ_Sqfup`pv6uTnp8?s9g7C!?uzP(cUHYUNX9iJr(HALO2m7i6NeM zVGf>0&YB47r;6zBdx|PXyBp;_W0gZ=Eh6mCWgcOEeO{{4%f+8Y87102B;1k)wG5w& zXy=$32qepqh!|x7mB(`EHWXThMmuvdCvW>Ih+-af4B8N2idstfQg>P--xDz{eSBTT z<3HDB@CBkDipZw%67@ibpb0Ta_V!okb$&Y^a6i=RwP6;u#=V5%P&Vq)UcYB6}@V@l5R1+pNr8B-kG%x6tTpFWUgMePOC3Q=|P-$tB zQ`Soy8>c_np2yoW1O?X6Vr-nNhuOz0tInu{z)}YV&P7XVB$GZ!1KNovG^N{U<4Lyk zO+1eBy7b|Rgx!-BY}^oG4H=tm&vH8FeM4Q*z|ldrYNLLg@9I2Pw2>vxqJ%&9p9H0O zHiJphag7Sa$VhN=i~fYW?Pk*?v~Ww`{j*NHTfV}g4Q`lc!%qrc@yXd_|1&E5#|(c@G3ZWaK0e_^ zr}NZb>3x|y_Q;=c8^53VH4zRxdb$caG=#MfE2YnvqMC&}EKnr+dXc+VK^Q@Vf33_v zur|^G6-wcK+-sfdNleg+THjXajVsIN52+x*AZ1}&^`}?a+wEO z-%W8%mppQ@Qx7V?K0k53&Lf>0XnH3sivQvJ8~0g~BZ0vK?4w73H^Kr}`cq4j%a96DC4L}JYMrCB!d1=k-* zx)Jt!x~>`~iq3|8a4L|lQclVr(?^K4Kt2~&9h$<`m6)dkKOO{?8u-3BxOCcMo?Ad~ z@kiaypa>jrbHO4dbtEkl7Zp}>ol36W$8^y@RAQJq4c4^ko}X9jdkW;n?EC-Znp0dY z3^d7#UM)YeiQg8StAjSn?681q8dU1d?t34nF807Et*=e%pr}Ra3&=`zFFb6}XnBOy z1{#4^yWmfxe8Sx)aG+P=#lwouToGQ$BGB5MQ@EojF~B3B@F4b>X^v?|Osh1l} zn4n@c+>BG*OvDevzTs?U(ciK74SkeD&g}Sy@p>SSRqw5!Gnk}w@=M_|MDAE@6V&)b zs&+`--4+5M2B@ID2kz@>R5=>~Fc1zQMBt(Y-jJ%43PPMYcI;Xag~gVvH5NgtCTLqA z^DsfF%a@4jANRlriQm?OPV+Ez3!F_cVu{bIkG4~d1vkG5y{~usHI{~f?16(EKquKV z7~$sZfOX-+x#v?HxXJ68)B)lV>mwZ$n`5`nH1d442W}>K5h$C_<@Y)=EHu(u*8qss zyboCeRG@t!fx*_}Vy9Y+r1 zBL2B$cjz>sOKYp_DlSonYjHGw$r#0n!#z?Vm>>wFF9~n>qEep_&A<)`I8#*Ho3jjA znq(4Qo`0uH!-gS3T&Q(b>|Wvn?MmZ3ePR;bCCFqiO>>}^s#vdSaiATys1M2;YQl+Fri8`1C54T zFG(Kh5k|P8ct5|Y^QOkf95QG#y}S7&0B;Wm8AA5^U`lqSL`}ud;hBaH0~){kUbj8M zVq&3JYLt!xNxVm+JJoAUIspgWa3I)F-_mY28f|X%pFHvM)8i|}Y9_F;b^d*DM|;|g z)@>O&=)_<5lBqR10~Y`u0t6$cE`3sf&dcEWDZA00S{?BC!IX&^qY-6XI$!Edih@wt?)iu7aFu*K_OV{5LrX}$#R@cbmaio*zH52zU 
zRXOhG__R(BRqiiiBPjAPtaw;slI*)sGWk3;*bt_EJPpC@>CXZy$CNF3PQRVGD8D{z z+;6Z*SYQf^AHvF9@ttUXxs6BF8i0>8X{|B&Shoxxbg4fO#6;}#>UspL-I&tlo7`VuQ?ZZl8n5+<#adlB*oew z4J-JXS|-d+z#X6frCBE%IHD-Mb;?T(+y(2gLLVmI0oHyTvH@8_|ToIjQTbw@vS{(m4CkutIqabr&bg zzCOV)$;$|i9uGlF$xFj>RL%%q=FY}eY}7jsz3sl9YUX&G(tm0JBynd(gw{x%QW2D{ zy_m}w>u`Kh!Ij@Ez>Ci-d5G9AF8QQA8FOQg`p)(%aB9?+d+{6|0R)bA<@?y3HrRgM zy5NsM`xVUERCRO{D+UM6U7@deqjcRm9sHBxJtglW$~BVHhu+k-VAv8?{9N{Latyw6 zbYspTQkdw*6KL~{*}?3cnB68=A|}Z}!Lq|^W|>TV-2U2*yEr>o<@X+=PHnAaq*H(h zgUOh{`#$DW1f`k6hg4AW{k{zgT(gaUIRR~~)5YG59pcBbhSJnxoomNbjJv7_BiIn6 zmkMQ9po(5ib3mx(Dyb-KVt*~0f*Iy7Be0xy!m(wp3{%Kzq z+1Xp@5`%?}fB6xXC%qNi(pHn_-Trj$5DR0+Fa6=)^vm;()_vcWnb`Y950NZ588bOQ z&w*@EsRO=Veale)56-9VLI{-LN3v5o?bln$Ghz2OR>|09b+&6LCSIs32(09ggPaS+ zgepzHtA`6H5nz)2tg7i^nXT@c;+n1eB&1`xFV~B)>7$IgYV6J$BI6G2<}3c2l^ZG;&1h5F%eSkcPHx<^?a+(bF=w@?&OMXg;v9y>eAws2km zJfa2Dlz(T+Ht6&)EKwSoa<_?tclgdnMF+O;YhLHqYy*C5TgRBBk(qxCTjQ} z*-PwlOMggluOTM@JD42cqo?(w?oqTLcQ`auUY$Ypd;CH%x%DPq*Caa~u)T#IGRSW| zhWCJP3_xP@(98PIN@whgYqf)6hweN&%PT#B{_$zFZpPa@eOvu*zkB-RcZm*ZMOQMe zs*#nQX)tt*YE5C;c)i*sVx^=Jz549ENy2%G#<2Kp;qEGn*eh>X3IG!z-Pu!Uhud(9Ng@Idj8a7oo>orOr`Q|k?;g$!wiBg>#tlmnMH3Dmx+P!rQlwU_vA1=; zy*1$T)Nd*6P$5#?{bt6OfX7iEdo0@iXwooKayGLNGainmxW)yk z3)eqYn+?0MZhnX657H60TGAEzgf#ajtkM|FWN@88@-5tBPrQDtenQ7h=8Q=<0`QsT z3#t8Xe=KBuc~S-D**$#5^0ThM11;~abzLy^V2d3dpk7oORMfLb|h0K z1&H~+M22Ivh@SfGQY;@GA6dPa3t*TE$tN$p5hZ@lZPf3C(f^cLqtvq@(0QaFGCq&y!I*nCnVd7lYg{hkw!qo!30= zpt@hacKmDy`E+p&2OBoR&c@Gn8!j#Ln{&38#XPb@ z^L1=$OT}z74txxO@Q-hUsOz?>d<9InF#4}q>Z{1@3gfCVodondJ)t*xS6T4N=vgXL znxh=N$N(WeC8y(qO&q+gwupiJK{t^5O?mQNCu{%Nw7rlKW!ZT+3~!oIoz zi>-g4@ha!Qc|VHa)^DQv!h=+S84wH0myVPU?ayk*`jq+&(?Xx#5Q>0003iT+Ig9sl zQpJA-N6ILc<_W8!D8Eb*-P}5JE0)8=D{6Y^&*h}(4IZV(c-_Yrz5Is5Ka6=-0TcCT zh^WRf-C#Ct$;rdq`z$kxXVsrj;OTdRhcwVqngzeyAQjQ)zRxOu15X#7*t8?6O>0$L zx~@5ygZ<0R+KO`Povp!LmwOH0!5c~w+09nlz8i;SBQlYAF8nnsFK@uujzi9jkQq=MgYN|eLs-;J@TUGu(OI+z>PG!qxnLPj1L01lw# z#3?nkeYm-Yvo;}vOI9}rIV@rNxCsVq@%2G>UcPE)`?5XIHaTb}Z4# 
zPUp=LT?OVJBQWkC_ACZGAA|fN7n8E{`G7=bm^SOAfA(+EJUZ+ah#3r<8{Vez!Y3FT z5xI3doEL!`2XBn~8uM)&#B3EVMi@XCi~RHhqFbtUm==**Gssx7lEFiP0(53fMu=oE zdDbpcmNJhq@g1v)Kc|R|Fo7H33g~37eH#8=;d=*edITWyKl^wa(z2aA7*jiMg)GIojxkB`}P zmLcEU*Iuwn|EQ;aHi4eMseYX-`P}WZ`~A=J*wU=7)(_ytC>pwwS*K~Wrsr%g4hF*@ zIpThgU%v?x%&(@#Ki!;qzxrD~T_okhJKZD95t}0p~Z@Z3L0Amuhs`%O7ipc(i=ur!(`)Kpaft z6k}rzr*T|Bc(wnAKpdWv-!5`GYs_&bcQfDBtw~$4fPlPP@)0IiX~(4r|Bl^?9$H4{ z8=|e>>W=B8I?pHpS@AJ`GyjprvLwQfoYyIfEAs}dlLtpUY&KSZ%yomwq2KYkzbO2V zOF+i~36o2!9u&JQ_-VCwS-DN_CW#C(dI}v{FQ+*GYUTi3ufz(IXq0|k#_iEyu-7h0XHamO#kU+BR>MLRR#W|Gz;U&&>0QQx5 z(jdQr(1k{woh1Kgt;EmV+e8j&?JuE1=O=%D>WPg>MKP&nD{GosU)}=_RPAKZHf%=t zaL-4u7vF#Qv{MerhDC%<(}I0+?&Ha29-MOoA{J$mmpC5Qn9$CB>_o<0F|C%X7>tg) zyPq!mh+9H#7ld5NrkAOhUfbM8kus}>f9=5gz2A)CbepPKnWF>u7l}lNvnaXnc54aR zXm4UBq_@;{ZLGp^ZQHZg^kcM6KH*5S)>PzE?l#*%cvL^saC7r`_6u!*7ePeJilQb- zgE{GUkEi}*VcYFL@}`!3#N%0q75Zu~FQQs@n@&3!pS(u4w``MppjN>TQ@5eo-ejZ` zS?nlsUpOd0NN#k;iyiiAkHZd=n|yc2q`6u=>D#HIi3E(a_lSUWi3qEXVI*fDMr-0% z>><(23pAY$c`E~@bo5ZjeX#|eirfVTAODas_PN!MM(fYcpima5cAWul{X~C2a$csQmr)!&fYQe@mT&O2ZU1gQGzj;R=vO|| z_Z`dx!(z~9q(wYT*r`y*aQFC-%W+X$o7>w4pi!1Eo8uRIz;PsvK^q(Y*~?HRwm zZbbRIrOx0;$jayd^qE(_eSpYtw9>=rLygZ_!@qabPm8{D)`<4#rww6p{FizCBm z?xh`{iUiQzdP?&mbX`Dg%&5G`Oy;yW+cFTe8-pM0M=^rbqrH&5dBbWYT4EYmKG?e5 zp+cv#Xs_Ip7fi&z>bDrrI{MB${w0g4zFCN04_2IKK|3=W9$OW-XfphyU4P_^b{7MV zoPr7b3g$a24;~n^2C$0Hj(Q5L9v#(*3tO$3Y@*r*Bp1^{S~MSsNudEvZWZT15+8GT zt?RMv@?tw?>i*lWue`X`u6?NKth$wR$NgHAwun%T3`E*Ps!e%YK29LgvFcO!L#^~} zdNubt4!P^JEqZd?wRo^MTML)2lD|{S!P2%EzR3rCgZNFEgMBA$7G2k*!gHgTIpzRq z7mt^pfu?VKX-L?gSikw;aqMN}I&r!iR$sa2W4yIG(Pe|6q;cXcD4|kAnorH#U(IhF zQ`h-HHQw@g4oGCPiQyZ>3WG3Qw$p`1XR{26Ul{1G=qwYigG zHJ^H3pGD+Bbn#oD}#hEY7f*f;`Ro@;RfP<7T>Rug=3}ct? 
zN70nqqd_9dF56rIrj$_A*h+GE`#mq>Dh?(sh8ecRQ3j3k^{2baL6=^e3C^P>c4Z&N z+wJBF!f5TbNonIDt*lobH@hO&CgYKc^ZifPRkD#kI1p~?Z0_m}H)e%$#6zF9o^4P6<0Lx+?%c- zxKWD+q`Kvv%3j6$L(*38dNdMZ`m%SwPVktiywD=-)LKovU0$;9<@^xUs~m>I}JcMt-|4nV0K7ZdE=ye7{L` zrZ2GM8+4j0zkrQ;cp)}(Chx~W#;~APcD0d6f%tCYwj*M3z1Dr^_xh^TAlZ`%@-U)y zW?t193$dL?MOAP_MlgA!At(u6=%_b4Ja$;*xud^)ada4jK?xdNHZ{1NsXb}Jp8@M} z#gcw;9gSWdup&S30zyShMN?-_!w8@erro7}zdjD=J2I!zbp z>7`r#RmMhB0$zsAD{7$i+0pyW zBFR=d*0{Nd{(uEQiV^b_k_%z_8qZKbMMaTG<%e_=(qHa+5PQZw@^=ME2q=CTA=;pi zy!t{s4XZ?GK1`ZE1}C8M>-JT#au*BO6&=j!(iEuZSx>8SUB7Y|O5%jZ7R!C(xMRiP z0E`n8`-|!!!1>oRZ$H)fOPFrCX@xweH!V@HbZgX;%Nm_?TwDt>x8PKp21x_g(Qf zk1+n8*Z=e4bqrjbA6Cek^B*Ee(=R{In2vu@O1xM>287o9U&9^R03?Tq|G&r~U*kX; z>Hpw1yeRPQa^$|YQCk@|!L7iq-Wu;wrY*YIV*1~_5JzFmLaDzCUegR1Ittp?Ti|)g zm!)@|RGVf?w=coDg!`A9B?hhjS#Pd&!i2_qhCWf6IZpCEhq=`Wq?=_Wbk+Q{vYlSF$M`}^7d|{>Hca&{pN7s zIBrg(sSCn^a8;@EBx7IlUtzgf_d?+HxsQTHik`{|E--8kjC%NbWLRj%Sy}2 z1OuO{Q5UBoqNvV4HHur- zz1j}!5`4#H_2o%cCu_AOhX~FfPu04dp?Vd@*dmc*6hb%Eqw833e!f=kV4xm+v$-bP z57nQ}Nh^W|a&xvE9ObgZ&1@kfC@r!Oh`;x;U|5eAJN96`yO)LwWQxC(}5j<7*l* zPnj**9_fF>FeM3psMv^KBXx|w8t-Xs&{(Ed|0t-XF+FCXUPXX z+~O$hle9O1o%FO^1g|1rCA__n^xIAuj|jmE!TM!GS=z_JAFM&93FiGbb17Na^9q~h zAL3RCZPi;#Y}zsmb}tEUnYL{0(MAIae}ipoY|=wZ?we1%4U1OiWWcBOwN_Zq0%8IA zn+0`CgbL@zqt<<^U-%yYR|GITBh+b@)w2!2gjsikup(#(#AUitJL8Go=nGlWJ^5Fc z>U@S*T{E#nHt_#(Sb@GPvfDZj+9lZi!c}PaVU|}n;5&B!=y;*iP%7}vgWIj@y*njC W)i>

%Xn7i{+|?`4U)Df0vw!4+xZsse-GmU$wRTX5eY!|9J z5qW&Ge4>ooV`Pl0#rzVDT@9{D|!IZit^No;O_v8o~IDKep6iB|NJLX zXcLidB!7>r`j9N|b3zle;87wXm&|6c-nOi9Ic8Lfm}4wfXR4`mT! z6)|!8r@L0iw*EPaiv*wUgHjUZ6Vj2Hw=OSPBB&~Nu_Uq-Gav`RyTJ>nT^|(8j`*VA zsAwga{saL%q9?Gc{|RUqmC*B!SR{n~F%yPQRoyMLo6WHM-*SS$W;E#3I%0Ayd;=N5 z>k)c4cgY#r9oY0YtgF?sThg6}xr&RNDkG-;>GY;32u)*9jHsm^`g1l9N~?K9At8!_ zEJJ_KgR(73nr37}&la|(M14a!5e;ERq-VuKZu@I@D&me~M_0R@ydBV;Q4i*I-b=d# z8s(CZMD6H3qe`G8X8H*Si1Pu^z{b@jJpswl#JpKh#m2bTF6|v>K+p7gccz|K?q#Lj z@^dv$fY00Q8~(DMrxEd-`YJ{h5`i$!1?pJwVjk=dHf3OE)Ms%9Zd(A2u&-ordZ1h7^Hf3+2th^k?T<;bo)_{l?0U4yT&aNsw1Vmt z*0Zz^t1phXKiV#~yM%{Egdzlu7S7rz3Mqp3fnLsMlnJQ*YQ5t}vF;S6#l5b+Q$uqJ zl!}~AJK8*{KSZ>P^?fWSD|%kT0Qf0eq+7oP{*QHnaa z?Re{8+%%Ps@>*s#N13@bA85ht&JK(To&~${8+VZ+wBRI3KY?o0XvD`aA`S?|C^NhU z(j^1Bfke?TxsiR+F8}dJP>IKFtaE%<#gC_#IMq?R_RW7B)jVa8z<2w!2B`7QfZyR& zZhc!${oVANu&1v~H-{`(iMi9?-IsIz_1{S63ICUPP zWRPI6;pQ->y1&#fWLU6}|*Sq})=pQjP|Y)?I|A5_taH!cE$3 zyw~>M!ijByeC>cXGuGA@cE4})b0Yv zKMDl>^MTYNdz|5iTF~F&xHt@~gNbO(p z{zK0e-=KFlnRQcI(w< zoEPyJ(F4{IJ_T3jMztIt@c4bUuo{qNNy&HuME_BfZ)@eF zgq?P#44rUEAF02vTkb;VU^@X9-!pJ8$&P$qDG|i5x@$bAMUqY`Z>HUAAu1A4n$9`Mu+)?NK1m$KBenqM9F@b(;0awEEKf zM)}Rq+L+jdzjiF45JbeYPVjil=-_Bn4`*R==*m{a{d7L?WuZ2on85Mxi2j)YF`o-` z+W>qT^NBYIadV_&!W**a28C@@ZD2ld*J`4i$RQ%_;|jRvV~|lXu)4e>$ZK0Wb3Avf z#L0UMNA(it6HPOQo?Gu|7+jV*PF@qtp^Sqy1$}7#t6B$4H z2AHH5#YX)6uE{^KX_2;EZ?kEu1A^rpeR#h(0R`&sU_O3x{r8jj zZojHTX_UN{Z~}>(9p2Ar8ghAD!>{%n`5d%?^hCP<*}}mvs`g|({_b(Bx@2|pi(Qgt zzT*U@LGpLf4bUlr{0thJ+`2a> zgK$@9M1c87Zg7NmP^IJhsy^Y1{8zcJewHiSNCpl8iv`OGwic_^0*^5(A{xf3)EAzo z3|`)!5n|SE&UVs;rlzKu zq2(bZwNnNmTzl$Cd<=>)(R*D7Q}%u1VkQKBGHzed8Qa&?y_pwUp`g;BQqak0Q?Qjs ztHhfz4v0e~tbPEi6<#>>Sy)v51 zfoDF^YYqGp!ocaKnU5d3#d|#SHjr1Ee;B#rJk4l=iQgbstq7m}XZ+)=aYlsE3L5^@ zK)mmTZpevLi!UWV9 z4fHkACQDynt%1$&O}O<+9rAOFU$-@a?_Vr}6r7cSyd$!0$SLTFHk zc`+efVF+P*kn5b)Q68M5R;YPZsqIXBe^>+i)24Xe{qqa4ai6{2m;5e?AJb_)3Dbc{ zg4T@m0wR@0Lpww(MIa?;z}p%Gp6w!X#1M+O-0%VGu%||^$&?p3uBaGKovkUAxAswA z6aTP7Z6ykLHIyme^T`Pk;`p+&)yWHaBZg;d3&8pXV 
zu%dtJcXkaw5mEN~RcaH0vq&8}FrVzDorP%i(!bzB1}smws-)hi*{q4W9`^9`3eVP z(#{G8G@2%sO$O6oo-L{%JBWU{@@u3C5IQ*iEljwx(?pWY7>9ni9IKZwO3@#^QsC*R z6=CHub@)8rXpN`7_u-da;)evF3IylFM&VC`6#mz}=qJ-V}oQUnMnyWOw@3R zRJo?a8$$Np&mhog$4HADz$-BOuVSu~abm7v#VvC!!!E^7UG{VZ!Yslx4Ic>f?(6b~ zv3#lp)`bhC_x*1=&Wj;~?y^eY3C-qQUbo;cySL#cSbG$_&jB;UhtTAqj-*WxI!0`g_ z(8*cf7=SS?1dFuzXGX_>Yh@zo)ajeL-9&QGKSQs$OLF)C^Y@$L#WA-!wmslql_gQXnVTX&b_M7jZ^V5vX_hxISB zuyYSu9cn#)2D+uyhc2IPa8S4e(m9?hTPCNZMdv2LwcNI`h(Fz49NM37DU@I&7$sPQ zwO@XsI-?g^vREY_dZi+u>wW@#UqV@@>>4&@C{`^^=P?Y)2tsAVYbT5_Sew_efV=~~ zVO(B}7+w$?bwYyBB7+~d+l9bA*bnU@q~jzSb#yMG$@s!6tqHlq+wSY++()&m$4C&i zVszW)*Bw#bOuW_?(x%AA**Jr9H0E;aQi7eWd^6qchR)7jWB1v*g8e7^IgnfFLW3C; z!^jwgr4k0r`il+3UvJw%_c*@Ov@Lad$Tk&Yz3x*l!(T>AO3XTePU6n9sUk>GuX z5mIQE0Rz{McU|P8ShfQ{z0LMh@B`SvL)jJFzY6`q$PH_v-f|}v7=CnK*gU`*ri_;^ z%;tv}tvV?lKE|WVIu7Z$tRW@jG8QTmQhMB@IBtuWvDp@>P6-B*-B>Nv*A!7Wtus0)mF`63jLRwb@5J~9 z&I~l7mutJfM-DLfaPd|ODB6T^-m#{x6<}W3nLpkKo%kCv%;i`zRYc-S5v|# zQx>)4aw^hUVWyB?j=Qrt1q{EYzbMSj?*u;aR(rhVS{ukmnZnO`_P&oNmW<%Sf9*5x;Y?%yZ9A`)jC`2O&>MlJ4wKzjg54SrD!x-k@Oc zCl(rk1Y%r~B_4FV>9nwJS{njYVjHY_4D0nxdJ-b?+O*%I-};$`>QbP4w8%lYS?+^{ z{=4kRVVsd8L~k$eLg4%a0jh?duRH(*bOPPr z&}tr<74G3r1x(X!81?Lq*AmV2E0y;{XPC%g zNJbLWjOB2p^f5~1%8!3z0kJ5Im5}X7k6XJOB!fBLc1-W-q_*bkV%g9GPBlK))!#z% zz^IR*Zj)^26yZBCNkMD@Fds)8gp$ux55;xe2AFjk++7}$k%ScJx8KYcK#d@Tar3ri z@rF*iWtoUek7QeX&_5nJ19n}F}@MF;t61J6}1KZ7SG~~lF1Jw;VglFx#*+aL0;ond%X62EJj2X95$V5yKmhS z-z|y&FBm)PCdh!$1}&{Its|}PL)jkK?4Z0aF-gXbT~x~*hW07DF#fh99+CXgU5$H} zJ=zZKcU2=gW>Z}R|VGLE@ZOOG(!$ zaCk#|7%HBktYK-pV>=HJM%BYUq)s=`tM3tq4IAF}%-4))ByaUfOGFC9<41`i!>=P; z28+ILsSj0`ugd8Fb_qZ$(FAk}KGEg<7Via)OY(wQco1AluFgGi!KFo7_L}p>i3Li7 z5f>T_LI?LbuYqXhgPA&)vf6B!4ux&>U1K8^mrm4&#qSi~Z~8dwiC>6KHkWKe{pJcP zXE?Vpj*u$jUQd{nI6Wu09xpS+nGl#*0PrU+`;cSO1O%Qy`0z4O2DxtE{Wgns>dYzW z`6IqO|I06mN-0Z^Fb{A1U`~nuoQ=@XFm}A^e!fdjlV2*uh&&d+%t&e6H%?Z(q)5Tr9Kmc_{qz{|b zvR{blpEmcmT$?Pi_vl|X(^n1|EZvsc0Va%hp%VMC22fIiH=8m^xDcr59*+fdPXAXr zEp%qz$uO$kaa*&wwjN?7O(A@~#<*Y6!n=XWc)K0?dafGE{-)EyPA 
zdLsr54Feg{IAmMmlcCQtlgZq&U~$j9OBbv+_F}!(V4}<+l}`eWZW9LZM#U3T!(wz7 z{VHff?UUV;kd{SVUa--HX7b)(>#tDywfgn?jcj{Z6u=q*JmkOc=c?qP?T*a<2`RY* zi|cI|87x-hvx=MEXu38_s>tVuPmFz(O1-Wo92rpEOy{M46s{C1oA;31dD1~doPBip-yoX zZ<;~`nS|Qe;)`zpK$-+H(|J>Upl|5UJ;mTBq|X3gqOZyn$I}q^bo)+Jug|Ig8Aq5X zMhBj%^Y-RE0%(O;v27<6W>BaQ9EUou!id0+%2VH`F~mVLkT@>+2EiGx&@XBKp;nNeOauQx`g4*IYvQgpo+9#N_&Pai5fr9&$ z&QU%@4sKFtz4bpG?*2y2f;oR)O0;g^D5Ev`6=z_gmNq?KGpFfXQ9{jUm(4`va4X&q zX*`=?M-Pr=5~io%GcW zUJ-pfAN8O> z!octZx~pvzA|%z#fT^4aOu#HNeOe(u^pXrPkU&MqMPUck)cbC(q5&S@O;LUWf#v(Q zGP|$H+=fSV?k&`$*3ubeB1`#$73{&$VZJ6^qw|F3l5C9Vmv2N;dSJsL#Ft zJ!`YXX;-&7q{?}gnuacAXqlnIQHDSj`jz93)y3q++D7C7DBx9D1Tsf?%myWZV;{p> zU9qwAPPD1}M5wU_us5VRe2%GG4XB!1k;WRnM6ON>ama>&9p#meR$rfzaCh7aJPM@Z>cSm7ngRul`kZ#)L+{lY)^-o;3 zHCkG#1D6vY>~rI1yk&|fmg;z0N)4hzSrDCod6Y%m_Qx~G9RgG84*0vZt3N+V^S|3* z+6xpb7!}jk&S;oPGM?sYad41_-ldchl#1H#6?iR5M%Lv9KU+l)Ae!+C_%c>vBrVnu zw^1aBzWa$$C~QjeyE*ot(N{`aFmPdkFdV7kJ9&XNJ|Fe;Gpm-WK9+clsRjTXwyJhK zcRJ!+!H8P0+3=mC>3cWc2ms3!k?XtiSKVRD{9yE+0_VVh3?;M42$-*?N$_WF`mE2a z35qh~NUb!F=*sk$K4P-xlN%pBexn;GN?V8|1Xv}-FjX(mLbW?@qtIaJN9J5srNrrY z_tMrfww1nJvCvSho5YDrqcO~_BjXq(H*R)2$|fN*cwa(y|0x%9jB&8?Ihe}eWX5MG zypdgdh0I)zll3GZf(Zg2czv5t3^+Ry3~laIo(~6PpLiVzr%s^HrywTXb%c8(hh}wn zE}u62$#vg2p6<Dx8|AHt9e zQmgky{x%N2a}XR{dIEgAC8bBDeZq5D^7T=bKqz(TP3dDG}QS93sJgsS%Mz&`%-y)%JKY_v^(gL*K| zx%2}$ajeeke!U@5~2yi&DV)i?^RdUcl~4F?yJN z6{XE&>p(T_zuUa_Bp1xYMLOArp1PyWD-{}_PWL64Ea59>)y8FYz(VQi)GV)EC;jz}*)Fw1|e zt}}YPdb4N{e?)@e!SG>Bm^5naaah9jh+LGcU^gVEC-Ma!oLMa>%Hv!DlC7i zYvxcN3mferGPB2qESWGj;3nsjxq?Z$qsaATndNK)jqc5wAN2+;%{0yW)ovZ~)yFp! 
zcR%R6ExR)BXGi`HJs-x#zXGe3y8=DG2PGL9*}4jSs%c6mN{%84Me%N%@*Qk6E}u16 zPSJxEdvRLy&HgW~uewX0za1-{p9NE?lzNAC;<}rGU6UzVnp&D3_#`&|`;~uV0pL^J z7az|*W5F6ykwyfgX~k%=Zzx;atO3&D90>}?y7Gv)2YASk_6G=xnh9kqBHxh|P>n$Q z5GyKoJ{@pswDZZ+7x7bu35%7R2WA#QP5}#xj{Z8!5y1&10&mcMOHsIQ&bbMGm;~Kzod8Obc zGFo@K5UK)h%SjC{-s7HBk!u+6z$cWrLEkxuy&Jq|yy4!oKEgh_!5juik~XOow_0_R zZ_jH7)TNv#nO*{3$pocf699kTMw78>LI4$=Pp!aK&Wp$n%bFNyYu}(&2#;X)c>vN; zpIBD5uoazxv_#*f}Wv~BUN zKJ_Db+N-RA8lcF+-zj6&aJ4ttZm}It;MNHO<A9v673v{@En@%9N8yPbu*E>O@?Qv z$ZP0pm}}T;IBR%o1Z&Y>+4Me89yl(SV>&p1>0D?EpZ!W3U>dQ;3%VS9zWq>DTM!Tv zX#9cx$cO$ek`R&yQa#cYk`S^7ay`*kne>7uRo(==KIBzCbi7iDQ9g(g6&R5s!H{7n zFjN>C3>_vFQsgf90E=x7RbdYO?Hp#rK4AwF(;OX|`mIz#Z(48Re{K5SGmL%0503)s zbQB6s<m0ac2^8m(gXit3f**)mjhF)_oZDR~p zSWVW`@H;@#2)-BtH{0jCvyIo>+v{=MfW|;_uJ1*(>kTTkRJDCCcW3f>Fc9|rc2%U; z*lTw7L}KSz69KmVInj>tR8f_+?e&6i`&HNlDa_%LbV_ z49_{Nl(}0CbGS2e`0y(HD%vWxD&Z=HDqT-KF|sH$-%m(8ic?utdbYC*@HUUI3yKr& z9qXyps#)9FMR?%Nk-~auiJkidakPDzt9;pc{dlVUL<&(0F$-}D2?|LH)qFWk4T-Vj zP>_|(r&6mFZ5tOF+O9$`ASaJ^ET(4v{a_*0&#`}o;fT-BV9c?6wI)?<1tgnUVoFNQ z1Hgk~t*{&)v_lqp7~CB3rLT;f||_3)+*$TH!g{7A)`ob$eqQ%V}c#T*IvB?Wmfrf-gu< z9`0yOHC9F1iYzv?Uxi;#p9t)@O1ibMPM?As4q#SaPxk~lRHflfQq=V-hL{RDZG3b1CMzbd9RW-+!TIaGWEQk zfsSsuiwki_|JT>tGx~2yLs5$N)jk~k!xsPNg$yo=C-cXEx>+4!vY`J-Sl`2D(!~Qb zVOp){|5$VW^Ii}hLKGxv325iuKM?+h|NS+`j+TV$;&q>@`zERNfBaaiDKMZe@b$ke z-~J)|gBPbsq6bt5!bQaBe-YCEChjAC3DkpxZBcX_h7xq|cg)0GX++eE#a|NB^hgG=azQlI=mieriU zgOSA1Do-#L^balnKcxVBQcXXjPRL^6aCz?=^zlD`ETe&T&acw(Gs-D!?NglReIKOF z|K-F*q7?_FC|3iX_RJmryP0}~B>E!=hR}b0eef3r-qIS+o8|lhkRIN+Q~$?y6AGZr zkYz*~cDjW^YAu0XHL*XE|9_uNQ7EyEc9?JoL(yw$#$1s`Cg899p`i*eqfV?qHi%+@ z+Z}Ddt#huaiur2T7Q#L9E!cv;@APEG&Du&)#7k)O++_GbHPZTwYg?zQ{#%z> zK3gMe7Vu&@$!Xk780&D=DV^vbx>#^bINef_S_iDL{C!C-D>Li)hGQpdMvk9sS{3ut$bim!j4Dp);(&G_x)f0-|V*K`lTPV%}Tn?H~V zi5lBx9Bvg(0;*f`1HN1$aB?1gtcc8IadWVBr^q~!F~1V(rcsO6>?O_VTkP`Le@1^F z3oyE$MYA8$BVXT*^8WBmA^BXXyHuC}8#@Oh_JK;SBgk-={2~%i*{-y;1h~};&8!hW z??^X-lC~(?<8oKd3nnszS$IqHx7G+=xFYru1I(|uxOkweAuW?V>up7syVwb0IoG(_ 
zRU6frMtaBFAOcFXHkdPi;m4g_6I7=_dkeLRl{L+&JR*`V^=hK??CsJ#) zCmvTDRU+?$htJ&xB4Pzyy+(SY z7+M*G94#Lv?AX8Ki3;QlQeDjsV>x(*sXI`vmn+MmceU*y_wG5)$*uc|z8k8!5mc2H zqij=NLes4u7sh0()5{I|O5ae4YHd~nYV1&oLGU}zovG@`?^f=s@(rbEed{%q)2c#n z-ATic;nf-5{#3LNLEejoFa?7A2%?YKQNIGvP-48bnn$ldL2<>Xjd{8mIj>Vw2}3@j zx%x6$^!6LS(din2!($=1?6kr=MGL{h5zHY1*kw@7IMha;aATkToy>LOP9A;wj$6)V z)>~%U3hu}v7{oS!erh_$OAIeH@9hU##zwVwB3d*DW}#J`vV9%70_ppn{Yq8Z=c4Wp zBSt*(cI4lwa8nyg2?cD}8p&^}i^#iG2K1=~3JMQ87Z!E7NO{3(H&93AJXm@&k@>i1 zPBx-WI$VLfzfa@#D=Ai?x1Rtr;{x!JFKCm5fAGABugq@D*Q(sXIW-d&**tqd-D;Nf zLP*4K^XuyFrmcPWVRpolB&F?esW+>0;fTe%*xy-U=kf!K@io%PGlS)YMU(>7uq8)4 zs_cu6^Cn*T6W>#CZrQ_(6tgnYT)2e`TQv&$v{}W4e++PqjL?m`HlBN7hzKcWbm$h9 z#IO&3Ke*E>=$%7djygQA+*y&l5-s1lVy@{R=aCJ!evlQ=wOSt)6-59O98}ZriWc@b zXXgsvbTzNd$+Szic}bzPbpQJC1DkbFTk>xRE9iy2Fl(4K*Uj#5Fix6Y*B#)+D;H+8 zoaa?-)TF$|m#&9L<%&Ja{Y`_O$tVGhrN27<4!}BRm z)iq8h6&61a6&r8#yKre<(G5GE#qCtGDB91imu%*3fAQ|3{<7ZSJLGQdgf2y${bJ|-;>tICXq)vm#H*}6a6?Y2*l6=C)Iq~KWJ9@pkS%dgG2L}y*|$#Eq2H2#kj&zxN3*_%+9i% zWh-UJ(TK&A?Bpc*hOPLa!*X~9@Zi(#wo#>VAofgb9$s@ea>JWXz&{bQ5VFpD7h%@} zKvyn#@t((n)?WbBdXzGmRmhQ4F`zcGTZ%90(0}N>Y5%zysEE8#k*z}qq&b%AB^{!k zv4i1=DM}kmdhh;Kv%BU=Dt!p0Q{GWWWj>scXB6|7>p3c)J~H3{ z(RrKg89O3*Zq6@QERvhj_@<7wro(R+SLkg{$I5p~?##X7j~Fg_*DlkG^U!cwk(`~h z8tjd^lZM~R@8n0H_8sz%D7JDvmjOl!8JH*L@b903Ep8owu}GE$br_4&EAjb!TJnBg z-azDWc{*M@xBHP&c-OgGw_M95L`L70-~VV=l;QGaamgCOLP<8wlh|HVWNe)ly*Fq7 ziq8Cc#OQDY>JY>AESdi(d1F63rl5@0=(&S(J-+y7yuv;U^YRwwQI?fN*~Q!a76#56 ze)blpxwE(A?TI>46c$^&uM{_5>UMiRT~6Slq+_(w+u)n%ZSBG@!`bU!J)+Fpk{RuK z77&9zbVg<7DPry!X^}AcNr!fNF;R?XhGkyer%n)Q9BI`LM`n-E?9&aNwJ18iayxFXI_GHLm^$5B2(>=jlqBqh5 z<~-|whUXV{qQTK?Q)c-TK|k+U?D|w zyMuc-&$#yM`nmad00SM9jeX$-9KZ$Nhebl(z8Kf-{$>TY{e1OhW&eDoC$Y#pUU={{ z^H9#1&!gMSy_TlxK-d-b#`Wl0X1NJ#Gn-Sm-zbcdY_NuOq*UZHn5hcvV3a%hiO)KkF_(D}N*U0Lh9+rC8K6*VP}5;5PmV#z*Ll5W zrP;*#VFI?p7|W1=O1l(V4z2Av^-*`8r~ELt${Dn@zqc(>F#I}_zg$nA7(TKY7`-5K zlA_!3Wd6&%R?qCS#>Vtg$Lzv7aU+`Hi_FhEqj{~?p|AT=u8{E>lYl+5{YGF!LG00` 
z*G7POZJ2||o2tA57mHQORW{>zvE8EF2aH8_&gxgc)He6NoPP$0$1fx+r_@|zYu+^v zT{Xt7m+gV?cF#Ag6(~NBygX_XNWgD&D&vB7S)${Ko6^8IFdEZ`)#9bIj5xHqRoJYp z)DPcXFPzv@2~HiVCSRn*kQaSb`CV+yo!`h0+IhMWSxO6yU))0;;^DPD-Q42Zi!e&^ zkt+5)5TE-NrQY(@3Nz((C`f!ueo877k={422+*lHEfghOm)VdkE@G_bD3UPg{09E8 zTZ;zsUxllZM(Nb*#Ot)K^420#I5dw8kfmnHJdnvg&Frr+!0C__ramBu%@TKIeNM7fp+SXOXa zCm@cLbQ$N+ka5WxhR*gex>0WQFC%c#}e1O{lyzzMW?gX7SC1*73ua(mj8%V2dg#?(~@9 zp)yyly;Q#HkN%)^Sgm8+3{A@8XL?RGB(c&k%IQ3U_n~$j~busces6dU1PSJ`qfHq>y<-eisEmF&+o3IW>!NT=#~@AsxEk* zQz@YqB~uy;O(&n%zh)k+>zC4pq_-+>xdA4KrOS8AYqRk%WYdny6A0_?MFB4Ns*2?B z2bhoJDcy{6=RswZyU()@w~*h2g_qR7N#7j~x|BU?J#pQF-}26@&-Mz_GuC)MO)D|X zFFnY&ah)7vpY_|KBEVFQk-KZH>^!lkg4@AeB7x)U=&jvozr5}oIqF0%qC}Q+og=2O z$+n3Z4wT4Qt~z3<-lB-Ng5qbNff;Obxr_|b#&Ah4+(y$$?mm{Hoi}l27Bhvx0io~! zj|!mID$mn<6)<4D5qa!Gp}f7XRR$Uv*@=D9%|C#>O4% zA)$#^x!{qzC_KEz)989t`*2oXcOx_?f?pN&&S%-a9<^m3)`Q1ZW6m2G$x1a3znoDI z)}NVcf2>Ry9^s7ax3g3VjL5GopE~WS&OMUI!s2&;2lEzYp0t|FlPo9Xq@Mjow49mO z3Y%9VBcG+z)`dPaUp16Ps@&uE;TVkUb#Y)*gg$l1;7nh$KUx7PW%Ma%b*xf!Z2(2! 
zNIB!Az(!;AK|lAKcutq1+aHbfNEG1SJ^N_^>m^{cX&W!^6{I(`XMbMo7cD9nQh!lVaYQXIk1hQ!M@&t3{S0x=;+QucL$t2=eagH6HXXzkOVJ9lMc zwAId_>X8e)QKl)1}ot+X3N^Q`wogo0m+d=is>dY&uT znVmy`f$6|C>;QS=o>g>TOTya<>iRG;IJ0!W=>-+@xxTy1IFC{zoHx>6!26V_Fm`7e z@3s-{8Y-TYi^4|hpiDi+acvv()N7+9bb{Qsx+Nm7&h$M3DTA1!pLa(b4!ee3vDb8b zT1BmyO~Vhcuq-U8R;8R(3OHZfG^cY7xB49wDY$PFFd+D2GR>G1!#jo}tG%t0@-keR zjgNb(UI=m>6i&h{?ec)!IEc%LK`})zcH~-GG4`>kf&)_gBbabv7H(=k=UnotF@FN= zlP+_Io8+>bs+!&+&!C-RgY$m6MylwQepp8RbxucBN0?p)%_eCSvsXkRqyaQGXE*k+DFUBH0ChFxJSjG4GqYy zmMEA|(^iK5UB{y+mI=4w4a=0#r62^9(slS$>XYX-nG*b4Z^wgXu?LWVIipb5bV)+4 zPMmlwF@lBI%u@b6P}eg}7OpKwb`$j8eC9_Q0hf_Qw?cqikbKD@S0I{m$9zXL-n0|X z23UkG)W(>3?yOzTpe4@#uMQU1p{kzD@X z*2h$NwPO4IB3nst(@T7rgfQ^xaz}T_Vj(z%MJa&BEZq+aNxm|73;cxVXk_lC-a&}wLwo!@8+j6@~sNL1gbL|wLbp9G_YZDpo z_qY@H5fLzbf)k)~t2rgta^qZWT7vEi#7wO5jRV2nU)#VkC)xlt4$#Zmg0;lpoKJZ zPrXT|aPZNlM<27d?KGtiUTgcH5{hH5lKm4j3u>>m&S&L(xN~-tw#+n-@bxtTdZ$K3&sSJ+NCC3i)-?5HFDrs3Qq#0pLrV;i%HDMx= zn4O!?L^t0UDfd_Q2#`g)3ZJFeHY~iV4)uy?0FKPOa3s_}@GiL}2R`@;ySOHma+}m= zjG}&S7uPPBLcLL<_1YdJxG;(J)`zs~S1aGEugDHv{}LwZjK7JA=u&Vh@R^x;0JO*d zq|hc#OKFt>aT)OhYiR2|k#=Mpi(`zJwEhAgpXJ5$;X4!>v=s!xxDzhhcSMw>5zS=O zZ{9VqEtSsESKb2_+-RkZD7q&Ut;mb`j*T;R^_F#K&dp|l82H@H z@@?u6^SNlKmPbNJ>qbY8f~!;js))Dl)G7ELsMvU{oL)!!8@j$ zMa3f5@(#c|ouG@45X+W(n9T6_zWL)(T#7>uxdi4@l`n$Ukyq%C6r22rw(>c<<_JI= z>aa3rHYYPj%j%YeZ%2x{;|L#_(cP^yNXyt_$LD1I5i2g#2``RfUqJoBYu~NyM(J;G z_pBK)`fp}^Lk$*WPw3z+fl*XAQ#(rqO`o!7??0_J*b%O{Emlg@N?%do(U8kun%@6? 
z%XMvA6^TAx;}<*&Tp|&l%yd*?(RwpN-mtvg=Z3UAM`h&r{t<#c_%}-Cl@nrAp;P$P z-kx6Gx&}H`lE$)0@>9!BsLiOiu6x!m3?Q7!l-+%)xI!==dD6<{g`P3MiCY<=)jnw( zBcMOKuq%x{A(7SK<;4apR%GF?e78`C%NujoEy!2_!IP_1S925N>m^#m%xWD7emy2vDh#&Jm-5n{c0~XH;;xh+i=X3hKAbIHz*58W78wIGKyy1a~` za#7OK=L?eJ*qwdwjTR`$SjFBRSGnjpB4gFF<%y%kV<6%9^#@@hPAfifA@&S7zjwtw zC$cfUl5bRegtrWKk0&RbaDA@y=xI1C-c6hY4^7p+o%8<88-G~o;MU%vaFgrlzt+I6 zORBP2v>M4=iUNiejFzou&NNJ89(ZIv57{9+wLRSVY;2{2Tv_zs+*=N zU)?JFIBAB%{2T|9St5N?#zn`e4#x7BogCRpqi6WO51EH|>3P9%<^h(oM}R_qITouoAzmz&7)W9(d1huJk`~Ci z4&^;5iIN5Ij(`s&^k567Y)zCn`iz!Dt8})S{6or~i(m2;4R;t$-mb^ALX-SW@-#63 z)zq29De^wOZ}9Tvm_!CKO?b zSJfsU&!(b^#rhNH47SEx-m^Z4`@rgL!KC+&@ZO!C2SjZsM3-e4-veW;g4A@Lvd!hd z3q1P}mEzDqzWM`rJjdI(-vD8x8oW)Ct1^V?V7rp6=q;+yY5?LdOUvc{wPFe z%dI4%j2{FW5B)T{8R2sxqF77tj177{AI)teT3@QUpkp9ce^0f-w|yZyTlHyV7(Lmc zx-?>4Ooc&pWveSn4w^_G_oO=%7yrY5f&R1Y-HB$S>fxF<=zT@X375xSR5#Oc2eH1y zW5X0p#xMIY7t`*a6HVS@M6u!d)mFTtWEq4ywvdL3foX*Cd~-kT zno>P-KQ5cGaabalxT9+M)pQm+pi_!$@~#Mbl&M)QZ}O2XQu3|Omvz`U30~_M$Ke$l z=#K`?P}Bv%?_Ua58d_oWhy{m}QSpD9^20xAE)m*>$XBuhNyukG$;ez~)(!Ie$J; z$aR4W#5M;as7*YcDR*D$8*7 zuX`#)*lp5^1FP-wcTa2<;l?z5ff{QHH=*~<3mJ`e1uh01sm$PnmM;BO(9CMpMgBv= zv{)Ht2U^OpAtquaEa{>Y9Cu&Q8JRh-i-Ya^f+!Vzrk!XGi$3Rh~^=Yg)urDF?>d)EoY#v2PE?^|V3_q2u|> zw|A=g3Jc-Z)FUBZqQ&njlA=)Octs=r3hAss9GzGi6^bd5L$A_{Tr;JRh>|dZXfc`i zMd3u5I{V(#J-SA;n^m+~9izZ^!BaIq|j0<>Zt}$KpupIPXQ z7e5%A>=^ZpGlkq&1J(WSHL?cjFo z;!@?(HD%BdrE7FNWoK)tTb46e<-oh(!E-5W^l=$T%*FMKDrv;=O}_|Y>e@B#cYnUq zQxNKo2X&Ilr=}GZh=+@W*SBIaPO>C5ry>v3gaNF%uBW)mGPJmT#oTV$z+s*zeuZVK0Z{VuK*=4>7ZH|1^quoa!0qbk+0XEi ziM}aM;gY-;T_NV7LV>3WW`wD3ahoK=pbi@f?b#o z#Yu-A@cYqCB_9UP&eY3jGW4v6KaMmxPc{+V`9PD?kOCFoV{BrWyHh@$KD{ z#}%X7&+L_PKTy6WztHnVv_w~)_)TeLS8Wmg|KKXpG~fRuhk-8IslG&RMroL?BBNt7 z5jFfp`~i88;fjRCS?J|>m&5zQ@iuJr0q*-b#xHc(Z(V*oHioU!fpI9e(K&u#{mQw? 
zjmI_)iH)_ii#jKP^(sI-GumVoF5o~SlxEpb`+!2AL&xLq8RyndR zGj^6JvBgTXM01tdu0#de%8HrcqUG}3er8!Qp3`fLhtV6n;J=oAkE0I%KL-xD1Ks~R z6GTFW*V-DBE(OK7&yNfJ-Ge^%cB!qS`CH4DJ7a z3}HQ1P(-#igbnIix6FHjJG8phhME6=U>M;D+2Iyk{~B&1mr>^qil|qD{&Voi;Op`K zS}^^!(Dao$%>Pe|1Z3+>*q2alz$_?S6q57)ZB+lx04>4mk-s<2$5EFR=jA6U{Eu5O z!j+@F{_%s_S&}j@<`H3HWZ~Z@`>%H($9;kPwS!H}9Sp9OqghI{M>Nir2Y3(t691a) zjWD7+`hQ)Z%;Cyw`F;>@Wr_~Z!yC;dP%j7ls#lUM`Xw zY_v^K_>})+B>&-?Ygtrz2rO`1fBX3+-1mRH74~%h?Yn}j*yGn(2{3~o@#~6TivPU+ zFV_w(qRjKr;)Sc|FWozy7-af4PyMgi^u->}$RUGGsf3829n-&0*ngjO;f#;NQ|HC_ z!@d51G2r!7_u*OW+Nr;Se92E8lZ22vSS~BLbNx-E#p-B)RTu;R+E(;;*Q0(eA&&!X zb=;upXRSSGXaA;r!cYfP`o_V;*9#WiCKvL&O#{!^Nnvw7(Kdx`p$PoBUZ(cLiYm^X z<|g_uj@o$SmgsmBh+`l%2qVGxMN10}Pru#@qK-_6AzHz8d38;O|JxE`+lzukrSODB zj)|KX67uT^TgaUj;iE$3H+>vIliS7Ijz^}diTthL+XcCH=?APnS`nY~BGPce-mo%x z9KltbCg;V{m`9)48+U8b8Xi#F!Zz91%M>Tb0Q6KC( zsH0tiDix+eAz>5TI90 z%)DyoKP)6|*aEBOa=6|F1cvjpYw|l0mOmwRsPr918_d2hn$ruaR{7j_Ewafp-X`!N ztY#krbR&@{w5l}!B78;o;pII%!5}kMY%d=XkEass^1 z61XG>c1-Dr=v)L9Rh5D3q~)^Yc52-aJvXT z&X;GQ969MHI}9?jPH!JT14FKIPyItgW=0!UVqf=y99Ho&14j^Fl5gKb4U(s?oaR6D zR8@WPU-wAR37n320mZNLl_)-V{;9huy2CnBA>#fG_(;wpUc*gdcTYn=kr?mgk)$)D zub7GihUZ&bM5hXz1?h8)fE?M`nCr3eZ<|lXl94`0Ft(BZA)uCD>dH-cFnFtCjPF%I z3#oqgY;d<*P%&!SMYuEHVsN!ywV?^4d4i4w^{ydBG*!oe2jfQ|Q9HP@#)JW_{UuKh zeNGcpr5qI~^6zKYLkY<3wSPOY7 zQF5>7#gDICGz$YU22xluvu=g z;?wLE9Uuj0rEZEdk|+{s;PLd4%Z13WJ?E^!!067kTobIM?44(IKfZsy@0Y8zW1nu( z_ed~#*EOnR(sb7T&SKx{1D#QcQDuR`B9hE87b;aRh-tIKDFD>_UQrLJFi9J%Z@%T8 z2sNUyc*HbK)V4HmjV|L6!Nw`xrMtEcNSwX#k>0ot0xNCpuTYb%5ZFB;C*L>VP~WlR zHaEv8NDUzu2`w_ZFlIlAL=`E*f6lE@-zt_YNl+V0?WCr54O(TR&XS5!W0E;-E6na+ zDn|63=rX8>AkBOewoh~XfpB{?H^Sv!-7qn|B*0@K%G>kD zW3<#`3)ca}KFS;gwFLIZk(Tj82yQB){VA7RspiC=1P&gh2M5IOcSEx19f@5OF|AxX z!v++AOy}8L_+tLhbeHvrFp>6^m*G_>4l!Kz?WukBEn|Lw_e9K;>A<#~hMM_-C{EIR zSW?Xbk@V(=)l;1w36eV(qUS_~9#yLy=go6=fU?D~Nb2h8_m_fQPSM4135UJAf&o}K z?~H#`rp13r!s+%wC0TYSfC1N9~M+>7x^+ru<@r<>;(G8Z+FLnj^ZDxe}IcNMl9p_xF{~4 z-|J0*s(gz@?{>cHI6ho~jV+u@yQtI-((SDtYep%A+-3AV9ho#VG*T=}SksHTv@k{$ 
z-l=v%rNv=b7hDr`^vE&>3E%7nI7nJI@nt+43uNkA9WH72Zq)mwdI&7MM{`#83?D zP5o8@@;IYD9hx)_lP$W>o&kh-IsB$;`+W7bs`F^P`_wdNZVqfDPI9TykFd#HD8s)$ zFGIe6AGLJ1mz}aQ8}~W9a2a6y&ThX(>&0`iF`Q;s!v89?q58RLPu`BJZ}MK!xX#YR z71LVc{fhS`#%Ao3fXAA)Pw_g-xnwR@k;9K)aLZ&tCqJX-6$AI%Q|*O7e10l=Yfro0RH47T;F& zwfiO-1ihHNDR1AX>J0V>?Owv#S@>nDQX;08j;BTm{Z9 zpAFK)jD+Z)n~$An)TFW=cR>aW-N&>p9O&3Q$bIB&@^@@zN$j3RXD?Pjb&;FtK|ZVuyg}R8LfHYX(Y>LMt#A538)w?H!aeWshGGGo z{D}VL-inj?`RBge9*JPzNv`Nz!ge=>OlLN~D}uZxyq(CM1i#33ZynzKzChrxVI~;! zVl|idscZC|V2z&|8A_oVa$)r)!>uHXo-yULK)cG(afgpc>bx>^{>{?n zuKn+U)HkctI_1O3hG8zz;Fctbo6nXEz=y-ZyOUUBR?n>VIhCzM@RO;d*F?~6DS>WK zJMXwegbm9T6~#O^e`gBb8^_1hIms|QHmnZj?%Us9>P0?;L4gngMD+8`ZesZ?VH2-p zSH_@uUYygC_N13N{=^BM-qB)3%k~K<5san<$Mo3!-LXVpr=tkCIZWf1n=uGq$XU<) zOZkmJVc+OBRtWMm5NZdMJgNQgC={9D`~Lg#$Ag-spaXfMcl)4NKbfNHbsV)zd=ygS zKxV6FP2-@1_zsag< z9X}lH3&DvMZv9q37h@OG-LRLrR7+rxUEvK7Xah-@T=~AtJmJcV?{ZQ$3s#%-&NOHJ zR-L1nHt?_s9r@7&@t1lSNyZ!VaP1g4-!9?^EvX39^{|;}4qv)TTUVr)E&j~>`ypbC zHwm?2xQWxWi%fu4pO?jmzMcb>8&Mevrt}s`BVj3n%KZ}aX?eBbV_VwB>at@K8@%bSZ2&g zs@SdAFRpf3`_b9zC7!ecvzr{ahIp-yC_4Z=N zT#x5o@?FV8fS9vGzvU@EDe>Z(*N%TI*nJhes(YV~_SQ&O6m>DMZr)-v=hKc_Hb<=Z zHAQ-bvoyjGwg$SP{)tD=0S33zxu|KPo}fd1oEc&I^2Vn^E5+u6?W)>=jn@|(niWyY z4bOL(x#8IlSC&W*ndz(Z2zKl8XxjykmZhDVLQg$7P*LBT)2 zW(_`VKYQK`?RaEvXIrgsY4?)nh909dURX)TtyrJVU(fNEf9kMwk9yJEo@JhO8`w_O zE9`E}4X`_7c2;~{ndfG<(U{z!2wf*DhSrmB#GVffBFL!=G@la=a1bHPqnHG9N``cq z&z5~UpKBQx!74&K0OCktuQ2#Ccb{2>sW?e#1+Ax91}{;!0c&Gje+TsL<1@xI8Zmrh zD4i$kJPEo&dw61q{t582!hp#Jw&Sh`ykZnX-tNe3G0GY{6287U!Cwvt?Vo$**WWlx z{y;tH95%m34-#kQ?n9;*i^F}hyilUb_==QDcA(l8Un+@^oJp@)2fnXve~6lhIwF%r zFi%vlkKkS~2?C0U`B~T(z`5)B!?A6a;&5=yaZ0BnMDgKp4i}tBI)}+GGOn9KS*q}T z5yR4Q?j`~%%c)Aa7aBCBDi7$s|G1sVqfQw5dtS_njqoamg0>bIOLdG-K& zrhw@Fv~+5K1WvhuP8e6W0ZOC6Bp16zhzK8HC9fH6=icSk6E)E%L6p$Ru!5;I`kGRV zG6s2tbU^k9k-fg|mTBGh(c5%;h_HKhA;(!NGxN7uA`A`=cHC3J1TkB+psB{9zNp!I zj~C29LLP|vIZ1~M5II2R&P1~RoX;%I5y(br3goEn%F>GG!kp!USnp7l>Sru7b>DXh z0+2|R*RXiQ7_MmC!|Q 
zQly&Uvc7Wm+}#M6UJ8&g;)kdMxu$k}qfNO;CrZKQwu`_=lOKCn9?OZ%s{q+~SomD&=)M-aC8o5#5&@3^85xtJgiB7{@> zJDOkX^hrw}%k-$2G+U5AZQX-GskL({4d?G2>#lV;Y))rbxE==XGyI<4WYFSYt+Bvn` zbDkTP?)tN@HI&G6*JQd%8ZwV=?;S_Ix{0WMxFOx#Z7cIPbDNKvP>8)p8MB)OJuIv+ z^ekH)355MukjS#`KD zzJzW1jvcU#wLesSMbu{;b6ByZ5m@pIb?(IwCRGtx*YE|a?X#pjqT$ee^ zkVPPhojYUf=nUUbfLqHtWv`1}nnyxzHoGbQoL!jgf5JvC4KT>=$#~R0Dc#B^B>xrw zzK$jn`o8ebSiqdr;Q=Jkf6~n!6H)&q`d&)Jy9Q&uixf?x9JQF%*%W;J%g*8a1AB)I zd)v8YFu%wV2MrE#vaj7l3{@QQ(%(I?3PPd*(7xmPz;+p0Y2QQ>2M)jTege8jzV|j) z4;#;h_MX`Et@P*XgwO7G9Jr>bS>SR@2A1k}M(kzFxv?!2ccg?{X?5lDS4IM3q`*+X0hZi~<%JuU!Is(3u4 z%(&oK02CN-9x$o4C06)|mw;AkFezr0#CFi?Q5q$Z7%Zh*SC%dQR3EGczw;6C* zm^fL=-}{*<@}sf8P3||tu#o4kGpo^lP!w2LHcQ<~ovStXv#IJP@v>}DoyJx=PyOl$ z)drkl)8?TZfOP~gOSc1m5%6eVgNP2Kj8OYrn@c3^&S(_O6)JNY#WI`*p-8>>yTRP( zPQ2?&b4Dc5eS6hs{dPDpLVVH2oBa7U>u|2MIi%FRK5rM@iM^O>4Q;)NM&7VYj~q>I zlz_us7}GdY0JaceWOQer&D|I;gNC3D{x*aM9cF?RfepxrB|SOZPc>&R5gDKJA^v9f zBU-V`B`<sz$n3RbgI~o;6^gdtx{>xPFyo#ZBOiXQs=KGvl3Kt7h zPxh--^Y!sOiyo@aGf9T|h;^7m8oUz!m}8x{3omfD&qVXMUqHwDyypE{ZF(Q@QBa=c zjR-dVmrbbv{hJz~*n{kE1hi_D_$%GZ+JfuA^~>;(V%$oaS*LeIP%LOxlx1XwRlCq6 zgqr${h)In)E0JqKwm7b~{c=L?`-^BdsPf$(%EZrEU7-&&NCnCGj-v5>@Uh1u7*5w= zh&PUlO)7#Gk}s2ucfi_II={CU_1qWqjC;Y-Ge3^0qTGWpz99u8<25FX{G5DG>IH ztG-nl9T?QqL6lBZTFDT5l+Mw8aUSfmd8n4ecWypH3rEiQ;ijIj>uacM_@q>;n?+1E zdFEv(j-CVlBWCf+N?PviAfIAUfJ9>aNsLKnl@A70sbPFo>609%cmUoXFgS&a+#S!4 zY7k3S3!r+x`w;J(8+ zmXY&6>S$RF9E)yhtse__3r8~X?``55tilQlKO~ZN8DVqws3*Euqp$_`&N#(KG_O3} zE{D|O@2SVXdjcgctsVz{XMU8(9b?GCltDCmQy|lft2s#Y>Guy={tEZ+Tsp+A2r+ka z?^WSl%@^T+OOS{5Y?>oKHZ^UrZ%8)}HfpEv2Q26j$M3EP35)w(+MgZY%HI{h6C+E% zr_4sm33&gN<|~1^xe$cR_9C)%eGp(AaGQL(A?&<&=c3ltiK8SQR_(SH)1lnQZ7}L= zGgaDPs>*&bC@K@yod{(~1eCwoZ=%)`5)L!X%YSU*H}eMueV-qwT}(&X5l7AORVuPi zu6=9A|5f=XKC?!@W9J&y4pC%So!=NuEf6c|PWjWYuZ&Fn5aizMvEO3{_b|a1?{uI{ zt*J}|3nwbZIWuWVNG4CYn0pqr+3F!DP2Z$p(;NS-D$&Qa3AVTCX}yx7oVqIX=XwB2d<-WTzjjnX~rP>CRg#umwCep}AKycjYua;=0+TTYdZwUq2T3 
zH#^bcK?L8sqv{}t^@pZdH(jX0vW6g`IA9*#uhj+?d;8@;Hzd!(Pajrmta;ln42vn*wliT0rO5&q0+oU6&!;V?KvbJ1W%w0(`C6XV{X=2%GWsu){G%% zR$YTSH1PAgaEvEivR36}UYyPE1|z8L1frA=X+k7J$!g+WWEd1Wiw{0N_o`xFt;+5+ zZfuwCJG z^wHzjSWy&bhm5&#IAdVPhW=j)erJ@`1J_SKO(|HfbFE0MwJ#49jai*OcdS@j+ zw(xu4W%p=+3XBb5=hyB)^xJGd01$cXKtkU*Yzi+7={VzJ4|&~5g)`=NBsFQXWn(7i zZZ$t5@8hw|s}shx9C;RTehAJE`k(}2oEbtiPB8h{Pac6ss?wnHhkfw4I6mJ3((E{- ztZ!aQ-3vg64>IvLJ&gs4SA#Z~S=dRrgt_Hh@;~E>?4T98Bk^eaYMtf4tzdMDAPP22 z*5>HZlnAJ3J~9#lR1GImRKQDv>`gJ^mDb<@{jMq}2a=ZQb>q#9&q+9f zI~ws)afwa$Y(SDT>V@8R)@py14pJGezM;3NABM8U$wG!D1!Kq)uBpDC9cOXS2U4|> zpI8{bf?__?iB8IPTN9=(5oFWuh0|P!h6Y@`5kO-=Pz<9!*b?_V(LmQ_*JmQVp(lqH zMjp|3%UH2gpReevgui>e32!>u{Lr+;wFT`W8xiG97u;_?$ozPH zccOm24jKw1Ab+z(_k~A(7@wSgRllTcAYf;;K6uLfj3G8051A2M3}PZ<(O@L~ZD7`8 z-8GF+yH<#BUGqj2ja1BidZ$#)i0q}}s?MbsjC7>ZIkxrc)35n$$WOaT=PI(h+pk

Knbs_*uO}uA@LN(FA(daj-3y@T!O)UyI4W`0D z{a6LcPM)K2$*#>9X`9VrIz~TIuSgf%c(WMIbWjvd3QClxcy6wXphc3&q>IJtwI&ht z1S>DWc8q|=wfxran7U2w>uK5UR-%-mX`8Z~(#E8=1ZFujEls_B-8Gy!Bfbsml^bc- ztE2!7%1$_hp1|W@yYRv?A|;Wl4C3swcfgke65Y>eqG&i{gD>ZQI9wNd8l%T{FG{1^ z7v5d;#9tB28BqoOcp6&De%>r*Vl_8eZjuH(px^K7b%3^J=(cAejSG$a*N)4J%qwi* zdi0DGQr0xmKW0|h4<@5mLl_v2Kha1|@Yni+SVIV$Di~SX&n1LUmkC!;`8JK!54X7w!`%7N5bz55zpuOO*jWHBi*tBhgd}nlfo@isGTOM zptDp+b}dIr^pXiW^F;@tR!CxI)3~8F+>(3*i*Wn)!VHkOVm0xPz9X#2*(DxsgP) zwBM&O1fN+daCGb43f+fWk(N3JfRGdB-oKMZ;27GxL{bueT5xFkO~o(LGHfLPb-Dy{ zNb`?&@Y+01(5Q%XWysUy_$>lcWBb~@HZ%ZPU-;d|hghuDJ{9twIfg{%#s+YimEGTd z5PW)9QPt$U<(^x^?>;@6?M!sCa zy!C=KMdtTk(VaM+?re+U7u~e5|L7%Ozlrd9>e{g_YwiXPh}fN0{2Qb3vtJhLiO=f(;1=D(=eVjTh|>hi>vq@ewi?92DogxIvt^p^RHZgI zzJZnNpDv=ujP6|S?({J%sDoNvUsyt3au|*ieSaLFKx?2KTt?o|vFJ?(c;b-OnUe}f zc5xq|xu|C+fk5>xd@2vy zKVt!cETws*ZWP1${^zw#PF6v79r)uGW~n^pjK=PZi9_D}Id~aavGvur7BS@ru;Hk?U-FU6+Jl^4RJ6}Hq^}M*EjY6VL#H`dQwCA zE@%SxJ89OaQfQ~wf$AYX0VStnh*kF15hTO8PoUothBKTq$c;F0kR+SdI73tA%+ zPnP{cPCMpvC~!2!Ao$HUcEb2>fzggMv+rb3GJnGmVXHV-x1GaNUP9aWbb!ZmPH5y6 zyFVnBu-wHx!w&>#)!_eg`=Ev6oaaxzds>poGrsCYJeiBR+%cZ;d;jZNyZRIHJW`S( zh2%CYhd7O7&_TY>@vaB`>N9r+eqdyN$c-?k4vg1g68l|+!A91m#+T(1EbCai&APZJ z!EvOlti20hhkv(*FN&1`ms1&s1rO-d_*1urct9~QG`4qiDlRw5IcCSbY8ATwMpf^$ zKO+Ylh*mM#ZT3hbBr|N@`VA> zZjb9Nmq8jI$oS6kbepm>7$VBB?p~*^lbuws_ zE7zN+_1)a5;D#W{$t2?9a_(75kTNIe&7o}Wdzx_<_m@x@r_j<8ztUG&My!adV57Hk9VR$gYNf#Vfy7zSL)#MFdRRhm z`|NYlb&d6HILDff!a|rN3s{_s%wXJd8SR}lsX3RM6!T?$l&QgXKo@k8?0OH}FWyH} z`NKsUf8OMI4=fh5TFE-rfq|9GSLw%w=FH&m_PR9F8=Z}UJ>lvQRWQYpf}Ch$)lJd_ zR_;udi5Qmx1a(eIwi`snc|g2)NCVWeRay{KGaY;TyB2PSI&B3tR+f zpT>>41CmxFn*4oiZLCbfC&x|qEett)KZ-nki;Mx``8^dY($_u169!`-@cC& z9AdjzI&WRpHv4LB1aaaY=RmSmEeh1w$4loa#Eu<47e&|-F%s_>*Q+-B(V`Q<>RTZb z@=WrWXLrLHdtIT8xp?x}u5$Sw(4xT%Kr%ag8PD71n-+J=V>)f}iygI!T)eoI7Q{kA zRQZGWs<+0^CcK73^`QgFt+bp>?g8StVmc=meaA4G5hD%*_yYE&5{9Pja%$<3d%ohi zVIwXjz)vNB#h(isiih-XCTCVGNAYjH`ab}Sgg^JuwUDNXjHS9{ksX!sa4;#dN|KX( zK}?zHom8-H_f43=i}$|sZ$YNT2b+HwbNkv6K5#9UNIHLwBL`vDTSl 
z`$svdt$v<{^$XV{)J@$~Zk&D-bbC>&vCRAL`qJnm!JBi-U_3@ewoUu3pl7N?Vy9R} z34$lDbt?7!6s7K1IF8m7BqtZ!M2pZlBWW!W7`~Ok4=ZOG&R;y&89v)^O`Wa|FD{&4 z`|&sGZd9s>u=}q;WqYA4>-VpBlU{j14l?!h^&t5;qmF@QL&(F>2OL33Y@+I$U82|V zFD(LvxtkSP$s0mcoyR+$n3U0?iQo`iW0RQPa=Di0V4uV_G5S?FMezT{;eLyMZ~M*S z`-*Z~pH_gbPH-CBwP-{YW2*Ok>_N%e}9x z{$IFi7^pq`3-kHzM=x6db>tUK&3|F+|A|_rwD=1o9y}ua@zGZH{o#-QeUJCtG%yS9 zH&t5%!Oqy@y#EIzIV=|TE$h_9`}o00kzZyN{ul6>{XGoO-0@1pFw;MuChyPx8tvd| zV5gAQ77`Y@fd!0d=7)S737;!WT731dvd%>m4h?3;Pdw>JOtE5Bu5qrf-H`WReFpq3 zR=9)Gp~F^qvl(~U8N>kBIWqrFl-5p+$Igi`rWZ8s(Imnmf1|Ho#m%yKR)}waK|8@H zV3WIu^Uw=Be+F}QYyCfdWN*p==xKZhGD-%GqZ!mrZx31#!=S?jVoor<0C~5qey&pQ zvyFi2_WOgBnTn6#U#i){gaX#%ltZCd;%Ox?=Dvb|JkvS!)kU(;(4z%8Pj33$u+xsv zMYhk_qnhKkAl+2y)4#}*|3TX;e601{G$*d4Pr)@xd;;`?NL+lm>ufa%t=bSde|NX82RQTu(vT`Np6VDa*Zz&Y_AaFedk}OC%D#IR)HuKN5>$bVNAybgXZl zy4?ytiq+Ujo1N2vshvo4y@u1ca&rY;HgaJSY)wwEf*{9M7>g#)Qozl!5IXj!!6YUc z7}+N7n=teKuaX4?-`bR-yA&_}mKR{dr-6n)?{HkC-3_-|2J(!hpA7r$Ncg`B#12#b znRh`AyqRaOc;h3;q|9_8} zruP5Xdh4*Ly7v8>W?*O#1{pdPgrU0|lu&O0seu6jK~g$o=op6X78FpVm6Yx-DHUeu z5{Hry@ZH|e_j%v<7k_aa9COUOQUHd%G&*=L!leKeajn5UzKZ+C^;(tvUEU_pP zKlmU2L()Uq(8Qhz>>kedT;YnXo%)aAc!0J=GP~SM#}|(dzxW@mGsA^$y*X&vtI7ke zdy0h9G>?OP+9%+sO5sFVe{lHc_(AwLc!iS@t7 z!D-;juvb~yqx_q-QE}AkYJ9bx$H4I1~#QsFMjpjUziJ z4kElszK!qh{r09I{MV%LlD52M+uy>;;RJ#eW|@qNY%(7w_-9JI8%oU$Fjn5x4X%(r zH7uA;dk1{a-w~syfd^a$2UfTu+!7ufV`s_zdN2_p`g(ZzbKpFHxVb`-P~NtaoY$E% zb^=Oer+*Ay5Lb@1f;*-YnSe{4UAVuSuX2-lJGa973DH9$f;;(c0?InHJsN`lUUQW= zRSwzt4?KcL0E*tfm*eyw;KbrgDjxC@iV(uiF3bKsh&E8C2!;iCP6gp~uWpSQqNKgA zH0RHR4m)O%v2zb;bzF%a-d!;YN&2Q(ee&zZVh=c#S^FUT!Vx6kaB~x!h^^u z|KG2aX$5HneAjE>`m-GPGhsZLTe}`O^|aB9dtVndRrMeJwHQ(1pT`()9(S8yMD8>r zXA=VoCay$y*Q&iA5kU#=Ac0eN|6FYHKNHnCP0V^rx`1YZ=E%OrKW(RT=EoO7?dQAi z5C1L35s{%2TlD9{noO22NrU(*?>orSo(=z6m4{>P_NaT~u-L~QR#7{di%B6|~j`o%>18bESWQUq*-{JCB|DV+arFTnQ#!1SR^ z{^Qn)ff7hB`yrP17whLYnx3zh^X1VgRBOc2ncSWSk^T|Sm<6&+eZHoXmA#c!<1P}7 zQ<HPGzsx>Mls}r9{nOs_VAw-jckU)!TPVBYV6A=xw^OaYB(%lDW6xM<^60iv=3l*l? 
zbb>#l-V9m??7VqX_iRkA)U5tlruY7W=)`Nyc)O(rr|C9-uU4be1E8DfUi+T+FECrT z*>l6T*`dH+%jf6vi`3gsYga9n(gjV1&kxta`+*$x?oC!%&(g{{T z%0tevq%4gjS7QA)i^o`BGi_3rsk*i$`n)m^qVGn&-JH<$wH6Wm;kewm zgiG%?06a{!>4_o(lScf7PYp55hs7LMGMN_(~hejw)Nt`@3#D~0CJ-F<3UAP zLERdKs;qE`sOvYGT=c~n)Z5PLsT%Fo1wc|1q7K^1_vO-&^w;Uq#*B59^YZ{yjbO$V zd_oc)9`igWTKM*R>6M^k)JyA-3*I#=Hve_nY=A&%xwWvPX~shx?5_0|M8cEKE@b{` z^vKF@4gi($Lq2kue)iMN_8089I?}fa{}BVAa^~Zt)w$?86EXT)d;%AX4uy-}(`gO~ zB9)Pk$_&!qSGO;X-(mZ%PXhm%m&A(9o4RLhk4o$d#e)Tx~x zGu-EN7@D%eN)#(ed)Z|@dF0klhzaSh-f1P={uB%xZhboO_m+)^QP0iQE^e*e%$d7= zT##Fe{VZ(JG`YI9p5HrtxVbl9UHGOMNI!OSe|@E!oGCz4Fd%o+OK&*HUl>F;_co5@ z{ovH#?2oUFOm?)}%p2XG5^6EtgHh}u=QfoxcII=$^;qpEdjKpTkn`JAJ(4qIoN=Yx zx}!bRFy?tLxmJeTQbWc40tMM8e&sB47MmT(^jvqr(RW37BU%p=ZK_}Wx$*gID?GVn z#txVse6R#CpG|q+10f(ToP0>)FFcG|`KsnI0Bxjs2*+FZK4|}aK%d6A<+(L} zPe`FRH!7bt+FhN0bE42%AXm5f#aAi+qal9c;*pKdN5g`Bv*x!YpA+5j-}}%Qfp6zF zY6xJ65mW7l{VWe0_mvs*f$|3){qZK#s*Z6RAc^%peGE)cPT~MxLMKDWjEh9%J;*B& zPiq}`EC)4#-*ye$;MdO(svXvjeIp*LA9p| zqnwBY<#`F<0l2nom38hseSH`NQMvtw^ZD^H^} zpZ^8oxJ1**a9ciWI)Oy@$7C#y#_zl>*y$O|?0sqcHZhPz_4*KqG}61QmtBqgZo zHnoV-4ArSmi5=gk8=;07!9z`3URJPHk21}L8QJri$T-6qj@O68ryBTQ2N4E5`4dX=Z0aGcd6(dgorC zy~MzKyiR71$Oc4zlC2GY_QDbM^w_S8A0!W%R`2jr(5!af3i(TnZInv7D;4dy@96vthZt z#4Yx0*tKf;bP0%BA&%thwbpNWgZ!(y;ed<3GX?Id3Nsy!n&KmSK$HBNA!*4M4+ zSo!yUeb_%O=8djWF{}^vVsBJ25j!#_r8>#m5L!l7rt$RoU-Dg;AB`)YQYnbp-lQSB z2-d_9hs7=oIDn?L2uHV?CSEk(T<#I)jCVnKG3-w^x+B5hQHK7Z*Q7IQJG^%&=)10u zMu5njgYmVlu+cAMo2D5}1KmTnw9PFRE#$p-HQZDF(!LN&_6#)o)K_=b-o3A?J7cmA zstu(?v9D1eO*cg~(gcjv`0i)HnH8{LMj@IsChksm=kH~OAH6}1$kP&?nid-NVWfBz+dMmRr(EP3)!CQT^?a;? 
zu%Ee1Kqeh&q^9JfY4P+j!7@(y<~+O{LaIoiDYy#!&y__LTQ4YS;hs+&+K3Ietu zwQ;Zg!k7@t`Eh<{-+k{{9!(E`xP)+^XQsO=erpL3{24w8(p!N0tccMxA_m|jWa@Ylqj`7)wHwXlLH_H*ou*YNbMEDhyY-Xutkt9=Jq;tOXV;K4aMuPAQl&i+(PJ?89&mrhw7+M&3msR;1Pl z16APom#Qv zpI`Vg)524C^yyCq%iWD>_elPvIoZuy*E@b*H6iX!gw?nZhFf#qo9UviOY}hk0fSVK zk|BD7uRsBL`zL}v17*Mc(Cw$pdIM>&fr3JWtZIuWFY&-CSWV8=SRoOF3dTD-ADyWW_ zAYoC>Yd`Mrjj`PMqbG9;{WLFli&+wJOa~q)-y@l)z%nJiQ`?juNX*cqlUD{rhlUnG zJ-NI!f7CzFV+UPvdKAVd_v)UMi#}@gd4aD^0iBIJjJfI?vN^~PH{s6|+8@T;mSH2y z6|;|1yy!fT4Ax+Wkun#$3UMTM%j-@+`Op&Iq9Jc!-doolX9~R z&y!2S4oNm)hYg8sDE|X2<+tdyBHBSx%*wOR!+mn}RFu=b?+00kKUbJF2uo*VQ0%86 zh9YBA7xzq2aGGV8*74A;BqWE( zCi|S9S1kz@EYu^1iiY8t&^t8W4$X7D3?W_5z4AiA-1mqGDY5KV3zmpFaaEfphUg>% zR-@I$DGszd1@rx&$_9#uL@&`hT^!J5P?@O+a~4dKk6fM}v_Ba$@xiRVU+FeBi}#x0 zop7`_>K{k@ux*jWOEJN<35F;)OEaQcs?^4~Mv8E-g>oe-LDepzR%!1-jqfq1V!YdIWkS1Ye)QC{sTxk0^nt`>c~Vj@QBh?q(|f-*?Z+P5Mn>H&IiUgG?MkU;F0uPA>1Hgbk&*OXKYVH zGeDo}Hs!eI94BpA{>w;vlL~QX_Ug4OM)#@6gDaqJt$iCIbabdW4ZT#)l;Wc3>OmRV z`aU;aW?sSFMm+#fRc{7Og&}T;54PWk*QQRS+hS^opFwB@dT1rF4-3xz+Y2B`l6ZDc z7jYn%D|_A2%(pXf;_w6La7n1Jh0%D?wMVndlJwEw(=LxDd*L|Hhp|P`E3*A$A#+sy z9jUA!FJYq_53Gk}1^rQk7mCy^z@UPINtBJuw~qb>5XO^DC-Dbg&XBTZ%}2=hN#uP> zW@BRN@C+V0P0W+CAsFjCPV*tlux zqLYShUuaie)Z6*m&Eow?&q)Lu7yXR&avS!%f=~gEO*25e|Be6gW|$4=CbBEleV4^W z@y$;sc|S2X$Sa)XZlqYB>Dyr78K~{&IPPNCai(}iAS{qK8w-f{k9i;~pi%AMJEZOA zqk~+yOZc8fP#w$5MRzsQrXQca?sF3q_fuSf3r3r`*c)_N9PtndJO$ny({-?y`Cw>b z+6bT1c%JN!3OhgHJ%}@vn1DixAc7FVB1UnX3Y(T`vDhIzC-&-T4e9;(By~d8u}+D5 z;xR>%gXL{hqE=Z$<6v1-VO(j`igJ748h)hP*&h5K!AD*p4T+15E5s3L$hOq^8ys!jz_NMf8R7 zaU#e?%@@y9CL(N^te@6<-I5@q>=A9&4RxRRBHL}lCiAEOP%)?RlF*I|j^e#;GeR*mhB=p(}BIE0!S#rE71FzPgb5BG5v(H?lOYj4{aYgNFH)T0BE$IYY`r+6rB zKzC*nEV&@m6im+s);X4Ci5?7lQ$;TJH4CQFacfzO->VWflrh0!((oBgAabq8u$S(V zgo8@#6KL?j$~Jq1*Y1P{0kwg64gD%EYSCD@AD=;FtLTAW5IX;E1bdo&1SeyLkvEwp z7>Nc}M;2?Iz#=9Li1n}?Xf`ca%&rAvJ z;d!kNQ{8Ti?C&YWNinq`x(IGZ@$tiw2CqKOm=e?@so~pRwVj~x4H 
z_ZSHj_2>~uG1dn)QrB7IC)6VE50y(+x#&G2|3L61-?Iod%zq`(5pxqQG{Oy!WD2n{mkePan-|{sgV9$FJusbKgfb6h*L}1gL0tRhr2UirZvz&hlb7I ztIn2%KuBNSTSbF*ZpAZKXx+i&AAwc03D>L0Rn7GQo0A^U#O*P~Q!2^+m;p{}jz6{P z1B!J?%;vw6jSmP}y<~*71N_pMbN%So!-n()3~?r#N?xc0?oEL`y_(Zml)X9M&bEnP zbn4e8v5*&Gej$D*_ywKN2(Gqyj8Y+UX^Xs3(5A?-$(sQQnwpA^c{gTg2 zwewJY-?ZMUPDy@=y-;Uc;{&F#Z4xt>;LD>VaUphr zbAaCxJqGE=+X#J7bIQH&^ueXd0IkQIWw+VDw)vF|&Le9kw+1}5vd4>2$rG|9 zq#0}^yyT>*nXKH5CK2L9_m>_75J}A|F*0?3_Nd0u!jX& zLO8g|WV^V$9wQc-@BZ8ew(lw&9Fb08&pJx020uqDr6zmU#?g@=FnTMYl>Br})bmpv zpiCHan8&)jg@$Q#ii$z|GE8pL@Ib&A9>6E?f{QXvhJsQ{`@$(@_LMjlz?|ToY?@Kyo^dU zy3YN&b(#u8Lgd}^`{tmHJ;1H$sE8Qe3Be&%om&>${6`Q z7aa*H=n7J$!DKOmUw?hWeGfs>H~v+MzqiLwNBx?XYEkmFOMMY7bfnqGM!;T)vrYVM zr^u7;?cDQS6)}`8#Dyk=Rv7cOQn?pvI@z4lmmcq(hw1`#JqK_j(yCzw zI`-PRmHf%d#Y^<#S89Hte~2nS(y=SY6ukP>-*9IkK!wMd{5aw;u5R#YXo>-}EliLk zMuh^{dPQ^^Ddf;UMw#^A8pe}gewBtSF|250HaFH<9Mw0q&0iy(^pc>_5ruk82u}lz zr&3_y^vhCol)j8Yw6H7xrA)uRMIE7&J4$&@3wlItb13VsUEnTkRHd>Vq1{SP+qN&H zlq1ZhelKLsq#D16Y!TPv{*{CkddzwgR(R1ZDNK4Z%I!xO)~G)shwo<722=^*59T7@ zbI{2=vk)(TN0tyDjb=G3!sTy)`OI#=A`=Dv>4G{duVs9ku-P4BFDyKy5fEWss}FWy z&?D8-UZ=CW?TBPHfy&etJbbk(T6cIQ?fur;cvY9F3W4I%7b8h3S-^p$tEDYr?brn0 zkR`Q0UhFG!SU=>R!;y^kS7u*VHlSK0y|7T46Wk7%$6rl(#26J)l@B8%OyOc;AT8r)S}caa<)*c!>0ISR zhFX6);dPDCINs8t>7%;iZ>i=n;cs}U-sEUXm@42E(;$fXl#I-}N1u&{hrOpJod27w9I`|jTF+CUJ0Yxm_!4gbhMT!F ziy!UYgNKPB3*)^{X+v*+Hwjzcq&~mbb{a0#E!FK|AzICzaAAMNk9GgV> zMCdIHAI(HMef&urM%89aPT12t5ZawCRtu|pKj9ZnMt=YWL;3j$@*9=21gS)_nEahm zv(NkxvOgiJQ#Sizd`A6?pVt|P5yNKEPK0hRo>hrYfTDF%qGu>!3g-loNg40GP`nKI zL{J~b!9-OOMC{Id-rJCT%cb?E97|=Z2=_O&2ENj5-25->rb-F~0&t;lg+u^p-oc^# z#8aBY7}WNI&}%be-v5HwEAi4MT^spKsKw)8dooc-&*PB3v8GvJ5}t?2GLOg&q1gn7 zY++IbeuI1jBqL@Rr&qQ8+Wt204h?^naLi^~5=T(>gLY&u)gwYoGDq9I0T8P2#3EeQj!QdQ zNgA>MU{oexm#^>FHRDfc_1jWXH@?8)1RWnT?lrtGWDy6AzE$3j=Uj|XZMMDFTSOnn zBzqfaf+Lgcw|HtI+^uM{Z}2*ZJjR^uQlL%aK!Z#h~D+F!A2G88{Y51ZEO;m(rw0~?)!X$tyE6qw52qof8Ox(3L6 zS&Gj+JjPm$f`M!pwGUr#l>LMdn{5>>4^LuG!8U#dH<)u=urz*d<0$vKw276>{P}q( 
z)=idjBEe*tvSKjTvbd#REhY0iOWG~;3m91ibdIGFM56`mW{D>}M54LfWFXSRr1B@F z9)!tVJ+7a;OhK{K8{6<<#8F92XK0oNloD(D`h9c>H7kjfiG$+6M5qr?CYZUu?a3(h z;c>COjb807vcZquNzTxX_Y7Z?QM3UG&i1f&aH8F-5$-DUd!XaDv1x@Z5$f@4oTUi0 zTJFejQO?JRMTf?`5)sh&!3iPBZ4y|#z(fS%i)KxvL5=5Uo2tY%oAsW^ci=GDFlBnw z%ki!T2#XAnLza5c!`0~)!6zb@Df!$>8uGCS6Qr{R6(_4b83GhfP^MTz;Yk$lF7#5U z$Ur3_9*`pH=8J-CC^#r=A_eWSMhncT0%Nn8wTX0NP5k*@C-~Ql+eO%EBaxbJV<~J~ z)=+L#jmm%woEDCS3l*spJEl@?qY06zmYrUckyXbnIC>DzgEu^O?^`Pou2qLhL zlh}uHHbJ8JM!nAIu7=(iM(y1}EM(cIRdoxg2}$gi+Ya{BhDNXYpFFU=ApQYo^U#bf zt3H$`b;BKzUqv)uO5=U_Z!f^FpP-u;k+~Ljk`S*yoZdVH@j#2}DL1j=YqOLvml+D= zT5@jQ3L_YXQ&_`{l*-Q{7>7tApx%`K+0TIr@S=PP z8QK=v9roi|p6X>E^&dIvkA7|_b3CFyuxAKT4mIp1C735f&&uaij#aXQhT#$2q*5n` zUE!oZ4vO_h(htY4$kt!ry!#)**dDNMdu@hc$$Tt4rl|s~NZBRV?tajHG|$3TRTzY0 z9X{F|$`&t^WGiZV#1vtJT7h`e>i9QM*k@`-v|X&GvbH&`9g(sqqeO8yx$bYKp8hmy z#R(~4bz#br#l7-!xWHBH_lEQ!NSraFnM1Zi8m;T!-mCyy+ECu+?QI#|{sk4OlAt-N zUAj0X;!R)EqANB21qgVQFnEriqD`Q%@=vKp18!gowkw-0(cWOyg3%9fgPh6C1ysQ! zDZ0M(#E|gKN-qQqzoifZ2hkNgc;%&*ctQR_W$WJj#dk^Ay|b$Ja}NP|pxugpkPq&Y zwg@VBZram;hoBS(`A1C{WEg6V5G1V8R|Xkf+v+S~FD7|1h3;PzLlY+_6Sqx9A6yn{ zD^YaN_tX>Fr?sJEOCYNs_bf261YE1;%BCiS^Uvr19FVhK4W^;E`(LLz#2(8{GjqQW zw&Aa1Ji>1w-OJ525e~LE7Jj_a+-9-%^nHGi9ygHOX||<2{)e zPpvX@Vq-N|*yI@XczsO8%^_kd&ra)(Exg8*a~;whcFNXaJ_shN72*o5Z$0o9_&OOb zjOt<8y4vsN==4gAjA%eo@sdQwlEr8;?3ExcDe#WEi!7Ana$t-cawM;c5NDlld`%cA zEB{H0?4#G`3P<@3N0JaG2SUM8HZb)A=B#fsYzO%zLfwRxMeh64{J-Kn!o=EAGwRgM zEEA4w*|j+my&7UQsgM+#o$EbG1R1m7ct$lAJ*w-@^vLup&|+y9@q6%E^vg+(gn6Js zsENl>k?^_&R2N=O8n{71&`%#0_`1N*qHXY)&bvAa*9I(yxt3;iOPOuR+!7=KIU9;> zTC{4xNv%dTSqlqRXn#%?d5(2tY?Yo3_Xt%1y-49cF_H>dGJwXDwZ@DRcYmgO!-SNX z^q=v&Zp%uNg82enuSM0~;du7w4c5Gxn`wjHuGc!yH3~VI8k^$22{mqVun!qo43}Ih-vYoVY^Yb zYjb#xp}cDa-*3VAC@y|m2F$0$tUhytO(k63d`Y02e?X!1w|E_GJ|nNN0aKsX%rjil5<)XbsP=&ra;pMSR08$GcP4Zu3XY%D zXqt~)eSzZjleoYrPC8T-m#QRM!k6OjBd6I0TbY@nv~qfnD37{)+q}I)C`cg*khQdg zuBd!JqKEx7Yt&ZT(_XxJQV}Vv-}0z<|G=IMO~h(!%MJu_Dol3qe|s8#r4_KNC%3!m 
zCy13frv~~(GDqCt$xPHEcLxRWUop{a7pu-JPVS}DebsqI;EYKsXmq8?TkA-N)?7{f@J{C;>%yZFfk9_so2;qo;ie_4x=Uq{Bc-cT4?!+Jd>?Wv&cm@7i9Z?jP^7zSNw&kg*b65&b>=UGpX52VmJN|=3Wr_+0%VhK+f5w(&`Bj()*pNy}m@M2xT$$iE^1Eqcd8^DOKKc7rYVSxZffk|R zw7sKM>h!rcHWp$^--ClpF6AsZm($tV$K2Og6=Ar|;+uvZmJGTp*MykFd6Kmk#eNb9 zsz@dDucf#A8E!HmDk1s;gVed8Jdi4Ki9{YsCTIFNox3~X@vwl zMpv|8i-Wh}K)E5U4jWO{{OHdJBYwq;?(q^b?l`=%2Qt9}XoLotpr|GX8`0>uM(aP# zlLunJgcwB|0YJUm&hDXO;R1HW2Cw4vS!lb$#^$4O7L%VxVtcIsB%|XN(t3u zWQgbFlbpZ*JD`CB7l8gD;dH79r$1fl6F|>RrW%^*adcE0u|Cys2aInerpsP;Tv+$~ z?MGc*P|ZyZRph%F=C~_BX!+i5C}pF3>PCHI3I}GuS0e1w7x2^PE_@A03?FGEVR)ak zxp8v!rVSD!|DG&yN1R=FF{$OYc1qQ>o*4)?or~>><}ZKPc0ZnpYX z3lvW~o;Rz9`j1^bD~Nsso=z?Z!j>wNIyDT^fcC#O8fI0uc`Iwsv5d~k=rC>j)iQ}} zo`P{oq4Qg*&frUKht)2XK^Akdt2RVb1+M9bg65^*hyaG z%bL9}gfwO8S>v2>b?Nl{D{N7JpiOipr z`YcGabBY61-NRvYd{lq9HjpZzJ3i$z?;gX+kJT9og!ttKHHols8^X4>`}*e9HoW}R z`$)ltI*=2Ob!=SEPwyZS8Y`YZz2~&Tj~)rK)8kF7b@wDg<&6LJ@_6M7yL z5NR18aqO7tXZ!YIeL2ULE1XF8Vr;G`g2b#cyzsC>e0VK_E`-QNNmYx$ zM|Yt)n*HdP=XPC5FBSXY?l>y};LGrrk>p_A>Cp{TEtLf)eCv4Cfnrm9n&l<12t>5G zl*?-8<#w#A*4kudxPJo#BazJmP&+0CBM&VDWF{u_D@To?h@(0`>K`SPW9aPPWu>izm@NyL;_4e144Y#f+n8_k}J&>+| zlIT+b90Zi6(PFUApdGFs?#bfzI{IhI!H33WE@#x*Od`&671M=o?B{{8MYB2yaGTyv z720MWu3Y?(2E5%S^H!;NqqN-XqQF^S=S8j5OT5Q4%*)1{DKS`dpy>;UwcA;>OUcS! 
znIgXJ;LVXZK9t^;8*rkyOu}`3B+Lsu*|wP%-eqw4;eK=oDC3APC*QC)2jur;wx5nu zr-e9*OELd+t@1y9=sRb!IyA8*L3aGs_|*}2r(ACULuDyI$S66q?%INe@y^wNnec_C zppB}bQO(DS)>-_8!H2KA)uQo69Q**ymB`L)MJi5~4bt1>u@=!6s+k-~u!$KPAlHMMiW|#G;#{Cx``c2yvEjl#-163eiy*5;rx0*DmL@hx>zF ze+I+VbxJ6fjnKH*!k>CQXE6@GiDE_J*icKRdK$2sOB9>aY(5JSM11tvU623o5P9#U z#+{S|LBR#WgU0Xp9(;RJVb$#UCL+gcka^S^{a1>9suq$aVZwL(>G_v;VU=Q@>pEti z13t!+|84?^l48G0!D2`D5_Bnvf9HcKdW`^FU^k-JP* zyChYZp@w;5dFRJ) zL*ds&^(2R~GIMA^Ddm^TTj@4S8`J@)uUNEy+dL*4{BJLylK+x_c0q&LVTLltQ1LX4 zW367P&PvPSBh&80fEST!HHcN%F110_(=UfS#xS)72cl;hIj8UjI%CZTB z+cZ@w3s0xn>b|3#cjT1)H6dK0?_dO{Z0{%t^@qD|A?vtEn6CB z1`NQopiv0kVSNu0FN__#4=Cfi&m3q2{3Hg+0`|o?Fx)STgGyjY4uCHpj2-{5kIA~B z`%Il$Q6tS805mcc6mN*@fozGo>9J<#i&z1(x>62{qkA6bo9wTyRMR?FDz#sYll`ac zM5dxou%%xll2!4V@$654XD%#^;0rOBQKwusx)>!Pup{#MB!z-aVL^$-5K;wLb-F9E zGVYms3I?7Xe4LNCI@y`g{^q1{P88yHW8|&E$zyqT@I5mBl@BWW4}^vy;*+(FYk-ljJIPE{LL`4S8$AfW_BqQ4_%D5+tUy`hsLC=7RTLmHFp))_@|$h~jNr z4iHq`9p@rIsRQFh^U@)kzuy$=DJG+!y89225CN17AUcU9rHGGvq7O;}Ce)T3#6b@=t==^KE@E*b{$X!72h zGgD=^$LGu^PIYe-$FE*3*kW#8U8v|bjrBwur_{iyh(o{M^>MuWq!J<&82nrc<$6WQ66TZuk8Y&j$ddGgCtr{RMzZ zoB4@C$!}??iWD1p!^_fNR=J`ZIeyci!%e0PwlF4S>ER3B8}z0)<#P^CCi|%F6i-4+ zBmuEY63fp#3Xry|R}}5KJjh7O1giBIeNkd@bH0|?ZR}CQGu~`;HQzVj{RX{naqO>V z;HSpEkrqOpQT@2$p_EKjh==@I8!0jsB$H+;c2pr+g*~D6n=C6Guvyr+U8UX?vYG8* z%iwLmR>K%@JQITAhdhOu+LU*mV;;~ycj2l|9si8zI&JNVl7W^9!OH8UY?F!%;OcoT z_0f3OJ;0RG`|p(E5J5|%UunZ6MPPbm8p`H}!B?H!Tu7Whinad;XoI`!OJ&eII24PT z3YcIm!qf%&4we*n(5$@>+_OuHc&#w(>2&fABtwLGgcI~-j}-)_6SAjz z+*nYJmlUuf>&nu0C89q71pXs0!t$i4ho$5tJO13X`uS50*wgZGj*cr4$SFhzqIWzs zX*L#a7HSdAAE}BNsO3%fb+Fpyt8qOGv7sX#|M=KrvY|hD48AvO{%1kQ`0s*F^1(DM zoS*&;t9@nA#nF>kECXdQ>{DjW`G2?6Q3d?p&TGDGxhLlh`nqm_dzQ@or~{bm^9< zWFm>X5VIy&Hr5^5IyfjlZsV>~4%Ltq__4q2$%Wq>pO9%}`p{KI7}`v!PNv7UfN&}P zfR<`q_*=PgEaJ~l48j&Nb5nuSk*giOeOs`w&MncUqq*R9y`viOG091rZ4QdIrib&L zAs!?qY!BcRs|(5eir0+jyg1dkRMO+EiOf$gzLKB2|1=;_5lr)PCQalD0{k>#SVFU$ z6Dq+g83Wt2u zIsVW0MRh#3!T-6#+^ABIa79xR%2pVy3YkN3(J7R<`zRLtxXxb`6b92SuZio%t3lT{ 
zbrad-L9wcEzI!J%Dr+?Yh{*MF2Jj+aG>TYr&$J4-Gc=qPhx>5A+dy2LVoACEPsPK?c-+2%JQa>Rf)`!N<r!C?W=HV*j8%PL=Mn~46ypfIMcVDmzwyx(iE|0 zZDG?4@26`8mYIhViFlIv$mFWd?chPE6Iy(o<|v<*T=+jjC$$hDaju5IUG)-RqLz~iIM!EZJB;H+-JFrrjG-vr`4s1Av%KURQF`;BVAVDAD`;G6Kb5 zBxtT;nV7qB7Zx##Dg}2UrL;8Ab%*;L6HCXRt$o&u$I!%4cogZlyXcp6qhmx*#y(!IxF#%>pBv~hm|zV4j{9%qVJFSpN9FX5iLANH?l7r!;A zr2h7(+UQzMp>?KI8>>y*Zzvzr6S>R2n%C2W^;0zc^>fAAw8TQegu%t}*dhe8)b%tn zZ#0MK6Ut+b=SWtYW$>r>?Dsw|N=d2M9;2RCJb`e@xJP&qGli3OF0f3$%7~j1_Cxu_ z{D$3eMFLurbzqw;1a-qQZ+WdKeCTRc!F$iE9ZZl-KQtl)Ie&vG#?6Vi-VGi*5uh(#?k^b2jQ5Hb9y7|m$olUG|L61W zY+OYTfSd+oTB3@^tqZ`e7b{=TWq#NHqHidb|B({;Q}zEwH~hccIURC>9FxDl!9_s( z&w7Qlo6t(V1NCxN-sl!3 z6}wVi)}<5M|M5dcK!2^sH?ETy{rA{Lv&F%M_84(Wz`}W}UAZavw@<#e6B{?5|J_c%*^B`^1O zys{tvYaGYl78{rV|3xWR(ke|15Zz(2cTUB%Vu=z==>K;x{m&)*@0ASUDEa^-f>0x4 z1dsk)9Ven#<)1L#d&(1pJ^*HV{q`k6`~S-sj`I^W+>29zfHW+Uw`4B>)A#V%$}17K z&!-s*0RHr7GuI7>O?b4fcFyn}*#GhV{`CV)c)6hKbMWns^L$kTYgX97lS)(0AS>9n zyEeUKkv%Qmd+WgR0h%3Mjf6`87g0=6^UKw^#Dpw-pRvqvg=(+Wp|89n4`1p z|8`6qP$G%3$VipmTNhAS&%y7$EB7XpeplkATY(@7U0_>6OuVaRv~Em5|E~1!Qceu; z(-YkprkDYS3)q^dvoH^xc?)p)b)+O|bN8cJ1qEumb|b@D;ay>H;~_?^LcrnG`>E@6 zYWj_SWc6J?@b{0-nUZq;J*&qvY$&2h7E#*>$wBwTX{zgHudQLXj_YT?-)SeZ9RWwu zr~Pp4U&s6ZccYN?$k2>rJzC)YB+^;8yxf5Z7+oE0O^D1}`sOQ~W5QFEf-ke0j;HE< ze_Fk-c>}n}0#0{%d!nfu`RR-{)jnqC11^n$5#X&XZ{V-}I?oNg-=3XJR(`6C!WNG8 z#Wb(Cy;_i&B%X*-|fZ{>hTPhSTR30yf&gxew;{fGJVTrklurBi%&k8n8)| zy#(;)cVAr{M(f@=0xV*~&eflv^%%RqDB5y3T8OK1T*I`;~OZM>DtD6f4RkL4@1^Zb{Ul~vq2_>>f8=Mny zo%Y2vIL+`4Wr`Grvg)1oC$JDxY>nmexBC7>7dP7UkYMTK^5uO$T>y1jIFF;~v+xw~ zW+EcQ>&$*QEg47%&;k6~r7B2*xrtz4cfJAG^U{E^LvHQf^t1iMigJFgU047x=09y;pX-cAGAZ?K><;J|Xb`!@}!^?Wt{!?b?kDxG#~~?7_3Ps9GMt1}hpxU87Z7wxaXXd;OkOo8Nt!)4m$yZ)`OC|HIasheP@H z@8cyEm9!}PzLO!7;b{zV7RKz0UJ|ooDfH1bC^TU^25aS2ymzx8UzASrjoe#2c!+{Prp% zA47pXHFamw%;ZFs{gicQhidT(Z~V1OvNENnx*=uFC=K3cQe7KuT^ z3hB8>Vb$-wKU_s*8o(N?>HemhDRulUR^pNvk|RsDk5lfFFL)RwiIIv^z^&rJaV7vW zW>{6pJI~xt%PoMtXQymB2$yksO{mFSwj9Hywhe 
zk>}^5&lD=Ka?1iy$&az^Ol$u8FEc_joBVvd3>^YCDQ|Bzqt}%-$OUDKkjc)AXz%`!Pgy1X8AibXRrIpp!GcHZYOLtL78HoUr6*avw*bx|hO??xB==%@ z6n4LzLybJqnOHjHQ3{8Z#WGq`z3|J+;ECsnJ$fxgrBR0R`w6^KGNgUBetY@8l=?a- zbKzS;HLa)}mG6SCi0=63-a#oVo=sU0h&}81S*ei-3A#7Gk1?~#sJ)+l{I2Q{ciMgqr*7c%Z`))=UpKht>7cO(gCTONuB?3c=bu~H0ig7(-^Kc6Ev2EsaC3yr*PrNB zp7>_#m)$oXi%-NLhw`GXjW+r?tGqyRBzh8#Ue}2bQ2`j4#JFBpa=zKkE>F^sfD|`T(BMaC2BJBG0 z6kd9nm*S!uEMW6C-*mbPZnagX1-A@-;FF5UU#}lPb;`5GkJo}yQZ+)+YFT@g$A4?O z)V#%5GEwWi?v)0vj)RY_N_tK7V#qH8bAju$t^;+t_SGeXX(bWw?`;+By@bPpQ)ikz zx505P?9$zko!3%piF5R-j3hryu^lOC2%AE?inXgpi|)liL$Rj8 zeWMrK#(u-QTbuQLLp~Xbx4=3Zrz++7jSTcFL#R`ei=g-gDX7}irPs@4l$Cv9I+wdm zx@rrl&udpX1jJ%0Eqe1F*b1Y~@a8G)r zsBtD$Cs&NYRAEn1bLC_uVxq%rZ>io8I6^Y&}MQI6yZGGz{R>uZ!*EK(i&8+Yz)fW2K; zvUaA*UdIY5KW(4A#9H$EZ=;l}M*5gR&!)3l&x-Y1lT5qDmk2!$lIC|8l{1rWJxbF_ zxb;Wl{}uSx6)z&u7TYP5QD{cOPbax?w&Pz*>z7c45c86G1** zc9{dtKCR+LQ+faD$K1o%p%z6(_{S26xX)w`BdKqc=lz1w#gpu)-X|h=I z-jULP!5O0KX+4^S5!ZeEokN!l3Zuh+?C>0vLp3 z0NM2cKRO2m1qqv;YZFa=Lb|JY!z$peeR~u0l8>UWiBLn(3nc`c{@T=;5g4BQs~;eC zO5d-m-B!i}$E532m@$h`GhpV!1q$y3U=*QQX5O+q<<>@Fo^)AM-{jxkfbk^P9(M|z z_g1{|b^ZiymULgEitnF>w~!>vMt6BQSDl^cc?JsmUNl@m2#l5fH4`xpT<1In+uIxH zQ0^sfJ!j6E=l|0v|3~vi%}1i!`+FDQrsP5yX$qmO!GLeL3p?ojc&}*MdsrK|OI}=C z_%-r>SrLMecqZaCgc8lAMD1gNHOqIBksMXZ1&YIIK-nvKt_(yNn(;ljUsU+774aV* z46Y{L^BTAxnecDP{DzN+21f^*V|A(valda4BMMIpEYLlfKB5ex6%%9Wvnkf1V!$omeYVt*Ij|G$(1OGuCi5`eSS z;>BpOV?V$Cm-uL!yNc4laOU5ZQe^PKDK*g3v&Fm|`d_u!S><^WVoM+V_6qGX(Ve1u z9R@ix0;WZAM9xvK;1MJJ;N@MxYo;}6g`{Uo^8Z(z0`49K@oxx=!5jy-3*vmn%(!Fd zB~k(Nbdl?KkTT0Xwk+Vz_Fpw1_}ovd@x_r?DW{a%#bn@huGafM9xMkVu@mPvD2@k3 z;lq3#7*mmN!J~{wpE3#X)7Q92K5G3}YePI9DT%NWII{%$p+eAO^y(D#Ow7La@$i!BItyh{x8o07QeT?ZB_Pqj#7vrD5;M;Ko5$#y&j_U_5V?w}X zF!3Uj|9zr46R$z>2P|+)OXSvJ>ir~zK8|>vXyU`i^M*XK1~ zOb><~=Npt+MjVp{A*y}tc?x|$Ff(>3N2%LEnNyK(4IA_Smpf+|)nA+iz$?9=W#uB^ zPs@sps#JjS84JL_3&q@KO($zzf@WK{c6L9%M(wj>`c88$YMgS@T_)@*0B`L1u9#iq zXz9@o{rXJF>wW)*jA<{;^8wqPwVwNGTV{uOQEioCzOHk8NnI2v@0cLQeEOw!Wtz7k 
zbqRp#eEvwOm=g~rXieBrF{t0w}KwKq-M5|YRhJLk$04`(i_O&QLn7QK*2Dlp&S ze;3D8<*{QwE78Aw6LWILFR%nhxx&)bQ4ng=Pmh`>;!BgzD(Ooz~0Uw{5DSSqaQ z4e>U!DMzXSZ?WapA_e# zC5Zr3gQ4xujTDif6OMoqB-y&0`zx(tF zp)Rd3rDsr#*OvdV_a=G+o!zXE6&3waOfv)r$zhQ7ekOks{CT9UBtKYk`JT~nKeE#1 z7=HIwT9td6_lDF%bC)qw)M|@bYo@rG#w{V|rC!5kwFkVW!mySxm;>p4_W#VQ zzDq6klz-p)2HpTO`I9wvb*Ny*G_hBj@J~J>Vg^8$wbYYwl4GoZPBb;Hj5hT>emc|w zcQ}IPM z#@xiR(-YeJ^AEqqx@MGGO{8I8stZ2Ew(f4r4&Ogm06v2HRIMo$qd76u`6hIt$}S-b5bYg0g zb#6lEE?kKREWG9y`v^GnYEJ^1Z7LaNAecm`aph?8bA|So@dznLL8}+AHiL_XJ$tIk zc@b~amu~Cy9wp!qzrLT-`{J&f_<_;CE!*FcARlCr^{w;pt3rY*i{Ic@>RM$=o7xX< z^AyIWL$x>j8tT2z@n$}3Rd!Oi%y(kCx)mp7+UOuehVGN)Z{L4#q}r$#8>7; z_7)%y5>U396Ye-EdoqXb|Li6IhK0Wcgb~*|PtG_MzkwD|j&Oq077DPde&Ex}L+rtU zA1iC>5c}$08*{2aZPSjnacxD@W;z}#H#^$6I& zpLm^R$M%Bt=+WdAPk8p& z&lQVmWD?H%K&gZ{Z`uV5$j#F6zMf0xHJfE%bGu3-^vCH>sw?De=Lmj+IBan$_trq8VPiTGCQPXlUjONafm z4}-<6BX@f!`_R)y!w(a2l~7^bZS%`yCf5=7Q6x*pu=3M_dBBct`f3gJ zRNs!7On8~g<40q$I+uhH)2%L1^T^|ks#>fSE~qQ}d%x`Gy_v)}B7tfa0oB^7K`$cw zBBSqODsEVuk$?qPJlkAMQ{3@5QMu{j{NOHylIi{vzj=rJWZn*c3%uv@we6{l&h!WmFv{pjKm=>JqlZQ9n z%vSml9xP}w98LKIbf*$dUSggnNV@Nq-?#o=$Dd31P&o7_`kh~6brq5%zh)hwL!Q;Dg&5Ve11y;paz&tQ6%LB6Z6O8qVb#gL{az$F_gfY1 zO#TM+3RJ2pzb^vVs&YoP#&6~@bACMh?l94059JK|oaI!&oi}g`k>~m6W2NJdK;A%g za(ct^#|fgoTP_^|Sg#SDS)o=bt+xjoEIdDu(VuP%PZIDloJkk`ZpM~`R$=Xfid$3( z7@OVAZClGYg8EoU4x;{h;AX6ui1p6cPzx@n(qSU5GWd@%EnekfZ#=6RxK{gZ=Je%m zi%@8R$+VDlK`#eftkpE!&!j?fQJXHWlHdMgvCneu8(gUnlh~sE+ znl~sVTw4wdP8G}5WSi2kzZ}T_K9^i)(xE)Kp#g&iSEQ{9LP4EY=>~W0A=e0qNq4P79?zRGH~BhOEkGT)+)5|jdW(sQzSKCRi;T@=In!NjH&cQEuR(p5 zKCguVexg;g(*EZiQI!@}{t$C}i!Z%`I9+QpmLE$oo9Z!qhW*cTb|t?0MI{TEnHq~V zzy6X{sNY9v2IZjn6WimHJ2}>$I~{_Qt{Q)CxCpjc6%4IiI%%C z!FVeE6IrufX*0+XbjWng7t?l(w|AVWZlxGw{-?pee_btvG&?cpJ<(_v_>6J@fxu9i zg+iLJ)hpmoYE%mNg45>U(c;3@0Bh2ppsnD%sMV_osGB6;RRN>-1&Y}W=MYtHg?StM z7GJ0}@>0beVj1gY^!_~|L%tiOnJVb?<6Ow#lnD%O@7GdR7M#+IS+MuSuxEs4 z_|uuzTgnTz2kCX}Gk4E!)@*SEA7{7U%5(X2+Q4J)yE)V9Rkz%?5E?Gvl^-pBXI!WB z6J89b^60i{p!MS~&vq(5)J+q|`ZUzv 
zH>WRE_@81V9!o?$km%#$p^&3ptM5MBlTV5E#;_zEJeP7QX)>SrVYo)FP3~(%YeWtuy`30KvEoc|2hi=bfn}km7T7~^mh0L~ME^QEi$#p=nwraytrploj zo>%IYSjOdIuhkY*2f;jn6&Ki3k)xXHJ>iM^QQ4^yAjfL~kyna@-dRding57V-ekj>GO-l>mdPYBNs#j3oXntPfhcY_ON zVKBVj-0V2s&4PSE&_0!xiR`7;W2^i?D41TW!9!+k3lw?rwXJAQP9xlI3@lptCSgnG^D z^r6@QYyso*r$6R>U5+b#FJd#h&Eu_Z=J(&qi%K`c>bQv9bSaZCEaRE--MYO-anDim z#Fd>SB=|ROruqS+liHV4_aNS<&E>l!Eg8SzKQ7wr&3%03TI?H>+`RmpcJI5^Ldik% zm9mZliu2u?Xe{{wQEl??5;2jUc>0Ul^soR|;&@O@X##DmEkN2(pmo;(@|naYx$Aj^ zy=2UC*W|%gdRAMef~urS<6@KBa?1j;XTHv&s#{;`YvXu_?rwo7SMLl)!RGuQyEp|W zzvZRLl-2r80BTIuDYv1fb9|`@Q_2>7^ce(uzmWhmx;7M_3eALcblq#r`6>XFP2QB6v z9{o|)P#Qyb2l7rU*-s^yhl0tOcU!uJ3-lkgj$SK$0%aI_0b+-NQ+<>Y*ZR!rM~lLe zg1JLXdiWo=ajgq&62tjj2iAVc0*rxE98A$uCJH?QP>t1jx6!Q1bO%q;n z3h*1z&!Gu%rO4_iP=St1V<34Y!D3h++S9}Sp`0H}sRNa1KR8vz+pZ%*Z`E+2Mbt_{v9^xXh=bm9|s*!cjO&3Pny#d@*2JCh;IjP=Qt-#LB zjRyQ??;-=H<3CtvVwJ{`yZ1To~-ZT_T$~lv@?!#5PvTj(h9aa4s2?*?p+Is9g8oiy~V% z9mLvib`c0FsIqfpVKKXt!>nn@6D*wzkIn0HilvZhoROy)iE+TFc2 z)TQ~0V}AcScUZKc@NLk5H(SYUBnXAF?j7Gq71|=v^HnsF$rONR-itcuKDc=c5j9b9 zvFDQ@;X;n5cK8XBT2&MWVgH#?KN7>&lWZ%D_ zbdj^|`fS7xR;?%UA)vrfKvuhK+PMm;VY>4L4AKU{A9L3?H{}ZLkmgD>tu`UA!e7=O zBk*e1bRkktuSHKP$GNO}f&BQ`bI;RJA8E&HwDr89AyB!Y74=j+XmlVAJ9Y=pFTntV zr?IDtH4%K%?zu%U$Od<$>M#htyPl6>mW>`8ytUEk+Dh>YW}|Ak_Up4*V#_sw``6#o zkZADek%4KDdV^dCUdDe;)_G?(cLS>9s);PM6dy~1c@Eb0in?Ht?ylK#TX$S99ugFT z3a;52ul;kre~@u1I&o+2a~%1;zkiT-V>O=9=T(&o^P5hj&(>hosUlFNsoEfCXFMHy zMNse+ZBnB;Ke(VtYs+1Qby1sm?{!g{xi`mkZiEND4#}9uCE)3LX;Hb=%vB-p4?yZO z6~yrZ>#}a%zN%K|(DrvDTQ=L+Rpa`1T22wDL?~GkN^rYW?ldoWV%VA!a7B=4 z${~(7-Qw%W2LvU$w@c=*!A`!z0V;u~wE`$~Tz_6yj88P1*{r;d&QTu{I-YZ^9F-Ek z)E#>X69%hLqP1wA_aXwJ&T}Vq!F4uapSK9?TWtgo>4SALAn;5JVNp*SP#ckY#Sa}% zui#I7+Qw&&;dWZ7{e35X;l0)C@Hs@{V&1NKe5{Vd_GFa~sQqlFKoR;}&TG(B8dq&Q z_2?L1aL@BJW^BUydx#_x;V0_pt9E3J(38p!!L62s?pHD;e!&Tc`=#<$+=Np&S9co< zIv0LAfo@IEFic4pg5FMOt94GY7{&2kq-0D`7j2C%l8eM5>BTh*7nPkua8fd4G}fA{ zRZkLAv&nJ=Nd;d{eq}|Htl79^|K)L3=}CICdx-KWFkoX1i<)Z*^84BmZH*2<-HI3Z 
z-85_4Z)UBKwhu6dTDefSSr*D!BWbf*lMl;%8HO9PxR=*v=SQ7#hlhVIzj1Zb zd>fr+GgiuY7($qp6E8b0^s{(!+K|_EPzoi~ggo|KX;vHh$Y`EPE%5BidvE^@R;WJ6 zB{ui3LJzsMCU#G@vVh7PO57$3K8``T{KkY;-pzQz&*3HH(G9Y`YeIsFjpl+h!{cQB zlHwLD7OoTZ6QPQWANhrnc978t=jTo=W@{G8LYF54<1}M}V95sB8|t%Bo?XcsH(~xT zp_vM&IN6cXrmed&>mV!G<5y3k6EA4y#2lyYarvC? zoXg#qDOoOZ3yG(ml68i~`~?t8QyKFnCF^{UZImwy3it_ zWEXspH0|EcC@vcsrsxSNkxoQs55wU%ztgWf%>)(%A01;v?E0XIoF=}fdQH{qL*#DI z9sDTS0<&8p(;nXb@aG^q_h3S{9Tc|PctAff@>K${8HC%f9$`(vL@fg3Ev0}E5$?*s zRq-h^3cAa%beZx{SP7Y%O>88p?M}=~0PT`l9c6(0q;}W9lPk1l@eI>#7Ot&0*QP0^ zn+~$HqV}nVnL%0?TK;G=1?`IalIGg1S8}O@OQ_hRnsq-0`EGE|TjUQIbUBwaQHkQGvJv0h%zA4k$XU;Yp;s%jkTfRk@j$C!D;5Q zk%(=wYG$ZXW3!xws|%`Fw>c*qE_R*z7UCw7KCi*PAXBnz={Uem5+{Mrfg=jv(fcPA z67I0R6d?@jC}iXfRnve~2Tllej&L@3Ap^u*KH~lh-+!&du&TG;980)pBejyaVoXDh z5qkCR3pQDqSPtE;CJe2hyv0`KcubkUOU!tf$23q!)pQBi=iXmnmU4_uF4KRSxifAS zNXa0SysrH|5s%u$wLI`R0b^&ZL}c5oK2CJu{I%Zx9hJi!n-d}5i3rd*{W2*=XvVR48f$yTOs~ITEBP0~?bf{0V zGRWEFW(PsP%;4Fqho?ed`+70$pRWJK^ryTIvl>s1E?;SLqn@RE&*}cUXiq2gZq;7c zx81+Lx7)8l6?1&M3Rnc-gB!dis1bYQOjzo6r*Q^voKEkR*o%;7g_)AS-h=Iyb+ z6oHpE^P%^w^LRnz7x~#o2w_6fYQQm?oYy7lU*!9@29^7_j=g5i8MlIOaFCzPbkzQO zx%lPHm4kre`!@UL*CkKM?pR>rIuPQSjs2}rtDvn)1(_>PMoZ0zkv~yH!B{0ws97-u zF$%cL{#ZrNd}|r|+=Fgzme~4qQu8mJ47frTGLqi>p7cT|r2SB>H?-^}7kM)SGJ zi*%aymWbH)^&qO3qHOnwB*`EhN&lE@f?4;_JMUJ_Nwt-Mv^VfNJdn+L?yu=ed9THT zjClan&V|0UcBQvJ?&5@+>RtX}W@)X}Ori0!z|;BMzUsefy!mEjmI zq}NsgqFQza^1iwb_wkWjfAI3j8EFDLiLE;u-CyAiSYi&~@#%*7L zmD=MVu?ys!b)US(xp(*u=WCy+H(Jg+d|-2r1svOE8FA<+%3&Rd%oFcgcl7|}y7gN2 zzz?BzNXJ`y^jyfP%LJHgxI%I8@b=lf{C}~$EH^yB@JLq(5U`4PuRW%f53UbBSugOo z)HC@nF!s}B(np!Hjuv*R6F}mS3ZyKH6zkVkjycSFNa=`yc=or#0K#~wFPP)&y6<1tFG(yKK*tR)mCz|^`=^;EG@{?JJAtO;@HlRO!;jm zD2Q|Zq`?fAZ6bg=AYQX_s@A#=TVreTSh`0PAVQbvROc-S&d@mtY>dPmYBe{&AGG%s zM1W~ZAg4ZFzT7SxWsc}e$@5+xGKfI_DT3~Aj_H8(G+5P~;@@3ekcKo>(2S25=6FfE zPb&DGbwxOcDpSk^GRFQWV8~!UXM+RrJv|`zY^&g_uryKeM+`$EPDNA$^T?H->C~?o zgSAtRX(UH-z$+Ez6N=^Z`H0ssUl%oPJfs(B!?)^Y1~!!GH*DSwu>R%$ARcS=$Jp^e 
z(T!HjA^r-z_&_RA3Z)LFtGBVsH@(D#PgYwnvp2HU0Qm>?C9^gUB{_8Hz=rEx&G;#$9OA*?RG9E?y5 zf$|%O=7Vg(c%ldh3~K<%czHk!_OQUe4t2^Y;=d(;Kn;*MIdfP35~ju{06?+>jemC>V~YHYjM|`SgYE3N2f24$zsp!` z5;FH0#ecs&k7tncHO&ll$y)ABnOPyYj~a%lKi^UMZPMlu^^-p7u^jIOB2N!B^G&M; z!Ei)Id(6};#%z~P!u1X@vJEunQ-Ypz5R6%*cCkY9KUjccv%y2+UL!;rNi8G@lv9-? zQW{4fnWqF+euGovFHc9*{Yd+4jO#pM9jF>sX(eWNC9o+M$H$h3Ou5tDMt(?;ccQhbHPSug`1nf0#z!Z_d>BMz zYevF3fWP%rg$QjnARYVtt2WWE+LF5M~x2~XP}dRf-`Uf4j@ z_VaF{yyMvt`-Se7Q!srxY9hMpztQL)jlq62|BPxDQfjclq)DNAWKLWgn%~K3dv3j5jF#l`CAh%oDW(G>u!a zSTIYI3-S(zS|LFTMk7G%9D)BUp<@{$TkePI2(}(Qe{i(&@G+P^Sq3_cax6E{LDtrD zX@69kY4j<9!N4z*yXD(;Grl48B(NJiUiBB)!Jc6_@3#;LRG?wu?7M;8qn_Fg z{oy(3%#hZ{(uWMr5!l033%#1f6oa&G7F8B_34q15TpJL#3(Z>^0aE(f@&Robu*QhL zI&qEwwl(MJ7>QXPFxpjB;7VpG!D!9o)dJ#3U$Ji`>BuKcH+Us=NM8w{!PHBIVCzjm zX6Iq}RcyHMM*A&u)c&weKNuXkV^nFA03xj(R3rK(!M+FI(}Mf~QOepfC5~TU(lJk6xOiyt{Wi`?oPY`3j*5IcYL=ZYjrJb{gqd z&1}KcOdo7>tnb@hX3d!s${r#zyHAfl`pNUNJ9loEx6&RXnB*Ekv1-3R-&*k|Tw?q_m`f81KvO5rjp7%3GOLPif2zDD8NB?Z+3`w@Tg#+@xf9{TKJ7`E zdo*M#SqG8We%@xb!!PfCX4LYJ35#9;M&zx2+am&)L-VyCUsZ^d-!t*sN~HB_OWoOK zzU7M>!?vp1#cpuMY|OBo>pId&a(_;q7qC&zM8wpN?T)|Fh_q;%Wq}nQm?-r`C7{qn zl&HkGyBC%TgcBOQ&)?*^e-1`Hel0z;)jG92iL$lr+?eAC8oeVDXa9zedJq+NxoaTIn@EsY06Upt;~I@?{-+0<+|PbhgsUnm_Yzg|)J?gDX1srA zNZq+eK%(ur_jJJ2U)-WcR!_4qU}{oge4v2zzy5fyT(%EX`q`-so5+H~}*=wlB1(&-zBD1pZ3R6Xf!@eh^r5~9;YG~ZrH;{X~4ad-v=ou;7m znJ-?eI26E3Jcjs9tLp)%*14kjM!^_iqiDGvX_2dvQbdZtbiEi;`^jfWpNFvzXhh)j z4nFeR3=Igy=rup`MNSCy8{qkBO8JF|(kmIaLP6zA2ukCk?wA|qKYkH$!l8wTF4Vf= zTp!r$gr@v*Cnx@u%COufAw!db30D>W>5HL2#$DgxDGw7` zX$8h`C@?yA3EqQt#-_Wq@)Azx2+JUbI}rp|eeedZI^{C{UwgXmXEQ0Y=#u&8!+sL< zt1gJ3qdN5}Ts4mobrdSQ1N>wIvO;ic5spPQ2&k^Zt+(T3QSWo_UdjX)^=Q^Id}&0x z)U<(A0X@2kPWUMu9k?|w;W9ZQFQda03rE*mfFfawU4l|}_oen`NU(e;z-_sAQ-Fb( zF+1yC^LP0^vaTbKTYUQE@a`&h1>p4YN-t)rVw*XK#e(Vy(A>-L5;A0Y0sHMVEt4n+ z3FI{?eHAlTJ8tchR^2Rv5YCfcB`L)Ru6Y3Y`YZQJ;c3kD-h%ZP!>gi z1{6|y8$Iejzh+Kj=wcg8hOL4?O_S04lB-glOsne>o|W@M1v>T=3BM*Pg36W$Q_hw7 z0HtK@*f(ZXx9>wKDF1E=Tf%oGy*$N7MbT}D 
zqyo^1h^my%quHHq6Bw|V4s|L2Bll0sB6ezS(HBF`8GRzZhbm^f)@+~j?0oER>l7xr zz-qbJuIKL*;T0zwiKAP8Td0nRGpd#W9*l}IG7d*k)IzT4`Bg@y>1*I|h&7`?cq>^* z_yIBd7dRP}1g(93=PawhvnTm<@qtnXTf7))UkWelW?QB+l`p|#oxZe)B}$%wgI}Ir zz|Akj*wdz;4Px%d-fth13@Y|1q4{!jYMv=F*8ZbKI1~iGX zjAJyWMlX8=VaE*#i?NUmjWyM4PCk;oK*c5~XsT6<^E!!{=GMt%8E^cffMcOK(~3Lc zZ&)5m^cXrlEu+voxm!eD1eMEpV!Pm55oMS(J&yANVQs{=m#~>BxG=g$J^?P;X4VFyj}ha}q>C zjtYy7+J4jD8nP%>dvJUT+rGQU9g{FPbBEwqV)@}i-4T%net-FD;7s{3vz4K|L+*u4 zQKFi~<^p+>QFB8paOgA=ld3far2aC`BgemkAMrM%U^TLaL&)+ZWFAXm2Mcc?<0Tc! zE$-aQ`>Hp5OilDZ)=%{Sq-oZLhwmSAo+rMdZTCo#KI zV^X5azVDMV5H-76n2*)ko@=OgaBmpomYX1(3T?08+M7Fn)%rJbZkMf>?T*1I)ywJ9b{ z!74AtirIGa@+mfY2#p8OM#?$~4R@mM(7v7yM@R zwP({$(x%blmn;8lVco`?p9+&+Tm1yLU%46*iTJ^O=XVQ4VacOCAi%w~E7 zq`|#6;jPYvwJ$K)*LM|?DxS27Se1>JpHQUt&SUH}2nkQ}lvZa&Jck$>c>K&Ahg++n zGf{7vF~5Upr95Jjc^FUC+?yVisUH}JS}FtoI-i+YX;5!p*}Ti&=d*4JUA z$>CJXQ2QxAuKJemHjtw|rmOSsdn>p5n15aL2%-*S&1sgh{It(}Pdh*OFeABP@dZ5+ zSLoBvSh-dNUmgi1kaJ()qMbA}d(z1T#BSCB_8sp;EcaCpJN-Vd;i{h(zEnNgr*)Rc zsq2z`O6r?7YkJ!}S^7rc6j6}g3qbSUcHVQjXi|ot&Tz61%G}I; z?75m;3YTNE&<-@bt4U*5qHgF-UXpR*U|yae(HJW~J>?tzY9#Zp0t~$lX1Cx#56`O5gng)#p3N{E$$NBV4VfK736?{|SzN&*QcgUv4T`6H z^A5ghxCm?J%@J$cQD`WeK3iz=JbhYkS*Y;ou1;ZEXiFnib?nLEkJW%~il;1I`>fHLYI*?*A^saC}ky-lMci1SJ?Lm9HN3(8b63zFa zm0rX9FJ|=(Q$s}ORC0>J7(hkHGf^4rVHf(QT3ilR+oX0c_p#fCDodrn{R%toS28+Tu~uub{H*AsIC!3!mkK>zCiZ z+W-18TYp_SkJs$$%S$ABxoRnVEw9P4z2yUW!XDo+ZpDDjMsw$epgVM$ldx>(=ezQA zAboCYV0&sJLy2pbTO;|Jd0j(snrM%ckU_GK3Fd?BUTF|Xn=o4Epz}&w#QW#yAI*p% zT{oVGtnFW(v0WQY+!0_@=ts2?aPZ>A$XmG}N|~5Veghk>>8(ju*<-O5oOE{bWFO8O zJ}5z(gmTBz>{_lsYhY_G@Z-u=QXm~Im)^0@nNIsU4TG18vP z>}*bvIk;f<<$)XX&H$nFWkRB$5|_R1751}kT=R*p{ukQ<-`rTR!Q6pykz-9oVv zBX_E)1o%G!Ia|!Qt+5KjxWf4z(9qhTGk8 zw;!)+{K)_QPr(hdq&USmXe5^)B)G_Ype8WOT$R823}Iq@Yf@8D(RogXp{UudSaD9# zVn6vZ8QJ~oSFa-e6dMh*emXm=dgk)Ha&uS5%?qV3KJMPlyy1S&tuNue^UYm!)p+Yt z2R34!{IxKW-ESvEV?N=$-Zxtkrn}3)Ce4khq+8?nyQ_?e(X^t(G0stRu<*6W=+-nWM&8iTD;}( zWA|Y#=(=?K;p@YQ5tbj5vb=h^@t_M!laCH@M*gb%ljgBBQut&ugBAtgC?#Limvn!X 
z_w;qw$Qr#=fu^(H(Hy{)DF7Cfia~o@>IZ%Xp31JzRaHrtINfDxV^l92h?TI@p>(S1^_MWsAO6 zdr7MHO(jb^T^yG=G9hvqJ>PCFw4W0!B(J59>wQ=26leDtf1Do%AW zbr7bV?FlzdRU_uhBP*?AGJ$s+5xUC-V1^!`^M>O*lm<5*5{j+g))FqrUBNt<8@rno zT7REevYyHP&&BF!(@srlcyEWU#haV{N4sIcnC&y`v!5%x@n1eiGtOElKCds69=@#c zoD-X(xzblqvbhvL*iUXsQNMtoQz@BkxC}jCG%%$((?+;2eZVZBElrU)`-6)8!6?%H9C-F+`G6ziyS4CZAYKzC3FZ8Xt(D!^k z7i819*f7+%;51aaN58_o6~C|_F?VTbksEo<4>jIKR@+d+Pu&Y`kxzJiilH%UO%paz zd4yL!+?l-d4u1(^+|E^;*$)+@F#}pBEHF3u054xnii2BWUsO58jBm(%hFQ zU&jS#Mo3P%{<*Ak(b=?4@!PrewdVA1(H4e%kq1*|dZ@s8APR9WP7caRsb0UU^w$;2-l*FY2)c@8_ew2JfYL*_YSO*$Dp(b;p?>BXy{bOymFd8FZHc{dab4((8TyPN z`|dUgYnc~ni)QBe5mX0_f|eDUH#vJ6P=-uiJvm^$`*ePn9y2Pr88T2EfZAS`dL@FK zZQ{PZdAR}jAPF~yQCvAluiIf6UnvgUR`AyMl+SotPDhw$aCO{NUqmggyF1Q+jCQ8OyZ8{D#6aAm!= z_q<;qO(cni{pI27>t&)_B$)d1IRqzT*vFW64vnh~u73U0>r<%?>=$Zl2;h-)>p!#C;L_01d8Kyv9ke+@nBE%w@YE)u3WmAY^o%8@CFDQ!QiY%3sW>9nu5)jPk;utFJ0!RMnU7c&FI%S^xEVDbw@83ZgAl8@|y{+=UPPQv(^p|9@<~1z1&S_dYC$0#b_7 zEl78Fh)8!!NsAzj^f{EE0@7U~9ZE{4#GyN+ySp3y&tb-y_nY5$U3RCNU=cXEc^71c-*e2oyS!!FaZ~12LZz5IbUCTp>X>uS|0I*-VrjR3u-Ih%p@mYG ze^8RJK{7+6rWuPqlS?V_{Vi;H%^D?A0`56;*CKnmIQG(NtEqz+#%oX2ic6c-U~jS& zrkInNdfZIc-rR<8L|#XA1- zm)do!eOetcv{ZxjTNd-L`xg2yusW*4yP*8_N+VEZTGRyXa8-m zTW{n@^1Cu)*8fHG`SazOg`0@4v(@a-ff)O5r3X16{-6BB&ZAQP5RI zAvf^HqZ=xJ-)cv5VMS4Lsu$)|WWr+iU1S#qVgEKC!C^#Ej8Olf-RTUaiG#M z!fE6lH_7jc%~#CmG;^w4i|q%`qry2Y*bjs+R25eu>u<&_JIZI&+}F5r_TK8Xhtib< zB^~EodcH1T)Z>-io~(P;*eLe$iK%%{xE{S`!81ITL@qe8@iGH0C27sS@& zzZDTP6e30p<-@Ps%Vaa&a^1TxWHzsSV;_}_hrdCsTHRfZp(C?quP#^nbYztN_`R=L zxH>@;vw;Nj1_rX9+Wi`n_E-g6MG{7L_2rch^=Q*{SSsQK?5A6&o7(L0Gl>CV zUkb$~6=d>$h^Jpj22<%Qqi5DEwug5NaOo@#w>NMhTcWRjx%{v)~J7OMA+KRfosC{_U`@5306AywI}Wu+f6+f`?-a+Hv04X>*G3?c6;uz70V%9LHhcm5H6Kk zvv;U9c2mvw^4Ku7yP`F-_rp<3X3}L-8j0`gV}%}fPlw+m@iTw;9L;GsB0pf#9?T== z z-xW0(?}aKxFblm88^H^cj(FCjfK4q&b-WT*tN+rAD205t$8tD$?xKb;`TI0f{Atz4 zyz(Z|q+KWTB)dB1qlqK7)(iiwi*%>*RJX^o(UA<=zcqG7E!DU=j`Mi7BYE_5sYY!k zIqk&W1|hG@L5qA`0Se4c-35uD@O6v`2lW!K{EG(GuA_0Sm zsD|P(pv>2 
ze;uNU6G9pFvn<7lEa`3PPSe_=c>QWRK#{vRHM7^>C4%V9?bvnN#$OxA+MSwAOCGJ8PSal!~M+Vl?2Cv|pxsjqXr03ccR6Sb);Q zN~no8GGnJ*PA_JKbI!&ma&;(5V0L>dOZiVRFWS~O$nkL>Di;*K-uCblWk|Asw0{!Y zlDi6vTZa}tC zuvg|D*KxuyRQ`2#3g3=RM9Aux=PViBEAn#3ccV%m8vW*5+A66EuYtl^X35^C$25^+ zfun8x5#}c6lcwx8zsTvrMPj7M>{RPs7>+sbeyqvk;e4>vZE?)YY)~xqLV3c@}rWjy58ieu-cqLh$sS2->;UPyETK?7nof1YEt(xiD`*=tN|%m zS**lP_=l+_$Asgqk4V70n0#?ZM0_bTKNbV*=FX$toRj&ax$aQ5l9JZ}h4xXoouSuB z-A0bGxLuK&(=M9H1$*&~$vooxWiFD;232oxODAfLBhnQT6!vi#jix^-CQ{|J=h@{c zn)Kn3q`TG<7P~gl=Nu1MQVU+T8etlmTx`{D*?TX3w)=g3K(4bXIN%ALYMku`^}8zc{niPxw(LeBF4`9_G1i_H z)F0Kka*5vk<;iHo1`8V=ec}I(5m#3=ec}%m5U9n7>#!I@UZc|W3`GQ%>)A(VJ~g!8HN1iLLXk;7_Gjr zIMrY}czb7lOHYd}Y=oyK|2Vi1>Cq8==mmjvoM5G(K|!ruL9LS5{&JULO5!!_>1=Ep zrT=C9%{2p{6^sEGcqizbV+vvj`+(a*7Ub}faJ@!>JCiO5-pE*_hbM?!*@&p2qFs-7 z+a=e@@Kn8jwU{2;=qw)Si<`^`Wwcf>>*hyZvO>Rm@EPTLu-T@6g8ndZRc&st$&|`s zsbhy&(QACCP+iUsFLT>(U z4iArMOu?m37@c-mxHkGuT%xPV>hBae{KGS+BmD>-vjb>sxd!fKo#eDjR_dpaiuODn z6s45oq>J_yA>@tjfE(Plk`Bm#XQ(R~8LVwa+tZw7ik?n2!}}xW+kUb2nhB}oPcK+d z=GOeAIbs@R@gz|MHF*oH3?|yD86-sYvuPxwSDtB6yr|o(@+HMdB zTTng@jFJ(5K_5eBhTGJca6s2M?~LLn4E=E~ChW0-!&kNzW4k>yo_h7bUL$}Asm-Bb zg~@akk1Q?$3pfqeoj=}nU7Yg1yzW`AHf}d5s{8D9vbPuM!%|jv^byk5PjN{q^Eg(9GEVoh`G@ z>(bZ9liD?&_$K*^XykguChc2kdiz=6V25W(t%c|3J}bJ2Y!9anGj}k- z7!`RK&F!?xhf;r}>|)N<>U!;X%;+=bV7_;37+NIE9KVnmRxt z0yct}fX_bNYNAZu$Hxc#WzC8A*f}_YI@{B=mcMEY#Yz5iH~iOz7?g_PP|23PL5Cy( z=jeDY%MQSS(QV}cLR0CQ1^TUk1;fB6h0ZG(&lDLh# zu@r@l-MDK*&{A0F^b6jiW(4wxys)HaYke0w-fTB>0tIb7 zR{3%>|`D3(E9hC=2NMHLsPjcou z&I^Y|8p$r12#2sMQZoH|Mz%rk1%HfHdV}Nh7t^`QD)&A6#eB9Wy(_xk&D8WP+O^z# z)2*@9K(s-UD%(_?n6DjOsE(hiagv;VJ1pL+kivo?ylqL32n{|Fc~k)`2cuuyJN)EH z$X}u`YbB-jUdb;m=+CrmeSQ7ddh^-!|V2v|s)WXefc z6Y@!Wv4n$3ACIcZZ;%S}`k0dsa#+|fYfxg8T65Tuz-}}^%;)i@esiLBSc5}QSY$qr zC0#E|Kb%4pmrn1;=hE%p;`_LaZzDcAkg{a=`37Ju_11^9*TWRy29fDbBPF}#-rG3- zOSCgrE0H*U^PEf1!^p`RCmo31S-_BIMtoNSs}#U#=yjZL%o7XFjRPES+2MLPTkSTM&IH1zYT5hW7Q2!R zWTJCuukm^lY`aFVR`-hF3S@tg#p+kZr$jFrBR9_hwFgU{8gL|_j%-A)EIUkiE5_7* zFHTH-*=<6tR4iZhNh3bXB6o=tFcb 
zZXhcPXQN!(x02>RhszsA4PH}H{|on>TAI-U19)Cy8^}jo1nI%yd=ASnjUhbjOzDK_ z?6Z|R61Zbzf3*LMPA%O}FoK}~6-Mp)Ir2Bb%1TT_KE-QH#{;pF5YyHZd<80V>ZLn@Z3wi>|6GF12qF0UO7}yCLB{<{Qelrb zW=qvh8`N~F*XGU7see=fgFZp~#J_tYVeDv-=8JG~?z2vfa4enoPw=l)VOY>~!Fz&| zuLpZXy7S!w%ywkvJSrDg+c`oQ^X=n|_ALU>TUW{W4Kvdf;&kiq*_DrpPhsgTX=mCT z=Al+`;x@;zqSpU;OQdGm;Ii%HHO|rp5`=ARWIcs0WOS#DAbN;!^oMF4AHgDB8HAeZIlSS7?*9%i@np3ynNA z`%dp8xny8{7+1O`JcNJb3+on*1PfS(gUN^+Xu%QK;v>F5Hk1){S+ncRXM0+Kt^y6H zqs*8j`o5STU6KLKS`H@{L!|uwT}pwxeY+A{Ip_5uy7m+iP+&CB{9+_mT^11AG21y6 zRY&Fww0&l}$qmDMn=X{VkkX}FBVBiS>`rjAfW5}!=z~roAP$E>?)odT%N4E-Ey%hq zB>C@q1dpcc)Z09y;!AEf-KW?1me1Fkx$;%{B_@Rb>Ddz>D3s$68quz$0We4Km{w@F=K z|Lj5gh0RE$$Z~MR2i3ahD2}=)LiHO9v<-b zLn#j7F!}A(jO4A<+0y+Chfbxrt6`?vxpy&hnHFrC(7NG`Jcf>%Vx7L~AfK-GqFRxbJ#d|N#TpjsV%pIF~N^3Xh z^guenp#*ZJTZ_T2B@t~Fv^<1rx7DfAaWApq#wkT1^HSQl4m0K+?!gOEf%0pLm7e5q zkrBoihHJd0_U0_#84a80arr8#qjM6Iv8wN7xmYQuC9^*hqxF!{AbfWpX^9>zy-XoE zJL?>wlxmerpqWufHXcAmo{74q%#>LwpOBuhspHvu#KT$+!>YEX6Xs%wy?bj1t7*{YN+Dvo5yny4G<-lINS0KL7yyiR1f-4lS-o1P|;0OWiGBE!YzrprQHH{9> zk7#fK1;W1c^fWZBW5vG6rlZN2&K71K9q3U06mE+ z!rw7U`ARrUd8YU0qXx#5C)J=9ZKzAFF;oUgsKsNh|WfL|K+BNZaq8wPfUwn#a8|+<< z-l-#@jbToVQjWOcKr+ai5KVUD&w9e5-GjeDr;-_6Gzf{RA67D`vWz%0z@TIiJ}?oczMXXrGFsLu%5P+0C#azuCaP56DM zw^DPLmFe17;~(wUtIFQd-rE#q9iq^iuPOezoCZN>8$czI*DoI8&J1 zg~UzY`>p*YXS3Ei&q3#)L??#}g~D{jgjZ!d0wx&q|JZ{KO-Pr5VK*RCZ3*!F!3lxj z%^`_}+|+!0eT9@GVF%5|U$s8AFWjrl@dECCY#eaDR}RKz+4^RyQ!TkppPTkUVqFjJ z=@~0Y;^Lqp1hyFA_AzRg6*I@9OLrLwK&~u$0~yPU3OR74)jq)vzvITV-X^E z|C9F?k5exTW8b=!+?kFddfx^N)^3asYINbUF=b@2G>FTL+JSHQ|M73|$$=GCr@cz&M6$rhH8dIH24SA&Q{@%CB>nS1#{nPSKEgjZj5WRqHi#) z^j@*h_e(8UPuEa%;`V&uoEApJ+Y-NU4s$S9h?@lgbeWhCqPQ{c#}7kz-qg1y-H*qK z+G|^zGfoX6{a|wN<1YK#^-0N35`@r&Wu~~Ft0z(mTa>o_*JllE_Um2u9($Zjd!Bvu z6qyhxq7F@fQ9V3~(5rUS_CMYMl`vP{=p-i*y0x1unp|Dl-B={QDbF6@;&f%w$l#ml z=6Tz&1j44At+ZKI1ptg%7Gl3Xvk%)D$o1sOxzf0MAO-p8#~W30zwYu}U|_psn%!_Q z2AkLsMo4UP5>ucb1F*3$f16?U`XIYC8Lv|V4BL*oAzgS(p*mG%mb9O+pK-E3idRf7 zP*NFrH)T9-Gx+7|C3SB14ssF2jr3sHDT2jdAu~ 
zwC2Sy^~L33gN)mk2bgrserfc`fjshrLR$JjcofQ9;gKXfK;Oz9;0>&PWGhb{LNd=t zulqa2y{vOJ)f0&a%jXyK=Li6X^wz8CK;qc=60JN0W9vP9Bf(RH)&^sprA~-Xd1TOI zQEx07W!Cr{!-<90DtdI7;s4*!RY$n@pfVV<-_CF2@KPZ(^79P=7{#?dTRDB2JV%e z%snFO*Ji|pht4{d!(2t6WWui&ejztsQ3lD|;%t;fIZqr$^1M}xW+V_#d5@y$ZVCsJ z540|HzPq)h#|5TKUF-4()=z%4C>^U;t1Q($tfz0Y`PPvxmE7egIjQw)yt;paC4mUF zfNPua%zo)1<5y?CP0!Ph7F|WShtD%q!cWGsOgi$P4b-g(@cYZN1#UEE+q}BrqHO(- zDo_k1_f7CgMO>URtfLpNHzg`L=~ZmS{hti+_C+e39Ri;F*}T<#KR_f=`?M3w=R-a@p1~BB zLriDF&)nFkufSq6B&C?jFWI8Z>hUmM8mBsU07q=v`w*=BlU3^aGR=8h8x z&~8|Hcphn#LmtCD1WGiax|9+b8wd!AeGCxScHPH|w!W%k<9YnhpPi7~2CKlXwvLc8 zDtDOm`n0U^^G?0OwG0HmU$$;@d|V9*3kPVQ`l@_n@2gsx;2Bp5l?*v%tw`%%qk2m* zdw1_yfWVX9U};!P6K-5*D06VTdHWDWv2DleGlyX;YLScX<$DYFc(5Lhlula~JD#&f zU5)1p)|-vc(y4RSGhP|ue+w4Zc3FYEnEB$9fQsVxgCxF|;b(3X2R~EnVHD#C`(E>% z`L%zXJwe5W-p3LqU9rmWfHPfrF4Y5?YLt z65|&9Dh{pxz+5q5MsWM!NXufTvH+cIsJkWxvflhC$)dy3u^71PACkY230ZwfdiUkh zL5bCO*udP>vi``M@yaRMm9flt6rojgXTUrg2^qP4dyD89jHA+IpttiL$4(&ROnLXD zs>lE5_V1>)`QPh>Zih(|<%=`o$oXP>)>xGIApgu6<6^&d?T8i&PBW!rzvU>F+ ze{ZMyQzH zD>SoTb!)@=K0Z4gWD_T7hgw_|%| zFEAsN4>3B7WKo%R425)c&aap!6u~e?LC$bJatIqzaY&caz1_n{$w(>%mD)f=&1MAN z_s%Ld3m>9W6=eUAlG4I$9C94EDdZAF5$b8;Pan$dBqJTod1%2m$yliMZ4^QaTT?}f z=MniwnkU~%b3p8Be|7GZ<^)=ubyx4_sAPm+oa`j98vIV=bNYqfMt3yNYxwQO@=vdH z>4*=^g6TLeKeyd|0?jXjRN9N9Oh(Is?HO2=229(si&9fKEe564cD$`;N;ULdOX>)3 zT+^RN;bo{*c2FDcKcQ>dvQt`_!U83_r7Gq6#%IRe>_`&pz(GHA%?iZpc|repl=4f8 zuzAoLm{#nGbOe<&V|xVk@UiUc-0SNEE+o9(XMu|m9j2diifViPB+X0A4y-MdR-Uy3 zgdYz2Vz98@fFYPfD*^(g&ukIZqM4?VCXzxGq8N(yK zk319R<(0Slat)hC`spWZnola!%#!-`cBW%ePeLY$>aEMxtoWzH z>VU&}`4d{M<~e7|fmUa?WiNJGr>E0Rf6}|yMd|G8iv)WFZDn?z_5smD890V_hOwHY z!}F#mmCR4b6r9c@f1=w6x!VleQW-T#N zoS#~ZbJKhJPx6m*EYw~wpCgXsd8@vb?`Y3KR7 zo9=Z=bdg>p*V7Xcej~^ai?MMgl}r~gB+0ce$$}v5T8JbAol=@S6Qz*&k0l9*_jaDn zz88md#j>%x9lnX@Hk+g%ov>J?ga z2NBX*aij?5y@`=01_e~XGr7!1L_fnF3E0~$ZR%)Hpw(t{Ytu!a4)Ix^!-F`QKTZ=C zh!93YhnC@SFZ&#}8?hvM01=)%dpCXRxHA#Cvx3KEnxK;?`$@hzSy&q z-(om5fN63al)*m_GQQL$wSF#mE-~wQeW6{hB7xS{pgd^WRMyv0t#G|l 
z6In3$D!1++tkgnFl08GwasesYWoM9Z7P=JgaoR3giEBtr64Hs+b&;=4QFSfpN#!{{ z6wWG!2*+0hlY{~}H~VJdKRA-|YS7n>y&QQrMrlxLV}(}kb|~yS%;3#qgJ%7F(TzWk zDTow%R}4|@e;eGW$R%!%bH&4LXsCHa?3aE3!CdzHxvk8;ikH4S9 zcbw0}pj_*JS2b8J7-!1p45))$PY1j+>Da`f-n_~rL%mkXlBb!uDg?NQT@j=HNtU2N z8n@3g*^y0^jPuTL?kng{FC`+wfM&s2RhLesY-X<>C7ntJci{Llxk#zZ>-}o<%lDsW zu^6Nk&s;wQCG#89rPRu>c81vIHFXSB?&B7cc0^;;cTQl$kCDR4C7L-DOU?`0!-o^( zzwa?$)Zq+f$W*R$*-gB3S~^LQeDF)ydjAV*i8W!Jx@;@P`J@{yJdaE>Hzp?NeXH=( zUfl{42=m*W{hVS*4}Rmqxz-S7{a(;;MiwSk?VL|kx@I@Ag@(VrkY%K67U|k|Oa@uJ zu7ms|C5@{h@%Va8f$8!T3>2ZgpgVc8{}UtGrA4&nS!6rVFENu}8_J1rTe_o*y!VVF*wqQe@GmvYi?q6|+~%pIzl+%) zfG*8z!#v$q)l5o4(g2U2PALxu9z8ioiIKRxyriqp&Fo@Ey%_c5DK$?rtBKh%zxD+h8WVj;2a|xnL~SewPLtnhSE`r=eIY# zk;ir{rhHB&Siv%N?rE%15u(CM`ogHFdMChX^cZc%D>k^`Rh?fa;768z#+4iG6bf57 z$ODyYG8uuWkejm=wksslaWu2ucxCewkrJ%W15i+?*SN!F{NaN39)UC>)hsSzm*2m! zA&=V>V?d!QRp)V*Zar0Hf3Pe+1{$KmZSHWYy;}95y<|bN=`A82^W@928KW{RC^7UL zJuZD6tw0KBSSslO6HIL?`DnS%!kVoVg2i6V0GUzUJDocHsJnZ6YZKa{YwNL2*YRA) zLT@5Pk~S@th@trvli7twIF0;M(-a01vL_iLKv{%yNS4u>;)7v6+(63yqxpaeY^H|? z3_vD2YZSfGS1&4=$hNL78dcvXkjIUG9EN5A#C+C&^<_l>T*nBy?n{Y^dV$o$0YL@m z);L$=+f9&vEary9Q&z+L*?ZO|JPL_0H4S`I@0 ze6Ha!QY_7%FB6@729L&6lvz*F0|t%9j2#q&Fuxl(`5-9t}AIkA6!iGX93#u?|JtXdkV zO*uW`%UyBpTDPBZNjwO*X;uHTaDSN(g1GSIgEz0>Fz@;3WG5h~l0vUeU+J4DsH{?7qv zet>`rK+@Zwvz@<}4#sWfIw;9pnr-x*-`QaSjZ8tlE7Sm9*8_MA9(Q%Y2<-E3GfUvZ z6cr?IpogcDg3IgV5eU<#FlaJ2QXT@9|1i)+upLx4ECPGbXBU(oxEpNcJo>Z#AT!11 z&H>fmF)}P{aH|b)lK~&f{#sVbgXxF>1Sp^iisQ5h2bj489Ou%)F_ZtZqN0d(@?PVA z&%=uW9^}_S^Ee(e&j1vjDxhJh#dv9sYx8wpL0veQj*djd|Aymiu@IJ@-#+SLFFbL(e|D6Y0vOkce(Y_UC|ZoETjoW6+ny3TrmwtHNk_r{Ielo+%BfA;du=2ib_*X(peFK?2#STATiaAqK40}baO|33nX?rQzcwcX? 
zB4c#l^&2o>y^WS>mKMv@RedzqkeZVey}AO50xr*kSC6-rFU$=#hZ}mQBv@XRN&!kE z$vD7sGMnULc?}Xha6B4ch1^C`y(?Zc7!~2)MQnZX%MZ1?z;QN)zp&4IAYJ+%CZvjL z9s|9P)V8l>T;Vq;gp&FVneKo2CZUaZ920Zz4G(WA(RoS>_Bh_c!0E55W^x2v3Ha^{ z!&56fQlEe5y0-~xz|(DJ>IU*OS>W$n5HX)hv3W0ZOCa9KBTltjS))q>HnOW*2H%I3 z`Lzj1E7#rB`}_pdMhW2mE=Rir=oJT#PGNv}?9cDB0idXb?BdjE6GGO+xaV`M3CEaPsCl7jnPr!Prk|{?A+$-$OH5E4;o&A`r6H^e-T(TNsgK$A4GsQoZ2yOnGLm19 zu*~I~YEMcj$M0co`EiTam+&lpfKoj)M=dmhA1Ly9fV zeSO;#;WL~k2BjE>z!Vw(4w)EBnSE$kG^nLmaV4!fOeLHWwPW z90sk%*cnD`#!ICOUkPo;F08CF8`NzkJ;}y_3$zOF+fxMmjuTeq)yu98_373|qqBnV zn@%NKi2)Oxec)YqTXZ?9fZGX$@Hm>A0oAZ;f)pDM5`>!IV+WZ@`pItVbbKDCg+ahe z1avdh0EYLm#TpG5gNr%Q{Kth_*BrErxaUl`GlpIfFrz&ZPbS=T@L1TLEl{dOHQR}l zEdbi6#s92`$} zW=>2shLYer`|m{#dHi}yx`6>Mb6NCjDs-zH+iUPbUd^+)3tIPTyY_wkPe#&510*dS zY+DutWbtXEB0*5bHTO8mCnXu)+zy?XSBcwt63&?UC|@fDkk-(2#C(EhKsCtRS$NXQ zuu&v%+=g{lMai-ZmV4F{sjt4jSFa!4KGazvnjJo&SvJHjUj4@nClBjF62)v;7eND6 z9}x3fk>h@42zp^hfW*p#s10Y(!y6Fi)d2{K?Uk!iEPO^!DZT6-EU268g*^Up8M?EO zZb}IE2Wh;ngCX0WGedoA7>Xl7J{~!O4W+2({CxxuthtLe?RUiJXGo~nNJ#hY@kIo` z*8+4w>FRkJaGfUu8hC$8N=Ah*7dbm3eio~~1nv#*9GHTz!mCVpZXX13b!K(t$D0Ry zZPia@3E-aSM@l3u3>HeS9SRRJKGn{~P|d)buJ1bKiS6t`@Nw4o-)W@_(%e=*fy5Z@Gr%UdX}Sitg1oh8yzEeKpGfQL5hl{H_u{$>XgKm^)UP6G25C^q zEXOuVk^xB?64SpE3TLHJX5EozL{3RRGm;62UyFo+lX0=Zc^wa0klf%&k-?>)3j#ur z>|Zk`0W((qLbGoUA1a&TohyRd!Dfh)u7wKouhIT-PZgivnGra>Zah&kIpgJ8r1038 zu2lw@5~i1k3{2|w{T4&Sk6{&dEtXmgu@}_7e*ny^k9>%$y{n@ks!s0OgFpp-{Cb7gmcCuaGRByK10cwUaHw-=aJWFzE01(WF0#1sorB4f2BG{$jfh@26vzhcJ;4N;3qi+4qRaggm^Y6}36BxBD6Dh3%5K!xV#lc7C)m zGdxzv18QMC1Xx*>0_S1wC`ym9KUO2LPFqb?9bx?1jQoQIV78J4i5DrWMh5QlUANr# zjFv|1ry1@2K{fr&xPjT(097xV@KrK2Y z-O!2pI_REl;zc&d<2&RMLx!+B^1XAxK->z_(%DZdQv^j=C%K6$@6MO$%TRqZm=S zU?O5Hqw*wee)y&|;~*;T*WxgVlQw`{mw_XY5!wmN{FM-6er#LsI}%dluaH>lkdTlr zKkre$LKR{}_vF2H#b+;}@diQV!G-%UUzimw^V!paM znPK_h=STkW3zDFymv$=;BEBN1Aao#EKx-wi$rm~Ffe_!+6gg$9dQd$~fIsilrM0LB z{)z*`X7Z@^NXuVP4Hh#BTHr!Yx5{TO_k4tZtx5squY{UHo~qtM~}1$T6pBt8l1DLukE zi8~T2gi)t#6SMVk5JOScr;TUgimRP_y-sG4R0z)4yP4o|-U2$&uD3#dl4^E)=Q|Di 
zqBlNT-RkuRP#vQS=ds^y3cxxFCBy#6gQV3@L+-VQxNyG}>qrxe2oUbTV&w@lSzz_C zEj%u z|Amk|6hg$GVe2ujfmMX;kL1$72;1R9@Ck#y=7lQZ^VOIzmR? zy@SW1nX(=xn*T`jMpQ%$DpoIcBPQ}3`n>)#S4;BO*I*lzv4)V3o?j^`4xOMDo2lH& zd~YpkFFWD|zE-E89sCuxY2!)Q>5lY~pjn$vl=X1#a4}f8xMU={3v?^@H$f(~HQ45jMu5loeL;7@bh+X4 zy8a9aybgo}T?avefz|ru`U@oF+J*$}=@e`ZWr&Qp;jV52a$*D8WCIp)V+0_vO*yej zqp_q+T;ZLun_jq-6cL4r*8f@7Pk~K{>mSZjle1aYgU@~Mx_F_TdOhy_Nh2`Y7INu= zxl3#d>~2) z-j&2yIEC$r^gwUs1Yu`i4oFu-&b|FxCg4kW0v+9R^Ly9qKn_W{drlJ7M^s;K)E#=# z5@@q72M($^O5xML)ijV~a2KHj=}p^()u6H+(DJb~4cLEMM7Ih;iu@S0FEr$e`|Ex1 zS-Ye=8{n}2b>Hp3)>NY6)4{B9e>sVN`Ko_e6u1AHgGURb{bId6lzNCmw|+2ORltSH zm&acz$Y4aV!?!Ye6yls19Y`${_?le*fPG!T$og;N5kX zPmuV{{FQw1;b(8+HiXghnjV`$f8yr^p}ttQC`4se0>IJ3703E}tRQ}x4?NmGLvLnB z5d5?%cMn-ym|e{L(_c;id;uON-wFA9-N5=Z1S6gEcfwsEiYy<(o<9!@^)g3c)x163 zw?>_9oEL}q^iuPaKOiLbM|niXgxl2zPXQaSx|0v-L3hpa>A~i>b*A9o(SVGqWctrj zQpmkp-X_tlziyMHUS(u!`ezjLSO_DxXZ7|T0Tf>R{pnQPpa1Ost>5swcW^@QV z6BC~hZRUu-@J9H&8;l>m#{Tm$glDskY;L!+$5;t~4^&X$D}a>S0Vn&BQ4Y!vAiGVI z({1HbYUSeC@9DOu0Vrv$^m5=wTa|bwfKgomj2kvc;wljx5FPeX?_D!PpgGVbI;htb z%cWmfvSmRo^MFH!f1WFEggVLF%6MU)M2U&4_+;pJ#~T&Eysb*1e>KL{m1^*Y&y};L zg5Oei7y#`w67e*4@iyGwM=37?I~M%|6|`T){Ca=>>IkBI0dd2C?pM7?4H{vnT-V`y z5avR##LV1cLXQ(K_)(s2VbWMUKY zKM6lLz*D_3hJuRhSPm+x^a=(75K_qYiXNN5(ROaj*0-UN3Y`sJUCzKuO#p+6X=rhD zFIAA}h+#=->h0ZW018c|I=1JdLbcXdX8(KHtY6jzfy;DmzOy>E zYI-Gk7np{R^weN(3wa-x*{JCW48EvRhqOF1N-}oa4AO!$8^7Zi+wOeU+ z+ML@wPP2t_o$a}hppD9-Z%0cYSEN^CPVMcXdS6`s3a51xzb$_LoDackq2wQ!rrm>8 zrb_kS^9u>e$+*xhaHBz@Dbqj>BygB)I|p$!*WS}n1Zyjmo$~P1zv_xmL+s?WE0+oUE6Xo-`J$3 z_&*ykE`4m$N4Nj? 
zZlO;b*;tBs~`g&fCcCM=iBZK#P}5S~-?EIl_|Y0((cUig)?G9sAo9$b)r zHY<3UvV6q!>&PY2-x=fJq|dlCZE#O?DKYs(DsM&oeYZn$71^*;<5_|0hmGFQL00p3 z>kSf1#W^}(F|RCAM_4?Y*;!lPXs^o7wJJp3#cPk9E46;rqtT7Fup#k%Me&tB`(o-G z)9_r)y`4-$Lr+fhBi>^`l}@b;tq}fk_Iq6KKzb*ig_4X{enL-A;1oC~$!b^CsXSk9 z12E)es4vR!Ykeum;$B;6dM&~2RU1UNN1o4Fnhi$K@)u>k)up7&bh+(J7m^FRzyQefwg~jzylqO;9_8W`oU4 zh;8Asc9Rj|h|-apMAJ(4Nc_gpWA7i*s)G3%VKX18K)m$VXr#`Dge$bNu!(Evr(E*gRW)*cj8*rR;Q9Wtm zs{PK$gjJHw!{sR!Rh@3py>K2630!y4X|f{gCQ?+69Q3>aOaN+wE`kH-;>VI07b;UT z^j6kiR!KRkTTh2-*0VS3cAxb6+ys4Rem9g}oF;Q@W@9cn+CaZ8wr40u+AEc=cs zty@fz2jf?PR(cHU(61{}>O!rX5Y_KZ`lot!@fv}ij(WQ7XzXs=OhZD+g>Q)78df~} zg9Q*1dx&4*Biml)u1~9D49fNNxJQ+Es%<+6{m#cM*{$WJSGDJl$gBnXj%89kz+o}` zK&7HLqwK6Pg>HVNQW5qu=Pq7%N!qf5+Js2TdGFhV`O62I{v%C>n`+tnA zMIfo>DB90ogk|z9PxcgSg_DL83W0MYqJEzvm$i*+d~Kzoqh~4y8sDoRe-oM6nAS1# z<`m(`_~m0f$BNn799XPfZzdl}f1ub#^|@-9%?w1DrGn-S>CS44fm$&KTUtDoGDy8RZ7W9L{;Mx+ylTgE&1-V=L9k!!w`ZF0JcLM{Xnh5v3T1a)Cp|Z~Z>*_kAn#{T`f`EKd zbtIG!6dXWs1R)60BHcnL0s^5ALk+zM30G zIs`zeJ_d6YBxY4}?qLeVbx(a8?~QkrE?C11-D#pGZxboNzsw0UuIW>>`ifkPyT^?c z*0x9`16~3@=TNvUSyjjqeo!4I`qC8ne)RACIo(&AQQd8$O3Xhq_ zcu^yL-)(N$*oAwggD?~6TgwK|nak<(7{-?9Js}=@E11K<7_Yq-?mxlm!w!tWheoT38kP#LX6IfON()3<2!caR*>ko0V^Bjh=S=O zAmxf~u8QyI9${RK+H`E~vApV(9f(4O3vxS2p075f#s)}z$tp(PofuY`x-qydV68a6 zFTIZunm9Kr`VbzX>l~-?>fvG2nk0-MSF<9m{&bbRw_Y1ASm*icO!#OZ@lG9?=EgzY zP@B~_;VOw#;a}$3 zhzKi|-NqL_z>8;021-EkMIxfWNRu2Zh7mXv9U5j}M?t{y!x4qjUR047}=O{uiZ^r~`rX zU-PhW$OHnmMAFA^rQC}sX^fKt?izE-bS&;9g1YZL#3JGqq?*+jfXKC#t|%+Bvzxbp{i9P;dhdA0{kZ|H|o|=eqxri`H}-m$do*B1xPT& z^6nuQRD~6Q6qog22B^TfKP$^R=%Zc~+*SJ^bDuS1_=%2XK8RceFi{s@C%>HBzP$2Q z+g=a&h0(L@3py%sk}QZrT2Pa>nN7}2M_;+l52d~0yVAo1yggLqpmPm$rvKjLwZs6@ zLZ+X_+jZBB{&=^xt)ghN#ps9`T7FVzC!!!50v6tu{;`U(ONLAw@w?k)z37SEgWoSF z^n1)pPA7GLR1w$El-=;4ys_f5b@p3K4bHgzZcp2+AxGkizO3izSX7gWsfDp?qQmvx z&pliT&O#AN7PU5|il+w@7$vrRGHP0?QZ!WCHtqS%i6pzU!iQbuG#Dndl3~#vyE%TI z$|ygM3pQ4;3^9CcSq4QO_>FDuc8vn)a!Z8~T4jIw$ka_l#VrR!kqedfAUdTm4{gAl 
z@$2T^v25+B`;+GUh=M51VtUOe?iNe}WW7)(KR;3PSaM`nTRCqbu)oAhUfY$kkNRCa zu)ji~Ckv__sLU`X61|<83+%CwU|fGa%O8<%DJIsR3ZLi5a(eYtqxDX!ygPmfRBf+K zE(JVG2n>T<}8e)2a9ayf41{+mr*!T zOhg=Y4sM*T>~7t`#j30O>yfvFVjd}pgKHj%*7SL)pStKQZlxzRwi}Rw_=}NxK?~Mt zU;VtPi0Fv@JpUQ&%6V^KJ7(oS-ZWM9Pc?$8h@{+*^7L@i^*J0ztfc)GjH}>b5B53~ zjCwO5pSB@Fi23$0t&+GiUcLizR>8iO@k(qGW5&1E?(N?~{P>n#gfM@cFQ)-3eG{zm zRIHcMq!xc$d&du*`$NvjmcdBNIOP$4EEvCyY3q|QnEn3l9lSser|XPgE0#B*ZR;pY zD%NP*Psz>c?_|~0wq_zC7?pFSnGcn-0~!bVl|Q#6m>D#Q&lswWL0P;HU`9I9h}=Ce z8aw5~PFGv}XuQ7WJ{F?0)P}YO2nd8oU%!4mZDb=WI7M-!93^B+2HRS6gy4rD{Mn&s z!i8HzNy$f<-+l5q-k}fdP08mPF}QB8>V{J{OVJ>5$=^XG-P(U#1eB|S?T4h<;2<;$ zMY0bis();Z+U#{n^t5{Z(Tb~k7)J6%xnf5ZpX{921RWMi^oAZoRu|Uvcj&rHJlLBq z0)-}}-r4?H6(bYSo$O+c8kw-%1~8X*#SjskrP+5lM#6G>YgJDL-d$`B&gmL8-=Rat zU8dTKJ9Bq`%Ji?X-W?#s9<+S@>fjXR{zc_|ReMatg?M~vt5Bo~HFA%kDp*fK9?5oo zx;C=l9+E(b>icUB8=c%oyDgK#`aStgt)CvmEB^PKjT_%8-RoDg5Q$pH;bw&h7^zeU zWe@oIyS!Bj&;Dq87uTR{*K%~Oma@;5kgj))l3n8k$+{KAzvAH8`b}zT^wj+U*Q%G< zRg)d#sDZhCK5<5i!{5(lK{$j%D*ldKn}!MAGj`#+W_ZL@?hUyXajNPyM%Sk9VS6e4 z`?SA{yZRO=%<}0odXznXN*`SE(2Sq~LQRv81707NR0N}waEnH?g(}3S$KnD;)5_n_ z)nclfDb+2%&z9n*E+6x+sM607uD&OS*``=U{L#0D;J&7u*D9@G|Ebs(xx=GWz4c8? 
zX_D*M6J5Z9>rt>C#2a+sLyFc$?NDl)Cj5DgNOcVK)yPPAC1Qvekz|p{T~z?39raKz z-Qi0?$vu6DyE6fHxZ*uTl#6e(>QhM(E?&x?VD=4QfwOg5?%n+e3gF5*B!~=Zg8Y|5dS!{Xlc*%@SfsjyF z7lf_`(AN}!n{;v8Ocr)-8*M@9X`^||pXMY=n-Y?;5C-;+&)m+NDorFnngdo0hkC_O z6V|QX9i`im?rQu2y_Chvv=X(~19ylG163x$kM<+6GL??a;BXh3Fd24SaTChpq>Qz@ zsx%pvh*40w6Icl2a;L)C^#ELfE=>&QD$HPS&{94#f>vflqiT0?U2F zf|c~_Wc-~RQaq+|)cn|sw|dR1dV*w)?~`UxyOj4RWzGWpT|HwEvoe6B&Fzl2*^iN7 z4243r{IG%ITc8=Q3kyC1IzqW*;)L(K`smTwf*OELWEdYmq<~h+5aC*H=Pg=Isc?$h zBi?0`nR66qv7SCuy>)DD+abu(3>u>iL3(oJI)cw3ZS1KWF(jZ<=?d>`@67PC8Q8v- zwCTNZ#};5v;Hv8EhJxbBubu*SO6N<8sr`7ygl{{0-A8Q*Y~w|FBSR(^8*OiOv zJM8nY2REXnFK?7`K6Rf}+lxkiHwGO3?4w~9j+;eS#&=ATK6{Z)WiTVFr-q+G;Rv^2 zb)GS}$Q!vLw+Mkp_5ET;^I+3)o@PVCPXTa^&v00K>SOJ>R$K)Z8-5I(pMLD>{cN?f zxy^-=m&%T*!y`~(i%mzSIo#{f+N;aPvaVhj6g5Vq_Y<1pLI&-|%T$JaoB!7Qv?XideI^@H##7w#Ja2X+3|^m?1_)^#p#Rc)XTl zlvE9#RiJr_+`mV(2&(Ti5;ioTsmK2GjW*VtfL0#V3kzSO{Gh%6A1C$ps==*Z@e6V#osO<}ru1i1Aj+w=L6 zmKmg8F)vV$PwAOUl}WNWSv=t+_`^OhD1 z+|Gg>XeD}TbPdW8diSSdq12~&P^KYM3}F{lu{c?e@0q1C_KQiaA(Ujq_2U4sVGZ=^MoKD+)P=0FC>r<#IAt#=GaAqy5llbb;AKKhvcJ}4k zqoPL2y6PJBTqS%%QB_w{0#E=lXijU{#oe6GoAU5x>?TW3H|^{#aaCl#{cKRJ&Zat zGxF*cMSj-!GLn65(@f4r8uY(=80!$>a}WR+(Y?)y9Ahj`S5=VG1M{8 zmF{Q36s=wx$n%~xKS&T~q4F!>?sH-QL$it|hT-iS1Wxu`mrC<(cev{l?39!}LJcP^VVQ H;n{xy5iF=i literal 0 HcmV?d00001 diff --git a/megatron/experimental/gtp/images/0613_gtp_dcp_save_call_workflow.png b/megatron/experimental/gtp/images/0613_gtp_dcp_save_call_workflow.png new file mode 100644 index 0000000000000000000000000000000000000000..b69bd835769d3e34e6a8d693167e8a3160ba8b17 GIT binary patch literal 147645 zcmeEu1z1(t+CL>B9Rd>4sdP(A3zA9*0tYz2kpqVgX+b(floT*11renM0ZC~Tq(e#R z?)vWo2W3WQzPaDM-^|>3{@^3$ti9G=Yp?hB_Pchlx~c;1X^PWGNJzL!in5wWNT?%7 zNXWgI=s*jKurCf0(ltLvIbBCmb=nKr_(824oFB>Z1;_0CVubTTkBY_@36T z3icpd%PTN5FccVO=6W=Uhlg9>=tgF4h!$=^p`#WH`{U^k@5;*!Z_t9=Ld?eltQNik z3nz#f*a0#4sL2rqgE~TNf7xgXv#|l2I{u;~$lf02^2=`KFeu`o5wqF?&-IHz@CGeY z5cJ1Z9f+Bu zz@RXD#6m$%jxhLh{CuIg|HDZy1BSwBalFfjDVccX|Mjuo_R1b? 
zgLu$?=XQ<|M__vo$?FV)Iw5vi;i@{YTqa%sdfc)y@Xw}}U{fnw7{tbr!vW+BjNk_7 z0)HkKn7x%b6xd9lzZ@@Q6mc0~Q0O7i58=SaBhN16*Ma4%DUK45#G!5M7t2my9W8gaoIVrB+k z=_Ln8dzjS`;DG)%Fq^~1V`#$fb@XJmpzpLskRN_e@G)oq9z@|AcKi>HL(K&EpjHKJ0k$zaYW@$QL0$&_`va|*c=<#Qe}F4+oFMRU44ad1_%-z%VQ~B% zg9HjT2blPh1AtD5jfKkLH(BBDEmz^aW%+=HqsRKcg@TYUg8B$7@B@T|lk)@^c)1Z> zPJ)4l7t#1vp+N(cH<#)DZ%+0X~Av;9?1A zIuvDxQuflJe1-4w3CIGx4PVhoDfLT~{R%(72H7_e^Jkz&_*l&%P{YIbBW!p?5N*GJ z&0j1-R1ow3yQq;96gV_BeuA1SYO+f5N^-LQGIYSM5C=y6_73_n7EoRBnsOUP)sAQbQkX8mq$04DvvXl=+!%L)HnzX-|*%UuHOLqIIS zg~R`?;QuuX3;u+I5zO)n1_o9JU)*0N_K&ThUs$fk3hOC1i#tGH|oO)}B4G13lI0}t8 zKy0jjAlZL4ufQWXaNhs>^9o|2Cxq0$!z=Ov@ZX>D$~SNSXk-3dYmA5IXY}|RG7Dmv zCz$0Z3J!v41AI)Q?~z@=-QtPL{-nW%&8fou^NDrap1HZwa4 zYca|2GF<`^kla^5)+Qj}FT}>k)*f~f2(ccIWwikxhD(95Fc%{rmt+ezvW7SS={!@* z|Ez%eZO#A9!Thy=Iu619&v~9dLm(Y1q2GzY@*YR&Ph#vi@b*_>49JE5N*-N-IXD7n z_$FDn|0jf9l>@pn30+cw*nmOyOuRC{R~3*O*j^Q8Z+)^+0Z?JKU^7IM5D?7g{suxI z)8}MubBxpDQQxm?!d!m);xU51-Aw_=cACk8p^hL`Fz_({LE-q@K>cQ-{=I?9_Xm*5 zFZ7da!C#70MTE#cAq$UFK_?PEN4cS+#L%CTU^>j5ALrN|4^!k0MnH1ugi&YY0&%o7 zvT(FLO1uM*7q|p`}!adtG;0qh7^0)T~J3kCAs zMnJX0ai+`B@;G_%!>-9*Q&&-vk-iG#NWSORPV6Z+kVdzMzyni=eBKXB{Z%fG)dT_v zGvGN7J&tdJ;9H9kkXy3{9aSX!XO+kw2}$c_&8*)Zv(Q0Fm(j zP2R)GwjYa11dg4bZ^-#ERL=XeoG?GItMHBeOOOMIAo$!Tggg)j=YR_!c)bkZEdv=P zQy`@T$d(`6BAAmST*iSxhaJAxJHgu!H!y+U#nH_c==b-*4!nfvkDIb)AP38Dg;jrs z2>K>o5J2J;L;&POY*GMD5crOs6hX&D2!9EXRDKje-}D#o;SblN{9jZS9LmBUR^w=U z+yscZZ|WEBa~;<29oZJYGA#~8=24N2{kNz3COLm0E594kCn^gM>tB9LtN(8d5KE8^ zP+138{@)D_Ek$WfS-5e*?Q&GU_T9=jn&Oxm|3Q}g#~A`}?t_zXzD(jOyUTDD`kz!h|8uhczuN=l=SB$ilbYvef`sp{jGb%!sCf`LI6AQK zpHHy;fhv#xF2es>0Q`*bKNSFfDdAzZh-mK#6`}<(fgWW%fP9&wJs9pKvf2O@=YXRL zf#*E{Th#&HcxarQI0(bW#0NhqCJQ8K?I9+BdugTyoKUg`j%C^ZMo667#0dg5gWJE> zAP4y26Sx|N=iRM=Lm!7#?}vUV&;S@@KOCX~99(!&7}SmPcVyDPUg8Rzo8kR=?VhaQ zB|!mr0n#tL$`d#h`ImgtqgDUkmS+4ac+D^Hqc{@bL5QA{YKfQsXmx*yAq+Kv%84A! 
ze*%|BE*$^^R!3VS5KwLod^|!6;FtZVmj1_rCOF+roR<@=<$;j!t7YP;i}i z;;0sI22mP*+8H=pW&(woS^-yosJ;Cr9e-14Il!Fk4^ONDJ-!!jIfCphfQ<6tQSrmV zoul#a)gCn-KFRMNsDz7oV9CEc3~CRCg5bxWPu%4%3c`aCD)sM5j1$}bD{3HW8U9hk zK!im8`-p)kUiwE717Sq|jfsI^fs>4JoR0lRQR2t*#@|lVu>PynYX1xM@vSKBNU`W3 z4(lJkj^KnD{;lV)+^RpMcfQvEo+RjTa_b*OP=pD2!nlT*18Ndv1JBt44iO@L1Fr!C zUe^MssFSIG_)h#^8VkoqLH|c;e>*h#zoGUqqn@Poaoy8Diq;5=^aQPKfY-dhsr+YC z_GdMF@Sw*Z;?AGZ3sJ%Fk0KW$oN|I(h$0I_UBzF-jK7X|95d@C)2AJ^TW%$~iFz z{uMsz+qLh*;Fn~N$NeGRL=f*JJ^ztv^EfE=AE)SV?(#3T6u~xsqPY7buJv~ZTOIzw zEI_ru0SG*i!0~$(;8+5_f2)8y4htVS{fJR;xdso&{*fv9fTDNcbxPj~@qT{{qW9rj z$^NIFKfJsQ%=2Tk5#iYWiO~5Y)ciLo^QU|3-_DT#Xqx^Z${Y*D zlU#kAMgM1!=1=$C|83Iz#3uP?k_O?-pU`1&==~o$bpJMK5Pbd%uKDwI;eSe?9SXhQ zc$4O_V*CC|(~}D8@apkPRF5to2EbptdZalQzA{Q7ArT-c$x2^!MV?JM<#VQ%rrz7# z#W7G?krjoVh}~O*1R1+l>H@dM{nw?)8aP+4UcE#h%>?Dqz#%AAzrg(t=?vt4z=6kv zP3zmYgBwFk3xj)~C+22XJy^VX@*qzRNRZ{E2-VSk`17cTphw={_0=aeKFuGm!iU^D z?OR4PlFa5gO+m;}YVycaYR6|Cc@DJ?BOl+Ard{G&gWMnnV^@W%zpDmW&ec>Hh13jN zBO0Lwjw}f(VXc%b55Z49rq-C!?t5$a7k6il)_$U-^1PNJuVBPS-m#dO1>-l*O~}$Z zg~~Ch(WiaUu;7=|9nH@=B$3;pp|k#Mqk``yMHS^O%2Bdr3JXH27quIs0>abJpB8Gr zSS+1P3m=+pkP1t66Ux-U8QDOtRpj~jed*ty<oiwl5&`1GC=Qlj< zgXE@XB55{mQ~NQNCB()Eek!}lFMmMd+bJcb{0x&7CB%pR(TP7&%e3k|Z=qZDH-$v= z3)wW!no>#IWRIM}vg)-S;L&ZqV7i`Z6zD0)SUPW^62Q}2 zQ=CGeAZx?AlK+O;XP*rvL~5D-5tG)h>E%lzMcbnM5*DI26!*GLD#+r);6%%Y1yvEn zsDnn<;XI_sdm29sE2GVIT4C<|7!|Z4%O1f!fLZDH40`R77-n$!d6bY!L0PFh?OzSj zB=LQrUqpLKfdY>%g8GudY~)K-cH9kLjlLJ*)1CMB^yle6vFj+Zv3?X&OAr{4W`B@T zk#Y3{c0ulVqq$-@`xz#Nq$R^lrPz>~kmvWVvB!|9Xr#TIVn^vm@x9$DR63vkmOn>d ze|;b=`D#v3yRm+!N&gs@^O?q1Lj8hUELJ($Mi_ISI@d$ZI82q;x_PLtEAe{%^D+bPbu${ujsBS+ z14xc4`|7EZ92UOw^0R8hfS#Aw z7m6+WMk%-Xi2cHsMC&8xFm_O5EO*q}a39blmx$O_;^%AvsJyG=1#uisuRRUVmZ96HYJVO~Z3>I` zG%~!0TW8wVunB5Oqw(F-(L(PnX-pG6XrNop(~BJ_J#aJI5uZCZ{j7gkl7{J`!p_Py z_Xb(xEZyw{rE}3&TvkS2@|@duDTw7o!?P+X#hLTYzTco23(ne)>Xz7)7qscS&_X%K zsv>{BYxle)*LHux8l|9~gX>CrqcUv|Vy;Z)YR4w_QN5>v>U%1(ujGS{PcjO}xb1SsD@N{$USP3Y~sWtcv=!kZ#@V}CWjnXN!Y 
zexf7ADC@>dle55c%stqM@;+MGFWgW=Z8kW?k_6;xXbW)g<^Qul)vyN1W-I&-n>rN~qdZRcWB6CxR8PzvH@w)cT_!&O$ z&DDJV_=8S@(tUpGVn2_A<|`$P&RaKNVr_YPY4Kv&Ou=5pUl|}_>_pby{(6yu)`4HT9P^h{blti9S1*SwqUw=fwi z8Pg};DQdL))uAO(;Dg9T*t45oN#uQ6CvZM3p7utI%1*d1_c>-iUNgzU!i&>AN#g4E zz;@h2w^6h$mqGMr6`oFd`DSU3(~<)TZ4(D3Gh2=BM*SPDr~yr+GCrfltprmyIH~9` z)=OlPQ^d(wdR-#r*Uzr6Ofhs{e+QI1^stLL@tR++SoqudRcOUb>qX6S&k=dLp+I~i4ofUyHGb63 zl~sCPQ}ZiK&{53#traC9-5d_E)N@IYG1u06MXd(xhG4By7VCj8U+)dC5|UkRdHw+E zyoM9~_)CO9n)X^_Rh{|#nD25N=rZjCrX+|9UT&g9agA+J@M=SiRZCT3gg~Ik)ZMq> zbw8b+iyyS1L6~>VbLH--Cf#)`7hS)6{?5=4!9(MX-lyGQuAa7mp|;Bc7v~05-cYFW zIi(KV550BIuwAO`37gd5`;Eet$UNyM%LU0;yOm|gI*at1JyvRYV23`!`-!`Yey(&E z2E6;ix9>?(Dh;IY4%mN0)~OPep`_zUO@>-B4|n$8dKE8yrL}z1>&bFFe`*m2rFBnb zK8xFJEDaF-Qfpn%MisZEWjiw$o}|$b?>*M-(`z$sy88NfLwg^GGwroV`x3T0gucSE zC5lbY6~E5#5O=zp`(+}D{~7kw(!5acdTgfG^vCBFp16-~Rx^YY3=_n)@7CNCn$H&p z)3st@EP!WJ5C-1O>4e% zu8Spk`F#b+xDqu`b~>AQHCC22DF(~uQA50i@m84c2P%bY&RtX8eWH?d{Zn>2$yBu3 znQAj9(B2BJO&DYXsp`!ZaX?05C_XQx8NXdmk5(6fAC?ySV8M3*vyWVX-39Tn| zsP9{&Oq*OAOk-T}EHQBwj-vyVVDmg~+{_KPm5=C*0fl&`0d{V^Y=NKe zTR-XZL3%Yp?NphJ7A1{ePCKh6x?ZY0?*R>PlF_v0(36aA*~XCwBi1>A^(b^H;Jzb+H4FfM@LRYh!itQ!E-Q?olv+pTgSWIa-j`{-o3GR zF}dJT%)*#e;^DLA<-mU_P?~C5OO-YcD8xLF(a>Ii^nZ!J0Q zs;>lyHuBZsih`Nvoj+y8aX)xrj`Cbz;JJA+Wk$p#m4i?A{zk={YFoOTVOztM!j)2G z@#>)XiqH7ZK$xDSb_QgTeG0I9l>R z;%(Kv2?3&Aubu21T2)0m^A}bvC~0RfUObT9R?hZHcyOPax$!~6U4E^Iq?u2JeUde$ z`?CzzL*AS>zzf$Nd9BvZHTNJJ^-);|@TTu^37153M^rwB#;;OM>I9Ps?3>m)jAOeS zc4UQe@5jb_%4lX{wu`)W!fN zaJD_t^g#z&@Ghli%d5McnD{x+J|n7TV$Aik%8-#6_k63MNiXoy7_}3d0MpLoW)l~p(eqh?0Uu)W-;Lx74R;^!m!WBnacK(0v#8h0QQ_JMVziR!TECzcPDwY zg2k4$Ss1}BsZ%t476I|+<05B6w?O3?c&m{hGAdZSX_)nN&2_>osGpfOubo z0X#W{19@EN_bwcZS~TDaj?JrP{E9vBTfN+bizdStNGO}rJ8#9&Fs>#F!bp=RKJD`~ zXbGl<+>frE7;9@hzg75~NqexM-I0`6cb{(kaHCj^^`corj{kQx~Lg%wXw6MoHpm z_=rxlfwRoIM`5`SZw!fad>YO5L_={Ir^pi#ntIG6b))iuoS&_9UX{PKXJ1RN!57Y* z1-ePxL=&NM|GD-J)$>yd_K_gzzAQb|QdVo8kCgee@?!hxDhc~hQkdz9EmfsPyR14x z8~KmA+3htWx>3|Jou7qn;y?y@R>zw%!f_x`Lp$_B6Y#hvIIh0>E0W7g-H{&Q(UjM 
zpEcp5E9Mbn^p%I`%T!xRJrB6Jx~EnSCU@=_ODUjSBnslS;*o5o>lRzgrtY}$I-&x{ z?JHB$n1gH%7OBIo3#14`n(;#NTF|EN*Ltfox~c_llXNkOQR1!LQls&A78d#=_!W=w zE9mnic2vu^?V`%K-E&qeJYRbpSWWEq6suZ`EG1xIF>~KhrP;M@w(s*HlqA5?$6XtK zc3>;=zWEH{JI0&)sghw{6V{a0>tvFc6sqn5Fymo}5wNZ>%88a-HyowU#WQVpA4%hW zk`<_tPY@Pe08JF1SD;jyVVseZgt;{m$b9yM47*sBJ!RyrFmPEXjIrJPoRyMIO1hS5 z6Bo)YTe(vh++lsI-U(`&q9NJLX51iVH{Y)C^mcHPU-R|*UvCk|5SR)TM=)yI9Lk?} zySj<(^QQ}m7wTPxn$QCYM=-C<++FjNKUY8U)he4QkOG~pW)lcL6j>Q+b7N7qZOVnC z?mn~wgv}C_)qL=owsAra=Gy{J60^76(>xbIf)`Eku+aqC06*7!l53ru5+_gigJ)&N zidU4g#c+(@lIkrB>bDhiq(zOzkcGtrYvX`H-Y;h!&q0*esf1SNZwmP5T43 ziRN?`oO~6Yb#6=YFY_wd_OdN5%@FRdiq$P2dOoqDcmXnHM#0mYb`rw|4o{@7A=ZwZx=%>Md;=d4bfZ^&}=wUquZqMfkF2KnYyA-tMGP!Ga7cE|Y>D@&~lwFQ|MZcmrIh z&s`c1CJAQ=eGomL+46OMo)lS}aduoi_^yu^PJs z$dT+k5%XkT}WNRW`PzcbX5=fp3_a8Guyi_y*Z0&Tw?dua}z(F3XSP=8QH;|UW7h0NqoAiVc$c?%_fS2 zE#|dpjTJc?ZXkc_x{8Nt&(_gdgZGLC1np zfE%{i+)qqg@^M_-Y_)E7cm2%N)QveEXL+x9OPbN8M|ZrL`eFxIts0ZvGoX$+Hw!p<>Q%P}eGQ`>}Y1 zP&3c&2VP@UgV!bkFYlRe%hInX2gKuB`)AVx3eDiJP*AA5UK7;hNq&XnfXS>KSC%6L zg4AADFnK^(W^kaEFUd=T-v9osX!?LhUPbY`?_^cTO>(A4ES#RKdG9)2I^EHp*y5+S zvI#W2$##)Kc$I)^c~u9_E@9CYYa}oynqpWrin9!j&B<2+f;b5mXYv;2L|w!Uoa(;B zybG_aNjnRpv3-}MHD}n$d~dD#R^Y1VJbp#L$i*g+9iH_PmkW7Ivx&^t(P@f5EWm_I zt(?jEsR@O-X+For#OMu`<8MZ3 zTk)s7wxr1C+`3TpU=jzA5mlOkf{u!Ql6((M2KFWbFF-`uJ&PG$B-yWs+}&Y-T-U&eVWyGD|d3`P3$v#?e5<@A^qnB3@tFgke;dH3L%^O`jGa1*S zx4R1KTb<^JtDqn5HQ#yj4o~0(rR25q*FMu>d&bu224>SRzSHY^kMYos)VMh_!=fQa z)S9bEa`|}}50S6}$t7XKk1NJavLJ^V@WsGn@5uH5&M!2$U+Pe9WL*dMT5QM%ZnI5+m&}uoSd-=NeqzVPyKCpYS4;4s8pymCk6Cbvyc(+9W;M z!Q_ocTcYpwiLz4O@bAq{3~-9B5%|7mO3|`$eQ$_9wL9RjRxy+GiJP8xu7z^mc(5=_ zqgP@pSYZDK1!yUV5*!n^J#3b`K(HU$yelPHTx?;Hl{ONIYKr|Lu|>bIk;z4pE&?)% zKRWxWajFU}rGi|E%I2C!3hC%Mw1L}ottoXpUAvI7IU1`|G0ba4lb|br$GOsD)WjQX zIup0%=O4jC#=pu!u5g2sA}aLr`;dg5YQbPCD$if%>8K}d4dSx8(^ zWTM+P2_GkQR6s&0cBx&5xkZZ?GcO)%bm-@4QbEpmVob6^?b;B9huqPMG@xDdQC@}e z&3OZ9%DD#{)$^?+du}HVmaBit-S7*@Gj>bfuOc#RctkQ_}-O-^_Wqn z>U9m&H%+wYyN+?YS^LSE8!{ksUKne*SWkS&ZZgGH62bhpw&pjaQIUkUBa{2 
zwL*2=7Mf`0I`)clL7sG6D@n>~DIq>GQdo9@{t_H0GBjSj5+cNPX=gIi9roW}BwfaT zZjpgc!9{-0{7H30@V)$8?v(}}P9+mM6JAfi%SX{64JMzm#UhNIX?9{xYLjZTkBd>$oJ#k0d`Z8R3Cow;eY>R%s=J$9`n19} z{<{3FgxadMsRF{0U0dh-v@$z1=U;knzG%niCitA`I%tzRnBlaQ;<4Lu8%j^pTGVlO z)3lepWF)^jJ2lzS`1YcQi1K&}1CqnU0X_7Mp&%vJZh~Ns^xbLueX1}j7fMMSO1!GP zOovxw7GsvEg6vDnOkQFufXkju(!V11xI$#jrfsF>%f=XGN}ri<^#1$Lc{NE>DS%7Z zB|dFY2;#-{+&nYmxz00}_h~I#=Dk{($Iz}>a`?Dga+&y~Xja#PTl~F1HsmYBlXoF+{hHD?EJCXwA!+tZ^(knnD^vLeyLSYV9-!~A zQ$lyPN*=F=;=S62Xlp)>47KITMo+F@%ohMqzMA4N)!17HEW{0=~hqPTt1cqb%Lx8#WSXrJ~tLBl9(%x zi^mJ7_0Hd(d9Pru+NNA$x>hfHel0=IRJ~&8;U}`&OX?eK+*W5@&TlE7MyN@0glQrgSRBsh&!)Yl`>?ma)m4E2)OT((dWa=^e*R-Uw7QL~3ul)=c+nA^yl^xsOIwJd*EN3;WfVNRwg;+79NBGi6#t;omzw2K#gz3%G_UHwzW+VWg)kG20{- z)_ZaWo022>RWp!GptY4yLcP|;pEvTDVos-{EhkHuS9>gkP)*BWJKn#7oBTn8Uq)`9D-7~4(KNe@FhbFm}a)~}a(J$+OaetETGMo@R2|G8C>>O-5E z+7g%4&irc!{mi|t`?oIcc(*n4mO5|KS~>VlyT$WP36}4;&2(&OpH}uRUL0$Et-Lpf zL*{iye5Nt}nyqnkO^IcryqHUQ_4bwc=M4k_iZg-~Sok*=^VxKV1*V*CgDF2I ziS04$w7z&YhQB1XRJ*~XU7n*VyV~!DPZ1&TSYA`(#`2E$*C6$*g%T?R%HEHC-~4htnnMERC0GS}ADn zPG|y86S^B!b^Q-xjWgrzzn*k!8RUz{G!TwBP6?@4d;xNsdNmt&n}QJo3>!u6#(w#Q=>})y zujKVHSr+gdz>`tg@5U&z$^cy_6kd}gXqr;HwerAPpX}&9vSCmVVkKx(Kg|03qP(jQ zh`v=Y=ahw8@8&qPBO}nx0}t zS=YjlkmYxq8sxH$$Cp^J*%{iIP%wMR7tn}2IRp}F==!iB(|5o(F7qiKyR0(#&S}+) zWNmgOR8Hy8v6am*H7Hc@pv*Kg%ZXo7D9p_;AxT(W_05E|yDyHeU(!ZuhVmMfm3#W|Hl%f*%%N4RaY;jzFf2&{ap@rW3Bb zYd6=m-I<7Kj=L13mQv{}IG)6>{-Q=Td6O`}g|lQdi5s-_Iq-gDE_vz(6>+7i+Y*;b zHF{Y7jkf`6UW4mrT%tPZCI?N2Gr&ocq1S9AM)TU@i+Y->SZ7+z2TWMG9I!lnB#I~5 zWc;!7-pB4!K#MZ>?T3kBy|3H36p5jb6k|!;b?)T1)*NZj2YITO&T`oC0x%wq@iRR40wnm$>3q$;AJ`dE`D+~&dxX?_N z6`t`Y7do@<#4i+^5jI!eW1u(AUC=A)y~XW?*%VjXYT@yu5i6Q6o2vM6d{r?jLmjqh zlD)>p1};OJYFlVb9n@$! 
z0<3#pQPSgWFSMT20cH!Yt>E0tbYjdN$mUt)TVCHdfHSfdx!MHYqUjZrmejccD%pw* z0~_(D9IhBH%Rt`C`=zdFy?x&Ers4h-8BKM@3r9NyudV?JiJ%+Rru8&mkmG5 z4!I|YQ-rbYvV#W~eA@jt0Ip70_?pGx;lMwYse!f&SF8#$bG?{Sv$p%r$#mIy)LJzt z-Dl^Sgs`eL*;<$PqcC&Mpkx3)W!YRc_fu0O^h zAPn|zgT;+BvQ&(Rh5)7;)GMk_ZVg}1|D7Gt12y_Rrspa0X;T9w#I!v{Nk)rGH@>cP za(u0K&t9Sl()u*XIi|ZoKjC6E6(4zL3IJ zD?`hk;sU-8>n&F2_7{!G3x_tVg+=26UodR$G_|;=@|?@J*7fE$yt7@OKyqzJ_iQZ% z#yi)19_u+8%6Gs!chu;YZci0&lo`*Bw6lnOBr7x@(IMYKij6&J*krPvo^gq^Xj7)V zOZtVUr&>jqk|08ud=?b2%<$+lD^_z)5fMrQWleCf_N6J~24qp6$*l+bO-`COs+xWVlgbH%Hp zMNJHIHrM`hK#h4{1|c)?zc zf|LwU0w@KMlEPa0*xqYzi%shX z&o*K%0Hsi(mj;UaC1+lJZQ*%&VP|YykWzcksXAStNgigfx6u=x6(g2!H*Pfgi52~O z1?4ZwLlvVW9`PHz>d3V^Fyx+%ETR#aSWwGo;k~+^ej%?{jP5;03pHa+c*^Gzx3woF zO{8reUq1{FQ#mX=DXJT)u$(B43N;t!Z;bnJ^PDD*IUD*#yk8Za9?grnfSPtTwqhu8 zd2IVZXbi>3t+zasubbsp9BYcvu08?_qZspEr@c&fVL{hS2NJw$UBkNe2Fh>f=rDoj(Y>w z6F12kGXF?+Aoans1e*)$GwSoUXM$AdF9)&HJ^#%z+CB`E4dkyDUoF2vzTW(5g=xn) z^7#2w0D2p99bt-0iOjobq<|Xko)C!eCDapkkQ{mIKuCvcNxI@sxh)cS z^Nm}PcTnRisb{7^QnF#nHN9@b+VU%BG;l~ze_he*n>vGvijK1=A4YDs>4z`j+V77= zJakUdYxk0=5s!IuoWM;IzpGzSS9P{I_PTpuPbE8E;y$LbDwAe0#jGYq1$H2u=fhmo zq5+Ye7&pOaD*^H-(PzDoaLYm9ML~YDJHG3`np5f#ei)UA{Xrm&m+Q@$t`|D9`5mhG zb8k0>CS9gGAumTGM02|0Et2Cul)At)o|30tqIp#D5jIt^YP!w0m#}|DGExSAbIM{i zqHqH8rj2Fe4LPqUzaUQ-COtFnJ;}gRTE4+{M?{q8QZ!qf6A+o zEhCl2bM?a~{eqiJYtx^i*{?lUymjjh9oMQFp&~BY*O#mM+gy8b`zjUb-W_dI3#;y0 ziL#SS#H$q*xlMihD=pT_{KUAisnMyBk75Pk0XocKKTMr1!Rxx^;<5x)bw9qb-qO4kN3+_}Ee?Snas^m%HTO}i< zb(sz=0H4MXROOGQ6G16t^C^zUwD7qGj^;D`c36ERjW^|<@ST7p(rfV2Z{JiYgSAxlC=H}6jB@>L z89maHW|sDgjp?ozqN|_%@N?{EU)6_D)Y3lKM%pX%+S`u(5INQ6MvY2gcsSL^5FgMR z!AxLFo71l(I`bSn*Ur{`m4E5+8fPtL`@+;g7-}8rD7}X8eF8NSHrxzMk~1i559inW zVZ)znk@fSTy}VB^-8s!>K+JIqBIo0!e2_+?lcjBK45^i#c>$NdQicRl%N7%DWY_a8 zKiDt4-O#>8&-EG7LllfO>a$c?Z0y0PsYERt{3sPf;!6}H1-c3ueB(6gGU(JDR;8%i zp^jF_gt9scR52$PaWp-011g<;=r>1;a!m019~fwGrsf+mE7xh?A~oE zGV(Fi5KAjn!)Iqj0rXIgiLm!$FT#lFY)nSrW~SQ?nxv}Oa4i-hMMTGKGf*rjaI;ew z3Co!1jB@A`OvY0vVc2!`J+Dl;!gOevYDVc&_kmf}Q3%@Eze{E*I=V%#Ks@T_q?$lQ 
zF}?Z-fw6~cvNt-L_i-#h2nx`##pIeQ){^em%#=eta!Oiil>N94oe9%7n3$#YW`Ciz z2xlg{^rw%{SyAqUAdULjX+G0T!!Tva_IJJD*r|3%ED~9%(ClNKMk0t}S|1_auDI2= z*pQx?FL|)*P_o>3emuo{KlrpJ<9u}hZgf1K1*uYBLMHuP8Z>g2FtWNUS3Q_&p5E1P zInsQ-Wweh#XO4~>T;@hOd-g0@5w!R9QfKRI<2&<0HUq+2o1>A~ z;y13S6S88W9NvuLa*zVu(|LrRWApJL)qt-QS?*CLq3h&4!FV{y8m1}pZ&%&pGKSxS2Gytm0eRHRHR2E=L(~&Q`3EQp8#J*ZV;G}mW^<8t=s6-r@WR# zp{7JwIh&EklHB812LMgWt)dI>hrJJOJj$2cU6k3{noPmIk4|)l%*aE-YiF*=zk`k4 zfQVBH1v%<^bF#RojUu3%8cQ94(sf1})EORPJ=sX*tY zq5VfCmWE!76b_4IxAYs`x7KIb!YKsqnMR6t<&-wXa>Xi~bCVr%9mRFqU8*0?dRcx? z-=;(^hC`n@&%EWH&ET7cA@>E2?i(&ix%jja(-rm9E)TN}-(GAR|A-kPrNL0?3L4<{ zs#?~(B!+Xw2{=mNn=1SHfsSF+Iq_RT@$F`SM2R7fWdBO6CPoJrk>sjoSMREC0BF!s z75;+ei;07K_tEIHjAVALbQL9TN7vkIS=tSI8^c_~o*Oq@eJ*t4QD1z&ximVbFJ=IJ zWWAYZ`{fd7Wp$zAppK&!Sf@X3*Y4KJXaENXa(?n5p`qAljrZ9)N~HbJKdnRyHS1uG zzw^UrnkyQ%iT3HKn+u?{xx3OPY4rSRL#M0v)_CkVK#izqH3>bh?J1UcQI{OtioNyz zCXva93^(35zLFIK1$i&kl1&D|P-i0P@5*9w+mK6wSj=BkP z*pS%F`UeG?UY9B(_ql?Y_kp@)E3Kk;a&6b5jQcC~BtuC#UQavudw;n@2K5l6@08T$ z-LvS2j{AD*(yqO8WWL1Ss#fTku#k7=rvBF-HDP$G5NHh)dssBb@o2wYj5CcOT=oM= zd$4>d2X2}M;ORA}8vT3$(s@BuV+9HZg$NSin#{Ay=bG}Zx>cQf!DJ->kxfr&RXkCU za)9d!%?K&rt(ZG~1y(F!C|1>_3+3m@WrBKKv(IOrQ7q9Tvo*WoI{hr;HB%o3wd37c zU>lbK;$zQ<+?@75uU~i}653I0XKLj=SI|8MkfYTCs{ty zB0UZY+3>|U7bklZ>HGe}XE9CVToMNLuL zUzsZSs+X;G>F3G3dpGGpu{Ge(Cf>CN>}g?JhnnQz~!eqbN;;kd=`V42Vb8?p8nDhc?FHgtLnjdomQe(ifBC#X)wNlYz5a(@%WilwGmRy>*pc`?#x8>Q-3AWf1J3={9|7HHFm8`F zHr5ag?nu9~yt~q-yfV?kp`rRYSGd>|FahZTsh#7l9R%dcooP)Ad`=}RKKX(6Hk~Y; zA*H}gM%#&AG$~n`&-c0jif|A-R)o=2+Kp0I#o@k>5SA}`H5XSJFZFmE7H?X{sl9y+@BzydKVTz zjvm0-JOZoO(>6AHIwg0pB|(6gaS(u*ukBV}b3EUv^lcwxRI_)tPoces>W1Wi>)t+UW=8~1sW zqEkum8TvC1AeiRfajc^Lj0P^dGoP|p+~-R&Tn(8O1_@gU5(1(jp9ZinSZGX0l@`7P z(%7^;msV%^a%V<~;sN_Lddot)dzO<(b|Fb+B{k>)s0#!+VoHH|F!PZKfj~OdcwVr5 zaqX>VhHu}j<)}Zt?Rk%rG&c4;woR)nC}bT;)3$TySq66G80rO(xm0d7i4@9hNnN`g z>)~?GH2H7};b4ZMF{aC?`xA`s^2$ zcDQGGWw+O6Sg06e+_5FY^tp2Dd9!2P^oUK<(!2mYM0<$Av={!ayyYgt5tWf?=+k#5N&mEh{0uZD0QAg8;Q<^a}39KVmMR36G_`&kNErctWIFU}tCC8#=} 
z%FU&7HrRrcY^w7av38a`reG$Sy&x6aCZPfqT|V99EZuv$&(DMiiwUWN(Q(LH@idtS zZiwVMT=3oG7IoEuhYs$9_%Nqql3<~bMaFR~H5bje#I{a~(GGHYUM;(!dN-@i<359G z6p0G)HMDz}K=3*`lP8}QZk$pg7mpKkWF$E!NTCr|t?;IGB~SLQm=m~cI^e})zrWjV zZ=R4l@Vb?++1Bs{;5V#4Dcl%xS5uE%5#v%&54uc6i>~q5&s8>w+9f$NAjNegcpZ9? zEdDV?AV0MdAN#C989HGw`#IV3kjJg9r}!UQG2jXx5TttK+_^|F0m#?!C~Gk;i;~yk zVRu%RxX%k)Ss1>!I=u3v+Ch&BZ4DJ)j5gD&RG8pPCYpT!7mn6dJdl)>88OC66ITT$ z1(GHGVylaxn>}+fV25>GsXVm<$QTyu{=zdX9D%efYM9Q;PGO`2t}N7?vrEcKwp#GL zdKTdmQ^8Mb`-KhTvey+zrNT@m5ctdSJUp06f!s>t&?JV!d%-PsWAJ z@|659j;#Rea*tRt>MwHMcEfO=U5CmyeIfzi8vB%bU z0@j80m1O0ctk|07LB_NGfml9!ubVg-p1<`E-LuEpBBi5f{gn??X2^lla4+*XU-^f9XcL2!(Dp@tPIt+?i+P_#uB#k z|BtNyj;Ff+|G;s(BkLF;^Vno&3)vj0jAVzC?2(Zz9DDCj_6QZ3amYMGwq$3|$lfyg zK2O(my?>wI_xp#N+~{@A>-8Lu`{O>IhneuM7sWM(ZveZ-`JI{w6td zF{wWKY>%OiaN-@^d z<)etF)JV{tP?tP$?Gimp$owK48Mq;->@p-c_Ib_wn7%Z!q>3`^us+7Ayy=*N_J+QR zi;B%YA{}jj^?eA!5|^pXrMVVLfKB1&r0eOVtvPVv)}3h)r5yU`pSB_CCN{tE0$c0b zJR1#{b$U93(uGVBuHxgZA3xY03Aq~nk-GauR>;QKqlaS^T}u7K^^ zIJxJwtK z^>giT`L!aCIMY1Fa7GdlEM$Fe6Ccf!7m2wuwRWSoGOJ05&4+_+`}7S^N~@{HPL)^T zOg<&Kh;U9C9j|mOoBU4zbv?YZ&OV89DQL3B;4}2ww?agc`<+aM)`QuL0#+QAB=q8t z70_>Iko;;+q3FvI&ixWL#;^yP9`?fU)SD+C=8HJp$+Z+^+ZiGj+Gjs@k^oe40S>9)oS}wm(th*D?hu~Xaq|| z{dYXa+ZV7}#0$3A@Q5{v(R#PFslh52vnXB~@5I5~h4xTsQ`46-1Jl<~kLbg%f|Z;A-#Im1DQM}pOlD| z3fU%C(faJmAuFad>D$o7X~*D^3=4DU`l&Ro2*YDf=k2SR}Q?OPARs%&mjA%kG@az%19>?E-t{+TR*gmvD| z7<{e>cayR$P>tQp;@We4KKlMu?#F?bd?70$?@Vf<6z;`_{En5Fh5ni0fwsU)6CYAu zixl%LbzY#~*ns|+77J!3a1_V}=qt7P<@t4*FRYPk(d;Pc_HRNX)7rM+-+26xZ~t)t zHt$1McF+gP)p5mdl8|6ca}+Rsq6VpLh1P?{Ys4qs-^II4L>}W^C&&JM`?4OIJW}|V z_}0RUf*?V=q$w#8>vPAthUMenonNJ>`q$ofg^APZ_ivDm^q)lV3%^N`>O#y$dg6>Ff`zuW%NKO3qz68CH z;R|42-R9*nblGYrzc^M2(4j!qr0{*BHu{SAS5B%yh86q6!^WUBh6eao`^X?7PW$Kj% zm@aAg3m+ao#!PhB3c37m;x^;-8w<7W!%8HX2NM$o#7WNRg~59FY=!pX??LNY+W>J9 z<~$+t9}rEK!@HB#hX0*kY=;At4>r5`&e8WgIA4%h9U42cP355Rb$d@IP*Gg;JkO%l zu`J4dVd;9&U#=1**%`f>_s=BFg~$iGz0P1wY=ls$b*DfDNmIng{^4!@cSuAo6^qc* zeTVLH9u*iiN!L#NuxQ`T@ZZVz?>Av`?xPSdPU7Ex7m)l0%Pt@nJK1ZN&WcTA 
zKXv@ymr5Z9X6`xht&;k`6GF=XGEJMPmEx=H?Ee3MIsvx;_(MT~!emxv{ty7MQ$+Fd z3um>rIm)h#nE&s6>W~9;^{8@Gr>xJ&6#RnQ4lihUXWmzPt$h729fVG`0fyM)IB zJjD0;qvQ!yz_?RPRT%LD8GxLdAAnO=AS?1GF5;u-Y zI9QS@;blc8cgTl~cyUyMZw34le=_HT{rVKr4-|`U z3$i|?axh_+nX9X|!ck4i#I)(ox1Z~9#nAnC>$!z6AwXIH4lhpkgWiMpdS!3Q?HJwq zZdU<{PNwS;w0VxRAly5jX6B!$Z}##rrh$Ym`^$Y=Q_l6+n%Y17u1S=>a>z6J9L&MZ z!*={*mKdP!6HConOu^P=HK+M+l_jG4 zEgIt@b$R%`jG1B%a8nb)vPUD2_E$5W_W+=RxWsTsQVSuJ9ew?^F(dChcV<&>LsQqt zcHuyHp?OJa##DU7&s z_zFA;U7y3B2$yV0m;4&XA9`Qur^Ov6VAq)aD!$-1f--1G=*A{nL(m%ko19c4V5&IV z=RNB8ygpqF7{k0*pW=98TiN1egxvvg*VuKx!X41`2HI|=+yo1k^FS8)ewAmzZpCcQ zgt&V<<6YMypQj^eFlXi8sbUejeTllUvJ@{@o8;UokI%qL|MVPCR!SSRu^G$Jw^p|Vapxbxt7aRWy+A+?h z&e&>~=8Js|4eo&5wrh&B(C;C!!tidE<2|*b86ou*40~u_-ZVy0UGh}PMrmjpkVaL{ zx+#p)2JfEVf3III0R4dQ2Hocu3iR@y^PLuBoZo;zOexE`m7RbPUb{n*6C$+VkhZ=vMba`H-P^+46JVYO8^;L=wB!b$|y^7>B6 zZ18BVLNMNDkWUQs-g6QA)awy$TgFMJ>=fliib;!+XH@CrkokbDD6?RvN7BAS@wh}mFvD|Ix##I!L2p;>*ME#us49I@OWp| z;&0OwyjLRU{j?S8H+1XgTuaMi%Hm8R2B12@FpMzBi~*QQRk&tDk?SaLvR~luyIC0y z?uay%9QvYw#Qat(H)9ssdKqM$>m9Wt}f2R*&s6ZdzXc8Et4ewg*QFXg3a7@ zvpydfyylUq<|=mgogov<0T<-c}q8V zFNnuSGy*2vBy^$)x%3nP?6ZIjqZGwT2vVx~T%&$%6%SF7Mx8vVp`qdUDLYC&ilqW* z5e=gs^ffDW>|HeQaEu};VzCacA815=EYy35nL~UyaL>dmMR`|?2DZj`6>HvlKG9j# z<0a47zNidGUl(7iRg0ywBzzdr`@~~XC1KSRaJ0&mVNyvlQ+3Y;Wx%fFtkD5>{!7;p zZM8{_1(=DfporjLuiw;HL%A$zPinPg{dYQFI=PxYd~CSK{k-^L79aV{2mSI=TGoDdmh4UcV52?-EQKej8foz}r~H8!yw@Cp4n96&q`6YW{~2u4B9Z z+v&h>m3?q7bi0C)2V_fWrRbMyR~J*(%L5}M4SOTdH-KkZ+c;+Y&pQGj>pdkeZn@ZxHgRslSDXN>zOk3*`uz;?2U9h+7T@1UJkNGt&B%qOqRfDDf%fEM$>!;j zJ(*?$9ZVdbVU_Yj=`B%ujH)8eBu6jqkN~RMz^V!(57)N#QLA8|%>Gxl;d(b~`%~4Q zK&s&bs(zY$1*p2-`{2i~0D|I~#7V8v@;)F5i@c>_;6LIr--@iAPBU>;kApqLMf8Fr zM1Zn4b?4y0H;h_~dV2`8%%pKs-v_5fYl~l^EvHc~XbJhVS)Ls11^NOhioPxl=%11X zg~RXhHlKMvq7?N$n($yipQG6({M5F~I_IU{(EZkjS*_&n4ii|E94_JQM-m&##@_?H z&>eZ#+r-iR&xcSyC##De#8(C0c2v#=riE-0pKmHO-2k+dW-&LLU9ZOXBh)Z^%lu6| zWe=n9>~a#vuor*Z15o)Mumo(L=EX~&bSHBWA=C%UN98Z4>o&E}QvY!Qb}WtSO3OPE 
zlw1#MT)Jtb{pXp&$o=i8kdCo|tN-y?o|v4yv=yBaIs7Ib%pu4Bx2^3|%{$m$aKemZ z-OMtO8dE@X@xur9PHAJRSd-bX#wHQ=x;dzM@+i0wL$i6Dpvt_vcdg>BU>{` zk95vO;OXV>U*(Rv5o&`6%fNJyh90`(d?tNSup$29rxx<7EI>ZXK+65J)#a)14mU(- zrIVBmKc7O{{XNi17}b3UUOFH#lblNCA02`)SeCEYwUfXWGWPJDoWfT zZi#e3PTloJQwg=<5l~&?NUs9ZQX;^Aqfb=>l+S}@c<8ry5*l}WHTl~EFER3%d`E4c6xz)hj&mg{(xkjTz&T;DfW8Pl41w*?R zi(}7x7dP*lyHv>Gh`=$aNOIK5D{LLD(&}vS&0!1ZRq<&q4@GBsnr*2v{h7q@GuHF;(mATkZU?CZ#KUAkfAQ~Wm9o8bWlGq6+^T>#)!>jC*yW+$us z?4vRc>0uq!GG*+8Xiq0+*%EMel5Yxz$>+7t@|yIEFAg(GDJz zrV^~=ZGC$q2{xj=C|gMadHP%x#n(fMFdQRhA>s;^`r~cR?3qByF(_=3U(z z4)_HlZb%~`fJ=__0*TB6CzPZ(v!g_-*Dgs;V5+EhfYC=TD$5_-qe3qIFy%fIi(JiH zphD_m*d|>kS^m$g30OGrUyx2p)*I5wq?!?rvmd|LU0?xK?9(7V5%ol|+wI|jzw2#g#oaA@KyW(szsNU zYPQf*uOP1V{LwaP+izCpZJ$L(2tTuZtdw5rWLm-I{dU4D(NTJC+{mGcX}P!q@3GTYMB`;e@Co-#K|DdKGp-J5-=ZBy_-~|6W4P_@A`>X0?Ko|o z9pq{TgzR`E6gEJK!CPn7gplgyV7iUM;0zHy9r$}~1=WrIpb#IyB&U7ng&<983bd)$ zLg*rdhAWS=lr0cekzm03dWBfRpElw~1dYDOKo(fOcWRhI$VA|5hE!6;q?{qh$X3c= zqeHG7KUUE1)vPTgMlEg&av{(?I@MMWf>wny)rw~`eZwIAH(DhF84)ErWp%T+wx&K& z)?q6wK9DN;Kxh|Fv#Y#C7kO)|`?Cn}K2fs&*6t+#F3{+E=c0XsK!JplfnEQtL7e#- zfRThuTn)bJD1XrIf+K`xEJ}9i*;}J!5^8Bp^e0Y+5wmsC_9g~4|HYnK5fFBJ9h>Ve z3bqQpU2nRwIlsM-tfCrm&JaQbRgel>yKRzo-j2Z;6Pz~Btgb`IwLKA8{=yX8M5?x6 zaPX8+3=%)|6!;xl-}J(0%`baa@<`R=!3L{7r)(szl50aQKa(1Yf5T(ol(qg?MiWJQ zJwHjKYMK=v5&A};18TPo)sc}Ed^81D*B-S(A`lA~e0bM4)-n-crw5IH&=lFCUW$?m z_gz1x85gnSew{lIZud1^O+Hu>!t*)W;0U-pqR|c&HCW~*3gO)NJW=<<>GP0}m#LMJ zc-2n$!LATziHLRIjHPi(toig1=|dqH4DQn2s=ON>d5Z`UOt8MAt;T2>Pmp^(zGZEN zk2wvQg%N;P9XALnuOqrGO`$3C_v#~mvA+jxWm5%qYZX9J$UImDcuLcTP8NE;(UjKQ`Cx7H~b2Fgm_Ue$cXc&A`XRwWKg%6 z!tl4cIz!LNv?ei)C4Nl39~lAuCFa^#1;M^#Uin`XjA#dY54*M{ve(ps$!&8FQpj*f zb5YMn)=%K9ND;5hM7-NRRA(bHEp<0AW3CmRAK#A3mFbuPn2Y^7TY$-@zYtbS5gK6} zo8bhkZz^biy!NTVC;SX#titxZ_Tt2*WB0~5Yi1^XK;wK(VdjXDDmg^6)z*SPY4)a| zK^mN9RbmeTN%lxbG%ZXp5W5Yl4c~QJy}y9EVe~?#JL8Dhk!$#L?$4F=&4x|iq2?uf z9+U-&W*LAV>h#YHhyodgdRB&W=*1n6P43M(NW6=^P#4NeKN*u#$-Zzb%2R1`qQP;4n{c|Br-VoqFNi5M8$=qNOdWD7 
zgh$7cJ;J$=#R{T%le@fyiO&Nr7)-#!XI;Yblra8~Dh!K=uS1;TCAA7Io)AMqU$Wf$ zi7QZPvBS;%8{1b@>3UTIzoZ^YmJi-^-n3zAt!x2xJw#ZcLm}dk#0I?HseXkTIUlls zJjNb&6}`FWGNI0vO}NOcCovrO8tX2!&mk%yL8+wbX5NDm-Xo+|D5_(iC=@l{CFMqZ zw9qHE?Vw|0F+Nr!Vz-EJRQ9q)tuvHP2I3oC{h^ZuPVK8WOvr&49EI6ZukFVmY zF_RG;UBP!TA0^wP#W}opo?Of;L4iZ@eOFXV~28e0lpbfDi-g&#)r?_eTHY6;hP2 zf@l_##o`OppdX>r{+!(Q&1;q)!~In&|M=6~A|)UAezS111jRN$y^&l7ivZ{V|U%0V9e{7<($uYr}z1DAV=Zx zRUgi4)d1jgoFg#8`77F7C&3n{a1fOdbo;JtLujw~pDZSy1Pdg%(KQ9rtEWwMo1dP+ zabZ*~X?P0i#?()5)8jPR%$z^ncY3ne;qRqMRIO{1?OL6*I z*#92P{}e-5J^4Hl<#aptLibIVZIy+O@xZ+MBZ%#FE%m|DCSZH1*@rvTBicz;p*_=2 z(8PnyknFf|@x=-Uew{Xp2vKS`@tGq@-CTUrjb`@?kUz`JT%8H+%8VgLDB? z0X&iJ%`!_~SqQ9tW9GAG6KETJKrX1{ZiUS^*2w)wo?8-!*qAE}i}>0-_}_9`#?W-j z4r$K>z|q(L5oUiK{uqe`qZwdr$VP!>Pbl zL8)@Dod<`58M`d+ztP0Z7!D>R*wa`Zj^@8FQ`1y-xZnpkO!@W1Pnr z(>Ayk(ZXuqYs!_f?Y9^kpoX>uI0zhDr8PY8|ExY32YEV>7y(nJ_$o+0iSQB;J-0=zSXcK6GGi3Y+hhhwDpp9l?jTUs1w4YVGk`(A+de;BYD z-hh%@9@Y?ig^5}K;>UKh6NH!mY#~VkZHvLv%Sf*51bQcw0(d@x#(~WKr)_u7&i1;P zy`N=Z;}U%Rn#i^S7@%YgfqLb!k54kT)OW}zRKr*qjQ@rm z@98s}lSbIMWs$vjIKBEZ@aml^W>z85in&0X*D1Px^WCH3IEEKP3OX3FJ--epLRBEY zb`L1^?6R^m6kjkjgZmF4p;!X>Vo*EgL58^D(-|qnTuidz>6mUkklo%s{Sp<_F?FVN z2yS&@TkJftMwX3D3FFuQP)mx4Wp4)O%%^H&X=gD`7+{cRh}wHO-(!c!>jD4jk@tx& zdDSs1DalAgD-E#CLyrn*F`wE|vGqIfY59cpF|u{Pu>9}Tq@qOvRo=RI0n`9Y?18#) z6nGCR-_K?)-Q8`)#b2beJp;O8DnGee4+;4d*odBWbqxcG4VkWXi5bE)z)Kr?Odzf` zmbP62Hn`T~PkhUL6qr^%Cg6jSZpo8nT8-5i6LBthJ$Ti_M1{6+{T4jdE9_%Y6*=hX7;`Dr*fM{>7c|KKA3u2 z&ug)SM+);u6{m*bPo7dt?OkXb}8&g|NoIB)g>ARWeTi)Vk*lBWLJT)xI)Z7hOb zrS=FDnRo`2{0fi4IfIWaS3V1RZn|GJWsh_-p8q7H|?hNu9okB9za@m(KrI6HRJs4MYgwGJ6p9s1dJ88 zFVJXdxtFO78262oBab#k821T->`b;&2L9$Eh=I>o=sB`Fw-!cBKf z<6&$Kr#Hr0)5w~7u@IaTHt)MXE)P)gKb^Y{6!KA_?WA$^krKw^&;#3@u!vXpf6$|= zL-Za(g#~s;A}mv3Rg!yjK|9?BR{Y8T?A!9)$}kiG{@kOtV@40b_>2Dd5d#~ z96!7nY0YilxRLMpiq26*`gSv|bxhqN?J0*CS@pL$3lO|2mpvR(;_ZsLS^YQ`eBKUB z>bE>loK=3XN=>oa{!m!keEOu?fXh%&EuLv6uW*|XW4^CqgQ`cgI4^1+&2D@@{}Sc? 
z>w&%BK2LZpqlf+|F4sc}v3JNemnEjL+Ix9dJ*Q7IZ_mz2Rv@?Pr!n0 zHUJ#7=kb(%xl+8;YWjPO)f{|WP7N8m8Z*W`5T}-voK!`YGA0$|o^(_N_R{YLa)xTj zwok^ZxN+WMBefJ6SaWqor=D+4ymK3G3Y*n?8ICIrpQIDJhsy+F@#Yz@*8g5xu5&@0 z=YEuB&uSistDS5%{QCH$zh&d|W>vIqsln|pc6nd^y5vBmP=Pocysvs#Wm@G_<*)Z% zcb#PHyQ8TP5DT@Upb(A@-th^<{VXL?$Fcczr?-&nWdJ`hK!mnw17g~jxwT!>^1=3R z0d}Tv%46VTUlu4y#i;Oq7r;ajB*c2)_ayKeS(O$qu#8}`vQaollMqR3U6EUQ4rI5r zsBYT50b3-L8M~8tDPfOR?!`LEUI(RSVHzlbvKbM>(LrM-|6LH&wbFvp;?S2?7cXWjvFFaju1K@D5@+*yNlf-xF7fU4fYa}4V@=%t7^`AEzzakvhyVJ+5NHhPgEf48`0CQ_`;-Z&LBpe?7P#X4;(16OBttgz$X6eYXXbw@Acwdp!RTrYV|1* z;})q0HPwyQ*$Y^Es%J3184!@EU1;|Qj-eGGi-eP<0X>3KdIBWERxqxeN1C#K4Y(L} z=*So3bE=jwoBRH#zM0|4v$JlM2y5NR}QCAEq*L=0%zS*Ilq@}>95Ga!3GVR z`nHAvn%;l9$Q-7|+D{%C=s~^$&&_A~ZjA8_C()?PNbDV8vca2-k^ymQ6H|m!JB>MKB952`xNH8;K37*WS4TJIiz zNc}Vp{2!c}*?iJFjnxdUUs=F%p_2k*#IZ5AIazIqu~va+-TVIW2Zv0IZF(M~l=%TL zAU;PYW(V(zV|0`-lBIxzSbL{>SdK}^>FN>vaoIP;$1=%zE;Yn& z>9IRmmMUKqKtCT_DbCgbhjbquR9C+85l5SokASqx21;4Vkrx=hRV==^$D%P0pQvZ9o_X8&bCbqmu?I>o~=CF#Zyi%HZPz{urWfG9Kokd~`{`myIZ zs^=5d6N6x$S`dmwI6_R(#K1bI72prlR7`G~JmL<-btIi7WzO&5!G}Af3#wlC(*&M8 z10jzjjAMo(B@V9K5c_ha=at?)4L|m#^2Yrr8Q{{xM9m#W*Z=brDqPY(c7(X2b<0hk zB>YSTk;{8I?Zf%R0|{NFhSkqDCn|ApL4!r&3SRd*-Y$v6G_0g0yq;Xnz;pk4Cr73T zQ0L1s0co8oABGeHQE!bGWOSlm3G(=Wu2GfgK4y5F#SB?vi<89p(0DkkfwK)d+sX^D zg2wnauDx=J*d54{)&SyVl^B6uxhX$bfzeT%pkHfgW%mLjTgDE+HJh{p&3C7`p&ZaW zVJz&!O}?HQnQS)(Nl~%<^^iSB6Q=&ZM;!c*`1tDyQa{1$;{3cfbK$3 z&~RuaG%T}v)_wem8;1(*$S7g(@W{^be_X&F@iPkUbZVyoQHKdFKsFm`G@WfyA$F1L1g~I%Qrq#>AmpqSbcqv+Q6msp;E$uQo+y zr=_q6pqoG^c0p6_wLw8=Qn|~+M5vd<; zKIKFLX?Xt2Q}<5dPiLoZ@8F%hl(ba}R{(@B-Ai12`}PU|Emp1O``-2Wy_WmmlXBds zT-u9btE^`1?5~WDiaExNw?>A;3{i(PwzU?IYzx$-?=~qf0l&j(2i=%0EG2x6abav6A^g zGp7j-;OGcNe6#^3=OdCLOU8aO-v9ybrTt1^M?s?^%E*+Dfrh>U*obNhmx$F|V}qG) zLD~UMy3>UV@s+iFsa3ARGwH3GWZ6HA5WB~n80!a%jJG)Ogbm$?gb_6d<8N#fMl)}0 z*$7XASJt}~KNDaQD3pRv65LKI-Nz(mt~odz$&4pyJ%V7`eaHxx6I`2#)Mw!2fVj}P zgB|ZURe^vklWl~t)@jC$usi7I?`nCBb@8l|P7I2&XLs(rX+7a~eIk0eV%=*wQSEvr 
zzQJ%krFukn&<`?7!e8jtNgoj2ow5IDtx)MnC>;58ZTdQmvN;)DfFa5-Q5)@(eVn&wkp^wHilo62Cm-Z zJHS4Z6ehnmb4UG0r_|%#xzpnZQs)bIC#}9kebEsp$b4brlRNT3Tl2kssley)nHj}z zPH8WidC=CNTFqjGpsIdt`Sst-i6}toIoXN1!4P^(Q1T8~l9>SmQfpr^RxW36t+_yv z7wsexUxA}Lq@OYvi7M zL1(O=xXd_NAwG+@Z_WU(n>hclav8oSyDYIo&)1M3;}qmgS1`Sd8(Zm}Xz3qBnCv6n zAmdTTtywQFCe&UbofVd*5^E%vmM*k&^0M06<~;7}F0c2X*?Z0sFwxCAa{!Xd7i;KD zjDG)PVroWlBeh8V6e<>0pDaanGVeE-*{^#2lkA^6`)DFT$S;x% z+0}Ir%Qtdw2PYB`buierO+VXipO2?MOE7c(Xf>eXqm=%6yP-GxZTmLyd*41};b3Eg zud_fGf6s0tT~C`28ecl3;4WN%?)9hSj4+qrT+3i(gPssO*mlb6YGi4$wmXE^VuViq zjRIwD*za+svFQ1R0o8c1=vd+P&{x>tP_^IeTk(`cxEnY*%!k=T`Pufpi%j3v9rEGs*TXn|sJ8d_y zoDKRJ>UCgtH%yLzM>Is}{hEzVRhreH_mpk1p_r#`31QbP^Hb&i~f$XTh>D zo~1mCMQS~8MEE?~8gKq#Td3Qu)aj_NoW$;o;@7ycUL;+Pu^{F@%3hr=p*bUE@`*QC z9GYnhQ(~Q8_=&%oPJyH`OVbNM+)x4jn*}}ccNBt`GujBmO9-U`(kUHcf*r%@idy(; z-sE|H5A7#{1goKnheLO-#8NJ18pxX4+_N5Vt+y7&-@?68PuJ2}Y;g3v96iSP0ql-F>Va5gJIfLiAU(noCLUho^~?!$5`D)k$tZ_cYWO+F`% zysNcPTo+-MKjfs6WQXG2Uh7qAbd_?knvzoeVo~Odpzs#fq=>r|q8}&D>-~+2{%KFm-4&>p+!!HWVcZ2kW7q!KJU`4Czr7S%w z|9x_uDEET1iEDE4%xGc%BQ)Ni9z*9=?oFc$Iu;+>T!6M*@2(W)Q3goVCdo$kY(Vgqo+D|G=Dr^S%yMRdMFFrWK(vNh!;# z=_LmG6D=R|z+VH2Od~i8r=Pv4l+%5OhHa-&mARo{+V$%=Hw)^2%#=0jV)*GT^>|gX z7?MbUOIaqc3D10;hIo-ANAc()Ayb4|IDqqx{-cM)2eS6K2e*~)P-Fs$qR)SDR4(li z4f-0%f!Hs?K67?yVE!#8QG5g4hfm&4p+{d7WjIyk5aF=xS0nq$wo7j#+#_zWxyOAX zNPkxShuhkj1mCl6JG?BT^a;B>szi96S+s^sZEc7z*qfq*bk(m^7BXeLNoLZ}p;om# zd-M)c$cl4qXUuS0cEzdwVpk)o@Km4K`|X22xmJu(>@Oeal5^j~(@BMfYizDD;8K`C z2i$X?qNS4{?8GHb@)cDJ=E;G~(av~k_hGc!j-Cj^6vG6molpejmr1-~LJ>F=9^9xN z9D8%YRzs^tovXc}_(bAqLTW(a;DQbf z49d3mV0jG8{W(nIr44IDPYp0LGhCp)(M<52x0gJ@a9KzpJQPoAeY)m|5VI07`wN%H z)BY6!0nHw4%GbYH_b{<+XKIMq@JeX?D=s=qXi=$PsV7QGiP7*6dIS1cHDF?%ZhqK( zutaR+tlH{n(66F{_*=mTCvx8yaB0@Uv|1;Qefn|t= z)MZPzvK?~g4a6N1@j~}}0#<=y*?ZRB5f-Sz{K++qw2ql#5V*)T`JHlGafS3^ zt^*DQK3_YD{^TksqOF#Lhy*Hz@keMb5wQ2#gG(`x3@Py&0as^p4|YRBiQ;=$=7 zcx{t*6KG*w@BdMkP!OT6HglDx*Y!ydV4b+N=486Uz@YQX;GiiZIU*-weWJ3UA}lXI zPWN>^)wAv4md 
z9D@w27DW5rfvG+*+9v$YQc9|A8S{w%B|0NwN8)a;S?ZpA9C<5PW9|<-pD+GEaJwjC z=HR^RM{w>te73nSEU~HlBF|$yoc=*ZU`(VSg~TiauX)+O!z-m8b$%g$bLPJZa(iLI z-758bpwi`Id}uZau`&$})Y!u$uTm^pBkbXh&vlLqkd`TsY~~KgkiFxkS>^OAVW27t56Np0}TE z?e%n7T@#ZRcY@zd!15|;Ecua48bh+7;?2@0rQcsKJ92AM3!3AMAnLwZn!enyK9hJv^ZwvTNv23nJ2z*nSJ+var~#3= zsEApCZO2)2Nj=?>7yGw9Z}`V2OyC-OF!VQC`;n4F3|nGU=fsb2tB>wV7Q-9$TXh@L z6L?j?^pN#odM?X@nilghR)nfOVbCk*?6tjwiaW}$ZqL%0Q}+EPG*-H_*PLj$pL5{! z9~VIA#=uFW)|TIs@X9al+LU?CZOz?fZMTQ;U0R=OOj8?|8<&Czl@ay0rwrzg~Yo3F- zz!(RR)wFC6!mt(UOGJ{q(e0U7G@E-pYP18}3L&nMWM|bb%KO9u4l%7kB;*Jl3GaOW zulV+GIoLDpu0wiuQ6cVAVxqd!B!QKuNC1SnRj#DaMM`f#cl+8idBRy@$7t+{>~Ec+ zDoW{vh$VYeq+rwE-aLI^@x}Zs;A0+*(L_FYns21Iy)QJ`ZGmY>*U%MoqYPJ z*8HY{n}Jdg0ik#wq6a<0KY#DiR&7dTmMP>CFXm?YV8I+CS#M+eE#%F$|+Bu8}2IGcmL19O66++h`!LogL z=h)ub)Vs>kWLwyAEp3OkBPH4f6n_t-!T=c@DEBwuW3b67{D5;S5iW2+5Er0Ll@j{_ z+>dp;&d`PAhX^{^H&7X(H>R7X8~+^!k^-Sx@gWD@DYTC1EC=2gj5zomcTd_ZK;d%` z{rUl&S^Iy{D^g7K>Y=f%IuSWGI~kniw8n^qGs8sP*7iSDl`s%gqdY%Hn{n%g4#r z^VY^e3zXh#I5z~=7K)6V@?xWw&@L>|y>ow-Yxhe~16g+v)4rzJnJ30gKJIbc( zos|TvYuvo3(Y?cOHA3&WJ}Q-y()}e#^V738I_uy^rb3?v1KRc5Lty5q_YG+Qo&Filwz{apttAmacb#z|rC0 zxSu-GZ;zK)Y0Bgc?fqz9*r|;EP*ch5yK%rvvt_2O-X&h$%kSY$d?97=Kyp{`qkktI zI`d((_wDKKHW~@Z|A(!!4y&U3*FMcgYSWwEgmi2`x*MdCl-xAZDcvb8NJxsL(jg#7 zhcu#6(kUIA&Ubj8^E>CQzql^0VP>&r)|xfnb>E+xTfHNmWV$l7wtIabseblU1!wDO zy3!~k-^#rgj<+0j=40)>^0t_}^?A)fcr;%FzQbq4_--oC%H*nYAmY6OtnH8f57XLn z7A~s0xVCTv%>`4926l-V)!}?k=Yr^cTeC`t`y@sXAPgkGLAS4NTPNYJfa6tiD4DDl zR|9oHW%&@do=dFHX0_ne0Lk|nE-ffEvnyX_!<_Y6Dk8Ce7=Jw&EH*ce=qcuLTyNI2 z1$ODiM9nm~bS&Yv+8dJ@Wpc(sTu2ONTttX$U~@rHe$+eZlfHKdgcQg$o}X~1!y}t> zYXN5uLw+DO`Y_^+99u?4pLR0)Uruo~eH3kKlb~~M`5;(*+zd@pNj{QMm??RQ7g*_1D30=s7-l2I_oFTe?^4v zJD`&N#Lq~4OLB=htfg~@!^&Z{6H)^`^of$_M0W`&M?;~RZ=TKdVrHu{V*YrG*bUmP zYV2XQG8Grvv8UOU&_qr7|-=YcO_BD9IN)BBSagNe{g=eU(5 zREm}W1V&VnGxde z?<~05?e8`2^wGGE`U6S{e|h6u{?w{Q??s*mF$sg)`v+C(v|Nl0gR{{iIeY zmYeIOlG(PC6V%a8A*?uAGux5gCq$&-|DC84j6qYlk?V2ukvLdwJvt|jvhyZP*a0@J=-M96X~7`;}-49G^~8F 
zl`t-e&{ctNVlConi0=U?Q9`b6!o$NT-OPrrJyHgQcxx0iuO$Y%HzWV38|Jhip}!V7 zFH#Fi!xCI|v&kGq#a1zZbfe=QE5%{!UM9Yp$XBs7R`k#qOaHOW)w;(^M}rNC5I(nXkj@z)*&e#p=3$2d|hJvJ=%pYvr3a9|Ne_b34 zr6VcW6&eU*YEQ`Q#3wty_bJ^q zl^{D2pJO8Sxn%cK3xkQO=G7;3`2F}JyaI~{aiF3ffLdr}7X^-ZZqPoG6%@s0Zw{R~ zvZS#Uf=jB`tH!7p-vLd>4u4i)YP5X+XEOotJ8$ z&t){9$%z$>G8#~fhlKtnY<q-bSloi6s$FMdHlv>z+!!*nEG&%v zggb!umGeuS$^`f2{G3NJ*WK@anDz9#RD~c%ukH+?XK?cLP3vl_ZjX6d8BcN`98EoSMa zliC7zCykD`_Y7{w)8CGhD;}1~XWK4sR1p{4!nf07IV`)9L5n5QBH0hO$}EaHbJj+l zX}D(Nyc_r3>2%zZ@kCs~IB`9t%-g}W-4T6=R-Wle%v$LlkIj+TY>t;!^GmBe5evaFVU zG(n8(9LbJ&S!r2&S;U6WBgOY>;q?19DW-K+u@g#;RIxo#Z%1k2+fY@ROEbVWEo9hZ z6P#=ZPtb^JjxTUetUNH+!W-=Hi|n{M%>AzO{6G_%&TS`Fcu%Inj)`}*nWkwA zuW2V=U-RWNS;Rw4EItlHu*Tf(?+2QW>W2vj!ibZPyC62EvJ4oTj2=(z>kO?I0lh-P zL{d2734A{1?};_h7}ePP#yE`(0+1MyoJ6DyUWOJ}<=FK9Ycxz=37BdSy0_@Tqf36GU*99gUD0lG?`^|1^ zX!JSP?ma>VlZ!!vBoQpmBN=wAq0}Q=0IT|~Jj;O+yE2iDRE1HKaskT?x!Hhu)l=FI ztn7zv+pclRx}AAg7MI!-Mn4hYoXv!nezQCyyJil|^3C*G;<1la$%pUTnwWch zS4awgn4Jnf%xQ?$>wQ4)djH(y)-vieEGkG!r0bXxhb*Af_|$MmPbW~gB9H~sp!>8_ zQ-7+j%B1zV!A%W@%tW_=g+`~3jZ1j$9l>^}B!mGu9NrCi6jUsGODoJ;a3{!;4*VxYuL*?q^Fzkh#-c&(ge^1$4v5Wv9i@}M+ySDjrE7?|U@ zkXf$wvTEU2+w!|=xwv!6_f56B^J4${H4&ATF9S2Mh|hkDK3h3=9DlB1GpRwUZyl!mcT^{7nhB*xDb27cBseWJh=x>9w4fG{tuDT*?X-$GR`P?r zf#dy{ZUqQevG8;Hk_65dXM|;3oR?l;Vm75i7R4k>ZHD7AZvh3L?Tnbc|P;6cQS9tRTP^l{x0NlrQfx= zf&&FyrFyDVqeR)bAmw`VoDu^w?hGcGmNklH-z7%pvc^&CniP%N40G+ouz50XBhZaw zDq!eH_&qM58^;IWUzjXYSg;rw2yJ0VBcrYXE9nN~p!ssLjiSITxtR(+@({`hO874T zTL-OK@YxuYS$Wle8{UgU-{+*1Yxaw;;(@1>=O9ydIlX@6qgfp+S#gF+@xgKp@=jyo z8-PRXE>wOGyUNsuM2|$m7XCIlUe1_Djj|VQU4I+S@P(ASN%n+Tp(;ZJo{UV7P=d{r z<_)38mO)k?+b}B{lo$Qayf-#2vcr7;MNlN9B< z5eYfqjX;8fe(?!C{EVh@zOR`v%s&ye4MV+S_|$OP`;`dN=&Ec>^?AqPnMicK`B#p91MB^bml=!#A9;>-(SDLw* zbT(1C*K{#vKe)&|4i?FP8yhdaSybbj=$ZQ=0KV39#e9OZOACW$CY6y{MoCCt`PSt> zIlwkqI@-cqOZH9vRTSKM;kFd=1)Yg2cz%x3p+v71Hi;0Hco3~r37vR+n zsrsnun00b@^j9EPzrWKX*~wUXW-J!7pRhCnyt><;TE9PZ$!)5*{Ry7mMU$Bm$)vSa z>~=D(f0+E_#BI;N0~!31VrScG^D?=HTe!H+e5zf5>F1A&*86YF8`U%<6^7orM!-i7 
zzusIHog5h4W&9z(j`Zj^Mj|iIz(*cgBk9x{g-0yfDk! znP@0jHo2dffDUSyPqiRRJ`>(;9Q_3=P6cB7_YW)+JX6~dGK;;?7l4FF5hfJ;i#;R+ z?3#9sQK*}_JO4*?>&#g1Cs{GVO93D zegMQyi6gm#`VoubZA*f1WFyj^d098eEuLe(pw>Vm%t;}za7Zun00`*7U1R20Xn+kA zx`wNuY-OoIcRqmSqhz5R7+jK4A5vEj=a059v_KvrVoUp0sjIhHI=Q*7sz+`0$rD#A zt}@2QxSEV0*C^=ZtQ~laTtIooGO}xld}t6O0yzXuA6eslFQ(O35Dx4MxDEi4rDRN7 z40m&L>QRjvQ&*{k`VfLE9;6k!`q)Uy1llO+9u#ERu&#JYuPyRt&fy7+H1jh7BCI&g zm^}!*(sb!KaXV-lGa$rt>;>c!5srrjnf*P;{V^&rby|V~`jDUHoMhuy(c&i5#~*`_ z;;pIll(>>92&c|)b?k2q`0NQ2VUKKK)yMV1={Qk50V#%1>-P3Gmab66!;hp}56sC| z-8_;Jqi$6XPt)LfP*N6wi4hVx$0+yhQ(#qPE&9wX}Z9aR#7DMrMDN7?<=w+ea}-;#b? z&TKD2B=FD!EA423iP52q39aFLz=<8pI@MhjA*$}sl<1a0I`PE7c;QFH!*ssWkO7H5}FTkx4d8Fo-aS2I6uA=rYX zkf<9h^G=Qb2CusoAJr0v0zZ*wu(?MrLA?L@FEN0!+WS7kIMYXCn(6)?*&~*BV)`gL zSVefGz`qL}-svGCk4#i6{ zec}etl$J4BUM1t|*kjmN%GG-Z)Yf;l)!#S!mXxy=L}wT$*{18p)#8F{G=cW5p<*TLpPgim=?5C?i#-m0mz#z)q0x`Pri`z7t87aCipa!E#dm?eKf7T4mZmU#oQrnSO+Tljhp7TKku*}kp=My*5sFVxT!`rKzp+o z42cYxS!oX-Ljy!OWt0e(FuBZGfOoKo`xgt63clRqOs*gzD|u}&%YuLPQ8~m0w_9)W zgL(xyjKu&Ix;dD^gLN0Hr(uvyP*R3_7x`F*8fNDlsb!gmav+134GIQP-yt(`Fa--q zs49Vnz`4XZc+#bzQjgNeNO`m>{y3Cl9#0&BAS6}UODw_LJB&npY%uPg0K4jtFRrZ5 zkhCnN`M$7Hvl0+7)yp0Y58o6N7PD+a+m&xIM1nZtxRbJtZD`?P;L0(?u?g{fs7DU{ zBY&s)IzC1Bi#{N;j1*cPEEqmlnh0AD`${iQA56wcFmE!moQjUm1r4+STy7|INQ#-m zG|OUd1-x+jhYv-!f6$govI*ULKH8cZu&IO?<47g^vJ@=#Ce2gA zW)xq_3TJZnY?6>k>|vE&0UzhJrZC!-zxCg{&g8X6glhAAZ$HfYcBQm%JK`k@O2UG& z^uzir5uJtVCPz2hrTJ*FwB8-ZV&gPk7O4@UY_7{gHnxDYNpFE{pX;QHwx>Uo*@Gv$ zj%D7EAuGHqG=b7BXOLZ@!6oRL{Hm6w9!vIj@`XVZ?@vKi!IjL}U-p)gaqTLBlWC zq@#f!Ggg6HMlwJjgn~Ah zb+R6`5^Tm}n6Q8n3D)~lr0{&a#u)ASbDJ(~ntUyhhVSZ&qrPJ?$(G{f_DiY&2il+-jL@=HPGp znc77690+<+#pqHHdf|zX?hr*Bw~YwJMgY%{dn>ei1SnNScG;m-2o+L;*f;1yG|zx(zK?Q1$0~;R0{U)4Mz3qftvnax;H<1;oXF!DQ z1sI7;cPXPnSIK!>zG7&xF92jW^0W(U8$kH)x4zCuLdFas*ZsX$v2^b*s%f+JQG=1fp%^9y0|*5N|XA<1!{!N@_tSdDf5P z!4fL3qz2%G6+ z=8fdT?Ey31mNHN+q8e$&9m5|f<>Dmu)j?fo_2*40WEyB2hS)NU21I$zL;@Z967KFg zl|Bl|=892wmG+Z19_nQ3!4Qyiq)&QBrp 
z0pi*~4#weDk=M0MMA1A_v;|3h@A-fS4X8{xQ$<^-#bsw%IbD*LWh z-f3Azd5UrKX*_P}-F_X-+p>;guWDDFJ+Xw`l)YAH#7AJQWi@8hDlJ*>d-%3oe|>#X zv@}5BfBMb`k)y)y2nxKQkAf`R?@9iRxg7T-nH z7P_ShC5XaZ0TGRN3R#))Jz%z4mG&)Udjl6e7fyo%w5g1)bftbD3XYrTEV%*)#KZPqCb>ltrTVmZX z2*zKcGax4j>TGu5kOZKiQkjgZDl=A$&d+$l->eP_@!VPB0ps-U!7o@RM#i(0F6Fkl zU-~HyKc3Kvs?udD^;@hlJSi#OD8?x(aQ9^YkaO zfhbj)Jzd>r8yMW8R3d}L!sXCZu`u}}>dY0-NPefWsIIL@-8;Tt`TiqoB{7A|)@4Qx zCfynOQH`1zKllhue@8`V9@Zd?E%-`Ex`!zN=E~6(&%ANoBzUi?ZjO^dI7aPUmhES{ zhZX7i>Mb_%Ce3vjI|)L;we92}=xW~ZKed3{g;e<{`Ha`s#PA$A&70p3kZh&#(tN1%Gdvg5}I$&nq0N1YnPWm*b1jVr{qgt(5#g9&F?`8=>UY}+GzqeVSX5F~Ca ze4pH#`KIZ3qPj4T6OAATjx*o0Tk@|G?&SO7;kCs{;Z7pi-lXE;36}| zFJt1J@~uXWW;y3tNknz^QmKFI$`mr+I>F!N{*}xBrp6h^+}5~Dtm{BN%4>Wz+2MRj8;>Ez)neT~$88WXxdYL`Zz@B`F% zDfU7$e$hOX;wlE*vgZm-eAO56P4?P~gmq!hxnCxkn)keo;v-KmQY`tl@;R%{%gGrX z2~B_G>1ind^}v3~G=hApmZ@|qe~ySkB8t%u&-gY^p73lO5#cccYBPr9Ik%l{|5Sv| z*}K@0pPy8~PZ@Up;&_13>|8bEi z$ci0rb7GkzoiBa*Hjxi@)_j!QxCSWc$smO0Yqy4chNugob3#=dW1N>b?38dNOygMx zh;qHPaSw+%jtp}M&xV5L&W^g+&Ylb3{F_ObglcQ`BcIs1_JmbWM(h*RlToC4iS|H# z0@orgfp%q{J|R~*7Nd)1e8ON|QApqk{Fo3+HUMXyJcH&i(!P5~K8qYF?MFTtrBKH@ zhGh3hqR?hArOMzVrdw36qfBPKF#?d5~m z&`9nwS-hg;YY&^w@%BlDDeVZ!W#y~yDDyp2&>{6DIvnq7{&N^nl!HNYTl0_C#^feq zL$ZcGn9Sqsw>+fPf!{^7Qk8t~EJNEqPL||$_bcjL$}NyL?V%fA%3Vic8ztM)&&U2B`f1d_^5j0kGCv*Uhf9NMC?A1(VSH~GbZW!IFJjGXB&@&_7kBVU-Y zR!4Q9S6b0wa~zMFyLBHG;e7Vyle7&fvl85T)5vp=wYe7b{G31qmCt}F{rvo{aL$&u zy7{=1N=}%C))U>k1AeG#a_bz_+L3g#fN;QPH>zl$>i@-Dk>hJ$Z*ax9I&f*81Cich zuI*+<2Sm9cI{=@(fH%B|3`>-4r_7(&?4U1|C{Duos9+_Gu_!gxvHk9;{9LCSDugWP zbz_Dx8uV>)3E9P^>)YfoXxy_Wtio}Q(v!*?&0tOGK^d*7Xui-9LHR62c!`t1Y#3t% zt_r*)+ZceS1F8-{;sS|Xk9jn^S~8Xztrov%f5E--!pgXSl-sfHO22i_&FK6DGf#U@ zUe*JL_1UCJ(H1W_=GXEX(XvW3$#Ji#ctZBFAe? zE!pC)iqddG6RSRY`a(VdBxk3!^2lM!W6bVTi^YvzyLxoo7ggi)pt3l6O=1vWNHo6! 
zoX1H5mH}0>3hnOsmVjLz~)pi8VFf@lH8D9A;^u?or`7#WAvY<20WxBD6|PV zf-cbr1h3pyWZI6<&$j3VxGdT1L?TJ@6CfERBzk^a)MH;10`nMgP>VE;=Q`27xo`vj zO&EDe0C{GY44`*3n$vzd5=UU~Hq!}`($sD)n2T|BuGS|td)$a-08v%*K$;KBywo_l z#APITlk+MWw??~T7d2)g&zQCNI$5Ix<69>9FJcjt_vWlp1ujeN9gr@?0f49G{Pn8o zO{?m2>Bh!4Z=~?h9)q6yk=H`qQW&J-GYI8l5aiTcd?#WW2AM5`$>Zg&SCBY?91rKH z4P);7Pi~Z`sPCno5v7lZa~gdak?G$LnFl|=1)QXPU$26j>C!2Cln((U>$(3%jmH{Y zlVu_4kW;ZEl@d`u@UJb+`UmZLUtYhz~O^)lQ8~sw9|omARF)2PL2e< z)x^&i_9%ho+N2^G7*~&5%Y)qlOv79<-9rh`Xr|{7+gPN|0xSXQ@fw&?NKwjqS2|=% zKP|(huEF!`)X%`%0JOMD>axyF+B(-rHdZk!MSWTQs+lgwV*n7=*~$Xb4hBjSImH3? zm>wrS2I|{rAYt_P*T{DX8jG5*CBDh{T-XaSK80~L7ewP`Z)*x9)X++7%eGwYA;lvX z9nVA6vRdRRWacC3svpTJ(6*~I)ScJ}=~d}GC4oyhtRR@yckYUL$JHZBz zO+79KtAwNR)(SN^kXFiXMz9>FaT0@)p)pU}((O4!D^DQY9ZN5aNSc`r#99tC4S zgnTp9UIKtLpkF{c0mDEA6i6nLer@7RUVLnIBi_-1x?H2?rtRO}s@%U%1_W-HdH+eSoHNXFm zEzXC!**#zV&N9pVXi^rO7Ay9qbVY&~nUMSE9mNlmK;Zj75%oDhXnfKe+HrAC(-56w zOva%d--m_E9a3`vWPk!mkqpF5SuEGo3x`1ZXayI!ThW;*fpqtdjcr=le87UD#5 z7TXfno4q|VN3A#SLMi+XydOB>?SM%f3+~IuMhlR-7R)t9z#^n3tm0=`2yUo<)zS#du$^_2Bh^0t{C&a zH5a-9$0$vtkjhf^Bq;LZ*&}CzQQI#c=y*DDx1Yi33h(a664!lz4D@c!>N4KW*+alI zP;if#f&N=G#s@U_82AK`9M%dq{pz%2>fC?`ajdPiMF3G?40nyu6%eq5p=g#D9HgNu zi{l`HYy_SgH!Fys^C)S~>;Z^#udwF~UU35mMgd){TZCA6y=17s zH4p+i6f8mjISWsAR_wHE51!U7CNl%>!5|X=P=bOwm0wlwQG_+v?2m6WoJ(sj()tg~ z*#`{k9TMS7EIm6|xS?kdp5FqZs;d@CHS&3Z(SLU0{4CKz?-t;9{r3~S+f5<;GzzR( zHx$X`+8e$MMC1Uf;ZLtGrGb2k#d*^S40xg+0*FK|1}NDUonH>hUVM}oBXSngSQSBa z!{kX?0s!((x*wzQX`R0Tg(Hn1pm-yIT^5$gytC_ia=bfP9Jkl;aBqMv0}0FXGl)*6 z?S3I4pqukqAeWvoO{t=;_Jj13P`TXE6rjw8?qsYT?HTH^kdu$ECKUB?%a_!W49!?6 zPQ{C*G&M%_p~kkt-D8*Q)lpf2taew_oBf|!z(P%9{bQVtSI!wu6yK!hI6`Ua zJq9iXh(cMZ(I0>j8)HN({_U`UP6>DmhojQi8{bv|xv1 zHi-aIb7XO>D8`^aAoKE2N^ryH1D7jt++x`Cz-PhI0e+?>r$|jKM7jb)$|CnZN~^$r z5J%1h;3B{E74@&+3%PH|$!(ZmEI1hBOtCd+Va@LtxvxKf+tc$a_7Rpp&kSa3=D{OR zD%YHH(pC7$ecD?5=Sje*;074B)@Le4@ukEqKRVkoYZt)Kc8dzK-2|Ukp}^jJ#O{d0 z1aWmOP`_zSpQM*8ma1KK?ptVVZhcWPv3hWT*SEkaEqfch#OU#j>a}tR8a;PQ6Ca|# 
zV>b7X%i>;M@u51$*QjT-4AtcY#&?01pQ35qzN$Cdk)0kzpSSkjw$3-Grah$xfl$&3 zI#sBz{Cu5+^%}xLpx`M`fLyN=HFe@w6Cv8Evw(LeI2foa%wtzpZ@jIg8$fT|W*d9>IPO zN79W(?CH*ui{FNK?>c@n5Ohh6h1N)7x(2R@^qfWg^S{ZaOwfg7M*~yj?J4jj^a_9y z%(Vk1vgylhv6QDf%xLJCT`NZUsF#N0E(;jtW>w)K9#XDIc$ay-7f;&-kMCj+e@q0f zJ!b=Y1H_J2LFi_JoTtSp80^;fMn%83yx~=XG~=q}vc@ye8m4|f`)U$dKWLh)iqF^u zBAPNTi!}ZP*tsaP03cCo4UV&uPuMoBMb({_`WWIw|BA3ot4O$dKt)30^xES^v{{pnv-i z_#tH_D>cUs9PvR)D#nLJ>Mi5{bpSAgQV5d2V4`o}x<&&7^7!O0>Fk|;903=fd!`2T6(|JGcP3tG`FOb%!TB#c%5|8oi` zDB>xKRy_yZs+NawvHt(lipMME$4Loo5z*6(A*mey&jC`u3gxkb3A&(l`Ka!k*pICP z>wydyR^htsX$=21@bRyIP=x}oE^;}v@b5u*a)@DruiU_I;qg*vz@hV^tm^+;*FLuV z5(jO@Pcpf$ITcktGip{6EJE;4_e)>`T_vRk2!;OpPCOnClMdm$>>}JxUR8TTSp84W z(9%KWZW)x?^JHET{vXH2On`9eCPf_l*azh~A6qD;i53xq*~cIfQIzyQ|Jh@!KS*`H z|NRF@kdc2*_|N5ufnFxTaR;7@xr9HzOLiMjOGVJas-0Y}o4YtPCVv!s{jOJ?9N)|C-dZ#uu>D$#Cs9h{CI*HWSie6n`zf`lN za67)W7xCZ8EN9cc4s82ozf{HPdGfc*)jrrDJ|%jLnY-sQ`&_* zdSo)WkEzhj@tMcn&QlSO6FX2#4;RJO>6~uxr&@7uAF~*A%lj^|h$|3+aFcoD1)WVA z+CQBg5g+m;%t3IS>aEP`GS*1R8T3sf>=vSZRiJ8hX zmBC*)oIDt&w3~@iZ#bE8B)OUEF^QDUJ8WIhz9;vO`uVo!Mf~dazVzCc^AdE-fzM)p zxNa*a_gm`>BgtrTq+WRA6&j6KPZr9i+Kq1aJ*Op&o_%p&5GwvBQeI1XNT?y0jK9J# z)U>4){YS96`ORkIuc9&h^^Ge!omu828o8e9J7@m=_wVA$jX$WO6*};1p~YlMxS{cX zz9GsL`YLJsl8Dsu^m0X<<}KuAE->KcO@9ZEa%ZCaGqBCnNdRcehl|9`w_zOj?K?X?`k= zsAT5DG_rR39*1RE=N7b}a(V79hnF$wpTSfD0uczx%iexaKpHn^QX>`>aQ2qXjU^%- z)y#WtT{Y4kG9Fbw{1@mzWaN5FgtoYyV{Wm6^550xOU0bhwg)sTlpeHSk537O{|pp- zP8#=^(Kj5xF6CC??{JXGGWK}?PQmlxfb+UV8&0vv=;-o z#pk21ejHZrl)KxiQ(!fAfAzmn_@v>K{YwWfH{TYNMe&P8(U-6H<4Z}>kJwx4(rTM` z-CUt19eO2ieSd0pI9Co%4=5J%NjfeyL~V|O3^ILhHgUhf!`}F5N%UKdnyKidQ)f@J ziIr$T)c3Q0Ym2C0g2fv&rf+tB2V*mCTA~#S6=^X4R?N}wq_U>hX!7o06~7L;S`{u! 
zsB@Xt;}80r!Ck;3>(9O?(s-$xOSY@Wt-D=uhME!-DoVovcd?&O9?+pVE<_qjP;<~H z6iUP{1aj>Y5PgrLi@F7!?M1wPY^_#tRwnUFO*e1 z<;<^7w|%$9^E>Pg=3B;rZ~JX-dPu}3)%P@3qDZFPq7($y7Sbl!2w$d4;hGqGARak# zc58c& z7bLm|3AP#Ad*43lFzsr19xU>Fy}SK1xuvXY(2|aq=g2A%yot6y|22=-Y6OUPKulbpUx&GJDq87W@It6T|H&he#oK&9{3R>!H#`5{4IeAaF=ffQL_gL&t z(mXdNec$y@(y7n;q%r7Akm!ExPFg=dY1zs5HKUzxm7QU`JJ_duo(v;MpkB^mTb6r)Sw-22lsnJJri)nr@=Uv3b~=CcIe3Olc3I1UB6-y2d570hpWs=a>3OGN=?qj z8v*3Hh7%h@MiXopcCwp}!mLi$BUUg7r26ZtxUa`J3M&7p1w?)d(}m}C2aq_@$Hwb{ zj&R*IL`7S}fzW0b%}+Yz5x+m_0!w1(vn?s$n{mT9W%uc?|2);OL%GkfdLoo;z~UqN z36ru_Vn{}*#<^AOTIOd-VdKwId?LXQXw>cPh>vYpvnY8Y(imaQ{mSL59z~bxEv;OY zFPyhIBvYa<{O3G;KR&TlN!MuUqW*eH^IW!nL{nMBlTzv<6MAxG26rBjl5Gc^==<88 zzn{+`*MiL|61xy#zAa%A=T|Sv#zOixm(G#z-#*olhYEtIX6kXr6ju)qOFci&(C@FU zsSQu>^KDAYJ-}1#I=>Gd5#}-ARZ&OAWqfeLb@)vV6mIcuagxrt4Na#&35(>+^6aJRN#zB} z#`w#+M3D=MPm^Y??#e0L$s=B$ABy@bMcn4WVZ}ZV2Ts3=&JKUo-blS#YRrGN74F}p zE3r^LqS_uJO!{1%MpS9d_EwpMO<&)B1fQJG_T#f{Uf%Sb!@>L{1#KE~7_sP~bPoD=dH0{Y5dIaQP#!lAh;z#hxmQ+MIEzG>UKP-Ou!olpV_8jjVPw9D3 zP>`cm<6@$!p*zdxbA)Y@uAc$(O6XIfgF7h;Y>TU*g@jkNeP8A;bpH;1yoo6>;NoJF zUu{i}M7BqG4CHys)~hkzrDkBF07* z6xzJCsL$xF#IDYCd)rsi`#!K;%&c?NZD@K;v;-q-H@5N&f!pZ>&2^=(zx?+V?iVWq z$#sM~a{l|&DC5(e@=S<$pI(%&5FX2JIz>h1jhCVYsxNTbuHH{#8+;GnMd7Nrca>$& zEQOgx3VbVy`wBAmXBoe^izDTH-#I%gyyLCo#{?_`37#O22xh*9J($zH(M(9~4|sug zZwXy$AlLTwR4914i19`>+tT%R%5=`hyG1LnJ*ez0v8w*d7GOOG>#F^Lqg+?+djAWV z)R^QlKD^7k;3sn;%dWBQm-h}9o3zdR;(SK0wz&Qjfj0IAKeAdv%TbZJM)Y=o<%(uJ z1i7fE&CkF(`ur~U{g-VSk92399}jtwmBuR3%X7$uZ^VFC5*;w@+g-h#ZLl3+>P~n3 z`?F{iDcU&muijNl)!em%rGv`cSK-&!LXBtCQ=ac&5k33a(~{-YEL9GzOMf>Ir44?& z1Z;Yr#NfMp-gqHt_l=%;e#GY6x7)8&e5a^dQX)M&YH-_~*4{_#PlzU&>aL%KMGE?S zR<(&{;}0tS*7}7-%tnp8(gJ>`OCxcV0j1>(ep{+k*&j4GU+!tQq<-D*)~6w5a5~fOTj@vomHOhN;WxG%k!3!c9eV3pTERkEaes=M z+NGq^(`Yb1ZY5b&|mJJTVjL&Xl^lR$5;$tqmz0f7;w^gU%&trJlz&?Vri;{Yy z$b1kbwXRD_%Y8XjOirYjd`0T*SU)|T`4s1bj74FZ%S?vg`4I?X>|)v5y)42Vj8*|% zUSI!32bgOEG&mUNpU4%G0<(duOb9bg_iYbi;&4SLl9&?faVo&VLlBF0Ax>BKVquW( 
zE4>TF>ncsRPsWv`+>DTs)K4{y-a@tLW=}z4rxapQ2QV##mf9>I*<2xecM1_33~I@? z{8u_0N*G@*Gb%5Mo@!)8z4ZQD&T&%1YpKFJX_Q(qi&}A`wyRU}L{g_|Mn8OI?VJ(^ zI){Z9PV27|-U8h)CCTq{X_eOprUySS(3|8(|^t<-He z@l#;>WYf+>c!8!t)^%e$jcq55ZQHgQG`5{Ijorq!Z98e>q_J%q_sc%}+sxcp`Ob%|7w*<{Z3ZAMo@Xo=j0QS|zk-P`xnUlkp5BsxXzs~TTF?IgOphiIUq3GS zZu_yY$RWjWfEh+IH8%>M!1L;!hTs3@_c4~0WTfsXGe?_O7gR&p88dBsDVn+4-@;dz zbeZ)zarI|<>6?<*H>A$DOPRq`!WoM{K#R-^HEDEZ|7Gx3 zQp8vNMYtG?_{nu2aSOYa*|BJ7b+6t2o38R^d|{QercL`o#Z+2?KqEq)523;fK_Aji z-#3{~Ed(Yk*5rf9ER>3);;PO9bj_^UnMUsN{r+(N7R*m&V_E^u`7u`xS@fVfeXrPWzQ&HWb-Wj&MDDYdv z2oSD!pX77&2W59FF^jY@Kr>Q*Z}c=&o~&`KSL3GUHSEnRdlwOxMO_YUGoFSLV7LAV zBaf&s=y=G=5%fP`Q2(GXLiqbK-}xcM`L%i~)z+FM_>k$)ekMzd}!TFp@XvUd!)n=Hz%~Y%Jrb&zJ zIDfSo!eR{lWZGj5HQ_Nz=$xA}8Z77C=G``-Bl$3`zBUcYYAP}H1{-#xgB&qa7FV|` zU-X(PLuj8!&S#&L`5CzfvY*Ni|6N|pLyH1+_+gkm+ro&l?*7byh{I1{kk;zKu+{Na z>SfDPiC#ePCY4FqDb6>U!Bb|i%IS41(r1c8B;cV`M9;03s?`I=@%ES&*oOwi6^qhr ze}L(=_pWm0ME&yS-wuz0wLrQjZVTb#pcS z8~LahAI>IG^L0`O{}LV~d*;Vf(HpMmcSJ=S?;nK(DevGG=Vkg(9;R)cE5ZFaj^!Ix z^ZWgCdA`VZ-#mj!4)bC>wb9CV;!y$x zvE-;;`BqZVL75ur=OABw)LTVTVFVM+Y2IaBAAi;>2YXGO_GB}>%v_Uq|K}|jh6kAa zUp}CMlvr$)mJN*r`1RRAAX#)5GZxrS&yQ1a0+}#^pqwQZ-eszdWR;0rd*J|A0zae@RKSUhxKKeXE{aG|Hxj!<*CdZp=W zx;v2Th)kg`=231C((_FA114b~@Wa9}sbr4e3CgPfyuF|RSTNNhx!f#%?=;K4p?3cdzcTH5 z%GUOF=r50u!@#>y&E#<%nVn@-E1WA+kNj*tE4R|D(vuE_!`QwEXEG$O(UHa$1A=bi zj~6QO?8Nx!hYo=*N9zortn5vkzsu{6c1zFa?YVK@>Cdn&p!b^tq}DQ-JnTR*S%gM~ zmN9#^7_dxGKM5Mgxq`nzLJ$(0Q(c+I#Vsk-840f3aF_j$QU9#8xgU=zjRu^82!lnGyptp0vZ`FTLk4Q(pF zw_DsHDw%{*3_f=WkUJFORN0shP=ah`BPb#(z(0xrL|q=h-Iv_=ZI{CHyPf`?$^Qeq z4jl^s3*oF&D8L zxEa`>n8pTc4YmMqoSH}JY*u-F;frmqbjkGE2`pwKLQ~+*hksu%0E2P0Il$Dh0Wu*B zpPPNez??jTq*&%@ctouN5CxzYQ0Yt^haO*va2Z;7LoV@mY&^SMm0qg|K=I{tc;2W0 z@sx8OX(`|*{6qFY`74{tp^|tE&>xXI5eWnwl@}~L2TFDBfS!T(atyBftj2wIk=z1J zDN2B@rxZvnvBuBELWRhK1DKxtF)0~Ah(MU29=eBLySuw`_xDI+h*jDX6U=HZQNL@G zPWFxp^=5+9jR&z6CsE-pKpNWJPAUccMx=eu@m_$8}0x6^^9)h@78ApyFc*p)Ly zC|sko4a0X*da$CMF9feOUjX4`m=F}}w?9%qeut{YIT$2diETfzEzp9J3l#TFRd>Ia 
z5raPMf~xu9qdYup_^Sd5pYxcPg#2FAeaXD;c0nJ2;zsH|-U!~~;e4$W5T~dWNTCAl zaYV*+tqLA*&#pj@J-+QyT__N(F1fHAcny5AGKyJzTafdMYz~`3U{AElc@Tk{N7oH^ zT|fr|oBcW;APvJL5b)7z1v-8T04c*^0p=PYvQq(`wHiJQz0~_^t8*!^xMZN=*_tHt zk%3Jkr`di(1*qtt`P0V6|r^?y0zbaHV~gGU0I5A%SBTzW0{0*Qu; zuR$xiN#gr_8!?w}hjK9(iA6`}4i{ z9Zltelkj8G0YTo#A}g-6H_UZmqcz6;hf}$tYPBZfBHk&C`el*CNuV=le>O%x^@uu$ zJz9bumqKS|J$b{xwx=eL5eqDYCY?S)@->&$nv&qbB%ts?x)GFd2u^o3Sp+Fa=!;Mf zCETrW+vh%u3KN)MPB$HSpjayTgObh3=pk0e?cpBL6+Jwhy*y66reJ0bqVG-`hFa6Mq8R`h6w;|ld@*kKYBW4f>?IboEd+`0ry?*y$T{^*@nz=F}@djf|bmWB5SpqxNQgv8du#EDCCR?R;oc zZnl14_u4-X+`ZlJ#Hw8K50-fI42s*oYYz~wgiL2FeVpW{U-#lx({|;Vt6Z^J?oz@a z{V5^_g1OCXI#l9Pqh(EZ4yX!~FmS|V8Sph41z09ULW1qOIyvPf(WugH zS4z{~Qlp;^$`1}Q!nacZ7OKPj05m#EG}CWjtzmt*8sW1E z+|J)*elceDb-NtRH2A%F(W(|97=wafnijC?=YX^la5)gq5y84)1r)pk8F3-bVC|SD z129^L5++g5%SeJlBWRS1g#nORz`~yfHp00)5KOB$=%6+1lFz=mjww6js*qX*!b9LB6XUSm{S{D%dzA7veQ#5bft5IpNISX znn2Wi@HJHoRA<791eCz5CqmnUK>N7KkjO3Ta3<9{0uqa6pZudDFZ znT?9Lxw+k*F0q1)>lDnW1YscA#{fgm|Dc}vI?y#jI1 z(F6*zTMSH#!vu0^Fv)noy)E!?j;5@BxM+O{U{YYQn}!O@5YxV`&BR|Msd&La(~$Zu zFC@lJ*hUzYtPso7u(3J_2IVq>?8E$yjde(Td2$0~Y6gpL6xrao4e`f$(K9d<>@Xa; zP2M9WJ$MNDc-C=|8%0IXMM8G*gsY`1km-CDUf8m^l+X`Y2X$n?I;e}yX)r_yCx!v2 z3o{X4>CTY9qf4m^Hi8O*AInmWjJ`t^yJ);fz_ss}f`PB_sA`e&vo-t4^pbA1)kUf9 zF^&VZqM>`fa^ik(x!Gp;sLS@4vrhBPm%Ry1rK9yO&%Y^XSWfb3LD#!X1ug)2LBiRL zHO2%j+=B*|OR-72wzvjfz2>Yb<}rVMGPdn=v>%=`v|sO=Ed;rnXY zpQ{^z?Vj0Y*ve$+uVi&D;F*6d|8any+43s(^!h50+`He`x@p`#vPR?EKs{bC>D+JB*P};sj~60JxBswtzav z#>Hl5M+iEfX$x_y-p^L`IMEl1n#i7kbM($z(R3Z$2E(mPBtDetgX_1R8%M^>-t_2$ zl*p2UfCl#pQ7L2)cg(?QIBSid=1a>p*(_Nu)tQgs@_AmH%@iO(U@z!(Xs^u$MRe!@ z!;d&|u}2qviov0<%9c*T=5p>R;c=)s!NMT{uN(CV-B@3JkAvxOjJ($i>-FZ7iw&M3 z(C}CHNGfj$v2t9ijt7qfR8)cID%`UBy*_HxH-G=9V5>ZA3IbF^k#^v80Xq~ee^Wj< zvW-=>UTd9>nh?Y?43=Yd5XQA-Z~f8;5ZR}~dN^(0f-)^^YW+H{F>AwOAIfE) zM-JDtmS02kg5slSLuyaL17%-iWjpb{}qJAu*+%!uXsD6LvVj2*rVByZCXTWeEOuqui@6<<)q=#>I zJ+yJ82(tzU$E#}~aVAar6J7vGcWJ{uw2e75O&Jgv;5ChBU<2frva?{*H$yVT*3ThP 
zQ56%5?}+OW>rrD3;KdVi~kU!QK!n}khu-8($4N`PpsfuXR$BsrN-q5K?7 z_xKjR_TGGW6oA+lO6t~l3;p|Xuv05fWZdaWnBs6Kw0`x$9*7A0qy_NqKVlzAh?2 zsKk(>v!oL6eNg|^_m97M79Hdq5;MOkE}(NaQ1QW%6iL%NLcbRI6=*d5aC@x|4_W`u zkXV=T=C-kzs8AL1s<*?pLS;|8e6|NxX=C^bYHZXtoG5(-eK<8hb5$$mws-c|a`YPi zrRrX$eNPNn72}LA3vlgNJVqo%3<48`A>#oPtk|rX3aS7uZNWe?T*(X>Q8;H7ti&=< zQb+9zP}(pM=qa`ZL;^l7Uf((CJNJWZcWa6O2XXWz0(%xL6e#$X#2RsCD^&@-%wtVz^?h-hXY7DM5c}d~<3H?tMx1}hIzki9Oy5}oUx4p- zbAN6EFMtVueoYEdu|!;2Q_znFY^IKP_qKD$ET!3Rj9N80W!;w3c_D7OcCSsGm%X~v z@q~B!-F}%$Lre7*u#xCf>G$v}-G6JrlsN$uiy9Bp?UrEA2#*VsxF#Ruw^2NVSBDSs zTPOz6N>}_py-Rdc;_V)2+x`j2x$m}76~;k>NJe266VsTdH6S<=Z;KdlbefEziGJ(& zybMzsn?a!%4gYimYx+U=0;F{N*IibXFfo17KYSnm7Yn%j(+b=KZI>IQI~|FhOK4`w z55s|Ve8_b?V4_^1<#Gj! z-?f>X7$;#@p10x#O4f@))>lm3&CaN+1wLRZq=+-y6~@KW#S>2;67VmEvA9H}#%$1e|=*{pSvR=xMvpN*q! zvBX_G{NZ@?Gy7ota4wFHBq%12q6t$?;2IjT=F5wVDB;DP{53PA3MZ%>_V#&VKh@aW zKo7fwQ914Ob~Kzqg3)&&VcD^7aj@fX;ZZy!^zFrwr~G`_2FHxP#(CnQO5omP9OA7< z*}8XbF3_kgxU@;1L@3m?a zKb*b^9H0tX_^?OCC-liwZ73*lhxQS`$7T=LtwuPH>d@gEoH3cLvKHmJjl!+}qWRQ) zYxHo-Jjm0Mj*vwZ^u_$H^2ZD4iV5nD$4Q$t*|HVwp=?y*k7&mj&MK7j`?x(a$2Ond z-$-1G{N5&V!CbTND}o)xGq@d{Bkx%E5y?A07lbPi6DMFQh zU);ak9Qrveb#jLbd`(kA8sqfvq{Wzk5BVGFvA5CC&9iks86A>pL6>53OXT?LbaqB5nh z(nK3N@@#6~gbu?D@Jo@x2lkjmGsAo+kt0THyCpnNa?9Cw2!L7U>9Nq?D4 z%^*0P(_L=H?-Z>`%kI9~fM3%y;iArqK4?{rL3Hn1p9q6Ha40ljMOJC1@rch89CUSj zrp~MaKb!VzA(_Wf;T!vk4JK@GW*|Wmt?$%HRjkA!H%DE;l_MCL z!);PM(lO0H4h1MK{srP!vud;d3~zPFq%|q1h-bDX_;my2-Xv4ILc(MG?nzG; zBK=xUH*#O45-o?YWenT-D6)9qMq%0pW{0V}k4TP61itw1i4=--t29@MjzYN?K927z zmmwk2jJEU+L4xDOJ|xx2s;W!=F(qaxxX$aw#)5f8p?AQ38TSx^cI@y>c1fS^CQl5b zJ569=gsP6KEGf&?K>8WnO6f4Mz1o<-u2L>ay6ZY-p7)U~r?_D*o!1nY?uMy50Sz>q z`})yId935=4#am^U%Aj~N2}nJXe`N%A(KWnX9UmDhK_FiV!^Spc}bYN`}fM0D=K}g z!71o#NZrT`IX0wWUx~e5N8P(!(Z9^_OOPUZ$4Tdd2`k!0KkC>&TU4$%1w#1xK% z!7^;Gyk+QWH%m6qN-WI`Xu3M1-B~@Wx*e~S5F>t9PKtw;X+X!k>WYR_s;A}7A(cI% zZ)7Uq5sesj`w&2cFX5SvF&z6w&hDl^hVc4@v*^rQJq#b9LN~`zf&L`1r*T=hE(ooE zjS$%?CXj(BsNoK<09@We5)b8z1m?)`Axi8K*YaJngw*Irj?I+5uw3(}%KQ5745rLN 
ztsf6@<5hphoM3~#ee9C7#t@XukP|EiqiCdpBiABbs29+_M?688Sd_|<%TL#{Q)F?Z zskqpWinZj|`H9%en>g|ctrB1T`U{MddkS3kEQN2C5u`5rw#q-{kivSh?`vHBrAMLM zG@iEK>gUMC1F&2NUtSb30_Ia*->BcOJ!$!Cq`CazdQAw&E2?S1iHsOxx?PZ#1pS1n zOrdHhHxnJUTte}r8vnL~z_Y;M!FV5Ic2h^X3ypomXo1xF?n(UnSeHc(Q!M0F{T1(C zmt<{YF!x=-z4%qyjvcQI0r`iV+7_}lnBB9XW`&G96+<2`t7bxFJ6`MQO72W55ntVb z@{t|mhQhhJABoYi!$FuQOXSuIS|o`9P#|77HvHivoeZ_}h| z%u;m3dbEJ=IQCLh(07uhk2v`=%=BS|#~p4fMX{CYSRnjjYb)4=(Nn*uB_xE) zz}ymhwTM>L@4!$Pyq*xEW+8QzGWKPyS$K1?*0cdANbgG@?5~IPz|;}kAk4b+@b*T} zVi?Nj&)eOHGRciEpvPvyi^JM#zgw!Maw?2HD-L5Avir zTh^wJ<6GSC9?~cz`j(p4pbl~zUMX9m2f1HAg?uuq3<^Tvi))924hh2=0Xb)Y=kf5e9Oyyb?6nn09)xk?X#JL1x41zSOOH2ThLL z;`QxM8U2{TFDC5S6`xBbTM6Dbg3OTxK+3w>K3}3p#PW@`sD2+b(7nSoTF=c^McT)7 zP^GFdxI2@;a~G*5uh+zuA+vNn9+vm3%eFY4-~o1sTiTHBb$`&A%N?5M*CxMOJ+*}9 z!Lg~cR)6R2y0RL0rgx*jP4zuoI3}BBP1IiexowpnVb&xtyP6DnzYG1dc{dq`Js|qn zXwTEkkM$frj^EJt}H@KYc5 zfPO;%OI$d^frFv?Gtr>_2Uc#DR!0E3{T-UubPmPS%3_Hlm7pMbq)Q&m(}nq1hO4TR(?-Grvqlcm6IE~9O&x`D} znRFXKbxAu;5hgoykI9FVV{f$sF+1CLGAj^;<`49U;S<_7!SzSGS~f3N zMKi3_Oo?J;w45%yN>Nb&jp=+qz3K@R9?4_qSwCLoR={2ugWzO} za1YA;@@ID3OX@WBd(%fJirm5M*-bQoWO>p_oqap5GYj^kp5h;PDM=}NLx0t0Z?EMp ztkw^$MEvu0l_R-oSalF0I|eODdl?NQ!yf2J-JU^SE4XXG((@0K{>@8Icudf2LIF8U zj?_eiFpkcN2=N-r7&Uy=LSxR7KWp%Owivm({R!32iQay_BcxfcUni0<=xdYxM%HDb z20#MjcfUWwTR-o_3c?6x&jb8q8jmY2zt?T}Wzj7@w-Ys%J(Of0zxRC)Ns{})1i>d~ z3ozSZm#W`Nkj=e3Jy9z@t$aBN6tZZ3SqQ0@uTDUlMhQT9LV)pFp-sx-gQ~=GQ3 z=+p__R;VfjGd<2>h(`Plpdr$e$qMLmH$ z7iF!x!|CA5CReTPT>qDQhU&AIDIi`&YU&LrDlj{vI+J<_#sp$Hlp5GX6;ch7nKa9= z0L_Rw>&Pn2v4*i_XqEk*6xA0YQk(GwQxwsMyy&#`g)n~m-iaLBG8@6y@|$&6sqEzwfI(-6pl8LE^_s2k}8hQ z##kb43Z|m7tJb8vV7+QI8E>`s{*F#O@74AUM(pn-4b)PX8`WF!OIiPd9{smC%CdJq zvya0-8MtwViuWEoO#i}Na5s_~(HNWD(3hgQ2N(NQJ`Yg}KuZ_DxA58+Qn`$+>MtqT zqyrsZC{n`<`izVN2vL8yVj08GpE|smKxx%(`7in-tl?q60Z!8R&ghhS+DKfkj4-FDf_ zg9`$oLngXp;c7on{Q2qwdmw0mNg6hy){3_gHXKR*GE zFvuKExzlV;yYWb)uH)7RH3D30>2B>>v@d|tkvbg2FARQwOTA2OcM?9M#w1v5Tik$n zgLtXtHVCBO37)0HX-@_qwehx!E57|r6R$Z9!Vb116Ey0Z4A?5}mr14#B~GSR4*K_K 
zBlQmpxTf|^HU`>U1X_0dFBY&v)g*)qjewmqLNUS!!#83MkV>$ElTcvr+p)HHfLNno zwpg(|6l8bM1au~F^+(eh@zBY?YO0Q{7l05AaeINpg=%v0o?>`hERiYlfvazKXfuO zw%KSxC?=zxlvYjv(VY>(r3u?9Ux2~^fYhQu^dSQx2jj5HFSnH_0Tdk5e`Ri+4G0mwBvkR>O$VkNyTVJ2;ncr=nL{kU6NQQhSwz z4i+vAqMJ4UN7=+jSlDu+QD(cDWd%0UK4$533A3t@VYH+NTuf7E*j`diGPsM@!`_;- zv>TJxw|ezSyKEix31E#YMY` zzW6UdXe$3L*jpHblB8PgNNt2p9JepUYLg&@udK2g?N&UwBO=T3Uu8<@il_EH(6 zU5})s8#(?P&2BmZb}|JGFcE~#wMdIE-YZ__+_BvzpuQss!9uimixgRWK}MafOO2@= zDuopO(9mY8*CftJ&v7)oPb(kAq9JRM#UbvmX@_`CmHUHl{J%@Mbz!CwG- z!V@DeXBh$RA`K{L?~v@xZBP_r|0?9@!d4NdBEThaF?CU;C8lreZ0DTfAiLvByR)~<{)O^;n{O~oQEAq zh<$(`DF)O%pSlN6{ly%=efYgzmG++u^MLdv50K!Ia{GR~Jw)7+g5Bf3-cRoD>d7ghE&WnMOa(rUR1!EisDj zq)X~N{P<;)6<=f|L!(_IE~*rA6K=3a?*$&GK=EXC)2>M*E6kTBw_;nh=*w5;CSB;z zITRE#G}zci7SsbgD#ESkJ){S_>6@Xm@C(|J8KNfq?vj5QCWPy<#03{!bTSukm{f6U zD5hFpgi#0x*D3h?ZAuaHW6}={8ZnJeJXxB0Oz;{a>bGF_o*n}Z1~v}pLnARdbl+YxFzLrLG& zDWC^I&WBWVmSvQ1-HGMCcq)tA?&~0(F3!pZA z08dinxeXI77;l>4)HSH+ZHI2y%mfMP?(t#z&Pl3us()`of#Q3L>9JhPv?S;y<>9jB z8cGWR&kF%BF>>r=M1^0z?Q?~2^t@?;*P@W&0cc>j{ArD$2i^p~y;T7$qP8H>mo_GbIV2?`f|(C+SUT zCjtRq`*CCH5W)hOND74god|wvl)Hu(1@&BWyTP^HSJCAKY z*~M><7)YMC=1>hBiHf@Jh0TNG(|^r%=OY{K7yO>mB^Q z+-tAS@QGQhUru_eh=T|~O5o3~rFYA+NV<3(b7lPPUw!7+A3WjmaP#@jIeuRbIZ_+p zKpIdJ-EH9UYHCOQeTit$0WRm1Ms5~EGG40t(CW~b>@|&qXCB(^%<<8P#w0uY+380^ zW&EcXG9|LVK(TaNrc?0f+-EAq@xa!E$34P`yaFL+YWD8efs)xGWuTshXAE^#UvJyZ@tGwz+*<5rL3G2$UDiic1~E1&&p zic(A|q3I7VuzDPVQ~Z|U1F&Tn&1~euhx(LEb)Jqy5c92P;4($%P9Wv}_^SHzF~Aw~ zX;HSCR=rpkexdC9zVe202S`#-he}cIin9h?FZVUuWhbZ<(`O)xG7+c^!En4f?>#q>n;C|!FF@II2V=^cUWB34?1<15kY8i-hZ-FgV zQTEsSLnJMrr78yWcjcKdH4Ua)Il%N{UEBjm`Y=#6A;*M2hk#Fw^%hdC8cG-pgl>{y z)NdO|1T9}q`NI=5UH(X0U{Ex**$PGS)546_P^K0|ku@iU52qRn79_tqQ1_BYbJa2Y z#U;E9XL6YMl{qznN%zjxD}TKF(BmkL86OULCjB3g9>=Y$;0K`%k0g}=j3W{wY^*B% zot{t4&psNomAOC*k1xm~?=-)|B=O|N2e8w_f^STHfYUKp77fLshM7?glKOGvz_3Sl z6`zZk=u<$f0JHhD2LwBY0w+_kK?)Sw;qkG|-SIpbA6|^bK&|;(^M;?Q?$qDfUJW;_ zri|c%Y#!GLI81t}8W&BVK$>Fk{vyQ2@nW@=wO0qRu-7Oc-@pNrPJ{mLl`HEh49#rb 
z2qs)oD-xg&8bBk#pKcC{0oBwDpg41Bm`1d1kH-1K2PG=B1`xcqvz%vm4l|>OP{GsJ z*#K!&$f%SPpz~X(9iYK-T%dvj<(NHQA8TjQi7xZ_=q-(OxwDq+fg)=!WkM;gtdE7G zK;vL2L~6(Ffg#&lk9)o0^yfweJ>X5k5CQVrAIMcI)=477yA!Kf-C+jrGhcT@vVViw zLk}-Fs$R)uKcUOU@BT;R{5cCOk_P+=-J)u!8T0|-1CMcRLWBpD(X-kGd>+37A!;In zR`5*p&5K~sY$z!9Hjz+QA@B(>b+CEi9yacR&ie?tHgR$Tz+eok9D+5V-{qr?`T!8a zsM|uXS8~rl)Ui5l5;A{NXa6;PLToloFgt;U2YH|Dgb_uUV8fXN;Tp1y=fBAk)9?Jp zKncyUM7GODjU^}@_xALUp=i8B#^6ULgAR`nw|qqpKfI&MZ*HmxN|d*NT94JZ7R*m; z1`tG+7F&8cU{Ya6VAztKY34jiMMU(QRhzO+a3Y z#^X%-RNnmmkBd!INV{$c4ZmnDrSCu1mCry3m~fDa4&tmw9F{dc6eko3g0RT5e>5ru z!~-Pw89m3g41|-+`8L5MJZAku&1G}6sf#zD$zogJFli#+Jd z-3`~@-M=e8T!^MaZI7Sf`TWwPnCAc?x{j}3(Lv-=+8V0h&6wj z@bxY~@%o}yVa}@e?`7|1AD`m{%4*sdN@!>lLIIe$r>4 zg%1VrW%8%U$=6}$fC~QgtJz-dMC13p#Y+)8AZE8Jo6SkBT~!WlQyuNFDgivd5@>q| zcmX>v?)Fyie_+*WwKxDxe?eUi*-kuod+AYivU-=_nhxG5<8}kRAz!+m?UWvLu&eN8 zTZ2{3*vV{bC~dXr4WPkXv!^AZ|MO9<6rgmGD2ZGm{#K?&F3GYd;C&r#v?uwx%N(q( z>+92@E`YM5KE6-aePn#Fa0hXn$|=fcy;4Bg8x-?6)JQjdNYbfh$XTf`EvQK=*Z<RavXHtBY$u)iI3{TT)?f7Fo1io`*7{&-N#NRnaS^n?0?LT=wZa{*_ zhufm$aT)>s8KZ#DGp))4Ne7<)(JRha1_M7Qo+8A%WyeU9{A$|J)#S}oIpEKZTlzaGsI6}d`*LvAj)*Wvb8|r-A3wqsd z63*K%hrR3Y#5UO)h?dTLQyRe1T|quyt6ENCw;)Ut{7xDeuwXwEX7qHvR9aehY#In! zz+$)RVP*>RTjJAuOy-%pmHZ-W`j(YN1arsD!=utEQ3=QkoQtDXuK@35;eTTsMtggu zlaXh#NK7%88*RZ6PnTk@F{lA6)$~i-A&PJ1N zI)4_P43F8>f2J+G#RqsN<95#gsTw7w_sJ!d9 zX)()hf*RJInU*~!ExE!!eS1{0@iQi#;<72KRuL)Gk_J&5Kr9E1<~mpstGiziQH$|a zuF|xM%loNxvIf!uY7gID&rR~(7c_Kz6r;#Z=hp&Nm!s-iRbT`l#RU89{}oxhLPPp{ z7X=uo`_x7qq>6ii*k(U;HTOXkzYhTo-bXU7@A)mW%yt@|l&!xlkJR|WImM?R_o9kH zcw0vEG5#4by(i`R;r4P}-~1}15vKrs;C8n*n9Xk}TP@e=j`(SzLPx}$?)^)ixt0Yk zr!Oo9oqE=Lr-MeRo!;9oTBsDy*C_#>27jvF8GI@QzU78*6otkKfC{bU@2C|J2e7-h zS9slcv7D`8$0xBjLoZNBl`)QK`AF1Z{{p%86j~r%Z-?nmoQjyqiq=xhxq*<_9URpAiP! 
z+}z3$Nmhq|Gu`F<_S1@*(g(}|mY){61j=6GE~^>H23SW+)1nIQ5i~bUI*Sha=ya>6 z(%EkfS_VrI>T2=k-PIz1bzkk>Y*ZC>0tW)=bO}l_`To=w;Pt7h8p!bMHT zB3a%&+mU~N_Y+?JG{7w_FU6}yVar+akKWodpj+GI`aM6An45lQ+qSW>wOUAURck$F zYIwge4`G<|@@Cl-qYQFLr6tL zhkh*QOxt-^nMZ5Q*B6ZNyHZ5B_v1H){?`ysxSM&-?;&dV#wdV2q5qLiv&!SrFvKd6 z?K4a)4;t*hPIp576oW7*;4$Y4OV4PD_00AcPu!DfzB$x?XBKAD+y~t0Rr+0DAVNQ_ z*|tX>UCox8m}Z%qw?D~}SX_CISG}n_eXit6jbZIpS}2XnZzu9Klg%H3M%~QfzLC?b zze95ojqXSyTW+07OK^KRG<&VLssA8M;+fh$xwxp7qM`2tVn|BHsQVW@QMTBQSmTR} zsR9qZy}c7x#t+8oqQp|9Sm=*7Flb{eDL(C+jO%t?K+sQL({n`fhon}NPzVFMMhnT$ z-b*6sA=dpG6V2**;sP650o|C6F*t+wAJ)5RY~1>tK7reViaA-J!`l)-#L2uVSUYeG z9yx;85fWZ(v3K?C_pb3`)~f#egVaOQ;wbv$v4#6=B7oUe(k|CGYcW<91X%DXwf3j) zu{(Z)Q($6WW6?rYheJXzZg|OjhDm9MW2&=>#GuzPMEQ0;5oK@w%8*rQ?I7s)~Q7ZKK#tWY4GTH$jR{R%Kr>9+MN!@sO|Q>Nuxy; z!yvTLjl~k&l;0&Ao+QEgwfGq;)yHvo-2FRlL&(X7d^d3G{7o_!o5FF2BBv?y1r5fp zzDtm)~F!cRug|C#)KmdtynA{SzWR(vsZ2@?=-BN_0@_+r`)_8*S zx1Ij>RhR-PkOH&;U)M!q219@puc@Z!kITH^`dSbAFs=TH-_%p!uNBehj2{z~c$mYa z=En_5K$c2h3w{cFGyVD@ zw!SVk-VgqHH4L<7qebUzHC_27=lQZVZm<$#z|a4E-nd#?_91b^kLO`TITMy2I=UDu zhP!mFWlb%2iQJmQ4DoI58x+HaYp}*ed--iMzv@i)Ic&+^$lp1q0ZLi}`+^yJz6s1j z1VNP>Ky|{Y5QUm8^2ffnJTPS2E&-xG_bP!S6*f zl|?E$y(iSJSD0+Zql7xGXHnMnQC}dV_Xt|f%V#(VUSpjUSKV{&B(uw$m^!11XEHtU zRYRLIyh{6Csys>^wteo@7US~p8?kHSaqsmdT)i0c2@W=>->N$`u~%aJ@}jAodxvgl zKUiOEXbEobt-3V*ZjrU}&Mwo(0+``TMPqY`MiVN3QR@~m&e9}%HxCY|EefH zgE~REq5GbJU_mFq_c-|5{_91EUrIvKG$imjPn#F?w`O}lIm@MQKT*CJEIY~^ut3GL zgmJ%>;bGr1reh&+nMXk1sQG98wnv7ez&TVf$CK zRob`lzCiIm<`S_o#~B&3&1J?q|E_x%s>k+gQ|DhK%dO;Vcdk$WoQQf1wqW8Yr3^Z< z(rq;G?FKruhG}>DvmD--NleNJPeeAdww;?)Kr!K~tOVSWf;uDZ6a5kW9g9u$6e0)~ z-vniYY`ajQt@?4R%XAEmM|8R1-i$Q=o1J{Oh zGE!q9+;{colWfq9k2~bv=&-d1q*aXkE_SrRtbb%SstL;~AFV8JLE3CGf z6pLboGM>J+RYjT$XXZw=AH^%O+u{(i$}XN;uf$%8p>O-dJh1<@Qv-kWSdbCp{3m}B z0FZD3EhPsoF4jwlqf^;RodoYd`d%(xxrxl%`TQ9N6%xiAqGt=+uefX9*EPRXhd3V2*48ZzB z`j_f;It~~3oA9Qtf!Wq9+4sT-gU^Q)co;0yK)kIOLzw$^wlcMKu0Qh756#}@&S9%| z-aA2Xr1c@-;{B%M<#G|x`I%CE~$y8L^2q|sMdnDU3hn|(54*BD3l%k1Hp zKumFWSEoqHx~0p>>O 
zV!bnX1n^gWl0OpqAMbBg2NRjQCwTG(lBn}g2G5^Sukm1k)QSVb%M*&JQjkzcxqlGx zXq+PAr+4qx`k|BZY&>QD4|i|<7xmYDff7SWi-JfEC_S{&EhQlWf^E2#?nWQ0nRyeA_xRG5#$cNw|}rly=h*gnPI=uEM^Q^68>ysO=8p z6Pc0XUc#&h>s{%adkPjs8u@&#J4|2Sef`QZ(n8P6K}xykP#WW|BRn(H!!D!9BI3wC z(G?+0c(RixtK0N>4J&?Z;%B_gRfTrExp#8{+(22GMZ}Vt<9>sxHLIxo+g5A~zrx(A zpt!;9IiJGQK<2o79m2Y*EoFI)@;d_bic$iTE{5M+qe@~);?t|K)xS^GyY^(-;&J~d zRgji>M-PKS=`=pQINUm9c0+nX0}E-ps}|O}t6xWC)h<6W8GrrM8RTG>y#=z=+h|Bw zmuLgvUuyj+k1c?9+3EbyLOuaT9?zzuQA?3r&}NV&g;VGHn;PgIeOWY$<$!MXO)P$r1y}mZY z&mN+bZ)+DnB)76=e%d1WR-A$Td1%R&Id+byR^hOsxND4Y!|B_=l+~a)Q4N^9TJHUA zBi8+&xe81&F%O@s#YjrONal)Ve3@WkY5g>bFH=4-?c(ZkT8m*jWW8g8$ExS|Gn+(q z;_gb8@2#|4)nlCl?EqD3fB#-HSF|XxZdnO$S-yE*J>cZM=%&wvbFQG)%p#q< z|LJS3&u@9?bTga8j_~q5jtsh$g6U+gUH2OTeI#ml+KZi5dmMq;FC?#*zm>1Y_`7hD zkFrjBCZFnqJ`cfg#>h#$&D;dm594!G`ZaX61*9Vkd+f;wje_!R*c6M=e2Z%BS99tt zAVO0cE4;Ai3akILbwUGwJz9rbdDd-hZYH7hFBWkB%W~k`C!R>9oqWpmpn(689tO~f zOG!dVVVdIj6udd0y~TP0yabn5S7p+LT=Tvq^Qc`MZ?DhPf0*+FQrj&1rB0Q1_aEQc z(=Ru{5pmy_BHjhu7PXI(K}irIW|C5gc~icLE(r2H zltC8z;K*l$*5}A8Q7dN>b<^o;b}N-(6+Yn)wp1-QXLVkP&Mw?7;EJn!ZT~irzKH90 zs!;X%@D{n;d31B@*4}0E@SNt?nA<)tec!&zR3s={VoE=weO7mPcsxs$>(bQL;3>W~OJ;_b>gdSsF7gD9G}X zrufUs&v%wAVN)wUJpw9prW4PO+9F>BvTaSIeE9%z-~Z&MfTe?ll|5<5dGBh{_=~m1 zzPqreyen_1c>{w=u%NRumkggvO64H~QBn9v{%}3k$xphl7NSZUtB%O%_Mb`0!pJJ* zh)A=1oBK;)bhzNpUWHA9SvqA7gF5fIbj-(Wiflz|FCY_xgPEh-O;4>Ym-Ecj9f;%v zv|n2qe-N7_%};vf;&j^jpdjK^;WXdxuYkB5@gsZIwQ}LRHA^}lM#p>u+o#nJu8Q3T zj_|p;TdFBJW$VqgAk#RNgz`M6A4(B?`5|ZtysgwWRn&Ro2T|BjPb_2MlMsTpTWwkTz|f-` zGrp3ql@5vz9Q{D9k|j)xRqMj08n-LVm4OkGJ4Fcz{HUyV#&|S+w3F8q-PA-QfcVMh zUdSyUvi33d4!b;8EJJpFS0skJi;(y6mSXYmkN4!e6DFr9Mu!%5(t8Fbk|Q`sN7|!j z;_k8z^*6mFDPFaC$q{Q=cH4xEhoQr8Q>ggcp5v-XAA*mb{rU5F3vU#yrfOpfs-1~< zyKtBC=6I_SC&ZzSWncEy#Sfkd*J_(Qr<;pp)i_pVx6%89g_)_se!8mpylfk>mme)s zE&Rs)3Hrn`l$y>?3_i`dk&0>b#yMq7#cYK1I+9tQ3 z9(u0)q9nOmyLup{xy8UC4vxw6V{b)|rM)=saw%UmldHsH94u80Mo*5ryPX8EYXhN z+S)2m&6Th8Jh6GGc02Yr^St}ne(u{4XCXkBdt^BZD=3i%8mj_ej{i0yV1BJPJ{)p$ 
zGayma{8Xx4T3nj}TgvdyvL0)tR(*J7%%~d1%u~=h9RHkrWZJ3x{(n`6wQ;+MK6P-1G(Xk11KGvGO4}J)fkMVAHuTrG4Kwh%MHxMjN4Ywehov1zUkPV^afFl%JpfIq;)-J&+~{51`e@Jy zPd4_yc_<$jdW(!*an9?6%5VO`mwB2)N(cj;zqlN>TmzKoJ|qi+7>kI&evv4{I#`S! z6=$?vGe5v|%JBJIfIP-#ZOg8~yppE4{=j5l!X6rVE$qxaLknAy5S4l7S!Vzd8l6M1_hHPzH71$BH+<1Hdbxw8g zwLE5g$*?v+;PU9zp8cf5A;H4CRp?wT4};PSlW`+-N-1KG(<5#U#WZYoY!K`4DHF z-F+S+6E(jb$4$RnN}Z#C{y~HGydw*-GSPAP922lq*6p3lnc(}R_X$blFz7XT@kb0& zu-0VlR(fxu?%tORf$H*@$k*x-tE4+57XbCjt!5w<{-ABj z!`vjAl!$v3NI<{9lssM*=TLv05-Ip|;0aTb&}B;wJgT~$Iqnw^0iDhW@^ye%R(c%00V7B5(kmuC@7P4|=F4~> z=DI_fF6NVgOc<~T49j3pX+JcK*T6;w6;;&#`)AEEf3N*Nb4ljah@vXN2!}Yv#o*Vc z)hmfN_><>-H`q3hwi>7HeRiy%XAnOQacFR?NQa~SEmg_Oaa1EKv9VDrb6KHsuE1;V zFE4DOjIA?Vt;IX93%w zy}QRG6LGI4FIw(T1xrVTrt+Z$BpkASTruqb6p(N_G(^Nw^3gaKl_*#K!<#(p(-CO+ zk4EJJvxPad9!n>`_6uD9&puvVf0v^?^6*9nOe z=8s;M|MSZO(fpL5iyxVkV*XpMf@_~1J4vUhzs*3kY8fmyvO)DH6(Y_Il_mO+L;i0I z9SpO${`<;>IkZ9~KjP18v3&pzpwpuHFPo$QR0ZibGKTKAay@2+$ZZ@#SQW(Ds1MFAM>af88CB_lcMTTlCK>L)o(E zUtUw`mIQ<&NCW?G{FKm(p+_c(H6&PrZ7HD{KHBp0|H@BwcL8%G!p)U&m`(z^g&aKL zqACA0^Q~_V83YF91Q-1RIG^>cDngf+mohJkG~QL-!wW{$t|TZOZIRZ*ZT-LTlR_ij zo_7F2ma`r4s6T?VB%tX3%uj&sW!oLTAD$`Vx!QUH0Rc9P66qE;9VH(kDJj1AfE}&j zmgx7Xse0fFPV`qKRh@Q*UX2)yD&WjH_>lKs4wFe@Vjp?eu}u+p~% z%#c>Qf4;W(AG5_kR>18+bKxm`GBL*u`al1Dnx#u^*!_6f99>WUd-%&KCA(`ALuaQpZ2QDJNt{*ARTw6X%lXN zPX?{Tj$_ktF?u~BU-t`?yBh!fz!1Lzkr|)zBOSuWP!v zBX6Ui`hcm+XEXhw&UsT67!JN}_CbyZ07LI%_gDKBun4Jh0E1Bt03E-8 z<6xfkRMj8z!yg4IFX~;a)tf>>uonJU{kd#uy1jGQUw(SN*F{qZv<{Cv4qyMQy1F<~ z1DQy7XQxcAtfz{o9QEXjCU37dvkkRx#(<^W3$52v0NPadj&lY!6KKi5e-x4cp?H{h z#&`3$Ah*MxBO|dOcYgxB2R|UQPCzAA*D`Pbwr4>be7iI}W_TiA=P$n8UCAay+HZoX zVz;q_y zXwnO5;N$lZ2%ZY6z(DEnt?g}KoFDSN6u4~=55xk3XQAkaxmD{SQ>`|Pr1{HcfvH9> z&sG7guO7~mUp=d}3nXq1oxfL@E2oHfR16e2&q+60&gOS^ zO80yIWKDj*{K0wq#XUT%ECIa@h1x1bLIMuorD983ao&5qJ^2NkRNnK=&rNrTFayZ@dtNp2W?Ss{cbIbwp4e5WVPJIde3YEUOn7er~>#))r4M@Bty0xlWBEZ+= z(>dxQ?(Hz^!EQYN2!~*Nir{%NoY0?uNy`lUO2N*2U@FibLyv1c*O(6X@>(AhyU772 
zoCPi)>8zRzs%@xN2GXZOjS5wB@3`}IQLlgT4Sm(}3Y0ZSetXbkY`)@Y;BQv14V7pD zZp0WwR=;KHRr3^|c86eKM~IPH8+v8%n7?P&t|g=v{s21bx3Q0*413V%RX4CvV^;^# zi$YvybtGDr^u&0v1aBY5GoIjJZvxXN@P-Z(PYC@Za zt{H`fDc^#XX8bgkLH!DSry%p^BkxN$>BhaJW)MMek`^Z+v!PwitBZ?SrKa<~d%pTo zM2J9?L3(u9x-=uUlo>MGtq#Rjff$d6TGa}Rc%>J!6G)3TZic_fQRT*{m=x7`u_JU8 z!6t_uN?>zrgfA6e129SyiR<O@jlIb^`@?<-KDbVSl*f@P3JU=mRBM-AsI zD}zIq3r;!9=KM-uBD2r+W#INoC^}9^D)0?7y&L@aeu9o>unHeUig+}jr=fx9Wrs6rI?4~$~NA&d;D z`)i?nq;cO92{@w&O3L^yuihK~Bg)efC-_*Nga z96@nF6mYX88g8zOD4CSQ&B6qRHqZ)SCc_3HbtIy+BF0>h@gQRQC9XM-;=VjtFs}sR~;tRscWj3h+Leswk^<6&Tr)v)H zCHa9re-O4_GJAo)K~reB%n%bZX|$Zi;7p zseyFD1-YBJlLSaAoyaZLK^}@jR0AxbQsllXS}gKOqVQdwuc$#;$LVkt9;KjHgRzki zIyCNE+$gwIFze9IYWW0TtR`Z>NzJKJ34qYzapMEuYm_Y9#h^pzLntkx`~e%xw}8gG z{mF37)9j1`Og|J8AwUR$0hU` zlrt2^HSkq&EIF$V{C)j@_;zRCKoMXOwc4hJhOnY+52==M*S}L+dj-wC5GUHx{X*|| zclHt6rDj31Bsti@l|mfdkSXw4T5t;OXKqN!<=RfiUdRpLttzvR2HA}J(C))opPIT$Uedn z1){MO=3=Y}5aq0H;n2hUV~H_zvNil!9^V6$a2&TP%6p4xZKw0XEyh^Q#P#s-WI zEaOCfh2OG+@~d+GQqq8^)7e+l@S8n>5iz#M7Jti^a7$F3343ZoAraUa((TYlN3;`S z0a}+J&^QZq$LKqO*{3pKeNCuEEb5^|9Yph~y>nTbi8(?>p(s^40d5P@)=iiDxJFZ) z*qBwr{*^{Wf;N|X<(z7&uVw>*IH1VfM}?qx&CT!bzQt{HRR|rr1L`IpHh;!CrT~}) zw6bufO+@M_8Q=!PYhHZEtwIX(FFjNQFD>x=Fx@3zbi#3i^!+EDTv%>@UJ$!)VL| z3m`V&SAJf4=Ikp?l}7f9#|0CbNg6?XPM~P?rBfiNi-3qw#188kJM|WVT-Xhv3b+xM zOMAq>B!*QCK(g@@m$a-N-5ZAHx0^l3Q~_u~ff(rO-H9|gkiUo+vRHi3!TI#TKje;G z?#IHB@7CT(^hyZ(-Rh?5X0JMHy%lb~ZrYbXe@bl2KHiDX%AQd>TkDwb!9Y3XwAcA2 z=kYx}h$TlTYUn*!5MuHpJTMv)%|CoC#%SWn_>1qnn#SNZDAG3zb}B%BcxT=4vDDT0 zC;WZcT1++z*5073dEq)36W@*I4p}NKN8=S z6r4xBS-XW0PX@+>7}TQ*^M1#G(4CZJ1$*lYrzKzy733VbN0%;e+?|i1Q)6_6g?Ud+ zSmzyogB8K3MI5dbf&wi;D@vzfhT02@7Hh|YQYC>}Htr+F;3*)@W(>+|F?HQhZg`Gw zTwyaK2&6%BrgmP@uz5-8yqZgHsX6Na1xeD5~P zep<^Iz!uQbcE=&ZMqFs8ZINKUA!X;xp18H#4=)r(y}+4JLkzxQI7Mt3jEaBC|t7V-E6}ie-=y>AKa3e z!HGG4hWJSNmhe}%mANqs-hP0xNnRWZ%@`lRwM?~e-EH7G%&VKfgc>mQ8Dc{dT5>xy z%ty`**MoxYb0GZbXSMaCURL(03Rl6}da=p&KpIp6H1RIqEn0Z$pkvr=pAUSWOJFTn zhA2*9&x{diiu>hS=poWlf*1HYh}NI9QioHljxTJ_)TO 
zz+L%JqHSA3_?jd%x7P5|nY>OoupeV7%>MDuO+h8+U7c18562ewKnJCxafxA&N~0}Q zYlse`D<9tVZrGy?f9{4ZNB~B)jX9kdLdS~`GHcaXd#Pj#+e=X|b^bM624V4dqrCC8JCv ztHIU<-LvVL8C&UTFjNh+dgrQE0Hnosty&K-_9R+EOUtQXb%kYyG?geYIH8}Y25@1v z=bAEs>`ffVg2T+dChj07oeQ9W;%VH<)>bSFgfH~|rH3$VDQyTE1-q0m5w<-NL3#y^ zVp7TCySY3VoCb-aB51O+ZBN%8R5dl-K)Zq7m{byr_)7aWHp1Q2-XYXaZk=0Lea~Y$UZ89+e5O^p6~R z+0B-WWg;5)JBUf_W@>!=aIrpSAK-nTr*odh&Tb`w=UE{-odhd^%0vawDnEoQl3(q4 zpVw;K?`1PSD3uk!WQAeBC{Qpz@+Y7>Prz)E7zp`nr-%2-;ZkVsPW}?B$PGYKQcDe* zt}j91x|0sDEVRvFP66t+W%o6J#FX3VqK@X9UOh-4ELLunx)=kPz*>#xNvYVT20*H7 z!E^`5iDBn?Fh-+3sbJ3Zt56?=eN)KR_P0M}<`zJj1o$t}$3!C7mh)>bcVIn(00Zh7 z9j%H=yK%Vo2|Dok0RIh7{XGXtg-9o7=b4@Ze>lXCDO~&!7(ecEnPdE)or=JYpx_&D z2d1-6&A%l@BZ->Q06o)l2R|Hvk+BmA&R!E@X;(`V=>z+G7u6Nlob@&^(73lfKR;(* zk(c4(vguOo9~|TmBUwJ(I(hZVD`d%~0%k_uIh;Z-7Q1_@n2@d&1N;s?y#Qk|yiI(l zTvE%X(ht6_a@W~Sl6*F3de%}mr}gZy?d+unz}}{TB=^MszR^4l>JiBZ!atGiMyxo(x1q1La>hpXc#br6T`9nN9-E zuR+Cx)N&`&8ABJ@3LAop#ESapHqVKhsE!9_-;y|LaTvAoDh`cq&qU2|!wQqf1?^7h zNnUAIyV|+k+Vn(}Qj^R%Df(J#E^^Vbx7uJZ3_TvV?j8=l`l=-sccnId!&px-A<|x_ zu;ufk*3q1heKVV|x&*V@k}URA7sgbr4B(wvuek4us15@wH*KaT^Mbt-6B8@TmlpzB z_Y;2Vu6pfUJakFzPv&7n-(3FBOm`_wWZ^#k@$54A>9aq#55zAtNSvv7_(_uZo1#Pb zajJIi6`NLW@*|%z^^~X9J`7)s$2Wu%pYFVIaV^`q6FZvtu^4O znL#8eFM5do8uYR00Uc1UZd2Hbi>nbINut_YGNJ*8vsyP|Bfi*uOWiqsmrbQiQ4xWz zKG}amQYa@f^l?5RUZJsqL5?4eJxhH#9*u!u_@ZO zR1rSK^_C3s2GrQ}s9in(dH+*0WQaHWuFzo=;2Vcji2IBG0)4@C&@nJKl{8z|ve3+x zwWG`s${5_?X0SfjS|=(Qp1#%K1Zy|XP39yuW!;H{ZD?5#_&hFpRJ~W^opf1MRZ&sj zldy3QVEF5`pN*R|(6y^=y!6h9h-h4MG&%>C6 zJ5lnDN@Y;K0=MYU*~Qf^g`ScxWya1^BV{wJJQK7feFkyKDahO2Sgn8Vm8ww#?g|-R zeFO)sUG@ws*;*+T?aNjDCaOC?(fl3{r#+R4xw&i5m2L90*_)ZVqfDdchcB740Cg)vvA5cx{-XRMJ0ReR z>P{CX=}R4*DRMYL6W}zv$;gT>U$eZU1K4FQxIF^mL$F=KUkNBk8`_InAM#`*Od>^QQZ2=~hg+A2dt(NDZEod2z>+w@B zdekLivbVdx-#Vz8zMKkQsu+fol$H!X!)swppn#35He}#*zh;=t(^x-MeEt@(f}m5t zXJ9!c|HQlXyPrF*O>O)tcIMp{^_zlw8oLV;zo`sDDhFvmuu8|CUG=smt>@c1# za6eeB&XPOvcM7;T4jo)Fg^Mob4|89fp2itUdMD@d8^{WsPS&?*&P)5?=RHeL;g&PQ zMP+9c>C;YFgQ*#YnVlGvW3R*;j+VNgH8LJ*sD1+jjjlt#dJC 
zX~QnXM{59agJpAI_pG3I$u5eUUx{=_!J4|O3=!cdvaRJx(z@N#clK|TCF%qLh0s_$ zmmhQqAAAQ%fC*?VUA}RY1(%WTnHhpgA)?B9D)>y9z`hn5>n5Ovc+z#{vQW9LNV12| zV)I5q(A91_3HVmgO9;=kF9toY7=L`LE754Z(0Tf^AE3_++;q|S#Qhq{AJnX3(ct|e zf1o6yfB&)n3>Q3B%x{JnmZxFS5az)+#upMMZqOK3+NBz2n-g1BR6JZf*R~ga7ECQh z^)ndf)U`!~wg1Xb{k{%x7ZO^t^L$zHEF<2G=)qD*7lJY~*3_NyiSzqS(fwNLt;Yjv zMyd%tid&;KwY4Ji`wEM~Luc6mePbMQ8^WD6v?n5WIZV`7yyw<7KZkz(diAu^oE{gp z`glUA+g?I6jiCN%!rm7gOAmo6DKfJelt#0c6|gnSnvjwbYvqB%IJTV{?^wr69%bc0 zgPPLYu%mkyPHCkAp*;B)zuN2rG$%R-21;+$V~lV2m$F12U@-?w?Zx#Ow7A#VT^Lt1 zp#!y}jhM|K{P^FS1sXwo`i5`BXxfJP8S=ytqrrJxPeCLMYtlG^SBUx9-dA??5-azi zt2a7dpcqVqSK#2WGCG+GG@CjZzs#U8C;EFR#5IwXNHKBzKFAp(Q07cV*LHFj zDeBLlSs?YyZ0Gwhu3d|zK~CVhysMEA0@E?lGS+iB&NQs;^uAOiD$@yX8|QX^*H_$Q zdOKtW-*lEnMQE*!+g__ z{j^q(UDuY7;nPTx+)ES5OQi41#?jWqfi_S`(10OWTehVY9CMFYiyqBMMLvsV=#K&{ zZgUeV{psb-Yd*!iC6oS?FX8~1oXUBYE@%Pf)QH+q^nsC=n~(e4tHhgLEjIid8nS;F zo7xDZT3!xd3AC-^-+L~16O%D;^MVEhvkUm{-f!uz^8Q@n)vA=|nPn8X#;mu2J8s#Q zwMEM?<6pgioa$n>sEv_kqL=YgE>A8U)baIECosZNPldVJ6ZQmfBr}=-c|7bC3Sio~7{*C9V09GCf%~hTOX$fsJeJlNfKb zYT_?#l17?6+S}cAix>4euWk@`trYh|(@TCBxm2T;R*IRp+!qtH_UZN(<{78OiJyO$ zBWdIlFZK$$*3rO`yKA+qL6Z;zC^jEYR{Jl{J`W~cGd)T;|@zj9gmC(Zod0Lb#7 z`Jofe$#3>p`RTy1g;ZHh`Izz@%(;ih%M>-ITh-2vj%_?KgIXuJAv*i-rU|OX6d1KD`vKAdKQP}Bw{EG#2MP|;{*c+4085Wu6f;Ro{juj_!x(eSu$tDe+ zTKgq${dor;!)RqUI-;c-Meq3i!Om3d2vo|2|wF z;RC%V&rGspRY&R{UGL$NlD=fh#|Ofkfh%jEc7`5EnxsT_f+w^h)lz|^=bg;?XEZa6Z&q620vyxfx?_(C{wEFtPDZD%yge|wlbko-VTEzaPO$ax*+CcD z`5dQi!1zZ^=Ya!!U3jBbT5nx1oa82Hh7TTlLY3dt(tajNVm`fmhEK_lKvYzKQ@U$l zOm~GIt9DdBZ1s2}jn{hep(23{XxHVUUL26I>$<^%@yOx(YlA-gVD-9^O}f_VPWF{H zbpb`DJg8LFVQK0XxWrQYWwL>f(1(uGVb2 zuzNXL>>9qJs-Y2ND5Guk4WX;+NxYTFBkkN;!kuXv7(daNPlPGE0xHAL2?6#RLVcUZ z0VFTxrLn$H_)a;;w%KbglVA{Ix3ROWY_8%Ynr}}|eOrs;>*82;tU#+@&D?-14J>W; zhkP!agqIkh{=zgMaXVa6eilJ?RQBYKm^YkEzk8ztS5tjwuE`+gGZ|mwdwmBJQ+8vm zdD3-f$^>OjY=o&08;LgT8+p)Ljy4#}N8UN+1M)doZ)Y8FGHMw>#KNDOA2Vb< 
zaJ8CYZe4bl1Q#g5N`I;=?%WSovqfQUkO7_v^gn29IWV+*{-z3Y)nie(MVgm^x6yxE2YLx(i|+fam_E#RCaOQnPzTTZe9n6!ZumX99;EKP&T<|z=0|U zO|eu>*yp-Igv8=A7_&%YVtHt*+v%-O^qQpRC_S8UF?I2+;%X5>l(jmWdG1@@OJdZJ zy6HS9Uv`=qiLCkx#ddK-3;z42UZV!LFseVX?}ReBNk`(U7jX$ zuMji1MRFK43Y!4M7@~aBmY7zPDPRDA0~WDo)5*uf@tnOkL*&9bgLSXGGXWqOeGv^?qMiPXfqnI++M<`m zYR-u5E7*fAAL!!b5RDtwNffbnZ^02lxn|t%wINKpIs~xzj`XFMaY$x=LF%CCmGnW_=*ozJ z#?9rJ;N&HVG5XqI7eA9_ocuSeD$Z@gHNu@iBZh5P{u@&QTa&XiLT^DPxwA~Q(B-gU z>|KjrgCqQYc7rP!ysHO0l=b0f4MBbSpZe-N(-3SNEbk04Jkj?NG-nJIQ^_NjeGfTI z!81yXMvTdUF^?IGsIXwp|i7sKrMXl>mVRIuR1ojdGEYKWHx83kmjmeQRszq|qoLzn3EUB$fS<>C=?Z5R};U8byMU@*sMS*k9m~ z9Q02bCWvX8Rlv+An#)5vPV0x^*{*Lfrt$(bb8GXtlB^a>`oFU>1_0XPva%i=zduXsyPfGa@4N`O)KZ5P(~9;JtIK7Nj8k9 z<}p)5XSg+y+>p1H+FhuNRkn#X_9DqJ;KlS(xuc!VV7ap}@m$2t+9M81Dk>5O@??IR zjCxp)32pbNV(Di{SFZ7-!f=4z;%AMFhHLcN0)y+58cFr1`6dM*{f6 ztix?*mjY?gGm)ajG_C&Tt}2PR7e0&Sh;7~6m0eeu+k3ST9>)gy)|rS4ZfW|M0H`4# z(7HU_tdrKGvO}hS5MoPt{=Fbu+#^QiwrDz+5=UA+sjT4#eI^pXa6FW|Jut) zj!2o|wanrL-JwYle5iCv*DN&|W?K-m-_KhmNVY;z)Js{~H=NM%I^OJtF2Nnvr%6!O zV6wNCx~ZeMno!;>Kb3)F>6~ymy%D(OQRbOw`{Sg%J_%c-{kn581z-H4iPmv`1zi}oPzlGGR>ezL?a$VGH^o?Y#GZl(sV%VK5G)@ zP+oHg=cLGt zs?qvxH1C&J8m)KiJ();v-BTE!>E@fpn(zrL86&j(`DvwPMRuH!>C>~N1%C6X)xx1(k5<2_L!k}Ho_j8y8%qO`zCXCnH|97^YT3S_feZ7!3 zDbeWmp)dy?&mijpcDQ&{@ZNa(qJTGL{b~E^@8H5qs=fEQ4>(Nf_5={`O*sc6R}iPS zA6ER!rfnogB2?suMh0A|;xO%wlJ&MmjOZpQhT)G@0B`7@gH4@cGKQ+B*m}dXbIGYR zjoW%zzQ&f&F-}!lIL5Ml;+VMI+4k*m_aC6uPaNWAgN91e+=ANhA?PHCXi#fh^_p)` z?Z&vJuO)@k^J0sNO1BD5cMt6{-7c!j>7GIm0BQvePTC$mipKF^1oqg3`hm%PYcjSj z;gU2Sw8Lvd`ODFj|J+Lnz>>j+JW;Oji`h|CPGqk^o?>2!cJ5iHG0~iFfqCv(@Q=lc zVd2w!K;Pd@v!!YDv*t~ZK#xt2SELOD2 z&2zSvHP!45L_En&ga&2v2N%HCdB0qAoG?@Nv?JuR=PYE3E(~~?gMQUM(nDweeR@Yi zh1R1`^=aV<#mjd>QDND8nD}*}4;6F3Zer+@)+`2qsXZ-7JZ*25D3p(iAI-U)+G%R# zYMH||Qf!|^fOw6spqiixAK8CUe~v#pGj0q1VM(L<%};sos?QnmtNZtC*!BL#4Y)Cc zb1?PsN9;irc7Usrr|{XnuP#lMR^Swbey_9`;k%sqlF?L+pr8|r-H(9lZ<#z@-Q2uj z%;byI%G=FyW_77Zdev&Qg=n5SQE-IEF)rnKkv07h%q;kUBw<(lh$6wl=86}8Ng)oy 
z#2$r2^@|c$LrSm7xns%j@2?FF(=~bk-zfU+HE}}`UJfoBf0E4~6b%zJEFgDqdNiL( z0G_d)v}73WczYo;-VE;_EFlzoSD3KX#G>r6dI$m44_mnq&CrLTq}TwuJZI*BeKu)% z<;bg_^V0T}xj(_}#^gczh6qQr`cvxn*%B=xkj=~N{T;!XvCmt8dk!GXcSy=O23Bfv zRry;>GJ`n3qOk;M+XZAW$3zD4?Pn$1p2*L zt3v3~t3Im9R;VmB;fok2bJI5)R8gP%2#V!4+@<=Jv|DITAL7eZ02;@<4j4{+sq=w) z!1)d9>9)h%ZzI(P3WK;yjPq!Wv?k#a5mx-7N+F#*1M+|*=_ zn0nuIL87_dfJNm zdue^@$w}hBtE8Hb1a4Xx^{YKIO8a-*@(R3`p7hdu_Q+v&KJ%)}A)7OG61`!MeGty7 z(Bo*!qtUo4>d1D?XXNuTp%IoXAe?&SQ?^xeNV|zcU?6rNcolxUbaEddGzuDm04KO$%8-(T0X zR(PO$55G|;qb8&=DYTlZDIegrGiSodu?d#CUk$e|wbY)s)^JVSx5Tb`6?Q3zozs(+ z2z3uvuafU>dZv)|l8u)a)0t}CquJSS6MYP)H1SuERQs>(2y&jc`~m{$gJ9xEEimJ5 z9{^O@TaHT}(-y9&~`P<3f)p4ok!uTzm1fZv56xGl}itG@u+IeWva3-ygS`9ER! zf$tgG0p=>HmO(inUZodm{M{;ix7L&7y5xqA zj=F8a!p3ilS+6|jDdja={QXuuRLyRQZi#`m;^)jvQV_fJd5;!nl2@;u5}9GQzrAZ| z15^{b!(fhznW?-4G6abbH8=R3p&!UeZE#HnxP0^?F4P+){ZmkLau|8H3LkwL3bi`& z;wA3FE!eWc$G8+GBk~FtJ_-g|%`hZ)8{n`~qw> z2&a!BbqoA<9)^#Z(Q7R5>%Gi~W0b&G3CJwcC!vXsfg{T~_GqO-M}-&3=FQ za@C|vtlq4L30q1HVmC8?9+o$A^JxZjH7&uKk|yUjvTUd(Ly3-vnuYz(9?~7+ZHs%- zN$TX_|GAGfpP&G1L{`haEXH#yx*hWm4u!FuBzVX`MnWvtkNmvK-xzwt(7_bd=j?!j zn;EV4SNpk3gO()AnR%Ii<|0~&zJ2Dx7M1dc#)eIf`b{nAv*a^!FEuzX@yYSN(tR

}QBY6jH3;6>=geHU=eSNGJ zTjQ^YZf@hze+D-qkr8qMh!NsK-!nqLlUfA)xg|%zcNcWltKINx{eW%pdfC?4*f{jr zJ#Q5Qe4CSQdkczkG0qUY&(zT)S$WHk>Ino&FZJRrJ=X$nJu!E@5sq#W6X-8d7$9Md zxyrCGX>xufaHA4IIeM(NzI`(IVx8)xGrcT(pF21P)GrgdH$^5gbn+MRlx?G~8p)R* zZIU_YjwVpPbb9{E)s8*(!ms#qZm)yxMA-As52~fZXNKrac8{NXUm=-IF9jb46`6A7H#-g`;8(|s4HSpuWdtTauZ$IPqpa+j) zcHErUGu6B1ifW!&!%Ope4O-&UN%_k(UGZsVKN;rY+`A7F_x@Na{l%>Q$?O08aSA~e z$7q^H=|4E!3Fv+knj9>%Oz2N2c{4kZ7a_iSi=TPzzj(<`AnN|9g?37aG2YT+w?|zX z!+IwjIn`&>N^^`h0XB{p?%GNZt{m5&exnaL>xxirBv-h7c7{;DS&-e!`i}tojrq@eVWI*uX10w+Q!qF@T^Ng;`*6iI2uj<+lc!X6@Pa7 zf8QF(Co@1&9u4r{^xZu(W0G(~ucNExTD(tEX#MN>*~W?$f#pTa(%bhYO|}eYU5et3 zinGp=A?YOD3%@g#A2s=@g8h;^tt@hHwm#ZJNND!^fPeMJO>Qe&YTAo!;~sZOj8=*X zFNf=b?Di`<$ix5t2au9Qu5zCW-}pVY@x0~xN@xq$-(S&3mrw~{~p6oWW{mAWGNBQ9=SPtX-%VGOSXV9Pek?v4cn~)58m^xX(k)Z2qQp|iigNs?@cZA7%gn#Aj*KMa zKUYX;`<}Aw|BV0td5z$A$nKmUo-fbD_-*xg@9u9u6J`N1w@N=<&mQ*(>i0$tRPBMV zW?Q#Au!@mjgOg_7&D9n9*ZbfoWRJy47nbEV7Z$A)yQh5`e!KHsf_SoTu=#*$ImskLE$1Mf}Z7M)^*q%}zxBR~> z{mHCzATv7{&;^!1>bn4XKUTo&NzELpa?zjo{V8uXAZb)++YQrh2Y87t(olAUZ>XVI z|L$2^qO^Qh3Lf&-ja@Y1Ts4`48SZJdcBW>aS(}L>IrhSv4a4FW(wFo_-a_egirtOI zG5M6m{(O!h%;Fv8n&L*C)T zh=^a|wApvObuRn0celH}Eq#>oD4NKc_Fh+5xA>*K%Km7Oro@Ft6bl*i&5x-!~u}tPR@-Y`^sFIXP4gM+;x?wgJtJ?3=br z;)*FpEOo7ugbg8aKv2gIlqx|AtF&5O<0T?UiHd9TLjW7%4`P+)#d@nr0KJS8u0a~6 z0%|1cQbb#HFOn9LjgAvQnZPTpLVQSjdG>Rdd&N11~5 z`D2u;z3Fd!cw3+bj${2k$c2n0oB< z^1@iF0B>Vi+3x&gAHK~(wLxsB=Vg>mF}f)U`zwY=18c>+F3@h1UhTf^ArFO5I1WTRv(q2NN>HA7TZIBb{mv<>u07`?rt!449i?{$+n#FQ5bFZ(V zP!c#j%@^77b#R9!&989r()}|b%q|^qq)=oXCyv&GHigGC$?W3>!5h(V5g@dy2xt@m z1}|E;aUWs8L_Ae#AZc`lh;lSjmii0GWBCBYuYslJ0dkQt>Z=-4c{N@x5D3Niv)%Kl zpDsyT+Ux0x0I*62+ivxL@_KoyuN;02kBGJZ0@#7~fZ`UJcD)X`!A#+HnRY|zzgisM zyOD_jW5?jh`g%4p$^9w7HI{%ulwN?ERmJ6>aTf5xipm3E`PaMWoAwv%g|aw&jK6R* zF~AgvQ~Hu48TrkF36lR75GzY;6N0QMv;XWOEjXWj|JxE#mo50u#Vwo2T2#|8?vGJr zy)K%^c@z{G899+mKGp^hVoXEa%o}tHX-Xi#pZ58Rh|p@jN*o&!=Eoh1&CE(!+mB>n zwc1uz?1T6A=~Jr6ZFUV;#LPLBU`5Bs2TLr~QnhR-*(FJmX=rkuKL|yEREV@TZz7O!gXfvlCkDK!P4RF_)5^DnKB7P3ViHnEccJTx?%! 
zK1-YXI=XuaP!w~fu`e)FAZtDqG|9YH8O8Mg3Ioepqq;mI*&c$7D#p6_1*TpO$;hmu5PAdmW8wB>3kz7aGf)^fE$hEj|- zIqPYqm^@GaL(ENcC)aloRoBbYj7k*BF@vC+yp23YrRy{oGm+6c#&So$of^Xx6`(@F z7d4eaxnM|&K{+q`76s=^c?-h8QIFl%mtQ^A3^WIQ;k%N}$+@X8F~Zztw})Oss}qqj z{%dNWUPPb!cU&63XHxOh?bmo-k4|fG4wkF+=Vi^y(LwQF!8EcNzYm;DEflRn6bD!5t>|cFIMtX{+t@g zZ99$=f_?79M}$y@uL*~Z^d7yXT&21uO`cb}Kc;?D33}doyCPhF1Ejc2<{T? z$VAJBEIFT{Y?e8FQ#P0TMtZqkj%p1C2zg7ikKq z?E0deR8(`Ut>&|=wQC_R;~K4$T}cJ(PU{*$@%4bM<&6b!#wZU(-}ny0*D5V+ z@uC>M-b8Rb#_OY~-%8XY10yrb`kjIBdj_u%4m^|u-G7B4OP4fp-P}aWddd-~$4mDo zL`^I0yajPZlV2909w?(zIzSPSa-Osz1_s8zGLG22@c#2?!@(HsJh4WE&^WH%KXVnC zJFJVJQE+H!qrMyX7xn2;T3NA)XDN~kNNt)tO5-uA^5&balruRT@cN!_v3)+aWTVd& zI^=VS^(@uVoK|g9{z}Ye!+KK?S=l0Q{!KZhhUndUJNcBf>ys6Uij1!N)y7#YVvy~x zL91cC4oKa1ygZf%54`F8HS3hhQ!8%7e(Sppb=MamJrV{-sx7E$@D9uB=eoMO8l$Hl zouDgXP)cBAYQR32Ca)`3fTiY(lfHb&hPoxlhRBNb9i6FM@ZKVs*>19ug1%lwOP-3eQeb#O_RX z&kqeG6ELgL3MFNjkg8-7*Lvlcp*`on?xMqXBuNvt19y8ChPfAZqH&A$4QDllYZUeP zaP4ut;iDK$7wUz%+bR&9O2^H`lM6Tf>17q6lKiwbiS;*}#4?P-cv$LmGn8C^DQ%rr zuRrSjC15-s1LhHwnYY^fmMTD0g&SiR{`V;JC9K|HY_VnLHQM#v@f?0Ltl5wOTR`55 zR}^^l%20{U`|8!zX5d?1sfOeZ^tXd>Aw47SkVz&s^GmfFZT(Y8YXFzk(!Rtyn<(Jy z9#v9OqVZWOm4_ue4Uj!YS#p-X9iu9vdF>Lk($f>5H}B&B&kOz68(D+Ss}S5LZ#Wpo zfbxy}5^2vGf@+H<*B3@4R!HzOK%Af%(Yh2b&R?%mO=@{`qe)lnVaIKQ~x zD^~}zCCn8o`ueZU;yVw$dxC+eWXSK&AyYg~U_@_f!Y)AQ7vcGMCfWJhzt*NE!3%gt zgWto&NiB#ZlO2+u%D8iH$Nvo#p}AiWUI5g4?q|O z12AdyZqdIe&}Vva8{+_OuD5WN-+Ml!HRs91D;Md}^j!TiCqo_fsfhcokI{{`Md*5A zPT9~1wheYZL?Qw=UI+NXn0yPQDJOhMiQgiMs?Q;)ROo%;rpRU< zR+sYQF?jBM{N<#Ol|FYD2T0MlPpHbcJC-7sC6d@G3lU|@6wuBiwc(N=71kwJGyCG| z71jpWuhakuOd`A0I)8nA{p`G7Lz3M`0!p%At~NR)lYb1^hh<-J z`TN5*3}P;0SIyla zvKcM5RtzMTDaI`7lJOG}J6_XiI82VDZyMW=2hod4 zk#0_k18_C#xEif>4#R@6ITHp`>7|u@S5`Vc1qeI4(u~rx|U#kAZLkk-sa*}C#oodtp#yNDuG(7K40IpP`68k1DP2R9#+s#X20_t2wMGS z7v;VboUP?SzPW?6XCt-)^wN|v%^8Oz{=(ZzmF+h3p+WSt7?q2neBI)o%DBA_uR|Tk zF-E>?Llx44-~rzyjr^IyQp%S^ORe+^P=ciq7vjDk4!-v}uWZ0#(*f6pV ze(ZU-3ZWc#b;pY25nJQMTQa07?IZVOF(o*i*1zlsA+Wc<#nqB#L{|I!$_^9yk%WY# 
zRu7?FA=u^3o429ifb^D*`F;;{Vg72C|NMJ~cM=qjKhp6dUD7CC@Z?V)3K+kZTEP1T z3ZSx)T}4J@*!sygsVnlD3hoUKt5mL;Tg9ArfYRh$cK`uN^h`|F$*jwJ^b4X5>=`?P z&ie(4W-KJKZsk^hi}(^6;>y~|O`)|+-=|eEN0eE1t`T`cDww%jw+n(0tU~hbaPiI1 z5#iN47bXbXJfBkJ+9}2nFNqwIJPAI$9e;xR)=#3<98Lb)ro(wp=o4m#C3gdm$P$5* zJNr`!l@A$?7TF&Ko0owtc(!*Yfi+z?&)y>_g3jHh zYw>=cW~KaO(!Jk-pZ1zB{Dj|})$%Rx(OgvoJ~ZP^eAS^}u&b*p4zPT#to(Rzsv~tl zPPpEdHD&?=Z-PtSz9&6iY5P<(*A!ux-D7qgH(q1Bjg!J*MWb2rfE&zNI!(FjoJ{11 zF}%S=Uu`%qW8Gh)D}z1!dhz^|`-=v@VylDa-s9dcHG@!4Re;L3O66@7%`u<(TkNMo z_x0h+6oT(jvs$F2qysn^G(##X(r2VR8PgI(K{qGL{G@WDoPsvx>b0L+i#*Sk7>Zj> zC9R)C_}+&IQCtwvOU$w&f$e^Qfo8+a@N{D2P-Hm$kXLoYb%4l0(C;mWu?7z^?YAfW zqK+9UHG#83vbuCXkb1S+cLa*X!f1lUQ)@9~Uki=8YXc@~X4(pS&>Pm2;ORu*(7kuu|v~++$-K@hG z;SCyw`mLBIAgh&_tBO1dcP3H|0s^~HlJ+)S-{L!-cCjNA;T zr!Ly~jWg-V#a#Cc5;Z<{pXo{|;DH<6YMTYJQ zxUqRjP@45-MQZ#vq(H&=2Y7Nt+TW}>Ey#oGyX$uC4vAz27ZJ^G0|XLltMS6VZ-sC+ zWv>)i9T0L>r{06xp?s~QB60_)blN8gL_*A)`Vw!ie%F&5HugqSN%}{yr9mY0rTrc` zQUle+y7LqYsG`1T?K4hfM|>!yU>?HB(5Dm3H%erqSjeCPYbZ%7CpI5YwO`;wTqq;q zCXUu}-*@Kc`u$6C1w`P$PS(tvgZi%TgqVZ#wZ~pu*0TJQe5(ZU zvD5npZ}ZROfE93dN8>_X!*tUg?zGK6htwix-=8MzgBc@MK?Eo!!DU~4b5{s*E~MVd zak=>u6q|szpy_XL?pe)a{PUuCJJf%#t@Ka1*0H|6;6?0q1|5;^JFn8PWc;P8A^i2e z^k=FC=i&B7l=!`>>d4}7R{?fG7GY3qO?eNm@vcq0@P@P9o55kEVTmN@39 zIs$7%9X{3_&{;kE_NrTq=SFB%NzOjmS24ddjDB;U5A#m+depi2Qd7)*FvJpY9G6%! 
zgEKQ{_pA@^Hh93jFZ0Y!x9YHOnta0jw4^20#@~J$$v1I$p@4n)Hg3!yFQbaPh_-ap z`2Nw2qBiD`&rTbAwTd-yL{YI5*J@$wY@$XVJzLD@z*s%|Z0Uw5on|Yb>LTy{rzQ`o zEH?8{%x+wl;G(y^w~_ir8^sBe@^IKvrjW<4t}25AT83w5B7=iSMyym+wFILXG0q== z+>wEkEL8$IoCs>5o?tU(mQ|^f$URm%Z=fM?K%UGh4`MR~c>|Gx$!15T^~ThFB+x2m z{U7ugoG=r4gZ0jU@JUIvAyf}UGo_F5$(P7o^soDp8Ts6At>0rr2t#<16Z%zL>$q~~ zaCorPa)yJVlW=4RB|5ENNu%|VLeck)Ch{KLrGm{uxF-5Zpv>ubxgEP2K=h8B$`C3` z(0s^NAMkRmjF7egc!_;W{p(=m#zJ(zmpD$@^ zw}L^VrjQ*k)8X0_hkljT7Y6(n5XniGPS?3|?Pk>K_om<=1a93XlOM&^e>A^Rvf^d6 z0pnH7sl{g8IrtR&6VyyyRS15(j44zWW{drSRp19gj$_8qJLrU3j_bOfYa+whLIg#db` z_Y-`AEz!Ma@5oP0q+bxbK%#i_^|hprVT4WDV*Db8>;;du+@)?lOZ@SZ7Y7fdz3;hA z>U@ideu(FZDwOw;fyy(TN%Ogd(w($)@W6^Fm7$A!#mw-~gi+;VpPHiF?TTNoR7MNJ z)=h{3l{cI6=56_+owAVmOE@2$ut5QHOkV2CM{_Su6DaX}ax?6k&2MCK4R*Z(E$e-$VPN5Gs*-W4gT{^ z&Y(_+&R~b`=BOYZGB=VHZPuB)oMZxb>qzs_IHwQe z`cgQ_cOf52O3!=jRY7 zVx2>0DB=)GZU+CZhGRHa>W|=YHP3uWjV?Ow5tQK&i&aXc;uZ&;6r>p!BMMA$FO)Ysc?X=~1t-VRCI( zPbdMe`Kc-M`FjrH*Ibu`9mcdcHkCGUW=i8(2Fq2|brBBnI#_{ME1(nqqxovOf_F|j zx3}2g?Le9)vTppgpXgSISsK@t>06Es`EHB(DwMs`PalQW#5~u`^lULCtO-Bmy+%{q zj2+jQd5k8=PXJ{2LS(N1KXKDYKP%OMy(s&3j0CgUcvh}YK)9cx8b6$xpR`yqcIy7L zn1Iw>F$iWb!gbIavKhD88GvR?CIoZvI}Q8(u}Q&s#wYd$n559CG}A&O;E01s|M_?oq z%4RBaDillr9`%Qx;s$xT7lt=Q!jLoCUv*SVsLdp4CK&P8-!>3AFv7BqHyRzzcPxs8 zoe8Wh_x1P4jTj6b2`BH|jIVF>iuH$AtnA9(oBsa>%6e`VZVk z$L>~?`y)EVXi<*EoH5V7M^IY->8Yc5N1P6uub+J9``}#^EaMOTS0mX zM|&(OMPtxsIw5_)vq|zPws7!Mi-?GB&HD4tm8R-blgr!ZSLsMEZsoRFrh_GLAHR7^ zpoG7|>BOiKIH`CN$@wLLJKAofzH^dZl|O+_Ow+wBdvMI{X6;GIBB9MEaSzIz(_!cT zq{cStqP%z&p4#W)1gblJLs%Ist>b+ zWC>gsTLT8|fuaZIHy?@Z6`s>s!Xb2W_LsCkYDPKL1IBNXOxq?0Do?|^od$}2q_eTu zXd7dbHGlRA=*M&u4+(;rs$6QwTbsyg^fyl0pVDFA$YT~W8>@$DJkLF4Oo356 zGmUTE{NDHDCw%A_82AO`4ih^>wsv2pDW>weZ8oZFOUH;_Cm5mVl918c_&pHw_Z3AxLJvrt>JspZL#t0L;I|j~ zj_bw6oF&fpR8{~i`wrsZW1NotEgMyfKoU&LaiMo{Wy@yGxV-nxvn-IEIHlLIlvJq6 zCjInGKNNDv!t?+N;{D(S>PuS<9abH$+}IKjivKw?@$+1vYY38;2JYRtE_0>mPZ~hW zgSw4u6`a|*lWbMG2+7OhHQ*x@(h&t*`!e*+#^`AEWPdsup?U~|>zn3l-Rn*GCg(wS 
zV#%JeBdDuPW%=(|AD8NGtKLB{@R3@J9YIPzRtuerY~z9v<>;oO@8IaE3&Bo(}y zyx+WDv?7q$TsH6jL#tYu4RJ3VO&$}yz z$Mdn#NW#NHA2tLN&Xy805z*R?A9qi4ud{K$kmt3bk1{1a+ttE& z{=8tdDKQwvknJ_e&2LjqGaMHpoXS=pNq4-I+>WMeA#Ar(qxbRz^V$8}T+E&;+jtkO zp@MzaK)BwjR_k7QxY9VX6>V@~q8OW_@3qfd%=Hv;QM)33VKNZu_t5_<4=v!&H=N@? zeh*!v9WJDYOKs{irCP+E7JI*4Y>$`Jf77eH{<_9b|2IWoMyE*_{4TY?h~+l~^Nma* z+xU6q`!)$E@tDYZc>?T@x)RS!g6_|y67*3G`5BX{Zy6UlKOc7kVM852iEo&tHF^PR zw;RGge-OC9o%aJP$57Kgkgd@pK0z!0jQVD^nq6S_jE>)dN!P+@U!@@8R~d7aPR2p6 z#?_n*Wbz9FPAHu8Aohz*fBi)HHd57(`QM;sPrzyeNX7oJ5U{4*2=>*nkodFJZqDK8 z-A1+bHiF%EbzL69VF*aLLOeb>tCjMcL|E5!gCArc=AyeM8nx}<&nFw82fgH$=Dc^a z&V~vEGdgCT`yMgTEQ2X7n_xzu@!x*C|->n%KxrcF-lkz_wt^<^@e7%f z&&Rnhcekx6%DGC$3^#Mj9F~`uW9RY(Gro{peZPHQ=x;1yV z+@zz%YLo4r0t$|t-Y^aW#hP^JH+ho&m1Tjg>inzDmu{l!RO zD3PyQGkN?dZs$@r_0&tW!?c428{vX!wK@BsEbPSI zk^t=!g3;oaEK*9 zXNS0vZ08fEHnw299k~mKrBR^=IYL37Om6c80G8Id{mrKPupPP*bU4n~Ry<^bRNAxI zvb7D%^l~(=b+1*tR7p>Y`Hx!?e;G-XvA_@n^ zbIO2pWzjZz;DhEsH!vGF9^A{In?UCUAnNr3Hf+dCK%_UTwfj%9J`W)1VgiU6&t^W1 zMf#KAPITKtpvwizl{>yDDnq44M7ZSu-3X2b-!x|XMPM3yK{_Kn1&?*~W}0vPA+|qR z(I&hy{>gQaTV&;oI37?@gXUs2o-|BPG&5CT!Am=a)wm!+?^J1Z@YH=PqogLwF*5-VF&5oib-?Gz4ITC{^PE23~& zIvo#o&?nAwTphPv7&|NbgUeE9Qmfc>t3+-u?93+e!?D~S62gIzKOY&~Jt1x<-||52 zJeHQq0K?NQr`=|1>0XzzxjrOXAI-8M+a2qV=k>~6zsmae@o$5BEW#J+@5v-R7|#hh zg_)W9pJKBUe+DfB@D=v6KOTmDnMSR@a9&sD-uh?^z_myd^I9@<>GYHBjb-%=7{5U} zo~tzI^aPHv^LWI4K^Bsae|_MMGczaaI7g)T!}H%eQVrd4>XO2Tou8T*5(>gb_oGyu zC03G&Bjo*DTK9E@96=iQ7vHym!7RoO;sA$i3yc%zqT*OO1=qb&VZ)!{vnF*bAc%?jStw&kh4>j5_q2A)qnFZpz2A-*H!yL@gxey*vc5&8y*8!7i+GbuvH@&4I4>eM8Z78OWP&TEmBa@1*@0) zu4g{#B5a1Gkd2NOLFmvq8+4sqvUaIBOuMO98B)_--zNNnAR=CXlaXxT+vMC~$Y=`G z_sU#!7BRuCE=GFj#N;B7n`1x7$98wM{RsnUD@j?Wb^^Ee{c~YY;>PpuhyD{JWU4c! 
zL>3%xbeO_ne{`3GfRJIG3Cz)DyRq->isuub6T2=6thqB*Ja;8EemFCvMEgZ-)mo+) zu2^J=UL+@K<(WXoxdRj$#y@85V=%S1;&y^ZO#OJIZu=J*FK3=h%`EtMp;ne~gi)n1 ziv7E&L2$20^apWd^49=(%$askpg*iIr}(4!OiCn+Mk#(z$c#sdQR(g?W9&8G_W zJI9@KA>M(`z-t7w)5uXJMnnJuFbc&dF{Ja&0-9N0#67{AB`T}dtArbsra!3E#0ZZW zBp}k2e^VuyAGrBlk_mz&MgevK;5c*khKD^rOr3%hz`9@)|M$m$jExNtFW~Wa+&;`a zwwgo5$a-!5>LSwvds6b9AAGnx-FEG|FFEGWYWat~MpT&;DZECJN5J9P#);g4|BM%u^RZqK@rFRLObQ4$GD zV?W#+eT*Q5>RAVd*^YXMQ~zwbJRjBdlCidypppiXH-@j(<|GGDmt30-hm+=y8Ne)F zRc?A?P)z?xLyspJuW^dz#Kz;+bu0PHM{VN;=hgk*L=t2q|ln0>b0Q@geXG*=5{-Os%1ewa*77eHA4#~_kTYoRh?C)GH&yB? zs1_je_45lbEA{TxwP{`-`N;cNle^x}rB%?9yBU#F#~7vd?$piFInX(Q!vt#aOPY}M zgQI2JoiS%pN8I;oo5Mms>y-?*V?OyXtQlrmDyLG=(DSa*-H^ZVINSxJQ%7H<)B&Vz;-mke(APA(cQ}u*uE|hub~% z9>ujywX$LYZ@u)dErwwp01xzq06nkjMUOwwIj{Ckv%LZ^gIbl#>QK5g%)=_F0;O&u zG7WTm1e@zZ-I^L7)?PidY)U*)wjzed+z9=kp;IJHd~$P=e)vW)xV!Gx>4!n_SjLY2 zRBuDIJzzVUN+6$HkjdSr-aKM&HPlpPV~)|}IkBkFV$Q3v120aI@DK_%jD+U?!fN~a zU}f05!E!Daz=$}P>9p%2-jKLjXmoAfdL1VBbVSfB=Qpz3^gieIeY-dX>-+juhd*lH zwv0kQFbI~S7Q)SDy$Wxol8i{IO_=iGk}-4U_+1UinVKP|VaYLS!1s_zxlcXcr&o%F zs#|W}_?`YncZjG#(qB+H@WeW}(D@AfpT@?}IJrHoB#NJ=a8YM=^lgMWP&WM^R_doPDzg3g{ZhH+BAO=KwJ0!V!Q_l8zX zqKWvYRKeA9UkZeNV_eBHVZl_9lro#TAdy<;%Q8VmDh^Dr-=4oo0dZYS^GVxc%@AR$ zi7^J%eFENvqej+2ufuQX;Yw?>x~IsKLdL5rYE7iASasE5ke(q)hHFe2fzJw)Kh<*o z#D;U$BhIRiF#?s&#HVtr0_G&U5j)?*BL?U+h_R zr(4%7LEDT^Nwlp1jpcH>-WSe!r4DEsddIaS!fjL;?~wZ_I}#G=wSLsD~XZqC8{Km z6nr7D)uu5&w!7v} zz@njo`i`CwfaY){WYr)8DROX=)&{K<;YRt+ov$M%8@e3*#xax%mEOB-gwXl7u-}Ut zga&5g)&ZO34!kj_m@8&YI`Yd8x=d?T_#srSasZZ2%?-q)V4ZUp3E8_3tnmV_y!dH3@C>}nf+x) zuj134@4ag0mow~YnUSZ{>U!tt9nIZcI<(XLmYG0*pJT|BG-&#cM-0e_2{%l@yD!Cw zQ0JYZ36tQwKeo?G_P&lZQt!pivx}*@qbD3Kar&I*8q*Y{ATP8v2lyHU9Cy8jszk$I zwFBFZ-R*S2)a%dH%(8^~kqXOzl_Ja-clI?I{CJV3@bj&~#S^qgsa?#yo|S!4B)NFY}e{Y%%|soJG}=c9ws1(tH(+YuzTe zfOU>imiLK;iVn)#_oHP}DhW~8Ynt2+y5xd}to!o(`=Y)x&U|CDf7NwSo5N$=7(U-i zfNGGbo-83k5Hb1k&g|){Vr-wTNhy{@(R$Kxx{ybO^q*6|NRa{PQR?>T`q-6ZVDJv+ z-5rNTI{3>z&wY2CUj*tkRTnS31Tylw{A0Hf|EdwKs%g{lL2_wbQ}aFl_CMAi?aF_B 
zE#{AVOMdr&H%s~GO$;D_*4lV_zi1DdZLWdE#|RpZJCgaV|1N1f2gs*FgR&|#qv*Yy zBG?P*eFo16Pc{aQ#5p`vBy;qqn9iJil$ay z{tUTX$OFRWLJaU4?7%GKJ{)p>7H|a^&r!|$?y{5t(cH-F76So)R-Tc%gz$_O2TOtk zD91Jpl8s8u^Y3^#qkOTX86yEJuvx$K=r7C>mq*}miiN&4$McJw;<2C&Qd=faD$ zO$$HZ0QJS(LU@f*z??OGlk2brW*bqNr{7)UXdyfX%JDfO&FL7P4K?HHs!WK!^5e>y zZ^wVbNPxj*^ovp6>e;3)ontb%hIxp(!zE1rMbEAFGZUK$()3HQh_9-*TWX};M~}&1vI?ifR)EKp+cAS_x>rz%3vkp-`*x(uKsf^{MW0>gDIK-VHE_o;?3_<#0P5Z` zo?yz$f4k|k<1#WlGOU1U_RUB-duiWIMyLnBEb^wRqa>4+2DI6)VOP%m96S9H=NmQ> zDs>cfY#bGImnUa1Oph+=U^8_@)2bPQUnWeCDXhWRd6uB-m^dvEiDEvR?$^~Thda1j zJ9>kWrwJgnN(kjLub(4Ws4&9bYJcq}AxvV6nq2qv)OtH2v0$XRE z01saZ_P%-epR<23xeNlKx^egTo=Ed~;jy+ALK5E^C>`3EyfK>TyeWHu(f1b47@qxT zb^Wm}x2M0A;7iYiEEB&LaPW~MI})V8aJAnsC;W%EmTbqwQF+#GbkClK6>VCKhMcQCY2F@tD$V8_og}+ zLA%OWHLc-IJ1zmX&qBD$t>7S9{K?gKLC^6uC5a&p>+W)~sF9h6TPV`a-z%ww9X~4{ zEwGx+lp_n{L#Rwg6{K6umbvL-H&)weoM%pA^n0nLJ-3vc`L{kUR|$8cHQGy0u<8;* z3l`ecsi~>KD|mq;cHl|>@oblxOD>gIy8Qzm@M^9`QVxw>vy`NJ;HPh)}T9!4Z9Lis~)7VByT+TqNJnQa9sQ z=bZy5fq%Tpe|`W`lx`+T@JMl9%)_J;0*qZFAWVWX?e9`1cZ|v+k8JO1uT;eG(Qj-f z>!?EyerYH)n}TleaVRn-M>F-W4;5dG=eh-hT8^(F$(**YiF|@)u@DuXKpj^)mbwIb ze5shLZ271#`(MPF#_E{n>4%tVF|+~x(mz{f-xMv2uEW->b!`|IQAU@4XOq=c=GKzk zFR`q~K3?f?HzAIu8BWwdGdaKbULSKk6!DgGKRg;f{w!U$^_)&yR+@`wd03jicZ_~S z*QviaE93K=bP|*!J$JBsP|VdEA)vmPqZqEdL^Dn?@Xsctyhs`|JGYLTWXlL4^`qd& z;83{dn|NpWT$Svh8y1cg2#toTWm=rw6!kZd`QJPA-y4OCun|vn{BA2hdvzhg%w^z8 z93t&_-aswjp}`UUAbmdTqJGX*W+2gkl3H>k8Pn<{IdEa4Juq8QoT~}OTnJ-@{aEe> zH6?oyS%OY5sj}~gTg{+;5HoFLhXxLX=bl_~*W}*FmJh^GyVK!;P8U)-&c^ii)J1O2 zy(KQtCE5X~5qPuQS}T|tGwk>Mw^g-#16gED!Cs z64#gaUlfEcle8Kbnk(lqQDlr__`V$@%ugC59Uk)zvL5K8WC$#uLNTdYhM7T^_lve! 
zBePX6B|^4?3QT!|a3}sK4YldFZzsKC?e0)z#-0VS(;jn1zoG~_y^P!LTsPB9{>kZ# z<@Zq3x3lOAHnYv`9I~Cv%D|~{bGtqBNWTwC3~=W|`u!_tMXMwZ>3Sg@za5RoNbaSN z*?==8vMYRZSkU(<_sD|C1(t%dlkc;diGxTrI25>iLS`i-Nb=&dvnbr6$kELw6k zPB$iCd&mi*nTD3(SVUTi>Bwbf$b-fkuJDOFW#U9`ZEYqZS%iu}omAi+Zh!)6XNG5%+U>_aWa(&81>w`E zdGv+mA&a|#<(4)ZZyVPTOqfKLGt3UgO$H~&t=MebSPA*LOmAjE!V)YQ^p*_X#HxA7 z9=hhtDY%l%5>=+Fs#*QhcIWsZ07?X)`}S*(_8Mu7=M@#yAKW6ZZ5cpnQ2eG?G24yx zzL9TU|9v|D^Q?kd5H|9to)ud$w@(n|<%wb`1b(EwZ!k20N)5JSqEgj@scCJ03P*xN9-ML%x0R;rM?BAQ`>0!&eY|mp zkgo#{d1b363*Hn=zp3zw0b;6Oex&C9PsJjs8n7YoFis z%Jxggbuhb&u!k;ob`E9NWk?-9K^J_kcBPc9`%}PPt~dDH!iWdk+wQFkg9kf_--Qe6 z++FL)+V>siz>DYG9)U!GcCFz!b?Y|Z2~omqVYvKnl&y5w=qMN%i(%= zA`l~@7WN$8eNlYMKZgaCZ5PswE-}5dO)EtRLFQ);;n{+kn(>x=}N z`NYz-t8uTpg4t+ura)$WhtcL~m3A)YLzyEB1C1@fq$?2dGfMDWZcd~jYMT~CIzvk| z?P4*fG;9|)qb6CyGk4+uCqwb!?;UC5p>8y==_DTsI|yN=xm4qZXW~W*zPHCYBn9gv zRe8QQcSMnnA6!!gBwjUE^+#!}FJ_x8n-K%cOQ5DfK{p?+dbtDnA-k;A z+prU-)MO|&zf?q`D={Nn=Vs$Zh|z$wo8dxO;K>rLq^|Syuzi-@rpkH{^>)V-ZMvZ& z5|R<<`+>)G!XkPfCU`Lv7{q|z55vB;TRCR#cv~rXKCh7|I`>Zv@W0E~zrO|JB9zZ# z*Aoi2DPYL%1DTmEM@lc6IX>)o^Yqvuf9!v6K{}B-lNK`eW|?MPa0!A^Ss;Uvf?rnT zdN8CDh8=YXP$5Z}?JDxu^+yU=(j$oj;SP6ncV{_?7dFWl^}us$wlP)&Yb=C~RUzJe z@2$W9rh|FS!!P>HVm{HQrD6|KDcnQ;MQ7L9rcdqG5DY$#8~9FSc5K!>X@i;M>KRg# zL=43IX_J%jmC^#IHYI2cAjK6;nU!_%{?{8c|L_kVJq+{q1Dd`XqP`W`%q`PBi1O%l)DM{&;Zcsvc=!T&i zX6OM1nCGJJ_sjBwTwd-y_uM^apS}0%wf8=t6CG*17#?>h8;sB~p71Hd zd;m`oZJ`If-}d#pHWw!tdWwCDgNGSoN}oGUZc3#L(4<7UzgQ0c)t4EE%alRKO465u zr}O-==Nr@v9;fi3mi<^31pGvm3)*TT&L4ku*&#ryKqG{$SAlxTl=4C4x966DW^~e> z<-zCYJ@`~YZmbd&ld`(k2ayRr z(vLe^a;(nxZ?T|+9Ka8SQmS#ij*q)>%DT_>zyn>-v!}@epXe+5KTu20p?u5gNi_y0 z>plc&_v{i*#yZWoEkmGAA*)vNQfHC^)PzTob7{Vd&nxj#ESKmKMjs-Z9-PAW+4zE4 zM!m-=Fv*eca@mb!_9A>*0@J-Mr4dQb(kkVwxgtyUQf6hcuHixnhKrSMLa3&rmhbDS zoWG8Y1apnk+271@dN7-lFnB0#dhVm3#hfx}WppyEO>_b^KWjjc&!{0F1)4=|2u9g9 zIcHQaE!bkolq7@Cw4=BEaL3 zup3XD*Txvc9gJdA;aian$jHbD?24oe%Npa;xsg08wsMqmdPm4%EdC)8_2D=0Zxc=v zjW(t>rWFv$#_2Q==}|$DRE}I+(jMPGBO2_USa^hkM>X19j^BI{|2++ioSLri>9?7l 
zJrO2a2kdx-^X8rNJUgPekQZfRPemffr{CB1q}hMyjimNVXg0)uCPHn8oOSz^Sg3GE z%%H{@D9Jn+eJD6pk+oO1LAVS#;dkiCV zyJ6Q^!F~dJs|g&VtEbl#%glNpY`G8w|jT^{bQ9i=Xy7-wY^N!lv?b_+Jz11T-$ z2R?J9e3#i1#8h+>Ao(Gny5$RMO``9@9s&<<-DTDW1B!YB$QZ_<>nM^resM3&k`EMbClAv*{z+ zzzG{Mhh5Jvan*zJKO;}icZU_k_K7V$*peK3n~WT!ICB~=W4PdsEe(0gd zW>*4kG>gRzdG?st?j<5yE7}Iumq8DsAZV>#L5G#cH0B8t>+r96`pxp!MHh;nU#fmr{i62UIPQ42?i4cm~i(M8DQ^Tj-I3QqB#6u5x>ua64|t zqt`!=j(U~p?Jd#Y?!APFvcKo23sx!<2B_H*ur;J9ydJ1`;4;}7arIiZUE(z^^w0XS zlwjRmbAS_sKWB?P!52L6(AgMroj+{4d8^(e5z}zoc!8QZ1fbv&0H8=SHvi5Wo0|U-q3hmxL^tyw)*#N{K!kvws^|uAjDZzEB*V@islxU_cRUqu zbnOSWvc0lj3UFA(-kJLSlF8-K@u8+0Yp)w{_Qt;9=&|AMW5MNSan?rUWll6|K(FaU z0OWI!qnwNnM7=COj($3~9kn?VEgU8!9*qKNfb&2fkFy1!ZA|3oi?4}&p7l0XKqM(4 z59IA6LU$v9#k8IDe&=e+Y9cRxV&2pjf;U&f7bE1z6?fc z+KLGn@7FPO#oNy@p{N8|5}pJ*Xam^TD~vi67|(;s*GhtwxDpXIaPQ)Y3RqhalIk92wtpTJYN)C0;|kD#(4a^P^WTT z4QQ0%`x9192~bUAS3s40>v222cqpBbH9_1Czr}Iwh`qPD5qX>ouU%VfNIrf(Nj`Nx zQ$A-tzny#padTzt?^*Jn=TUfyGhQMOJPq1g+$itmX~i+Icb-hr7^jiij+pAO*=V44%EXhM zQAa?I(Y~I*V5psa6PmX`wkFWL1M0a`B64&1r(L`1m_h{^MQ=mSz{f@Owo_JkAbpVz z-2{u^3EBzzNk>Fk$+vX$L%EioRv(`#gQBl4JG{sXL)Uqe$pBl$nUlqXMReOzhL&Hi zMi9F1OZc-4+kfeIYrA%=Jo%bj@i)$pfgPHE_g~l{`td_$84)hMmmGnA-$9UoZ87uj z9Rx+#`5>wyC#!e=aZMN7Lg?@RO1#sMpgidCG_BVA`x1Z^b6{Ig0PF9Md4UiEFM%*j z5dEjGOYjBFQ}3@ESXeS5JyaWJ-e~^)%*~^7*!d-Y&=H3hz%(yEbeA$|{1b8l(9jlZ z27}gbSEGRKW`I`os>^@pkbzB)i&6FY_k|8}j4{F}{sc*$Z+}Mo3r&bVdpPvqHylHt za@(~7WEE?XX!7oh9Z#mBb+AecVzPY@A)rkO4#10)+9E4*kt`^ zf2V8!0Tzq#NAFT)++(UCFA4ZhwiBP*B@E+BS5wy30&11r9}9!>3I z-APu^p!s<-(ECOc74x%7Y#(z!gG@hQz7C@B+bOB}a7Ds4{Xy@o`eVURh^Ry}X)02y z{rA+PhluOwX`K^sSAY0&kvuiUjzPogig)-hnJ)Wq=tbd=YCw!<)z*1j8n-omR%WOn;%}kIURvwCVw8nYXg$W7g;k zS7JKHf$jRAWhIO)bs`qI?XkNmsrJeO+rrma3y`?6mXd-3vFr%8)VLSb_yIjf+9QKM zi$a39PVvtpXRf`uY0|_J0a7@w9YlLM(l*zNaulLTz`z?9ylwhbl#S6hD$D z0?cp#d7ghQ|9qq0ic_xnUqAhg2FTooBc~hTL8B$WOo7a@?NGVhutFBVp8JQ67Qt*Z zi2hynp?=r`a7%M?lvBQ-D#n96;O2$=ox%+bj0g2-ie(Lz3ag#t%l{yVSgN--ME7in zOlWyW3nkFh0ad)?G5juetuA!d>|JVtu%cbn1+auTt5TJjq%z3_5VsItjdwKRII!E> 
zSlc+;xG?-+U{-pKXd*x+poo@;A$Ey**$LI+lctF}G9|b$a3Ae9Xqy4X^m6QGE2x$x zqw`0j?3%=eap&}m*=*TSDHp%3N3w}c(VWZS^F5$u&TH5d1$3t#^UwC0FP<-hrq1>H z2D-h4;bG_D>k;XZ=23b)U$lqcjCo8|7lXKULU2NO^5BH%MBSfNe(NcquJ}Zf{90ua zhhs^<)_a_~)N?Is#dye%CDggV3`7zQbP2(&-3QzSI9x}*p^Ykz+{F<0HiUK+ z)3ZpJ1W(+oTLKP@n~CQueItI3ek1Sw-wsAT#qZVAA%l^ho}ZUrwrqU8ESCmsE}U9) z0c#-F!!Ztc-Fap>7W?c+p~g1?8Z@-OUA9<)qf)RYhFuVv2&V`?|MJ{#CqWtGThXnP zhbI~??M$5x+o4RIMN~MZV(v@3qSM@~5y78%Ki(<>6BK?RPO+(x27=vp8idUb|V(Q)&`wzft)A2$tVH zzfs%gQVNncN)b?}E;0HaOZ`UBdcZPCt1Hcmm;Qanl&S=f_5Pm`cPQv)HwGE~j!A9? zu~v~-`A^#reRg9p4h)SsGJj96P(mGh)A3&=0n{)HPz*zv@)PyHmDykSb~Ugb{1nze|Nk@wqz++2$NfcEC~-vdYxI{-FkPDQfz zpBJ$HYb|cf$AK~+h?S+6!|XuFg|{nAlhsPi;ha18g(q(?u^%8JH6INcvWHxYnw%OR zI$usF zZLlmV)~$Z>YYPS~*Ugnrr0a^J;-9qtwMvRcvJ&L;V6j2zG0$)cTw32a-G=4HuO3#H z2rNKjcDTL->j=1KUhYf7jN-J%yY|q zr3&!X`o7Dv()rD=Peg39_x&d=hHA);R=TY2A(PUq%-xa93a4dexK?t!s1A;ec$~-Hb zeq8dcKM^2R3>%#yT&xk*Ro2%zV_sP;-ZS~Z(}pIki&YhejpPQB&n zbMDP8ahzh7Bw0a96ymgl@g9m7zVk|vRHBUVaU1%QuQf6HHO{_#SW(m`MVIiHLrIw% zL=}=6hp_gcA)mDUoT}^ox}BfAcS|h~9YgDnb&xJl5w%F~Q4 zi2}V-2GYYlf;i2EGUV&LiPVU_YooWzdC$e`3oNFgh=s|$hMuT}E9Nbp>?mH}ZQcUY z<-yjTg!Gyy-V`czj0ed#@~mRZJ&)kQc%t7x-;Rl!fYLZiKfXH9CFL{CZqh+0oFDy2 zU)o3!@vRWi$u4fXJZ5WSX2aCvG_*A+-Y-e6iVY)qf(^dNw%CX)8Ur^jb_u)i>$9(q(TlSpU54 zq{|GIQ^=V_QotYKZo5%pTUm2x)=74_9QVcabZnXt+9Ha~IrkbE5G0?)y?rVu0a^-H zC0<*SjJkJ8>vf#2qH{Q$Cr3@oxvI76H_9{+O>49!;CPvRygSzCa)0Q@St)`hvn$m( z`q66%|IvrkDDdEWmt5cIVCR#Hh4OC+)QJy^}(F z;6u9PQGd~=dLGeRVU2u}B;ZCD^6<~T{{n+o&KJA?(kGX+b8n=!@)%W+?ov~$Q)x;p z?Sa+mI{IuhZPAAZdD{d;aZA?BzgF8Z*eBnRJ+x?!wNuUQETrqpp0C#VgiilLe zunA;OuI(0RqptEW^woW;Vpf_$TQ=^kAym6XCtL%gDZDhsml`^dr?MQCU<9fbWAkJq zEoV5Yh&)hB%)+VtOuMk&4n&wp66vq+=+?tPq)%L-oZJ zCm}d58IjogHx>{Qr(o|uJe%~P7%44!^gb8U69Ji5ZCVAHUGR6I(JGS(9D!_QzAxHh ztSz*n__8*#x$+V274LdpuXLB`*DFqmdz}`u$N+`uy9xW&Z8$Ct=|6#rF0fi8v<8&M zr6&CyIJKWEri(=kNWosGuV)vU3C)@Cbb zJj8xwd;Z_-frE`!_x(b0){yD1Lg#NwwuqtftG7CSoVI<95*0qZ2XPC~#gSclhcDYT zD4c{xM8-DCsK|qxUd(SwY$h<2xtKk5Gd6dAvKMWiwf-XDxIen}m(s 
z#doOu``c4AmJ{_?M5P`*%ZK0f&D>y38+i36HM-#+3=~*6DQOWT^-3`CBGpf5qgUCS zGfGtI%ZO3}*XYcw_nhWyag8@rp2xIaQdxn@9~7+ld-vQIo?zH>&`djE?56>))Y3FCdTG+nfIM?Ii=pYHy&44Uau8_7l;8ut9ovMPH<4p z9aYZdU}OG0Akz3QQ8&moxw-K)8G<(RSjqy~Lt?WE)RQIt49~m@NrLPsk^5N1JIsUO zskxPDco+zzz1q~nGp0Uqqh*;BwFW&)q9DtIUqjRyG=$92?1Z16hXIapGMei`87M-` z(j;XgriKW4$_AgpTQ6JMnzYlFOZJ|SqVm|kJv%M-?hLZv29%Rfe^JYO+D1$YmOq3@{THh%#VMBKA)G)IZ4Ks7IZ4(v>h8B4;GRTA-)KXy>a zn{iOn^A{GZ@k9>YJod-nbn0E(hN}2U>lQQ5n88$GCbK(}ZlO%!sWf~$D}I%f!Thy`9fE-U+B$O0;3;}&y)BVT-GJJ#3<#( zwrt*h2ppw-QQs?ys&Ytcgf-3z^!p|Yt>qZgZ43zZCrz*$f;y>uhNcG9SveCVCBsGC z&a~?tmYOd2cM#H}d~W6vktvyu_xyQayq;59e|cnc0vO-C%%ofWQuYJYJ*#7jYj*sk zlMPvQUv73V;m%-Gu^51lh>xz#r6+_VpwHlDLZv!H&891w@5uz<5-HaAq-R>UhoLT? zD&YudW~G=id5{2&>kO~W{gI&Qrb!wPb3bw4#QW)IpvE@l2CiLdp~$T}-MTbo+4r%z z;hlnN8{ON)MGD|Vec9^<+)XQYR`bSXAnSMFt}sSSxEoU|*S9k)Y&pNcc@@f+%0PxL z{bRtrRC=25nulx>JEbP|$3d3wmo8gOr&Tzp|6vJ54LaXN$na90TS``j`t$Xdo`)S+ zmTyqTNl)Fr&ACnPEiF;3)Ck-2tSipy^ha|AmV=E*+pir{y0c^=Uq|W*_N$)}V7Wp@ zFfnIorLd!azih#7G*kIkz0C_}HYfgPW<96tfzCntxoL)g@qJ z@a-`v_np3vD@>~+!C||-lwzhln0n&jio&KQthx>kp3VB6z{>5wAPpSK- z4`ya(&%(T09v@}zhx^FP(c4EOoqcej(=18ZDfWi^wk<~>)?x;{e#yGm$z40&Zbgz? 
zC+fWDKeqHt0UJRdd;~T54f-B2xO#q1B_X30cjUHV!13wmZQU17I-TV;I-b$Nlz*Jk z9mk~jdfGg#N=DMTyGB8Gt)&g|D)Kwtt(MmpJDYu=!;XVC&WL^;Zm+;#pl0oRed-1( zh;Ki9byV3wx*8KfN_S)qNnjz$>s1>RW9Vs{@Ym^)oU|`bOQO2k@6eNWO5hndz5=@m zt)}DW5QbYqZ9sv02x&L%6mJ`C@|iejlfLt-81CSYD--hsY1Fx%mC`_ve0m9SK0bUB zaj)%Zr`=q)qnRlG9a7$%JI72tR@US5uJwsCWWzUvGt=VB&UFV3qKhNt^&fsb0`(?oR64og}L z5yV2ReI$PCpJH5~eu#%Hli7|>cZEeLbzOP;kk{8o_B?xOQ7ccXh3_T49~|?b@PQ5> zi0cPCMgjW)jJeG~{~^|bR6XD^YxJC(P~!>QYJ!&^yz%0EagR|oSC6!yOWlcWjL=mv zhFxbf%ehO`y}S9eFZ<<6e4CaWspA^kOgu4}m<>q7TIK_T=8TU=c`5nFb{AGz{O z@xn&u2y%RM%F*(ml&y~-w7py+!`nciPweP(JVa|_1l+4?b0{I9SV>qS69p(Ynx z5Bk+xJ;L2C>@*#lXJrm<0W>>O(urrNUMOK`3M&%1s(7l4op;i9ew6zIkY;3#4V>4r zt3TW7OdVRcG~z-C0<*I z8JI)}BF5D3={N(s`cs5nmY_bZkuQtW3&3Vu1R-0`kYJ(&o5 z#`$@8R640DC8}}GTKKx7OnD9b{;qsrSvvPSc^ZSMj-^henIr!GIF>L!lEZ-+zP2WW z{fm~kq?s#QHp!Se2Em2egrx7!`se{eohxtq`Of_GdXjJ?qH+YFYsm7*I`GY2< z?sIb6p^uak-?B)a8Rl(>6u`j z?~W}=y%1!qJ7M3P2MX{emRnJl#z)XapEi;~uolMeB^8YaCx@O_2?BwWB~-zMz_}Rt zqfDXEG;{vZdAl9S!S((6hjbw*VH2of(9*n|9<*!W{)KdkBI0fhNX-69uThjs?g9^` zIn;J_ZA?l^IS+ByvB0l_ALPtA&2mWN%rIsv)W32*trE;_r}d*xy;x<+9Xc!gadHE; z?iAFdvHfI@1_BXBB~T>mO87e&;$Ic)JQ)3)deTVmCja>^?Sz;1)nCK+i%XV&^XCE0C&AN5oU`Z#-2yn%DKP7aQu^+T5oKgZRc5 z>(Hfi`B0K#WMt6`SrRfv=L#%9W`x>qw>1b(4A;AU)1$7nW4&r6r9t<+|M`M?f<){> zTJHwfy-MsJhv#W}tm=uxLMT!w>Ia5y9 zD&0)R{q9hS`|av2%cac25vd`5=4h-H@@)Cf5;J?7$JpT;<-pjZJq6?S0BT!uRM*x3n7e0R_ zL{14=x-2+0;T!LyBtvjDC6uU3fk&5A^Z)X3{-TBcW{0L8hDNQa$cTs;5D&!tJ=TBT zBO$3tGtyK9qAC#aRQ?l%{RKe&^AFH5V;MpiHScX@^)Hh6Ut<8<(y$=FpA=fq{wGTM zk1!sT00>bRBuBxiK>MA3lgHM(Shm_n+Bag-`pu(HDQKtxz+BmHX!&Dp&>L<5#gx5| zt*oSC$nc?}LxU86TiHy@#sSO(s%nL+vYR#b`#z`B z%cQi)6n|-TjMzl9i))2c8BTdJKLyB^8{>`kO{6^p3G!OZ`U1m)JpyW_t;7M}#8LM7Ej0D2in@CYlp+XRq zXe#gLC^V8m3b5ru=D_HN4TaC~AgD2@nnvvB1EHLA0NxGIR`>slj}G*te-t;FtsX$1 zc?(FB!5<_XBpTF+Wv+Xq3dnQ01a0ian9P{mn9`Wq!L!N(dSgKHaX2vqmBYo$87tIR zWxDl#id5d_(~tt|Uz~7f$K4UCZ=txjytvI#4>m;qr6OVpcEkrNSsg&ci}g$AOM=Vb z<@ymJ9><9!PyLey#?JuH7kUxu`4PxE*p{A%!&;gHq#IQ9aPX*`x87{<*kiF0`%1N{ 
zaA-l5{w?nD#nu&h7r>I-Il(xQ7Ac$I^(vHT((>O3-T1iidZGCOHV*KP0pwu(KO9FV zvX>{#M49ry!-Gl-@f3d?D`)?TkHv zaJn0mUP|HfU;KdV6t)HIUl9K{$qGrFihs0=W{EjN@V`i4&l^(eW)b|Yp9JE> zLwf%ZWI(xx+6^h@-oxAe_vC*>Xh}H#BL^TIsEhN$bz4d!6-rwoO_(kIQQrl{|1QS= z)z1I1Gx;RcbctzER#8GkfAHM?4%YntbMIO_YyB$c<)bj)S}bu#=JJrX`oA~-*krUf z27j@$by_sB?*cy zto|c2>1I4h{aH*50aLo|2+f1L>2j)$=_J7CbZw< zMX$oKi2w23{=A%!HRGi!Mrj^e!>XWDHArAtD453S=kbl3@}H!22DX&$ZQMjD;vt5? z7{jxBkshl*Px4s_#2;!`xs-UHZA~7kguyD_4qL?)oa1O=UBX9E(bYS(q>%G-;{!71 zeyo-K^5zp^L=w-&YWMI;xl^GZN|;VOJmukD)i*B~-uwrtq+H<=PGS5{+?OZ!p1S|U z)@gR@<4fHh+v4H&nTWmTn5A2zI3Myd)T^W;nwm+eRil@JqPJw=SwYTmi-R}&o){QSMqzbC`4V(5*;Ghx&@{HYBi?y zTFga(#ZTCwpRY4)Kz4WlHEhhZdc0NHk{4-5))JFN;|>#}F?6M5x97i|bRy$^SN*vIf1%rt>Zf!o;zj#hS4I^-~K#aXA^iGpsjS z3>e=|;V*F-G%*LZp!mW+dLLc(!EroaQoS}Ajn}Uj@rB?~3o>^0O*|xPgOB@+OL?t+ zOn$Fs=_Vo$^N!^oT)NjN(n`}7@R*=z!Y=-V++EKZTX8z;HBG}(iVNK+wB#$%wN*`E z3$^`w%#pd&>ogy~Uic_UF9(KGU!o&VkPngIZdVx;N7$v4QKOKiCx#>J@(8|k4?+Nm zn0($q=fu88A@2A(&ASDvk@xFL*mh#MD3WBB@Y`2EA?;j<-m2HdX6_QX;Df{EPC3=B z6a3(d-GRw;9!j>jyenEilo!xe8ZjtG-xEyz3D&%XhfuAKGF^)0Q!To{aq`OaK9t^j zl)FNV35Hy*Em;}^wZ@-q_)XiwAJL7*->Fk#c7{;sFfcP?jA{Jz@$pUNofP5J`r?1L zr<)WR@SU!ESMWdg*=agqLq%xg~c48&BYYfViXznU$&|HN)TF~Ck#*cX9yx8BoxAsbB84U;HMjdh7R9)YAC&eorb#)NBKnF zCdM#0-|g{Ld*qBSdTp`v^qC9Mda5uhqG&Yb?B~8Ls_#{!ZQ9m6VI-Sotfg( zPcM_R;nFa@=^XhU%$)044U>+=vi>WkQ}K-%@Tin|D-wb==={@lolFS9g>;Ku7YF~~ zdSm5&>*-MkeCD9v`%(wrIkhhDqk!?LD){@p{z&G4*ZUg78mqTL1P<7bRaQL@!<#i) zBOLeV&s`25P5Ug0wDW$b*Xj{R5M>qnPC`qB&(RG^79WG@C=Y|PHr-pfKFRs~6uj!C z(vJ{GCz)#(fa(PXyy!~W7#xOja@x&b_U(f2F&f=Jeyq4!|_ z)aJ;F*YtPG5C_hYOyw?EHv{fAxino5W>&rb86Km8KFt?HkG5$(O8%I&U}xpYIHfAu zcY%zQ{r5Od$X)pcAD^4%-sx(k=6c0UP9wHfqE}te<5zG9PlTAbUw=ezN*-QnzY2dc z#*z|d9&8?f>`(oWem5IysGWS$>j_Mu?L85bQc~n062?NGL&e65?&ZU%Q)#jqU98>Q z^EraM%%Jh*<;eQHBDjam6agitUmXvf7J@pAVe~`J9p4JG=&l=qYawj!xj*3ljRlM} zfb~ha+^}EW<9S=RzF9s3%h&*?i+Oz%Tvwp3P(v84H}?vEfJn>eB9LKCM+VE#m3jtf zN6(QwV;9(YBRbWZ=^mtylgR8jseqA3 zz=&7tJ7|>K3);A$?sq1<3dv&?Ca20klh83@Jdq_~S87(ZE<}7oH7gG1{ELvNUM?iY 
zt2Vn?FPLU}6G+35g_D7*-Y|X!R9DuluDF?Fu3l-6Sm!nxn=~_dgxMbj@{s_IlHxLn zLocH)zzrsl&(9|~Njbtr6cTxr<~A5{_w!Tn$?oxHIIugQS>AI9zuVDNfWFoUEm`e@ zEIbcxPkkhMsz9!lu+@4RgAm_(*XD5qWXGdEYE>@yrgI9gt(L#N){{hEkitV|W-Vn& zkLS48n&oCTxj>%sBy)jh&~z$&zcw8>fsq&YBWrz#PWTM&wbA=*g|BZk`;kBUpV87yuALt;aJ8$H z+Bha5x>$KC+AU7E#eHMtNdto;aoojHn_jw6Uy_`r_>E-eveynG=TKA$BT?ea{2NU@ z2%z>>@xx`+dnMwBCTWyHmXAA+8W%RRryEh6NrL5NqBH!olG9m5gd0ci>gIK{W=dzx zl(sqLzvSpep8A3vch=oo!J~Pjp*l3iEpU@)rt`|VR(1XgZ9u<#z8JiV})uF-wQPH8MeP)mT1#hL z+I_7Kwj4F`=k-9ECh^p_`lJuWHY2ZN(cN^Y>fmlVTqqeH+Xh`Eqx1&VLgdrQriF}h z?wwlSk<0el>{mx+1A-23x}r82shf}=jyHuR=g!@^SH{`9OJ5XDQ!?&}`%TI#;o~VX z^{xBbw}zx~QF_35FL@nCa?rXn|}&yGDZ+gJR_&~s)j zpLO5Y@<=gR%(}cDAFMP)xY@$*@uZ3DeEx;N3~?F@1B+Vj;@DlK&#a7!OazMTu9)zr zDF!e(A;+tcychl+avR>9_EYeK0bRGSERHq4kF(p^7^;i>;{F?hPhRP@{)A-{F}(%499V$UWH%b<-#m7X8u82;!d_<0sT_|PP` zTqZCgfg?+7Ya}=3P|48FfiQ{$WBum}Lo6D+SVQIq<$sbzXeDHW1f7U7Z^BZ5bjC*~S@+f6%KC7*mk56rg{X z@-}Gi!>PO%ebb1uDZN6n`jQOJYU6Y`dq`w{I@U(>U7iQpkD~W-`+>|U3?8~Yj=|tC zlFlO1EVWUyCp9wB;sg^HBQ#EV3Jp;Y8?~HF`dAs{9zJVW?M~BBOcOFD>k?N=w3H4x zd-7wtazjCVLWo+}SMqQcECaF^nxj;8^lF4en{ zfVnX<>VLT1)`K%cNd%ng#xwaj#-vU{z*?9-TgCv3X#DDxHcYs$;+~yE8oVJZBYMLd z4D|339=MGR8SZpGb*&!QNBK@hO7kEnB05lCe8z%0jb_~!?Tvx_KNvBaQ^Iu4^Kh7p z?4mm92++m}Z5|Vr;gS|3JqS_ofj=*etKAK#D%_WUb+r4ifuU?`$e4Z*?pvfDK;ibol2JSAta9DLW}mc>RuyJ4Nn2ajk2|qZDx>%?&Y?io~rk z77s%VHJR0JU%~W_9JLT6XmVgW==)dz|7_}0;7sHAxo2Iw5l&yA)s#a#ZD3trJzeeI zp%nxkTXoCP}Rp#46dBYEj9DnSL>tuxJ|{p1zN9dBI!#jR>mGy&B@NVs1=rSY9qk z_wY#?WgI%}I6p7UB{&#lpd+f&%}9B2`4xS8P|Y08$f#K0022d3(dfH(4#UW&>+=WG zIS`4L^|qfbFOAI`v-Y!xXrI4P5VQM{N4q)aY=aL!3|`PF zdm-7$VB|%X#qX1mVXG-bTwjigiCI12yU&?9QqTptj3I*#x#)-@#b6>v5K3_89|*FNW?se4qbvlz?9|KoBSXt7RO}N=(I!?9A`VdGnJz7k8Tl z18%nrbqoBHF+}ljwTE)lhSb!2?7Udis$Fh8+DQBuTKUX<@WVpCi-#CmoRBN4BbkMr zxp;*$W^f6K@g3BuBv4(s594-paYwFQi?cYPQq8I?+w%ML@Mf`a``+KlQ!*F3wX*AV zG$`8_Z|l`>Rk8e>=$%b;e@Xe&wR}j`y@{1_mZ?O!GdOl}g zm^vb1U@{jeB&eZ zVe!)ck$%g$fyKU17kbVXg*#+#w{pJRx|KA?IQw&q5Lq`+VMj>zrMQGXoZNS^v>V+$ 
zd73szx6+r5d^F?oI&Yy3Og10>JszK2!6Ycke=ZB`WC+uK@BD-BJ39*b1M9`*BU>*V zP?LZ{Kxv$sTW=?`E=`F?T58qw-G+q}ulBOt;qlPp=0#)G7Z)}0FL^=|hfkYN?-dd; zDjNt;<-%Qy7 zCinQbhS~%qSod7uF>7QmEvT4d%Htsa+K$;A&j|Vo@LOYw&I6)9;2y3#xX;~Ke+hO));iS z`&?}H^|(fZ;KENE1M23AZ02e1zgc8f<;U9|?QJ5u7e{x#rlNm(MUZ6HKO*3BQQV(? zkt$;E{tUc=IpXi&>j)AinM95!4=8k@A?^)ceq*qH^?Gyhne+zs znY07cp{Z|cERoCWV-i|a3wvYX+o;&MXRZXUw-FQ&9ts)``->*dr0qe+_%~0(FlTWO z`*(4LXapynyjEv?C}!*Pnmj8zU}=qZe1r6Gf_PJ9EyAvdPlI1-Fe%Nt6CFp_g3aY- zQ%Ca@^!M<9E~Nz2#Ii1OX~sH0PQ;eBqIR4b#`5ccUg9)fYGdOF5^GA58BWjKxrRZ0 zx6AwWh#0Lkgr%SCORurX3ii=_=Mcx)Ci*0|iBY#xnBOtq(#uql^R+Heh$w42i85Q$ zVZglX^>{I{_Y7s%c+z*n|$k3Nm==|r8=AsTxFwB zU!+Yus{Qs;S;7j2JtjYj+IOFdhjMz?!#9>ej&a<;N4nwUVmSOUr+&G?rV&DzUuA20 zp%%qyG#2CYBmV)e6Pc=fK!=Wb33Hc3MwP(27L{~fQ*yK377MAJ^{+RL4H2_A5hTmV zpB0iPtH4+MwA{E5L8=<_NOn|tE@R}#(^9u+DOk7+%ZAIpXfx!v?2V^;dB_$E&X;#Z z`g*-g<}GGT;=UtOR|DJkMLhU8Je4)vHFGTw?v>EL4P%3y3sZZ)ny{atU2wTzzN|A$ zj?w3&yPK!P#*pUQ*Mhmgt!7jol33B3Jc0P?ii_$di0Hr<+hPQ)A1OWHE@0`?RlLn{ z41cUqqS5>cEr^%D$D?cB>njYUtNz+zldcd|$z4JtA3iNSM z!;(E)rc2Ufl1irbb#+vy5?frx4kpJXjK8mp#)v10mUjp!#q z$JCdVNsRh?dxJrIwZ^+Ky_aD7XInZGU$(A=EF%k)27cZW2l5$G%&5nr&G)?Gs^HCy z(Mh(8Bz#GGRnD!%<$P#-Zv9UxOM5G* zb^pwdC1E!aCtTUD!yfi?_kUUr$UkY_d6B0)&5i#x$+#Gt`(mT$8SX1qw964O-{tIj z&U?G=_Ggx>B)|!ATMJn^O=rJka;cUn`(oor*gn%*^1Sg)pBKxYJ(IK4L%Fse5Hb5m zU(|>6BdgW9koEV+Pl0GwipOHOsz&5W5*?g>{MAfq_Ceo1B+4`x=C^OBaR7B@=P5Y8 z2w9&Wuql=&9SvE?>ZoP!3$GfTh6oG4a1a9LI)Qi;Qdh8V@&M|v>5t4re>_C9e+u-2 zwo62lUm!!MX;ngCUGNz!>uznHew=EE?w5h&#y#s_v!Gk(!NX=`&|*vz`U$M|cxlq$ zeBGjoeOGho4A1E-7B-Um3GZIQYGim19jAZ}v^=W)7C6=deY06!?sM@A;RYP3Jn}#+ z%P_t-Y-*4XbsnXeGmC;P%EIRbda}C)8(j@zl3GucY7NwG-NN#ECG}kW3&!Z(SF?{! 
zGCHDoFn80xIz#E@X`u~1QI;YeM}D&<=i!fx{Wfi+u}EEA(`OawF8u=C4dMsbeDM}g z)=2dgsZ7%a*g)`><4!{OGwOLKwE9t}guuF>gZ23eI;&qUBRZ`oL=)EBuU{Ic*WsnO zkm^tfuCl#|vwNY-))Fgj-z%{+!(-JUbQ(pbk)UPB%2c3+nRGZ5O53EXmEtV+Xzf?* z+K$4Euec(Wz@vN{3gh;pAHW z;^a*IvXpZ0NAJ0KFK2sWwh&hNT`jcBVTw%1k;gDq44y5IE8S;qU45g1b-TT%~8A0N(o{pQx#eAEG*A?>O5{_BL*Eog+seVhwi@>J7MOysri!48Hiow#}{n46`yO=Z6rxHc}ysiUe6~;ify`(^Hl}vn7vbc zaLQBcr%P*S)M!SeVer_;bbu3~m&Zw#B=8wuf6CFNtDX)892~VlixMGs$!-`7FjnwR zabrSdqj!JR;+f#e5_GR`UCCP;}@EZbO`wlab&#Xg(#rHy1C&y3{9i0R}J;&ZrH#% zy&lVT5pqnB>VhNuB#m&$GZLyB{akBgyQWh{4M z%nhO#Z85Zoy5zW`v12HY!UE%F>vTv)vw+U9Z&-A`$p*a@h=dI*7$mx&d7yg^Di{ZR zhdLCiwHvt|`KCBGIX5DL331IA?Bzgx1DHa!!r`+Jsv5V48x#7PZBk$24FSYB29J99 zC>NKS*y1djvyJOp-vmC)dy%>F$tZ3f*DeV^!G_`}7I2E@!~XPHDp5t2xhTBB2~Lrw z!1w3QNijsHObYr0P#~kex-FH_Qp&aY{d?8qlltWg85w`;Nr5LYuQ{*1x-!xM*RG%I zLi4IL?jEE|K%aD)Vn4-T9Y}ovn?F(Hp)=m;aLP31R5>oB-L)~hW9W8VTZLbkiZ{~ zfG*ahOZzU>VRq+bp*vDK=nFu^{LTaL^lcUs#Fhlte{d=YAbi$?PVBCZKNv^<#0q=1 zxx33F7v%l&<4*QE9QKQsL;BRwbpr9d-}rcne@$ZNa9?Qs$Wp{Y`V*j4BzDPsre7Dof&W*Ll&Z0m3hp(FpkeRu2CmWP$T?e-HKQNLrRVXX5=$A^m@mu@Wh4|7Q!l=P6IO0J}^{ z+^2sP)AdCS$56K+1803FL0l$#P_W--ha7NSQ6tNehO|)Fx*xE0ATK;x2$~)?ZdFiO z(tG2^_RK$XdOurpj+|9Lzs)lDF0|^I-^|uf-CWHwy0T;_uPgy&jqm^d1E*sC|4)9# iTgAY@zy@lDF*CeSQIL>f4JZVPFnGH9xvX number of parameters on (tensor, pipeline) model parallel rank ({}, {}): {}".format( + " > number of parameters on (tensor, gtp, pipeline) model parallel rank ({}, {}, {}): {}".format( pg_collection.tp.rank(), + pg_collection.gtp.rank(), pg_collection.pp.rank(), sum([sum([p.nelement() for p in model_module.parameters()]) for model_module in model]), ), diff --git a/megatron/training/training.py b/megatron/training/training.py index a800e24edb8..477ac5a8c8d 100644 --- a/megatron/training/training.py +++ b/megatron/training/training.py @@ -217,6 +217,7 @@ def set_startup_timestamps(program_start=None, main_entry=None): from megatron.core.transformer.moe.moe_logging import get_moe_metrics_tracker from 
megatron.core.transformer.multi_token_prediction import MTPLossLoggingHelper from megatron.core.utils import get_batch_on_this_cp_rank, get_batch_on_this_tp_rank, unwrap_model +from megatron.experimental.gtp import HAVE_GTP from megatron.training.config import FaultInjectorConfig from megatron.training.datasets.data_samplers import build_pretraining_data_loader from megatron.training.initialize import ( @@ -302,6 +303,23 @@ def print_datetime(string, override_timestamp=None): time_str = datetime.fromtimestamp(override_timestamp).strftime('%Y-%m-%d %H:%M:%S.%f') print_rank_0(f'[{string}] datetime: {time_str} ') + +def reset_gtp_quantize_cache_after_load(model): + """Invalidate GTP's per-shard low-precision cache after a checkpoint load. + + GTP keeps a per-shard low-precision cache (``self.quantized``) that survives the + in-place writes to ``.data`` performed by DCP load. Reset it so the first forward + after resume re-quantizes from the freshly-loaded BF16 weight instead of reusing + the stale pre-load cast (which otherwise spikes lm-loss for one iteration before + normal training overwrites the cache). No-op when GTP is unavailable. + """ + if not HAVE_GTP: + return + from megatron.experimental.gtp import reset_gtp_quantize_cache + + for m in model: + reset_gtp_quantize_cache(m) + # Per-iteration packed-sequence (THD) accumulator. The tensor holds TWO stats, # both computed from the REAL ``cu_seqlens`` (i.e. 
unpadded sub-sequence lengths # -- ``cu_seqlens_padded`` is intentionally ignored so that neither the @@ -1759,9 +1777,10 @@ def build_model(): ) if get_pg_rank(pg_collection.dp) == 0 and get_pg_rank(pg_collection.cp) == 0: print( - ' > number of parameters on (tensor, pipeline) ' - 'model parallel rank ({}, {}): {}'.format( + ' > number of parameters on (tensor, gtp, pipeline) ' + 'model parallel rank ({}, {}, {}): {}'.format( get_pg_rank(pg_collection.tp), + get_pg_rank(pg_collection.gtp), get_pg_rank(pg_collection.pp), num_parameters, ), @@ -2097,6 +2116,7 @@ def setup_model_and_optimizer( and getattr(args, "use_torch_fsdp2", False) and args.ckpt_format == "torch_dist", ) + reset_gtp_quantize_cache_after_load(model) timers('load-checkpoint').stop(barrier=True) timers.log(['load-checkpoint']) one_logger and one_logger.log_metrics( @@ -3179,6 +3199,7 @@ def train( and getattr(args, "use_torch_fsdp2", False) and args.ckpt_format == "torch_dist", ) + reset_gtp_quantize_cache_after_load(model) ref_state_dict = {k: (v.cpu() if v is not None else v) for k, v in model[0].state_dict().items()} # Reload RL training checkpoint weights @@ -3194,6 +3215,7 @@ def train( and getattr(args, "use_torch_fsdp2", False) and args.ckpt_format == "torch_dist", ) + reset_gtp_quantize_cache_after_load(model) args.no_load_optim = no_load_optim diff --git a/megatron/training/utils/common_utils.py b/megatron/training/utils/common_utils.py index 00f30b34d4e..5f43ac85953 100644 --- a/megatron/training/utils/common_utils.py +++ b/megatron/training/utils/common_utils.py @@ -177,7 +177,7 @@ def _sum_reduce(tensor, group): _sum_reduce(moe_gtp_sharded_norm_2, mpu.get_expert_data_parallel_group(with_gtp=True)) # --- Combine dense + GTP norms --- - # model_parallel group = TP×PP×GTP, so GTP reduction is implicit. + # model_parallel group = TP×GTP×PP, so GTP reduction is implicit. 
norm_2 = params_norm_2 + sharded_norm_2 + gtp_norm_2 + gtp_sharded_norm_2 # --- Combine MoE + MoE-GTP norms --- diff --git a/tests/unit_tests/generalized_tensor_parallel/test_gtp_dcp.py b/tests/unit_tests/generalized_tensor_parallel/test_gtp_dcp.py new file mode 100644 index 00000000000..f4537b98322 --- /dev/null +++ b/tests/unit_tests/generalized_tensor_parallel/test_gtp_dcp.py @@ -0,0 +1,589 @@ +# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +"""Unit tests for GTP + distributed checkpointing. + +Verifies that ``make_sharded_tensors_for_checkpoint_with_gtp`` emits +ShardedTensor offsets that correctly encode TP × GTP sharding, and that +the helper is a no-op (delegates to vanilla) when no ``GTPShardedParam`` +is present in the input state_dict. + +""" + +import pytest +import torch +import torch.distributed as dist + +from megatron.core.dist_checkpointing import ShardedTensor +from megatron.experimental.gtp import ( + GTP_CONFIG, + GTPShardedParam, + HAVE_GTP, + make_sharded_tensors_for_checkpoint_with_gtp, + reset_gtp_quantize_cache, + update_gtp_config, + wrap_module_params_gtp, +) +from tests.unit_tests.test_utilities import Utils + + +@pytest.fixture(autouse=True) +def _no_pad_alignment(): + """Disable GTP padding for the duration of each test so local shard sizes + are exactly ``per_tp_out / gtp_size`` and the test math stays simple. + DCP semantics with padding are exercised by the integration tests. 
+ """ + orig = GTP_CONFIG.pad_for_alignment + update_gtp_config(pad_for_alignment=0) + yield + update_gtp_config(pad_for_alignment=orig) + + +pytestmark = pytest.mark.skipif(not HAVE_GTP, reason="GTP requires TE with hook registry") + + +@pytest.fixture(scope="module", autouse=True) +def _torchrun_dist_init(): + Utils.initialize_model_parallel() + yield + Utils.destroy_model_parallel() + + +def _require_world_size(n): + if dist.get_world_size() != n: + pytest.skip( + f"Requires world_size={n}, got {dist.get_world_size()} " + f"(launch with torchrun --nproc-per-node={n})" + ) + + +def _make_gtp_shard(out_features, in_features, gtp_group, dtype=torch.bfloat16): + """Build a small GTPShardedParam by wrapping a one-param dummy module.""" + + class _Dummy(torch.nn.Module): + def __init__(self): + super().__init__() + self.weight = torch.nn.Parameter( + torch.arange(out_features * in_features, dtype=dtype, device="cuda").reshape( + out_features, in_features + ) + ) + + mod = _Dummy() + wrap_module_params_gtp(mod, ["weight"], gtp_group) + return mod.weight # now a GTPShardedParam + + +def _worker_helper_offsets_tp_eq_gtp_axis(rank, world_size, port): + """TP=2, GTP=2 (4 ranks total). Weight is GTPShardedParam. + + Production flow: Mcore TE constructs the Linear with already-TP-sliced + out_features (i.e. full / tp_size). GTP then slices that further by + gtp_size. We mimic that by starting with a per-TP-rank tensor of size + ``full // tp_size`` and letting wrap_module_params_gtp slice it. 
+ """ + gtp_group = dist.new_group([0, 1]) if rank in (0, 1) else dist.new_group([2, 3]) + tp_group = dist.new_group([0, 2]) if rank in (0, 2) else dist.new_group([1, 3]) + + full_out_features = 8 + tp_size, gtp_size = 2, 2 + per_tp_out = full_out_features // tp_size # 4 + per_shard_out = per_tp_out // gtp_size # 2 + in_features = 4 + + weight = _make_gtp_shard(per_tp_out, in_features, gtp_group) + assert weight.shape == (per_shard_out, in_features), ( + f"rank={rank} local shard shape {tuple(weight.shape)} != " + f"({per_shard_out}, {in_features})" + ) + + sharded = make_sharded_tensors_for_checkpoint_with_gtp( + {"weight": weight}, + prefix="", + tensor_parallel_layers_axis_map={"weight": 0}, + sharded_offsets=(), + tp_group=tp_group, + dp_cp_group=dist.new_group(list(range(world_size))), + ) + st = sharded["weight"] + assert isinstance(st, ShardedTensor), f"Expected ShardedTensor, got {type(st)}" + + # Composite offset: (axis=0, tp_rank*gtp_size+gtp_rank, tp_size*gtp_size) + # rank → (tp_rank, gtp_rank): 0→(0,0), 1→(0,1), 2→(1,0), 3→(1,1) + tp_rank = rank // 2 + gtp_rank = rank % 2 + expected_offset = (tp_rank * gtp_size + gtp_rank) * per_shard_out + assert st.global_offset[0] == expected_offset, ( + f"rank={rank} expected axis-0 offset {expected_offset}, got {st.global_offset[0]}" + ) + assert st.global_shape[0] == full_out_features, ( + f"rank={rank} expected global axis-0 size {full_out_features}, got {st.global_shape[0]}" + ) + + +def _worker_helper_offsets_tp_neq_gtp_axis(rank, world_size, port): + """Row-parallel: TP=2 shards axis 1, GTP=2 shards axis 0. + + Per-TP-rank tensor: (full_out, full_in/tp_size). GTP further shards + axis 0 to (full_out/gtp_size, full_in/tp_size). 
+ """ + gtp_group = dist.new_group([0, 1]) if rank in (0, 1) else dist.new_group([2, 3]) + tp_group = dist.new_group([0, 2]) if rank in (0, 2) else dist.new_group([1, 3]) + + full_out, full_in = 8, 4 + tp_size, gtp_size = 2, 2 + per_tp_in = full_in // tp_size # 2 + per_shard_out = full_out // gtp_size # 4 + + weight = _make_gtp_shard(full_out, per_tp_in, gtp_group) + assert weight.shape == (per_shard_out, per_tp_in) + + sharded = make_sharded_tensors_for_checkpoint_with_gtp( + {"weight": weight}, + prefix="", + tensor_parallel_layers_axis_map={"weight": 1}, # row-parallel + sharded_offsets=(), + tp_group=tp_group, + dp_cp_group=dist.new_group(list(range(world_size))), + ) + st = sharded["weight"] + tp_rank = rank // 2 + gtp_rank = rank % 2 + assert st.global_offset[0] == gtp_rank * per_shard_out, ( + f"rank={rank} axis-0 offset wrong: {st.global_offset[0]}" + ) + assert st.global_offset[1] == tp_rank * per_tp_in, ( + f"rank={rank} axis-1 offset wrong: {st.global_offset[1]}" + ) + assert st.global_shape == (full_out, full_in), ( + f"rank={rank} global shape {st.global_shape} != ({full_out}, {full_in})" + ) + + +def _worker_helper_no_op_without_gtp(rank, world_size, port): + """Helper must delegate to vanilla when state_dict has no GTPShardedParam. + + Per-TP-rank shape under column-parallel TP=2: (full_out//tp_size, in). 
+ """ + tp_group = dist.new_group([0, 1]) if rank in (0, 1) else dist.new_group([2, 3]) + + full_out, in_features, tp_size = 8, 4, 2 + per_tp_out = full_out // tp_size + + plain = torch.nn.Parameter( + torch.zeros(per_tp_out, in_features, dtype=torch.bfloat16, device="cuda") + ) + bias = torch.nn.Parameter(torch.zeros(per_tp_out, dtype=torch.bfloat16, device="cuda")) + + sharded = make_sharded_tensors_for_checkpoint_with_gtp( + {"weight": plain, "bias": bias}, + prefix="", + tensor_parallel_layers_axis_map={"weight": 0, "bias": 0}, + sharded_offsets=(), + tp_group=tp_group, + dp_cp_group=dist.new_group(list(range(world_size))), + ) + # tp_group is [0,1] for ranks 0,1 and [2,3] for ranks 2,3 here — local tp_rank = rank % 2 + tp_rank = rank % 2 + assert sharded["weight"].global_offset[0] == tp_rank * per_tp_out, ( + f"rank={rank} fallback path produced wrong offset for weight: " + f"{sharded['weight'].global_offset[0]}" + ) + assert sharded["weight"].global_shape == (full_out, in_features) + + +def _worker_helper_padded_inproj_no_pad_case(rank, world_size, port): + """``in_proj.weight`` shape modeled after the production case (z|x|B|C|dt + concat along dim 0). With GTP=4 and these dim-0 sizes the alignment + constraint ``dim0 % (gtp_size * pad_for_alignment) == 0`` is satisfied — + *no* padding fires. Verify the helper emits the expected offsets. + """ + update_gtp_config(pad_for_alignment=16) + # dim0 = 512+512+64+64+8 = 1160 → 1160 % (4*16=64) = 8 ⇒ NOT aligned. + # Pick sizes that ARE aligned to 64 to exercise the no-pad path: + dim0 = 1152 # = 18 * 64; alignment-clean for gtp_size=4, pad=16 + in_features = 4 + + # All 4 ranks form a single GTP group. 
+ gtp_group = dist.new_group(list(range(world_size))) + weight = _make_gtp_shard(dim0, in_features, gtp_group) + + # No padding ⇒ local shape is exactly dim0 / 4 = 288 + expected_local = dim0 // 4 + assert weight.shape == (expected_local, in_features), ( + f"rank={rank}: padding should NOT have fired (dim0 aligned); " + f"got local shape {tuple(weight.shape)}, expected ({expected_local}, {in_features})" + ) + assert getattr(weight, "pad_length", 0) == 0 + + sharded = make_sharded_tensors_for_checkpoint_with_gtp( + {"weight": weight}, + prefix="", + tensor_parallel_layers_axis_map={"weight": 0}, + sharded_offsets=(), + tp_group=dist.new_group([rank]), # trivial 1-rank TP group + dp_cp_group=dist.new_group(list(range(world_size))), + ) + st = sharded["weight"] + assert st.global_shape[0] == dim0, ( + f"rank={rank} no-pad case: global_shape[0] {st.global_shape[0]} != {dim0}" + ) + assert st.global_offset[0] == rank * expected_local + + +def _worker_helper_padded_inproj_pad_case(rank, world_size, port): + """Same in_proj layout but with a dim-0 size that requires GTP padding. + + z=512, x=512, B=64, C=64, dt=8 → dim0=1160. With gtp_size=4 and + pad_for_alignment=16, alignment block = 64; 1160 % 64 = 8 so 56 pad + rows are appended. Padded dim0 = 1216, per-rank shard = 304 (uniform + across all 4 ranks; the pad rows live at the tail of rank-3's slice). + + The helper today saves the *padded* global shape (1216) — round-trip is + correct under save_gtp_size == load_gtp_size. This test pins that + behaviour and serves as a regression for the future "unpadded global" + fix. 
+ """ + update_gtp_config(pad_for_alignment=16) + dim0_unpadded = 1160 # z(512) + x(512) + B(64) + C(64) + dt(8) + in_features = 4 + gtp_size = world_size + alignment_block = 16 * gtp_size # = 64 + pad = (alignment_block - dim0_unpadded % alignment_block) % alignment_block + dim0_padded = dim0_unpadded + pad + per_shard = dim0_padded // gtp_size + + gtp_group = dist.new_group(list(range(world_size))) + weight = _make_gtp_shard(dim0_unpadded, in_features, gtp_group) + + assert weight.shape == (per_shard, in_features), ( + f"rank={rank}: post-pad shard shape {tuple(weight.shape)} != ({per_shard}, {in_features})" + ) + # Only rank-3 (the last GTP rank) carries the trailing pad rows; all ranks + # report the same pad_length (an invariant set by _gtp_slice_one_param). + assert getattr(weight, "pad_length", 0) == pad, ( + f"rank={rank}: pad_length {getattr(weight, 'pad_length', 0)} != {pad}" + ) + + sharded = make_sharded_tensors_for_checkpoint_with_gtp( + {"weight": weight}, + prefix="", + tensor_parallel_layers_axis_map={"weight": 0}, + sharded_offsets=(), + tp_group=dist.new_group([rank]), + dp_cp_group=dist.new_group(list(range(world_size))), + ) + st = sharded["weight"] + # Helper saves the padded global. ``allow_shape_mismatch=True`` is what + # makes the saved tensor portable to a different load-time GTP topology + # (different alignment choice yields a different padded size). + assert st.global_shape[0] == dim0_padded, ( + f"rank={rank} pad case: global_shape[0] {st.global_shape[0]} != {dim0_padded}" + ) + assert st.global_offset[0] == rank * per_shard + assert st.allow_shape_mismatch is True, ( + f"rank={rank} pad case: allow_shape_mismatch must be True when GTP padding fires; " + f"otherwise the ckpt cannot be loaded at a different GTP topology." + ) + + +def _worker_helper_cross_topology_reshard_metadata(rank, world_size, port): + """Pin the cross-topology reshard contract via ShardedTensor metadata. 
+ + We can't run a real DCP save/load against itself within a single torchrun + (need separate worlds), but we can verify the saved ShardedTensor carries + everything DCP needs to do the reshard: ``allow_shape_mismatch=True`` and + a global_shape large enough to cover any compatible load-side topology + (≥ unpadded original). + """ + update_gtp_config(pad_for_alignment=16) + dim0_unpadded = 1160 + in_features = 4 + gtp_size = world_size + alignment_block = 16 * gtp_size # 64 + dim0_padded = ( + dim0_unpadded + (alignment_block - dim0_unpadded % alignment_block) % alignment_block + ) + per_shard = dim0_padded // gtp_size + + gtp_group = dist.new_group(list(range(world_size))) + weight = _make_gtp_shard(dim0_unpadded, in_features, gtp_group) + + sharded = make_sharded_tensors_for_checkpoint_with_gtp( + {"weight": weight}, + prefix="", + tensor_parallel_layers_axis_map={"weight": 0}, + sharded_offsets=(), + tp_group=dist.new_group([rank]), + dp_cp_group=dist.new_group(list(range(world_size))), + ) + st = sharded["weight"] + # 1. The saved global covers >= unpadded original size. + assert st.global_shape[0] >= dim0_unpadded, ( + f"rank={rank} saved global_shape ({st.global_shape[0]}) < unpadded ({dim0_unpadded}); " + f"would lose valid data on cross-topology reshard." + ) + # 2. ``allow_shape_mismatch=True`` lets DCP tolerate that the load-side + # padded size may differ. + assert st.allow_shape_mismatch is True + # 3. Each rank's offset+local_shape covers a contiguous slice of the + # padded global; together the ranks cover [0, padded_global). + assert st.global_offset[0] + st.local_shape[0] <= st.global_shape[0] + assert st.global_offset[0] + st.local_shape[0] == (rank + 1) * per_shard + + +def _worker_save_then_load_offsets_symmetric(rank, world_size, port): + """Save-side and load-side ShardedTensors must produce identical offsets + and global_shape so DCP can correctly reshard between them. 
+ + We don't run the real DCP save (avoids filesystem / async-writer issues + in CI); we just verify the symmetry property the load path relies on. + """ + update_gtp_config(pad_for_alignment=0) + dim0 = 16 + in_features = 4 + gtp_group = dist.new_group(list(range(world_size))) + + def _build(prefix): + weight = _make_gtp_shard(dim0, in_features, gtp_group) + return make_sharded_tensors_for_checkpoint_with_gtp( + {"weight": weight}, + prefix=prefix, + tensor_parallel_layers_axis_map={"weight": 0}, + sharded_offsets=(), + tp_group=dist.new_group([rank]), + dp_cp_group=dist.new_group(list(range(world_size))), + )["layer.weight"] + + save_st = _build("layer.") + load_st = _build("layer.") + assert save_st.global_shape == load_st.global_shape + assert save_st.global_offset == load_st.global_offset + assert save_st.local_shape == load_st.local_shape + assert save_st.replica_id == load_st.replica_id + + +def _worker_reset_quantize_cache(rank, world_size, port): + """`reset_gtp_quantize_cache` must flip did_cast_to_low_precision back to False.""" + gtp_group = dist.new_group([0, 1]) if rank in (0, 1) else dist.new_group([2, 3]) + + class _Dummy(torch.nn.Module): + def __init__(self): + super().__init__() + self.weight = torch.nn.Parameter( + torch.zeros(4, 4, dtype=torch.bfloat16, device="cuda") + ) + + mod = _Dummy() + wrap_module_params_gtp(mod, ["weight"], gtp_group) + p = mod.weight + p.did_cast_to_low_precision = True + + reset_gtp_quantize_cache(mod) + assert p.did_cast_to_low_precision is False + + +def _worker_helper_offsets_ep_egtp(rank, world_size, port): + """EP=2, EGTP=2 (4 ranks): routed-expert weight. + + Mirrors ``TEGroupedLinear.sharded_state_dict``: expert parallelism prepends a + global-expert axis through ``sharded_offsets``, and EGTP shards each expert's + ``out_features`` (axis 0). The GTP-aware checkpoint helper layers the EGTP + axis-0 split on top of the prepended expert offset. + + rank → (ep_rank, egtp_rank): 0→(0,0) 1→(0,1) 2→(1,0) 3→(1,1). 
+ """ + egtp_group = dist.new_group([0, 1]) if rank in (0, 1) else dist.new_group([2, 3]) + + ep_size, egtp_size, num_gemms = 2, 2, 1 + ep_rank = rank // 2 + egtp_rank = rank % 2 + per_expert_out = 4 + per_shard_out = per_expert_out // egtp_size # 2 + in_features = 4 + num_global_experts = ep_size * num_gemms # 2 + global_expert_idx = ep_rank * num_gemms # + gemm_idx (0) + + weight = _make_gtp_shard(per_expert_out, in_features, egtp_group) + assert weight.shape == (per_shard_out, in_features), ( + f"rank={rank} EGTP shard shape {tuple(weight.shape)} != ({per_shard_out}, {in_features})" + ) + + sharded = make_sharded_tensors_for_checkpoint_with_gtp( + {"weight": weight}, + prefix="", + tensor_parallel_layers_axis_map={"weight": 0}, + # EP prepends the global-expert axis; EGTP shards out_features below it. + sharded_offsets=((0, global_expert_idx, num_global_experts),), + tp_group=dist.new_group([rank]), # no TP in this case + dp_cp_group=dist.new_group(list(range(world_size))), + ) + st = sharded["weight"] + assert isinstance(st, ShardedTensor), f"Expected ShardedTensor, got {type(st)}" + # global shape = (num_global_experts, full_out_features, in_features) + assert st.global_shape == (num_global_experts, per_expert_out, in_features), ( + f"rank={rank} global_shape {st.global_shape} != " + f"({num_global_experts}, {per_expert_out}, {in_features})" + ) + # Prepended expert axis (axis 0): offset == this rank's global expert index. + assert st.global_offset[0] == global_expert_idx, ( + f"rank={rank} expert-axis offset {st.global_offset[0]} != {global_expert_idx}" + ) + # EGTP axis (weight axis 0, shifted to global axis 1): offset == egtp_rank · per_shard. 
+ assert st.global_offset[1] == egtp_rank * per_shard_out, ( + f"rank={rank} EGTP axis-1 offset {st.global_offset[1]} != {egtp_rank * per_shard_out}" + ) + + +def _worker_helper_embedding_offsets(rank, world_size, port): + """Embedding / output_layer path: ``VocabParallelEmbedding.sharded_state_dict`` calls + ``make_tp_sharded_tensor_for_checkpoint`` DIRECTLY (it needs allow_shape_mismatch for + vocab padding), bypassing the GTP-aware wrapper. So that helper itself must layer the + GTP axis-0 split. TP=2, GTP=2, tp_axis=0 (vocab) → composite axis-0 offset, same as the + column-parallel case. + """ + from megatron.core.utils import make_tp_sharded_tensor_for_checkpoint + + gtp_group = dist.new_group([0, 1]) if rank in (0, 1) else dist.new_group([2, 3]) + tp_group = dist.new_group([0, 2]) if rank in (0, 2) else dist.new_group([1, 3]) + + full_vocab, hidden = 8, 4 + tp_size, gtp_size = 2, 2 + per_tp = full_vocab // tp_size # 4 + per_shard = per_tp // gtp_size # 2 + + weight = _make_gtp_shard(per_tp, hidden, gtp_group) + assert weight.shape == (per_shard, hidden) + + st = make_tp_sharded_tensor_for_checkpoint( + tensor=weight, + key="embedding.word_embeddings.weight", + tp_axis=0, + allow_shape_mismatch=True, # how VocabParallelEmbedding calls it + prepend_offsets=(), + tp_group=tp_group, + dp_cp_group=dist.new_group(list(range(world_size))), + ) + assert isinstance(st, ShardedTensor), f"Expected ShardedTensor, got {type(st)}" + tp_rank = rank // 2 + gtp_rank = rank % 2 + expected_offset = (tp_rank * gtp_size + gtp_rank) * per_shard + assert st.global_offset[0] == expected_offset, ( + f"rank={rank} embedding axis-0 offset {st.global_offset[0]} != {expected_offset}" + ) + assert st.global_shape[0] == full_vocab, ( + f"rank={rank} embedding global axis-0 {st.global_shape[0]} != {full_vocab}" + ) + + +def _worker_helper_public_wrapper_delegates(rank, world_size, port): + """The public ``make_sharded_tensors_for_checkpoint`` (the entry point most layers call, + e.g. 
ColumnParallelLinear / output_layer) must detect a GTPShardedParam and produce the + GTP-composite offset — i.e. it delegates to the GTP-aware path rather than the vanilla + TP-only one. TP=2, GTP=2, column-parallel (tp_axis=0). + """ + from megatron.core.transformer.utils import make_sharded_tensors_for_checkpoint + + gtp_group = dist.new_group([0, 1]) if rank in (0, 1) else dist.new_group([2, 3]) + tp_group = dist.new_group([0, 2]) if rank in (0, 2) else dist.new_group([1, 3]) + + full_out, in_features = 8, 4 + tp_size, gtp_size = 2, 2 + per_tp_out = full_out // tp_size # 4 + per_shard_out = per_tp_out // gtp_size # 2 + + weight = _make_gtp_shard(per_tp_out, in_features, gtp_group) + + sharded = make_sharded_tensors_for_checkpoint( + {"weight": weight}, + prefix="layer.", + tensor_parallel_layers_axis_map={"weight": 0}, + sharded_offsets=(), + tp_group=tp_group, + dp_cp_group=dist.new_group(list(range(world_size))), + ) + st = sharded["layer.weight"] + assert isinstance(st, ShardedTensor), f"Expected ShardedTensor, got {type(st)}" + tp_rank = rank // 2 + gtp_rank = rank % 2 + expected_offset = (tp_rank * gtp_size + gtp_rank) * per_shard_out + assert st.global_offset[0] == expected_offset, ( + f"rank={rank} public wrapper did not produce the GTP-composite offset: " + f"{st.global_offset[0]} != {expected_offset} (delegation to the GTP path failed?)" + ) + assert st.global_shape[0] == full_out, ( + f"rank={rank} global axis-0 {st.global_shape[0]} != {full_out}" + ) + + +def _worker_helper_replicated_sink_rejects_gtp(rank, world_size, port): + """Sanity guard: a GTPShardedParam must NEVER be saved via the replicated + make_sharded_tensor_for_checkpoint (it would record a shard-sized global shape). + The helper asserts; this pins that behaviour. 
+ """ + from megatron.core.utils import make_sharded_tensor_for_checkpoint + + gtp_group = dist.new_group([0, 1]) if rank in (0, 1) else dist.new_group([2, 3]) + weight = _make_gtp_shard(4, 4, gtp_group) + with pytest.raises(AssertionError): + make_sharded_tensor_for_checkpoint( + weight, + "weight", + tp_group=dist.new_group([rank]), + dp_cp_group=dist.new_group(list(range(world_size))), + ) + + +# --------------------------------------------------------------------------- +# Test class wrappers (4-GPU) +# --------------------------------------------------------------------------- + + +@pytest.mark.run_only_on_devices_with_compute_capability(compute_capability=(10, 0)) +class TestGtpDcpHelper: + def test_composite_offset_same_axis(self): + _require_world_size(4) + _worker_helper_offsets_tp_eq_gtp_axis(dist.get_rank(), 4, None) + + def test_dual_offsets_cross_axis(self): + _require_world_size(4) + _worker_helper_offsets_tp_neq_gtp_axis(dist.get_rank(), 4, None) + + def test_ep_egtp_offsets(self): + _require_world_size(4) + _worker_helper_offsets_ep_egtp(dist.get_rank(), 4, None) + + def test_embedding_offsets(self): + _require_world_size(4) + _worker_helper_embedding_offsets(dist.get_rank(), 4, None) + + def test_public_wrapper_delegates(self): + _require_world_size(4) + _worker_helper_public_wrapper_delegates(dist.get_rank(), 4, None) + + def test_replicated_sink_rejects_gtp(self): + _require_world_size(4) + _worker_helper_replicated_sink_rejects_gtp(dist.get_rank(), 4, None) + + def test_no_op_without_gtp(self): + _require_world_size(4) + _worker_helper_no_op_without_gtp(dist.get_rank(), 4, None) + + def test_reset_quantize_cache(self): + _require_world_size(4) + _worker_reset_quantize_cache(dist.get_rank(), 4, None) + + def test_inproj_no_pad(self): + _require_world_size(4) + _worker_helper_padded_inproj_no_pad_case(dist.get_rank(), 4, None) + + def test_inproj_with_pad(self): + _require_world_size(4) + _worker_helper_padded_inproj_pad_case(dist.get_rank(), 4, 
None) + + def test_cross_topology_reshard_metadata(self): + _require_world_size(4) + _worker_helper_cross_topology_reshard_metadata(dist.get_rank(), 4, None) + + def test_save_then_load_offsets_symmetric(self): + _require_world_size(4) + _worker_save_then_load_offsets_symmetric(dist.get_rank(), 4, None) From 70ef35d5eb5ffffa7b15a7bc8f5c839612920aef Mon Sep 17 00:00:00 2001 From: Shiqing Fan Date: Sat, 13 Jun 2026 07:18:58 -0700 Subject: [PATCH 36/59] rename gtp-exclude process group: with_gtp -> no_gtp Signed-off-by: Shiqing Fan --- .../distributed/distributed_data_parallel.py | 20 +-- .../core/extensions/transformer_engine.py | 6 +- megatron/core/optimizer/__init__.py | 23 ++-- megatron/core/parallel_state.py | 122 +++++++++--------- megatron/core/process_groups_config.py | 117 +++++++++-------- megatron/core/ssm/mamba_mixer.py | 6 +- megatron/core/utils.py | 6 +- megatron/experimental/gtp/README.md | 10 +- .../gtp/generalized_tensor_parallelism.py | 16 +-- megatron/training/training.py | 6 +- megatron/training/utils/common_utils.py | 16 ++- .../generalized_tensor_parallel/test_gtp.py | 6 +- .../test_gtp_dcp.py | 6 +- 13 files changed, 182 insertions(+), 178 deletions(-) diff --git a/megatron/core/distributed/distributed_data_parallel.py b/megatron/core/distributed/distributed_data_parallel.py index 69611a579ad..ff9006ed029 100644 --- a/megatron/core/distributed/distributed_data_parallel.py +++ b/megatron/core/distributed/distributed_data_parallel.py @@ -84,23 +84,23 @@ def __init__( self.expt_dp_group = process_group_dict['expt_dp_group'] # DDP reduces every bucket over the GTP/EGTP-EXCLUDED replicate group (the gtp axis is # completed separately by the RS + finalize all-reduce), so the intra groups DDP shards - # over ARE the *_with_gtp variants. They alias the regular intra groups when GTP is off. + # over ARE the *_no_gtp variants. They alias the regular intra groups when GTP is off. 
self.intra_dp_cp_group = process_group_dict.get( - 'intra_dp_cp_with_gtp_group', process_group_dict['intra_dp_cp_group'] + 'intra_dp_cp_no_gtp_group', process_group_dict['intra_dp_cp_group'] ) self.intra_expt_dp_group = process_group_dict.get( - 'intra_expt_dp_with_egtp_group', process_group_dict['intra_expt_dp_group'] + 'intra_expt_dp_no_egtp_group', process_group_dict['intra_expt_dp_group'] ) # Full cross-instance, GTP-peer-EXCLUDED groups for broadcast_params (init-time weight # sync must reach all true replicas). Fall back to the full DP groups when GTP is off. - self.dp_cp_with_gtp_group = process_group_dict.get('dp_cp_with_gtp_group', self.dp_cp_group) - self.expt_dp_with_egtp_group = process_group_dict.get( - 'expt_dp_with_egtp_group', self.expt_dp_group + self.dp_cp_no_gtp_group = process_group_dict.get('dp_cp_no_gtp_group', self.dp_cp_group) + self.expt_dp_no_egtp_group = process_group_dict.get( + 'expt_dp_no_egtp_group', self.expt_dp_group ) # GTP is "active" when the replicate groups are strictly smaller than the full DP groups. gtp_active = ( - self.dp_cp_with_gtp_group.size() != self.dp_cp_group.size() - or self.expt_dp_with_egtp_group.size() != self.expt_dp_group.size() + self.dp_cp_no_gtp_group.size() != self.dp_cp_group.size() + or self.expt_dp_no_egtp_group.size() != self.expt_dp_group.size() ) if gtp_active and self.ddp_config.average_in_collective: raise NotImplementedError( @@ -616,9 +616,9 @@ def broadcast_params(self): # Each (E)GTP peer holds a distinct 1/N shard, so broadcast over the (E)GTP-EXCLUDED # group — else rank-0's shard would clobber the others. 
if is_expert_parallel: - data_parallel_group = self.expt_dp_with_egtp_group if is_gtp else self.expt_dp_group + data_parallel_group = self.expt_dp_no_egtp_group if is_gtp else self.expt_dp_group else: - data_parallel_group = self.dp_cp_with_gtp_group if is_gtp else self.dp_cp_group + data_parallel_group = self.dp_cp_no_gtp_group if is_gtp else self.dp_cp_group torch.distributed.broadcast( param.data, src=torch.distributed.get_global_rank(data_parallel_group, 0), diff --git a/megatron/core/extensions/transformer_engine.py b/megatron/core/extensions/transformer_engine.py index 20caf7745f6..9f85ce9a8ab 100644 --- a/megatron/core/extensions/transformer_engine.py +++ b/megatron/core/extensions/transformer_engine.py @@ -2399,12 +2399,12 @@ def get_gemm_tensor(param_name: str, gemm_idx: int) -> torch.Tensor: sharded_state_dict[f"{prefix}bias{gemm_idx}"] = sub_sd[f"{gemm_idx}.bias"] # Set the expert-DP replica_id, picking the group by what EGTP does to each entry: # - weight ShardedTensor: SHARDED across EGTP (distinct chunks) → not replicas → - # use ``intra_expt_dp_with_egtp`` (EGTP-excluded). + # use ``intra_expt_dp_no_egtp``. # - _extra_state ShardedObject: REPLICATED across EGTP → need distinct replica_ids - # to avoid duplicate-writer collisions → use full ``expt_dp`` (EGTP-included). + # to avoid duplicate-writer collisions → use full ``expt_dp``. # EGTP=1: the two groups coincide, so this is a no-op. 
expt_dp_full = self._pg_collection.expt_dp - expt_dp_intra = self._pg_collection.intra_expt_dp_with_egtp + expt_dp_intra = self._pg_collection.intra_expt_dp_no_egtp for k, sh_ten in sharded_state_dict.items(): replica_id = sh_ten.replica_id assert ( diff --git a/megatron/core/optimizer/__init__.py b/megatron/core/optimizer/__init__.py index 427172472a1..b507f7ea6b7 100644 --- a/megatron/core/optimizer/__init__.py +++ b/megatron/core/optimizer/__init__.py @@ -1036,9 +1036,9 @@ def get_megatron_optimizer( dp_cp_group = process_groups_dict['dp_cp_group'] intra_dp_cp_group = process_groups_dict['intra_dp_cp_group'] - intra_dp_cp_with_gtp_group = process_groups_dict['intra_dp_cp_with_gtp_group'] + intra_dp_cp_no_gtp_group = process_groups_dict['intra_dp_cp_no_gtp_group'] intra_expt_dp_group = process_groups_dict['intra_expt_dp_group'] - intra_expt_dp_with_egtp_group = process_groups_dict['intra_expt_dp_with_egtp_group'] + intra_expt_dp_no_egtp_group = process_groups_dict['intra_expt_dp_no_egtp_group'] mp_group = process_groups_dict['mp_group'] expt_tp_pp_group = process_groups_dict['expt_tp_pp_group'] expt_tp_pp_with_egtp_group = process_groups_dict['expt_tp_pp_with_egtp_group'] @@ -1048,18 +1048,18 @@ def get_megatron_optimizer( intra_dist_opt_group = process_groups_dict['intra_dist_opt_group'] # GTP/EGTP params fold into the main / expert optimizers, sharding their optimizer state over - # the *_with_gtp (gtp/egtp-EXCLUDED) replicate group — which aliases the full DP group when GTP + # the *_no_gtp (gtp/egtp-EXCLUDED) replicate group — which aliases the full DP group when GTP # is inactive. GTP is "active" when that group is strictly smaller (no Gloo state path then). # A None group means the axis is unused (e.g. no expert parallelism) → not active. 
def _gtp_active_for(sub, full): return sub is not None and full is not None and sub.size() != full.size() - gtp_active = _gtp_active_for(intra_dp_cp_with_gtp_group, intra_dp_cp_group) or _gtp_active_for( - intra_expt_dp_with_egtp_group, intra_expt_dp_group + gtp_active = _gtp_active_for(intra_dp_cp_no_gtp_group, intra_dp_cp_group) or _gtp_active_for( + intra_expt_dp_no_egtp_group, intra_expt_dp_group ) - main_dp_group = intra_dp_cp_with_gtp_group + main_dp_group = intra_dp_cp_no_gtp_group main_dp_group_gloo = None if gtp_active else intra_dp_cp_group_gloo - main_expt_dp_group = intra_expt_dp_with_egtp_group + main_expt_dp_group = intra_expt_dp_no_egtp_group # ``mp_group`` spans TP×GTP×PP (GTP-merged). model_parallel_rank = get_pg_rank(mp_group) @@ -1153,7 +1153,7 @@ def _gtp_active_for(sub, full): param_to_param_group[param_name] = param_group_id param_group_id += 1 - # Pass Gloo process groups into optimizer only if needed. + # main_dp_group_gloo was selected above (None when GTP is active; no Gloo path yet). optimizers.append( _get_megatron_optimizer_based_on_param_groups( config=config, @@ -1172,7 +1172,7 @@ def _gtp_active_for(sub, full): model_chunk_offset += 1 # Expert params (incl. EGTP shards): reduce over the egtp-EXCLUDED replicate group - # (intra_expt_dp_with_egtp_group, which aliases the full expert-DP group when EGTP is + # (intra_expt_dp_no_egtp_group, which aliases the full expert-DP group when EGTP is # inactive). Backed by expert_parallel_buffers in DDP. moe_param_groups, moe_buffers = _get_param_groups_and_buffers( model_chunks, @@ -1193,8 +1193,9 @@ def _gtp_active_for(sub, full): # each EGTP peer a distinct distopt ShardedObject key. See gtp/README.md §3.3 (Optimizer # state) for why the non-merged ``expt_tp_pp_group`` would cause a DCP "duplicate" error. expt_model_parallel_rank = get_pg_rank(expt_tp_pp_with_egtp_group) - # Pass Gloo process groups into optimizer only if needed. 
GTP shards over the - # egtp-EXCLUDED replicate group (no Gloo path for it yet), matching the dense side. + # Gloo expert-DP group for the optimizer, only when (E)GTP is inactive. When active the + # optimizer shards over the egtp-EXCLUDED (no_egtp) replicate group, which has no Gloo + # variant yet, so pass None (mirrors the dense main_dp_group_gloo above). if use_gloo_process_groups and not gtp_active: expt_data_parallel_group_gloo = intra_expt_dp_group_gloo else: diff --git a/megatron/core/parallel_state.py b/megatron/core/parallel_state.py index 769ed4aea76..63e08c6f70e 100644 --- a/megatron/core/parallel_state.py +++ b/megatron/core/parallel_state.py @@ -69,15 +69,15 @@ _EXPERT_TENSOR_MODEL_PIPELINE_PARALLEL_GROUP_WITH_EGTP = None # Expert data parallel group _EXPERT_DATA_PARALLEL_GROUP = None -_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP = None +_EXPERT_DATA_PARALLEL_GROUP_NO_GTP = None _EXPERT_DATA_PARALLEL_GROUP_GLOO = None _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP = None _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_GLOO = None _INTER_PARTIAL_EXPERT_DATA_PARALLEL_GROUP = None # Partial expert DP group with EGTP peers excluded — per-distopt-instance slice -# of true expert-weight replicas. Mirrors _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP +# of true expert-weight replicas. Mirrors _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_NO_GTP # on the dense side. -_INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP = None +_INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_NO_GTP = None # Parallel state values changed on the fly _MPU_EXPERT_MODEL_PARALLEL_WORLD_SIZE = None _MPU_EXPERT_MODEL_PARALLEL_RANK = None @@ -133,8 +133,8 @@ _HYBRID_DP_CP_GROUPS = {} # Data parallel group information with generalized tensor parallel accounted for. -_DATA_PARALLEL_GROUP_WITH_GTP = None -_DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP = None +_DATA_PARALLEL_GROUP_NO_GTP = None +_DATA_PARALLEL_GROUP_WITH_CP_NO_GTP = None # Data parallel group information with context parallel combined. 
_DATA_PARALLEL_GROUP_WITH_CP = None @@ -147,7 +147,7 @@ # Partial Data parallel group information with context parallel combined and GTP peers # excluded. Reaches only true weight-replica ranks within one distributed-optimizer instance. -_INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP = None +_INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_NO_GTP = None # combined parallel group of TP and CP _TENSOR_AND_CONTEXT_PARALLEL_GROUP = None @@ -952,7 +952,7 @@ def _inject_gtp(order_str: str, after: str = "tp") -> str: # Tokens for the FULL (gtp-inclusive) data-parallel domain. gtp is factored out of the # generator's 'dp' axis, so the full data domain spans gtp explicitly ('gtp-dp'). The - # replicate (gtp-excluded) groups are the _*_WITH_GTP variants below. + # replicate (gtp-excluded) groups are the _*_NO_GTP variants below. dp_full_token = "gtp-dp" dp_cp_full_token = "gtp-dp-cp" @@ -1074,10 +1074,10 @@ def _inject_gtp(order_str: str, after: str = "tp") -> str: _DATA_PARALLEL_GLOBAL_RANKS = ranks # Build DP groups with generalized tensor parallel accounted for. - # with_gtp DP = only ranks that share the same GTP-rank (true weight replicas). - global _DATA_PARALLEL_GROUP_WITH_GTP - global _DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP - global _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP + # no_gtp DP = only ranks that share the same GTP-rank (true weight replicas). + global _DATA_PARALLEL_GROUP_NO_GTP + global _DATA_PARALLEL_GROUP_WITH_CP_NO_GTP + global _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_NO_GTP if gtp_remat_size > 1: # The replicate (gtp-excluded) DP groups ARE get_ranks('dp') / get_ranks('dp-cp') by # construction (gtp is its own axis). 
Every rank iterates all groups so each create_group @@ -1087,29 +1087,29 @@ def _inject_gtp(order_str: str, after: str = "tp") -> str: dp_ranks, timeout=timeout, pg_options=get_nccl_options("dp_gtp", nccl_comm_cfgs), - group_desc="DATA_PARALLEL_GROUP_WITH_GTP", + group_desc="DATA_PARALLEL_GROUP_NO_GTP", ) if rank in dp_ranks: - _DATA_PARALLEL_GROUP_WITH_GTP = group + _DATA_PARALLEL_GROUP_NO_GTP = group for dp_cp_ranks in decoder_rank_generator.get_ranks('dp-cp'): group = create_group( dp_cp_ranks, timeout=timeout, pg_options=get_nccl_options("dp_cp_gtp", nccl_comm_cfgs), - group_desc="DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP", + group_desc="DATA_PARALLEL_GROUP_WITH_CP_NO_GTP", ) if rank in dp_cp_ranks: - _DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP = group + _DATA_PARALLEL_GROUP_WITH_CP_NO_GTP = group # GTP requires a single distributed-optimizer instance (asserted above), so the # per-instance partial group is just the full replicate group. - _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP = _DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP + _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_NO_GTP = _DATA_PARALLEL_GROUP_WITH_CP_NO_GTP else: # GTP inactive (gtp_remat_size == 1): the replicate groups alias the regular DP groups. - _DATA_PARALLEL_GROUP_WITH_GTP = _DATA_PARALLEL_GROUP - _DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP = _DATA_PARALLEL_GROUP_WITH_CP - _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP = ( + _DATA_PARALLEL_GROUP_NO_GTP = _DATA_PARALLEL_GROUP + _DATA_PARALLEL_GROUP_WITH_CP_NO_GTP = _DATA_PARALLEL_GROUP_WITH_CP + _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_NO_GTP = ( _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP ) @@ -1510,8 +1510,8 @@ def _inject_gtp(order_str: str, after: str = "tp") -> str: _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP = _EXPERT_DATA_PARALLEL_GROUP _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_GLOO = _EXPERT_DATA_PARALLEL_GROUP_GLOO # Build expert DP group with expert generalized tensor parallel accounted for. 
- global _EXPERT_DATA_PARALLEL_GROUP_WITH_GTP - global _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP + global _EXPERT_DATA_PARALLEL_GROUP_NO_GTP + global _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_NO_GTP if expert_gtp_remat_size > 1: # The replicate (egtp-excluded) expert-DP groups ARE get_ranks('dp') (egtp is its own axis). for dp_ranks in expert_decoder_rank_generator.get_ranks('dp'): @@ -1519,15 +1519,15 @@ def _inject_gtp(order_str: str, after: str = "tp") -> str: dp_ranks, timeout=timeout, pg_options=get_nccl_options("ep_dp_gtp", nccl_comm_cfgs), - group_desc="EXPERT_DATA_PARALLEL_GROUP_WITH_GTP", + group_desc="EXPERT_DATA_PARALLEL_GROUP_NO_GTP", ) if rank in dp_ranks: - _EXPERT_DATA_PARALLEL_GROUP_WITH_GTP = group - _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP = _EXPERT_DATA_PARALLEL_GROUP_WITH_GTP + _EXPERT_DATA_PARALLEL_GROUP_NO_GTP = group + _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_NO_GTP = _EXPERT_DATA_PARALLEL_GROUP_NO_GTP else: # EGTP inactive: the replicate group aliases the regular expert-DP group. - _EXPERT_DATA_PARALLEL_GROUP_WITH_GTP = _EXPERT_DATA_PARALLEL_GROUP - _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP = ( + _EXPERT_DATA_PARALLEL_GROUP_NO_GTP = _EXPERT_DATA_PARALLEL_GROUP + _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_NO_GTP = ( _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP ) @@ -1744,32 +1744,32 @@ def get_pipeline_model_parallel_group(check_initialized=True): def get_data_parallel_group( - with_context_parallel=False, with_gtp=False, partial_data_parallel=False + with_context_parallel=False, no_gtp=False, partial_data_parallel=False ): """Get the data-parallel group the caller rank belongs to. Args: with_context_parallel: If True, include context-parallel ranks in the group. - with_gtp: If True, return only the true weight-replica ranks (exclude GTP peers). + no_gtp: If True, return only the true weight-replica ranks (exclude GTP peers). partial_data_parallel: If True, return partial DP group (requires with_context_parallel). 
""" - if with_gtp: + if no_gtp: if with_context_parallel: if partial_data_parallel: assert ( - _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP is not None + _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_NO_GTP is not None ), "Intra partial data parallel group with CP and GTP is not initialized" - return _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP + return _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_NO_GTP assert ( - _DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP is not None + _DATA_PARALLEL_GROUP_WITH_CP_NO_GTP is not None ), "data parallel group with CP and GTP is not initialized" - return _DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP + return _DATA_PARALLEL_GROUP_WITH_CP_NO_GTP else: assert partial_data_parallel is False, "Partial DP for Optimizer needs to include CP" assert ( - _DATA_PARALLEL_GROUP_WITH_GTP is not None + _DATA_PARALLEL_GROUP_NO_GTP is not None ), "data parallel group with generalized tensor parallel is not initialized" - return _DATA_PARALLEL_GROUP_WITH_GTP + return _DATA_PARALLEL_GROUP_NO_GTP if with_context_parallel: if partial_data_parallel: assert ( @@ -1787,10 +1787,10 @@ def get_data_parallel_group( def get_data_parallel_group_gloo( - with_context_parallel=False, with_gtp=False, partial_data_parallel=False + with_context_parallel=False, no_gtp=False, partial_data_parallel=False ): """Get the Gloo data-parallel group the caller rank belongs to.""" - assert not with_gtp, "GTP does not support Gloo data-parallel groups" + assert not no_gtp, "GTP does not support Gloo data-parallel groups" if with_context_parallel: if partial_data_parallel: assert ( @@ -2096,7 +2096,7 @@ def get_pipeline_model_parallel_prev_rank(): def get_data_parallel_world_size( - with_context_parallel=False, with_gtp=False, partial_data_parallel=False + with_context_parallel=False, no_gtp=False, partial_data_parallel=False ): """Return world size for the data parallel group.""" global _MPU_DATA_PARALLEL_WORLD_SIZE @@ -2105,7 +2105,7 @@ def get_data_parallel_world_size( if 
torch.distributed.is_available() and torch.distributed.is_initialized(): return get_data_parallel_group( with_context_parallel=with_context_parallel, - with_gtp=with_gtp, + no_gtp=no_gtp, partial_data_parallel=partial_data_parallel, ).size() else: @@ -2119,7 +2119,7 @@ def set_data_parallel_rank(rank): def get_data_parallel_rank( - with_context_parallel=False, with_gtp=False, partial_data_parallel=False + with_context_parallel=False, no_gtp=False, partial_data_parallel=False ): """Return caller's rank in the data-parallel group.""" global _MPU_DATA_PARALLEL_RANK @@ -2128,7 +2128,7 @@ def get_data_parallel_rank( if torch.distributed.is_available() and torch.distributed.is_initialized(): return get_data_parallel_group( with_context_parallel=with_context_parallel, - with_gtp=with_gtp, + no_gtp=no_gtp, partial_data_parallel=partial_data_parallel, ).rank() else: @@ -2350,22 +2350,22 @@ def get_expert_tensor_model_pipeline_parallel_group(check_initialized=True, with def get_expert_data_parallel_group( - check_initialized=True, with_gtp=False, partial_expert_data_parallel=False + check_initialized=True, no_gtp=False, partial_expert_data_parallel=False ): """Get expert data parallel group.""" - if with_gtp: + if no_gtp: if partial_expert_data_parallel: if check_initialized: - assert _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP is not None, ( + assert _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_NO_GTP is not None, ( "Intra partial expert data parallel group with generalized tensor " "parallel is not initialized" ) - return _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP + return _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_NO_GTP if check_initialized: assert ( - _EXPERT_DATA_PARALLEL_GROUP_WITH_GTP is not None + _EXPERT_DATA_PARALLEL_GROUP_NO_GTP is not None ), "Expert data parallel group with generalized tensor parallel is not initialized" - return _EXPERT_DATA_PARALLEL_GROUP_WITH_GTP + return _EXPERT_DATA_PARALLEL_GROUP_NO_GTP if partial_expert_data_parallel: if 
check_initialized: assert ( @@ -2380,9 +2380,9 @@ def get_expert_data_parallel_group( return _EXPERT_DATA_PARALLEL_GROUP -def get_expert_data_parallel_group_gloo(with_gtp=False, partial_expert_data_parallel=False): +def get_expert_data_parallel_group_gloo(no_gtp=False, partial_expert_data_parallel=False): """Get expert data parallel group-gloo.""" - assert not with_gtp, "EGTP does not support Gloo expert-data-parallel groups" + assert not no_gtp, "EGTP does not support Gloo expert-data-parallel groups" if partial_expert_data_parallel: assert ( _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_GLOO is not None @@ -2405,11 +2405,11 @@ def get_expert_data_parallel_rank(partial_expert_data_parallel=False): return 0 -def get_expert_data_parallel_world_size(with_gtp=False, partial_expert_data_parallel=False): +def get_expert_data_parallel_world_size(no_gtp=False, partial_expert_data_parallel=False): """Return world size for the expert data parallel group.""" if torch.distributed.is_available() and torch.distributed.is_initialized(): return get_expert_data_parallel_group( - with_gtp=with_gtp, partial_expert_data_parallel=partial_expert_data_parallel + no_gtp=no_gtp, partial_expert_data_parallel=partial_expert_data_parallel ).size() else: return 0 @@ -2493,17 +2493,17 @@ def destroy_model_parallel(): global _DATA_PARALLEL_GROUP _DATA_PARALLEL_GROUP = None - global _DATA_PARALLEL_GROUP_WITH_GTP - _DATA_PARALLEL_GROUP_WITH_GTP = None + global _DATA_PARALLEL_GROUP_NO_GTP + _DATA_PARALLEL_GROUP_NO_GTP = None global _DATA_PARALLEL_GROUP_WITH_CP _DATA_PARALLEL_GROUP_WITH_CP = None - global _DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP - _DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP = None + global _DATA_PARALLEL_GROUP_WITH_CP_NO_GTP + _DATA_PARALLEL_GROUP_WITH_CP_NO_GTP = None - global _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP - _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_WITH_GTP = None + global _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_NO_GTP + 
_INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_NO_GTP = None global _CONTEXT_PARALLEL_GROUP _CONTEXT_PARALLEL_GROUP = None @@ -2607,11 +2607,11 @@ def destroy_model_parallel(): global _EXPERT_DATA_PARALLEL_GROUP _EXPERT_DATA_PARALLEL_GROUP = None - global _EXPERT_DATA_PARALLEL_GROUP_WITH_GTP - _EXPERT_DATA_PARALLEL_GROUP_WITH_GTP = None + global _EXPERT_DATA_PARALLEL_GROUP_NO_GTP + _EXPERT_DATA_PARALLEL_GROUP_NO_GTP = None - global _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP - _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP = None + global _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_NO_GTP + _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_NO_GTP = None global _EXPERT_DATA_PARALLEL_GROUP_GLOO if ( diff --git a/megatron/core/process_groups_config.py b/megatron/core/process_groups_config.py index ac9aae00bf8..13b287da669 100644 --- a/megatron/core/process_groups_config.py +++ b/megatron/core/process_groups_config.py @@ -53,7 +53,7 @@ class ProcessGroupCollection: expt_dp: Expert data parallel group intra_dp_cp: Intra partial data parallel group intra_expt_dp: Intra partial expert data parallel group - intra_expt_dp_with_egtp: Intra expert data parallel group excluding EGTP peers + intra_expt_dp_no_egtp: Intra expert data parallel group excluding EGTP peers (true expert-weight replicas); identical to expt_dp when EGTP=1 inter_dist_opt: Inter distributed optimizer instance group @@ -148,9 +148,9 @@ class ProcessGroupCollection: # _INTRA_EXPERT_DATA_PARALLEL_GROUP intra_expt_dp: torch.distributed.ProcessGroup = field(init=False) - # _EXPERT_DATA_PARALLEL_GROUP_WITH_GTP — expert DP excluding EGTP peers (true expert + # _EXPERT_DATA_PARALLEL_GROUP_NO_GTP — expert DP excluding EGTP peers (true expert # weight replicas). Identical to ``expt_dp`` when EGTP=1. 
- intra_expt_dp_with_egtp: torch.distributed.ProcessGroup = field(init=False) + intra_expt_dp_no_egtp: torch.distributed.ProcessGroup = field(init=False) # _INTER_PARTIAL_EXPERT_DATA_PARALLEL_GROUP inter_dist_opt: torch.distributed.ProcessGroup = field(init=False) @@ -261,10 +261,10 @@ def use_mpu_process_groups(cls, required_pgs: Optional[List[str]] = None): check_initialized=False, partial_expert_data_parallel=True, ), - 'intra_expt_dp_with_egtp': partial( + 'intra_expt_dp_no_egtp': partial( parallel_state.get_expert_data_parallel_group, check_initialized=False, - with_gtp=True, + no_gtp=True, partial_expert_data_parallel=True, ), 'inter_dist_opt': partial( @@ -324,12 +324,12 @@ def setup_process_groups_for_optimizer( - dp_group: Data parallel group - dp_cp_group: Data parallel with context parallel group - intra_dp_cp_group: Intra data parallel with context parallel group - - intra_dp_cp_with_gtp_group: Intra data parallel with context parallel and + - intra_dp_cp_no_gtp_group: Intra data parallel with context parallel and generalized tensor parallel group (excludes GTP peers, i.e. only true dense weight replicas) - expt_dp_group: Expert data parallel group - intra_expt_dp_group: Intra expert data parallel group - - intra_expt_dp_with_egtp_group: Intra expert data parallel group excluding + - intra_expt_dp_no_egtp_group: Intra expert data parallel group excluding EGTP peers (true expert-weight replicas); identical to expt_dp_group when EGTP=1 - mp_group: Model parallel group @@ -344,8 +344,8 @@ def setup_process_groups_for_optimizer( if pg_collection is None: # Use parallel_state groups - # Dense (non-GTP) params use with_gtp=False (full DP group) to maximize - # optimizer state sharding. GTP params use with_gtp=True (smaller group) + # Dense (non-GTP) params use no_gtp=False (full DP group) to maximize + # optimizer state sharding. GTP params use no_gtp=True (smaller group) # since GTP's reduce-scatter already handled the GTP dimension. 
dp_group = parallel_state.get_data_parallel_group( with_context_parallel=False, partial_data_parallel=False @@ -356,20 +356,20 @@ def setup_process_groups_for_optimizer( intra_dp_cp_group = parallel_state.get_data_parallel_group( with_context_parallel=True, partial_data_parallel=True ) - intra_dp_cp_with_gtp_group = parallel_state.get_data_parallel_group( - with_context_parallel=True, with_gtp=True, partial_data_parallel=True + intra_dp_cp_no_gtp_group = parallel_state.get_data_parallel_group( + with_context_parallel=True, no_gtp=True, partial_data_parallel=True ) - dp_cp_with_gtp_group = parallel_state.get_data_parallel_group( - with_context_parallel=True, with_gtp=True + dp_cp_no_gtp_group = parallel_state.get_data_parallel_group( + with_context_parallel=True, no_gtp=True ) expt_dp_group = parallel_state.get_expert_data_parallel_group() intra_expt_dp_group = parallel_state.get_expert_data_parallel_group( partial_expert_data_parallel=True ) - intra_expt_dp_with_egtp_group = parallel_state.get_expert_data_parallel_group( - with_gtp=True, partial_expert_data_parallel=True + intra_expt_dp_no_egtp_group = parallel_state.get_expert_data_parallel_group( + no_gtp=True, partial_expert_data_parallel=True ) - expt_dp_with_egtp_group = parallel_state.get_expert_data_parallel_group(with_gtp=True) + expt_dp_no_egtp_group = parallel_state.get_expert_data_parallel_group(no_gtp=True) intra_dist_opt_group = parallel_state.get_intra_distributed_optimizer_instance_group() # Gloo groups @@ -387,8 +387,8 @@ def setup_process_groups_for_optimizer( # Model communication groups mp_group = parallel_state.get_model_parallel_group() expt_tp_pp_group = parallel_state.get_expert_tensor_model_pipeline_parallel_group() - expt_tp_pp_with_egtp_group = parallel_state.get_expert_tensor_model_pipeline_parallel_group( - with_egtp=True + expt_tp_pp_with_egtp_group = ( + parallel_state.get_expert_tensor_model_pipeline_parallel_group(with_egtp=True) ) # Inter distributed optimizer group @@ -496,37 
+496,36 @@ def setup_process_groups_for_optimizer( expt_tp_pp_group = pg_collection.tp_ep_pp # EGTP-MERGED variant of tp_ep_pp: includes the egtp axis, so each EGTP peer gets a - # distinct rank — used for the distopt ShardedObject keys. (Note the opposite sense - # from the with_egtp replicate groups in §7, which EXCLUDE the egtp axis.) Falls back - # to tp_ep_pp when not provided. + # distinct rank — used for the distopt ShardedObject keys. Falls back to tp_ep_pp + # when not provided. if hasattr(pg_collection, 'tp_ep_pp_with_egtp'): expt_tp_pp_with_egtp_group = pg_collection.tp_ep_pp_with_egtp else: expt_tp_pp_with_egtp_group = expt_tp_pp_group - # 6. with_gtp groups — the gtp-EXCLUDED replicate groups that DDP and the optimizer + # 6. no_gtp groups — the gtp-EXCLUDED replicate groups that DDP and the optimizer # shard over: intra (per-distopt-instance) and full (cross-instance). Fall back to # the non-GTP variants when not provided. - if hasattr(pg_collection, 'intra_dp_cp_with_gtp'): - intra_dp_cp_with_gtp_group = pg_collection.intra_dp_cp_with_gtp + if hasattr(pg_collection, 'intra_dp_cp_no_gtp'): + intra_dp_cp_no_gtp_group = pg_collection.intra_dp_cp_no_gtp else: - intra_dp_cp_with_gtp_group = intra_dp_cp_group - if hasattr(pg_collection, 'dp_cp_with_gtp'): - dp_cp_with_gtp_group = pg_collection.dp_cp_with_gtp + intra_dp_cp_no_gtp_group = intra_dp_cp_group + if hasattr(pg_collection, 'dp_cp_no_gtp'): + dp_cp_no_gtp_group = pg_collection.dp_cp_no_gtp else: - dp_cp_with_gtp_group = dp_cp_group + dp_cp_no_gtp_group = dp_cp_group - # 7. with_egtp groups — the expert analog of §6: the egtp-EXCLUDED replicate groups, + # 7. no_egtp groups — the expert analog of §6: the egtp-EXCLUDED replicate groups, # intra (per-distopt-instance) and full (cross-instance). Fall back to the # non-EGTP variants when not provided. 
- if hasattr(pg_collection, 'intra_expt_dp_with_egtp'): - intra_expt_dp_with_egtp_group = pg_collection.intra_expt_dp_with_egtp + if hasattr(pg_collection, 'intra_expt_dp_no_egtp'): + intra_expt_dp_no_egtp_group = pg_collection.intra_expt_dp_no_egtp else: - intra_expt_dp_with_egtp_group = intra_expt_dp_group - if hasattr(pg_collection, 'expt_dp_with_egtp'): - expt_dp_with_egtp_group = pg_collection.expt_dp_with_egtp + intra_expt_dp_no_egtp_group = intra_expt_dp_group + if hasattr(pg_collection, 'expt_dp_no_egtp'): + expt_dp_no_egtp_group = pg_collection.expt_dp_no_egtp else: - expt_dp_with_egtp_group = expt_dp_group + expt_dp_no_egtp_group = expt_dp_group # Gloo groups - not supported when pg_collection is provided if use_gloo_process_groups: @@ -540,13 +539,13 @@ def setup_process_groups_for_optimizer( return { 'dp_group': dp_group, 'dp_cp_group': dp_cp_group, - 'dp_cp_with_gtp_group': dp_cp_with_gtp_group, + 'dp_cp_no_gtp_group': dp_cp_no_gtp_group, 'intra_dp_cp_group': intra_dp_cp_group, - 'intra_dp_cp_with_gtp_group': intra_dp_cp_with_gtp_group, + 'intra_dp_cp_no_gtp_group': intra_dp_cp_no_gtp_group, 'expt_dp_group': expt_dp_group, - 'expt_dp_with_egtp_group': expt_dp_with_egtp_group, + 'expt_dp_no_egtp_group': expt_dp_no_egtp_group, 'intra_expt_dp_group': intra_expt_dp_group, - 'intra_expt_dp_with_egtp_group': intra_expt_dp_with_egtp_group, + 'intra_expt_dp_no_egtp_group': intra_expt_dp_no_egtp_group, 'mp_group': mp_group, 'expt_tp_pp_group': expt_tp_pp_group, 'expt_tp_pp_with_egtp_group': expt_tp_pp_with_egtp_group, @@ -593,14 +592,14 @@ def setup_process_groups_for_ddp( with_context_parallel=True, partial_data_parallel=True ), 'expt_dp_group': parallel_state.get_expert_data_parallel_group(), - 'expt_dp_with_egtp_group': parallel_state.get_expert_data_parallel_group( - with_gtp=True + 'expt_dp_no_egtp_group': parallel_state.get_expert_data_parallel_group( + no_gtp=True ), 'intra_expt_dp_group': parallel_state.get_expert_data_parallel_group( 
partial_expert_data_parallel=True ), - 'intra_expt_dp_with_egtp_group': parallel_state.get_expert_data_parallel_group( - with_gtp=True, partial_expert_data_parallel=True + 'intra_expt_dp_no_egtp_group': parallel_state.get_expert_data_parallel_group( + no_gtp=True, partial_expert_data_parallel=True ), 'tp_group': parallel_state.get_tensor_model_parallel_group(), 'pp_group': parallel_state.get_pipeline_model_parallel_group(), @@ -615,11 +614,11 @@ def setup_process_groups_for_ddp( if ddp_config.use_distributed_optimizer else None ), - 'intra_dp_cp_with_gtp_group': parallel_state.get_data_parallel_group( - with_context_parallel=True, with_gtp=True, partial_data_parallel=True + 'intra_dp_cp_no_gtp_group': parallel_state.get_data_parallel_group( + with_context_parallel=True, no_gtp=True, partial_data_parallel=True ), - 'dp_cp_with_gtp_group': parallel_state.get_data_parallel_group( - with_context_parallel=True, with_gtp=True + 'dp_cp_no_gtp_group': parallel_state.get_data_parallel_group( + with_context_parallel=True, no_gtp=True ), } else: @@ -692,28 +691,28 @@ def setup_process_groups_for_ddp( result['ep_group'] = pg_collection.ep # 6. GTP partial group (fallback to intra_dp_cp if not provided) - if hasattr(pg_collection, 'intra_dp_cp_with_gtp'): - result['intra_dp_cp_with_gtp_group'] = pg_collection.intra_dp_cp_with_gtp + if hasattr(pg_collection, 'intra_dp_cp_no_gtp'): + result['intra_dp_cp_no_gtp_group'] = pg_collection.intra_dp_cp_no_gtp else: - result['intra_dp_cp_with_gtp_group'] = result['intra_dp_cp_group'] + result['intra_dp_cp_no_gtp_group'] = result['intra_dp_cp_group'] # 7. 
EGTP partial group (fallback to intra_expt_dp if not provided) - if hasattr(pg_collection, 'intra_expt_dp_with_egtp'): - result['intra_expt_dp_with_egtp_group'] = pg_collection.intra_expt_dp_with_egtp + if hasattr(pg_collection, 'intra_expt_dp_no_egtp'): + result['intra_expt_dp_no_egtp_group'] = pg_collection.intra_expt_dp_no_egtp else: - result['intra_expt_dp_with_egtp_group'] = result['intra_expt_dp_group'] + result['intra_expt_dp_no_egtp_group'] = result['intra_expt_dp_group'] # 8. Full (cross-instance) with-GTP-excluded variants for callers that need to # reach ALL true weight replicas (e.g., broadcast_params at init). Fall back # to the corresponding non-GTP-excluded full group when not provided. - if hasattr(pg_collection, 'dp_cp_with_gtp'): - result['dp_cp_with_gtp_group'] = pg_collection.dp_cp_with_gtp + if hasattr(pg_collection, 'dp_cp_no_gtp'): + result['dp_cp_no_gtp_group'] = pg_collection.dp_cp_no_gtp else: - result['dp_cp_with_gtp_group'] = result['dp_cp_group'] - if hasattr(pg_collection, 'expt_dp_with_egtp'): - result['expt_dp_with_egtp_group'] = pg_collection.expt_dp_with_egtp + result['dp_cp_no_gtp_group'] = result['dp_cp_group'] + if hasattr(pg_collection, 'expt_dp_no_egtp'): + result['expt_dp_no_egtp_group'] = pg_collection.expt_dp_no_egtp else: - result['expt_dp_with_egtp_group'] = result['expt_dp_group'] + result['expt_dp_no_egtp_group'] = result['expt_dp_group'] return result diff --git a/megatron/core/ssm/mamba_mixer.py b/megatron/core/ssm/mamba_mixer.py index 240a1356db2..afbc4d3cb12 100644 --- a/megatron/core/ssm/mamba_mixer.py +++ b/megatron/core/ssm/mamba_mixer.py @@ -1310,8 +1310,6 @@ def sharded_state_dict(self, prefix="", sharded_offsets=(), metadata=None): # identical copy. The vanilla helper sets replica_id without GTP awareness, so all GTP # ranks would claim the same chunk with the same all-zero replica_id and DCP would have a # write conflict. Track those keys here and fold gtp_rank into replica_id below. 
- gtp_replicated_keys = set() - self._save_to_state_dict(sharded_state_dict, "", keep_vars=True) sharded_state_dict = make_sharded_tensors_for_checkpoint( sharded_state_dict, @@ -1325,7 +1323,9 @@ def sharded_state_dict(self, prefix="", sharded_offsets=(), metadata=None): }, sharded_offsets=sharded_offsets, ) - gtp_replicated_keys |= set(sharded_state_dict.keys()) + # Captured before submodules are merged below, so this is exactly the directly-owned + # MambaMixer params/buffers (A_log, dt_bias, D, conv1d_*) — deterministic across ranks. + gtp_replicated_keys = set(sharded_state_dict.keys()) # Submodules for name, module in self.named_children(): module_sharded_sd = sharded_state_dict_default( diff --git a/megatron/core/utils.py b/megatron/core/utils.py index b4e06fdd79c..4e9324ca410 100644 --- a/megatron/core/utils.py +++ b/megatron/core/utils.py @@ -875,8 +875,8 @@ def check_param_hashes_across_dp_replicas( [non_expert_params, expert_params], [local_non_expert_param_hashes, local_expert_param_hashes], [ - parallel_state.get_data_parallel_group(with_gtp=True), - parallel_state.get_expert_data_parallel_group(with_gtp=True), + parallel_state.get_data_parallel_group(no_gtp=True), + parallel_state.get_expert_data_parallel_group(no_gtp=True), ], ): # Collect per-parameter hashes across all ranks in group. @@ -990,7 +990,7 @@ def make_tp_sharded_tensor_for_checkpoint( # GTP peers hold distinct shards (disambiguated by the offset above); the true # replicas are the gtp-EXCLUDED DP group, so elect the writer over that group. dp_replica_id = parallel_state.get_data_parallel_rank( - with_context_parallel=True, with_gtp=True + with_context_parallel=True, no_gtp=True ) # Saved global is the padded shape when GTP padded out_features for alignment. 
if getattr(tensor, "pad_length", 0): diff --git a/megatron/experimental/gtp/README.md b/megatron/experimental/gtp/README.md index 5f2c8a68ada..033f4a45dfe 100644 --- a/megatron/experimental/gtp/README.md +++ b/megatron/experimental/gtp/README.md @@ -225,9 +225,9 @@ TransformerEngine owns the linear primitives (`Linear` / `LayerNormLinear` / `La #### What the flags do under the hood -1. `parallel_state.initialize_model_parallel(...)` treats GTP/EGTP as **first-class orthogonal axes** (`world_size = TP*GTP*CP*DP`, and the expert grid `= ETP*EP*PP*EGTP*expert_dp`). It builds the shard groups `_GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP` (size = `--generalized-tensor-parallel-remat-size`) and `_EXPERT_GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP` (size = `--expert-generalized-tensor-parallel-remat-size`), plus the gtp/egtp-EXCLUDED replicate DP groups (`_DATA_PARALLEL_GROUP_WITH_GTP`, `_EXPERT_DATA_PARALLEL_GROUP_WITH_GTP`) that DDP and the optimizer shard over. These `*_with_gtp` groups alias the regular DP groups when GTP is inactive (remat size 1). +1. `parallel_state.initialize_model_parallel(...)` treats GTP/EGTP as **first-class orthogonal axes** (`world_size = TP*GTP*CP*DP`, and the expert grid `= ETP*EP*PP*EGTP*expert_dp`). It builds the shard groups `_GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP` (size = `--generalized-tensor-parallel-remat-size`) and `_EXPERT_GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP` (size = `--expert-generalized-tensor-parallel-remat-size`), plus the gtp/egtp-EXCLUDED replicate DP groups (`_DATA_PARALLEL_GROUP_NO_GTP`, `_EXPERT_DATA_PARALLEL_GROUP_NO_GTP`) that DDP and the optimizer shard over. These `*_no_gtp` groups alias the regular DP groups when GTP is inactive (remat size 1). 2. Megatron's `extensions/transformer_engine.py` reads `pg_collection.gtp` / `pg_collection.expt_gtp` and forwards them as the `gtp_group=` kwarg to `te.Linear` / `te.LayerNormLinear` / `te.GroupedLinear`. 
TE's `module/base.py` calls back into `megatron.experimental.gtp` via the hook registry (`register_gtp_hooks`) to slice each weight at `reset_parameters` time. -3. DDP treats GTP shards as ordinary params: they go into the same dense / expert buffers as everything else, reduced over the gtp/egtp-EXCLUDED replicate group (`intra_dp_cp_with_gtp_group` / `intra_expt_dp_with_egtp_group`) with the standard `1/full` scaling. The gtp axis is completed elsewhere — GTP shards by their reduce-scatter sum, replicated (non-GTP) params by a SUM all-reduce in `finalize_model_grads`. See §3.2. +3. DDP treats GTP shards as ordinary params: they go into the same dense / expert buffers as everything else, reduced over the gtp/egtp-EXCLUDED replicate group (`intra_dp_cp_no_gtp_group` / `intra_expt_dp_no_egtp_group`) with the standard `1/full` scaling. The gtp axis is completed elsewhere — GTP shards by their reduce-scatter sum, replicated (non-GTP) params by a SUM all-reduce in `finalize_model_grads`. See §3.2. 4. Optimizer state is sharded over the same replicate group; clip-by-global-norm reduces squared norms over the dist-opt grad-stats group, which spans the full world (including the gtp/egtp axis), with replicated non-GTP params counted once per gtp/egtp axis to avoid over-counting. 5. `classify_gtp_chains(model)` runs once after model build (in `training.py`'s `get_model`) and wires each `GTPShardedParam` into a `GRAPHED` or `UNGRAPHED` prefetch chain based on the active `cuda_graph_modules`. @@ -302,17 +302,17 @@ Everything else — bucketing, the reduce-scatter/all-reduce schedule and its ov - **Free reuse of a mature stack.** GTP inherits DDP's bucketing + comm/compute overlap, the distributed optimizer's fp32-master + Adam-moment sharding, grad-norm/clip, and the existing checkpoint format — no parallel re-implementation to write or maintain (contrast FSDP, which replaces all of these). 
- **Orthogonal composability.** Because GTP is a rank-grid sub-axis cut like TP (along `out_features`), it composes with TP/EP/CP/PP and the DistOpt the same way TP does — no special nesting logic. -- **Zero-cost when off.** With GTP disabled the `*_with_gtp` groups alias the regular DP groups and both hooks become no-ops, so non-GTP runs hit byte-identical behavior — GTP can be toggled without forking the DDP/optimizer code paths. +- **Zero-cost when off.** With GTP disabled the `*_no_gtp` groups alias the regular DP groups and both hooks become no-ops, so non-GTP runs hit byte-identical behavior — GTP can be toggled without forking the DDP/optimizer code paths. - **Small, auditable surface.** Two hooks is the whole integration contract, which is what makes the correctness argument below tractable. -DDP groups parameters into **two buffers** by `is_expert_parallel` (MoE tag) — a dense buffer and an expert buffer. GTP/EGTP shards are **merged into** these buffers like ordinary params (no separate GTP/EGTP buckets): they reduce over the gtp/egtp-EXCLUDED replicate group (`intra_dp_cp_with_gtp_group` for dense, `intra_expt_dp_with_egtp_group` for expert) with the standard `1/full = 1/(replicate*gtp)` scaling. +DDP groups parameters into **two buffers** by `is_expert_parallel` (MoE tag) — a dense buffer and an expert buffer. GTP/EGTP shards are **merged into** these buffers like ordinary params (no separate GTP/EGTP buckets): they reduce over the gtp/egtp-EXCLUDED replicate group (`intra_dp_cp_no_gtp_group` for dense, `intra_expt_dp_no_egtp_group` for expert) with the standard `1/full = 1/(replicate*gtp)` scaling. Why this is correct — the gtp axis is completed in two complementary ways, so it is summed exactly once: - **GTP-sharded weights**: each rank already holds the gtp-summed shard via the (E)GTP wgrad reduce-scatter, then DDP sums over the replicate group → `sum-over-(gtp×replicate) / full = mean`. 
- **Replicated (non-GTP) params** (LayerNorm γ/β, biases, router, …): DDP sums only over the replicate group, leaving them `1/gtp` short; `finalize_model_grads._allreduce_replicated_grads_over_gtp_group` then does a SUM all-reduce over the gtp (dense) / egtp (expert) group to recover the full mean. SUM (not AVG) because the `1/full` DDP scaling already applied. -**`broadcast_params`** (the one-shot init/load param sync) selects the group by `is_gtp`: GTP shards broadcast over the gtp-excluded `*_with_gtp` group (`dp_cp_with_gtp_group` / `expt_dp_with_egtp_group`), everything else over the regular DP group (`dp_cp_group` / `expt_dp_group`). Excluding (E)GTP peers is essential — each peer holds a distinct 1/N shard of the same `GTPShardedParam`, so a shared group would let rank-0's shard clobber the others. The non-`intra_` ("full") groups are used here so the sync reaches every distopt instance. +**`broadcast_params`** (the one-shot init/load param sync) selects the group by `is_gtp`: GTP shards broadcast over the gtp-excluded `*_no_gtp` group (`dp_cp_no_gtp_group` / `expt_dp_no_egtp_group`), everything else over the regular DP group (`dp_cp_group` / `expt_dp_group`). Excluding (E)GTP peers is essential — each peer holds a distinct 1/N shard of the same `GTPShardedParam`, so a shared group would let rank-0's shard clobber the others. The non-`intra_` ("full") groups are used here so the sync reaches every distopt instance. **Buffer caching.** The per-buffer lists are concatenated once at init into a single flat view for fast iteration in the grad-reduction hot path. 
diff --git a/megatron/experimental/gtp/generalized_tensor_parallelism.py b/megatron/experimental/gtp/generalized_tensor_parallelism.py index a9d994d4677..6a2503c0313 100644 --- a/megatron/experimental/gtp/generalized_tensor_parallelism.py +++ b/megatron/experimental/gtp/generalized_tensor_parallelism.py @@ -1870,7 +1870,7 @@ def make_sharded_tensors_for_checkpoint_with_gtp( *, tp_group, dp_cp_group, - intra_dp_cp_with_gtp_group=None, + intra_dp_cp_no_gtp_group=None, ): """GTP-aware analogue of ``make_sharded_tensors_for_checkpoint``. @@ -1917,13 +1917,13 @@ def make_sharded_tensors_for_checkpoint_with_gtp( gtp_size = get_pg_size(gtp_group) # DP-with-GTP-with-CP rank — replicas of a given GTP chunk live here. - if intra_dp_cp_with_gtp_group is not None: - dp_with_gtp_rank = get_pg_rank(intra_dp_cp_with_gtp_group) + if intra_dp_cp_no_gtp_group is not None: + dp_no_gtp_rank = get_pg_rank(intra_dp_cp_no_gtp_group) else: from megatron.core import parallel_state # noqa: E402 - dp_with_gtp_rank = parallel_state.get_data_parallel_rank( - with_context_parallel=True, with_gtp=True + dp_no_gtp_rank = parallel_state.get_data_parallel_rank( + with_context_parallel=True, no_gtp=True ) sharded_state_dict = {} @@ -1935,7 +1935,7 @@ def make_sharded_tensors_for_checkpoint_with_gtp( # ShardedObject (extra_state metadata): GTP-REPLICATED across the GTP group. Fold # gtp_rank into position 1 of the replica_id (PP, TP-replica-coord, DP) tuple so # GTP-peer ranks within the same TP slice get unique replica_ids. - replica_id = (0, tp_rank * gtp_size + gtp_rank, dp_with_gtp_rank) + replica_id = (0, tp_rank * gtp_size + gtp_rank, dp_no_gtp_rank) sharded_state_dict[layer_key] = make_sharded_object_for_checkpoint( tensor, layer_key, sharded_offsets, replica_id=replica_id ) @@ -1947,7 +1947,7 @@ def make_sharded_tensors_for_checkpoint_with_gtp( # without intervention. Inject gtp_rank into position 1 of the replica_id, the same # way the GTP-sharded branch below does. 
if layer_name in tensor_parallel_layers_axis_map: - replica_id = (0, gtp_rank, dp_with_gtp_rank) + replica_id = (0, gtp_rank, dp_no_gtp_rank) sharded_state_dict[layer_key] = make_tp_sharded_tensor_for_checkpoint( tensor, layer_key, @@ -1958,7 +1958,7 @@ def make_sharded_tensors_for_checkpoint_with_gtp( dp_cp_group=dp_cp_group, ) else: - replica_id = (0, tp_rank * gtp_size + gtp_rank, dp_with_gtp_rank) + replica_id = (0, tp_rank * gtp_size + gtp_rank, dp_no_gtp_rank) sharded_state_dict[layer_key] = make_sharded_tensor_for_checkpoint( tensor, layer_key, diff --git a/megatron/training/training.py b/megatron/training/training.py index 477ac5a8c8d..6f3916a12e4 100644 --- a/megatron/training/training.py +++ b/megatron/training/training.py @@ -1602,12 +1602,12 @@ def wrap_model_chunks_with_ddp( compute_layout = None if compute_layout is not None: # Size the layout for the replicate (gtp/egtp-EXCLUDED) DP group the DDP buffer - # actually shards over, so DDP can use it directly without recomputing. with_gtp + # actually shards over, so DDP can use it directly without recomputing. no_gtp # aliases the regular DP group when GTP is inactive. 
data_parallel_world_size = mpu.get_data_parallel_world_size( - with_context_parallel=True, with_gtp=True + with_context_parallel=True, no_gtp=True ) - expert_data_parallel_world_size = mpu.get_expert_data_parallel_world_size(with_gtp=True) + expert_data_parallel_world_size = mpu.get_expert_data_parallel_world_size(no_gtp=True) for i, (chunk, bucket_size) in enumerate(zip(model_chunks, bucket_sizes)): all_params = [p for p in chunk.parameters() if p.requires_grad] per_chunk_layouts[i] = compute_layout( diff --git a/megatron/training/utils/common_utils.py b/megatron/training/utils/common_utils.py index 5f43ac85953..610e620848b 100644 --- a/megatron/training/utils/common_utils.py +++ b/megatron/training/utils/common_utils.py @@ -105,11 +105,11 @@ def calc_params_l2_norm(model, force_create_fp32_copy=False): params_data = [] # Dense, non-sharded sharded_params_data = [] # Dense, sharded → reduce over dp_cp gtp_params_data = [] # GTP, non-sharded - gtp_sharded_params_data = [] # GTP, sharded → reduce over dp_cp_with_gtp + gtp_sharded_params_data = [] # GTP, sharded → reduce over dp_cp_no_gtp moe_params_data = [] # MoE, non-sharded moe_sharded_params_data = [] # MoE, sharded → reduce over expert_dp moe_gtp_params_data = [] # MoE-GTP, non-sharded - moe_gtp_sharded_params_data = [] # MoE-GTP, sharded → reduce over expert_dp_with_gtp + moe_gtp_sharded_params_data = [] # MoE-GTP, sharded → reduce over expert_dp_no_gtp gtp_rank = mpu.get_generalized_tensor_parallel_remat_rank() egtp_rank = mpu.get_expert_generalized_tensor_parallel_remat_rank() @@ -170,11 +170,15 @@ def _sum_reduce(tensor, group): # --- Sharded optimizer DP reductions (each category uses its own group) --- # Reduce over the gtp-EXCLUDED replicate group: the model-parallel reduce below already # spans the gtp axis, so a gtp-inclusive group here would over-count by gtp. No-op for - # non-GTP runs (the with_gtp group aliases the regular DP group). 
- _sum_reduce(sharded_norm_2, mpu.get_data_parallel_group(with_context_parallel=True, with_gtp=True)) - _sum_reduce(gtp_sharded_norm_2, mpu.get_data_parallel_group(with_context_parallel=True, with_gtp=True)) + # non-GTP runs (the no_gtp group aliases the regular DP group). + _sum_reduce( + sharded_norm_2, mpu.get_data_parallel_group(with_context_parallel=True, no_gtp=True) + ) + _sum_reduce( + gtp_sharded_norm_2, mpu.get_data_parallel_group(with_context_parallel=True, no_gtp=True) + ) _sum_reduce(moe_sharded_norm_2, mpu.get_expert_data_parallel_group()) - _sum_reduce(moe_gtp_sharded_norm_2, mpu.get_expert_data_parallel_group(with_gtp=True)) + _sum_reduce(moe_gtp_sharded_norm_2, mpu.get_expert_data_parallel_group(no_gtp=True)) # --- Combine dense + GTP norms --- # model_parallel group = TP×GTP×PP, so GTP reduction is implicit. diff --git a/tests/unit_tests/generalized_tensor_parallel/test_gtp.py b/tests/unit_tests/generalized_tensor_parallel/test_gtp.py index 51fef389b5d..dcbc77e8692 100644 --- a/tests/unit_tests/generalized_tensor_parallel/test_gtp.py +++ b/tests/unit_tests/generalized_tensor_parallel/test_gtp.py @@ -1298,10 +1298,10 @@ def _worker_gtp_ddp_bucket_alignment(rank, world_size, port): Bug: DDP used param_layout=None for GTP buffers, falling through to _compute_default_per_buffer_param_layout, which packs params without padding bucket ends. The distributed optimizer requires every bucket end to be divisible by - intra_dp_cp_with_gtp_group.size() (asserted at param_and_grad_buffer.py:1427). + intra_dp_cp_no_gtp_group.size() (asserted at param_and_grad_buffer.py:1427). 
Trigger: - GTP=2, DP=4 → intra_dp_cp_with_gtp_group.size()=2 + GTP=2, DP=4 → intra_dp_cp_no_gtp_group.size()=2 pad_for_alignment=0, weight [out=2,in=3] → GTP shard=[1,3]=3 elements (odd) Two GTP params: total=6, 6%2==0 (total check passes); bucket_size=3 forces bucket-0 to contain only the first param, end=3, 3%2≠0 → AssertionError @@ -1406,7 +1406,7 @@ def __init__(self): class TestGTPDDPBucketAlignment: def test_gtp_buffers_use_padded_layout_with_distributed_optimizer(self): - """GTP buffer bucket ends must be padded to intra_dp_cp_with_gtp_group.size().""" + """GTP buffer bucket ends must be padded to intra_dp_cp_no_gtp_group.size().""" _requires_multi_gpu(4) _run_distributed(_worker_gtp_ddp_bucket_alignment, 4) diff --git a/tests/unit_tests/generalized_tensor_parallel/test_gtp_dcp.py b/tests/unit_tests/generalized_tensor_parallel/test_gtp_dcp.py index f4537b98322..39415c08add 100644 --- a/tests/unit_tests/generalized_tensor_parallel/test_gtp_dcp.py +++ b/tests/unit_tests/generalized_tensor_parallel/test_gtp_dcp.py @@ -159,7 +159,7 @@ def _worker_helper_offsets_tp_neq_gtp_axis(rank, world_size, port): ) -def _worker_helper_no_op_without_gtp(rank, world_size, port): +def _worker_helper_no_op_no_gtp(rank, world_size, port): """Helper must delegate to vanilla when state_dict has no GTPShardedParam. Per-TP-rank shape under column-parallel TP=2: (full_out//tp_size, in). 
@@ -564,9 +564,9 @@ def test_replicated_sink_rejects_gtp(self): _require_world_size(4) _worker_helper_replicated_sink_rejects_gtp(dist.get_rank(), 4, None) - def test_no_op_without_gtp(self): + def test_no_op_no_gtp(self): _require_world_size(4) - _worker_helper_no_op_without_gtp(dist.get_rank(), 4, None) + _worker_helper_no_op_no_gtp(dist.get_rank(), 4, None) def test_reset_quantize_cache(self): _require_world_size(4) From e43080779c0f70cabb80b38bfc4ff8ce970b27cd Mon Sep 17 00:00:00 2001 From: Shiqing Fan Date: Sun, 14 Jun 2026 22:02:00 -0700 Subject: [PATCH 37/59] fix dense GTP NCCL group using stale 'ps' key Signed-off-by: Shiqing Fan --- megatron/core/parallel_state.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/megatron/core/parallel_state.py b/megatron/core/parallel_state.py index 63e08c6f70e..1c764268b12 100644 --- a/megatron/core/parallel_state.py +++ b/megatron/core/parallel_state.py @@ -943,7 +943,7 @@ def _inject_gtp(order_str: str, after: str = "tp") -> str: group = create_group( gtp_ranks, timeout=timeout, - pg_options=get_nccl_options("ps", nccl_comm_cfgs), + pg_options=get_nccl_options("gtp", nccl_comm_cfgs), group_desc="GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP", ) if rank in gtp_ranks: From 5a8c469b0f61ab392d50fc064fd47a8ccbbda271 Mon Sep 17 00:00:00 2001 From: Shiqing Fan Date: Tue, 16 Jun 2026 21:34:22 -0700 Subject: [PATCH 38/59] update README with scalability Signed-off-by: Shiqing Fan --- megatron/experimental/gtp/README.md | 13 +++++++++++++ .../0617_gtp64_weak_scaling_efficiency.png | Bin 0 -> 133612 bytes 2 files changed, 13 insertions(+) create mode 100644 megatron/experimental/gtp/images/0617_gtp64_weak_scaling_efficiency.png diff --git a/megatron/experimental/gtp/README.md b/megatron/experimental/gtp/README.md index 033f4a45dfe..32e983c581d 100644 --- a/megatron/experimental/gtp/README.md +++ b/megatron/experimental/gtp/README.md @@ -128,6 +128,19 @@ Quantize-then-gather attacks AG only: AG portion shrinks ~72% from 
BF16 → NVFP Effective per-GPU weight size = `W / (TP × GTP)`. Example: TP=4 + GTP=8 with NVFP4 → 32× weight-memory reduction and 128× wire-bandwidth reduction vs full BF16 replication, before data parallelism. +**Weak scaling.** GTP fixes the shard width and grows the job by adding data-parallel replicas (DP = #GPUs / GTP), so per-GPU compute stays constant while only the DP gradient reduction widens with scale. + +The best GTP size is model- and cluster-dependent — driven by weight sizes, per-GPU memory headroom, and which collectives can be kept on fast links — so there is no single recommended value. The example below runs on **GB200 NVL72** (a 72-GPU NVLink domain) and uses **GTP64**, which places communication as: + +- **NVLink-local:** the *dense-layer* (Mamba / attention / shared-expert) GTP weight all-gather + wgrad reduce-scatter, **and** the `EP64` all-to-all dispatch/combine — all kept inside one ≤72-GPU NVLink domain (EP64 ≤ NVL72). +- **Inter-node (IB / CX7):** the DP gradient reduction **plus** the `EGTP2` expert-weight all-gather / wgrad reduce-scatter, whose 2 shards land on different NVLink domains and so cross nodes. + +On an Ultra-proxy hybrid Mamba-MoE model (**~280B parameters**; `GTP64 · EP64 · EGTP2`, mb1, MXFP8, BF16 reduce-scatter, no CUDA graph), scaling efficiency holds **≥93 % of the single-domain (128-GPU / DP2) baseline out to 3072 GPUs (DP48)**, while max reserved memory *decreases* with scale (137 → 104 GB) as the distributed optimizer shards optimizer/grad state across more DP replicas. + +> **Takeaway:** near-flat weak scaling — **≥93 % efficiency from 128 → 3072 GPUs**, with per-GPU memory shrinking as DP grows. 
+ +![GTP64 weak-scaling efficiency](images/0617_gtp64_weak_scaling_efficiency.png) + ### 1.7 Native distributed checkpointing (DCP) **GTP + DCP is straightforward:** diff --git a/megatron/experimental/gtp/images/0617_gtp64_weak_scaling_efficiency.png b/megatron/experimental/gtp/images/0617_gtp64_weak_scaling_efficiency.png new file mode 100644 index 0000000000000000000000000000000000000000..03fc587f96a3bbcd9ef2269a67e367f8cbaba1b9 GIT binary patch literal 133612 zcmeFZWmr{jw=cZtmhMJDN?Ju41SAwG0Z~d&knV0oI;9bWB`GK%AYDo+f`F8?gp`Cx z=NZ%e?04^v@A-bXF0beRuvlx(dB+&Px>tmz#w`*;20|1HMWU*rsEtD5@}W>z1^Bq| zn+$;hI`|(6S0w{i9fv2b9%jyuQFqK-9c>+4ZJ$13bARmY^3=gzR6tlrK%Ae=%GK4; zMM6-}?*ID>0S9MG!L|o(eefv+jw<(EP$;@g!7+K0mD8JXy_@hbP?xB2eWcExDfdrH(-hDUgtm9(mhwK(^tu`4*qFZygcz3|!G zjO$)Mo&2`Mb-hQ)*bI~<-z~!W#Y*H_vZf}1GY>}&cYIFJ^Y5?jD$sMtbYnd z6z%-p;xom-%$ZSxJAM6wyvs5H`J!&Cx&!kbD}$T^PUBrw_EY1iCO?;?r8T{orKIq1 z6)O;lqa_-C_#skx~L#S=lPP37U8B-mSXek+MYhqzb0ilT!2P zwb!{Up;TG|s;PJ0Ixl{)8q6a&+Fc8)cUxQa^U*IeR+5uL2^cqqkBu4R6%>5z>$@Nz zAW&pj&1+a?6?DyRJV!Z(!P`WXjSV&56-Qf5yVp>dSD=vnP)rOA3~~Pxar^1Uh=T*qg@uLIA3x~( z`ud6<)Cd@Uu#JH0w03kTZ61qQ_A%K`)@lvOJRQ8`dv>^#xRkWBd(?7%N}46X0LpAr%igG)>K4esA>Yiq-}_eNJfOTw+c(aQw}oFOG8W!0tRUWwjs zf7O-IGFBQvW8xg&?1}z<&4-Pikf0jdNWnj)F_yASXv~*!-?6Yq?qi+tH`T+EJV^WGCmFs zQMtRjJ32cn#AfuJ*~AAsp=;Na zzhz5TSbR=QoiQVEu45t~AW%_JX^mmLUT!}nv9meDI^%zQrL?rvHJg^6J|bPza&F=C z3$!23sL#O``vq1RViOaS-!m;HWoIzykM@55;B1yHtN;Dm*22>AA{Q5~<+tpP_m6vo z9)F?o-5IzQn(eXph5GBu8xyGFpK10h(t3^Sf4E3{aCDS1(8SJhv^iVo16!F_C*X9K_DSE%2pGIaS%sS1j_c#q z&a3xUevelZ!E=w*I$ey8iBUSfsh}WaJ0_&4q?CMU7jS;MA088P_U zi^q@6=Gr3?yO^Xr$zEK(`~0DR;P;8X-rf#<6F;1M?fV6zR|rBCf)4*KnV%l*cF0`6 zc=4vCir9*R>H6xigfLr>A&dmgHpyeA$3ADfi!F>}qQ%2?;^M=AGe~)XK`rRzpSP zNNo}o6>ab8n%`TWfPTYWC7U5&(j2`$QM2af>s5Te@=3x|vK`ez6e@<{s=|C{48{Vf zv8?xwiik?J@1e8n=CqMmp!4Rm^k=!y{Cv)g>kbzL1qHd2)f5#K*|@p!KY#x0y-S4B zdutdPM#{SC=c^^QSt5|kr=Mc`O4{ezHHT^bx^GZ!?qpsIjfx_oBE+Gfq;!HMqNuE_ z#!_Yzs-&w+g^7u|Bhzg5>3QNhj@t1gP5ftzB9j&wsPj1Kx1URTY^cZxHh63*zmoCm 
ze{-)y;ds~N$)iWX@Shx)F17a?vGZ2IgV5I#IV8ESkLSXGJ>O8SV-zw)+1lF1CntwO zdBw)Xjg+C}rXuu()e_&cc(C*RZj_%LTfpKB)J(-!n}dUcTfNt3Oks=UHw!8%o^ulG zk&%;YrQ?&3bW}b4t{6lJLwP$}ivCj=Dec05V0=bKWMClb_gDqiz`(%j-p@~AYmHZm zO*^)Jeu4@T^=Q9AEtwY`6b~g2ih{{bzWedsI@+`&PWCLk@a`+Z@zv3?I8OBxm0!c8 z1Ox*%II0>N7oL1g%k`IHW@c9MS~5v_PQsY-!&IxmeO=Lw^#YT4;5V7$+x;o2sbTPt zW=A_KtJjj%LSZYzoNqSmvpo5heTDEg4;A6VCU5HW^mL^?K4@PpXGbO84g4$)#m8^r z;Ek%dypvN?!ZsuP$YK&nzg;B@#T4%X6IoWtT3v(dN}@$MI`5m?)9eSR^6Ki2{@k0w zu-iO6J&Pvp@lu~1%#(gi7ro`0>9fD_xLiTKP$NsT;*pY={UjY!*zmYGL6I>P#hrq5 zaVL((#>SxuXhO&Pn_V6=ewUAq&w2z~a-8Nnz0aiwn%&`%WzJ7r24;q!zOrAsv>;pO z92#DfhrlK8J5+jf2e7%&m~S%N`12xtX{pqjpe0mdNPfuU# z`G+GoIN0%c&ko80JM1M`o0`SC@Bm85y!ZOc&2H|jPtfkJjS9XvHZ%xemGTUn2{>n_ z6?iE3@FCOh-@p5-p6Wz1-O#q|`+bZ?fn7*85vqt=ZB(*dYhwuo8sqC&i&*DW8rDi}}5pcxTk` z=C56Hm1Ou=-R2)Kn zQ8`CFRbbWp_3PIa?(2q3lJ3ONAId8$LtedN<+`1wiEdm(+GxR@Oh=fwOP4Qyg8mQP zwgjL~9p2 z9&SIz#=&8SQfX#prlO&tY;Dc^zOoWJFjCBi%1l_N>fLUJU0^n~u+aV8b?ZA)9iV;^ zj=#*xs_^>z#M8%zcFpkV)2Cw<=6D*fWMZJ4T;k$V$kjyfo`!}-k!CKYudgq%daJAk z(EWz$*X<`;&rXlY>FA*3<0HUwe4GKm>DJz!G9Z{geUh8_m~C*!w1UR%Ft|>zXQX`g zhyidRcLgMd?A`ak`#^a3!&J4wzJIA8-9g);ZEjon9rTSa<_rE0b0qO~S`}PW; zh3BxJsp}r`19*Xb*%?83nT(m41TY=6__+{LnO4coMxEt#nywFa6I;XjCIsUnrT0T> zX8a%Yk5t>}x2aV={a#@9@fks|xy<1LC3J1gCOcc(=+;)bu^&G$xa(<@)<)jCy~E79 zW|y&iw)&uJ4yx=}y&K=gWL*e=8vwhxe#d)ARsXrMVZ3md{}|pb<9CEqn&t0>ai$89 z(9)qSm&4YeFq)X0WODiSHQ^=7@$|?Ip=yyqK*7-G<@Q(o5KA4&ZLu%9NK zR7P5w8Np^2!V9n$VIH7eVN1IImV-W9Vf#a*jV&=foj!&Cez0+qHv#Up<=3;U>R@=BBJI^vl!DN4P20?g36Crq{0&W=H z`bL1;6+`YuL_`FyL*y^(3J-tOrfk9R6N3wa9!4{ zm(*7SPGQxb6U4T4soMIsq!2X?>o^u*D@MX}zQ!oh zi^1kQUf&K64(NXW!P^em8pohw3%EK9;6DPuiS5LPJI+_Fy%0(XxSA&eML|XN47Mm5 z0262ZyM~5_g*Ug?TRapMb!!Ljkh623M@Ik@LmkJ=3W`ibYECm$0Fj35f+$+SxlD&< za%^6)VqIRK%~TIQ*bc0D!*b*n(M$lCMJjV3C!)!|*crp<3220jg+;)s%&6fSf{W!J zSXktlE`9>)>p0&@(kA@rIf+hLv!jy}IS!-1!&dpNwM(3wO7Ev&E1mv@C8TtGdU{%cFfM58Bn)DKH8t0#z1ObXP7{(B z%QQf?JAnOs|F`gvQG<((5qX6CHZmiDo|>Ki#edUe17svTHlpE{K6Z8CF-y9)!xq`e 
zaAO6wrxZm~Tmz3*B#M6^qC&zlQly0ir83|!i|68zRLxQRk&y#pO|0yE}&@G~qih|n&MRaW`@?DWs_z-x}UD{7{uY|z+W|NVQ*?3(>#IBUR( zR{yWLxnK}2s3kRXHMzG|3SX`IX07mvh)@7FNjWjn`Nsl3zqs6zG%>ih^eW)d;+Nz^ zw~D}%{a>(rIa$A7zME~jvpf(i7y1F%*%X#F$<9Gbj$6JHu=k8K{K0bN^IScH$OZ|}#3FSkVV zL0L7X1gg~8op3QBJ6nKvsNgBVkW*K#_$9QS|n%pRL6q zqRDYt>Vs9qv*V2;nrdffH&E|p1DJ;c5bn2Evihulr#xnU0vkfRfT$1)WcVU4FCkz& za$-{`NVApSWIiE)WeJcV#bu3`t*{%0?ch(9<5;9;fBoW8JJv4IyP_o#2z-SgrLaVo z_xEI-ju?JU(;LiwhY{zell5-uLwfKx(36ta)>AGQCv#l5a3T0d1h8>xW>KhoTDkMV z^excz6#&+Nl0Y#wKCU*s0camPZfUa4#mv(3@{QT!_wZ3f@HoSDGao;qb+JeQ(Tn@+ zl@=NO-B!n4p&c7A?hXz!nZc}K#uuRZibApWjtQcTR9blit` zU&$CP|NXVG5epE*pud3G3Olf7+PmcI(a!sozmu!#KKF0}odqc}lajE3cEJ#R>gd3M zAs{2Mr5QRXuc&C7aA=8n`H~45og&#o)jM}OV5Ee+ca}LEDouY?0rF~WY63JI1O*Wv zmuJ!W2C$hRWHq)m@z>E@F$~uudD!57=ae4wkjqe3X|p`m$FJ_HQPw>a@SeJEGje+? zJ0?85mDa?UouTOM!#a@+v8R}nl$7u;lDpYbTAS+X>YttwP?C|6v87XdX>Z4NTN?>2 zDdA&cVlp*14*?BD%4-`Nx^;I?j}EI35LXadSb^ey|C{jHgrFcRu?KMAgBtrQ1H{{~ z!U6W&?*Fy0@C;Vc$IqV$Zrr#5n5h*yqbshME-!Uw9IGJxRxv;<=$N_^;&k%z^1&E800d@%xd{68H_kdCg2&0}>7U;} zL^{u#}mG&K@eBCr?HHc(@puOJA>o~`THTE%v)z$>#k!nu5TlKs9`?q>C zt~$X8%k%QFo$vYqwG~n4c|ZjUhZqP&J041_e@mNVS!D(4cq_H@)i|5E$Oi(*L#P!M z%Od{6r142fX>s`S2(i~*1DhSIdirAaX4NlRQ3nSH+nHu&*xV!O<4*y@rd@Z)_I{A7 zIST-r?uN5P#j#Ejz?z@18nsC!o`r?IHfiw(nTC2N0BG>R!i&3+jmFRfkP0r(8yS2o zxGRn|*VqVicQ6}5ioKGF2$rDT5Nf-2TZr%*H_`z%x%X{MzgYKRwWMtJ_iuic$J+4{ zYCc?gFSza?l3yF@5IBVpsF(6GcI&1fn^@M7x~q{yFB7KqzLI4HfIsZ@Hu&h$^Bi4X zWHw4oT3BwyGLf&3vd|@G@VsR%>UW=OCx~X1WnsK-{|rcusR;-z{zog9cilm=1m$pP zZZg!FoPr`aKAtS=7z&I!RcSZS9RNoajz1MX58cWrA;@D{Lt~2bCKG~U8kUfdU=djh zONuai31DunLMY+UOc2LpqnD0Y|LbC?c4*VkA)jKXyopQutHbIdIfimGQge&B;@{rN z07$vQ@XW>0k$Ys;N%a%}?y9Z?J3BifWGZ$+;dlgUYx&(=wzZ$(?FtwcjR0I^K z;+nfz5}^Qrh;Y{&Kto*{Eqe`@?`XdJxZ~S5(t?75;G9#S!^7XAhFXi=kD-nz?#5Q( z>4PqPMXhfCPZ#SvP<7q&)1IE5PtX~$0~&7LyjkHeBh$9l4gxsR??9){6}9pO0@1iX z?GxN}43*xyWg4_&*4t2TgiJSo{dx{iE=hbiW^aauh=_okvT2a_h;O?(aj ze-?0*?s-%45itnt)Buab#SKKd-<(SB2Am(W`a?SvSu1fb=eT?s&tqd!9##s9>db$T 
zx)g+50e8dKz*R82%j|Mbua`o7<~(W;2{`lh-D<-_ZEuc(@}cC{1RtzUq=$+xJ=IA} zx&ts7DZ&dDcgHh*|re8@aSj| zy`FoUDp-4+z+iuAEPL|gNt&o->}*t3YAUVF>5h(8&aa_j9VJ{oP&pBXAfJPIuz$uX z>;F)wgaQR(JYbv7vg_!>#* zqgJe2oX9rnZ5D(m`c>A$gfuiXAkCn;aXKTZ`L;G1HVJVFpa2Pj*Rbmj;^&{AVXQRs z*-3GuhfTVJOCPvF8Uv^!ift#7PAq$}b+c~C%mGk;P8l#W#ZW?OMXWAX->sn8-Ql+n zu4|Pm%+Qa4>J%)86VE!iGG1N6o%{uK3}S}T@~Wz5Wo3exSXje$_0|Rlhlj;Az$l`i zNf;Oys1$tvPOMj?%d15;-P?PItO&IoaO-Zmh?3aAEKGYaTS#$nvBc~SR0$yPKLLoU z+`W4Xq#KD=)2B~)(pqyjEi=Ct-kk^D6`Xu^V`Brf9HKY+W$#9>=2;awgL?qgE}2+w z=$+96k=~@CFFn8@itq}rLKin{4a7jZ+SeU%w8H*2-`wv7Qsta}erTwOofr$WxzER#vtwI(v)N?-^{baJ|N)Bwe8; zfHR24tQ+alE<}PgIj(GN)LtQo^u1$of)!+r3uf4qkWhSy(COyu{kurIMuW3^D z)O9ki+3iT!4;5*x9=R$i=CLA-`0O?By*GKI#5K@e=0P4xeo0>vKNLOdL#Jx$SiMnK2NJCuTLU8-`?XCGZ*+^|x z5Xd8g4N>KwN&{xak~<3=Oybc|TrvD$KgEl$_j{^=7FLd)!?!fyn?~KoEuGNKq25!M z3Jg})_jGpRf=y84&3PB}7hGaGw(fgnn%sziBEKF7sF4k%M3{$3QrQ#DRfVa?Z6hOk zJCZwhyaiRIYeEkJJ#Tf;nuKfHd`%O6ZGcmv{sw>t`kKDJzE(LJTrRYEm1=%c0-CS- z9wOGZMe66;1BJMP-4;1JYnqpr2V#w}5Zw)6vXd&~N5{tv z%SCc$(?C%i;8nRfT7qJ`kEix(DYsE|%J@F6yM)`SMY;DC01u-71Qk_DJ9VwRC#lr? 
z42!v@b)IJM$YC&S)W_rF#SDGf(owML*QA<(ctFK-&3`f=pp3r|a0uO$$5p6Q1G?9A z_K=9<-Me=`o9B-c@dX}&-ZJ+wlnDJ}21=(vqi4keZ#X7Y81M`OXQTcA^NI)bdh6D$ z0XaRu7V0mZ(c*d5&z3XTN5Plejs8A6JF87C>PwIKtDs!IhCYe*yLRmwderBwVf9cD z>=7!(@ixNVheMz#2fJFsZ$E;<+Orp%^sz^+7dYH=*v7LUV^EOnt!AVjO#5sSXvhBe z@xu&u4;sCYfrUm883-Cf6R&)?RTFO1s6QIaQ%zhcB%BA#!Z7rG*BDv(CnqPUh4bty z{P$9GsrM#V--FoqHRGzfZlNhi&*+owMWt1TxS<)E^?5?)uQr4C7W6aUwOrK>*d!K4 zmG%NY)T~j+*Jc+IA_MP+g80et@$qYz_|@?{qwkDf5bIfumT=q2ujJ)jT3B2x)_=l+ z=u=#`DFgY1zbRIh7?1$7?wMTZw-i5$oIgp%~GTIh`r-9 z8g!iJ6mO6CR_65?;B~5M)uw>znM{z_(dMpAf;}Q`Z1LwZKUxA>yc=dua}Q;n4izP^ z;Ujka^0L)rlaKyR)9cz@-Y+**UxjURRdXm?=W9E8d3(>jj+6NaC6ws&_FYBAK}7cF zdMLU{c`68-aduLE85~v&vA;L>nVRKZZTEUDj(`QG|{owZ{oqEywrQ z#LN;@AQ9@w_JOVmGKvz=HSEd<7>!B38*=`d3!p|*Re|^pA~FH`e#~a$-&hG8q@5A7 zl=cf~Iy`!WDJdzbTda)(kT&J((}y{*ELVRV;^Ynb>b`qO59=-X$4|9<3V38k7nhsb z+KHxeeXu2T`wbh?q)<|RN4kD5Yieq^-wu_)=qX!G)(Qun=*mC=2U+?iC}jhn&}Ox; zN=r+dSy)_R_!{rsn2$Rn#ySIv?(T6{cQ?M1lM}kDA*Z{7t2g)uKm>tQ`5b8Ak--;0 zsZEm7b7}Pl^&6K|FKMSP`Te5~Liy8_U>n#;i~ytrzx&fFTg%j!WIA>U6qZHW8m19AXg9cSr zJvbu0jL$9?_1%#(Jc-J8uSw&+3E)ne?CEh7K*ol z5&%w?{*v1lZCG*aFFs?J(L=|EE=gNw-Px~Zn8ShsYtO_AV-am&XsGr-^?;h51)MxJ9@FE)qjQk`hphF`#7A z(bKbEzFgQ%U0Nnmyq~;hJ%-t#`r-|#ksDuC2sO?*ElsH>_3FLGH$a{*suUZ_@$SA# z0`yKJkscZzt`>{P_m8Zh>=a#rW{yN9eD_d8qWDqu&ztj7C{w$L<%?%q5hu5VR6@kRq29s-qm}EI|2wi9Jc_# zO)SJP8msoJ*rA^P1ZqY#yaNoi4VwNAi=y{b7T9z;1-BDyM}N)F{}rj%%>&5=KbRU3 zB;?nF+lE1xg(XY}ZY*La)w`9i zyx#@n1;-9#9Z>yHhXJpEOL87efSQgNVrIbQ6GO|?KYsZ_2+Nh4%3$V$9b@y+D&Ou0 z?P5icb0rK0inK0jl|Sl9xnlL>eZFxG&+dWW$)B#^p|C*zS0pBnA*6#Iib0|&P&cvd zpvK;cW4ZEu;vJ|a;HZ)@GM3m+RNJH;?K;@nmR|4%9zYD2#fopi+=Lz;Y?FMg_4IwS zOQS7_rk%k!M2hPm2a{pzMuc|@rOCyG7p3v)76p3mu11W*RroUO+_x80KhqE5@Hnc0 zHD)-px3i*?CAzDw_9pGE`_ENinR8*R{@mXuhLr?T&0)O-i&4S=fEN+EJO+|X>d_v6 zoxDB=qgg;BV{mUd>oB;mSt^%(ixr|lkE9~81-f~EXK^1?AcM*$cXCewjb;_e)$aXn zMibhApGr6U<{sq<5h!N9^Z3*hiXO?i~9&Jvh259(sm;iW!e2f37 zP&Geo1`Qo7*CWqyoj?1IPCm(4rVv&@(gG?IqSgZ5JzcwOn-4dd@sk!-Tgz3 zr(0WKeRXxUsY1{%LTtH*WtiP21jVP;#M zwcCpkAa+~G_J@FT|H)=QMyPjk 
z5S2o_V=yk;g!T0F9&4wK_4E*7;S!Ve9=*I~#{jnxfk@L55)82KX<#|qj%n^FU1w*P z-{I_rwB6rdy&+(|!iVs1#~=RvCi@zk015pFD57TF@$3iS(Q)nrplR8W2lvbR=kaC> zZ7;sY-Md{N{UbV+7;GZ2gh3{~31H-%anq6ZvB3yJw-!@P9wFI>J*cgZK%d?M3m=AK zYwPFDHh*(-^JvEF8cpkqi=4diZ?60&gjaQVd~6Q2E5DIesljb6rKlj}1CYlgpDP$J za64DW8>xFl2q@8fb+9u7{A;a=vD#)e+?&te`H)0 zaD6&f&hEXxam9*J%!&+XJdRRp(8-S+$Ysg!>c5MI#@PlLi2L{N1FI1R87<$V#aIa_ zdDox$E~Ry__1mH3avH7_6z4siG=5z9r1v8vK1kq>AiXpT1A+Fwf>a7(JsL=sy=Dv4 z*KNwg1Ve-O&zHN!_0%}Fc6LA4*RP20LDDL5bl_^OgxN=I=(e|ncorZQ0OgDmWZj=D zD+C~z1I~W-?Aa__d$mhlwEKRGzu!PLILOZcWTF9JtEj5BfyLp5E5Czy)6h1MnF==4 zhV;+@ki7~Zqn~&5LO(-F4X%g$UnE*NF~JCNfafsT!0JJFaL52B-^O#v1`HBMc1|I!2D#s8@Bh+GagBQY~!;`;{&R^qO+q>7?q;y;`p1opuk zG7b0l5afG5Hvf>kEyB}ZaJ~Ua#b~(ZN@6isi&j9q)egM))4aRUgK zysdnG7o<+?6$_x5K;CdauPqbmd3t&M1Sz8vG~P^9H4_ugboH!qB$s>!9x$U8Lr#&8<6n7 z_*`y1%=>SvFE6j)A$=&BWwzyypqVSN z=m@PS^7itPlhfmVmxFj^Kdca8Z~&U{T36vY;37$%QHy!1FHC_F6N~1gD~r{N0%7(ATeD`(NK8 z1*Tf{(p%3Tf$m7$EQ{8N7ThC#!z%oX%F&)Tn0>H*OiY*qT=H&e1{E0rm?AvZ$M4fK zGLTdfl86A9t5xd4%)}(*yr9xY;dkfGOYw^SnV)dO2fL#toI2VJiSOh>_Et%aZv4z& zy=MhO@z0wkq~=|oDunD#)RQnl$eF$GGzbE~IuB7CzMa3%o0*uIE1c$)2Y4ePaEWkp ziu`q++fJA3VP&akWN&S6e{A+UR@*sl1A@kqvxE&|B zXwQJG|ARqL27%sFLniwoV06GnkhypC@VEt`_x^l!+A2U)Lwvl12y2J*V(aJ6iFgt_jA+95KK1B=X>|hXVy(}< z01|dBQlzLwF42QQ)Ip5X=gJv>Ky@ zlG6Rvf!BET9-FabK)HrYs>FYB&0Qt1SVpCclZzJ&V@X&puO8}NU zZI$|PN&nu+0|$g?oI7opc>wpX}T(b>_ex-uTn*|8g@*7L( zgPIG-48tfx`@w}|GDI(#FW!nx`Z{c)4Kag{(0hbEw@lmkPoYT8f4?i+0i&sS^?e?! 
z8KqO7S*Ty0JImx87cVAcWDtfbV8BkSZD{xec@WTFlwrQzaQi`c-GZ{o0ZJPZV})TO zhg?4tbEH3mojg4~Z8h=X%C!HnU0WR_Q}hFHL22TK4>f!GGy+tVT+rd^0LwG}J?@&B zVFiea+=e4KjS$>w9T`CYUMnz;aH!^{5b{F*{W7=N2bsY4@8pP zMciQJVnM){0$C#R4}cw%d;WQZwYxl=oS29S$5`5EyI}pzgXDhAv2F0>KI08X?44HwK&dUS?J=Bbh5746l0IT34;l>`o@-9Of z-f?&J9ysO~+1W85s)qPjuU}t8a<4$>bKvG3;S_?aM+D3cK63v9e1+gO;1dyvx?VR{ z^|eLj?TZPpOJO8$%XqyHBJvy@N8jQhx0ReLDUS@te}2x5l%H>)U@rdq=d=bn5A#1i zxE=paE&TV(^i=EJz&Ys+wz_s=d24g3_@{=1zW1{Db}3J>p{KoPzkQ@?I?Rr#;KdQ}MV4v*K|dcTH|CX@%rg1)8n>-4z$&h2L$SE)r@T++rSKB2wc z3`XA)HH^IdIaDY5RZ8m1p>fKq=mrm`N+Zby&P&T~zV_M_gYO>7uP*keH2BgNrwT^( zRXdDch|$R6bvM!O%epzaF{L8oMX}wjvcP%yQ~Ln<6MOse(tAWuHCiiNEDgMTqWYS2 zt9hNXWrn@%(jy3>?d;5qD>zD5N5fpcWjitpp_HFI@!q|1IK%;k$jAbrJt2oakONN|8n`Hf z2M@p_Vg~z52|Dr?RPeUZQT=TR^%Q=jW4cYpCM1v}O$?-Eq=_Mo@<-+6M$T-;JFsMG z9e-jX84oCjKOtp>G`;_d9%~kBb0Yf&l;4{nxFkExGZ4M_2wW}{GKd_IF@~xF%5V^* zZ~hau{?xA6Qsx5E{8+P}Bm`Pwfi)x97{ChvSri~?B?Oq>)7yJ>ss>FA(o3qK2~G7m zB-Vjy{=n&Pw0C%Vy6j?~%BS${K zKgxlUfaDMUtTSqRdrM?ovm>l~cmaA398>u?IoYT?+UR>I0=E!?3^_#l;L%q1Euf=m z0*3?Ka{=gnXmK$Q3f4VBBN2;z))0v>LYmQaTF=0M4(a3|Ys()4nnIG$sCa+BwGs&7 zfN$m$(Z|Wb;XNgU?Dd^U9ncehb|>JH=(Yn!2z#xpdf|+B@O!Sx-CKJvE~}g@u}Wo0 zGgb#2lf1*irOPQSYv~>?`D1(LyYMV5YOfg8eUXDLE?jQ=&%Ea~2bNOE)6YlIn%O?( z@oI|p*(ci@)>k<=c4+&8xqJ2RHru!EKq(Q5;-x-z>t2S|F*^GY+}w>{y^sC&jU#Wd z29$c_V4^TH#pL1AkH^D)FMT7IvS*?oWuaQ{fwN!sw)`TH<2cEdE`K^sb*IdTZ_lo1 zdpkZg=~-jmB@2jy*FwhJVuXKaqN$<(-o?6z2+u9ZRQmO(L$uvlgpB4n5ESwj8(LIr zzKQIR&!4|Ni7#G+fU9K+aRgX}2~x@esYs?DG&(tGK&Twxmk2)mhq@x=NgzHuoJ`qEg*q-$WYMG-~Q(|!%-wQ9v;>H?D+UV2stB3KqPMi>@^lV>hgY@ znU6zwxb&bxZ zwvYtpjfZtROLvk}Q=Uuu9kF?yc*1!!0s0zy`Q#TD>ja;C=fb{b^ZeQAF(($a1P_(U zjR52Gc@y^SG`c*sohhh|x(7QtyT={6=E;G3fsRifPqxHmIy7<86LUNKh-1CDoaGUp znZ{jblAIbgF;!2W=g{#7G+`uJ3?!$YzNnuQ!j&LWJRT&4L=VVeh(H`a`6?sBuyhSX zeNrT_ourn(;18z?0ly;WRg{_qSY>>};T#~_rAvxS!r)4FfNG=!>0XF{AOIc-2BS9> zdB8lR^4a^%Gf)SZEjS@zYi$hynQ;1S*$YBsc$~(Y+F0M7=QZ(ih>Ge!hF?unlMI1y z03VawM!!tK6^CVEeN1pR_ks8V>z#F#SXU(i_KU7=l6M0nn9?NN-YnOG7Rez$=hBbl 
z!=bd`kAvbO9B_8>Xs6M!Y%P)=CMR+B%YV(0yFpq}ula$aCv9S44!`v2hNN6B*)<0G zHl){pHHNf1bQa64;`Z-{f058k>R{JsmRfFW69!hi>p}T~enZFiWC? zV!QM*zqVtiR9i2~(s1Ue-0jNpn4!jd35wIwCMh2)D_wfJJPYMT=5YStdCASzFI{-N zp_tGkFELBvhK>Y!+51Xyfb(*%`B2%_e<tZfr>JsBCHiprbDa2_W(B!QgD zBA(g^W*81~hA*&H-R25`JUQf9&1`Is=rVIKdI50bw|4)(RhiXypUV$k8(-TU!OJ9u2Dn zAYCiyN7z^;&|T9+9up!i7ehB(sS9kVD}>R=Q6ZoSaw;m}R`G6dz|d;=4K?6Z{pC~s z7${x{F6=Ni0-PNSC=u}`{%Lxk-XW_0cdu#qQ=B;Zc-gc`_l4n*$XDl~Q&#)x@9Yj9G{b7P^k0%uPCa z`s4$z`^~;}eaA+~DL_zCzcaa26mm`xZ1QuK$Af)P+N8S5T;5$-GTf z6L*K{=y)|9zOmHqtmaS08Z)2mm%Xy+-nqG8{Zvf zl-m7yTs|5eHVM&4dLRAD#R_^L#UD3f0OQ95%n=evdM%T8SDeTm{b@M#>uwC-z z(kAVa$4n@kA}qHVz`S-RljAxUZmN2k8mDb1u|?Q(!g1RSG5M+9OtX81vC9Y1$NQZ6 z!OL@dZFpOB!U|1UjI42MF^o~c7gaB7sNnKFs3C1JXPub<_&i*>@M z>Zy6x10UbW5O+eu*&A3AX~O2uExa#-ItJX;93aS8gGbqP&AogzY9uGpj~EKKGsIN; zQ!+E7fODY%6QFIlKr?i2C6OO}S?>OKX@?0;955y>%x;F3OW0|7~oq z;$r;Ft3>*KzK$bb2%#&qH_3;@yU#zLJpnl8=+Mvhu+)IulAqtP&f5(;;E2b$>WS@> z(C2e6-@cu7?~}YC&%%6VRcGgD7cGP1wt|_gmcOxiGa^0XZp*2jV;@#b&;_G?9cVw{ z)O=)P?>XTeKTE5xzB@dc`#f2vX<@PKJDFt2^ZYgwGY+Jb99gg3cAf5FnDiBT^lbTz zt0gU;zQIp2xZbr5=lb=R!q>YNt%u1@K40!0V3$j29LH~iIC4qi($kmi_2V}Ale%>Y-UO>&;;MNsVP9*vgt55aL;;4e|*M3LS$*3*qQ zVwr9PgVdox1wl|tMfKqz&#l>@qIGDL^3O*$=fIIIL9$Yjff0s5W+*rq6=o_aEzKNO zrT2lJkr7EsQxSUxz(zyYlOX3RAt#_zqm|-+x~o6%2IZ%MLk|QaO!F16B(T z7%$U-x&YZA!q|j2c!L1{+Jqt2Bk?`sX+a<;1v(YSBnxHugU!uNP14A>1Hedav+hHG znuC0PqMHd|S$0B0%vL1r0p>CC1q;X_nBR~b8B7<^V1@WQRW%6+9y^m@eK&nZ3>)-$ zRcSogLz{lox8~yu>HC{E)01($EH2Ht|89**xk7%s*W2VSIVLUwG9{gh+Cmgyzt8^M#c;zbM27P4N!-N}4TO{$a@)?aUrq|h@(%gM@4 zdhHvv!(sB$Fw$HV)gWF-@HjcCGBkIA^i$?o<*dlaUQ50XmS%hga#mF&hl zJTZ!Nep*>i)mIn2MWU%65TN(O(*%ibvC*MmbzH0`1u|#>=g^A^q;O3btXBGx&7nCMLnC?K^|OW5zo z72JaA?!QJxMsCev@G=pcH^I{Nm&ucU{7`9eBO<*OLy@(VPaW=+&JxQKlNX4}I(9LCbV6oYK1g=kJRJ4W*`$?Qd@rnj)F2L z@$A#WK+J6{p>e2(7p-3yzBO}j5G0g<2xuGXk5R-u6YWvVKsfUhIbMaA+4B)T180KM z$mmb@$c!E;vGN!LFa{R-0}_POotb|Ohq=0B4L7|&b4AWha5fhV*${!?K>goGO|Gqv zGmKAY+d(##Z1^FPd|t4V@jsz66Xa4uOrvVolWE?s_akN+06ac^@7)POB?K8t3qTr2 
zSxqf6rx)y;PnY!4SJ%MSCVP8fWu`?IPT;Ef-hrMyN^XM)p#g@*3YZ{f%}$ok>JsA2 z?f(u^Gw;8jkQh8mTUXv&q8p^M)Yt91B`3z6?tLaTXg?WfH{ZE>ZS$&)b&Wk4~_SSELhcqYLW$4r~uY^%btZnm?zIR(9q#eaNQZQ15V>{yi19b zVDamhL?*Ocw1m* z5sYWc_`NX}W=qEHhOe<&s;)}D6r-sn;r=@uaC3=eTUc0_IatLVETw>Hr9l+I3F&|X z+O;lA7eJ0HvwzXED9*1%q{2J)M+A=Z;5fg{affcKR!yj~T79p~xQW8(JD5;NQlzw? z-}pW|bt&Q*!SQTU8=I29bVd*tzemk^2ZQrodou(*9@EU0>J(mpZ=qm&bNTgW_@)6O zS^<71GX9Gfu^Os^|B9rS>b8+0iROFf{<7q9%Lh4XXuoUrbB~d>4kIQpuG!`nT@Ha1^36 z|9roTff``E79LCugN$B%dBc0=?nOR6F)24f_uh=+36^hZK}$0Mg5mu_mW-Pa%y4kp?`iCRDT#83G|iV6(ZzKJsM!I(EQG#cX-) zR71k)$&E?b1JnkqZ{Px?zuE{VAX1>iclCi+_b`JLE*qRiR+n=8w?soeh3%?UQ#Cv$ z8g!V(PEuQduPIs?6XvcOeD4Vj1}{RPAEmj{BS>Cj9>&Jam!rvcksF_0UOxO`9R)J5 zJHz^0M<*`bU%al>yVqx9Y8O9*C`>r85E}0$A?D(BF{N9#%$pR;3jhNmZg3|hbye7%VCbv-cJvdu1zI z$ljdi)#vm5o^$^G{c-*|*B|eC*X{1Uuj@6Q&&PTJU=H@8V1SfSD-th2YH4DUCXCAV zi7YAMw_ej_*BvKS^WWW!%Frkw8)Z%P4g_zxq^Z{fsAsEs;*^fZ>bxqJMMu-6aq0$i z0comxnt&Y+o}WPxFbDG%ZRC9HvmJiEgE%4_V>=jH3gDXHA1BGmw8KZ^rA+49?9zYp4*HmPxQ0HB> zo5GFt{?mhDcx&F`BL7f1l5OM3&Yh)sIE*KZw}5W^DOJxvGev(cVE+?SWzd! 
z>QJrN^Y~6Wxi&+`k)=`Stf=T^Qsp1_!H0`zWn~4V@v%8<#bO)aB?HM+m`07l!oX+& zk9uRPjHi8~R*mH|`_D;OZB;s!?5lrCKYJ_Z(;aXTQEv zM6<}zCPS>?c{=nzdwW&2h$M{x?T7dXOvn|%T1RMT<0!JEHfHUL4EjUJRRM=IF6yY{ z96VcJ#6G3j2SY247=V}7zC4Tt=(&-6<=XoC`W(bXWQZwYjpPZ66~3xNm&a+`R{WcSUyIV5966x(Z?X@eA4?Xq(L0Y7>&{On8c*!&+encZozjC-$Tz9o?mL*oFRhg0C^}aJpoK&ZtnMEW^_QsT~oyHRRx7KN+ zV#69-r>BK_No$nhLpIk?)XbKHHVu{pvY*V{Dtq_Wt%!+Hn;=QrU3^gabYTN$`=Mam zyB)91CTB3BT6(XGQU+w$61`1GDbsa*J2!w6&P7|iM<5bN=IR51!2 zIG}H!i{!y;ybOJg6S2nV7aT)Dno=8dR5O}BNEGXO`iBcB={>jSF9EjyXu086W>ivM z8HXKMPBL~CWS>IY$YF&w!NkNgjIjpg?wx`?AqCji5!+pLN-?9hE+qcVF<0xjxCnqx z?#36+0RS=(X&H01B^shI7pwcvk6e@ga_hOYe=MkRnE})BWTWqgPuYn zN}c*h13xJEkgTJW6KK@f@f26rK(h-52eMKGYy_Z%`#(joyD|nm)r+Us;y5mqN8P~o zP&PD_UDvof>H0(Xc6qT>3Pd7FXB50BE9Cjolcb%@9sjaw4gxYE^0XZZ$WPtQ0)ScP4#Z%?N6M~c@y z?R76l^B5Axeo7VyDp)T3#u!Dz7@FW?V$w?Cm)8yUm0^fa+tc5s|RJTKFYzNeJw`3#9-#FAkZPwfV2+hY6C9> z5G!F_50@IThi-mZxD`kRtw!=FLE^(w4Hl;8cnE}2DCKJ(94zS&u+#wIA_Cced3c8u ziS2`UGqqK11yH#GzoK2}fjTG9u)^j9mXJgD5`rbqFD`z0sB1NlZ6teNQvz+EK?q79 z{a!zW#b{lS3TpI8%7i0!{z{6ZT9mg2c;D4QkT@}Fz zb#Lc>UZ0w|sIS55T6}z~bSV9+@GGUyqiHyC0RloQwh_g`*t7ZjUmsq7OQsptgfETx zV3$FgTJ)$jjIYkjI+x`pjc>q0#yeahIg!&|hZAK7watL_4~FF&kT(l^`3P^zdPaTbJQ z${uSb253)R;iD@cS&zFJ?xoHsm;#9pAhgUnf2d<%XxI$R&^8r%~oi6*v+WmM?C%?ptQWA9s zi!%lcmOz|m^{<7l;d_%^0me)BZEoy_j;ZVgJ89xj>a ziYQ=g*#7IN{@yZRSvlubW{3i?{PEY^d9A1|~mR#!Q!2lvMAm%YDs>%C#P8wVds z;WDm-rK985LM(c5uK0cb9F4FJ?Oa6{QR)7pO7g8F!5fdFs5h8R4I)L>EFm$BnjhQs zw%wI=C1G!hOIMDL_ZawZhd5W*zaEX_z&3t|n2D8ZST6Yky0)wws%4a&8BN+OZX%^{A`;-8?Di{o+F1W&u%dlK*)Ax*pn1NK{aN zzpwbNcm#yKO=i?&WXuUq3yDCl4Yx(U?;TK~P{jadtKVm&_V&aA5%h>(oHALJfJzls za^t);gslMdxSp+OsLsnB;vMRhZ3F~H@jRA*;s#K5=^~Z8W96Ut# zukat*T^H22xHn;rIX5~i*$O0;Fub{86!?y_+UrSMj_g;`sH{gx-T%B(piFQ zqeH`FEX)a%>jO0v=8c@Ti^`q9nRE(Xh_HO>SX0c3<#To7aJvXs;yAkxvH>7`bvG*V z?^~ClrPXX867rW&YvON@yiZE{1;XKGpe1&ze-P@I#iC~6j%JjDWjJgc#OK@!%nOkJ zb^Wh5Y_h*fb#yxyoD8a*WcygaBIon8KLZO9XrEk~5T823t1T-brA?*bqIU|K-DMg-rR&GQuqzwm$>7|BHEQnq=zF2jt=X81~3aA@d%5M3mUt!Ii|NF(U9*YEh!3YfOa|7I!07Q_Wjb_`rm 
zxf1kE_Lj4I=qT9G$2EBjB-FB1Ix3yzw`+lL+WPzP74cYJA{&K{>xMx`>A(sgua3E9 z@-iTv8;+37e(j;xYkayfl`)K_zBa<}z{Z5^+qc(2dm*!vb@xDxglUIB8}>^od*zD2 z=Q=cyrEqE&85+kiTkcp+Rzv$qIVEwv8Q2AZh;z&R3NQz^%Mh4TGYs`5s?|EE*jLnL zb?Cm6Ya0kqc#-^V6R+n;E(%9joC0F7mb3(<5m^NQjU+97q^D;J;WTZEO9L=Z`!64X z0ta{-{MB`kRf3qKPuiJA>1rmxcLw@@haczA=Nc%sP|_Mu=l^uG?CGE! z@0j5gV9l@mn2g60pk&0-vbX0(29Pz6iYU6_(>6rb53F}{N6w#y9`d35zD&x#0q-7x zhwYQ^6s~5g7X3dn^yqI4Jxf-`HJqkW~5lO`pBo6TCbb*H^VW_4Yr| z`Cytw>77TX2qTR)UR2XfS!<30L0jDRo)lA~boAm>5y^qCX8t-H7`fH1vSnOGt{|wH z`+0iQQR}Wc1-27OlO2GJMhF%^d_Vxq&-xe?027q-Dkewg06d^j4!ZO2YM&jmAM)IBTS4!#9*%i-A*NC4tZzxa}vF?6pfwQ`r4oYIm~%BB^IgZOt&K#Km>BzF?X?dNlv7 zjp1nbY3JiD`!;1mO$`upG(sjTYf=_aoc2gG6zsz?APG0Iw~x-uyrI7J>zBvzF8iW2 zrX_S=i1@r!Ax~@t1T0n(yRiDbrj99qj8Dkn1U~~4J-vjQl_v;`VJkvI^f$2U^?$(wV^y7o zPF&nS>3b+4*VYjkhn7D`fVs)8{HcHpTfoPaT+YA?uyWJa$YiX@^tGTEg6MkEl-d5& zcw4O)5B60gIUsQM3JP(oLlYS|nqLkXOzG}{L{ZdOOiu@AL7vdx;v#c_u)flzG zZ5VPzWDZ^Cj`dJr9fFr=3%D%Yari*DEW0VdGJ*))5eqvgo}dMRtXa`LYZ5z%ScKJ> zPG%i;butkF^Z_t0Gy=DZwNeOjs#}me*6IHQEl#nmc^vjc;r~=Igxjy*Y?(l=0IEJV zj%+p0m?#kxfMw|%NT{oJhBq*zo#4w3{drs+W4VLn~MT6LW6FAKVJVXyME zA7kIJhsn8R{vK`xy2Tkpw@Nwv>QYu#bN*SZvGa$TjdNwXjjoWiT1vag&w)6tKR5tAp}y ze@UVs>`D4H31dt#%l!BYx**{9Bd%-L8h&5rfZMdOzshW-ROdl*pH;P4ov}_0P1aLb zLnTE@kz^DiBG0q1I6?f3=&O(fswVxiN8tHF0=(22W?qLwL>?3?5h6X{VlY<7^M8$J zjXL7*FT+3^2P+&it`2FbA(TI?0vfvui;Jr_H`f3BiA?o9z65rG3?DGtA%TZrN?`1W z@Ph(%vj6MjHF(r-^W4>qfK&{Iih2$NCCv45u=U~-=s-C1gJQ=TF*obH@PRDY4N@&O} zNpVFS^d$&UEE>1|)1NQUdo*LtTP;Wwl$!+5=WAqr)-=uCYZLHX*9Re^ z@#5KM00RH$SBzB6WLRqp+;Ng|(v}Z?_FK)l4Ub|)uW@i_ibl-u46B`w`h`bg-s+I# zZy>q{Gw=7~beaF9xO;_wkS;`>eJAAo*m%bD2hp6PwL)H=Z_1i$Aq2AQsdygIltJIf zd79G`?lwU(h$GD%VlozGwvp!+pGUrEzZRs&O8`n!h(1mC)o_=_(kpSbX9G7R9;5W$ z#Fh!xf&z$4{3LIvVr8;~HIErPG4*Np01gV^etLfPX>Vp|hSmIL#5%q+EJSh(S`RgX z1pk)ju9fc7Q5SY8*z@pGT#XFi+=V6xAytD@5BO?6al^CLt#V=pYY5xjzT8eg6Ort~ z2VpKgSfCXYMNYdQCGEGgun2~yat&%9uKN{A7TfckS;7s#{RNNMv8iwr6eBUA;1!X4 z1aJG@#j#%G=;$bHRR?v{)>Iaxr_r{IHpQMMfd4tZ+$gvl)&~Soipeugj{ratyb8k; 
zNRmI|PQ8QptpMPxn?HiD5aUe%H5gUr^*6Kvaku}WPb|Ox$<6q#l~~|EuCRy6RA$e0 zJ?1SX)yJN1QI*pl!0sIHz+FNf_XU7Za9+%rnZhtlFsPrPQx^zj+@=43scRb6DjiUX%VBl*U zX+$Vk%FBfdnFVXU-gMH5XU^3x>zf}Jym7um^vi8sYBftKMqXADg08OO;OJ`Q}rloNAKtLv>@(AC>rnMmFznn zf9a8<1rT8kXlEfji&s!E7(jBwdyixi0aR)Qob>!ncb~+BghpUgfLKPnLo-PF-$6mhVAe`$kmS@AU;MV0%VER+%LDPw61Jnx#5Ks|Z5pRSYMZhlk6wa>ZSq zf(Q>f#88l_AwUvh4udl=z%um$vQ!Ww87MCS#F2&1h;)>DKH*@?b-$8(12TQ}t!UyQ z1QJObs|3$G31q?*LtN!I7wFZO4v)_2u^E z4m^raf74yVj?lO~8D9e~vgZN`jpu zogTvi@>+Qz{tDyZWq438-P2!d0WZVNAP+ZW1Cc=2{n9N#>^lTIUq+D@>)$o-?L{#Z z5C3zStNlZ-{^x&Xu-g#F;QwC#hh~)jBPYxM|HG1r6xA47BH-LbnV7fULGm*GbH&~U z_aF*3e(a4)WH|D*|MBMhGw5`rRTlpe)GW$?G<%9l*8hEyGKI8IeoDIsDEcEA!vA~y z|EFj%GX8&ia@g*>y~A6K?|9mSOj{hYRA`cK^EE$BR6gT58u~2N`_SC-*U_)GN&rdk zZ2pPlUtFwQUI?ziz^32oh-GvTM>MzBg$N&7kABm$u}Ll|Ya2e^BN4X1b7Ew?(W|~V zKMPvN%3teI@RN7IH1sG%m|<&KJ*DC*yhf$VymIw7hJ}hs?Ad?U2lyppAojnmwqyR= zx3N)dq~Yr)m)xa!Y_U#vkQyQrTP(z_|^)mG4KlpOJ}PT*-TxJ=|rKvM_!%;3&Ki;LvF8< zn5!a94&veBGJ8BrcLg25C5fzNfP_gWqd{~JUTt(zJP+T~TNwY3MUm%H5Lu>BjZVHA zH17g+&oJ@_2TUevtBjUJjcav@bN*6@UjMN%9lRNUVdZuq*3F@HcrZriC-d&xSB|VJ z45>c*n{APieY4Bz0wQjgqk2EcrocvVpj$-oTKTb(;y!!V@-le#0=rvfHg_^c z^RL=IOcYc)D@m-mCnA2lJd=Wl_4u}}ngdI!2s^IJol+Z|P`CA=3B;k7VLj~kO8gnR z-YdGbsS5O;0H1e8_zvrTMTZKptseV-79c=jE(eim6%;o&gTSR=`nIcksO-Y`Kr`F3 zGpvcuCp|fSD*cC){`1y^>*a4DaDM$O5Zhq$NPdZXBjpYnswl^Z2E+j<|Jh4d)4DtS zm1gQ4A^(5#1a#^Db0J?SF1H(p2yL)9<0T-C<21v;X)3oQPV;AEk}k4aUCufGnTfgd-YVHRVV?g9F=$#q z+mv%Q;h@&J;Bm+M&fCO9-dtUh9~(3BB_{`_I$s%qdOSLDa@wuF{oiNXpUZf@+3;+t zm+0we;hO8ItGKvGB7e5`JuG@wzAK|=r^H7S4hz1IWOi4f&ER;b>_?C*>>A^;ePdo-9ff+O{&8q@i$V|&5xs6N zTV)#6;)VjrTnnei{pYG1zw(M!cJ*kdqt=h^r>UtLdz=+GRh4b+m|w-Ww=q0Ea>_KS zL|Is}+_V{aaq^PH5z@h2@-(>R!DN6eF;!bzyspkc0C|J1`!b)I+1@kId&TBHWZFvg zOY80l0X0sJfL)Ra9}lV(r0&amYjY)D6+P!9bsH3UX-*b;`PaMaYKJ64CdYI^>WUum zMSHkC)3E;Lo%VFQM|-{d*Tpd;zKncD#dt1+n@rS$hv8)J&&g)&(JWft>acVc$hlGA z%G0Qu$s>T5T1|Bphz8s6zsr;6&cQNTHn$-?|D0Odr^A$ch*u}Cw$4;1J#hZU zPjrySdkA<6IK6p2D`t+XJT)0ZO6=cm>>Kt4pw}ud_ogjVW1ryp!R$Fu=GlUc#Z!%# 
z(f5oDT9lmaA403YAK!M6>!$N-^cmw#xFgJH*@7(s5g~w|$ofQz+`bl@tCPMRNqJd| zqVdg`ivezQ``KTu$p{5WU0ZBYMKZA!4SchEE5^UwPJ**_iwOZ}3?nV_hsIkCqtJO_cG0DEi*tZyiZ?}_|~t2mlQ7=?nsPnn*<>V zS6(WfNJKmP^I!%8ZhYx>iT?BRD}8#`keU1Wq<-r%}vozU~{AG%vNpj z=^*xa@F7oZq;&*{KA{#6#sFuJ1HzZ*S5%;1`o)@xLP3Z5*LGS2{LFg^!#iyereMrU zu6E<9Xb*Imc(y`$JJm~nHy{{SwWE~>hfa!xv)X&JKaEX!#tHwqBINEB%gf3uP2#II zmE)^E6JW!mVkwodAYaqiTheYzBFn6J0YZ6`h-<5Uu;d4xHC<)F-6zixaQ$M!j0giO zbsEI%5B`MZx~_fEqO$m3Vir-lRYUQf=81C_hgYQ{^Z6qjq1`5X?qQg#F>f#-krb1ndThI06@a@#-x-q zmy<&<3s}_^svIS!K2Q*&P>v_F7)^qBv9y(^8}=+YQSrWe7H>j?>3i_&5DwU{Xj@B zK3-t>_x*AM{ra)%Ql)rec)Z?3G=V7%4W51-*{O_9!yKq1&Lpj>D#U_+FylvemxLroz#@Fs z6ILqRlVcl{L=>}<$?6{i%Y)Q^d*B9Kfs8-u?OaK$-Ml!Hqg2p_*5W8}IP6dL* zoSmiARWgjW^tWB<-Q7cLy8rC_w&6Zh)lghX(^BDQ+fT`y08xVzBR-<7R>QcnVVF== z)zm#@=$Hr96R3V-({Hg@uCGO8wuH`&eZC5Jkr>PiML9YI0D7rGwS7b+-ssbcs|$32 zyO9*>WeDfC_uEVLi2*mR-Y3#?a)`Y>v$8Wm&3%v$ln4^Aq zN6^799l^;Op&)O1sas7717L}K;Wl3&IP4H@aNH;5fah(^Om7fFQeBR!x-*>1MD2$d zg{tDo5lK8iQy3`Q_d=ng^PP@VB9)}CQ2lG{LbpgNYTV`RZS$&s6zg#5Y86J8az2bX zc&wxRXC$8&JT`E~(r~-z+5RKbG%1##Dab1THi3PqM=~)M4b`Ef*xVA!)c>a~v@EZP zSEj&-0ENJJ0wT}QzvtaFIM|T}a^gexhbUwbI&4#9L*bt9YO)Os%#M|DPL^2~z;Tgq z#p=!%HZbxCl9i3-1%SF7g#sXVoAsHPH`|@tzo$iI942vlznZC={?T=S%0NNMbM?W3nieBaVBN!?TD$?z-Q7|L;p#i^R2 zy&VixD?rYmt`7oi-MT?H3h|r6RsH#`VrH@^zKyZ5E2u?bU!M#mKrqh0-){ij2cNI= z!A6IY%C$cw&~>RdoP{JW^$wR@;G>g42NWK8St4D{FSM)D84(_zyyH^0Sx~Ht7UcfT zIQ0FGEh@%1S zELre8k9-v`N{A`&AK>@gd9eVx5=T_u!`vcmsCi@t-5`l?@FP zUF@Hdk@k$#^Pti^`*j?Zn|e9I%mPlYlO?sIAC>V4*i?Se;t;u*!@1+OidnPxgQV^m z)30&|`Qa*Ow8YD*aGJH;yLW#nZs!dhM1AiK8GQBZL=W#%-6Nd1LDs&VRNv zNo^+t?sv$XX=K#d|5Ib62t3qA`G5ZS(PW~v^L|l^=SK4J6IdN%3e;=Ye+FXZZ0+>& zmVcE5@Am~Z2dZ*^E&II4aw3nC{tX%j8JO3QHYg1!RPzeedD zskZx%gMFT{J3H+Xx^yT%FwIaetuF00%Jlp06HBP532u`>7c~iA6WLW=dhx8ciJ|i8 z74d%$X22&WS4ZdDF*6Nv__JfgB#-h}YVcJUtBG$4XN1#tft6jEq@$%Zvag__P6^SL z5A4qg#}Kp9)-H7BbJ`vff0T5e+_!?Si>^ntx982|MfE?8ugd_ToVl21rcpIMN)Lis z_n%AEJ&3b-*DyM`nS>vgu^!-oetk^nNps$Ei?7^&$AQ}M!}Md 
zqY>JRe%}XS^3f7mS{dJ7GxHYyb(lW;ldY;Wywu7he_5xbd8iEwf^AlhKP5rX2O3KE z=rCsjX;OZrOUJag;)dIHnLUo7BFDjrdOSbAt)W5u)s_<3U6qZ!S>xda=+vM|fLFdf z_EkUX&}7^Cu}@FYX@2PD&l4FL{xIgGAri8p83#JQYO+F)lsD}Q3oMY(&B2|B6d=jTlBbaq$@(m*(2?5c? z7YK7J&rvL+iSFbd`qA_wDQi&-~?jw#w`^R(qz7 zxHm2nP$>>SksirIq1w3(iA$bdechWJAU#oz+O~0OwVS}cdhgy?neAt>k3*HetEPKo!%9weCq_UjT|QEWQM>tM)VXpg37*Q(+n_vqCeovB@;5zYLO=D@4>1 z19L4ob#D14M_-^T0&es&neouazJ2e0M(}=ObR`L@yYAa&;$Si4XtMr;nC=646$?OK z?Px(TvNd^-{%0MV79nF*mRJqO$4$IL0&|%oD%HAXPvSoSr=#BJf)D8~HL-_nrHly%e zs`7WAGHm2om&l(K&RS{j)A(V53^ zsj!tfZJdri9~--Brx`_^au7;*CZxvjCcSB{#nJo34ke)iM|Qs#2oMpz{$P+`$b+WfCdb2X>l;(ZZHAQGIdBn}tZ7)OtYRw6bQO_Q7)SjHCTU8_3>f7tJcopfF`7-2ijCb}r%b)A!o1I;QF zmqX)aM^EP=24sB=zpz(4bYiyY^%7EWLu+9QkS$^Y82VWya}bA&i5Jj7&DObbA>||B zZA~5Bm)#udf}!bV%yE9wzlGGJfXzZUgEYVp63p=}2!UH>^RbyVHRn|cc>jj$FKGK= z%xsSuo7&nDoZPlkTR+)+maCZ^|C~GmP9DFWQ92+i3#T$2Klx!299AcaIOkip7Z5Kr z6p*?@IRUHUr)lQ}xu3xm_)@>-S{M!yB=Cw z!C1%c>>OL+cAcTul(0wib!~HFd)GfN=ijgbM^1`-dmUYh07nOe&F`)r%xo>U z;V5nHtyNVADkwn?P~~8a&^!r=@T+HD7d{?P95W4`Qi^5~=wPZQ_`|k?nqRsFOZ8&6M0ft3EFwMs17`Q>G{=R6so3TTB_wLD^x1Y zfOA6d$gj>nKXZ6Z15J*+|7NF3#lFLa15GG%{;{VSLenuL+F#e3`3>+?8aKLn`+LRx zlBLtNOwT{mD0F?7u2bCi$aVYqNybb2pEfI4@iMM|erhVcP>u$7I{-u^b@nOMKj70= z5>h8zl;{X&1GgUws3`S47ZeIMwBfFYg1tPEBHV=On%U?mYi>h4O?i2ge^^k^aRPYW zJ5%wanKQgey53zvC*$@xX@1g9*by1U@GANn&_1#8F)@CBwik|+&ew(l5_36M*T=JV zs@+cx$T2xNRFX1{;$nT*ld3y1*3u+-`Gi8jAAl$xT6r2I5@Q3pYuruOmT*unPu6&D z{F%UkvTexs>-C_2>;JMv4c&*pBd^Ub6)ji!&`>qpqV0(%7ruqZ7P?m}$Ms(J@DK`l z;d={pkPlW&Hcw8u7cH$iYdMxSPc3!gs&)sKL{a*BG%%QJypftgA>6Kddb4GZ3@pz` z=pTg7ED*6xH}J4oa)FNzl3#{{UISzbxmUF8*(wWkXlLP>#mtfUc|%y`_zBfj zO`jiCNS%-fKj+)Nas~0uPGS>iR6H8a$A@EC7$80EwB9FYd%c3!+4W4QH%A#eG@7Ps zv5DSO8a5S{v3+VG!ea8S%pW2mvygxYC3xq|XAK6JPwHm<8B&Fbz{`Q%S>1tZVmza{ zoi0Ad4~Z;+vJ@pzwpp*49s8eB!}I5W^S0xOUU-fu3p3R%_MfYE&p#9^+ZQ@xsCwMh zGhGi;!wg}bla%2wSii};od5l>QuD!`iKO_>_1f@%ArKktq#dDlG984HR}iui&lF2pig2l3jWa;UMt031i{-@?;7kPMBX|zCy=crc*%r=M5Y- z^{rR@_c?g>!NwZ5I&5nG^s|bY=G+@qlZAD|NiXGHxj~hhPj$x~|1IkcVn-u&i!s&C 
zPv$0}Wiol}P&i#~H*!<_?7d=eCCxoG(@6+gD2{;)Mz9cT>g=tep_)IbhCgH*E+;h@`>6s!2f-3lwF_+Q%-gs|5-aK9vxQ}VbJ^eGM-uNdM}KI>KS7Km44q+eo)A9; zT2LUCtj`kl3cpVZGu8*(O`pD{B3v7Rd zgVK6bB=Pn!9%>Ss@Wtj_3-YEQszu-zK!KoMOpuD8C0zWWX zc%Frs;fsmjRTQ919KPpOvlPG}sKf0mTN%XW&t6izpvS%NieW{lTHeoh6gb2h>e;!rHfurc<&q|VKHE!v1U(GTo+aXXD|A0u0 z?V0H#CVYGZIUqB_q*81i8@O`LgUyfsd$;GFqvuk}l^Mc*zc9qWU*oM;cjmHTpbNrf zkSzi1>wid|OM^F`^)g<>CS1SnI&7?ac zwl6GPk%N!_S(Xl02NRR}N^YT=PZRoHeLKJ9?9stY3+&Cv0DxzaRqybPXZt=UX}E^* znl|zOdv^~V5Fdf@?!fWffVx1)3iaHj)To_mVl2&7KWSbJVDK*OTq#H18rMEu7@74b9yNau<&mKQ1lDc+7 zBJC|79#;T5O(fs-@qv0(N+17Fb+@in`}FPsDatH0eYvN)3_;3#j*p)_SMAjFO4&JQ zSkW!EXfMwkbE;2W`)-lq^VW>uV#9exNIg*C8h-TIreAt0O@q#tPnCBp^;1&C|I`a# z>{ld6$d1I<3lvBM+g$hxgfuAqI3G++>swJfnc8f2D3N^1H+M)tRg_mn`OWMVR-oh` z2qB+aTc+6gFQQ`QlmxQbuUCA;TMUFk5Wr$(80{fbf(lzY1?BJP4K$@`OeZ}?ZI%?X_T1$5q1y5FB*rOE0UwAr6azS`yZr}Spd+sJlk_zC3Gby}RZ+KLRkpLL&(+Z0A6_SDS=R!+FX zF&)V@Zk>bQ%%UZG9@g_M?pXPqKi~D;B^koPN;q2nh(a0LZnJL^a`|;G?7bIXLk-O73n5$FjtJ_*;!4E}9@&Ud7`|z0I}Z7AszGro$Pi!unRu zY@p};FE3YTPNS(wyt5y2fy>*;#46edQOMC(Jk-Yde?JNP5{QWtZ4>y-SC*{e3?_-U8Hgx;1XWxr;B z2w|W&e9vrhN=ndP)W^#n=8sgqVp{tT@}YigR)j9WJISkYONt&w1{w`b~GgV&ar zvDnajq92n}@#6*swKAzJ9F_iuKxq56-`Sd4$c35^#bvdT(T@&O!j)6@R=pxuzXYN` ze>8J`=9vE|G_qTHKG};~D=Vugm`dlEifZn3HmJ{6Cr@HJjJ26UY;=FsdiQh(IKH#j z%zZtnM2Og`LxTfQ--f2m1L${=MRknnyh3Nn}a)q7f4;)+!!e?OW|Sm z9mF@JDQRKoI*whu*>cR?8+)x8%$0+VB6X*2t`&WB>!7bD;b%3u;k7hAEL}_z$^8AH4h_6+tEJj|Q zPkzo)TJq#$Ny$|d@$$0&?-X~b6TM>0Dy8^lYQlOw;BM9z;?Yp!zhA`{zL83o_|W+R zj=GnpCzdcdcAz8`KMxO)sNcoC)2S&(Ty#F}THeKrsq>zDYP-9QZ{tIo za|@*W{KS)0SW%5%EeX&cJOJ@PUKfwNsVM`dqy&6~v$et&zrXY-`kn1Ah8!se2g?%5 z0O(YmN4YDLRVcWtaq)soXAkCgdgms}@Zm3yW=>fGu`wV8427CrwXPI$cvtu)>kFQ6 z3L&bm)K`=`)f*kf>${?~?K)sx)_1U%egX=bJqhYq3AYRbrHdc}!QeckWLom23*u9b=%-3V-=_r}8F{orZc zeerbD_#8b&KPvLBbBx#YbI6(s>|Me@?bdCFd#4ohRHCj$Fz;Uca{IVdwM4(aF9kjh z=M*O=r@o8*iN9rKO(eMVW_|RZ%zj)?tE2fAd6^V)MQWx9LtD^Ljs=EP@Nb4kq-$|7 zgfp&Ad*auNdZAF1@kSiRtY8v+x@k04VH+^95V%phGk;Lm;ks@66Wv)3ds`Mu@lB$j=6oKl9Y1-pI4Z7&XSyv68Vm_^6V9Ap%} 
z={J`eDYocr<<7itKbo;HHz1C@^4xbM|}_(-o^3;9qp^XfR>IQscaL`Qe` z3-=c6N^6@r$2%tHBNhMdFy7EevpHY--!HkdnrwX&6ZrJI^-EJ1P!i$a7#VQxoy?D0s-AkI zH}RgKb){bMWxecoWlp^n`q03bNTW z!X(49(&n(LsY&O(p3eK8ixVqa} znJx<-xmG948psso;%m^OWGxr_EJQ_#X$W!Wc(VIpIr{r3pZK?V;Y%MMV!PXiSfWemyT}R?KSE5X*_HOIV^>RrJ3Pi zSmGpx;;Ekh#2vj4UnMPGbNf*7X&bv2YXSo&Mt$}~XVaAi&x}=@{%(H0 z5iT*bsXZ@zzlomsa&z4iqRBb&vHTso!i2IU?M84d}@I#`zp~xn)zUdb_HX2&}MkO$9Q1UX7_0w1P2-Vn- zL7Yu->iug;bN=Yt&EHF!`aWW4(uHpXnop)@d$Xa)8jbN2Couf>-#M_%=PUMLrI%Ha zsMZPfcCLoCwaFn;osmaQ`;H=A9SsUI^ylqkS?aoVg{1fIL&YCjSec7ISyV?akv(%$hRzKNwckgQFkB?(uvexTc!EBz8k|v&kQ=%{R7nzY`7pQBA?GE(7zt=`Tu20Fp0S9E6n3_GzQA?fg!ewC>(@ zTH%oV7|>8drInR<=lH{i^@zJ)4eB$@4UeFWXLs zrTTaA*_oK6>+i{F8W9qqB(q-oxd-$aqTWHQAfBHj7YBFrkqKUc^0xEMo7~Zzj;lPJ zL89JIqSG`avj;m*+}_5e^PHaUC@Q5#9mjNYas=|#-pno~JRgklpt>u^goncgE4{?>MqY=Zb#>Dy`{{>IYHTsE3MbEv zcEqfi^LF$;>o_seqdd0k#ucS2yM8@N?p!>|cczP-D{d1H-9I@H{Ycch!`0g6%`2gK zAD&fV*$t!c$xf29`X|Y^AHa>1ST2bFTkJ7<-|njf87_TEEp=7UJ6LctZ>p#a?L3q- z&i?Cllek)mOK5dZ;86H3KYx0;Zj9ja9%ic%t0b|B!1aZiXP!9lGDAs}2|H(yPq?=j z+T0rH9VqePQ1g?%g#KW%6&z0VqIjD;QKp4{XNVn6jIcB$!q zHsgcxuPyW;kY!Egofo#qH@Ngb67Bf5t^bn?-5p*vFyI_16sSQt&h4 zsTr`1Ir)mIdtS+GVrXE`S^SdQx3whx@wEPg_bmEe$W8Y@(@}QqwD>69=^45ThqeGJ z2bnG&zp&Z}o^Pk4MsJo1dwuRY!?z4^M|517}3Tu2Cr&#R|G-W}Bd{<<+Yk*4#CvyX)5 zFKm^RhY8Jr_U<{{cA8fM-R`}$%l*Lvwm~Ky zqQxvFb(bH49cQZ#<;zl~E2cs|rCx|wPVd%i${qFkUgKBY_3S^4FizWPBcJg zdTK8e3quDo+q#48Cu@j$nlQ-{nIhBCEmiq1`W7!?5<_LuXi@5td{y=56R#h?l$ z@HRJ>c*@tgQY`QltdJkq!&v*yC!KL$P;JR-T0HsqcQset`)sdIJ}&j&(-t28Sz*^g zZhUgxo9k=YTOT{SWzU@lE_FvN+YLt#w9b!u)nww5DDxbC%L#~XcjQFt`OF0``yLpg z(u?l}ocr$j9x`}}ALH&A`H;Pf;E5J(U!UkdT{FfA^uOzOd=;@F2(Vn%c@YP9I1l=$ z@l-FucM;rDJwKmdpaQwctqZHLaj|3F`(=5xwA*YSdJJn@dC#I%9;{2BrMN%-hC$TP zwYph*b;IW*jf+dRiR>YD|C03AQfp?Fq&J~&7XL50z5*)CCF=U6ySovPR9Zl~5l~u6 z=@ulUTe?9BK{})qq@)|9yGvTSyWyV~z2CimJ&Uz2-lgz9GiT1(XYV~TzZX9ptz}lv z1Ym$+{8nNx;=}54oh4sui$GUkiDjr$3oD9fN`P-9m5tEz?UANtDn|JmbE91I%z2T` zruK!6=AJ4Ji-I&qmCyrIN+q6r~A~)qLhMn6J}nz{xwZ 
z@%EcG6l*K;rf?P!i$^Qh&_xN)d?dHmLDl1f+QZx6Q(aUCt0#|E@ksN%z8)3k&2cVA zGb$7D&xWnHu8+Lz99*T($ zIjO|B6t|yrNOqZhN!_q7K^!5zNp3}K1YIa-OqNj{ekKJn<{W$E;)-RFIHw!!FF%W^ z%6}eBw_Ur-|3l=cH=*)z-70CX>t;UFo%cQE!ySE@c7u*__KQ|=tx!9_6|@Lq`Tj-q z6t4&$iKessvQfq^b^({^@V5Jw<#GC4_jj+PCh^p8@`k3CJuKm9vDE z2RsmCR=r3m0q4xchO0qvLyT2XBW8JhE!@}eaR0N1vbd6! z?!8l#tq9;`G&Yn0+rwxNC}c*wC4T0wLbAEW5h>d7!6Jq!2Y=RB+52pE8C>iwdc`#$ z3WkQ{Y%b|{**z^NMDB^PRZRfCJ9{1Uhr{G)pl2XUH9NWedxkrV_W;$wGO36zFVIlOF9j2^yx%485b`d*5fj8oqlXNG{eC?RhoX3O14k2S;irt@Zl2-+8&Z zEq;2;&ZbSLNL6#9&V92)bm-77%dqHgyY~VDS_Gplj*h08No24;gK%dq;?yiPq-&fw zS!?L3i(wM%rGKBtsZ>}v$>}ZZ>*+bzmmHFfgar$6N^_>HnRi738Q$P_tBB!h0}m%+ zN{vb)CvdfNa zM>M_-PA|7#+Q#g@xT;+sH5m_>A^O?j&VO)_YCA5~pp5zY(Q~o!Hc3@;Zl&L=;wec< z`wd_8vBiRD7dbUPcA#J+=K7?!;%u4?kAK>4z~`S`hb1E+DVchpJn|w+B=!8Rnl9s{ zi-E&3_5}p&zC=boKFvId^7h{;tX%k>FE+U05Aei)s%MbZ@?;5IiuV=SHgHO)Y9M)Q z5cy&v#UjG}$j$}>>3_ZFf%McY<%#jDp)ILpHZPxkB0c}y21?XI!qv7gWv8ve&WpqN z<<#ZoBx`X%PPeiHqjAXi$^LMg6X6$81pTa(pq*c*g^Cc-@qu?01x%mL=6XGQ&gONQIeqmDw z^U{e2i~~OSa`Rj-(G_9p(E|nG1vupC?fDWwW<}X1laBw?Pma&A7-xWg5!e&lzD4_9 zgZgqnh=|MPta`0G!exvQ4ba0GrM15ALAoOy@h3t)A^Fu{i~?$p*N!$AEB^2rurG&O z_&i;(u&Cf?UA$gkLnf@w-7W8_*O~ywi!y|A(%itFqv_idHnpIu7r(u3wluMKc{!@E z+826J^_8egzhk1NU;>I;+^4gwUmSp@q;Fpr_ST!W8B#?LHpgtK1cS#JQs0BSrXlYS zCXJx@pf{F2lXlabOd^GuxZoL=N5%`?w~GZ~j^#X7(>7#|<8As}G+IP)gvosNn+KB7T2P&a;OhyFx}L-V8Qxwt&m?fc+? 
zE%%mYG#1PsQEXe{x$1_pQkd#$+QoIT;Xnu)m79J6i87i2A?KDdnvh6DKP0Ln^}?1q4ZVPLwU&vwNbt(k zgJT5RjL4dGW?1Mif&KON@hfmEj#QZ8WwYzdbn(%T5yS3o{CQhA)1aPu0N;?I$O7{I z;_jN^u*KO{qevU!)j8&FOrd(-(V_U3$Soh{tOFi*TO0Hzk{#eiL?4B*Db<4`oNyH( z=}fGFMddG3BadSy&u;dh$PxkpfCJ7|@2;j!A+tGjuH3Q&*z}P}F$i}iVP?ZgCb^9e zp|B%1cA?5vbw9-{6$PLi?tlPIQcCQHnVP+!&xIp1au`kvsO4bF<$N9zRBNhy3`6cGT@zQv!e|{ zFAu09c86VqpW+NA;v49S$TrLt!;VYw;74`cAHON8SJySOJ(U<~#mZW7Zj=CutwF_ic3fu+uOKpxAgZTuSf-Jnh z@&vFAySI6a*S>Qr7qt%sU`^f9bI^&S*s-q%rDe&5@ z6VsuoSRw#{wF*SPEV)AxdB5&JV>8AKbkRk|R2&>B)x0sFU>C=XA+c;PTFF5=@A{pokzqkW?G#={ zpPX}{Ra5CUjS*=~^1iVIl5J2<6)~6h?a}Cs*`PAmk(){Tybu+Egm9p{O|c(=-)&a2 zaUk%dT-~Fh5e;)-J(0)#nnt*=WNCzyWnWQWI~C?0A(}LYaM`CDjUzntp+;8PJjob- zyf|HvW#W0`xJ_~_qejE2AWQ-{6?h%iVAl9+Rfk$6N_Rn7fw>`3z}6mL;cV|IZrH)c zy*ZwuYK5N*ISFbqM@^fAKf}^c$*;~qO z(maBmKtMAWrbStl{Coh3DjkYB@R6Hr%3B&t(C8YeMAwDtujkx9QtT?jK(umG0+a0r zq|Sf{ayf0eesjL8q{teDgbm+&-Npuiq_8!H?>f)VP-3sQ>$2edmVG1ghw6uhqexz zS1>?cPiK4;h1XqXw;?;iQQJvg)|XPW-STQCuxN{DXHGt{d4e#G(yNX33$=XIOZo2N zH?kzxVmP&=Zmn8G>Z27A!dsa`t1a>sh;=`NV;xJ(O<6x_tQdecb>uL?QA|d+zj{qP zE`_3!yjK(K=d;~Td=gaX%oegFy5F)BiSj0#TM^SKoO@tJoV`wnEHf$sAGuSD_g0Pa zSw6Y*X@}-23MmHCWb5W$gS(YaC?im`;a=ErnErCNi5>`DU;lKI&DPvt5hFNjyI0TN zpM>Nog-~;QV>3DD#$tNo;;iJpxvB4dbMoMbK$@1>>xc|r*KzEF$C*+R92pP!%d*_mSA&OeP?-#*g6 zX;7-p8#tZOr;-?NtFg7;)6-|mSs{fMZ_waAN^C1Y7LJ_J8#sq(M)6}8Ls;T0lu-1~yo~I)Hk1)!?7``+0H1%HP7fC6xD4E+HTDmW zeVEBjOl@8s!_T-RYzv7vn@<}QCf#eK3eb4n>?4~=$9$1?Cw5gf0LneE&rXaR&Y&=$ z*WPN9&W0*5oa)$PKr!(}Q9WXI7k*jVVePKC>v?}qGv?eov}+2APQev?e9t}i_1isf z<(reD4v+DZ>$67ln^r3JyFT8JK%vhPX3NZyV@K9-eDZp1OoA{U4V8UwOygsTl~yrlf2hTH83m`y{J*6&{io2Mapku-ZA}*tGoCfUq9h#U-Wjw%vB!f^R4Y z58k}$05>46h%a9vUcdE(`@xJr*w>A6Qj_;Xza~J7D3-r%LN98ygNmvo^2PuKHdLcX zVNUj;51^JwD3ugUcmtmed|31)eV5a*xEev@N0(vg@B>FGSb;$@m#e%3$3Fw+1ALD~ zTT1Ro1PkNvar3bBicj?Mb}?3N0c)_T}(FV>SQ$+ug|6|FzTV`v~U zQ!2;qpU8;B`C)3u!6s2%cNw$>fhLttnrg;^Rc5uAgOX0g`7P+&`;T zsAOVHz5F{S<1tI?cQ#L1-GO&q4-#Y~Da6527i zZ(|g^RTPN;RgZU@XUZ&1h>qpZ_DxYO<_`G3y{Zb$cHo|k)F+mK#!|2ujSuOK)QxK* 
zGE|VcSQxW$Db5L{P;RV6c(%(j>YepPzhs)PQ;;8Ao4idS&r?EfddXr&?;so#@=zkE zh>f*vf+(H_8^7dR3DK6Ngpl|fZJGH;mH9mZ@x0s%D;OCCVR&Y%`ciQSplb!7IAQeR zGe)7g;#0hZ9kAdiq+Hmq1sD{<;JU9=f|DWD2>v-_bYnL&hqXB2C8cdCh{bPEN zXj1IOaZKn@@iKIM`z!mLBQ5FG@uA^BOoXRd1Ibw zW(+;w|BUxS;V;s7XY?F`?J3MHbZOq(2L~m2tAFD{-h*z!;re2LvFiFAF*A$N>Du0g z*iE`FeiqXiK#O9_5?C;(2;c+8UMEI;*V;qF%7lDC0*qT+%K!@SICiyH>BA7d)QDnc z;jBq((+5zcrj!#Or1;V5Y7VWB;~IJtFN?*G-AcSq-iSuC-`w3|1%72d_Bh7zc$+c2 z8=#?wA@MTqy6T|eYQfS*=B_=WH}s1?ho+c?iD?54miHBWFQt@eTMrr8!XXAajZXbL zb!Nr4DkaA2`{h#E=hKu+#&UEp0?GUunTwr$wgn~q!&pw<0NtY4yBEZ(tml)4&?Mp2R1m;0=mARkF;qX%J znuL~LZXX zDxhpYLVoRj227S+TcEF8Z8*aod-2uidk^C067KV}8oNYw zYx~pw10Bj$!D@@GD)&d*( z*Ug&>p3xw(m`8VW=h-d0#vupe)@RE1SyAuy_aw0u-heArO#8ZMyyX0D~Wc z0#KOvq`pAk7km2zNm45$UmCCdUUe&e?628{sw@Ymg}Dy~$QudgddNuk2b18OjfGuj z4Id??5IN6gX;CvSg@FkCR&MASLEzs0!4c<0AwJLw2@CQ{)04G`Bqc3mQQ;9UeZsLv zp}6FS(8X`bH0y507dbxE?FO*qMiwg19KJZz9pl!_dEi9?lieT5n3(}7(DmrU)mTxc z%gN@0##)$KiWIhnV4#33{;I|U_LtjTYf)^_8u)shS{bLfF{6I3GVFZcsoxNZX_fyK zN}2tYU#Xny`{VVb|EZ^tWsZ*YV+IkiTa);QYa)PhW_)4w>W4XK^E$i_Lp;iSz!e}? 
z0*mkv;+N{bw?%I4ekNhi$+q;5C`iobre&T7lCfV^HpayxxVe{fadox+S>GEeTW-^fo|MJ#Wa0ij?f-|)JK5SReh(#YAn}Cy&a1~v%9?Z zeXxZfg|a4cjiOy6fl<8{Z@QFCm6qJ}nM$Z;doDqj$n*O_90gN9lvr6akdol3|C#Im$f9+~RVVcvP$ot%feFX)C6g=r-0T5W1V{~MGNo*- zW_Y#_pQ#&<=jV1GW%?1&qMAwdlgDoGX>I}U9H1*CUTKF)xu1%fL_ThwJ6_M?!{6m1 zs@`}IP4M(+NJiOdyKxR0K0O^V~2 z2D}*=VC(xi)j_n_ui>;fOHruF%|i6Djf8#c<+|f#$@d&QkaP|!=u=SlW-D+_>)kpz z*e+K1M=PdaAR|ZNE=Q}X=}P-iHn4>fTQ9X)c(&yqZ7uzN>Z+-1(mWjcn)pZf zXIz|Y+A|-xNkKEr51ksa_xgzROA%g5S$v%J4HAw#93i$8w|Bsv%YAWikQ9i8Y7Hnc zlF9o$WTDa4MtS%x`cjFql|<8jdR%SH+BpO;3k~v76l0gtg}BiA0#+%yR>k-BP%7ct zm#Ga`nFyNYKdiPH6lhUHala;sx^bj#6SDAO;_-{j$#( z-D0>r4i0XAINPqo7mB&faK0_=^&$so!;LUkW;+BPeOKap_F&=R6&wk8TvXp&KUq?$)*F}d$|3Ao(=5*H_RS`!X5RwH8icAgFrYT}@feQSUX+ZX1}{mb z9p*8;$T@$QhiGDB4$Q9HpXo0h7+w$VNCV~${qcd*ZuO%B=~#NqIS($t+df*ni3Q)8 z3LQSf?FTi|kuNYY^^dMJH1rJr;pmURGS)W*lX0TsZFVDN#e%=K3>9#o6BryVg^IYi zw@F%JQ6<<|hW9LMuMGI*f95O>O_9kRjb3^!PB&7$RTT6&T}us{A(M(riD>y&Bzgp-$`;Ls%xKJiCX(q{w7;n0K(JWE zdPnqxz)-H>8((QIx(^($(&siIKZo-)2f=&uv+1fS~% zaEC!8om3VnI8pWqM)E#Ql6Ub##K_ujpF?PI7qHv0iAfaMr7}jul)hbf$;4om(MbGf zL?a63S325npc22KL?H&x)j8s$N2ZSzFnKq0+Y8V@BlLq>78fY&K^W$elwsCq&`1MA zFnV!8Yi8w7_08V?0VDphu=*@CFEm`W*WdgQ*!vmozDIF(%FPLg1O!+nvQDb8lsn~_ zh}Hf(hfOmHZhF+V1UT|#K*v>L$lEG&n@Y7W{hLX+-7*<7DhjEfUJIgKM;vmQ=>U}n z3zHUF&=PvsEvQCd_Ph%VoKBmW6+$Nq%q<*3Poz<8gB8}mr(0)=8}tXhtDZ@vwUst> zr@yOCnzxhVj}c*b)w=IlSDG67?A{?4xsMeh6#({3K+!(A*>Y>T`ehmpTknpS4cQ2O=&mZFJb=kPN}&PW)mD8U?aX3!!ZV zof-d`&WAyCvUzXIi~wHWdQ&(##okbW762g^c#H=1S}9wIy|hEbNA)rQu#rCzVRje# zXcP$w|DIHq#EvxYDts`-#5^W>ht(O`UeGzT@bBct zl0w;*oyq_?=`n4C9V(FFzRUW>ut=*6Z_(GmVpX(u0bo8hF=Dx^7Fn(FBBTAei64UJ zrHevp#_^>AC-(VSpB49jQj>Xyw*E|oIVg63|MllWe>kR(JBV=$iJL_`2mgt{ST{g; zd)HEIy(s{0pVr7rO8EBAj&{929z7~TH__>RYyd4YI1y>Z{ijGZtae*2Z^aAJEUFpFvy&*z*58!gQ+>rH@&l%gvpv$-u4B~ zk*+tzrFt&F?OZQkB>l(t!~n15E@Z!Bd&sB8cg?LV8f*S%eQGNIqE zs(A23(k?>HmxaHHLxrT!4oT(LZiv^VYI}j+4Qlq?-rb913p#`jr4c+o0*gR4?#!;X zMHDlgQGYibhvSGM1&a!05G_chtflt}R8l74kZ4GOEN`k*AVlLSC2Rya8o!b7p% 
zQUizzfCAs<0{i$AfK-P@dGBMmk}KCDX{PO@n)p!*aHJ`YOGEYu6? zl9Y)3VBkj-iv~n_NWZ-Ka2d*Ey>yV_l60RsSoP zak=|SsvKSQ7qM7YJx10nwV~Sk369&6<1m%NjQUT&j4m9*7hmWTjgR@4vQC4A^3g_ZJ{bA99u>D3&LE1;e{a1H9_waP<^1CFPCg|@m|f=;B7CL(OkuTq9r@f zIw$?XvB~Smzz5>-pSR5sj^};f_pS(yOxyhzK#ZTnB7j%bz%?6zL1XnnJe16kB=L{AHV+tKsR2g@5x__56 z4JiO$gZlOX-BMuao&Z4obt&9-fd;EXp@;NkQ+n1!L>c9^%j*uZe&ElP6dNDC69;$a z-ajFW|CSQT<;7Ekk`rj)O&2$ehvoox*s0O@H{0QDL6D~r*dCG8RBZvq_3yvM3jjeY z`(%hgR8lfOcY@Kb`x+zbtY$F>KeaMfF2bb-9VUBk_oJ| zFFz@pLBZvp2cx|g^niKPI1iILsKPfKhxot>O+f`?Vc`0bauq-Epm5&$`KBu;^zMxN zN80b-^@5YFo%J}`QQ4^Vlb=pjf6cQei2vNCV9x!YT0Zpg_W@GLjuP+R0F$0MFJ+DP zfgX}}|Bzz*r)F?}Osq-P)WCZ)T)(1EBzQ|3540m_jeQkDXfwOMR`&!z?eO1943A*Q>U@5C)qzv77z~FBUavpt{<8be z0p&S_|M@Jjn51tNlJEAXzwBvXzb_?+GbKn=R@QGR5XWOi?PDC^5-(JtyK(tDE3sbC zcUHsmgawguVy=qM_kk|G1Z?|dB@+I+`(f=HK5Wjb@PwDxsJeiTQc~8Jw)hSs7w{q4 z{-4benZFI7S7i_DipJU&xBB=|V1oJ6NiBNG#7;YpkE+`_MQn<1vO$fEZuoZ!hdMU} zC?<>N1MB;4CP#*YY7`iJGySQ&On4g3vO^zig=PBdMASo#zN>z-d0P%m8z@a*R5QNP z!4V~E`r0cpQrf*XRJ5l`y;;Yu7;|i0M4=uiXv?=hI;iI|doc0k#mY#E5_C+5VplMN z^DK5`URZkfdSt1GXd#$B4aVu}?0jND{PUj+Ri?hBJ%RdmvUD4PZ=U6Dp9DfZlHOi7 z{M>kb%zW2rvFt_%U{9jCnW?}IHa+6XFDKs4<%{F-PBheVARC56TM3G}QHFhAHEx$z=&6eeeV{Lfa75uy53EvdWFsu{_sa&wrEsC154 z%svN{P6H3)jT(M}-AX=}(#vR{>W3sQm#u#1XQ$%^6GZU(fTR4*{}vId12ZYsgSBVv zVof%_TfX!;3Uld_Z6I>0qHMrVKzAzQJ>z2k%OkskQVYoYbnv}~^Wx4DKcBJ z6kPMs85F}u02|9;$@(wFi*hB*X5D7djJqz9vCpML68n5(mX7xtw0x=Xu( ze)I>;p+pm7t;<-3Q`@hfD#1YH0^SR-CP$UK^DEDe24P%Tfi{w4jVrN^uNx#S3khua z-)5U1fXdxvk0=}~r7Uh)^a}Gexw-R#ek}zkO#4XL{!x}YXD!ovZDv=shO0zoTSk`# zAV8I^ImExI(|bJsyGj$?zJ_u^AUpND4|;^JGvOe%{ReFlpGbZ%C-bmT-ObH~Vl}J- zuM2QvbE+`H13_LX(Wt5072)t&R7ui49u295g!tYsVIY&$7m%Ldf}K^~5iw553q~C? 
zISlf=hYQnK-``uIhf6e@TstiK&nyNu>sy@nsBUeEgWg$<^Ne2uCpK z1JZn+w0tGmC-lePg^bjDIdgfbMaC_c;^TSD#ak~h6{&X*9l)&+faOUL`_|%H`sfj3 zPw40Qy+bUEP0#e%NZojmMI*OsF*V*Zz+tnQHfZIBb~WWOavOlSdrJSdr32jm41F?b zx4NDn?T=Rwn!iL9JWRp+U^4prnCS)o{mE3rD~aVUcc*c(^J|h_?0F?XJ0p_*Lt+3;I=5?LJNd$#eQG9Y5S*Cza>@C{Bbpj_jY4R%e_Q*iu6y-L#&65egEZ z&kF+707LOoh9y^nw-mSgw_fIIsMm+x0el&P?w$P7J55UUGerJcB0u)Bdx*O8*f(-k zi*H9g)WIxgsL3IqmLB7Ik}^F+hJDwk{X4N-lV3nFl}#3R+Fb~agm^J6h@X5P@uz>( z%+9{Skq@ht@E`$KBfC7&n{^%iXnEPbPF{>8erRt7ee%zSFWm%B_eg;>GP;{!!77 zpE^a1?&*qB?nMl{FG6;2sGax2rEY;hOHD^D5G3K$?_Gk;XS+*%H!GZ zN+RKrJ~?wIDGKnCOpD7EXKsUYTg^4vmZQ~>?5BYs4x7j~!<5aXT~l%1j13BK{|#?O zSKYb6_j=d%HhfAY*tz|ER8BfE3K167h}=%qO0&O$rleLMY;B{&fB4SaMO zq?JAE(F7#h_eK5Rw1a!H*)A=3yh{00$!@xC5sm=*^O}y*Mxm;~`Q_5&>H@0`ByAhx z=Ivc}o?Sq`KeTXXqI^+jYJq{{dQaopFheBinMYaO9mzq zrX%0}?RO{F2KDTw@M|AVm5-^!Qd<1TuztjUhF>!4;4+6^@Z9ty0hR!=zeBL%;wnKa zJTo5v+&YVy*(MP2F;YsGEK(o$`RN_6bHXOaUQQ(VUMaVRi z@A(A1$Tw|&#>VguTFb<@#+MqAmdo{$j$&nH4u-#u2^2&B6Y+@-(N6a-p! z@;X-!E+1!6s|H;^FBH6s*LdU)ToQ@nUawELPCn4-OE=(j#F}8#?p?78Id#^Z{jFGx za^N5z`p`0rJnKy<`%1EEdW_V&W==3E^w)JzLmrtskO+{jrnlGaE(e$Q#eDs(vJj`F zF#vYpvyz9`G4;SE@o?c_GQx8>@W;!#i&pAK&K6hhO7RK||IITp>J|h`a$n0-&Yl`_Sgx4Py|dqg#Y5LtPy3TcGDiW4UH~d& z8gRpHcLWU%@4&KSCqM!b8EBRYRg()lp5y|*8DTtq{ocbrJu&FPVj1PkcN}|s!r*o= z$*n4vm2%;3^*0*`@y)h^j$0E9KoL^#WQN{K2ObI`zCiwktW8Y8TPKXf+Sje73h2DJ z?7!S#-w(frv*s^CWm#h=n&SQY(-R-@?3Nkm0Lk~&rP9bWR<84c=f=jEe6fqVb?@a3 ztBCO2h|gO`p*)KXnfTc-%6Kf`KpHs==gaQ)-Ah&x{^CFAQkhkP1~0|!Sp6G4{25ce zbP}2v#}hpwd|39!1Hqxd(UG|q)l6R!iSym_=~#-GKc9@AAui zM7MPnn0L;L*I#yGX!$|aVjc4F8`9MN7zU&diPg1)ADVo$WR@VMLFiESk=pz#vqEAF zl#GCG`6&4#?LZ72$}FZHqwUaQQ?o5oKHSb}f)~TG$r*fcRs)j2rn@2d5!`SX^=OER z?nx(`{!xE(1NYQJt_18t?uie_o?Os&5f2anAi`EQN?*g3h6mYv5}MmFH$?V*|GJE@ z+Vt_K{{EOXASj;d4u~R3pRa+0UFY-DPjMELiic{wWfe|`;|Pt@@g3LRGre$elvZOU%_VIgdAEQ*xNCk*S2N(s z^`3rzHs@70gA);VUkr^n*)g5?K#LAj$>x|=OIXIECdd4hs=dGiFPwi)^$%ObO-U|6 zu`i{{)^JoC5gfn9_5eSDePguP{n~CKM0d}-kq0Y2MNsHvZg(%3SJE`F*#gw%hr6Zg 
zO>IyI1aK__N~0SFi5fr7+u*Ivubs6x({8b#18W&>bzf>~5hz@tP)IfUxCXlXab0Id zmm$i{FuUT}NQo58bH9-JBZG2x@O3bC`#s^JZR#=sJv>yL%TzOb@M|0j6&H?AdN6A( zZ~j&96Uz)0=I9#5Ki~*;){0+~?ws16N(PCU4uTkg8x3D|)0_Zl57Z`uE)S4aq%yF< z!WFfMgkf337+piZuaMxT`o9J8fLhuzimXt`8W`Lt_=sS@itBC;3vE%d2`?3d8V1iSaZ6YHSIjS#W*UQM>JiaGo*2qBrEaN59L8o(M z2Vf>(AXFO83*LIuFnXf?@C*7uLfLEGoJtmfOi-IfOcEQmWc?UZd~_&0hjz^@lRyse zg9NtWj||pk4*Ex1!QqGg<~iA2cf8M!(0@VNp@fv|Iv=$D&gB72!bh7Hu5a=y5Oh`w@YALs4dQYSD&pAvRU?O?;yN`3x^ z?4p1{EJTG$8|jZ&OuZU=*mzGU8JSF=MQfi!=$86x(iLbUh`8npR1MVC5T&K6*rb2U zv)n@hoEI3Wc@223tCNiv?~raV%ttXXh=?;+vumCK*#bHvc#N&-?JXovLBWzqWst?K zp=eNA$ZW;2-ljk`vg|G~JIgH$+bM77;8*z6A`r)X16TV}qV%YpbV|6kLtestjiOzfIV$ z{*aH16eoP1=368UX@+$xLJE3c73JFzXDkBy3_wjXFHUF7$eww~dxRMmTbk7{67rt! zqH(jN`Oj5;5AnbxlixM&i9@Jif+wokFWK5Z@{>td++7o`(H(3Xa@;eVjD1W5usJIv z901#K5#o0q-N4{r@Z^&J3^q0f5U^?H<)88w$r^OUWm~mEi@*^ba?fLCGRYOH_+KjG zC&J)&9nCgS170qJQ-$JGNip0>VA@Rx?_H{h>!ST{vo{}22B}B#q@8*dF-kGs7?VEf z9Q>A{MxI~P1s!i77|QTpKWw|_JcpE}iln)pF#2<`W*mzBR^Y z?Zy+ClnTbUW23d%R$@;FL&)Xor~>7wh%D^oYp>(#YxCj=FM%?JCsz8jBk5ul*Q$8B ziWm`cpKBQYq&EkD{A^`!QJzT!E-Osvxte(Sf9dW_McNf~QWL(aj7TP_xF>w;I8PLF zP2b;_7<0~h1=aMP&>aF7dSkPufjqIRY?GzR@_{6#nghENyuwOv#CjSiX1*UV$Bh2; zsVZTA_~JyR0o_J?0C6dEsXSTJuFEDiK%@uh=3>>eiU8LM_~^*NFm$BrEr|ZF2)|_-U_6H8W4;<@jqD{1;F|7rv5*hk`UM-lO?GOFU_!ik0 z5~F|tn)u<@t$7tdT(S1Xu2&GbOcp+_@>sXO47v`-@H$^SQ0%{~fKe8&Yz?vwvKgIW zgr}Aec`Zuob#wp(3Fd5?fC2yec%z>NTT|*zwU!Mg1lG_ zEk^xesob87G9&W+`7JS)X}|n0!FiJ)`!&mp2x~QP*V8E>%a6qy{|%{ zKda^O!>^DgDr||*B($+#P!aeMOZh<8ikc>62>!EvT_fEXq5LhpYcx1EGxYW;-(<#1 z7S%Jl`(G7l=$L{Dm{1G|HpIiS?bv`TPi7Od3K;4r>UTMAJh|2XObpJ$5bf;u4<(S~ z1Fc7A;C*Q>f89>1sQJN?*83$!omJm_pf^jio(Y8;;2_R$30 z#0?Y?9#X|jKtE0pyXA&Uf7j|!F@!ELyzR0ZIA=;Gco?Eb%oI+uiw!z5x5txJjc9s$ zg4d@Zf36k}Hire2;_t*Cfa8{_f1wns?|hXdr-A_tBwo^T>L(!5LSwjE7QujzlS#fR zG8>&SR|~E3iCEVqFA;h=%Kqjhx-ELve=A*g?3^n&NkCFEsPbC|k|-l~iM}@BVQ=f* ztX)+jp>WZjLGjYFBW|9|jm4m$qxriQaBhTbboygv@zwm?%;R=41ODBrs+p4lEkCdU zN#<^~4?9J{JXbMjB{PhDr(y=!r6Tvspme-k1AzE@^9`IES6@n$nf(VLb;@v>;aStt 
z(R7nSK7cC!B~1P?y`$-e2;WmU@`m@p{laYgSJG)1=_;t5oIkOQf8OQ$$maTrH~(2J z*|~1tRJ}a5e-s!@#9*VE*LiMrCtP4PC`UM!j}JERQ*?wxiIeA|60Oujt zolSiO&VgZ!12>ZTL=x$WuF10*?8>t>A4b^xw3@i?(OX)G;Dz;l4|ccs%5kemKfs!9>h4rR0EvPXP`EtHY!^YX-bYRb8{#!~CVQ-@&W~ zK%mI?Z`xEfyxWWCptbHW6 zHr1V#6HD8Zj-S+iLuEzW_n%US;@v2)5LpnX&g%DoDw(zJJ}=R>=Y?0~2p3po@vI>-F4w@?enFTfY-^6%ap~$@@qpAmsBU zbw(duUGl)$vANok2$Ft%d$l5p06|7Y==MJ4;ubk221g&*?#`kep@7}?iIi^0AL#JC z&T8L*Tg<=_6wGO^h)JgX{yQfVi(pT3#@EeG;rnYv!A{{KW$~h+EL(Ifz6MeDoD7%a z0pVOKp~bY1+mXWYRHyiNCEU8v@W%mH{@l~Av*!8>R58dVJGsq&)BlEp!G6NcY=R2V zIC!#a*E7FI^3A6yCdPyjdAST1&s=4d=6E)bnksQ=;z(ssc-H@ZCd&ZMWty0=0oO-0 zo)kXV+Q?Cq0eje?Mt!ic8E(Y7_VH5&dFZO*4lq}f`)$VjF)g3$gy%Lr_%^RM;+%xM zqz5o9&tl3jkhW7H;486KCQa97ka0zrY$!Xg6cIrPu~d+I(2{rox^(!pfm*fhKBuEuf;@!nWa|L|RIa20@YT4rvGJ?vMuQ z?h;T^>5`OAY3UG<5|D0??(X{boTKOc-u112t^ZkbJPRG3+0UN6_g&X@-6GyR97D8I zMlcI%PS?w6F@%q)=2el0TbnBEI(j<>Q2#*9g@PhB6I&OD4xg}?&=|l|41o+t)~>v- zD44kpFo^s%pj_NB^D7_#Xb6L){Jdn z@~OA>6QE}4$9j`m4`@Qim%+u^N(BaAH6ReOC}crFLHqN#m>2}>y6fd6Q=k;Zf1h;KFb2 zKPDxi1hzpS;3L5Yi;XK5jr~ANKyqxD+t}TWXegVV)iLMD6b|L*$VeRo+Wm`Ri&koxe18f6ROhU(bCdFAV4nI z$tSn|=sR?F%x-R`$s9e6$%F0QT}+ozbCQUxkh!ouXOKU(^xrly8A zMRQ)@-0Ie7FaZ7U>^oh#6He0G27%;>lUFT$!)2RGJ_*izq!mE9Jp87C#9cNl>wRQo zWZky-JrmY>a^#)v*`*hddk$?V+gMl~Zp@XsZkU~)$9y3t`)On1#M~!A%0o8l)YbjV zgwx6Xk-6T8TpsRihgWdKDd)hyilr~!rOQ)po?U){GZ=n%m(jsvUTKgFR@2Oh z)xhe(p!lC;Ck+T2=ZIp$b0U+1a6Dc(SAuXSe76TkO4`U5uRK=tIkUCoDc@BH0-?7P zB77$z8kK;D5lVh)Ieuy-4{udaSj@+M^s)dNz9=OU1=8(MVf(g3iT1|?gcuRDCN2MF zGGY!oB#yuUsv9rVrFG>WDtBIhhEV_6XGQyn^5IJgLXxxViXP(tRxb=sPtT=@%A^f;xq!93{EC0>NDaL~bi-Xk9&YzRYgQlRf!o9N1?KG*X?=3~-cj+ZCcwb$LC zC~`AS=FSh-`a6mDg`hexMw{9#WPlPZqENtco|Ev=x$DUz4x*DX{V1Tw_TC#HYVS_B z6)*eQalH#+r=d6p6eR$Xt>zz~gCOmQaez-a6h|!VNx?=)4aG^G;TtwNe-}uU=_lF3 z56P1cAfP+pz1^ts99lhS-NVcnBz?y{fIyOkSm#BhBZMvZGZVqEhYB+EE|5&C8r~xU z^+5>{Uo|{k5MmsK1d=3G8y1}V6w?NWbUwb{!g6$NlpeGCD1{VII#r|MfwEg}Y>e%m zEFa~aLndRYK-BNa1j=~O8FF9+eu4rk#Z10P*$6V+ONHoz18vWPr~%oFt#%v|c1B#@ 
zXN-uDIXMvPcpUEUmgHOSCIsR^3`Pq;LO4{(qci}IIo3TE_2lf$aVjq${__|vOByri z5>?K615^WsK2T_f|%8m;Fu+c+_n2OfmS$w1lmm&a(9cGI5Ju)pi>Le9ZLyCtua|GfgAM>gmT8$|l;h-kcu5=^J(EpaR zQb9e#2k04TBlXW*xBzB7sW!oQp8k>LQSAHYS24aKnvd1eH9!4wM2sdW-ncq%8NRzY zhQSKo2X6CA;_$TV+rgA0AultBDXw=a72Qt9VDg^NPtZQre#M5o@lZm~MV|;i4%wc3 zLPwv;LI@-c%WdaEcShvNDNHN-KD{h)}RK-3wLjSm-7 z?Nu8EjGz-Ce9tT3?7WnuxWMwXEu>B4{Q(~k3MvKd!6vkAVQhDxP8i(2??WKUqq_d; zS8OZ=CGBK15cw}}J|p7O_po?X-aazq*Ng!Y z%RtdIaVrnt*TiE#hBUjAoFosOmop+aNaDK@4st;bhIRFc*!Dxr2D&E5Z}Bi{6vR5? z_c9|+-qQ-|cy{%(ns~za1_}ZEB#j3T0uC;|`)|I==wR+bHo-8kP8r|@M4oD z5|+|PrY;8Mm1172l}Q4vC8M?MD60JcG#uV0);{1B(uPX{$}x=7MVIdVSULs;LUHBA zAtVjwc}u$!?45;-s`uJmqV+fyPOk{Hi{$A(;>L9 z@adHo%=7QE6wnuqIqX!$qd94~oh%;=KAZQ4NFaHtJtE~q6!Fudd0C=FAyy{0`4wKo zp5i^Fndf^jSq>kc;!&U=zs|}9$-&uA6t7eu)D=4ABM9N9I+qQo2aA z-`i}W5k^VCzYVF3q8C*>`?M<`t5BYG)s7}6f(#{kC?p<$i;5I}ye1-ShB?a1tFXET z4;YiK+<;;Xq;*MBB9+O|_Bq~eu^Xc&Og76J7hP*Z}{mE14lqB2&z*<+|TKs&M3U#~wo&bzZP z>paXE=K1|h1oTF*o*BA5n<;O;q7%TtV7*Rxh_rBt3Dc8a17LG+tBI`jKr9F}AUFTv z<-#48w9k!^F@q+Xal^0l8$?)toDrj_wq|$stIu*9xw2wY*0qMy*0r71jj}K|;c%dz z`}dM42^TfF=}+;h5gpgokEW`NPD!+g zxzk$$vY5xb`!W)W7Ndp~!*rESBw2!L>TN!qui91&i_Ae*jrAIYU6Qc13%KiwXCvvN z>UwFhF+BpC)W&H}IKBG~SOZZ)$+6zB8vF2y$*%$ zA-ut_V0eOOpXo?86#7O!Wk{Y>Y`lEW+88P6Y@FN(NRxo9dHO~~Q%!4!Khu_=^8O&i z__Gu`FxoF>xWQ0aWEhdpeZmH(Zu8J(}tM-1YjwktTa`Ur7WKxeR~%# zPx1o~MZ7{VD}c-%%-Sw98_zPei~Az2Edv$QZw}W_uuX`PD30ox(bMQ*!%!3_ab%tc zNa9tQzWqlOS=pVgKDF3DtHAzrv$DL3iu=X&tWyTsppVldPxoEBkF1tP+4Z!4f^)D;HD{lRVGzzdJJ;r3*2&H5Y5cP?n*frlVlX--%NjV8Ei1 z#!A6*f`PyQEa!JBzz>`qcGA{{@7nlX$sxlIt#g-}%(ks}?^flO5S<_!MUjdWT50w(_R1-xEJ`eKYlNmi5GlPqXUQ$gI_skJR*wGm12!P z*aLG+I@c>}qhuTyys(U>2%w4hl3V6H8Ywb*WhX{3)9_TUCPc9)AX<}5+zIg&1mL}Z z5EUd%#}Puq`v9Q-z6Vsu$PBWBjD=*d2*N9zt65kkKfm@LY$$WG$ME!W{CNzW6txBj zU$uO$wPBoGo$Bf%ua4PoFgZEJqYiSi4)@Qtf54)=sR<>vq1j~o5vsn!x2Y|509R#AZ7)5)yZ?Ayr2zQ{lI~8 zR>*Tm2tS3eiw8WYe`a>iD@;Ald)}DVjTPm)W$beih~ts!bHambWoa`+YxoOmr06LL zBI~vk=6irh1XwXLsTh#NO_)m=^7J!RE`6uXp 
z=Pmz^P!X@llLx77u(IDilw#$r8W*?zHS6+&^z^x)K=~oRB9ViFyf|`7Efw zzU4?90;U&1ih1q-(YfOsKw$=;VBc)+2`_;bPu*SGrA0XL+eI~%y+}; zk117G7BoWKz;Km5Lj2MW^%P=%8DMuAu<`u@IZYLnGS@4PH+q*6S>#P1$v@}I+e`XD zwI2U1j$bC{zyB50Uuf~BVQ0+C=q}TE4{9+NWBk>VdJ6~?HC`i2^Rhwtr#S~LM!gkw z9aE0c;|O9x0F?E@+tv26gWPU0m>44R9!2GQTF^fVk)fcufl69$Y=%nk7UhG$HWVRT z*scRt@($4TQ?_}j4DzP8B0RH!mE1R9lSt$kpi)ZH90sij5FoM7aC7H;_$%auNw-o-E<8PkYF!wK$h{r`Mb;|edx3vzQZOUL zK_&8@(V0`vKZ6iePaKJDXHUV-$PT4w!|%u9+o*!rRX~vdFT-7VhQ1EMJoWBH9Q9A( zQLo6O+TgHF-5=2`04YKkAn4?q*F||f!VKoAfv0ZmY}A|bECxZj)TF>5_KL3#dO{8| z1p{948@jrvh!pvf^Fg4A4zFX@;!)q~r3d6m7=akK(StJ`FIFATGinDq*wFsDvU8Q_ zK3agvUB;7hA%M>4X6`j9TES6E%hV(t_7)%^^Q~)3qsaAA@zJ-TPi2Z z)r{hZizza(D>rcy&kas#Fx1XZ7DJ2eohEg5_D>+Kt7sYBOJY&%Drwvx1g?R`*eSRb zFkye52%esa~}n**>WsXDsGPt0GQ3C`j}wNl75M z$UM$T7y?}svq2(T@g|V`d*cB^f*vZ+9s};2;b2Yz=G+HrxBx%U?eO)A3fQXPGDigQ zRyh!kXkzpRYyEvsyw5j@qB9{JDw;58R{i2tHW@6n^B!)uUi ziSQ%`?xH5y)@1N1pQ_3uYPp&apNyM0txg84!-EBSJto#vPthmC0HgBt%NQRKo8-9whR91(&w!rktO|rQ$j4o5?!7NEf#4 zkQ)1JpMXJ7Jh@=geV#rIzs*9!14z<$ZgX$?nMi1_iScM=50B&;E3>U99eDu?vYSa5 zPOI7>d=N%U37Q~BN9GGQwFUK~Ne4CiUVVuGa=h0?N}!AZvOF2p-7%xyE&s~-Ggq2b zz!*-_v>MpM4K9pl_lwAx&!z$(58*>g8!*|)JsuVoGXP!CQE)s=gLp_JL~LHpf-4)w zeOtb}&g77JD_HE*MnD8}WUwQ;?{D1so6Wa6XQR;*s|phhL>mo)y2-zD`+UQU;39yu z7OO91(U|x_fZi91i>o=r!Xpj4?AG`jiJKe1n3G+*Ml`15ni(4b-UrcJrpDTA{7(*B z3F%6aDTC=Ly)cOn3%PJ(cQ@qZPy7OKlN};a8*if$@5qikGOfMg&U#5PfO=CF4UGmI zM2W(0P5%xD<9v<)_0>c8QFr&m%YzQu#-rXLzBY=#p>Q0MmPLjY9LhH(&RWIsicDD=S5h(QlT5FT1XQPx`S_+afbkwIJ1IxI5rg?i`78aql@WD|S8W2Qh9 z(Idq0&(grAQIxf6xv8Vp#u6cX$VZC@FYSOnIbn(n8Lh!6wi+)QCbfB4-!kBzLE8ft zmaOL4&w5tN4>Aw2n0TmglAlq*3khYXoBYx8*@hi@@0nd)2+I3+Dd}O#0)2sza!@Qa z@s(6>uE(H<1rb=bvRK0C!^*u4{nCGy%S zAM&`Mw;r6{0twyesDb0}`sds*eI%j=9k=u2@kDPBxdV5LFF9Wo)wqZQB~B!k8U^70 zn$I@!paSe!9};2>f`A|)C?GCQHxK}|3?Qdk(Q;!#EBg#K5inJ_otC8sCpYWD2Q^g& zgER@Sk}J#gc}s}TTEB5B{J+D>dDTcX*D*JAneDi?czk8^&9<|qp6qSW_J@X2$(&ce zo&bwNDu^gT=;$t$<@E7@y93t5!~N{-j?lgUN#ms{N&TssioO@CP>K-gXV5+aGBlVR 
zWVGN-0Bg?znuNTJ9y-3eo~$>3+uHwzoHdwmfcr14Eo-F*06MzmYz=-mm)EboNuCS+I;p6{m6PRhsJiBiE)v^JbcVSm5m$K%ut9BG|)NO-gTG}UD zkOrnj`F{}Z)t4t*G%(fdJ(ngqayo zJ*+^c@0Yz0u;S9yc|rp3^MnQ78|9fTTLBuo=GLjB-_GtdPQyc;3 zE>(b&1lokoZ%1mL0Rl?#95Jd){~yV!#-L8OyBIO^svtI6oBJ6#ovLDV$N`{K^X2!TPWL*d!O`S8K{0}%2qZa0}CDnXFhv(rn^B}o)}xUR9{D;&A1 zYotXAz~6b@7cc@K59={j6~7%wP1n}cEJjT*cI}8B*ZurZ_7)@o<>AW#Gy{fW4XZs{ zJh?>Tx&MxnD)8L4y*V39z(I8_0;Cu4{9)`npryD9kO1)@2@_4*J~4%I8~()pT+wYc z?#K1?gu=P+eZr4|f0y9!LqLHKX`;Wn)!jW>Mw4S`1L9o3YW_Z?fBgF@ucp=XynPR4 z@iJ=rO-&!W)!_v`TBdS0*Cje44FF0TdhZUU`~vj_pR))f5MkfV&5LuW!81J3>Syfn zgBG>(^G{3a%q(O5?jooGoqsCzg2Lis;Y4X4S-2t_RD0TwprtQYr!$0b-CPM>Ur+&l z-|xNSQ`rIYhY7ff^hh94m+&3d8lnhFNdlvt@|*s&aKm%2Q|6W1Y+ zMjTW%+6YNRP*(*x-Y2gUx)>^L=ZGrj-#`Bp7c|v!?@g8WFFs5h%GEFwK4GilFI-mA z0qWDkL`?X~soCAV?!d_!ee#2?D?U{51%BOGxF(FmJ)p~UQtt(h0zhV21nlsO@uN-J zAHU?kk5DE@lU=RZ0~9}85XcZ?5*NFfc^0LzB0+@j*A9V_3LGe+eF`K_n|S~+FXufx zTQAY5rjAvt2CaiuC)9^siN#{#MB8&O7>G|)VYG;^_ zKo`bt*OrBo>gdH5kZ7oztnv-f$wD%*~|RzGr-h`I`sYEF2Pa!4KISN z8z}vahHj!bNV?$Fme6z5ZUw3Fsr5Er*IJ{2J(r!g8$pDa*yxx#;%flKsHUQ6clUvY z$J)lH*GC?wx0n{Rq=!8Q=_J05%K3&>FYlL!{ zzKK$1PdIGc*n;7ia{V_X1B;CJcDfKI!hrxYIsJ|i{0!3-0Cd#4l`j^FpU7JT7Tv&B{n4_5bK-r3qIG}KUDxN>e1tF4uv;UJ z`Ikm74*j)$)E5;|pV=YWmETsWm*@NA$eh{stzRFv-Nexty&26CI_Tn*pe}6nJ#3q( zt8q>rL=3~e6E7e61+7lOaIApfCGYF@$jYU5`I4L=Oj~Oc$ZY7`k3I&~8vw3fdUr%Z zVqUvkV$FE5zvptZS>Fi*^RuN9`6*yk^vR_~!v3=fApl$u1MnrjW{) z&TMPmN>5CDI2P1-v5Dje4&fLFCJ+xn49@m_qkuw!NH-dD>o+qjicrut2-ZQ-;7_A-049Ytwl8Lv;O`wGA1aM5&H>%Ne&#-zpC3TzNQIANs^h+e4k$ndHqZxY zcLx-AO-;_1(Z3hRVZ>0oCWdP5!`dy<0e*Dc^iauy@~0;I?xV#0n5gBC|&m zg8=?!Kd{Muxa>%`YSQ9f`*_gH>0xbS9m0L%s>;S_?-$_-l9%21J#S))4G(lLJ@);(Kzs;HPLm2KPs?_d{^K`;KWHV`Ff28$ z#hmv(&$oVDl^bkrWLQ3M?QQ;+^F&ZCUBLTvlRXBOTaD>1%rRjdK@}l19e@J~$cNF0 zUe@*>NI)^ON%BLw_cCGX`Ek@&Q&;3IK2p}4*!k&cbgLEuAbD7~MDO_K%)LW>4SW%p z9FwWAp?MiiRUxKp76V!YtYK2unyFXK!s8fKY}UGjAQnmsx4#Cb~E-s150|J_W!QF_L%80Fab%-nuaPmkH zaPk~}Tm>{_5`8i{fYBr%W$hCx#*|I>_QJ*sodojkl#7711rHG-tkR`|F)JrqtPWO@ 
zh1wZ?Z%K&HG_hm9B({owaRo|e6?V(+s(@vPNsW%kFuyPrAL|H}{ug!&a|mU9V6=`r z&P)-G|Lt@TjRgJKTQ8@4T>UtBMZ8`PlL>4#AEfgtmS}tJ^yu?bJ({$KtBMZ z7>-t*b@!K9Y+^A-kuyZ@FjvgGee;65F*75cpPdZ{+5?aW70(dSW4)pw=Bm=Eo}$=H zg6{Ps2hG1npIr2y%^WK7)cqpC-;fg*Z3`^{1r1ODlK{=8ld9(NqG_{WG5<-JzQOR` z9CYgWzpgCyu*wP0yacpnILNf;9BZH6+$8P796yi-FeBH0cl`MA@})dx=I~=8jrv<# ze*57Hg%2?xQtFuj%<{$AAhG?25B%1Jg{Xzo(#*aC=)tpn07lFC_9{48vn)W7!1tnw zMVUYka0O>JQ8Ff0ks&~HVcACgBmQ)0_hw)pfu9I_^U^Kz9B41B@BvdvTs~oy(5?qM za!}ow^J55$5zS3H4m9Se%Z8u(@QPMrskl^f0AD==w02jl^58sTZo;cSI4oxqTG;U{A`D$9gC@c$HMqJ0R=&Ax5pW zlU@7f6W{XoU98@=4;vIgqL0e6*kI%oQlS3b@mBCeCz|Gk=gH_q_TJfyqU-f83=_b+ zo`!ZDO~b;hhYtGV%vyuC(dD*N@US*^9_yr&+_spE?>Y7eC9-k;S{9;|sUXuvpJmh= zcWY9>y!^G+ZOgAsfCXH1@OUihx7{J#IJ%$vKL z_RVKAF-k&wxe^F01O2SYnO`tYFi#Ugj?16!9bI#!6Pq1qR@(@F8 zad`tTb18jT*rspqFW}3(xfF;>d9ETVHB(H? z>J@)djSv+wGrmgvqnWVu`=~h-vKD9j`&Uff(#~>dMnsG(cQjz{0Qwrr=MDoD!EDea zE(_M_?rx_N^bqrZ`_Ljoy$(pNffxTzwj>I4L;*xNBv1`B6*y6#%7dvB{d>jM=uJ)f z0a|Hy)xez#e={&v8oTp}mw$R$XlrX9bZ zA+u=^fdUUPlx0f=nBC#^qwJE2h~D3svNil3VlyVhV<>TkR{*-B<8s6SiV{TULQ0gG zrIX14@UYS?Lt&|88)!(7InrtRO8h?TJ-2=|2%cy!e!7;MpWCQ39l%%7$UOZcO#t!+ z*>SLz4|zYU7iX8lFdJ$>(kFuXZ2|&50J8(s2ZDM@DqN)yN4fRCUT(*Bl<&Gv%S=@I zt+(q@TDxNY?3-B6J8)Wb>YB;B-OWZ%`iUER$UFMZ?owSB(e=n|kepYaE;mmU*>9GO zF9W$;nyNBmSl-GoQIHsPR11ljo>n+aS)g$2Ldv=WX*YtwKUz#3!4H-PS zn(L1!Suis7wVn}hGO$?2IV^->wQPVC)mONWoDDOFay;)3Tl7;x29WE!Jh!BpiqwC8 z2kz-|y0*3*;3#V7Jwes0^A@?;sh)cnit|$-7s7rd%Q~Z5o{uN*?e+-Fg3tY(&&|dT z?@`4)!jM=h1-DKhUf456}Z z|FJ!ZZ=LV?)ffpK7=U{r)$9mg>T8Jx^Q2(ZauAvRqXaVt2m8ks$3`7f%f`pCCck42 zzMb=UqwLZrDLwwTDEiNNl=U|$0fsuCEZy^Ur-xF68()kc7!}?)C3`mg%0yzf5c+3fgG@u;BoYZ#S#+S)0Hiq7VXS9@VmA~lLJ^|-IaKp&$w>mBRiRx>7*$K`Kb=9kn- zi~%ExISWrH`j3)RYMgb=Z4<|aXk3vZe_jWO}c1ZLCMxDRszHAN6%vO_a(s@*zT6A@^ zNmWtx&i*>j*}+b~VC4i~14B~Gb@NFI_xMKhYpI|cU(a?_$iLUd@^7kLx>Czn>M^c* zK9VwP^Em=?lLTr`zB!NI%d*j5`kt9sP3Mp5<>O_ZE%IB@xSP$4a@C(KR9~MC1dB%e zj8v8k5OXQI*L)moxPhe}iJ_(Y4*$>1hq@zvgsa-Cex=qA)NlsoiSrpIH{?T$>xK|J?lUnJpnJAHUpsWZw|dGCQZ| 
zyxi`K!=D)!yD3&2w--LqU(OR&E`JU{BS$Zn*=e9IMG|I_r{p?s`^$bcJM3>uPL}Xl ze9t`BYhBuH?M$n;{5nVMk7S7SXOtwSkF8&fchjF;37(Bj!!Mk5;#N5A)uBsFmUyfM zAJ*^c_o*n&)hF2k*ZLw?iaMCd)qG=Yy8@502FmhgFsJ^RrX24`(V%VQs0 zve5_s=kNU>^!fP{uG+7w8osD;o-#@Bzuij5%fSt%p?}&oY~_sT^(5b2t>|~Ioa%J= z6X|b9(o=7Y5Zo`b$&?fS+#QmDbh0^SshGZ%3ZU@WsdU} z9R&-u$#coA)pS`!>9^tvR;YR!!wyqxepC#Cx)u}qWmVf{>Z2RO{$4%VsQMg#ZUao8 z0@hyMy-uMCzch_I!iS$#?~rkkX|_7G^AM|Z7da=n!pDc&ImW7dTaMDal=`B0CF!5Z zjn1#9#Z*>tm@{Mjxe;VqyP`zA&n323!AimK$~}MJ5TC!V%1s-pu#I}rA37H$R2Y3e zuRO3dX04ZV$yn|EqT{1sE&XBRajcM0a5Ph+LNMdvZF3aQmw-RF9OB)}blJ^xo!DIH zJ2v@2EG;fI$>HrF9I;1`_hrK_Zj8fH%qMPMKRPe@L7t4eSXkI|)9rtEB^(yL*698$ z;04Vq_cZJmdUS?zmASpN8@`!MOUxg&DuaDRp-+T`2x&;MFS7ougqQ1eA=A};X?hvD z5}a%_Ih_(4r~0Y+D~Sh~%hH9!&U5-L%7!L^GEfBNtdpfOc@!;O5$y-PtmYpb_P$9i zCoqo0&gx)z96>u}Ec8ZtIrtfHR~Z<`z+8>=LSw2dw^UU7)5(i}{hcx%I(&7`S4ThN zFNRTgeF9OT(vc_=s?S$0;@{Dg+MrwNwry(;ON!s$ZMmM$5#8UO^ZA_HnA{#Elpt%1 zhH$R~5zl8j;Fw!~ zyENVP-s8Ow#C*tfcC#r@cYV}1Plx^IxBZ@KIlY^!Z8Fh!PB5Awv-Q+tK@=>`*KN_8 z*>6-1V`s14Otw-HGK*X}tRf$sZK1L}*%(iIJm4=euPl$$ApSptn1W&^9dS+gxE_*u z*H%yN*X`HFZ_IUi8vkf8R&j}bn(sUnV6W&Z8xcD1rF^aLr8w4?;OSapoj+Hx5<{)6 zllsfFUtw*~xAcc$hf0fg!+4r!DUpLC4b+*K3o-kDK8O$Z0ga`%UJg1GL$21w*W#Px zdDzItjvI|+NI_!_l=z6QTBuAkntPOmT9z=8aI_wMPsE6#&O`J(1 z!NFp=RuJfP8SB%e^t!bpH0bMBY0zsO_AFVFQJ5<^l+EUUetQUiFO#fZkfwTCRi`NV zK|ooI@SzQ#v#1Q#wKp;GLaJJ>T)lLNt z`%M$)0_(Lcs_jW;E`;iL1KiWc2OZ7p2=_N;opQ-|8xDUb(U06Yg2{SbJ?Gb!Bqxs` zGO6$IyW)&7oP+YLm4$n?&e{d(=yo8jVGD`!7q>rWrn_zf1D_Ya<1OOZOtBp5xFia4@K%q2QL zg4g})p+L-+JIh?@%Btjosk2koPkPF;3i4mEHQEm2JRV=w2%FoRad}LDT#xtnd55}c ziBV^Q@S3p2oupbjP+z;%&C2Cb7ufX0Gj-Pp+K^CB{(UP%P+4P&qK8;%vgENYe|1ab zCV81gNYe~EV*ur%VUQrN=3X*U4~HsPu)W{T^)$1y`NJJ2#tRR|)SD_@19=aSk@~bUSmDM(_tTu?6=CiNQ|2^HaEY=yt1LtePTFvTAwa>rfyFbabcu?gV zoR=vQhKz1*oj7-PH1Jx8^kOzGxp3KaKb}TF;A0NSr((4t>YiPfkUE?{BjUNS!#Z7@ z*QwH$_i9v(=AdQNQkUr&{!y$!)A_E)bki$ROful9IVX}p@y9RG@pV1aW?J)O#6>ZV`5N+f@v+^O`--+6wk@u;9+HxS{o4S(wr=JhWDS$&= zI-ON4hTB%Hi?NngfPI8mi$l1bojuNeyCk{FBIcKE9i;}%-#hqmHET5Kmod+-qEUw1 
z2zUfCCK5yuvb^lja}$XM$6!oRmT1j|o95G}ftMA3f2lR9wt0rJ!22QgEHCaH?r3=y zqm?l=<$s%$vI_9xYj0wxbGjtWH6+8|Jl(C(@~|S4kZO9Y_V*@!Lu|?GDumH;q@Q!m zN`>WuvFpK(8fr%!SysGEaO>2oo^9i%PKxfeN~v=4w8&de95A|8 z(O6=z_7Tf{@!I%HaLYy3Z+T(ensjgZ*=VHx1UoH=n|}N_e!GY@LSFIyh)LhB$--kURg^t`b{5OVp~(6P-aT zRMlLM1#uHNigr)=-ikCF`BO?5u>bXIAO{HU0=ash(~G8#9aJ?ZM)xMcJI5UL-;VjJ z?Z;4K^t~U`Hx0+K6oj=`Plei-{}(+$C^m>vZEhXLQ5k0x3la8z*3R|_+q+fh-I#_@ zTll&+oyASiv$tVaCBx4fXh(B<@x&tkorDd1tfmIJ*C~Oh*#p@nhX}ZFkH)40J(kw8 zGRE-^lJG|a(q8KSJ$fLBt-Z5s-d{~ym{ZM@P!CcCu^@#CmWIimU%# zt#HzXMBX#&;e8(sFSH@OeKyvg_Z4O~P%U*u(CB5Nq%P%!OB$c2$5vKV&3%Q#63U;w z-d7(cwtW?y6)C7@By_`~k@I)x5GYxtbK7&N#Ol7UCY&X0l68!AXq-qi#dZ@DR<{xa z1;0~lsgM2QN|Tk7zdH_&rAd*;QFHFhmjur>;13JC4e)WP7gtuc3WkIwQCz>}_@HO8cnilBpiMpDf1~j;?MO~B zO^FuLODmJTjPvRCuFO)oi|?6R zOY_nBqDv4y2%-f4bj_~@{{rgw z=r~=YuWyb_dTHwQUJNH6AL9OB-@`Amlt)M^2JF4F+HYFrIj=fZBbtp~d|l#mcPq3J zaL#auZfG7Prv;O|1BF`7HmI|qkCp1ptxks8Hd_cX^YRYm+!#b$UTS&%BZ$$9I8wj8G;~={e8YLVUI>Y zrndk0bg^^lgGT-eR1g0|J;wP>MR{K-yn{(f!!E(==sB3wQPys^4%88qww)>IhC|7g zWoew5@Ps=`U{e!Y(F%!)o=P@h{HXgYlS z-)AeMk8lhGVr>hlJ6FfAto%?_U7a&bo}a?C*3VsaKTbdULM=O0DWw+j6c|*tsF8xs zq$G#t=&AzMz@$^%Q=`Pr)iXOLe$ILBF3 z`gXkJXmku3{pOk6iRawL2VB)}2L6_6C0lWz2Kn#RI&iN+CG5K4`>2DZ>gaH>=$jreI?shizIH$YIE!RQ8IW5bATAtUhX`q}%Y6Tvf zO0XEN!sqgJP)=SE_|dxI)@7_%E9aCzC*zP_z4uLRpRO(=v_f3M*+Uofv8JwOig11e zfNzv(dH6A)aJ;^N|FZ>;SbXlf>b8}o;M)hdyem5m!cHRGkE6afkN zXpcKhIVkkgfa&&`*|)rf&6@PpJ+HimD~8%zIoC7I$oKCRL`4I>lnVDTE?7i$XM@LU zF`D*?=f9S#y=CC$J=O}uDpLMZk6Wi(Y zOPWZ50P2*vG$5b*{q~IN+{R9Q`GcT7ecRJUrPfH?AoC_kh?=S@MfphCU1xv@B>EDP( zJMGji(D_V*(}hdNK1h1rSKoqrHUqn?G3%eOE{r*zQAkgjA4+zd?ybhZSO1z7_qpXw zg^6I{r!|&8n||oq%Xu-~hCI>QTdM*~;yLLEw2Ay;9!-Yenw_Fw^A#Rb48dd#XBwZq zWh5Jp^-|J{@`UCXIzDz`y;4w;sROpW=u5pKf>IX8^?ElwC0%j%DQSP*g4bS(>M#x> zwRq6xjeSz6J&A{b!;GdFg4&F~2X`}XzXUqbitEfmV(waj7B<3zSJ687_ztS-*Sp%o z9}}KKQEkc4{tS@kM)2m&1JpY>y`_ccTsLW_N*+U_yN$Lk?`=j}*7k3He zL`v3*=~ZkLBOc^RBA*Hh94^~?TsH`6ecImb`-P*y!83Pd?%phB`zao$rAmW$2;HcmgoZ~w%$JNqdz}B~CrKxXt4_l* 
zS)MZf@fyrV$j4J_wztPRwIR^~sR8_^L&=cxEW>jfRiE?E)Gsb8`aG5Z9rNGIgKp=M z&1@~h=8BU$hp+Ni>di*`B+@REXFMD>F9W~=+1@@4s=qv048}JjkIAfz>KTe`w`{td zBlE7YOt&P~-Hi{{N>I-{b;tL(8j-DcQWDm6_YyXtIrCZAOzWopT;0erAm&N(?CSpR zJo<}ZlwfXQXPEc3Y{F~){bp?pe%D?i-0YJSna}%Sk9wjyd zvoFgyt5htZ9eN512g81``FaH<>xMxs8A~jvB79vscCo?s-`tJv~1n53|y$W8-wdGXPDKmP!Ny+(xmqqdM_#>0!r^8 zgf2a_fQ0t0cno;%`#k*bPH zusVI6%+THFm3KjlHsF59nwi5?t}rrEY@hgl%s^w)D5Jm)wQGhvlF(u>y|1!H3AOpXNMJIJii7rk!>A9g?)-y z6lm2#F@}jbr?Vd$N&xKxF^XFV?%S1UvLKQ~dgAgtvsb-MGnp3Sw-#6E=VRZyBTFpP zm3jDCwfmB_azG(x=xnR0)y7*Ii9j!N)qcJjp$1#MbT>%}XZnsuXW>w=S?ukJ#8>4? zQHC?BEOMk9J&}$o5~hBv^*T&HpPAba7*zc8i~I}mQzm5(n@9|;MCBzWZ4-J4tEcks z->oq8L02;4g=rvK7b2h;i=Hu7WV*h4>Niw*rJ!8bJ=xFa6uB*?gO)-!hotSOerYIs zhQ)%KVZ|*$8`bzCL27IQgVwfTJP5*NfeQi6h|nr|##ZwJWUAP+z!-h^1qya!+;tmsWg4(AVrG2fMKhC?6B7J_ z9)eo^MvYSiI$I1aLWSAilos9@3!9$%)-$MMyDmbt4fA81d~@$Fz2jEbG(RcEhq4;Y{OSLv_9Uxh}*3jFwX=j}GQvk|&DN>8Z=)2&tc;E?_v=QsG%1rG)@(b(t^cZTQ25u#idqrOAF68hjx=}g<}gg88-1*%0_pc4;|uaU9J2=(S6=~7?pEY z7LsqUl`gbLbe^3fvxw+JJ_YiO1O>9?s&!Y6xWuPa!B5z45Un*`HHW-5;f}E(&)DZ} zP0Y+ZsXh2pk|dGj2f+mA-gXK3T9k%S#go4bCe%}S5lBU9PkvDbWci^_6IYeX%;aK> zt%r(iM7k!=J2T?OihST3p0KC*2Xl4sP^y`{JgBTn0IV_Zy%$PN)5?1|mnFA>ARJ+MD>#myv-`N5W8YuvZseLHGhgW+MAUZEpdf9?U`Lb_f52wEV4yFn zjrby~Z8yW%*pOpS z;6Uu9vl+S5d~J=G^f7t<029Qq4nbXntF#h|0fj%|uOz}->iLrOm^^%)%?yI(`F+E9 zu4PvOKeXEOW2q-h`2pnol)SK#vDlvM_UA{aZ^5U{HMmh_^t6`C=E;n!FRwpWI)@w3 z`O{b^K?kIgD_(~l(Z1Vpi&ak}{;Ajt1YXwY4YOdETsceEeoh!6$`3x-#u}f(hAw19!CWA ze((Z$fIBGO-#wefX=PeBg1G9QKRNyT$NuT+Pt4Sv&8cuNo9DUQSjPKghznobuiEkH zhoYfLjL3r4v+-6>>))@zf&!JKI|*GbuPCw(xM)eB>Ib{^c)H)4lDbaLE2yxM!#MS^ zVN2TQN67897z6c<%owYVZ*h%2Bw~AUM)Y!vZ8Er{`cuv?EC-LD7xKKJUd_Z*PZmh6 zpb|-w`(*p6fz!%Mr!Ly#55E1V_9E#UO3KR_N$jTw$Ysl|^K9Y^_iz*XK1>Gz34gtE zcHkHCqTQEc>^Q}<+5dnvjwl~2C|HbPdYlP>nAwBNxr)v&<+{4w=NMz?&s3<(gm>Cr zm*OTO#vqRCZ-kcz(DK+7#ZIwD>_M5YYduy~DR&QA<_+ncq7bxkj@LgNSr_D`*XJQ% zJ8;~VjvnGYTTw*$y`Wf9vRzMuJ}zrpc)!?6y}-iF8;LwB+F);-A8)}IZ#MAErJp?> z0k(QBF%3{1=AzNF*Uc&*K8n0tKr9YQj=k|BM0o|)y#;ozi$#|npD8S&3?P~W&8xP3 
zbZs0nEUapiM+eC}_cx)6(VIW=9S`?k802`2FdTDOc+bsOF@tK*PfzvO8%?Sz&XH{L z5}oBKnD60IT<^=O*v{j4v+cu7gW6m@h8pg7uQ2Cp$edYU`SOXa)wirs3R>0X`anWl zacNOWGs0&FQ7ZYk>N3kH*PB%l+&SFumKbg*@n^Ig|8hqJphQ5ix8XB$b10;A_bQyT zW7{}OHo2Wt@j*0}lJ)fLEpDk+-_1}*zdao_R(%#X7neN};&1V|51T0Fyu++6QyJ6# zmaq7y^Q^v4ts@LX!NK9>`DvZGwURM*^^Bli%*71q#b^eg4#r-%?))vr0F%^Jrs-Wq zi;YW_VYPYL?6|ps_t!-Gv{Si>S1!%&UzZ?xHqN<~a#9Wx>&)R@7XbbdSM`AXc)cJL zG7pm*l9(>@rheJelHPK5y3BcQcq?Ua0Nh%%RDR3wusCkWBnb(rsP#LyAVUV%_9#lC z;~Vy`UpuJd(`x<17-qN4&o3$`AIZEm5=H&^EJG zgxMs%(LSxi=VbUIBFQs_x~Q%$a?zYkGna5;FceIr*ghT$a@lY*b+Bv#B>}2hW1p%F zYp6VMc!*16@d$Nn!*gA{n93Vcw4+_*1rU@7*THUmSo&iT=0+JvL|H+>L1$%*P*m|J z4({)>nAA*i9!bHah#$# z=q(G9oUc9UI?WgQo>HlnBZd&iqj)~=T((YCL$&M?YLhiD#~RAGOUWuvo>bs7iyqa` zhk1})s({st=J0?CAl10PF-RZS<3K0T(fHzIQo`Kl5iZ)b>8e>Y$D$C?oL8g0lait} zb~1awZGwdsGCIv4bEM-EF8}5)IBh6}-(FHe2S8et0x)4P)p9;o(z3{6r*1IPM?($i z$L{7*anY=6`}@0BLc1ms_~f@<3F3zN{$-zXZ@ao5DuFbNcgIux*t<|1t{`4GtSh$`#Sb7Yg1l;AGS0pb_G|AJ$#?+#?n+9i5pdD^)C(KL$bQ`rT1_Abz}Q;9 zhBdXk`Zk<0NK{^IG$el6a_4Ti`<}+n1w%s_*)85_UVLo+@3B-^+R}11|5&aK^Q6a@ z8T`cB!iALyT$(pSq7!e7*na!Y&eF4}H_Iibl0_rl+ds(&6gh?v*%d&`uN+QO(T2B@ zsoo0Q>5&AxeEXEWR2C!HjM^K2-uY9)ijn`O?)=h?@&$FQ ziKIT9>LiQF-D4+X4Qa8ssa@_93k93Vj)ABoSj;Ya%AG=KDS_)@$1CA3A4bPwPy~_{ zwF-BKpb}8la_5s(A*iW2iAwQ)I<%|{V3V%0rtbsKStS6>yG0@Mnqa7{S;b{K_jGh5 z`2aKejBtU)a09%~@K$?0T74ukQi#Gj)XFDvwY~ihf<+L!9{mlwbr+In?Up1-^BwJrjs^Jz1?bn4cBU-2X?BI7F2wp^H5|W;k%#RL&4U1t zJA8Wgc4A4Pr`X9Hgv07ETfbvr{d~MkQalWcX;QIvY4}76_0RqU-_RbPWD4kWMBn0R z%{^+64rq8)KG4SM?=j(vN2E)hJz;$8{RcqO46+&LFU?Nazss#+j`n^wFiwatPi4!g z47ed7fvI2iy$j=owUnm6RFIV|r3RHBQ60UltMsUSZO8zMoPV77J_jwBedwMIZ=Z5u z1A#UrP&2AzBTvqa8h)h@YvuSp$CL0^2ACzR6oRT*t&h}$?vB$*Jz z*tWu1VIfoEdoaqBh|ad#>ZeaAFH8}fcRA~A7C-~EYhI-N3xE=q@kGsYeBCR}1tOPZ z{YEgp3@YQE4@K_n-&YSC4c}}j{KL;@#Bwyr?$CA$j3A=^pV%%PVNAA0r zForP`PdzNNN+z~rj(|_=y~#D@m9V2e4qGP}rX0P>?PW?t4Jr&k{8gc|3fTy}!TKP8 zvCWFpdE-@o%HX48tmcE#R5+?|=so&uUeOM3?>8!k+HkL04l^PfpkITNyH~i#RP-Hk9=ky z!$Qv;}dp(N?z>bKy^R0(*@!rbB7dvdvvMm@l3=c}Q$ynB8R>MyE&6o?i@ 
zm3;)V5Ja5slUsr6qQ16o^DaK@3N9zPM+3>E@?+7E2r!BNE&2y!X6EJ=D^@HGwrMHf zAqfQa+9><&5g-HSShZcNX>a#Vl`z&%3Ksi$KUYJKXr2ofzwk3v$Wn8}Qz7Det_*uz zrr1jP_|osL*>`34CSS+sn2usi?)f%)7jONfutXt^yGVEO*3QKI>CFx9`P^)pWR-X*2lOJj}!Oa~Zok?yIqArtoAZ zF#+68K&ANz4U=!RZI~rq@mrT5QP2Ovs+%*+sA3YIrB3cuU^N7bp^*q9&xHU?x%9JW zD5LD}aq6ylGA8a)E9_|wi0F#S`$Ctj`zM^4Mca!~7dUI(A69l!#Pm$Q2MfT~=)NA9 z6{#4yDz0nf*qWBDsLk6u&Ei9tCu4S^M-P7dlj43L?vwm*B)NTvI>MMqeol6jjxjb? z5QJc9>a12QOVN{4g?#r5QbpWU=O(&wruG<|uFWh?)30hM*9G4r>Wb-5^{I8omm{j0 zN-zNK&z>&>GyFg4RZ9R+wdkz<>-*vEy7a4ym)iDfR=!bFdmvt1aMx6-u!ykRd-C8# zS@m$8VSJu$b$6AR(ZqdZWnC6s@mbsg)lGm!wS64o97C+oWo3c$bA5jUoM@#;5jius z3LSO*@pM!tH(LzOlHbyOG|vBkK2&Nw^k86_NV~K_HQc*cAGC)wY9M7o;GLwT&>@jn zfAEmuO75?`a#*yT2PDiCKk{E6D(e>>KwyN|$qU9iyXiJO`Loj(&*gn5#>bW@qkJl5K8)0zztWRMcq$ z+z|MuIM#}I0yZAf8`^h{(!_aFfGvE^Zex5$)K1qdXgcl!VJO+#l<1mSjHgekt*D5g z{n;G{EFaq?sspZXiOZKqrkNW_o_W(B<{F7v{pbK5k5Zg{eztMt)yEg7_3q%#Rd2Ms zN_w_JKJ#-RGrfQ`d|ShM74sL78V?hA4*Jh$7qDs#3vshsXAxAJhrc1T;jww^Zo?|r z3=6-p4$b+fHEf91DuWC2x6CFGs3Kf%^yj1%K`2$UcYoGs*EXnv8UbJhtIx}kKyUPG zgIInBz$;AN(*z!iOFqv6WXq=Sr=$JuTbpD!0=WX(Uu~DnG}noyP~hMEc5=_%kEvXN zy`#Z4U9caGq>XI4R@M!OEa_N2GAlUbGOhY-gzcB=$;c;RLJ zbwsg=&g)elr%)gi(m+`43THbHI@3rGcGE+l8P#7ozSkUaj})9ju}ofHE52j&0WZ{Z zN6)5y-_aw!-mcV!qbJO={TgLa1gbv$)!_jxqy^QG4dgy=@oQaTV%@a3OBVKoGW_u` z@S&@#OKhHpX|aB%3P@VLn0o1h`OoBfLN5ML<4@(HFy(OHYNdo7wS_RP4)h=_tKGID zr>I9L|I<~om95%{$#M7w4?&T*hs-JT!QeFnr*+L((%@E$qV*GM$Pe;_RC>{Ins7^y zqe5idq82|Jezb;PiMj=p!A)-8UB{M0$6z;g3HtD#ee!)jiI}6D$2}UI%KUa^`1WgTX;PP1zfOLc?IS3 z(Q`4YUQQKblq!KUUgQiwjz%bBD<0=6im>eZRUXgsFFu>0i@bK~EqurvTR3k`e!eL{ zI%m35gT)SzkK5x=k-Quy%wHE(l%1>PctR=6J>M+c%y#hrQbXwO z$CZ++TTP+h8jZ3p;YzO`CfS5~dTK#J)piSYIK^alcd_6MYV$;y;pQvsl(l!}A0(Tz zV|q73z<8&*z|(f_5s@12;y9m%+pUC1@>#OF>H>J@)~)ZLC+fi*(y7wU|fI4}Y)D3wt>(q&IT99_j;wUE`s-UFl(>EH`PSq*B~O3C>;iEslUc?joZ zqm!gr>%!_K$@yFbzNPJ$80$|T9)a#1OEue?;CwQk+qBXkVWcxbF|x9K;;WnL;MsF& zW;}fmW^G@K-i`NZlpND~jhzx8x1EUACNLaV$w zs!v@QRaazfgNscM!3uj*%C}L5BVo!SM+3Z|k4t)EyXw|ZJ&k(ttt^t^j3)yS`rO4j 
z)M(HRgvSMdE?4bMOoutD=T`}YBsAePzj=QUj_Q=v>E25+U=at!Bs2N)h7hFd#fKfS ze7+uOl4dPu0XW{u2LdIJ7r}G>pOTv~Usi*3M*h9a-7i22(kbOt_#2l<5~g zs`c|?ua&y3Lt$;a{u61^_r!%-RTQw{uM@n(m1i)|v1z%v+O0q?RP_sB^!Lv%|7Iy~ z)mZ(8gEM;EqB6wIZy8Vf7triHW+)hf$Knl?QhviXkVdOu~b z!!UX+Jgf)69Rq2sw73SpoY(k30^wxum{%HUkPmUdodb5Mtxe$m&6yF9)N z$^cC~y?gC3PZr1T@bFg$AKzJT5(Y12sHES|RF=ufJa{>TnO;`$V?HboB(-sD8Yu?H zYUh>%*v>g&0u|7i6+LQ*qz0?t4B5R>5g&F=4gmFLwsDuftV0$D#f$qYB)*;cUCv$OIX` zZKQj2W~5*|c4CP@qLGu;P*TUBkQG$-D(@R`*nD|&-+;@6scBCh;NL#ovjrQ!VJR+r zEIbMJV|FM_Mk~jQrsgVyHyEs)UkiRRq0HF6S z0rN8;IIY!F*h#Lf)(YCxtV;%%nlPC4(nKHbA$?obVA`1!Zb(+G zLekAPdTBwFFmiW^y-}0M>4(*lO_k-A7Yc2JK0Ys1b@&XtQ`-J#r5kZY(Y39KEl%lh zx191?B|DZ;R>-J<5>BINVpxaQGG`liv~QnWBfv)s zA|kM|*dG|I5{|>bOU%o$QI?@!c*S9NZv(Xa0<;C1p&6vNus(jp7!+Jl^_Z*)UKaX- zwe%^tUBwQ}Pi>Tdk6~edQ;6b+rIqr6c`^iwt${bMgP??n&n1s0cc%IB3zg$C6vCvU z{L5daJfTo+a7RK*p?7>*R|W@5ctcX3+yRBsx-3Z`p7<4>SW9Ti0b$;;1IF*VIZTGP_gci8S^6(5SWD(`hUs;Na=B^>QKrfh9F zlLS)7bYy~eHW?ck8Qnnai?A>=s~8&_2XO+e2yndsz-6JQp<%dl=b>_!SO+5`W9i6K z#39Sh7LF^`NS>`kE_8}^0Hjsgg3uewuX<;TP=FWOk$dv=dGYZo%(*)f-3t30_pLb|&Rmu^_h+itb3R|)_) zDwcKwfEWsHu+c_8YQNqBP;hH2dce$CQxiOkeJqk6bENFGnE*xK71}oCHE|N&gl&BX-xD(HG{*o82GYkp zWk6`~KY*zB8L;jI0m-t;FZv1>J+r6MKbcfj8oXL~zOGgK#9JLU(4SsxG**RlwKgy? 
ztkunX&j;(9Ca81G1)Fxj4vHNTOfIVX?yHo?;-YGUcg;0+NI+jcX8VV`Cp(atJhKi+&FZZT`_k2R8K|XwNB+C8+P)3 zHteW!Sie)wQqY*H`5_Gjy3(lQ=65K(aAu*8NWkdPD`wn!mohLaLgWcb3);XZfk56a z{PM3h0Vg!T-U>%nRtAJQc(nNv1sQTGuBdM*e*3vjy1&&o>M*3I?RK~uoWLt*n46oY zTYO(`Z>NLPyE?F8?O{&Gaq{y6Wnk0S)HqJv$8B0E^29T_MPd}a9^Al11At$UdnFr@ zZ>J3Ofg%3Bko|WaVH=0@@>tEYI(_!`#!Y#NP>THe>|}LxZ)RRzq1q$)Fbu*i8Kmo7 zcGEVvfp};*SS5rGuYvn?h0ck=fhE%$S*7ald6F?Y@>P#RDx>Dd97rDIw{ghz3IF6c zn$u!|8j^cYe2mR9xfMU>y0fqSm_L62zWn*Wv`?_m%F$+<)weI96O>x|ZJT0@~a55rtF{Bunt| z+9p8|HqaGtyrDY8y&A8}OY&8tfa5eB2^UU{AEitXH24#O`riT)^whx)cDLF;`O zus|{h8;}lI!F?e9fNMsN%gSEc8SoAOlfWG@1r^KBpk(bD)A3eSb0mf82rUG7cpJVs z>~!#w*-%9}%WMP^0gHrl6>QDia+?#)zOn5RA*$OgH}jCC5=FrmhUm=&z4@h>(X5QEFQfgUojRfW-lo&qnJ7;W}d{$ zpMw?9l8CY_t(r4q^1M6A?an(Cqg@rH|mJ&x*wBeJQfK)ix+WLqE z#9;l`)=X2R$9<~MZ66)4G(U00-K%)i_HCb^>Tz-EfWfcGuT}>nP0OR8y@fxO~kb6X@Y@G7bW6EnFu^oIjOC z&pRW;)=o?9ZUC)zeIZQ_jA$*12&|_y`FS+WZ)w|w)ILwye+4-r#A>t&$u~$kAQ+u7 z^Wrl1#AUzp&c}NlIl6Kc);o$I%FAX)2*L~of%BTc4e3CnH zSv0w315DUDh)fWn+DMFnCz+j0<23GGxy|7#U>)%8DSI?XPrd6>6`Nu!9zwT(v(xvV z@EuTZpA;I``6cE8QxglMtFdn*4deFb>pFk`a%oTngUXk}A=k!nL%>Y-fZ+Mw`Py8=%Dsh+G3jYXW!8t0IQa-I}@8K${^A z;|gYpR%TO%L~`$O*_#AA!V>|WyaKp)%ooE}z6HPgFjW%Hn^)9^pw4&|TxdC}rV$FW+;(i6@bvBS&>?F;?TYoOFlLL%JRMEQq-VGPS1Je-+JDcBx}U$ z`<;tNsH1XJz)jUU+eF!Hl?YJ#KA200-rY>@;P7-p=Qnm(Re){K*6kJZ?`iMfe0fJv zCvl^j91z8((xmUMrVpX`m0Wd}p38Lk)TpuhHZ=1!e|qcO^O6POv@QjzlU@~rF@@f|lv`==}0q{~40URF<37hro6^#n4L_4hr4?7q!VLom~7ty_-K`hY3G z-Ew#jyH+gqG=WTC-@M*m{mQjVfhv)BD_-zA=)KPB$o%57r)_YXO}?_ans8b-IvN0> zn4}2;-h2aF6<()Sm7;Vh=T}}W*FnVD&UiCI*$NPS_Q^Qs6Gk_$&2uyk2XJ$e(+IKgjfdNfoSfOrl z3c7xH_|H3dsq$XX%i5Q=_hkFQMi2HggAR349b@-^}ukfH+x zTIq;z;j{|##Mw(sCb!0qiRlkY^b_}(vDJxu1;MtVsx;a+^C_l&o5|C}EQ({V>Ccr$ z-M#iNG{8p%Fw374-P5MJ_?yWdN)}LY@xNGv zy{r_L)5rT*w_fGAcL_j41Z1SK)(Zf#VR7}cYdoGQ{P<{TO{KNP{LzCTu^bR-BSk}+ zQ!_wi$UlfmeP(U4;RHbjugx)+_&!>p?X;NN_=8al6A^fDfLi&o3^=9Paku0dlqtUW zU=Bm*!M;(KKLCG-JCtW;(c02 z!)me+>zG8Lilt|N*FOj_boP$T8sp&^-I}KA&@(`$A9o<()$(oz&@vV9m+!3uQSg(nK(E{7G8jJ1yexdq9g*-QR|bFdmN)`R6>n 
z+1f?+5UkJNWx>jxg~4VNYzP4>R5eY!2lzUc0?oQX022j^t&|hCh9JFQJUJGG3|OBP z@7>vp9bYY~xK$UjL0>bjUlFgHAR0irn;0vRtyRGRzFh-0YfwwmJ2)a{qxvZx%iWo{EP1CzH1vMh!0uq<^FNr!gcP77OQ04lTWjp za1n*j&~Bt37o@4iBobbIu;*3IuS_giFRZ_8+E z)@{Ps4opHraMV2Bc{U(DV;w@4n)sDKiU27rW6_o+i_6ZA8I*`ZG~^n1a*=p6oI~tjIV2yi1^FEh+ZeTa4h?I zaG>j+j+t7)h+YvbT-te%lcYge0noEFdd?AU=^p1K)`d_OdobV3_zG({Xqb$`q*GOR z*?J0tq`Hfb&GGZAlR><%SU9d$r19eirTbppi=nw6i-UfweQAX)*wQ~JL#y>o^oAIq zx_OTF5`jUDXZD1A0jM_TAC^kNSieStd|*$f5+6e3@@u{$cD`5vH06c9O8BSXeyStQ zs|R~t$>l7&{TmhlLl8y@ON5g2t&VL7IQ6S)&U;bb^Kev<#%r{8H}+WkL^IF$XQyP$f&O?0!h)jlX{-ZR$ZFRQGrcP%Hzxi%4cjwV8Kd@Juh&ms+geRjLI13n-a0aqRrFTXth=3Fou>qX z&pW?4@nJ|Lx8baGCc^#&C^9$35dlxP%oPG+AR|BsRFLuIO)bGpL*GR4Fi9&J ze_?POE>3!fyASSvz#7x=HNgvk;<7Lkk((IiR4W|c)FyO#65C7~X8unzDa`_CC8Y@4 zp;Ljj7|s7MzdRsQC8y+BU)8e>6bMgLXC!t^*^A%Ol&u6yKe+OijS^fw>(9$0P#SZ~ zT(DUK?-UZ^BP}V=*(;cv$8?1qC$Qb`?&qo9+H)IXWE9lh(d91fD5;aS8klgFfr2K= zU5a~5Lh=9?uJACJUyVqBmR=+GT#h$(kv)1z*cxf{2_2p+Kghi4x>&$H-qt7~@a=H_OdOCqY0c&UNb1Ka(Z zD4MmIHOjh3+EB&>AY}R0M<=b3DcFQ$9u%z*lww>$@^*)dk3@FYvu>*s`6nHJHn}Az z{dvwK(4b_{*ANqS(M8~t6T<_E5MayU>(?@TTeUCv*k8#1Zg7jy!uIm(PT}=7IwE!6 z`PWq8Di2jPt2$rf?{osgR!29%EWi(>-@)9L;h?bjd2CfWfrl($kNR#j4-8$gf69g4 z>8Rd?VC*d$-#ukk10Ci`KiBz9X&*^L^Op4TZ0vVeE+{FE+=jHv+Mn5_AlC)8IG(z8 zY>GQTz*a-pE45cy7I?0Z_yn=rxA%|*dBNbq`ag&}>JlaBUSs@Zs|Man04n#wG`Y7RF4TYp8P%|0d4gdS07HpL-Z5rQN=7 zmLw<@NDY8?81S0iUV+&;E&;nxK#ID|4RaI7h7v-25;mC8okplqbxA&|KOAxK&U^); z{{Pyb_p+c-L4DO`r@m~2=*xq!ze)cH$(;`vPxEl!m4tP64FCcJ^eIMDQl%!)6Jj0l zw&CtzdLmS)+sGdBFRP1i_`LDm8zhB{`%eKOnn8s6A?#bnK+%*oflPRh-p#PL->;1c z5C>YuHxRwKjrSi-=_;(V4-LbZmVk`ZLBG>?y06s&uwXmiGu-z_9k3R%1Kj*BE%z>| zfjUs6WX3#Cj#55+Mrj}4;z+#5g6><-EVmc z>dVp8cA7oiH^wM$A3Ocxk<*q&-l=FP)`qI8PwQ=1nyA0$UCG}#d;LhS&cJHi*0Me^ zZXPI-Cz0<5B_mbW0z0pjRYyo~Tx-VecEuTDvjiLQ-Zd zb#(J3vvueRWqMvWLw!Q42r4>r-ew#!D%Dz!rlbs-#%5Vkdfy_v2~_-^*Kmq4^vBFUl<;oa2@lyk}KS^`a?59a8pZwmfbA zaQr$Wh+4NZJu|NzXlOAz{o~xc6~!O_`|_e@-{S46Cc2Z!ah~W^9MQ*6#`-_F*5_!+ z;ck^bF2hHBkJFdLm4VhTPyUW2gjNy3^4A}C_+4F@C)j4zS)mSY;5H$bJf;Bfj#M@| 
zs7b8Z+d8A$DgSw54=mfcEdpoQ@~V1$<<>(Y-h7GdaQVJ)`EOW^rorF|*-IiH{C}l6 zaF9q`;=Q?qUH9q=X*XiC?A`_RlRm+h4jBxWpNCzdH(}x`*tuzXr(oXr-v+-m`=x>n zdk2(Q_#H$o28unr)vxfYfDRE?yZDow0201XF?yaEbL+&>6!aHT>PB`Hva6$pbU;`C zm;H9aRsskx0Z%^?&^myod1li6F~@IQDANLD$x}UHP93l?qQ3@_0U5jWK=x8!6ui%i z=Z`B9W$9Q0aPI?8P{tnZr^`7{bh7e>UBi}sJ9oUcKpj7?$gNUnu=gIg*M$l^aBJ7D7Jv_)YVxrUQ z_8;f8EpDz-J|8+8_s2ATh>jM$^2aQGW-H_*1-gTINuTa;{uf`v(wDWk_yEt|`R?&2 z<58c`?RXD^k9Re^mp4w%`aI*w#k-<>a>UyiC*S72)K&HJNe4kSQMV%3{jMX6*$jyG zEh^ZqYh9y^!VuQd3n>3C9=^aA6D#~^N4+mNOBnPn>{jX+@<4tyK3?DRxHi5e0(K0u zoDhCptaTbFf6zjJDX_-Y%L}EF61le1k^@Bs?2z{ASz-d8$+IMZJ*#8?(bO#K`Cp)i z80%@)p7effnzZmAQ`DWG+P??GK4TK}@IKP)k=z|Jum&+Q^O8oB@_rkAm5}<@HXNIs z%h*;0Hf;n0%h8W+1%QApe9!TqRFiuyQUS{+LTTLe5(fdc3dm2jjjaW&Uztcz#J?6p zpX-J#)WaK*$vG35F*p3akO;bVk|Pg8HS{v4+>mM*1>dyEBaB9!6R9g|5MT&2;g2JE zZIowTT@}fD_Fq0pgqiTcmP0G>Y1-WI-V>ISYt$#s9nyzSzW)=Z@Jhvr9C#SwPr>4f zO*RQMj4XH4U66nJed#-I9>%NXKi8$k%?+Q+h7x={L^pCkKQKhOeQTld09{i9r2!U4 zPdPb#yF%xO0O;B+OpJR&d1*kcf8z(|n(~F(rJ14v)&Aj<_G+{`lJl5a*acBP1e%|h zrvr;gOhhbjhPU1gX-PZS+i~M3CLzhKuI7g*1C4$$-rSpG65Ro{Xt1o9fmSyECG zY~`x}e5tIgtQI-Gn3XZo(n^5icc*M@I5rJQ%gU;{yI0fX=Hz@uX*&R;E$|cY5QP5z zoZ4L!`aY2EOSKC@_u1LAXMI~ANk}mA@~T5sm6TF37!08?ezT&dXKhB&5#`j>)D036 zMhFulV?^J=GRGI&DrNxzEvTx20SxdB;7HQa%9@(7G`Ypaij?8m&9{P@Q-A(+hXB^1 z0(>`Mzs1TbM;szyTcsl*Apwr{_%U;qzQ*+);7T)D({>zCgD~k?Tn_(K%!B5;8`w#~#H*ehd zYH%qBzz(b4@9Hq}REy>{`6%aq{vQ~CIbdg67yr{=-{l< zqPvgC*cvrK-Yd6435}iiLFKUea?EvJ9G}E;K=W&;~$KUvn=hB&}t)`O!af? zQY@@-@rtUH@@M~3P#Vz5p_?dcWA(Kok|u!f80o<)JkKmxGM(fVb@c6C>6aGEWeo!? 
zo6t0kHF?VPplDv4?KlX_wH&9q=j&}Bj0)aLH6be$^rLzic9R3M|sW!eCRgko6+$5(;SmRN?_cKd_>=dtofq z*!7s)Yzprd~t)MvvBt`NtR`ND%)QO^9pTR$iLgZ=oNPqZ`v{U6dq~Q z{d&v-@bebTlU38+7;yZg z6;Sy2qXgr(R-tDb?h6{y)cC5oxkV&3hF~2Pg?rOl|UOpIA}wd>!Pyh59-(;S}I-&NR|iQ zN;$5rcbgmTF@dUMqJH|X3QKLIX|Jm*PJ;9aUXTX*C7BmtA;&JyTz{m0I!p^TY#Mgv3f zyKi9P2K@2K-Q4}UY2Q4=tg8ckjz}yq16b05=H)6a=k~8Rz{5)OcU<@z|J4@YxW{r5 zbnd$YE*Xnhpn4VlzQCmY?SIDlNhvc*X_y3KQ}zt7#i{9?aI zza8I=MGaU%6b*R%QQrjj(d0vr{;f9JKdSSAeeW7jNw$4wD0TfD9TTUZY`$ZMnl2&- zP^0k&o72%(hz8R!Vcg|pxPWy656B|O?ZEwZc`rp1I(|^=`5&=+2?=}a!~0;*Gmbr$ zbbqJ?ZxFt>qIX=y1}bWBXgtYbheIrfF;1%Ny3%a>

FC;x3^}jj&0FIG zLt(|6&sNK{!NZ~c$rWD)TgJC{b!o`S$>scVZ18WEcXloZylL0MssspON9e+Ed6w$84*Zn3;iBZB~3dshX-P12r{uKX5uHJO#43El$=461w2| zGrheJb$=DOUnU`JH6A^B$zGmQR0N79{-S|r_wiCLbIPdzh!~m?8ykB?<5xHl6!Y#V zSye-WUS0HW|6hwE=1LWOW$)(d+S=NmuQ7?IvmQ8opGq9m(R`%fM_jMW9I$8Ktq(s+ z{tWWmd7VF##-1m?9O(!r9KAr=eyiJ>4m@4JY_{np_Ne>Vu5R+Ex<-A|qup?j@yrwz zz2l*ioIJoVuJ3P}y6`{L19k+bG(f^%Ms>iOGPJ)~mCeMOgp>pC{y=qDMu~Snd$H1{ z4>(K@P8G1&)K6Vpsos=d@mza={Tusl*3S0cFktr&dF7E9Tekt^2;9M3_w4G|B|!;bq|7`hqv%O~PEPkq z6|(5?V2_!FMHUoN`c)Lr6v#R^SW(lVr>U7)yIENq2R_1mIu!cXj$4`a5F2>+snh4S z<^>-^M@O~MOG65+0B{keRLJSDG(X(m_1x-t%;SS`M(%v!z-QI1F79&zS;7_&W*4zv zsic)MKQ{Ij;Hh&BntW?9i}@86J!xzPPF+bNwbNlj+}=O!1;`1(BUmjaK0J%(G!7gr z(CSOyDYJp|BAmKJ;)Oj*m6erEs^Gw}H(M!P5+rg(ePnj^zmlXM-;+e(JL5W(AcJvm zaKOI!a4TCZ7f{*=0i^F(Y+)CqPyuOK)6oH{c0MhnA7&sX0sNmP?HPoGbbh>d z3ZwMfcUT##=|k20?fV#h7$_Id4G9<7js=4z_`Vf&1&`c^5a|cn>t4RGj2}f$ycpfg z7K7+sM}r{0z2?Z5;Ab-xVZs+AmG|1Sq6YB1wxf@Uf-ZSrtW@>&iw5#F zmhAlY62?83SmlyMMfx)-L0i`>al_tf9Vy2Xf3~K5N9>q?esvXEpIq0$-lQuN!#0qk zlQ;sxAu@nqPK@yCMVHD21Ahwq9A|fXlga^-JR7h$}!K9 zPh8yVR8?2E8pu<_{G1L)ueVYybqb;S?{x`ZpSsvXlHCW28(wm5NS`X?G?nkq)b{pv z&zijS4tC%5OJG)utbI7`8-BFYbTj~t00ZXGz~VZTj88t2UO^os>=!RyEVt;n7>#bei6apF`FspT_a_?P7kXjxj6N9e!&%jH~-8Tq4 zL=U@=S+sJ?*&Hk=sIB$N1UW*nHe>PID}BX&2h20iBn%Bx&A!E^f+>ptc3qw`5sz5(hhFlefLl*F@2S6`GHkLU7c{NZI$)hw>az);5_Egy1Si_m1_=qn6P8H+H~Zb z=-dm5=e5zUbFu@|7Ia=Lz0hy*_>N>O#kqWsM9Pa-R`~PTr^LQ-V^>6edMaq~5#mfL+_ z*LfZ1aU9>_IPagIjcq2|&#H9pvi+{Q!?g4ypUcGE&aUC&k4zv}hQt zIGgtTa(nBbb-U9@$?&;mjYiyrjQx<_rEi7WdiR z=5}G<0q*5uCCMf}6>eiBOFw?}NG&ol5-(4k@6d`CH}l|i`jyM7_a#3taCH!;gbMO% z!p^N-e@8mSFo%+tm&d%%tJlO){oV@ZjROvE?kX>|w+vq=ue?pq@|(m#z0%T4oTqO8 z=e?{?(b}?D=kY`TJTE+`S_}l6fE^&_x^hmt=8H3 zdruh|Jima^ zxedn-A8xvhFF;}9-j#-_qxRCp-Z!BxvNMX)UCS_bb2+Y!2QSpane>Y@^=>OBiWp)d zrOs;%Isbii`27;^N69QPOwKrtzY%gNla0ew`-*xfVVs5}@;tMu6+2EmO|BYKaQb`U z_qP{Ok{14IL0lXjjKR+2F>je!m~zMT3~t7rJhnd{_D+t!6?0$^*)mdQj2vu&1Uvcu z;;$^TD(^u=RrnTM`IiySf41>HY~#^%;5{eDLgvv}b>5XXSptwR4s|Roo8Mo!xb*k; 
z-tGQ9OUtI`227&-nO6P1<8fwv^&Ot18rl}8<_prhq^&eB=0O*G;9BoZED`}^5%t(mwWuz?O?oi?HaN``TO@wZi_QQimvl%pPqjTnv=(#czsq< zGkP~$wrN>S`K@KzX+DDp^iPFDwGzCL>OBJ1`wang~nJN4< zF8eK*XYZqsopE0_CrRXroqEY&YHGT6htb+|=g#F66sV_Ygo|J8J?JGnCKdHeKv9wR z*}3-~K0fppFI_S_cg|ZUMWZ{y)O8=erl4_le0FYThJWWy=CZ|EQUCQ~%(}X|yZ7v= z`0{1_u{cG2r;TxnF8+5nt0$^>G3Gpf?wyjdRpb8ll}buVo}Qi*g$zay; z%(!2sy~Js~g5!j8L*fB5D=S7(QPH(KPjVs;D4#wZuVz1YuQ!M@m`};T@UoTFW7`dP z?%d(Z6!HwIInKBH%BFAMzDd5*%{jd<(}`6-(}<%ybw(Y}D@ng2&~C7Cxtf}qVUb;W z2e*o_wO*PQ4IaSm8>8`!GlSr@Tk)7>loJxQ}{-N z-Me>}zkRz3X-daPiJzbREiRZ-!7)$AQA2}q=u-xLq$a-IWA41G5Y2UZKzim$(p5aZL-)C>_i#+!GkZC&cIID;3ZJNpavI{c2e=`6isYkMm)a>LcW zIC#lE z8!km7=y_2yJ(ZZ4C~h~PMHUV1-j(auYvT9ao{*H+bqqRx>(;G)tL^JM%75 z3N7dIjT<+fzH}+badN2pl^=U!(cILCdX+bm+4=MISbJ%!W`>BJCOXCu#s!xvIy+6} zPW}D+_d-ifLR|~BcH|GQf}>}wf>WYaf{OUAUEXnVe6-7!@rSpYuML4g zL1vdPXN|ia2>=YW+S`!lVy6t^^ z2j(hj7zG6G$s0f2(!NJTT7WKghsonO#=+NLziyzeEX>;H+gwP;gIRX$*s+OUU)gJe z`P65|`x;7JoWi!BSaUr4R5f~_wV#!)bWyIegF+`u967vJttW*>E7M5kTd50ol6tV( z?G;QRAtCVvE>82)5|8!LJu;085{>e$8e3*pt=q2LQxkMIEG#+fJlgPqug`>3_V3rm zUrS&9Mxs!yy~y4^_J%Z~C0s&H_eROu+yO*=_dsG9qSDG|lfR$eJ_Jzi z**9y#b{Mb2f8J)L%{ZQIAjf-E{m#aHSSE>UKQv!DPBNk>sG?mwZEYQ3|MT-98`h%2 z!uSIL>wNFrscFr(*1xPIEX;tfC_#AcL*|ff?L@EA4ZL&3!QuE&ZeyDE=C#`mSqm<= zSE6X>{pc*P;X768>?flCI3Br$Qml@7_>dbxK{m6Ejg7QT$HwK1Ys<%aY9$)`!*RF4tAlVs?N^xBs(JGSmtHo9yFLPW6_)T59_0^ zikT?h6|^(zHKv6`j%Nj)uaC85xW6%3J(w49u5U$kU{80Hxv`j`=0@@@|V8gScc=fC6mir)|mJ2Gd|zc!l36@4_vF zbE0iejj&cc#muJ|h=jm@vU-)A)8AjWGBQLBvQDlB{*6DEN=0Y5)K z0ibhFFE2mz`6c&b=DTn2wmdK7aZ7V4IeAia25eEW{mGaaSWJjE?2dG$w zn_$a6RiYtlQ+$1xj_Mv75&=iBw(;J*d#~>B>|J{wK-{k>^_XZlzRhPVm+a;3l<#0u zs%XmkohJ<`tyb^s>}+ZiqL|}8JslkbMe9EL=MOudl51EaGT@fs5j@hh>U%q06Mr|&@hJc`;nnr-X|C(^oH^2J zPSU!Q%2jE*R>e0$%+wzn?VuZ=5dQ}g|>M8o~y}q z6S?NKj8r1Zl}JMj5^UJny3>JDPXX#^l)+>9 z;@gs9hPJ<2TC8ub4#p3)3EM5jV*Ej4?|swKQ(9Ufbrbyiy&0%8^LQ zw(UJtviS1GL;Ejk{A+uJ&~iO^_%Psmb%5v-U927I)2kAvspLN?s5v?(Dk>_zca%73 zytQdB+KLW9nDRxA6-mKenoz80Fuo&MsW9;W{oO8lRx&atHD5I1Z8L7Lr|LiC!X8rCq 
zMH3h2Bk2@vAsIQ1zJa6Bn}u&D<%?9k66M}@dem&&4v6SQKmHDVI?Uf0uwE?LdM<36 zt}42%uLY$$qond$qR4VNK1oXys_#3vY}i>J-T# zO(FDlS{RWi;E9%QK%^HdoWf1 zZfSgW(s~{tBQ7IT4X`b|BZ4Kr@#W2HDdX?nzt=9fY!dV{)n=r#tQNl{`&Qbrkq#Vy zo>8h^y3THwS!v|RN7AZCzdp6lE^~DWzm}D@neX14j*e5^&YL4bLN*0`iZK{&&2K=$ zuzQQI7u%<*N(WZQ9dANW-po9YZQJws-P&YI%#J-3&EBRy#=mdQY`>0FTa%X8XQ|*S zWx*!7IInaz=3wjiFGam|;j8d&RcK;tf3-+7XB=lKu<5Kx*G<*Jr=wbH4sj&c^@ldS zMaE<7sSQ@|EOp7Xc)Lz7!{7*i$&rVHoo=Yl674)DVNhD`wkGOE-c`t9p6!&&LuchZIR}=?m^)mxXu8ii^8vXNw6Q ziw!I?k?tO`yrN_la|{n&nfwVZ^H;82t3u&G>YuTzm9KxYUhGuYc;8!baq-s^bPeMR zR-L6Kb#H>1RPS4F9uG;)&0~Z&8!5Ux6@%Qphe^>9^ zxznolOGJI&ma;pvtudWCy1FR|S0n=fZ$>$`x&5!x}d zR%Mg4^jfKN@5;TIxUZm!zXSa@G`T4*ARfdASA^TtZxg~g$X@=YReZEHw^!=0B42HP zuZ?4B%*Qzty2j2S!BB$&d4;KwPWn;H`GvrmF1Ll*{Lg*XJ?I%J#rF#Fzu%TT>YbAT zA`0YFQf#-^ACA}+#}wbTWc($|M8a|6!o{}dO!aX}>ZQ&z$%mfO4D;$xAwL=pW~V9# z^D0~c=k2n1(yh-g1)ve?niycT1q|jY1FS80OmcG`D5-|7T$bmY2lD0TU8@WIN@W}e%I16hmqLU zrJFDE`kgL)ut9V+E?P_09wC%mz?wmmqyP?4Zvw}5@8$$E7~xpxg@{0N0K`{T`Iffm zb3ckZ`YP8$8Mwm8%TFjp6Pu&ayKkXpfWGTSaYH&eHYL?qYir=J@cijh6})$yowcB-sK>(EdXeN-VIr5Lwf|%21z=Q7`2Gllp253~Fj`a?A+%2iy~@N?J)==+w|M#mVt^15yNAG_wk?=i>_oz%>Idyny7s^VG|312hvNbsD7Yt26r)5~?ssKvU9hu1tdoPkJ@eQgJV$~gJ7qZiO!eZ>&wF?xJ z+L<$n9YRG%Y@X_xoSg_!6>`5M#^nmV+Zurb)8E~4~Od)uhyGK;wT zkLSDs6@37NHz7jNZHIA*_A&72rLZ@tE_ujodsDOoF zd?t3~2tpeopt>8ipyq*udCutyTB>#GF}3Nv-`$Rf4FEx8!E*qApBUvcn%CY5=e<8S zJNwwhx99iooh260(b0jZNbx`xhItp*oSmJe9Y4mMKzbW!%@^HusdbBtUh>rSoR_Ib z9TkrBE}3gqkA}AeaFhz<2P;;-9-a_Juoa*)elcQ!~(R;p4g24SC+H zH+TTFo~iIyj`Y0dbC$^}M3-ckyo>>9AbTl@ei8b9B^w^SXmq zCMnVGvHCfoFDpWRKa9RQ?sLzwj7}EUO653=oJx`K)xYcZ6~SCqw}+W1%i1nwYjKbr z##)zA$v*4jjLTe0KKJdi{=AyXv1}4-7flrc#wJ%!I-uOo_FFr%QNpb!l)_GoFUiiZ z9*w$s{W>AKM}K{l(tIFJgT9j#i^k-{sF1QrzIE&F|8^YriO4X>L@Vquu@v@q$Bzks z%!=Atb;sMFhu<1~(Km(@f1smeFCOP68NIX3=4y*w14x{VRpzgW?Va7}(U*a{_W?W* z$`l2*JVa1xfeVjGbdpv_=22m^GiNZ~N>pPd znf`XPG&8$}02KhmjG|UUB6`CHKhUWOXW!go{5fP4hvryCFti(>Ld5h3g24ZjxBI=H zpEA}%F6lzdUZowXcM0lw!ZgFGKJ-K2US|eMzDj?MKk3P_epPsTap=wnri4pv&o%aI 
zSPCS~xr3@Iuc=vq9_e_L_{lhc$mALZdxiPZzMrgYY?gO|IDmO6acNnfP;YjtbFb3X zSvb#q3|cr0f|kwM#f3w#I*eWAH1J+y+W_dFdyRzl#xvOrVGn8NK9_Va4sFbZL1}OV zaIo^@M^^M^p@Mp0Z6!{!1kEb^4!?{&KubN4wc$l;oD{MPJUsXkgP)Rf6N9%=&CLJ& zc#lP0dq!V#VDWqPs!YDhpCX$!QIt3!V>rv%s>;eMc**Lwme41@wYS^;{4Cg;u@q(M!Ktfh<*vmwG#L^xmydgoTC>i>*IHGFhtN$i_<+UlrnI){;mfW zsvX7M=;j8qj^kS#!+^=b*HpuK!-_`(sC<|`@DV&x0^y-Ur9hP+W~tFWANs>bj(7p( zVhK`h_b(q!uc&KmTr)N{Hqc&NP&s-Kkb}T@)FwLYs7yzW$=~hVii(N{4<2++O%(#G zoj&d+j3SfGvfzHK=k~I45Hrb#dYd$yvhHPP_|H4H##cl*T{wE;JX!RUJEDYjAFXyj z#$+=7Chyjq`%e&03g~CIC8~7w_8Vc>yJPa-n3C{m;5Do&OFNb!8 zzVLyJ^*Z-sK{HSoSykc@A0qYn$l<#kC(QvC4Y!-clzJCF$)%G}(%%$8v}1MpD;`L{ZuwApmLtavDE zBYoL(iL3U6E@)5hpdhB5CPkiLp-j4404~ndg>9ok()kYt1qG4!*dio!_*%(8WAU2T z(~9K{4Q$vW-nVYi5O5B-EBfkXG%GEd^2T&#EHo#Ot0ybS+q+-cmu&zlPfD8?z{ouD3uPh zu0%08{~>yx-4m*sKwv>-&A@F}W-lyjXQQ*X{PG2Ui_jo&pi*9jb$kHkBkHZxtU{`sb&_AKj3A|Az}jK2-ii3v2b&9TRv?=wUxf`k%CdL zvGC5){2!LZnSQ=Oo0Vd;EAC!9p;{=My=nX$-PYF;Nf#D}Vy@@0;Mpz33;f z`ZdTjUHCLC(q?pkzkmNmqh5nXZFFK#mn;JWPGN&54{GK%g}%wT+%C6PSd048U+;sB z3to@W!NCFVNRJ(GW4w1Ixdo`1oSAg32p1KB4R)Tsw@ z-a;^^ZSO&WDRi6^K?KBYWlz@q+XTfNAfE%Ru`1{=?d($uhDJu;Q7>*lm4-zn6(kQ4lnPJrq7pnTU|Ni;hW85MN~-3$m;`X3)+0uXE!9UZ)Y z@Rewu#o^i^qyaYNetaBd@pQqiSe3Bfr@7-9993M{82hGvw~wLY+XPV#ijvx0fdi*; zudiPpBm@OC5Aa?G9WDnDQv#Rc5r$j9O{9R5k6gWawYb}&bGVrFKib{`Y0udXy$e|H z^QX}EM)Hq}Si$JppxZCj^^CZ!gp)`W?ho7E*KCBwM_YDdW_)X4U|@)no2wmkL?DX@ z@9slyKYU;&Obx6$B&bf7I&UC-PN)zIDT=rU;U`O9Ox+Sd#*$_k@YDBggjf<7g2w5# z6Ya1T9lpQ)7WDIVVy6P(fGU6YZZ)A{6AuI=dvjl^y|I%J<)=#gw;sNKdhYKq39qAZ zicCmYMDpIO7O0GjTvpzZw(HzGPO^4^ zi7f=~ceq6eclm30_%7~g;^!9@>_89r%}xzL=`$x(A;3iqh|F9tAA?JIpkxOX-(C1t zC;|0e*Ze)B*g zAa_cVF*Y>{R;Dytjv{d^;D5rCvkiS{vX4L7T4(mU@s*UyXKGNmzg04uPLd z=vZts5+0(b7Thu0x^>>@>QA@kSyJwR%t>GoNSGXrmG=Xfta|l|j@;nF>}2=waOUV0 z&rRy!#V&H`_K^rVk7juzEMP*eiIfav((@VI`to z4YUhlvna1h^K-qalq;KB8y>e7)ALzO296HV6& zPgY=9W!#Cxl`iFZu-JpUe!wZE+-)NzB}Kv*&Z=_4ybcXLmgM`Ve6vJObKRB^Xq=<;X8YsZYLMf`P%#G6 zy~A`beto(eoplu&)Y1%ZMuEg%7b`tiqMLJDT8#Yd*Wc4~8+gKOurUS6D<#${P3a{w 
z8=H%1%d_v?RR;SKN7}x)9u`)V-&lWtdqqIG@|M&M1_wR>EFJ}o;?`MuLUDpUy}!TT z2amM^{t8$cl;IeVcJN6^;Kyx0x-NONP7%5V1e_-SZ0_mmux)r4|3#AL$q}=&uWOVA8aqHP zNWf)G@+^2OLcodWrG=3xnezBk&_^43Kdv zDaqX6_-$X`ye;f1zE85~ERS}0NdUM9SgWWK%4j99!M=+Zx7XAE>rSU9-n{}`gW&<~ z+lQecR_%4)@r>F3^^65H4l~>e3tKy?cl4i^cS_-s;r3)+P*tU-S5F#Oo5kZ&&0y+O zQi#Tgc~tQa^)GSp10!uRr%ism>gRgmvFR`!#iJyuky9>imC6+IpPwp zUO3(L4Las+&9-;@;4rh4`TmlNKb>l77!__lGpE0tcWKprQBgg)>#INOKc=XAKvLl= zm{=W#xfmDcrB0sv@UHp#{~jmFv5IjCyd{fkstim$y!`kLCNl0wF26=Yb<}K4oNByQ zI47{K{pY&s`i>N-lFSjFxzvF-Nu6%Us$2?2R%sMvuC5+tn8!ek+EvhQseN_!(851Y z^Phhl+`B~AzKog}-7YAjN4dwOtgOk~v5IP89cRrQJ9I*)EZQ@O!!cxZV%wSeIC`pg zW=uV0;$*%x*@M2GbgIMcbNLzS|6LIy#|?kjWO>~jXI@5HyQWNt92o9CYwi82S1dcax$v@Hl&p+x)FDtmvQ?D(|0zLk& z6Zx`jZmDF)Wd*0HPeb9lxw$GVifXD9^)5Q*Q3sz+{AOzh);)@A>=4>wt$RS{mV)TO!;q*bk(jf- z!u_W&viF?~v`LKi%RLfyO`_EAs$k77m%}a3R9FI^WYSRSwzgJZ4&7YhrA}Wk4>h|P zl{e&K)0-JDze{z`D)WcJ{b!un`!oYDB}NB~9SM_?crI02Ingw*Ys_!R>4@|65-*3r zBj@JW6TeO*&_6ohPr3ouC)Z;TJ$4Q#Up&Xb;PYolz8da=6GG z3vXClL`&V2CBre-Gw$1KLVT zMW41Ynr~)4oy;Q1@A-O214P~b{4`7j98vu5o2dSOsvG}({r|D7{J;O&v>7d=1fUd9 zB=N99$oBvN3xk$kRvelcw1b521xMnIu>z=(-2(%@0J~;@f)MH2Pn|ru5n}5KSy@>! 
zxd3(n-RbDxzcuIpN}DgQadCi63(728;Kn^`0OWJ>^TWSqf(R#OiaKYMWSF%MPxYe* zAG|y_J=QbOoK9Tx3}jM3PA-v{MZmY=O-f(=UgL6netupuunE>ukzT*Rz*lHWC2owK zZpOEALo0xA2%EHqgE6t7MDMxw5pE(v4Zto3lN~{3#JmDQ+UV4q-U1u$ukvufumlaZ3XydWEW2t>4KDln6Q2KMBz zBgQXaRe%-hik+QjZmtAoA&95)-9uRxR8lUR&;h61>%&HNKvG1KMz?AF~?gaF7 z8&2uw2aN!toWGGR2B$lW4_?mDw<~cm4Ou38|Lwfbfyzo|F8~c+x-J-H*Ls$kO5W`V zHBHpylCu*AkC2h)4AUavsT#*ITuC%z>a>jw3wbu6$pQX|&`6cTLI(Kfg)39(z@0T` z8m|W?Chre-gc*$6_+z3|QUr;GPx1>46AKu6S6T|{Zvq}kPF|iWh^Z{oGG=O^DV62^ z_7k*`nu2`~ciRdOSON()0fPr3Z)0~|;sRrzns<`n7FZ55a7RRQg@8*YSpF@0yaI#E z%FVqBy7|J0c!R?$uo*;2KquF%7)V+?#9d06!>^WGpvputzvP(h#}ye4KS!YWV(tOn z$JOw_2pi^XB35zuZhIh66JiN`Rrjx7(K9@oXPGZJjqI2Lem9#Q?M9^hL+?QWLI{#} zS6Kh?T0pgxXn?%rX9GLIl$gWm!>R(a2rCBJT1VuynIHWuk&QQYcH0SSDT2B1`Y;#p z5S;8df2huWulT0F zmk!kKU=$Z*Fc2R-ExG1IT~Sdq`925po7SAiX;6HhvrSnj!xBds;(ODo1*wO)WB>;P zDvT3T8pJ*Z|1L8j(BJ|6qnh4RG;|Fog^ zE7b*XXF~#6k6DT18btqvA#yGC>al*t+D2MhS~?12NAZ)pgm4ToNX|j`T+xX-p2c?QpoSK_}?5gPLs}c9r&_Aoe0@6_pIc8OMe`IixaJ8L$RZ}Cf zJ=oWm6#$E!@=Q%l-C4YE!ldzUhcN>`c)Km|(~?Voo*90>wz}F6?iKipUFQC3%uH~U zy}iGKoqC1&6*|i7!iBVIUUnBgyOOETGNnqgkRrTx+uPb|zvw`e(NT9rkMFca5A2nP zH}j)i&InI1uzl38T@4OB!225Yxt^@wK~WOVBdt5~R;Wr;>G!>v2muX=`R*!uuRMr1 z`v_xCUdH&=w8U{M_%-;T*KE}gR(QL)aUHklK*l{K>d?q4T}PPmp8Wlh<~%b_>yFu> z(7Kn{a%(7@Py5Wibr+ju7Lb$(pWaZbq3zGlJII3|%2-FKOGcUhn!9UExI`7#b8@C> zn;&q5SA(Bopoq^{E@JT;->X2%T1F_Ww9%Hz6F-$F9o+b>guw04{)8 zdxd*0k``z^T_$e$H&`oi}nU>Scb2+k5q&jMWdW&^+_Y3WbO_H~ zQnNVQTpjK!DOO_msT%Qr7os#+sH)GO4a6+gX#W*d3Z`cFMzG#Zgiqh{$sC{Cy!N&d zSlh`o6-Agdu8537^Njo?b#4(91V6_iF5Y!qSHL8_h}?wR{4^Vs6yj-xy-@NAeOV~` zV4U+#Aru+QuMvf=O0M&UT&4L~V53jDrz<>kPW~y4kqmI~oH+8rX=*2xwz0kxGCSoC z7M&duIHM(E;sDIz(li^`TuuyDi%=wClJOsyCjOCGj0>1y%dvX97ZzJ89>zj>9Vt%!oP2W#mB!TMph9Y;4HD0T>LUb+3H(M9g;K6sE$U0TDA>zQtp_ z3HM9*o~V(wZGZlOm{GH%-OG?9=_%mw5}jG_7FQtKZ3Go$b>RN79G9{02gslj7*chV z%hKzg{7OZM+=7x#lXgd+YJq-8?9w^g;ld!=VUfc~R*pO}E2{Sj3T+D!Z$S2jSIU7J zEr%J4P+SVb%h@tvLAN{sQ&`TKYDQ@E69G0$$deQWWAO3={%dExw{4!~1$jkLWGeB} 
zrOc}_=xw$=$?yyOK84UCVk8-Q0WD^A!Wu}ly~N>M&)zLt=)jVy!RkpU%%Gu0*coAu zScg=50CignCEe0v7}bi5r;$+|d3pJ~uLlxGNWRSlWt{xzkp}z4+L{_NKQj$SV#?%% z%fdR9gZ*rmob;)3q>a1x?s;H~CxB@*>KD=oASgoObUC@-O(1nSrOpNF6oH?|(geX7 zy9vXS5ERNmFxg^7|2;Ga^5(Ti6uXNObkPP#g~V2i8zj7*N)eba*QEsqkmv#;B8-Su zVv2*#!G_T`7z@#<81Ns{(_8DwO1Z-gQSs)@N|66IK&m22iC^cX+Y&Db+G<2YX=~}4 zK0Vxf<+m5tY~lMP1xC!Y^c8Mc8yXs!0798oxpHN6eP$INZLV#P8uG;%6naxTW7STn zo7rk8DEU7PPvDFjEPbkWxY}2V zOpyP$37serIfzpV3=Pz^We_JedAdhLtjEWt>fcoD48?RZv9}QG4l}R3FUA@OF^Rdx zl*)jD*-}>+0D92Z#ID<=9;@Q@qc%8-|E8)p1 zD)w2%KmbaD5<*P;!~u$((*r)2(Cs9I;C&+Fd4tR{`&A>Ga16!9Jy+>4m;_&6Kx2>w zoIs!Lei6kfpq))`7==_AnAFMfP|0bpoT^z$jh>pzrh>#2q;{P#K;+j z{T8a9A5fi5IW+$vQi>jw^((Nr2w$&klLLtxwn&Sqc0S+(@((y79gM z%~&BD)({U5Q{0HUR)r7wXMfk$F8eq#xmQqd1r(xX@$IZIfVX>K4l;w4zer)T=(%pl~zzVVo6^Bkmz-F5@4RNBQiGMK0h4rXybX433? zu*`LF<~%_tm`=pd@}`p}Cio6DOix4=j-WYgyoz=_{T3Om68TdXA+&X??bAz%V#32) z3N3r|_|U5nbOk$#85ZH!wgVF!ulqw$;|$v73_HXlJHAOo)r>&%9UbdYC#Dq~DU*ij zOo;7W@|~`R`&RK_8rs&(&H3f>87O?Ov-3ZLtXI=4)%fSig`fk$4y}P zi6@;5rV^P3<$GDA=A*C7Y!>dzJawMuL;PI5K~mK!4XV0Vt34lP?-sJMJ{c;On&Q|W zS5b|Lm^p|q;+T9PDgoLg&xa2m9_@oCa}yp06(+bc!zf<_=FRNqz)4qUTB22x-H79M zWJvH&Q#JIlk9n55Xj{)fn}Si;zoSl2U=n+KIgZO91s%Q<7)MeY<;LkZ>p#}hKHvoBoVX5wf73VJ%rvfF{3mSt#YFxNMRc?G-SRt|(FSKsEw zwM!3$jg8>6!6t|2?}n38YO!mp-?JuWzR=IO3yOjn@{{QSztU^yHZZOs6@vblY&hT` zxd`wBJIUQhMrsZoJ`P!nqF}8)fEh2HQ#b1$!>3@;@;=R5yfYy-EEHz;@M zZ^q}cJFj9XYXLMczK7L`Gc!T!OON4iG8#bY9|}Q>);`DDCE>d308!pB!Cd&JMMJ?M z(-_o>V@^B?nn}(+@oWXR>}9&qvaD3quszV>tK!@iR~j1|2XTqODC7w+*oAzkVTtiQ zWlW7y6s!)nLPJAU9Fb&U;q@S99H>2vOvEBfqzCHs>k$6P_8o~Mt$lAJi2aJdv+c)@ z9H|DJmH-Dp#hrQI(z3g$tr)?N0nxo`Bkg;R0g6Z++U=V^(JZ z**Z2h_QszdOz+;k>w@h8=$!PlaB@=^z3GSSz2`o!wjju;RY-R$sKOU75|GG6zFf1j z3lq#Q!yO^7nORsYKYsi;p@N93X4R@HBm)`>Z%v%LbqUcBG3Q-9Qwfw(v6UVc8LFL>g+ynyj z-`qs{#JarFo+-3m#Hj)MVK|qjU#UaUH>bCQYdVMM=;^DPn%FVaBI0v3ASmbnPF=t# z(M`%7#1~E%@kj9|_C}1C?SuBtx^^x8g8mXSjrN_b!J=r=3X6&o0e%kRWDV@`D}#+& zar=pyk&;9T#F#9{_&LGSPz+2$cqh~i_pXb|$WTq0=g}ata&a*NPR(Qb+KO8Q3Yr(H 
z{m<*-zLYYn3!-`-t+KZgcuu3t?BAwSAY9JCj+zLlhVM|+{z_=fz0cAVH^xJ(mJTn?2!AcntCuWcV%hOMFQfWcJcK~+VRRXIIu)e6NWzw#bT<44UPuw zhsQ&&seq37e3K%8SFE^4b&u!ssSe^fKM1U$hgN14Zj_AnjG?ph$G%JdcE7Yo7T*g^ z5p6DVH?8WP(4F`bOYY90*Yy%)%;?LZg9ljw;yB0vD@Hh)Ja1V$&h>!xeIhXj9?nOdwaK_)!PDR$#!xq%&xMmu@VUSG|IS~RO8ZrMiq)W zg&Nm6VGiOyKic=dcb)k!0sQ|1pB!e1k!PgiSCITuEYL6mQ8Fsk)YOopVqlYkwd6L& zJ-Z-6TD^4{?YgD20m%uYGKBad4J9&$mptT1hLtNTJXf-7;mHt?10or^;f3ZjHe#se z_pBSn`CJvSx&lIetiQ!STVs*hVsIX|9-T#)K+{k9A>KCWB(V_PIlM<#G&HRROgjO`3k!Vsp`bs(UfxD)t(gvEWm5 zc4wtU5qgF0D=ClH{Nce9*8@gCpd)gPnI1co_0|%HXC{IiXnA>4ip&ZpsWJ7we@JFl%8@-n?%hkA5T8H~ zOPXvzbY!~Z_X(Tp{ysH!c0kL6zdQi$Lp%8(8#w|8-5LRNqI3XtrHZk=RLj9*>pDyL~_Z53$_9Xql||9Zf1z2$7XoTTI$0I>t0#>v?u zz{hxrfV})hxSD7wG@5kC0Q#?MTLd#1P)%Om#1RW>cQ&Ty$O{YOq=ccxYOr~t-~qSe zQ7xw$Fc*3^D(Z0#1aeP3HT*-R75s!ME^j%d4M>E8WVU#+;sgFKByR6YX6AK>eOmVi z(w6t?9B(=~tpYg@EcWER*`_*M1f1JWwBTe`38;J}pw_<*e;8gAf9|&^giQqY9n5Ru zfmLIR3r=3n*eM*8C)Vj7%C3Ywk~eq8ip^`8iTJ|C$tetmM*yzChu!ukvhZYS-7hpf z3c~nozKW5ChChb2^Xi=8X9Amv8F#5~D;f#}@G)pHZY<8fXxs%W^#L@EI7p+rJ0avg zZVS*_gp8k%WEXOMa!CKKn5iO-GI51}3 z^b^k>QJWBJDLL{`~RufM2|ASaKXWfI4(QSy|beQX3APg`A`2E~euq5I-J_*}5VfMyz&fi|hHQzmJ|-rbMfkL@|K0J*!3avvy>SpY7sAvT z_z6P9K@|@lW(2zYbC(Lw^Jgxblsey}QNGxkcgVsKCz8@p62K!V(lxs*{jV8>e1L$o~QGQ^T?%7=4O zUcs~`owZ8)=)D;8y)Zq4-bLjxEOp)k1whS0^0}Ylo2;?+Cnuz+a*UWk<)inK2mMbv zKkWpSZJng=m!fZZEr(d}*-6w#U0>rZM-!iEvq+ugs3Vkf z)Qy8NP)}&xCzksUIEBWTI{|?a47K^t4wJ7Y&WZ~x*dD?$+|ZRWIvhm**K=orb+zc>ash#m~=->8^T zS(qBVrMPwq$2fNN_tPU?{B!2gs&zzt?S_332*vs(55MQcxk7l|TfN!voY#Kc^W1*s zvVv8!Fgdq@n5HpJ*#Im|PCtNHM{Mi_!gqC@2C_bE;fiojnUqx&U;=STOJRf{H2rGh zb(Vm@z;{;`z`?~<=bf9THWk-o z?`*tW06ie>#52KjAENz$oO%dxEOl-*j%^75KqWH)C@`Kn8v>o`4n1{pR&tzRK(Oc- z{s0`pVkIiyqUHL6QwR=`!zxe^MsbKynn^GU#)0cWmrTlxjg5@XzywX+6qlLuG2<(S z!J#4##W<2^%^I~0m;6*LJkA@-x$jj z=i$|&UmMGcSXB;|tp}0 zp$8H+Xjdh@(ic0Fn1ON*K(s`QAZa3&_+%8R4)bH1Je2`OAxC(?&AAzx1!AWbc{pWk zqUjmVsl|8K+B7b_e5`hG4}ys9tITt$Kkc*VImpRnq@&rU`zRholu(}lrs9{s?S@jh z62Q6~Y&FiE%M6RlJrV^w_#lKJwONx-D-Uv83VqU%J!b(vjQ~No%!pG}M2~?qW*}Ro 
zw!7iX2htz`3XsDPAX|vGPhPrsk#d&1xyHrjsrAKU@H?Z3S7KGt|GYXIcxkr7;>$Fw z4r+K;HqbR-i$R4l>yYDiu8el=BLl{$XqC7(@N?ODJ8(O8Uu9hF!>{v!`402;kdT%p zYA30nV0{fOHjEe2lTi!IDX5FtZ=P)}O_|<>*N|=V?EEq5i!DU-yp0hDkh)B;Yv<4G zvgrWYcSQDFuit82B7$~uB`)^-kK%kB^6?=~DHuSUoH)~X6(bSktd%s7Rb;vqjLsPZ zrpC;Mf5#bgl9W!QSB|5*TQ%AiGN2}ie2K(>g#X%YiS}QS?rV|9Rs)LQNC6Jq?R)Xy zd5<$Vac1(bZ>c>V6jU4Le?%SZkBpGNC^%F(IVTKU45fVKN)GnPzg7((Z9ASFIk^#$ zvn{3V>(>N`YYeMbzXC6>zso?pbX)4%tg(y?fdl@^f}#5uZUH8ypHwf!RB^vZQI)5CiU8|Y^2uw!_-?!T9G;(5)2Wv?ZXxHT9XWroX=-P2%@hE2@@zWS_m%F_ zhX}P;XZJq=VS$4t$nhiCjf^${kG7g##{M}f%H zu$YB0ok(FIAy1=^g@9L})+brIP3ZOS+{|GI>luZ;RQ_Dzpu%4BhmA=K@tvwm*F zcjzQBZ(+P)!2|{)MO@_*I${)=ulo<{XB|C-ZA`g?`x?qrRPDU?i?a=6zc;H&-!X9F z?#I~e>LSuBQEg(!cEov(K>ROsn#!uP{}8LNN_oNXCHfy;^gr)c&xnf;dEdNOR&oN9 z*x`dDNtRYmAI0|!@m_~tL}!}|QXgTOLLm2o4@g4B;EH%Z{U{o(;q$D{%%-E!JIZ92 zz7h6uG&V*OQbDp5SQ@Gv4>A7(HxEw`4&O-xQwPw4)T;M2NOL_Y@i^+n4)#ZPc$X+; zbe$7Ae2F$kt_KajJA)cfKb@tW^aa<>*0y{OM{UNKBMk-ZB2@Zp*f-Sn90+GrIaJKFr>jv4J*t%Net_NJ zHcnS(B8db#SaTNq+w=tDL4N{O&PI0fzaifL>h8?La!%X-f7?BMnHn_6l9)k@eP3D> zBOytpZfo`%X{1m}VOqvIMoA1wichpTDI~ygow&=gJV6HWB|2; zQp2C1s^g3X<&7F%`w~`eT>I6^6R(c&mKfVbUqYhy55QI7+u#2wj*jzjahu_cpQLV6 zSBUyZ<_2Bk=O@y8J{q-i*Sswjm@w=bKQV79Byi!z&6{S*bj(lXT^Df4oU7!`-KV08 zDTOjJGi3DY@7|Uewx05!jfdcnW1qxdn<=zwU4k}B;wiU3BJLjvDJuQfKh~fD-(Tl7 zZ3dL0un&YO6)+A|Fv61$!W2uWkcps*;tB9DX}HR&$-IAmzG@QZ)?OBI$R)U9!6x~A zf;})7{ue156#SpOK!p!_wKIIudNqsw<-yE>0LUaA3qWFc+$Hw48$7!NxSV3`$TSx_0rUgM|;R zD2&s`d#Qt4IPa<5x5hCQne4j3slRUZ7~8O^8H4U9+BRYS9aSn_Ko3b?oB>HDA7w$= z%8f^&kST{GxYw*{V**n(H8%E4Papvd&OhYl=AMsNn}3v@)~bhfUTC>**^;hWW0`MB zv%FIgqfy@V`KRtwzp?wP=F#TphG?u`Gw_={D;}!oZDgH+tlq7tc=eisA8i+>#eu2O ztJkrD0-Y?EeeqNKxOeN} znvF-w9wzSoS7sX!kkI3QA#E$fg7)v%YF!B=`S*(l^8fzZUo%UK7~1VSevN1s1`a-O>Vor%+=>Xg_|qmYB5 z`xK&^(Lv0P>$8>c>ef5a30$ru-B~v-!{vp=Q}w;4UYDdT3bUj%EmW?>+^Bab7$qd} zSK?=CO^rP<;kOYPO}j~~GwFP|^oni2n>pSB-vGqQi~!{R8#dnbTaLNM#)nQSAB>>` zmYcCoutbRqER0<^qWqz_jovD_zbLjYyBTcB6RxviZlH4p*49G-QLjS4TPX}h0A0D_qIj+jRd_#(7Lqc(-|8VMo 
zpER5B`=YO7wvf;e@rW_G2rem2GUZOk$Y^O6F8-fO-De&NP5R9?G_>KRSms6V$*4oQ zfC1>rbFd1SC#agetB6RIR5TkqaICx(%)EA~+Qp}W)1#7J9=`MQC!Y6UXsq*f?kj1>NSj3F z;hOSQ#J0tC4IyVWd-V8P;*I9J^HR0?WiSu^58!^#YDT>1yEv0CT-;=L${!v4tH-xb ziS;I;Ct!5mTi!P1P{eF(WGl*{l{#u%qeQ@iWC&+E-_BpWK z^j0#JNZmus5^bB#?|DwgGuf$T_8+gtJR>iP!+W;`%DAPHkvWaT&cX&djt=eiv=}M{ z^IMuz#S^G)9+_34MiIK7Jc6x+!M?2m9kAF}t&gYo(Fc>>LqV6Z)<1UXC?v&HX0 z`oXu|3jLE>MktVe+`o|0{PsrC`X1bdG=nMZA~RZK5+ypEk@Y(_w;AkfQLph~l=4rJ zJ4F<#t9Y@8*j#NTz+43AWQomnJV~Rr0>J{vhVPpxL30hsy`OAcgiZ~~jxtz`wm^*f zr52LDkw|yKgUr zhU7GHEKAS{n4jj+A|NHE2Q!fav*me#t#NQ{fV{YUPq0z@BM#k=^ya0A0=O99T-YmS z5qo%$E4|n3=fp|Zad?u5?JB^f&wRN7ncOdSkLnSAGfoAyGXlZ1qpB#X4%nGuEhB5vL3hCoS_8H z_C(h6Yed3C){QgtV+C|0^xvW~o~MxLzbhexW|I;68%1YYV}xu2BO?#U)Vtba(U!71 z^BXc6N#Tf`a?6G^F#C(kcaafv5jn;}i-qI9=6L=w&jUi!X42yMWW*#{TK4X!S9OUI z@k(JssNs&ml9c!%j@2ot%ueYz$|dbtAR}_ef9jGPUcUD12BWz~TXen`B}wLF()U-e zY1b!ffBB^j$oYv){RBux+Y~W+A7g95NGsFM()LA$otKk4snt;yV7?)AUN0q_fvywi zt(pqSS^Ib8bGOf)o!wcW=`J*8^D3*dBubDw zeJrvutho(eu!w$Dvg!rsovXFM+4&F?-3=yF5hqD{5h&})qW(`-k3|2Wo3!bvQgt;x^bclTL8-uOGofCDQM-J3hD28oye+*n@Q=-; zsk6U5(lhqBxna1r+mXy=Xtj7Cd+=Afyu2qN;aDg#3GJY)I~YCs#mLjhJ(WXQ63i9b zKYbVmhs;eQX5K_=zQ$d zf5|hNbAJZNO3=~r(YKSYe6Jx<3=-ZF_~ApfzwF#WGDN}a7xgk(s>A>I6MYB z`$D8oOJd_z(HF}YLe@dVYAh5Jp^=y?T=f<#3!TNZEZOjpFR)G$<%LCHqFBM{(`9oC zQ097WQL%qQ6d0C^A_wtV7u6x-Sw%D6U?` zX)qq8`pKh=-;mOJ!{9JOB)K&A* zEPzD-;IM?SKesUEP-=HXl})U_wsy~O;|dSIRcmAJjWKR|*N)~gL)niYz|;B5+j=%^ zxyug{{dL;hFHRk1C#!QrDO@XU<%vg5+OTosMp;8%Hg99|{u*OUmt(4=m@s+-px71J zBiIs{&oHZ~IlaY|!@M|m+X;S%Krs1<$dWxIZ06o4O#SiiK2k+ohwtMvNS`OJXu7!* zT>De|;U%B!$c+z2Dok*5V~`jShn{7kYM_L^-DGC9&|j8*&F(R1r0t}?%pn^Ck0QvF zX#MrsdVH(!ad43%kflZT64C6s+i z9W-pw_@|D}t|0dD`8%G&!%YryBgHdvWxI7PA(EwD>P*Fu(n7qixR?YO=4k$Ts=M{e z6eMoi9L=^NS5V_J2h4PIkhBpP({lV&ElX6LkAZTCScZ2olf@3kvm1AjuGe*+Hm&!2 zlni-vlEn4Q$EElOuA^*pWNm@v-}wF2gM}fCsk2(9?xCgs;${eUtF`0MA{JO8^)$M5 z+YQ~j+G9sZzqS?yYi%PfG?6Fk4KpT5J*)KbUtk^QK5jWVp7m~Q{Bf#hy@o_xXH|=C zi>OJ7h~UrLYOPRHKm+J@Em(y#IHTqBlm%v=z4{N4I3dR9E3dAaO0DVGI`kbb%ro+# 
zJw92xgUEhC>J%oNcw@iV4)8s4UGC{QPGckx?pjCYTwCQ=yhc8a*kSqu2AG83-kWsu z1j>Wz`;h|3WtPMvrcBsnny7T zI+_N1e$$gJm$&vOIx9&e0wS!he;=OdfjL8|zND_x5p+j(6KJ-W!Q z@~R=@mScNT%SL9i8IGIbP*GpIys5@-H2fj~95=|BR~9m42{1^r-vVvMvks_?@Q-r>h$(!di~gq2D$k(bn=Q6p&p%4U(Ki-J+}G0LgF^^ zKG@Ls*RMaInXb=b9!FfT^qJxX!@_djI2| zR-1mHuFButd`d_awKBs0ce?NF+O0<3&uxY$b~ipdB(2k77vT!%{?hIH{RLeV@$Mr4 zsTGrf9M6v)IfQMNpxDXIFKhh0dCZf8BM-*pSRcuGkE3xSMhw47B*M#}SMJAHhH5`U zmU@Pcohwgf!m>vu*S9%MM=Fsv3$8W?K?Dn^< zJ!GJFz&bYlWVZ_Q><5+!=MLRgLxRTC8tK0P7|+_{`iIMMP{|mXcXO22RD(~~gun8g z@G}eE54xa@g0d*uE1@~@N0~ypM^31|TsKZj-^TEo#gysC;-T7v!NhI3r1Bb%`Zm~U zgwA*UjdvQ1Gwn?%;olm+Z#j2F7!)`7t*s~tQT_yFwlini6+4_($?P`_G8PSd>c>lu z)m}e4AsU-dbZ$Gd?kQgcfA}WC>Ad+4qt;otDesGxz}_*Rdm4w<)^BiebNdzBS)f+O z^jq#x!m1@F2Yc9yM+2#i!sz64n$z8c44t9C5Ud(**?m-Yq|>?wmoE)W^|EU`^JYx) zrAzZGn!%#Fv*VXGa+A<|!eIhUNBqP*PscQ;V)GTUPh|?bem2LALhMm~`8hFyN&8zD zbVS#`3(@Vr12E2w#^Y{VwWo@`sN1DoWPI>+4_ZH&Y<(x(Zu4K3lEnc{iU0Epx43Yx zcoRr)M|?EXc)$`MA7s|3u$mMY;MvW=7AJ1tQBouHo0gD4jwi1#>t!Z!n5Ifo)3#_K z4~m$T0s1=^mrTaXy6Sg!!8v+^*u@K#9~y4D&y`DQkU?@p`6FNl+jBi0u23*}9;9H) zr3!iP3|u%=e?g8Qc_fT1(reqp?GumYTKts)e5y60x87jxoH-Zi)zK&E#Wom$;;?oO^-f1ld^w_;B`Ue2(l)&Qi?!^3f14C92wmSpi4aMmoaFQaeK&~98m zRV=W$ibR10f|RuJk#*SGLr;Py<{;x)!9EjBD?P7f-d{BxpJsS#3+;&SNdmjDblE?6 zx?t#RqnrT5otB~9PkS6o;h-lI(pEL}T9R2Dw|hGBTqZj$c$U;~2mUE8-#)COQN+rT zIv3a_5zo%fJu$f1XkG4r%SEG|&S}NGpS{?O{(*FWR`k$o45&{;zP&_$TNrk!om=E@ z5K}UIuCrxFTUAV!^svNw1xRNz#PVkoWJm&cP&f6be(mx8&kk}S1jp3%Cr=i~rZD^T zAvPuO^XH};_G{8S8@fR=SSw5Lm~^RWY;0Ts+vpKDPYJNQz5bn!|oRNZ6Y6@>yWv3)eyZ{d)U)Z#8k&UU^v1AILi^Ol^JBa?NKL{>IbMgNrokf zaVSWm58Ibq)@6=qG}C|5?<$L$YdXzt+;mNz6SZo(wwXyAAH0oOSA8W)n-{S3rAuOLlG(53oX`7}{w_!)`aHa( z){EB$=GjxHE(wq{r0Q`Jp1Po;XZuy3qXSfPM~QTy=EDZ&MJ`qtYS7(OVhj201y>h) zB_<~?^mtIu_y$F6quj<%v2dQ6+A2x3baY@y13Qdz*&$)USP}E8dA92pxkdVi#*|s) zib5M_;X$g1USu=!0tX}AH?$lD(7)eHr*vf1qMuBYz1B{<=vm4Q{& z0Lh?%0XjEzK&*_|sz;(9NTS!?P|p_baTH;dE@7=WG6d>Na4cX3MwM<+GxU|qsYh40 z4B-ZitJnwTR5q!21C}bIqmU@40e0Auj!{|nHgS|jtQ>vK}k*}tu!54Th0l#Ya1Bvuv(ut8+wo5 
zcr4S*PJtJ31oN)0;Q%fanJ6j5f!i?NTVzWip@$Wnx}a+o!NU(QMBc>8&gp17>2u>t zbaT8INhQ@1#pVo+*_;~9IC<eK=xf(z^Ne-TC@vfw*rnpWptp;@GsyYtQDes3L@1`| zw>ky4o4hdE@^JL;;E_F z=el?TEwUFer2aT0kr(cV#TPPpp<{#G>4&dRlI!FZ`ucVdfD!>#n$w~_!VOS|diXUc z6W4S-W$b;Bb(vXQtOdGY&rOqgR?7v!ujvAsst&5S2NfTda!ExFhZf{oikUb&`Q4HE zKIfeJl2$y+aG%!rjnM^HX9UrjNx^-U7r8I9ng;PS=b#9HqzR?lxa-8x$hY+o!~XsI_dT9{ppXf~ z6;ZX}Q+M`_O;k+X=vYV=Wlydm!YPilzT>Qy$+*ytO)_REpmgwI;~FpmcB z7CQtLol!ftF@Y@I2iB<@hbAG<=re!|FOJ1Foq5jnb4xsTuvv$2v@B4L7&>(5f;xic z;|tyNQG;b|siAlz>HQ|FG-KzI*haB7Wo3Yt%&FR;!hX+OKp^my5pzEVX=jI45TwaQlPJBZSx3@eg`AL^WiV<&++OFwsO#aE`T& z_2a}S_j(MlBoV11TZ}6y9t+(CnQ@=WpH?(0i0RsqIb5sj=2i6He6q2xRbg?gv=yN6 zs7wkcUtl2$|8ntlO>*{_SYEGqZSfnNU(he{IDS-M%y$L9tw7{>157WMMEwK)cN3D5 zzTdYmyJ!5P;4RpbRV9I_NA&01GBEB$cBA)UolVT>m&lFaVxUEpIVrqP!AeRhBeEpF zAAJr3=DMVGqLaBt=+{Tb)~@!uuW$Vpmv za$>T+yPE#!;lt$|nKD#{yskJ&m$bvMu^GF@1AuD!!<-%ANrCb(+`%G@{ z<hg7zf1&#Hhd>8$aAuzmz!0?d1^)@oclF#Hg?G7^>q&45HLD z_KhR!N17YnlE37$q6cC*`LSL^RY{W-Mw=&eDeOkwjEZpbPQCt%CxoN)Px@8qv)Rmf}z81a!i8zio9QnAF!LsaPueHq=-q2 z(3E~LWXRb&udn<>UDM%lfSXCy09e>A$&5s!&u}3LHlQ(9GD!ktAW)D`EU0YP?{5I+ zMZpKC(TO)_W^Ue@U(i9Py_f)49N{#7jEV&drxa}xO6kph-WrLDb+H>yaJIU95u#zb z%IzEeme4+N3$Ux-qlQyCn41R_IvaHVGV=2F_ba;e8mUl-B6K-uu*2d(dBF@z`eTQS zWg4{a_e?FS<^AM?8d5Ltz@{e@RtCq|#zaYIaw7R)B zZqi=V%e(*l>AmDC@?|c7DT>-bT`#Z16-^Qi;wF$g7IL|*_JptqCZQA{a46w)2moJ0eH2FhkoVEV98X||gcK`j# uvPM+*@BbTk#^6^HM*i>D|Ht2M>C|OTd;ej3Zm22vnl;06dc5iK-Tw!u^C+1B literal 0 HcmV?d00001 From aa40d0d36f754e371646dcafc9f92b67fb5e607b Mon Sep 17 00:00:00 2001 From: Shiqing Fan Date: Wed, 17 Jun 2026 19:39:44 -0700 Subject: [PATCH 39/59] fix comments Signed-off-by: Shiqing Fan --- .../distributed/distributed_data_parallel.py | 18 +++++------------- .../core/distributed/finalize_model_grads.py | 4 ++-- megatron/experimental/gtp/__init__.py | 4 ++-- .../gtp/generalized_tensor_parallelism.py | 2 +- megatron/training/arguments.py | 5 +++++ 5 files changed, 15 insertions(+), 18 deletions(-) diff --git 
a/megatron/core/distributed/distributed_data_parallel.py b/megatron/core/distributed/distributed_data_parallel.py index ff9006ed029..03d243e4865 100644 --- a/megatron/core/distributed/distributed_data_parallel.py +++ b/megatron/core/distributed/distributed_data_parallel.py @@ -82,9 +82,11 @@ def __init__( self.dp_group = process_group_dict['dp_group'] self.dp_cp_group = process_group_dict['dp_cp_group'] self.expt_dp_group = process_group_dict['expt_dp_group'] - # DDP reduces every bucket over the GTP/EGTP-EXCLUDED replicate group (the gtp axis is - # completed separately by the RS + finalize all-reduce), so the intra groups DDP shards - # over ARE the *_no_gtp variants. They alias the regular intra groups when GTP is off. + # DDP reduces each bucket over the GTP/EGTP-EXCLUDED *replicate* groups below; the gtp axis + # is completed separately and a single 1/(gtp x replicate) factor scales both axes together. + # Full rationale (independent reduction, together scaling, why average_in_collective=False): + # see megatron/experimental/gtp/README.md section 3.2 "DDP buckets with (E)GTP". + # The *_no_gtp intra groups alias the regular intra groups when GTP is off. self.intra_dp_cp_group = process_group_dict.get( 'intra_dp_cp_no_gtp_group', process_group_dict['intra_dp_cp_group'] ) @@ -97,16 +99,6 @@ def __init__( self.expt_dp_no_egtp_group = process_group_dict.get( 'expt_dp_no_egtp_group', self.expt_dp_group ) - # GTP is "active" when the replicate groups are strictly smaller than the full DP groups. - gtp_active = ( - self.dp_cp_no_gtp_group.size() != self.dp_cp_group.size() - or self.expt_dp_no_egtp_group.size() != self.expt_dp_group.size() - ) - if gtp_active and self.ddp_config.average_in_collective: - raise NotImplementedError( - "Orthogonal GTP currently supports average_in_collective=False (the default); " - "averaged collectives would need per-buffer 1/gtp scaling." 
- ) self.tp_group = process_group_dict['tp_group'] self.pp_group = process_group_dict['pp_group'] self.ep_group = process_group_dict['ep_group'] diff --git a/megatron/core/distributed/finalize_model_grads.py b/megatron/core/distributed/finalize_model_grads.py index 598e229da8f..c551bdd1213 100644 --- a/megatron/core/distributed/finalize_model_grads.py +++ b/megatron/core/distributed/finalize_model_grads.py @@ -541,9 +541,9 @@ def finalize_model_grads( config.generalized_tensor_parallel_remat_size > 1 or config.expert_generalized_tensor_parallel_remat_size > 1 ): - from megatron.experimental.gtp import wait_gtp_grads_on_current_stream + from megatron.experimental.gtp import wait_for_gtp_grad_reduction_on_current_stream - wait_gtp_grads_on_current_stream() + wait_for_gtp_grad_reduction_on_current_stream() # All-reduce / reduce-scatter across DP replicas. if config.timers is not None: diff --git a/megatron/experimental/gtp/__init__.py b/megatron/experimental/gtp/__init__.py index 6870ec2b22c..8ead8afb4e9 100644 --- a/megatron/experimental/gtp/__init__.py +++ b/megatron/experimental/gtp/__init__.py @@ -28,7 +28,7 @@ tag_gtp_params_with_names, update_gtp_config, wait_async_comms, - wait_gtp_grads_on_current_stream, + wait_for_gtp_grad_reduction_on_current_stream, wrap_module_params_gtp, ) @@ -56,6 +56,6 @@ "tag_gtp_params_with_names", "update_gtp_config", "wait_async_comms", - "wait_gtp_grads_on_current_stream", + "wait_for_gtp_grad_reduction_on_current_stream", "wrap_module_params_gtp", ] diff --git a/megatron/experimental/gtp/generalized_tensor_parallelism.py b/megatron/experimental/gtp/generalized_tensor_parallelism.py index 6a2503c0313..580bf31aed2 100644 --- a/megatron/experimental/gtp/generalized_tensor_parallelism.py +++ b/megatron/experimental/gtp/generalized_tensor_parallelism.py @@ -255,7 +255,7 @@ def get_rs_stream(chain_id: str = GTPChain.GRAPHED.value, group=None) -> torch.c return _RS_STREAMS[key] -def wait_gtp_grads_on_current_stream() -> None: +def 
wait_for_gtp_grad_reduction_on_current_stream() -> None: """Fence the current stream against all GTP backward grad work before the DP gradient sync. Drains in-flight AG/RS on the side streams (eager expert backward may still be writing diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py index 28542dfa8bc..7678e4cb269 100644 --- a/megatron/training/arguments.py +++ b/megatron/training/arguments.py @@ -1462,6 +1462,11 @@ def validate_args(args, defaults={}): "force setting NCCL_PROTO=Simple might introduce bad perf." ) + assert not args.ddp_average_in_collective, ( + "GTP requires --ddp-average-in-collective off (the default); averaged collectives " + "would need per-buffer 1/gtp scaling." + ) + assert args.ckpt_format in ('torch', 'torch_dist'), ( f"GTP supports only --ckpt-format 'torch' (legacy) or 'torch_dist', got " f"'{args.ckpt_format}'." From 601a6586f2f6f3a258e3dc0eebf3fd4b46257f8e Mon Sep 17 00:00:00 2001 From: Shiqing Fan Date: Thu, 18 Jun 2026 18:51:52 -0700 Subject: [PATCH 40/59] fix comments Signed-off-by: Shiqing Fan --- .../distributed/distributed_data_parallel.py | 22 ++++++++++++++----- megatron/core/model_parallel_config.py | 12 ++++------ megatron/core/optimizer/__init__.py | 2 +- 3 files changed, 22 insertions(+), 14 deletions(-) diff --git a/megatron/core/distributed/distributed_data_parallel.py b/megatron/core/distributed/distributed_data_parallel.py index 03d243e4865..44eb457f155 100644 --- a/megatron/core/distributed/distributed_data_parallel.py +++ b/megatron/core/distributed/distributed_data_parallel.py @@ -82,11 +82,23 @@ def __init__( self.dp_group = process_group_dict['dp_group'] self.dp_cp_group = process_group_dict['dp_cp_group'] self.expt_dp_group = process_group_dict['expt_dp_group'] - # DDP reduces each bucket over the GTP/EGTP-EXCLUDED *replicate* groups below; the gtp axis - # is completed separately and a single 1/(gtp x replicate) factor scales both axes together. 
- # Full rationale (independent reduction, together scaling, why average_in_collective=False): - # see megatron/experimental/gtp/README.md section 3.2 "DDP buckets with (E)GTP". - # The *_no_gtp intra groups alias the regular intra groups when GTP is off. + # Example process-group sizes (e.g., TP=2, GTP=64, world_size=1024 with PP=CP=EP=1 and + # a single DistOpt instance): + # model_size = TP x PP x CP x GTP = 2 x 1 x 1 x 64 = 128 -> DP = 1024 / 128 = 8. + # The model weights are replicated DP (= 8) times. + # dp_cp_group (degree of batch sharding, includes GTP) = GTP x DP = 64 x 8 = 512. + # dp_cp_no_gtp_group (degree of weight replication, excludes GTP) = 8. + # gtp_group = 64. + # tp_group = 2. + # + # Data-parallel gradient reductions for each bucket are performed over dp_cp_no_gtp_group + # (GTP-excluded group). Data-parallel gradient reductions over the GTP group are completed + # separately in the model backward pass. + # + # See Section 3.2 in `megatron/experimental/gtp/README.md` for more details (including why + # average_in_collective=False). + # + # When GTP is disabled, the *_no_gtp groups alias the regular DP groups. self.intra_dp_cp_group = process_group_dict.get( 'intra_dp_cp_no_gtp_group', process_group_dict['intra_dp_cp_group'] ) diff --git a/megatron/core/model_parallel_config.py b/megatron/core/model_parallel_config.py index bbbe54b323b..e984b4f223a 100644 --- a/megatron/core/model_parallel_config.py +++ b/megatron/core/model_parallel_config.py @@ -24,9 +24,7 @@ class ModelParallelConfig: """Generalized tensor parallelism with weight rematerialization. Shards model weights across GPU ranks along ``out_features``; each weight is rematerialized independently (per-weight, not per-layer) via async all-gather on every forward AND backward pass. - Carved out of the data-parallel axis, so increasing this size shrinks per-rank weight - memory and shrinks the outer DP that contributes the per-rank batch. Placed right - after tensor parallelism in the parallelism ordering.
+ Placed right after tensor parallelism in the parallelism ordering. """ pipeline_model_parallel_comm_backend: Optional[Literal["nccl", "ucc"]] = None @@ -82,11 +80,9 @@ class ModelParallelConfig: """Distributes Moe Experts across sub data parallel dimension.""" expert_generalized_tensor_parallel_remat_size: int = 1 - """Generalized tensor parallelism with weight rematerialization, for expert layers. Shards - expert weights across GPU ranks along ``out_features``; each expert weight is - rematerialized independently (per-weight, not per-layer) via async all-gather on every - forward AND backward pass. Independent from the decoder's - ``generalized_tensor_parallel_remat_size``. + """Generalized tensor parallelism with weight rematerialization, for expert layers. Independent + from the decoder's ``generalized_tensor_parallel_remat_size``. + Placed right after expert parallelism in the parallelism ordering. """ expert_tensor_parallel_size: Optional[int] = None diff --git a/megatron/core/optimizer/__init__.py b/megatron/core/optimizer/__init__.py index b507f7ea6b7..763c4842e33 100644 --- a/megatron/core/optimizer/__init__.py +++ b/megatron/core/optimizer/__init__.py @@ -1206,7 +1206,7 @@ def _gtp_active_for(sub, full): model_chunks=model_chunks, param_groups=moe_param_groups, per_model_buffers=moe_buffers, - model_parallel_group=expt_tp_pp_group, + model_parallel_group=expt_tp_pp_with_egtp_group, data_parallel_group=main_expt_dp_group, data_parallel_group_gloo=expt_data_parallel_group_gloo, data_parallel_group_idx=expt_model_parallel_rank, From eddb7ba585e1a9cd08b9acf2364bab5d9f4c6d81 Mon Sep 17 00:00:00 2001 From: Shiqing Fan Date: Thu, 18 Jun 2026 21:15:23 -0700 Subject: [PATCH 41/59] Rename GTP remat knobs and add num-weight-shards user API Signed-off-by: Shiqing Fan --- .../core/distributed/finalize_model_grads.py | 11 +-- megatron/core/model_parallel_config.py | 92 ++++++++++++++++-- megatron/core/parallel_state.py | 94 +++++++++---------- 
megatron/core/process_groups_config.py | 11 +-- megatron/core/tensor_parallel/layers.py | 8 +- megatron/core/transformer/cuda_graphs.py | 32 +++---- .../core/transformer/transformer_config.py | 2 +- megatron/experimental/gtp/README.md | 37 +++++--- megatron/training/arguments.py | 36 ++++++- megatron/training/checkpointing.py | 6 +- megatron/training/initialize.py | 11 +-- megatron/training/training.py | 8 +- megatron/training/utils/common_utils.py | 6 +- .../test_attention_gtp.py | 2 +- .../generalized_tensor_parallel/test_gtp.py | 4 +- .../test_gtp_grad_correctness.py | 10 +- .../test_gtp_loss_correctness.py | 2 +- .../test_mamba_gtp.py | 2 +- .../models/test_hybrid_moe_model.py | 6 +- 19 files changed, 243 insertions(+), 137 deletions(-) diff --git a/megatron/core/distributed/finalize_model_grads.py b/megatron/core/distributed/finalize_model_grads.py index c551bdd1213..50376d3fadc 100644 --- a/megatron/core/distributed/finalize_model_grads.py +++ b/megatron/core/distributed/finalize_model_grads.py @@ -449,10 +449,8 @@ def _allreduce_replicated_grads_over_gtp_group(model: List[torch.nn.Module]): leaving them 1/gtp short. SUM (not AVG) over the gtp/egtp group recovers the full mean. No-op when GTP is inactive (gtp/egtp group size <= 1). """ - gtp_group = parallel_state.get_generalized_tensor_parallel_remat_group(check_initialized=False) - egtp_group = parallel_state.get_expert_generalized_tensor_parallel_remat_group( - check_initialized=False - ) + gtp_group = parallel_state.get_gtp_weight_remat_group(check_initialized=False) + egtp_group = parallel_state.get_expert_gtp_weight_remat_group(check_initialized=False) dense_params, dense_grads = [], [] expert_params, expert_grads = [], [] @@ -537,10 +535,7 @@ def finalize_model_grads( dp_cp_group = parallel_state.get_data_parallel_group(with_context_parallel=True) # Fence the current stream against all GTP backward grad work before the DP gradient sync. 
- if ( - config.generalized_tensor_parallel_remat_size > 1 - or config.expert_generalized_tensor_parallel_remat_size > 1 - ): + if config.gtp_weight_remat_size > 1 or config.expert_gtp_weight_remat_size > 1: from megatron.experimental.gtp import wait_for_gtp_grad_reduction_on_current_stream wait_for_gtp_grad_reduction_on_current_stream() diff --git a/megatron/core/model_parallel_config.py b/megatron/core/model_parallel_config.py index e984b4f223a..f7dd0791afe 100644 --- a/megatron/core/model_parallel_config.py +++ b/megatron/core/model_parallel_config.py @@ -7,6 +7,37 @@ import torch +def resolve_tensor_parallel_weight_shards( + tensor_model_parallel_size: int, + tensor_parallel_num_weight_shards: Optional[int], + gtp_weight_remat_size: int, +) -> tuple: + """Reconcile ``tensor_parallel_num_weight_shards`` and ``gtp_weight_remat_size``. + + ``tensor_parallel_num_weight_shards`` is the user-facing total number of shards each weight is + split into across the tensor-parallel + GTP axes. It is the source of truth and implies + ``gtp_weight_remat_size = tensor_parallel_num_weight_shards // tensor_model_parallel_size``. + When None it defaults to ``tensor_model_parallel_size * gtp_weight_remat_size`` (so the pair + stays consistent, and equals ``tensor_model_parallel_size`` in the no-GTP default). Idempotent. + + Returns the reconciled ``(tensor_parallel_num_weight_shards, gtp_weight_remat_size)``. + """ + tp = tensor_model_parallel_size + if tensor_parallel_num_weight_shards is None: + tensor_parallel_num_weight_shards = tp * gtp_weight_remat_size + else: + assert tensor_parallel_num_weight_shards >= tp, ( + f"tensor_parallel_num_weight_shards ({tensor_parallel_num_weight_shards}) must be >= " + f"tensor_model_parallel_size ({tp})." + ) + assert tensor_parallel_num_weight_shards % tp == 0, ( + f"tensor_parallel_num_weight_shards ({tensor_parallel_num_weight_shards}) must be " + f"divisible by tensor_model_parallel_size ({tp})." 
+ ) + gtp_weight_remat_size = tensor_parallel_num_weight_shards // tp + return tensor_parallel_num_weight_shards, gtp_weight_remat_size + + @dataclass class ModelParallelConfig: """Base configuration for Megatron Core @@ -20,11 +51,24 @@ class ModelParallelConfig: tensor_model_parallel_size: int = 1 """Intra-layer model parallelism. Splits tensors across GPU ranks.""" - generalized_tensor_parallel_remat_size: int = 1 + tensor_parallel_num_weight_shards: Optional[int] = None + """Total number of shards each weight is split into across the tensor-parallel + GTP axes + (i.e. ``tensor_model_parallel_size * gtp_weight_remat_size``). This is the user-facing knob: + it must be ``>= tensor_model_parallel_size`` and divisible by it. When None it defaults to + ``tensor_model_parallel_size`` (no GTP sharding). It is the source of truth and implies + ``gtp_weight_remat_size = tensor_parallel_num_weight_shards // tensor_model_parallel_size`` + (resolved in ``__post_init__``). + """ + + gtp_weight_remat_size: int = 1 """Generalized tensor parallelism with weight rematerialization. Shards model weights across GPU ranks along ``out_features``; each weight is rematerialized independently (per-weight, not per-layer) via async all-gather on every forward AND backward pass. Placed right after tensor parallelism in the parallelism ordering. + + INTERNAL / DERIVED — there is no CLI flag for it; do not set directly. It is computed in + ``__post_init__`` from ``tensor_parallel_num_weight_shards`` (= that value divided by + ``tensor_model_parallel_size``). Use ``tensor_parallel_num_weight_shards`` to control GTP. """ pipeline_model_parallel_comm_backend: Optional[Literal["nccl", "ucc"]] = None @@ -79,17 +123,32 @@ class ModelParallelConfig: expert_model_parallel_size: int = 1 """Distributes Moe Experts across sub data parallel dimension.""" - expert_generalized_tensor_parallel_remat_size: int = 1 - """Generalized tensor parallelism with weight rematerialization, for expert layers. 
Independent - from the decoder's ``generalized_tensor_parallel_remat_size``. - Placed right after expert parallelism in the parallelism ordering. - """ - expert_tensor_parallel_size: Optional[int] = None """Intra-layer tensor model parallelism for expert layer. Splits tensors across GPU ranks. Default is None, which will be set to the value of tensor_model_parallel_size. """ + expert_tensor_parallel_num_weight_shards: Optional[int] = None + """Total number of shards each expert weight is split into across the expert-tensor-parallel + + expert-GTP axes (i.e. ``expert_tensor_parallel_size * expert_gtp_weight_remat_size``). This is + the user-facing knob for expert layers: it must be ``>= expert_tensor_parallel_size`` and + divisible by it. When None it defaults to ``expert_tensor_parallel_size`` (no expert GTP + sharding). It is the source of truth and implies + ``expert_gtp_weight_remat_size = expert_tensor_parallel_num_weight_shards // + expert_tensor_parallel_size`` (resolved in ``__post_init__``). + """ + + expert_gtp_weight_remat_size: int = 1 + """Generalized tensor parallelism with weight rematerialization, for expert layers. Independent + from the decoder's ``gtp_weight_remat_size``. + Placed right after expert parallelism in the parallelism ordering. + + INTERNAL / DERIVED — there is no CLI flag for it; do not set directly. It is computed in + ``__post_init__`` from ``expert_tensor_parallel_num_weight_shards`` (= that value divided by + ``expert_tensor_parallel_size``). Use ``expert_tensor_parallel_num_weight_shards`` to control + expert GTP. + """ + ################### # Initialization ################### @@ -438,6 +497,25 @@ def __post_init__(self): if self.expert_tensor_parallel_size is None: self.expert_tensor_parallel_size = self.tensor_model_parallel_size + # Reconcile the user-facing tensor_parallel_num_weight_shards with the internal + # gtp_weight_remat_size (num_weight_shards = tensor_model_parallel_size * gtp_weight_remat). 
+ (self.tensor_parallel_num_weight_shards, self.gtp_weight_remat_size) = ( + resolve_tensor_parallel_weight_shards( + self.tensor_model_parallel_size, + self.tensor_parallel_num_weight_shards, + self.gtp_weight_remat_size, + ) + ) + + # Same reconciliation for the expert layers (expert_tensor_parallel_size is finalized above). + (self.expert_tensor_parallel_num_weight_shards, self.expert_gtp_weight_remat_size) = ( + resolve_tensor_parallel_weight_shards( + self.expert_tensor_parallel_size, + self.expert_tensor_parallel_num_weight_shards, + self.expert_gtp_weight_remat_size, + ) + ) + if self.pipeline_model_parallel_size > 1: if self.pipeline_dtype is None: raise ValueError( diff --git a/megatron/core/parallel_state.py b/megatron/core/parallel_state.py index 1c764268b12..2168b18e35f 100644 --- a/megatron/core/parallel_state.py +++ b/megatron/core/parallel_state.py @@ -28,8 +28,8 @@ # Intra-layer model parallel group that the current rank belongs to. _TENSOR_MODEL_PARALLEL_GROUP = None # Generalized tensor parallelism group that the current rank belongs to. -_GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP = None -_GENERALIZED_TENSOR_PARALLEL_REMAT_GLOBAL_RANKS = None +_GTP_WEIGHT_REMAT_GROUP = None +_GTP_WEIGHT_REMAT_GLOBAL_RANKS = None # Inter-layer model parallel group that the current rank belongs to. _PIPELINE_MODEL_PARALLEL_GROUP = None # Model parallel group (both intra- and pipeline) that the current rank belongs to. @@ -54,8 +54,8 @@ # _EXPERT_DATA denotes data parallelism of expert which replicates weight across the group. # Expert generalized tensor parallelism group that current rank belongs to. -_EXPERT_GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP = None -_EXPERT_GENERALIZED_TENSOR_PARALLEL_REMAT_GLOBAL_RANKS = None +_EXPERT_GTP_WEIGHT_REMAT_GROUP = None +_EXPERT_GTP_WEIGHT_REMAT_GLOBAL_RANKS = None # Expert model parallel group that current rank belongs to. _EXPERT_MODEL_PARALLEL_GROUP = None # Expert tensor parallel group that current rank belongs to. 
@@ -682,14 +682,14 @@ def initialize_model_parallel( each weight is rematerialized independently (per-weight, not per- layer) via async all-gather on every forward AND backward pass. A first-class orthogonal axis (world_size = TP*GTP*CP*DP). Maps to the - dataclass field ``ModelParallelConfig.generalized_tensor_parallel_remat_size``. + dataclass field ``ModelParallelConfig.gtp_weight_remat_size``. expert_gtp_remat_size (int, default = 1): Expert-side counterpart of ``gtp_remat_size`` — shards routed-expert weights along ``out_features`` and rematerializes per-weight on every forward AND backward pass. A first-class orthogonal axis on the expert grid. Independent from ``gtp_remat_size``. Maps to - ``ModelParallelConfig.expert_generalized_tensor_parallel_remat_size``. + ``ModelParallelConfig.expert_gtp_weight_remat_size``. num_distributed_optimizer_instances (int, default = 1): The number of distributed optimizer replicas across the data- @@ -934,21 +934,21 @@ def _inject_gtp(order_str: str, after: str = "tp") -> str: # Build the generalized tensor parallel groups. # GTP overlaps with the CP-DP domain because GTP only shards weights # while CP only shards activations — they are independent and can share ranks. 
- global _GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP - global _GENERALIZED_TENSOR_PARALLEL_REMAT_GLOBAL_RANKS + global _GTP_WEIGHT_REMAT_GROUP + global _GTP_WEIGHT_REMAT_GLOBAL_RANKS assert ( - _GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP is None + _GTP_WEIGHT_REMAT_GROUP is None ), "generalized tensor parallel group is already initialized" for gtp_ranks in decoder_rank_generator.get_gtp_ranks(gtp_remat_size): group = create_group( gtp_ranks, timeout=timeout, pg_options=get_nccl_options("gtp", nccl_comm_cfgs), - group_desc="GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP", + group_desc="GTP_WEIGHT_REMAT_GROUP", ) if rank in gtp_ranks: - _GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP = group - _GENERALIZED_TENSOR_PARALLEL_REMAT_GLOBAL_RANKS = gtp_ranks + _GTP_WEIGHT_REMAT_GROUP = group + _GTP_WEIGHT_REMAT_GLOBAL_RANKS = gtp_ranks # Tokens for the FULL (gtp-inclusive) data-parallel domain. gtp is factored out of the # generator's 'dp' axis, so the full data domain spans gtp explicitly ('gtp-dp'). The @@ -1333,10 +1333,10 @@ def _inject_gtp(order_str: str, after: str = "tp") -> str: ### Expert-related parallel groups initialization # Build the expert generalized tensor parallel group # Expert GTP overlaps with the expert DP domain (experts don't use CP). - global _EXPERT_GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP - global _EXPERT_GENERALIZED_TENSOR_PARALLEL_REMAT_GLOBAL_RANKS + global _EXPERT_GTP_WEIGHT_REMAT_GROUP + global _EXPERT_GTP_WEIGHT_REMAT_GLOBAL_RANKS assert ( - _EXPERT_GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP is None + _EXPERT_GTP_WEIGHT_REMAT_GROUP is None ), 'Expert generalized tensor parallel group is already initialized' # EGTP shard groups are get_ranks('gtp') on the expert generator (singletons when # expert_gtp_remat_size == 1). See RankGenerator.get_gtp_ranks. 
@@ -1345,11 +1345,11 @@ def _inject_gtp(order_str: str, after: str = "tp") -> str: egtp_ranks, timeout=timeout, pg_options=get_nccl_options("expt_gtp", nccl_comm_cfgs), - group_desc="EXPERT_GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP", + group_desc="EXPERT_GTP_WEIGHT_REMAT_GROUP", ) if rank in egtp_ranks: - _EXPERT_GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP = group - _EXPERT_GENERALIZED_TENSOR_PARALLEL_REMAT_GLOBAL_RANKS = egtp_ranks + _EXPERT_GTP_WEIGHT_REMAT_GROUP = group + _EXPERT_GTP_WEIGHT_REMAT_GLOBAL_RANKS = egtp_ranks # Build the expert model parallel group global _EXPERT_MODEL_PARALLEL_GROUP, _EXPERT_MODEL_PARALLEL_RANKS @@ -1698,40 +1698,40 @@ def get_tensor_model_parallel_group(check_initialized=True): return _TENSOR_MODEL_PARALLEL_GROUP -def get_generalized_tensor_parallel_remat_group(check_initialized=True): +def get_gtp_weight_remat_group(check_initialized=True): """Get the parameter-sharding group the caller rank belongs to.""" if check_initialized: assert ( - _GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP is not None + _GTP_WEIGHT_REMAT_GROUP is not None ), "generalized tensor parallel group is not initialized" - return _GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP + return _GTP_WEIGHT_REMAT_GROUP -def get_generalized_tensor_parallel_remat_world_size(): +def get_gtp_weight_remat_world_size(): """Return world size for the parameter-sharding group.""" if torch.distributed.is_available() and torch.distributed.is_initialized(): - group = get_generalized_tensor_parallel_remat_group(check_initialized=False) + group = get_gtp_weight_remat_group(check_initialized=False) return group.size() if group is not None else 0 else: return 0 -def get_generalized_tensor_parallel_remat_rank(): +def get_gtp_weight_remat_rank(): """Return caller's rank in the parameter-sharding group.""" if torch.distributed.is_available() and torch.distributed.is_initialized(): - group = get_generalized_tensor_parallel_remat_group(check_initialized=False) + group = 
get_gtp_weight_remat_group(check_initialized=False) return group.rank() if group is not None else 0 else: return 0 -def get_generalized_tensor_parallel_remat_global_ranks(check_initialized=True): +def get_gtp_weight_remat_global_ranks(check_initialized=True): """Get all global ranks of the parameter-sharding group that the caller rank belongs to.""" if check_initialized: assert ( - _GENERALIZED_TENSOR_PARALLEL_REMAT_GLOBAL_RANKS is not None + _GTP_WEIGHT_REMAT_GLOBAL_RANKS is not None ), "generalized tensor parallel group is not initialized" - return _GENERALIZED_TENSOR_PARALLEL_REMAT_GLOBAL_RANKS + return _GTP_WEIGHT_REMAT_GLOBAL_RANKS def get_pipeline_model_parallel_group(check_initialized=True): @@ -2168,40 +2168,40 @@ def get_tensor_and_context_parallel_rank(): ### Expert-related parallel states functions -def get_expert_generalized_tensor_parallel_remat_group(check_initialized=True): +def get_expert_gtp_weight_remat_group(check_initialized=True): """Get the expert-parameter-sharding group the caller rank belongs to.""" if check_initialized: assert ( - _EXPERT_GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP is not None + _EXPERT_GTP_WEIGHT_REMAT_GROUP is not None ), "expert generalized tensor parallel group is not initialized" - return _EXPERT_GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP + return _EXPERT_GTP_WEIGHT_REMAT_GROUP -def get_expert_generalized_tensor_parallel_remat_world_size(): +def get_expert_gtp_weight_remat_world_size(): """Return world size for the expert-parameter-sharding group.""" if torch.distributed.is_available() and torch.distributed.is_initialized(): - group = get_expert_generalized_tensor_parallel_remat_group(check_initialized=False) + group = get_expert_gtp_weight_remat_group(check_initialized=False) return group.size() if group is not None else 0 else: return 0 -def get_expert_generalized_tensor_parallel_remat_rank(): +def get_expert_gtp_weight_remat_rank(): """Return caller's rank in the expert-parameter-sharding group.""" if 
torch.distributed.is_available() and torch.distributed.is_initialized(): - group = get_expert_generalized_tensor_parallel_remat_group(check_initialized=False) + group = get_expert_gtp_weight_remat_group(check_initialized=False) return group.rank() if group is not None else 0 else: return 0 -def get_expert_generalized_tensor_parallel_remat_global_ranks(check_initialized=True): +def get_expert_gtp_weight_remat_global_ranks(check_initialized=True): """Get all global ranks of the expert-parameter-sharding group that the caller rank belongs to.""" if check_initialized: assert ( - _EXPERT_GENERALIZED_TENSOR_PARALLEL_REMAT_GLOBAL_RANKS is not None + _EXPERT_GTP_WEIGHT_REMAT_GLOBAL_RANKS is not None ), "expert generalized tensor parallel group is not initialized" - return _EXPERT_GENERALIZED_TENSOR_PARALLEL_REMAT_GLOBAL_RANKS + return _EXPERT_GTP_WEIGHT_REMAT_GLOBAL_RANKS def get_expert_model_parallel_group(check_initialized=True): @@ -2464,7 +2464,7 @@ def get_all_ranks(): pipeline-model-parallel and expert-model-parallel groups.""" ranks = [ get_tensor_model_parallel_rank(), - get_generalized_tensor_parallel_remat_rank(), + get_gtp_weight_remat_rank(), get_data_parallel_rank(), get_context_parallel_rank(), get_pipeline_model_parallel_rank(), @@ -2481,11 +2481,11 @@ def destroy_model_parallel(): global _TENSOR_MODEL_PARALLEL_GROUP _TENSOR_MODEL_PARALLEL_GROUP = None - global _GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP - _GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP = None + global _GTP_WEIGHT_REMAT_GROUP + _GTP_WEIGHT_REMAT_GROUP = None - global _GENERALIZED_TENSOR_PARALLEL_REMAT_GLOBAL_RANKS - _GENERALIZED_TENSOR_PARALLEL_REMAT_GLOBAL_RANKS = None + global _GTP_WEIGHT_REMAT_GLOBAL_RANKS + _GTP_WEIGHT_REMAT_GLOBAL_RANKS = None global _PIPELINE_MODEL_PARALLEL_GROUP _PIPELINE_MODEL_PARALLEL_GROUP = None @@ -2571,11 +2571,11 @@ def destroy_model_parallel(): _DATA_PARALLEL_GROUP_WITH_CP_GLOO = None # Destroy parallel state related to expert parallelism. 
- global _EXPERT_GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP - _EXPERT_GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP = None + global _EXPERT_GTP_WEIGHT_REMAT_GROUP + _EXPERT_GTP_WEIGHT_REMAT_GROUP = None - global _EXPERT_GENERALIZED_TENSOR_PARALLEL_REMAT_GLOBAL_RANKS - _EXPERT_GENERALIZED_TENSOR_PARALLEL_REMAT_GLOBAL_RANKS = None + global _EXPERT_GTP_WEIGHT_REMAT_GLOBAL_RANKS + _EXPERT_GTP_WEIGHT_REMAT_GLOBAL_RANKS = None global _EXPERT_MODEL_PARALLEL_GROUP _EXPERT_MODEL_PARALLEL_GROUP = None diff --git a/megatron/core/process_groups_config.py b/megatron/core/process_groups_config.py index 13b287da669..6cac3b3d840 100644 --- a/megatron/core/process_groups_config.py +++ b/megatron/core/process_groups_config.py @@ -126,10 +126,10 @@ class ProcessGroupCollection: # Separate dp_cp communicator for param all-gather (AG/RS overlap) dp_cp_ag: torch.distributed.ProcessGroup = field(init=False) - # _GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP + # _GTP_WEIGHT_REMAT_GROUP gtp: torch.distributed.ProcessGroup = field(init=False) - # _EXPERT_GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP + # _EXPERT_GTP_WEIGHT_REMAT_GROUP expt_gtp: torch.distributed.ProcessGroup = field(init=False) # MoE layers need expt_dp group for sharded state dict @@ -285,12 +285,9 @@ def use_mpu_process_groups(cls, required_pgs: Optional[List[str]] = None): check_initialized=False, with_context_parallel=True, ), - 'gtp': partial( - parallel_state.get_generalized_tensor_parallel_remat_group, check_initialized=False - ), + 'gtp': partial(parallel_state.get_gtp_weight_remat_group, check_initialized=False), 'expt_gtp': partial( - parallel_state.get_expert_generalized_tensor_parallel_remat_group, - check_initialized=False, + parallel_state.get_expert_gtp_weight_remat_group, check_initialized=False ), } diff --git a/megatron/core/tensor_parallel/layers.py b/megatron/core/tensor_parallel/layers.py index 97815590fde..95f385ab989 100644 --- a/megatron/core/tensor_parallel/layers.py +++ b/megatron/core/tensor_parallel/layers.py @@ -16,9 
+16,9 @@ from megatron.core.model_parallel_config import ModelParallelConfig from megatron.core.parallel_state import ( - get_expert_generalized_tensor_parallel_remat_rank, - get_generalized_tensor_parallel_remat_rank, + get_expert_gtp_weight_remat_rank, get_global_memory_buffer, + get_gtp_weight_remat_rank, get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size, ) @@ -122,8 +122,8 @@ def param_is_not_gtp_duplicate(param): return True is_expert = not getattr(param, "allreduce", True) if is_expert: - return get_expert_generalized_tensor_parallel_remat_rank() == 0 - return get_generalized_tensor_parallel_remat_rank() == 0 + return get_expert_gtp_weight_remat_rank() == 0 + return get_gtp_weight_remat_rank() == 0 def set_tensor_model_parallel_attributes(tensor, is_parallel, dim, stride): diff --git a/megatron/core/transformer/cuda_graphs.py b/megatron/core/transformer/cuda_graphs.py index eed65288f20..62217ddd765 100644 --- a/megatron/core/transformer/cuda_graphs.py +++ b/megatron/core/transformer/cuda_graphs.py @@ -867,7 +867,7 @@ def __init__( self.fp4_enabled = self.base_module.config.fp4 is not None self.fp8_runtime_enabled = None self.fp4_runtime_enabled = None - self.gtp_remat = self.base_module.config.generalized_tensor_parallel_remat_size > 1 + self.gtp_remat = self.base_module.config.gtp_weight_remat_size > 1 if self.gtp_remat: # Ensure internal warmup (inside create_fwd_graph) has >= 2 steps @@ -882,18 +882,16 @@ def __init__( # Dense for mamba/attn/shared_experts; expert (below) for routed # experts captured when "moe" is in cuda_graph_modules. 
from megatron.core.parallel_state import ( - get_expert_generalized_tensor_parallel_remat_group, - get_expert_generalized_tensor_parallel_remat_world_size, - get_generalized_tensor_parallel_remat_group, + get_expert_gtp_weight_remat_group, + get_expert_gtp_weight_remat_world_size, + get_gtp_weight_remat_group, ) - self._register_gtp_side_streams(get_generalized_tensor_parallel_remat_group()) + self._register_gtp_side_streams(get_gtp_weight_remat_group()) # EGTP streams: required so _wait/_sync_side_streams drain EGTP # NCCL into runner_stream before bwd_completion_event fires. - if get_expert_generalized_tensor_parallel_remat_world_size() > 1: - self._register_gtp_side_streams( - get_expert_generalized_tensor_parallel_remat_group() - ) + if get_expert_gtp_weight_remat_world_size() > 1: + self._register_gtp_side_streams(get_expert_gtp_weight_remat_group()) # Bridges Phase 1 (AG drain on ag_stream) into runner_stream # so bwd_completion_event records past NCCL_AG completion. self.bwd_ag_fence_event = torch.cuda.Event() @@ -1333,17 +1331,17 @@ def create_bwd_graph(self): # so bwd_completion_event records AFTER NCCL_AG completion. 
wait_async_comms(GTPChain.GRAPHED.value, skip_rs=True) from megatron.core.parallel_state import ( - get_expert_generalized_tensor_parallel_remat_group, - get_expert_generalized_tensor_parallel_remat_world_size, - get_generalized_tensor_parallel_remat_group, + get_expert_gtp_weight_remat_group, + get_expert_gtp_weight_remat_world_size, + get_gtp_weight_remat_group, ) - gtp_group = get_generalized_tensor_parallel_remat_group() + gtp_group = get_gtp_weight_remat_group() graphed_ag = get_ag_stream(GTPChain.GRAPHED.value, gtp_group) self.bwd_ag_fence_event.record(graphed_ag) torch.cuda.current_stream().wait_event(self.bwd_ag_fence_event) - if get_expert_generalized_tensor_parallel_remat_world_size() > 1: - egtp_group = get_expert_generalized_tensor_parallel_remat_group() + if get_expert_gtp_weight_remat_world_size() > 1: + egtp_group = get_expert_gtp_weight_remat_group() egtp_graphed_ag = get_ag_stream(GTPChain.GRAPHED.value, egtp_group) self.bwd_ag_fence_event.record(egtp_graphed_ag) torch.cuda.current_stream().wait_event(self.bwd_ag_fence_event) @@ -1381,8 +1379,8 @@ def create_bwd_graph(self): # replay-invariant — so Graphed.backward avoids per-replay group lookups. 
self._gtp_finalize_hook_plan = [] if self.gtp_remat and self.finalized_during_bwd_capture: - dense_group = parallel_state.get_generalized_tensor_parallel_remat_group() - expert_group = parallel_state.get_expert_generalized_tensor_parallel_remat_group() + dense_group = parallel_state.get_gtp_weight_remat_group() + expert_group = parallel_state.get_expert_gtp_weight_remat_group() params_by_group = defaultdict(list) for param in self.finalized_during_bwd_capture: is_expert = not getattr(param, 'allreduce', True) diff --git a/megatron/core/transformer/transformer_config.py b/megatron/core/transformer/transformer_config.py index 7685c8707f8..f1a0aa9f3fa 100644 --- a/megatron/core/transformer/transformer_config.py +++ b/megatron/core/transformer/transformer_config.py @@ -2366,7 +2366,7 @@ def _scope_to_str(s): ) if ( - self.generalized_tensor_parallel_remat_size > 1 + self.gtp_weight_remat_size > 1 and self.cuda_graph_impl == "local" and (self.fp8 is not None or self.fp4 is not None) and self.moe_shared_expert_intermediate_size is not None diff --git a/megatron/experimental/gtp/README.md b/megatron/experimental/gtp/README.md index 32e983c581d..c694aa51bef 100644 --- a/megatron/experimental/gtp/README.md +++ b/megatron/experimental/gtp/README.md @@ -113,7 +113,7 @@ Quantize-then-gather attacks AG only: AG portion shrinks ~72% from BF16 → NVFP - **TP** (intra-layer): orthogonal axis — GTP shards `out_features` regardless of TP's parallel mode (column or row). 2D grid naturally formed via `tp_group × gtp_group`. - **SP** (sequence-parallel): transparent — GTP operates at weight dim, SP at sequence dim. -- **EP** (MoE): `GroupedLinear` with GTP → each routed expert sharded across `EXPERT_GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP`, independent of EP. MoE AllToAll (HybridEP/NVLink) runs independently of GTP AG/RS (NCCL/IB). +- **EP** (MoE): `GroupedLinear` with GTP → each routed expert sharded across `EXPERT_GTP_WEIGHT_REMAT_GROUP`, independent of EP. 
MoE AllToAll (HybridEP/NVLink) runs independently of GTP AG/RS (NCCL/IB). - **DDP**: GTP bypasses autograd's grad accumulator (async RS returns `None`; `_finalize_wgrad` accumulates directly into `main_grad`). `register_grad_accum_hook` + manual invocation from `_finalize_wgrad` (eager path) and `_CudagraphReplayNode.backward` (captured path) serializes DDP RS strictly after GTP RS — critical at IB scale to avoid deadlock between DDP and GTP on the same NIC. ### 1.5 Opt-in, minimally invasive integration @@ -121,7 +121,7 @@ Quantize-then-gather attacks AG only: AG portion shrinks ~72% from BF16 → NVFP - Drop-in `gtp_group` kwarg on `Linear` / `LayerNormLinear` / `LayerNormMLP` / `GroupedLinear`; no framework-level refactor required. - **Per-weight opt-in.** GTP wraps only weights threaded with the `gtp_group=` kwarg — typically the heavy GEMM linears (`Linear` / `LayerNormLinear` / `LayerNormMLP` / `GroupedLinear`). Small replicated tensors (LayerNorm γ/β, biases, Mamba `dt_bias`/`A_log`/`D`/`conv1d`, MoE router, latent-proj MLPs) stay full — no NCCL launch latency for params where the all-gather wouldn't amortize. The split is visible in §3.2's *dense non-GTP* vs *dense GTP* membership. - `classify_gtp_chains(model)` walks `named_parameters()` once at init and sets `chain_id` on every `GTPShardedParam` based on the current `cuda_graph_modules`. -- Turning it off is a no-op: when `gtp_group.size() == 1`, `wrap_module_params_gtp` short-circuits; when `generalized_tensor_parallel_remat_size == 1`, the GTP path in `layers.py` is skipped entirely. +- Turning it off is a no-op: when `gtp_group.size() == 1`, `wrap_module_params_gtp` short-circuits; when `gtp_weight_remat_size == 1`, the GTP path in `layers.py` is skipped entirely. - User-tunable knobs (`GTPConfig.pad_for_alignment`, `weight_prefetch`, `check_param_states`) plus a debug-name tagger (`tag_gtp_params_with_names`) for readable link-table output. 
### 1.6 Scaling @@ -158,14 +158,24 @@ GTP is enabled through two CLI flags on Megatron's training launcher; everything ### 2.1 Required flags ```bash -# Shard dense weights (attention, mamba, MLP linears) 1/N along out_features. ---generalized-tensor-parallel-remat-size - -# Shard MoE routed-expert weights 1/M along out_features. Independent from -# `--generalized-tensor-parallel-remat-size`; can be 1 for non-MoE models. ---expert-generalized-tensor-parallel-remat-size +# Total number of shards each dense weight (attention, mamba, MLP linears) is split into along +# out_features, across the tensor-parallel + GTP axes. Must be >= --tensor-model-parallel-size and +# divisible by it. The GTP degree is derived as num_weight_shards / tensor_model_parallel_size +# (e.g. TP=1 + num_weight_shards=2 -> GTP=2; TP=2 + num_weight_shards=8 -> GTP=4). +--tensor-parallel-num-weight-shards + +# Total number of shards each MoE routed-expert weight is split into along out_features, across the +# expert-tensor-parallel + expert-GTP axes. Must be >= --expert-tensor-parallel-size and divisible +# by it. The expert-GTP degree is derived as num_weight_shards / expert_tensor_parallel_size. +# Independent from --tensor-parallel-num-weight-shards; can be left unset for non-MoE models. +--expert-tensor-parallel-num-weight-shards ``` +> The (dense / expert) GTP degree is exposed **only** through +> `--tensor-parallel-num-weight-shards` / `--expert-tensor-parallel-num-weight-shards`. The internal +> `gtp_weight_remat_size` / `expert_gtp_weight_remat_size` config fields are derived from them and +> have no CLI flag. + ### 2.2 High-priority streams (Blackwell and later) Required on GB200 / GB300 so the GTP comm streams get the SM priority needed for AG/RS overlap with compute: @@ -179,12 +189,13 @@ The launcher also exports `CUDA_GRAPHS_USE_NODE_PRIORITY=1` so captured CUDA gra ### 2.3 Minimal end-to-end example ```bash -# 4 ranks, GTP=2 across out_features, no TP, BF16 weights. 
+# 4 ranks, TP=2 + GTP=2 across out_features, BF16 weights. +# TP=2 + num-weight-shards=4 -> GTP = 4 / 2 = 2. torchrun --nproc-per-node 4 pretrain_gpt.py \ - --tensor-model-parallel-size 1 \ + --tensor-model-parallel-size 2 \ --pipeline-model-parallel-size 1 \ - --generalized-tensor-parallel-remat-size 2 \ - --expert-generalized-tensor-parallel-remat-size 1 \ + --tensor-parallel-num-weight-shards 4 \ + --expert-tensor-parallel-num-weight-shards 1 \ --high-priority-stream-groups ep gtp expt_gtp \ --bf16 \ --num-layers 12 --hidden-size 1024 --num-attention-heads 16 \ @@ -238,7 +249,7 @@ TransformerEngine owns the linear primitives (`Linear` / `LayerNormLinear` / `La #### What the flags do under the hood -1. `parallel_state.initialize_model_parallel(...)` treats GTP/EGTP as **first-class orthogonal axes** (`world_size = TP*GTP*CP*DP`, and the expert grid `= ETP*EP*PP*EGTP*expert_dp`). It builds the shard groups `_GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP` (size = `--generalized-tensor-parallel-remat-size`) and `_EXPERT_GENERALIZED_TENSOR_PARALLEL_REMAT_GROUP` (size = `--expert-generalized-tensor-parallel-remat-size`), plus the gtp/egtp-EXCLUDED replicate DP groups (`_DATA_PARALLEL_GROUP_NO_GTP`, `_EXPERT_DATA_PARALLEL_GROUP_NO_GTP`) that DDP and the optimizer shard over. These `*_no_gtp` groups alias the regular DP groups when GTP is inactive (remat size 1). +1. `parallel_state.initialize_model_parallel(...)` treats GTP/EGTP as **first-class orthogonal axes** (`world_size = TP*GTP*CP*PP*DP`, and the expert grid `= ETP*EP*EGTP*PP*expert_dp`). It builds the shard groups `_GTP_WEIGHT_REMAT_GROUP` (size = `--tensor-parallel-num-weight-shards / --tensor-model-parallel-size`) and `_EXPERT_GTP_WEIGHT_REMAT_GROUP` (size = `--expert-tensor-parallel-num-weight-shards / --expert-tensor-parallel-size`), plus the gtp/egtp-EXCLUDED replicate DP groups (`_DATA_PARALLEL_GROUP_NO_GTP`, `_EXPERT_DATA_PARALLEL_GROUP_NO_GTP`) that DDP and the optimizer shard over.
These `*_no_gtp` groups alias the regular DP groups when GTP is inactive (remat size 1). 2. Megatron's `extensions/transformer_engine.py` reads `pg_collection.gtp` / `pg_collection.expt_gtp` and forwards them as the `gtp_group=` kwarg to `te.Linear` / `te.LayerNormLinear` / `te.GroupedLinear`. TE's `module/base.py` calls back into `megatron.experimental.gtp` via the hook registry (`register_gtp_hooks`) to slice each weight at `reset_parameters` time. 3. DDP treats GTP shards as ordinary params: they go into the same dense / expert buffers as everything else, reduced over the gtp/egtp-EXCLUDED replicate group (`intra_dp_cp_no_gtp_group` / `intra_expt_dp_no_egtp_group`) with the standard `1/full` scaling. The gtp axis is completed elsewhere — GTP shards by their reduce-scatter sum, replicated (non-GTP) params by a SUM all-reduce in `finalize_model_grads`. See §3.2. 4. Optimizer state is sharded over the same replicate group; clip-by-global-norm reduces squared norms over the dist-opt grad-stats group, which spans the full world (including the gtp/egtp axis), with replicated non-GTP params counted once per gtp/egtp axis to avoid over-counting. diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py index 7678e4cb269..219d93b382e 100644 --- a/megatron/training/arguments.py +++ b/megatron/training/arguments.py @@ -1433,9 +1433,33 @@ def validate_args(args, defaults={}): args.high_priority_stream_groups.append('ep_dp') - if args.generalized_tensor_parallel_remat_size > 1 or args.expert_generalized_tensor_parallel_remat_size > 1: - gtp_size = args.generalized_tensor_parallel_remat_size - egtp_size = args.expert_generalized_tensor_parallel_remat_size + # Derive the internal gtp_weight_remat_size from the user-facing + # --tensor-parallel-num-weight-shards. 
gtp_weight_remat_size has no CLI flag (it is excluded + # from argument generation), so it is set here as a fresh attribute on args before it is + # consumed below (and in initialize/training, which read args.gtp_weight_remat_size directly). + # Mirrors ModelParallelConfig.__post_init__. + from megatron.core.model_parallel_config import resolve_tensor_parallel_weight_shards + (args.tensor_parallel_num_weight_shards, args.gtp_weight_remat_size) = ( + resolve_tensor_parallel_weight_shards( + args.tensor_model_parallel_size, + args.tensor_parallel_num_weight_shards, + getattr(args, "gtp_weight_remat_size", 1), + ) + ) + # Same for the expert layers: derive the internal expert_gtp_weight_remat_size from the + # user-facing --expert-tensor-parallel-num-weight-shards (expert_tensor_parallel_size is + # defaulted earlier in validate_args). expert_gtp_weight_remat_size has no CLI flag. + (args.expert_tensor_parallel_num_weight_shards, args.expert_gtp_weight_remat_size) = ( + resolve_tensor_parallel_weight_shards( + args.expert_tensor_parallel_size, + args.expert_tensor_parallel_num_weight_shards, + getattr(args, "expert_gtp_weight_remat_size", 1), + ) + ) + + if args.gtp_weight_remat_size > 1 or args.expert_gtp_weight_remat_size > 1: + gtp_weight_remat_size = args.gtp_weight_remat_size + egtp_weight_remat_size = args.expert_gtp_weight_remat_size if get_device_arch_version() >= 10: # Setting GTP communication groups for high priority streams for Blackwell and later # architectures. 
Assigning high priority to communication streams ensures that @@ -1444,7 +1468,7 @@ def validate_args(args, defaults={}): if 'gtp' not in args.high_priority_stream_groups: args.high_priority_stream_groups.append('gtp') warn_rank_0("Setting 'gtp' group for high priority streams.") - if egtp_size > 1 and 'expt_gtp' not in args.high_priority_stream_groups: + if egtp_weight_remat_size > 1 and 'expt_gtp' not in args.high_priority_stream_groups: args.high_priority_stream_groups.append('expt_gtp') warn_rank_0("Setting 'expt_gtp' group for high priority streams.") @@ -2165,6 +2189,10 @@ def _add_network_size_args(parser): "persist_layer_norm", "bias_dropout_fusion", "apply_rope_fusion", + # internal/derived: controlled only via --tensor-parallel-num-weight-shards + "gtp_weight_remat_size", + # internal/derived: controlled only via --expert-tensor-parallel-num-weight-shards + "expert_gtp_weight_remat_size", ] transformer_factory = ArgumentGroupFactory(TransformerConfig, exclude=exclude) transformer_group = transformer_factory.build_group(parser, "transformer configuration") diff --git a/megatron/training/checkpointing.py b/megatron/training/checkpointing.py index 087be661c3c..5e6e713644f 100644 --- a/megatron/training/checkpointing.py +++ b/megatron/training/checkpointing.py @@ -803,12 +803,12 @@ def iter_finalize_fn(): with open_file(tracker_filename, 'w') as f: f.write("release" if release else str(iteration)) tensor_rank_to_print = (tensor_rank if tensor_rank is not None else mpu.get_tensor_model_parallel_rank()) + 1 - gtp_rank_to_print = mpu.get_generalized_tensor_parallel_remat_rank() + 1 + gtp_rank_to_print = mpu.get_gtp_weight_remat_rank() + 1 pipeline_rank_to_print = (pipeline_rank if pipeline_rank is not None else mpu.get_pipeline_model_parallel_rank()) + 1 print_rank_0(f" [{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')}] successfully saved " f"checkpoint from iteration {int(iteration):7d} to {args.save} " f"[ t 
{tensor_rank_to_print}/{mpu.get_tensor_model_parallel_world_size()}, " - f"gtp {gtp_rank_to_print}/{mpu.get_generalized_tensor_parallel_remat_world_size()}, " + f"gtp {gtp_rank_to_print}/{mpu.get_gtp_weight_remat_world_size()}, " f"p {pipeline_rank_to_print}/{mpu.get_pipeline_model_parallel_world_size()} ]") if args.log_progress and args.async_save: append_to_progress_log(args.save, f'Saved async checkpoint\tIteration: {iteration}', @@ -2033,7 +2033,7 @@ def load_model_state_dict(module, state_dict, strict: bool): print_rank_0(f' successfully loaded checkpoint from {load_dir} ' f'[ t {mpu.get_tensor_model_parallel_rank() + 1}/{mpu.get_tensor_model_parallel_world_size()}, ' - f'gtp {mpu.get_generalized_tensor_parallel_remat_rank() + 1}/{mpu.get_generalized_tensor_parallel_remat_world_size()}, ' + f'gtp {mpu.get_gtp_weight_remat_rank() + 1}/{mpu.get_gtp_weight_remat_world_size()}, ' f'p {mpu.get_pipeline_model_parallel_rank() + 1}/{mpu.get_pipeline_model_parallel_world_size()} ] ' f'at iteration {iteration}') diff --git a/megatron/training/initialize.py b/megatron/training/initialize.py index 2316bbbfc47..6c92a777991 100644 --- a/megatron/training/initialize.py +++ b/megatron/training/initialize.py @@ -338,16 +338,13 @@ def _initialize_distributed(get_embedding_ranks, get_position_embedding_ranks, s if mpu.model_parallel_is_initialized(): print("model parallel is already initialized") else: - if ( - args.generalized_tensor_parallel_remat_size > 1 - or args.expert_generalized_tensor_parallel_remat_size > 1 - ): + if args.gtp_weight_remat_size > 1 or args.expert_gtp_weight_remat_size > 1: from megatron.experimental.gtp import HAVE_GTP assert HAVE_GTP, ( "GTP requires TransformerEngine >= 2.17. " "Set MEGATRON_GTP_FORCE_ENABLE=1 to bypass for custom TE builds, " - "or set both --generalized-tensor-parallel-remat-size and " + "or set both --gtp-weight-remat-size and " "--expert-generalized-tensor-parallel-remat-size to 1." 
) mpu.initialize_model_parallel( @@ -358,8 +355,8 @@ def _initialize_distributed(get_embedding_ranks, get_position_embedding_ranks, s use_sharp=args.use_sharp, # GTP/EGTP require world_size divisible by TP*PP*CP*GTP (and the expert grid # by ETP*EP*PP*EGTP). Inactive when the remat sizes are 1. - gtp_remat_size=args.generalized_tensor_parallel_remat_size, - expert_gtp_remat_size=args.expert_generalized_tensor_parallel_remat_size, + gtp_remat_size=args.gtp_weight_remat_size, + expert_gtp_remat_size=args.expert_gtp_weight_remat_size, context_parallel_size=args.context_parallel_size, hierarchical_context_parallel_sizes=args.hierarchical_context_parallel_sizes, hybrid_context_parallel=args.hybrid_context_parallel, diff --git a/megatron/training/training.py b/megatron/training/training.py index 6f3916a12e4..8ad6d180814 100644 --- a/megatron/training/training.py +++ b/megatron/training/training.py @@ -1674,8 +1674,8 @@ def get_model(model_provider_func, model_type=ModelType.encoder_or_decoder, wrap # Configure GTP padding alignment based on quantization recipe before model construction. if ( - getattr(args, 'generalized_tensor_parallel_remat_size', 1) > 1 - or getattr(args, 'expert_generalized_tensor_parallel_remat_size', 1) > 1 + getattr(args, 'gtp_weight_remat_size', 1) > 1 + or getattr(args, 'expert_gtp_weight_remat_size', 1) > 1 ): from megatron.experimental.gtp import update_gtp_config @@ -1738,8 +1738,8 @@ def build_model(): # from args.cuda_graph_modules + moe_shared_expert_overlap. Must run after # model build, before the first forward (which lazily builds chain links). 
if ( - getattr(args, 'generalized_tensor_parallel_remat_size', 1) > 1 - or getattr(args, 'expert_generalized_tensor_parallel_remat_size', 1) > 1 + getattr(args, 'gtp_weight_remat_size', 1) > 1 + or getattr(args, 'expert_gtp_weight_remat_size', 1) > 1 ): from megatron.experimental.gtp import ( GTP_CONFIG, diff --git a/megatron/training/utils/common_utils.py b/megatron/training/utils/common_utils.py index 610e620848b..ed35a85f95f 100644 --- a/megatron/training/utils/common_utils.py +++ b/megatron/training/utils/common_utils.py @@ -111,8 +111,8 @@ def calc_params_l2_norm(model, force_create_fp32_copy=False): moe_gtp_params_data = [] # MoE-GTP, non-sharded moe_gtp_sharded_params_data = [] # MoE-GTP, sharded → reduce over expert_dp_no_gtp - gtp_rank = mpu.get_generalized_tensor_parallel_remat_rank() - egtp_rank = mpu.get_expert_generalized_tensor_parallel_remat_rank() + gtp_rank = mpu.get_gtp_weight_remat_rank() + egtp_rank = mpu.get_expert_gtp_weight_remat_rank() for model_chunk in model: for param in model_chunk.parameters(): @@ -188,7 +188,7 @@ def _sum_reduce(tensor, group): # expert_model_parallel = TP×EP×PP (does NOT include EGTP), so we need # an explicit EGTP reduction for MoE-GTP before the model-parallel reduce. 
moe_gtp_combined_norm_2 = moe_gtp_norm_2 + moe_gtp_sharded_norm_2 - _sum_reduce(moe_gtp_combined_norm_2, mpu.get_expert_generalized_tensor_parallel_remat_group()) + _sum_reduce(moe_gtp_combined_norm_2, mpu.get_expert_gtp_weight_remat_group()) moe_total_norm_2 = moe_norm_2 + moe_sharded_norm_2 + moe_gtp_combined_norm_2 # --- Model-parallel reductions --- diff --git a/tests/unit_tests/generalized_tensor_parallel/test_attention_gtp.py b/tests/unit_tests/generalized_tensor_parallel/test_attention_gtp.py index 8b495e7c601..d14bffcdbf4 100644 --- a/tests/unit_tests/generalized_tensor_parallel/test_attention_gtp.py +++ b/tests/unit_tests/generalized_tensor_parallel/test_attention_gtp.py @@ -173,7 +173,7 @@ def run_step(layers, x): for layer in layers_gtp: layer.cuda() - gtp_group = ps.get_generalized_tensor_parallel_remat_group() + gtp_group = ps.get_gtp_weight_remat_group() gtp_size = gtp_group.size() gtp_rank = gtp_group.rank() diff --git a/tests/unit_tests/generalized_tensor_parallel/test_gtp.py b/tests/unit_tests/generalized_tensor_parallel/test_gtp.py index dcbc77e8692..06935944ec4 100644 --- a/tests/unit_tests/generalized_tensor_parallel/test_gtp.py +++ b/tests/unit_tests/generalized_tensor_parallel/test_gtp.py @@ -1319,7 +1319,7 @@ def _worker_gtp_ddp_bucket_alignment(rank, world_size, port): orig_pad = gtp_module.GTP_CONFIG.pad_for_alignment gtp_module.GTP_CONFIG.pad_for_alignment = 0 try: - gtp_group = ps.get_generalized_tensor_parallel_remat_group() + gtp_group = ps.get_gtp_weight_remat_group() class _TwoLayerModel(torch.nn.Module): def __init__(self): @@ -1374,7 +1374,7 @@ def _worker_regular_buffer_padded_when_gtp_params_present(rank, world_size, port orig_pad = gtp_module.GTP_CONFIG.pad_for_alignment gtp_module.GTP_CONFIG.pad_for_alignment = 0 try: - gtp_group = ps.get_generalized_tensor_parallel_remat_group() + gtp_group = ps.get_gtp_weight_remat_group() class _TwoLayerModelWithBias(torch.nn.Module): def __init__(self): diff --git 
a/tests/unit_tests/generalized_tensor_parallel/test_gtp_grad_correctness.py b/tests/unit_tests/generalized_tensor_parallel/test_gtp_grad_correctness.py index 7705c17bcbf..1d1a1827961 100644 --- a/tests/unit_tests/generalized_tensor_parallel/test_gtp_grad_correctness.py +++ b/tests/unit_tests/generalized_tensor_parallel/test_gtp_grad_correctness.py @@ -130,7 +130,7 @@ def _full_main_grads(stack): g_attr = 'main_grad' if hasattr(p, 'main_grad') else 'grad' mg = getattr(p, g_attr) if isinstance(p, GTPShardedParam): - g = ps.get_generalized_tensor_parallel_remat_group() + g = ps.get_gtp_weight_remat_group() shards = [torch.empty_like(mg) for _ in range(g.size())] dist.all_gather(shards, mg.contiguous(), group=g) out[name] = torch.cat(shards, dim=0).float().cpu() @@ -175,7 +175,7 @@ def _worker(rank, world_size, port): for layer in gtp_stack: layer.cuda() - g = ps.get_generalized_tensor_parallel_remat_group() + g = ps.get_gtp_weight_remat_group() gtp_rank = g.rank() assert g.size() == 2, f"expected gtp shard group size 2, got {g.size()}" @@ -313,7 +313,7 @@ def _worker_distopt(rank, world_size, port): gtp_stack = _make_stack(_make_config(), pgc) for layer in gtp_stack: layer.cuda() - g = ps.get_generalized_tensor_parallel_remat_group() + g = ps.get_gtp_weight_remat_group() gtp_rank = g.rank() for name, p in gtp_stack.named_parameters(): full = saved[name] @@ -435,8 +435,8 @@ def _worker_moe_distopt(rank, world_size, port): moe_stack = _make_moe_stack(_make_moe_config(), pgc) for layer in moe_stack: layer.cuda() - g = ps.get_generalized_tensor_parallel_remat_group() - eg = ps.get_expert_generalized_tensor_parallel_remat_group() + g = ps.get_gtp_weight_remat_group() + eg = ps.get_expert_gtp_weight_remat_group() gtp_rank, egtp_rank = g.rank(), eg.rank() n_egtp_sharded = 0 for name, p in moe_stack.named_parameters(): diff --git a/tests/unit_tests/generalized_tensor_parallel/test_gtp_loss_correctness.py 
b/tests/unit_tests/generalized_tensor_parallel/test_gtp_loss_correctness.py index 44fdaae617c..6427df482ec 100644 --- a/tests/unit_tests/generalized_tensor_parallel/test_gtp_loss_correctness.py +++ b/tests/unit_tests/generalized_tensor_parallel/test_gtp_loss_correctness.py @@ -135,7 +135,7 @@ def run_step(layers, x): for layer in layers_gtp: layer.cuda() - gtp_group = ps.get_generalized_tensor_parallel_remat_group() + gtp_group = ps.get_gtp_weight_remat_group() gtp_size = gtp_group.size() gtp_rank = gtp_group.rank() assert gtp_size == 4, f"GTP shard group size should be 4, got {gtp_size}" diff --git a/tests/unit_tests/generalized_tensor_parallel/test_mamba_gtp.py b/tests/unit_tests/generalized_tensor_parallel/test_mamba_gtp.py index 27ed6483aa1..de0f8cf358a 100644 --- a/tests/unit_tests/generalized_tensor_parallel/test_mamba_gtp.py +++ b/tests/unit_tests/generalized_tensor_parallel/test_mamba_gtp.py @@ -185,7 +185,7 @@ def run_step(layers, x): for layer in layers_gtp: layer.cuda() - gtp_group = ps.get_generalized_tensor_parallel_remat_group() + gtp_group = ps.get_gtp_weight_remat_group() gtp_size = gtp_group.size() gtp_rank = gtp_group.rank() diff --git a/tests/unit_tests/models/test_hybrid_moe_model.py b/tests/unit_tests/models/test_hybrid_moe_model.py index 2bf935f1ef6..895996dfad1 100644 --- a/tests/unit_tests/models/test_hybrid_moe_model.py +++ b/tests/unit_tests/models/test_hybrid_moe_model.py @@ -95,7 +95,8 @@ "ep_overlap_early_attn_memory_release": False, "experimental_attention_variant": None, "expert_model_parallel_size": 4, - "expert_generalized_tensor_parallel_remat_size": 1, + "expert_gtp_weight_remat_size": 1, + "expert_tensor_parallel_num_weight_shards": 1, "expert_tensor_parallel_size": 1, "external_cuda_graph": False, "ffn_hidden_size": 1856, @@ -123,7 +124,7 @@ "fused_residual_rmsnorm": False, "fused_single_qkv_rope": False, "gated_linear_unit": False, - "generalized_tensor_parallel_remat_size": 1, + "gtp_weight_remat_size": 1, "glu_linear_offset": 
0.0, "grad_scale_func": None, "mtp_grad_scale_func": None, @@ -266,6 +267,7 @@ "softmax_type": "vanilla", "symmetric_ar_type": None, "tensor_model_parallel_size": 2, + "tensor_parallel_num_weight_shards": 2, "test_mode": False, "timers": None, "tp_comm_atomic_ag": False, From 6806f431677ef82addad3d8bca719a44248f08aa Mon Sep 17 00:00:00 2001 From: Deepak Narayanan Date: Thu, 18 Jun 2026 22:56:52 -0700 Subject: [PATCH 42/59] Support GTP/EGTP in LayerWiseDistributedOptimizer and Muon (#3) - Support GTP/EGTP in LayerWiseDistributedOptimizer and Muon orthogonalization; - Fix expert optimizer grad_stats_parallel_group to include EGTP dimension LayerWiseDistributedOptimizer: use GTP/EGTP-excluded DP groups for layer-wise sharding, all-gather, and broadcast so that only true weight replicas are distributed across; GTP's own collectives handle the GTP axis separately. Falls back to the full groups when GTP is inactive. Muon orthogonalization: all-gather grad along GTP/EGTP dim 0 before running the TP-aware Newton-Schulz iteration (which needs the full weight matrix), then slice back to the local GTP shard. GTP shards along dim 0 independently of TP's partition_dim, so the two axes are handled as separate all-gathers. 
Signed-off-by: Deepak Narayanan Co-authored-by: Claude Opus 4.6 --- .../core/distributed/param_and_grad_buffer.py | 1 + megatron/core/optimizer/distrib_optimizer.py | 7 +++- .../core/optimizer/emerging_optimizers.py | 35 ++++++++++++++-- .../core/optimizer/layer_wise_optimizer.py | 42 ++++++++++++------- megatron/core/optimizer/param_layout.py | 6 ++- megatron/core/process_groups_config.py | 40 ++++++++++++++++-- 6 files changed, 107 insertions(+), 24 deletions(-) diff --git a/megatron/core/distributed/param_and_grad_buffer.py b/megatron/core/distributed/param_and_grad_buffer.py index 9051fb9f47e..0d6c7ca1c26 100644 --- a/megatron/core/distributed/param_and_grad_buffer.py +++ b/megatron/core/distributed/param_and_grad_buffer.py @@ -1011,6 +1011,7 @@ def __init__( param_layout = _compute_default_per_buffer_param_layout(self.params, bucket_size) self.param_index_map = param_layout.param_index_map self.bucket_indices = param_layout.bucket_indices + self.num_optimizer_shards = param_layout.num_optimizer_shards per_bucket_numel_unpadded = param_layout.per_bucket_numel_unpadded # Check if this buffer contains NVFP4 params. 
diff --git a/megatron/core/optimizer/distrib_optimizer.py b/megatron/core/optimizer/distrib_optimizer.py index 3da5f301f5d..d66527f4f8a 100644 --- a/megatron/core/optimizer/distrib_optimizer.py +++ b/megatron/core/optimizer/distrib_optimizer.py @@ -203,7 +203,11 @@ def _build_model_gbuf_range(cls, param_and_grad_buffer: _ParamAndGradBuffer, buc """ data_parallel_rank = param_and_grad_buffer.data_parallel_group.rank() - data_parallel_world_size = param_and_grad_buffer.data_parallel_group.size() + data_parallel_world_size = ( + param_and_grad_buffer.num_optimizer_shards + if param_and_grad_buffer.num_optimizer_shards is not None + else param_and_grad_buffer.data_parallel_group.size() + ) bucket = param_and_grad_buffer.buckets[bucket_index] gbuf_size = bucket.grad_data.numel() @@ -566,6 +570,7 @@ def _finalize_bucket(param_end_index, bucket_start_index, bucket_id): bucket_indices=bucket_indices, per_bucket_numel_unpadded=per_bucket_numel_unpadded, param_indices=param_indices if param_indices is not None else [], + num_optimizer_shards=data_parallel_world_size, ) @staticmethod diff --git a/megatron/core/optimizer/emerging_optimizers.py b/megatron/core/optimizer/emerging_optimizers.py index 4dfed6199b3..1f170091f49 100644 --- a/megatron/core/optimizer/emerging_optimizers.py +++ b/megatron/core/optimizer/emerging_optimizers.py @@ -17,7 +17,7 @@ from torch.optim.optimizer import ParamsT from megatron.core.process_groups_config import ProcessGroupCollection -from megatron.core.utils import get_pg_size, log_single_rank +from megatron.core.utils import get_pg_rank, get_pg_size, log_single_rank from .optimizer_config import ParamKey, ParamPredicate @@ -230,6 +230,35 @@ def scaled_orthogonalize_fn( scaled_orthogonalize_fn=scaled_orthogonalize_fn, ) + def scaled_orthogonalize_fn_with_gtp( + self, p, grad, tp_group, partition_dim, + ): + """All-gather grad along GTP/EGTP dim 0, orthogonalize, then slice back. 
+ + GTP shards weights along dim 0 independently of TP's partition_dim. Newton-Schulz + needs the full weight matrix, so we reconstruct the GTP dimension before running + the TP-aware orthogonalization, then extract the local GTP shard from the result. + When GTP is inactive this is a plain passthrough to scaled_orthogonalize_fn. + """ + is_expert = getattr(p, 'expert_tp', False) + gtp_group = ( + self.pg_collection.expt_gtp if is_expert else self.pg_collection.gtp + ) if self.pg_collection else None + + if gtp_group is None or get_pg_size(gtp_group) <= 1: + return self.scaled_orthogonalize_fn(grad, tp_group, partition_dim) + + gtp_size = get_pg_size(gtp_group) + gtp_rank = get_pg_rank(gtp_group) + shards = [torch.empty_like(grad) for _ in range(gtp_size)] + torch.distributed.all_gather(shards, grad, gtp_group) + gathered_grad = torch.cat(shards, dim=0) + + gathered_grad = self.scaled_orthogonalize_fn(gathered_grad, tp_group, partition_dim) + + shard_size = gathered_grad.shape[0] // gtp_size + return gathered_grad[gtp_rank * shard_size : (gtp_rank + 1) * shard_size].contiguous() + def orthogonalize(self, p: torch.Tensor, grad: torch.Tensor, **kwargs: Any) -> torch.Tensor: """Orthogonalize the momentum. 
@@ -280,14 +309,14 @@ def orthogonalize(self, p: torch.Tensor, grad: torch.Tensor, **kwargs: Any) -> t qkv_grads = [g.reshape(-1, grad_shape[-1]) for g in qkv_grads] qkv_grads = [ - self.scaled_orthogonalize_fn(g, tp_group, partition_dim).view( + self.scaled_orthogonalize_fn_with_gtp(p, g, tp_group, partition_dim).view( num_query_groups, -1, grad_shape[-1] ) for g in qkv_grads ] grad = torch.cat(qkv_grads, dim=1).view(grad_shape) else: - grad = self.scaled_orthogonalize_fn(grad, tp_group, partition_dim) + grad = self.scaled_orthogonalize_fn_with_gtp(p, grad, tp_group, partition_dim) return grad diff --git a/megatron/core/optimizer/layer_wise_optimizer.py b/megatron/core/optimizer/layer_wise_optimizer.py index 606525f8097..d1b3f3006dc 100644 --- a/megatron/core/optimizer/layer_wise_optimizer.py +++ b/megatron/core/optimizer/layer_wise_optimizer.py @@ -288,6 +288,7 @@ def _emit_bucket( bucket_indices=bucket_indices, per_bucket_numel_unpadded=per_bucket_numel_unpadded, param_indices=param_indices if param_indices is not None else [], + num_optimizer_shards=dp_size, ) @staticmethod @@ -366,6 +367,16 @@ def __init__( self.pg_collection = pg_collection + # Use GTP/EGTP-excluded DP groups for layer-wise sharding and all-gather so that + # only true weight replicas are sharded across; GTP's own all-gather / reduce-scatter + # handles the GTP axis separately. Falls back to the full groups when GTP is inactive. + if pg_collection is not None: + self.dp_cp_group = pg_collection.dp_cp_no_gtp or pg_collection.dp_cp + self.expt_dp_group = pg_collection.expt_dp_no_egtp or pg_collection.expt_dp + else: + self.dp_cp_group = None + self.expt_dp_group = None + full_param_layouts = None if model_chunks is not None: full_param_layouts = [ @@ -448,13 +459,13 @@ def shard_params(self, optimizers, full_param_layouts=None): chunk). ``None`` triggers the legacy fallback. """ # Simplify when dp_cp group size is 1. 
- dp_cp_size = get_pg_size(self.pg_collection.dp_cp) + dp_cp_size = get_pg_size(self.dp_cp_group) if dp_cp_size == 1: self.dp_cp_params_list = None self.expt_dp_params_list = None return - expt_dp_size = get_pg_size(self.pg_collection.expt_dp) + expt_dp_size = get_pg_size(self.expt_dp_group) if full_param_layouts is not None: self._shard_params_from_layout(optimizers, full_param_layouts, dp_cp_size, expt_dp_size) @@ -463,8 +474,8 @@ def shard_params(self, optimizers, full_param_layouts=None): def _shard_params_from_layout(self, optimizers, full_param_layouts, dp_cp_size, expt_dp_size): """Derive shard assignments from the param layout.""" - dp_cp_rank = get_pg_rank(self.pg_collection.dp_cp) - expt_dp_rank = get_pg_rank(self.pg_collection.expt_dp) + dp_cp_rank = get_pg_rank(self.dp_cp_group) + expt_dp_rank = get_pg_rank(self.expt_dp_group) self.dp_cp_params_list = [[] for _ in range(dp_cp_size)] self.expt_dp_params_list = [[] for _ in range(expt_dp_size)] @@ -478,14 +489,13 @@ def _shard_params_from_layout(self, optimizers, full_param_layouts, dp_cp_size, # separate DistributedOptimizer; LayerWise does not own them. if not buffer_key.is_managed_by_layer_wise_optimizer: continue - dp_size = expt_dp_size if buffer_key.is_expert_parallel else dp_cp_size for param, ( param_start_index, param_end_index, bucket_id, ) in layout.param_index_map.items(): bucket_start_index, bucket_end_index = layout.bucket_indices[bucket_id] - shard_size = (bucket_end_index - bucket_start_index) // dp_size + shard_size = (bucket_end_index - bucket_start_index) // layout.num_optimizer_shards shard_id = (param_start_index - bucket_start_index) // shard_size shard_end_index = bucket_start_index + (shard_id + 1) * shard_size assert param_end_index <= shard_end_index, ( @@ -561,12 +571,12 @@ def _shard_params_ping_pong(self, optimizers, dp_cp_size, expt_dp_size): # Assign params to rank in ping-pong style loop. 
for p, group_index in param_list: if param_groups[group_index].get("is_expert_parallel", False): - if expt_dp_loop[expt_dp_idx] == get_pg_rank(self.pg_collection.expt_dp): + if expt_dp_loop[expt_dp_idx] == get_pg_rank(self.expt_dp_group): param_groups_this_rank[group_index].append(p) self.expt_dp_params_list[expt_dp_loop[expt_dp_idx]].append(p) expt_dp_idx = (expt_dp_idx + 1) % len(expt_dp_loop) else: - if dp_cp_loop[dp_cp_idx] == get_pg_rank(self.pg_collection.dp_cp): + if dp_cp_loop[dp_cp_idx] == get_pg_rank(self.dp_cp_group): param_groups_this_rank[group_index].append(p) self.dp_cp_params_list[dp_cp_loop[dp_cp_idx]].append(p) dp_cp_idx = (dp_cp_idx + 1) % len(dp_cp_loop) @@ -599,7 +609,7 @@ def set_bucket_layerwise_params_list(self, model_chunks): for bucket in group.buckets: if not _bucket_is_managed_by_layer_wise_optimizer(bucket): continue - bucket_params_list = [[] for _ in range(get_pg_size(self.pg_collection.dp_cp))] + bucket_params_list = [[] for _ in range(get_pg_size(self.dp_cp_group))] for bucket_list, full_params_list in zip( bucket_params_list, self.dp_cp_params_list ): @@ -614,7 +624,7 @@ def set_bucket_layerwise_params_list(self, model_chunks): continue if self.expt_dp_params_list is not None: bucket_params_list = [ - [] for _ in range(get_pg_size(self.pg_collection.expt_dp)) + [] for _ in range(get_pg_size(self.expt_dp_group)) ] for bucket_list, full_params_list in zip( bucket_params_list, self.expt_dp_params_list @@ -678,9 +688,9 @@ def _allgather_helper(params_list, group): if self.pg_collection is None: return if self.dp_cp_params_list: - _allgather_helper(self.dp_cp_params_list, self.pg_collection.dp_cp) + _allgather_helper(self.dp_cp_params_list, self.dp_cp_group) if self.expt_dp_params_list: - _allgather_helper(self.expt_dp_params_list, self.pg_collection.expt_dp) + _allgather_helper(self.expt_dp_params_list, self.expt_dp_group) @torch.no_grad() def broadcast_params(self): @@ -689,15 +699,15 @@ def broadcast_params(self): if 
self.dp_cp_params_list is None: return for i, params in enumerate(self.dp_cp_params_list): - src_global_rank = torch.distributed.get_global_rank(self.pg_collection.dp_cp, i) + src_global_rank = torch.distributed.get_global_rank(self.dp_cp_group, i) for p in params: - torch.distributed.broadcast(p, src_global_rank, self.pg_collection.dp_cp) + torch.distributed.broadcast(p, src_global_rank, self.dp_cp_group) if self.expt_dp_params_list is None: return for i, params in enumerate(self.expt_dp_params_list): - src_global_rank = torch.distributed.get_global_rank(self.pg_collection.expt_dp, i) + src_global_rank = torch.distributed.get_global_rank(self.expt_dp_group, i) for p in params: - torch.distributed.broadcast(p, src_global_rank, self.pg_collection.expt_dp) + torch.distributed.broadcast(p, src_global_rank, self.expt_dp_group) @torch.no_grad() def get_grad_norm(self): diff --git a/megatron/core/optimizer/param_layout.py b/megatron/core/optimizer/param_layout.py index 2ee511c6126..9d2dd4db365 100644 --- a/megatron/core/optimizer/param_layout.py +++ b/megatron/core/optimizer/param_layout.py @@ -11,7 +11,7 @@ import math from dataclasses import dataclass, field -from typing import Dict, List, Tuple +from typing import Dict, List, Optional, Tuple import torch @@ -79,12 +79,16 @@ class PerBufferParamLayout: param_indices: The index of each param among same-dtype params (using the "fake" high-precision dtype for FP8/NVFP4 params). Needed for loading non-native-fp8 checkpoints in native-fp8 mode. Order matches param_index_map iteration order. + num_optimizer_shards: Number of optimizer shards. Set by the distributed optimizer + that computes the layout so that shard assignment at runtime uses the same + value. ``None`` for non-distributed-optimizer layouts. 
""" param_index_map: Dict[torch.nn.Parameter, Tuple[int, int, int]] = field(default_factory=dict) bucket_indices: List[Tuple[int, int]] = field(default_factory=list) per_bucket_numel_unpadded: List[int] = field(default_factory=list) param_indices: List[int] = field(default_factory=list) + num_optimizer_shards: Optional[int] = None @dataclass diff --git a/megatron/core/process_groups_config.py b/megatron/core/process_groups_config.py index 6cac3b3d840..c625e17c13d 100644 --- a/megatron/core/process_groups_config.py +++ b/megatron/core/process_groups_config.py @@ -50,11 +50,17 @@ class ProcessGroupCollection: # Data Parallelism Groups dp: Data parallel process group dp_cp: Data and context parallel group + dp_cp_no_gtp: Data and context parallel group excluding GTP peers + (true dense-weight replicas); identical to dp_cp when GTP=1 expt_dp: Expert data parallel group + expt_dp_no_egtp: Expert data parallel group excluding EGTP peers + (true expert-weight replicas); identical to expt_dp when EGTP=1 intra_dp_cp: Intra partial data parallel group + intra_dp_cp_no_gtp: Intra partial data parallel group excluding GTP peers + (true dense-weight replicas); identical to intra_dp_cp when GTP=1 intra_expt_dp: Intra partial expert data parallel group intra_expt_dp_no_egtp: Intra expert data parallel group excluding EGTP peers - (true expert-weight replicas); identical to expt_dp when EGTP=1 + (true expert-weight replicas); identical to intra_expt_dp when EGTP=1 inter_dist_opt: Inter distributed optimizer instance group Example: @@ -123,6 +129,10 @@ class ProcessGroupCollection: # _DATA_PARALLEL_GROUP_WITH_CP dp_cp: torch.distributed.ProcessGroup = field(init=False) + # _DATA_PARALLEL_GROUP_WITH_CP_NO_GTP — DP+CP excluding GTP peers (true dense-weight + # replicas). Identical to ``dp_cp`` when GTP=1. 
+ dp_cp_no_gtp: torch.distributed.ProcessGroup = field(init=False) + # Separate dp_cp communicator for param all-gather (AG/RS overlap) dp_cp_ag: torch.distributed.ProcessGroup = field(init=False) @@ -139,17 +149,25 @@ class ProcessGroupCollection: # _EXPERT_DATA_PARALLEL_GROUP expt_dp: torch.distributed.ProcessGroup = field(init=False) + # _EXPERT_DATA_PARALLEL_GROUP_NO_EGTP — expert DP excluding EGTP peers (true expert-weight + # replicas). Identical to ``expt_dp`` when EGTP=1. + expt_dp_no_egtp: torch.distributed.ProcessGroup = field(init=False) + # _EXPERT_DATA_PARALLEL_GROUP_AG expt_dp_ag: torch.distributed.ProcessGroup = field(init=False) # _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP intra_dp_cp: torch.distributed.ProcessGroup = field(init=False) + # _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_NO_GTP — intra-instance DP+CP excluding GTP + # peers (true dense-weight replicas). Identical to ``intra_dp_cp`` when GTP=1. + intra_dp_cp_no_gtp: torch.distributed.ProcessGroup = field(init=False) + # _INTRA_EXPERT_DATA_PARALLEL_GROUP intra_expt_dp: torch.distributed.ProcessGroup = field(init=False) - # _EXPERT_DATA_PARALLEL_GROUP_NO_GTP — expert DP excluding EGTP peers (true expert - # weight replicas). Identical to ``expt_dp`` when EGTP=1. + # _INTRA_EXPERT_DATA_PARALLEL_GROUP_NO_EGTP — intra-instance expert DP excluding EGTP + # peers (true expert-weight replicas). Identical to ``intra_expt_dp`` when EGTP=1. 
intra_expt_dp_no_egtp: torch.distributed.ProcessGroup = field(init=False) # _INTER_PARTIAL_EXPERT_DATA_PARALLEL_GROUP @@ -250,12 +268,23 @@ def use_mpu_process_groups(cls, required_pgs: Optional[List[str]] = None): ), 'dp': parallel_state.get_data_parallel_group, 'dp_cp': partial(parallel_state.get_data_parallel_group, with_context_parallel=True), + 'dp_cp_no_gtp': partial( + parallel_state.get_data_parallel_group, + with_context_parallel=True, + no_gtp=True, + ), 'dp_cp_ag': lambda: None, 'intra_dp_cp': partial( parallel_state.get_data_parallel_group, with_context_parallel=True, partial_data_parallel=True, ), + 'intra_dp_cp_no_gtp': partial( + parallel_state.get_data_parallel_group, + with_context_parallel=True, + no_gtp=True, + partial_data_parallel=True, + ), 'intra_expt_dp': partial( parallel_state.get_expert_data_parallel_group, check_initialized=False, @@ -279,6 +308,11 @@ def use_mpu_process_groups(cls, required_pgs: Optional[List[str]] = None): 'expt_dp': partial( parallel_state.get_expert_data_parallel_group, check_initialized=False ), + 'expt_dp_no_egtp': partial( + parallel_state.get_expert_data_parallel_group, + check_initialized=False, + no_gtp=True, + ), 'expt_dp_ag': lambda: None, 'tp_dp_cp': partial( parallel_state.get_tensor_and_data_parallel_group, From 7d71c0865ff59e3a84d1a22482b46405c129b763 Mon Sep 17 00:00:00 2001 From: Shiqing Fan Date: Fri, 19 Jun 2026 07:21:24 -0700 Subject: [PATCH 43/59] GTP+DCP: simplify gtp replica_ids in MambaMixer.sharded_state_dict; add corresponding UTs Signed-off-by: Shiqing Fan --- megatron/core/ssm/mamba_mixer.py | 34 +----- .../test_gtp_dcp.py | 103 ++++++++++++++++++ 2 files changed, 107 insertions(+), 30 deletions(-) diff --git a/megatron/core/ssm/mamba_mixer.py b/megatron/core/ssm/mamba_mixer.py index afbc4d3cb12..48d31009740 100644 --- a/megatron/core/ssm/mamba_mixer.py +++ b/megatron/core/ssm/mamba_mixer.py @@ -16,7 +16,7 @@ from megatron.core import parallel_state from megatron.core.dist_checkpointing 
import ShardedTensor -from megatron.core.dist_checkpointing.mapping import ReplicaId, ShardedObject, ShardedTensorFactory +from megatron.core.dist_checkpointing.mapping import ReplicaId, ShardedTensorFactory from megatron.core.inference.contexts import BaseInferenceContext, DynamicInferenceContext from megatron.core.inference.contexts.attention_context.triton.tensor_ops import ( tensor_get_slice_after, @@ -1306,10 +1306,7 @@ def sharded_state_dict(self, prefix="", sharded_offsets=(), metadata=None): metadata = ensure_metadata_has_dp_cp_group(metadata) sharded_state_dict = {} - # Parameters: A_log / dt_bias / D / conv1d.* are GTP-REPLICATED — every GTP rank holds an - # identical copy. The vanilla helper sets replica_id without GTP awareness, so all GTP - # ranks would claim the same chunk with the same all-zero replica_id and DCP would have a - # write conflict. Track those keys here and fold gtp_rank into replica_id below. + # Parameters self._save_to_state_dict(sharded_state_dict, "", keep_vars=True) sharded_state_dict = make_sharded_tensors_for_checkpoint( sharded_state_dict, @@ -1323,9 +1320,6 @@ def sharded_state_dict(self, prefix="", sharded_offsets=(), metadata=None): }, sharded_offsets=sharded_offsets, ) - # Captured before submodules are merged below, so this is exactly the directly-owned - # MambaMixer params/buffers (A_log, dt_bias, D, conv1d_*) — deterministic across ranks. - gtp_replicated_keys = set(sharded_state_dict.keys()) # Submodules for name, module in self.named_children(): module_sharded_sd = sharded_state_dict_default( @@ -1363,17 +1357,13 @@ def sharded_state_dict(self, prefix="", sharded_offsets=(), metadata=None): torch.distributed.all_gather_into_tensor(gathered, local, group=gtp_group) if gathered.shape[0] != in_proj_dim: gathered = gathered[:in_proj_dim].contiguous() - # Mcore replica_id convention is (PP, TP-replica-coord, DP). 
For a TP-sharded - # tensor the TP-replica-coord is 0; we put gtp_rank there so DCP elects exactly - # one writer per chunk (the one with gtp_rank=0 and dp_cp_rank=0) without - # touching the PP slot. - gtp_rank = torch.distributed.get_rank(gtp_group) + # Gathered weight is replicated across full dp_cp; replica_id needs only the DP slot. dp_cp_rank = torch.distributed.get_rank(metadata['dp_cp_group']) sharded_state_dict[f"{prefix}in_proj.weight"] = make_tp_sharded_tensor_for_checkpoint( gathered, f"{prefix}in_proj.weight", tp_axis=0, - replica_id=(0, gtp_rank, dp_cp_rank), + replica_id=(0, 0, dp_cp_rank), prepend_offsets=sharded_offsets, tp_group=self.tp_group, dp_cp_group=metadata['dp_cp_group'], @@ -1447,22 +1437,6 @@ def _gtp_slice_after_cat(sub_state_dict, _orig=original_merge_fn, 0, ) - # Fold gtp_rank into replica_id (pos 1) of the GTP-replicated keys from the vanilla - # (GTP-blind) helper: tp_rank * gtp_size + gtp_rank, so only gtp=tp=dp=0 stays all-zero - # and is elected writer. in_proj / out_proj already used the GTP-aware helper. 
- if in_proj_gtp_size > 1 and HAVE_GTP and isinstance(self.in_proj.weight, GTPShardedParam): - gtp_rank = torch.distributed.get_rank(self.in_proj.weight.group) - gtp_size = in_proj_gtp_size - for key in gtp_replicated_keys: - val = sharded_state_dict.get(key) - if isinstance(val, (ShardedTensor, ShardedTensorFactory, ShardedObject)): - rid = val.replica_id - if isinstance(rid, tuple) and len(rid) == 3: - new_pos1 = rid[1] * gtp_size + gtp_rank - sharded_state_dict[key] = replace( - val, replica_id=(rid[0], new_pos1, rid[2]) - ) - return sharded_state_dict diff --git a/tests/unit_tests/generalized_tensor_parallel/test_gtp_dcp.py b/tests/unit_tests/generalized_tensor_parallel/test_gtp_dcp.py index 39415c08add..19571a73d02 100644 --- a/tests/unit_tests/generalized_tensor_parallel/test_gtp_dcp.py +++ b/tests/unit_tests/generalized_tensor_parallel/test_gtp_dcp.py @@ -533,6 +533,105 @@ def _worker_helper_replicated_sink_rejects_gtp(rank, world_size, port): ) +def _worker_mamba_replicated_param_replica_ids(rank, world_size, port): + """End-to-end ``MambaMixer.sharded_state_dict`` under GTP: the GTP-REPLICATED + directly-owned params (A_log / dt_bias / D / conv1d.*) must get conflict-free + replica_ids — distinct across every rank holding the same chunk, with exactly + one "main" (writer) replica — so DCP elects a single writer per chunk. + + With TP=1 these params are full on every rank, so all ``world_size`` replicas + of each must have unique replica_ids and exactly one writer. This is the + invariant the gtp_rank replica_id fixup defends; it must hold whether or not + that fixup runs (the gtp-inclusive dp_cp rank already disambiguates peers). 
+ """ + from megatron.core import parallel_state as ps + from megatron.core.dist_checkpointing.mapping import ( + ShardedObject, + ShardedTensorFactory, + is_main_replica, + ) + from megatron.core.extensions.transformer_engine import ( + TELayerNormColumnParallelLinear, + TERowParallelLinear, + ) + from megatron.core.fusions.fused_bias_dropout import get_bias_dropout_add + from megatron.core.process_groups_config import ProcessGroupCollection + from megatron.core.ssm.mamba_layer import MambaLayer, MambaLayerSubmodules + from megatron.core.ssm.mamba_mixer import MambaMixer, MambaMixerSubmodules + from megatron.core.tensor_parallel.random import model_parallel_cuda_manual_seed + from megatron.core.transformer.spec_utils import ModuleSpec + from megatron.core.transformer.transformer_config import TransformerConfig + + GTP = 2 # world=4 -> tp1 * gtp2 * dp2 (exercises both gtp peers and replicate DP) + ps.destroy_model_parallel() + ps.initialize_model_parallel( + tensor_model_parallel_size=1, pipeline_model_parallel_size=1, gtp_remat_size=GTP + ) + model_parallel_cuda_manual_seed(42) + pg = ProcessGroupCollection.use_mpu_process_groups(required_pgs=['tp', 'cp', 'gtp']) + + config = TransformerConfig( + num_attention_heads=32, + num_layers=1, + hidden_size=4096, + mamba_num_heads=128, + mamba_head_dim=64, + mamba_state_dim=128, + mamba_num_groups=8, + use_mamba_mem_eff_path=True, + params_dtype=torch.bfloat16, + hidden_dropout=0.0, + bias_dropout_fusion=False, + tensor_model_parallel_size=1, + pipeline_model_parallel_size=1, + ) + submodules = MambaLayerSubmodules( + mixer=ModuleSpec( + module=MambaMixer, + submodules=MambaMixerSubmodules( + in_proj=TELayerNormColumnParallelLinear, out_proj=TERowParallelLinear + ), + ), + mamba_bda=get_bias_dropout_add, + ) + layer = MambaLayer(config, submodules, layer_number=1, pg_collection=pg).cuda() + assert any( + isinstance(p, GTPShardedParam) for p in layer.parameters() + ), "GTP not active: no GTPShardedParam in the GTP=2 Mamba 
layer" + + metadata = {'dp_cp_group': ps.get_data_parallel_group(with_context_parallel=True)} + sd = layer.mixer.sharded_state_dict(prefix='mixer.', metadata=metadata) + + target_bases = {'A_log', 'dt_bias', 'D', 'conv1d.weight', 'conv1d.bias'} + local = {} + for key, val in sd.items(): + base = key.split('mixer.', 1)[-1] + if base in target_bases and isinstance( + val, (ShardedTensor, ShardedTensorFactory, ShardedObject) + ): + rid = val.replica_id + if isinstance(rid, tuple): + local[base] = tuple(rid) + + gathered = [None] * world_size + dist.all_gather_object(gathered, local) + + ps.destroy_model_parallel() + ps.initialize_model_parallel() + GTPShardedParam._chain_state = {} + + if rank == 0: + bases = set(gathered[0]) + assert bases, "no GTP-replicated tiny params found in MambaMixer sharded_state_dict" + for base in sorted(bases): + rids = [g[base] for g in gathered] + assert len(set(rids)) == world_size, ( + f"{base}: replica_id collision across ranks -> DCP write conflict: {rids}" + ) + n_writers = sum(is_main_replica(r) for r in rids) + assert n_writers == 1, f"{base}: expected exactly 1 writer, got {n_writers}: {rids}" + + # --------------------------------------------------------------------------- # Test class wrappers (4-GPU) # --------------------------------------------------------------------------- @@ -540,6 +639,10 @@ def _worker_helper_replicated_sink_rejects_gtp(rank, world_size, port): @pytest.mark.run_only_on_devices_with_compute_capability(compute_capability=(10, 0)) class TestGtpDcpHelper: + def test_mamba_replicated_param_replica_ids(self): + _require_world_size(4) + _worker_mamba_replicated_param_replica_ids(dist.get_rank(), 4, None) + def test_composite_offset_same_axis(self): _require_world_size(4) _worker_helper_offsets_tp_eq_gtp_axis(dist.get_rank(), 4, None) From 69dae5a5d2d7e426d25158360157d797128b33df Mon Sep 17 00:00:00 2001 From: Shiqing Fan Date: Fri, 19 Jun 2026 10:52:00 -0700 Subject: [PATCH 44/59] GTP+Muon: fix DCP save/load; 
add corresponding UTs Signed-off-by: Shiqing Fan --- megatron/core/optimizer/__init__.py | 4 +- .../core/optimizer/layer_wise_optimizer.py | 89 +++++++++++- megatron/core/optimizer/optimizer.py | 49 +++++++ .../test_gtp_dcp.py | 100 ++++++++++++- .../test_gtp_muon_dcp.py | 136 ++++++++++++++++++ 5 files changed, 374 insertions(+), 4 deletions(-) create mode 100644 tests/unit_tests/generalized_tensor_parallel/test_gtp_muon_dcp.py diff --git a/megatron/core/optimizer/__init__.py b/megatron/core/optimizer/__init__.py index 763c4842e33..7f03c5f2124 100644 --- a/megatron/core/optimizer/__init__.py +++ b/megatron/core/optimizer/__init__.py @@ -907,13 +907,15 @@ def _get_megatron_emerging_optimizer( "the legacy LayerWise ping-pong path for MoE models." ) fallback_config.use_distributed_optimizer = True + # Shard optimizer state over the gtp-EXCLUDED replicate group + # (intra_dp_cp_no_gtp_group), matching how the DDP grad buffer is partitioned. result = _get_megatron_optimizer_based_on_param_groups( config=fallback_config, model_chunks=model_chunks, param_groups=groups, per_model_buffers=distopt_per_model_buffers, model_parallel_group=distopt_process_groups['mp_group'], - data_parallel_group=distopt_process_groups['intra_dp_cp_group'], + data_parallel_group=distopt_process_groups['intra_dp_cp_no_gtp_group'], data_parallel_group_gloo=distopt_process_groups['intra_dp_cp_group_gloo'], data_parallel_group_idx=get_pg_rank(distopt_process_groups['mp_group']), intra_dist_opt_group=distopt_process_groups['intra_dist_opt_group'], diff --git a/megatron/core/optimizer/layer_wise_optimizer.py b/megatron/core/optimizer/layer_wise_optimizer.py index d1b3f3006dc..ce2786ac719 100644 --- a/megatron/core/optimizer/layer_wise_optimizer.py +++ b/megatron/core/optimizer/layer_wise_optimizer.py @@ -2,6 +2,7 @@ import logging import math +import re from typing import Callable, Dict, List, Optional, Tuple import torch @@ -86,6 +87,84 @@ def tag_params_for_buffer_routing(model_chunks) -> None: 
param.is_managed_by_layer_wise_optimizer = is_managed_by_layer_wise_optimizer(param) +def _build_gtp_replica_fold(pg_collection, model_chunks) -> Dict[str, Tuple[int, int]]: + """Map each (E)GTP-REPLICATED param's name to ``(gtp_rank, gtp_size)`` for replica_id folding. + + PROBLEM: LayerWise keeps (E)GTP-replicated params (identical on every (e)gtp peer) WHOLE, so + their optimizer-state ShardedTensors share one key+offset across those peers. The DP-coord reset + in ``sharded_state_dict`` would then mark all peers the all-zero "main replica" -> DCP sees N + writers for one shard and rejects the save. + + FIX: fold the (e)gtp rank into ``replica_id[1]`` so exactly one peer writes. (E)GTP-SHARDED + params (``GTPShardedParam``) are offset-sharded and excluded -- each shard already has a + distinct offset, hence a unique writer. + + Returns: ``{param_name: (gtp_rank, gtp_size)}``, empty (no folding) when GTP is unavailable or + no group spans >1 rank. Names are bare (all ``module.`` wrappers stripped, layer index + collapsed) to match the optimizer-state checkpoint key suffix. + """ + gtp_fold: Dict[str, Tuple[int, int]] = {} + try: + from megatron.experimental.gtp import HAVE_GTP, GTPShardedParam + except ImportError: + return gtp_fold + if not HAVE_GTP: + return gtp_fold + + from megatron.core import parallel_state + + # Source the (e)gtp groups from pg_collection if populated, else from parallel_state + # (the default pg_collection leaves gtp/expt_gtp unset). Compatibility point. 
+ gtp_group = getattr(pg_collection, 'gtp', None) if pg_collection else None + if gtp_group is None: + gtp_group = parallel_state.get_gtp_weight_remat_group(check_initialized=False) + egtp_group = getattr(pg_collection, 'expt_gtp', None) if pg_collection else None + if egtp_group is None: + egtp_group = parallel_state.get_expert_gtp_weight_remat_group(check_initialized=False) + + for model_chunk in model_chunks: + for name, p in model_chunk.named_parameters(): + if isinstance(p, GTPShardedParam): + continue + grp = egtp_group if getattr(p, 'is_expert_parallel', False) else gtp_group + if grp is None or grp.size() <= 1: + continue + # Normalize the param name so it matches the optimizer-state checkpoint key suffix, + # which is wrapper-free and layer-collapsed. Two transforms, in order: + # 1. drop every leading 'module.' (DDP + Float16Module can double-wrap the model), and + # 2. collapse the layer index (the checkpoint key drops it -- it is a sharded axis). + # e.g. 'module.module.decoder.layers.3.mlp.router.weight' + # -> 'decoder.layers.mlp.router.weight' + nm = name + while nm.startswith('module.'): + nm = nm[len('module.') :] + nm = re.sub(r'\.layers\.\d+\.', '.layers.', nm) + gtp_fold[nm] = (grp.rank(), grp.size()) + return gtp_fold + + +def _fold_replica_id(replica_id, key, gtp_fold: Dict[str, Tuple[int, int]]): + """Compute a ShardedTensor's writer-disambiguating replica_id for fixed-DP checkpointing. + + Base reset: keep (PP, TP), zero DP -- every DP rank holds the same shard, so one writer + remains. Correct for normal params. + + For an (e)gtp-replicated param (one in ``gtp_fold``), the reset leaves ``gtp_size`` writers, so + fold the peer's gtp rank into the TP slot to re-spread them: ``new_tp = old_tp * gtp_size + + gtp_rank`` (rank 0 stays the writer, the others move off the all-zero main replica) -> one + writer per shard. 
Suffix-match (bare fold name vs fully-qualified key) and collapse the key's + layer index too, so it matches per-layer and already-collapsed keys. + """ + rid = (*replica_id[:2], 0) + if not gtp_fold: + return rid + key = re.sub(r'\.layers\.\d+\.', '.layers.', key or '') + for nm, (gtp_rank, gtp_size) in gtp_fold.items(): + if key.endswith(nm): + return (rid[0], rid[1] * gtp_size + gtp_rank, rid[2]) + return rid + + class LayerWiseDistributedOptimizer(ChainedOptimizer): """Layer-wise distributed optimizer for Megatron-core models. @@ -840,14 +919,20 @@ def sharded_state_dict( model_sharded_state_dict, is_loading, **kwargs ) + # (E)GTP-replicated-param -> (gtp_rank, gtp_size), consumed by _fold_replica_id below. + gtp_fold = _build_gtp_replica_fold(self.pg_collection, self.model_chunks) + # for fixed DP usage only for sh_base in nested_values(sharded_state_dict): if hasattr(sh_base, 'replica_id'): assert ( isinstance(sh_base.replica_id, int) or len(sh_base.replica_id) == 3 ), f'Expected replica_id as int or (PP, TP, DP), got: {sh_base}' - sh_base.replica_id = ( - 0 if isinstance(sh_base.replica_id, int) else (*sh_base.replica_id[:2], 0) + if isinstance(sh_base.replica_id, int): + sh_base.replica_id = 0 + continue + sh_base.replica_id = _fold_replica_id( + sh_base.replica_id, getattr(sh_base, 'key', ''), gtp_fold ) # later code assume list but chained optimizer fallback to non-list if there's only one diff --git a/megatron/core/optimizer/optimizer.py b/megatron/core/optimizer/optimizer.py index 127b32d9ed0..0913400d50e 100644 --- a/megatron/core/optimizer/optimizer.py +++ b/megatron/core/optimizer/optimizer.py @@ -775,6 +775,53 @@ def step(self): return success, grad_norm, num_zeros_in_grad +def _backfill_gtp_sharded_param_map(id_to_sharded_param_map: dict, float16_groups) -> None: + """Backfill the optimizer id->ShardedTensor map with GTP shards it is missing (in place). 
+ + WHAT: ``get_param_id_to_sharded_param_map`` matches an optimizer param to its model + ShardedTensor by object identity (``id(model_entry.data) == id(optim_param)``). A GTP weight + whose model entry is a gathered+split factory (Mamba ``in_proj``) exposes the *gathered* tensor, + not the per-shard ``GTPShardedParam``, so it fails to match and is absent from the map -- the + generic conversion below would then KeyError on it. This backfills the same per-shard + ShardedTensor every other GTP weight already gets, so its optimizer state is saved per-shard. + + WHEN: only the distributed-Muon path reaches here. ``LayerWiseDistributedOptimizer`` keeps such + matrix params whole and routes them through this ``Float16OptimizerWithFloat16Params``. + Distributed Adam uses its own ``DistributedOptimizer.sharded_state_dict`` (flat-buffer path) + and is unaffected. + + No-op when GTP is unavailable or when every param already matched. + """ + try: + from megatron.core import parallel_state + from megatron.experimental.gtp import ( + GTPShardedParam, + make_sharded_tensors_for_checkpoint_with_gtp, + ) + except ImportError: + return # GTP not built in -- nothing to backfill. + + # Checkpoint compatibility point: source the groups from parallel_state, mirroring the + # make_*_for_checkpoint helpers (which fall back to these same globals). + tp_group = parallel_state.get_tensor_model_parallel_group() + dp_cp_group = parallel_state.get_data_parallel_group(with_context_parallel=True) + for param_id, p in enumerate(chain.from_iterable(float16_groups)): + # Skip params that already matched, and any non-GTP param (those always match). + if param_id in id_to_sharded_param_map or not isinstance(p, GTPShardedParam): + continue + # Key by the param's dotted name (set in prod by tag_gtp_params_with_names); the fallback + # keeps the function usable in tests where the name was not tagged. 
+ key = p._debug_name or f'_gtp_optim_param_{param_id}' + entry = make_sharded_tensors_for_checkpoint_with_gtp( + {key: p}, + prefix='', + tensor_parallel_layers_axis_map={key: 0}, + tp_group=tp_group, + dp_cp_group=dp_cp_group, + ) + id_to_sharded_param_map[param_id] = entry[key] + + class Float16OptimizerWithFloat16Params(MixedPrecisionOptimizer): """Float16 optimizer for fp16 and bf16 data types. @@ -965,6 +1012,8 @@ def sharded_state_dict( model_sharded_state_dict, chain.from_iterable(g for g in self.float16_groups) ) + _backfill_gtp_sharded_param_map(id_to_sharded_param_map, self.float16_groups) + # Convert fp32_from_fp16_params assert len(state_dict['fp32_from_fp16_params']) == len( state_dict['optimizer']['param_groups'] diff --git a/tests/unit_tests/generalized_tensor_parallel/test_gtp_dcp.py b/tests/unit_tests/generalized_tensor_parallel/test_gtp_dcp.py index 19571a73d02..1de0f6e01ab 100644 --- a/tests/unit_tests/generalized_tensor_parallel/test_gtp_dcp.py +++ b/tests/unit_tests/generalized_tensor_parallel/test_gtp_dcp.py @@ -16,8 +16,8 @@ from megatron.core.dist_checkpointing import ShardedTensor from megatron.experimental.gtp import ( GTP_CONFIG, - GTPShardedParam, HAVE_GTP, + GTPShardedParam, make_sharded_tensors_for_checkpoint_with_gtp, reset_gtp_quantize_cache, update_gtp_config, @@ -632,6 +632,100 @@ def _worker_mamba_replicated_param_replica_ids(rank, world_size, port): assert n_writers == 1, f"{base}: expected exactly 1 writer, got {n_writers}: {rids}" +def _worker_mamba_inproj_optim_param_map(rank, world_size, port): + """GTP+Muon checkpoint fix: in_proj's gathered+split model entry does NOT object-id-match the + per-shard optimizer param, so get_param_id_to_sharded_param_map misses it (the KeyError seen in + Float16OptimizerWithFloat16Params.sharded_state_dict). Verify the per-shard fallback used by the + fix restores a ShardedTensor with local_shape == the optimizer param shape, which + make_sharded_optimizer_tensor then accepts. 
+ """ + from megatron.core import parallel_state as ps + from megatron.core.dist_checkpointing.optimizer import ( + get_param_id_to_sharded_param_map, + make_sharded_optimizer_tensor, + ) + from megatron.core.extensions.transformer_engine import ( + TELayerNormColumnParallelLinear, + TERowParallelLinear, + ) + from megatron.core.fusions.fused_bias_dropout import get_bias_dropout_add + from megatron.core.process_groups_config import ProcessGroupCollection + from megatron.core.ssm.mamba_layer import MambaLayer, MambaLayerSubmodules + from megatron.core.ssm.mamba_mixer import MambaMixer, MambaMixerSubmodules + from megatron.core.tensor_parallel.random import model_parallel_cuda_manual_seed + from megatron.core.transformer.spec_utils import ModuleSpec + from megatron.core.transformer.transformer_config import TransformerConfig + from megatron.experimental.gtp import ( + make_sharded_tensors_for_checkpoint_with_gtp, + tag_gtp_params_with_names, + ) + + ps.destroy_model_parallel() + ps.initialize_model_parallel( + tensor_model_parallel_size=1, pipeline_model_parallel_size=1, gtp_remat_size=2 + ) + model_parallel_cuda_manual_seed(42) + pg = ProcessGroupCollection.use_mpu_process_groups(required_pgs=['tp', 'cp', 'gtp']) + config = TransformerConfig( + num_attention_heads=32, + num_layers=1, + hidden_size=4096, + mamba_num_heads=128, + mamba_head_dim=64, + mamba_state_dim=128, + mamba_num_groups=8, + use_mamba_mem_eff_path=True, + params_dtype=torch.bfloat16, + hidden_dropout=0.0, + bias_dropout_fusion=False, + tensor_model_parallel_size=1, + pipeline_model_parallel_size=1, + ) + submodules = MambaLayerSubmodules( + mixer=ModuleSpec( + module=MambaMixer, + submodules=MambaMixerSubmodules( + in_proj=TELayerNormColumnParallelLinear, out_proj=TERowParallelLinear + ), + ), + mamba_bda=get_bias_dropout_add, + ) + layer = MambaLayer(config, submodules, layer_number=1, pg_collection=pg).cuda() + tag_gtp_params_with_names(layer) # set _debug_name (mirrors production setup) + + 
in_proj_w = layer.mixer.in_proj.weight + assert isinstance(in_proj_w, GTPShardedParam), "in_proj.weight should be GTP-sharded" + + metadata = {'dp_cp_group': ps.get_data_parallel_group(with_context_parallel=True)} + model_sd = layer.mixer.sharded_state_dict(prefix='mixer.', metadata=metadata) + + # Reproduce the gap: in_proj's per-shard optim param has no id-match in the model dict. + id_map = get_param_id_to_sharded_param_map(model_sd, [in_proj_w]) + assert 0 not in id_map, "expected in_proj to be MISSING from id map (the KeyError gap)" + + # The fix's per-shard fallback restores a matching entry. + key = in_proj_w._debug_name or '_gtp_optim_param_0' + entry = make_sharded_tensors_for_checkpoint_with_gtp( + {key: in_proj_w}, + prefix='', + tensor_parallel_layers_axis_map={key: 0}, + tp_group=ps.get_tensor_model_parallel_group(), + dp_cp_group=ps.get_data_parallel_group(with_context_parallel=True), + )[key] + assert tuple(entry.local_shape) == tuple(in_proj_w.shape), ( + f"per-shard entry local_shape {tuple(entry.local_shape)} != param shape " + f"{tuple(in_proj_w.shape)}" + ) + # make_sharded_optimizer_tensor must accept it for a same-shape optimizer state tensor. 
+ opt_state = torch.zeros_like(in_proj_w) + osh = make_sharded_optimizer_tensor(entry, opt_state, prefix='optimizer.state.exp_avg') + assert osh is not None + + ps.destroy_model_parallel() + ps.initialize_model_parallel() + GTPShardedParam._chain_state = {} + + # --------------------------------------------------------------------------- # Test class wrappers (4-GPU) # --------------------------------------------------------------------------- @@ -643,6 +737,10 @@ def test_mamba_replicated_param_replica_ids(self): _require_world_size(4) _worker_mamba_replicated_param_replica_ids(dist.get_rank(), 4, None) + def test_mamba_inproj_optim_param_map(self): + _require_world_size(4) + _worker_mamba_inproj_optim_param_map(dist.get_rank(), 4, None) + def test_composite_offset_same_axis(self): _require_world_size(4) _worker_helper_offsets_tp_eq_gtp_axis(dist.get_rank(), 4, None) diff --git a/tests/unit_tests/generalized_tensor_parallel/test_gtp_muon_dcp.py b/tests/unit_tests/generalized_tensor_parallel/test_gtp_muon_dcp.py new file mode 100644 index 00000000000..bfd59677377 --- /dev/null +++ b/tests/unit_tests/generalized_tensor_parallel/test_gtp_muon_dcp.py @@ -0,0 +1,136 @@ +# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +"""Unit tests for GTP + Muon (LayerWise) distributed checkpointing. + +Covers the optimizer-state checkpoint roundtrip for the +:class:`LayerWiseDistributedOptimizer` (Muon) under GTP, where GTP-replicated +matrix params (e.g. the MoE router) are kept whole and must be disambiguated +by ``replica_id`` so DCP does not see multiple writers for the same shard. 
+""" + +import torch + +from megatron.core.dist_checkpointing import load, save +from tests.unit_tests.dist_checkpointing import ( + TempNamedDir, + setup_model_and_optimizer, +) +from tests.unit_tests.test_utilities import Utils + + +def check_equal(input_1, input_2): + """Check if two inputs are equal, used for checking checkpointing.""" + if isinstance(input_1, dict) and isinstance(input_2, dict): + assert input_1.keys() == input_2.keys() + for key in input_1.keys(): + check_equal(input_1[key], input_2[key]) + elif isinstance(input_1, list) and isinstance(input_2, list): + assert len(input_1) == len(input_2) + for i in range(len(input_1)): + check_equal(input_1[i], input_2[i]) + elif isinstance(input_1, torch.Tensor) and isinstance(input_2, torch.Tensor): + assert torch.all(input_1 == input_2), f"Input 1: {input_1} != Input 2: {input_2}" + elif type(input_1) != type(input_2): + assert False, f"Input 1 type: {type(input_1)} != Input 2 type: {type(input_2)}" + else: + assert input_1 == input_2, f"Input 1: {input_1} != Input 2: {input_2}" + + +class TestGTPMuonDCP: + """GTP + Muon (LayerWise) distributed checkpointing tests.""" + + def teardown_method(self, method): + Utils.destroy_model_parallel() + + def test_gtp_muon_moe_save_load(self, tmp_path_dist_ckpt): + """GTP + Muon (LayerWise) optimizer-state checkpoint roundtrip. + + GTP-REPLICATED, Muon-managed matrix params (e.g. the MoE router, held identically on every + GTP peer) must not collide on GTP peers during checkpoint save: LayerWise keeps each such + param whole, so its optimizer-state ShardedTensor has the same key+offset on all GTP peers + and the replica_id must distinguish them, or DCP validate_sharding_integrity reports 2 + writers ('Invalid access pattern ... [[2]]'). Adam dodges this by sharding the state. 
+ """ + import os + from functools import partial + + import pytest + + from megatron.experimental.gtp import HAVE_GTP + + if not HAVE_GTP: + pytest.skip("GTP requires TE with hook registry") + if int(os.environ.get('WORLD_SIZE', '1')) != 4: + pytest.skip("Requires world_size 4 (gtp2 x dp2)") + + os.environ['MEGATRON_GTP_FORCE_ENABLE'] = '1' + from megatron.core import parallel_state as ps + from megatron.core.tensor_parallel import model_parallel_cuda_manual_seed + from megatron.experimental.gtp import GTP_CONFIG, GTPShardedParam, update_gtp_config + from tests.unit_tests.dist_checkpointing.utils import initialize_moe_model + + Utils.initialize_model_parallel(1, 1) # bootstrap torch.distributed + model parallel + ps.destroy_model_parallel() + ps.initialize_model_parallel( + tensor_model_parallel_size=1, pipeline_model_parallel_size=1, gtp_remat_size=2 + ) + model_parallel_cuda_manual_seed(2) + # Disable GTP alignment padding so the tiny test dims slice cleanly by gtp_size. + _orig_pad = GTP_CONFIG.pad_for_alignment + update_gtp_config(pad_for_alignment=0) + # GTP-friendly dims (divisible by gtp_size=2); GPU init (CPU affine init is not GTP-aware + # for the strided QKV weight). 
+ moe_cfg = dict( + hidden_size=64, + num_attention_heads=8, + kv_channels=8, + ffn_hidden_size=128, + use_cpu_initialization=False, + ) + meta = {'distrib_optim_sharding_type': 'dp_reshardable'} + with TempNamedDir(tmp_path_dist_ckpt / 'gtp_muon_moe_A', sync=True) as ckpt_dir_A: + with TempNamedDir(tmp_path_dist_ckpt / 'gtp_muon_moe_B', sync=True) as ckpt_dir_B: + model_A, optimizer_A = setup_model_and_optimizer( + seed=2, + tp=1, + pp=1, + bf16=True, + dist_opt=True, + use_param_layout=True, + initialize_fn=partial(initialize_moe_model, use_te=True, **moe_cfg), + optimizer='dist_muon', + ) + assert any( + isinstance(p, GTPShardedParam) for p in model_A[0].parameters() + ), "GTP not active: no GTPShardedParam in the GTP=2 MoE model" + + model_sd_A = model_A[0].sharded_state_dict() + optim_sd_A = optimizer_A.sharded_state_dict(model_sd_A, metadata=meta) + save(optim_sd_A, ckpt_dir_A) # fails (2 writers) before the LayerWise replica_id fix + + model_B, optimizer_B = setup_model_and_optimizer( + seed=3, + tp=1, + pp=1, + bf16=True, + dist_opt=True, + use_param_layout=True, + initialize_fn=partial(initialize_moe_model, use_te=True, **moe_cfg), + optimizer='dist_muon', + ) + model_sd_B = model_B[0].sharded_state_dict() + load_sharded_sd = optimizer_B.sharded_state_dict( + model_sd_B, is_loading=True, metadata=meta + ) + state_dict = load(load_sharded_sd, ckpt_dir_A) + optimizer_B.load_state_dict(state_dict) + optim_sd_B = optimizer_B.sharded_state_dict(model_sd_B, metadata=meta) + save(optim_sd_B, ckpt_dir_B) + + update_gtp_config(pad_for_alignment=_orig_pad) + + Utils.destroy_model_parallel() + Utils.initialize_model_parallel(1, 1) + from megatron.core.dist_checkpointing import load_plain_tensors + + check_equal(load_plain_tensors(ckpt_dir_A), load_plain_tensors(ckpt_dir_B)) From f13a04270e8d1adbe409197f80a38f58d0bad099 Mon Sep 17 00:00:00 2001 From: Shiqing Fan Date: Fri, 19 Jun 2026 20:17:19 -0700 Subject: [PATCH 45/59] code clean and fix comments Signed-off-by: 
Shiqing Fan --- .../distributed/distributed_data_parallel.py | 9 ++ megatron/core/model_parallel_config.py | 22 ++-- megatron/core/optimizer/__init__.py | 39 +++---- megatron/core/parallel_state.py | 103 ++++++++---------- megatron/core/process_groups_config.py | 31 ++++++ .../core/transformer/transformer_config.py | 4 +- megatron/experimental/gtp/README.md | 30 +++-- megatron/experimental/gtp/__init__.py | 2 + .../gtp/generalized_tensor_parallelism.py | 55 +++------- megatron/training/training.py | 5 +- 10 files changed, 168 insertions(+), 132 deletions(-) diff --git a/megatron/core/distributed/distributed_data_parallel.py b/megatron/core/distributed/distributed_data_parallel.py index 44eb457f155..4459b64fcae 100644 --- a/megatron/core/distributed/distributed_data_parallel.py +++ b/megatron/core/distributed/distributed_data_parallel.py @@ -193,6 +193,15 @@ def __init__( self.full_param_layout = full_param_layout + # GTP needs average_in_collective=False: the per-bucket collective runs over the + # GTP-EXCLUDED group, so NCCL AVG would miss the 1/gtp factor. arguments.py guards the + # training path; this assert covers direct megatron-core users. + gtp_active = ProcessGroupCollection.is_gtp_active(process_group_dict) + assert not (gtp_active and self.ddp_config.average_in_collective), ( + "GTP requires average_in_collective=False (the default); averaged collectives reduce " + "over the GTP-excluded group and would miss the 1/gtp gradient scaling factor." + ) + # Compute gradient scaling factors. 
if config.calculate_per_token_loss: assert ( diff --git a/megatron/core/model_parallel_config.py b/megatron/core/model_parallel_config.py index f7dd0791afe..818105d4c77 100644 --- a/megatron/core/model_parallel_config.py +++ b/megatron/core/model_parallel_config.py @@ -11,6 +11,8 @@ def resolve_tensor_parallel_weight_shards( tensor_model_parallel_size: int, tensor_parallel_num_weight_shards: Optional[int], gtp_weight_remat_size: int, + shards_field: str = "tensor_parallel_num_weight_shards", + tp_field: str = "tensor_model_parallel_size", ) -> tuple: """Reconcile ``tensor_parallel_num_weight_shards`` and ``gtp_weight_remat_size``. @@ -26,14 +28,16 @@ def resolve_tensor_parallel_weight_shards( if tensor_parallel_num_weight_shards is None: tensor_parallel_num_weight_shards = tp * gtp_weight_remat_size else: - assert tensor_parallel_num_weight_shards >= tp, ( - f"tensor_parallel_num_weight_shards ({tensor_parallel_num_weight_shards}) must be >= " - f"tensor_model_parallel_size ({tp})." - ) - assert tensor_parallel_num_weight_shards % tp == 0, ( - f"tensor_parallel_num_weight_shards ({tensor_parallel_num_weight_shards}) must be " - f"divisible by tensor_model_parallel_size ({tp})." - ) + if tensor_parallel_num_weight_shards < tp: + raise ValueError( + f"{shards_field} ({tensor_parallel_num_weight_shards}) must be " + f">= {tp_field} ({tp})." + ) + if tensor_parallel_num_weight_shards % tp != 0: + raise ValueError( + f"{shards_field} ({tensor_parallel_num_weight_shards}) must be " + f"divisible by {tp_field} ({tp})." 
+ ) gtp_weight_remat_size = tensor_parallel_num_weight_shards // tp return tensor_parallel_num_weight_shards, gtp_weight_remat_size @@ -513,6 +517,8 @@ def __post_init__(self): self.expert_tensor_parallel_size, self.expert_tensor_parallel_num_weight_shards, self.expert_gtp_weight_remat_size, + shards_field="expert_tensor_parallel_num_weight_shards", + tp_field="expert_tensor_parallel_size", ) ) diff --git a/megatron/core/optimizer/__init__.py b/megatron/core/optimizer/__init__.py index 7f03c5f2124..0dc91abffb2 100644 --- a/megatron/core/optimizer/__init__.py +++ b/megatron/core/optimizer/__init__.py @@ -1039,7 +1039,6 @@ def get_megatron_optimizer( dp_cp_group = process_groups_dict['dp_cp_group'] intra_dp_cp_group = process_groups_dict['intra_dp_cp_group'] intra_dp_cp_no_gtp_group = process_groups_dict['intra_dp_cp_no_gtp_group'] - intra_expt_dp_group = process_groups_dict['intra_expt_dp_group'] intra_expt_dp_no_egtp_group = process_groups_dict['intra_expt_dp_no_egtp_group'] mp_group = process_groups_dict['mp_group'] expt_tp_pp_group = process_groups_dict['expt_tp_pp_group'] @@ -1049,19 +1048,21 @@ def get_megatron_optimizer( intra_expt_dp_group_gloo = process_groups_dict['intra_expt_dp_group_gloo'] intra_dist_opt_group = process_groups_dict['intra_dist_opt_group'] - # GTP/EGTP params fold into the main / expert optimizers, sharding their optimizer state over - # the *_no_gtp (gtp/egtp-EXCLUDED) replicate group — which aliases the full DP group when GTP - # is inactive. GTP is "active" when that group is strictly smaller (no Gloo state path then). - # A None group means the axis is unused (e.g. no expert parallelism) → not active. 
- def _gtp_active_for(sub, full): - return sub is not None and full is not None and sub.size() != full.size() - - gtp_active = _gtp_active_for(intra_dp_cp_no_gtp_group, intra_dp_cp_group) or _gtp_active_for( - intra_expt_dp_no_egtp_group, intra_expt_dp_group - ) - main_dp_group = intra_dp_cp_no_gtp_group - main_dp_group_gloo = None if gtp_active else intra_dp_cp_group_gloo - main_expt_dp_group = intra_expt_dp_no_egtp_group + # Drives no-Gloo state path + sharding over the *_no_gtp replicate group below. + gtp_active = ProcessGroupCollection.is_gtp_active(process_groups_dict) + optim_dp_group = intra_dp_cp_no_gtp_group + # The gtp-excluded replicate group has no Gloo variant by design (parallel_state asserts it), + # so None is correct under GTP. Warn if a Gloo group was requested so the drop is not silent. + if gtp_active and intra_dp_cp_group_gloo is not None: + log_single_rank( + logger, + logging.WARNING, + "GTP is active: disabling the optimizer's Gloo data-parallel group (no Gloo variant " + "of the gtp-excluded replicate group). Use DCP (--ckpt-format torch_dist) for " + "checkpointing; the legacy Gloo CPU scatter path is unavailable under GTP.", + ) + optim_dp_group_gloo = None if gtp_active else intra_dp_cp_group_gloo + optim_expt_dp_group = intra_expt_dp_no_egtp_group # ``mp_group`` spans TP×GTP×PP (GTP-merged). model_parallel_rank = get_pg_rank(mp_group) @@ -1155,7 +1156,7 @@ def _gtp_active_for(sub, full): param_to_param_group[param_name] = param_group_id param_group_id += 1 - # main_dp_group_gloo was selected above (None when GTP is active; no Gloo path yet). + # optim_dp_group_gloo was selected above (None when GTP is active; no Gloo path yet). 
optimizers.append( _get_megatron_optimizer_based_on_param_groups( config=config, @@ -1163,8 +1164,8 @@ def _gtp_active_for(sub, full): param_groups=param_groups, per_model_buffers=buffers, model_parallel_group=mp_group, - data_parallel_group=main_dp_group, - data_parallel_group_gloo=main_dp_group_gloo, + data_parallel_group=optim_dp_group, + data_parallel_group_gloo=optim_dp_group_gloo, data_parallel_group_idx=model_parallel_rank, intra_dist_opt_group=intra_dist_opt_group, distributed_optimizer_instance_id=distributed_optimizer_instance_id, @@ -1197,7 +1198,7 @@ def _gtp_active_for(sub, full): expt_model_parallel_rank = get_pg_rank(expt_tp_pp_with_egtp_group) # Gloo expert-DP group for the optimizer, only when (E)GTP is inactive. When active the # optimizer shards over the egtp-EXCLUDED (no_egtp) replicate group, which has no Gloo - # variant yet, so pass None (mirrors the dense main_dp_group_gloo above). + # variant yet, so pass None (mirrors the dense optim_dp_group_gloo above). if use_gloo_process_groups and not gtp_active: expt_data_parallel_group_gloo = intra_expt_dp_group_gloo else: @@ -1209,7 +1210,7 @@ def _gtp_active_for(sub, full): param_groups=moe_param_groups, per_model_buffers=moe_buffers, model_parallel_group=expt_tp_pp_with_egtp_group, - data_parallel_group=main_expt_dp_group, + data_parallel_group=optim_expt_dp_group, data_parallel_group_gloo=expt_data_parallel_group_gloo, data_parallel_group_idx=expt_model_parallel_rank, intra_dist_opt_group=intra_dist_opt_group, diff --git a/megatron/core/parallel_state.py b/megatron/core/parallel_state.py index 2168b18e35f..0f082227d89 100644 --- a/megatron/core/parallel_state.py +++ b/megatron/core/parallel_state.py @@ -683,6 +683,7 @@ def initialize_model_parallel( layer) via async all-gather on every forward AND backward pass. A first-class orthogonal axis (world_size = TP*GTP*CP*DP). Maps to the dataclass field ``ModelParallelConfig.gtp_weight_remat_size``. 
+ NOTE: "remat" here is NOT activation recomputation/checkpointing. expert_gtp_remat_size (int, default = 1): Expert-side counterpart of ``gtp_remat_size`` — shards routed-expert @@ -1753,37 +1754,32 @@ def get_data_parallel_group( no_gtp: If True, return only the true weight-replica ranks (exclude GTP peers). partial_data_parallel: If True, return partial DP group (requires with_context_parallel). """ - if no_gtp: - if with_context_parallel: - if partial_data_parallel: - assert ( - _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_NO_GTP is not None - ), "Intra partial data parallel group with CP and GTP is not initialized" - return _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_NO_GTP - assert ( - _DATA_PARALLEL_GROUP_WITH_CP_NO_GTP is not None - ), "data parallel group with CP and GTP is not initialized" - return _DATA_PARALLEL_GROUP_WITH_CP_NO_GTP - else: - assert partial_data_parallel is False, "Partial DP for Optimizer needs to include CP" - assert ( - _DATA_PARALLEL_GROUP_NO_GTP is not None - ), "data parallel group with generalized tensor parallel is not initialized" - return _DATA_PARALLEL_GROUP_NO_GTP - if with_context_parallel: - if partial_data_parallel: - assert ( - _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP is not None - ), "Intra partial data parallel group is not initialized" - return _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP - assert ( - _DATA_PARALLEL_GROUP_WITH_CP is not None - ), "data parallel group with context parallel combined is not initialized" - return _DATA_PARALLEL_GROUP_WITH_CP - else: - assert _DATA_PARALLEL_GROUP is not None, "data parallel group is not initialized" - assert partial_data_parallel == False, "Partial DP for Optimizer needs to include CP" - return _DATA_PARALLEL_GROUP + assert ( + with_context_parallel or not partial_data_parallel + ), "Partial DP for Optimizer needs to include CP" + # (no_gtp, with_context_parallel, partial_data_parallel) -> (group, description). 
The globals + # are read at call time (assigned during initialize_model_parallel). partial requires CP, so + # the (*, False, True) rows are unreachable and omitted. + group_table = { + (False, False, False): (_DATA_PARALLEL_GROUP, "data parallel group"), + (False, True, False): (_DATA_PARALLEL_GROUP_WITH_CP, "data parallel group with CP"), + (False, True, True): ( + _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP, + "intra partial data parallel group with CP", + ), + (True, False, False): (_DATA_PARALLEL_GROUP_NO_GTP, "data parallel group (no GTP)"), + (True, True, False): ( + _DATA_PARALLEL_GROUP_WITH_CP_NO_GTP, + "data parallel group with CP (no GTP)", + ), + (True, True, True): ( + _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_NO_GTP, + "intra partial data parallel group with CP (no GTP)", + ), + } + group, description = group_table[(no_gtp, with_context_parallel, partial_data_parallel)] + assert group is not None, f"{description} is not initialized" + return group def get_data_parallel_group_gloo( @@ -2353,31 +2349,26 @@ def get_expert_data_parallel_group( check_initialized=True, no_gtp=False, partial_expert_data_parallel=False ): """Get expert data parallel group.""" - if no_gtp: - if partial_expert_data_parallel: - if check_initialized: - assert _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_NO_GTP is not None, ( - "Intra partial expert data parallel group with generalized tensor " - "parallel is not initialized" - ) - return _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_NO_GTP - if check_initialized: - assert ( - _EXPERT_DATA_PARALLEL_GROUP_NO_GTP is not None - ), "Expert data parallel group with generalized tensor parallel is not initialized" - return _EXPERT_DATA_PARALLEL_GROUP_NO_GTP - if partial_expert_data_parallel: - if check_initialized: - assert ( - _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP is not None - ), "Intra partial expert data parallel group is not initialized" - return _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP - else: - if check_initialized: - assert ( - 
_EXPERT_DATA_PARALLEL_GROUP is not None - ), "Expert data parallel group is not initialized" - return _EXPERT_DATA_PARALLEL_GROUP + # (no_gtp, partial_expert_data_parallel) -> (group, description). Read at call time. + group_table = { + (False, False): (_EXPERT_DATA_PARALLEL_GROUP, "Expert data parallel group"), + (False, True): ( + _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP, + "Intra partial expert data parallel group", + ), + (True, False): ( + _EXPERT_DATA_PARALLEL_GROUP_NO_GTP, + "Expert data parallel group (no GTP)", + ), + (True, True): ( + _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_NO_GTP, + "Intra partial expert data parallel group (no GTP)", + ), + } + group, description = group_table[(no_gtp, partial_expert_data_parallel)] + if check_initialized: + assert group is not None, f"{description} is not initialized" + return group def get_expert_data_parallel_group_gloo(no_gtp=False, partial_expert_data_parallel=False): diff --git a/megatron/core/process_groups_config.py b/megatron/core/process_groups_config.py index c625e17c13d..069cb77d5d7 100644 --- a/megatron/core/process_groups_config.py +++ b/megatron/core/process_groups_config.py @@ -335,6 +335,19 @@ def use_mpu_process_groups(cls, required_pgs: Optional[List[str]] = None): return cls(**init_dict) + @staticmethod + def is_gtp_active(process_group_dict: Dict) -> bool: + """True iff GTP or EGTP is active (a weight-shard group spans >1 rank). + + Reads the 'gtp_group'/'expt_gtp_group' entries produced by both setup_process_groups_for_* + builders; a None group means that axis is unused. 
+ """ + gtp = process_group_dict.get('gtp_group') + expt_gtp = process_group_dict.get('expt_gtp_group') + return (gtp is not None and gtp.size() > 1) or ( + expt_gtp is not None and expt_gtp.size() > 1 + ) + @staticmethod def setup_process_groups_for_optimizer( pg_collection: Optional['ProcessGroupCollection'], @@ -401,6 +414,10 @@ def setup_process_groups_for_optimizer( no_gtp=True, partial_expert_data_parallel=True ) expt_dp_no_egtp_group = parallel_state.get_expert_data_parallel_group(no_gtp=True) + gtp_group = parallel_state.get_gtp_weight_remat_group(check_initialized=False) + expt_gtp_group = parallel_state.get_expert_gtp_weight_remat_group( + check_initialized=False + ) intra_dist_opt_group = parallel_state.get_intra_distributed_optimizer_instance_group() # Gloo groups @@ -558,6 +575,10 @@ def setup_process_groups_for_optimizer( else: expt_dp_no_egtp_group = expt_dp_group + # 8. GTP weight-shard groups (None when inactive); used to detect whether GTP is on. + gtp_group = getattr(pg_collection, 'gtp', None) + expt_gtp_group = getattr(pg_collection, 'expt_gtp', None) + # Gloo groups - not supported when pg_collection is provided if use_gloo_process_groups: raise ValueError( @@ -577,6 +598,8 @@ def setup_process_groups_for_optimizer( 'expt_dp_no_egtp_group': expt_dp_no_egtp_group, 'intra_expt_dp_group': intra_expt_dp_group, 'intra_expt_dp_no_egtp_group': intra_expt_dp_no_egtp_group, + 'gtp_group': gtp_group, + 'expt_gtp_group': expt_gtp_group, 'mp_group': mp_group, 'expt_tp_pp_group': expt_tp_pp_group, 'expt_tp_pp_with_egtp_group': expt_tp_pp_with_egtp_group, @@ -633,8 +656,12 @@ def setup_process_groups_for_ddp( no_gtp=True, partial_expert_data_parallel=True ), 'tp_group': parallel_state.get_tensor_model_parallel_group(), + 'gtp_group': parallel_state.get_gtp_weight_remat_group(check_initialized=False), 'pp_group': parallel_state.get_pipeline_model_parallel_group(), 'ep_group': parallel_state.get_expert_model_parallel_group(), + 'expt_gtp_group': 
parallel_state.get_expert_gtp_weight_remat_group( + check_initialized=False + ), 'inter_dist_opt_group': ( parallel_state.get_inter_distributed_optimizer_instance_group() if ddp_config.num_distributed_optimizer_instances > 1 @@ -745,6 +772,10 @@ def setup_process_groups_for_ddp( else: result['expt_dp_no_egtp_group'] = result['expt_dp_group'] + # 9. GTP weight-shard groups (None when inactive); used to detect whether GTP is on. + result['gtp_group'] = getattr(pg_collection, 'gtp', None) + result['expt_gtp_group'] = getattr(pg_collection, 'expt_gtp', None) + return result diff --git a/megatron/core/transformer/transformer_config.py b/megatron/core/transformer/transformer_config.py index f1a0aa9f3fa..cd9d2398025 100644 --- a/megatron/core/transformer/transformer_config.py +++ b/megatron/core/transformer/transformer_config.py @@ -925,7 +925,9 @@ class TransformerConfig(ModelParallelConfig): more details, see: https://pytorch.org/docs/stable/generated/torch.Tensor.backward.html.""" cuda_graph_warmup_steps: int = 3 - """Number of warmup steps for CUDA graphs""" + """Number of warmup steps for CUDA graphs. Note: GTP (``gtp_weight_remat_size > 1``) forces a + minimum of 2 per-graph warmup steps regardless of this value, because the first warmup builds + the weight-prefetch chain and the second exercises the prefetch path before capture.""" external_cuda_graph: bool = False """DEPRECATED and replaced by cuda_graph_impl. 
diff --git a/megatron/experimental/gtp/README.md b/megatron/experimental/gtp/README.md index c694aa51bef..82b3e8c6bea 100644 --- a/megatron/experimental/gtp/README.md +++ b/megatron/experimental/gtp/README.md @@ -16,8 +16,9 @@ Core implementation: `megatron/experimental/gtp/generalized_tensor_parallelism.p - 1.3 [Low-precision quantize-then-gather](#13-low-precision-quantize-then-gather) - 1.4 [Composability with TP / SP / EP / DDP](#14-composability-with-tp--sp--ep--ddp) - 1.5 [Opt-in, minimally invasive integration](#15-opt-in-minimally-invasive-integration) - - 1.6 [Scaling](#16-scaling) - - 1.7 [Native distributed checkpointing (DCP)](#17-native-distributed-checkpointing-dcp) + - 1.6 [Optimizer-agnostic (Adam + Muon)](#16-optimizer-agnostic-adam--muon) + - 1.7 [Scaling](#17-scaling) + - 1.8 [Native distributed checkpointing (DCP)](#18-native-distributed-checkpointing-dcp) 2. [Usage](#2-usage) - 2.1 [Required flags](#21-required-flags) - 2.2 [High-priority streams (Blackwell and later)](#22-high-priority-streams-blackwell-and-later) @@ -124,7 +125,16 @@ Quantize-then-gather attacks AG only: AG portion shrinks ~72% from BF16 → NVFP - Turning it off is a no-op: when `gtp_group.size() == 1`, `wrap_module_params_gtp` short-circuits; when `gtp_weight_remat_size == 1`, the GTP path in `layers.py` is skipped entirely. - User-tunable knobs (`GTPConfig.pad_for_alignment`, `weight_prefetch`, `check_param_states`) plus a debug-name tagger (`tag_gtp_params_with_names`) for readable link-table output. -### 1.6 Scaling +### 1.6 Optimizer-agnostic (Adam + Muon) + +GTP runs under both the standard **Adam** `DistributedOptimizer` and **Muon** (the `LayerWiseDistributedOptimizer`), DCP save/load included: + +- **Adam** shards optimizer state over the gtp/egtp-excluded replicate group, like any GTP run (§3.2). +- **Muon** keeps matrix params *whole* (Newton–Schulz needs the full 2D weight). A GTP-replicated whole param (e.g. 
MoE router, latent-proj MLPs) then lands on one checkpoint key shared by all GTP peers, so the LayerWise optimizer folds `gtp_rank` into its `replica_id` — exactly one peer writes (the optimizer-state analog of the model-side fold in §3.3). Mamba `in_proj` (a gathered+split factory on the model side) saves its optimizer state per-shard via a small backfill helper. + +Neither path adds a GTP-specific checkpoint format or call site. + +### 1.7 Scaling Effective per-GPU weight size = `W / (TP × GTP)`. Example: TP=4 + GTP=8 with NVFP4 → 32× weight-memory reduction and 128× wire-bandwidth reduction vs full BF16 replication, before data parallelism. @@ -141,7 +151,7 @@ On an Ultra-proxy hybrid Mamba-MoE model (**~280B parameters**; `GTP64 · EP64 ![GTP64 weak-scaling efficiency](images/0617_gtp64_weak_scaling_efficiency.png) -### 1.7 Native distributed checkpointing (DCP) +### 1.8 Native distributed checkpointing (DCP) **GTP + DCP is straightforward:** - Reuses the existing checkpoint stack rather than adding a parallel one. GTP-sharded weights *and* distributed-optimizer state save/load through the standard PyTorch / Mcore `torch_dist` sharded checkpoint, with **no GTP-specific format or call path** and a tiny code footprint (one new helper + one helper made GTP-aware). @@ -212,8 +222,7 @@ At iter-0 you'll see one rank-0 log line confirming the active config: ``` GTP enabled. 
GTPConfig(pad_for_alignment=16, check_param_states=False, - weight_prefetch=True, async_reduction=True, wgrad_before_dgrad=False, - fp8_param_gather=False, coalesce_amax_allreduce=False) + weight_prefetch=True, async_reduction=True, fp8_param_gather=False) ``` ### 2.4 Tuning knobs @@ -225,14 +234,14 @@ update_gtp_config( pad_for_alignment=16, # NVFP4: 16, MXFP8: 32, BF16: any; auto-set in training.py weight_prefetch=True, # Disable to debug the cold-start path async_reduction=True, # Whether to perform GTP gradient reduction asynchronously - # wgrad_before_dgrad: deferred — setting True currently raises NotImplementedError fp8_param_gather=False, # Companion to Megatron's --fp8-param-gather; currently asserted off - # coalesce_amax_allreduce: deferred — setting True logs an info and falls back to per-weight ) ``` `training.py` auto-tunes `pad_for_alignment` based on the quantization recipe (`--fp4`, `--fp8-recipe=mxfp8`, etc.) before model construction. The other knobs are usually left at defaults. +> **CUDA-graph warmup under GTP.** When CUDA graphs are enabled, GTP forces a minimum of **2** per-graph warmup steps regardless of `--cuda-graph-warmup-steps` (e.g. a user-set `0` is bumped to `2`): the first warmup builds the weight-prefetch chain and the second exercises the prefetch path before capture. + --- ## 3. Implementation details @@ -287,7 +296,7 @@ Communication never blocks compute except at the very first layer of each direct Current behavior: backward always runs dgrad GEMM, then wgrad GEMM, then issues the GTP wgrad RS — the RS overlaps with the *next* layer's bwd GEMMs (the one-step deferral above). -A future MR will add an opt-in wgrad-before-dgrad schedule on `_Linear` / `_LayerNormLinear` so the GTP wgrad RS NCCL overlaps with the dgrad GEMM of the **same** layer (best for the GTP + no-TP case). Until that MR lands, attempting to set `GTPConfig.wgrad_before_dgrad = True` raises `NotImplementedError`. 
+A future MR will add an opt-in wgrad-before-dgrad schedule on `_Linear` / `_LayerNormLinear` so the GTP wgrad RS NCCL overlaps with the dgrad GEMM of the **same** layer (best for the GTP + no-TP case). ##### Recompute-forward prefetch chain *(GTP + activation recompute)* @@ -336,6 +345,8 @@ Why this is correct — the gtp axis is completed in two complementary ways, so - **GTP-sharded weights**: each rank already holds the gtp-summed shard via the (E)GTP wgrad reduce-scatter, then DDP sums over the replicate group → `sum-over-(gtp×replicate) / full = mean`. - **Replicated (non-GTP) params** (LayerNorm γ/β, biases, router, …): DDP sums only over the replicate group, leaving them `1/gtp` short; `finalize_model_grads._allreduce_replicated_grads_over_gtp_group` then does a SUM all-reduce over the gtp (dense) / egtp (expert) group to recover the full mean. SUM (not AVG) because the `1/full` DDP scaling is already applied. +> **`average_in_collective` must be off (the default).** The `1/(replicate×gtp)` scaling above is a *pre-scale* applied before a SUM collective. `average_in_collective=True` instead uses NCCL AVG, which divides by the collective's own group — the gtp/egtp-**excluded** replicate group — so it divides by `replicate` only, missing the `1/gtp` factor and leaving gradients `gtp`× too large. This is asserted via `ProcessGroupCollection.is_gtp_active` in both `arguments.py` (training) and `DistributedDataParallel.__init__` (direct megatron-core users). + **`broadcast_params`** (the one-shot init/load param sync) selects the group by `is_gtp`: GTP shards broadcast over the gtp-excluded `*_no_gtp` group (`dp_cp_no_gtp_group` / `expt_dp_no_egtp_group`), everything else over the regular DP group (`dp_cp_group` / `expt_dp_group`). Excluding (E)GTP peers is essential — each peer holds a distinct 1/N shard of the same `GTPShardedParam`, so a shared group would let rank-0's shard clobber the others.
The non-`intra_` ("full") groups are used here so the sync reaches every distopt instance. **Buffer caching.** The per-buffer lists are concatenated once at init into a single flat view for fast iteration in the grad-reduction hot path. @@ -398,5 +409,6 @@ torchrun --nproc-per-node 4 -m pytest tests/unit_tests/generalized_tensor_parall | `test_gtp_loss_correctness.py` | End-to-end: GTP per-step loss trajectory matches a no-GTP baseline. | | `test_gtp_grad_correctness.py` | Gradient + dist-opt + grad-norm numeric parity vs a DP baseline at replicate (DP) > 1. | | `test_gtp_dcp.py` | Distributed-checkpoint sharding (§3.3): TP×GTP composite/cross-axis offsets, alignment-pad `allow_shape_mismatch`, cross-topology reshard metadata, and quantize-cache reset. | +| `test_gtp_muon_dcp.py` | GTP + Muon (LayerWise) optimizer-state checkpoint roundtrip (§1.6): `replica_id` fold for GTP-replicated whole params (router, latent-proj). | All tests require ≥ 4 GPUs and the GTP-enabled TransformerEngine; they self-skip when those are unavailable. A green run (skips for unmet hardware/config are acceptable) is the minimum bar for any GTP change. 
diff --git a/megatron/experimental/gtp/__init__.py b/megatron/experimental/gtp/__init__.py index 8ead8afb4e9..d36c4e4074d 100644 --- a/megatron/experimental/gtp/__init__.py +++ b/megatron/experimental/gtp/__init__.py @@ -23,6 +23,7 @@ get_rs_stream, make_sharded_tensors_for_checkpoint_with_gtp, reset_gtp_quantize_cache, + reset_gtp_state, set_cuda_graph_mempool, set_cuda_graph_modules, tag_gtp_params_with_names, @@ -51,6 +52,7 @@ "get_rs_stream", "make_sharded_tensors_for_checkpoint_with_gtp", "reset_gtp_quantize_cache", + "reset_gtp_state", "set_cuda_graph_mempool", "set_cuda_graph_modules", "tag_gtp_params_with_names", diff --git a/megatron/experimental/gtp/generalized_tensor_parallelism.py b/megatron/experimental/gtp/generalized_tensor_parallelism.py index 580bf31aed2..6454c5bb37a 100644 --- a/megatron/experimental/gtp/generalized_tensor_parallelism.py +++ b/megatron/experimental/gtp/generalized_tensor_parallelism.py @@ -275,14 +275,6 @@ def wait_for_gtp_grad_reduction_on_current_stream() -> None: cur.wait_event(evt) -# NOTE: Coalesced amax reduction across the GTP group is deferred to a follow-up -# MR. The TE-side split-phase APIs (`compute_amax_nvfp4`, `quantize_cast_only_nvfp4`, -# `compute_multi_amax_nvfp4`) and the Mcore-side `_quantize_with_coalesced_amax` -# helper have been removed. The `GTPConfig.coalesce_amax_allreduce` knob is kept -# as a stub: setting it to True logs an info message and falls back to the -# per-weight quantize path inside `_all_gather_weight`. - - @dataclass class GTPConfig: """Global configuration for Generalized Tensor Parallelism.""" @@ -296,21 +288,10 @@ class GTPConfig: # overlap. When False, every wgrad RS is synchronous and finalizes # inline, at the cost of that overlap. 
async_reduction: bool = True - # Stub field, reserved for a follow-up MR that will land the wgrad-before-dgrad - # schedule on the TE side (_Linear / _LayerNormLinear backward run wgrad GEMM - # before dgrad GEMM, so the GTP wgrad reduce-scatter overlaps with dgrad GEMM). - # Setting this to True via update_gtp_config() currently raises NotImplementedError. - wgrad_before_dgrad: bool = False # GTP companion to Megatron --fp8-param-gather: optimizer casts FP32 master # directly into GTPShardedParam.quantized; forward's _quantize_if_needed # short-circuits to the cached FP8. Moves BF16->FP8 off the fwd critical path. fp8_param_gather: bool = False - # Stub field, reserved for a follow-up MR that will re-land the coalesced - # NVFP4 amax allreduce across the GTP group (single NCCL call across all - # batched per-expert amax tensors, plus the TE split-phase compute_amax / - # quantize_cast primitives). Setting this to True via update_gtp_config() - # currently logs an info message and falls back to the per-weight path. - coalesce_amax_allreduce: bool = False GTP_CONFIG = GTPConfig() @@ -318,16 +299,6 @@ class GTPConfig: def update_gtp_config(**kwargs): """Update the global GTP configuration.""" - if kwargs.get("wgrad_before_dgrad"): - raise NotImplementedError("Wgrad->Dgrad schedule to be supported later") - if kwargs.get("coalesce_amax_allreduce"): - warnings.warn( - "GTPConfig.coalesce_amax_allreduce: coalesced amax reduction across the " - "GTP group is deferred in a followup MR; falling back to per-weight amax " - "allreduce.", - stacklevel=2, - ) - kwargs["coalesce_amax_allreduce"] = False for key, value in kwargs.items(): if not hasattr(GTP_CONFIG, key): raise ValueError(f"Unknown GTP config option: {key}") @@ -512,13 +483,12 @@ class GTPShardedParam(torch.nn.Parameter): integrator needs to drive overlap with captured compute. """ - # Per-chain state: each chain_id (GTPChain.GRAPHED / GTPChain.UNGRAPHED) has - # its own linked list. 
Chains never cross-link: prev_w/next_w only connect - # params with the same chain_id. + # Per-chain linked-list state, keyed by chain_id (GTPChain.GRAPHED/UNGRAPHED); chains + # never cross-link (prev_w/next_w join only same-chain_id params). Call reset_gtp_state() + # before rebuilding a GTP model in the same process. _chain_state: Dict[str, dict] = {} - # Per-chain cursor for the recompute-forward prefetch chain (see the _recompute_* - # slot on GTPShardedParam). Keyed by chain_id like _chain_state. + # Recompute-forward prefetch cursor, keyed by chain_id; also cleared by reset_gtp_state(). _recompute_chain_state: Dict[str, dict] = {} @classmethod @@ -842,10 +812,6 @@ def _all_gather_weight(self, async_op, skip_weight_cast, cast_noop_flag, fwd, nv w._set_state(new_state) # 2. Prepare: quantize, set usage direction. - # NOTE: The coalesced amax allreduce path (gated by - # GTPConfig.coalesce_amax_allreduce) is deferred to a follow-up MR; - # always use the per-weight quantize path here. update_gtp_config() logs - # an info message when a caller tries to enable the deferred knob. fp8_pg_hit = GTP_CONFIG.fp8_param_gather and self.did_cast_to_low_precision if not fp8_pg_hit: @@ -1833,6 +1799,19 @@ def backward(ctx, grad_output): return weight.wgrad_reduce_scatter(grad_output) +def reset_gtp_state(): + """Clear the process-global GTP prefetch-chain state (``GTPShardedParam._chain_state`` / + ``._recompute_chain_state``). + + These class-level dicts survive model teardown, so a GTP model rebuilt in the same process + would otherwise inherit the prior model's stale ``last_weight`` pointers / flushed link + tables. Call once before the per-chunk ``classify_gtp_chains`` loop (never inside it — chains + span chunks). No-op on a fresh process. + """ + GTPShardedParam._chain_state.clear() + GTPShardedParam._recompute_chain_state.clear() + + def reset_gtp_quantize_cache(model): """Invalidate the per-shard low-precision cache after a checkpoint load. 
diff --git a/megatron/training/training.py b/megatron/training/training.py index 8ad6d180814..1f17887f394 100644 --- a/megatron/training/training.py +++ b/megatron/training/training.py @@ -1682,7 +1682,7 @@ def get_model(model_provider_func, model_type=ModelType.encoder_or_decoder, wrap if getattr(args, 'fp4', None) is not None: update_gtp_config(pad_for_alignment=16) elif getattr(args, 'fp8_recipe', None) == 'mxfp8': - update_gtp_config(pad_for_alignment=32, coalesce_amax_allreduce=False) + update_gtp_config(pad_for_alignment=32) elif getattr(args, 'fp8', None) is not None: update_gtp_config(pad_for_alignment=16) @@ -1744,6 +1744,7 @@ def build_model(): from megatron.experimental.gtp import ( GTP_CONFIG, classify_gtp_chains, + reset_gtp_state, set_cuda_graph_modules, tag_gtp_params_with_names, ) @@ -1758,6 +1759,8 @@ def build_model(): moe_shared_expert_overlap=_mse_overlap, cuda_graph_impl=getattr(args, 'cuda_graph_impl', 'none'), ) + # Clear stale process-global chain state so a rebuilt model starts fresh. + reset_gtp_state() for model_module in model: tag_gtp_params_with_names(model_module) classify_gtp_chains(model_module) From 0e3c3d253c20084be71312ea1cd5b24cb5bd278b Mon Sep 17 00:00:00 2001 From: Shiqing Fan Date: Tue, 23 Jun 2026 00:24:58 -0700 Subject: [PATCH 46/59] Fix GTP DDP grad-ready firing before deferred wgrad accumulation - DDP: route the backward post-hook for GTP params through register_grad_accum_hook and skip the autograd AccumulateGrad hook, so grad-ready fires only after the GTP wgrad add. - GTP finalize path (_wait_reduce_scatter, finalize_grad=True): fire _handle_megatron_grad_accum after the add so terminal/async-only weights are not orphaned once the autograd path is suppressed. 
Signed-off-by: Shiqing Fan --- .../distributed/distributed_data_parallel.py | 9 +++ megatron/experimental/gtp/README.md | 9 ++- .../gtp/generalized_tensor_parallelism.py | 9 ++- .../generalized_tensor_parallel/test_gtp.py | 70 +++++++++++++++++++ 4 files changed, 91 insertions(+), 6 deletions(-) diff --git a/megatron/core/distributed/distributed_data_parallel.py b/megatron/core/distributed/distributed_data_parallel.py index 4459b64fcae..76c77510401 100644 --- a/megatron/core/distributed/distributed_data_parallel.py +++ b/megatron/core/distributed/distributed_data_parallel.py @@ -400,6 +400,15 @@ def unmap_weight_tensor(m): self._make_backward_post_hook(param) ) break + elif getattr(param, 'is_gtp', False) and hasattr(param, 'register_grad_accum_hook'): + # GTP: drive the post-hook from GTP's manual invocation, not autograd's + # AccumulateGrad. GTP issues the wgrad RS async and defers the main_grad add + # to a later backward node, so AccumulateGrad can fire register_grad_ready + # before the wgrad lands in main_grad, dispatching the bucket reduce-scatter on + # stale grad_data (corrupts reduce_scatter_with_fp32_accumulation for + # chain-boundary weights). GTP fires this hook from _handle_megatron_grad_accum + # after the add instead. + param.register_grad_accum_hook(None, self._make_backward_post_hook(param)) else: # Expand so we get access to grad_fn. param_tmp = param.expand_as(param) diff --git a/megatron/experimental/gtp/README.md b/megatron/experimental/gtp/README.md index 82b3e8c6bea..13ff83b5a7d 100644 --- a/megatron/experimental/gtp/README.md +++ b/megatron/experimental/gtp/README.md @@ -115,7 +115,7 @@ Quantize-then-gather attacks AG only: AG portion shrinks ~72% from BF16 → NVFP - **TP** (intra-layer): orthogonal axis — GTP shards `out_features` regardless of TP's parallel mode (column or row). 2D grid naturally formed via `tp_group × gtp_group`. - **SP** (sequence-parallel): transparent — GTP operates at weight dim, SP at sequence dim. 
- **EP** (MoE): `GroupedLinear` with GTP → each routed expert sharded across `EXPERT_GTP_WEIGHT_REMAT_GROUP`, independent of EP. MoE AllToAll (HybridEP/NVLink) runs independently of GTP AG/RS (NCCL/IB). -- **DDP**: GTP bypasses autograd's grad accumulator (async RS returns `None`; `_finalize_wgrad` accumulates directly into `main_grad`). `register_grad_accum_hook` + manual invocation from `_finalize_wgrad` (eager path) and `_CudagraphReplayNode.backward` (captured path) serializes DDP RS strictly after GTP RS — critical at IB scale to avoid deadlock between DDP and GTP on the same NIC. +- **DDP**: GTP bypasses autograd's grad accumulator (async RS returns `None`; `_finalize_wgrad` accumulates directly into `main_grad`). DDP registers its grad-ready hook on GTP params via `register_grad_accum_hook` (not autograd's `AccumulateGrad`); GTP invokes it from `_finalize_wgrad` (eager path) and `_CudagraphReplayNode.backward` (captured path) **after** the wgrad lands in `main_grad`, so a bucket's DDP reduce-scatter runs strictly after every GTP param's `{RS → main_grad add}` — never over a stale `main_grad` — and DDP↔GTP NIC deadlock at IB scale is avoided. See §3.2. ### 1.5 Opt-in, minimally invasive integration @@ -324,10 +324,13 @@ Under **full-iteration CUDA graphs** the recompute-forward is captured; `wait_as ![DDP + (E)GTP interaction with the distributed optimizer](images/0611_ddp_egtp_orthogonal_bucketing.png) -**(E)GTP is *super loosely coupled* to DDP and the distributed optimizer — they stay completely GTP-agnostic.** GTP is just another sub-axis of the rank grid (`world = TP×GTP×CP×DP`); a GTP-sharded weight rides the *exact same* code path as an ordinary param. There are **no** GTP/EGTP-specific buffers, optimizers, gradient-scaling factors, or bucket groups. 
The entire DDP/DistOpt stack touches GTP in only **two** narrow places: +**(E)GTP is *super loosely coupled* to DDP and the distributed optimizer — they stay completely GTP-agnostic.** GTP is just another sub-axis of the rank grid (`world = TP×GTP×CP×DP`); a GTP-sharded weight rides the *exact same* code path as an ordinary param. There are **no** GTP/EGTP-specific buffers, optimizers, gradient-scaling factors, or bucket groups. The entire DDP/DistOpt stack touches GTP in only **three** narrow places: 1. **finalize SUM all-reduce** (`_allreduce_replicated_grads_over_gtp_group`) — completes the gtp axis for *replicated* (non-GTP) params; a no-op when GTP is inactive. 2. **`is_gtp` / `allreduce` tags** propagated onto the optimizer's master shards — consumed only by the grad-norm dedup filter. +3. **grad-ready hook routing** (`DistributedDataParallel.__init__`) — for a GTP param, DDP registers its backward post-hook via GTP's `register_grad_accum_hook` instead of autograd's `AccumulateGrad`. GTP fires it from `_handle_megatron_grad_accum` **after** the per-param `{wgrad RS → main_grad add}`. This enforces the invariant below; a no-op (plain autograd path) when GTP is inactive. + +> **Ordering invariant.** A bucket's DDP gradient reduction (the reduce-scatter / all-to-all + local fp32 accumulation) runs **strictly after every GTP param in that bucket has finished `{GTP wgrad RS → main_grad add}`**. `register_grad_ready` only fires the bucket collective once *all* its params are ready, and for GTP params "ready" is signalled by GTP after the add — never by autograd's `AccumulateGrad`, which (because the wgrad RS is async and its `main_grad` accumulation is deferred to a later backward node) can fire **before** the add and would make the bucket reduce read a stale/empty `main_grad` (notably under `reduce_scatter_with_fp32_accumulation`). 
Everything else — bucketing, the reduce-scatter/all-reduce schedule and its overlap, master-state sharding, grad clipping, the checkpoint format — is unchanged and unaware of GTP. @@ -336,7 +339,7 @@ Everything else — bucketing, the reduce-scatter/all-reduce schedule and its ov - **Free reuse of a mature stack.** GTP inherits DDP's bucketing + comm/compute overlap, the distributed optimizer's fp32-master + Adam-moment sharding, grad-norm/clip, and the existing checkpoint format — no parallel re-implementation to write or maintain (contrast FSDP, which replaces all of these). - **Orthogonal composability.** Because GTP is a rank-grid sub-axis cut like TP (along `out_features`), it composes with TP/EP/CP/PP and the DistOpt the same way TP does — no special nesting logic. - **Zero-cost when off.** With GTP disabled the `*_no_gtp` groups alias the regular DP groups and all three hooks become no-ops, so non-GTP runs hit byte-identical behavior — GTP can be toggled without forking the DDP/optimizer code paths. -- **Small, auditable surface.** Two hooks is the whole integration contract, which is what makes the correctness argument below tractable. +- **Small, auditable surface.** These three hooks are the whole integration contract, which is what makes the correctness argument below tractable. DDP groups parameters into **two buffers** by `is_expert_parallel` (MoE tag) — a dense buffer and an expert buffer. GTP/EGTP shards are **merged into** these buffers like ordinary params (no separate GTP/EGTP buckets): they reduce over the gtp/egtp-EXCLUDED replicate group (`intra_dp_cp_no_gtp_group` for dense, `intra_expt_dp_no_egtp_group` for expert) with the standard `1/full = 1/(replicate*gtp)` scaling.
diff --git a/megatron/experimental/gtp/generalized_tensor_parallelism.py b/megatron/experimental/gtp/generalized_tensor_parallelism.py index 6454c5bb37a..06bc82a3b02 100644 --- a/megatron/experimental/gtp/generalized_tensor_parallelism.py +++ b/megatron/experimental/gtp/generalized_tensor_parallelism.py @@ -1247,12 +1247,15 @@ def _wait_reduce_scatter(self, finalize_grad=False): if finalize_grad: cache = get_global_GTP_cache() for w in self._weights: - w._set_rs_state(GTPWeightState.NONE) wgrad_rs = cache.get(w._rs_ticket) w.main_grad.add_(wgrad_rs) cache.release(w._rs_ticket) - if hasattr(w, "grad_added_to_main_grad"): - w.grad_added_to_main_grad = True + # Fire grad-ready AFTER all adds (separate loop so a bucket-completing + # grad-ready can't dispatch the RS before a sibling's add). With autograd + # grad-ready suppressed for GTP params (DDP register_grad_accum_hook), this + # is the only grad-ready for a weight finalized here; else the bucket orphans. + for w in self._weights: + self._handle_megatron_grad_accum(w) self._already_finalized = True # Release stashed wgrad inputs: UNGRAPHED buffers go back to the pool; # GRAPHED just drops Python refs (addresses must stay stable for CG). diff --git a/tests/unit_tests/generalized_tensor_parallel/test_gtp.py b/tests/unit_tests/generalized_tensor_parallel/test_gtp.py index 06935944ec4..6013aa8492d 100644 --- a/tests/unit_tests/generalized_tensor_parallel/test_gtp.py +++ b/tests/unit_tests/generalized_tensor_parallel/test_gtp.py @@ -27,6 +27,7 @@ 21. TestGTPGradAccumHook - main_grad updated after reduce-scatter backward (multi-GPU) 22. TestWaitAsyncCommsFallback - wait_async_comms(finalize_after_drain=True) inline-accumulation fallback when _wgrad_rs_handle is None (single-process) 23. TestGTPDDPBucketAlignment - GTP and regular DDP buffer bucket ends padded for dist-opt alignment (multi-GPU) +24. 
TestGTPDDPGradReadyWiring - GTP params drive DDP grad-ready via the manual hook after wgrad add, not autograd (multi-GPU) Multi-GPU tests skip when ``torch.distributed.get_world_size()`` doesn't match the required world size (4 for everything in this file). @@ -1414,3 +1415,72 @@ def test_regular_buffers_use_padded_layout_when_gtp_params_present(self): """Regular buf bucket ends must be padded even when gtp_params forces layoutrecompute.""" _requires_multi_gpu(4) _run_distributed(_worker_regular_buffer_padded_when_gtp_params_present, 4) + + +# --------------------------------------------------------------------------- +# 24. GTP DDP grad-ready wiring: register_grad_ready must fire AFTER the wgrad add +# --------------------------------------------------------------------------- + + +def _worker_gtp_ddp_grad_ready_wiring(rank, world_size, port): + """GTP params must drive DDP grad-ready from GTP's manual hook, not autograd. + + GTP defers the main_grad accumulation to a later backward node, so autograd's AccumulateGrad can + fire register_grad_ready before the grad lands and dispatch the bucket reduce-scatter on stale + grad_data (corrupts reduce_scatter_with_fp32_accumulation). The fix routes grad-ready through + register_grad_accum_hook (fired after the add) and skips the autograd hook. This pins that + wiring: every GTP weight has _grad_accum_hook set and none falls through to the autograd list. + """ + from megatron.core import parallel_state as ps + from megatron.core.distributed import DistributedDataParallel, DistributedDataParallelConfig + from megatron.core.transformer.transformer_config import TransformerConfig + + # The module fixture initialized model_parallel without GTP; re-init with GTP=2. 
+ ps.destroy_model_parallel() + ps.initialize_model_parallel( + tensor_model_parallel_size=1, pipeline_model_parallel_size=1, gtp_remat_size=2 + ) + try: + gtp_group = ps.get_gtp_weight_remat_group() + + class _TwoLayerModel(torch.nn.Module): + def __init__(self): + super().__init__() + # bias=False -> all params are GTP weights, so grad_accs must end up empty. + self.fc0 = te.Linear(64, 128, bias=False, device="cuda") + self.fc1 = te.Linear(64, 128, bias=False, device="cuda") + + model = _TwoLayerModel() + wrap_module_params_gtp(model.fc0, ["weight"], gtp_group) + wrap_module_params_gtp(model.fc1, ["weight"], gtp_group) + + config = TransformerConfig( + num_attention_heads=1, num_layers=1, hidden_size=4, tensor_model_parallel_size=1 + ) + ddp_config = DistributedDataParallelConfig( + use_distributed_optimizer=True, overlap_grad_reduce=True + ) + ddp_model = DistributedDataParallel(config, ddp_config, model) + + for name, w in [("fc0", model.fc0.weight), ("fc1", model.fc1.weight)]: + assert isinstance(w, GTPShardedParam), f"{name}.weight should be a GTP param" + # Manual hook set -> grad-ready fires after the add; None -> early autograd path (bug). + assert getattr(w, "_grad_accum_hook", None) is not None, ( + f"{name}.weight must have _grad_accum_hook set (manual grad-ready, not autograd)" + ) + + # bias=False -> all params are GTP -> none took the autograd path. 
+ assert len(ddp_model.grad_accs) == 0, ( + "GTP params must not register an autograd AccumulateGrad hook " + f"(grad_accs has {len(ddp_model.grad_accs)} entries)" + ) + finally: + ps.destroy_model_parallel() + ps.initialize_model_parallel() # restore default for remaining tests + + +class TestGTPDDPGradReadyWiring: + def test_gtp_params_use_manual_grad_ready_hook(self): + """GTP params route DDP grad-ready through register_grad_accum_hook, not autograd.""" + _requires_multi_gpu(4) + _run_distributed(_worker_gtp_ddp_grad_ready_wiring, 4) From f6dca059876d1231517763a2aeab12ac03a14bfa Mon Sep 17 00:00:00 2001 From: Shiqing Fan Date: Tue, 23 Jun 2026 19:41:45 -0700 Subject: [PATCH 47/59] fix format and comments Signed-off-by: Shiqing Fan --- megatron/core/distributed/finalize_model_grads.py | 8 ++++---- megatron/core/optimizer/emerging_optimizers.py | 6 ++++-- .../generalized_tensor_parallel/test_gtp_muon_dcp.py | 9 ++++----- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/megatron/core/distributed/finalize_model_grads.py b/megatron/core/distributed/finalize_model_grads.py index 50376d3fadc..d2fcbc5ce5f 100644 --- a/megatron/core/distributed/finalize_model_grads.py +++ b/megatron/core/distributed/finalize_model_grads.py @@ -443,11 +443,11 @@ def _allreduce_non_tensor_model_parallel_grads( def _allreduce_replicated_grads_over_gtp_group(model: List[torch.nn.Module]): - """SUM NON-GTP (replicated) param grads over the gtp / egtp group. + """Sum wgrads for replicated parameters over the gtp / egtp group. - DDP already reduced these params over the gtp-EXCLUDED replicate group with 1/full scaling, - leaving them 1/gtp short. SUM (not AVG) over the gtp/egtp group recovers the full mean. - No-op when GTP is inactive (gtp/egtp group size <= 1). + The data-parallel collective already reduced wgrads over the GTP-excluded process groups with + 1/full scaling, so the gtp-axis terms are still missing. 
A plain SUM (not AVG) over the gtp/egtp + group adds them and yields the exact full mean. No-op when GTP is inactive (group size <= 1). """ gtp_group = parallel_state.get_gtp_weight_remat_group(check_initialized=False) egtp_group = parallel_state.get_expert_gtp_weight_remat_group(check_initialized=False) diff --git a/megatron/core/optimizer/emerging_optimizers.py b/megatron/core/optimizer/emerging_optimizers.py index 1f170091f49..12029d3b5c2 100644 --- a/megatron/core/optimizer/emerging_optimizers.py +++ b/megatron/core/optimizer/emerging_optimizers.py @@ -242,8 +242,10 @@ def scaled_orthogonalize_fn_with_gtp( """ is_expert = getattr(p, 'expert_tp', False) gtp_group = ( - self.pg_collection.expt_gtp if is_expert else self.pg_collection.gtp - ) if self.pg_collection else None + (self.pg_collection.expt_gtp if is_expert else self.pg_collection.gtp) + if self.pg_collection + else None + ) if gtp_group is None or get_pg_size(gtp_group) <= 1: return self.scaled_orthogonalize_fn(grad, tp_group, partition_dim) diff --git a/tests/unit_tests/generalized_tensor_parallel/test_gtp_muon_dcp.py b/tests/unit_tests/generalized_tensor_parallel/test_gtp_muon_dcp.py index bfd59677377..8a1bc967566 100644 --- a/tests/unit_tests/generalized_tensor_parallel/test_gtp_muon_dcp.py +++ b/tests/unit_tests/generalized_tensor_parallel/test_gtp_muon_dcp.py @@ -11,10 +11,7 @@ import torch from megatron.core.dist_checkpointing import load, save -from tests.unit_tests.dist_checkpointing import ( - TempNamedDir, - setup_model_and_optimizer, -) +from tests.unit_tests.dist_checkpointing import TempNamedDir, setup_model_and_optimizer from tests.unit_tests.test_utilities import Utils @@ -106,7 +103,9 @@ def test_gtp_muon_moe_save_load(self, tmp_path_dist_ckpt): model_sd_A = model_A[0].sharded_state_dict() optim_sd_A = optimizer_A.sharded_state_dict(model_sd_A, metadata=meta) - save(optim_sd_A, ckpt_dir_A) # fails (2 writers) before the LayerWise replica_id fix + save( + optim_sd_A, ckpt_dir_A + ) # 
fails (2 writers) before the LayerWise replica_id fix model_B, optimizer_B = setup_model_and_optimizer( seed=3, From ce0272802fa4e62aadd9b26fc4f951928dae6fcb Mon Sep 17 00:00:00 2001 From: Shiqing Fan Date: Wed, 24 Jun 2026 17:36:41 -0700 Subject: [PATCH 48/59] fix comments Signed-off-by: Shiqing Fan --- megatron/core/transformer/cuda_graphs.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/megatron/core/transformer/cuda_graphs.py b/megatron/core/transformer/cuda_graphs.py index 62217ddd765..ca599a7a73d 100644 --- a/megatron/core/transformer/cuda_graphs.py +++ b/megatron/core/transformer/cuda_graphs.py @@ -892,9 +892,6 @@ def __init__( # NCCL into runner_stream before bwd_completion_event fires. if get_expert_gtp_weight_remat_world_size() > 1: self._register_gtp_side_streams(get_expert_gtp_weight_remat_group()) - # Bridges Phase 1 (AG drain on ag_stream) into runner_stream - # so bwd_completion_event records past NCCL_AG completion. - self.bwd_ag_fence_event = torch.cuda.Event() # Records after Phase 2 (RS drain + main_grad.add_) completes # on runner.stream. finalize_model_grads waits on this before # reading main_grad for the DP gradient sync. 
@@ -1338,13 +1335,11 @@ def create_bwd_graph(self): gtp_group = get_gtp_weight_remat_group() graphed_ag = get_ag_stream(GTPChain.GRAPHED.value, gtp_group) - self.bwd_ag_fence_event.record(graphed_ag) - torch.cuda.current_stream().wait_event(self.bwd_ag_fence_event) + torch.cuda.current_stream().wait_stream(graphed_ag) if get_expert_gtp_weight_remat_world_size() > 1: egtp_group = get_expert_gtp_weight_remat_group() egtp_graphed_ag = get_ag_stream(GTPChain.GRAPHED.value, egtp_group) - self.bwd_ag_fence_event.record(egtp_graphed_ag) - torch.cuda.current_stream().wait_event(self.bwd_ag_fence_event) + torch.cuda.current_stream().wait_stream(egtp_graphed_ag) # Record completion AFTER AG drain + fence but BEFORE RS drain, # so main_stream can trigger the next runner while RS is still From ae8a57117ffea3f1e5f99a4210ae0a70339418a6 Mon Sep 17 00:00:00 2001 From: Shiqing Fan Date: Wed, 24 Jun 2026 17:58:36 -0700 Subject: [PATCH 49/59] fix linting Signed-off-by: Shiqing Fan --- .../core/optimizer/emerging_optimizers.py | 4 +- .../core/optimizer/layer_wise_optimizer.py | 8 +- megatron/core/parallel_state.py | 17 +-- megatron/core/process_groups_config.py | 12 +- megatron/core/ssm/mamba_mixer.py | 10 +- .../generalized_tensor_parallel/test_gtp.py | 6 +- .../test_gtp_dcp.py | 103 +++++++++--------- 7 files changed, 74 insertions(+), 86 deletions(-) diff --git a/megatron/core/optimizer/emerging_optimizers.py b/megatron/core/optimizer/emerging_optimizers.py index 12029d3b5c2..f775d0accd0 100644 --- a/megatron/core/optimizer/emerging_optimizers.py +++ b/megatron/core/optimizer/emerging_optimizers.py @@ -230,9 +230,7 @@ def scaled_orthogonalize_fn( scaled_orthogonalize_fn=scaled_orthogonalize_fn, ) - def scaled_orthogonalize_fn_with_gtp( - self, p, grad, tp_group, partition_dim, - ): + def scaled_orthogonalize_fn_with_gtp(self, p, grad, tp_group, partition_dim): """All-gather grad along GTP/EGTP dim 0, orthogonalize, then slice back. 
GTP shards weights along dim 0 independently of TP's partition_dim. Newton-Schulz diff --git a/megatron/core/optimizer/layer_wise_optimizer.py b/megatron/core/optimizer/layer_wise_optimizer.py index ce2786ac719..179c16c0062 100644 --- a/megatron/core/optimizer/layer_wise_optimizer.py +++ b/megatron/core/optimizer/layer_wise_optimizer.py @@ -574,7 +574,9 @@ def _shard_params_from_layout(self, optimizers, full_param_layouts, dp_cp_size, bucket_id, ) in layout.param_index_map.items(): bucket_start_index, bucket_end_index = layout.bucket_indices[bucket_id] - shard_size = (bucket_end_index - bucket_start_index) // layout.num_optimizer_shards + shard_size = ( + bucket_end_index - bucket_start_index + ) // layout.num_optimizer_shards shard_id = (param_start_index - bucket_start_index) // shard_size shard_end_index = bucket_start_index + (shard_id + 1) * shard_size assert param_end_index <= shard_end_index, ( @@ -702,9 +704,7 @@ def set_bucket_layerwise_params_list(self, model_chunks): if not _bucket_is_managed_by_layer_wise_optimizer(bucket): continue if self.expt_dp_params_list is not None: - bucket_params_list = [ - [] for _ in range(get_pg_size(self.expt_dp_group)) - ] + bucket_params_list = [[] for _ in range(get_pg_size(self.expt_dp_group))] for bucket_list, full_params_list in zip( bucket_params_list, self.expt_dp_params_list ): diff --git a/megatron/core/parallel_state.py b/megatron/core/parallel_state.py index 0f082227d89..8a34e21ddd9 100644 --- a/megatron/core/parallel_state.py +++ b/megatron/core/parallel_state.py @@ -1528,9 +1528,7 @@ def _inject_gtp(order_str: str, after: str = "tp") -> str: else: # EGTP inactive: the replicate group aliases the regular expert-DP group. 
_EXPERT_DATA_PARALLEL_GROUP_NO_GTP = _EXPERT_DATA_PARALLEL_GROUP - _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_NO_GTP = ( - _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP - ) + _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_NO_GTP = _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP ### End of expert related parallel groups initialization @@ -1744,9 +1742,7 @@ def get_pipeline_model_parallel_group(check_initialized=True): return _PIPELINE_MODEL_PARALLEL_GROUP -def get_data_parallel_group( - with_context_parallel=False, no_gtp=False, partial_data_parallel=False -): +def get_data_parallel_group(with_context_parallel=False, no_gtp=False, partial_data_parallel=False): """Get the data-parallel group the caller rank belongs to. Args: @@ -2114,9 +2110,7 @@ def set_data_parallel_rank(rank): _MPU_DATA_PARALLEL_RANK = rank -def get_data_parallel_rank( - with_context_parallel=False, no_gtp=False, partial_data_parallel=False -): +def get_data_parallel_rank(with_context_parallel=False, no_gtp=False, partial_data_parallel=False): """Return caller's rank in the data-parallel group.""" global _MPU_DATA_PARALLEL_RANK if _MPU_DATA_PARALLEL_RANK is not None: @@ -2356,10 +2350,7 @@ def get_expert_data_parallel_group( _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP, "Intra partial expert data parallel group", ), - (True, False): ( - _EXPERT_DATA_PARALLEL_GROUP_NO_GTP, - "Expert data parallel group (no GTP)", - ), + (True, False): (_EXPERT_DATA_PARALLEL_GROUP_NO_GTP, "Expert data parallel group (no GTP)"), (True, True): ( _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_NO_GTP, "Intra partial expert data parallel group (no GTP)", diff --git a/megatron/core/process_groups_config.py b/megatron/core/process_groups_config.py index 069cb77d5d7..8f48d8e4e75 100644 --- a/megatron/core/process_groups_config.py +++ b/megatron/core/process_groups_config.py @@ -269,9 +269,7 @@ def use_mpu_process_groups(cls, required_pgs: Optional[List[str]] = None): 'dp': parallel_state.get_data_parallel_group, 'dp_cp': 
partial(parallel_state.get_data_parallel_group, with_context_parallel=True), 'dp_cp_no_gtp': partial( - parallel_state.get_data_parallel_group, - with_context_parallel=True, - no_gtp=True, + parallel_state.get_data_parallel_group, with_context_parallel=True, no_gtp=True ), 'dp_cp_ag': lambda: None, 'intra_dp_cp': partial( @@ -309,9 +307,7 @@ def use_mpu_process_groups(cls, required_pgs: Optional[List[str]] = None): parallel_state.get_expert_data_parallel_group, check_initialized=False ), 'expt_dp_no_egtp': partial( - parallel_state.get_expert_data_parallel_group, - check_initialized=False, - no_gtp=True, + parallel_state.get_expert_data_parallel_group, check_initialized=False, no_gtp=True ), 'expt_dp_ag': lambda: None, 'tp_dp_cp': partial( @@ -646,9 +642,7 @@ def setup_process_groups_for_ddp( with_context_parallel=True, partial_data_parallel=True ), 'expt_dp_group': parallel_state.get_expert_data_parallel_group(), - 'expt_dp_no_egtp_group': parallel_state.get_expert_data_parallel_group( - no_gtp=True - ), + 'expt_dp_no_egtp_group': parallel_state.get_expert_data_parallel_group(no_gtp=True), 'intra_expt_dp_group': parallel_state.get_expert_data_parallel_group( partial_expert_data_parallel=True ), diff --git a/megatron/core/ssm/mamba_mixer.py b/megatron/core/ssm/mamba_mixer.py index 5d2628f94b8..68e968f59a0 100644 --- a/megatron/core/ssm/mamba_mixer.py +++ b/megatron/core/ssm/mamba_mixer.py @@ -1425,9 +1425,13 @@ def sharded_state_dict(self, prefix="", sharded_offsets=(), metadata=None): original_merge_fn = factory.merge_fn @torch.no_grad() - def _gtp_slice_after_cat(sub_state_dict, _orig=original_merge_fn, - _rank=gtp_local_rank, _size=gtp_local_size, - _gtp_size=in_proj_gtp_size): + def _gtp_slice_after_cat( + sub_state_dict, + _orig=original_merge_fn, + _rank=gtp_local_rank, + _size=gtp_local_size, + _gtp_size=in_proj_gtp_size, + ): full = _orig(sub_state_dict) aligned_total = _size * _gtp_size pad_rows = aligned_total - full.shape[0] diff --git 
a/tests/unit_tests/generalized_tensor_parallel/test_gtp.py b/tests/unit_tests/generalized_tensor_parallel/test_gtp.py index 6013aa8492d..be495a683af 100644 --- a/tests/unit_tests/generalized_tensor_parallel/test_gtp.py +++ b/tests/unit_tests/generalized_tensor_parallel/test_gtp.py @@ -1465,9 +1465,9 @@ def __init__(self): for name, w in [("fc0", model.fc0.weight), ("fc1", model.fc1.weight)]: assert isinstance(w, GTPShardedParam), f"{name}.weight should be a GTP param" # Manual hook set -> grad-ready fires after the add; None -> early autograd path (bug). - assert getattr(w, "_grad_accum_hook", None) is not None, ( - f"{name}.weight must have _grad_accum_hook set (manual grad-ready, not autograd)" - ) + assert ( + getattr(w, "_grad_accum_hook", None) is not None + ), f"{name}.weight must have _grad_accum_hook set (manual grad-ready, not autograd)" # bias=False -> all params are GTP -> none took the autograd path. assert len(ddp_model.grad_accs) == 0, ( diff --git a/tests/unit_tests/generalized_tensor_parallel/test_gtp_dcp.py b/tests/unit_tests/generalized_tensor_parallel/test_gtp_dcp.py index 1de0f6e01ab..5334b091ed6 100644 --- a/tests/unit_tests/generalized_tensor_parallel/test_gtp_dcp.py +++ b/tests/unit_tests/generalized_tensor_parallel/test_gtp_dcp.py @@ -112,12 +112,12 @@ def _worker_helper_offsets_tp_eq_gtp_axis(rank, world_size, port): tp_rank = rank // 2 gtp_rank = rank % 2 expected_offset = (tp_rank * gtp_size + gtp_rank) * per_shard_out - assert st.global_offset[0] == expected_offset, ( - f"rank={rank} expected axis-0 offset {expected_offset}, got {st.global_offset[0]}" - ) - assert st.global_shape[0] == full_out_features, ( - f"rank={rank} expected global axis-0 size {full_out_features}, got {st.global_shape[0]}" - ) + assert ( + st.global_offset[0] == expected_offset + ), f"rank={rank} expected axis-0 offset {expected_offset}, got {st.global_offset[0]}" + assert ( + st.global_shape[0] == full_out_features + ), f"rank={rank} expected global axis-0 size 
{full_out_features}, got {st.global_shape[0]}" def _worker_helper_offsets_tp_neq_gtp_axis(rank, world_size, port): @@ -148,15 +148,16 @@ def _worker_helper_offsets_tp_neq_gtp_axis(rank, world_size, port): st = sharded["weight"] tp_rank = rank // 2 gtp_rank = rank % 2 - assert st.global_offset[0] == gtp_rank * per_shard_out, ( - f"rank={rank} axis-0 offset wrong: {st.global_offset[0]}" - ) - assert st.global_offset[1] == tp_rank * per_tp_in, ( - f"rank={rank} axis-1 offset wrong: {st.global_offset[1]}" - ) - assert st.global_shape == (full_out, full_in), ( - f"rank={rank} global shape {st.global_shape} != ({full_out}, {full_in})" - ) + assert ( + st.global_offset[0] == gtp_rank * per_shard_out + ), f"rank={rank} axis-0 offset wrong: {st.global_offset[0]}" + assert ( + st.global_offset[1] == tp_rank * per_tp_in + ), f"rank={rank} axis-1 offset wrong: {st.global_offset[1]}" + assert st.global_shape == ( + full_out, + full_in, + ), f"rank={rank} global shape {st.global_shape} != ({full_out}, {full_in})" def _worker_helper_no_op_no_gtp(rank, world_size, port): @@ -224,9 +225,9 @@ def _worker_helper_padded_inproj_no_pad_case(rank, world_size, port): dp_cp_group=dist.new_group(list(range(world_size))), ) st = sharded["weight"] - assert st.global_shape[0] == dim0, ( - f"rank={rank} no-pad case: global_shape[0] {st.global_shape[0]} != {dim0}" - ) + assert ( + st.global_shape[0] == dim0 + ), f"rank={rank} no-pad case: global_shape[0] {st.global_shape[0]} != {dim0}" assert st.global_offset[0] == rank * expected_local @@ -255,14 +256,15 @@ def _worker_helper_padded_inproj_pad_case(rank, world_size, port): gtp_group = dist.new_group(list(range(world_size))) weight = _make_gtp_shard(dim0_unpadded, in_features, gtp_group) - assert weight.shape == (per_shard, in_features), ( - f"rank={rank}: post-pad shard shape {tuple(weight.shape)} != ({per_shard}, {in_features})" - ) + assert weight.shape == ( + per_shard, + in_features, + ), f"rank={rank}: post-pad shard shape 
{tuple(weight.shape)} != ({per_shard}, {in_features})" # Only rank-3 (the last GTP rank) carries the trailing pad rows; all ranks # report the same pad_length (an invariant set by _gtp_slice_one_param). - assert getattr(weight, "pad_length", 0) == pad, ( - f"rank={rank}: pad_length {getattr(weight, 'pad_length', 0)} != {pad}" - ) + assert ( + getattr(weight, "pad_length", 0) == pad + ), f"rank={rank}: pad_length {getattr(weight, 'pad_length', 0)} != {pad}" sharded = make_sharded_tensors_for_checkpoint_with_gtp( {"weight": weight}, @@ -276,9 +278,9 @@ def _worker_helper_padded_inproj_pad_case(rank, world_size, port): # Helper saves the padded global. ``allow_shape_mismatch=True`` is what # makes the saved tensor portable to a different load-time GTP topology # (different alignment choice yields a different padded size). - assert st.global_shape[0] == dim0_padded, ( - f"rank={rank} pad case: global_shape[0] {st.global_shape[0]} != {dim0_padded}" - ) + assert ( + st.global_shape[0] == dim0_padded + ), f"rank={rank} pad case: global_shape[0] {st.global_shape[0]} != {dim0_padded}" assert st.global_offset[0] == rank * per_shard assert st.allow_shape_mismatch is True, ( f"rank={rank} pad case: allow_shape_mismatch must be True when GTP padding fires; " @@ -369,9 +371,7 @@ def _worker_reset_quantize_cache(rank, world_size, port): class _Dummy(torch.nn.Module): def __init__(self): super().__init__() - self.weight = torch.nn.Parameter( - torch.zeros(4, 4, dtype=torch.bfloat16, device="cuda") - ) + self.weight = torch.nn.Parameter(torch.zeros(4, 4, dtype=torch.bfloat16, device="cuda")) mod = _Dummy() wrap_module_params_gtp(mod, ["weight"], gtp_group) @@ -404,9 +404,10 @@ def _worker_helper_offsets_ep_egtp(rank, world_size, port): global_expert_idx = ep_rank * num_gemms # + gemm_idx (0) weight = _make_gtp_shard(per_expert_out, in_features, egtp_group) - assert weight.shape == (per_shard_out, in_features), ( - f"rank={rank} EGTP shard shape {tuple(weight.shape)} != 
({per_shard_out}, {in_features})" - ) + assert weight.shape == ( + per_shard_out, + in_features, + ), f"rank={rank} EGTP shard shape {tuple(weight.shape)} != ({per_shard_out}, {in_features})" sharded = make_sharded_tensors_for_checkpoint_with_gtp( {"weight": weight}, @@ -425,13 +426,13 @@ def _worker_helper_offsets_ep_egtp(rank, world_size, port): f"({num_global_experts}, {per_expert_out}, {in_features})" ) # Prepended expert axis (axis 0): offset == this rank's global expert index. - assert st.global_offset[0] == global_expert_idx, ( - f"rank={rank} expert-axis offset {st.global_offset[0]} != {global_expert_idx}" - ) + assert ( + st.global_offset[0] == global_expert_idx + ), f"rank={rank} expert-axis offset {st.global_offset[0]} != {global_expert_idx}" # EGTP axis (weight axis 0, shifted to global axis 1): offset == egtp_rank · per_shard. - assert st.global_offset[1] == egtp_rank * per_shard_out, ( - f"rank={rank} EGTP axis-1 offset {st.global_offset[1]} != {egtp_rank * per_shard_out}" - ) + assert ( + st.global_offset[1] == egtp_rank * per_shard_out + ), f"rank={rank} EGTP axis-1 offset {st.global_offset[1]} != {egtp_rank * per_shard_out}" def _worker_helper_embedding_offsets(rank, world_size, port): @@ -467,12 +468,12 @@ def _worker_helper_embedding_offsets(rank, world_size, port): tp_rank = rank // 2 gtp_rank = rank % 2 expected_offset = (tp_rank * gtp_size + gtp_rank) * per_shard - assert st.global_offset[0] == expected_offset, ( - f"rank={rank} embedding axis-0 offset {st.global_offset[0]} != {expected_offset}" - ) - assert st.global_shape[0] == full_vocab, ( - f"rank={rank} embedding global axis-0 {st.global_shape[0]} != {full_vocab}" - ) + assert ( + st.global_offset[0] == expected_offset + ), f"rank={rank} embedding axis-0 offset {st.global_offset[0]} != {expected_offset}" + assert ( + st.global_shape[0] == full_vocab + ), f"rank={rank} embedding global axis-0 {st.global_shape[0]} != {full_vocab}" def _worker_helper_public_wrapper_delegates(rank, 
world_size, port): @@ -510,9 +511,9 @@ def _worker_helper_public_wrapper_delegates(rank, world_size, port): f"rank={rank} public wrapper did not produce the GTP-composite offset: " f"{st.global_offset[0]} != {expected_offset} (delegation to the GTP path failed?)" ) - assert st.global_shape[0] == full_out, ( - f"rank={rank} global axis-0 {st.global_shape[0]} != {full_out}" - ) + assert ( + st.global_shape[0] == full_out + ), f"rank={rank} global axis-0 {st.global_shape[0]} != {full_out}" def _worker_helper_replicated_sink_rejects_gtp(rank, world_size, port): @@ -625,9 +626,9 @@ def _worker_mamba_replicated_param_replica_ids(rank, world_size, port): assert bases, "no GTP-replicated tiny params found in MambaMixer sharded_state_dict" for base in sorted(bases): rids = [g[base] for g in gathered] - assert len(set(rids)) == world_size, ( - f"{base}: replica_id collision across ranks -> DCP write conflict: {rids}" - ) + assert ( + len(set(rids)) == world_size + ), f"{base}: replica_id collision across ranks -> DCP write conflict: {rids}" n_writers = sum(is_main_replica(r) for r in rids) assert n_writers == 1, f"{base}: expected exactly 1 writer, got {n_writers}: {rids}" From 6aeecc1e17cccef671a4fd7c9924c3fb66fb548e Mon Sep 17 00:00:00 2001 From: Shiqing Fan Date: Wed, 24 Jun 2026 18:44:04 -0700 Subject: [PATCH 50/59] Fix optional process-group fallbacks defeated by __getattr__; Log human-readable dtype in GTP weight cache Signed-off-by: Shiqing Fan --- megatron/core/process_groups_config.py | 22 +++++++++---------- .../gtp/generalized_tensor_parallelism.py | 5 ++++- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/megatron/core/process_groups_config.py b/megatron/core/process_groups_config.py index 8f48d8e4e75..647aa9b4a3e 100644 --- a/megatron/core/process_groups_config.py +++ b/megatron/core/process_groups_config.py @@ -457,7 +457,7 @@ def setup_process_groups_for_optimizer( dp_group = pg_collection.dp # 2. 
dp_cp group: fallback logic based on context_parallel_size - if hasattr(pg_collection, 'dp_cp'): + if 'dp_cp' in pg_set: dp_cp_group = pg_collection.dp_cp else: model_config = get_model_config(model_chunks[0]) @@ -542,7 +542,7 @@ def setup_process_groups_for_optimizer( # EGTP-MERGED variant of tp_ep_pp: includes the egtp axis, so each EGTP peer gets a # distinct rank — used for the distopt ShardedObject keys. Falls back to tp_ep_pp # when not provided. - if hasattr(pg_collection, 'tp_ep_pp_with_egtp'): + if 'tp_ep_pp_with_egtp' in pg_set: expt_tp_pp_with_egtp_group = pg_collection.tp_ep_pp_with_egtp else: expt_tp_pp_with_egtp_group = expt_tp_pp_group @@ -550,11 +550,11 @@ def setup_process_groups_for_optimizer( # 6. no_gtp groups — the gtp-EXCLUDED replicate groups that DDP and the optimizer # shard over: intra (per-distopt-instance) and full (cross-instance). Fall back to # the non-GTP variants when not provided. - if hasattr(pg_collection, 'intra_dp_cp_no_gtp'): + if 'intra_dp_cp_no_gtp' in pg_set: intra_dp_cp_no_gtp_group = pg_collection.intra_dp_cp_no_gtp else: intra_dp_cp_no_gtp_group = intra_dp_cp_group - if hasattr(pg_collection, 'dp_cp_no_gtp'): + if 'dp_cp_no_gtp' in pg_set: dp_cp_no_gtp_group = pg_collection.dp_cp_no_gtp else: dp_cp_no_gtp_group = dp_cp_group @@ -562,11 +562,11 @@ def setup_process_groups_for_optimizer( # 7. no_egtp groups — the expert analog of §6: the egtp-EXCLUDED replicate groups, # intra (per-distopt-instance) and full (cross-instance). Fall back to the # non-EGTP variants when not provided. 
- if hasattr(pg_collection, 'intra_expt_dp_no_egtp'): + if 'intra_expt_dp_no_egtp' in pg_set: intra_expt_dp_no_egtp_group = pg_collection.intra_expt_dp_no_egtp else: intra_expt_dp_no_egtp_group = intra_expt_dp_group - if hasattr(pg_collection, 'expt_dp_no_egtp'): + if 'expt_dp_no_egtp' in pg_set: expt_dp_no_egtp_group = pg_collection.expt_dp_no_egtp else: expt_dp_no_egtp_group = expt_dp_group @@ -684,7 +684,7 @@ def setup_process_groups_for_ddp( result['dp_group'] = pg_collection.dp # 2. dp_cp group: fallback logic based on context_parallel_size - if hasattr(pg_collection, 'dp_cp'): + if 'dp_cp' in pg_set: result['dp_cp_group'] = pg_collection.dp_cp else: cp_size = getattr(config, 'context_parallel_size', 1) @@ -743,13 +743,13 @@ def setup_process_groups_for_ddp( result['ep_group'] = pg_collection.ep # 6. GTP partial group (fallback to intra_dp_cp if not provided) - if hasattr(pg_collection, 'intra_dp_cp_no_gtp'): + if 'intra_dp_cp_no_gtp' in pg_set: result['intra_dp_cp_no_gtp_group'] = pg_collection.intra_dp_cp_no_gtp else: result['intra_dp_cp_no_gtp_group'] = result['intra_dp_cp_group'] # 7. EGTP partial group (fallback to intra_expt_dp if not provided) - if hasattr(pg_collection, 'intra_expt_dp_no_egtp'): + if 'intra_expt_dp_no_egtp' in pg_set: result['intra_expt_dp_no_egtp_group'] = pg_collection.intra_expt_dp_no_egtp else: result['intra_expt_dp_no_egtp_group'] = result['intra_expt_dp_group'] @@ -757,11 +757,11 @@ def setup_process_groups_for_ddp( # 8. Full (cross-instance) with-GTP-excluded variants for callers that need to # reach ALL true weight replicas (e.g., broadcast_params at init). Fall back # to the corresponding non-GTP-excluded full group when not provided. 
- if hasattr(pg_collection, 'dp_cp_no_gtp'): + if 'dp_cp_no_gtp' in pg_set: result['dp_cp_no_gtp_group'] = pg_collection.dp_cp_no_gtp else: result['dp_cp_no_gtp_group'] = result['dp_cp_group'] - if hasattr(pg_collection, 'expt_dp_no_egtp'): + if 'expt_dp_no_egtp' in pg_set: result['expt_dp_no_egtp_group'] = pg_collection.expt_dp_no_egtp else: result['expt_dp_no_egtp_group'] = result['expt_dp_group'] diff --git a/megatron/experimental/gtp/generalized_tensor_parallelism.py b/megatron/experimental/gtp/generalized_tensor_parallelism.py index 06bc82a3b02..d4c14152aec 100644 --- a/megatron/experimental/gtp/generalized_tensor_parallelism.py +++ b/megatron/experimental/gtp/generalized_tensor_parallelism.py @@ -1566,8 +1566,11 @@ def _allocate_buffer( buf_bytes = self._buf_bytes(out_shape, dtype) self._total_bytes += buf_bytes + dtype_str = ( + str(dtype) if isinstance(dtype, torch.dtype) else getattr(dtype, "name", str(dtype)) + ) print_rank_0( - f"[GTP Cache] +{buf_bytes / 1024**2:.1f} MB (shape={out_shape}, dtype={dtype}) " + f"[GTP Cache] +{buf_bytes / 1024**2:.1f} MB (shape={out_shape}, dtype={dtype_str}) " f"total={self._total_bytes / 1024**2:.1f} MB param: {param._debug_name} fwd: {fwd}" ) return buf From 114b6fcf356e01aadc2cb6b84be19a2686444e33 Mon Sep 17 00:00:00 2001 From: Shiqing Fan Date: Thu, 25 Jun 2026 02:35:31 -0700 Subject: [PATCH 51/59] Fix GTP grad norm inflated on CUDA-graph capture step; fix linting Signed-off-by: Shiqing Fan --- megatron/core/model_parallel_config.py | 6 +- megatron/core/transformer/cuda_graphs.py | 62 +++++++++++-- megatron/experimental/gtp/README.md | 1 + .../test_gtp_cudagraph_grad.py | 87 +++++++++++++++++++ 4 files changed, 144 insertions(+), 12 deletions(-) create mode 100644 tests/unit_tests/generalized_tensor_parallel/test_gtp_cudagraph_grad.py diff --git a/megatron/core/model_parallel_config.py b/megatron/core/model_parallel_config.py index 0313fba290b..cb880726a87 100644 --- a/megatron/core/model_parallel_config.py +++ 
b/megatron/core/model_parallel_config.py @@ -134,8 +134,8 @@ class ModelParallelConfig: expert_tensor_parallel_num_weight_shards: Optional[int] = None """Total number of shards each expert weight is split into across the expert-tensor-parallel + - expert-GTP axes (i.e. ``expert_tensor_parallel_size * expert_gtp_weight_remat_size``). This is - the user-facing knob for expert layers: it must be ``>= expert_tensor_parallel_size`` and + expert-GTP axes (i.e. ``expert_tensor_parallel_size * expert_gtp_weight_remat_size``). This + is the user-facing knob for expert layers: it must be ``>= expert_tensor_parallel_size`` and divisible by it. When None it defaults to ``expert_tensor_parallel_size`` (no expert GTP sharding). It is the source of truth and implies ``expert_gtp_weight_remat_size = expert_tensor_parallel_num_weight_shards // @@ -516,7 +516,7 @@ def __post_init__(self): ) ) - # Same reconciliation for the expert layers (expert_tensor_parallel_size is finalized above). + # Same reconciliation for expert layers (expert_tensor_parallel_size finalized above). (self.expert_tensor_parallel_num_weight_shards, self.expert_gtp_weight_remat_size) = ( resolve_tensor_parallel_weight_shards( self.expert_tensor_parallel_size, diff --git a/megatron/core/transformer/cuda_graphs.py b/megatron/core/transformer/cuda_graphs.py index ca599a7a73d..f0f8a128a76 100644 --- a/megatron/core/transformer/cuda_graphs.py +++ b/megatron/core/transformer/cuda_graphs.py @@ -382,6 +382,44 @@ def _ensure_generator_state_is_cudagraph_safe(gen: torch.Generator) -> torch.Gen bwd_buffer_reuse_ref_count = 0 +def _backup_capture_grads(runner): + """Clone main_grad for everything a graph capture re-accumulates into, so capturing stays + side-effect-free on the finalized grads (capture *executes* the recorded main_grad.add_). + + Used by both create_fwd_graph (warmup) and create_bwd_graph; restore with + ``_restore_capture_grads``. 
+ + The backward always writes main_grad for the module's own params, so those are backed up + unconditionally. Under GTP there is one extra target: the cascade also adds into a param's + cross-graph ``next_w``, which lives in another module and so isn't among this module's own + params. + """ + backup = {} + for p in runner.base_module.parameters(): + mg = getattr(p, "main_grad", None) + if mg is not None: + backup[id(p)] = (p, mg.clone()) + + if runner.gtp_remat: + # GTP only: also protect the cross-graph next_w the cascade accumulates into. + for p in runner.base_module.parameters(): + nw = getattr(p, "next_w", None) if getattr(p, "is_gtp", False) else None + if nw is None: + continue + shards = nw.weight_list if getattr(nw, "is_routed_expert", False) else [nw] + for w in shards or []: + mg = getattr(w, "main_grad", None) + if mg is not None and id(w) not in backup: + backup[id(w)] = (w, mg.clone()) + return backup + + +def _restore_capture_grads(backup): + """Restore the main_grad snapshots taken by ``_backup_capture_grads``.""" + for p, saved in backup.values(): + p.main_grad.copy_(saved) + + class _CudagraphGlobalRecord: """A global datastructure that records of the ordering of all _CudaGraphRunner's first fwd or bwd passes. 'create_cudagraphs' will use this to create @@ -456,7 +494,9 @@ def create_cudagraphs(cls): "https://github.com/NVIDIA/TransformerEngine/blob/v2.10/transformer_engine/pytorch/utils.py#L759" # pylint: disable=line-too-long ) - if any(r[0].gtp_remat for r in cls.cudagraph_record): + gtp_active = any(r[0].gtp_remat for r in cls.cudagraph_record) + if gtp_active: + # GTP buffer reuse during capture trips the param-state debug asserts; disable them. 
GTP_CONFIG.check_param_states = False gc.collect() @@ -1025,9 +1065,7 @@ def create_fwd_graph(self, args, kwargs, outputs=None, clone_inputs=True): for buf in self.base_module.buffers(): buffer_backup.append(buf.clone()) - grad_backup = [] - for param in self.base_module.parameters(): - grad_backup.append(param.main_grad.clone() if hasattr(param, "main_grad") else None) + grad_backup = _backup_capture_grads(self) saved_fp8_tensors = None if self.fp8_enabled: @@ -1132,7 +1170,6 @@ def _resolve_input_buffer(ten): with ctx: # warmup again as case graph capture mode may execute a different codepath _set_warmup_start() - for _ in range(self.num_warmup_steps): with self.get_quantization_context(): @@ -1189,7 +1226,8 @@ def clone_ten(ten): ) if self.gtp_remat: - wait_async_comms(GTPChain.GRAPHED.value) + # Forward only issues AG prefetches (no wgrad RS), so drain AG and skip RS. + wait_async_comms(GTPChain.GRAPHED.value, skip_rs=True) if self.fwd_side_streams: self._wait_side_streams(self.fwd_side_streams) @@ -1243,9 +1281,7 @@ def clone_ten(ten): if self.fp8_enabled: restore_fp8_tensors([self.base_module], saved_fp8_tensors) # restore cached grads - for main_grad_copy, param in zip(grad_backup, self.base_module.parameters()): - if main_grad_copy is not None: - param.main_grad.copy_(main_grad_copy) + _restore_capture_grads(grad_backup) # restore cached buffers for buf_copy, buf in zip(buffer_backup, self.base_module.buffers()): @@ -1302,6 +1338,11 @@ def create_bwd_graph(self): if FREEZE_GC: gc.freeze() + # GTP's wgrad add runs inside the bwd graph, so capturing it executes a main_grad.add_ + # that would clobber the finalized grads; snapshot what it touches and restore below. + # (Non-GTP returns wgrads as graph outputs and accumulates outside, so nothing to guard.) 
+ grad_backup = _backup_capture_grads(self) if self.gtp_remat else {} + with torch.cuda.graph(self.bwd_graph, pool=self.mempool): self._sync_against_side_streams(self.bwd_side_streams) @@ -1365,6 +1406,9 @@ def create_bwd_graph(self): if FREEZE_GC: gc.unfreeze() + # restore cached grads + _restore_capture_grads(grad_backup) + # See _compute_finalized_during_bwd_capture for what's in this set and why. self.finalized_during_bwd_capture = ( self._compute_finalized_during_bwd_capture() if self.gtp_remat else [] diff --git a/megatron/experimental/gtp/README.md b/megatron/experimental/gtp/README.md index 13ff83b5a7d..0b162261dd3 100644 --- a/megatron/experimental/gtp/README.md +++ b/megatron/experimental/gtp/README.md @@ -57,6 +57,7 @@ CG compatibility is designed-in from day one, not retrofitted. The entire sync / - **Allocate-in-pool at creation** (`set_cuda_graph_mempool` + `_graphed_alloc`): GRAPHED-chain AG/RS buffers and quantized weight storage are allocated **directly into the CG memory pool** at first creation (during warmup, before capture), so no CUDA allocations happen inside the captured graph — and no post-hoc reallocation/clone is needed. UNGRAPHED buffers stay in regular allocator memory. - **Lazy, one-shot chain linking**: `prefetch_initialized` is flipped during the first fwd (warmup), so the chain-construction Python side-effects never execute inside a captured graph. The link table is buffered and flushed atomically at the second forward. - **DDP hook manual triggering**: `register_grad_accum_hook` stores the DDP hook on the param; `_CudagraphReplayNode.backward` calls it manually after replay (since `AccumulateGrad` hooks are silenced by replay). This is also how the `assert self.grad_reduce_handle is not None` failure from partial-CG + overlap-grad-reduce is resolved. 
+- **Capture is side-effect-free on `main_grad`**: GTP accumulates wgrad into `main_grad` *inside* the graph — unlike the gradient-accumulation-fusion path, which returns wgrads as graph outputs and accumulates outside. So both the fwd-graph warmup and the bwd-graph capture *execute* that `main_grad.add_` while recording it, including the cross-graph `next_w` add whose target lives in another module. Because capture (`create_cudagraphs()`) runs *after* `finalize_model_grads`, this would overwrite the already reduced + per-token-scaled grads with raw unscaled wgrads, spiking that step's grad norm and over-clipping it. **Fix**: `create_fwd_graph` and `create_bwd_graph` each snapshot the grads their capture touches (the module's own params + cross-graph `next_w`) via `_backup_capture_grads` before capturing and restore them after, so the `add_` is still recorded for replay while the finalized grads survive. Bounded to one module's grads at a time. - **Drains at CG / eager boundary**: `_drain_gtp_side_streams()` before eager MoE expert compute. Inside bwd capture, two-phase drain: Phase 1 joins the within-graph cascade and records `bwd_completion_event` (next runner unblocks); Phase 2 calls `wait_async_comms(GRAPHED)` to drain the chain-tail handle and re-joins side streams (queued after the event so it doesn't delay the next runner). - **Side-stream registration**: the `(GRAPHED, gtp_group)` ag/rs streams are materialized at runner init (`_register_gtp_side_streams`) so they are captured before the first forward. diff --git a/tests/unit_tests/generalized_tensor_parallel/test_gtp_cudagraph_grad.py b/tests/unit_tests/generalized_tensor_parallel/test_gtp_cudagraph_grad.py new file mode 100644 index 00000000000..944e544ea24 --- /dev/null +++ b/tests/unit_tests/generalized_tensor_parallel/test_gtp_cudagraph_grad.py @@ -0,0 +1,87 @@ +# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ +"""Regression test for the GTP + CUDA-graph capture-step grad-norm bug. + +Bug: create_cudagraphs() runs after finalize_model_grads, so main_grad already holds the finalized +(reduced + per-token-scaled) grads. Capturing the fwd warmup and the bwd graph *executes* GTP's +wgrad main_grad.add_ while recording it -- including the cascade add into a param's cross-graph +``next_w`` (which lives in another module) -- clobbering the finalized grads and spiking the step's +grad norm. + +Fix: create_fwd_graph / create_bwd_graph snapshot the grads their capture touches via +``_backup_capture_grads`` and restore them after. This test exercises that helper pair directly: +the module's own params and their cross-graph ``next_w`` must survive a simulated capture clobber. +""" + +import torch + +from megatron.core.transformer.cuda_graphs import _backup_capture_grads, _restore_capture_grads + + +def _gtp_param(value: float, numel: int = 8) -> torch.nn.Parameter: + """A param with a finalized (reduced + scaled) main_grad, flagged as a GTP weight.""" + p = torch.nn.Parameter(torch.zeros(numel, device="cuda")) + p.is_gtp = True + p.main_grad = torch.full((numel,), value, device="cuda") + return p + + +class _Mod(torch.nn.Module): + def __init__(self, weight: torch.nn.Parameter): + super().__init__() + self.weight = weight + + +class _StubRunner: + """The ``base_module`` and ``gtp_remat`` attributes that ``_backup_capture_grads`` reads.""" + + def __init__(self, base_module: torch.nn.Module, gtp_remat: bool = True): + self.base_module = base_module + self.gtp_remat = gtp_remat + + +class TestGTPCaptureGradSnapshot: + def test_preserves_own_and_cross_graph_next_w(self): + """Snapshot/restore must keep both the module's own grad and its cross-graph next_w grad + (in another module) intact across a capture that clobbers them.""" + own = _gtp_param(0.0125) + cross = _gtp_param(0.02) # next_w lives in a different module/graph + own.next_w = cross + runner = _StubRunner(_Mod(own)) + + 
backup = _backup_capture_grads(runner) + own.main_grad.add_(410.0) # simulate the capture-time main_grad.add_ clobber + cross.main_grad.add_(99.0) + _restore_capture_grads(backup) + + torch.testing.assert_close(own.main_grad, torch.full((8,), 0.0125, device="cuda")) + torch.testing.assert_close(cross.main_grad, torch.full((8,), 0.02, device="cuda")) + + def test_routed_expert_next_w_via_weight_list(self): + """A routed-expert next_w exposes its grad-bearing shards via ``weight_list`` (read directly, + since the ``_weights`` property raises on non-leaders before capture).""" + own = _gtp_param(0.0125) + shard0, shard1 = _gtp_param(0.03), _gtp_param(0.04) + routed = torch.nn.Parameter(torch.zeros(8, device="cuda")) # leader wrapper (no own grad) + routed.is_routed_expert = True + routed.weight_list = [shard0, shard1] + own.next_w = routed + runner = _StubRunner(_Mod(own)) + + backup = _backup_capture_grads(runner) + shard0.main_grad.add_(50.0) + shard1.main_grad.add_(60.0) + _restore_capture_grads(backup) + + torch.testing.assert_close(shard0.main_grad, torch.full((8,), 0.03, device="cuda")) + torch.testing.assert_close(shard1.main_grad, torch.full((8,), 0.04, device="cuda")) + + def test_non_gtp_backs_up_own_params_only(self): + """Non-GTP runner: own params are snapshotted, but the GTP cross-graph next_w walk is + skipped (the bwd capture doesn't touch main_grad on the non-GTP path).""" + own = _gtp_param(0.0125) + cross = _gtp_param(0.02) + own.next_w = cross + backup = _backup_capture_grads(_StubRunner(_Mod(own), gtp_remat=False)) + assert id(own) in backup + assert id(cross) not in backup From 3c7aa6c00d38a4480b2e01208a2ec823c7fb2374 Mon Sep 17 00:00:00 2001 From: Shiqing Fan Date: Thu, 25 Jun 2026 03:23:40 -0700 Subject: [PATCH 52/59] fix online UTs Signed-off-by: Shiqing Fan --- megatron/core/model_parallel_config.py | 29 +++++++++---------- megatron/experimental/gtp/README.md | 1 + .../test_gtp_cudagraph_grad.py | 5 ++++ .../test_gtp_dcp.py | 13 +++++---- 4 
files changed, 26 insertions(+), 22 deletions(-) diff --git a/megatron/core/model_parallel_config.py b/megatron/core/model_parallel_config.py index cb880726a87..157ae1437f5 100644 --- a/megatron/core/model_parallel_config.py +++ b/megatron/core/model_parallel_config.py @@ -506,25 +506,22 @@ def __post_init__(self): if self.expert_tensor_parallel_size is None: self.expert_tensor_parallel_size = self.tensor_model_parallel_size - # Reconcile the user-facing tensor_parallel_num_weight_shards with the internal - # gtp_weight_remat_size (num_weight_shards = tensor_model_parallel_size * gtp_weight_remat). - (self.tensor_parallel_num_weight_shards, self.gtp_weight_remat_size) = ( - resolve_tensor_parallel_weight_shards( - self.tensor_model_parallel_size, - self.tensor_parallel_num_weight_shards, - self.gtp_weight_remat_size, - ) + # Derive the internal gtp_weight_remat_size from the user-facing + # tensor_parallel_num_weight_shards: + # num_weight_shards = tensor_model_parallel_size * gtp_weight_remat + _, self.gtp_weight_remat_size = resolve_tensor_parallel_weight_shards( + self.tensor_model_parallel_size, + self.tensor_parallel_num_weight_shards, + self.gtp_weight_remat_size, ) # Same reconciliation for expert layers (expert_tensor_parallel_size finalized above). 
- (self.expert_tensor_parallel_num_weight_shards, self.expert_gtp_weight_remat_size) = ( - resolve_tensor_parallel_weight_shards( - self.expert_tensor_parallel_size, - self.expert_tensor_parallel_num_weight_shards, - self.expert_gtp_weight_remat_size, - shards_field="expert_tensor_parallel_num_weight_shards", - tp_field="expert_tensor_parallel_size", - ) + _, self.expert_gtp_weight_remat_size = resolve_tensor_parallel_weight_shards( + self.expert_tensor_parallel_size, + self.expert_tensor_parallel_num_weight_shards, + self.expert_gtp_weight_remat_size, + shards_field="expert_tensor_parallel_num_weight_shards", + tp_field="expert_tensor_parallel_size", ) if self.pipeline_model_parallel_size > 1: diff --git a/megatron/experimental/gtp/README.md b/megatron/experimental/gtp/README.md index 0b162261dd3..542995fa9c3 100644 --- a/megatron/experimental/gtp/README.md +++ b/megatron/experimental/gtp/README.md @@ -412,6 +412,7 @@ torchrun --nproc-per-node 4 -m pytest tests/unit_tests/generalized_tensor_parall | `test_moe_egtp.py` | EGTP on MoE routed-expert weights. | | `test_gtp_loss_correctness.py` | End-to-end: GTP per-step loss trajectory matches a no-GTP baseline. | | `test_gtp_grad_correctness.py` | Gradient + dist-opt + grad-norm numeric parity vs a DP baseline at replicate (DP) > 1. | +| `test_gtp_cudagraph_grad.py` | Capture-step grad-norm guard (§1.2): `_backup_capture_grads`/`_restore_capture_grads` keep a graph capture from clobbering finalized `main_grad` (own params + cross-graph `next_w`, incl. routed-expert `weight_list`). | | `test_gtp_dcp.py` | Distributed-checkpoint sharding (§3.3): TP×GTP composite/cross-axis offsets, alignment-pad `allow_shape_mismatch`, cross-topology reshard metadata, and quantize-cache reset. | | `test_gtp_muon_dcp.py` | GTP + Muon (LayerWise) optimizer-state checkpoint roundtrip (§1.6): `replica_id` fold for GTP-replicated whole params (router, latent-proj). 
| diff --git a/tests/unit_tests/generalized_tensor_parallel/test_gtp_cudagraph_grad.py b/tests/unit_tests/generalized_tensor_parallel/test_gtp_cudagraph_grad.py index 944e544ea24..8a283d5830e 100644 --- a/tests/unit_tests/generalized_tensor_parallel/test_gtp_cudagraph_grad.py +++ b/tests/unit_tests/generalized_tensor_parallel/test_gtp_cudagraph_grad.py @@ -13,9 +13,14 @@ the module's own params and their cross-graph ``next_w`` must survive a simulated capture clobber. """ +import pytest import torch from megatron.core.transformer.cuda_graphs import _backup_capture_grads, _restore_capture_grads +from megatron.experimental.gtp import HAVE_GTP + +if not HAVE_GTP: + pytest.skip("GTP requires TE with hook registry", allow_module_level=True) def _gtp_param(value: float, numel: int = 8) -> torch.nn.Parameter: diff --git a/tests/unit_tests/generalized_tensor_parallel/test_gtp_dcp.py b/tests/unit_tests/generalized_tensor_parallel/test_gtp_dcp.py index 5334b091ed6..88b0e162960 100644 --- a/tests/unit_tests/generalized_tensor_parallel/test_gtp_dcp.py +++ b/tests/unit_tests/generalized_tensor_parallel/test_gtp_dcp.py @@ -14,16 +14,20 @@ import torch.distributed as dist from megatron.core.dist_checkpointing import ShardedTensor -from megatron.experimental.gtp import ( +from megatron.experimental.gtp import HAVE_GTP + +if not HAVE_GTP: + pytest.skip("GTP requires TE with hook registry", allow_module_level=True) + +from megatron.experimental.gtp import ( # noqa: E402 GTP_CONFIG, - HAVE_GTP, GTPShardedParam, make_sharded_tensors_for_checkpoint_with_gtp, reset_gtp_quantize_cache, update_gtp_config, wrap_module_params_gtp, ) -from tests.unit_tests.test_utilities import Utils +from tests.unit_tests.test_utilities import Utils # noqa: E402 @pytest.fixture(autouse=True) @@ -38,9 +42,6 @@ def _no_pad_alignment(): update_gtp_config(pad_for_alignment=orig) -pytestmark = pytest.mark.skipif(not HAVE_GTP, reason="GTP requires TE with hook registry") - - @pytest.fixture(scope="module", 
autouse=True) def _torchrun_dist_init(): Utils.initialize_model_parallel() From 1b066a5a28610b7bba22ba15b99e444a301fa6eb Mon Sep 17 00:00:00 2001 From: Shiqing Fan Date: Thu, 25 Jun 2026 19:24:49 -0700 Subject: [PATCH 53/59] Simplify GTP grad-norm fix: drop unnecessary bwd-graph backup Signed-off-by: Shiqing Fan --- megatron/core/transformer/cuda_graphs.py | 30 ++++----------- megatron/experimental/gtp/README.md | 4 +- .../test_gtp_cudagraph_grad.py | 37 ++++++++++--------- 3 files changed, 29 insertions(+), 42 deletions(-) diff --git a/megatron/core/transformer/cuda_graphs.py b/megatron/core/transformer/cuda_graphs.py index f0f8a128a76..414b9603b56 100644 --- a/megatron/core/transformer/cuda_graphs.py +++ b/megatron/core/transformer/cuda_graphs.py @@ -382,17 +382,9 @@ def _ensure_generator_state_is_cudagraph_safe(gen: torch.Generator) -> torch.Gen bwd_buffer_reuse_ref_count = 0 -def _backup_capture_grads(runner): - """Clone main_grad for everything a graph capture re-accumulates into, so capturing stays - side-effect-free on the finalized grads (capture *executes* the recorded main_grad.add_). - - Used by both create_fwd_graph (warmup) and create_bwd_graph; restore with - ``_restore_capture_grads``. - - The backward always writes main_grad for the module's own params, so those are backed up - unconditionally. Under GTP there is one extra target: the cascade also adds into a param's - cross-graph ``next_w``, which lives in another module and so isn't among this module's own - params. +def _backup_grads_before_capture(runner): + """Snapshot main_grad so create_fwd_graph's eager warmup can't corrupt the finalized grads; + restore with ``_restore_grads_after_capture``. 
""" backup = {} for p in runner.base_module.parameters(): @@ -414,8 +406,8 @@ def _backup_capture_grads(runner): return backup -def _restore_capture_grads(backup): - """Restore the main_grad snapshots taken by ``_backup_capture_grads``.""" +def _restore_grads_after_capture(backup): + """Restore the main_grad snapshots taken by ``_backup_grads_before_capture``.""" for p, saved in backup.values(): p.main_grad.copy_(saved) @@ -1065,7 +1057,7 @@ def create_fwd_graph(self, args, kwargs, outputs=None, clone_inputs=True): for buf in self.base_module.buffers(): buffer_backup.append(buf.clone()) - grad_backup = _backup_capture_grads(self) + grad_backup = _backup_grads_before_capture(self) saved_fp8_tensors = None if self.fp8_enabled: @@ -1281,7 +1273,7 @@ def clone_ten(ten): if self.fp8_enabled: restore_fp8_tensors([self.base_module], saved_fp8_tensors) # restore cached grads - _restore_capture_grads(grad_backup) + _restore_grads_after_capture(grad_backup) # restore cached buffers for buf_copy, buf in zip(buffer_backup, self.base_module.buffers()): @@ -1338,11 +1330,6 @@ def create_bwd_graph(self): if FREEZE_GC: gc.freeze() - # GTP's wgrad add runs inside the bwd graph, so capturing it executes a main_grad.add_ - # that would clobber the finalized grads; snapshot what it touches and restore below. - # (Non-GTP returns wgrads as graph outputs and accumulates outside, so nothing to guard.) - grad_backup = _backup_capture_grads(self) if self.gtp_remat else {} - with torch.cuda.graph(self.bwd_graph, pool=self.mempool): self._sync_against_side_streams(self.bwd_side_streams) @@ -1406,9 +1393,6 @@ def create_bwd_graph(self): if FREEZE_GC: gc.unfreeze() - # restore cached grads - _restore_capture_grads(grad_backup) - # See _compute_finalized_during_bwd_capture for what's in this set and why. 
self.finalized_during_bwd_capture = ( self._compute_finalized_during_bwd_capture() if self.gtp_remat else [] diff --git a/megatron/experimental/gtp/README.md b/megatron/experimental/gtp/README.md index 542995fa9c3..4f48c35e870 100644 --- a/megatron/experimental/gtp/README.md +++ b/megatron/experimental/gtp/README.md @@ -57,7 +57,7 @@ CG compatibility is designed-in from day one, not retrofitted. The entire sync / - **Allocate-in-pool at creation** (`set_cuda_graph_mempool` + `_graphed_alloc`): GRAPHED-chain AG/RS buffers and quantized weight storage are allocated **directly into the CG memory pool** at first creation (during warmup, before capture), so no CUDA allocations happen inside the captured graph — and no post-hoc reallocation/clone is needed. UNGRAPHED buffers stay in regular allocator memory. - **Lazy, one-shot chain linking**: `prefetch_initialized` is flipped during the first fwd (warmup), so the chain-construction Python side-effects never execute inside a captured graph. The link table is buffered and flushed atomically at the second forward. - **DDP hook manual triggering**: `register_grad_accum_hook` stores the DDP hook on the param; `_CudagraphReplayNode.backward` calls it manually after replay (since `AccumulateGrad` hooks are silenced by replay). This is also how the `assert self.grad_reduce_handle is not None` failure from partial-CG + overlap-grad-reduce is resolved. -- **Capture is side-effect-free on `main_grad`**: GTP accumulates wgrad into `main_grad` *inside* the graph — unlike the gradient-accumulation-fusion path, which returns wgrads as graph outputs and accumulates outside. So both the fwd-graph warmup and the bwd-graph capture *execute* that `main_grad.add_` while recording it, including the cross-graph `next_w` add whose target lives in another module. 
Because capture (`create_cudagraphs()`) runs *after* `finalize_model_grads`, this would overwrite the already reduced + per-token-scaled grads with raw unscaled wgrads, spiking that step's grad norm and over-clipping it. **Fix**: `create_fwd_graph` and `create_bwd_graph` each snapshot the grads their capture touches (the module's own params + cross-graph `next_w`) via `_backup_capture_grads` before capturing and restore them after, so the `add_` is still recorded for replay while the finalized grads survive. Bounded to one module's grads at a time. +- **Warmup is side-effect-free on `main_grad`**: GTP accumulates wgrad into `main_grad` *inside* the backward (the fusion path returns wgrads as graph outputs instead). Graph capture only *records* ops; it never runs them. But `create_fwd_graph` runs an **eager** warmup fwd+bwd before capturing. That warmup backward executes GTP's `main_grad.add_`. Its deferred cascade adds into a cross-graph `next_w` (another module) from a **stale RS ticket** — the prior backward's wgrad. And `create_cudagraphs()` runs *after* `finalize_model_grads`. So this overwrites the finalized (reduced + per-token-scaled) grads and spikes the step's grad norm. **Fix**: `create_fwd_graph` snapshots the grads its warmup touches — own params + cross-graph `next_w` — via `_backup_grads_before_capture`, then restores them after capture. The bwd graph has no warmup, so it needs none. Bounded to one module's grads. - **Drains at CG / eager boundary**: `_drain_gtp_side_streams()` before eager MoE expert compute. Inside bwd capture, two-phase drain: Phase 1 joins the within-graph cascade and records `bwd_completion_event` (next runner unblocks); Phase 2 calls `wait_async_comms(GRAPHED)` to drain the chain-tail handle and re-joins side streams (queued after the event so it doesn't delay the next runner). 
- **Side-stream registration**: the `(GRAPHED, gtp_group)` ag/rs streams are materialized at runner init (`_register_gtp_side_streams`) so they are captured before the first forward. @@ -412,7 +412,7 @@ torchrun --nproc-per-node 4 -m pytest tests/unit_tests/generalized_tensor_parall | `test_moe_egtp.py` | EGTP on MoE routed-expert weights. | | `test_gtp_loss_correctness.py` | End-to-end: GTP per-step loss trajectory matches a no-GTP baseline. | | `test_gtp_grad_correctness.py` | Gradient + dist-opt + grad-norm numeric parity vs a DP baseline at replicate (DP) > 1. | -| `test_gtp_cudagraph_grad.py` | Capture-step grad-norm guard (§1.2): `_backup_capture_grads`/`_restore_capture_grads` keep a graph capture from clobbering finalized `main_grad` (own params + cross-graph `next_w`, incl. routed-expert `weight_list`). | +| `test_gtp_cudagraph_grad.py` | Capture-step grad-norm guard (§1.2): `_backup_grads_before_capture`/`_restore_grads_after_capture` keep `create_fwd_graph`'s eager warmup backward from clobbering finalized `main_grad` (own params + cross-graph `next_w`, incl. routed-expert `weight_list`). | | `test_gtp_dcp.py` | Distributed-checkpoint sharding (§3.3): TP×GTP composite/cross-axis offsets, alignment-pad `allow_shape_mismatch`, cross-topology reshard metadata, and quantize-cache reset. | | `test_gtp_muon_dcp.py` | GTP + Muon (LayerWise) optimizer-state checkpoint roundtrip (§1.6): `replica_id` fold for GTP-replicated whole params (router, latent-proj). | diff --git a/tests/unit_tests/generalized_tensor_parallel/test_gtp_cudagraph_grad.py b/tests/unit_tests/generalized_tensor_parallel/test_gtp_cudagraph_grad.py index 8a283d5830e..cd5ae315da0 100644 --- a/tests/unit_tests/generalized_tensor_parallel/test_gtp_cudagraph_grad.py +++ b/tests/unit_tests/generalized_tensor_parallel/test_gtp_cudagraph_grad.py @@ -3,20 +3,23 @@ """Regression test for the GTP + CUDA-graph capture-step grad-norm bug.
Bug: create_cudagraphs() runs after finalize_model_grads, so main_grad already holds the finalized -(reduced + per-token-scaled) grads. Capturing the fwd warmup and the bwd graph *executes* GTP's -wgrad main_grad.add_ while recording it -- including the cascade add into a param's cross-graph -``next_w`` (which lives in another module) -- clobbering the finalized grads and spiking the step's -grad norm. - -Fix: create_fwd_graph / create_bwd_graph snapshot the grads their capture touches via -``_backup_capture_grads`` and restore them after. This test exercises that helper pair directly: -the module's own params and their cross-graph ``next_w`` must survive a simulated capture clobber. +(reduced + per-token-scaled) grads. create_fwd_graph then runs an eager warmup backward (graph +capture only records ops, it doesn't run them), and that eager backward executes GTP's wgrad +main_grad.add_ -- including the cascade add into a param's cross-graph ``next_w`` (in another +module, via a stale RS ticket) -- clobbering the finalized grads and spiking the step's grad norm. + +Fix: create_fwd_graph snapshots the grads its warmup touches via ``_backup_grads_before_capture`` +and restores them after. This test exercises that helper pair directly: the module's own params +and their cross-graph ``next_w`` must survive a simulated warmup clobber. 
""" import pytest import torch -from megatron.core.transformer.cuda_graphs import _backup_capture_grads, _restore_capture_grads +from megatron.core.transformer.cuda_graphs import ( + _backup_grads_before_capture, + _restore_grads_after_capture, +) from megatron.experimental.gtp import HAVE_GTP if not HAVE_GTP: @@ -38,7 +41,7 @@ def __init__(self, weight: torch.nn.Parameter): class _StubRunner: - """The ``base_module`` and ``gtp_remat`` attributes that ``_backup_capture_grads`` reads.""" + """The ``base_module`` and ``gtp_remat`` attrs that ``_backup_grads_before_capture`` reads.""" def __init__(self, base_module: torch.nn.Module, gtp_remat: bool = True): self.base_module = base_module @@ -54,17 +57,17 @@ def test_preserves_own_and_cross_graph_next_w(self): own.next_w = cross runner = _StubRunner(_Mod(own)) - backup = _backup_capture_grads(runner) + backup = _backup_grads_before_capture(runner) own.main_grad.add_(410.0) # simulate the capture-time main_grad.add_ clobber cross.main_grad.add_(99.0) - _restore_capture_grads(backup) + _restore_grads_after_capture(backup) torch.testing.assert_close(own.main_grad, torch.full((8,), 0.0125, device="cuda")) torch.testing.assert_close(cross.main_grad, torch.full((8,), 0.02, device="cuda")) def test_routed_expert_next_w_via_weight_list(self): - """A routed-expert next_w exposes its grad-bearing shards via ``weight_list`` (read directly, - since the ``_weights`` property raises on non-leaders before capture).""" + """A routed-expert next_w exposes its shards via ``weight_list`` (read directly, since the + ``_weights`` property raises on non-leaders before capture).""" own = _gtp_param(0.0125) shard0, shard1 = _gtp_param(0.03), _gtp_param(0.04) routed = torch.nn.Parameter(torch.zeros(8, device="cuda")) # leader wrapper (no own grad) @@ -73,10 +76,10 @@ def test_routed_expert_next_w_via_weight_list(self): own.next_w = routed runner = _StubRunner(_Mod(own)) - backup = _backup_capture_grads(runner) + backup = 
_backup_grads_before_capture(runner) shard0.main_grad.add_(50.0) shard1.main_grad.add_(60.0) - _restore_capture_grads(backup) + _restore_grads_after_capture(backup) torch.testing.assert_close(shard0.main_grad, torch.full((8,), 0.03, device="cuda")) torch.testing.assert_close(shard1.main_grad, torch.full((8,), 0.04, device="cuda")) @@ -87,6 +90,6 @@ def test_non_gtp_backs_up_own_params_only(self): own = _gtp_param(0.0125) cross = _gtp_param(0.02) own.next_w = cross - backup = _backup_capture_grads(_StubRunner(_Mod(own), gtp_remat=False)) + backup = _backup_grads_before_capture(_StubRunner(_Mod(own), gtp_remat=False)) assert id(own) in backup assert id(cross) not in backup From 7d7e8c3aa369d329480b6f6f1bd6cc786a502494 Mon Sep 17 00:00:00 2001 From: Shiqing Fan Date: Thu, 25 Jun 2026 21:12:17 -0700 Subject: [PATCH 54/59] Move GTP from megatron.experimental into megatron.core Make megatron.core self-contained: it must not import from megatron.experimental, which is not shipped with the core wheel. 
Signed-off-by: Shiqing Fan --- .../core/generalized_tensor_parallel.md | 16 ++--- docs/api-guide/core/index.md | 1 + .../0525_gtp_mcore_te_architecture.png | Bin .../0611_ddp_egtp_orthogonal_bucketing.png | Bin .../0612_gtp_dcp_tp2gtp2_save_load.png | Bin .../0613_gtp_dcp_save_call_workflow.png | Bin .../0617_gtp64_weak_scaling_efficiency.png | Bin .../distributed/distributed_data_parallel.py | 4 +- .../core/distributed/finalize_model_grads.py | 2 +- .../core/extensions/transformer_engine.py | 2 +- megatron/core/optimizer/__init__.py | 5 +- .../core/optimizer/layer_wise_optimizer.py | 2 +- megatron/core/optimizer/optimizer.py | 2 +- megatron/core/parallel_state.py | 3 +- megatron/core/ssm/mamba_mixer.py | 7 +- .../generalized_tensor_parallelism.py | 6 +- megatron/core/tensor_parallel/layers.py | 8 +-- megatron/core/transformer/cuda_graphs.py | 8 +-- megatron/core/transformer/utils.py | 4 +- megatron/core/utils.py | 8 +-- megatron/experimental/__init__.py | 1 - megatron/experimental/gtp/__init__.py | 63 ------------------ megatron/training/arguments.py | 2 +- megatron/training/initialize.py | 2 +- megatron/training/training.py | 8 +-- pyproject.toml | 5 -- .../gtp_test_utils.py | 2 +- .../test_attention_gtp.py | 4 +- .../generalized_tensor_parallel/test_gtp.py | 11 +-- .../test_gtp_cudagraph_grad.py | 2 +- .../test_gtp_dcp.py | 12 ++-- .../test_gtp_grad_correctness.py | 4 +- .../test_gtp_loss_correctness.py | 4 +- .../test_gtp_muon_dcp.py | 4 +- .../test_mamba_gtp.py | 4 +- .../test_moe_egtp.py | 4 +- .../test_tp_gtp.py | 4 +- 37 files changed, 76 insertions(+), 138 deletions(-) rename megatron/experimental/gtp/README.md => docs/api-guide/core/generalized_tensor_parallel.md (97%) rename {megatron/experimental/gtp/images => docs/images/generalized_tensor_parallel}/0525_gtp_mcore_te_architecture.png (100%) rename {megatron/experimental/gtp/images => docs/images/generalized_tensor_parallel}/0611_ddp_egtp_orthogonal_bucketing.png (100%) rename 
{megatron/experimental/gtp/images => docs/images/generalized_tensor_parallel}/0612_gtp_dcp_tp2gtp2_save_load.png (100%) rename {megatron/experimental/gtp/images => docs/images/generalized_tensor_parallel}/0613_gtp_dcp_save_call_workflow.png (100%) rename {megatron/experimental/gtp/images => docs/images/generalized_tensor_parallel}/0617_gtp64_weak_scaling_efficiency.png (100%) rename megatron/{experimental/gtp => core/tensor_parallel}/generalized_tensor_parallelism.py (99%) delete mode 100644 megatron/experimental/__init__.py delete mode 100644 megatron/experimental/gtp/__init__.py diff --git a/megatron/experimental/gtp/README.md b/docs/api-guide/core/generalized_tensor_parallel.md similarity index 97% rename from megatron/experimental/gtp/README.md rename to docs/api-guide/core/generalized_tensor_parallel.md index 4f48c35e870..5a748d5c36e 100644 --- a/megatron/experimental/gtp/README.md +++ b/docs/api-guide/core/generalized_tensor_parallel.md @@ -6,7 +6,7 @@ **Scope**: a high-level summary of GTP — design intent, public CLI surface, and Megatron-LM ↔ TransformerEngine integration touchpoints. -Core implementation: `megatron/experimental/gtp/generalized_tensor_parallelism.py`. The public surface is re-exported from `megatron/experimental/gtp/__init__.py`. Low-precision tensor primitives (FP8 / MXFP8 / NVFP4) remain in TransformerEngine and are imported by `generalized_tensor_parallelism.py`. +Core implementation: `megatron/core/tensor_parallel/generalized_tensor_parallelism.py`. The public surface is re-exported from `megatron/core/tensor_parallel/gtp.py`. Low-precision tensor primitives (FP8 / MXFP8 / NVFP4) remain in TransformerEngine and are imported by `generalized_tensor_parallelism.py`. **Outline:** @@ -150,7 +150,7 @@ On an Ultra-proxy hybrid Mamba-MoE model (**~280B parameters**; `GTP64 · EP64 > **Takeaway:** near-flat weak scaling — **≥93 % efficiency from 128 → 3072 GPUs**, with per-GPU memory shrinking as DP grows. 
-![GTP64 weak-scaling efficiency](images/0617_gtp64_weak_scaling_efficiency.png) +![GTP64 weak-scaling efficiency](../../images/generalized_tensor_parallel/0617_gtp64_weak_scaling_efficiency.png) ### 1.8 Native distributed checkpointing (DCP) @@ -228,7 +228,7 @@ GTP enabled. GTPConfig(pad_for_alignment=16, check_param_states=False, ### 2.4 Tuning knobs -Set via `from megatron.experimental.gtp import GTP_CONFIG, update_gtp_config`: +Set via `from megatron.core.tensor_parallel.gtp import GTP_CONFIG, update_gtp_config`: ```python update_gtp_config( @@ -249,7 +249,7 @@ update_gtp_config( ### 3.1 GTP architecture (Mcore ↔ TE integration) -![GTP / Mcore-TE integration architecture](images/0525_gtp_mcore_te_architecture.png) +![GTP / Mcore-TE integration architecture](../../images/generalized_tensor_parallel/0525_gtp_mcore_te_architecture.png) TransformerEngine owns the linear primitives (`Linear` / `LayerNormLinear` / `LayerNormMLP` / `GroupedLinear`) and the low-precision tensor types (FP8 / MXFP8 / NVFP4). Megatron-LM owns the GTP scheduling state — the prefetch chain, the ticket-based buffer cache, the per-param AG/RS state machines, the GRAPHED/UNGRAPHED chain split, and the DDP integration. The two are bridged by: @@ -260,7 +260,7 @@ TransformerEngine owns the linear primitives (`Linear` / `LayerNormLinear` / `La #### What the flags do under the hood 1. `parallel_state.initialize_model_parallel(...)` treats GTP/EGTP as **first-class orthogonal axes** (`world_size = TP*GTP*CP*DP`, and the expert grid `= ETP*EP*EGTP*PP*expert_dp`). It builds the shard groups `_GTP_WEIGHT_REMAT_GROUP` (size = `--tensor-parallel-num-weight-shards / --tensor-model-parallel-size`) and `_EXPERT_GTP_WEIGHT_REMAT_GROUP` (size = `--expert-tensor-parallel-num-weight-shards / --expert-tensor-parallel-size`), plus the gtp/egtp-EXCLUDED replicate DP groups (`_DATA_PARALLEL_GROUP_NO_GTP`, `_EXPERT_DATA_PARALLEL_GROUP_NO_GTP`) that DDP and the optimizer shard over. 
These `*_no_gtp` groups alias the regular DP groups when GTP is inactive (remat size 1). -2. Megatron's `extensions/transformer_engine.py` reads `pg_collection.gtp` / `pg_collection.expt_gtp` and forwards them as the `gtp_group=` kwarg to `te.Linear` / `te.LayerNormLinear` / `te.GroupedLinear`. TE's `module/base.py` calls back into `megatron.experimental.gtp` via the hook registry (`register_gtp_hooks`) to slice each weight at `reset_parameters` time. +2. Megatron's `extensions/transformer_engine.py` reads `pg_collection.gtp` / `pg_collection.expt_gtp` and forwards them as the `gtp_group=` kwarg to `te.Linear` / `te.LayerNormLinear` / `te.GroupedLinear`. TE's `module/base.py` calls back into `megatron.core.tensor_parallel.gtp` via the hook registry (`register_gtp_hooks`) to slice each weight at `reset_parameters` time. 3. DDP treats GTP shards as ordinary params: they go into the same dense / expert buffers as everything else, reduced over the gtp/egtp-EXCLUDED replicate group (`intra_dp_cp_no_gtp_group` / `intra_expt_dp_no_egtp_group`) with the standard `1/full` scaling. The gtp axis is completed elsewhere — GTP shards by their reduce-scatter sum, replicated (non-GTP) params by a SUM all-reduce in `finalize_model_grads`. See §3.2. 4. Optimizer state is sharded over the same replicate group; clip-by-global-norm reduces squared norms over the dist-opt grad-stats group, which spans the full world (including the gtp/egtp axis), with replicated non-GTP params counted once per gtp/egtp axis to avoid over-counting. 5. `classify_gtp_chains(model)` runs once after model build (in `training.py`'s `get_model`) and wires each `GTPShardedParam` into a `GRAPHED` or `UNGRAPHED` prefetch chain based on the active `cuda_graph_modules`. 
@@ -323,7 +323,7 @@ Under **full-iteration CUDA graphs** the recompute-forward is captured; `wait_as ### 3.2 DDP buckets with (E)GTP -![DDP + (E)GTP interaction with the distributed optimizer](images/0611_ddp_egtp_orthogonal_bucketing.png) +![DDP + (E)GTP interaction with the distributed optimizer](../../images/generalized_tensor_parallel/0611_ddp_egtp_orthogonal_bucketing.png) **(E)GTP is *super loosely coupled* to DDP and the distributed optimizer — they stay completely GTP-agnostic.** GTP is just another sub-axis of the rank grid (`world = TP×GTP×CP×DP`); a GTP-sharded weight rides the *exact same* code path as an ordinary param. There are **no** GTP/EGTP-specific buffers, optimizers, gradient-scaling factors, or bucket groups. The entire DDP/DistOpt stack touches GTP in only **three** narrow places: @@ -359,7 +359,7 @@ Why this is correct — the gtp axis is completed in two complementary ways, so ### 3.3 Distributed checkpointing (DCP) -![GTP + DCP save/load reshard for a TP2×GTP2 weight](images/0612_gtp_dcp_tp2gtp2_save_load.png) +![GTP + DCP save/load reshard for a TP2×GTP2 weight](../../images/generalized_tensor_parallel/0612_gtp_dcp_tp2gtp2_save_load.png) GTP supports **PyTorch / Mcore sharded distributed checkpointing** (`--ckpt-format torch_dist`, the `megatron.core.dist_checkpointing` `ShardedTensor` / `ShardedObject` format) for **both model weights and distributed-optimizer state**. Checkpoints are **fully resharding-capable**: a checkpoint saved at one `(TP, GTP, EGTP, DP, PP)` topology can be loaded at a *different* one — including a different GTP/EGTP size — without an offline conversion step. @@ -367,7 +367,7 @@ Consistent with §3.2, GTP stays *loosely coupled* to the checkpoint stack: ther **Save-side call workflow.** The diagram below traces the save path — from `model.sharded_state_dict()` through the `make_*` helpers down to the terminal `ShardedTensor` / `ShardedObject` sinks. 
The GTP footprint is deliberately tiny: exactly **one new function** (`make_sharded_tensors_for_checkpoint_with_gtp`, in `gtp.py`, which sets `replica_id` for the GTP-*duplicated* entries) plus **one modified function** (the per-tensor `make_tp_sharded_tensor_for_checkpoint` in `core/utils.py`, made GTP-aware in place to emit the GTP-*sharded* offsets). Every other helper is untouched. -![GTP + DCP checkpoint-save call workflow](images/0613_gtp_dcp_save_call_workflow.png) +![GTP + DCP checkpoint-save call workflow](../../images/generalized_tensor_parallel/0613_gtp_dcp_save_call_workflow.png) **How a GTP weight is described to DCP.** GTP always shards `out_features` (axis 0). The helper layers that GTP split onto the existing TP offsets in the `ShardedTensor`, so the global tensor DCP sees is the *full, unsharded* weight: diff --git a/docs/api-guide/core/index.md b/docs/api-guide/core/index.md index 0d39e46e744..af22af6c6e0 100644 --- a/docs/api-guide/core/index.md +++ b/docs/api-guide/core/index.md @@ -16,6 +16,7 @@ Low-level API reference for core Megatron components. 
transformer tensor_parallel +generalized_tensor_parallel pipeline_parallel fusions distributed diff --git a/megatron/experimental/gtp/images/0525_gtp_mcore_te_architecture.png b/docs/images/generalized_tensor_parallel/0525_gtp_mcore_te_architecture.png similarity index 100% rename from megatron/experimental/gtp/images/0525_gtp_mcore_te_architecture.png rename to docs/images/generalized_tensor_parallel/0525_gtp_mcore_te_architecture.png diff --git a/megatron/experimental/gtp/images/0611_ddp_egtp_orthogonal_bucketing.png b/docs/images/generalized_tensor_parallel/0611_ddp_egtp_orthogonal_bucketing.png similarity index 100% rename from megatron/experimental/gtp/images/0611_ddp_egtp_orthogonal_bucketing.png rename to docs/images/generalized_tensor_parallel/0611_ddp_egtp_orthogonal_bucketing.png diff --git a/megatron/experimental/gtp/images/0612_gtp_dcp_tp2gtp2_save_load.png b/docs/images/generalized_tensor_parallel/0612_gtp_dcp_tp2gtp2_save_load.png similarity index 100% rename from megatron/experimental/gtp/images/0612_gtp_dcp_tp2gtp2_save_load.png rename to docs/images/generalized_tensor_parallel/0612_gtp_dcp_tp2gtp2_save_load.png diff --git a/megatron/experimental/gtp/images/0613_gtp_dcp_save_call_workflow.png b/docs/images/generalized_tensor_parallel/0613_gtp_dcp_save_call_workflow.png similarity index 100% rename from megatron/experimental/gtp/images/0613_gtp_dcp_save_call_workflow.png rename to docs/images/generalized_tensor_parallel/0613_gtp_dcp_save_call_workflow.png diff --git a/megatron/experimental/gtp/images/0617_gtp64_weak_scaling_efficiency.png b/docs/images/generalized_tensor_parallel/0617_gtp64_weak_scaling_efficiency.png similarity index 100% rename from megatron/experimental/gtp/images/0617_gtp64_weak_scaling_efficiency.png rename to docs/images/generalized_tensor_parallel/0617_gtp64_weak_scaling_efficiency.png diff --git a/megatron/core/distributed/distributed_data_parallel.py b/megatron/core/distributed/distributed_data_parallel.py index 
76c77510401..0f9c3e424cf 100644 --- a/megatron/core/distributed/distributed_data_parallel.py +++ b/megatron/core/distributed/distributed_data_parallel.py @@ -95,8 +95,8 @@ def __init__( # (GTP-excluded group). Data-parallel gradient reductions over the GTP group are completed # separately in the model backward pass. # - # See Section 3.2 in `gtp/README.md` for more details (including why - # average_in_collective=False). + # See Section 3.2 in `docs/api-guide/core/generalized_tensor_parallel.md` + # for more details (including why average_in_collective=False). # # When GTP is disabled, the *_no_gtp groups alias the regular DP groups. self.intra_dp_cp_group = process_group_dict.get( diff --git a/megatron/core/distributed/finalize_model_grads.py b/megatron/core/distributed/finalize_model_grads.py index 678595c32f8..053c3902b15 100644 --- a/megatron/core/distributed/finalize_model_grads.py +++ b/megatron/core/distributed/finalize_model_grads.py @@ -540,7 +540,7 @@ def finalize_model_grads( # Fence the current stream against all GTP backward grad work before the DP gradient sync. if config.gtp_weight_remat_size > 1 or config.expert_gtp_weight_remat_size > 1: - from megatron.experimental.gtp import wait_for_gtp_grad_reduction_on_current_stream + from megatron.core.tensor_parallel.gtp import wait_for_gtp_grad_reduction_on_current_stream wait_for_gtp_grad_reduction_on_current_stream() diff --git a/megatron/core/extensions/transformer_engine.py b/megatron/core/extensions/transformer_engine.py index d852ceed5fd..50c9d5c655a 100644 --- a/megatron/core/extensions/transformer_engine.py +++ b/megatron/core/extensions/transformer_engine.py @@ -388,7 +388,7 @@ def _maybe_setup_gtp(module, gtp_group, extra_kwargs): """ if gtp_group is None or gtp_group.size() <= 1: return - from megatron.experimental.gtp import HAVE_GTP + from megatron.core.tensor_parallel.gtp import HAVE_GTP assert HAVE_GTP, ( "GTP requires TransformerEngine >= 2.17. 
" diff --git a/megatron/core/optimizer/__init__.py b/megatron/core/optimizer/__init__.py index 2af74cbd38c..e10487a9b34 100644 --- a/megatron/core/optimizer/__init__.py +++ b/megatron/core/optimizer/__init__.py @@ -1204,8 +1204,9 @@ def get_megatron_optimizer( param_group_id += 1 if len(moe_param_groups) > 0: # Expert analog of the dense ``model_parallel_rank`` above: the EGTP-merged group gives - # each EGTP peer a distinct distopt ShardedObject key. See gtp/README.md §3.3 (Optimizer - # state) for why the non-merged ``expt_tp_pp_group`` would cause a DCP "duplicate" error. + # each EGTP peer a distinct distopt ShardedObject key. See + # docs/api-guide/core/generalized_tensor_parallel.md §3.3 (Optimizer state) for why + # the non-merged ``expt_tp_pp_group`` would cause a DCP "duplicate" error. expt_model_parallel_rank = get_pg_rank(expt_tp_pp_with_egtp_group) # Gloo expert-DP group for the optimizer, only when (E)GTP is inactive. When active the # optimizer shards over the egtp-EXCLUDED (no_egtp) replicate group, which has no Gloo diff --git a/megatron/core/optimizer/layer_wise_optimizer.py b/megatron/core/optimizer/layer_wise_optimizer.py index 179c16c0062..48e73a5a3f0 100644 --- a/megatron/core/optimizer/layer_wise_optimizer.py +++ b/megatron/core/optimizer/layer_wise_optimizer.py @@ -105,7 +105,7 @@ def _build_gtp_replica_fold(pg_collection, model_chunks) -> Dict[str, Tuple[int, """ gtp_fold: Dict[str, Tuple[int, int]] = {} try: - from megatron.experimental.gtp import HAVE_GTP, GTPShardedParam + from megatron.core.tensor_parallel.gtp import HAVE_GTP, GTPShardedParam except ImportError: return gtp_fold if not HAVE_GTP: diff --git a/megatron/core/optimizer/optimizer.py b/megatron/core/optimizer/optimizer.py index 0913400d50e..ebbb5138284 100644 --- a/megatron/core/optimizer/optimizer.py +++ b/megatron/core/optimizer/optimizer.py @@ -794,7 +794,7 @@ def _backfill_gtp_sharded_param_map(id_to_sharded_param_map: dict, float16_group """ try: from megatron.core import 
parallel_state - from megatron.experimental.gtp import ( + from megatron.core.tensor_parallel.gtp import ( GTPShardedParam, make_sharded_tensors_for_checkpoint_with_gtp, ) diff --git a/megatron/core/parallel_state.py b/megatron/core/parallel_state.py index 8a34e21ddd9..8509ec66f41 100644 --- a/megatron/core/parallel_state.py +++ b/megatron/core/parallel_state.py @@ -1414,7 +1414,8 @@ def _inject_gtp(order_str: str, after: str = "tp") -> str: # Expert+tensor+pipeline group merged across EGTP peers — expert analog of the dense # _MODEL_PARALLEL_GROUP merge (above). The 'tp-ep-gtp-pp' token spans the egtp axis; with # expert_gtp_remat_size=1 it reduces to the plain tp-ep-pp groups. Merging gives EGTP peers - # distinct ranks; see gtp/README.md §3.3 (Optimizer state) for the DCP-collision rationale. + # distinct ranks; see docs/api-guide/core/generalized_tensor_parallel.md §3.3 + # (Optimizer state) for the DCP-collision rationale. global _EXPERT_TENSOR_MODEL_PIPELINE_PARALLEL_GROUP_WITH_EGTP for ranks in expert_decoder_rank_generator.get_ranks('tp-ep-gtp-pp'): group = create_group( diff --git a/megatron/core/ssm/mamba_mixer.py b/megatron/core/ssm/mamba_mixer.py index e456ff0ea76..40dd5d348c1 100644 --- a/megatron/core/ssm/mamba_mixer.py +++ b/megatron/core/ssm/mamba_mixer.py @@ -30,6 +30,7 @@ from megatron.core.ssm.ops.causal_conv1d_triton import causal_conv1d_update from megatron.core.ssm.ops.mamba_ssm import selective_state_update from megatron.core.tensor_parallel import get_cuda_rng_tracker +from megatron.core.tensor_parallel.gtp import HAVE_GTP from megatron.core.transformer import TransformerConfig from megatron.core.transformer.module import MegatronModule from megatron.core.transformer.spec_utils import ModuleSpec, build_module @@ -47,10 +48,9 @@ log_single_rank, make_tp_sharded_tensor_for_checkpoint, ) -from megatron.experimental.gtp import HAVE_GTP if HAVE_GTP: - from megatron.experimental.gtp import GTPShardedParam + from megatron.core.tensor_parallel.gtp 
import GTPShardedParam else: GTPShardedParam = None @@ -1415,7 +1415,8 @@ def sharded_state_dict(self, prefix="", sharded_offsets=(), metadata=None): 0, ) - # GTP load-side inverse of the save-time all-gather (see gtp/README.md §3.3, in_proj + # GTP load-side inverse of the save-time all-gather (see + # docs/api-guide/core/generalized_tensor_parallel.md §3.3, in_proj # note): the checkpoint stores the FULL TP-local in_proj.weight (pad stripped) under the # 5 split keys [z|x|B|C|dt], so the default merge_fn cats them back to ``in_proj_dim`` # rows with no padding. To reload into the live GTPShardedParam we must mirror init diff --git a/megatron/experimental/gtp/generalized_tensor_parallelism.py b/megatron/core/tensor_parallel/generalized_tensor_parallelism.py similarity index 99% rename from megatron/experimental/gtp/generalized_tensor_parallelism.py rename to megatron/core/tensor_parallel/generalized_tensor_parallelism.py index d4c14152aec..ed5c95bbad0 100644 --- a/megatron/experimental/gtp/generalized_tensor_parallelism.py +++ b/megatron/core/tensor_parallel/generalized_tensor_parallelism.py @@ -30,7 +30,7 @@ _te_version = Version(te.__version__) if _te_version < _GTP_TE_MIN_VERSION and not os.environ.get("MEGATRON_GTP_FORCE_ENABLE"): raise ImportError( - f"megatron.experimental.gtp requires TransformerEngine >= {_GTP_TE_MIN_VERSION} " + f"megatron.core.tensor_parallel.gtp requires TransformerEngine >= {_GTP_TE_MIN_VERSION} " f"(found {_te_version}). Set MEGATRON_GTP_FORCE_ENABLE=1 to bypass this check " "when using a custom TE build that includes the GTP hook registry." ) @@ -57,7 +57,7 @@ ) except (ImportError, ModuleNotFoundError) as _gtp_te_import_err: raise ImportError( - "megatron.experimental.gtp requires TransformerEngine with FP8 / MXFP8 / " + "megatron.core.tensor_parallel.gtp requires TransformerEngine with FP8 / MXFP8 / " "NVFP4 tensor primitives. 
Original error: " + str(_gtp_te_import_err) ) from _gtp_te_import_err @@ -1987,7 +1987,7 @@ def make_sharded_tensors_for_checkpoint_with_gtp( ) except ImportError: warnings.warn( - "megatron.experimental.gtp: TransformerEngine does not expose register_gtp_hooks; " + "megatron.core.tensor_parallel.gtp: TransformerEngine does not expose register_gtp_hooks; " "GTP will be a no-op for te.Linear / te.LayerNormLinear / te.GroupedLinear. " "GTP requires TransformerEngine >= 2.17 (planned release). " "Upgrade TransformerEngine to a build that includes the GTP hook registry.", diff --git a/megatron/core/tensor_parallel/layers.py b/megatron/core/tensor_parallel/layers.py index 95f385ab989..70d62e0610d 100644 --- a/megatron/core/tensor_parallel/layers.py +++ b/megatron/core/tensor_parallel/layers.py @@ -309,7 +309,7 @@ def __init__( self.gtp_size = 1 if gtp_group is not None and gtp_group.size() > 1: - from megatron.experimental.gtp import wrap_module_params_gtp + from megatron.core.tensor_parallel.gtp import wrap_module_params_gtp wrap_module_params_gtp(self, ["weight"], gtp_group) self.gtp_size = gtp_group.size() @@ -335,7 +335,7 @@ def forward(self, input_): weight = self.weight if self.gtp_size > 1: - from megatron.experimental.gtp import GTPEmbeddingWeight + from megatron.core.tensor_parallel.gtp import GTPEmbeddingWeight weight = GTPEmbeddingWeight.apply(self.weight) @@ -1008,7 +1008,7 @@ def __init__( self.gtp_size = 1 if gtp_group is not None and gtp_group.size() > 1: - from megatron.experimental.gtp import wrap_module_params_gtp + from megatron.core.tensor_parallel.gtp import wrap_module_params_gtp wrap_module_params_gtp(self, ["weight"], gtp_group) self.gtp_size = gtp_group.size() @@ -1365,7 +1365,7 @@ def __init__( self.gtp_size = 1 if gtp_group is not None and gtp_group.size() > 1: - from megatron.experimental.gtp import wrap_module_params_gtp + from megatron.core.tensor_parallel.gtp import wrap_module_params_gtp wrap_module_params_gtp(self, ["weight"], gtp_group) 
self.gtp_size = gtp_group.size() diff --git a/megatron/core/transformer/cuda_graphs.py b/megatron/core/transformer/cuda_graphs.py index 414b9603b56..2d506b9aa3c 100644 --- a/megatron/core/transformer/cuda_graphs.py +++ b/megatron/core/transformer/cuda_graphs.py @@ -59,14 +59,14 @@ HAVE_TE_GRAPHS = False try: - from megatron.experimental.gtp import HAVE_GTP + from megatron.core.tensor_parallel.gtp import HAVE_GTP except ImportError: - # megatron.experimental is not shipped with the megatron.core wheel; treat - # GTP as unavailable when the package is absent. + # GTP requires TransformerEngine with the GTP hook registry; treat it as + # unavailable when that import path cannot be resolved. HAVE_GTP = False if HAVE_GTP: - from megatron.experimental.gtp import ( + from megatron.core.tensor_parallel.gtp import ( GTP_CONFIG, GTPChain, get_ag_stream, diff --git a/megatron/core/transformer/utils.py b/megatron/core/transformer/utils.py index 4a2bdff744a..82afaea56f0 100644 --- a/megatron/core/transformer/utils.py +++ b/megatron/core/transformer/utils.py @@ -135,10 +135,10 @@ def make_sharded_tensors_for_checkpoint( # GTP-sharded weights need the GTP axis layered onto the TP/DP offsets. The GTP helper # is a no-op for non-GTP state_dicts, but importing it eagerly would be circular, so # gate on HAVE_GTP and the presence of a GTPShardedParam before delegating. - from megatron.experimental.gtp import HAVE_GTP + from megatron.core.tensor_parallel.gtp import HAVE_GTP if HAVE_GTP: - from megatron.experimental.gtp import ( + from megatron.core.tensor_parallel.gtp import ( GTPShardedParam, make_sharded_tensors_for_checkpoint_with_gtp, ) diff --git a/megatron/core/utils.py b/megatron/core/utils.py index 70147a67192..274d375955e 100644 --- a/megatron/core/utils.py +++ b/megatron/core/utils.py @@ -969,10 +969,10 @@ def make_tp_sharded_tensor_for_checkpoint( # split onto the TP offset — mirrors make_sharded_tensors_for_checkpoint_with_gtp so direct # callers (e.g. 
VocabParallelEmbedding, which can't use that wrapper because it needs # allow_shape_mismatch) still save GTP weights with correct global offsets/shape. - from megatron.experimental.gtp import HAVE_GTP + from megatron.core.tensor_parallel.gtp import HAVE_GTP if HAVE_GTP: - from megatron.experimental.gtp import GTPShardedParam + from megatron.core.tensor_parallel.gtp import GTPShardedParam if isinstance(tensor, GTPShardedParam): gtp_rank = get_pg_rank(tensor.group) @@ -1026,10 +1026,10 @@ def make_sharded_tensor_for_checkpoint(tensor, key, prepend_offsets=(), replica_ (default: None, falls back to parallel_state) """ # Sanity guard. - from megatron.experimental.gtp import HAVE_GTP + from megatron.core.tensor_parallel.gtp import HAVE_GTP if HAVE_GTP: - from megatron.experimental.gtp import GTPShardedParam + from megatron.core.tensor_parallel.gtp import GTPShardedParam assert not isinstance(tensor, GTPShardedParam), ( f"GTPShardedParam '{key}' reached make_sharded_tensor_for_checkpoint (the replicated " diff --git a/megatron/experimental/__init__.py b/megatron/experimental/__init__.py deleted file mode 100644 index b5dff7b5663..00000000000 --- a/megatron/experimental/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. diff --git a/megatron/experimental/gtp/__init__.py b/megatron/experimental/gtp/__init__.py deleted file mode 100644 index d36c4e4074d..00000000000 --- a/megatron/experimental/gtp/__init__.py +++ /dev/null @@ -1,63 +0,0 @@ -# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - -"""Generalized Tensor Parallelism (GTP) public API. - -GTP shards weight tensors 1/N across a GTP process group along ``out_features`` -and materializes them on-demand via async all-gather. The implementation lives -in ``megatron.experimental.gtp.generalized_tensor_parallelism`` and depends on -TransformerEngine's FP8 / MXFP8 / NVFP4 primitives. 
- -If TransformerEngine is missing or too old, the inner import fails and the -package exposes only ``HAVE_GTP = False``. No core module imports GTP symbols -unconditionally at module load time. -""" - -try: - from megatron.experimental.gtp.generalized_tensor_parallelism import ( - GTP_CONFIG, - GTPChain, - GTPEmbeddingWeight, - GTPShardedParam, - classify_gtp_chains, - get_ag_stream, - get_rs_stream, - make_sharded_tensors_for_checkpoint_with_gtp, - reset_gtp_quantize_cache, - reset_gtp_state, - set_cuda_graph_mempool, - set_cuda_graph_modules, - tag_gtp_params_with_names, - update_gtp_config, - wait_async_comms, - wait_for_gtp_grad_reduction_on_current_stream, - wrap_module_params_gtp, - ) - - HAVE_GTP = True -except ImportError: - # GTP requires TransformerEngine with the GTP hook registry; when it's - # unavailable only ``HAVE_GTP`` is exposed. Consumers import the other - # symbols lazily under an ``if HAVE_GTP:`` guard, so no fallbacks are needed. - HAVE_GTP = False - - -__all__ = [ - "HAVE_GTP", - "GTP_CONFIG", - "GTPChain", - "GTPEmbeddingWeight", - "GTPShardedParam", - "classify_gtp_chains", - "get_ag_stream", - "get_rs_stream", - "make_sharded_tensors_for_checkpoint_with_gtp", - "reset_gtp_quantize_cache", - "reset_gtp_state", - "set_cuda_graph_mempool", - "set_cuda_graph_modules", - "tag_gtp_params_with_names", - "update_gtp_config", - "wait_async_comms", - "wait_for_gtp_grad_reduction_on_current_stream", - "wrap_module_params_gtp", -] diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py index 42462af109a..47083b0e574 100644 --- a/megatron/training/arguments.py +++ b/megatron/training/arguments.py @@ -1472,7 +1472,7 @@ def validate_args(args, defaults={}): # Propagate --fp8-param-gather into GTPConfig: enables optimizer-side # FP32->FP8 cast for GTP shards, so the forward skips BF16->FP8. 
if getattr(args, 'fp8_param_gather', False): - from megatron.experimental.gtp import update_gtp_config + from megatron.core.tensor_parallel.gtp import update_gtp_config update_gtp_config(fp8_param_gather=True) warn_rank_0( diff --git a/megatron/training/initialize.py b/megatron/training/initialize.py index b955f62109e..8aa882b69e2 100644 --- a/megatron/training/initialize.py +++ b/megatron/training/initialize.py @@ -358,7 +358,7 @@ def _initialize_distributed(get_embedding_ranks, get_position_embedding_ranks, s print("model parallel is already initialized") else: if args.gtp_weight_remat_size > 1 or args.expert_gtp_weight_remat_size > 1: - from megatron.experimental.gtp import HAVE_GTP + from megatron.core.tensor_parallel.gtp import HAVE_GTP assert HAVE_GTP, ( "GTP requires TransformerEngine >= 2.17. " diff --git a/megatron/training/training.py b/megatron/training/training.py index e784851cff2..13463fee32a 100644 --- a/megatron/training/training.py +++ b/megatron/training/training.py @@ -114,6 +114,7 @@ get_rerun_state_machine, ) from megatron.core.resharding.refit import swap_model_weights +from megatron.core.tensor_parallel.gtp import HAVE_GTP from megatron.core.transformer.cuda_graphs import TECudaGraphHelper from megatron.core.transformer.experimental_attention_variant.dsa import DSAIndexerLossLoggingHelper from megatron.core.transformer.module import Float16Module @@ -133,7 +134,6 @@ get_pg_size, unwrap_model, ) -from megatron.experimental.gtp import HAVE_GTP from megatron.training.checkpointing import ( checkpoint_exists, get_loaded_iteration, @@ -307,7 +307,7 @@ def reset_gtp_quantize_cache_after_load(model): """ if not HAVE_GTP: return - from megatron.experimental.gtp import reset_gtp_quantize_cache + from megatron.core.tensor_parallel.gtp import reset_gtp_quantize_cache for m in model: reset_gtp_quantize_cache(m) @@ -1722,7 +1722,7 @@ def get_model(model_provider_func, model_type=ModelType.encoder_or_decoder, wrap getattr(args, 'gtp_weight_remat_size', 1) 
> 1 or getattr(args, 'expert_gtp_weight_remat_size', 1) > 1 ): - from megatron.experimental.gtp import update_gtp_config + from megatron.core.tensor_parallel.gtp import update_gtp_config if getattr(args, 'fp4', None) is not None: update_gtp_config(pad_for_alignment=16) @@ -1786,7 +1786,7 @@ def build_model(): getattr(args, 'gtp_weight_remat_size', 1) > 1 or getattr(args, 'expert_gtp_weight_remat_size', 1) > 1 ): - from megatron.experimental.gtp import ( + from megatron.core.tensor_parallel.gtp import ( GTP_CONFIG, classify_gtp_chains, reset_gtp_state, diff --git a/pyproject.toml b/pyproject.toml index ce3b0668482..4e1d24b506d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,11 +13,6 @@ include = [ "megatron.core.*", "megatron.training", "megatron.training.*", - "megatron.experimental", - "megatron.experimental.*", -] -exclude = [ - "megatron.experimental.gtp.images", # documentation diagrams, not runtime code ] [tool.setuptools.dynamic] diff --git a/tests/unit_tests/generalized_tensor_parallel/gtp_test_utils.py b/tests/unit_tests/generalized_tensor_parallel/gtp_test_utils.py index 7919ee7c420..7af4c4c83bb 100644 --- a/tests/unit_tests/generalized_tensor_parallel/gtp_test_utils.py +++ b/tests/unit_tests/generalized_tensor_parallel/gtp_test_utils.py @@ -9,7 +9,7 @@ from transformer_engine.pytorch import is_mxfp8_available, is_nvfp4_available from transformer_engine.pytorch.quantization import FP8GlobalStateManager -from megatron.experimental.gtp import GTPShardedParam +from megatron.core.tensor_parallel.gtp import GTPShardedParam from tests.unit_tests.test_utilities import Utils # --------------------------------------------------------------------------- diff --git a/tests/unit_tests/generalized_tensor_parallel/test_attention_gtp.py b/tests/unit_tests/generalized_tensor_parallel/test_attention_gtp.py index d14bffcdbf4..8b983bef032 100644 --- a/tests/unit_tests/generalized_tensor_parallel/test_attention_gtp.py +++ 
b/tests/unit_tests/generalized_tensor_parallel/test_attention_gtp.py @@ -13,14 +13,14 @@ import torch import torch.distributed as dist -from megatron.experimental.gtp import HAVE_GTP +from megatron.core.tensor_parallel.gtp import HAVE_GTP if not HAVE_GTP: pytest.skip("GTP requires TransformerEngine >= 2.17", allow_module_level=True) from transformer_engine.pytorch import fp8_autocast -from megatron.experimental.gtp import GTPShardedParam +from megatron.core.tensor_parallel.gtp import GTPShardedParam from tests.unit_tests.generalized_tensor_parallel.gtp_test_utils import ( _requires_mxfp8, _run_distributed, diff --git a/tests/unit_tests/generalized_tensor_parallel/test_gtp.py b/tests/unit_tests/generalized_tensor_parallel/test_gtp.py index be495a683af..3ebff6b8e56 100644 --- a/tests/unit_tests/generalized_tensor_parallel/test_gtp.py +++ b/tests/unit_tests/generalized_tensor_parallel/test_gtp.py @@ -38,7 +38,7 @@ import torch.distributed as dist import torch.nn as nn -from megatron.experimental.gtp import HAVE_GTP +from megatron.core.tensor_parallel.gtp import HAVE_GTP if not HAVE_GTP: pytest.skip("GTP requires TransformerEngine >= 2.17", allow_module_level=True) @@ -48,9 +48,12 @@ from transformer_engine.pytorch import fp8_autocast from transformer_engine.pytorch.quantized_tensor import QuantizedTensor -import megatron.experimental.gtp.generalized_tensor_parallelism as gtp_module -from megatron.experimental.gtp import GTPShardedParam, wrap_module_params_gtp -from megatron.experimental.gtp.generalized_tensor_parallelism import GTPWeightCache, GTPWeightState +import megatron.core.tensor_parallel.generalized_tensor_parallelism as gtp_module +from megatron.core.tensor_parallel.generalized_tensor_parallelism import ( + GTPWeightCache, + GTPWeightState, +) +from megatron.core.tensor_parallel.gtp import GTPShardedParam, wrap_module_params_gtp from tests.unit_tests.generalized_tensor_parallel.gtp_test_utils import ( _make_gtp_grouped_linear, _make_gtp_linear, diff --git 
a/tests/unit_tests/generalized_tensor_parallel/test_gtp_cudagraph_grad.py b/tests/unit_tests/generalized_tensor_parallel/test_gtp_cudagraph_grad.py index cd5ae315da0..5ef0701034c 100644 --- a/tests/unit_tests/generalized_tensor_parallel/test_gtp_cudagraph_grad.py +++ b/tests/unit_tests/generalized_tensor_parallel/test_gtp_cudagraph_grad.py @@ -16,11 +16,11 @@ import pytest import torch +from megatron.core.tensor_parallel.gtp import HAVE_GTP from megatron.core.transformer.cuda_graphs import ( _backup_grads_before_capture, _restore_grads_after_capture, ) -from megatron.experimental.gtp import HAVE_GTP if not HAVE_GTP: pytest.skip("GTP requires TE with hook registry", allow_module_level=True) diff --git a/tests/unit_tests/generalized_tensor_parallel/test_gtp_dcp.py b/tests/unit_tests/generalized_tensor_parallel/test_gtp_dcp.py index 88b0e162960..9c1b054fe54 100644 --- a/tests/unit_tests/generalized_tensor_parallel/test_gtp_dcp.py +++ b/tests/unit_tests/generalized_tensor_parallel/test_gtp_dcp.py @@ -14,12 +14,12 @@ import torch.distributed as dist from megatron.core.dist_checkpointing import ShardedTensor -from megatron.experimental.gtp import HAVE_GTP +from megatron.core.tensor_parallel.gtp import HAVE_GTP if not HAVE_GTP: pytest.skip("GTP requires TE with hook registry", allow_module_level=True) -from megatron.experimental.gtp import ( # noqa: E402 +from megatron.core.tensor_parallel.gtp import ( # noqa: E402 GTP_CONFIG, GTPShardedParam, make_sharded_tensors_for_checkpoint_with_gtp, @@ -654,13 +654,13 @@ def _worker_mamba_inproj_optim_param_map(rank, world_size, port): from megatron.core.process_groups_config import ProcessGroupCollection from megatron.core.ssm.mamba_layer import MambaLayer, MambaLayerSubmodules from megatron.core.ssm.mamba_mixer import MambaMixer, MambaMixerSubmodules - from megatron.core.tensor_parallel.random import model_parallel_cuda_manual_seed - from megatron.core.transformer.spec_utils import ModuleSpec - from 
megatron.core.transformer.transformer_config import TransformerConfig - from megatron.experimental.gtp import ( + from megatron.core.tensor_parallel.gtp import ( make_sharded_tensors_for_checkpoint_with_gtp, tag_gtp_params_with_names, ) + from megatron.core.tensor_parallel.random import model_parallel_cuda_manual_seed + from megatron.core.transformer.spec_utils import ModuleSpec + from megatron.core.transformer.transformer_config import TransformerConfig ps.destroy_model_parallel() ps.initialize_model_parallel( diff --git a/tests/unit_tests/generalized_tensor_parallel/test_gtp_grad_correctness.py b/tests/unit_tests/generalized_tensor_parallel/test_gtp_grad_correctness.py index 1d1a1827961..831cf1c13e1 100644 --- a/tests/unit_tests/generalized_tensor_parallel/test_gtp_grad_correctness.py +++ b/tests/unit_tests/generalized_tensor_parallel/test_gtp_grad_correctness.py @@ -25,12 +25,12 @@ import torch import torch.distributed as dist -from megatron.experimental.gtp import HAVE_GTP +from megatron.core.tensor_parallel.gtp import HAVE_GTP if not HAVE_GTP: pytest.skip("GTP requires TransformerEngine >= 2.17", allow_module_level=True) -from megatron.experimental.gtp import GTPShardedParam +from megatron.core.tensor_parallel.gtp import GTPShardedParam from tests.unit_tests.generalized_tensor_parallel.gtp_test_utils import ( # noqa: F401 (autouse, module-scoped: initializes the dist PG); noqa: F401 (autouse) _run_distributed, _torchrun_dist_init, diff --git a/tests/unit_tests/generalized_tensor_parallel/test_gtp_loss_correctness.py b/tests/unit_tests/generalized_tensor_parallel/test_gtp_loss_correctness.py index 6427df482ec..1d1630dc585 100644 --- a/tests/unit_tests/generalized_tensor_parallel/test_gtp_loss_correctness.py +++ b/tests/unit_tests/generalized_tensor_parallel/test_gtp_loss_correctness.py @@ -16,14 +16,14 @@ import torch import torch.distributed as dist -from megatron.experimental.gtp import HAVE_GTP +from megatron.core.tensor_parallel.gtp import HAVE_GTP if not 
HAVE_GTP: pytest.skip("GTP requires TransformerEngine >= 2.17", allow_module_level=True) from transformer_engine.pytorch import fp8_autocast -from megatron.experimental.gtp import GTPShardedParam +from megatron.core.tensor_parallel.gtp import GTPShardedParam from tests.unit_tests.generalized_tensor_parallel.gtp_test_utils import ( # noqa: F401 (autouse, module-scoped: initializes the dist PG); noqa: F401 (autouse) _requires_mxfp8, _run_distributed, diff --git a/tests/unit_tests/generalized_tensor_parallel/test_gtp_muon_dcp.py b/tests/unit_tests/generalized_tensor_parallel/test_gtp_muon_dcp.py index 8a1bc967566..6b8fb1fe51c 100644 --- a/tests/unit_tests/generalized_tensor_parallel/test_gtp_muon_dcp.py +++ b/tests/unit_tests/generalized_tensor_parallel/test_gtp_muon_dcp.py @@ -53,7 +53,7 @@ def test_gtp_muon_moe_save_load(self, tmp_path_dist_ckpt): import pytest - from megatron.experimental.gtp import HAVE_GTP + from megatron.core.tensor_parallel.gtp import HAVE_GTP if not HAVE_GTP: pytest.skip("GTP requires TE with hook registry") @@ -63,7 +63,7 @@ def test_gtp_muon_moe_save_load(self, tmp_path_dist_ckpt): os.environ['MEGATRON_GTP_FORCE_ENABLE'] = '1' from megatron.core import parallel_state as ps from megatron.core.tensor_parallel import model_parallel_cuda_manual_seed - from megatron.experimental.gtp import GTP_CONFIG, GTPShardedParam, update_gtp_config + from megatron.core.tensor_parallel.gtp import GTP_CONFIG, GTPShardedParam, update_gtp_config from tests.unit_tests.dist_checkpointing.utils import initialize_moe_model Utils.initialize_model_parallel(1, 1) # bootstrap torch.distributed + model parallel diff --git a/tests/unit_tests/generalized_tensor_parallel/test_mamba_gtp.py b/tests/unit_tests/generalized_tensor_parallel/test_mamba_gtp.py index de0f8cf358a..5d81c868722 100644 --- a/tests/unit_tests/generalized_tensor_parallel/test_mamba_gtp.py +++ b/tests/unit_tests/generalized_tensor_parallel/test_mamba_gtp.py @@ -12,14 +12,14 @@ import torch import 
torch.distributed as dist -from megatron.experimental.gtp import HAVE_GTP +from megatron.core.tensor_parallel.gtp import HAVE_GTP if not HAVE_GTP: pytest.skip("GTP requires TransformerEngine >= 2.17", allow_module_level=True) from transformer_engine.pytorch import fp8_autocast -from megatron.experimental.gtp import GTPShardedParam +from megatron.core.tensor_parallel.gtp import GTPShardedParam from tests.unit_tests.generalized_tensor_parallel.gtp_test_utils import ( _requires_mxfp8, _run_distributed, diff --git a/tests/unit_tests/generalized_tensor_parallel/test_moe_egtp.py b/tests/unit_tests/generalized_tensor_parallel/test_moe_egtp.py index 6320750fd68..d949cabcdd2 100644 --- a/tests/unit_tests/generalized_tensor_parallel/test_moe_egtp.py +++ b/tests/unit_tests/generalized_tensor_parallel/test_moe_egtp.py @@ -12,15 +12,15 @@ import torch import torch.distributed as dist -from megatron.experimental.gtp import HAVE_GTP +from megatron.core.tensor_parallel.gtp import HAVE_GTP if not HAVE_GTP: pytest.skip("GTP requires TransformerEngine >= 2.17", allow_module_level=True) from transformer_engine.pytorch import fp8_autocast +from megatron.core.tensor_parallel.gtp import GTPShardedParam from megatron.core.transformer.moe.moe_utils import get_default_pg_collection -from megatron.experimental.gtp import GTPShardedParam from tests.unit_tests.generalized_tensor_parallel.gtp_test_utils import ( _requires_mxfp8, _run_distributed, diff --git a/tests/unit_tests/generalized_tensor_parallel/test_tp_gtp.py b/tests/unit_tests/generalized_tensor_parallel/test_tp_gtp.py index b30c4a3683d..2c206d63be6 100644 --- a/tests/unit_tests/generalized_tensor_parallel/test_tp_gtp.py +++ b/tests/unit_tests/generalized_tensor_parallel/test_tp_gtp.py @@ -26,14 +26,14 @@ import torch import torch.distributed as dist -from megatron.experimental.gtp import HAVE_GTP +from megatron.core.tensor_parallel.gtp import HAVE_GTP if not HAVE_GTP: pytest.skip("GTP requires TransformerEngine >= 2.17", 
allow_module_level=True) import transformer_engine.pytorch as te -from megatron.experimental.gtp import GTPShardedParam +from megatron.core.tensor_parallel.gtp import GTPShardedParam from tests.unit_tests.generalized_tensor_parallel.gtp_test_utils import ( _make_gtp_linear, _requires_multi_gpu, From 14464b5675d1c247bf8c4a64c3fdd4a3e771ccca Mon Sep 17 00:00:00 2001 From: Shiqing Fan Date: Fri, 26 Jun 2026 01:44:08 -0700 Subject: [PATCH 55/59] GTP+CG: code clean: replace GTP bwd Phase-2 completion event with a runner.stream fence Signed-off-by: Shiqing Fan --- .../generalized_tensor_parallelism.py | 11 ++-- megatron/core/transformer/cuda_graphs.py | 59 ++++--------------- 2 files changed, 16 insertions(+), 54 deletions(-) diff --git a/megatron/core/tensor_parallel/generalized_tensor_parallelism.py b/megatron/core/tensor_parallel/generalized_tensor_parallelism.py index ed5c95bbad0..e2c69d9ff61 100644 --- a/megatron/core/tensor_parallel/generalized_tensor_parallelism.py +++ b/megatron/core/tensor_parallel/generalized_tensor_parallelism.py @@ -258,9 +258,8 @@ def get_rs_stream(chain_id: str = GTPChain.GRAPHED.value, group=None) -> torch.c def wait_for_gtp_grad_reduction_on_current_stream() -> None: """Fence the current stream against all GTP backward grad work before the DP gradient sync. - Drains in-flight AG/RS on the side streams (eager expert backward may still be writing - main_grad) and waits on each CUDA-graph runner's captured dense-GTP bwd Phase 2 - (main_grad.add_) completion event. No-op when GTP is inactive (empty streams / events). + Drains the eager AG/RS side streams, then waits on each CG runner's replay stream + (its tail = captured Phase 2 main_grad.add_). No-op when GTP is inactive. 
""" wait_async_comms() cur = torch.cuda.current_stream() @@ -269,10 +268,10 @@ def wait_for_gtp_grad_reduction_on_current_stream() -> None: for s in _RS_STREAMS.values(): cur.wait_stream(s) # Local import: cuda_graphs imports this module, so a module-level import would be circular. - from megatron.core.transformer.cuda_graphs import get_gtp_phase2_completion_events + from megatron.core.transformer.cuda_graphs import get_gtp_runner_streams - for evt in get_gtp_phase2_completion_events(): - cur.wait_event(evt) + for s in get_gtp_runner_streams(): + cur.wait_stream(s) @dataclass diff --git a/megatron/core/transformer/cuda_graphs.py b/megatron/core/transformer/cuda_graphs.py index 2d506b9aa3c..7ed5dbda1b0 100644 --- a/megatron/core/transformer/cuda_graphs.py +++ b/megatron/core/transformer/cuda_graphs.py @@ -97,16 +97,14 @@ logger = logging.getLogger(__name__) -_GTP_PHASE2_COMPLETION_EVENTS: List[torch.cuda.Event] = [] +_GTP_RUNNER_STREAMS: List[torch.cuda.Stream] = [] -def get_gtp_phase2_completion_events() -> List[torch.cuda.Event]: - """Return all GTP bwd Phase 2 completion events from CG runners. - - finalize_model_grads waits on these before reading main_grad, - ensuring captured main_grad.add_ on runner.stream has completed. +def get_gtp_runner_streams() -> List[torch.cuda.Stream]: + """Replay streams of all GTP CG runners; finalize_model_grads waits on these + (tail = captured Phase 2 main_grad.add_) before reading main_grad. 
""" - return _GTP_PHASE2_COMPLETION_EVENTS + return _GTP_RUNNER_STREAMS def _set_skip_fp8_weight_update_tensor(skip: bool) -> None: @@ -610,7 +608,7 @@ def delete_cuda_graphs(): _CudagraphGlobalRecord.cudagraph_created = False _CudagraphGlobalRecord.cudagraph_record = [] _CudagraphGlobalRecord.cudagraph_inference_record = [] - _GTP_PHASE2_COMPLETION_EVENTS.clear() + _GTP_RUNNER_STREAMS.clear() # TODO: Optional?: Force garbage collection to clean up memory gc.collect() @@ -662,29 +660,6 @@ class _CudagraphReplayNode(torch.autograd.Function): """Replays the runner's cudagraphs with autograd. Handles copying data into/out of the cudagraph io and fp8/fp4 if used.""" - # pylint: disable=line-too-long - ## Capture-time sync schemes (wait_async_comms is called INSIDE the captured - # graph so the drain ops are embedded in the graph itself, not before replay). - # - # Fwd — single-phase drain (full join before completion_event): - # runner_N.stream: GEMM ──▶ wait_async_comms ▶ _wait_side_streams ──fwd_completion_event.record - # ag_stream: AG ──────────────────────▶ ag_event.record - # main_stream: fwd_completion_event.wait ▶ [next runner] - # main_stream unblocks after ag/rs streams are fully drained, so eager - # ops that follow see completed data. - # - # Bwd — phased drain (cross-graph RS overlap, see _CudaGraphRunner.backward): - # runner_N.stream: GEMM ─▶ Phase 1 (drain AG) ─▶ fence ─▶ bwd_completion_event.record ─▶ Phase 2 (wait_side_streams) ─▶ phase2_event - # ag_stream: AG ──────────────────▶ ag_event.record ▶ fence_event.record - # rs_stream: RS_issue ──▶ handle.wait ▶ rs_event.record ▶ main_grad.add_ ─────────────────────────────────────────────────────┐ - # main_stream: bwd_completion_event.wait ▶ [next runner] │ - # phase2_event.wait (in finalize_model_grads) ◀─┘ - # main_grad.add_ runs on rs_stream right after NCCL RS — concurrent with - # Phase 1 AG drain. 
By the time bwd_completion_event fires and the next - # runner launches, the add_ is done (no SM saturation blocking overlap). - # finalize_model_grads waits phase2_completion_event before DP grad sync. - # pylint: enable=line-too-long - @staticmethod def forward(ctx, runner, is_first_microbatch, *inputs): """Replay the forward graph of the passed runner.""" @@ -797,13 +772,11 @@ def backward(ctx, *grads): for param, grad_added in runner.groundtruth_grad_added_to_main_grad.items(): param.grad_added_to_main_grad = grad_added - # DDP's grad-ready hook is silenced during capture and not re-fired at replay, so fire it - # here to let DDP RS overlap backward. Fire on each param's rs_stream (the one that ran its - # captured main_grad.add_) so FIFO orders DDP-RS after that write; wait_event guards - # cross-substream Phase 2. Plan precomputed in create_bwd_graph; main_stream stays free. + # DDP grad-ready hook is silenced at capture/replay, so fire it here (on each param's + # rs_stream, after wait_stream(runner.stream) fences Phase 2) to let DDP RS overlap bwd. if runner.gtp_remat: for gtp_rs_stream, params in runner._gtp_finalize_hook_plan: - gtp_rs_stream.wait_event(runner.bwd_phase2_completion_event) + gtp_rs_stream.wait_stream(runner.stream) with torch.cuda.stream(gtp_rs_stream): for param in params: hook = getattr(param, '_grad_accum_hook', None) @@ -924,13 +897,8 @@ def __init__( # NCCL into runner_stream before bwd_completion_event fires. if get_expert_gtp_weight_remat_world_size() > 1: self._register_gtp_side_streams(get_expert_gtp_weight_remat_group()) - # Records after Phase 2 (RS drain + main_grad.add_) completes - # on runner.stream. finalize_model_grads waits on this before - # reading main_grad for the DP gradient sync. - self.bwd_phase2_completion_event = torch.cuda.Event( - external=True, interprocess=True - ) - _GTP_PHASE2_COMPLETION_EVENTS.append(self.bwd_phase2_completion_event) + # Registered for finalize_model_grads to wait on (Phase 2 fence). 
+ _GTP_RUNNER_STREAMS.append(self.stream) if self.fp8_enabled: self.fp8_recipe = FP8GlobalStateManager.get_fp8_recipe() @@ -1380,11 +1348,6 @@ def create_bwd_graph(self): if self.bwd_side_streams: self._wait_side_streams(self.bwd_side_streams) - if self.gtp_remat: - # Phase 2 + side-stream join done — record so - # finalize_model_grads can wait for main_grad.add_ completion. - self.bwd_phase2_completion_event.record() - if self.use_stream and not self.gtp_remat: # Non-GTP path: record after the side-stream join. self.bwd_completion_event.record() From 083c15f5c2877e75836886be4cfca811e909c6a6 Mon Sep 17 00:00:00 2001 From: Shiqing Fan Date: Fri, 26 Jun 2026 01:55:18 -0700 Subject: [PATCH 56/59] add gtp public API file Signed-off-by: Shiqing Fan --- megatron/core/tensor_parallel/gtp.py | 63 ++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 megatron/core/tensor_parallel/gtp.py diff --git a/megatron/core/tensor_parallel/gtp.py b/megatron/core/tensor_parallel/gtp.py new file mode 100644 index 00000000000..6d31c7fc7b2 --- /dev/null +++ b/megatron/core/tensor_parallel/gtp.py @@ -0,0 +1,63 @@ +# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +"""Generalized Tensor Parallelism (GTP) public API. + +GTP shards weight tensors 1/N across a GTP process group along ``out_features`` +and materializes them on-demand via async all-gather. The implementation lives +in ``megatron.core.tensor_parallel.generalized_tensor_parallelism`` and depends +on TransformerEngine's FP8 / MXFP8 / NVFP4 primitives. + +If TransformerEngine is missing or too old, the inner import fails and the +module exposes only ``HAVE_GTP = False``. No core module imports GTP symbols +unconditionally at module load time. 
+""" + +try: + from megatron.core.tensor_parallel.generalized_tensor_parallelism import ( + GTP_CONFIG, + GTPChain, + GTPEmbeddingWeight, + GTPShardedParam, + classify_gtp_chains, + get_ag_stream, + get_rs_stream, + make_sharded_tensors_for_checkpoint_with_gtp, + reset_gtp_quantize_cache, + reset_gtp_state, + set_cuda_graph_mempool, + set_cuda_graph_modules, + tag_gtp_params_with_names, + update_gtp_config, + wait_async_comms, + wait_for_gtp_grad_reduction_on_current_stream, + wrap_module_params_gtp, + ) + + HAVE_GTP = True +except ImportError: + # GTP requires TransformerEngine with the GTP hook registry; when it's + # unavailable only ``HAVE_GTP`` is exposed. Consumers import the other + # symbols lazily under an ``if HAVE_GTP:`` guard, so no fallbacks are needed. + HAVE_GTP = False + + +__all__ = [ + "HAVE_GTP", + "GTP_CONFIG", + "GTPChain", + "GTPEmbeddingWeight", + "GTPShardedParam", + "classify_gtp_chains", + "get_ag_stream", + "get_rs_stream", + "make_sharded_tensors_for_checkpoint_with_gtp", + "reset_gtp_quantize_cache", + "reset_gtp_state", + "set_cuda_graph_mempool", + "set_cuda_graph_modules", + "tag_gtp_params_with_names", + "update_gtp_config", + "wait_async_comms", + "wait_for_gtp_grad_reduction_on_current_stream", + "wrap_module_params_gtp", +] From 8aa2b6d3e7c6a1cdaa82b0519b38c15e351d1d74 Mon Sep 17 00:00:00 2001 From: Shiqing Fan Date: Fri, 26 Jun 2026 02:26:37 -0700 Subject: [PATCH 57/59] GTP: clean up generalized_tensor_parallelism after the core move Signed-off-by: Shiqing Fan --- .../generalized_tensor_parallelism.py | 454 +++++++----------- 1 file changed, 186 insertions(+), 268 deletions(-) diff --git a/megatron/core/tensor_parallel/generalized_tensor_parallelism.py b/megatron/core/tensor_parallel/generalized_tensor_parallelism.py index e2c69d9ff61..1c9faf6b825 100644 --- a/megatron/core/tensor_parallel/generalized_tensor_parallelism.py +++ b/megatron/core/tensor_parallel/generalized_tensor_parallelism.py @@ -7,8 +7,11 @@ prefetch chain + 
ticket-based buffer cache co-designed for CUDA graph capture/replay. Quantized AG (FP8 / MXFP8 / NVFP4) composes with the sharding for compounding bandwidth reduction. + +See ``docs/api-guide/core/generalized_tensor_parallel.md`` for design and usage. """ +import logging import math import os import re @@ -22,6 +25,10 @@ import torch from packaging.version import Version +from megatron.core.utils import log_single_rank + +logger = logging.getLogger(__name__) + _GTP_TE_MIN_VERSION = Version("2.17") try: @@ -30,9 +37,9 @@ _te_version = Version(te.__version__) if _te_version < _GTP_TE_MIN_VERSION and not os.environ.get("MEGATRON_GTP_FORCE_ENABLE"): raise ImportError( - f"megatron.core.tensor_parallel.gtp requires TransformerEngine >= {_GTP_TE_MIN_VERSION} " - f"(found {_te_version}). Set MEGATRON_GTP_FORCE_ENABLE=1 to bypass this check " - "when using a custom TE build that includes the GTP hook registry." + f"megatron.core.tensor_parallel.gtp requires TransformerEngine " + f">= {_GTP_TE_MIN_VERSION} (found {_te_version}). Set MEGATRON_GTP_FORCE_ENABLE=1 " + "to bypass this check when using a custom TE build with the GTP hook registry." ) import transformer_engine_torch as tex @@ -147,10 +154,9 @@ def _classify_param_chain(param_name: str) -> "GTPChain": def classify_gtp_chains(model) -> None: """Walk model.named_parameters() and set chain_id on every GTPShardedParam. - Call once at init, AFTER set_cuda_graph_modules() and BEFORE the first fwd - of any graphed param. Raises if an already chain-initialized param would - be reclassified into a different chain (its prev/next links are already - wired into the wrong list). + Call once at init, AFTER set_cuda_graph_modules() and BEFORE the first fwd of any + graphed param. Raises if an already-initialized param would be reclassified into a + different chain (its prev/next links are already wired into the wrong list). 
""" conflicts = [] for name, param in model.named_parameters(): @@ -162,9 +168,8 @@ def classify_gtp_chains(model) -> None: continue param.chain_id = target - # Bwd-prefetch opt-out: embedding.word_embeddings.weight does not need - # an AG in the bwd pass (its wgrad is a scatter-add on sharded rows - # and its input has no dgrad). Skipping its bwd AG saves one collective. + # Bwd-prefetch opt-out: embedding weight needs no bwd AG (wgrad is a + # scatter-add on sharded rows, input has no dgrad) — saves one collective. if "embedding" in name: param._need_weight_prefetch_bwd = False if conflicts: @@ -202,10 +207,8 @@ class GTPWeightState(Enum): def _wgrad_pool_get(shape: tuple, dtype: torch.dtype, device) -> torch.Tensor: - """Get a pool buffer or allocate fresh. Tagged so _wgrad_pool_put accepts - only pool-owned buffers — callers that don't use _wgrad_pool_get (e.g. - Megatron layers.py wgrad GEMM, aten F.embedding bwd) fall through to the - caching allocator on release.""" + """Get a pool buffer or allocate fresh, tagged so _wgrad_pool_put accepts only + pool-owned buffers (other callers fall through to the caching allocator on release).""" key = (shape, dtype) pool = _wgrad_buf_pool.get(key) if pool: @@ -230,11 +233,8 @@ def _wgrad_pool_put(buf: torch.Tensor): def _stream_key(chain_id: str, group) -> tuple: """Key for the per-(chain, group) AG/RS stream dicts. - Two partitioning axes: - - chain_id: captured (GRAPHED) vs eager (UNGRAPHED) ops must not share - a stream (eager ops would contaminate capture/replay state). - - group: independent NCCL communicators (e.g. GTP vs EGTP) get their - own user-level stream to avoid cross-group serialization. + Partitioned on two axes: chain_id (captured GRAPHED vs eager UNGRAPHED ops must not + share a stream) and group (independent NCCL comms, e.g. GTP vs EGTP, avoid serialization). 
""" return (chain_id, id(group) if group is not None else 0) @@ -281,15 +281,12 @@ class GTPConfig: pad_for_alignment: int = 16 check_param_states: bool = False weight_prefetch: bool = True - # When True (default), wgrad reduce-scatter for non-chain-head GTP - # params uses async_op=True; finalize (handle.wait + main_grad.add_) - # runs in the cascade walk of a later bwd call, allowing RS-compute - # overlap. When False, every wgrad RS is synchronous and finalizes - # inline, at the cost of that overlap. + # True (default): non-chain-head wgrad RS is async_op=True and finalizes + # (handle.wait + main_grad.add_) in a later bwd's cascade walk, overlapping RS with + # compute. False: every wgrad RS is synchronous + inline (no overlap). async_reduction: bool = True - # GTP companion to Megatron --fp8-param-gather: optimizer casts FP32 master - # directly into GTPShardedParam.quantized; forward's _quantize_if_needed - # short-circuits to the cached FP8. Moves BF16->FP8 off the fwd critical path. + # GTP companion to --fp8-param-gather: optimizer casts FP32 master directly into + # GTPShardedParam.quantized; forward reuses the cached FP8 (BF16->FP8 off critical path). fp8_param_gather: bool = False @@ -318,9 +315,8 @@ def tag_gtp_params_with_names(model): def _gtp_slice_one_param(param, gtp_group, *, name=""): """Pad + slice a full-size BF16 weight to this rank's GTP shard. - Caller attaches GTP attrs (see _gtp_attach_attrs). When called from the - legacy post-init path under fp8_model_init, tensor may be a - QuantizedTensor — F.pad dequantizes it before slicing. + Caller attaches GTP attrs (see _gtp_attach_attrs). On the legacy post-init path under + fp8_model_init, tensor may be a QuantizedTensor — F.pad dequantizes it before slicing. 
""" gtp_size = gtp_group.size() gtp_rank = gtp_group.rank() @@ -348,9 +344,8 @@ def _gtp_slice_one_param(param, gtp_group, *, name=""): shard = tensor[gtp_rank * shard_size : (gtp_rank + 1) * shard_size] gtp_shard = GTPShardedParam(shard.clone()) gtp_shard.pad_length = pad_length - # Preserve the source weight's tensor-model-parallel attributes (dropped when wrapping - # into GTPShardedParam). GTP only shards TP-parallel linears, so this keeps the param - # correctly classified by param_is_not_tensor_parallel_duplicate without GTP-specific code. + # Preserve the source weight's TP attributes (dropped when wrapping into GTPShardedParam), + # so param_is_not_tensor_parallel_duplicate still classifies it without GTP-specific code. from megatron.core.tensor_parallel import copy_tensor_model_parallel_attributes copy_tensor_model_parallel_attributes(gtp_shard, param) @@ -360,8 +355,8 @@ def _gtp_slice_one_param(param, gtp_group, *, name=""): def _gtp_attach_attrs(gtp_shard, gtp_group, *, is_grouped=False, expert_idx=0): """Attach group / gtp_size / routed-expert tags and register in _GTP_PARAMS. - Kept separate from _gtp_slice_one_param so attrs land on the post-quantize - param (when quantize fires between slice and attach). + Separate from _gtp_slice_one_param so attrs land on the post-quantize param (when + quantize fires between slice and attach). """ if is_grouped: gtp_shard.expert_idx = expert_idx @@ -378,11 +373,9 @@ def _gtp_attach_attrs(gtp_shard, gtp_group, *, is_grouped=False, expert_idx=0): def wrap_module_params_gtp(module, weight_names, gtp_group, is_grouped=None): """Shard and re-register module params as GTPShardedParam. - Two call paths: - 1. Megatron-style modules (ColumnParallelLinear, etc.): full post-init slice. - 2. TE modules: per-param body no-ops because the reset_parameters hook - already produced GTPShardedParam instances. - + Two call paths: (1) Megatron-style modules (ColumnParallelLinear, etc.) 
— full post-init + slice; (2) TE modules — per-param body no-ops, since the reset_parameters hook already + produced GTPShardedParam instances. """ if gtp_group.size() == 1: return @@ -410,13 +403,11 @@ def wrap_module_params_gtp(module, weight_names, gtp_group, is_grouped=None): def gtp_slice_in_reset_parameters(module, name, param, expert_idx=0): - """Slice + attach attrs for one param. Called between init_fn(param) and - the optional quantizer(param) in TransformerEngineBaseModule.reset_parameters. - - Only fires for params in module.weight_names (the GEMM weights); - layer-norm gammas, biases, etc. are left full-size. + """Slice + attach attrs for one param, between init_fn(param) and the optional + quantizer(param) in TransformerEngineBaseModule.reset_parameters. - Returns the new GTPShardedParam or None (GTP not active for this param). + Only fires for params in module.weight_names (GEMM weights); layer-norm gammas, biases, + etc. stay full-size. Returns the new GTPShardedParam, or None (GTP not active here). """ gtp_group = getattr(module, "_gtp_group", None) if gtp_group is None or gtp_group.size() == 1: @@ -431,9 +422,8 @@ def gtp_slice_in_reset_parameters(module, name, param, expert_idx=0): def gtp_finalize_module_in_reset_parameters(module, weight_names): - """GroupedLinear-only: attach weight_list to expert 0's shard for batched - all-gather. No-op when module._gtp_is_grouped is False. - """ + """GroupedLinear-only: attach weight_list to expert 0's shard for batched all-gather + (no-op when module._gtp_is_grouped is False).""" if not getattr(module, "_gtp_is_grouped", False): return gtp_group = getattr(module, "_gtp_group", None) @@ -447,9 +437,8 @@ def gtp_finalize_module_in_reset_parameters(module, weight_names): class GTPShardHandle: """Wrapper around a ``dist`` async-work handle for a GTP AG / RS. 
- Tracks the participating shards so the wait-site can transition their - ``GTPWeightState`` and so the GTP module can prune the param from - ``_inflight_comm_params`` when the collective completes. + Tracks the participating shards so the wait-site can transition their GTPWeightState + and prune the param from _inflight_comm_params when the collective completes. """ def __init__(self, handle, gtp_shards, reduce_scatter=False): @@ -476,15 +465,13 @@ def wait(self): class GTPShardedParam(torch.nn.Parameter): """A weight parameter sharded 1/N across a GTP process group. - Materialized on-demand via async all-gather and gradient-reduced via - reduce-scatter. Carries its own prefetch-chain wiring (``prev_w`` / - ``next_w``), per-chain state, AG/RS cache tickets, and the metadata the - integrator needs to drive overlap with captured compute. + Materialized on-demand via async all-gather and gradient-reduced via reduce-scatter. + Carries its own prefetch-chain wiring (prev_w/next_w), per-chain state, AG/RS cache + tickets, and the metadata the integrator needs to overlap with captured compute. """ - # Per-chain linked-list state, keyed by chain_id (GTPChain.GRAPHED/UNGRAPHED); chains - # never cross-link (prev_w/next_w join only same-chain_id params). Call reset_gtp_state() - # before rebuilding a GTP model in the same process. + # Per-chain linked-list state, keyed by chain_id; chains never cross-link (prev_w/next_w join + # only same-chain params). Call reset_gtp_state() before rebuilding a GTP model in-process. _chain_state: Dict[str, dict] = {} # Recompute-forward prefetch cursor, keyed by chain_id; also cleared by reset_gtp_state(). 
@@ -511,7 +498,7 @@ def _get_recompute_chain_state(cls, chain_id: str) -> dict: def _buffer_link_table_row( cls, prev: "GTPShardedParam", curr: "GTPShardedParam", chain: dict ) -> None: - """Buffer one row of the prefetch-link table (flushed atomically on the second forward pass).""" + """Buffer one prefetch-link row (flushed atomically on the second forward pass).""" _W = 70 def _layer_id(name: str) -> str: @@ -555,10 +542,9 @@ def __init__(self, tensor, *args, **kwargs): self._ag_ticket_bwd = None self._prefetch_handle = None self._need_weight_prefetch = True - # Per-direction prefetch opt-outs. Default True. The embedding weight - # never needs an AG during bwd (its wgrad is a scatter-add indexed by - # token ids, and its input is non-differentiable, so no dgrad either). - # classify_gtp_chains() sets this to False for embedding.word_embeddings.weight. + # Per-direction prefetch opt-outs (default True). The embedding weight needs no bwd AG + # (wgrad is a token-indexed scatter-add, input non-differentiable). classify_gtp_chains() + # sets this False for embedding.word_embeddings.weight. self._need_weight_prefetch_bwd = True self.ag_event = torch.cuda.Event(external=True) # DDP backward hook (set by register_grad_accum_hook); invoked after @@ -572,22 +558,19 @@ def __init__(self, tensor, *args, **kwargs): self.prefetch_initialized = False self.next_w = None self.prev_w = None - # Recompute-forward prefetch chain: a SEPARATE chain (own slot) linking - # the weights re-gathered rowwise during an activation-recompute forward - # in backward. Kept distinct from state/_prefetch_handle/ag_event above so - # it never clobbers the concurrent columnwise dgrad lifecycle of the same - # weight. Self-populates lazily from the first backward's recompute-fwd - # gathers (see all_gather_and_prefetch). + # Recompute-forward prefetch chain: a SEPARATE chain (own slot) for weights re-gathered + # rowwise during an activation-recompute forward in backward. 
Distinct from the + # state/_prefetch_handle/ag_event above so it never clobbers the concurrent columnwise + # dgrad lifecycle. Self-populates from the first backward's recompute gathers. self._recompute_initialized = False self._recompute_next = None self._recompute_prev = None self._recompute_prefetch_handle = None self._recompute_ag_event = torch.cuda.Event(external=True) self._recompute_already_drained = False - # Chain identity (GTPChain.GRAPHED / GTPChain.UNGRAPHED). Defaults to - # UNGRAPHED as a safe fallback; classify_gtp_chains(model) walks the - # model at init time (after set_cuda_graph_modules) and reclassifies - # based on param name + active cuda_graph_modules. + # Chain identity (GRAPHED/UNGRAPHED). Defaults to UNGRAPHED; classify_gtp_chains(model) + # walks the model at init (after set_cuda_graph_modules) and reclassifies on param name + + # active cuda_graph_modules. self.chain_id = GTPChain.UNGRAPHED.value # Grouped gemm self.is_routed_expert = False @@ -622,10 +605,9 @@ def _configure_quantizer(q, group): q.with_amax_reduction = True q.amax_reduction_group = group q.internal = False - # MXFP8 scales must stay in compact (unswizzled) layout so that - # per-shard scale_inv can be all-gathered via byte concatenation. - # GEMM-swizzled scales from independent shards don't compose into - # a valid swizzled layout for the full tensor after AG. + # MXFP8 scales must stay compact (unswizzled) so per-shard scale_inv can be + # all-gathered by byte concatenation. GEMM-swizzled scales from independent + # shards don't compose into a valid swizzled layout for the full tensor. 
q.optimize_for_gemm = not isinstance(q, MXFP8Quantizer) return q @@ -644,16 +626,15 @@ def _configure_quantizer(q, group): with _graphed_alloc(getattr(weight, "chain_id", GTPChain.UNGRAPHED.value)): weight.quantized = weight._quantizer.quantize(weight.get_padded_shard()) weight.quantized.is_routed_expert = getattr(weight, "is_routed_expert", False) - # fp8_param_gather: the init quantize above already produced a - # valid FP8 cache from the BF16 shard; flag did_cast so iter-0's - # forward _quantize_if_needed short-circuits and the redundant - # BF16->FP8 cast on iter 0 is skipped. + # fp8_param_gather: the init quantize already produced a valid FP8 cache from + # the BF16 shard; flag did_cast so iter-0 forward short-circuits and skips the + # redundant BF16->FP8 cast. if GTP_CONFIG.fp8_param_gather: weight.did_cast_to_low_precision = True @property def _weights(self): - """Return the list of individual weight shards (self for non-routed, weight_list for routed).""" + """Individual weight shards (self for non-routed, weight_list for routed).""" weights = self.weight_list if self.is_routed_expert else [self] # Only meaningful when _set_state is actively tracking transitions. if GTP_CONFIG.check_param_states: @@ -697,11 +678,10 @@ def _set_rs_state(self, new_state: GTPWeightState): self.rs_state = new_state def _get_cache_key(self, dtype, fwd: bool, reduce_scatter: bool) -> tuple: - """Build cache key using output shape + dtype. + """Build cache key from output shape + dtype. - Weights with matching gathered shape and dtype share a buffer. - For expert weights gathered in parallel, self.expert_idx distinguishes them so - each gets a distinct buffer, while same-indexed experts across layers share. + Weights with matching gathered shape and dtype share a buffer. For experts gathered + in parallel, self.expert_idx keeps each distinct; same-indexed experts across layers share. 
""" if not isinstance(dtype, torch.dtype): @@ -801,10 +781,9 @@ def _all_gather_weight(self, async_op, skip_weight_cast, cast_noop_flag, fwd, nv weights = self._weights - # 1. Transition state for async gathers. Skip during a recompute-forward: - # it gathers this weight rowwise (into _ag_ticket_fwd) while a bwd-chain - # prefetch may hold an in-flight columnwise AG state on the same weight - # (separate _ag_ticket_bwd) — clobbering it would break the dgrad consume. + # 1. Transition state for async gathers. Skip during recompute-forward: it gathers + # rowwise (_ag_ticket_fwd) while a bwd-chain prefetch may hold an in-flight columnwise + # AG state (_ag_ticket_bwd) on the same weight — clobbering breaks the dgrad consume. if GTP_CONFIG.check_param_states and not in_fp8_activation_recompute_phase(): new_state = GTPWeightState.ASYNC_WAIT if async_op else GTPWeightState.DATA_READY_SYNC for w in weights: @@ -822,9 +801,9 @@ def _all_gather_weight(self, async_op, skip_weight_cast, cast_noop_flag, fwd, nv w._quantizer.set_usage(rowwise=fwd, columnwise=not fwd) # 3. Build gather inputs. - # quantizers / dtypes / gtp_group are stable after model construction — - # cache on the anchor (self == weights[0]) to avoid rebuilding lists - # every call. w.quantized is NOT cached because it can rebind. + # quantizers / dtypes / gtp_group are stable post-construction — cache on the anchor + # (self == weights[0]) to avoid rebuilding lists each call. w.quantized is NOT cached + # (it can rebind). quantizers = self._cached_quantizers if quantizers is None: quantizers = [w._quantizer for w in weights] @@ -865,12 +844,10 @@ def _all_gather_weight(self, async_op, skip_weight_cast, cast_noop_flag, fwd, nv out_buffers ), "Duplicate output buffers in batched all-gather — experts need distinct cache keys" - # ASYNC AG: wrap issue on ag_stream — ag_stream's tail then reflects - # the collective's full lifecycle (what external wait_stream(ag_stream) - # drains depend on). 
The explicit outer→ag_stream sync event preserves - # the upstream quantize writer edge that the bare stream context would - # drop; held on self so PyTorch's event pool can't recycle the handle - # between capture and replay. + # ASYNC AG: issue on ag_stream so its tail reflects the collective's full lifecycle + # (what external wait_stream(ag_stream) drains depend on). The explicit outer→ag_stream + # sync event preserves the upstream quantize-writer edge the bare stream context drops; + # held on self so the event pool can't recycle it between capture and replay. # SYNC AG: stay on caller — output ready on return. if async_op: outer_stream = torch.cuda.current_stream() @@ -964,9 +941,8 @@ def _get_prefetched_weight(self, fwd, skip_weight_cast=False, cast_noop_flag=Non ) _was_drained = getattr(self, "_already_ag_drained", False) if _was_drained: - # Producer already drained via wait_async_comms; skip the captured - # cross-graph wait (CUDA no-op anyway). Correctness is provided by - # the eager main_stream sync chain in the surrounding training loop. + # Producer already drained via wait_async_comms; skip the captured cross-graph + # wait (a CUDA no-op anyway). Correctness comes from the eager main_stream sync. self._already_ag_drained = False else: # Intra-graph or eager consume: drain inline. @@ -998,9 +974,9 @@ def _wait_recompute_param_gather(self): self._recompute_ag_event.record() def _recompute_prefetch_next(self, target, nvtx_label=None): - # Issue target's rowwise (fwd) AG into its recompute slot. _all_gather_weight - # skips the AG-state transition under recompute, so the dgrad `state` of - # target is untouched; result lands in target._ag_ticket_fwd. + # Issue target's rowwise (fwd) AG into its recompute slot. _all_gather_weight skips the + # AG-state transition under recompute, so target's dgrad state is untouched; result lands + # in target._ag_ticket_fwd. 
_, handle = target._all_gather_weight( async_op=True, skip_weight_cast=True, @@ -1030,14 +1006,12 @@ def _get_recompute_prefetched_weight(self): return result if self.is_routed_expert else result[0] def all_gather_and_prefetch_bwd(self, nvtx_label=None): - """ - Backward variant: get current weight (from cache if prefetched, else - sync gather) and async-prefetch prev_w. + """Backward variant: get the current weight (cached if prefetched, else sync gather) + and async-prefetch prev_w. - Safe thanks to the coat-check cache: get() returns the current buffer - to the pool, and the prefetch's checkout() will allocate a separate - buffer if the pool is empty (i.e. the current buffer is still live - via the caller's tensor reference). + Safe via the coat-check cache: get() returns the current buffer to the pool, and the + prefetch's checkout allocates a separate buffer if the pool is empty (current buffer + still live via the caller's reference). Returns: weight_total @@ -1054,9 +1028,8 @@ def all_gather_and_prefetch_bwd(self, nvtx_label=None): and self.prev_w._need_weight_prefetch and self.prev_w._need_weight_prefetch_bwd ): - # Pre-AG work (quantize, ticket lookup) runs on caller's stream; - # the NCCL collective itself is wrapped on ag_stream inside - # _all_gather_weight (see the async/sync gate there for rationale). + # Pre-AG work (quantize, ticket lookup) runs on caller's stream; the NCCL collective + # is wrapped on ag_stream inside _all_gather_weight (see its async/sync gate). _, handle = self.prev_w._all_gather_weight( async_op=True, skip_weight_cast=True, @@ -1090,8 +1063,7 @@ def all_gather_and_prefetch( cast_noop_flag: torch.Tensor = None, nvtx_label: str = None, ): - """ - All-gather current weight and async-prefetch the next weight. + """All-gather the current weight and async-prefetch the next. 
Returns: weight_total @@ -1135,18 +1107,17 @@ def all_gather_and_prefetch( ) self.next_w._prefetch_handle = handle - # The unsharded tensor has been returned, no pending work so reset state to NONE. - # Skip during recompute: a bwd-chain prefetch may hold an in-flight AG state on - # this weight that its later dgrad consume still needs. + # Unsharded tensor returned, no pending work → reset state to NONE. Skip during recompute: + # a bwd-chain prefetch may hold an in-flight AG state this weight's later dgrad needs. if GTP_CONFIG.check_param_states and not in_recompute: for w in self._weights: w._set_state(GTPWeightState.NONE) cls = type(self) - # Lazy-build the recompute-forward prefetch chain (first backward, in - # recompute order). Consume/prefetch above used the prior iteration's links, - # so the first backward runs on-demand while these links are established. + # Lazy-build the recompute-forward prefetch chain (first backward, in recompute order). + # Consume/prefetch above used the prior iter's links, so the first backward runs on-demand + # while these are established. if in_recompute and not self._recompute_initialized: rchain = cls._get_recompute_chain_state(self.chain_id) last_r = rchain["last_weight"] @@ -1183,12 +1154,12 @@ def all_gather_and_prefetch( elif not chain["link_table_flushed"] and chain["link_table_buffer"]: # Second forward pass: flush the complete table atomically to avoid interleaving chain["link_table_flushed"] = True - print_rank_0("\n".join(chain["link_table_buffer"]) + "\n") + log_single_rank(logger, logging.INFO, "\n".join(chain["link_table_buffer"]) + "\n") return result def batched_all_gather_and_prefetch(self, **kwargs): - """Batched all-gather + prefetch for expert weights. 
Wrapper around all_gather_and_prefetch.""" + """Batched all-gather + prefetch for expert weights (wraps all_gather_and_prefetch).""" assert self.is_routed_expert and self.weight_list is not None return self.all_gather_and_prefetch(**kwargs) @@ -1197,27 +1168,23 @@ def get_wgrad_tensor(self): return _wgrad_pool_get(self._unsharded_shape, self.main_grad.dtype, self.device) def register_grad_accum_hook(self, grad_accum_node, hook): - """Register a DDP backward hook to be called after the wgrad RS finalize. - - For GTP params, autograd may receive None (async RS) so the normal grad - accumulator hook never fires. Instead, the integrator (Graphed.backward - for captured chains, or the eager chain-tail cascade) calls this hook - explicitly after RS wait + gradient accumulation, ensuring DDP's - register_grad_ready fires at exactly the right time. + """Register a DDP backward hook to call after the wgrad RS finalize. - ``grad_accum_node`` is accepted for caller-API compatibility but the - node itself is not retained — only the hook callable. + For GTP params autograd may receive None (async RS), so the normal grad-accumulator + hook never fires; the integrator (Graphed.backward for captured chains, or the eager + chain-tail cascade) calls this hook explicitly after RS wait + accumulation, so DDP's + register_grad_ready fires at the right time. grad_accum_node is accepted for API + compatibility but not retained — only the hook callable. """ del grad_accum_node self._grad_accum_hook = hook @staticmethod def _handle_megatron_grad_accum(param): - """Handle megatron DDP and gradient accumulation fusion. + """Handle megatron DDP and gradient-accumulation fusion. - Do NOT set param.grad before calling the hook — the hook checks - param.grad and would accumulate it into main_grad if zero_out_wgrad - is True, corrupting the gradient with a non-zero dummy. 
+ Do NOT set param.grad before calling the hook — the hook checks param.grad and would + accumulate it into main_grad if zero_out_wgrad is True, corrupting it with a dummy. """ if hasattr(param, "grad_added_to_main_grad"): param.grad_added_to_main_grad = True @@ -1229,11 +1196,10 @@ def _handle_megatron_grad_accum(param): return dummy_grad def _wait_reduce_scatter(self, finalize_grad=False): - # Enter rs_stream context so handle.wait() + rs_event.record() land - # on rs_stream — mirrors _wait_param_gather for the RS path. - # When finalize_grad=True, main_grad.add_ also runs on rs_stream - # (right after NCCL RS), so it starts during AG drain rather than - # after it — avoids SM-saturation blocking cross-graph overlap. + # Enter rs_stream context so handle.wait() + rs_event.record() land on rs_stream + # (mirrors _wait_param_gather). With finalize_grad=True, main_grad.add_ also runs on + # rs_stream right after the NCCL RS — starts during AG drain, not after, avoiding + # SM-saturation that blocks cross-graph overlap. rs_stream = self._cached_rs_stream if rs_stream is None: rs_stream = get_rs_stream(self.chain_id, self.group) @@ -1265,11 +1231,8 @@ def _wait_reduce_scatter(self, finalize_grad=False): self._wgrad_input_bufs = None def _reduce_scatter(self, wgrads, async_op, nvtx_label=None): - """Reduce-scatter one or more wgrads. Returns (outputs, handle). - - Single tensor: plain reduce-scatter (no coalescing). - Multiple tensors: coalesced reduce-scatter. - """ + """Reduce-scatter one or more wgrads → (outputs, handle). Single tensor: plain RS; + multiple: coalesced RS.""" if nvtx_label is None: nvtx_label = self._debug_name + ".bwd" + (".async" if async_op else ".sync") @@ -1292,12 +1255,10 @@ def _reduce_scatter(self, wgrads, async_op, nvtx_label=None): else: out_buffers = [None] * len(wgrads) - # ASYNC RS: wrap issue on rs_stream — rs_stream's tail then reflects - # the collective's full lifecycle (what external wait_stream(rs_stream) - # drains depend on). 
The explicit outer→rs_stream sync event preserves - # the wgrad-GEMM writer edge that the bare stream context would drop; - # held on self so PyTorch's event pool can't recycle the handle - # between capture and replay. Mirrors AG path. + # ASYNC RS: issue on rs_stream so its tail reflects the collective's full lifecycle + # (what external wait_stream(rs_stream) drains depend on). The explicit outer→rs_stream + # sync event preserves the wgrad-GEMM writer edge the bare stream context drops; held on + # self so the event pool can't recycle it between capture and replay. Mirrors the AG path. # SYNC RS: stay on caller — output ready on return. if async_op: outer_stream = torch.cuda.current_stream() @@ -1333,27 +1294,24 @@ def _reduce_scatter(self, wgrads, async_op, nvtx_label=None): return outputs, cm if async_op else None def wgrad_reduce_scatter(self, wgrad, nvtx_label=None): - """Reduce-scatter wgrad(s). Sync for last weight, async+deferred for others. - - Accepts a single tensor (non-routed) or list of tensors (routed experts). + """Reduce-scatter wgrad(s): sync for the last weight, async+deferred for others. + Accepts a single tensor (non-routed) or a list (routed experts). Returns: - Single tensor or list for sync (last weight) — backward should return this. - None or tuple of Nones for async — backward should return this. + Single tensor or list for sync (last weight) — backward returns this. + None or tuple of Nones for async — backward returns this. """ batched = isinstance(wgrad, (list, tuple)) wgrads = list(wgrad) if batched else [wgrad] weights = self._weights - # UNGRAPHED-chain wgrads are recycled via the standalone pool (_wgrad_pool_put). - # GRAPHED-chain wgrads cannot pool-recycle because CUDA graphs require - # stable buffer addresses across replay. + # UNGRAPHED wgrads recycle via the standalone pool (_wgrad_pool_put); GRAPHED wgrads + # cannot, since CUDA graphs require stable buffer addresses across replay. 
poolable = self.chain_id == GTPChain.UNGRAPHED.value if GTP_CONFIG.async_reduction and self.prev_w is not None: - # Async reduce-scatter (not last weight — deferred finish). Pre-RS - # work on caller; NCCL wrap lives at the collective site inside - # _reduce_scatter (mirrors the AG prefetch sites). + # Async RS (not last weight — deferred finish). Pre-RS work on caller; NCCL wrap + # lives at the collective site inside _reduce_scatter (mirrors the AG prefetch sites). _, rs_handle = self._reduce_scatter(wgrads, async_op=True, nvtx_label=nvtx_label) self._wgrad_rs_handle = GTPShardHandle(rs_handle, weights, reduce_scatter=True) # Stash wgrad input buffers — cannot recycle yet because the async RS @@ -1409,9 +1367,8 @@ def batched_wgrad_reduce_scatter(self, wgrad_list, nvtx_label=None): def get_data_tensors(self): """Expose self as the lone data tensor for TE's offload-marking interface. - TE's ``mark_activation_offload`` treats any non-plain tensor as a storage - wrapper and calls ``get_data_tensors()`` on it; a sharded param has no inner - buffers, so it is its own data tensor. + TE's mark_activation_offload treats any non-plain tensor as a storage wrapper and calls + get_data_tensors() on it; a sharded param has no inner buffers, so it is its own. 
""" return (self,) @@ -1433,18 +1390,6 @@ def __torch_function__(self, func, types, args=(), kwargs=None): return func(*args, **kwargs) -def print_rank_0(message, rank=None): - """If distributed is initialized or rank is specified, print only on rank 0.""" - if rank is not None: - if rank == 0: - print(message, flush=True) - elif torch.distributed.is_initialized(): - if torch.distributed.get_rank() == 0: - print(message, flush=True) - else: - print(message, flush=True) - - @dataclass class _TicketSlot: """Internal slot backing a persistent ticket in the GTP buffer cache.""" @@ -1458,11 +1403,10 @@ class _TicketSlot: buf: Optional[torch.Tensor] = field(default=None) # None when released or after clear() -# CUDA-graph memory pool for routing GRAPHED-chain allocations (AG/RS buffers and -# quantized weight storage) into the capture pool *at creation time*, so no post-hoc -# reallocation is needed. Registered by the integrator (set_cuda_graph_mempool) after -# the pool is created and before the first graphed forward; stays None when CG is off, -# in which case _graphed_alloc is a no-op and allocations use regular memory. +# CUDA-graph memory pool: routes GRAPHED-chain allocations (AG/RS buffers, quantized weight +# storage) into the capture pool at creation time, avoiding post-hoc reallocation. Registered +# via set_cuda_graph_mempool before the first graphed forward; stays None when CG is off, where +# _graphed_alloc is a no-op (regular allocator). _CG_MEMPOOL_DEVICE = None _CG_MEMPOOL = None @@ -1489,25 +1433,17 @@ def _graphed_alloc(chain_id): class GTPWeightCache: - """ - Ticket-based buffer pool for GTP all-gather / reduce-scatter buffers. - - - ``reserve(param, dtype, fwd)`` → ``ticket`` - Assigns a persistent ticket (no buffer allocated yet). - - ``get(ticket)`` → ``buffer`` - Returns the buffer, lazily allocating from pool or fresh if needed. - - ``release(ticket)`` - Returns the buffer to the pool. 
Ticket remains valid; next ``get()`` - will re-allocate from the pool. - - ``clear()`` - Drops all buffers and pools. Tickets remain valid; next ``get()`` - lazily allocates fresh buffers. + """Ticket-based buffer pool for GTP all-gather / reduce-scatter buffers. + + - reserve(param, dtype, fwd) → ticket: assign a persistent ticket (no buffer yet). + - get(ticket) → buffer: return the buffer, lazily (re)allocating from pool or fresh. + - release(ticket): return the buffer to the pool; ticket stays valid. + - clear(): drop all buffers/pools; tickets stay valid, next get() allocates fresh. """ - # Bytes per element for known dtypes (used for logging). Add new entries - # here when GTP starts caching buffers of additional quantized dtypes. - # Only DType values guaranteed exposed by the TE pybind bindings — verify - # via ``hasattr(tex.DType, ...)`` before adding speculative entries. + # Bytes per element for known dtypes (for logging). Add entries when GTP caches buffers of + # new quantized dtypes — only DType values the TE pybind bindings expose (verify via + # hasattr(tex.DType, ...) before adding speculative entries). _BYTES_PER_ELEMENT = { torch.bfloat16: 2, torch.float16: 2, @@ -1568,9 +1504,11 @@ def _allocate_buffer( dtype_str = ( str(dtype) if isinstance(dtype, torch.dtype) else getattr(dtype, "name", str(dtype)) ) - print_rank_0( + log_single_rank( + logger, + logging.INFO, f"[GTP Cache] +{buf_bytes / 1024**2:.1f} MB (shape={out_shape}, dtype={dtype_str}) " - f"total={self._total_bytes / 1024**2:.1f} MB param: {param._debug_name} fwd: {fwd}" + f"total={self._total_bytes / 1024**2:.1f} MB param: {param._debug_name} fwd: {fwd}", ) return buf @@ -1612,10 +1550,10 @@ def get(self, ticket: int) -> torch.Tensor: return slot.buf def release(self, ticket: int): - """Return the buffer to the pool. Ticket remains valid. + """Return the buffer to the pool (ticket stays valid). 
- slot.buf is intentionally NOT cleared: get() must stay idempotent so that - CUDA-graph-captured buffers keep their fixed address across replays. + slot.buf is intentionally NOT cleared: get() must stay idempotent so CUDA-graph-captured + buffers keep their fixed address across replays. """ slot = self._slots[ticket] if slot.buf is None: @@ -1646,12 +1584,10 @@ def wait_async_comms( ): """Drain in-flight GTP async AG / RS handles. - When called inside CUDA graph capture, the drains are captured into that - graph. This is the producer-side hook for cross-graph AG/RS overlap: - captured cudaStreamWaitEvent on an event recorded in a different capture - session is a CUDA no-op, so consumer graphs can't safely wait on - cross-graph events. Instead, the producer drains here and flags the - param; the consumer reads the flag and skips its captured wait. + Inside CUDA graph capture the drains are captured into the graph — the producer-side hook + for cross-graph overlap. A captured cudaStreamWaitEvent on another capture session's event is + a CUDA no-op, so consumers can't wait cross-graph; instead the producer drains here and flags + the param, and the consumer skips its captured wait. Args: chain_id: If specified, only drain params on this chain. @@ -1683,10 +1619,9 @@ def wait_async_comms( param._recompute_already_drained = True if not skip_rs: param._wait_reduce_scatter(finalize_grad=finalize_after_drain) - # Fallback inline-accumulation: only when finalize is requested, - # _wait_reduce_scatter didn't already finalize, and an RS actually - # ran for this param (rs_ticket set). Skips pure-AG prefetches in - # _inflight_comm_params (no wgrad to accumulate). + # Fallback inline-accumulation: only when finalize is requested, _wait_reduce_scatter + # didn't already finalize, and an RS actually ran (rs_ticket set). Skips pure-AG + # prefetches in _inflight_comm_params (no wgrad). 
need_fallback_accumulation = ( finalize_after_drain and not getattr(param, "_already_finalized", False) @@ -1738,11 +1673,8 @@ def grouped_gather_along_first_dim( quantizers: list = None, output_tensors: list = None, ): - """ - All-gather multiple weights in a single coalesced operation. - - Handles NVFP4 post-processing for both sync and async paths. - """ + """All-gather multiple weights in one coalesced op; handles NVFP4 post-processing for both + sync and async paths.""" # Determine device from first weight. inp = weights[0] if isinstance(inp, NVFP4TensorStorage): @@ -1785,33 +1717,33 @@ def grouped_gather_along_first_dim( class GTPEmbeddingWeight(torch.autograd.Function): - """All-gather the embedding weight across the GTP group in forward, and - reduce-scatter its gradient back in backward. + """All-gather the embedding weight across the GTP group in forward, reduce-scatter its + gradient in backward. - The embedding weight is stored sharded along the vocab dimension across - the GTP group; this autograd function materializes the full weight for - the embedding lookup and distributes the gradient back to the shard. + The weight is stored sharded along the vocab dimension; this materializes the full weight + for the lookup and distributes the gradient back to the shard. """ @staticmethod def forward(ctx, weight): + """All-gather the full embedding weight across the GTP group for the lookup.""" ctx.save_for_backward(weight) return weight.all_gather_and_prefetch(fwd=True) @staticmethod def backward(ctx, grad_output): + """Reduce-scatter the gradient back to this rank's vocab-dim shard.""" (weight,) = ctx.saved_tensors return weight.wgrad_reduce_scatter(grad_output) def reset_gtp_state(): - """Clear the process-global GTP prefetch-chain state (``GTPShardedParam._chain_state`` / - ``._recompute_chain_state``). + """Clear the process-global GTP prefetch-chain state (GTPShardedParam._chain_state / + ._recompute_chain_state). 
- These class-level dicts survive model teardown, so a GTP model rebuilt in the same process - would otherwise inherit the prior model's stale ``last_weight`` pointers / flushed link - tables. Call once before the per-chunk ``classify_gtp_chains`` loop (never inside it — chains - span chunks). No-op on a fresh process. + These class-level dicts survive model teardown, so a GTP model rebuilt in-process would + inherit stale last_weight pointers / flushed link tables. Call once before the per-chunk + classify_gtp_chains loop (never inside it — chains span chunks). No-op on a fresh process. """ GTPShardedParam._chain_state.clear() GTPShardedParam._recompute_chain_state.clear() @@ -1820,10 +1752,9 @@ def reset_gtp_state(): def reset_gtp_quantize_cache(model): """Invalidate the per-shard low-precision cache after a checkpoint load. - DCP load copies new data into ``GTPShardedParam.data`` in-place, leaving - the stale FP8 / MXFP8 / NVFP4 buffer in ``self.quantized`` behind. Call - this once after ``load_state_dict`` / ``dist_checkpointing.load`` so the - next forward re-quantizes from the freshly-loaded high-precision weight. + DCP load copies new data into GTPShardedParam.data in-place, leaving a stale FP8/MXFP8/NVFP4 + buffer in self.quantized. Call once after load so the next forward re-quantizes from the + freshly-loaded weight. """ for param in model.parameters(): if isinstance(param, GTPShardedParam): @@ -1833,16 +1764,9 @@ def reset_gtp_quantize_cache(model): # ------------------------------------------------------------------------ # Distributed-checkpointing helpers # ------------------------------------------------------------------------ -# -# GTP shards weights further along axis 0 on top of TP. The vanilla helpers -# in ``megatron.core.transformer.utils`` only know about TP, so the -# ShardedTensor offsets they emit don't reflect the GTP slice the local -# rank actually owns. 
The helper below detects GTPShardedParam per-tensor -# and either composes TP × GTP into a single axis-0 offset (when TP is -# also on axis 0) or emits two offsets (TP on its axis, GTP on axis 0). -# ``replica_id`` uses the DP-with-GTP-with-CP rank — the set of ranks that -# actually hold identical copies of this chunk. -# +# GTP shards axis 0 on top of TP, but the vanilla utils helpers only know TP, so their offsets +# miss the GTP slice. The helper below detects GTPShardedParam per-tensor and composes TP × GTP +# into one axis-0 offset (or two offsets), with replica_id = the DP-with-GTP-with-CP rank. def make_sharded_tensors_for_checkpoint_with_gtp( @@ -1856,15 +1780,12 @@ def make_sharded_tensors_for_checkpoint_with_gtp( dp_cp_group, intra_dp_cp_no_gtp_group=None, ): - """GTP-aware analogue of ``make_sharded_tensors_for_checkpoint``. - - Detects GTP sharding per-tensor (via ``isinstance(tensor, GTPShardedParam)``). - Non-GTP tensors keep the vanilla offsets exactly; GTP tensors layer the - GTP axis-0 split on top. + """GTP-aware analogue of make_sharded_tensors_for_checkpoint. - No-op (delegates to the vanilla helper) when no tensor in ``state_dict`` - is a ``GTPShardedParam``, so plumbing this helper through has zero cost - when GTP is inactive. + Detects GTP sharding per-tensor (isinstance(tensor, GTPShardedParam)). Non-GTP tensors keep + the vanilla offsets exactly; GTP tensors layer the GTP axis-0 split on top. No-op (delegates + to the vanilla helper) when no tensor is a GTPShardedParam, so this is zero-cost when GTP is + inactive. """ from megatron.core.transformer.utils import ( # noqa: E402 make_sharded_object_for_checkpoint, @@ -1926,10 +1847,9 @@ def make_sharded_tensors_for_checkpoint_with_gtp( continue if not is_gtp: - # Non-GTPShardedParam under a GTP-active module (e.g. bias). The tensor is - # GTP-replicated, so different GTP ranks would collide on the same replica_id - # without intervention. 
Inject gtp_rank into position 1 of the replica_id, the same - # way the GTP-sharded branch below does. + # Non-GTPShardedParam under a GTP-active module (e.g. bias): GTP-replicated, so GTP + # ranks would collide on the same replica_id. Inject gtp_rank into replica_id + # position 1 (same as the GTP-sharded branch below). if layer_name in tensor_parallel_layers_axis_map: replica_id = (0, gtp_rank, dp_no_gtp_rank) sharded_state_dict[layer_key] = make_tp_sharded_tensor_for_checkpoint( @@ -1953,10 +1873,9 @@ def make_sharded_tensors_for_checkpoint_with_gtp( ) continue - # GTP-sharded tensor: delegate to the (GTP-aware) single-tensor helper, the one place - # that knows how to layer the axis-0 GTP split onto TP, elect the writer over the - # gtp-excluded DP group, and set allow_shape_mismatch for GTP alignment padding. - # (tp_axis None → 0: GTP always shards axis 0; tp_size is 1 when this param has no TP.) + # GTP-sharded tensor: delegate to the GTP-aware single-tensor helper — it layers the + # axis-0 GTP split onto TP, elects the writer over the gtp-excluded DP group, and sets + # allow_shape_mismatch for alignment padding. (tp_axis None → 0; tp_size 1 when no TP.) tp_axis = tensor_parallel_layers_axis_map.get(layer_name, None) sharded_state_dict[layer_key] = make_tp_sharded_tensor_for_checkpoint( tensor, @@ -1970,10 +1889,9 @@ def make_sharded_tensors_for_checkpoint_with_gtp( return sharded_state_dict -# Wire GTP into TE's hook registry. Done at module import time so any later -# ``te.Linear(gtp_group=...)`` call routes through the hooks below. The -# warning fires if TE is too old to expose ``register_gtp_hooks`` — in that -# case GTP silently no-ops, which is the failure mode we want to surface. +# Wire GTP into TE's hook registry at import time, so any later +# ``te.Linear(gtp_group=...)`` routes through the hooks below. If TE is too old to +# expose ``register_gtp_hooks``, GTP silently no-ops (the warning surfaces that). 
try: from transformer_engine.pytorch.module.base import ( # noqa: E402 register_gtp_hooks as _te_register_gtp_hooks, From dc629ccb410ab8eb89194c83060b56ff090995bf Mon Sep 17 00:00:00 2001 From: Shiqing Fan Date: Fri, 26 Jun 2026 07:19:48 -0700 Subject: [PATCH 58/59] fix1: populate EGTP-excluded expert-DP groups in get_default_pg_collection fix2: make GTP module import gracefully without TransformerEngine Signed-off-by: Shiqing Fan --- .../generalized_tensor_parallelism.py | 27 +++++++++++++++---- megatron/core/tensor_parallel/gtp.py | 15 ++++++----- megatron/core/transformer/moe/moe_utils.py | 8 ++++++ 3 files changed, 38 insertions(+), 12 deletions(-) diff --git a/megatron/core/tensor_parallel/generalized_tensor_parallelism.py b/megatron/core/tensor_parallel/generalized_tensor_parallelism.py index 1c9faf6b825..abdb60a882c 100644 --- a/megatron/core/tensor_parallel/generalized_tensor_parallelism.py +++ b/megatron/core/tensor_parallel/generalized_tensor_parallelism.py @@ -11,6 +11,8 @@ See ``docs/api-guide/core/generalized_tensor_parallel.md`` for design and usage. """ +from __future__ import annotations + import logging import math import os @@ -62,11 +64,26 @@ nvtx_range_push, round_up_to_nearest_multiple, ) -except (ImportError, ModuleNotFoundError) as _gtp_te_import_err: - raise ImportError( - "megatron.core.tensor_parallel.gtp requires TransformerEngine with FP8 / MXFP8 / " - "NVFP4 tensor primitives. Original error: " + str(_gtp_te_import_err) - ) from _gtp_te_import_err + + HAVE_TE = True +except (ImportError, ModuleNotFoundError): + # TE unavailable/too old -> stub the TE-backed names so this module still imports, + # and flag GTP unusable via HAVE_TE (gtp.py surfaces this as HAVE_GTP=False). No + # GTP path runs without TE. The `annotations` future-import keeps the lone + # module-level TE reference (a dataclass field annotation) from being evaluated. 
+ from unittest.mock import MagicMock + + te = tex = MagicMock() + MXFP8_BLOCK_SCALING_SIZE = NVFP4_BLOCK_SCALING_SIZE = None + _NVFP4AllGatherAsyncHandle = MagicMock() + gather_along_first_dim = reduce_scatter_along_first_dim = MagicMock() + in_fp8_activation_recompute_phase = MagicMock() + get_dummy_wgrad = MagicMock() + QuantizedTensor = MagicMock() + MXFP8TensorStorage = NVFP4TensorStorage = MagicMock() + MXFP8Quantizer = MagicMock() + nvtx_range_pop = nvtx_range_push = round_up_to_nearest_multiple = MagicMock() + HAVE_TE = False class GTPChain(str, Enum): diff --git a/megatron/core/tensor_parallel/gtp.py b/megatron/core/tensor_parallel/gtp.py index 6d31c7fc7b2..fa6db4eeee7 100644 --- a/megatron/core/tensor_parallel/gtp.py +++ b/megatron/core/tensor_parallel/gtp.py @@ -7,14 +7,16 @@ in ``megatron.core.tensor_parallel.generalized_tensor_parallelism`` and depends on TransformerEngine's FP8 / MXFP8 / NVFP4 primitives. -If TransformerEngine is missing or too old, the inner import fails and the -module exposes only ``HAVE_GTP = False``. No core module imports GTP symbols -unconditionally at module load time. +If TransformerEngine is missing or too old, the inner module imports cleanly +but stubs its TE-backed symbols and reports ``HAVE_TE = False``; this module +mirrors that as ``HAVE_GTP = False``. Consumers gate every GTP code path behind +``if HAVE_GTP:``, so no core module uses GTP symbols without TE. """ try: from megatron.core.tensor_parallel.generalized_tensor_parallelism import ( GTP_CONFIG, + HAVE_TE, GTPChain, GTPEmbeddingWeight, GTPShardedParam, @@ -33,11 +35,10 @@ wrap_module_params_gtp, ) - HAVE_GTP = True + HAVE_GTP = HAVE_TE except ImportError: - # GTP requires TransformerEngine with the GTP hook registry; when it's - # unavailable only ``HAVE_GTP`` is exposed. Consumers import the other - # symbols lazily under an ``if HAVE_GTP:`` guard, so no fallbacks are needed. 
+ # Defensive fallback for any unexpected inner-import failure; consumers import + # the other symbols lazily under an ``if HAVE_GTP:`` guard, so no stubs needed. HAVE_GTP = False diff --git a/megatron/core/transformer/moe/moe_utils.py b/megatron/core/transformer/moe/moe_utils.py index ca004bd10b1..76b112bf01f 100644 --- a/megatron/core/transformer/moe/moe_utils.py +++ b/megatron/core/transformer/moe/moe_utils.py @@ -1387,6 +1387,14 @@ def get_default_pg_collection() -> ProcessGroupCollection: pg_collection.cp = parallel_state.get_context_parallel_group() pg_collection.expt_tp = parallel_state.get_expert_tensor_parallel_group() pg_collection.expt_dp = parallel_state.get_expert_data_parallel_group() + # EGTP-excluded expert-DP groups used to stamp expert-weight replica_ids. Must not be + # left None (get_pg_rank(None)==0 -> duplicate-writer collision at checkpoint save). + pg_collection.expt_dp_no_egtp = parallel_state.get_expert_data_parallel_group( + no_gtp=True, check_initialized=False + ) + pg_collection.intra_expt_dp_no_egtp = parallel_state.get_expert_data_parallel_group( + no_gtp=True, partial_expert_data_parallel=True, check_initialized=False + ) pg_collection.tp_ep = parallel_state.get_expert_tensor_and_model_parallel_group() pg_collection.tp_cp = parallel_state.get_tensor_and_context_parallel_group() pg_collection.tp_dp_cp = parallel_state.get_tensor_and_data_parallel_group( From 00c9d20af8d8c04c62a77702088f6d68ff6836f5 Mon Sep 17 00:00:00 2001 From: Shiqing Fan Date: Fri, 26 Jun 2026 08:20:46 -0700 Subject: [PATCH 59/59] Fix: defer global TP/DP group reads in _backfill_gtp_sharded_param_map Signed-off-by: Shiqing Fan --- megatron/core/optimizer/optimizer.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/megatron/core/optimizer/optimizer.py b/megatron/core/optimizer/optimizer.py index ebbb5138284..880e7a71ce8 100644 --- a/megatron/core/optimizer/optimizer.py +++ b/megatron/core/optimizer/optimizer.py @@ -801,14 +801,17 @@ def 
_backfill_gtp_sharded_param_map(id_to_sharded_param_map: dict, float16_group except ImportError: return # GTP not built in -- nothing to backfill. - # Checkpoint compatibility point: source the groups from parallel_state, mirroring the - # make_*_for_checkpoint helpers (which fall back to these same globals). - tp_group = parallel_state.get_tensor_model_parallel_group() - dp_cp_group = parallel_state.get_data_parallel_group(with_context_parallel=True) + # Groups sourced lazily (below) only when a GTP param is found, so GTP-free models on + # explicit grids (e.g. MiMo) never require the global MPU groups to be initialized. + tp_group = None + dp_cp_group = None for param_id, p in enumerate(chain.from_iterable(float16_groups)): # Skip params that already matched, and any non-GTP param (those always match). if param_id in id_to_sharded_param_map or not isinstance(p, GTPShardedParam): continue + if tp_group is None: + tp_group = parallel_state.get_tensor_model_parallel_group() + dp_cp_group = parallel_state.get_data_parallel_group(with_context_parallel=True) # Key by the param's dotted name (set in prod by tag_gtp_params_with_names); the fallback # keeps the function usable in tests where the name was not tagged. key = p._debug_name or f'_gtp_optim_param_{param_id}'