import asyncio
import collections
import logging
import random
import socket
import threading
import time
import warnings
import weakref
from abc import ABC, abstractmethod
from collections import defaultdict
from copy import copy
from itertools import chain
from types import MethodType
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    Coroutine,
    Deque,
    Dict,
    Generator,
    List,
    Literal,
    Mapping,
    Optional,
    Set,
    Tuple,
    Type,
    TypeVar,
    Union,
)

if TYPE_CHECKING:
    from redis.asyncio.keyspace_notifications import (
        AsyncClusterKeyspaceNotifications,
    )

from redis._defaults import (
    DEFAULT_RETRY_BASE,
    DEFAULT_RETRY_CAP,
    DEFAULT_RETRY_COUNT,
    DEFAULT_SOCKET_CONNECT_TIMEOUT,
    DEFAULT_SOCKET_READ_SIZE,
    DEFAULT_SOCKET_TIMEOUT,
)
from redis._parsers import AsyncCommandsParser, Encoder
from redis._parsers.commands import CommandPolicies, RequestPolicy, ResponsePolicy
from redis._parsers.helpers import get_response_callbacks
from redis.asyncio.client import PubSub, ResponseCallbackT
from redis.asyncio.connection import (
    AbstractConnection,
    Connection,
    ConnectionPoolInterface,
    SSLConnection,
    parse_url,
)
from redis.asyncio.lock import Lock
from redis.asyncio.observability.recorder import (
    record_error_count,
    record_operation_duration,
)
from redis.asyncio.retry import Retry
from redis.auth.token import TokenInterface
from redis.backoff import ExponentialWithJitterBackoff, NoBackoff
from redis.client import EMPTY_RESPONSE, NEVER_DECODE, AbstractRedis
from redis.cluster import (
    PIPELINE_BLOCKED_COMMANDS,
    PRIMARY,
    REPLICA,
    SLOT_ID,
    AbstractRedisCluster,
    LoadBalancer,
    LoadBalancingStrategy,
    block_pipeline_command,
    get_node_name,
    parse_cluster_shards,
    parse_cluster_shards_unified,
    parse_cluster_shards_with_str_keys,
    parse_cluster_slots,
)
from redis.commands import READ_COMMANDS, AsyncRedisClusterCommands
from redis.commands.helpers import list_or_args, parse_pubsub_subscriptions
from redis.commands.policies import AsyncPolicyResolver, AsyncStaticPolicyResolver
from redis.crc import REDIS_CLUSTER_HASH_SLOTS, key_slot
from redis.credentials import CredentialProvider
from redis.driver_info import DriverInfo, resolve_driver_info
from redis.event import (
    AfterAsyncClusterInstantiationEvent,
    AsyncAfterSlotsCacheRefreshEvent,
    AsyncEventListenerInterface,
    EventDispatcher,
)
from redis.exceptions import (
    AskError,
    BusyLoadingError,
    ClusterDownError,
    ClusterError,
    ConnectionError,
    CrossSlotTransactionError,
    DataError,
    ExecAbortError,
    InvalidPipelineStack,
    MaxConnectionsError,
    MovedError,
    RedisClusterException,
    RedisError,
    ResponseError,
    SlotNotCoveredError,
    TimeoutError,
    TryAgainError,
    WatchError,
)
from redis.typing import (
    AnyKeyT,
    ChannelT,
    EncodableT,
    KeyT,
    PubSubHandler,
    Subscription,
)
from redis.utils import (
    SENTINEL,
    SSL_AVAILABLE,
    deprecated_args,
    deprecated_function,
    safe_str,
    str_if_bytes,
    truncate_text,
)

if SSL_AVAILABLE:
    from ssl import TLSVersion, VerifyFlags, VerifyMode
else:
    TLSVersion = None
    VerifyMode = None
    VerifyFlags = None

logger = logging.getLogger(__name__)

TargetNodesT = TypeVar(
    "TargetNodesT", str, "ClusterNode", List["ClusterNode"], Dict[Any, "ClusterNode"]
)


class RedisCluster(AbstractRedis, AbstractRedisCluster, AsyncRedisClusterCommands):
    """
    Create a new RedisCluster client.

    Pass one of parameters:

      - `host` & `port`
      - `startup_nodes`

    | Use ``await`` :meth:`initialize` to find cluster nodes & create connections.
    | Use ``await`` :meth:`close` to disconnect connections & close client.

    Many commands support the target_nodes kwarg. It can be one of the
    :attr:`NODE_FLAGS`:

      - :attr:`PRIMARIES`
      - :attr:`REPLICAS`
      - :attr:`ALL_NODES`
      - :attr:`RANDOM`
      - :attr:`DEFAULT_NODE`

    Note: This client is not thread/process/fork safe.

    :param host:
        | Can be used to point to a startup node
    :param port:
        | Port used if **host** is provided
    :param startup_nodes:
        | :class:`~.ClusterNode` to used as a startup node
    :param require_full_coverage:
        | When set to ``False``: the client will not require a full coverage of
          the slots. However, if not all slots are covered, and at least one node
          has ``cluster-require-full-coverage`` set to ``yes``, the server will throw
          a :class:`~.ClusterDownError` for some key-based commands.
        | When set to ``True``: all slots must be covered to construct the cluster
          client. If not all slots are covered, :class:`~.RedisClusterException` will be
          thrown.
        | See:
          https://redis.io/docs/manual/scaling/#redis-cluster-configuration-parameters
    :param read_from_replicas:
        | @deprecated - please use load_balancing_strategy instead
        | Enable read from replicas in READONLY mode.
          When set to true, read commands will be assigned between the primary and
          its replications in a Round-Robin manner.
          The data read from replicas is eventually consistent with the data in primary nodes.
    :param load_balancing_strategy:
        | Enable read from replicas in READONLY mode and defines the load balancing
          strategy that will be used for cluster node selection.
          The data read from replicas is eventually consistent with the data in primary nodes.
    :param dynamic_startup_nodes:
        | Set the RedisCluster's startup nodes to all the discovered nodes.
          If true (default value), the cluster's discovered nodes will be used to
          determine the cluster nodes-slots mapping in the next topology refresh.
          It will remove the initial passed startup nodes if their endpoints aren't
          listed in the CLUSTER SLOTS output.
          If you use dynamic DNS endpoints for startup nodes but CLUSTER SLOTS lists
          specific IP addresses, it is best to set it to false.
    :param reinitialize_steps:
        | Specifies the number of MOVED errors that need to occur before reinitializing
          the whole cluster topology. If a MOVED error occurs and the cluster does not
          need to be reinitialized on this current error handling, only the MOVED slot
          will be patched with the redirected node.
          To reinitialize the cluster on every MOVED error, set reinitialize_steps to 1.
          To avoid reinitializing the cluster on moved errors, set reinitialize_steps to
          0.
    :param cluster_error_retry_attempts:
        | @deprecated - Please configure the 'retry' object instead
          In case 'retry' object is set - this argument is ignored!

          Number of times to retry before raising an error when :class:`~.TimeoutError`,
          :class:`~.ConnectionError`, :class:`~.SlotNotCoveredError`
          or :class:`~.ClusterDownError` are encountered
    :param retry:
        | A retry object that defines the retry strategy and the number of
          retries for the cluster client.
          In current implementation for the cluster client (starting form redis-py version 6.0.0)
          the retry object is not yet fully utilized, instead it is used just to determine
          the number of retries for the cluster client.
          In the future releases the retry object will be used to handle the cluster client retries!
    :param max_connections:
        | Maximum number of connections per node. If there are no free connections & the
          maximum number of connections are already created, a
          :class:`~.MaxConnectionsError` is raised.
    :param socket_keepalive:
        | If ``True``, TCP keepalive is enabled for TCP socket connections.
    :param socket_keepalive_options:
        | Mapping of TCP keepalive socket option constants to values, for
          example ``{socket.TCP_KEEPIDLE: 30}``. If left unspecified, redis-py
          uses TCP keepalive defaults when ``socket_keepalive`` is enabled:
          idle 30 seconds, interval 5 seconds, and 3 probes.
          Platform-specific options that are not available are skipped.
          Pass ``None`` or ``{}`` to avoid setting additional TCP keepalive
          options.
    :param address_remap:
        | An optional callable which, when provided with an internal network
          address of a node, e.g. a `(host, port)` tuple, will return the address
          where the node is reachable.  This can be used to map the addresses at
          which the nodes _think_ they are, to addresses at which a client may
          reach them, such as when they sit behind a proxy.

    | Rest of the arguments will be passed to the
      :class:`~redis.asyncio.connection.Connection` instances when created

    :raises RedisClusterException:
        if any arguments are invalid or unknown. Eg:

        - `db` != 0 or None
        - `path` argument for unix socket connection
        - none of the `host`/`port` & `startup_nodes` were provided

    """

    @classmethod
    def from_url(cls, url: str, **kwargs: Any) -> "RedisCluster":
        """
        Return a Redis client object configured from the given URL.

        For example::

            redis://[[username]:[password]]@localhost:6379/0
            rediss://[[username]:[password]]@localhost:6379/0

        Three URL schemes are supported:

        - `redis://` creates a TCP socket connection. See more at:
          <https://www.iana.org/assignments/uri-schemes/prov/redis>
        - `rediss://` creates a SSL wrapped TCP socket connection. See more at:
          <https://www.iana.org/assignments/uri-schemes/prov/rediss>

        The username, password, hostname, path and all querystring values are passed
        through ``urllib.parse.unquote`` in order to replace any percent-encoded values
        with their corresponding characters.

        All querystring options are cast to their appropriate Python types. Boolean
        arguments can be specified with string values "True"/"False" or "Yes"/"No".
        Values that cannot be properly cast cause a ``ValueError`` to be raised. Once
        parsed, the querystring arguments and keyword arguments are passed to
        :class:`~redis.asyncio.connection.Connection` when created.
        In the case of conflicting arguments, querystring arguments are used.
        """
        kwargs.update(parse_url(url))
        if kwargs.pop("connection_class", None) is SSLConnection:
            kwargs["ssl"] = True
        return cls(**kwargs)

    # Type discrimination marker for @overload self-type pattern
    _is_async_client: Literal[True] = True

    __slots__ = (
        "_initialize",
        "_lock",
        "retry",
        "command_flags",
        "commands_parser",
        "connection_kwargs",
        "encoder",
        "node_flags",
        "nodes_manager",
        "read_from_replicas",
        "reinitialize_counter",
        "reinitialize_steps",
        "response_callbacks",
        "result_callbacks",
    )

    @deprecated_args(
        args_to_warn=["read_from_replicas"],
        reason="Please configure the 'load_balancing_strategy' instead",
        version="5.3.0",
    )
    @deprecated_args(
        args_to_warn=[
            "cluster_error_retry_attempts",
        ],
        reason="Please configure the 'retry' object instead",
        version="6.0.0",
    )
    @deprecated_args(
        args_to_warn=["lib_name", "lib_version"],
        reason="Use 'driver_info' parameter instead. "
        "lib_name and lib_version will be removed in a future version.",
    )
    def __init__(
        self,
        host: str | None = None,
        port: str | int = 6379,
        # Cluster related kwargs
        startup_nodes: List["ClusterNode"] | None = None,
        require_full_coverage: bool = True,
        read_from_replicas: bool = False,
        load_balancing_strategy: LoadBalancingStrategy | None = None,
        dynamic_startup_nodes: bool = True,
        reinitialize_steps: int = 5,
        cluster_error_retry_attempts: int = DEFAULT_RETRY_COUNT,
        max_connections: int = 100,
        retry: Retry | None = None,
        retry_on_error: List[Type[Exception]] | None = None,
        # Client related kwargs
        db: str | int = 0,
        path: str | None = None,
        credential_provider: CredentialProvider | None = None,
        username: str | None = None,
        password: str | None = None,
        client_name: str | None = None,
        lib_name: str | object | None = SENTINEL,
        lib_version: str | object | None = SENTINEL,
        driver_info: DriverInfo | object | None = SENTINEL,
        # Encoding related kwargs
        encoding: str = "utf-8",
        encoding_errors: str = "strict",
        decode_responses: bool = False,
        # Connection related kwargs
        health_check_interval: float = 0,
        socket_timeout: float | None = DEFAULT_SOCKET_TIMEOUT,
        socket_connect_timeout: float | None = DEFAULT_SOCKET_CONNECT_TIMEOUT,
        socket_read_size: int = DEFAULT_SOCKET_READ_SIZE,
        socket_keepalive: bool = True,
        socket_keepalive_options: Mapping[int, int | bytes] | object | None = SENTINEL,
        # SSL related kwargs
        ssl: bool = False,
        ssl_ca_certs: str | None = None,
        ssl_ca_data: str | None = None,
        ssl_cert_reqs: "str | VerifyMode" = "required",
        ssl_include_verify_flags: List["VerifyFlags"] | None = None,
        ssl_exclude_verify_flags: List["VerifyFlags"] | None = None,
        ssl_certfile: str | None = None,
        ssl_check_hostname: bool = True,
        ssl_keyfile: str | None = None,
        ssl_min_version: "TLSVersion | None" = None,
        ssl_ciphers: str | None = None,
        protocol: int | None = None,
        legacy_responses: bool = True,
        address_remap: Callable[[Tuple[str, int]], Tuple[str, int]] | None = None,
        event_dispatcher: EventDispatcher | None = None,
        policy_resolver: AsyncPolicyResolver = AsyncStaticPolicyResolver(),
    ) -> None:
        if db:
            raise RedisClusterException(
                "Argument 'db' must be 0 or None in cluster mode"
            )

        if path:
            raise RedisClusterException(
                "Unix domain socket is not supported in cluster mode"
            )

        if (not host or not port) and not startup_nodes:
            raise RedisClusterException(
                "RedisCluster requires at least one node to discover the cluster.\n"
                "Please provide one of the following or use RedisCluster.from_url:\n"
                '   - host and port: RedisCluster(host="localhost", port=6379)\n'
                "   - startup_nodes: RedisCluster(startup_nodes=["
                'ClusterNode("localhost", 6379), ClusterNode("localhost", 6380)])'
            )

        computed_driver_info = resolve_driver_info(driver_info, lib_name, lib_version)

        kwargs: Dict[str, Any] = {
            "max_connections": max_connections,
            "connection_class": Connection,
            # Client related kwargs
            "credential_provider": credential_provider,
            "username": username,
            "password": password,
            "client_name": client_name,
            "driver_info": computed_driver_info,
            # Encoding related kwargs
            "encoding": encoding,
            "encoding_errors": encoding_errors,
            "decode_responses": decode_responses,
            # Connection related kwargs
            "health_check_interval": health_check_interval,
            "socket_connect_timeout": socket_connect_timeout,
            "socket_keepalive": socket_keepalive,
            "socket_keepalive_options": socket_keepalive_options,
            "socket_read_size": socket_read_size,
            "socket_timeout": socket_timeout,
            "protocol": protocol,
            "legacy_responses": legacy_responses,
        }

        if ssl:
            # SSL related kwargs
            kwargs.update(
                {
                    "connection_class": SSLConnection,
                    "ssl_ca_certs": ssl_ca_certs,
                    "ssl_ca_data": ssl_ca_data,
                    "ssl_cert_reqs": ssl_cert_reqs,
                    "ssl_include_verify_flags": ssl_include_verify_flags,
                    "ssl_exclude_verify_flags": ssl_exclude_verify_flags,
                    "ssl_certfile": ssl_certfile,
                    "ssl_check_hostname": ssl_check_hostname,
                    "ssl_keyfile": ssl_keyfile,
                    "ssl_min_version": ssl_min_version,
                    "ssl_ciphers": ssl_ciphers,
                }
            )

        if read_from_replicas or load_balancing_strategy:
            # Call our on_connect function to configure READONLY mode
            kwargs["redis_connect_func"] = self.on_connect

        if retry:
            self.retry = retry
        else:
            self.retry = Retry(
                backoff=ExponentialWithJitterBackoff(
                    base=DEFAULT_RETRY_BASE, cap=DEFAULT_RETRY_CAP
                ),
                retries=cluster_error_retry_attempts,
            )
        if retry_on_error:
            self.retry.update_supported_errors(retry_on_error)

        kwargs["response_callbacks"] = get_response_callbacks(
            user_protocol=kwargs.get("protocol"),
            legacy_responses=kwargs.get("legacy_responses", True),
        )
        if not kwargs.get("legacy_responses", True):
            kwargs["response_callbacks"]["CLUSTER SHARDS"] = (
                parse_cluster_shards_unified
            )
        elif kwargs.get("protocol") is None:
            kwargs["response_callbacks"]["CLUSTER SHARDS"] = (
                parse_cluster_shards_with_str_keys
            )
        else:
            kwargs["response_callbacks"]["CLUSTER SHARDS"] = parse_cluster_shards
        self.connection_kwargs = kwargs

        if startup_nodes:
            passed_nodes = []
            for node in startup_nodes:
                passed_nodes.append(
                    ClusterNode(node.host, node.port, **self.connection_kwargs)
                )
            startup_nodes = passed_nodes
        else:
            startup_nodes = []
        if host and port:
            startup_nodes.append(ClusterNode(host, port, **self.connection_kwargs))

        if event_dispatcher is None:
            self._event_dispatcher = EventDispatcher()
        else:
            self._event_dispatcher = event_dispatcher

        self.startup_nodes = startup_nodes
        self.nodes_manager = NodesManager(
            startup_nodes,
            require_full_coverage,
            kwargs,
            dynamic_startup_nodes=dynamic_startup_nodes,
            address_remap=address_remap,
            event_dispatcher=self._event_dispatcher,
        )
        self.encoder = Encoder(encoding, encoding_errors, decode_responses)
        self.read_from_replicas = read_from_replicas
        self.load_balancing_strategy = load_balancing_strategy
        self.reinitialize_steps = reinitialize_steps
        self.reinitialize_counter = 0

        # For backward compatibility, mapping from existing policies to new one
        self._command_flags_mapping: dict[str, Union[RequestPolicy, ResponsePolicy]] = {
            self.__class__.RANDOM: RequestPolicy.DEFAULT_KEYLESS,
            self.__class__.PRIMARIES: RequestPolicy.ALL_SHARDS,
            self.__class__.ALL_NODES: RequestPolicy.ALL_NODES,
            self.__class__.REPLICAS: RequestPolicy.ALL_REPLICAS,
            self.__class__.DEFAULT_NODE: RequestPolicy.DEFAULT_NODE,
            SLOT_ID: RequestPolicy.DEFAULT_KEYED,
        }

        self._policies_callback_mapping: dict[
            Union[RequestPolicy, ResponsePolicy], Callable
        ] = {
            RequestPolicy.DEFAULT_KEYLESS: lambda command_name: [
                self.get_random_primary_or_all_nodes(command_name)
            ],
            RequestPolicy.DEFAULT_KEYED: self.get_nodes_from_slot,
            RequestPolicy.DEFAULT_NODE: lambda: [self.get_default_node()],
            RequestPolicy.ALL_SHARDS: self.get_primaries,
            RequestPolicy.ALL_NODES: self.get_nodes,
            RequestPolicy.ALL_REPLICAS: self.get_replicas,
            RequestPolicy.SPECIAL: self.get_special_nodes,
            ResponsePolicy.DEFAULT_KEYLESS: lambda res: res,
            ResponsePolicy.DEFAULT_KEYED: lambda res: res,
        }

        self._policy_resolver = policy_resolver
        self.commands_parser = AsyncCommandsParser()
        self._aggregate_nodes = None
        self.node_flags = self.__class__.NODE_FLAGS.copy()
        self.command_flags = self.__class__.COMMAND_FLAGS.copy()
        self.response_callbacks = kwargs["response_callbacks"]
        self.result_callbacks = self.__class__.RESULT_CALLBACKS.copy()
        self.result_callbacks["CLUSTER SLOTS"] = (
            lambda cmd, res, **kwargs: parse_cluster_slots(
                list(res.values())[0], **kwargs
            )
        )

        self._initialize = True
        self._lock: Optional[asyncio.Lock] = None

        # When used as an async context manager, we need to increment and decrement
        # a usage counter so that we can close the connection pool when no one is
        # using the client.
        self._usage_counter = 0
        self._usage_lock = asyncio.Lock()

    async def initialize(
        self,
        additional_startup_nodes_info: Optional[List[Tuple[str, int]]] = None,
        last_failed_node_name: Optional[str] = None,
    ) -> "RedisCluster":
        """Get all nodes from startup nodes & creates connections if not initialized."""
        if self._initialize:
            if not self._lock:
                self._lock = asyncio.Lock()
            async with self._lock:
                if self._initialize:
                    try:
                        await self.nodes_manager.initialize(
                            additional_startup_nodes_info=additional_startup_nodes_info,
                            last_failed_node_name=last_failed_node_name,
                        )
                        await self.commands_parser.initialize(
                            self.nodes_manager.default_node
                        )
                        self._initialize = False
                    except BaseException:
                        await self.nodes_manager.aclose()
                        await self.nodes_manager.aclose("startup_nodes")
                        raise
        return self

    async def aclose(self) -> None:
        """Close all connections & client if initialized."""
        if not self._initialize:
            if not self._lock:
                self._lock = asyncio.Lock()
            async with self._lock:
                if not self._initialize:
                    self._initialize = True
                    await self.nodes_manager.aclose()
                    await self.nodes_manager.aclose("startup_nodes")

    @deprecated_function(version="5.0.0", reason="Use aclose() instead", name="close")
    async def close(self) -> None:
        """alias for aclose() for backwards compatibility"""
        await self.aclose()

    async def __aenter__(self) -> "RedisCluster":
        """
        Async context manager entry. Increments a usage counter so that the
        connection pool is only closed (via aclose()) when no context is using
        the client.
        """
        await self._increment_usage()
        try:
            # Initialize the client (i.e. establish connection, etc.)
            return await self.initialize()
        except Exception:
            # If initialization fails, decrement the counter to keep it in sync
            await self._decrement_usage()
            raise

    async def _increment_usage(self) -> int:
        """
        Helper coroutine to increment the usage counter while holding the lock.
        Returns the new value of the usage counter.
        """
        async with self._usage_lock:
            self._usage_counter += 1
            return self._usage_counter

    async def _decrement_usage(self) -> int:
        """
        Helper coroutine to decrement the usage counter while holding the lock.
        Returns the new value of the usage counter.
        """
        async with self._usage_lock:
            self._usage_counter -= 1
            return self._usage_counter

    async def __aexit__(self, exc_type, exc_value, traceback):
        """
        Async context manager exit. Decrements a usage counter. If this is the
        last exit (counter becomes zero), the client closes its connection pool.
        """
        current_usage = await asyncio.shield(self._decrement_usage())
        if current_usage == 0:
            # This was the last active context, so disconnect the pool.
            await asyncio.shield(self.aclose())

    def __await__(self) -> Generator[Any, None, "RedisCluster"]:
        return self.initialize().__await__()

    _DEL_MESSAGE = "Unclosed RedisCluster client"

    def __del__(
        self,
        _warn: Any = warnings.warn,
        _grl: Any = asyncio.get_running_loop,
    ) -> None:
        if hasattr(self, "_initialize") and not self._initialize:
            _warn(f"{self._DEL_MESSAGE} {self!r}", ResourceWarning, source=self)
            try:
                context = {"client": self, "message": self._DEL_MESSAGE}
                _grl().call_exception_handler(context)
            except RuntimeError:
                pass

    async def on_connect(self, connection: Connection) -> None:
        await connection.on_connect()

        # Sending READONLY command to server to configure connection as
        # readonly. Since each cluster node may change its server type due
        # to a failover, we should establish a READONLY connection
        # regardless of the server type. If this is a primary connection,
        # READONLY would not affect executing write commands.
        await connection.send_command("READONLY")
        if str_if_bytes(await connection.read_response()) != "OK":
            raise ConnectionError("READONLY command failed")

    def get_nodes(self) -> List["ClusterNode"]:
        """Get all nodes of the cluster."""
        return list(self.nodes_manager.nodes_cache.values())

    def get_primaries(self) -> List["ClusterNode"]:
        """Get the primary nodes of the cluster."""
        return self.nodes_manager.get_nodes_by_server_type(PRIMARY)

    def get_replicas(self) -> List["ClusterNode"]:
        """Get the replica nodes of the cluster."""
        return self.nodes_manager.get_nodes_by_server_type(REPLICA)

    def get_random_node(self) -> "ClusterNode":
        """Get a random node of the cluster."""
        return random.choice(list(self.nodes_manager.nodes_cache.values()))

    def get_default_node(self) -> "ClusterNode":
        """Get the default node of the client."""
        return self.nodes_manager.default_node

    def set_default_node(self, node: "ClusterNode") -> None:
        """
        Set the default node of the client.

        :raises DataError: if None is passed or node does not exist in cluster.
        """
        if not node or not self.get_node(node_name=node.name):
            raise DataError("The requested node does not exist in the cluster.")

        self.nodes_manager.default_node = node

    def get_node(
        self,
        host: Optional[str] = None,
        port: Optional[int] = None,
        node_name: Optional[str] = None,
    ) -> Optional["ClusterNode"]:
        """Get node by (host, port) or node_name."""
        return self.nodes_manager.get_node(host, port, node_name)

    def get_node_from_key(
        self, key: str, replica: bool = False
    ) -> Optional["ClusterNode"]:
        """
        Get the cluster node corresponding to the provided key.

        :param key:
        :param replica:
            | Indicates if a replica should be returned
            |
              None will returned if no replica holds this key

        :raises SlotNotCoveredError: if the key is not covered by any slot.
        """
        slot = self.keyslot(key)
        slot_cache = self.nodes_manager.slots_cache.get(slot)
        if not slot_cache:
            raise SlotNotCoveredError(f'Slot "{slot}" is not covered by the cluster.')

        if replica:
            if len(self.nodes_manager.slots_cache[slot]) < 2:
                return None
            node_idx = 1
        else:
            node_idx = 0

        return slot_cache[node_idx]

    def get_random_primary_or_all_nodes(self, command_name):
        """
        Returns random primary or all nodes depends on READONLY mode.
        """
        if self.read_from_replicas and command_name in READ_COMMANDS:
            return self.get_random_node()

        return self.get_random_primary_node()

    def get_random_primary_node(self) -> "ClusterNode":
        """
        Returns a random primary node
        """
        return random.choice(self.get_primaries())

    async def get_nodes_from_slot(self, command: str, *args):
        """
        Returns a list of nodes that hold the specified keys' slots.
        """
        # get the node that holds the key's slot
        return [
            self.nodes_manager.get_node_from_slot(
                await self._determine_slot(command, *args),
                self.read_from_replicas and command in READ_COMMANDS,
                self.load_balancing_strategy if command in READ_COMMANDS else None,
            )
        ]

    def get_special_nodes(self) -> Optional[list["ClusterNode"]]:
        """
        Returns a list of nodes for commands with a special policy.
        """
        if not self._aggregate_nodes:
            raise RedisClusterException(
                "Cannot execute FT.CURSOR commands without FT.AGGREGATE"
            )

        return self._aggregate_nodes

    def keyslot(self, key: EncodableT) -> int:
        """
        Find the keyslot for a given key.

        See: https://redis.io/docs/manual/scaling/#redis-cluster-data-sharding
        """
        return key_slot(self.encoder.encode(key))

    def get_encoder(self) -> Encoder:
        """Get the encoder object of the client."""
        return self.encoder

    def get_connection_kwargs(self) -> Dict[str, Optional[Any]]:
        """Get the kwargs passed to :class:`~redis.asyncio.connection.Connection`."""
        return self.connection_kwargs

    def set_retry(self, retry: Retry) -> None:
        self.retry = retry

    def set_response_callback(self, command: str, callback: ResponseCallbackT) -> None:
        """Set a custom response callback."""
        self.response_callbacks[command] = callback

    async def _determine_nodes(
        self,
        command: str,
        *args: Any,
        request_policy: RequestPolicy,
        node_flag: Optional[str] = None,
    ) -> List["ClusterNode"]:
        # Determine which nodes should be executed the command on.
        # Returns a list of target nodes.
        if not node_flag:
            # get the nodes group for this command if it was predefined
            node_flag = self.command_flags.get(command)

        if node_flag in self._command_flags_mapping:
            request_policy = self._command_flags_mapping[node_flag]

        policy_callback = self._policies_callback_mapping[request_policy]

        if request_policy == RequestPolicy.DEFAULT_KEYED:
            nodes = await policy_callback(command, *args)
        elif request_policy == RequestPolicy.DEFAULT_KEYLESS:
            nodes = policy_callback(command)
        else:
            nodes = policy_callback()

        if command.lower() == "ft.aggregate":
            self._aggregate_nodes = nodes

        return nodes

    async def _determine_slot(self, command: str, *args: Any) -> int:
        if self.command_flags.get(command) == SLOT_ID:
            # The command contains the slot ID
            return int(args[0])

        # Get the keys in the command

        # EVAL and EVALSHA are common enough that it's wasteful to go to the
        # redis server to parse the keys. Besides, there is a bug in redis<7.0
        # where `self._get_command_keys()` fails anyway. So, we special case
        # EVAL/EVALSHA.
        # - issue: https://github.com/redis/redis/issues/9493
        # - fix: https://github.com/redis/redis/pull/9733
        if command.upper() in ("EVAL", "EVALSHA"):
            # command syntax: EVAL "script body" num_keys ...
            if len(args) < 2:
                raise RedisClusterException(
                    f"Invalid args in command: {command, *args}"
                )
            keys = args[2 : 2 + int(args[1])]
            # if there are 0 keys, that means the script can be run on any node
            # so we can just return a random slot
            if not keys:
                return random.randrange(0, REDIS_CLUSTER_HASH_SLOTS)
        else:
            keys = await self.commands_parser.get_keys(command, *args)
            if not keys:
                # FCALL can call a function with 0 keys, that means the function
                #  can be run on any node so we can just return a random slot
                if command.upper() in ("FCALL", "FCALL_RO"):
                    return random.randrange(0, REDIS_CLUSTER_HASH_SLOTS)
                raise RedisClusterException(
                    "No way to dispatch this command to Redis Cluster. "
                    "Missing key.\nYou can execute the command by specifying "
                    f"target nodes.\nCommand: {args}"
                )

        # single key command
        if len(keys) == 1:
            return self.keyslot(keys[0])

        # multi-key command; we need to make sure all keys are mapped to
        # the same slot
        slots = {self.keyslot(key) for key in keys}
        if len(slots) != 1:
            raise RedisClusterException(
                f"{command} - all keys must map to the same key slot"
            )

        return slots.pop()

    def _is_node_flag(self, target_nodes: Any) -> bool:
        return isinstance(target_nodes, str) and target_nodes in self.node_flags

    def _parse_target_nodes(self, target_nodes: Any) -> List["ClusterNode"]:
        if isinstance(target_nodes, list):
            nodes = target_nodes
        elif isinstance(target_nodes, ClusterNode):
            # Supports passing a single ClusterNode as a variable
            nodes = [target_nodes]
        elif isinstance(target_nodes, dict):
            # Supports dictionaries of the format {node_name: node}.
            # It enables to execute commands with multi nodes as follows:
            # rc.cluster_save_config(rc.get_primaries())
            nodes = list(target_nodes.values())
        else:
            raise TypeError(
                "target_nodes type can be one of the following: "
                "node_flag (PRIMARIES, REPLICAS, RANDOM, ALL_NODES),"
                "ClusterNode, list<ClusterNode>, or dict<any, ClusterNode>. "
                f"The passed type is {type(target_nodes)}"
            )
        return nodes

    async def _record_error_metric(
        self,
        error: Exception,
        connection: Union[Connection, "ClusterNode"],
        is_internal: bool = True,
        retry_attempts: Optional[int] = None,
    ):
        """
        Records error count metric directly.
        Accepts either a Connection or ClusterNode object.
        """
        await record_error_count(
            server_address=connection.host,
            server_port=connection.port,
            network_peer_address=connection.host,
            network_peer_port=connection.port,
            error_type=error,
            retry_attempts=retry_attempts if retry_attempts is not None else 0,
            is_internal=is_internal,
        )

    async def _record_command_metric(
        self,
        command_name: str,
        duration_seconds: float,
        connection: Union[Connection, "ClusterNode"],
        error: Optional[Exception] = None,
    ):
        """
        Records operation duration metric directly.
        Accepts either a Connection or ClusterNode object.
        """
        # Connection has db attribute, ClusterNode has connection_kwargs
        if hasattr(connection, "db"):
            db = connection.db
        else:
            db = connection.connection_kwargs.get("db", 0)
        await record_operation_duration(
            command_name=command_name,
            duration_seconds=duration_seconds,
            server_address=connection.host,
            server_port=connection.port,
            db_namespace=str(db) if db is not None else None,
            error=error,
        )

    async def execute_command(self, *args: EncodableT, **kwargs: Any) -> Any:
        """
        Execute a raw command on the appropriate cluster node or target_nodes.

        It will retry the command as specified by the retries property of
        the :attr:`retry` & then raise an exception.

        :param args:
            | Raw command args
        :param kwargs:

            - target_nodes: :attr:`NODE_FLAGS` or :class:`~.ClusterNode`
              or List[:class:`~.ClusterNode`] or Dict[Any, :class:`~.ClusterNode`]
            - Rest of the kwargs are passed to the Redis connection

        :raises RedisClusterException: if target_nodes is not provided & the command
            can't be mapped to a slot
        """
        command = args[0]
        target_nodes = []
        target_nodes_specified = False
        retry_attempts = self.retry.get_retries()

        passed_targets = kwargs.pop("target_nodes", None)
        if passed_targets and not self._is_node_flag(passed_targets):
            target_nodes = self._parse_target_nodes(passed_targets)
            target_nodes_specified = True
            retry_attempts = 0

        command_policies = await self._policy_resolver.resolve(args[0].lower())

        if not command_policies and not target_nodes_specified:
            command_flag = self.command_flags.get(command)
            if not command_flag:
                # Fallback to default policy
                if not self.get_default_node():
                    slot = None
                else:
                    slot = await self._determine_slot(*args)
                if slot is None:
                    command_policies = CommandPolicies()
                else:
                    command_policies = CommandPolicies(
                        request_policy=RequestPolicy.DEFAULT_KEYED,
                        response_policy=ResponsePolicy.DEFAULT_KEYED,
                    )
            else:
                if command_flag in self._command_flags_mapping:
                    command_policies = CommandPolicies(
                        request_policy=self._command_flags_mapping[command_flag]
                    )
                else:
                    command_policies = CommandPolicies()
        elif not command_policies and target_nodes_specified:
            command_policies = CommandPolicies()

        # Add one for the first execution
        execute_attempts = 1 + retry_attempts
        failure_count = 0

        # Start timing for observability
        start_time = time.monotonic()
        last_failed_node_name = None

        for _ in range(execute_attempts):
            if self._initialize:
                await self.initialize(last_failed_node_name=last_failed_node_name)
                last_failed_node_name = None
                if (
                    len(target_nodes) == 1
                    and target_nodes[0] == self.get_default_node()
                ):
                    # Replace the default cluster node
                    self.replace_default_node()
            try:
                if not target_nodes_specified:
                    # Determine the nodes to execute the command on
                    target_nodes = await self._determine_nodes(
                        *args,
                        request_policy=command_policies.request_policy,
                        node_flag=passed_targets,
                    )
                    if not target_nodes:
                        raise RedisClusterException(
                            f"No targets were found to execute {args} command on"
                        )

                if len(target_nodes) == 1:
                    # Return the processed result
                    ret = await self._execute_command(target_nodes[0], *args, **kwargs)
                    if command in self.result_callbacks:
                        ret = self.result_callbacks[command](
                            command, {target_nodes[0].name: ret}, **kwargs
                        )
                    return self._policies_callback_mapping[
                        command_policies.response_policy
                    ](ret)
                else:
                    keys = [node.name for node in target_nodes]
                    values = await asyncio.gather(
                        *(
                            asyncio.create_task(
                                self._execute_command(node, *args, **kwargs)
                            )
                            for node in target_nodes
                        )
                    )
                    if command in self.result_callbacks:
                        return self.result_callbacks[command](
                            command, dict(zip(keys, values)), **kwargs
                        )
                    return self._policies_callback_mapping[
                        command_policies.response_policy
                    ](dict(zip(keys, values)))
            except Exception as e:
                if retry_attempts > 0 and type(e) in self.__class__.ERRORS_ALLOW_RETRY:
                    # The nodes and slots cache were should be reinitialized.
                    # Try again with the new cluster setup.
                    retry_attempts -= 1
                    failure_count += 1
                    last_failed_node_name = getattr(e, "last_failed_node_name", None)

                    if hasattr(e, "connection"):
                        await self._record_command_metric(
                            command_name=command,
                            duration_seconds=time.monotonic() - start_time,
                            connection=e.connection,
                            error=e,
                        )
                        await self._record_error_metric(
                            error=e,
                            connection=e.connection,
                            retry_attempts=failure_count,
                        )
                    continue
                else:
                    # raise the exception
                    if hasattr(e, "connection"):
                        await self._record_error_metric(
                            error=e,
                            connection=e.connection,
                            retry_attempts=failure_count,
                            is_internal=False,
                        )
                    raise e

    async def _execute_command(
        self, target_node: "ClusterNode", *args: Union[KeyT, EncodableT], **kwargs: Any
    ) -> Any:
        asking = moved = False
        redirect_addr = None
        ttl = self.RedisClusterRequestTTL
        command = args[0]
        start_time = time.monotonic()

        while ttl > 0:
            ttl -= 1
            try:
                if asking:
                    target_node = self.get_node(node_name=redirect_addr)
                    await target_node.execute_command("ASKING")
                    asking = False
                elif moved:
                    # MOVED occurred and the slots cache was updated,
                    # refresh the target node
                    slot = await self._determine_slot(*args)
                    target_node = self.nodes_manager.get_node_from_slot(
                        slot,
                        self.read_from_replicas and args[0] in READ_COMMANDS,
                        self.load_balancing_strategy
                        if args[0] in READ_COMMANDS
                        else None,
                    )
                    moved = False

                response = await target_node.execute_command(*args, **kwargs)
                await self._record_command_metric(
                    command_name=command,
                    duration_seconds=time.monotonic() - start_time,
                    connection=target_node,
                )
                return response
            except BusyLoadingError as e:
                e.connection = target_node
                await self._record_command_metric(
                    command_name=command,
                    duration_seconds=time.monotonic() - start_time,
                    connection=target_node,
                    error=e,
                )
                raise
            except MaxConnectionsError as e:
                # MaxConnectionsError indicates client-side resource exhaustion
                # (too many connections in the pool), not a node failure.
                # Don't treat this as a node failure - just re-raise the error
                # without reinitializing the cluster.
                e.connection = target_node
                await self._record_command_metric(
                    command_name=command,
                    duration_seconds=time.monotonic() - start_time,
                    connection=target_node,
                    error=e,
                )
                raise
            except (ConnectionError, TimeoutError) as e:
                # Connection retries are being handled in the node's
                # Retry object.
                # Mark active connections for reconnect and disconnect free ones
                # This handles connection state (like READONLY) that may be stale
                target_node.update_active_connections_for_reconnect()
                await target_node.disconnect_free_connections()

                # Move the failed node to the end of the cached nodes list
                # so it's tried last during reinitialization
                self.nodes_manager.move_node_to_end_of_cached_nodes(target_node.name)
                e.last_failed_node_name = target_node.name

                # Signal that reinitialization is needed
                # The retry loop will handle initialize() AND replace_default_node()
                self._initialize = True
                e.connection = target_node
                await self._record_command_metric(
                    command_name=command,
                    duration_seconds=time.monotonic() - start_time,
                    connection=target_node,
                    error=e,
                )
                raise
            except (ClusterDownError, SlotNotCoveredError) as e:
                # ClusterDownError can occur during a failover and to get
                # self-healed, we will try to reinitialize the cluster layout
                # and retry executing the command

                # SlotNotCoveredError can occur when the cluster is not fully
                # initialized or can be temporary issue.
                # We will try to reinitialize the cluster topology
                # and retry executing the command

                await self.aclose()
                await asyncio.sleep(0.25)
                e.connection = target_node
                await self._record_command_metric(
                    command_name=command,
                    duration_seconds=time.monotonic() - start_time,
                    connection=target_node,
                    error=e,
                )
                raise
            except MovedError as e:
                # First, we will try to patch the slots/nodes cache with the
                # redirected node output and try again. If MovedError exceeds
                # 'reinitialize_steps' number of times, we will force
                # reinitializing the tables, and then try again.
                # 'reinitialize_steps' counter will increase faster when
                # the same client object is shared between multiple threads. To
                # reduce the frequency you can set this variable in the
                # RedisCluster constructor.
                self.reinitialize_counter += 1
                if (
                    self.reinitialize_steps
                    and self.reinitialize_counter % self.reinitialize_steps == 0
                ):
                    await self.aclose()
                    # Reset the counter
                    self.reinitialize_counter = 0
                else:
                    await self.nodes_manager.move_slot(e)
                moved = True
                await self._record_command_metric(
                    command_name=command,
                    duration_seconds=time.monotonic() - start_time,
                    connection=target_node,
                    error=e,
                )
                await self._record_error_metric(
                    error=e,
                    connection=target_node,
                )
            except AskError as e:
                redirect_addr = get_node_name(host=e.host, port=e.port)
                asking = True
                await self._record_command_metric(
                    command_name=command,
                    duration_seconds=time.monotonic() - start_time,
                    connection=target_node,
                    error=e,
                )
                await self._record_error_metric(
                    error=e,
                    connection=target_node,
                )
            except TryAgainError as e:
                if ttl < self.RedisClusterRequestTTL / 2:
                    await asyncio.sleep(0.05)
                await self._record_command_metric(
                    command_name=command,
                    duration_seconds=time.monotonic() - start_time,
                    connection=target_node,
                    error=e,
                )
                await self._record_error_metric(
                    error=e,
                    connection=target_node,
                )
            except ResponseError as e:
                e.connection = target_node
                await self._record_command_metric(
                    command_name=command,
                    duration_seconds=time.monotonic() - start_time,
                    connection=target_node,
                    error=e,
                )
                raise
            except Exception as e:
                e.connection = target_node
                await self._record_command_metric(
                    command_name=command,
                    duration_seconds=time.monotonic() - start_time,
                    connection=target_node,
                    error=e,
                )
                raise

        e = ClusterError("TTL exhausted.")
        e.connection = target_node
        await self._record_command_metric(
            command_name=command,
            duration_seconds=time.monotonic() - start_time,
            connection=target_node,
            error=e,
        )
        raise e

    def pipeline(
        self, transaction: Optional[Any] = None, shard_hint: Optional[Any] = None
    ) -> "ClusterPipeline":
        """
        Create & return a new :class:`~.ClusterPipeline` object.

        Cluster implementation of pipeline does not support transaction or shard_hint.

        :raises RedisClusterException: if transaction or shard_hint are truthy values
        """
        if shard_hint:
            raise RedisClusterException("shard_hint is deprecated in cluster mode")

        return ClusterPipeline(self, transaction)

    def pubsub(
        self,
        node: Optional["ClusterNode"] = None,
        host: Optional[str] = None,
        port: Optional[int] = None,
        **kwargs: Any,
    ) -> "ClusterPubSub":
        """
        Create and return a ClusterPubSub instance.

        Allows passing a ClusterNode, or host&port, to get a pubsub instance
        connected to the specified node

        :param node: ClusterNode to connect to
        :param host: Host of the node to connect to
        :param port: Port of the node to connect to
        :param kwargs: Additional keyword arguments
        :return: ClusterPubSub instance
        """
        return ClusterPubSub(self, node=node, host=host, port=port, **kwargs)

    def keyspace_notifications(
        self,
        key_prefix: Union[str, bytes, None] = None,
        ignore_subscribe_messages: bool = True,
    ) -> "AsyncClusterKeyspaceNotifications":
        """
        Return an
        :class:`~redis.asyncio.keyspace_notifications.AsyncClusterKeyspaceNotifications`
        object for subscribing to keyspace and keyevent notifications across
        all primary nodes in the cluster.

        Note: Keyspace notifications must be enabled on all Redis cluster nodes
        via the ``notify-keyspace-events`` configuration option.

        Args:
            key_prefix: Optional prefix to filter and strip from keys in
                        notifications.
            ignore_subscribe_messages: If True, subscribe/unsubscribe
                                      confirmations are not returned by
                                      get_message/listen.
        """
        from redis.asyncio.keyspace_notifications import (
            AsyncClusterKeyspaceNotifications,
        )

        return AsyncClusterKeyspaceNotifications(
            self,
            key_prefix=key_prefix,
            ignore_subscribe_messages=ignore_subscribe_messages,
        )

    def lock(
        self,
        name: KeyT,
        timeout: Optional[float] = None,
        sleep: float = 0.1,
        blocking: bool = True,
        blocking_timeout: Optional[float] = None,
        lock_class: Optional[Type[Lock]] = None,
        thread_local: bool = True,
        raise_on_release_error: bool = True,
    ) -> Lock:
        """
        Return a new Lock object using key ``name`` that mimics
        the behavior of threading.Lock.

        If specified, ``timeout`` indicates a maximum life for the lock.
        By default, it will remain locked until release() is called.

        ``sleep`` indicates the amount of time to sleep per loop iteration
        when the lock is in blocking mode and another client is currently
        holding the lock.

        ``blocking`` indicates whether calling ``acquire`` should block until
        the lock has been acquired or to fail immediately, causing ``acquire``
        to return False and the lock not being acquired. Defaults to True.
        Note this value can be overridden by passing a ``blocking``
        argument to ``acquire``.

        ``blocking_timeout`` indicates the maximum amount of time in seconds to
        spend trying to acquire the lock. A value of ``None`` indicates
        continue trying forever. ``blocking_timeout`` can be specified as a
        float or integer, both representing the number of seconds to wait.

        ``lock_class`` forces the specified lock implementation. Note that as
        of redis-py 3.0, the only lock class we implement is ``Lock`` (which is
        a Lua-based lock). So, it's unlikely you'll need this parameter, unless
        you have created your own custom lock class.

        ``thread_local`` indicates whether the lock token is placed in
        thread-local storage. By default, the token is placed in thread local
        storage so that a thread only sees its token, not a token set by
        another thread. Consider the following timeline:

            time: 0, thread-1 acquires `my-lock`, with a timeout of 5 seconds.
                     thread-1 sets the token to "abc"
            time: 1, thread-2 blocks trying to acquire `my-lock` using the
                     Lock instance.
            time: 5, thread-1 has not yet completed. redis expires the lock
                     key.
            time: 5, thread-2 acquired `my-lock` now that it's available.
                     thread-2 sets the token to "xyz"
            time: 6, thread-1 finishes its work and calls release(). if the
                     token is *not* stored in thread local storage, then
                     thread-1 would see the token value as "xyz" and would be
                     able to successfully release the thread-2's lock.

        ``raise_on_release_error`` indicates whether to raise an exception when
        the lock is no longer owned when exiting the context manager. By default,
        this is True, meaning an exception will be raised. If False, the warning
        will be logged and the exception will be suppressed.

        In some use cases it's necessary to disable thread local storage. For
        example, if you have code where one thread acquires a lock and passes
        that lock instance to a worker thread to release later. If thread
        local storage isn't disabled in this case, the worker thread won't see
        the token set by the thread that acquired the lock. Our assumption
        is that these cases aren't common and as such default to using
        thread local storage."""
        if lock_class is None:
            lock_class = Lock
        return lock_class(
            self,
            name,
            timeout=timeout,
            sleep=sleep,
            blocking=blocking,
            blocking_timeout=blocking_timeout,
            thread_local=thread_local,
            raise_on_release_error=raise_on_release_error,
        )

    async def transaction(
        self, func: Coroutine[None, "ClusterPipeline", Any], *watches, **kwargs
    ):
        """
        Convenience method for executing the callable `func` as a transaction
        while watching all keys specified in `watches`. The 'func' callable
        should expect a single argument which is a Pipeline object.
        """
        shard_hint = kwargs.pop("shard_hint", None)
        value_from_callable = kwargs.pop("value_from_callable", False)
        watch_delay = kwargs.pop("watch_delay", None)
        async with self.pipeline(True, shard_hint) as pipe:
            while True:
                try:
                    if watches:
                        await pipe.watch(*watches)
                    func_value = await func(pipe)
                    exec_value = await pipe.execute()
                    return func_value if value_from_callable else exec_value
                except WatchError:
                    if watch_delay is not None and watch_delay > 0:
                        time.sleep(watch_delay)
                    continue


class ClusterNode:
    """
    Create a new ClusterNode.

    Each ClusterNode manages multiple :class:`~redis.asyncio.connection.Connection`
    objects for the (host, port).
    """

    __slots__ = (
        "_background_tasks",
        "_connections",
        "_free",
        "_lock",
        "_event_dispatcher",
        "connection_class",
        "connection_kwargs",
        "host",
        "max_connections",
        "name",
        "port",
        "response_callbacks",
        "server_type",
    )

    def __init__(
        self,
        host: str,
        port: Union[str, int],
        server_type: Optional[str] = None,
        *,
        max_connections: int = 100,
        connection_class: Type[Connection] = Connection,
        **connection_kwargs: Any,
    ) -> None:
        if host == "localhost":
            host = socket.gethostbyname(host)

        connection_kwargs["host"] = host
        connection_kwargs["port"] = port
        self.host = host
        self.port = port
        self.name = get_node_name(host, port)
        self.server_type = server_type

        self.max_connections = max_connections
        self.connection_class = connection_class
        self.connection_kwargs = connection_kwargs
        self.response_callbacks = connection_kwargs.pop("response_callbacks", {})

        self._connections: List[Connection] = []
        self._free: Deque[Connection] = collections.deque(maxlen=self.max_connections)
        self._background_tasks: Set[asyncio.Task] = set()
        self._event_dispatcher = self.connection_kwargs.get("event_dispatcher", None)
        if self._event_dispatcher is None:
            self._event_dispatcher = EventDispatcher()

    def __repr__(self) -> str:
        return (
            f"[host={self.host}, port={self.port}, "
            f"name={self.name}, server_type={self.server_type}]"
        )

    def __eq__(self, obj: Any) -> bool:
        return isinstance(obj, ClusterNode) and obj.name == self.name

    def __hash__(self) -> int:
        return hash(self.name)

    _DEL_MESSAGE = "Unclosed ClusterNode object"

    def __del__(
        self,
        _warn: Any = warnings.warn,
        _grl: Any = asyncio.get_running_loop,
    ) -> None:
        for connection in self._connections:
            if connection.is_connected:
                _warn(f"{self._DEL_MESSAGE} {self!r}", ResourceWarning, source=self)

                try:
                    context = {"client": self, "message": self._DEL_MESSAGE}
                    _grl().call_exception_handler(context)
                except RuntimeError:
                    pass
                break

    async def disconnect(self) -> None:
        ret = await asyncio.gather(
            *(
                asyncio.create_task(connection.disconnect())
                for connection in self._connections
            ),
            return_exceptions=True,
        )
        exc = next((res for res in ret if isinstance(res, Exception)), None)
        if exc:
            raise exc

    def acquire_connection(self) -> Connection:
        try:
            return self._free.popleft()
        except IndexError:
            if len(self._connections) < self.max_connections:
                # We are configuring the connection pool not to retry
                # connections on lower level clients to avoid retrying
                # connections to nodes that are not reachable
                # and to avoid blocking the connection pool.
                # The only error that will have some handling in the lower
                # level clients is ConnectionError which will trigger disconnection
                # of the socket.
                # The retries will be handled on cluster client level
                # where we will have proper handling of the cluster topology
                retry = Retry(
                    backoff=NoBackoff(),
                    retries=0,
                    supported_errors=(ConnectionError,),
                )
                connection_kwargs = self.connection_kwargs.copy()
                connection_kwargs["retry"] = retry
                connection = self.connection_class(**connection_kwargs)
                self._connections.append(connection)
                return connection

            raise MaxConnectionsError()

    async def disconnect_if_needed(self, connection: Connection) -> None:
        """
        Disconnect a connection if it's marked for reconnect.
        This implements lazy disconnection to avoid race conditions.
        The connection will auto-reconnect on next use.
        """
        if connection.should_reconnect():
            await connection.disconnect()

    def release(self, connection: Connection) -> None:
        """
        Release connection back to free queue.
        If the connection is marked for reconnect, disconnect it before
        returning it to the free queue.
        """
        if connection.should_reconnect():
            task = asyncio.create_task(self._disconnect_and_release(connection))
            self._background_tasks.add(task)
            task.add_done_callback(self._background_tasks.discard)
            return
        self._free.append(connection)

    async def _disconnect_and_release(self, connection: Connection) -> None:
        try:
            await connection.disconnect()
        except Exception as exc:
            logger.debug(
                "disconnecting released cluster connection failed: %r",
                exc,
                exc_info=True,
            )
            try:
                self._connections.remove(connection)
            except ValueError:
                pass
            return

        self._free.append(connection)

    def get_encoder(self) -> Encoder:
        """Return an :class:`Encoder` derived from this node's connection kwargs."""
        kwargs = self.connection_kwargs
        encoder_class = kwargs.get("encoder_class", Encoder)
        return encoder_class(
            encoding=kwargs.get("encoding", "utf-8"),
            encoding_errors=kwargs.get("encoding_errors", "strict"),
            decode_responses=kwargs.get("decode_responses", False),
        )

    def update_active_connections_for_reconnect(self) -> None:
        """
        Mark all in-use (active) connections for reconnect.
        In-use connections are those in _connections but not currently in _free.
        They will be disconnected after their current operation completes.
        """
        free_set = set(self._free)
        for connection in self._connections:
            if connection not in free_set:
                connection.mark_for_reconnect()

    async def disconnect_free_connections(self) -> None:
        """
        Disconnect all free/idle connections in the pool.
        This is useful after topology changes (e.g., failover) to clear
        stale connection state like READONLY mode.
        The connections remain in the pool and will reconnect on next use.
        """
        if self._free:
            # Take a snapshot to avoid issues if _free changes during await
            await asyncio.gather(
                *(connection.disconnect() for connection in tuple(self._free)),
                return_exceptions=True,
            )

    async def parse_response(
        self, connection: Connection, command: str, **kwargs: Any
    ) -> Any:
        try:
            if NEVER_DECODE in kwargs:
                response = await connection.read_response(disable_decoding=True)
                kwargs.pop(NEVER_DECODE)
            else:
                response = await connection.read_response()
        except ResponseError:
            if EMPTY_RESPONSE in kwargs:
                return kwargs[EMPTY_RESPONSE]
            raise

        if EMPTY_RESPONSE in kwargs:
            kwargs.pop(EMPTY_RESPONSE)

        # Remove keys entry, it needs only for cache.
        kwargs.pop("keys", None)

        # Return response
        if command in self.response_callbacks:
            return self.response_callbacks[command](response, **kwargs)

        return response

    async def execute_command(self, *args: Any, **kwargs: Any) -> Any:
        # Acquire connection
        connection = self.acquire_connection()
        try:
            # Handle lazy disconnect for connections marked for reconnect
            await self.disconnect_if_needed(connection)

            # Execute command
            await connection.send_packed_command(connection.pack_command(*args))

            # Read response
            return await self.parse_response(connection, args[0], **kwargs)
        finally:
            try:
                await self.disconnect_if_needed(connection)
            finally:
                # Release connection
                self.release(connection)

    async def execute_pipeline(self, commands: List["PipelineCommand"]) -> bool:
        # Acquire connection
        connection = self.acquire_connection()
        try:
            # Handle lazy disconnect for connections marked for reconnect
            await self.disconnect_if_needed(connection)

            # Execute command
            await connection.send_packed_command(
                connection.pack_commands(cmd.args for cmd in commands)
            )

            # Read responses
            ret = False
            for cmd in commands:
                try:
                    cmd.result = await self.parse_response(
                        connection, cmd.args[0], **cmd.kwargs
                    )
                except Exception as e:
                    cmd.result = e
                    ret = True

            return ret
        finally:
            try:
                await self.disconnect_if_needed(connection)
            finally:
                # Release connection
                self.release(connection)

    async def re_auth_callback(self, token: TokenInterface):
        tmp_queue = collections.deque()
        while self._free:
            conn = self._free.popleft()
            await conn.retry.call_with_retry(
                lambda: conn.send_command(
                    "AUTH", token.try_get("oid"), token.get_value()
                ),
                lambda error: self._mock(error),
            )
            await conn.retry.call_with_retry(
                lambda: conn.read_response(), lambda error: self._mock(error)
            )
            tmp_queue.append(conn)

        while tmp_queue:
            conn = tmp_queue.popleft()
            self._free.append(conn)

    async def _mock(self, error: RedisError):
        """
        Dummy functions, needs to be passed as error callback to retry object.
        :param error:
        :return:
        """
        pass


class NodesManager:
    __slots__ = (
        "_dynamic_startup_nodes",
        "_event_dispatcher",
        "_background_tasks",
        "connection_kwargs",
        "default_node",
        "nodes_cache",
        "_epoch",
        "read_load_balancer",
        "_initialize_lock",
        "require_full_coverage",
        "slots_cache",
        "startup_nodes",
        "address_remap",
    )

    def __init__(
        self,
        startup_nodes: List["ClusterNode"],
        require_full_coverage: bool,
        connection_kwargs: Dict[str, Any],
        dynamic_startup_nodes: bool = True,
        address_remap: Optional[Callable[[Tuple[str, int]], Tuple[str, int]]] = None,
        event_dispatcher: Optional[EventDispatcher] = None,
    ) -> None:
        self.startup_nodes = {node.name: node for node in startup_nodes}
        self.require_full_coverage = require_full_coverage
        self.connection_kwargs = connection_kwargs
        self.address_remap = address_remap

        self.default_node: "ClusterNode" = None
        self.nodes_cache: Dict[str, "ClusterNode"] = {}
        self.slots_cache: Dict[int, List["ClusterNode"]] = {}
        self._epoch: int = 0
        self.read_load_balancer = LoadBalancer()
        self._initialize_lock: asyncio.Lock = asyncio.Lock()

        self._background_tasks: Set[asyncio.Task] = set()
        self._dynamic_startup_nodes: bool = dynamic_startup_nodes
        if event_dispatcher is None:
            self._event_dispatcher = EventDispatcher()
        else:
            self._event_dispatcher = event_dispatcher

    def get_node(
        self,
        host: Optional[str] = None,
        port: Optional[int] = None,
        node_name: Optional[str] = None,
    ) -> Optional["ClusterNode"]:
        if host and port:
            # the user passed host and port
            if host == "localhost":
                host = socket.gethostbyname(host)
            return self.nodes_cache.get(get_node_name(host=host, port=port))
        elif node_name:
            return self.nodes_cache.get(node_name)
        else:
            raise DataError(
                "get_node requires one of the following: 1. node name 2. host and port"
            )

    def set_nodes(
        self,
        old: Dict[str, "ClusterNode"],
        new: Dict[str, "ClusterNode"],
        remove_old: bool = False,
    ) -> None:
        if remove_old:
            for name in list(old.keys()):
                if name not in new:
                    # Node is removed from cache before disconnect starts,
                    # so it won't be found in lookups during disconnect
                    # Mark active connections so in-flight commands can
                    # finish, then disconnect them when their current
                    # operation completes. Free connections can be
                    # disconnected immediately.
                    removed_node = old.pop(name)
                    removed_node.update_active_connections_for_reconnect()
                    task = asyncio.create_task(
                        removed_node.disconnect_free_connections()
                    )
                    self._background_tasks.add(task)
                    task.add_done_callback(self._background_tasks.discard)

        for name, node in new.items():
            if name in old:
                # Preserve the existing node but mark connections for reconnect.
                # This method is sync so we can't call disconnect_free_connections()
                # which is async. Instead, we mark free connections for reconnect
                # and they will be lazily disconnected when acquired via
                # disconnect_if_needed() to avoid race conditions.
                # TODO: Make this method async in the next major release to allow
                # immediate disconnection of free connections.
                existing_node = old[name]
                existing_node.server_type = node.server_type
                existing_node.update_active_connections_for_reconnect()
                for conn in existing_node._free:
                    conn.mark_for_reconnect()
                continue
            # New node is detected and should be added to the pool
            old[name] = node

    def move_node_to_end_of_cached_nodes(self, node_name: str) -> None:
        """
        Move a failing node to the end of startup_nodes and nodes_cache so it's
        tried last during reinitialization and when selecting the default node.
        If the node is not in the respective list, nothing is done.
        """
        # Move in startup_nodes
        if node_name in self.startup_nodes and len(self.startup_nodes) > 1:
            node = self.startup_nodes.pop(node_name)
            self.startup_nodes[node_name] = node  # Re-insert at end

        # Move in nodes_cache - this affects get_nodes_by_server_type ordering
        # which is used to select the default_node during initialize()
        if node_name in self.nodes_cache and len(self.nodes_cache) > 1:
            node = self.nodes_cache.pop(node_name)
            self.nodes_cache[node_name] = node  # Re-insert at end

    async def move_slot(self, e: AskError | MovedError):
        node_changed = False
        redirected_node = self.get_node(host=e.host, port=e.port)
        if redirected_node:
            # The node already exists
            if redirected_node.server_type != PRIMARY:
                # Update the node's server type
                redirected_node.server_type = PRIMARY
        else:
            # This is a new node, we will add it to the nodes cache
            redirected_node = ClusterNode(
                e.host, e.port, PRIMARY, **self.connection_kwargs
            )
            self.set_nodes(self.nodes_cache, {redirected_node.name: redirected_node})
        slot_nodes = self.slots_cache[e.slot_id]
        if redirected_node not in slot_nodes:
            # The new slot owner is a new server, or a server from a different
            # shard. We need to remove all current nodes from the slot's list
            # (including replications) and add just the new node.
            self.slots_cache[e.slot_id] = [redirected_node]
            node_changed = True
        elif redirected_node is not slot_nodes[0]:
            # The MOVED error resulted from a failover, and the new slot owner
            # had previously been a replica.
            old_primary = slot_nodes[0]
            # Update the old primary to be a replica and add it to the end of
            # the slot's node list
            old_primary.server_type = REPLICA
            slot_nodes.append(old_primary)
            # Remove the old replica, which is now a primary, from the slot's
            # node list
            slot_nodes.remove(redirected_node)
            # Override the old primary with the new one
            slot_nodes[0] = redirected_node
            if self.default_node == old_primary:
                # Update the default node with the new primary
                self.default_node = redirected_node
            node_changed = True
        # else: circular MOVED to current primary -> no-op
        # Dispatch so listeners can run shard-pubsub reconciliation; skipped on
        # the no-op branch to avoid needless walks under MOVED storms. A
        # listener must not break slots-cache refresh; log and continue so a
        # single buggy listener cannot starve the rest.
        if node_changed:
            try:
                await self._event_dispatcher.dispatch_async(
                    AsyncAfterSlotsCacheRefreshEvent()
                )
            except Exception as exc:
                # Don't shadow the method parameter ``e``: ``except as`` binds
                # the listener exception in the function scope and ``del``s
                # the name on block exit (PEP 3134), which would also wipe
                # out the original AskError/MovedError parameter.
                logger.exception(
                    "listener raised during slots-cache refresh: %s: %s",
                    type(exc).__name__,
                    exc,
                )

    def get_node_from_slot(
        self,
        slot: int,
        read_from_replicas: bool = False,
        load_balancing_strategy=None,
    ) -> "ClusterNode":
        if read_from_replicas is True and load_balancing_strategy is None:
            load_balancing_strategy = LoadBalancingStrategy.ROUND_ROBIN

        try:
            if len(self.slots_cache[slot]) > 1 and load_balancing_strategy:
                # get the server index using the strategy defined in load_balancing_strategy
                primary_name = self.slots_cache[slot][0].name
                node_idx = self.read_load_balancer.get_server_index(
                    primary_name, len(self.slots_cache[slot]), load_balancing_strategy
                )
                return self.slots_cache[slot][node_idx]
            return self.slots_cache[slot][0]
        except (IndexError, TypeError):
            raise SlotNotCoveredError(
                f'Slot "{slot}" not covered by the cluster. '
                f'"require_full_coverage={self.require_full_coverage}"'
            )

    def get_nodes_by_server_type(self, server_type: str) -> List["ClusterNode"]:
        return [
            node
            for node in self.nodes_cache.values()
            if node.server_type == server_type
        ]

    async def initialize(
        self,
        additional_startup_nodes_info: Optional[List[Tuple[str, int]]] = None,
        last_failed_node_name: Optional[str] = None,
    ) -> None:
        self.read_load_balancer.reset()
        tmp_nodes_cache: Dict[str, "ClusterNode"] = {}
        tmp_slots: Dict[int, List["ClusterNode"]] = {}
        disagreements = []
        startup_nodes_reachable = False
        fully_covered = False
        exception = None
        epoch = self._epoch
        if additional_startup_nodes_info is None:
            additional_startup_nodes_info = []

        async with self._initialize_lock:
            if self._epoch != epoch:
                # another initialize call has already reinitialized the
                # nodes since we started waiting for the lock;
                # we don't need to do it again.
                return

            # Copy to a list to prevent RuntimeError if self.startup_nodes
            # is modified during iteration, then shuffle the iteration order.
            startup_nodes = list(self.startup_nodes.values())
            deferred_failed_nodes = []
            if last_failed_node_name is not None:
                for index, node in enumerate(startup_nodes):
                    if node.name == last_failed_node_name:
                        deferred_failed_nodes.append(startup_nodes.pop(index))
                        break
            if len(startup_nodes) > 1:
                # Vary which startup node is queried first so clients do not
                # all reinitialize through the same node.
                random.shuffle(startup_nodes)
            additional_startup_nodes = [
                ClusterNode(host, port, **self.connection_kwargs)
                for host, port in additional_startup_nodes_info
            ]
            if last_failed_node_name is not None:
                for index, node in enumerate(additional_startup_nodes):
                    if node.name == last_failed_node_name:
                        if not deferred_failed_nodes:
                            deferred_failed_nodes.append(node)
                        additional_startup_nodes.pop(index)
                        break
            for startup_node in chain(
                startup_nodes,
                additional_startup_nodes,
                deferred_failed_nodes,
            ):
                try:
                    # Make sure cluster mode is enabled on this node
                    try:
                        self._event_dispatcher.dispatch(
                            AfterAsyncClusterInstantiationEvent(
                                self.nodes_cache,
                                self.connection_kwargs.get("credential_provider", None),
                            )
                        )
                        cluster_slots = await startup_node.execute_command(
                            "CLUSTER SLOTS"
                        )
                    except ResponseError:
                        raise RedisClusterException(
                            "Cluster mode is not enabled on this node"
                        )
                    startup_nodes_reachable = True
                except Exception as e:
                    # Try the next startup node.
                    # The exception is saved and raised only if we have no more nodes.
                    exception = e
                    continue

                # CLUSTER SLOTS command results in the following output:
                # [[slot_section[from_slot,to_slot,master,replica1,...,replicaN]]]
                # where each node contains the following list: [IP, port, node_id]
                # Therefore, cluster_slots[0][2][0] will be the IP address of the
                # primary node of the first slot section.
                # If there's only one server in the cluster, its ``host`` is ''
                # Fix it to the host in startup_nodes
                if (
                    len(cluster_slots) == 1
                    and not cluster_slots[0][2][0]
                    and len(self.startup_nodes) == 1
                ):
                    cluster_slots[0][2][0] = startup_node.host

                for slot in cluster_slots:
                    for i in range(2, len(slot)):
                        slot[i] = [str_if_bytes(val) for val in slot[i]]
                    primary_node = slot[2]
                    host = primary_node[0]
                    if host == "":
                        host = startup_node.host
                    port = int(primary_node[1])
                    host, port = self.remap_host_port(host, port)

                    nodes_for_slot = []

                    target_node = tmp_nodes_cache.get(get_node_name(host, port))
                    if not target_node:
                        target_node = ClusterNode(
                            host, port, PRIMARY, **self.connection_kwargs
                        )
                    # add this node to the nodes cache
                    tmp_nodes_cache[target_node.name] = target_node
                    nodes_for_slot.append(target_node)

                    replica_nodes = slot[3:]
                    for replica_node in replica_nodes:
                        host = replica_node[0]
                        port = replica_node[1]
                        host, port = self.remap_host_port(host, port)

                        target_replica_node = tmp_nodes_cache.get(
                            get_node_name(host, port)
                        )
                        if not target_replica_node:
                            target_replica_node = ClusterNode(
                                host, port, REPLICA, **self.connection_kwargs
                            )
                        # add this node to the nodes cache
                        tmp_nodes_cache[target_replica_node.name] = target_replica_node
                        nodes_for_slot.append(target_replica_node)

                    for i in range(int(slot[0]), int(slot[1]) + 1):
                        if i not in tmp_slots:
                            tmp_slots[i] = nodes_for_slot
                        else:
                            # Validate that 2 nodes want to use the same slot cache
                            # setup
                            tmp_slot = tmp_slots[i][0]
                            if tmp_slot.name != target_node.name:
                                disagreements.append(
                                    f"{tmp_slot.name} vs {target_node.name} on slot: {i}"
                                )

                                if len(disagreements) > 5:
                                    raise RedisClusterException(
                                        f"startup_nodes could not agree on a valid "
                                        f"slots cache: {', '.join(disagreements)}"
                                    )

                # Validate if all slots are covered or if we should try next startup node
                fully_covered = True
                for i in range(REDIS_CLUSTER_HASH_SLOTS):
                    if i not in tmp_slots:
                        fully_covered = False
                        break
                if fully_covered:
                    break

            if not startup_nodes_reachable:
                raise RedisClusterException(
                    f"Redis Cluster cannot be connected. Please provide at least "
                    f"one reachable node: {str(exception)}"
                ) from exception

            # Check if the slots are not fully covered
            if not fully_covered and self.require_full_coverage:
                # Despite the requirement that the slots be covered, there
                # isn't a full coverage
                raise RedisClusterException(
                    f"All slots are not covered after query all startup_nodes. "
                    f"{len(tmp_slots)} of {REDIS_CLUSTER_HASH_SLOTS} "
                    f"covered..."
                )

            # Set the tmp variables to the real variables
            self.set_nodes(self.nodes_cache, tmp_nodes_cache, remove_old=True)
            # tmp_slots was built from CLUSTER SLOTS responses and can contain
            # newly-created ClusterNode objects for nodes we already know about.
            # Rebuild the slots cache with the preserved nodes_cache instances
            # so existing per-node connection pools stay in use after refresh.
            # Keep the shared node-list-per-slot-range shape from tmp_slots to
            # avoid allocating a separate list for every slot.
            node_lists_by_id: Dict[int, List["ClusterNode"]] = {}
            new_slots_cache: Dict[int, List["ClusterNode"]] = {}
            for slot, nodes in tmp_slots.items():
                node_list_id = id(nodes)
                slot_nodes = node_lists_by_id.get(node_list_id)
                if slot_nodes is None:
                    slot_nodes = [self.nodes_cache[node.name] for node in nodes]
                    node_lists_by_id[node_list_id] = slot_nodes
                new_slots_cache[slot] = slot_nodes
            self.slots_cache = new_slots_cache

            if self._dynamic_startup_nodes:
                # Populate the startup nodes with all discovered nodes
                self.set_nodes(self.startup_nodes, self.nodes_cache, remove_old=True)

            # Set the default node
            self.default_node = self.get_nodes_by_server_type(PRIMARY)[0]
            self._epoch += 1
        # Dispatch so listeners (e.g. ClusterPubSub) can reconcile per-node
        # state after slot ownership may have changed. A listener must not
        # break slots-cache refresh; log and continue so a single buggy
        # listener cannot starve the rest.
        try:
            await self._event_dispatcher.dispatch_async(
                AsyncAfterSlotsCacheRefreshEvent()
            )
        except Exception as e:
            logger.exception(
                "listener raised during slots-cache refresh: %s: %s",
                type(e).__name__,
                e,
            )

    async def aclose(self, attr: str = "nodes_cache") -> None:
        self.default_node = None
        await asyncio.gather(
            *(
                asyncio.create_task(node.disconnect())
                for node in getattr(self, attr).values()
            )
        )

    def remap_host_port(self, host: str, port: int) -> Tuple[str, int]:
        """
        Remap the host and port returned from the cluster to a different
        internal value.  Useful if the client is not connecting directly
        to the cluster.
        """
        if self.address_remap:
            return self.address_remap((host, port))
        return host, port


class ClusterPipeline(AbstractRedis, AbstractRedisCluster, AsyncRedisClusterCommands):
    """
    Create a new ClusterPipeline object.

    Usage::

        result = await (
            rc.pipeline()
            .set("A", 1)
            .get("A")
            .hset("K", "F", "V")
            .hgetall("K")
            .mset_nonatomic({"A": 2, "B": 3})
            .get("A")
            .get("B")
            .delete("A", "B", "K")
            .execute()
        )
        # result = [True, "1", 1, {"F": "V"}, True, True, "2", "3", 1, 1, 1]

    Note: For commands `DELETE`, `EXISTS`, `TOUCH`, `UNLINK`, `mset_nonatomic`, which
    are split across multiple nodes, you'll get multiple results for them in the array.

    Retryable errors:
        - :class:`~.ClusterDownError`
        - :class:`~.ConnectionError`
        - :class:`~.TimeoutError`

    Redirection errors:
        - :class:`~.TryAgainError`
        - :class:`~.MovedError`
        - :class:`~.AskError`

    :param client:
        | Existing :class:`~.RedisCluster` client
    """

    __slots__ = (
        "cluster_client",
        "_transaction",
        "_execution_strategy",
    )

    # Type discrimination marker for @overload self-type pattern
    _is_async_client: Literal[True] = True

    def __init__(
        self, client: RedisCluster, transaction: Optional[bool] = None
    ) -> None:
        self.cluster_client = client
        self._transaction = transaction
        self._execution_strategy: ExecutionStrategy = (
            PipelineStrategy(self)
            if not self._transaction
            else TransactionStrategy(self)
        )

    @property
    def nodes_manager(self) -> "NodesManager":
        """Get the nodes manager from the cluster client."""
        return self.cluster_client.nodes_manager

    def set_response_callback(self, command: str, callback: ResponseCallbackT) -> None:
        """Set a custom response callback on the cluster client."""
        self.cluster_client.set_response_callback(command, callback)

    async def initialize(self) -> "ClusterPipeline":
        await self._execution_strategy.initialize()
        return self

    async def __aenter__(self) -> "ClusterPipeline":
        return await self.initialize()

    async def __aexit__(self, exc_type: None, exc_value: None, traceback: None) -> None:
        await self.reset()

    def __await__(self) -> Generator[Any, None, "ClusterPipeline"]:
        return self.initialize().__await__()

    def __bool__(self) -> bool:
        "Pipeline instances should  always evaluate to True on Python 3+"
        return True

    def __len__(self) -> int:
        return len(self._execution_strategy)

    def execute_command(
        self, *args: Union[KeyT, EncodableT], **kwargs: Any
    ) -> "ClusterPipeline":
        """
        Append a raw command to the pipeline.

        :param args:
            | Raw command args
        :param kwargs:

            - target_nodes: :attr:`NODE_FLAGS` or :class:`~.ClusterNode`
              or List[:class:`~.ClusterNode`] or Dict[Any, :class:`~.ClusterNode`]
            - Rest of the kwargs are passed to the Redis connection
        """
        return self._execution_strategy.execute_command(*args, **kwargs)

    async def execute(
        self, raise_on_error: bool = True, allow_redirections: bool = True
    ) -> List[Any]:
        """
        Execute the pipeline.

        It will retry the commands as specified by retries specified in :attr:`retry`
        & then raise an exception.

        :param raise_on_error:
            | Raise the first error if there are any errors
        :param allow_redirections:
            | Whether to retry each failed command individually in case of redirection
              errors

        :raises RedisClusterException: if target_nodes is not provided & the command
            can't be mapped to a slot
        """
        try:
            return await self._execution_strategy.execute(
                raise_on_error, allow_redirections
            )
        finally:
            await self.reset()

    def _split_command_across_slots(
        self, command: str, *keys: KeyT
    ) -> "ClusterPipeline":
        for slot_keys in self.cluster_client._partition_keys_by_slot(keys).values():
            self.execute_command(command, *slot_keys)

        return self

    async def reset(self):
        """
        Reset back to empty pipeline.
        """
        await self._execution_strategy.reset()

    def multi(self):
        """
        Start a transactional block of the pipeline after WATCH commands
        are issued. End the transactional block with `execute`.
        """
        self._execution_strategy.multi()

    async def discard(self):
        """ """
        await self._execution_strategy.discard()

    async def watch(self, *names):
        """Watches the values at keys ``names``"""
        await self._execution_strategy.watch(*names)

    async def unwatch(self):
        """Unwatches all previously specified keys"""
        await self._execution_strategy.unwatch()

    async def unlink(self, *names):
        await self._execution_strategy.unlink(*names)

    def mset_nonatomic(
        self, mapping: Mapping[AnyKeyT, EncodableT]
    ) -> "ClusterPipeline":
        return self._execution_strategy.mset_nonatomic(mapping)


for command in PIPELINE_BLOCKED_COMMANDS:
    command = command.replace(" ", "_").lower()
    if command == "mset_nonatomic":
        continue

    setattr(ClusterPipeline, command, block_pipeline_command(command))


class PipelineCommand:
    def __init__(self, position: int, *args: Any, **kwargs: Any) -> None:
        self.args = args
        self.kwargs = kwargs
        self.position = position
        self.result: Union[Any, Exception] = None
        self.command_policies: Optional[CommandPolicies] = None

    def __repr__(self) -> str:
        return f"[{self.position}] {self.args} ({self.kwargs})"


class ExecutionStrategy(ABC):
    @abstractmethod
    async def initialize(self) -> "ClusterPipeline":
        """
        Initialize the execution strategy.

        See ClusterPipeline.initialize()
        """
        pass

    @abstractmethod
    def execute_command(
        self, *args: Union[KeyT, EncodableT], **kwargs: Any
    ) -> "ClusterPipeline":
        """
        Append a raw command to the pipeline.

        See ClusterPipeline.execute_command()
        """
        pass

    @abstractmethod
    async def execute(
        self, raise_on_error: bool = True, allow_redirections: bool = True
    ) -> List[Any]:
        """
        Execute the pipeline.

        It will retry the commands as specified by retries specified in :attr:`retry`
        & then raise an exception.

        See ClusterPipeline.execute()
        """
        pass

    @abstractmethod
    def mset_nonatomic(
        self, mapping: Mapping[AnyKeyT, EncodableT]
    ) -> "ClusterPipeline":
        """
        Executes multiple MSET commands according to the provided slot/pairs mapping.

        See ClusterPipeline.mset_nonatomic()
        """
        pass

    @abstractmethod
    async def reset(self):
        """
        Resets current execution strategy.

        See: ClusterPipeline.reset()
        """
        pass

    @abstractmethod
    def multi(self):
        """
        Starts transactional context.

        See: ClusterPipeline.multi()
        """
        pass

    @abstractmethod
    async def watch(self, *names):
        """
        Watch given keys.

        See: ClusterPipeline.watch()
        """
        pass

    @abstractmethod
    async def unwatch(self):
        """
        Unwatches all previously specified keys

        See: ClusterPipeline.unwatch()
        """
        pass

    @abstractmethod
    async def discard(self):
        pass

    @abstractmethod
    async def unlink(self, *names):
        """
        "Unlink a key specified by ``names``"

        See: ClusterPipeline.unlink()
        """
        pass

    @abstractmethod
    def __len__(self) -> int:
        pass


class AbstractStrategy(ExecutionStrategy):
    def __init__(self, pipe: ClusterPipeline) -> None:
        self._pipe: ClusterPipeline = pipe
        self._command_queue: List["PipelineCommand"] = []

    async def initialize(self) -> "ClusterPipeline":
        if self._pipe.cluster_client._initialize:
            await self._pipe.cluster_client.initialize()
        self._command_queue = []
        return self._pipe

    def execute_command(
        self, *args: Union[KeyT, EncodableT], **kwargs: Any
    ) -> "ClusterPipeline":
        self._command_queue.append(
            PipelineCommand(len(self._command_queue), *args, **kwargs)
        )
        return self._pipe

    def _annotate_exception(self, exception, number, command):
        """
        Provides extra context to the exception prior to it being handled
        """
        cmd = " ".join(map(safe_str, command))
        msg = (
            f"Command # {number} ({truncate_text(cmd)}) of pipeline "
            f"caused error: {exception.args[0]}"
        )
        exception.args = (msg,) + exception.args[1:]

    @abstractmethod
    def mset_nonatomic(
        self, mapping: Mapping[AnyKeyT, EncodableT]
    ) -> "ClusterPipeline":
        pass

    @abstractmethod
    async def execute(
        self, raise_on_error: bool = True, allow_redirections: bool = True
    ) -> List[Any]:
        pass

    @abstractmethod
    async def reset(self):
        pass

    @abstractmethod
    def multi(self):
        pass

    @abstractmethod
    async def watch(self, *names):
        pass

    @abstractmethod
    async def unwatch(self):
        pass

    @abstractmethod
    async def discard(self):
        pass

    @abstractmethod
    async def unlink(self, *names):
        pass

    def __len__(self) -> int:
        return len(self._command_queue)


class PipelineStrategy(AbstractStrategy):
    def __init__(self, pipe: ClusterPipeline) -> None:
        super().__init__(pipe)

    def mset_nonatomic(
        self, mapping: Mapping[AnyKeyT, EncodableT]
    ) -> "ClusterPipeline":
        encoder = self._pipe.cluster_client.encoder

        slots_pairs = {}
        for pair in mapping.items():
            slot = key_slot(encoder.encode(pair[0]))
            slots_pairs.setdefault(slot, []).extend(pair)

        for pairs in slots_pairs.values():
            self.execute_command("MSET", *pairs)

        return self._pipe

    async def execute(
        self, raise_on_error: bool = True, allow_redirections: bool = True
    ) -> List[Any]:
        if not self._command_queue:
            return []

        try:
            retry_attempts = self._pipe.cluster_client.retry.get_retries()
            while True:
                try:
                    if self._pipe.cluster_client._initialize:
                        await self._pipe.cluster_client.initialize()
                    return await self._execute(
                        self._pipe.cluster_client,
                        self._command_queue,
                        raise_on_error=raise_on_error,
                        allow_redirections=allow_redirections,
                    )

                except RedisCluster.ERRORS_ALLOW_RETRY as e:
                    if retry_attempts > 0:
                        # Try again with the new cluster setup. All other errors
                        # should be raised.
                        retry_attempts -= 1
                        await self._pipe.cluster_client.aclose()
                        await asyncio.sleep(0.25)
                    else:
                        # All other errors should be raised.
                        raise e
        finally:
            await self.reset()

    async def _execute(
        self,
        client: "RedisCluster",
        stack: List["PipelineCommand"],
        raise_on_error: bool = True,
        allow_redirections: bool = True,
    ) -> List[Any]:
        todo = [
            cmd for cmd in stack if not cmd.result or isinstance(cmd.result, Exception)
        ]

        nodes = {}
        for cmd in todo:
            passed_targets = cmd.kwargs.pop("target_nodes", None)
            command_policies = await client._policy_resolver.resolve(
                cmd.args[0].lower()
            )

            if passed_targets and not client._is_node_flag(passed_targets):
                target_nodes = client._parse_target_nodes(passed_targets)

                if not command_policies:
                    command_policies = CommandPolicies()
            else:
                if not command_policies:
                    command_flag = client.command_flags.get(cmd.args[0])
                    if not command_flag:
                        # Fallback to default policy
                        if not client.get_default_node():
                            slot = None
                        else:
                            slot = await client._determine_slot(*cmd.args)
                        if slot is None:
                            command_policies = CommandPolicies()
                        else:
                            command_policies = CommandPolicies(
                                request_policy=RequestPolicy.DEFAULT_KEYED,
                                response_policy=ResponsePolicy.DEFAULT_KEYED,
                            )
                    else:
                        if command_flag in client._command_flags_mapping:
                            command_policies = CommandPolicies(
                                request_policy=client._command_flags_mapping[
                                    command_flag
                                ]
                            )
                        else:
                            command_policies = CommandPolicies()

                target_nodes = await client._determine_nodes(
                    *cmd.args,
                    request_policy=command_policies.request_policy,
                    node_flag=passed_targets,
                )
                if not target_nodes:
                    raise RedisClusterException(
                        f"No targets were found to execute {cmd.args} command on"
                    )
            cmd.command_policies = command_policies
            if len(target_nodes) > 1:
                raise RedisClusterException(f"Too many targets for command {cmd.args}")
            node = target_nodes[0]
            if node.name not in nodes:
                nodes[node.name] = (node, [])
            nodes[node.name][1].append(cmd)

        # Start timing for observability
        start_time = time.monotonic()

        errors = await asyncio.gather(
            *(
                asyncio.create_task(node[0].execute_pipeline(node[1]))
                for node in nodes.values()
            )
        )

        # Record operation duration for each node
        for node_name, (node, commands) in nodes.items():
            # Find the first error in this node's commands, if any
            node_error = None
            for cmd in commands:
                if isinstance(cmd.result, Exception):
                    node_error = cmd.result
                    break

            db = node.connection_kwargs.get("db", 0)
            await record_operation_duration(
                command_name="PIPELINE",
                duration_seconds=time.monotonic() - start_time,
                server_address=node.host,
                server_port=node.port,
                db_namespace=str(db) if db is not None else None,
                error=node_error,
            )

        if any(errors):
            if allow_redirections:
                # send each errored command individually
                for cmd in todo:
                    if isinstance(cmd.result, (TryAgainError, MovedError, AskError)):
                        try:
                            cmd.result = client._policies_callback_mapping[
                                cmd.command_policies.response_policy
                            ](await client.execute_command(*cmd.args, **cmd.kwargs))
                        except Exception as e:
                            cmd.result = e

            if raise_on_error:
                for cmd in todo:
                    result = cmd.result
                    if isinstance(result, Exception):
                        command = " ".join(map(safe_str, cmd.args))
                        msg = (
                            f"Command # {cmd.position + 1} "
                            f"({truncate_text(command)}) "
                            f"of pipeline caused error: {result.args}"
                        )
                        result.args = (msg,) + result.args[1:]
                        raise result

            default_cluster_node = client.get_default_node()

            # Check whether the default node was used. In some cases,
            # 'client.get_default_node()' may return None. The check below
            # prevents a potential AttributeError.
            if default_cluster_node is not None:
                default_node = nodes.get(default_cluster_node.name)
                if default_node is not None:
                    # This pipeline execution used the default node, check if we need
                    # to replace it.
                    # Note: when the error is raised we'll reset the default node in the
                    # caller function.
                    for cmd in default_node[1]:
                        # Check if it has a command that failed with a relevant
                        # exception
                        if type(cmd.result) in RedisCluster.ERRORS_ALLOW_RETRY:
                            client.replace_default_node()
                            break

        return [cmd.result for cmd in stack]

    async def reset(self):
        """
        Reset back to empty pipeline.
        """
        self._command_queue = []

    def multi(self):
        raise RedisClusterException(
            "method multi() is not supported outside of transactional context"
        )

    async def watch(self, *names):
        raise RedisClusterException(
            "method watch() is not supported outside of transactional context"
        )

    async def unwatch(self):
        raise RedisClusterException(
            "method unwatch() is not supported outside of transactional context"
        )

    async def discard(self):
        raise RedisClusterException(
            "method discard() is not supported outside of transactional context"
        )

    async def unlink(self, *names):
        if len(names) != 1:
            raise RedisClusterException(
                "unlinking multiple keys is not implemented in pipeline command"
            )

        return self.execute_command("UNLINK", names[0])


class TransactionStrategy(AbstractStrategy):
    NO_SLOTS_COMMANDS = {"UNWATCH"}
    IMMEDIATE_EXECUTE_COMMANDS = {"WATCH", "UNWATCH"}
    UNWATCH_COMMANDS = {"DISCARD", "EXEC", "UNWATCH"}
    SLOT_REDIRECT_ERRORS = (AskError, MovedError)
    CONNECTION_ERRORS = (
        ConnectionError,
        OSError,
        ClusterDownError,
        SlotNotCoveredError,
    )

    def __init__(self, pipe: ClusterPipeline) -> None:
        super().__init__(pipe)
        self._explicit_transaction = False
        self._watching = False
        self._pipeline_slots: Set[int] = set()
        self._transaction_node: Optional[ClusterNode] = None
        self._transaction_connection: Optional[Connection] = None
        self._executing = False
        self._retry = copy(self._pipe.cluster_client.retry)
        self._retry.update_supported_errors(
            RedisCluster.ERRORS_ALLOW_RETRY + self.SLOT_REDIRECT_ERRORS
        )

    def _get_client_and_connection_for_transaction(
        self,
    ) -> Tuple[ClusterNode, Connection]:
        """
        Find a connection for a pipeline transaction.

        For running an atomic transaction, watch keys ensure that contents have not been
        altered as long as the watch commands for those keys were sent over the same
        connection. So once we start watching a key, we fetch a connection to the
        node that owns that slot and reuse it.
        """
        if not self._pipeline_slots:
            raise RedisClusterException(
                "At least a command with a key is needed to identify a node"
            )

        node: ClusterNode = self._pipe.cluster_client.nodes_manager.get_node_from_slot(
            list(self._pipeline_slots)[0], False
        )
        self._transaction_node = node

        if not self._transaction_connection:
            connection: Connection = self._transaction_node.acquire_connection()
            self._transaction_connection = connection

        return self._transaction_node, self._transaction_connection

    def execute_command(self, *args: Union[KeyT, EncodableT], **kwargs: Any) -> "Any":
        # Given the limitation of ClusterPipeline sync API, we have to run it in thread.
        response = None
        error = None

        def runner():
            nonlocal response
            nonlocal error
            try:
                response = asyncio.run(self._execute_command(*args, **kwargs))
            except Exception as e:
                error = e

        thread = threading.Thread(target=runner)
        thread.start()
        thread.join()

        if error:
            raise error

        return response

    async def _execute_command(
        self, *args: Union[KeyT, EncodableT], **kwargs: Any
    ) -> Any:
        if self._pipe.cluster_client._initialize:
            await self._pipe.cluster_client.initialize()

        slot_number: Optional[int] = None
        if args[0] not in self.NO_SLOTS_COMMANDS:
            slot_number = await self._pipe.cluster_client._determine_slot(*args)

        if (
            self._watching or args[0] in self.IMMEDIATE_EXECUTE_COMMANDS
        ) and not self._explicit_transaction:
            if args[0] == "WATCH":
                self._validate_watch()

            if slot_number is not None:
                if self._pipeline_slots and slot_number not in self._pipeline_slots:
                    raise CrossSlotTransactionError(
                        "Cannot watch or send commands on different slots"
                    )

                self._pipeline_slots.add(slot_number)
            elif args[0] not in self.NO_SLOTS_COMMANDS:
                raise RedisClusterException(
                    f"Cannot identify slot number for command: {args[0]},"
                    "it cannot be triggered in a transaction"
                )

            return self._immediate_execute_command(*args, **kwargs)
        else:
            if slot_number is not None:
                self._pipeline_slots.add(slot_number)

            return super().execute_command(*args, **kwargs)

    def _validate_watch(self):
        if self._explicit_transaction:
            raise RedisError("Cannot issue a WATCH after a MULTI")

        self._watching = True

    async def _immediate_execute_command(self, *args, **options):
        return await self._retry.call_with_retry(
            lambda: self._get_connection_and_send_command(*args, **options),
            self._reinitialize_on_error,
            with_failure_count=True,
        )

    async def _get_connection_and_send_command(self, *args, **options):
        redis_node, connection = self._get_client_and_connection_for_transaction()
        # Only disconnect if not watching - disconnecting would lose WATCH state
        if not self._watching:
            await redis_node.disconnect_if_needed(connection)

        # Start timing for observability
        start_time = time.monotonic()

        try:
            response = await self._send_command_parse_response(
                connection, redis_node, args[0], *args, **options
            )

            await record_operation_duration(
                command_name=args[0],
                duration_seconds=time.monotonic() - start_time,
                server_address=connection.host,
                server_port=connection.port,
                db_namespace=str(connection.db),
            )

            return response
        except Exception as e:
            e.connection = connection
            await record_operation_duration(
                command_name=args[0],
                duration_seconds=time.monotonic() - start_time,
                server_address=connection.host,
                server_port=connection.port,
                db_namespace=str(connection.db),
                error=e,
            )
            raise

    async def _send_command_parse_response(
        self,
        connection: Connection,
        redis_node: ClusterNode,
        command_name,
        *args,
        **options,
    ):
        """
        Send a command and parse the response
        """

        await connection.send_command(*args)
        output = await redis_node.parse_response(connection, command_name, **options)

        if command_name in self.UNWATCH_COMMANDS:
            self._watching = False
        return output

    async def _reinitialize_on_error(self, error, failure_count):
        if hasattr(error, "connection"):
            await record_error_count(
                server_address=error.connection.host,
                server_port=error.connection.port,
                network_peer_address=error.connection.host,
                network_peer_port=error.connection.port,
                error_type=error,
                retry_attempts=failure_count,
                is_internal=True,
            )

        if self._watching:
            if type(error) in self.SLOT_REDIRECT_ERRORS and self._executing:
                raise WatchError("Slot rebalancing occurred while watching keys")

        if (
            type(error) in self.SLOT_REDIRECT_ERRORS
            or type(error) in self.CONNECTION_ERRORS
        ):
            if self._transaction_connection and self._transaction_node:
                # Disconnect and release back to pool
                await self._transaction_connection.disconnect()
                self._transaction_node.release(self._transaction_connection)
                self._transaction_connection = None

            self._pipe.cluster_client.reinitialize_counter += 1
            if (
                self._pipe.cluster_client.reinitialize_steps
                and self._pipe.cluster_client.reinitialize_counter
                % self._pipe.cluster_client.reinitialize_steps
                == 0
            ):
                await self._pipe.cluster_client.nodes_manager.initialize()
                self.reinitialize_counter = 0
            else:
                if isinstance(error, AskError):
                    await self._pipe.cluster_client.nodes_manager.move_slot(error)

        self._executing = False

    async def _raise_first_error(self, responses, stack, start_time):
        """
        Raise the first exception on the stack
        """
        for r, cmd in zip(responses, stack):
            if isinstance(r, Exception):
                self._annotate_exception(r, cmd.position + 1, cmd.args)

                await record_operation_duration(
                    command_name="TRANSACTION",
                    duration_seconds=time.monotonic() - start_time,
                    server_address=self._transaction_connection.host,
                    server_port=self._transaction_connection.port,
                    db_namespace=str(self._transaction_connection.db),
                    error=r,
                )

                raise r

    def mset_nonatomic(
        self, mapping: Mapping[AnyKeyT, EncodableT]
    ) -> "ClusterPipeline":
        raise NotImplementedError("Method is not supported in transactional context.")

    async def execute(
        self, raise_on_error: bool = True, allow_redirections: bool = True
    ) -> List[Any]:
        stack = self._command_queue
        if not stack and (not self._watching or not self._pipeline_slots):
            return []

        return await self._execute_transaction_with_retries(stack, raise_on_error)

    async def _execute_transaction_with_retries(
        self, stack: List["PipelineCommand"], raise_on_error: bool
    ):
        return await self._retry.call_with_retry(
            lambda: self._execute_transaction(stack, raise_on_error),
            lambda error, failure_count: self._reinitialize_on_error(
                error, failure_count
            ),
            with_failure_count=True,
        )

    async def _execute_transaction(
        self, stack: List["PipelineCommand"], raise_on_error: bool
    ):
        if len(self._pipeline_slots) > 1:
            raise CrossSlotTransactionError(
                "All keys involved in a cluster transaction must map to the same slot"
            )

        self._executing = True

        redis_node, connection = self._get_client_and_connection_for_transaction()
        # Only disconnect if not watching - disconnecting would lose WATCH state
        if not self._watching:
            await redis_node.disconnect_if_needed(connection)

        stack = chain(
            [PipelineCommand(0, "MULTI")],
            stack,
            [PipelineCommand(0, "EXEC")],
        )
        commands = [c.args for c in stack if EMPTY_RESPONSE not in c.kwargs]
        packed_commands = connection.pack_commands(commands)

        # Start timing for observability
        start_time = time.monotonic()

        await connection.send_packed_command(packed_commands)
        errors = []

        # parse off the response for MULTI
        # NOTE: we need to handle ResponseErrors here and continue
        # so that we read all the additional command messages from
        # the socket
        try:
            await redis_node.parse_response(connection, "MULTI")
        except ResponseError as e:
            self._annotate_exception(e, 0, "MULTI")
            errors.append(e)
        except self.CONNECTION_ERRORS as cluster_error:
            self._annotate_exception(cluster_error, 0, "MULTI")
            cluster_error.connection = connection
            raise

        # and all the other commands
        for i, command in enumerate(self._command_queue):
            if EMPTY_RESPONSE in command.kwargs:
                errors.append((i, command.kwargs[EMPTY_RESPONSE]))
            else:
                try:
                    _ = await redis_node.parse_response(connection, "_")
                except self.SLOT_REDIRECT_ERRORS as slot_error:
                    self._annotate_exception(slot_error, i + 1, command.args)
                    errors.append(slot_error)
                except self.CONNECTION_ERRORS as cluster_error:
                    self._annotate_exception(cluster_error, i + 1, command.args)
                    cluster_error.connection = connection
                    raise
                except ResponseError as e:
                    self._annotate_exception(e, i + 1, command.args)
                    errors.append(e)

        response = None
        # parse the EXEC.
        try:
            response = await redis_node.parse_response(connection, "EXEC")
        except ExecAbortError:
            if errors:
                raise errors[0]
            raise

        self._executing = False

        # EXEC clears any watched keys
        self._watching = False

        if response is None:
            raise WatchError("Watched variable changed.")

        # put any parse errors into the response
        for i, e in errors:
            response.insert(i, e)

        if len(response) != len(self._command_queue):
            raise InvalidPipelineStack(
                "Unexpected response length for cluster pipeline EXEC."
                " Command stack was {} but response had length {}".format(
                    [c.args[0] for c in self._command_queue], len(response)
                )
            )

        # find any errors in the response and raise if necessary
        if raise_on_error or len(errors) > 0:
            await self._raise_first_error(
                response,
                self._command_queue,
                start_time,
            )

        # We have to run response callbacks manually
        data = []
        for r, cmd in zip(response, self._command_queue):
            if not isinstance(r, Exception):
                command_name = cmd.args[0]
                if command_name in self._pipe.cluster_client.response_callbacks:
                    r = self._pipe.cluster_client.response_callbacks[command_name](
                        r, **cmd.kwargs
                    )
            data.append(r)

        await record_operation_duration(
            command_name="TRANSACTION",
            duration_seconds=time.monotonic() - start_time,
            server_address=connection.host,
            server_port=connection.port,
            db_namespace=str(connection.db),
        )

        return data

    async def reset(self):
        self._command_queue = []

        try:
            # make sure to reset the connection state in the event that we
            # were watching something
            if self._transaction_connection:
                try:
                    if self._watching:
                        # call this manually since our unwatch or
                        # immediate_execute_command methods can call reset()
                        await self._transaction_connection.send_command("UNWATCH")
                        await self._transaction_connection.read_response()
                except self.CONNECTION_ERRORS:
                    # disconnect will also remove any previous WATCHes
                    if self._transaction_connection:
                        await self._transaction_connection.disconnect()
                except asyncio.CancelledError:
                    # Disconnect so any unread UNWATCH reply does not get
                    # served to the next caller that takes the connection.
                    if self._transaction_connection:
                        await self._transaction_connection.disconnect()
                    raise
                else:
                    # On the happy path, honor lazy reconnect before release.
                    await self._transaction_node.disconnect_if_needed(
                        self._transaction_connection
                    )
        finally:
            # Always return the connection to the node's free queue, even on
            # cancellation, so cancelled resets do not leak pooled
            # connections. Detach the reference before releasing so the
            # strategy never holds a pointer to a returned connection.
            # ClusterNode.release is synchronous, so no shield is required.
            if self._transaction_connection and self._transaction_node:
                connection, self._transaction_connection = (
                    self._transaction_connection,
                    None,
                )
                self._transaction_node.release(connection)
            # clean up the other instance attributes
            self._transaction_connection = None
            self._transaction_node = None
            self._watching = False
            self._explicit_transaction = False
            self._pipeline_slots = set()
            self._executing = False

    def multi(self):
        if self._explicit_transaction:
            raise RedisError("Cannot issue nested calls to MULTI")
        if self._command_queue:
            raise RedisError(
                "Commands without an initial WATCH have already been issued"
            )
        self._explicit_transaction = True

    async def watch(self, *names):
        if self._explicit_transaction:
            raise RedisError("Cannot issue a WATCH after a MULTI")

        return await self.execute_command("WATCH", *names)

    async def unwatch(self):
        if self._watching:
            return await self.execute_command("UNWATCH")

        return True

    async def discard(self):
        await self.reset()

    async def unlink(self, *names):
        return self.execute_command("UNLINK", *names)


class _ClusterNodePoolAdapter(ConnectionPoolInterface):
    """Thin adapter exposing the :class:`ConnectionPoolInterface` that
    :class:`PubSub` requires, backed by a :class:`ClusterNode`'s own
    connection pool.

    Connections are acquired from the node via
    :meth:`ClusterNode.acquire_connection` and returned via
    :meth:`ClusterNode.release`.  :meth:`PubSub.aclose` already
    disconnects the connection *before* calling :meth:`release`, so the
    connection is returned to the node's free-queue in a disconnected
    state — guaranteeing that a subscribed socket is never silently
    reused for regular commands.

    Methods that do not apply to this adapter (the underlying node's
    lifecycle is managed by the cluster, not by individual PubSub
    instances) are implemented as no-ops so the adapter remains a valid
    :class:`ConnectionPoolInterface`.
    """

    def __init__(self, node: "ClusterNode") -> None:
        self._node = node
        self.connection_kwargs = node.connection_kwargs

    # -- methods used by PubSub ------------------------------------------------

    def get_encoder(self) -> Encoder:
        return self._node.get_encoder()

    async def get_connection(
        self, command_name: Optional[str] = None, *keys: Any, **options: Any
    ) -> AbstractConnection:
        connection = self._node.acquire_connection()
        try:
            await connection.connect()
        except BaseException:
            # connect() may fail mid-handshake (e.g. after the TCP socket
            # is established but before AUTH/HELLO completes) leaving the
            # connection in a partially-connected state.  Disconnect before
            # returning it to the node's free queue so it is not reused.
            await connection.disconnect()
            self._node.release(connection)
            raise
        return connection

    async def release(self, connection: AbstractConnection) -> None:
        # PubSub.aclose() disconnects the connection before calling
        # release(), so it is safe to put it back in the node's free
        # queue – it will reconnect lazily on next use.
        await self._node.disconnect_if_needed(connection)
        self._node.release(connection)

    # -- no-op stubs for the rest of ConnectionPoolInterface -------------------
    # The node's connections are shared with regular cluster traffic and its
    # lifecycle is managed by RedisCluster / NodesManager, so the adapter must
    # not reset, disconnect, retry-configure or re-auth them on behalf of a
    # single PubSub instance.

    def get_protocol(self):
        return self.connection_kwargs.get("protocol", None)

    def reset(self) -> None:
        pass

    async def disconnect(self, inuse_connections: bool = True) -> None:
        pass

    async def aclose(self) -> None:
        pass

    def set_retry(self, retry: "Retry") -> None:
        pass

    async def re_auth_callback(self, token: TokenInterface) -> None:
        pass

    def get_connection_count(self) -> List[Tuple[int, dict]]:
        return []


def _unregister_slots_cache_listener(
    dispatcher_ref: "weakref.ref[EventDispatcher]",
    listener: AsyncEventListenerInterface,
    event_type: Type[object],
) -> None:
    # Module-level finalizer callback. Kept free of strong references to the
    # owning ClusterPubSub so attaching it via weakref.finalize does not
    # extend the pubsub's lifetime.
    dispatcher = dispatcher_ref()
    if dispatcher is not None:
        dispatcher.unregister_listeners({event_type: [listener]})


class ClusterPubSubSlotsCacheListener(AsyncEventListenerInterface):
    """
    Async listener that forwards AsyncAfterSlotsCacheRefreshEvent to a
    ClusterPubSub.

    Holds a weak reference to the pubsub so it does not keep the instance
    alive. Deterministic cleanup of the dispatcher's strong reference to this
    listener is performed by a ``weakref.finalize`` attached to the owning
    ClusterPubSub in ``ClusterPubSub.__init__``.
    """

    def __init__(self, pubsub: "ClusterPubSub") -> None:
        self._pubsub_ref: "weakref.ref[ClusterPubSub]" = weakref.ref(pubsub)

    async def listen(self, event: object) -> None:
        pubsub = self._pubsub_ref()
        if pubsub is None:
            # Race window between pubsub GC and the finalizer running; safe
            # no-op, finalizer will remove this listener shortly.
            return
        try:
            await pubsub.on_slots_changed()
        except Exception as e:
            # Listeners must not break slots-cache refresh; log and continue so
            # a single buggy pubsub cannot starve the rest.
            logger.exception(
                "pubsub %r raised during slots-cache change: %s: %s",
                pubsub,
                type(e).__name__,
                e,
            )


class ClusterPubSub(PubSub):
    """
    Async cluster implementation for pub/sub.

    IMPORTANT: before using ClusterPubSub, read about the known limitations
    with pubsub in Cluster mode and learn how to workaround them:
    https://redis.readthedocs.io/en/stable/clustering.html#known-pubsub-limitations
    """

    def __init__(
        self,
        redis_cluster: "RedisCluster",
        node: Optional["ClusterNode"] = None,
        host: Optional[str] = None,
        port: Optional[int] = None,
        push_handler_func: Optional[Callable] = None,
        event_dispatcher: Optional[EventDispatcher] = None,
        **kwargs: Any,
    ) -> None:
        """
        When a pubsub instance is created without specifying a node, a single
        node will be transparently chosen for the pubsub connection on the
        first command execution. The node will be determined by:
         1. Hashing the channel name in the request to find its keyslot
         2. Selecting a node that handles the keyslot: If read_from_replicas is
            set to true or load_balancing_strategy is set, a replica can be selected.

        :param redis_cluster: RedisCluster instance
        :param node: ClusterNode to connect to
        :param host: Host of the node to connect to
        :param port: Port of the node to connect to
        :param push_handler_func: Optional push handler function
        :param event_dispatcher: Optional event dispatcher
        :param kwargs: Additional keyword arguments
        """
        self.node = None
        self.set_pubsub_node(redis_cluster, node, host, port)

        # Borrow the node's own connection pool via an adapter rather than
        # creating a second, detached ConnectionPool for pubsub.
        if self.node is not None:
            connection_pool = _ClusterNodePoolAdapter(self.node)
        else:
            connection_pool = None

        self.cluster = redis_cluster
        self.node_pubsub_mapping: Dict[str, PubSub] = {}
        # Reverse index: shard channel (normalized) -> owning node.name. Used to
        # route sunsubscribe calls and reconcile subscriptions after slot
        # migration / failover.
        self._shard_channel_to_node: Dict[Any, str] = {}
        # Dedicated lock for shard-subscription bookkeeping. Distinct from
        # PubSub.self._lock (which serializes wire I/O on the cluster-level
        # connection used by aclose / send_command / regular subscribe) so
        # that reconciliation cannot starve those unrelated coroutines
        # during long per-channel migrations.
        self._shard_state_lock: asyncio.Lock = asyncio.Lock()
        # Background tasks created by on_slots_changed; kept to prevent GC.
        self._reconcile_tasks: Set[asyncio.Task] = set()
        self._pubsubs_generator = self._pubsubs_generator()
        if event_dispatcher is None:
            self._event_dispatcher = EventDispatcher()
        else:
            self._event_dispatcher = event_dispatcher
        super().__init__(
            connection_pool=connection_pool,
            encoder=redis_cluster.encoder,
            push_handler_func=push_handler_func,
            event_dispatcher=self._event_dispatcher,
            **kwargs,
        )
        # Subscribe to slots-cache change notifications so shard subscriptions
        # can be reconciled automatically after topology refreshes.
        nm_dispatcher = redis_cluster.nodes_manager._event_dispatcher
        self._slots_cache_listener = ClusterPubSubSlotsCacheListener(self)
        nm_dispatcher.register_listeners(
            {AsyncAfterSlotsCacheRefreshEvent: [self._slots_cache_listener]}
        )
        # Deterministic GC-time cleanup so short-lived pubsubs do not leak
        # listeners in the dispatcher when no slots-refresh event ever fires.
        weakref.finalize(
            self,
            _unregister_slots_cache_listener,
            weakref.ref(nm_dispatcher),
            self._slots_cache_listener,
            AsyncAfterSlotsCacheRefreshEvent,
        )

    def set_pubsub_node(
        self,
        cluster: "RedisCluster",
        node: Optional["ClusterNode"] = None,
        host: Optional[str] = None,
        port: Optional[int] = None,
    ) -> None:
        """
        The pubsub node will be set according to the passed node, host and port
        When none of the node, host, or port are specified - the node is set
        to None and will be determined by the keyslot of the channel in the
        first command to be executed.
        RedisClusterException will be thrown if the passed node does not exist
        in the cluster.
        If host is passed without port, or vice versa, a DataError will be
        thrown.
        """
        if node is not None:
            # node is passed by the user
            self._raise_on_invalid_node(cluster, node, node.host, node.port)
            pubsub_node = node
        elif host is not None and port is not None:
            # host and port passed by the user
            node = cluster.get_node(host=host, port=port)
            self._raise_on_invalid_node(cluster, node, host, port)
            pubsub_node = node
        elif host is not None or port is not None:
            # only one of host and port is specified
            raise DataError("Specify both host and port")
        else:
            # nothing specified by the user
            pubsub_node = None
        self.node = pubsub_node

    def get_pubsub_node(self) -> Optional["ClusterNode"]:
        """
        Get the node that is being used as the pubsub connection.

        :return: The ClusterNode being used for pubsub, or None if not yet determined
        """
        return self.node

    async def _resubscribe_shard_channels(self) -> None:
        # A single node can own multiple slot ranges, so a batched
        # ``SSUBSCRIBE`` covering every tracked channel would be rejected by
        # Redis with a ``CROSSSLOT`` error. Group by hash slot and emit one
        # ``SSUBSCRIBE`` per slot.
        by_slot: defaultdict[int, dict] = defaultdict(dict)
        for k, v in self.shard_channels.items():
            by_slot[key_slot(self.encoder.encode(k))][k] = v
        for subscriptions in by_slot.values():
            await self._resubscribe(subscriptions, self.ssubscribe)

    def _get_node_pubsub(self, node: "ClusterNode") -> PubSub:
        """Get or create a PubSub instance for the given node."""
        try:
            return self.node_pubsub_mapping[node.name]
        except KeyError:
            pubsub = PubSub(
                connection_pool=_ClusterNodePoolAdapter(node),
                encoder=self.cluster.encoder,
                push_handler_func=self.push_handler_func,
                event_dispatcher=self._event_dispatcher,
            )
            # Replay shard subscriptions on reconnect with slot-aware grouping
            # so that channels spanning multiple slots owned by this node do
            # not trigger a CROSSSLOT error.
            pubsub._resubscribe_shard_channels = MethodType(
                ClusterPubSub._resubscribe_shard_channels, pubsub
            )
            self.node_pubsub_mapping[node.name] = pubsub
            return pubsub

    def _find_node_name_for_pubsub(self, pubsub: PubSub) -> Optional[str]:
        for name, candidate in self.node_pubsub_mapping.items():
            if candidate is pubsub:
                return name
        return None

    async def _sharded_message_generator(
        self, timeout: float = 0.0
    ) -> Tuple[Optional[PubSub], Optional[Dict[str, Any]]]:
        """Generate messages from shard channels across all nodes."""
        for _ in range(len(self.node_pubsub_mapping)):
            pubsub = next(self._pubsubs_generator)
            # Don't pass ignore_subscribe_messages here - let get_sharded_message
            # handle the filtering after processing subscription state changes
            message = await pubsub.get_message(
                ignore_subscribe_messages=False, timeout=timeout
            )
            if message is not None:
                return pubsub, message
        return None, None

    def _pubsubs_generator(self) -> Generator[PubSub, None, None]:
        """Generator that yields PubSub instances in round-robin fashion."""
        while True:
            current_nodes = list(self.node_pubsub_mapping.values())
            if not current_nodes:
                return  # Avoid infinite loop when no subscriptions exist
            yield from current_nodes

    async def get_sharded_message(
        self,
        ignore_subscribe_messages: bool = False,
        timeout: float = 0.0,
        target_node: Optional["ClusterNode"] = None,
    ) -> Optional[Dict[str, Any]]:
        """
        Get a message from shard channels.

        :param ignore_subscribe_messages: Whether to ignore subscribe messages
        :param timeout: Timeout for message retrieval
        :param target_node: Specific node to get message from
        :return: Message dictionary or None
        """
        pubsub: Optional[PubSub]
        if target_node:
            pubsub = self.node_pubsub_mapping.get(target_node.name)
            if pubsub:
                # Don't pass ignore_subscribe_messages here - let get_sharded_message
                # handle the filtering after processing subscription state changes
                message = await pubsub.get_message(
                    ignore_subscribe_messages=False, timeout=timeout
                )
            else:
                message = None
        else:
            pubsub, message = await self._sharded_message_generator(timeout=timeout)

        if message is None:
            return None
        # Only sunsubscribe mutates cluster-level shard state; bypassing the
        # lock on the data-message hot path keeps smessage delivery from
        # competing with the reconciliation task for _shard_state_lock.
        if str_if_bytes(message["type"]) == "sunsubscribe":
            # Serialize state mutation against reinitialize_shard_subscriptions
            # (background task). The blocking get_message above intentionally
            # runs outside the lock so reconciliation is not stalled by long
            # polls.
            async with self._shard_state_lock:
                if message["channel"] in self.pending_unsubscribe_shard_channels:
                    # User-initiated sunsubscribe: drop from cluster-level tracking.
                    self.pending_unsubscribe_shard_channels.remove(message["channel"])
                    self.shard_channels.pop(message["channel"], None)
                    self._shard_channel_to_node.pop(message["channel"], None)
                # Drop the per-node pubsub that delivered the confirmation once
                # it no longer holds any shard subscriptions, regardless of
                # whether the sunsubscribe was user-initiated or driven by
                # slot-migration reconciliation (_migrate_shard_channel, which
                # intentionally does not add the channel to
                # pending_unsubscribe_shard_channels). This releases the
                # dedicated connection that would otherwise linger.
                # Identifying the receiving pubsub directly (rather than via
                # the cluster's current slot map) is required after slot
                # migration, where the channel's owner is no longer the node
                # that received our original SSUBSCRIBE.
                if pubsub is not None and not pubsub.subscribed:
                    name = self._find_node_name_for_pubsub(pubsub)
                    if name is not None:
                        try:
                            await pubsub.aclose()
                        except Exception:
                            pass
                        self.node_pubsub_mapping.pop(name, None)

        # Only suppress subscribe/unsubscribe messages, not data messages (smessage)
        if str_if_bytes(message["type"]) in ("ssubscribe", "sunsubscribe"):
            if self.ignore_subscribe_messages or ignore_subscribe_messages:
                return None
        return message

    async def ssubscribe(
        self, *args: ChannelT | Subscription, **kwargs: PubSubHandler
    ) -> None:
        """
        Subscribe to shard channels.

        :param args: Channel names or ``Subscription`` objects
        :param kwargs: Channel names with handlers
        """
        s_channels = parse_pubsub_subscriptions(args, kwargs)

        # Serialize against reinitialize_shard_subscriptions (background
        # task) so the reverse index, shard_channels, and node_pubsub_mapping
        # are not mutated concurrently. _migrate_shard_channel below does not
        # re-acquire this lock (asyncio.Lock is non-reentrant).
        async with self._shard_state_lock:
            for s_channel, handler in s_channels.items():
                node = self.cluster.get_node_from_key(s_channel)
                if not node:
                    continue
                # Lazy re-route: if this channel is already tracked against a
                # different node (e.g. after a slot migration), migrate it now
                # so the caller's intent is applied on the current owner.
                normalized_key = next(iter(self._normalize_keys({s_channel: None})))
                old_name = self._shard_channel_to_node.get(normalized_key)
                if old_name and old_name != node.name:
                    # Match PubSub.ssubscribe() dict.update() semantics: the
                    # caller's newly supplied handler (including None) always
                    # overrides any previously registered handler.
                    await self._migrate_shard_channel(
                        normalized_key,
                        handler,
                        old_name,
                        node,
                    )
                    continue
                pubsub = self._get_node_pubsub(node)
                if handler:
                    await pubsub.ssubscribe(Subscription(s_channel, handler))
                else:
                    await pubsub.ssubscribe(s_channel)
                self.shard_channels.update(pubsub.shard_channels)
                self._shard_channel_to_node[normalized_key] = node.name
                self.pending_unsubscribe_shard_channels.difference_update(
                    self._normalize_keys({s_channel: None})
                )

    async def sunsubscribe(self, *args: Any) -> None:
        """
        Unsubscribe from shard channels.

        :param args: Channel names to unsubscribe from. If empty, unsubscribe from all.
        """
        if args:
            args = list_or_args(args[0], args[1:])
        else:
            args = list(self.shard_channels.keys())

        # Serialize against reinitialize_shard_subscriptions: the reverse
        # index and node_pubsub_mapping must not change between the lookup
        # and the per-node sunsubscribe call below.
        async with self._shard_state_lock:
            for s_channel in args:
                normalized_key = next(iter(self._normalize_keys({s_channel: None})))
                # Route via the reverse index so we unsubscribe on the node
                # that actually holds the subscription. After a slot migration
                # the cluster's current owner may no longer be that node.
                name = self._shard_channel_to_node.get(normalized_key)
                if name and name in self.node_pubsub_mapping:
                    pubsub = self.node_pubsub_mapping[name]
                else:
                    node = self.cluster.get_node_from_key(s_channel)
                    if not node or node.name not in self.node_pubsub_mapping:
                        continue
                    pubsub = self.node_pubsub_mapping[node.name]
                await pubsub.sunsubscribe(s_channel)
                self.pending_unsubscribe_shard_channels.update(
                    pubsub.pending_unsubscribe_shard_channels
                )

    async def reinitialize_shard_subscriptions(self) -> None:
        """
        Reconcile per-node shard subscriptions against the cluster's current
        slot ownership map. For each tracked shard channel whose owning node
        has changed (e.g. after CLUSTER SETSLOT / failover), sunsubscribe on
        the old node's pubsub and ssubscribe on the new owner's pubsub,
        preserving any registered handler.
        """
        uncovered: list = []
        made_progress = False
        first_migrate_error: Optional[BaseException] = None
        async with self._shard_state_lock:
            for channel, handler in list(self.shard_channels.items()):
                try:
                    new_node = self.cluster.get_node_from_key(channel)
                except SlotNotCoveredError:
                    # Slot is transiently uncovered (mid-migration / partial
                    # topology refresh). Defer this channel so coverable
                    # siblings still reconcile this pass; we surface the
                    # error below so the caller (and logs) know not every
                    # channel was reconciled. Retry happens on the next
                    # slots-cache change notification.
                    uncovered.append(channel)
                    continue
                old_name = self._shard_channel_to_node.get(channel)
                if old_name == new_node.name:
                    continue
                try:
                    await self._migrate_shard_channel(
                        channel, handler, old_name, new_node
                    )
                    made_progress = True
                except (ConnectionError, TimeoutError, OSError) as e:
                    # Transient connectivity error while subscribing on the
                    # new owner (or unsubscribing on the old owner if its
                    # handler chose to re-raise). Do not abort reconciliation
                    # for sibling channels: _shard_channel_to_node was not
                    # advanced for this channel, so the next slots-cache
                    # change notification will retry it.
                    logger.warning(
                        "shard channel %r migration deferred: %s: %s",
                        channel,
                        type(e).__name__,
                        e,
                    )
                    if first_migrate_error is None:
                        first_migrate_error = e
                    continue
            # Garbage-collect per-node pubsubs that no longer hold any
            # subscription so their connections are released.
            for name, pubsub in list(self.node_pubsub_mapping.items()):
                if not pubsub.subscribed:
                    try:
                        await pubsub.aclose()
                    except Exception:
                        pass
                    self.node_pubsub_mapping.pop(name, None)
        if uncovered:
            # Surface the uncovered channels so the caller (and observer
            # notification path) knows reconciliation was incomplete. All
            # coverable siblings have already been migrated above.
            raise SlotNotCoveredError(
                f"{len(uncovered)} shard channel(s) left unreconciled; "
                f"slot(s) not covered by the cluster: {uncovered!r}"
            )
        if first_migrate_error is not None and not made_progress:
            # Every migration attempted in this pass failed transiently and
            # nothing else made progress. Re-raise the first caught error
            # (typically the root cause; later failures are often downstream
            # symptoms of the same unreachable node) so the task's done-
            # callback surfaces a single representative failure through the
            # same logger channel used for SlotNotCoveredError. Per-channel
            # WARNINGs above preserve the full forensic detail.
            raise first_migrate_error

    async def _migrate_shard_channel(
        self,
        channel: Any,
        handler: Optional[Callable],
        old_name: Optional[str],
        new_node: "ClusterNode",
    ) -> None:
        # Detach from the old per-node pubsub, best-effort: the old node may
        # already be unreachable during migration / failover.
        if old_name and old_name in self.node_pubsub_mapping:
            old_pubsub = self.node_pubsub_mapping[old_name]
            try:
                await old_pubsub.sunsubscribe(channel)
            except (ConnectionError, TimeoutError, OSError):
                # redis-py's Connection has already called ``disconnect()``
                # before raising (see Connection.read_response /
                # send_packed_command with ``disconnect_on_error=True``),
                # so ``old_pubsub``'s dedicated socket is gone. Two cases:
                #
                # 1. The old node is no longer in the cluster topology
                #    (e.g. removed by failover / topology refresh): no
                #    reconnect target exists, so ``old_pubsub.subscribed``
                #    would stay True forever and the end-of-pass GC block
                #    would skip it. Drop it eagerly so the round-robin
                #    generator does not keep yielding a dead pubsub that
                #    produces periodic errors from ``get_sharded_message``.
                # 2. The old node is still known (transiently slow /
                #    unreachable): ``PubSub._execute`` auto-reconnects and
                #    ``on_connect`` re-subscribes to remaining channels,
                #    so other subscriptions on the same pubsub recover
                #    naturally. Leave it alone.
                if self.cluster.get_node(node_name=old_name) is None:
                    try:
                        await old_pubsub.aclose()
                    except Exception:
                        pass
                    self.node_pubsub_mapping.pop(old_name, None)
        # Attach to the new per-node pubsub, preserving the handler. Decode to
        # a text key only when we must pass it as a kwarg (handler present).
        new_pubsub = self._get_node_pubsub(new_node)
        if handler:
            await new_pubsub.ssubscribe(Subscription(channel, handler))
        else:
            await new_pubsub.ssubscribe(channel)
        self.shard_channels.update(new_pubsub.shard_channels)
        normalized_key = next(iter(self._normalize_keys({channel: None})))
        self._shard_channel_to_node[normalized_key] = new_node.name
        self.pending_unsubscribe_shard_channels.difference_update(
            self._normalize_keys({channel: None})
        )

    async def on_slots_changed(self) -> None:
        # Observer hook invoked by NodesManager after a slots-cache refresh.
        # Schedule reconciliation as a separate task so the caller's code
        # path (typically MovedError handling in _execute_command) is not
        # blocked on the network I/O performed by reinitialize_shard_
        # subscriptions. No-op when there are no shard subscriptions to
        # reconcile.
        if not self.shard_channels:
            return
        task = asyncio.create_task(self.reinitialize_shard_subscriptions())
        self._reconcile_tasks.add(task)
        task.add_done_callback(self._reconcile_tasks.discard)
        # Consume the task's exception (if any) so Python does not emit a
        # "Task exception was never retrieved" warning. reinitialize_shard_
        # subscriptions surfaces SlotNotCoveredError when a slot is still
        # transiently uncovered; route it through the same logger channel
        # as sync ClusterPubSubSlotsCacheListener for consistent observability.
        task.add_done_callback(self._log_reconcile_task_exception)

    @staticmethod
    def _log_reconcile_task_exception(task: "asyncio.Task") -> None:
        if task.cancelled():
            return
        exc = task.exception()
        if exc is not None:
            logger.error(
                "shard subscription reconciliation failed: %r", exc, exc_info=exc
            )

    def get_redis_connection(self) -> Optional["AbstractConnection"]:
        """
        Get the Redis connection of the pubsub connected node.

        Returns the pubsub's dedicated connection (acquired from its own
        connection pool), not from the ClusterNode's connection pool.
        This avoids the connection pool resource leak that would occur
        if we called node.acquire_connection() without releasing.
        """
        # Return the pubsub's own dedicated connection, which is acquired
        # from self.connection_pool when executing pubsub commands.
        # This is safe because it's the connection dedicated to this pubsub
        # instance, not a shared pool connection from the ClusterNode.
        return self.connection

    async def aclose(self) -> None:
        """
        Disconnect the pubsub connection.
        """
        # Cancel and gather in-flight reconciliation tasks BEFORE acquiring
        # _shard_state_lock. The tasks themselves take that lock inside
        # reinitialize_shard_subscriptions; since asyncio.Lock is non-
        # reentrant, gathering while holding it would deadlock. Awaiting
        # each task with suppressed CancelledError also avoids unhandled-
        # exception warnings if the task was created but not yet scheduled.
        if self._reconcile_tasks:
            tasks = list(self._reconcile_tasks)
            for task in tasks:
                task.cancel()
            await asyncio.gather(*tasks, return_exceptions=True)
        # Hold _shard_state_lock across the rest of the teardown so it
        # observes the same mutual-exclusion discipline as ssubscribe /
        # sunsubscribe / get_sharded_message / reinitialize_shard_
        # subscriptions, which all mutate shard_channels,
        # _shard_channel_to_node, and node_pubsub_mapping under this lock.
        # Without it, super().aclose() rebinds shard_channels and
        # pending_unsubscribe_shard_channels in parallel with a concurrent
        # user-coroutine mutation that resumes during one of the awaits
        # below, silently dropping subscription intent.
        async with self._shard_state_lock:
            self._reconcile_tasks.clear()
            # Close all shard pubsub instances first
            for pubsub in self.node_pubsub_mapping.values():
                await pubsub.aclose()
            # Drop the now-dead per-node pubsubs from the mapping so the
            # round-robin in _pubsubs_generator / _sharded_message_generator
            # cannot yield them between teardown and re-subscription.
            self.node_pubsub_mapping.clear()
            # _pubsubs_generator captures node_pubsub_mapping.values() into
            # a local list inside ``yield from``; clearing the mapping does
            # not reach references already held by that captured snapshot,
            # so a generator suspended mid-yield-from would still surface
            # the now-aclose()'d per-node pubsubs after re-subscription.
            # Recreate it to drop the captured list. type(self) bypasses
            # the instance-level self-shadow established at __init__
            # (self._pubsubs_generator = self._pubsubs_generator()).
            self._pubsubs_generator = type(self)._pubsubs_generator(  # type: ignore[method-assign]
                self
            )
            # Let parent handle self.connection disconnect under the lock
            # (includes disconnect, release to pool, and clearing
            # self.connection)
            await super().aclose()
            # Clear the reverse index so a reused instance doesn't route
            # against stale mappings. super().aclose() has already cleared
            # shard_channels.
            self._shard_channel_to_node.clear()

    def _raise_on_invalid_node(
        self,
        redis_cluster: "RedisCluster",
        node: Optional["ClusterNode"],
        host: Optional[str],
        port: Optional[int],
    ) -> None:
        """
        Raise a RedisClusterException if the node is None or doesn't exist in
        the cluster.
        """
        if node is None or redis_cluster.get_node(node_name=node.name) is None:
            raise RedisClusterException(
                f"Node {host}:{port} doesn't exist in the cluster"
            )

    async def execute_command(self, *args: Any, **kwargs: Any) -> Any:
        """
        Execute a command on the appropriate cluster node.

        Taken code from redis-py and tweaked to make it work within a cluster.
        """
        # NOTE: don't parse the response in this function -- it could pull a
        # legitimate message off the stack if the connection is already
        # subscribed to one or more channels

        # For shard commands, route to appropriate node
        command = args[0].upper() if args else ""
        if command in ("SSUBSCRIBE", "SUNSUBSCRIBE", "SPUBLISH"):
            if len(args) > 1:
                channel = args[1]
                node = self.cluster.get_node_from_key(channel)
                if node:
                    pubsub = self._get_node_pubsub(node)
                    return await pubsub.execute_command(*args, **kwargs)

        # For other commands, use the set node or lazily discover one
        if self.connection is None:
            if self.connection_pool is None:
                if len(args) > 1:
                    # Hash the first channel and get one of the nodes holding
                    # this slot
                    channel = args[1]
                    slot = self.cluster.keyslot(channel)
                    node = self.cluster.nodes_manager.get_node_from_slot(
                        slot,
                        self.cluster.read_from_replicas,
                        self.cluster.load_balancing_strategy,
                    )
                else:
                    # Get a random node
                    node = self.cluster.get_random_node()
                self.node = node
                self.connection_pool = _ClusterNodePoolAdapter(node)

        # Now we have a connection_pool, use parent's execute_command
        return await super().execute_command(*args, **kwargs)
