Bug#1109865: unblock: patroni/4.0.6-1 (2/4)
From
Michael Banck@21:1/5 to
All on Fri Jul 25 11:40:01 2025
[continued from previous message]
""":returns: "primary_stop_timeout" from the global configuration or `None` when not in synchronous mode."""
ret = global_config.primary_stop_timeout
@@ -853,7 +850,7 @@
voters=sync.voters,
numsync=sync_state.numsync,
sync=sync_state.sync,
- numsync_confirmed=sync_state.numsync_confirmed,
+ numsync_confirmed=len(sync_state.sync_confirmed),
active=sync_state.active,
sync_wanted=sync_wanted,
leader_wanted=self.state_handler.name):
@@ -899,7 +896,7 @@
current_state = self.state_handler.sync_handler.current_state(self.cluster)
picked = current_state.active
- allow_promote = current_state.sync
+ allow_promote = current_state.sync_confirmed
voters = CaseInsensitiveSet(sync.voters)
if picked == voters and voters != allow_promote:
@@ -910,7 +907,7 @@
return logger.warning("Updating sync state failed")
voters = CaseInsensitiveSet(sync.voters)
- if picked == voters:
+ if picked == voters == current_state.sync and current_state.numsync == len(picked):
return
# update synchronous standby list in dcs temporarily to point to common nodes in current and picked
@@ -934,7 +931,7 @@
if picked and picked != CaseInsensitiveSet('*') and allow_promote != picked:
# Wait for PostgreSQL to enable synchronous mode and see if we can immediately set sync_standby
time.sleep(2)
- allow_promote = self.state_handler.sync_handler.current_state(self.cluster).sync
+ allow_promote = self.state_handler.sync_handler.current_state(self.cluster).sync_confirmed
if allow_promote and allow_promote != sync_common:
if self.dcs.write_sync_state(self.state_handler.name, allow_promote, 0, version=sync.version):
@@ -1114,6 +1111,7 @@
self._failsafe.set_is_active(0)
def before_promote():
+ self._rewind.reset_state() # make sure we will trigger checkpoint after promote
self.notify_mpp_coordinator('before_promote')
with self._async_response:
@@ -1249,12 +1247,15 @@
lag = self.cluster.status.last_lsn - wal_position
return lag > global_config.maximum_lag_on_failover
- def _is_healthiest_node(self, members: Collection[Member], check_replication_lag: bool = True) -> bool:
+ def _is_healthiest_node(self, members: Collection[Member],
+ check_replication_lag: bool = True,
+ leader: Optional[Leader] = None) -> bool:
"""Determine whether the current node is healthy enough to become a new leader candidate.
:param members: the list of nodes to check against
:param check_replication_lag: whether to take the replication lag into account.
If the lag exceeds configured threshold the node disqualifies itself.
+ :param leader: the old cluster leader, it will be used to ignore its ``failover_priority`` value.
:returns: ``True`` if the node is eligible to become the new leader. Since this method is executed
on multiple nodes independently it is possible that multiple nodes could count
themselves as the healthiest because they received/replayed up to the same LSN,
@@ -1296,6 +1297,12 @@
quorum_votes = 0 if self.state_handler.name in voting_set else -1
nodes_ahead = 0
+ # we need to know the name of the former leader to ignore it if it has higher failover_priority
+ if self.sync_mode_is_active():
+ leader_name = self.cluster.sync.leader
+ else:
+ leader_name = leader and leader.name
+
for st in self.fetch_nodes_statuses(members):
if st.failover_limitation() is None:
if st.in_recovery is False:
@@ -1314,6 +1321,11 @@
low_priority = my_wal_position == st.wal_position \
and self.patroni.failover_priority < st.failover_priority
+ if low_priority and leader_name and leader_name == st.member.name:
+ logger.info('Ignoring former leader %s having priority %s higher than this nodes %s priority',
+ leader_name, st.failover_priority, self.patroni.failover_priority)
+ low_priority = False
+
if low_priority and (not self.sync_mode_is_active() or quorum_vote):
# There's a higher priority non-lagging replica
logger.info(
@@ -1364,7 +1376,14 @@
quorum_votes += 1
# In case of quorum replication we need to make sure that there is enough healthy synchronous replicas!
- return quorum_votes >= (self.cluster.sync.quorum if self.quorum_commit_mode_is_active() else 0)
+ # However, when failover candidate is set, we can ignore quorum requirements.
+ check_quorum = self.quorum_commit_mode_is_active() and\
+ not (self.cluster.failover and self.cluster.failover.candidate and not exclude_failover_candidate)
+ if check_quorum and quorum_votes < self.cluster.sync.quorum:
+ logger.info('Quorum requirement %d can not be reached', self.cluster.sync.quorum)
+ return False
+
+ return quorum_votes >= 0
def manual_failover_process_no_leader(self) -> Optional[bool]:
"""Handles manual failover/switchover when the old leader already stepped down.
@@ -1504,7 +1523,7 @@
# run usual health check
members = {m.name: m for m in all_known_members}
- return self._is_healthiest_node(members.values())
+ return self._is_healthiest_node(members.values(), leader=self.old_cluster.leader)
def _delete_leader(self, last_lsn: Optional[int] = None) -> None:
self.set_is_leader(False)
@@ -2253,10 +2272,7 @@
self._sync_replication_slots(True)
return 'continue to run as a leader because failsafe mode is enabled and all members are accessible'
self._failsafe.set_is_active(0)
- msg = 'demoting self because DCS is not accessible and I was a leader'
- if not self._async_executor.try_run_async(msg, self.demote, ('offline',)):
- return msg
- logger.warning('AsyncExecutor is busy, demoting from the main thread')
+ logger.info('demoting self because DCS is not accessible and I was a leader')
self.demote('offline')
return 'demoted self because DCS is not accessible and I was a leader'
else:
@@ -2404,8 +2420,9 @@
return False
# Don't spend time on "nofailover" nodes checking.
# We also don't need nodes which we can't query with the api in the list.
- return node.name not in exclude and \
- not node.nofailover and bool(node.api_url) and \
- (not failover or not failover.candidate or node.name == failover.candidate)
+ # And, if exclude_failover_candidate is True we want to skip node.name == failover.candidate check.
+ return node.name not in exclude and not node.nofailover and bool(node.api_url) and \
+ (exclude_failover_candidate or not failover
+ or not failover.candidate or node.name == failover.candidate)
return list(filter(is_eligible, self.cluster.members))
diff -Nru patroni-4.0.5/patroni/__main__.py patroni-4.0.6/patroni/__main__.py
--- patroni-4.0.5/patroni/__main__.py 2025-02-20 16:40:20.000000000 +0100
+++ patroni-4.0.6/patroni/__main__.py 2025-06-06 19:27:48.000000000 +0200
@@ -12,7 +12,7 @@
from argparse import Namespace
from typing import Any, Dict, List, Optional, TYPE_CHECKING
-from patroni import MIN_PSYCOPG2, MIN_PSYCOPG3, parse_version
+from patroni import global_config, MIN_PSYCOPG2, MIN_PSYCOPG3, parse_version
from patroni.daemon import abstract_main, AbstractPatroniDaemon, get_base_arg_parser
from patroni.tags import Tags
@@ -70,6 +70,9 @@
self.watchdog = Watchdog(self.config)
self.apply_dynamic_configuration(cluster)
+ # Initialize global config
+ global_config.update(None, self.config.dynamic_configuration)
+
self.postgresql = Postgresql(self.config['postgresql'], self.dcs.mpp)
self.api = RestApiServer(self, self.config['restapi'])
self.ha = Ha(self)
diff -Nru patroni-4.0.5/patroni/postgresql/config.py patroni-4.0.6/patroni/postgresql/c