Forum: >>> Magnum BBS <<<

Bug#1109865: unblock: patroni/4.0.6-1 (2/4)

From Michael Banck@21:1/5 to All on Fri Jul 25 11:40:01 2025

[continued from previous message]

""":returns: "primary_stop_timeout" from the global configuration or `None` when not in synchronous mode."""
ret = global_config.primary_stop_timeout
@@ -853,7 +850,7 @@
voters=sync.voters,
numsync=sync_state.numsync,
sync=sync_state.sync,
- numsync_confirmed=sync_state.numsync_confirmed,
+ numsync_confirmed=len(sync_state.sync_confirmed),
active=sync_state.active,
sync_wanted=sync_wanted,
leader_wanted=self.state_handler.name):
@@ -899,7 +896,7 @@

current_state = self.state_handler.sync_handler.current_state(self.cluster)
picked = current_state.active
- allow_promote = current_state.sync
+ allow_promote = current_state.sync_confirmed
voters = CaseInsensitiveSet(sync.voters)

if picked == voters and voters != allow_promote:
@@ -910,7 +907,7 @@
return logger.warning("Updating sync state failed")
voters = CaseInsensitiveSet(sync.voters)

- if picked == voters:
+ if picked == voters == current_state.sync and current_state.numsync == len(picked):
return

# update synchronous standby list in dcs temporarily to point to common nodes in current and picked
@@ -934,7 +931,7 @@
if picked and picked != CaseInsensitiveSet('*') and allow_promote != picked:
# Wait for PostgreSQL to enable synchronous mode and see if we can immediately set sync_standby
time.sleep(2)
- allow_promote = self.state_handler.sync_handler.current_state(self.cluster).sync
+ allow_promote = self.state_handler.sync_handler.current_state(self.cluster).sync_confirmed

if allow_promote and allow_promote != sync_common:
if self.dcs.write_sync_state(self.state_handler.name, allow_promote, 0, version=sync.version):
@@ -1114,6 +1111,7 @@
self._failsafe.set_is_active(0)

def before_promote():
+ self._rewind.reset_state() # make sure we will trigger checkpoint after promote
self.notify_mpp_coordinator('before_promote')

with self._async_response:
@@ -1249,12 +1247,15 @@
lag = self.cluster.status.last_lsn - wal_position
return lag > global_config.maximum_lag_on_failover

- def _is_healthiest_node(self, members: Collection[Member], check_replication_lag: bool = True) -> bool:
+ def _is_healthiest_node(self, members: Collection[Member],
+ check_replication_lag: bool = True,
+ leader: Optional[Leader] = None) -> bool:
"""Determine whether the current node is healthy enough to become a new leader candidate.

:param members: the list of nodes to check against
:param check_replication_lag: whether to take the replication lag into account.
If the lag exceeds configured threshold the node disqualifies itself.
+ :param leader: the old cluster leader, it will be used to ignore its ``failover_priority`` value.
:returns: ``True`` if the node is eligible to become the new leader. Since this method is executed
on multiple nodes independently it is possible that multiple nodes could count
themselves as the healthiest because they received/replayed up to the same LSN,
@@ -1296,6 +1297,12 @@
quorum_votes = 0 if self.state_handler.name in voting_set else -1
nodes_ahead = 0

+ # we need to know the name of the former leader to ignore it if it has higher failover_priority
+ if self.sync_mode_is_active():
+ leader_name = self.cluster.sync.leader
+ else:
+ leader_name = leader and leader.name
+
for st in self.fetch_nodes_statuses(members):
if st.failover_limitation() is None:
if st.in_recovery is False:
@@ -1314,6 +1321,11 @@
low_priority = my_wal_position == st.wal_position \
and self.patroni.failover_priority < st.failover_priority

+ if low_priority and leader_name and leader_name == st.member.name:
+ logger.info('Ignoring former leader %s having priority %s higher than this nodes %s priority',
+ leader_name, st.failover_priority, self.patroni.failover_priority)
+ low_priority = False
+
if low_priority and (not self.sync_mode_is_active() or quorum_vote):
# There's a higher priority non-lagging replica
logger.info(
@@ -1364,7 +1376,14 @@
quorum_votes += 1

# In case of quorum replication we need to make sure that there is enough healthy synchronous replicas!
- return quorum_votes >= (self.cluster.sync.quorum if self.quorum_commit_mode_is_active() else 0)
+ # However, when failover candidate is set, we can ignore quorum requirements.
+ check_quorum = self.quorum_commit_mode_is_active() and\
+ not (self.cluster.failover and self.cluster.failover.candidate and not exclude_failover_candidate)
+ if check_quorum and quorum_votes < self.cluster.sync.quorum:
+ logger.info('Quorum requirement %d can not be reached', self.cluster.sync.quorum)
+ return False
+
+ return quorum_votes >= 0

def manual_failover_process_no_leader(self) -> Optional[bool]:
"""Handles manual failover/switchover when the old leader already stepped down.
@@ -1504,7 +1523,7 @@
# run usual health check
members = {m.name: m for m in all_known_members}

- return self._is_healthiest_node(members.values())
+ return self._is_healthiest_node(members.values(), leader=self.old_cluster.leader)

def _delete_leader(self, last_lsn: Optional[int] = None) -> None:
self.set_is_leader(False)
@@ -2253,10 +2272,7 @@
self._sync_replication_slots(True)
return 'continue to run as a leader because failsafe mode is enabled and all members are accessible'
self._failsafe.set_is_active(0)
- msg = 'demoting self because DCS is not accessible and I was a leader'
- if not self._async_executor.try_run_async(msg, self.demote, ('offline',)):
- return msg
- logger.warning('AsyncExecutor is busy, demoting from the main thread')
+ logger.info('demoting self because DCS is not accessible and I was a leader')
self.demote('offline')
return 'demoted self because DCS is not accessible and I was a leader'
else:
@@ -2404,8 +2420,9 @@
return False
# Don't spend time on "nofailover" nodes checking.
# We also don't need nodes which we can't query with the api in the list.
- return node.name not in exclude and \
- not node.nofailover and bool(node.api_url) and \
- (not failover or not failover.candidate or node.name == failover.candidate)
+ # And, if exclude_failover_candidate is True we want to skip node.name == failover.candidate check.
+ return node.name not in exclude and not node.nofailover and bool(node.api_url) and \
+ (exclude_failover_candidate or not failover
+ or not failover.candidate or node.name == failover.candidate)

return list(filter(is_eligible, self.cluster.members))
diff -Nru patroni-4.0.5/patroni/__main__.py patroni-4.0.6/patroni/__main__.py
--- patroni-4.0.5/patroni/__main__.py 2025-02-20 16:40:20.000000000 +0100
+++ patroni-4.0.6/patroni/__main__.py 2025-06-06 19:27:48.000000000 +0200
@@ -12,7 +12,7 @@
from argparse import Namespace
from typing import Any, Dict, List, Optional, TYPE_CHECKING

-from patroni import MIN_PSYCOPG2, MIN_PSYCOPG3, parse_version
+from patroni import global_config, MIN_PSYCOPG2, MIN_PSYCOPG3, parse_version
from patroni.daemon import abstract_main, AbstractPatroniDaemon, get_base_arg_parser
from patroni.tags import Tags

@@ -70,6 +70,9 @@
self.watchdog = Watchdog(self.config)
self.apply_dynamic_configuration(cluster)

+ # Initialize global config
+ global_config.update(None, self.config.dynamic_configuration)
+
self.postgresql = Postgresql(self.config['postgresql'], self.dcs.mpp)
self.api = RestApiServer(self, self.config['restapi'])
self.ha = Ha(self)
diff -Nru patroni-4.0.5/patroni/postgresql/config.py patroni-4.0.6/patroni/postgresql/c

Who's Online
Recent Visitors
- Rixter
  Fri Jul 31 12:17:09 2026
  from Madison, Nc via Telnet
- Krenn
  Fri Jul 31 10:41:58 2026
  from Sydney, Nsw via Telnet
- Krenn
  Fri Jul 31 10:34:35 2026
  from Sydney, Nsw via Telnet
- Shift
  Fri Jul 31 06:46:34 2026
  from Leeds, England via SSH
- Centurion
  Fri Jul 31 00:59:56 2026
  from Berea, Ohio via Telnet
- Rixter
  Fri Jul 31 00:00:46 2026
  from Madison, Nc via Telnet
- Bob Worm
  Thu Jul 30 20:01:55 2026
  from Wales, Uk via Telnet
- Rixter
  Thu Jul 30 14:17:17 2026
  from Madison, Nc via Telnet

System Info

Sysop:	Keyop
Location:	Huddersfield, West Yorkshire, UK
Users:	741
Nodes:	16 (2 / 14)
Uptime:	114:43:11
Calls:	12,464
Calls today:	6
Files:	15,200
Messages:	6,538,219

Bug#1109865: unblock: patroni/4.0.6-1 (2/4)

Who's Online

Recent Visitors

System Info