Skip to content

Commit ec1f735

Browse files
committed
fix(linstor): ensure that drbd is synced up before resizing
Avoid the most common DRBD sync errors during resize by using a cleaner wait mechanism and raising a user-friendly error. Signed-off-by: Antoine Bartuccio <antoine.bartuccio@vates.tech>
1 parent 1cb74b3 commit ec1f735

File tree

1 file changed

+22
-20
lines changed

1 file changed

+22
-20
lines changed

drivers/linstorvolumemanager.py

Lines changed: 22 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -841,31 +841,33 @@ def resize_volume(self, volume_uuid, new_size):
841841
self.ensure_volume_is_not_locked(volume_uuid)
842842
new_size = self.round_up_volume_size(new_size) // 1024
843843

844-
retry_count = 30
845-
while True:
846-
result = self._linstor.volume_dfn_modify(
847-
rsc_name=volume_name,
848-
volume_nr=0,
849-
size=new_size
844+
# We can't resize anything until DRBD is up to date.
845+
# We wait here for at most 5 minutes and raise an easy-to-understand error for the user on timeout.
846+
# 5min is an arbitrary value: no single timeout fits every situation,
847+
# and it's currently impossible to know how much time we have to wait
848+
# This is mostly an issue for thick provisioning, thin isn't affected.
849+
start_time = time.monotonic()
850+
try:
851+
self._linstor.resource_dfn_wait_synced(volume_name, wait_interval=1.0, timeout=60*5)
852+
except linstor.LinstorTimeoutError:
853+
raise LinstorVolumeManagerError(
854+
f"Volume `{volume_uuid}` from SR `{self._group_name}` is busy and can't be resized right now. " +
855+
"Please retry later."
850856
)
857+
util.SMlog(f"DRBD is up to date, syncing took {time.monotonic() - start_time}s")
851858

852-
self._mark_resource_cache_as_dirty()
853-
854-
error_str = self._get_error_str(result)
855-
if not error_str:
856-
break
859+
result = self._linstor.volume_dfn_modify(
860+
rsc_name=volume_name,
861+
volume_nr=0,
862+
size=new_size
863+
)
857864

858-
# After volume creation, DRBD volume can be unusable during many seconds.
859-
# So we must retry the definition change if the device is not up to date.
860-
# Often the case for thick provisioning.
861-
if retry_count and error_str.find('non-UpToDate DRBD device') >= 0:
862-
time.sleep(2)
863-
retry_count -= 1
864-
continue
865+
self._mark_resource_cache_as_dirty()
865866

867+
error_str = self._get_error_str(result)
868+
if error_str:
866869
raise LinstorVolumeManagerError(
867-
'Could not resize volume `{}` from SR `{}`: {}'
868-
.format(volume_uuid, self._group_name, error_str)
870+
f"Could not resize volume `{volume_uuid}` from SR `{self._group_name}`: {error_str}"
869871
)
870872

871873
def get_volume_name(self, volume_uuid):

0 commit comments

Comments
 (0)