dump: Try to seize task tree several times on error

Tasks can fork or die while we try to seize them. It's much simpler and much more reliable to unseize what was seized and walk the tree again if some tree check fails. Yes, this makes it impossible to suspend a big tree which constantly forks :( but I'd prefer fixing more urgent issues first. Signed-off-by: Pavel Emelyanov <xemul@parallels.com> Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
2025-08-31 14:25:49 +00:00 · 2012-03-01 19:11:29 +04:00
parent 9e0b308af0
commit a1fe2c58a9
1 changed files with 36 additions and 1 deletions
--- a/cr-dump.c
+++ b/cr-dump.c
@@ -1012,7 +1012,42 @@ static int collect_subtree(pid_t pid, pid_t ppid, struct list_head *pstree_list,

 static int collect_pstree(pid_t pid, struct list_head *pstree_list, int leader_only)
 {
-	return collect_subtree(pid, -1, pstree_list, leader_only);
+	int ret, attempts = 5;
+
+	while (1) {
+		ret = collect_subtree(pid, -1, pstree_list, leader_only);
+		if (ret == 0)
+			break;
+
+		/*
+		 * Tasks can die or fork while we seize the tree, which can
+		 * make the collection fail. Rather than chase them, drop
+		 * everything collected so far (unseize and free each item)
+		 * and walk the tree again, up to 'attempts' more times.
+		 */
+
+		if (attempts == 0)
+			break;
+
+		attempts--;
+		pr_info("Trying to suspend tasks again\n");
+
+		while (!list_empty(pstree_list)) {
+			struct pstree_item *item;
+
+			item = list_first_entry(pstree_list,
+					struct pstree_item, list);
+			list_del(&item->list);
+
+			unseize_task_and_threads(item, TASK_ALIVE);
+
+			xfree(item->children);
+			xfree(item->threads);
+			xfree(item);
+		}
+	}
+
+	return ret;
 }

 static int dump_pstree(pid_t pid, struct list_head *pstree_list, struct cr_fdset *cr_fdset)