From da19e8dd21bb189f98d6545cba9725d1760f941c Mon Sep 17 00:00:00 2001 From: Cyanocitta Yinhao Wang <1410669639@qq.com> Date: Wed, 11 Sep 2024 18:17:41 +0800 Subject: [PATCH 1/7] Update faster_live_portrait_pipeline.py Rethinking of tracking --- src/pipelines/faster_live_portrait_pipeline.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/pipelines/faster_live_portrait_pipeline.py b/src/pipelines/faster_live_portrait_pipeline.py index 9446d1f..6c9aead 100644 --- a/src/pipelines/faster_live_portrait_pipeline.py +++ b/src/pipelines/faster_live_portrait_pipeline.py @@ -289,7 +289,14 @@ def run(self, image, img_src, src_info, **kwargs): realtime = kwargs.get("realtime", False) if self.cfg.infer_params.flag_crop_driving_video: - if self.src_lmk_pre is None: + NEED_INITIAL_ANAYLYSIS = False + if self.src_lmk_pre is not None: + lt_pt = np.min(self.src_lmk_pre, axis=0) + rb_pt = np.max(self.src_lmk_pre, axis=0) + size = rb_pt - lt_pt + if min(size)< img_bgr.shape[0]//5: # if less than 5 times, say it losts track + NEED_INITIAL_ANAYLYSIS = True + if self.src_lmk_pre is None or NEED_INITIAL_ANAYLYSIS is True: src_face = self.model_dict["face_analysis"].predict(img_bgr) if len(src_face) == 0: self.src_lmk_pre = None From 236624e5b5077751d24ee530fc8e95112fafc398 Mon Sep 17 00:00:00 2001 From: Cyanocitta Yinhao Wang <1410669639@qq.com> Date: Wed, 11 Sep 2024 19:08:10 +0800 Subject: [PATCH 2/7] Update faster_live_portrait_pipeline.py Direct without tracking different face. 
--- .../faster_live_portrait_pipeline.py | 25 +++++++------------ 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/src/pipelines/faster_live_portrait_pipeline.py b/src/pipelines/faster_live_portrait_pipeline.py index 6c9aead..b23faaa 100644 --- a/src/pipelines/faster_live_portrait_pipeline.py +++ b/src/pipelines/faster_live_portrait_pipeline.py @@ -289,24 +289,17 @@ def run(self, image, img_src, src_info, **kwargs): realtime = kwargs.get("realtime", False) if self.cfg.infer_params.flag_crop_driving_video: - NEED_INITIAL_ANAYLYSIS = False - if self.src_lmk_pre is not None: - lt_pt = np.min(self.src_lmk_pre, axis=0) - rb_pt = np.max(self.src_lmk_pre, axis=0) - size = rb_pt - lt_pt - if min(size)< img_bgr.shape[0]//5: # if less than 5 times, say it losts track - NEED_INITIAL_ANAYLYSIS = True - if self.src_lmk_pre is None or NEED_INITIAL_ANAYLYSIS is True: - src_face = self.model_dict["face_analysis"].predict(img_bgr) - if len(src_face) == 0: - self.src_lmk_pre = None + dri_face = self.model_dict["face_analysis"].predict(img_bgr) + if len(dri_face) == 0: + if self.dri_lmk_pre is not None: + # Temporarily use the frame before lost + lmk = self.dri_lmk_pre + else: + self.dri_lmk_pre = None return None, None, None - lmk = src_face[0] - lmk = self.model_dict["landmark"].predict(img_rgb, lmk) - self.src_lmk_pre = lmk.copy() else: - lmk = self.model_dict["landmark"].predict(img_rgb, self.src_lmk_pre) - self.src_lmk_pre = lmk.copy() + lmk = self.model_dict["landmark"].predict(img_rgb, dri_face[0]) + self.dri_lmk_pre = lmk.copy() ret_bbox = parse_bbox_from_landmark( lmk, From f762062233cdf736e839719089be8562b0a75114 Mon Sep 17 00:00:00 2001 From: Cyanocitta Yinhao Wang <1410669639@qq.com> Date: Wed, 11 Sep 2024 19:09:11 +0800 Subject: [PATCH 3/7] Update faster_live_portrait_pipeline.py Change name --- .../faster_live_portrait_pipeline.py | 25 +++++++++++-------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git 
a/src/pipelines/faster_live_portrait_pipeline.py b/src/pipelines/faster_live_portrait_pipeline.py index b23faaa..0035715 100644 --- a/src/pipelines/faster_live_portrait_pipeline.py +++ b/src/pipelines/faster_live_portrait_pipeline.py @@ -73,7 +73,7 @@ def init_models(self, **kwargs): def init_vars(self, **kwargs): self.mask_crop = cv2.imread(self.cfg.infer_params.mask_crop_path, cv2.IMREAD_COLOR) self.frame_id = 0 - self.src_lmk_pre = None + self.dri_lmk_pre = None self.R_d_0 = None self.x_d_0_info = None @@ -187,6 +187,7 @@ def prepare_source(self, source_path, **kwargs): img_crop, img_crop_256x256 = crop_info['img_crop'], crop_info['img_crop_256x256'] pitch, yaw, roll, t, exp, scale, kp = self.model_dict["motion_extractor"].predict( img_crop_256x256) + print(f"motion precdicted scale:{scale}") x_s_info = { "pitch": pitch, "yaw": yaw, @@ -288,7 +289,7 @@ def run(self, image, img_src, src_info, **kwargs): I_p_pstbk = torch.from_numpy(img_src).to(self.device).float() realtime = kwargs.get("realtime", False) - if self.cfg.infer_params.flag_crop_driving_video: + if self.cfg.infer_params.flag_crop_driving_video: dri_face = self.model_dict["face_analysis"].predict(img_bgr) if len(dri_face) == 0: if self.dri_lmk_pre is not None: @@ -300,6 +301,10 @@ def run(self, image, img_src, src_info, **kwargs): else: lmk = self.model_dict["landmark"].predict(img_rgb, dri_face[0]) self.dri_lmk_pre = lmk.copy() + # else: + # lmk = self.model_dict["landmark"].predict(img_rgb, self.dri_lmk_pre) + # self.dri_lmk_pre = lmk.copy() + ret_bbox = parse_bbox_from_landmark( lmk, @@ -325,17 +330,17 @@ def run(self, image, img_src, src_info, **kwargs): img_crop = ret_dct["img_crop"] img_crop = cv2.resize(img_crop, (256, 256)) else: - if self.src_lmk_pre is None: - src_face = self.model_dict["face_analysis"].predict(img_bgr) - if len(src_face) == 0: - self.src_lmk_pre = None + if self.dri_lmk_pre is None: + dri_face = self.model_dict["face_analysis"].predict(img_bgr) + if len(dri_face) == 0: + 
self.dri_lmk_pre = None return None, None, None - lmk = src_face[0] + lmk = dri_face[0] lmk = self.model_dict["landmark"].predict(img_rgb, lmk) - self.src_lmk_pre = lmk.copy() + self.dri_lmk_pre = lmk.copy() else: - lmk = self.model_dict["landmark"].predict(img_rgb, self.src_lmk_pre) - self.src_lmk_pre = lmk.copy() + lmk = self.model_dict["landmark"].predict(img_rgb, self.dri_lmk_pre) + self.dri_lmk_pre = lmk.copy() lmk_crop = lmk.copy() img_crop = cv2.resize(img_rgb, (256, 256)) From a4f8ac2fb0afd2aed192e2653383a3ef72979c05 Mon Sep 17 00:00:00 2001 From: Cyanocitta Yinhao Wang <1410669639@qq.com> Date: Thu, 12 Sep 2024 15:28:48 +0800 Subject: [PATCH 4/7] Update faster_live_portrait_pipeline.py confidence by pixel --- .../faster_live_portrait_pipeline.py | 36 +++++++++++++------ 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/src/pipelines/faster_live_portrait_pipeline.py b/src/pipelines/faster_live_portrait_pipeline.py index 0035715..801e04b 100644 --- a/src/pipelines/faster_live_portrait_pipeline.py +++ b/src/pipelines/faster_live_portrait_pipeline.py @@ -74,6 +74,7 @@ def init_vars(self, **kwargs): self.mask_crop = cv2.imread(self.cfg.infer_params.mask_crop_path, cv2.IMREAD_COLOR) self.frame_id = 0 self.dri_lmk_pre = None + self.dri_reanalysis = False self.R_d_0 = None self.x_d_0_info = None @@ -290,21 +291,34 @@ def run(self, image, img_src, src_info, **kwargs): realtime = kwargs.get("realtime", False) if self.cfg.infer_params.flag_crop_driving_video: - dri_face = self.model_dict["face_analysis"].predict(img_bgr) - if len(dri_face) == 0: - if self.dri_lmk_pre is not None: - # Temporarily use the frame before lost - lmk = self.dri_lmk_pre - else: + + if self.dri_lmk_pre is None: + #initialization + dri_face = self.model_dict["face_analysis"].predict(img_bgr) + if len(dri_face) == 0: self.dri_lmk_pre = None return None, None, None - else: lmk = self.model_dict["landmark"].predict(img_rgb, dri_face[0]) self.dri_lmk_pre = lmk.copy() - # else: - # lmk 
= self.model_dict["landmark"].predict(img_rgb, self.dri_lmk_pre) - # self.dri_lmk_pre = lmk.copy() - + elif self.dri_reanalysis: + dri_face = self.model_dict["face_analysis"].predict(img_bgr) + if len(dri_face) == 0: + # assert self.dri_lmk_pre is not None + # Temporarily use the frame before lost + lmk = self.dri_lmk_pre + else: + # Re initialization + self.dri_reanalysis = False + lmk = self.model_dict["landmark"].predict(img_rgb, dri_face[0]) + self.dri_lmk_pre = lmk.copy() + else: + lmk = self.model_dict["landmark"].predict(img_rgb, self.dri_lmk_pre) + slice = lmk[:,0] + diff = slice.max()-slice.min() + if diff < 32: # not confident, say less than 32 pixels + self.dri_reanalysis = True + self.dri_lmk_pre = lmk.copy() + ret_bbox = parse_bbox_from_landmark( lmk, From 3e249a46af75c23e1e70782142d8fc1f46512ce0 Mon Sep 17 00:00:00 2001 From: Cyanocitta Yinhao Wang <1410669639@qq.com> Date: Thu, 12 Sep 2024 15:29:31 +0800 Subject: [PATCH 5/7] Update faster_live_portrait_pipeline.py --- src/pipelines/faster_live_portrait_pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pipelines/faster_live_portrait_pipeline.py b/src/pipelines/faster_live_portrait_pipeline.py index 801e04b..0dbfdc8 100644 --- a/src/pipelines/faster_live_portrait_pipeline.py +++ b/src/pipelines/faster_live_portrait_pipeline.py @@ -188,7 +188,7 @@ def prepare_source(self, source_path, **kwargs): img_crop, img_crop_256x256 = crop_info['img_crop'], crop_info['img_crop_256x256'] pitch, yaw, roll, t, exp, scale, kp = self.model_dict["motion_extractor"].predict( img_crop_256x256) - print(f"motion precdicted scale:{scale}") + # print(f"motion precdicted scale:{scale}") x_s_info = { "pitch": pitch, "yaw": yaw, From 2e21ca9d885d4b2f2da2febf961684a4b68c7029 Mon Sep 17 00:00:00 2001 From: Cyanocitta Yinhao Wang <1410669639@qq.com> Date: Thu, 12 Sep 2024 16:07:12 +0800 Subject: [PATCH 6/7] Update faster_live_portrait_pipeline.py Not confident when the face shrinks by more than 20 pixels, or is less than 32
pixels wide --- .../faster_live_portrait_pipeline.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/src/pipelines/faster_live_portrait_pipeline.py b/src/pipelines/faster_live_portrait_pipeline.py index 0dbfdc8..5b1f174 100644 --- a/src/pipelines/faster_live_portrait_pipeline.py +++ b/src/pipelines/faster_live_portrait_pipeline.py @@ -74,6 +74,8 @@ def init_vars(self, **kwargs): self.mask_crop = cv2.imread(self.cfg.infer_params.mask_crop_path, cv2.IMREAD_COLOR) self.frame_id = 0 self.dri_lmk_pre = None + self.dir_initial = None + self.dir_diff = None self.dri_reanalysis = False self.R_d_0 = None self.x_d_0_info = None @@ -188,7 +190,7 @@ def prepare_source(self, source_path, **kwargs): img_crop, img_crop_256x256 = crop_info['img_crop'], crop_info['img_crop_256x256'] pitch, yaw, roll, t, exp, scale, kp = self.model_dict["motion_extractor"].predict( img_crop_256x256) - # print(f"motion precdicted scale:{scale}") + print(f"\n motion precdicted scale:{scale}") x_s_info = { "pitch": pitch, "yaw": yaw, @@ -299,24 +301,33 @@ def run(self, image, img_src, src_info, **kwargs): self.dri_lmk_pre = None return None, None, None lmk = self.model_dict["landmark"].predict(img_rgb, dri_face[0]) + slice = lmk[:,0] + self.diff = slice.max()-slice.min() self.dri_lmk_pre = lmk.copy() + self.dir_initial = lmk.copy() elif self.dri_reanalysis: dri_face = self.model_dict["face_analysis"].predict(img_bgr) if len(dri_face) == 0: # assert self.dri_lmk_pre is not None # Temporarily use the frame before lost - lmk = self.dri_lmk_pre + lmk = self.dir_initial else: # Re initialization self.dri_reanalysis = False lmk = self.model_dict["landmark"].predict(img_rgb, dri_face[0]) + slice = lmk[:,0] + self.diff = slice.max()-slice.min() self.dri_lmk_pre = lmk.copy() + self.dir_initial = lmk.copy() else: lmk = self.model_dict["landmark"].predict(img_rgb, self.dri_lmk_pre) slice = lmk[:,0] - diff = slice.max()-slice.min() - if diff < 32: # not confident, say less
than 32 pixels + diff = slice.max()-slice.min() + if self.diff - diff > 20: + self.dri_reanalysis = True # not confident when weird shrink + elif diff < 32: # not confident, say less than 32 pixels self.dri_reanalysis = True + self.diff = diff self.dri_lmk_pre = lmk.copy() From 965e872d65e10d65ab0a021b757f1624e35767fa Mon Sep 17 00:00:00 2001 From: Cyanocitta Yinhao Wang <1410669639@qq.com> Date: Thu, 12 Sep 2024 16:09:32 +0800 Subject: [PATCH 7/7] Update faster_live_portrait_pipeline.py Fix typo --- src/pipelines/faster_live_portrait_pipeline.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/pipelines/faster_live_portrait_pipeline.py b/src/pipelines/faster_live_portrait_pipeline.py index 5b1f174..a4413a7 100644 --- a/src/pipelines/faster_live_portrait_pipeline.py +++ b/src/pipelines/faster_live_portrait_pipeline.py @@ -74,8 +74,8 @@ def init_vars(self, **kwargs): self.mask_crop = cv2.imread(self.cfg.infer_params.mask_crop_path, cv2.IMREAD_COLOR) self.frame_id = 0 self.dri_lmk_pre = None - self.dir_initial = None - self.dir_diff = None + self.dri_initial = None + self.dri_diff = None self.dri_reanalysis = False self.R_d_0 = None self.x_d_0_info = None @@ -304,13 +304,13 @@ def run(self, image, img_src, src_info, **kwargs): slice = lmk[:,0] self.diff = slice.max()-slice.min() self.dri_lmk_pre = lmk.copy() - self.dir_initial = lmk.copy() + self.dri_initial = lmk.copy() elif self.dri_reanalysis: dri_face = self.model_dict["face_analysis"].predict(img_bgr) if len(dri_face) == 0: # assert self.dri_lmk_pre is not None # Temporarily use the frame before lost - lmk = self.dir_initial + lmk = self.dri_initial else: # Re initialization self.dri_reanalysis = False @@ -318,16 +318,16 @@ def run(self, image, img_src, src_info, **kwargs): slice = lmk[:,0] self.diff = slice.max()-slice.min() self.dri_lmk_pre = lmk.copy() - self.dir_initial = lmk.copy() + self.dri_initial = lmk.copy() else: lmk =
self.model_dict["landmark"].predict(img_rgb, self.dri_lmk_pre) slice = lmk[:,0] - diff = slice.max()-slice.min() - if self.diff - diff > 20: + dri_diff = slice.max()-slice.min() + if self.dri_diff - dri_diff > 20: self.dri_reanalysis = True # not confident when weird shrink - elif diff < 32: # not confident, say less than 32 pixels + elif dri_diff < 32: # not confident, say less than 32 pixels self.dri_reanalysis = True - self.diff = diff + self.dri_diff = dri_diff self.dri_lmk_pre = lmk.copy()