Skip to content

Commit 966d75f

Browse files
committed
同步VSR中带来的改进:
支持不同任务使用不同选区 选区部分全部采用浮点值, 以便更好的适配不同尺寸不同分辨率 支持设置视频保存路径 将字幕选区改为实体对象 美化右键菜单样式 支持合并进度展示
1 parent 01ac8af commit 966d75f

19 files changed

+1092
-458
lines changed

backend/bean/subtitle_area.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
2+
from typing import Union
3+
from dataclasses import dataclass
4+
from shapely.geometry import Polygon
5+
6+
@dataclass
class SubtitleArea:
    """Axis-aligned rectangular subtitle region inside a video frame.

    The four bounds may be ints or floats. ``@dataclass`` generates
    ``__init__(ymin, ymax, xmin, xmax, ab_section=None)`` as well as
    ``__repr__`` and ``__eq__`` from the fields declared below.
    """
    ymin: Union[int, float]
    ymax: Union[int, float]
    xmin: Union[int, float]
    xmax: Union[int, float]
    # Section of the video this area applies to; None means "no restriction".
    # NOTE(review): presumably a range of frame indices - confirm with callers.
    ab_section: Union[range, None] = None

    def normalized(self):
        """Reorder the bounds in place so that xmin <= xmax and ymin <= ymax.

        Mutates this instance and returns None.
        """
        if self.xmin > self.xmax:
            self.xmin, self.xmax = self.xmax, self.xmin
        if self.ymin > self.ymax:
            self.ymin, self.ymax = self.ymax, self.ymin

    def is_empty(self):
        """Return True when all four bounds are exactly zero."""
        return self.xmin == 0 and self.xmax == 0 and self.ymin == 0 and self.ymax == 0

    @property
    def width(self):
        """Horizontal extent, ``xmax - xmin`` (negative if not normalized)."""
        return self.xmax - self.xmin

    @property
    def height(self):
        """Vertical extent, ``ymax - ymin`` (negative if not normalized)."""
        return self.ymax - self.ymin

    def in_ab_section(self, frame_idx):
        """Return True if ``frame_idx`` lies inside this area's ``ab_section``.

        An area without a section (``ab_section is None``, the default)
        matches every frame.
        """
        if self.ab_section is None:
            return True
        return frame_idx in self.ab_section

    def to_polygon(self):
        """Return the rectangle as a shapely ``Polygon``.

        Corners are listed as (xmin, ymin), (xmax, ymin), (xmax, ymax),
        (xmin, ymax).
        """
        return Polygon([[self.xmin, self.ymin], [self.xmax, self.ymin], [self.xmax, self.ymax], [self.xmin, self.ymax]])

backend/config.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import os
33
from pathlib import Path
44
from qfluentwidgets import (qconfig, ConfigItem, QConfig, OptionsValidator, BoolValidator, OptionsConfigItem,
5-
EnumSerializer, RangeValidator, RangeConfigItem)
5+
EnumSerializer, RangeValidator, RangeConfigItem, ConfigValidator)
66
from backend.tools.constant import SubtitleArea, VideoSubFinderDecoder
77
import configparser
88

@@ -44,10 +44,9 @@ class Config(QConfig):
4444
windowW = ConfigItem("Window", "Width", 1200)
4545
windowH = ConfigItem("Window", "Height", 1200)
4646

47-
subtitleSelectionAreaX = ConfigItem("Main", "SubtitleSelectionAreaX", 0.05)
48-
subtitleSelectionAreaY = ConfigItem("Main", "SubtitleSelectionAreaY", 0.78)
49-
subtitleSelectionAreaW = ConfigItem("Main", "SubtitleSelectionAreaW", 0.90)
50-
subtitleSelectionAreaH = ConfigItem("Main", "SubtitleSelectionAreaH", 0.21)
47+
# 使用一个配置项存储所有选区
48+
# 默认值为一个选区,格式为:"ymin,ymax,xmin,xmax;ymin,ymax,xmin,xmax;...",分号分隔不同选区
49+
subtitleSelectionAreas = ConfigItem("Main", "SubtitleSelectionAreas", "0.78,0.99,0.05,0.95")
5150

5251
# 字幕语言设置
5352
language = OptionsConfigItem("Main", "Language", "ch", OptionsValidator([name for name in tr["Language"]]))
@@ -91,6 +90,8 @@ class Config(QConfig):
9190
hardwareAcceleration = ConfigItem("Main", "HardwareAcceleration", HARDWARD_ACCELERATION_OPTION, BoolValidator())
9291
# 启动时检查应用更新
9392
checkUpdateOnStartup = ConfigItem("Main", "CheckUpdateOnStartup", True, BoolValidator())
93+
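# Subtitle save directory (the UI strings label this setting "Subtitle Save Directory"; an empty value presumably means saving next to the input video - confirm)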
94+
saveDirectory = ConfigItem("Main", "SaveDirectory", "", ConfigValidator())
9495
# VideoSubFinder CPU核心数
9596
videoSubFinderCpuCores = RangeConfigItem("Main", "VideoSubFinderCpuCores", 0, RangeValidator(0, os.cpu_count()))
9697
# VideoSubFinder 视频解码组件

backend/interface/ch.ini

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,9 @@ VideoSubFinderCpuCores = CPU核心数
5555
VideoSubFinderCpuCoresDesc = 字幕提取时使用的CPU核心数,默认为0,即自动, 选择所有核心并不会变得快
5656
VideoSubFinderDecoder = 视频解码组件
5757
VideoSubFinderDecoderDesc = 用于针对不同视频兼容性需求, 默认为OpenCV(推荐), 当遇到无法正常提取时可以切换为FFmpeg(可能会造成时间轴细微偏移)
58+
ChooseDirectory = 选择文件夹
59+
SaveDirectory = 字幕保存目录
60+
SaveDirectoryDefault = 默认保存到输入视频当前目录
5861

5962
[SubtitleArea]
6063
LowerPart = 下半部分
@@ -176,6 +179,7 @@ OpenVideoFirst = 请先打开视频
176179
SubtitleArea = 字幕区域
177180
VideoPreview = 视频预览
178181
ErrorDuringProcessing = 处理过程中发生错误: {}
182+
DeleteSelection = 删除当前激活选区
179183

180184
[Main]
181185
RecSubLang = 识别字幕语言

backend/interface/chinese_cht.ini

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,9 @@ VideoSubFinderCpuCores = CPU核心數
5555
VideoSubFinderCpuCoresDesc = 提取字幕時使用的CPU核心數,預設為0(自動),選擇所有核心不一定更快
5656
VideoSubFinderDecoder = 視頻解碼組件
5757
VideoSubFinderDecoderDesc = 用於不同影片相容性需求,預設為OpenCV(推薦),若無法正常提取可切換為FFmpeg(可能導致時間軸略有偏移)
58+
ChooseDirectory = 選擇資料夾
59+
SaveDirectory = 字幕儲存目錄
60+
SaveDirectoryDefault = 預設儲存至輸入影片的目前目錄
5861

5962
[SubtitleArea]
6063
LowerPart = 下半部分
@@ -176,6 +179,7 @@ OpenVideoFirst = 請先打開視頻
176179
SubtitleArea = 字幕區域
177180
VideoPreview = 視頻預覽
178181
ErrorDuringProcessing = 處理過程中發生錯誤: {}
182+
DeleteSelection = 刪除當前激活選區
179183

180184
[Main]
181185
RecSubLang = 識別字幕語言

backend/interface/en.ini

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,9 @@ VideoSubFinderCpuCores = CPU Cores
5555
VideoSubFinderCpuCoresDesc = Number of CPU cores used for subtitle extraction. Default is 0 (auto). Using all cores may not be faster.
5656
VideoSubFinderDecoder = Video Decoder
5757
VideoSubFinderDecoderDesc = For different video compatibility needs. Default is OpenCV (recommended). Switch to FFmpeg if extraction fails (may cause slight timeline shift).
58+
ChooseDirectory = Choose Folder
59+
SaveDirectory = Subtitle Save Directory
60+
SaveDirectoryDefault = Default: save to the current directory of the input video
5861

5962
[SubtitleArea]
6063
LowerPart = Lower Part
@@ -177,6 +180,7 @@ OpenVideoFirst = Please Open Video First
177180
SubtitleArea = Subtitle Area
178181
VideoPreview = Video Preview
179182
ErrorDuringProcessing = Error during processing: {}
183+
DeleteSelection = Delete Selection
180184

181185
[Main]
182186
RecSubLang = Subtitle Language

backend/interface/es.ini

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,9 @@ VideoSubFinderCpuCores = Núcleos de CPU
5555
VideoSubFinderCpuCoresDesc = Núcleos de CPU usados para extraer subtítulos. Por defecto es 0 (automático). Usar todos los núcleos no siempre es más rápido.
5656
VideoSubFinderDecoder = Componente de decodificación de video
5757
VideoSubFinderDecoderDesc = Para diferentes necesidades de compatibilidad de video. Por defecto es OpenCV (recomendado). Cambia a FFmpeg si la extracción falla (puede causar un pequeño desfase en la línea de tiempo).
58+
ChooseDirectory = Seleccionar Carpeta
59+
SaveDirectory = Directorio de guardado de subtítulos
60+
SaveDirectoryDefault = Predeterminado: guardar en el directorio actual del video de entrada
5861

5962
[SubtitleArea]
6063
LowerPart = Parte inferior
@@ -176,6 +179,7 @@ OpenVideoFirst = Por favor, abra el video primero
176179
SubtitleArea = Área de subtítulos
177180
VideoPreview = Vista previa de video
178181
ErrorDuringProcessing = Error durante el procesamiento: {}
182+
DeleteSelection = Eliminar selección
179183

180184
[Main]
181185
RecSubLang = Idioma de subtítulos

backend/interface/japan.ini

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,9 @@ VideoSubFinderCpuCores = CPUコア数
5555
VideoSubFinderCpuCoresDesc = 字幕抽出に使用するCPUコア数。デフォルトは0(自動)。全コアを選択しても速くなるとは限りません。
5656
VideoSubFinderDecoder = ビデオデコーダー
5757
VideoSubFinderDecoderDesc = 異なる動画互換性のニーズに対応。デフォルトはOpenCV(推奨)。抽出できない場合はFFmpegに切り替えてください(タイムラインがわずかにずれる場合があります)。
58+
ChooseDirectory = フォルダを選択
59+
SaveDirectory = 字幕の保存ディレクトリ
60+
SaveDirectoryDefault = デフォルト: 入力動画の現在のディレクトリに保存
5861

5962
[SubtitleArea]
6063
LowerPart = 下部
@@ -176,6 +179,7 @@ OpenVideoFirst = 最初にビデオを開いてください
176179
SubtitleArea = サブタイトル領域
177180
VideoPreview = ビデオプレビュー
178181
ErrorDuringProcessing = 処理中にエラーが発生しました: {}
182+
DeleteSelection = 現在のアクティブ選択範囲を削除
179183

180184
[Main]
181185
RecSubLang = サブタイトル言語

backend/interface/ko.ini

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,9 @@ VideoSubFinderCpuCores = CPU 코어 수
5555
VideoSubFinderCpuCoresDesc = 자막 추출에 사용할 CPU 코어 수입니다. 기본값은 0(자동)이며, 모든 코어를 선택해도 더 빨라지지 않을 수 있습니다.
5656
VideoSubFinderDecoder = 비디오 디코더
5757
VideoSubFinderDecoderDesc = 다양한 비디오 호환성 요구에 사용. 기본값은 OpenCV(추천)이며, 추출이 실패할 경우 FFmpeg로 전환하세요(타임라인이 약간 어긋날 수 있음).
58+
ChooseDirectory = 폴더 선택
59+
SaveDirectory = 자막 저장 디렉터리
60+
SaveDirectoryDefault = 기본값: 입력 동영상의 현재 디렉터리에 저장
5861

5962
[SubtitleArea]
6063
LowerPart = 하단 부분
@@ -176,6 +179,7 @@ OpenVideoFirst = 비디오을 먼저 열어주세요
176179
SubtitleArea = 자막 영역
177180
VideoPreview = 비디오 미리보기
178181
ErrorDuringProcessing = 처리 중 오류: {}
182+
DeleteSelection = 현재 활성 선택 영역 삭제
179183

180184
[Main]
181185
RecSubLang = 자막 언어 인식

backend/interface/vi.ini

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,10 @@ VideoSubFinderCpuCores = Số lõi CPU
5555
VideoSubFinderCpuCoresDesc = Số lõi CPU dùng để tách phụ đề. Mặc định là 0 (tự động). Dùng tất cả lõi không phải lúc nào cũng nhanh hơn.
5656
VideoSubFinderDecoder = Bộ giải mã video
5757
VideoSubFinderDecoderDesc = Dùng cho các nhu cầu tương thích video khác nhau. Mặc định là OpenCV (khuyến nghị). Chuyển sang FFmpeg nếu tách phụ đề thất bại (có thể lệch nhẹ thời gian).
58+
ChooseDirectory = Chọn Thư Mục
59+
SaveDirectory = Thư mục lưu phụ đề
60+
SaveDirectoryDefault = Mặc định: lưu vào thư mục hiện tại của video đầu vào
61+
DeleteSelection = Xóa lựa chọn
5862

5963
[SubtitleArea]
6064
LowerPart = Phần dưới

backend/main.py

Lines changed: 30 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import re
1010
import random
1111
import shutil
12+
import traceback
1213
from collections import Counter, namedtuple
1314
import unicodedata
1415
from threading import Thread
@@ -30,46 +31,24 @@
3031
from backend.tools import subtitle_ocr
3132
from backend.tools.paddle_model_config import PaddleModelConfig
3233
from backend.tools.process_manager import ProcessManager
34+
from backend.tools.subtitle_detect import SubtitleDetect
35+
from backend.bean.subtitle_area import SubtitleArea
3336
import threading
3437
import platform
3538
import multiprocessing
3639
import time
3740
import pysrt
3841

39-
class SubtitleDetect:
40-
"""
41-
文本框检测类,用于检测视频帧中是否存在文本框
42-
"""
43-
44-
def __init__(self):
45-
from paddleocr.tools.infer import utility
46-
from paddleocr.tools.infer.predict_det import TextDetector
47-
hardware_accelerator = HardwareAccelerator.instance()
48-
onnx_providers = hardware_accelerator.onnx_providers
49-
model_config = PaddleModelConfig(hardware_accelerator)
50-
args = utility.parse_args()
51-
args.det_algorithm = 'DB'
52-
args.det_model_dir = model_config.convertToOnnxModelIfNeeded(model_config.DET_MODEL_PATH)
53-
args.use_gpu=hardware_accelerator.has_cuda()
54-
args.use_onnx=len(onnx_providers) > 0
55-
args.onnx_providers=onnx_providers
56-
self.text_detector = TextDetector(args)
57-
58-
def detect_subtitle(self, img):
59-
dt_boxes, elapse = self.text_detector(img)
60-
return dt_boxes, elapse
61-
62-
6342
class SubtitleExtractor:
6443
"""
6544
视频字幕提取类
6645
"""
6746

68-
def __init__(self, vd_path, sub_area=None):
47+
def __init__(self, vd_path):
6948
# 线程锁
7049
self.lock = threading.RLock()
7150
# 用户指定的字幕区域位置
72-
self.sub_area = sub_area
51+
self.sub_area = None
7352
self.hardware_accelerator = HardwareAccelerator.instance()
7453
# 是否使用硬件加速
7554
self.hardware_accelerator.set_enabled(config.hardwareAcceleration.value)
@@ -105,7 +84,7 @@ def __init__(self, vd_path, sub_area=None):
10584
# 自定义ocr对象
10685
self.ocr = None
10786
# 总处理进度
108-
self.progress_total = 0
87+
self.progress_total = 200
10988
# 视频帧提取进度
11089
self.progress_frame_extract = 0
11190
# OCR识别进度
@@ -231,12 +210,12 @@ def capture_frame_with_subtitle_area(self):
231210

232211
if ret:
233212
# 如果有字幕区域,绘制矩形
234-
if self.sub_area is not None:
235-
s_ymin, s_ymax, s_xmin, s_xmax = self.sub_area
213+
sub_area = self.sub_area
214+
if sub_area is not None:
236215
# 绘制绿色矩形框
237-
cv2.rectangle(frame, (s_xmin, s_ymin), (s_xmax, s_ymax), (0, 255, 0), 2)
216+
cv2.rectangle(frame, (sub_area.xmin, sub_area.ymin), (sub_area.xmax, sub_area.ymax), (0, 255, 0), 2)
238217
# 添加文字标注
239-
cv2.putText(frame, "Subtitle Area", (s_xmin, s_ymin - 10),
218+
cv2.putText(frame, "Subtitle Area", (sub_area.xmin, sub_area.ymin - 10),
240219
cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
241220

242221
# 保存图像
@@ -303,15 +282,15 @@ def extract_frame_by_det(self):
303282
tbar.update(1)
304283
dt_boxes, elapse = self.sub_detector.detect_subtitle(frame)
305284
has_subtitle = False
306-
if self.sub_area is not None:
307-
s_ymin, s_ymax, s_xmin, s_xmax = self.sub_area
285+
sub_area = self.sub_area
286+
if sub_area is not None:
308287
coordinate_list = get_coordinates(dt_boxes.tolist())
309288
if coordinate_list:
310289
for coordinate in coordinate_list:
311290
xmin, xmax, ymin, ymax = coordinate
312-
if (s_xmin <= xmin and xmax <= s_xmax
313-
and s_ymin <= ymin
314-
and ymax <= s_ymax):
291+
if (sub_area.xmin <= xmin and xmax <= sub_area.xmax
292+
and sub_area.ymin <= ymin
293+
and ymax <= sub_area.ymax):
315294
has_subtitle = True
316295
# 检测到字幕时,如果列表为空,则为字幕头
317296
if first_flag:
@@ -476,13 +455,13 @@ def vsf_output(out, ):
476455
path_vsf = os.path.join(BASE_DIR, 'subfinder', 'linux', 'VideoSubFinderCli.run')
477456
os.chmod(path_vsf, 0o775)
478457
# top_end: fraction of the image taken up by the upper part, in the range [0-1]
479-
top_end = 1 - self.sub_area[0] / self.frame_height
458+
top_end = 1 - self.sub_area.ymin / self.frame_height
480459
# bottom_end:图像下半部分所占百分比,取值【0-1】
481-
bottom_end = 1 - self.sub_area[1] / self.frame_height
460+
bottom_end = 1 - self.sub_area.ymax / self.frame_height
482461
# left_end:图像左半部分所占百分比,取值【0-1】
483-
left_end = self.sub_area[2] / self.frame_width
462+
left_end = self.sub_area.xmin / self.frame_width
484463
# right_end: fraction of the image taken up by the right part, in the range [0-1]
485-
right_end = self.sub_area[3] / self.frame_width
464+
right_end = self.sub_area.xmax / self.frame_width
486465
if (not self.hardware_accelerator.has_cuda()) and len(self.hardware_accelerator.onnx_providers) > 0:
487466
cpu_count = multiprocessing.cpu_count()
488467
else:
@@ -932,16 +911,13 @@ def __get_area_text(self, ocr_result):
932911
coordinates = get_coordinates(box)
933912
area_text = []
934913
for content, coordinate in zip(text, coordinates):
935-
if self.sub_area is not None:
936-
s_ymin = self.sub_area[0]
937-
s_ymax = self.sub_area[1]
938-
s_xmin = self.sub_area[2]
939-
s_xmax = self.sub_area[3]
914+
sub_area = self.sub_area
915+
if sub_area is not None:
940916
xmin = coordinate[0]
941917
xmax = coordinate[1]
942918
ymin = coordinate[2]
943919
ymax = coordinate[3]
944-
if s_xmin <= xmin and xmax <= s_xmax and s_ymin <= ymin and ymax <= s_ymax:
920+
if sub_area.xmin <= xmin and xmax <= sub_area.xmax and sub_area.ymin <= ymin and ymax <= sub_area.ymax:
945921
area_text.append(content[0])
946922
return area_text
947923

@@ -1019,10 +995,8 @@ def update_progress(self, ocr=None, frame_extract=None):
1019995
if ocr is not None:
1020996
self.progress_ocr = max(0, min(100, ocr)) # Clamp value between 0 and 100
1021997
if frame_extract is not None:
1022-
self.progress_frame_extract = max(0, min(100, frame_extract)) # Clamp value between 0 and 100
1023-
1024-
self.progress_total = (self.progress_frame_extract * 0.4) + (self.progress_ocr * 0.6)
1025-
# Notify listeners
998+
self.progress_frame_extract = max(0, min(100, frame_extract))
999+
# 通知所有监听器
10261000
self.notify_progress_listeners()
10271001

10281002
def start_subtitle_ocr_async(self):
@@ -1080,7 +1054,7 @@ def add_progress_listener(self, listener):
10801054
添加进度监听器
10811055
10821056
Args:
1083-
listener: 一个回调函数,接收参数 (progress_total, isFinished)
1057+
listener: 一个回调函数,接收参数 (progress_ocr, progress_frame_extract, progress_total, isFinished)
10841058
"""
10851059
if listener not in self.progress_listeners:
10861060
self.progress_listeners.append(listener)
@@ -1101,9 +1075,9 @@ def notify_progress_listeners(self):
11011075
"""
11021076
for listener in self.progress_listeners:
11031077
try:
1104-
listener(self.progress_total, self.isFinished)
1078+
listener(self.progress_ocr, self.progress_frame_extract, self.progress_total, self.isFinished)
11051079
except Exception as e:
1106-
print(f"通知进度监听器时出错: {str(e)}")
1080+
traceback.print_exc()
11071081

11081082
def manage_process(pid):
11091083
pass
@@ -1116,10 +1090,11 @@ def manage_process(pid):
11161090
try:
11171091
y_min, y_max, x_min, x_max = map(int, input(
11181092
f"{tr['Main']['ChooseSubArea']} (ymin ymax xmin xmax):").split())
1119-
subtitle_area = (y_min, y_max, x_min, x_max)
1093+
subtitle_area = SubtitleArea(y_min, y_max, x_min, x_max)
11201094
except ValueError as e:
11211095
subtitle_area = None
11221096
# 新建字幕提取对象
1123-
se = SubtitleExtractor(video_path, subtitle_area)
1097+
se = SubtitleExtractor(video_path)
1098+
se.sub_area = subtitle_area
11241099
# 开始提取字幕
11251100
se.run()

0 commit comments

Comments
 (0)