diff --git a/docs/api/migration_guide.rst b/docs/api/migration_guide.rst index a3ae9fa7..f8a6d1a9 100644 --- a/docs/api/migration_guide.rst +++ b/docs/api/migration_guide.rst @@ -96,6 +96,32 @@ The following have been removed from the ``SceneDetector`` interface: - ``stats_manager_required`` property - no longer needed - ``SparseSceneDetector`` interface - removed entirely +Temporal Defaults +----------------------------------------------------------------------- + +All built-in detector constructors now default ``min_scene_len`` to ``"0.6s"`` (temporal) instead of ``15`` (frames). This makes detection behavior consistent across different framerates and is required for correct VFR support. Existing code passing an explicit ``int`` still works: + +.. code:: python + + # v0.6 - default was 15 frames + detector = ContentDetector() + + # v0.7 - default is "0.6s" (~15 frames at 25fps, ~14 at 24fps, ~18 at 30fps) + detector = ContentDetector() + + # To preserve exact v0.6 behavior: + detector = ContentDetector(min_scene_len=15) + +The ``save_images()`` function parameter ``frame_margin`` has been renamed to ``margin`` and now defaults to ``"0.1s"`` instead of ``1`` (frame). The old keyword ``frame_margin=`` still works with a deprecation warning: + +.. code:: python + + # v0.6 + save_images(scene_list, video, frame_margin=1) + + # v0.7 + save_images(scene_list, video, margin="0.1s") + ======================================================================= ``FrameTimecode`` Changes @@ -204,3 +230,5 @@ CLI Changes - The ``-d``/``--min-delta-hsv`` option on ``detect-adaptive`` has been removed. Use ``-c``/``--min-content-val`` instead. - VFR videos now work correctly with both the OpenCV and PyAV backends. - New ``save-xml`` command for exporting scenes in Final Cut Pro XML format. +- ``save-images``: ``--frame-margin`` renamed to ``--margin``, now accepts temporal values (e.g. ``0.1s``). Default changed from 1 frame to ``0.1s``. Old name still works with a deprecation warning. +- Config file: ``[save-images]`` option ``frame-margin`` renamed to ``margin``. Old name still accepted with a deprecation warning. diff --git a/docs/api/output.rst b/docs/api/output.rst index 480e4371..f6e2f13a 100644 --- a/docs/api/output.rst +++ b/docs/api/output.rst @@ -6,6 +6,8 @@ Ouptut ------------------------------------------------- +.. autodata:: scenedetect.output.DEFAULT_MARGIN + .. autofunction:: scenedetect.output.save_images .. autofunction:: scenedetect.output.is_ffmpeg_available diff --git a/docs/cli.rst b/docs/cli.rst index 6df3d9f7..43f23cbc 100644 --- a/docs/cli.rst +++ b/docs/cli.rst @@ -658,11 +658,11 @@ Options Default: ``3`` -.. option:: -m N, --frame-margin N +.. option:: -m DURATION, --margin DURATION - Number of frames to ignore at beginning/end of scenes when saving images. Controls temporal padding on scene boundaries. + Margin from scene boundary for first/last image. Accepts duration (``0.1s``), frame count (``3``), or ``HH:MM:SS.mmm`` format. - Default: ``3`` + Default: ``0.1s`` .. option:: -s S, --scale S diff --git a/scenedetect.cfg b/scenedetect.cfg index fd6241cd..57ecca48 100644 --- a/scenedetect.cfg +++ b/scenedetect.cfg @@ -227,8 +227,9 @@ # Compression amount for png images (0 to 9). Only affects size, not quality. #compression = 3 -# Number of frames to ignore around each scene cut when selecting frames. -#frame-margin = 1 +# Margin from scene boundary for first/last image. Accepts time (0.1s), +# frames (3), or timecode (00:00:00.100). +#margin = 0.1s # Resize by scale factor (0.5 = half, 1.0 = same, 2.0 = double). #scale = 1.0 diff --git a/scenedetect/__init__.py b/scenedetect/__init__.py index 59fd44d6..f6c40099 100644 --- a/scenedetect/__init__.py +++ b/scenedetect/__init__.py @@ -43,6 +43,7 @@ from scenedetect.video_stream import VideoStream, VideoOpenFailure from scenedetect.output import ( save_images, + DEFAULT_MARGIN, split_video_ffmpeg, split_video_mkvmerge, is_ffmpeg_available, @@ -53,7 +54,7 @@ VideoMetadata, SceneMetadata, ) -from scenedetect.detector import SceneDetector +from scenedetect.detector import DEFAULT_MIN_SCENE_LEN, SceneDetector from scenedetect.detectors import ( ContentDetector, AdaptiveDetector, diff --git a/scenedetect/_cli/__init__.py b/scenedetect/_cli/__init__.py index a0c639d2..3534f73b 100644 --- a/scenedetect/_cli/__init__.py +++ b/scenedetect/_cli/__init__.py @@ -1396,12 +1396,18 @@ def split_video_command( ) @click.option( "-m", + "--margin", + metavar="DURATION", + default=None, + type=click.STRING, + help="Margin from scene boundary for first/last image. Accepts duration (0.1s), frame count (3), or HH:MM:SS.mmm format.%s" + % (USER_CONFIG.get_help_string("save-images", "margin")), +) +@click.option( "--frame-margin", - metavar="N", default=None, - type=click.INT, - help="Number of frames to ignore at beginning/end of scenes when saving images. Controls temporal padding on scene boundaries.%s" - % (USER_CONFIG.get_help_string("save-images", "num-images")), + type=click.STRING, + hidden=True, ) @click.option( "--scale", @@ -1441,7 +1447,8 @@ def save_images_command( quality: ty.Optional[int] = None, png: bool = False, compression: ty.Optional[int] = None, - frame_margin: ty.Optional[int] = None, + margin: ty.Optional[str] = None, + frame_margin: ty.Optional[str] = None, scale: ty.Optional[float] = None, height: ty.Optional[int] = None, width: ty.Optional[int] = None, @@ -1487,9 +1494,13 @@ def save_images_command( raise click.BadParameter("\n".join(error_strs), param_hint="save-images") output = ctx.config.get_value("save-images", "output", output) + if frame_margin is not None and margin is None: + logger.warning("--frame-margin is deprecated, use --margin instead.") + margin = frame_margin + save_images_args = { "encoder_param": compression if png else quality, - "frame_margin": ctx.config.get_value("save-images", "frame-margin", frame_margin), + "margin": ctx.config.get_value("save-images", "margin", margin), "height": height, "image_extension": image_extension, "filename": ctx.config.get_value("save-images", "filename", filename), diff --git a/scenedetect/_cli/commands.py b/scenedetect/_cli/commands.py index 0003f30e..e10fd80a 100644 --- a/scenedetect/_cli/commands.py +++ b/scenedetect/_cli/commands.py @@ -180,7 +180,7 @@ def save_images( scenes: SceneList, cuts: CutList, num_images: int, - frame_margin: int, + margin: ty.Union[int, float, str], image_extension: str, encoder_param: int, filename: str, @@ -199,7 +199,7 @@ def save_images( scene_list=scenes, video=context.video_stream, num_images=num_images, - frame_margin=frame_margin, + margin=margin, image_extension=image_extension, encoder_param=encoder_param, image_name_template=filename, diff --git a/scenedetect/_cli/config.py b/scenedetect/_cli/config.py index ee851da8..6b1710c0 100644 --- a/scenedetect/_cli/config.py +++ b/scenedetect/_cli/config.py @@ -412,7 +412,7 @@ class XmlFormat(Enum): "compression": RangeValue(3, min_val=0, max_val=9), "filename": "$VIDEO_NAME-Scene-$SCENE_NUMBER-$IMAGE_NUMBER", "format": "jpeg", - "frame-margin": 1, + "margin": TimecodeValue("0.1s"), "height": 0, "num-images": 3, "output": None, @@ -504,6 +504,12 @@ class XmlFormat(Enum): DEPRECATED_COMMANDS: ty.Dict[str, str] = {"export-html": "save-html"} """Deprecated config file sections that have a 1:1 mapping to a new replacement.""" +DEPRECATED_OPTIONS: ty.Dict[ty.Tuple[str, str], str] = { + ("save-images", "frame-margin"): "margin", +} +"""Deprecated config file options that have a 1:1 mapping to a new replacement. +Keys are (section, old_option) tuples, values are the new option name.""" + def _validate_structure(parser: ConfigParser) -> ty.Tuple[bool, ty.List[LogMessage]]: """Validates the layout of the section/option mapping. Returns a bool indicating if validation @@ -538,7 +544,16 @@ def _validate_structure(parser: ConfigParser) -> ty.Tuple[bool, ty.List[LogMessa logs.append((logging.ERROR, f"Unsupported config section: [{section_name}]")) continue for option_name, _ in parser.items(section_name): - if option_name not in CONFIG_MAP[section].keys(): + if (section, option_name) in DEPRECATED_OPTIONS: + new_option = DEPRECATED_OPTIONS[(section, option_name)] + logs.append( + ( + logging.WARNING, + f"[{section_name}] option `{option_name}` is deprecated," + f" use `{new_option}` instead.", + ) + ) + elif option_name not in CONFIG_MAP[section].keys(): success = False logs.append( ( @@ -564,6 +579,13 @@ def _parse_config(parser: ConfigParser) -> ty.Tuple[ty.Optional[ConfigDict], ty. replacement = DEPRECATED_COMMANDS[deprecated_command] parser[replacement] = parser[deprecated_command] del parser[deprecated_command] + # Re-map deprecated options to their replacements. Only remap when the new option is not + # already explicitly set (the explicit value should take precedence). + for (section, old_option), new_option in DEPRECATED_OPTIONS.items(): + if section in parser and old_option in parser[section]: + if new_option not in parser[section]: + parser[section][new_option] = parser[section][old_option] + parser.remove_option(section, old_option) for command in CONFIG_MAP: config[command] = {} for option in CONFIG_MAP[command]: diff --git a/scenedetect/_cli/context.py b/scenedetect/_cli/context.py index b23287e3..d141dd03 100644 --- a/scenedetect/_cli/context.py +++ b/scenedetect/_cli/context.py @@ -314,7 +314,7 @@ def get_detect_content_params( self, threshold: ty.Optional[float] = None, luma_only: bool = None, - min_scene_len: ty.Optional[str] = None, + min_scene_len: ty.Optional[ty.Union[int, float, str]] = None, weights: ty.Optional[ty.Tuple[float, float, float, float]] = None, kernel_size: ty.Optional[int] = None, filter_mode: ty.Optional[str] = None, @@ -325,10 +325,9 @@ def get_detect_content_params( else: if min_scene_len is None: if self.config.is_default("detect-content", "min-scene-len"): - min_scene_len = self.min_scene_len.frame_num + min_scene_len = self.min_scene_len.seconds else: min_scene_len = self.config.get_value("detect-content", "min-scene-len") - min_scene_len = self.parse_timecode(min_scene_len).frame_num if weights is not None: try: @@ -354,7 +353,7 @@ def get_detect_adaptive_params( min_content_val: ty.Optional[float] = None, frame_window: ty.Optional[int] = None, luma_only: bool = None, - min_scene_len: ty.Optional[str] = None, + min_scene_len: ty.Optional[ty.Union[int, float, str]] = None, weights: ty.Optional[ty.Tuple[float, float, float, float]] = None, kernel_size: ty.Optional[int] = None, ) -> ty.Dict[str, ty.Any]: @@ -365,10 +364,9 @@ def get_detect_adaptive_params( else: if min_scene_len is None: if self.config.is_default("detect-adaptive", "min-scene-len"): - min_scene_len = self.min_scene_len.frame_num + min_scene_len = self.min_scene_len.seconds else: min_scene_len = self.config.get_value("detect-adaptive", "min-scene-len") - min_scene_len = self.parse_timecode(min_scene_len).frame_num if weights is not None: try: @@ -395,7 +393,7 @@ def get_detect_threshold_params( threshold: ty.Optional[float] = None, fade_bias: ty.Optional[float] = None, add_last_scene: bool = None, - min_scene_len: ty.Optional[str] = None, + min_scene_len: ty.Optional[ty.Union[int, float, str]] = None, ) -> ty.Dict[str, ty.Any]: """Handle detect-threshold command options and return args to construct one with.""" @@ -404,10 +402,9 @@ def get_detect_threshold_params( else: if min_scene_len is None: if self.config.is_default("detect-threshold", "min-scene-len"): - min_scene_len = self.min_scene_len.frame_num + min_scene_len = self.min_scene_len.seconds else: min_scene_len = self.config.get_value("detect-threshold", "min-scene-len") - min_scene_len = self.parse_timecode(min_scene_len).frame_num # TODO(v1.0): add_last_scene cannot be disabled right now. return { "add_final_scene": add_last_scene @@ -421,7 +418,7 @@ def get_detect_hist_params( self, threshold: ty.Optional[float] = None, bins: ty.Optional[int] = None, - min_scene_len: ty.Optional[str] = None, + min_scene_len: ty.Optional[ty.Union[int, float, str]] = None, ) -> ty.Dict[str, ty.Any]: """Handle detect-hist command options and return args to construct one with.""" @@ -430,10 +427,9 @@ def get_detect_hist_params( else: if min_scene_len is None: if self.config.is_default("detect-hist", "min-scene-len"): - min_scene_len = self.min_scene_len.frame_num + min_scene_len = self.min_scene_len.seconds else: min_scene_len = self.config.get_value("detect-hist", "min-scene-len") - min_scene_len = self.parse_timecode(min_scene_len).frame_num return { "bins": self.config.get_value("detect-hist", "bins", bins), "min_scene_len": min_scene_len, @@ -445,7 +441,7 @@ def get_detect_hash_params( threshold: ty.Optional[float] = None, size: ty.Optional[int] = None, lowpass: ty.Optional[int] = None, - min_scene_len: ty.Optional[str] = None, + min_scene_len: ty.Optional[ty.Union[int, float, str]] = None, ) -> ty.Dict[str, ty.Any]: """Handle detect-hash command options and return args to construct one with.""" @@ -454,10 +450,9 @@ def get_detect_hash_params( else: if min_scene_len is None: if self.config.is_default("detect-hash", "min-scene-len"): - min_scene_len = self.min_scene_len.frame_num + min_scene_len = self.min_scene_len.seconds else: min_scene_len = self.config.get_value("detect-hash", "min-scene-len") - min_scene_len = self.parse_timecode(min_scene_len).frame_num return { "lowpass": self.config.get_value("detect-hash", "lowpass", lowpass), "min_scene_len": min_scene_len, diff --git a/scenedetect/detector.py b/scenedetect/detector.py index 7c3e1b70..47fffc1b 100644 --- a/scenedetect/detector.py +++ b/scenedetect/detector.py @@ -24,6 +24,7 @@ event (in, out, cut, etc...). """ +import math import typing as ty from abc import ABC, abstractmethod from enum import Enum @@ -33,6 +34,18 @@ from scenedetect.common import FrameTimecode from scenedetect.stats_manager import StatsManager +DEFAULT_MIN_SCENE_LEN = "0.6s" +"""Default minimum scene length for all detectors. + +Once a cut is detected, this much time must pass before a new one can be added to the scene +list. This value is used as the default for the ``min_scene_len`` parameter in all detector +constructors. At 24fps this is approximately 14 frames, at 30fps approximately 18 frames. + +Can be overridden per-detector or globally via the ``[global] min-scene-len`` config option. +Accepts the same formats as ``min_scene_len``: int (frames), float (seconds), or str +(e.g. ``"0.6s"``, ``"00:00:00.600"``). +""" + class SceneDetector(ABC): """Base class to inherit from when implementing a scene detection algorithm. @@ -114,15 +127,26 @@ class Mode(Enum): SUPPRESS = 1 """Suppress consecutive cuts until the filter length has passed.""" - def __init__(self, mode: Mode, length: int): + def __init__(self, mode: Mode, length: ty.Union[int, float, str]): """ Arguments: mode: The mode to use when enforcing `length`. - length: Number of frames to use when filtering cuts. + length: Minimum scene length. Can be an int (number of frames), float (seconds), + or str (e.g. ``"0.6s"``, ``"00:00:00.600"``). """ self._mode = mode - self._filter_length = length # Number of frames to use for activating the filter. - self._filter_secs: ty.Optional[float] = None # Threshold in seconds, computed on first use. + self._filter_length = length + # Threshold in seconds. Set immediately for temporal values, or computed on first use + # from the video framerate for frame-based (int) values. + self._filter_secs: ty.Optional[float] = None + if isinstance(length, float): + self._filter_secs = length + elif isinstance(length, str) and not length.strip().isdigit(): + # Temporal string like "0.6s" or "00:00:00.600" - parse to seconds immediately. + self._filter_secs = FrameTimecode(timecode=length, fps=100.0).seconds + elif isinstance(length, str): + # Digit-only string - treat as frame count, defer until we know the framerate. + self._filter_length = int(length) self._last_above = None # Last frame above threshold. self._merge_enabled = False # Used to disable merging until at least one cut was found. self._merge_triggered = False # True when the merge filter is active. @@ -130,10 +154,23 @@ def __init__(self, mode: Mode, length: int): @property def max_behind(self) -> int: - return 0 if self._mode == FlashFilter.Mode.SUPPRESS else self._filter_length + if self._mode == FlashFilter.Mode.SUPPRESS: + return 0 + if isinstance(self._filter_length, int): + return self._filter_length + # For temporal values, estimate using a conservative high framerate to ensure the event + # buffer is large enough. ceil(seconds * 240fps) covers up to 240fps video. + return math.ceil(self._filter_secs * 240.0) if self._filter_secs else 0 + + @property + def _is_disabled(self) -> bool: + """Filter is disabled when length is zero.""" + if self._filter_secs is not None: + return self._filter_secs <= 0.0 + return self._filter_length <= 0 def filter(self, timecode: FrameTimecode, above_threshold: bool) -> ty.List[FrameTimecode]: - if not self._filter_length > 0: + if self._is_disabled: return [timecode] if above_threshold else [] if self._last_above is None: self._last_above = timecode diff --git a/scenedetect/detectors/adaptive_detector.py b/scenedetect/detectors/adaptive_detector.py index 7a0a23af..770d2731 100644 --- a/scenedetect/detectors/adaptive_detector.py +++ b/scenedetect/detectors/adaptive_detector.py @@ -22,6 +22,7 @@ import numpy as np from scenedetect.common import FrameTimecode +from scenedetect.detector import DEFAULT_MIN_SCENE_LEN from scenedetect.detectors import ContentDetector logger = getLogger("pyscenedetect") @@ -38,7 +39,7 @@ class AdaptiveDetector(ContentDetector): def __init__( self, adaptive_threshold: float = 3.0, - min_scene_len: int = 15, + min_scene_len: ty.Union[int, float, str] = DEFAULT_MIN_SCENE_LEN, window_width: int = 2, min_content_val: float = 15.0, weights: ContentDetector.Components = ContentDetector.DEFAULT_COMPONENT_WEIGHTS, @@ -49,8 +50,9 @@ def __init__( Arguments: adaptive_threshold: Threshold (float) that score ratio must exceed to trigger a new scene (see frame metric adaptive_ratio in stats file). - min_scene_len: Once a cut is detected, this many frames must pass before a new one can - be added to the scene list. Can be an int or FrameTimecode type. + min_scene_len: Once a cut is detected, this much time must pass before a new one can + be added to the scene list. Can be an int (frames), float (seconds), or str + (e.g. ``"0.6s"``). window_width: Size of window (number of frames) before and after each frame to average together in order to detect deviations from the mean. Must be at least 1. min_content_val: Minimum threshold (float) that the content_val must exceed in order to diff --git a/scenedetect/detectors/content_detector.py b/scenedetect/detectors/content_detector.py index 6cf757fa..3a4cf772 100644 --- a/scenedetect/detectors/content_detector.py +++ b/scenedetect/detectors/content_detector.py @@ -23,7 +23,7 @@ import numpy from scenedetect.common import FrameTimecode -from scenedetect.detector import FlashFilter, SceneDetector +from scenedetect.detector import DEFAULT_MIN_SCENE_LEN, FlashFilter, SceneDetector def _mean_pixel_distance(left: numpy.ndarray, right: numpy.ndarray) -> float: @@ -104,7 +104,7 @@ class _FrameData: def __init__( self, threshold: float = 27.0, - min_scene_len: int = 15, + min_scene_len: ty.Union[int, float, str] = DEFAULT_MIN_SCENE_LEN, weights: "ContentDetector.Components" = DEFAULT_COMPONENT_WEIGHTS, luma_only: bool = False, kernel_size: ty.Optional[int] = None, @@ -113,8 +113,9 @@ def __init__( """ Arguments: threshold: Threshold the average change in pixel intensity must exceed to trigger a cut. - min_scene_len: Once a cut is detected, this many frames must pass before a new one can - be added to the scene list. Can be an int or FrameTimecode type. + min_scene_len: Once a cut is detected, this much time must pass before a new one can + be added to the scene list. Can be an int (frames), float (seconds), or str + (e.g. ``"0.6s"``). weights: Weight to place on each component when calculating frame score (`content_val` in a statsfile, the value `threshold` is compared against). luma_only: If True, only considers changes in the luminance channel of the video. @@ -137,8 +138,6 @@ def __init__( raise ValueError("kernel_size must be odd integer >= 3") self._kernel = numpy.ones((kernel_size, kernel_size), numpy.uint8) self._frame_score: ty.Optional[float] = None - # TODO(https://scenedetect.com/issue/168): Figure out a better long term plan for handling - # `min_scene_len` which should be specified in seconds, not frames. self._flash_filter = FlashFilter(mode=filter_mode, length=min_scene_len) def get_metrics(self): diff --git a/scenedetect/detectors/hash_detector.py b/scenedetect/detectors/hash_detector.py index 484f49d5..bd4c36cd 100644 --- a/scenedetect/detectors/hash_detector.py +++ b/scenedetect/detectors/hash_detector.py @@ -22,7 +22,7 @@ import numpy from scenedetect.common import FrameTimecode -from scenedetect.detector import SceneDetector +from scenedetect.detector import DEFAULT_MIN_SCENE_LEN, SceneDetector class HashDetector(SceneDetector): @@ -41,8 +41,9 @@ class HashDetector(SceneDetector): size: Size of square of low frequency data to use for the DCT lowpass: How much high frequency information to filter from the DCT. A value of 2 means keep lower 1/2 of the frequency data, 4 means only keep 1/4, etc... - min_scene_len: Once a cut is detected, this many frames must pass before a new one can - be added to the scene list. Can be an int or FrameTimecode type. + min_scene_len: Once a cut is detected, this much time must pass before a new one can + be added to the scene list. Can be an int (frames), float (seconds), or str + (e.g. ``"0.6s"``). """ def __init__( @@ -50,7 +51,7 @@ def __init__( threshold: float = 0.395, size: int = 16, lowpass: int = 2, - min_scene_len: int = 15, + min_scene_len: ty.Union[int, float, str] = DEFAULT_MIN_SCENE_LEN, ): super(HashDetector, self).__init__() self._threshold = threshold diff --git a/scenedetect/detectors/histogram_detector.py b/scenedetect/detectors/histogram_detector.py index 812c5852..aa811c4a 100644 --- a/scenedetect/detectors/histogram_detector.py +++ b/scenedetect/detectors/histogram_detector.py @@ -21,7 +21,7 @@ import numpy from scenedetect.common import FrameTimecode -from scenedetect.detector import SceneDetector +from scenedetect.detector import DEFAULT_MIN_SCENE_LEN, SceneDetector class HistogramDetector(SceneDetector): @@ -30,7 +30,12 @@ class HistogramDetector(SceneDetector): METRIC_KEYS = ["hist_diff"] - def __init__(self, threshold: float = 0.05, bins: int = 256, min_scene_len: int = 15): + def __init__( + self, + threshold: float = 0.05, + bins: int = 256, + min_scene_len: ty.Union[int, float, str] = DEFAULT_MIN_SCENE_LEN, + ): """ Arguments: threshold: maximum relative difference between 0.0 and 1.0 that the histograms can @@ -38,8 +43,9 @@ def __init__(self, threshold: float = 0.05, bins: int = 256, min_scene_len: int YUV, and normalized based on the number of bins. Higher dicfferences imply greater change in content, so larger threshold values are less sensitive to cuts. bins: Number of bins to use for the histogram. - min_scene_len: Once a cut is detected, this many frames must pass before a new one can - be added to the scene list. Can be an int or FrameTimecode type. + min_scene_len: Once a cut is detected, this much time must pass before a new one can + be added to the scene list. Can be an int (frames), float (seconds), or str + (e.g. ``"0.6s"``). """ super().__init__() # Internally, threshold represents the correlation between two histograms and has values diff --git a/scenedetect/detectors/threshold_detector.py b/scenedetect/detectors/threshold_detector.py index 8d28cd62..be95bc6b 100644 --- a/scenedetect/detectors/threshold_detector.py +++ b/scenedetect/detectors/threshold_detector.py @@ -23,7 +23,7 @@ import numpy from scenedetect.common import FrameTimecode -from scenedetect.detector import SceneDetector +from scenedetect.detector import DEFAULT_MIN_SCENE_LEN, SceneDetector logger = getLogger("pyscenedetect") @@ -48,7 +48,7 @@ class Method(Enum): def __init__( self, threshold: float = 12, - min_scene_len: int = 15, + min_scene_len: ty.Union[int, float, str] = DEFAULT_MIN_SCENE_LEN, fade_bias: float = 0.0, add_final_scene: bool = False, method: Method = Method.FLOOR, @@ -58,8 +58,9 @@ def __init__( Arguments: threshold: 8-bit intensity value that each pixel value (R, G, and B) must be <= to in order to trigger a fade in/out. - min_scene_len: Once a cut is detected, this many frames must pass before a new one can - be added to the scene list. Can be an int or FrameTimecode type. + min_scene_len: Once a cut is detected, this much time must pass before a new one can + be added to the scene list. Can be an int (frames), float (seconds), or str + (e.g. ``"0.6s"``). fade_bias: Float between -1.0 and +1.0 representing the percentage of timecode skew for the start of a scene (-1.0 causing a cut at the fade-to-black, 0.0 in the middle, and +1.0 causing the cut to be @@ -141,7 +142,7 @@ def process_frame( (self.method == ThresholdDetector.Method.FLOOR and frame_avg >= self.threshold) or (self.method == ThresholdDetector.Method.CEILING and frame_avg < self.threshold) ): - # Only add the scene if min_scene_len frames have passed. + # Only add the scene if min_scene_len has passed. if (timecode - self.last_scene_cut) >= self.min_scene_len: # Just faded into a new scene, compute timecode for the scene # split based on the fade bias. diff --git a/scenedetect/detectors/transnet_v2.py b/scenedetect/detectors/transnet_v2.py index 752749cd..51e98e6f 100644 --- a/scenedetect/detectors/transnet_v2.py +++ b/scenedetect/detectors/transnet_v2.py @@ -24,7 +24,7 @@ import numpy as np from scenedetect.common import FrameTimecode, Timecode -from scenedetect.detector import FlashFilter, SceneDetector +from scenedetect.detector import DEFAULT_MIN_SCENE_LEN, FlashFilter, SceneDetector logger = getLogger("pyscenedetect") @@ -135,7 +135,7 @@ def __init__( model_path: ty.Union[str, Path] = "tests/resources/transnetv2.onnx", onnx_providers: ty.Union[ty.List[str], None] = None, threshold: float = 0.5, - min_scene_len: int = 15, + min_scene_len: ty.Union[int, float, str] = DEFAULT_MIN_SCENE_LEN, filter_mode: FlashFilter.Mode = FlashFilter.Mode.MERGE, ): super().__init__() @@ -154,8 +154,6 @@ def __init__( onnx_providers=onnx_providers, threshold=threshold, ) - # TODO(https://scenedetect.com/issue/168): Figure out a better long term plan for handling - # `min_scene_len` which should be specified in seconds, not frames. self._flash_filter = FlashFilter(mode=filter_mode, length=min_scene_len) def mk_ft(self, pts: int): diff --git a/scenedetect/output/__init__.py b/scenedetect/output/__init__.py index 3acd48f8..661bac4d 100644 --- a/scenedetect/output/__init__.py +++ b/scenedetect/output/__init__.py @@ -32,7 +32,7 @@ ) # Commonly used classes/functions exported under the `scenedetect.output` namespace for brevity. -from scenedetect.output.image import save_images +from scenedetect.output.image import DEFAULT_MARGIN, save_images from scenedetect.output.video import ( PathFormatter, SceneMetadata, diff --git a/scenedetect/output/image.py b/scenedetect/output/image.py index d5cf00de..48f67400 100644 --- a/scenedetect/output/image.py +++ b/scenedetect/output/image.py @@ -33,6 +33,35 @@ logger = logging.getLogger("pyscenedetect") +DEFAULT_MARGIN = "0.1s" +"""Default margin from scene boundary for the first/last image when using :func:`save_images`. + +The margin moves the first image forward from the scene start, and the last image backward from +the scene end, to avoid extracting frames at the exact cut point. At 24fps this is approximately +2-3 frames. + +Accepts the same formats as the ``margin`` parameter: int (frames), float (seconds), or str +(e.g. ``"0.1s"``, ``"00:00:00.100"``). +""" + + +def _margin_to_seconds(margin: ty.Union[int, float, str], framerate: float) -> float: + """Convert a margin value to seconds. Accepts int (frames), float (seconds), or str.""" + if isinstance(margin, int): + return margin / framerate + if isinstance(margin, float): + return margin + return FrameTimecode(timecode=margin, fps=framerate).seconds + + +def _margin_to_frames(margin: ty.Union[int, float, str], framerate: float) -> int: + """Convert a margin value to a frame count. Accepts int (frames), float (seconds), or str.""" + if isinstance(margin, int): + return margin + if isinstance(margin, float): + return round(margin * framerate) + return FrameTimecode(timecode=margin, fps=framerate).frame_num + def _scale_image( image: np.ndarray, @@ -69,7 +98,7 @@ class _ImageExtractor: def __init__( self, num_images: int = 3, - frame_margin: int = 1, + margin: ty.Union[int, float, str] = DEFAULT_MARGIN, image_extension: str = "jpg", imwrite_param: ty.Dict[str, ty.Union[int, None]] = None, image_name_template: str = "$VIDEO_NAME-Scene-$SCENE_NUMBER-$IMAGE_NUMBER", @@ -85,10 +114,9 @@ def __init__( Arguments: num_images: Number of images to generate for each scene. Minimum is 1. - frame_margin: Number of frames to pad each scene around the beginning - and end (e.g. moves the first/last image into the scene by N frames). - Can set to 0, but will result in some video files failing to extract - the very last frame. + margin: Margin from scene boundary for first/last image. Moves the + first/last image into the scene by the specified amount. Accepts + int (frames), float (seconds), or str (e.g. ``"0.1s"``). image_extension: Type of image to save (must be one of 'jpg', 'png', or 'webp'). encoder_param: Quality/compression efficiency, based on type of image: 'jpg' / 'webp': Quality 0-100, higher is better quality. 100 is lossless for webp. @@ -111,7 +139,7 @@ def __init__( interpolation: Type of interpolation to use when resizing images. """ self._num_images = num_images - self._frame_margin = frame_margin + self._margin = margin self._image_extension = image_extension self._image_name_template = image_name_template self._scale = scale @@ -297,8 +325,7 @@ def generate_timecode_list(self, scene_list: SceneList) -> ty.List[ty.List[Frame Uses PTS-accurate seconds-based timing so results are correct for both CFR and VFR video. """ framerate = scene_list[0][0].framerate - # Convert frame_margin to seconds using the nominal framerate. - margin_secs = self._frame_margin / framerate + margin_secs = _margin_to_seconds(self._margin, framerate) result = [] for start, end in scene_list: duration_secs = (end - start).seconds @@ -336,7 +363,7 @@ def save_images( scene_list: SceneList, video: VideoStream, num_images: int = 3, - frame_margin: int = 1, + margin: ty.Union[int, float, str] = DEFAULT_MARGIN, image_extension: str = "jpg", encoder_param: int = 95, image_name_template: str = "$VIDEO_NAME-Scene-$SCENE_NUMBER-$IMAGE_NUMBER", @@ -347,6 +374,8 @@ def save_images( width: ty.Optional[int] = None, interpolation: Interpolation = Interpolation.CUBIC, threading: bool = True, + *, + frame_margin: ty.Optional[int] = None, ) -> ty.Dict[int, ty.List[str]]: """Save a set number of images from each scene, given a list of scenes and the associated video/frame source. @@ -357,10 +386,9 @@ def save_images( video: A VideoStream object corresponding to the scene list. Note that the video will be closed/re-opened and seeked through. num_images: Number of images to generate for each scene. Minimum is 1. - frame_margin: Number of frames to pad each scene around the beginning - and end (e.g. moves the first/last image into the scene by N frames). - Can set to 0, but will result in some video files failing to extract - the very last frame. + margin: Margin from scene boundary for first/last image. Moves the first/last image + into the scene by the specified amount. Accepts int (frames), float (seconds), + or str (e.g. ``"0.1s"``). image_extension: Type of image to save (must be one of 'jpg', 'png', or 'webp'). encoder_param: Quality/compression efficiency, based on type of image: 'jpg' / 'webp': Quality 0-100, higher is better quality. 100 is lossless for webp. @@ -385,6 +413,7 @@ def save_images( while preserving the aspect ratio. interpolation: Type of interpolation to use when resizing images. threading: Offload image encoding and disk IO to background threads to improve performance. + frame_margin: [DEPRECATED] Use ``margin`` instead. Returns: Dictionary of the format { scene_num : [image_paths] }, where scene_num is the @@ -395,11 +424,20 @@ def save_images( ValueError: Raised if any arguments are invalid or out of range (e.g. if num_images is negative). """ + if frame_margin is not None: + import warnings + + warnings.warn( + "frame_margin is deprecated, use margin instead.", DeprecationWarning, stacklevel=2 + ) + margin = frame_margin if not scene_list: return {} - if num_images <= 0 or frame_margin < 0: - raise ValueError() + if num_images <= 0: + raise ValueError("num_images must be >= 1") + if isinstance(margin, (int, float)) and margin < 0: + raise ValueError("margin must be non-negative") # TODO: Validate that encoder_param is within the proper range. # Should be between 0 and 100 (inclusive) for jpg/webp, and 1-9 for png. @@ -413,7 +451,7 @@ def save_images( if threading: extractor = _ImageExtractor( num_images, - frame_margin, + margin, image_extension, imwrite_param, image_name_template, @@ -441,6 +479,7 @@ def save_images( image_num_format += str(math.floor(math.log(num_images, 10)) + 2) + "d" framerate = scene_list[0][0]._rate + margin_frames = _margin_to_frames(margin, float(framerate)) # TODO(v1.0): Split up into multiple sub-expressions so auto-formatter works correctly. timecode_list = [ @@ -451,10 +490,10 @@ def save_images( a[len(a) // 2] if (0 < j < num_images - 1) or num_images == 1 # first frame - else min(a[0] + frame_margin, a[-1]) + else min(a[0] + margin_frames, a[-1]) if j == 0 # last frame - else max(a[-1] - frame_margin, a[0]) + else max(a[-1] - margin_frames, a[0]) # for each evenly-split array of frames in the scene list for j, a in enumerate(np.array_split(r, num_images)) ) diff --git a/tests/test_detectors.py b/tests/test_detectors.py index 0e5f4214..74037a55 100644 --- a/tests/test_detectors.py +++ b/tests/test_detectors.py @@ -224,3 +224,74 @@ def test_detectors_with_stats(test_video_file): scene_manager.detect_scenes(video=video, end_time=end_time) scene_list = scene_manager.get_scene_list() assert len(scene_list) == initial_scene_len + + +def test_temporal_min_scene_len(): + """Test that detectors accept temporal min_scene_len values.""" + # Default is "0.6s" - ensure construction works with various formats + for detector_class in FAST_CUT_DETECTORS: + # String temporal + d = detector_class(min_scene_len="0.6s") + assert d is not None + # Float seconds + d = detector_class(min_scene_len=0.6) + assert d is not None + # Int frames (backward compat) + d = detector_class(min_scene_len=15) + assert d is not None + # Zero (disable) + d = detector_class(min_scene_len=0) + assert d is not None + # ThresholdDetector also + d = ThresholdDetector(min_scene_len="0.6s") + assert d is not None + d = ThresholdDetector(min_scene_len=0.6) + assert d is not None + + +def test_flash_filter_disabled_when_zero(): + """FlashFilter with zero length should be disabled and pass through cuts.""" + from scenedetect.detector import FlashFilter + + fps = 24.0 + for length_zero in [0, 0.0, "0"]: + ff = FlashFilter(mode=FlashFilter.Mode.SUPPRESS, length=length_zero) + assert ff._is_disabled, f"Expected disabled for length={length_zero!r}" + tc = FrameTimecode(10, fps=fps) + assert ff.filter(tc, above_threshold=True) == [tc] + assert ff.filter(tc, above_threshold=False) == [] + + +def test_flash_filter_digit_string_treated_as_frames(): + """A digit-only string like '15' should be treated as a frame count.""" + from scenedetect.detector import FlashFilter + + ff = FlashFilter(mode=FlashFilter.Mode.SUPPRESS, length="15") + assert ff._filter_secs is None + assert ff._filter_length == 15 + assert isinstance(ff._filter_length, int) + + +def test_flash_filter_max_behind(): + """max_behind returns correct values for all length types and modes.""" + import math + + from scenedetect.detector import FlashFilter + + # SUPPRESS mode always returns 0 + assert FlashFilter(mode=FlashFilter.Mode.SUPPRESS, length=30).max_behind == 0 + assert FlashFilter(mode=FlashFilter.Mode.SUPPRESS, length=0.6).max_behind == 0 + + # MERGE with int returns the int directly + assert FlashFilter(mode=FlashFilter.Mode.MERGE, length=30).max_behind == 30 + + # MERGE with float uses ceil(secs * 240) + assert FlashFilter(mode=FlashFilter.Mode.MERGE, length=0.6).max_behind == math.ceil(0.6 * 240.0) + + # MERGE with str timecode + assert FlashFilter(mode=FlashFilter.Mode.MERGE, length="0.6s").max_behind == math.ceil( + 0.6 * 240.0 + ) + + # Zero temporal value + assert FlashFilter(mode=FlashFilter.Mode.MERGE, length=0.0).max_behind == 0 diff --git a/tests/test_output.py b/tests/test_output.py index db3f2307..d91a6204 100644 --- a/tests/test_output.py +++ b/tests/test_output.py @@ -161,6 +161,112 @@ def test_save_images_singlethreaded(test_video_file, tmp_path: Path): assert total_images == len([path for path in tmp_path.glob(image_name_glob)]) +def test_save_images_temporal_margin(test_video_file, tmp_path: Path): + """Test save_images with temporal margin values (str and float).""" + video = VideoStreamCv2(test_video_file) + video_fps = video.frame_rate + scene_list = [ + (FrameTimecode(start, video_fps), FrameTimecode(end, video_fps)) + for start, end in [(0, 100), (200, 300)] + ] + template = "temporal.$SCENE_NUMBER.$IMAGE_NUMBER" + + for margin_val in ["0.1s", 0.1, 3]: + for use_threading in [True, False]: + out = tmp_path / f"margin_{margin_val}_{use_threading}" + out.mkdir() + result = save_images( + scene_list=scene_list, + output_dir=out, + video=video, + num_images=3, + margin=margin_val, + image_extension="jpg", + image_name_template=template, + threading=use_threading, + ) + total = sum(len(paths) for paths in result.values()) + assert total == 6, ( + f"Expected 6 images for margin={margin_val}, threading={use_threading}" + ) + + +def test_save_images_zero_margin(test_video_file, tmp_path: Path): + """Test save_images with margin=0 works without errors.""" + video = VideoStreamCv2(test_video_file) + video_fps = video.frame_rate + scene_list = [ + (FrameTimecode(start, video_fps), FrameTimecode(end, video_fps)) + for start, end in [(0, 100)] + ] + result = save_images( + scene_list=scene_list, + output_dir=tmp_path, + video=video, + num_images=3, + margin=0, + image_extension="jpg", + ) + assert sum(len(paths) for paths in result.values()) == 3 + + +def test_save_images_deprecated_frame_margin(test_video_file, tmp_path: Path): + """Test that the deprecated frame_margin keyword still works.""" + import warnings + + video = VideoStreamCv2(test_video_file) + video_fps = video.frame_rate + scene_list = [ + (FrameTimecode(start, video_fps), FrameTimecode(end, video_fps)) + for start, end in [(0, 100)] + ] + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + result = save_images( + scene_list=scene_list, + output_dir=tmp_path, + video=video, + num_images=3, + image_extension="jpg", + frame_margin=2, + ) + assert any("frame_margin is deprecated" in str(warning.message) for warning in w) + assert sum(len(paths) for paths in result.values()) == 3 + + +def test_save_images_negative_margin_raises(test_video_file, tmp_path: Path): + """Negative margin should raise ValueError.""" + video = VideoStreamCv2(test_video_file) + video_fps = video.frame_rate + scene_list = [(FrameTimecode(0, video_fps), FrameTimecode(50, video_fps))] + with pytest.raises(ValueError): + save_images( + scene_list=scene_list, video=video, output_dir=tmp_path, num_images=3, margin=-1 + ) + with pytest.raises(ValueError): + save_images( + scene_list=scene_list, video=video, output_dir=tmp_path, num_images=3, margin=-0.1 + ) + + +def test_margin_helpers(): + """Test _margin_to_seconds and _margin_to_frames helper functions.""" + from scenedetect.output.image import _margin_to_frames, _margin_to_seconds + + fps = 25.0 + # int (frames) + assert _margin_to_seconds(0, fps) == 0.0 + assert _margin_to_seconds(25, fps) == 1.0 + assert _margin_to_frames(0, fps) == 0 + assert _margin_to_frames(3, fps) == 3 + # float (seconds) + assert _margin_to_seconds(0.5, fps) == 0.5 + assert _margin_to_frames(1.0, fps) == 25 + # str + assert abs(_margin_to_seconds("0.5s", fps) - 0.5) < 1e-6 + assert _margin_to_frames("1.0s", fps) == 25 + + # TODO: Test other functionality against zero width scenes. def test_save_images_zero_width_scene(test_video_file, tmp_path: Path): """Test scenedetect.scene_manager.save_images guards against zero width scenes.""" diff --git a/website/pages/changelog.md b/website/pages/changelog.md index 535e2d5a..4ed43c19 100644 --- a/website/pages/changelog.md +++ b/website/pages/changelog.md @@ -692,6 +692,19 @@ Although there have been minimal changes to most API examples, there are several * Common NTSC rates (23.976, 29.97, 59.94) are automatically detected from float values * `FrameTimecode.frame_num` is now approximate for VFR video (based on PTS-derived time) +**Temporal Defaults** ([#531](https://github.com/Breakthrough/PySceneDetect/issues/531)): + + * `save-images` command: `--frame-margin` renamed to `--margin`, now accepts temporal values (e.g. `0.1s`, `00:00:00.100`) in addition to frame counts. Default changed from 1 frame to `0.1s`. Old name is deprecated but still works + * `min_scene_len` parameter in all detector constructors now defaults to `"0.6s"` instead of `15` frames, and accepts temporal values (e.g. `"0.6s"`, `0.6`) + * Config file: `[save-images]` option `frame-margin` renamed to `margin` (old name still accepted with deprecation warning) + +**Module Reorganization:** + + * `scenedetect.scene_detector` moved to `scenedetect.detector` + * `scenedetect.frame_timecode` moved to `scenedetect.common` + * Image/HTML/CSV export in `scenedetect.scene_manager` moved to `scenedetect.output` [#463](https://github.com/Breakthrough/PySceneDetect/issues/463) + * `scenedetect.video_splitter` moved to `scenedetect.output.video` [#463](https://github.com/Breakthrough/PySceneDetect/issues/463) + **Detector Interface:** * Replace `frame_num` parameter (`int`) with `timecode` (`FrameTimecode`) in `SceneDetector` interface [#168](https://github.com/Breakthrough/PySceneDetect/issues/168): @@ -703,13 +716,6 @@ Although there have been minimal changes to most API examples, there are several * Remove `SceneDetector.stats_manager_required` property, no longer required * Remove deprecated `SparseSceneDetector` interface -**Module Reorganization:** - - * `scenedetect.scene_detector` moved to `scenedetect.detector` - * `scenedetect.frame_timecode` moved to `scenedetect.common` - * Image/HTML/CSV export in `scenedetect.scene_manager` moved to `scenedetect.output` [#463](https://github.com/Breakthrough/PySceneDetect/issues/463) - * `scenedetect.video_splitter` moved to `scenedetect.output.video` [#463](https://github.com/Breakthrough/PySceneDetect/issues/463) - **FrameTimecode:** * `frame_num` and `framerate` are now read-only properties, construct a new `FrameTimecode` to change them