#### pip install imagehash
#### python noinstance_v2i.py 翠星测试.mp4  翠星图片
#### python noinstance_v2i.py 翠星测试.mp4  翠星图片25_0_1

import os
import cv2
import argparse
import shutil
import imagehash
from pathlib import Path
from tqdm import tqdm
from moviepy.editor import VideoFileClip
from collections import deque
from PIL import Image

from animeinsseg import AnimeInsSeg

# Configuration parameters
# Checkpoint path handed to AnimeInsSeg when the processor is constructed.
CKPT_PATH = "models/AnimeInstanceSegmentation/rtmdetl_e60.ckpt"
DOWNSAMPLE_SIZE = (320, 180)  # frames are resized to this size before comparison, for speed
MIN_SCENE_CHANGE = 0.3        # scene-change threshold (0-1); frames whose histogram correlation with the last kept frame exceeds 1 - this value are skipped
HASH_THRESHOLD = 25            # hash similarity threshold; a dhash difference below this counts as a duplicate
FRAME_BUFFER = 10             # maximum number of recent downsampled frames kept for near-identical checks

class VideoProcessor:
    """Extract the frames of a video in which no character is detected.

    Frames go through a cascade of cheap filters (histogram-based scene
    change, pixel difference against a short buffer of recent frames) before
    the segmentation network is run. Frames without any detection are saved
    unless their perceptual hash is close to that of an already saved frame.
    """

    def __init__(self):
        """Load the segmentation network and reset the saved-frame state."""
        self.net = AnimeInsSeg(
            CKPT_PATH,
            mask_thr=0.1,
            refine_kwargs={'refine_method': 'refinenet_isnet'}
        )
        # Maps the index of every saved frame to its perceptual hash.
        self.hash_dict = {}
        # Not read anywhere in this class; kept so existing attribute
        # access from outside keeps working.
        self.last_saved_hash = None

    def _preprocess_frame(self, frame):
        """Return a downsampled grayscale copy of a BGR frame."""
        small_frame = cv2.resize(frame, DOWNSAMPLE_SIZE)
        return cv2.cvtColor(small_frame, cv2.COLOR_BGR2GRAY)

    @staticmethod
    def _histogram(gray_frame):
        """Return the 256-bin intensity histogram of a grayscale frame."""
        return cv2.calcHist([gray_frame], [0], None, [256], [0, 256])

    @staticmethod
    def _has_detections(instances):
        """Return True when ``instances.bboxes`` holds at least one box.

        A missing (``None``) result and an empty result are both treated as
        "nothing detected"; checking only for ``None`` would report a
        character for an empty array of boxes.
        """
        # NOTE(review): assumes bboxes is None or a sized sequence/array --
        # confirm against the AnimeInsSeg return type.
        bboxes = instances.bboxes
        return bboxes is not None and len(bboxes) > 0

    def _has_human(self, frame):
        """Return True when the network detects an instance in a BGR frame."""
        instances = self.net.infer(
            cv2.cvtColor(frame, cv2.COLOR_BGR2RGB),
            output_type='numpy',
            pred_score_thr=0.5  # a higher score threshold speeds up the decision
        )
        return self._has_detections(instances)

    def _is_known_hash(self, frame_hash, threshold=None):
        """Return True when ``frame_hash`` is close to a saved frame's hash.

        Args:
            frame_hash: hash of the candidate frame; it must support
                subtraction against the stored hashes.
            threshold: differences strictly below this value count as a
                duplicate. Defaults to ``HASH_THRESHOLD``.
        """
        limit = HASH_THRESHOLD if threshold is None else threshold
        return any(
            frame_hash - saved_hash < limit
            for saved_hash in self.hash_dict.values()
        )

    def _is_duplicate(self, frame):
        """Return True when a perceptually similar frame was already saved.

        Args:
            frame: image array accepted by ``PIL.Image.fromarray``.
        """
        return self._is_known_hash(imagehash.dhash(Image.fromarray(frame)))

    def process_video(self, video_path, output_dir):
        """Save every character-free, non-duplicate frame of a video as JPEG.

        Args:
            video_path: path of the input video.
            output_dir: directory receiving ``frame_<index>.jpg`` files; it
                is created when missing.

        Raises:
            OSError: if a frame cannot be written to ``output_dir``.
        """
        clip = VideoFileClip(str(video_path))
        try:
            output_path = Path(output_dir)
            output_path.mkdir(parents=True, exist_ok=True)

            # Histogram of the last frame that passed the scene-change test.
            # Caching it avoids recomputing it for every incoming frame.
            prev_hist = None
            frame_buffer = deque(maxlen=FRAME_BUFFER)

            for i, frame in enumerate(clip.iter_frames()):
                # The frame is converted RGB -> BGR for the OpenCV calls.
                cv_frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

                # Dynamic frame sampling: skip frames whose grayscale
                # histogram still correlates strongly with the last kept one.
                processed_frame = self._preprocess_frame(cv_frame)
                current_hist = self._histogram(processed_frame)
                if prev_hist is not None:
                    similarity = cv2.compareHist(
                        current_hist, prev_hist, cv2.HISTCMP_CORREL
                    )
                    if similarity > (1 - MIN_SCENE_CHANGE):
                        continue

                prev_hist = current_hist

                # Drop frames that are almost pixel-identical to one of the
                # recently kept downsampled frames.
                if any(
                    cv2.absdiff(processed_frame, recent).sum() < 1000
                    for recent in frame_buffer
                ):
                    continue
                frame_buffer.append(processed_frame)

                # Only frames without any detected character are candidates.
                if self._has_human(cv_frame):
                    continue

                # Hash once and reuse it for both the check and the record.
                frame_hash = imagehash.dhash(Image.fromarray(frame))
                if self._is_known_hash(frame_hash):
                    continue

                # Encode in memory and write through pathlib instead of
                # cv2.imwrite, whose failures (e.g. on non-ASCII paths) are
                # only signalled by an ignored return value.
                encoded_ok, encoded = cv2.imencode(".jpg", cv_frame)
                if not encoded_ok:
                    # Nothing was written, so do not register the hash.
                    continue
                (output_path / f"frame_{i:06d}.jpg").write_bytes(encoded.tobytes())
                self.hash_dict[i] = frame_hash
        finally:
            # Release the video reader even when processing fails midway.
            clip.close()

def main():
    """Parse the command line and run the extraction on the given video."""
    cli = argparse.ArgumentParser(description="提取视频中无人物出现的帧")
    positional_args = (
        ("video_path", "输入视频路径"),
        ("output_dir", "输出目录路径"),
    )
    for arg_name, arg_help in positional_args:
        cli.add_argument(arg_name, type=str, help=arg_help)
    options = cli.parse_args()

    # The processor is built only after the arguments parsed successfully.
    VideoProcessor().process_video(options.video_path, options.output_dir)

# Run the command-line entry point only when this file is executed as a script.
if __name__ == "__main__":
    main()