Skip to content

第33章 计算机视觉

学习目标

完成本章学习后,你将能够:

  1. 理解图像基础:像素、通道、色彩空间、图像格式
  2. 掌握OpenCV核心操作:图像读写、显示、基本变换
  3. 实现图像处理:滤波、形态学操作、直方图处理
  4. 进行特征检测:边缘检测、角点检测、blob检测
  5. 实现图像分割:阈值分割、区域生长、轮廓检测
  6. 应用几何变换:仿射变换、透视变换、图像配准
  7. 实现目标检测:模板匹配、特征匹配、级联分类器
  8. 构建视觉应用:人脸检测、物体追踪、文档扫描

33.1 图像基础

33.1.1 图像表示

python
import cv2
import numpy as np
from typing import List, Tuple, Optional, Union
from dataclasses import dataclass
from enum import Enum


class ColorSpace(Enum):
    """Target colour spaces reachable from a BGR image.

    Each member's value is the ``cv2.cvtColor`` conversion code that maps a
    BGR image into that space.  OpenCV defines no ``COLOR_BGR2BGR`` code
    (referencing it raises ``AttributeError`` when this class is defined),
    so ``BGR`` -- the source space itself -- carries ``None`` to mean
    "no conversion required".  Code that forwards ``.value`` to
    ``cv2.cvtColor`` must treat ``None`` as the identity conversion.
    """

    BGR = None
    RGB = cv2.COLOR_BGR2RGB
    GRAY = cv2.COLOR_BGR2GRAY
    HSV = cv2.COLOR_BGR2HSV
    LAB = cv2.COLOR_BGR2LAB
    YUV = cv2.COLOR_BGR2YUV
    HLS = cv2.COLOR_BGR2HLS


@dataclass
class ImageInfo:
    """Geometry and storage summary of an image array."""

    width: int
    height: int
    channels: int
    dtype: np.dtype
    total_pixels: int
    memory_size: int

    @classmethod
    def from_image(cls, image: np.ndarray) -> "ImageInfo":
        """Build the summary from an array laid out as (rows, cols[, channels]).

        A two-dimensional array is reported as having one channel.
        """
        rows, cols = image.shape[:2]
        depth = 1
        if len(image.shape) == 3:
            depth = image.shape[2]
        # Positional order follows the field declarations above.
        return cls(cols, rows, depth, image.dtype, cols * rows, image.nbytes)

    @property
    def shape(self) -> Tuple[int, int, int]:
        """Return ``(height, width, channels)``."""
        return self.height, self.width, self.channels

    def __repr__(self) -> str:
        summary = ", ".join(
            (
                f"width={self.width}",
                f"height={self.height}",
                f"channels={self.channels}",
                f"dtype={self.dtype}",
            )
        )
        return f"ImageInfo({summary})"


class ImageLoader:
    """Stateless helpers for reading, writing, decoding and encoding images."""

    @staticmethod
    def load(filepath: str, flags: int = cv2.IMREAD_COLOR) -> Optional[np.ndarray]:
        """Read an image file with ``cv2.imread``.

        Returns ``None`` (OpenCV does not raise) when the file is missing or
        cannot be decoded, so callers should check the result.
        """
        return cv2.imread(filepath, flags)

    @staticmethod
    def load_grayscale(filepath: str) -> Optional[np.ndarray]:
        """Read an image file as single-channel grayscale; ``None`` on failure."""
        return cv2.imread(filepath, cv2.IMREAD_GRAYSCALE)

    @staticmethod
    def load_unchanged(filepath: str) -> Optional[np.ndarray]:
        """Read an image file with ``IMREAD_UNCHANGED``; ``None`` on failure."""
        return cv2.imread(filepath, cv2.IMREAD_UNCHANGED)

    @staticmethod
    def save(filepath: str, image: np.ndarray) -> bool:
        """Write ``image`` to ``filepath`` and return OpenCV's success flag."""
        return cv2.imwrite(filepath, image)

    @staticmethod
    def load_from_bytes(data: bytes, flags: int = cv2.IMREAD_COLOR) -> Optional[np.ndarray]:
        """Decode an encoded image held in memory; ``None`` if decoding fails."""
        nparr = np.frombuffer(data, np.uint8)
        return cv2.imdecode(nparr, flags)

    @staticmethod
    def encode_to_bytes(image: np.ndarray, ext: str = ".jpg") -> bytes:
        """Encode ``image`` into the format selected by ``ext``.

        Raises:
            ValueError: if ``cv2.imencode`` reports failure.  The flag used
                to be discarded, which could hand back a meaningless buffer.
        """
        ok, buffer = cv2.imencode(ext, image)
        if not ok:
            raise ValueError(f"Failed to encode image as {ext!r}")
        return buffer.tobytes()


class ImageDisplay:
    """Thin wrapper over OpenCV's HighGUI that remembers which windows it opened."""

    def __init__(self):
        # Names of the windows created through this instance.
        self._windows: List[str] = []

    def show(self, image: np.ndarray, window_name: str = "Image") -> None:
        """Display ``image``, creating the named window on first use."""
        already_open = window_name in self._windows
        if not already_open:
            cv2.namedWindow(window_name)
            self._windows.append(window_name)
        cv2.imshow(window_name, image)

    def show_multiple(self, images: List[Tuple[np.ndarray, str]]) -> None:
        """Display every ``(image, window_name)`` pair in order."""
        for entry in images:
            picture, title = entry
            self.show(picture, title)

    def wait_key(self, delay: int = 0) -> int:
        """Wait for a key press and return its code masked to the low byte."""
        pressed = cv2.waitKey(delay)
        return pressed & 0xFF

    def close_all(self) -> None:
        """Destroy all OpenCV windows and forget the tracked names."""
        cv2.destroyAllWindows()
        self._windows.clear()

    def close(self, window_name: str) -> None:
        """Destroy one window and stop tracking it if it was tracked."""
        cv2.destroyWindow(window_name)
        try:
            self._windows.remove(window_name)
        except ValueError:
            # The window was not opened through this instance.
            pass

33.1.2 色彩空间转换

python
class ColorConverter:
    """Convert a BGR image into other colour spaces."""

    def __init__(self, image: np.ndarray):
        # The source image; every conversion below uses a BGR->X code.
        self.image = image

    def to_rgb(self) -> np.ndarray:
        """Return the image converted with ``COLOR_BGR2RGB``."""
        return cv2.cvtColor(self.image, cv2.COLOR_BGR2RGB)

    def to_gray(self) -> np.ndarray:
        """Return the image converted with ``COLOR_BGR2GRAY``."""
        return cv2.cvtColor(self.image, cv2.COLOR_BGR2GRAY)

    def to_hsv(self) -> np.ndarray:
        """Return the image converted with ``COLOR_BGR2HSV``."""
        return cv2.cvtColor(self.image, cv2.COLOR_BGR2HSV)

    def to_lab(self) -> np.ndarray:
        """Return the image converted with ``COLOR_BGR2LAB``."""
        return cv2.cvtColor(self.image, cv2.COLOR_BGR2LAB)

    def to_hls(self) -> np.ndarray:
        """Return the image converted with ``COLOR_BGR2HLS``."""
        return cv2.cvtColor(self.image, cv2.COLOR_BGR2HLS)

    def to_yuv(self) -> np.ndarray:
        """Return the image converted with ``COLOR_BGR2YUV``."""
        return cv2.cvtColor(self.image, cv2.COLOR_BGR2YUV)

    def convert(self, color_space: ColorSpace) -> np.ndarray:
        """Convert the image into ``color_space``.

        The member's value is used as the ``cv2.cvtColor`` code.  A member
        whose value is ``None`` has no conversion code (OpenCV offers none
        for BGR -> BGR), so it is treated as the identity and a copy of the
        image is returned instead of passing ``None`` to OpenCV.
        """
        code = color_space.value
        if code is None:
            return self.image.copy()
        return cv2.cvtColor(self.image, code)


class ColorExtractor:
    """Channel-level and colour-range extraction helpers."""

    @staticmethod
    def extract_channel(image: np.ndarray, channel: int) -> np.ndarray:
        """Return the plane at index ``channel`` of the third axis."""
        plane = image[:, :, channel]
        return plane

    @staticmethod
    def split_channels(image: np.ndarray) -> List[np.ndarray]:
        """Split the image with ``cv2.split`` and return the planes as a list."""
        return [plane for plane in cv2.split(image)]

    @staticmethod
    def merge_channels(channels: List[np.ndarray]) -> np.ndarray:
        """Combine single-channel planes with ``cv2.merge``."""
        merged = cv2.merge(channels)
        return merged

    @staticmethod
    def _select_range(
        probe: np.ndarray,
        image: np.ndarray,
        lower: Tuple[int, int, int],
        upper: Tuple[int, int, int]
    ) -> Tuple[np.ndarray, np.ndarray]:
        """Threshold ``probe`` to a mask, then keep the masked pixels of ``image``."""
        low, high = np.array(lower), np.array(upper)
        mask = cv2.inRange(probe, low, high)
        return mask, cv2.bitwise_and(image, image, mask=mask)

    @staticmethod
    def extract_color_range_hsv(
        image: np.ndarray,
        lower: Tuple[int, int, int],
        upper: Tuple[int, int, int]
    ) -> Tuple[np.ndarray, np.ndarray]:
        """Select pixels whose HSV value lies within ``[lower, upper]``.

        The range test runs on the ``COLOR_BGR2HSV`` conversion; the returned
        pair is ``(mask, image restricted to the mask)``.
        """
        hsv_view = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
        return ColorExtractor._select_range(hsv_view, image, lower, upper)

    @staticmethod
    def extract_color_range_rgb(
        image: np.ndarray,
        lower: Tuple[int, int, int],
        upper: Tuple[int, int, int]
    ) -> Tuple[np.ndarray, np.ndarray]:
        """Select pixels whose raw channel values lie within ``[lower, upper]``.

        No colour conversion is applied: the bounds are compared against the
        image's channels as stored.  Returns ``(mask, masked image)``.
        """
        return ColorExtractor._select_range(image, image, lower, upper)


class ColorAdjustment:
    """Brightness, contrast, saturation and gamma adjustments for BGR images."""

    @staticmethod
    def _offset_hsv_plane(image: np.ndarray, index: int, value: int) -> np.ndarray:
        """Add ``value`` to one HSV plane (0=H, 1=S, 2=V) and convert back to BGR."""
        planes = list(cv2.split(cv2.cvtColor(image, cv2.COLOR_BGR2HSV)))
        shifted = cv2.add(planes[index], value)
        planes[index] = np.clip(shifted, 0, 255).astype(np.uint8)
        return cv2.cvtColor(cv2.merge(planes), cv2.COLOR_HSV2BGR)

    @staticmethod
    def adjust_brightness(image: np.ndarray, value: int) -> np.ndarray:
        """Shift the V plane of the HSV representation by ``value``."""
        return ColorAdjustment._offset_hsv_plane(image, 2, value)

    @staticmethod
    def adjust_contrast(image: np.ndarray, alpha: float) -> np.ndarray:
        """Scale pixel values by ``alpha`` via ``cv2.convertScaleAbs`` (beta is 0)."""
        scaled = cv2.convertScaleAbs(image, alpha=alpha, beta=0)
        return scaled

    @staticmethod
    def adjust_saturation(image: np.ndarray, value: int) -> np.ndarray:
        """Shift the S plane of the HSV representation by ``value``."""
        return ColorAdjustment._offset_hsv_plane(image, 1, value)

    @staticmethod
    def gamma_correction(image: np.ndarray, gamma: float) -> np.ndarray:
        """Apply the curve ``out = (in / 255) ** (1 / gamma) * 255`` through a LUT."""
        exponent = 1.0 / gamma
        levels = np.arange(0, 256)
        # One output value per possible 8-bit input level.
        curve = [((level / 255.0) ** exponent) * 255 for level in levels]
        lookup = np.array(curve).astype(np.uint8)
        return cv2.LUT(image, lookup)

33.2 图像处理

33.2.1 几何变换

python
class GeometricTransform:
    """Geometric operations on a single image; none of them assign ``self.image``."""

    def __init__(self, image: np.ndarray):
        self.image = image

    def resize(
        self,
        width: Optional[int] = None,
        height: Optional[int] = None,
        scale: Optional[float] = None,
        interpolation: int = cv2.INTER_LINEAR
    ) -> np.ndarray:
        """Resize to ``(width, height)``, or by ``scale`` when it is given.

        ``scale`` takes precedence over explicit dimensions.

        Raises:
            ValueError: if ``scale`` is omitted and either dimension is missing.
        """
        if scale is None:
            if width is None or height is None:
                raise ValueError("Must specify width and height, or scale")
        else:
            width = int(self.image.shape[1] * scale)
            height = int(self.image.shape[0] * scale)

        target = (width, height)
        return cv2.resize(self.image, target, interpolation=interpolation)

    def rotate(
        self,
        angle: float,
        center: Optional[Tuple[int, int]] = None,
        scale: float = 1.0
    ) -> np.ndarray:
        """Rotate about ``center`` (image centre by default), keeping the canvas size."""
        rows, cols = self.image.shape[:2]
        pivot = (cols // 2, rows // 2) if center is None else center
        rotation = cv2.getRotationMatrix2D(pivot, angle, scale)
        return cv2.warpAffine(self.image, rotation, (cols, rows))

    def flip(self, flip_code: int = 1) -> np.ndarray:
        """Flip with ``cv2.flip`` using the given ``flip_code``."""
        flipped = cv2.flip(self.image, flip_code)
        return flipped

    def flip_horizontal(self) -> np.ndarray:
        """Flip with flip code 1."""
        flipped = cv2.flip(self.image, 1)
        return flipped

    def flip_vertical(self) -> np.ndarray:
        """Flip with flip code 0."""
        flipped = cv2.flip(self.image, 0)
        return flipped

    def translate(self, tx: int, ty: int) -> np.ndarray:
        """Shift the image by ``(tx, ty)`` pixels on an unchanged canvas."""
        rows, cols = self.image.shape[:2]
        shift = np.float32([
            [1, 0, tx],
            [0, 1, ty],
        ])
        return cv2.warpAffine(self.image, shift, (cols, rows))

    def crop(self, x: int, y: int, width: int, height: int) -> np.ndarray:
        """Return the slice starting at ``(x, y)`` with the given size."""
        bottom = y + height
        right = x + width
        return self.image[y:bottom, x:right]

    def crop_center(self, width: int, height: int) -> np.ndarray:
        """Return a ``width`` x ``height`` slice centred in the image."""
        rows, cols = self.image.shape[:2]
        left = (cols - width) // 2
        top = (rows - height) // 2
        return self.image[top:top + height, left:left + width]

    def affine_transform(
        self,
        src_points: List[Tuple[float, float]],
        dst_points: List[Tuple[float, float]]
    ) -> np.ndarray:
        """Warp with the affine map from ``cv2.getAffineTransform``."""
        source = np.float32(src_points)
        target = np.float32(dst_points)
        rows, cols = self.image.shape[:2]
        warp = cv2.getAffineTransform(source, target)
        return cv2.warpAffine(self.image, warp, (cols, rows))

    def perspective_transform(
        self,
        src_points: List[Tuple[float, float]],
        dst_points: List[Tuple[float, float]]
    ) -> np.ndarray:
        """Warp with the homography from ``cv2.getPerspectiveTransform``."""
        source = np.float32(src_points)
        target = np.float32(dst_points)
        rows, cols = self.image.shape[:2]
        homography = cv2.getPerspectiveTransform(source, target)
        return cv2.warpPerspective(self.image, homography, (cols, rows))

    def rotate_bound(self, angle: float) -> np.ndarray:
        """Rotate about the centre on a canvas enlarged to hold the whole result."""
        rows, cols = self.image.shape[:2]
        pivot = (cols // 2, rows // 2)
        rotation = cv2.getRotationMatrix2D(pivot, angle, 1.0)

        # |cos| and |sin| of the rotation give the rotated bounding-box size.
        abs_cos = np.abs(rotation[0, 0])
        abs_sin = np.abs(rotation[0, 1])
        out_w = int((rows * abs_sin) + (cols * abs_cos))
        out_h = int((rows * abs_cos) + (cols * abs_sin))

        # Move the old centre onto the centre of the enlarged canvas.
        rotation[0, 2] += (out_w / 2) - pivot[0]
        rotation[1, 2] += (out_h / 2) - pivot[1]

        return cv2.warpAffine(self.image, rotation, (out_w, out_h))

33.2.2 图像滤波

python
class ImageFilter:
    """Smoothing filters and fixed-kernel convolutions over one image."""

    # 3x3 kernels used by sharpen(), emboss() and edge_enhance().
    _SHARPEN_KERNEL = np.array([
        [0, -1, 0],
        [-1, 5, -1],
        [0, -1, 0]
    ])
    _EMBOSS_KERNEL = np.array([
        [-2, -1, 0],
        [-1, 1, 1],
        [0, 1, 2]
    ])
    _EDGE_ENHANCE_KERNEL = np.array([
        [-1, -1, -1],
        [-1, 9, -1],
        [-1, -1, -1]
    ])

    def __init__(self, image: np.ndarray):
        self.image = image

    def _convolve(self, kernel: np.ndarray) -> np.ndarray:
        """Convolve the image with ``kernel`` at the source depth (ddepth -1)."""
        return cv2.filter2D(self.image, -1, kernel)

    def blur(self, ksize: Tuple[int, int] = (5, 5)) -> np.ndarray:
        """Apply ``cv2.blur`` with window ``ksize``."""
        smoothed = cv2.blur(self.image, ksize)
        return smoothed

    def gaussian_blur(self, ksize: Tuple[int, int] = (5, 5), sigma: float = 0) -> np.ndarray:
        """Apply ``cv2.GaussianBlur`` with the given window and sigma."""
        smoothed = cv2.GaussianBlur(self.image, ksize, sigma)
        return smoothed

    def median_blur(self, ksize: int = 5) -> np.ndarray:
        """Apply ``cv2.medianBlur`` with aperture ``ksize``."""
        smoothed = cv2.medianBlur(self.image, ksize)
        return smoothed

    def bilateral_filter(
        self,
        d: int = 9,
        sigma_color: float = 75,
        sigma_space: float = 75
    ) -> np.ndarray:
        """Apply ``cv2.bilateralFilter`` with the given diameter and sigmas."""
        smoothed = cv2.bilateralFilter(self.image, d, sigma_color, sigma_space)
        return smoothed

    def box_filter(self, ksize: Tuple[int, int] = (5, 5)) -> np.ndarray:
        """Apply ``cv2.boxFilter`` at the source depth."""
        smoothed = cv2.boxFilter(self.image, -1, ksize)
        return smoothed

    def custom_kernel(self, kernel: np.ndarray) -> np.ndarray:
        """Convolve the image with a caller-supplied kernel."""
        return self._convolve(kernel)

    def sharpen(self) -> np.ndarray:
        """Convolve with the 3x3 kernel whose centre is 5 and 4-neighbours are -1."""
        return self._convolve(self._SHARPEN_KERNEL)

    def emboss(self) -> np.ndarray:
        """Convolve with the diagonal emboss kernel."""
        return self._convolve(self._EMBOSS_KERNEL)

    def edge_enhance(self) -> np.ndarray:
        """Convolve with the 3x3 kernel whose centre is 9 and all neighbours are -1."""
        return self._convolve(self._EDGE_ENHANCE_KERNEL)


class EdgeDetection:
    """Gradient- and Canny-based edge detectors over a grayscale view of an image."""

    def __init__(self, image: np.ndarray):
        # Three-dimensional input is treated as BGR and reduced to grayscale;
        # anything else is used as the grayscale plane directly.
        if len(image.shape) == 3:
            self.gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        else:
            self.gray = image
        self.image = image

    @staticmethod
    def _normalize_to_uint8(magnitude: np.ndarray) -> np.ndarray:
        """Scale a non-negative response so its maximum maps to 255.

        An all-zero response (flat image) yields an all-zero result instead
        of dividing by zero and casting NaN to ``uint8``.
        """
        peak = magnitude.max()
        if peak == 0:
            return np.zeros(magnitude.shape, dtype=np.uint8)
        return np.uint8(magnitude / peak * 255)

    def sobel(
        self,
        dx: int = 1,
        dy: int = 1,
        ksize: int = 3
    ) -> np.ndarray:
        """Return the Sobel gradient magnitude normalised to 0..255.

        ``dx`` is the derivative order used for the horizontal pass and
        ``dy`` the order used for the vertical pass.
        """
        sobel_x = cv2.Sobel(self.gray, cv2.CV_64F, dx, 0, ksize=ksize)
        sobel_y = cv2.Sobel(self.gray, cv2.CV_64F, 0, dy, ksize=ksize)
        return self._normalize_to_uint8(np.sqrt(sobel_x ** 2 + sobel_y ** 2))

    def sobel_x(self, ksize: int = 3) -> np.ndarray:
        """Return the raw (signed, ``CV_64F``) horizontal Sobel derivative."""
        return cv2.Sobel(self.gray, cv2.CV_64F, 1, 0, ksize=ksize)

    def sobel_y(self, ksize: int = 3) -> np.ndarray:
        """Return the raw (signed, ``CV_64F``) vertical Sobel derivative."""
        return cv2.Sobel(self.gray, cv2.CV_64F, 0, 1, ksize=ksize)

    def laplacian(self, ksize: int = 3) -> np.ndarray:
        """Return the raw (signed, ``CV_64F``) Laplacian response."""
        return cv2.Laplacian(self.gray, cv2.CV_64F, ksize=ksize)

    def canny(self, threshold1: float = 50, threshold2: float = 150) -> np.ndarray:
        """Return the Canny edge map for the two hysteresis thresholds."""
        return cv2.Canny(self.gray, threshold1, threshold2)

    def prewitt(self) -> np.ndarray:
        """Return the saturated sum of absolute Prewitt x/y responses (``uint8``).

        The filters run at ``CV_64F`` depth so negative gradients survive;
        filtering at the source ``uint8`` depth clips them to zero and drops
        every edge of one polarity.
        """
        kernel_x = np.array([
            [-1, 0, 1],
            [-1, 0, 1],
            [-1, 0, 1]
        ], dtype=np.float32)
        kernel_y = np.array([
            [-1, -1, -1],
            [0, 0, 0],
            [1, 1, 1]
        ], dtype=np.float32)
        prewitt_x = cv2.filter2D(self.gray, cv2.CV_64F, kernel_x)
        prewitt_y = cv2.filter2D(self.gray, cv2.CV_64F, kernel_y)
        # convertScaleAbs takes |response| and saturates it to uint8.
        return cv2.add(cv2.convertScaleAbs(prewitt_x), cv2.convertScaleAbs(prewitt_y))

    def scharr(self) -> np.ndarray:
        """Return the Scharr gradient magnitude normalised to 0..255.

        Normalising (as ``sobel`` does) avoids the modulo-256 wrap-around a
        plain ``astype(np.uint8)`` produces for magnitudes above 255.
        """
        scharr_x = cv2.Scharr(self.gray, cv2.CV_64F, 1, 0)
        scharr_y = cv2.Scharr(self.gray, cv2.CV_64F, 0, 1)
        return self._normalize_to_uint8(np.sqrt(scharr_x ** 2 + scharr_y ** 2))

33.2.3 形态学操作

python
class MorphologicalOperations:
    """Morphological operators applied to a binary view of an image."""

    def __init__(self, image: np.ndarray):
        if len(image.shape) == 3:
            # Three-dimensional input: grayscale, then a fixed threshold at 127.
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            _, binarized = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
            self.binary = binarized
        else:
            # Other input is used as given, without thresholding.
            self.binary = image
        self.image = image

    def _get_kernel(self, shape: int, ksize: Tuple[int, int]) -> np.ndarray:
        """Build a structuring element of the given shape and size."""
        element = cv2.getStructuringElement(shape, ksize)
        return element

    def _morph(
        self,
        operation: int,
        ksize: Tuple[int, int],
        shape: int,
        iterations: int = 1
    ) -> np.ndarray:
        """Run ``cv2.morphologyEx`` on the binary image with a fresh kernel."""
        element = self._get_kernel(shape, ksize)
        return cv2.morphologyEx(self.binary, operation, element, iterations=iterations)

    def erode(
        self,
        ksize: Tuple[int, int] = (5, 5),
        iterations: int = 1,
        shape: int = cv2.MORPH_RECT
    ) -> np.ndarray:
        """Erode the binary image."""
        element = self._get_kernel(shape, ksize)
        return cv2.erode(self.binary, element, iterations=iterations)

    def dilate(
        self,
        ksize: Tuple[int, int] = (5, 5),
        iterations: int = 1,
        shape: int = cv2.MORPH_RECT
    ) -> np.ndarray:
        """Dilate the binary image."""
        element = self._get_kernel(shape, ksize)
        return cv2.dilate(self.binary, element, iterations=iterations)

    def opening(
        self,
        ksize: Tuple[int, int] = (5, 5),
        iterations: int = 1,
        shape: int = cv2.MORPH_RECT
    ) -> np.ndarray:
        """Apply ``MORPH_OPEN``."""
        return self._morph(cv2.MORPH_OPEN, ksize, shape, iterations)

    def closing(
        self,
        ksize: Tuple[int, int] = (5, 5),
        iterations: int = 1,
        shape: int = cv2.MORPH_RECT
    ) -> np.ndarray:
        """Apply ``MORPH_CLOSE``."""
        return self._morph(cv2.MORPH_CLOSE, ksize, shape, iterations)

    def gradient(
        self,
        ksize: Tuple[int, int] = (5, 5),
        shape: int = cv2.MORPH_RECT
    ) -> np.ndarray:
        """Apply ``MORPH_GRADIENT``."""
        return self._morph(cv2.MORPH_GRADIENT, ksize, shape)

    def tophat(
        self,
        ksize: Tuple[int, int] = (5, 5),
        shape: int = cv2.MORPH_RECT
    ) -> np.ndarray:
        """Apply ``MORPH_TOPHAT``."""
        return self._morph(cv2.MORPH_TOPHAT, ksize, shape)

    def blackhat(
        self,
        ksize: Tuple[int, int] = (5, 5),
        shape: int = cv2.MORPH_RECT
    ) -> np.ndarray:
        """Apply ``MORPH_BLACKHAT``."""
        return self._morph(cv2.MORPH_BLACKHAT, ksize, shape)

    def hit_miss(self, kernel: np.ndarray) -> np.ndarray:
        """Apply ``MORPH_HITMISS`` with a caller-supplied kernel."""
        outcome = cv2.morphologyEx(self.binary, cv2.MORPH_HITMISS, kernel)
        return outcome

33.3 特征检测

33.3.1 角点检测

python
class CornerDetection:
    """Harris, Shi-Tomasi and FAST corner detectors plus drawing helpers."""

    def __init__(self, image: np.ndarray):
        self.image = image
        three_dimensional = len(image.shape) == 3
        self.gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if three_dimensional else image

    def harris(
        self,
        block_size: int = 2,
        ksize: int = 3,
        k: float = 0.04,
        threshold: float = 0.01
    ) -> Tuple[np.ndarray, List[Tuple[int, int]]]:
        """Return the dilated Harris response and the ``(x, y)`` points above threshold.

        A pixel counts as a corner when its response exceeds
        ``threshold * response.max()``.
        """
        response = cv2.cornerHarris(np.float32(self.gray), block_size, ksize, k)
        response = cv2.dilate(response, None)

        cutoff = threshold * response.max()
        rows, cols = np.where(response > cutoff)
        # np.where yields (row, col); callers receive (x, y).
        corner_points = [(x, y) for x, y in zip(cols, rows)]

        return response, corner_points

    def shi_tomasi(
        self,
        max_corners: int = 100,
        quality_level: float = 0.01,
        min_distance: int = 10,
        block_size: int = 3
    ) -> List[Tuple[float, float]]:
        """Return ``cv2.goodFeaturesToTrack`` corners as tuples, or ``[]`` if none."""
        found = cv2.goodFeaturesToTrack(
            self.gray,
            max_corners,
            quality_level,
            min_distance,
            blockSize=block_size
        )
        if found is None:
            return []
        return [tuple(entry[0]) for entry in found]

    def fast(
        self,
        threshold: int = 10,
        non_max_suppression: bool = True
    ) -> List[cv2.KeyPoint]:
        """Detect FAST keypoints on the grayscale image."""
        detector = cv2.FastFeatureDetector_create(threshold, non_max_suppression)
        return detector.detect(self.gray, None)

    def draw_corners(
        self,
        corners: List[Tuple[int, int]],
        color: Tuple[int, int, int] = (0, 255, 0),
        radius: int = 3
    ) -> np.ndarray:
        """Draw a filled circle at every corner on a copy of the image."""
        canvas = self.image.copy()
        for point in corners:
            x, y = point
            cv2.circle(canvas, (int(x), int(y)), radius, color, -1)
        return canvas

    def draw_keypoints(
        self,
        keypoints: List[cv2.KeyPoint],
        color: Tuple[int, int, int] = (0, 255, 0)
    ) -> np.ndarray:
        """Render keypoints with the rich-keypoint drawing flag."""
        flags = cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS
        return cv2.drawKeypoints(self.image, keypoints, None, color, flags)

33.3.2 特征描述符

python
class FeatureDescriptor:
    """Keypoint detection and description with several OpenCV feature extractors."""

    def __init__(self, image: np.ndarray):
        self.image = image
        three_dimensional = len(image.shape) == 3
        self.gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if three_dimensional else image

    def _extract(self, detector) -> Tuple[List[cv2.KeyPoint], np.ndarray]:
        """Run ``detectAndCompute`` on the grayscale image without a mask."""
        keypoints, descriptors = detector.detectAndCompute(self.gray, None)
        return keypoints, descriptors

    def sift_detect_and_compute(self) -> Tuple[List[cv2.KeyPoint], np.ndarray]:
        """Return SIFT keypoints and descriptors."""
        return self._extract(cv2.SIFT_create())

    def surf_detect_and_compute(self, hessian_threshold: float = 400) -> Tuple[List[cv2.KeyPoint], np.ndarray]:
        """Return SURF keypoints and descriptors (uses ``cv2.xfeatures2d``)."""
        return self._extract(cv2.xfeatures2d.SURF_create(hessian_threshold))

    def orb_detect_and_compute(self, n_features: int = 500) -> Tuple[List[cv2.KeyPoint], np.ndarray]:
        """Return ORB keypoints and descriptors for a detector built with ``n_features``."""
        return self._extract(cv2.ORB_create(n_features))

    def brisk_detect_and_compute(self) -> Tuple[List[cv2.KeyPoint], np.ndarray]:
        """Return BRISK keypoints and descriptors."""
        return self._extract(cv2.BRISK_create())

    def akaze_detect_and_compute(self) -> Tuple[List[cv2.KeyPoint], np.ndarray]:
        """Return AKAZE keypoints and descriptors."""
        return self._extract(cv2.AKAZE_create())


class FeatureMatcher:
    """Descriptor matching helpers: brute force, FLANN, ratio test, drawing."""

    # FLANN index algorithm identifiers as defined by OpenCV's flann module.
    _FLANN_INDEX_KDTREE = 1
    _FLANN_INDEX_LSH = 6

    @staticmethod
    def match_bf(
        descriptors1: np.ndarray,
        descriptors2: np.ndarray,
        norm_type: int = cv2.NORM_L2
    ) -> List[cv2.DMatch]:
        """Brute-force match and return the matches sorted by ascending distance."""
        bf = cv2.BFMatcher(norm_type)
        matches = bf.match(descriptors1, descriptors2)
        return sorted(matches, key=lambda match: match.distance)

    @staticmethod
    def match_knn(
        descriptors1: np.ndarray,
        descriptors2: np.ndarray,
        k: int = 2,
        norm_type: int = cv2.NORM_L2
    ) -> List[List[cv2.DMatch]]:
        """Return the ``k`` nearest brute-force matches for each query descriptor."""
        bf = cv2.BFMatcher(norm_type)
        return bf.knnMatch(descriptors1, descriptors2, k=k)

    @staticmethod
    def match_flann(
        descriptors1: np.ndarray,
        descriptors2: np.ndarray,
        k: int = 2
    ) -> List[List[cv2.DMatch]]:
        """Return the ``k`` nearest FLANN matches for each query descriptor.

        ``uint8`` descriptors get an LSH index and everything else a KD-tree
        index.  The KD-tree id is 1; the value 0 selects FLANN's linear index,
        which ignores ``trees``.
        """
        if descriptors1.dtype == np.uint8:
            index_params = dict(
                algorithm=FeatureMatcher._FLANN_INDEX_LSH,
                table_number=6,
                key_size=12,
                multi_probe_level=1,
            )
        else:
            index_params = dict(algorithm=FeatureMatcher._FLANN_INDEX_KDTREE, trees=5)

        search_params = dict(checks=50)
        flann = cv2.FlannBasedMatcher(index_params, search_params)
        return flann.knnMatch(descriptors1, descriptors2, k=k)

    @staticmethod
    def ratio_test(matches: List[List[cv2.DMatch]], ratio: float = 0.75) -> List[cv2.DMatch]:
        """Keep best matches that beat the second-best distance by ``ratio``.

        Entries with fewer than two neighbours are skipped, since there is no
        second-best candidate to compare against.  Entries with more than two
        neighbours are judged on their first two.
        """
        good = []
        for candidates in matches:
            if len(candidates) < 2:
                continue
            best, runner_up = candidates[0], candidates[1]
            if best.distance < ratio * runner_up.distance:
                good.append(best)
        return good

    @staticmethod
    def draw_matches(
        image1: np.ndarray,
        keypoints1: List[cv2.KeyPoint],
        image2: np.ndarray,
        keypoints2: List[cv2.KeyPoint],
        matches: List[cv2.DMatch]
    ) -> np.ndarray:
        """Draw the matches side by side, omitting unmatched single keypoints."""
        return cv2.drawMatches(
            image1, keypoints1,
            image2, keypoints2,
            matches, None,
            flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS
        )

33.3.3 轮廓检测

python
class ContourDetection:
    """Contour extraction, drawing, and per-contour measurements."""

    def __init__(self, image: np.ndarray):
        # Three-dimensional input is treated as BGR and reduced to grayscale.
        if len(image.shape) == 3:
            self.gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        else:
            self.gray = image
        self.image = image

    def find_contours(
        self,
        mode: int = cv2.RETR_EXTERNAL,
        method: int = cv2.CHAIN_APPROX_SIMPLE
    ) -> Tuple[np.ndarray, List[np.ndarray], np.ndarray]:
        """Threshold at 127 and return ``(binary, contours, hierarchy)``."""
        _, binary = cv2.threshold(self.gray, 127, 255, cv2.THRESH_BINARY)
        # OpenCV 3.x returns (image, contours, hierarchy) while 2.x/4.x return
        # (contours, hierarchy); the last two items are right in both cases.
        contours, hierarchy = cv2.findContours(binary, mode, method)[-2:]
        return binary, contours, hierarchy

    def find_contours_canny(
        self,
        threshold1: float = 50,
        threshold2: float = 150,
        mode: int = cv2.RETR_EXTERNAL,
        method: int = cv2.CHAIN_APPROX_SIMPLE
    ) -> Tuple[np.ndarray, List[np.ndarray], np.ndarray]:
        """Run Canny and return ``(edges, contours, hierarchy)``."""
        edges = cv2.Canny(self.gray, threshold1, threshold2)
        contours, hierarchy = cv2.findContours(edges, mode, method)[-2:]
        return edges, contours, hierarchy

    def draw_contours(
        self,
        contours: List[np.ndarray],
        color: Tuple[int, int, int] = (0, 255, 0),
        thickness: int = 2
    ) -> np.ndarray:
        """Draw all ``contours`` on a copy of the image."""
        result = self.image.copy()
        cv2.drawContours(result, contours, -1, color, thickness)
        return result

    @staticmethod
    def get_contour_area(contour: np.ndarray) -> float:
        """Return ``cv2.contourArea`` of the contour."""
        return cv2.contourArea(contour)

    @staticmethod
    def get_contour_perimeter(contour: np.ndarray, closed: bool = True) -> float:
        """Return ``cv2.arcLength`` of the contour."""
        return cv2.arcLength(contour, closed)

    @staticmethod
    def approximate_contour(contour: np.ndarray, epsilon: float = 0.02) -> np.ndarray:
        """Approximate the closed contour; tolerance is ``epsilon`` x perimeter."""
        peri = cv2.arcLength(contour, True)
        return cv2.approxPolyDP(contour, epsilon * peri, True)

    @staticmethod
    def get_bounding_rect(contour: np.ndarray) -> Tuple[int, int, int, int]:
        """Return the upright bounding rectangle from ``cv2.boundingRect``."""
        return cv2.boundingRect(contour)

    @staticmethod
    def get_min_area_rect(
        contour: np.ndarray
    ) -> Tuple[Tuple[float, float], Tuple[float, float], float]:
        """Return the rotated rectangle as ``((cx, cy), (w, h), angle)``.

        The Python binding of ``cv2.minAreaRect`` returns this nested tuple
        rather than a dedicated rectangle object.
        """
        return cv2.minAreaRect(contour)

    @staticmethod
    def get_min_enclosing_circle(contour: np.ndarray) -> Tuple[Tuple[float, float], float]:
        """Return ``((cx, cy), radius)`` from ``cv2.minEnclosingCircle``."""
        return cv2.minEnclosingCircle(contour)

    @staticmethod
    def get_convex_hull(contour: np.ndarray) -> np.ndarray:
        """Return the convex hull from ``cv2.convexHull``."""
        return cv2.convexHull(contour)

    @staticmethod
    def is_contour_convex(contour: np.ndarray) -> bool:
        """Return whether ``cv2.isContourConvex`` reports the contour convex."""
        return cv2.isContourConvex(contour)

    @staticmethod
    def get_moments(contour: np.ndarray) -> dict:
        """Return the moments of the contour.

        The Python binding of ``cv2.moments`` returns a plain ``dict`` keyed
        by moment name (``"m00"``, ``"m10"``, ...).
        """
        return cv2.moments(contour)

    @staticmethod
    def get_centroid(moments: dict) -> Tuple[float, float]:
        """Return the centroid ``(m10 / m00, m01 / m00)``.

        Returns ``(0, 0)`` when ``m00`` is zero, so the division is never
        attempted on a degenerate contour.
        """
        if moments["m00"] != 0:
            cx = moments["m10"] / moments["m00"]
            cy = moments["m01"] / moments["m00"]
            return (cx, cy)
        return (0, 0)

33.4 图像分割

33.4.1 阈值分割

python
class ThresholdSegmentation:
    """Global, adaptive and multi-level thresholding on a grayscale view."""

    def __init__(self, image: np.ndarray):
        # Three-dimensional input is treated as BGR and reduced to grayscale.
        if len(image.shape) == 3:
            self.gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        else:
            self.gray = image
        self.image = image

    def binary_threshold(
        self,
        threshold: int = 127,
        max_value: int = 255,
        type_: int = cv2.THRESH_BINARY
    ) -> Tuple[float, np.ndarray]:
        """Apply ``cv2.threshold`` and return ``(threshold used, result)``."""
        return cv2.threshold(self.gray, threshold, max_value, type_)

    def otsu_threshold(self, max_value: int = 255) -> Tuple[float, np.ndarray]:
        """Binary threshold with the ``THRESH_OTSU`` flag; returns ``(threshold, result)``."""
        return cv2.threshold(
            self.gray, 0, max_value,
            cv2.THRESH_BINARY + cv2.THRESH_OTSU
        )

    def adaptive_threshold_mean(
        self,
        block_size: int = 11,
        c: int = 2
    ) -> np.ndarray:
        """Adaptive binary threshold using ``ADAPTIVE_THRESH_MEAN_C``."""
        return cv2.adaptiveThreshold(
            self.gray, 255,
            cv2.ADAPTIVE_THRESH_MEAN_C,
            cv2.THRESH_BINARY,
            block_size, c
        )

    def adaptive_threshold_gaussian(
        self,
        block_size: int = 11,
        c: int = 2
    ) -> np.ndarray:
        """Adaptive binary threshold using ``ADAPTIVE_THRESH_GAUSSIAN_C``."""
        return cv2.adaptiveThreshold(
            self.gray, 255,
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY,
            block_size, c
        )

    def triangle_threshold(self, max_value: int = 255) -> Tuple[float, np.ndarray]:
        """Binary threshold with the ``THRESH_TRIANGLE`` flag; returns ``(threshold, result)``."""
        return cv2.threshold(
            self.gray, 0, max_value,
            cv2.THRESH_BINARY + cv2.THRESH_TRIANGLE
        )

    def multi_threshold(
        self,
        thresholds: List[int],
        max_value: int = 255
    ) -> np.ndarray:
        """Quantise the grayscale image into bands separated by ``thresholds``.

        With the thresholds sorted ascending as ``t0 < t1 < ... < t(n-1)``:
        pixels below ``t0`` become 0, pixels in ``[t(i-1), t(i))`` become
        ``int(max_value * i / n)``, and pixels at or above the last threshold
        become ``max_value``.  ``thresholds`` may be given in any order.

        Raises:
            ValueError: if ``thresholds`` is empty.
        """
        # Sort once and index only the sorted copy.  Mixing the sorted
        # iteration with the caller's order gives wrong bands for unsorted input.
        ordered = sorted(thresholds)
        if not ordered:
            raise ValueError("thresholds must contain at least one value")

        levels = len(ordered)
        result = np.zeros_like(self.gray)  # the band below ordered[0] stays 0
        for i in range(1, levels):
            band = (self.gray >= ordered[i - 1]) & (self.gray < ordered[i])
            result[band] = int(max_value * i / levels)
        result[self.gray >= ordered[-1]] = max_value
        return result


class ColorSegmentation:
    """Colour-range segmentation of one image, with a few preset ranges."""

    def __init__(self, image: np.ndarray):
        self.image = image

    def _hsv(self) -> np.ndarray:
        """Return the image converted with ``COLOR_BGR2HSV``."""
        return cv2.cvtColor(self.image, cv2.COLOR_BGR2HSV)

    def _cut(self, mask: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """Pair ``mask`` with the image restricted to it."""
        kept = cv2.bitwise_and(self.image, self.image, mask=mask)
        return mask, kept

    def segment_by_color_hsv(
        self,
        lower: Tuple[int, int, int],
        upper: Tuple[int, int, int]
    ) -> Tuple[np.ndarray, np.ndarray]:
        """Return ``(mask, masked image)`` for an HSV range."""
        low, high = np.array(lower), np.array(upper)
        return self._cut(cv2.inRange(self._hsv(), low, high))

    def segment_by_color_rgb(
        self,
        lower: Tuple[int, int, int],
        upper: Tuple[int, int, int]
    ) -> Tuple[np.ndarray, np.ndarray]:
        """Return ``(mask, masked image)`` for a range on the stored channels.

        No colour conversion is applied before the range test.
        """
        low, high = np.array(lower), np.array(upper)
        return self._cut(cv2.inRange(self.image, low, high))

    def segment_skin_color(self) -> Tuple[np.ndarray, np.ndarray]:
        """Segment the preset HSV range ``[0, 20, 70]`` .. ``[20, 255, 255]``."""
        low = np.array([0, 20, 70])
        high = np.array([20, 255, 255])
        return self._cut(cv2.inRange(self._hsv(), low, high))

    def segment_green(self) -> Tuple[np.ndarray, np.ndarray]:
        """Segment the preset HSV range ``[35, 43, 46]`` .. ``[77, 255, 255]``."""
        low = np.array([35, 43, 46])
        high = np.array([77, 255, 255])
        return self._cut(cv2.inRange(self._hsv(), low, high))

    def segment_red(self) -> Tuple[np.ndarray, np.ndarray]:
        """Segment red as the union of two HSV hue bands (0-10 and 156-180)."""
        hsv = self._hsv()
        low_band = cv2.inRange(hsv, np.array([0, 43, 46]), np.array([10, 255, 255]))
        high_band = cv2.inRange(hsv, np.array([156, 43, 46]), np.array([180, 255, 255]))
        return self._cut(cv2.bitwise_or(low_band, high_band))

33.4.2 高级分割

python
class AdvancedSegmentation:
    """Watershed, GrabCut, k-means, mean-shift and connected-component segmentation."""

    def __init__(self, image: np.ndarray):
        self.image = image
        # Three-dimensional input is treated as BGR and reduced to grayscale.
        if len(image.shape) == 3:
            self.gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        else:
            self.gray = image

    def watershed(self, markers: np.ndarray) -> np.ndarray:
        """Run ``cv2.watershed`` with the given markers and return the result.

        A two-dimensional image is first expanded with ``COLOR_GRAY2BGR``.
        """
        if len(self.image.shape) == 2:
            image = cv2.cvtColor(self.image, cv2.COLOR_GRAY2BGR)
        else:
            image = self.image

        markers = cv2.watershed(image, markers)
        return markers

    def grabcut(
        self,
        rect: Tuple[int, int, int, int],
        iter_count: int = 5
    ) -> Tuple[np.ndarray, np.ndarray]:
        """Run rectangle-initialised GrabCut and return ``(mask, foreground)``.

        The returned mask is 0 where GrabCut labelled the pixel 0 or 2 and 1
        elsewhere; the foreground is the image multiplied by that mask.
        """
        mask = np.zeros(self.image.shape[:2], np.uint8)
        bgd_model = np.zeros((1, 65), np.float64)
        fgd_model = np.zeros((1, 65), np.float64)

        cv2.grabCut(
            self.image, mask, rect,
            bgd_model, fgd_model,
            iter_count, cv2.GC_INIT_WITH_RECT
        )

        mask2 = np.where((mask == 2) | (mask == 0), 0, 1).astype(np.uint8)
        result = self.image * mask2[:, :, np.newaxis]

        return mask2, result

    def kmeans_segmentation(
        self,
        k: int = 3,
        attempts: int = 10
    ) -> np.ndarray:
        """Replace every pixel by the centre of its k-means colour cluster.

        Each pixel is one sample with as many features as the image has
        channels.  A hard-coded width of 3 would regroup the values of
        grayscale or 4-channel images into bogus triples.
        """
        channels = 1 if len(self.image.shape) == 2 else self.image.shape[2]
        pixel_values = np.float32(self.image.reshape((-1, channels)))

        criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 100, 0.2)
        _, labels, centers = cv2.kmeans(
            pixel_values, k, None,
            criteria, attempts,
            cv2.KMEANS_RANDOM_CENTERS
        )

        centers = np.uint8(centers)
        segmented = centers[labels.flatten()]
        return segmented.reshape(self.image.shape)

    def mean_shift_segmentation(
        self,
        spatial_radius: float = 20,
        color_radius: float = 30,
        min_size: int = 20
    ) -> np.ndarray:
        """Apply ``cv2.pyrMeanShiftFiltering`` to the image.

        NOTE(review): ``min_size`` is forwarded as the fifth positional
        argument, which OpenCV documents as ``maxLevel`` (pyramid levels),
        not a minimum region size -- confirm the intended meaning.
        """
        return cv2.pyrMeanShiftFiltering(
            self.image,
            spatial_radius,
            color_radius,
            None,
            min_size
        )

    def connected_components(
        self,
        connectivity: int = 8
    ) -> Tuple[int, np.ndarray]:
        """Threshold at 127 and return ``(label count, label image)``."""
        _, binary = cv2.threshold(self.gray, 127, 255, cv2.THRESH_BINARY)
        num_labels, labels = cv2.connectedComponents(binary, connectivity=connectivity)
        return num_labels, labels

33.5 目标检测

33.5.1 模板匹配

python
class TemplateMatching:
    """Locate a template inside an image with ``cv2.matchTemplate``.

    The image is stored unchanged in ``self.image``; ``self.gray`` holds the
    single-channel version that all matching runs against.
    """

    # Matching methods whose score is a difference: lower means a better match.
    _LOWER_IS_BETTER = (cv2.TM_SQDIFF, cv2.TM_SQDIFF_NORMED)

    def __init__(self, image: np.ndarray):
        self.image = image
        self.gray = self._to_gray(image)

    @staticmethod
    def _to_gray(image: np.ndarray) -> np.ndarray:
        """Return ``image`` as single-channel; 3-D input is treated as BGR."""
        if len(image.shape) == 3:
            return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        return image

    def match_template(
        self,
        template: np.ndarray,
        method: int = cv2.TM_CCOEFF_NORMED
    ) -> Tuple[float, Tuple[int, int], Tuple[int, int]]:
        """Find the single best match of ``template``.

        Args:
            template: Template image (colour or grayscale).
            method: One of the ``cv2.TM_*`` comparison methods.

        Returns:
            ``(score, top_left, bottom_right)`` of the best match, with corners
            given as ``(x, y)``. For the SQDIFF methods the best match is the
            minimum of the response map, otherwise the maximum.
        """
        template_gray = self._to_gray(template)

        result = cv2.matchTemplate(self.gray, template_gray, method)
        min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(result)

        if method in self._LOWER_IS_BETTER:
            top_left = min_loc
            match_value = min_val
        else:
            top_left = max_loc
            match_value = max_val

        h, w = template_gray.shape
        bottom_right = (top_left[0] + w, top_left[1] + h)

        return match_value, top_left, bottom_right

    def match_multiple(
        self,
        template: np.ndarray,
        threshold: float = 0.8,
        method: int = cv2.TM_CCOEFF_NORMED
    ) -> List[Tuple[Tuple[int, int], Tuple[int, int], float]]:
        """Find every location whose score passes ``threshold``.

        Args:
            template: Template image (colour or grayscale).
            threshold: Score cut-off. Locations with ``score >= threshold`` are
                kept, except for the SQDIFF methods where lower is better and
                ``score <= threshold`` is used instead.
            method: One of the ``cv2.TM_*`` comparison methods.

        Returns:
            A list of ``(top_left, bottom_right, score)`` with ``(x, y)``
            corners as plain Python ints, in row-major order of the response
            map.
        """
        template_gray = self._to_gray(template)

        result = cv2.matchTemplate(self.gray, template_gray, method)
        if method in self._LOWER_IS_BETTER:
            rows, cols = np.where(result <= threshold)
        else:
            rows, cols = np.where(result >= threshold)

        h, w = template_gray.shape
        matches = []
        for row, col in zip(rows, cols):
            # np.where yields numpy integers; convert so the result matches the
            # annotated plain-int tuples.
            x, y = int(col), int(row)
            matches.append(((x, y), (x + w, y + h), float(result[row, col])))

        return matches

    def draw_match(
        self,
        top_left: Tuple[int, int],
        bottom_right: Tuple[int, int],
        color: Tuple[int, int, int] = (0, 255, 0),
        thickness: int = 2
    ) -> np.ndarray:
        """Return a copy of ``self.image`` with the match rectangle drawn."""
        result = self.image.copy()
        cv2.rectangle(result, top_left, bottom_right, color, thickness)
        return result

    def non_max_suppression(
        self,
        boxes: List[Tuple[Tuple[int, int], Tuple[int, int], float]],
        overlap_thresh: float = 0.3,
        *,
        higher_is_better: bool = True
    ) -> List[Tuple[Tuple[int, int], Tuple[int, int], float]]:
        """Greedily drop boxes that overlap a better-scoring box.

        Args:
            boxes: ``(top_left, bottom_right, score)`` tuples.
            overlap_thresh: A box is discarded when its IoU with an already
                kept box is greater than or equal to this value.
            higher_is_better: Score ordering. Pass ``False`` for matches
                produced with the SQDIFF methods, where lower scores are
                better.

        Returns:
            The kept boxes, best score first.
        """
        remaining = sorted(
            boxes, key=lambda box: box[2], reverse=higher_is_better
        )
        keep = []

        while remaining:
            current, *rest = remaining
            keep.append(current)
            remaining = [
                box for box in rest
                if self._iou(current[0], current[1], box[0], box[1]) < overlap_thresh
            ]

        return keep

    @staticmethod
    def _iou(
        box1_tl: Tuple[int, int],
        box1_br: Tuple[int, int],
        box2_tl: Tuple[int, int],
        box2_br: Tuple[int, int]
    ) -> float:
        """Return the intersection-over-union of two corner-defined boxes."""
        x1 = max(box1_tl[0], box2_tl[0])
        y1 = max(box1_tl[1], box2_tl[1])
        x2 = min(box1_br[0], box2_br[0])
        y2 = min(box1_br[1], box2_br[1])

        if x2 < x1 or y2 < y1:
            return 0.0

        intersection = (x2 - x1) * (y2 - y1)
        area1 = (box1_br[0] - box1_tl[0]) * (box1_br[1] - box1_tl[1])
        area2 = (box2_br[0] - box2_tl[0]) * (box2_br[1] - box2_tl[1])

        union = area1 + area2 - intersection
        # Two zero-area boxes have an empty union; avoid dividing by zero.
        if union <= 0:
            return 0.0

        return intersection / union

33.5.2 级联分类器

python
class CascadeClassifier:
    """Small convenience wrapper around ``cv2.CascadeClassifier``."""

    def __init__(self, cascade_path: str):
        # The loaded OpenCV classifier is exposed as ``self.cascade``.
        self.cascade = cv2.CascadeClassifier(cascade_path)

    def detect(
        self,
        image: np.ndarray,
        scale_factor: float = 1.1,
        min_neighbors: int = 3,
        min_size: Tuple[int, int] = (30, 30),
        max_size: Optional[Tuple[int, int]] = None
    ) -> List[Tuple[int, int, int, int]]:
        """Run the cascade on ``image``.

        A 3-D image is converted from BGR to grayscale first; any other
        input is passed to the classifier unchanged.

        Returns:
            One tuple per detection, built from each row returned by
            ``detectMultiScale``.
        """
        needs_conversion = len(image.shape) == 3
        grayscale = (
            cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if needs_conversion else image
        )

        found = self.cascade.detectMultiScale(
            grayscale,
            scaleFactor=scale_factor,
            minNeighbors=min_neighbors,
            minSize=min_size,
            maxSize=max_size
        )

        return list(map(tuple, found))

    def draw_detections(
        self,
        image: np.ndarray,
        detections: List[Tuple[int, int, int, int]],
        color: Tuple[int, int, int] = (0, 255, 0),
        thickness: int = 2
    ) -> np.ndarray:
        """Return a copy of ``image`` with a rectangle per ``(x, y, w, h)``."""
        canvas = image.copy()
        for left, top, box_w, box_h in detections:
            corner = (left + box_w, top + box_h)
            cv2.rectangle(canvas, (left, top), corner, color, thickness)
        return canvas


class FaceDetector:
    """Detect faces, eyes and smiles with the Haar cascades shipped in
    ``cv2.data.haarcascades``.
    """

    def __init__(self):
        self.face_cascade = cv2.CascadeClassifier(
            cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
        )
        self.eye_cascade = cv2.CascadeClassifier(
            cv2.data.haarcascades + "haarcascade_eye.xml"
        )
        self.smile_cascade = cv2.CascadeClassifier(
            cv2.data.haarcascades + "haarcascade_smile.xml"
        )

    @staticmethod
    def _to_gray(image: np.ndarray) -> np.ndarray:
        """Return ``image`` as single-channel; 3-D input is treated as BGR."""
        if len(image.shape) == 3:
            return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        return image

    def detect_faces(
        self,
        image: np.ndarray,
        scale_factor: float = 1.1,
        min_neighbors: int = 5,
        min_size: Tuple[int, int] = (30, 30)
    ) -> List[Tuple[int, int, int, int]]:
        """Return one ``(x, y, w, h)`` tuple per face found in ``image``.

        Args:
            image: Colour (BGR) or grayscale image.
            scale_factor: Forwarded as ``scaleFactor``.
            min_neighbors: Forwarded as ``minNeighbors``.
            min_size: Forwarded as ``minSize``.
        """
        gray = self._to_gray(image)

        faces = self.face_cascade.detectMultiScale(
            gray,
            scaleFactor=scale_factor,
            minNeighbors=min_neighbors,
            minSize=min_size
        )

        return [tuple(face) for face in faces]

    def detect_eyes(
        self,
        image: np.ndarray,
        face_region: Tuple[int, int, int, int]
    ) -> List[Tuple[int, int, int, int]]:
        """Detect eyes inside ``face_region``.

        Args:
            image: Colour (BGR) or grayscale image.
            face_region: ``(x, y, w, h)`` of the face to search in.

        Returns:
            Eye boxes as ``(x, y, w, h)`` in the coordinates of ``image``.
        """
        x, y, w, h = face_region
        gray = self._to_gray(image)

        roi_gray = gray[y:y + h, x:x + w]
        eyes = self.eye_cascade.detectMultiScale(roi_gray)

        # Detections are relative to the ROI; shift back to image coordinates.
        return [(x + ex, y + ey, ew, eh) for ex, ey, ew, eh in eyes]

    def detect_smile(
        self,
        image: np.ndarray,
        face_region: Tuple[int, int, int, int]
    ) -> List[Tuple[int, int, int, int]]:
        """Detect smiles inside ``face_region``.

        Args:
            image: Colour (BGR) or grayscale image.
            face_region: ``(x, y, w, h)`` of the face to search in.

        Returns:
            Smile boxes as ``(x, y, w, h)`` in the coordinates of ``image``.
        """
        x, y, w, h = face_region
        gray = self._to_gray(image)

        roi_gray = gray[y:y + h, x:x + w]
        smiles = self.smile_cascade.detectMultiScale(
            roi_gray,
            scaleFactor=1.7,
            minNeighbors=22,
            minSize=(25, 25)
        )

        # Detections are relative to the ROI; shift back to image coordinates.
        return [(x + sx, y + sy, sw, sh) for sx, sy, sw, sh in smiles]

    def draw_face_with_features(
        self,
        image: np.ndarray,
        face_color: Tuple[int, int, int] = (255, 0, 0),
        eye_color: Tuple[int, int, int] = (0, 255, 0),
        smile_color: Tuple[int, int, int] = (0, 0, 255)
    ) -> np.ndarray:
        """Return a copy of ``image`` annotated with faces, eyes and smiles.

        Args:
            image: Image to analyse; it is not modified.
            face_color: Rectangle colour for faces.
            eye_color: Rectangle colour for eyes.
            smile_color: Rectangle colour for smiles.
        """
        result = image.copy()
        # Convert once and reuse: the detect_* methods accept a grayscale
        # image as-is, so this avoids re-converting the full frame for every
        # face and every feature.
        gray = self._to_gray(image)
        faces = self.detect_faces(gray)

        for face in faces:
            x, y, w, h = face
            cv2.rectangle(result, (x, y), (x + w, y + h), face_color, 2)

            eyes = self.detect_eyes(gray, face)
            for ex, ey, ew, eh in eyes:
                cv2.rectangle(result, (ex, ey), (ex + ew, ey + eh), eye_color, 2)

            smiles = self.detect_smile(gray, face)
            for sx, sy, sw, sh in smiles:
                cv2.rectangle(result, (sx, sy), (sx + sw, sy + sh), smile_color, 2)

        return result

33.6 视频处理

33.6.1 视频读写

python
class VideoProcessor:
    """Read frames from a ``cv2.VideoCapture`` and optionally process them.

    Usable as a context manager; the capture is released on exit.
    """

    def __init__(self, source: Union[str, int] = 0):
        """Open ``source`` and cache the properties reported by the capture.

        Args:
            source: Value handed to ``cv2.VideoCapture`` (a string or an
                integer).
        """
        self.cap = cv2.VideoCapture(source)
        self.fps = self.cap.get(cv2.CAP_PROP_FPS)
        self.width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        self.frame_count = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))

    def read_frame(self) -> Tuple[bool, Optional[np.ndarray]]:
        """Return the ``(success, frame)`` pair from the next capture read."""
        return self.cap.read()

    def read_all_frames(self) -> List[np.ndarray]:
        """Read until the capture reports failure and return all frames.

        Every frame is kept in memory at once.
        """
        frames = []
        while True:
            ret, frame = self.cap.read()
            if not ret:
                break
            frames.append(frame)
        return frames

    def process_video(
        self,
        process_func: callable,
        output_path: Optional[str] = None,
        codec: str = "mp4v"
    ) -> Optional[List[np.ndarray]]:
        """Apply ``process_func`` to every remaining frame.

        Args:
            process_func: Called with each frame; its return value is collected
                and, when writing, passed to the video writer.
            output_path: When given, processed frames are also written to this
                file using the capture's fps and frame size.
            codec: FourCC code used for the output file.

        Returns:
            The list of processed frames, in read order.
        """
        results = []

        out = None
        if output_path:
            fourcc = cv2.VideoWriter_fourcc(*codec)
            out = cv2.VideoWriter(output_path, fourcc, self.fps, (self.width, self.height))

        try:
            while True:
                ret, frame = self.cap.read()
                if not ret:
                    break

                processed = process_func(frame)
                results.append(processed)

                if out is not None:
                    out.write(processed)
        finally:
            # Release the writer even when process_func raises, so the output
            # file handle is not leaked.
            if out is not None:
                out.release()

        return results

    def release(self) -> None:
        """Release the underlying capture."""
        self.cap.release()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.release()


class VideoWriter:
    """Context-manager wrapper around ``cv2.VideoWriter``."""

    def __init__(
        self,
        output_path: str,
        fps: float,
        width: int,
        height: int,
        codec: str = "mp4v"
    ):
        """Create the underlying writer for ``output_path``.

        The FourCC is built from the characters of ``codec`` and the frame
        size is passed as ``(width, height)``.
        """
        frame_size = (width, height)
        self.writer = cv2.VideoWriter(
            output_path,
            cv2.VideoWriter_fourcc(*codec),
            fps,
            frame_size,
        )

    def write(self, frame: np.ndarray) -> None:
        """Append a single frame."""
        self.writer.write(frame)

    def write_frames(self, frames: List[np.ndarray]) -> None:
        """Append every frame of ``frames`` in order."""
        emit = self.writer.write
        for single_frame in frames:
            emit(single_frame)

    def release(self) -> None:
        """Release the underlying writer."""
        self.writer.release()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.release()

33.6.2 物体追踪

python
class ObjectTracker:
    """Single-object tracker backed by one of the ``cv2.legacy`` trackers."""

    def __init__(self, tracker_type: str = "CSRT"):
        """Create the tracker named by ``tracker_type`` (case-insensitive).

        Raises:
            ValueError: If ``tracker_type`` is not one of the known names.
        """
        factories = {
            "BOOSTING": cv2.legacy.TrackerBoosting_create,
            "MIL": cv2.legacy.TrackerMIL_create,
            "KCF": cv2.legacy.TrackerKCF_create,
            "TLD": cv2.legacy.TrackerTLD_create,
            "MEDIANFLOW": cv2.legacy.TrackerMedianFlow_create,
            "CSRT": cv2.legacy.TrackerCSRT_create,
            "MOSSE": cv2.legacy.TrackerMOSSE_create
        }

        normalized = tracker_type.upper()
        if normalized not in factories:
            raise ValueError(f"Unknown tracker type: {tracker_type}")

        self.tracker = factories[normalized]()
        self.initialized = False

    def init(self, frame: np.ndarray, bbox: Tuple[int, int, int, int]) -> None:
        """Start tracking ``bbox`` in ``frame`` and mark the tracker ready."""
        self.tracker.init(frame, bbox)
        self.initialized = True

    def update(self, frame: np.ndarray) -> Tuple[bool, Tuple[int, int, int, int]]:
        """Return the underlying tracker's update result for ``frame``.

        Raises:
            RuntimeError: If ``init`` has not been called yet.
        """
        if self.initialized:
            return self.tracker.update(frame)
        raise RuntimeError("Tracker not initialized. Call init() first.")

    def draw_tracking(
        self,
        frame: np.ndarray,
        bbox: Tuple[int, int, int, int],
        color: Tuple[int, int, int] = (0, 255, 0),
        thickness: int = 2
    ) -> np.ndarray:
        """Return a copy of ``frame`` with ``bbox`` (x, y, w, h) drawn on it."""
        canvas = frame.copy()
        # Box values are truncated to ints before drawing.
        left, top, box_w, box_h = map(int, bbox)
        cv2.rectangle(
            canvas, (left, top), (left + box_w, top + box_h), color, thickness
        )
        return canvas


class MultiObjectTracker:
    """Track several objects at once, one ``cv2.legacy`` tracker per object.

    Each added object gets an integer id; ids increase from 0 and are never
    reused.
    """

    # Tracker name -> factory attribute on ``cv2.legacy``. Same set of names
    # that ObjectTracker accepts; resolved lazily so only the requested
    # factory has to exist in the installed OpenCV build.
    _FACTORY_NAMES = {
        "BOOSTING": "TrackerBoosting_create",
        "MIL": "TrackerMIL_create",
        "KCF": "TrackerKCF_create",
        "TLD": "TrackerTLD_create",
        "MEDIANFLOW": "TrackerMedianFlow_create",
        "CSRT": "TrackerCSRT_create",
        "MOSSE": "TrackerMOSSE_create",
    }
    _DEFAULT_FACTORY_NAME = "TrackerCSRT_create"

    def __init__(self, tracker_type: str = "CSRT"):
        """Remember ``tracker_type``; trackers are created on ``add``."""
        self.tracker_type = tracker_type
        # Annotations that mention Dict are quoted: the typing import visible
        # at the top of the file lists List, Tuple, Optional and Union only,
        # so an unquoted Dict would raise NameError when the class is defined.
        self.trackers: "Dict[int, cv2.Tracker]" = {}
        self.next_id = 0

    def add(self, frame: np.ndarray, bbox: Tuple[int, int, int, int]) -> int:
        """Start tracking ``bbox`` in ``frame``.

        An unrecognised ``tracker_type`` falls back to CSRT.

        Returns:
            The id assigned to the new track.
        """
        factory_name = self._FACTORY_NAMES.get(
            self.tracker_type.upper(),
            self._DEFAULT_FACTORY_NAME
        )
        tracker = getattr(cv2.legacy, factory_name)()
        tracker.init(frame, bbox)

        track_id = self.next_id
        self.trackers[track_id] = tracker
        self.next_id += 1

        return track_id

    def remove(self, track_id: int) -> None:
        """Stop tracking ``track_id``; unknown ids are ignored."""
        self.trackers.pop(track_id, None)

    def update(
        self,
        frame: np.ndarray
    ) -> "Dict[int, Tuple[bool, Tuple[int, int, int, int]]]":
        """Update every tracker with ``frame``.

        Returns:
            A dict mapping track id to ``(success, bbox)``, with the bbox
            values truncated to ints.
        """
        results = {}
        for track_id, tracker in self.trackers.items():
            success, bbox = tracker.update(frame)
            results[track_id] = (success, tuple(int(v) for v in bbox))
        return results

33.7 知识图谱

33.7.1 计算机视觉技术体系

计算机视觉技术层次

┌─────────────────────────────────────────────────────────────┐
│                    应用层                                   │
│  人脸识别、目标检测、图像分割、OCR、姿态估计               │
└─────────────────────────────────────────────────────────────┘


┌─────────────────────────────────────────────────────────────┐
│                    算法层                                   │
│  特征提取、深度学习、传统算法、图像处理                    │
└─────────────────────────────────────────────────────────────┘


┌─────────────────────────────────────────────────────────────┐
│                    工具层                                   │
│  OpenCV、PIL、scikit-image、PyTorch、TensorFlow            │
└─────────────────────────────────────────────────────────────┘

OpenCV核心模块:
┌─────────────────────────────────────────┐
│ cv2.imread/imwrite  图像读写           │
│ cv2.cvtColor        颜色空间转换       │
│ cv2.resize          图像缩放           │
│ cv2.GaussianBlur    高斯模糊           │
│ cv2.Canny           边缘检测           │
│ cv2.findContours    轮廓检测           │
│ cv2.VideoCapture    视频捕获           │
└─────────────────────────────────────────┘

33.7.2 图像处理流程

图像处理标准流程

┌─────────────────────────────────────────┐
│ 1. 图像获取    读取图像/视频           │
│ 2. 预处理      缩放、去噪、归一化      │
│ 3. 特征提取    边缘、角点、纹理        │
│ 4. 处理分析    分割、检测、识别        │
│ 5. 结果输出    标注、保存、展示        │
└─────────────────────────────────────────┘

33.8 技术选型指南

33.8.1 图像处理库选型

| 场景 | 推荐库 | 原因 |
| --- | --- | --- |
| 传统图像处理 | OpenCV | 功能全面 |
| 简单图像操作 | PIL/Pillow | 易用性好 |
| 科学图像分析 | scikit-image | 算法丰富 |
| 深度学习视觉 | PyTorch/TensorFlow | GPU加速 |

33.8.2 特征提取方法选型

| 场景 | 推荐方法 | 说明 |
| --- | --- | --- |
| 边缘检测 | Canny | 效果稳定 |
| 角点检测 | Harris/SIFT | 尺度不变 |
| 特征匹配 | ORB | 速度快 |
| 深度特征 | CNN | 表达能力强 |

33.9 常见问题与解决方案

33.9.1 图像读取问题

python
# 问题:中文路径读取失败
# 解决方案:使用numpy读取

import cv2
import numpy as np

def imread_chinese(path, flags=-1):
    """Read an image from a path that ``cv2.imread`` cannot handle.

    The file is read as raw bytes with numpy and decoded in memory with
    ``cv2.imdecode``, so the path itself never passes through OpenCV.

    Args:
        path: Location of the image file.
        flags: Decode flag passed to ``cv2.imdecode``. The default of -1
            equals ``cv2.IMREAD_UNCHANGED``.

    Returns:
        The decoded image, or ``None`` when the file is empty.
    """
    raw_bytes = np.fromfile(path, dtype=np.uint8)
    # cv2.imdecode raises on an empty buffer; report failure with None
    # instead, the same way cv2.imread does.
    if raw_bytes.size == 0:
        return None
    return cv2.imdecode(raw_bytes, flags)

33.9.2 颜色空间转换

python
# 问题:BGR与RGB混淆
# 解决方案:明确转换

import cv2

# OpenCV默认BGR
img = cv2.imread('image.jpg')
if img is None:
    # cv2.imread reports a missing or unreadable file by returning None
    # rather than raising; fail here instead of inside cvtColor.
    raise FileNotFoundError("Could not read image: image.jpg")
# Convert explicitly from OpenCV's BGR order to the layout needed downstream.
rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

33.9.3 视频处理内存问题

python
# 问题:处理大视频内存溢出
# 解决方案:逐帧处理

cap = cv2.VideoCapture('video.mp4')
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        # Handle one frame at a time so the whole video is never held in memory.
        process_frame(frame)
finally:
    # Release the capture even if frame processing raises.
    cap.release()

33.10 本章小结

本章详细介绍了Python计算机视觉的核心概念和实践:

  1. 图像基础:图像表示、色彩空间、图像读写
  2. 图像处理:几何变换、滤波、形态学操作
  3. 特征检测:角点检测、特征描述符、轮廓检测
  4. 图像分割:阈值分割、颜色分割、高级分割算法
  5. 目标检测:模板匹配、级联分类器、人脸检测
  6. 视频处理:视频读写、物体追踪
  7. 应用实例:完整的视觉处理流程

练习题

  1. 实现一个图像滤镜系统,支持模糊、锐化、边缘检测等效果
  2. 开发一个文档扫描应用,自动检测文档边缘并进行透视变换
  3. 实现一个车牌识别系统,包含车牌定位和字符识别
  4. 开发一个实时人脸检测应用,支持多人脸检测和标记
  5. 实现一个运动检测系统,检测视频中的移动物体

扩展阅读

Python技术丛书 - 江苏省宿城中等专业学校