第33章 计算机视觉
学习目标
完成本章学习后，你将能够：
- 理解图像基础：像素、通道、色彩空间、图像格式
- 掌握 OpenCV 核心操作：图像读写、显示、基本变换
- 实现图像处理：滤波、形态学操作、直方图处理
- 进行特征检测：边缘检测、角点检测、blob 检测
- 实现图像分割：阈值分割、区域生长、轮廓检测
- 应用几何变换：仿射变换、透视变换、图像配准
- 实现目标检测：模板匹配、特征匹配、级联分类器
- 构建视觉应用：人脸检测、物体追踪、文档扫描
33.1 图像基础
33.1.1 图像表示
python
from dataclasses import dataclass
from enum import Enum
from typing import Callable, Dict, List, Optional, Tuple, Union

import cv2
import numpy as np
class ColorSpace(Enum):
    """Color spaces that an image stored in BGR order can be converted to.

    Every member except ``BGR`` carries the ``cv2.cvtColor`` conversion code
    that maps a BGR image into that space.  ``BGR`` is the identity case:
    OpenCV defines no ``COLOR_BGR2BGR`` code (referencing it raises
    ``AttributeError`` while this class is being created), so its value is
    ``None`` and callers must skip the conversion rather than hand the value
    to ``cv2.cvtColor``.
    """

    BGR = None  # identity: there is no BGR -> BGR conversion code
    RGB = cv2.COLOR_BGR2RGB
    GRAY = cv2.COLOR_BGR2GRAY
    HSV = cv2.COLOR_BGR2HSV
    LAB = cv2.COLOR_BGR2LAB
    YUV = cv2.COLOR_BGR2YUV
    HLS = cv2.COLOR_BGR2HLS
@dataclass
class ImageInfo:
    """Summary of an image array's geometry and storage."""

    width: int  # number of columns (image.shape[1])
    height: int  # number of rows (image.shape[0])
    channels: int  # third axis length, or 1 for a 2-D array
    dtype: np.dtype  # element type of the array
    total_pixels: int  # width * height
    memory_size: int  # image.nbytes

    @classmethod
    def from_image(cls, image: np.ndarray) -> "ImageInfo":
        """Build an ``ImageInfo`` by inspecting ``image``.

        Args:
            image: Array whose first two axes are rows and columns.

        Returns:
            A populated ``ImageInfo``; ``channels`` is 1 when ``image`` is
            not three-dimensional.
        """
        rows, cols = image.shape[:2]
        has_channel_axis = len(image.shape) == 3
        depth = image.shape[2] if has_channel_axis else 1
        return cls(cols, rows, depth, image.dtype, cols * rows, image.nbytes)

    @property
    def shape(self) -> Tuple[int, int, int]:
        """Return ``(height, width, channels)``."""
        return (self.height, self.width, self.channels)

    def __repr__(self) -> str:
        # Deliberately omits total_pixels and memory_size to stay short.
        geometry = f"ImageInfo(width={self.width}, height={self.height}, "
        storage = f"channels={self.channels}, dtype={self.dtype})"
        return geometry + storage
class ImageLoader:
    """Stateless helpers for reading, writing, decoding and encoding images."""

    @staticmethod
    def load(filepath: str, flags: int = cv2.IMREAD_COLOR) -> Optional[np.ndarray]:
        """Read an image file with ``cv2.imread``.

        Args:
            filepath: Path of the image to read.
            flags: ``cv2.IMREAD_*`` mode passed straight to ``cv2.imread``.

        Returns:
            The decoded image.  ``cv2.imread`` reports an unreadable file by
            returning ``None`` instead of raising, so callers must check.
        """
        return cv2.imread(filepath, flags)

    @staticmethod
    def load_grayscale(filepath: str) -> Optional[np.ndarray]:
        """Read ``filepath`` with ``cv2.IMREAD_GRAYSCALE`` (``None`` on failure)."""
        return cv2.imread(filepath, cv2.IMREAD_GRAYSCALE)

    @staticmethod
    def load_unchanged(filepath: str) -> Optional[np.ndarray]:
        """Read ``filepath`` with ``cv2.IMREAD_UNCHANGED`` (``None`` on failure)."""
        return cv2.imread(filepath, cv2.IMREAD_UNCHANGED)

    @staticmethod
    def save(filepath: str, image: np.ndarray) -> bool:
        """Write ``image`` to ``filepath`` and return ``cv2.imwrite``'s result."""
        return cv2.imwrite(filepath, image)

    @staticmethod
    def load_from_bytes(data: bytes, flags: int = cv2.IMREAD_COLOR) -> Optional[np.ndarray]:
        """Decode an encoded image held in memory.

        Args:
            data: Raw encoded bytes (for example the content of a JPEG file).
            flags: ``cv2.IMREAD_*`` mode passed to ``cv2.imdecode``.

        Returns:
            The result of ``cv2.imdecode``; treat ``None`` as a decode failure.
        """
        encoded = np.frombuffer(data, np.uint8)
        return cv2.imdecode(encoded, flags)

    @staticmethod
    def encode_to_bytes(image: np.ndarray, ext: str = ".jpg") -> bytes:
        """Encode ``image`` in the format selected by ``ext``.

        Args:
            image: Image to encode.
            ext: File extension (including the dot) that selects the codec.

        Returns:
            The encoded image as ``bytes``.

        Raises:
            ValueError: If ``cv2.imencode`` reports that encoding failed.
        """
        ok, buffer = cv2.imencode(ext, image)
        # The success flag used to be discarded, which could hand callers
        # the bytes of a meaningless buffer.
        if not ok:
            raise ValueError(f"Could not encode image as {ext!r}")
        return buffer.tobytes()
class ImageDisplay:
    """Wrapper around OpenCV display windows that remembers the ones it opened."""

    def __init__(self):
        # Names of windows created through show(), in creation order.
        self._windows: List[str] = []

    def show(self, image: np.ndarray, window_name: str = "Image") -> None:
        """Display ``image`` in ``window_name``, creating the window on first use."""
        is_new_window = window_name not in self._windows
        if is_new_window:
            cv2.namedWindow(window_name)
            self._windows.append(window_name)
        cv2.imshow(window_name, image)

    def show_multiple(self, images: List[Tuple[np.ndarray, str]]) -> None:
        """Show every ``(image, window_name)`` pair in ``images``."""
        for entry in images:
            picture, title = entry
            self.show(picture, title)

    def wait_key(self, delay: int = 0) -> int:
        """Call ``cv2.waitKey(delay)`` and return the low byte of its result."""
        key_code = cv2.waitKey(delay)
        return key_code & 0xFF

    def close_all(self) -> None:
        """Destroy all OpenCV windows and forget every tracked name."""
        cv2.destroyAllWindows()
        self._windows.clear()

    def close(self, window_name: str) -> None:
        """Destroy ``window_name`` and stop tracking it if it was tracked."""
        cv2.destroyWindow(window_name)
        tracked = self._windows
        if window_name in tracked:
            tracked.remove(window_name)


# 33.1.2 色彩空间转换
python
class ColorConverter:
    """Convert one BGR image into other color spaces."""

    def __init__(self, image: np.ndarray):
        """Store ``image``; every conversion below treats it as BGR input."""
        self.image = image

    def to_rgb(self) -> np.ndarray:
        """Return the image converted with ``cv2.COLOR_BGR2RGB``."""
        return cv2.cvtColor(self.image, cv2.COLOR_BGR2RGB)

    def to_gray(self) -> np.ndarray:
        """Return the image converted with ``cv2.COLOR_BGR2GRAY``."""
        return cv2.cvtColor(self.image, cv2.COLOR_BGR2GRAY)

    def to_hsv(self) -> np.ndarray:
        """Return the image converted with ``cv2.COLOR_BGR2HSV``."""
        return cv2.cvtColor(self.image, cv2.COLOR_BGR2HSV)

    def to_lab(self) -> np.ndarray:
        """Return the image converted with ``cv2.COLOR_BGR2LAB``."""
        return cv2.cvtColor(self.image, cv2.COLOR_BGR2LAB)

    def to_hls(self) -> np.ndarray:
        """Return the image converted with ``cv2.COLOR_BGR2HLS``."""
        return cv2.cvtColor(self.image, cv2.COLOR_BGR2HLS)

    def to_yuv(self) -> np.ndarray:
        """Return the image converted with ``cv2.COLOR_BGR2YUV``."""
        return cv2.cvtColor(self.image, cv2.COLOR_BGR2YUV)

    def convert(self, color_space: ColorSpace) -> np.ndarray:
        """Convert the image to ``color_space``.

        Args:
            color_space: Target space; its value is used as the
                ``cv2.cvtColor`` conversion code.

        Returns:
            The converted image.  ``ColorSpace.BGR`` is the identity case and
            yields a copy, because OpenCV has no BGR -> BGR conversion code
            to pass to ``cv2.cvtColor``.
        """
        if color_space is ColorSpace.BGR:
            # Copy so the caller always gets a fresh array, as cvtColor does.
            return self.image.copy()
        return cv2.cvtColor(self.image, color_space.value)
class ColorExtractor:
    """Static helpers for pulling channels or color ranges out of an image."""

    @staticmethod
    def extract_channel(image: np.ndarray, channel: int) -> np.ndarray:
        """Return the plane at index ``channel`` of the third axis."""
        plane = image[:, :, channel]
        return plane

    @staticmethod
    def split_channels(image: np.ndarray) -> List[np.ndarray]:
        """Split ``image`` with ``cv2.split`` and return the planes as a list."""
        planes = cv2.split(image)
        return list(planes)

    @staticmethod
    def merge_channels(channels: List[np.ndarray]) -> np.ndarray:
        """Combine ``channels`` into one image with ``cv2.merge``."""
        merged = cv2.merge(channels)
        return merged

    @staticmethod
    def extract_color_range_hsv(
        image: np.ndarray,
        lower: Tuple[int, int, int],
        upper: Tuple[int, int, int]
    ) -> Tuple[np.ndarray, np.ndarray]:
        """Keep the pixels whose HSV value lies between ``lower`` and ``upper``.

        Args:
            image: Input image, converted with ``cv2.COLOR_BGR2HSV``.
            lower: Lower HSV bound passed to ``cv2.inRange``.
            upper: Upper HSV bound passed to ``cv2.inRange``.

        Returns:
            ``(mask, result)`` where ``result`` is ``image`` masked by ``mask``.
        """
        lower_bound = np.array(lower)
        upper_bound = np.array(upper)
        converted = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
        mask = cv2.inRange(converted, lower_bound, upper_bound)
        return mask, cv2.bitwise_and(image, image, mask=mask)

    @staticmethod
    def extract_color_range_rgb(
        image: np.ndarray,
        lower: Tuple[int, int, int],
        upper: Tuple[int, int, int]
    ) -> Tuple[np.ndarray, np.ndarray]:
        """Keep the pixels whose raw channel values lie in ``[lower, upper]``.

        The bounds are compared against ``image`` as given, with no color
        conversion, so they must follow the image's own channel order.

        Returns:
            ``(mask, result)`` where ``result`` is ``image`` masked by ``mask``.
        """
        lower_bound = np.array(lower)
        upper_bound = np.array(upper)
        mask = cv2.inRange(image, lower_bound, upper_bound)
        return mask, cv2.bitwise_and(image, image, mask=mask)
class ColorAdjustment:
    """Static helpers for brightness, contrast, saturation and gamma changes."""

    @staticmethod
    def _shift_hsv_channel(image: np.ndarray, index: int, value: int) -> np.ndarray:
        """Add ``value`` to one HSV channel of a BGR image and convert back."""
        hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
        channels = list(cv2.split(hsv))
        shifted = cv2.add(channels[index], value)
        channels[index] = np.clip(shifted, 0, 255).astype(np.uint8)
        return cv2.cvtColor(cv2.merge(channels), cv2.COLOR_HSV2BGR)

    @staticmethod
    def adjust_brightness(image: np.ndarray, value: int) -> np.ndarray:
        """Add ``value`` to the V channel of ``image`` (BGR in, BGR out)."""
        return ColorAdjustment._shift_hsv_channel(image, 2, value)

    @staticmethod
    def adjust_contrast(image: np.ndarray, alpha: float) -> np.ndarray:
        """Scale pixel values by ``alpha`` using ``cv2.convertScaleAbs``."""
        return cv2.convertScaleAbs(image, alpha=alpha, beta=0)

    @staticmethod
    def adjust_saturation(image: np.ndarray, value: int) -> np.ndarray:
        """Add ``value`` to the S channel of ``image`` (BGR in, BGR out)."""
        return ColorAdjustment._shift_hsv_channel(image, 1, value)

    @staticmethod
    def gamma_correction(image: np.ndarray, gamma: float) -> np.ndarray:
        """Apply gamma correction through a 256-entry lookup table.

        Each input level ``i`` is mapped to ``(i / 255) ** (1 / gamma) * 255``.

        Args:
            image: Image passed to ``cv2.LUT``.
            gamma: Correction factor; must be positive.

        Returns:
            The image remapped through the lookup table.

        Raises:
            ValueError: If ``gamma`` is zero or negative, for which the
                exponent ``1 / gamma`` is undefined or yields infinities.
        """
        if gamma <= 0:
            raise ValueError(f"gamma must be positive, got {gamma}")
        inv_gamma = 1.0 / gamma
        table = np.array([
            ((i / 255.0) ** inv_gamma) * 255
            for i in np.arange(0, 256)
        ]).astype(np.uint8)
        return cv2.LUT(image, table)


# 33.2 图像处理
33.2.1 几何变换
python
class GeometricTransform:
    """Geometric operations (resize, rotate, flip, warp, crop) on one image."""

    def __init__(self, image: np.ndarray):
        """Store the image every transform operates on."""
        self.image = image

    def resize(
        self,
        width: Optional[int] = None,
        height: Optional[int] = None,
        scale: Optional[float] = None,
        interpolation: int = cv2.INTER_LINEAR
    ) -> np.ndarray:
        """Resize to an explicit size or by a uniform scale factor.

        Args:
            width: Target width; used only when ``scale`` is ``None``.
            height: Target height; used only when ``scale`` is ``None``.
            scale: Factor applied to both dimensions; overrides
                ``width`` and ``height`` when given.
            interpolation: ``cv2.INTER_*`` flag.

        Raises:
            ValueError: If ``scale`` is ``None`` and either ``width`` or
                ``height`` is missing.
        """
        if scale is None:
            if width is None or height is None:
                raise ValueError("Must specify width and height, or scale")
            target = (width, height)
        else:
            target = (
                int(self.image.shape[1] * scale),
                int(self.image.shape[0] * scale),
            )
        return cv2.resize(self.image, target, interpolation=interpolation)

    def rotate(
        self,
        angle: float,
        center: Optional[Tuple[int, int]] = None,
        scale: float = 1.0
    ) -> np.ndarray:
        """Rotate about ``center`` (image centre by default), keeping the size."""
        rows, cols = self.image.shape[:2]
        pivot = (cols // 2, rows // 2) if center is None else center
        rotation = cv2.getRotationMatrix2D(pivot, angle, scale)
        return cv2.warpAffine(self.image, rotation, (cols, rows))

    def flip(self, flip_code: int = 1) -> np.ndarray:
        """Flip the image with ``cv2.flip`` using ``flip_code``."""
        return cv2.flip(self.image, flip_code)

    def flip_horizontal(self) -> np.ndarray:
        """Flip with code 1."""
        return cv2.flip(self.image, flipCode=1)

    def flip_vertical(self) -> np.ndarray:
        """Flip with code 0."""
        return cv2.flip(self.image, flipCode=0)

    def translate(self, tx: int, ty: int) -> np.ndarray:
        """Shift the image by ``(tx, ty)`` pixels, keeping the size."""
        rows, cols = self.image.shape[:2]
        shift = np.float32([[1, 0, tx], [0, 1, ty]])
        return cv2.warpAffine(self.image, shift, (cols, rows))

    def crop(self, x: int, y: int, width: int, height: int) -> np.ndarray:
        """Return the slice whose top-left corner is ``(x, y)``."""
        bottom = y + height
        right = x + width
        return self.image[y:bottom, x:right]

    def crop_center(self, width: int, height: int) -> np.ndarray:
        """Return a ``width`` x ``height`` slice centred in the image."""
        rows, cols = self.image.shape[:2]
        left = (cols - width) // 2
        top = (rows - height) // 2
        return self.image[top:top + height, left:left + width]

    def affine_transform(
        self,
        src_points: List[Tuple[float, float]],
        dst_points: List[Tuple[float, float]]
    ) -> np.ndarray:
        """Warp with the affine map taking ``src_points`` to ``dst_points``."""
        source = np.float32(src_points)
        target = np.float32(dst_points)
        affine = cv2.getAffineTransform(source, target)
        rows, cols = self.image.shape[:2]
        return cv2.warpAffine(self.image, affine, (cols, rows))

    def perspective_transform(
        self,
        src_points: List[Tuple[float, float]],
        dst_points: List[Tuple[float, float]]
    ) -> np.ndarray:
        """Warp with the perspective map taking ``src_points`` to ``dst_points``."""
        source = np.float32(src_points)
        target = np.float32(dst_points)
        homography = cv2.getPerspectiveTransform(source, target)
        rows, cols = self.image.shape[:2]
        return cv2.warpPerspective(self.image, homography, (cols, rows))

    def rotate_bound(self, angle: float) -> np.ndarray:
        """Rotate about the centre, enlarging the canvas to fit the result."""
        rows, cols = self.image.shape[:2]
        cx, cy = cols // 2, rows // 2
        rotation = cv2.getRotationMatrix2D((cx, cy), angle, 1.0)
        abs_cos = np.abs(rotation[0, 0])
        abs_sin = np.abs(rotation[0, 1])
        # Size of the axis-aligned box that contains the rotated image.
        out_w = int((rows * abs_sin) + (cols * abs_cos))
        out_h = int((rows * abs_cos) + (cols * abs_sin))
        # Move the rotation centre to the centre of the enlarged canvas.
        rotation[0, 2] += (out_w / 2) - cx
        rotation[1, 2] += (out_h / 2) - cy
        return cv2.warpAffine(self.image, rotation, (out_w, out_h))


# 33.2.2 图像滤波
python
class ImageFilter:
    """Smoothing and convolution filters applied to one stored image."""

    def __init__(self, image: np.ndarray):
        """Store the image every filter reads from."""
        self.image = image

    def blur(self, ksize: Tuple[int, int] = (5, 5)) -> np.ndarray:
        """Apply ``cv2.blur`` with kernel size ``ksize``."""
        smoothed = cv2.blur(self.image, ksize)
        return smoothed

    def gaussian_blur(self, ksize: Tuple[int, int] = (5, 5), sigma: float = 0) -> np.ndarray:
        """Apply ``cv2.GaussianBlur`` with kernel ``ksize`` and ``sigma``."""
        smoothed = cv2.GaussianBlur(self.image, ksize, sigma)
        return smoothed

    def median_blur(self, ksize: int = 5) -> np.ndarray:
        """Apply ``cv2.medianBlur`` with aperture ``ksize``."""
        smoothed = cv2.medianBlur(self.image, ksize)
        return smoothed

    def bilateral_filter(
        self,
        d: int = 9,
        sigma_color: float = 75,
        sigma_space: float = 75
    ) -> np.ndarray:
        """Apply ``cv2.bilateralFilter`` with the given diameter and sigmas."""
        smoothed = cv2.bilateralFilter(self.image, d, sigma_color, sigma_space)
        return smoothed

    def box_filter(self, ksize: Tuple[int, int] = (5, 5)) -> np.ndarray:
        """Apply ``cv2.boxFilter`` keeping the source depth (``ddepth=-1``)."""
        source_depth = -1
        return cv2.boxFilter(self.image, source_depth, ksize)

    def custom_kernel(self, kernel: np.ndarray) -> np.ndarray:
        """Convolve with ``kernel`` via ``cv2.filter2D`` at the source depth."""
        source_depth = -1
        return cv2.filter2D(self.image, source_depth, kernel)

    def sharpen(self) -> np.ndarray:
        """Convolve with a 3x3 kernel: centre 5, 4-neighbours -1."""
        sharpen_rows = [
            [0, -1, 0],
            [-1, 5, -1],
            [0, -1, 0],
        ]
        return cv2.filter2D(self.image, -1, np.array(sharpen_rows))

    def emboss(self) -> np.ndarray:
        """Convolve with a 3x3 diagonal emboss kernel."""
        emboss_rows = [
            [-2, -1, 0],
            [-1, 1, 1],
            [0, 1, 2],
        ]
        return cv2.filter2D(self.image, -1, np.array(emboss_rows))

    def edge_enhance(self) -> np.ndarray:
        """Convolve with a 3x3 kernel: centre 9, all 8 neighbours -1."""
        enhance_rows = [
            [-1, -1, -1],
            [-1, 9, -1],
            [-1, -1, -1],
        ]
        return cv2.filter2D(self.image, -1, np.array(enhance_rows))
class EdgeDetection:
    """Edge detectors that run on a grayscale view of the input image."""

    def __init__(self, image: np.ndarray):
        """Store ``image`` and a grayscale version of it.

        A three-dimensional input is converted with ``cv2.COLOR_BGR2GRAY``;
        any other input is used as the grayscale image unchanged.
        """
        if len(image.shape) == 3:
            self.gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        else:
            self.gray = image
        self.image = image

    @staticmethod
    def _scale_to_uint8(magnitude: np.ndarray) -> np.ndarray:
        """Rescale a non-negative response so its maximum becomes 255.

        A response that is zero everywhere (a flat image) yields an all-zero
        result instead of dividing by zero.
        """
        peak = magnitude.max()
        if peak == 0:
            return np.zeros(magnitude.shape, dtype=np.uint8)
        return np.uint8(magnitude / peak * 255)

    def sobel(
        self,
        dx: int = 1,
        dy: int = 1,
        ksize: int = 3
    ) -> np.ndarray:
        """Return the Sobel gradient magnitude scaled to ``uint8``.

        Args:
            dx: Derivative order used for the horizontal response.
            dy: Derivative order used for the vertical response.
            ksize: Sobel kernel size.

        Returns:
            ``sqrt(gx**2 + gy**2)`` rescaled so the maximum is 255.
        """
        grad_x = cv2.Sobel(self.gray, cv2.CV_64F, dx, 0, ksize=ksize)
        grad_y = cv2.Sobel(self.gray, cv2.CV_64F, 0, dy, ksize=ksize)
        return self._scale_to_uint8(np.sqrt(grad_x ** 2 + grad_y ** 2))

    def sobel_x(self, ksize: int = 3) -> np.ndarray:
        """Return the raw 64-bit float horizontal Sobel response."""
        return cv2.Sobel(self.gray, cv2.CV_64F, 1, 0, ksize=ksize)

    def sobel_y(self, ksize: int = 3) -> np.ndarray:
        """Return the raw 64-bit float vertical Sobel response."""
        return cv2.Sobel(self.gray, cv2.CV_64F, 0, 1, ksize=ksize)

    def laplacian(self, ksize: int = 3) -> np.ndarray:
        """Return the raw 64-bit float Laplacian response."""
        return cv2.Laplacian(self.gray, cv2.CV_64F, ksize=ksize)

    def canny(self, threshold1: float = 50, threshold2: float = 150) -> np.ndarray:
        """Run ``cv2.Canny`` with the two hysteresis thresholds."""
        return cv2.Canny(self.gray, threshold1, threshold2)

    def prewitt(self) -> np.ndarray:
        """Return ``|gx| + |gy|`` for the Prewitt kernels as ``uint8``.

        The responses are computed in 64-bit float.  Filtering at the source
        depth would clip every negative response of an 8-bit image to zero
        and lose edges of one polarity.
        """
        kernel_x = np.array([
            [-1, 0, 1],
            [-1, 0, 1],
            [-1, 0, 1]
        ], dtype=np.float32)
        kernel_y = np.array([
            [-1, -1, -1],
            [0, 0, 0],
            [1, 1, 1]
        ], dtype=np.float32)
        grad_x = cv2.filter2D(self.gray, cv2.CV_64F, kernel_x)
        grad_y = cv2.filter2D(self.gray, cv2.CV_64F, kernel_y)
        # convertScaleAbs takes the absolute value and saturates to uint8.
        return cv2.add(cv2.convertScaleAbs(grad_x), cv2.convertScaleAbs(grad_y))

    def scharr(self) -> np.ndarray:
        """Return the Scharr gradient magnitude scaled to ``uint8``.

        The magnitude is rescaled like ``sobel`` does; casting it straight
        to ``uint8`` would wrap around for values above 255.
        """
        grad_x = cv2.Scharr(self.gray, cv2.CV_64F, 1, 0)
        grad_y = cv2.Scharr(self.gray, cv2.CV_64F, 0, 1)
        return self._scale_to_uint8(np.sqrt(grad_x ** 2 + grad_y ** 2))


# 33.2.3 形态学操作
python
class MorphologicalOperations:
    """Morphological operators applied to a binary view of the input image."""

    def __init__(self, image: np.ndarray):
        """Store ``image`` and the array the operators work on.

        A three-dimensional input is converted to gray and thresholded at
        127 (max value 255); any other input is used unchanged.
        """
        self.image = image
        if len(image.shape) == 3:
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            self.binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)[1]
        else:
            self.binary = image

    def _get_kernel(self, shape: int, ksize: Tuple[int, int]) -> np.ndarray:
        """Build a structuring element of the given ``cv2.MORPH_*`` shape."""
        return cv2.getStructuringElement(shape, ksize)

    def _morph(
        self,
        operation: int,
        ksize: Tuple[int, int],
        shape: int,
        iterations: int = 1
    ) -> np.ndarray:
        """Run ``cv2.morphologyEx`` with ``operation`` on the binary image."""
        element = self._get_kernel(shape, ksize)
        return cv2.morphologyEx(self.binary, operation, element, iterations=iterations)

    def erode(
        self,
        ksize: Tuple[int, int] = (5, 5),
        iterations: int = 1,
        shape: int = cv2.MORPH_RECT
    ) -> np.ndarray:
        """Erode the binary image ``iterations`` times."""
        element = self._get_kernel(shape, ksize)
        return cv2.erode(self.binary, element, iterations=iterations)

    def dilate(
        self,
        ksize: Tuple[int, int] = (5, 5),
        iterations: int = 1,
        shape: int = cv2.MORPH_RECT
    ) -> np.ndarray:
        """Dilate the binary image ``iterations`` times."""
        element = self._get_kernel(shape, ksize)
        return cv2.dilate(self.binary, element, iterations=iterations)

    def opening(
        self,
        ksize: Tuple[int, int] = (5, 5),
        iterations: int = 1,
        shape: int = cv2.MORPH_RECT
    ) -> np.ndarray:
        """Apply ``cv2.MORPH_OPEN``."""
        return self._morph(cv2.MORPH_OPEN, ksize, shape, iterations)

    def closing(
        self,
        ksize: Tuple[int, int] = (5, 5),
        iterations: int = 1,
        shape: int = cv2.MORPH_RECT
    ) -> np.ndarray:
        """Apply ``cv2.MORPH_CLOSE``."""
        return self._morph(cv2.MORPH_CLOSE, ksize, shape, iterations)

    def gradient(
        self,
        ksize: Tuple[int, int] = (5, 5),
        shape: int = cv2.MORPH_RECT
    ) -> np.ndarray:
        """Apply ``cv2.MORPH_GRADIENT``."""
        return self._morph(cv2.MORPH_GRADIENT, ksize, shape)

    def tophat(
        self,
        ksize: Tuple[int, int] = (5, 5),
        shape: int = cv2.MORPH_RECT
    ) -> np.ndarray:
        """Apply ``cv2.MORPH_TOPHAT``."""
        return self._morph(cv2.MORPH_TOPHAT, ksize, shape)

    def blackhat(
        self,
        ksize: Tuple[int, int] = (5, 5),
        shape: int = cv2.MORPH_RECT
    ) -> np.ndarray:
        """Apply ``cv2.MORPH_BLACKHAT``."""
        return self._morph(cv2.MORPH_BLACKHAT, ksize, shape)

    def hit_miss(self, kernel: np.ndarray) -> np.ndarray:
        """Apply ``cv2.MORPH_HITMISS`` with a caller-supplied ``kernel``."""
        return cv2.morphologyEx(self.binary, cv2.MORPH_HITMISS, kernel)


# 33.3 特征检测
33.3.1 角点检测
python
class CornerDetection:
    """Corner and keypoint detectors plus helpers to draw their results."""

    def __init__(self, image: np.ndarray):
        """Store ``image`` and a grayscale version used by the detectors."""
        self.image = image
        is_color = len(image.shape) == 3
        self.gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if is_color else image

    def harris(
        self,
        block_size: int = 2,
        ksize: int = 3,
        k: float = 0.04,
        threshold: float = 0.01
    ) -> Tuple[np.ndarray, List[Tuple[int, int]]]:
        """Run the Harris detector.

        Args:
            block_size: Neighbourhood size passed to ``cv2.cornerHarris``.
            ksize: Sobel aperture passed to ``cv2.cornerHarris``.
            k: Harris free parameter.
            threshold: Fraction of the maximum response a pixel must exceed.

        Returns:
            ``(response, points)``: the dilated response map and the
            ``(x, y)`` positions whose response exceeds the threshold.
        """
        response = cv2.cornerHarris(np.float32(self.gray), block_size, ksize, k)
        response = cv2.dilate(response, None)
        cutoff = threshold * response.max()
        rows, cols = np.where(response > cutoff)
        return response, list(zip(cols, rows))

    def shi_tomasi(
        self,
        max_corners: int = 100,
        quality_level: float = 0.01,
        min_distance: int = 10,
        block_size: int = 3
    ) -> List[Tuple[float, float]]:
        """Return corners from ``cv2.goodFeaturesToTrack`` as ``(x, y)`` tuples.

        Returns an empty list when the detector returns ``None``.
        """
        found = cv2.goodFeaturesToTrack(
            self.gray,
            max_corners,
            quality_level,
            min_distance,
            blockSize=block_size
        )
        if found is None:
            return []
        return [tuple(entry[0]) for entry in found]

    def fast(
        self,
        threshold: int = 10,
        non_max_suppression: bool = True
    ) -> List[cv2.KeyPoint]:
        """Detect keypoints with a FAST detector built from the arguments."""
        detector = cv2.FastFeatureDetector_create(threshold, non_max_suppression)
        return detector.detect(self.gray, None)

    def draw_corners(
        self,
        corners: List[Tuple[int, int]],
        color: Tuple[int, int, int] = (0, 255, 0),
        radius: int = 3
    ) -> np.ndarray:
        """Return a copy of the image with a filled circle at every corner."""
        canvas = self.image.copy()
        for corner in corners:
            px, py = corner
            cv2.circle(canvas, (int(px), int(py)), radius, color, -1)
        return canvas

    def draw_keypoints(
        self,
        keypoints: List[cv2.KeyPoint],
        color: Tuple[int, int, int] = (0, 255, 0)
    ) -> np.ndarray:
        """Draw ``keypoints`` on the image using the rich-keypoints flag."""
        rich_flag = cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS
        return cv2.drawKeypoints(self.image, keypoints, None, color, rich_flag)


# 33.3.2 特征描述符
python
class FeatureDescriptor:
    """Detect keypoints and compute descriptors with several OpenCV algorithms."""

    def __init__(self, image: np.ndarray):
        """Store ``image`` and a grayscale version used by every detector."""
        self.image = image
        is_color = len(image.shape) == 3
        self.gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if is_color else image

    def _describe(self, detector) -> Tuple[List[cv2.KeyPoint], np.ndarray]:
        """Run ``detector.detectAndCompute`` on the gray image with no mask."""
        found_keypoints, found_descriptors = detector.detectAndCompute(self.gray, None)
        return found_keypoints, found_descriptors

    def sift_detect_and_compute(self) -> Tuple[List[cv2.KeyPoint], np.ndarray]:
        """Return ``(keypoints, descriptors)`` from ``cv2.SIFT_create()``."""
        return self._describe(cv2.SIFT_create())

    def surf_detect_and_compute(self, hessian_threshold: float = 400) -> Tuple[List[cv2.KeyPoint], np.ndarray]:
        """Return ``(keypoints, descriptors)`` from ``cv2.xfeatures2d.SURF_create``."""
        return self._describe(cv2.xfeatures2d.SURF_create(hessian_threshold))

    def orb_detect_and_compute(self, n_features: int = 500) -> Tuple[List[cv2.KeyPoint], np.ndarray]:
        """Return ``(keypoints, descriptors)`` from ``cv2.ORB_create(n_features)``."""
        return self._describe(cv2.ORB_create(n_features))

    def brisk_detect_and_compute(self) -> Tuple[List[cv2.KeyPoint], np.ndarray]:
        """Return ``(keypoints, descriptors)`` from ``cv2.BRISK_create()``."""
        return self._describe(cv2.BRISK_create())

    def akaze_detect_and_compute(self) -> Tuple[List[cv2.KeyPoint], np.ndarray]:
        """Return ``(keypoints, descriptors)`` from ``cv2.AKAZE_create()``."""
        return self._describe(cv2.AKAZE_create())
class FeatureMatcher:
    """Static helpers for matching descriptor sets and filtering the matches."""

    # FLANN index algorithm identifiers.  0 is the linear (brute-force)
    # index, which ignores ``trees``; the KD-tree index is 1.
    _FLANN_INDEX_KDTREE = 1
    _FLANN_INDEX_LSH = 6

    @staticmethod
    def match_bf(
        descriptors1: np.ndarray,
        descriptors2: np.ndarray,
        norm_type: int = cv2.NORM_L2
    ) -> List[cv2.DMatch]:
        """Brute-force match and return the matches sorted by distance."""
        bf = cv2.BFMatcher(norm_type)
        matches = bf.match(descriptors1, descriptors2)
        return sorted(matches, key=lambda match: match.distance)

    @staticmethod
    def match_knn(
        descriptors1: np.ndarray,
        descriptors2: np.ndarray,
        k: int = 2,
        norm_type: int = cv2.NORM_L2
    ) -> List[List[cv2.DMatch]]:
        """Return the ``k`` nearest brute-force matches per descriptor."""
        bf = cv2.BFMatcher(norm_type)
        return bf.knnMatch(descriptors1, descriptors2, k=k)

    @staticmethod
    def match_flann(
        descriptors1: np.ndarray,
        descriptors2: np.ndarray,
        k: int = 2
    ) -> List[List[cv2.DMatch]]:
        """Return the ``k`` nearest FLANN matches per descriptor.

        ``uint8`` descriptors use an LSH index; all other descriptors use a
        KD-tree index with 5 trees.
        """
        if descriptors1.dtype == np.uint8:
            index_params = dict(
                algorithm=FeatureMatcher._FLANN_INDEX_LSH,
                table_number=6,
                key_size=12,
                multi_probe_level=1,
            )
        else:
            index_params = dict(
                algorithm=FeatureMatcher._FLANN_INDEX_KDTREE,
                trees=5,
            )
        search_params = dict(checks=50)
        flann = cv2.FlannBasedMatcher(index_params, search_params)
        return flann.knnMatch(descriptors1, descriptors2, k=k)

    @staticmethod
    def ratio_test(matches: List[List[cv2.DMatch]], ratio: float = 0.75) -> List[cv2.DMatch]:
        """Keep matches whose best distance is below ``ratio`` times the second best.

        Args:
            matches: kNN result; each entry lists candidates, best first.
            ratio: Maximum allowed ``best.distance / second.distance``.

        Returns:
            The best match of every entry that passes the test.  Entries with
            fewer than two candidates are skipped because the ratio cannot be
            evaluated for them; candidates beyond the second are ignored.
        """
        good = []
        for candidates in matches:
            if len(candidates) < 2:
                continue
            best, second = candidates[0], candidates[1]
            if best.distance < ratio * second.distance:
                good.append(best)
        return good

    @staticmethod
    def draw_matches(
        image1: np.ndarray,
        keypoints1: List[cv2.KeyPoint],
        image2: np.ndarray,
        keypoints2: List[cv2.KeyPoint],
        matches: List[cv2.DMatch]
    ) -> np.ndarray:
        """Draw ``matches`` between the two images, omitting unmatched keypoints."""
        return cv2.drawMatches(
            image1, keypoints1,
            image2, keypoints2,
            matches, None,
            flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS
        )


# 33.3.3 轮廓检测
python
class ContourDetection:
    """Find contours in an image and measure or draw them."""

    def __init__(self, image: np.ndarray):
        """Store ``image`` and a grayscale version used for detection."""
        if len(image.shape) == 3:
            self.gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        else:
            self.gray = image
        self.image = image

    def find_contours(
        self,
        mode: int = cv2.RETR_EXTERNAL,
        method: int = cv2.CHAIN_APPROX_SIMPLE
    ) -> Tuple[np.ndarray, List[np.ndarray], np.ndarray]:
        """Threshold the gray image at 127 and find contours in the result.

        Returns:
            ``(binary, contours, hierarchy)``.
        """
        _, binary = cv2.threshold(self.gray, 127, 255, cv2.THRESH_BINARY)
        contours, hierarchy = cv2.findContours(binary, mode, method)
        return binary, contours, hierarchy

    def find_contours_canny(
        self,
        threshold1: float = 50,
        threshold2: float = 150,
        mode: int = cv2.RETR_EXTERNAL,
        method: int = cv2.CHAIN_APPROX_SIMPLE
    ) -> Tuple[np.ndarray, List[np.ndarray], np.ndarray]:
        """Run Canny on the gray image and find contours in the edge map.

        Returns:
            ``(edges, contours, hierarchy)``.
        """
        edges = cv2.Canny(self.gray, threshold1, threshold2)
        contours, hierarchy = cv2.findContours(edges, mode, method)
        return edges, contours, hierarchy

    def draw_contours(
        self,
        contours: List[np.ndarray],
        color: Tuple[int, int, int] = (0, 255, 0),
        thickness: int = 2
    ) -> np.ndarray:
        """Return a copy of the image with every contour drawn on it."""
        result = self.image.copy()
        cv2.drawContours(result, contours, -1, color, thickness)
        return result

    @staticmethod
    def get_contour_area(contour: np.ndarray) -> float:
        """Return ``cv2.contourArea(contour)``."""
        return cv2.contourArea(contour)

    @staticmethod
    def get_contour_perimeter(contour: np.ndarray, closed: bool = True) -> float:
        """Return ``cv2.arcLength(contour, closed)``."""
        return cv2.arcLength(contour, closed)

    @staticmethod
    def approximate_contour(contour: np.ndarray, epsilon: float = 0.02) -> np.ndarray:
        """Approximate a closed contour with a polygon.

        ``epsilon`` is a fraction of the contour's perimeter, not pixels.
        """
        peri = cv2.arcLength(contour, True)
        return cv2.approxPolyDP(contour, epsilon * peri, True)

    @staticmethod
    def get_bounding_rect(contour: np.ndarray) -> Tuple[int, int, int, int]:
        """Return ``cv2.boundingRect(contour)``."""
        return cv2.boundingRect(contour)

    @staticmethod
    def get_min_area_rect(
        contour: np.ndarray
    ) -> Tuple[Tuple[float, float], Tuple[float, float], float]:
        """Return ``cv2.minAreaRect(contour)``.

        In Python the rotated rectangle comes back as the tuple
        ``((cx, cy), (width, height), angle)``.
        """
        return cv2.minAreaRect(contour)

    @staticmethod
    def get_min_enclosing_circle(contour: np.ndarray) -> Tuple[Tuple[float, float], float]:
        """Return ``cv2.minEnclosingCircle(contour)`` as ``(center, radius)``."""
        return cv2.minEnclosingCircle(contour)

    @staticmethod
    def get_convex_hull(contour: np.ndarray) -> np.ndarray:
        """Return ``cv2.convexHull(contour)``."""
        return cv2.convexHull(contour)

    @staticmethod
    def is_contour_convex(contour: np.ndarray) -> bool:
        """Return ``cv2.isContourConvex(contour)``."""
        return cv2.isContourConvex(contour)

    @staticmethod
    def get_moments(contour: np.ndarray) -> Dict[str, float]:
        """Return ``cv2.moments(contour)``, a mapping such as ``"m00"`` -> value."""
        return cv2.moments(contour)

    @staticmethod
    def get_centroid(moments: Dict[str, float]) -> Tuple[float, float]:
        """Compute the centroid ``(m10 / m00, m01 / m00)``.

        Returns ``(0.0, 0.0)`` when ``m00`` is zero, for which the centroid
        is undefined.
        """
        if moments["m00"] != 0:
            cx = moments["m10"] / moments["m00"]
            cy = moments["m01"] / moments["m00"]
            return (cx, cy)
        return (0.0, 0.0)


# 33.4 图像分割
33.4.1 阈值分割
python
class ThresholdSegmentation:
    """Threshold-based segmentation of a grayscale view of the image."""

    def __init__(self, image: np.ndarray):
        """Store ``image`` and a grayscale version used by every method."""
        if len(image.shape) == 3:
            self.gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        else:
            self.gray = image
        self.image = image

    def binary_threshold(
        self,
        threshold: int = 127,
        max_value: int = 255,
        type_: int = cv2.THRESH_BINARY
    ) -> Tuple[float, np.ndarray]:
        """Apply ``cv2.threshold`` and return its ``(threshold, image)`` pair."""
        return cv2.threshold(self.gray, threshold, max_value, type_)

    def otsu_threshold(self, max_value: int = 255) -> Tuple[float, np.ndarray]:
        """Threshold with ``THRESH_BINARY + THRESH_OTSU``."""
        return cv2.threshold(
            self.gray, 0, max_value,
            cv2.THRESH_BINARY + cv2.THRESH_OTSU
        )

    def adaptive_threshold_mean(
        self,
        block_size: int = 11,
        c: int = 2
    ) -> np.ndarray:
        """Adaptive threshold with ``ADAPTIVE_THRESH_MEAN_C`` and max value 255."""
        return cv2.adaptiveThreshold(
            self.gray, 255,
            cv2.ADAPTIVE_THRESH_MEAN_C,
            cv2.THRESH_BINARY,
            block_size, c
        )

    def adaptive_threshold_gaussian(
        self,
        block_size: int = 11,
        c: int = 2
    ) -> np.ndarray:
        """Adaptive threshold with ``ADAPTIVE_THRESH_GAUSSIAN_C`` and max value 255."""
        return cv2.adaptiveThreshold(
            self.gray, 255,
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY,
            block_size, c
        )

    def triangle_threshold(self, max_value: int = 255) -> Tuple[float, np.ndarray]:
        """Threshold with ``THRESH_BINARY + THRESH_TRIANGLE``."""
        return cv2.threshold(
            self.gray, 0, max_value,
            cv2.THRESH_BINARY + cv2.THRESH_TRIANGLE
        )

    def multi_threshold(
        self,
        thresholds: List[int],
        max_value: int = 255
    ) -> np.ndarray:
        """Quantise the gray image into ``len(thresholds) + 1`` levels.

        Pixels below the smallest threshold become 0, pixels at or above the
        largest become ``max_value``, and the band between the ``(i-1)``-th
        and ``i``-th smallest thresholds becomes
        ``int(max_value * i / len(thresholds))``.

        Args:
            thresholds: Cut points in any order; they are sorted first.
            max_value: Value assigned to the top band.

        Returns:
            An array shaped and typed like the gray image.

        Raises:
            ValueError: If ``thresholds`` is empty.
        """
        if not thresholds:
            raise ValueError("thresholds must contain at least one value")
        # Every lookup uses the sorted copy.  Indexing the caller's list
        # while iterating a sorted one produced wrong bands for unsorted
        # input.
        ordered = sorted(thresholds)
        count = len(ordered)
        result = np.zeros_like(self.gray)  # lowest band is already 0
        for i in range(1, count):
            band = (self.gray >= ordered[i - 1]) & (self.gray < ordered[i])
            result[band] = int(max_value * i / count)
        result[self.gray >= ordered[-1]] = max_value
        return result
class ColorSegmentation:
    """Segment one image by keeping the pixels inside a color range."""

    def __init__(self, image: np.ndarray):
        """Store the image every method segments."""
        self.image = image

    def _keep(self, mask: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """Return ``(mask, image masked by mask)``."""
        return mask, cv2.bitwise_and(self.image, self.image, mask=mask)

    def _hsv_mask(self, lower: np.ndarray, upper: np.ndarray) -> np.ndarray:
        """Convert the image to HSV and return the ``cv2.inRange`` mask."""
        converted = cv2.cvtColor(self.image, cv2.COLOR_BGR2HSV)
        return cv2.inRange(converted, lower, upper)

    def segment_by_color_hsv(
        self,
        lower: Tuple[int, int, int],
        upper: Tuple[int, int, int]
    ) -> Tuple[np.ndarray, np.ndarray]:
        """Keep pixels whose HSV value lies between ``lower`` and ``upper``.

        Returns:
            ``(mask, result)``.
        """
        return self._keep(self._hsv_mask(np.array(lower), np.array(upper)))

    def segment_by_color_rgb(
        self,
        lower: Tuple[int, int, int],
        upper: Tuple[int, int, int]
    ) -> Tuple[np.ndarray, np.ndarray]:
        """Keep pixels whose raw channel values lie between the bounds.

        The image is compared as stored, with no color conversion.

        Returns:
            ``(mask, result)``.
        """
        in_range = cv2.inRange(self.image, np.array(lower), np.array(upper))
        return self._keep(in_range)

    def segment_skin_color(self) -> Tuple[np.ndarray, np.ndarray]:
        """Keep pixels in the HSV range ``[0, 20, 70]`` .. ``[20, 255, 255]``."""
        low = np.array([0, 20, 70])
        high = np.array([20, 255, 255])
        return self._keep(self._hsv_mask(low, high))

    def segment_green(self) -> Tuple[np.ndarray, np.ndarray]:
        """Keep pixels in the HSV range ``[35, 43, 46]`` .. ``[77, 255, 255]``."""
        low = np.array([35, 43, 46])
        high = np.array([77, 255, 255])
        return self._keep(self._hsv_mask(low, high))

    def segment_red(self) -> Tuple[np.ndarray, np.ndarray]:
        """Keep pixels in either of two HSV ranges, hue 0-10 or hue 156-180."""
        converted = cv2.cvtColor(self.image, cv2.COLOR_BGR2HSV)
        low_band = cv2.inRange(
            converted, np.array([0, 43, 46]), np.array([10, 255, 255])
        )
        high_band = cv2.inRange(
            converted, np.array([156, 43, 46]), np.array([180, 255, 255])
        )
        return self._keep(cv2.bitwise_or(low_band, high_band))


# 33.4.2 高级分割
python
class AdvancedSegmentation:
    """Watershed, GrabCut, k-means, mean-shift and connected-component segmentation."""

    def __init__(self, image: np.ndarray):
        """Store ``image`` and a grayscale version of it."""
        self.image = image
        if len(image.shape) == 3:
            self.gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        else:
            self.gray = image

    def _as_bgr(self) -> np.ndarray:
        """Return the image as three channels, expanding a 2-D image."""
        if len(self.image.shape) == 2:
            return cv2.cvtColor(self.image, cv2.COLOR_GRAY2BGR)
        return self.image

    def watershed(self, markers: np.ndarray) -> np.ndarray:
        """Run ``cv2.watershed`` with ``markers`` and return the labelled markers."""
        return cv2.watershed(self._as_bgr(), markers)

    def grabcut(
        self,
        rect: Tuple[int, int, int, int],
        iter_count: int = 5
    ) -> Tuple[np.ndarray, np.ndarray]:
        """Extract the foreground inside ``rect`` with GrabCut.

        Args:
            rect: Region passed to ``cv2.grabCut`` with ``GC_INIT_WITH_RECT``.
            iter_count: Number of GrabCut iterations.

        Returns:
            ``(mask, result)``: a 0/1 ``uint8`` foreground mask and the
            three-channel image multiplied by it.  A 2-D input is expanded
            to three channels first, as ``watershed`` does.
        """
        image = self._as_bgr()
        mask = np.zeros(image.shape[:2], np.uint8)
        bgd_model = np.zeros((1, 65), np.float64)
        fgd_model = np.zeros((1, 65), np.float64)
        cv2.grabCut(
            image, mask, rect,
            bgd_model, fgd_model,
            iter_count, cv2.GC_INIT_WITH_RECT
        )
        # Definite and probable background become 0, everything else 1.
        is_background = (mask == cv2.GC_PR_BGD) | (mask == cv2.GC_BGD)
        mask2 = np.where(is_background, 0, 1).astype(np.uint8)
        result = image * mask2[:, :, np.newaxis]
        return mask2, result

    def kmeans_segmentation(
        self,
        k: int = 3,
        attempts: int = 10
    ) -> np.ndarray:
        """Replace every pixel with the centre of its k-means color cluster.

        Works for any channel count; a 2-D image is clustered on intensity.

        Args:
            k: Number of clusters.
            attempts: Number of restarts passed to ``cv2.kmeans``.

        Returns:
            A ``uint8`` image with the same shape as the input.
        """
        channels = 1 if len(self.image.shape) == 2 else self.image.shape[2]
        pixel_values = np.float32(self.image.reshape((-1, channels)))
        criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 100, 0.2)
        _, labels, centers = cv2.kmeans(
            pixel_values, k, None,
            criteria, attempts,
            cv2.KMEANS_RANDOM_CENTERS
        )
        centers = np.uint8(centers)
        segmented = centers[labels.flatten()]
        return segmented.reshape(self.image.shape)

    def mean_shift_segmentation(
        self,
        spatial_radius: float = 20,
        color_radius: float = 30,
        min_size: int = 20,
        max_level: int = 1
    ) -> np.ndarray:
        """Smooth the image with ``cv2.pyrMeanShiftFiltering``.

        Args:
            spatial_radius: Spatial window radius (``sp``).
            color_radius: Color window radius (``sr``).
            min_size: Unused; kept so existing callers do not break.
                ``pyrMeanShiftFiltering`` has no minimum-region-size
                argument, and the value used to be passed positionally
                as the pyramid level count.
            max_level: Maximum pyramid level (``maxLevel``).

        Returns:
            The filtered image.
        """
        return cv2.pyrMeanShiftFiltering(
            self.image,
            spatial_radius,
            color_radius,
            maxLevel=max_level
        )

    def connected_components(
        self,
        connectivity: int = 8
    ) -> Tuple[int, np.ndarray]:
        """Threshold the gray image at 127 and label connected components.

        Returns:
            ``(num_labels, labels)`` from ``cv2.connectedComponents``.
        """
        _, binary = cv2.threshold(self.gray, 127, 255, cv2.THRESH_BINARY)
        num_labels, labels = cv2.connectedComponents(binary, connectivity=connectivity)
        return num_labels, labels


# 33.5 目标检测
33.5.1 模板匹配
python
class TemplateMatching:
    """Locate a template inside an image with ``cv2.matchTemplate``."""

    # Methods for which a lower score means a better match.
    _LOWER_IS_BETTER = (cv2.TM_SQDIFF, cv2.TM_SQDIFF_NORMED)

    def __init__(self, image: np.ndarray):
        """Store ``image`` and the grayscale version that is searched."""
        self.image = image
        if len(image.shape) == 3:
            self.gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        else:
            self.gray = image

    @staticmethod
    def _to_gray(template: np.ndarray) -> np.ndarray:
        """Return ``template`` as a single-channel image."""
        if len(template.shape) == 3:
            return cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)
        return template

    def match_template(
        self,
        template: np.ndarray,
        method: int = cv2.TM_CCOEFF_NORMED
    ) -> Tuple[float, Tuple[int, int], Tuple[int, int]]:
        """Find the single best match of ``template``.

        Returns:
            ``(score, top_left, bottom_right)``.  For the squared-difference
            methods the best match is the minimum of the score map;
            otherwise it is the maximum.
        """
        template_gray = self._to_gray(template)
        result = cv2.matchTemplate(self.gray, template_gray, method)
        min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(result)
        if method in self._LOWER_IS_BETTER:
            top_left, match_value = min_loc, min_val
        else:
            top_left, match_value = max_loc, max_val
        h, w = template_gray.shape
        bottom_right = (top_left[0] + w, top_left[1] + h)
        return match_value, top_left, bottom_right

    def match_multiple(
        self,
        template: np.ndarray,
        threshold: float = 0.8,
        method: int = cv2.TM_CCOEFF_NORMED
    ) -> List[Tuple[Tuple[int, int], Tuple[int, int], float]]:
        """Find every location whose score passes ``threshold``.

        For the squared-difference methods a location passes when its score
        is at most ``threshold``; for all other methods when it is at least
        ``threshold``.  This matches how ``match_template`` picks the best
        score.

        Returns:
            ``(top_left, bottom_right, score)`` triples in row-major order,
            with plain ``int`` coordinates.
        """
        template_gray = self._to_gray(template)
        scores = cv2.matchTemplate(self.gray, template_gray, method)
        if method in self._LOWER_IS_BETTER:
            hits = scores <= threshold
        else:
            hits = scores >= threshold
        h, w = template_gray.shape
        rows, cols = np.where(hits)
        matches = []
        for row, col in zip(rows, cols):
            x, y = int(col), int(row)
            matches.append(((x, y), (x + w, y + h), float(scores[row, col])))
        return matches

    def draw_match(
        self,
        top_left: Tuple[int, int],
        bottom_right: Tuple[int, int],
        color: Tuple[int, int, int] = (0, 255, 0),
        thickness: int = 2
    ) -> np.ndarray:
        """Return a copy of the image with the match rectangle drawn."""
        result = self.image.copy()
        cv2.rectangle(result, top_left, bottom_right, color, thickness)
        return result

    def non_max_suppression(
        self,
        boxes: List[Tuple[Tuple[int, int], Tuple[int, int], float]],
        overlap_thresh: float = 0.3,
        lower_is_better: bool = False
    ) -> List[Tuple[Tuple[int, int], Tuple[int, int], float]]:
        """Greedily drop boxes that overlap a better-scoring box.

        Args:
            boxes: ``(top_left, bottom_right, score)`` triples.
            overlap_thresh: A box is dropped when its IoU with a kept box is
                at least this value.
            lower_is_better: Set for scores from the squared-difference
                methods, where the smallest score is the best match.

        Returns:
            The kept boxes, best score first.
        """
        remaining = sorted(boxes, key=lambda box: box[2], reverse=not lower_is_better)
        keep = []
        while remaining:
            current, rest = remaining[0], remaining[1:]
            keep.append(current)
            remaining = [
                box for box in rest
                if self._iou(current[0], current[1], box[0], box[1]) < overlap_thresh
            ]
        return keep

    @staticmethod
    def _iou(
        box1_tl: Tuple[int, int],
        box1_br: Tuple[int, int],
        box2_tl: Tuple[int, int],
        box2_br: Tuple[int, int]
    ) -> float:
        """Return the intersection-over-union of two corner-defined boxes.

        Returns 0.0 when the boxes do not overlap or when the union has no
        area (both boxes degenerate), which would otherwise divide by zero.
        """
        x1 = max(box1_tl[0], box2_tl[0])
        y1 = max(box1_tl[1], box2_tl[1])
        x2 = min(box1_br[0], box2_br[0])
        y2 = min(box1_br[1], box2_br[1])
        if x2 < x1 or y2 < y1:
            return 0.0
        intersection = (x2 - x1) * (y2 - y1)
        area1 = (box1_br[0] - box1_tl[0]) * (box1_br[1] - box1_tl[1])
        area2 = (box2_br[0] - box2_tl[0]) * (box2_br[1] - box2_tl[1])
        union = area1 + area2 - intersection
        if union <= 0:
            return 0.0
        return intersection / union


# 33.5.2 级联分类器
python
class CascadeClassifier:
    """Object detector backed by a ``cv2.CascadeClassifier`` loaded from a file."""

    def __init__(self, cascade_path: str):
        """Load the cascade stored at ``cascade_path``."""
        self.cascade = cv2.CascadeClassifier(cascade_path)

    def detect(
        self,
        image: np.ndarray,
        scale_factor: float = 1.1,
        min_neighbors: int = 3,
        min_size: Tuple[int, int] = (30, 30),
        max_size: Optional[Tuple[int, int]] = None
    ) -> List[Tuple[int, int, int, int]]:
        """Detect objects and return them as ``(x, y, w, h)`` tuples.

        A three-dimensional image is converted to gray before detection.
        The remaining arguments are forwarded to ``detectMultiScale``.
        """
        is_color = len(image.shape) == 3
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if is_color else image
        options = dict(
            scaleFactor=scale_factor,
            minNeighbors=min_neighbors,
            minSize=min_size,
            maxSize=max_size,
        )
        found = self.cascade.detectMultiScale(gray, **options)
        return list(map(tuple, found))

    def draw_detections(
        self,
        image: np.ndarray,
        detections: List[Tuple[int, int, int, int]],
        color: Tuple[int, int, int] = (0, 255, 0),
        thickness: int = 2
    ) -> np.ndarray:
        """Return a copy of ``image`` with a rectangle around every detection."""
        canvas = image.copy()
        for box in detections:
            left, top, box_w, box_h = box
            corner = (left + box_w, top + box_h)
            cv2.rectangle(canvas, (left, top), corner, color, thickness)
        return canvas
class FaceDetector:
    """Detect faces, eyes and smiles with OpenCV's bundled Haar cascades."""

    def __init__(self):
        """Load the frontal-face, eye and smile cascades from ``cv2.data.haarcascades``."""
        self.face_cascade = cv2.CascadeClassifier(
            cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
        )
        self.eye_cascade = cv2.CascadeClassifier(
            cv2.data.haarcascades + "haarcascade_eye.xml"
        )
        self.smile_cascade = cv2.CascadeClassifier(
            cv2.data.haarcascades + "haarcascade_smile.xml"
        )

    def detect_faces(
        self,
        image: np.ndarray,
        scale_factor: float = 1.1,
        min_neighbors: int = 5,
        min_size: Tuple[int, int] = (30, 30)
    ) -> List[Tuple[int, int, int, int]]:
        """Return detected faces as ``(x, y, w, h)`` tuples."""
        is_color = len(image.shape) == 3
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if is_color else image
        options = dict(
            scaleFactor=scale_factor,
            minNeighbors=min_neighbors,
            minSize=min_size,
        )
        found = self.face_cascade.detectMultiScale(gray, **options)
        return list(map(tuple, found))

    def detect_eyes(
        self,
        image: np.ndarray,
        face_region: Tuple[int, int, int, int]
    ) -> List[Tuple[int, int, int, int]]:
        """Detect eyes inside ``face_region``.

        Returns:
            ``(x, y, w, h)`` boxes in full-image coordinates.
        """
        left, top, face_w, face_h = face_region
        is_color = len(image.shape) == 3
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if is_color else image
        face_patch = gray[top:top + face_h, left:left + face_w]
        boxes = []
        for ex, ey, ew, eh in self.eye_cascade.detectMultiScale(face_patch):
            # Shift from patch coordinates back to image coordinates.
            boxes.append((left + ex, top + ey, ew, eh))
        return boxes

    def detect_smile(
        self,
        image: np.ndarray,
        face_region: Tuple[int, int, int, int]
    ) -> List[Tuple[int, int, int, int]]:
        """Detect smiles inside ``face_region``.

        Uses ``scaleFactor=1.7``, ``minNeighbors=22`` and ``minSize=(25, 25)``.

        Returns:
            ``(x, y, w, h)`` boxes in full-image coordinates.
        """
        left, top, face_w, face_h = face_region
        is_color = len(image.shape) == 3
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if is_color else image
        face_patch = gray[top:top + face_h, left:left + face_w]
        found = self.smile_cascade.detectMultiScale(
            face_patch,
            scaleFactor=1.7,
            minNeighbors=22,
            minSize=(25, 25)
        )
        boxes = []
        for sx, sy, sw, sh in found:
            boxes.append((left + sx, top + sy, sw, sh))
        return boxes

    def draw_face_with_features(
        self,
        image: np.ndarray,
        face_color: Tuple[int, int, int] = (255, 0, 0),
        eye_color: Tuple[int, int, int] = (0, 255, 0),
        smile_color: Tuple[int, int, int] = (0, 0, 255)
    ) -> np.ndarray:
        """Return a copy of ``image`` with faces, eyes and smiles outlined.

        Faces come from ``detect_faces`` with its defaults; eyes and smiles
        are searched inside each detected face.
        """
        canvas = image.copy()
        for face in self.detect_faces(image):
            fx, fy, fw, fh = face
            cv2.rectangle(canvas, (fx, fy), (fx + fw, fy + fh), face_color, 2)
            for ex, ey, ew, eh in self.detect_eyes(image, face):
                cv2.rectangle(canvas, (ex, ey), (ex + ew, ey + eh), eye_color, 2)
            for sx, sy, sw, sh in self.detect_smile(image, face):
                cv2.rectangle(canvas, (sx, sy), (sx + sw, sy + sh), smile_color, 2)
        return canvas


# 33.6 视频处理
33.6.1 视频读写
python
class VideoProcessor:
    """Read frames from a video source and optionally write processed frames.

    Usable as a context manager; leaving the ``with`` block releases the
    capture.
    """

    def __init__(self, source: Union[str, int] = 0):
        """Open ``source`` with ``cv2.VideoCapture`` and cache its properties.

        Args:
            source: Value handed to ``cv2.VideoCapture``, a string or an
                integer.
        """
        self.cap = cv2.VideoCapture(source)
        self.fps = self.cap.get(cv2.CAP_PROP_FPS)
        self.width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        self.frame_count = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))

    def read_frame(self) -> Tuple[bool, Optional[np.ndarray]]:
        """Return the ``(success, frame)`` pair from the capture's ``read()``."""
        return self.cap.read()

    def read_all_frames(self) -> List[np.ndarray]:
        """Read frames until the capture reports failure and return them all."""
        frames = []
        while True:
            ret, frame = self.cap.read()
            if not ret:
                break
            frames.append(frame)
        return frames

    def process_video(
        self,
        process_func: Callable[[np.ndarray], np.ndarray],
        output_path: Optional[str] = None,
        codec: str = "mp4v"
    ) -> Optional[List[np.ndarray]]:
        """Apply ``process_func`` to every remaining frame.

        Args:
            process_func: Called with each frame; its result is collected.
            output_path: When given, processed frames are also written to
                this file at the source's fps and frame size.
            codec: Four-character codec code used for the writer.

        Returns:
            The list of processed frames (always a list).

        The writer is released even if ``process_func`` or a read raises, so
        a failure part-way through does not leave the output file open.
        """
        results = []
        out = None
        if output_path:
            fourcc = cv2.VideoWriter_fourcc(*codec)
            out = cv2.VideoWriter(output_path, fourcc, self.fps, (self.width, self.height))
        try:
            while True:
                ret, frame = self.cap.read()
                if not ret:
                    break
                processed = process_func(frame)
                results.append(processed)
                if out is not None:
                    out.write(processed)
        finally:
            if out is not None:
                out.release()
        return results

    def release(self) -> None:
        """Release the underlying capture."""
        self.cap.release()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.release()
class VideoWriter:
    """Context-manager wrapper around ``cv2.VideoWriter``."""

    def __init__(
        self,
        output_path: str,
        fps: float,
        width: int,
        height: int,
        codec: str = "mp4v"
    ):
        """Open a writer for ``output_path``.

        Args:
            output_path: Destination file.
            fps: Frame rate passed to ``cv2.VideoWriter``.
            width: Frame width.
            height: Frame height.
            codec: Four-character codec code.
        """
        frame_size = (width, height)
        codec_id = cv2.VideoWriter_fourcc(*codec)
        self.writer = cv2.VideoWriter(output_path, codec_id, fps, frame_size)

    def write(self, frame: np.ndarray) -> None:
        """Write a single frame."""
        self.writer.write(frame)

    def write_frames(self, frames: List[np.ndarray]) -> None:
        """Write every frame of ``frames`` in order."""
        sink = self.writer
        for picture in frames:
            sink.write(picture)

    def release(self) -> None:
        """Release the underlying writer."""
        self.writer.release()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Always release the writer, whether or not the block raised.
        self.release()


# 33.6.2 物体追踪
python
class ObjectTracker:
    """Single-object tracker built on one of the ``cv2.legacy`` trackers."""

    # Tracker name -> factory attribute on ``cv2.legacy``. Names are stored
    # instead of the callables so that only the requested factory is looked
    # up; a factory missing from the installed OpenCV build then affects only
    # that tracker type instead of breaking every construction.
    _FACTORY_NAMES = {
        "BOOSTING": "TrackerBoosting_create",
        "MIL": "TrackerMIL_create",
        "KCF": "TrackerKCF_create",
        "TLD": "TrackerTLD_create",
        "MEDIANFLOW": "TrackerMedianFlow_create",
        "CSRT": "TrackerCSRT_create",
        "MOSSE": "TrackerMOSSE_create",
    }

    def __init__(self, tracker_type: str = "CSRT"):
        """Create the tracker named by *tracker_type* (case-insensitive).

        Raises:
            ValueError: If *tracker_type* is not one of the supported names.
        """
        factory_name = self._FACTORY_NAMES.get(tracker_type.upper())
        if factory_name is None:
            # Validate before touching cv2 so a bad name always yields ValueError.
            raise ValueError(f"Unknown tracker type: {tracker_type}")
        self.tracker = getattr(cv2.legacy, factory_name)()
        self.initialized = False

    def init(self, frame: np.ndarray, bbox: Tuple[int, int, int, int]) -> None:
        """Initialise the tracker with *frame* and *bbox* and mark it ready."""
        self.tracker.init(frame, bbox)
        self.initialized = True

    def update(self, frame: np.ndarray) -> Tuple[bool, Tuple[int, int, int, int]]:
        """Return the result of the underlying tracker's ``update(frame)``.

        Raises:
            RuntimeError: If ``init()`` has not been called yet.
        """
        if not self.initialized:
            raise RuntimeError("Tracker not initialized. Call init() first.")
        return self.tracker.update(frame)

    def draw_tracking(
        self,
        frame: np.ndarray,
        bbox: Tuple[int, int, int, int],
        color: Tuple[int, int, int] = (0, 255, 0),
        thickness: int = 2
    ) -> np.ndarray:
        """Return a copy of *frame* with *bbox* drawn as a rectangle.

        *bbox* is read as ``(x, y, w, h)``; each value is converted with
        ``int()`` before drawing. The input frame is not modified.
        """
        result = frame.copy()
        x, y, w, h = [int(v) for v in bbox]
        cv2.rectangle(result, (x, y), (x + w, y + h), color, thickness)
        return result
class MultiObjectTracker:
    """Keeps one ``cv2.legacy`` tracker per tracked object, keyed by an integer id."""

    # Tracker name -> factory attribute on ``cv2.legacy``; built once here
    # instead of on every call to add().
    _FACTORY_NAMES = {
        "CSRT": "TrackerCSRT_create",
        "KCF": "TrackerKCF_create",
        "MOSSE": "TrackerMOSSE_create",
    }

    def __init__(self, tracker_type: str = "CSRT"):
        """Remember the tracker type to use for objects added later."""
        self.tracker_type = tracker_type
        self.trackers: "Dict[int, cv2.Tracker]" = {}
        self.next_id = 0

    def add(self, frame: np.ndarray, bbox: Tuple[int, int, int, int]) -> int:
        """Start tracking *bbox* in *frame* and return the new track id.

        A tracker type that is not in the supported table falls back to CSRT.
        """
        factory_name = self._FACTORY_NAMES.get(
            self.tracker_type.upper(),
            "TrackerCSRT_create"
        )
        tracker = getattr(cv2.legacy, factory_name)()
        tracker.init(frame, bbox)
        track_id = self.next_id
        self.trackers[track_id] = tracker
        self.next_id += 1
        return track_id

    def remove(self, track_id: int) -> None:
        """Stop tracking *track_id*; unknown ids are ignored."""
        self.trackers.pop(track_id, None)

    # The return annotation is quoted so it is not evaluated when the class is
    # defined: ``Dict`` is not among the typing names imported at the top of
    # the file.
    def update(self, frame: np.ndarray) -> "Dict[int, Tuple[bool, Tuple[int, int, int, int]]]":
        """Update every tracker with *frame*.

        Returns:
            A dict mapping each track id to ``(success, bbox)``, where the
            bbox values have been converted with ``int()``.
        """
        results = {}
        for track_id, tracker in self.trackers.items():
            success, bbox = tracker.update(frame)
            results[track_id] = (success, tuple(int(v) for v in bbox))
        return results
# 33.7 知识图谱
33.7.1 计算机视觉技术体系
计算机视觉技术层次
┌─────────────────────────────────────────────────────────────┐
│ 应用层 │
│ 人脸识别、目标检测、图像分割、OCR、姿态估计 │
└─────────────────────────────────────────────────────────────┘
│
▼
┌─────────────────────────────────────────────────────────────┐
│ 算法层 │
│ 特征提取、深度学习、传统算法、图像处理 │
└─────────────────────────────────────────────────────────────┘
│
▼
┌─────────────────────────────────────────────────────────────┐
│ 工具层 │
│ OpenCV、PIL、scikit-image、PyTorch、TensorFlow │
└─────────────────────────────────────────────────────────────┘
OpenCV核心模块:
┌─────────────────────────────────────────┐
│ cv2.imread/imwrite 图像读写 │
│ cv2.cvtColor 颜色空间转换 │
│ cv2.resize 图像缩放 │
│ cv2.GaussianBlur 高斯模糊 │
│ cv2.Canny 边缘检测 │
│ cv2.findContours 轮廓检测 │
│ cv2.VideoCapture 视频捕获 │
└─────────────────────────────────────────┘
33.7.2 图像处理流程
图像处理标准流程
┌─────────────────────────────────────────┐
│ 1. 图像获取 读取图像/视频 │
│ 2. 预处理 缩放、去噪、归一化 │
│ 3. 特征提取 边缘、角点、纹理 │
│ 4. 处理分析 分割、检测、识别 │
│ 5. 结果输出 标注、保存、展示 │
└─────────────────────────────────────────┘
33.8 技术选型指南
33.8.1 图像处理库选型
| 场景 | 推荐库 | 原因 |
|---|---|---|
| 传统图像处理 | OpenCV | 功能全面 |
| 简单图像操作 | PIL/Pillow | 易用性好 |
| 科学图像分析 | scikit-image | 算法丰富 |
| 深度学习视觉 | PyTorch/TensorFlow | GPU加速 |
33.8.2 特征提取方法选型
| 场景 | 推荐方法 | 说明 |
|---|---|---|
| 边缘检测 | Canny | 效果稳定 |
| 角点检测 | Harris/SIFT | Harris旋转不变但不具备尺度不变性;SIFT具备尺度不变性 |
| 特征匹配 | ORB | 速度快 |
| 深度特征 | CNN | 表达能力强 |
33.9 常见问题与解决方案
33.9.1 图像读取问题
python
# 问题:中文路径读取失败
# 解决方案:使用numpy读取
import cv2
import numpy as np
def imread_chinese(path, flags=-1):
    """Read an image by loading the file bytes with NumPy and decoding them.

    Reading the bytes with ``np.fromfile`` and decoding them in memory avoids
    handing the path itself to OpenCV.

    Args:
        path: Location of the image file; passed to ``np.fromfile``.
        flags: Decode flags forwarded to ``cv2.imdecode``. The default ``-1``
            is the value of ``cv2.IMREAD_UNCHANGED``.

    Returns:
        The result of ``cv2.imdecode``, or ``None`` when the file is empty.
    """
    buffer = np.fromfile(path, dtype=np.uint8)
    if buffer.size == 0:
        # An empty buffer cannot be decoded; report "no image" with None, the
        # same signal cv2.imread uses for an unreadable file.
        return None
    return cv2.imdecode(buffer, flags)
# 33.9.2 颜色空间转换
python
# 问题:BGR与RGB混淆
# 解决方案:明确转换
import cv2
# OpenCV loads colour images in BGR channel order by default
img = cv2.imread('image.jpg')
if img is None:
    # cv2.imread reports an unreadable or missing file by returning None
    # rather than raising, so fail here with a clear message.
    raise FileNotFoundError("Cannot read image: image.jpg")
rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# 33.9.3 视频处理内存问题
python
# 问题:处理大视频内存溢出
# 解决方案:逐帧处理
cap = cv2.VideoCapture('video.mp4')
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        # Handle one frame at a time so frames are not accumulated in memory
        process_frame(frame)
finally:
    # Release the capture even when process_frame raises
    cap.release()
# 33.10 本章小结
本章详细介绍了Python计算机视觉的核心概念和实践:
- 图像基础:图像表示、色彩空间、图像读写
- 图像处理:几何变换、滤波、形态学操作
- 特征检测:角点检测、特征描述符、轮廓检测
- 图像分割:阈值分割、颜色分割、高级分割算法
- 目标检测:模板匹配、级联分类器、人脸检测
- 视频处理:视频读写、物体追踪
- 应用实例:完整的视觉处理流程
练习题
- 实现一个图像滤镜系统,支持模糊、锐化、边缘检测等效果
- 开发一个文档扫描应用,自动检测文档边缘并进行透视变换
- 实现一个车牌识别系统,包含车牌定位和字符识别
- 开发一个实时人脸检测应用,支持多人脸检测和标记
- 实现一个运动检测系统,检测视频中的移动物体