OpenCV进阶笔记
1. 特征检测与描述
角点检测
Harris角点检测
import cv2
import numpy as np
import matplotlib.pyplot as plt
def harris_corner_detection(img, block_size=2, ksize=3, k=0.04):
    """Detect Harris corners and paint them onto a copy of ``img``.

    Args:
        img: BGR image (it is converted with ``cv2.COLOR_BGR2GRAY``).
        block_size: Neighbourhood size forwarded to ``cv2.cornerHarris``.
        ksize: Sobel aperture size forwarded to ``cv2.cornerHarris``.
        k: Harris free parameter forwarded to ``cv2.cornerHarris``.

    Returns:
        ``(annotated, response)``: a copy of ``img`` with strong corners set
        to ``[0, 0, 255]`` and the dilated Harris response map.
    """
    # cornerHarris is fed a float32 single-channel image.
    gray_f32 = np.float32(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY))

    # Dilating the response makes each corner cover a few pixels.
    response = cv2.dilate(cv2.cornerHarris(gray_f32, block_size, ksize, k), None)

    # Keep responses above 1% of the strongest one.
    corner_mask = response > 0.01 * response.max()

    annotated = img.copy()
    annotated[corner_mask] = [0, 0, 255]
    return annotated, response
# 使用示例
# result, corners = harris_corner_detection(img)
Shi-Tomasi角点检测
def goodFeaturesToTrack_detection(img, max_corners=100, quality_level=0.01, min_distance=10):
    """Detect Shi-Tomasi corners and draw them as filled green dots.

    Args:
        img: BGR image (it is converted with ``cv2.COLOR_BGR2GRAY``).
        max_corners: Upper bound on corners, forwarded to
            ``cv2.goodFeaturesToTrack``.
        quality_level: Minimum accepted corner quality, forwarded as-is.
        min_distance: Minimum distance between corners, forwarded as-is.

    Returns:
        ``(result, corners)``: an annotated copy of ``img`` and the corner
        array cast to integers, or ``None`` in place of the array when the
        detector returns ``None``.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    corners = cv2.goodFeaturesToTrack(gray, max_corners, quality_level, min_distance)

    result = img.copy()
    if corners is None:
        return result, corners

    # np.int0 was an alias of np.intp and no longer exists in NumPy 2.x.
    corners = corners.astype(np.intp)
    for x, y in corners.reshape(-1, 2):
        # Plain Python ints keep cv2.circle happy across OpenCV versions.
        cv2.circle(result, (int(x), int(y)), 3, (0, 255, 0), -1)
    return result, corners
# 使用示例
# result, corners = goodFeaturesToTrack_detection(img)
SIFT特征检测
def sift_feature_detection(img):
    """Detect SIFT keypoints on ``img`` and compute their descriptors.

    Args:
        img: BGR image; it is converted to grayscale before detection.

    Returns:
        ``(result, keypoints, descriptors)`` where ``result`` is the image
        produced by ``cv2.drawKeypoints`` with the rich-keypoints flag.
    """
    detector = cv2.SIFT_create()
    keypoints, descriptors = detector.detectAndCompute(
        cv2.cvtColor(img, cv2.COLOR_BGR2GRAY), None
    )

    result = cv2.drawKeypoints(
        img,
        keypoints,
        None,
        flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS,
    )
    return result, keypoints, descriptors
def sift_matching(img1, img2):
    """Match SIFT features between two images using Lowe's ratio test.

    Args:
        img1: First BGR image.
        img2: Second BGR image.

    Returns:
        ``(result, good_matches)``: the side-by-side visualisation from
        ``cv2.drawMatchesKnn`` and the accepted matches, each wrapped in a
        one-element list as ``drawMatchesKnn`` expects. ``good_matches`` is
        empty when either image yields no descriptors.
    """
    gray1 = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
    gray2 = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)

    sift = cv2.SIFT_create()
    kp1, des1 = sift.detectAndCompute(gray1, None)
    kp2, des2 = sift.detectAndCompute(gray2, None)

    good_matches = []
    # Descriptors are None when an image has no keypoints; knnMatch would
    # raise on them, so matching is skipped in that case.
    if des1 is not None and des2 is not None:
        bf = cv2.BFMatcher()
        for pair in bf.knnMatch(des1, des2, k=2):
            # Fewer than two neighbours come back when the second image has
            # a single descriptor; the ratio test is undefined then.
            if len(pair) < 2:
                continue
            m, n = pair
            if m.distance < 0.75 * n.distance:
                good_matches.append([m])

    result = cv2.drawMatchesKnn(
        img1, kp1, img2, kp2, good_matches, None,
        flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS,
    )
    return result, good_matches
# 使用示例
# result, kp, des = sift_feature_detection(img)
# match_result, matches = sift_matching(img1, img2)
ORB特征检测
def orb_feature_detection(img, n_features=500):
    """Detect ORB keypoints on ``img`` and compute their descriptors.

    Args:
        img: BGR image; it is converted to grayscale before detection.
        n_features: Passed to ``cv2.ORB_create`` as ``nfeatures``.

    Returns:
        ``(result, keypoints, descriptors)`` where ``result`` shows the
        keypoints drawn in ``(0, 255, 0)``.
    """
    detector = cv2.ORB_create(nfeatures=n_features)
    keypoints, descriptors = detector.detectAndCompute(
        cv2.cvtColor(img, cv2.COLOR_BGR2GRAY), None
    )
    result = cv2.drawKeypoints(img, keypoints, None, color=(0, 255, 0))
    return result, keypoints, descriptors
def orb_matching(img1, img2):
    """Match ORB features between two images with a cross-checked matcher.

    Args:
        img1: First BGR image.
        img2: Second BGR image.

    Returns:
        ``(result, matches)``: a visualisation of the 50 closest matches and
        the full match list sorted by ascending distance. ``matches`` is
        empty when either image yields no descriptors.
    """
    gray1 = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
    gray2 = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)

    orb = cv2.ORB_create()
    kp1, des1 = orb.detectAndCompute(gray1, None)
    kp2, des2 = orb.detectAndCompute(gray2, None)

    matches = []
    # Descriptors are None when no keypoint was found; BFMatcher.match
    # would raise on them, so matching is skipped in that case.
    if des1 is not None and des2 is not None:
        # ORB descriptors are binary, hence the Hamming norm.
        bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
        matches = sorted(bf.match(des1, des2), key=lambda m: m.distance)

    result = cv2.drawMatches(
        img1, kp1, img2, kp2, matches[:50], None,
        flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS,
    )
    return result, matches
# 使用示例
# result, kp, des = orb_feature_detection(img)
# match_result, matches = orb_matching(img1, img2)
2. 图像分割
分水岭算法
def watershed_segmentation(img):
    """Segment ``img`` with the marker-based watershed algorithm.

    Seeds come from an Otsu-thresholded, morphologically opened mask:
    dilating it gives the sure-background area, and thresholding its
    distance transform gives the sure-foreground area.

    Args:
        img: BGR image (it is converted with ``cv2.COLOR_BGR2GRAY``).

    Returns:
        ``(result, markers)``: a copy of ``img`` in which pixels labelled
        ``-1`` by ``cv2.watershed`` are set to ``[255, 0, 0]`` (blue under
        the BGR order assumed here), and the marker array itself.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Inverted binary mask; Otsu picks the threshold value.
    _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

    # Opening removes small specks from the mask.
    kernel = np.ones((3, 3), np.uint8)
    opened = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel, iterations=2)

    # Sure background: the mask grown by dilation.
    background = cv2.dilate(opened, kernel, iterations=3)

    # Sure foreground: pixels beyond 70% of the maximum distance value.
    distance = cv2.distanceTransform(opened, cv2.DIST_L2, 5)
    _, foreground = cv2.threshold(
        distance, 0.7 * distance.max(), 255, cv2.THRESH_BINARY
    )
    foreground = np.uint8(foreground)

    # Whatever is neither sure background nor sure foreground.
    undecided = cv2.subtract(background, foreground)

    # Label the foreground blobs, shift labels so the background is 1,
    # then reserve 0 for the undecided band.
    _, labels = cv2.connectedComponents(foreground)
    labels = labels + 1
    labels[undecided == 255] = 0

    labels = cv2.watershed(img, labels)

    result = img.copy()
    result[labels == -1] = [255, 0, 0]
    return result, labels
# 使用示例
# result, markers = watershed_segmentation(img)
GrabCut算法
def grabcut_segmentation(img, rect=None):
    """Extract the foreground of ``img`` with rectangle-initialised GrabCut.

    Args:
        img: Image passed to ``cv2.grabCut``.
        rect: ``(x, y, w, h)`` rectangle handed to ``cv2.grabCut``. When
            ``None``, a rectangle starting at a quarter of the width and
            height and spanning half of each is used.

    Returns:
        ``(result, mask2)``: ``img`` multiplied by a 0/1 mask, and that
        ``uint8`` mask (1 where GrabCut's label is neither 0 nor 2).
    """
    if rect is None:
        rows, cols = img.shape[:2]
        rect = (cols // 4, rows // 4, cols // 2, rows // 2)

    labels = np.zeros(img.shape[:2], np.uint8)
    # Model buffers that grabCut fills in and reuses internally.
    background_model = np.zeros((1, 65), np.float64)
    foreground_model = np.zeros((1, 65), np.float64)

    cv2.grabCut(
        img, labels, rect, background_model, foreground_model,
        5, cv2.GC_INIT_WITH_RECT,
    )

    # Labels 0 and 2 become 0; every other label becomes 1.
    is_background = (labels == cv2.GC_BGD) | (labels == cv2.GC_PR_BGD)
    mask2 = np.where(is_background, 0, 1).astype('uint8')

    result = img * mask2[:, :, np.newaxis]
    return result, mask2
def interactive_grabcut(img):
    """Run GrabCut with a fixed rectangle inset 50 px from each border.

    Despite the name, no user interaction happens here: the rectangle is
    predefined as ``(50, 50, width - 100, height - 100)``.

    Args:
        img: Image passed to ``cv2.grabCut``.

    Returns:
        ``(result, mask2)``: ``img`` multiplied by a 0/1 mask, and that
        ``uint8`` mask.
    """
    rows, cols = img.shape[:2]
    # Placeholder for an interactively selected rectangle.
    rect = (50, 50, cols - 100, rows - 100)

    labels = np.zeros((rows, cols), np.uint8)
    background_model = np.zeros((1, 65), np.float64)
    foreground_model = np.zeros((1, 65), np.float64)
    cv2.grabCut(
        img, labels, rect, background_model, foreground_model,
        5, cv2.GC_INIT_WITH_RECT,
    )

    # Labels 0 and 2 become 0; every other label becomes 1.
    is_background = (labels == cv2.GC_BGD) | (labels == cv2.GC_PR_BGD)
    mask2 = np.where(is_background, 0, 1).astype('uint8')
    result = img * mask2[:, :, np.newaxis]
    return result, mask2
# 使用示例
# result, mask = grabcut_segmentation(img)
K-means聚类分割
def kmeans_segmentation(img, k=3):
    """Quantise the colours of ``img`` into ``k`` clusters with K-means.

    Args:
        img: Image whose data is reshaped to rows of three values.
        k: Number of clusters.

    Returns:
        ``(segmented_image, labels)``: the image with each pixel replaced
        by its cluster centre (``uint8``), and the per-pixel cluster index
        reshaped to ``img.shape[:2]``.
    """
    # One row per pixel, float32 as passed to cv2.kmeans.
    samples = np.float32(img.reshape((-1, 3)))

    # Stop after 100 iterations or when the change drops below 0.2.
    stop = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 100, 0.2)
    _, labels, centers = cv2.kmeans(
        samples, k, None, stop, 10, cv2.KMEANS_RANDOM_CENTERS
    )

    palette = np.uint8(centers)
    segmented_image = palette[labels.flatten()].reshape(img.shape)
    return segmented_image, labels.reshape(img.shape[:2])
def adaptive_kmeans_segmentation(img, max_k=8):
    """Pick a cluster count in ``2..max_k`` and segment ``img`` with it.

    Every candidate ``k`` is scored by the compactness value returned by
    ``cv2.kmeans``; the ``k`` with the lowest score is then passed to
    ``kmeans_segmentation``.

    NOTE(review): compactness generally shrinks as ``k`` grows, so this
    criterion probably favours ``max_k`` almost always -- confirm whether
    an elbow-style criterion was intended.

    Args:
        img: Image whose data is reshaped to rows of three values.
        max_k: Largest cluster count to try (inclusive).

    Returns:
        ``(segmented_image, labels, best_k)``.
    """
    # The samples and the stop criteria do not depend on k.
    samples = np.float32(img.reshape((-1, 3)))
    stop = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 100, 0.2)

    best_k, best_score = 2, float('inf')
    for k in range(2, max_k + 1):
        compactness, _, _ = cv2.kmeans(
            samples, k, None, stop, 10, cv2.KMEANS_RANDOM_CENTERS
        )
        if compactness < best_score:
            best_k, best_score = k, compactness

    segmented_image, labels = kmeans_segmentation(img, best_k)
    return segmented_image, labels, best_k
# 使用示例
# segmented, labels = kmeans_segmentation(img, k=4)
# adaptive_segmented, adaptive_labels, best_k = adaptive_kmeans_segmentation(img)
3. 轮廓检测与分析
轮廓检测
def contour_detection_analysis(img):
    """Find contours in ``img`` and annotate those with area >= 100.

    For every kept contour the outline, its upright bounding rectangle,
    its minimum enclosing circle and an ``Area: N`` label are drawn.

    Args:
        img: BGR image; it is thresholded at 127 after grayscale conversion.

    Returns:
        ``(result, contours)``: the annotated copy of ``img`` and all
        contours returned by ``cv2.findContours`` (including small ones).
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    result = img.copy()
    for contour in contours:
        area = cv2.contourArea(contour)
        # Skip small contours.
        if area < 100:
            continue

        x, y, w, h = cv2.boundingRect(contour)
        (cx, cy), radius = cv2.minEnclosingCircle(contour)

        cv2.drawContours(result, [contour], -1, (0, 255, 0), 2)
        cv2.rectangle(result, (x, y), (x + w, y + h), (255, 0, 0), 2)
        # minEnclosingCircle returns floats; drawing needs integers.
        cv2.circle(result, (int(cx), int(cy)), int(radius), (0, 0, 255), 2)
        cv2.putText(result, f'Area: {area:.0f}', (x, y - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
    return result, contours
def shape_detection(img):
    """Label each external contour of ``img`` with a shape name.

    The name depends on the vertex count of the polygonal approximation:
    3 -> "Triangle", 4 -> "Square" or "Rectangle" (by bounding-box aspect
    ratio), 5 -> "Pentagon", anything else -> "Circle".

    Args:
        img: BGR image; it is thresholded at 127 after grayscale conversion.

    Returns:
        A copy of ``img`` with contours and shape labels drawn on it.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    names_by_vertices = {3: "Triangle", 5: "Pentagon"}
    result = img.copy()
    for contour in contours:
        # Approximation tolerance is 2% of the contour perimeter.
        tolerance = 0.02 * cv2.arcLength(contour, True)
        polygon = cv2.approxPolyDP(contour, tolerance, True)
        x, y, w, h = cv2.boundingRect(polygon)

        vertex_count = len(polygon)
        if vertex_count == 4:
            # A nearly 1:1 bounding box is treated as a square.
            ratio = w / float(h)
            shape = "Square" if 0.95 <= ratio <= 1.05 else "Rectangle"
        else:
            shape = names_by_vertices.get(vertex_count, "Circle")

        cv2.drawContours(result, [contour], -1, (0, 255, 0), 2)
        cv2.putText(result, shape, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX,
                    0.7, (255, 0, 0), 2)
    return result
# 使用示例
# result, contours = contour_detection_analysis(img)
# shape_result = shape_detection(img)
轮廓特征
def contour_features(contour):
    """Compute a dictionary of geometric features for one contour.

    Args:
        contour: A contour as returned by ``cv2.findContours``.

    Returns:
        A dict with the keys ``area``, ``perimeter``, ``bounding_rect``,
        ``aspect_ratio``, ``extent``, ``min_area_rect``,
        ``min_enclosing_circle``, ``convex_hull``, ``hull_area``,
        ``solidity``, ``equivalent_diameter`` and ``centroid``.
        ``fitted_ellipse`` is present only when the contour has at least
        five points. ``solidity`` is ``0.0`` when the hull area is zero,
        and ``centroid`` is ``(0, 0)`` when ``m00`` is zero.
    """
    features = {}

    # Basic measurements.
    area = cv2.contourArea(contour)
    features['area'] = area
    features['perimeter'] = cv2.arcLength(contour, True)

    # Upright bounding rectangle and ratios derived from it.
    x, y, w, h = cv2.boundingRect(contour)
    features['bounding_rect'] = (x, y, w, h)
    features['aspect_ratio'] = float(w) / h
    features['extent'] = float(area) / (w * h)

    # Rotated minimum-area rectangle and minimum enclosing circle.
    features['min_area_rect'] = cv2.minAreaRect(contour)
    (cx, cy), radius = cv2.minEnclosingCircle(contour)
    features['min_enclosing_circle'] = ((cx, cy), radius)

    # The ellipse fit is only attempted with five or more points.
    if len(contour) >= 5:
        features['fitted_ellipse'] = cv2.fitEllipse(contour)

    # Convex hull and solidity (contour area / hull area).
    hull = cv2.convexHull(contour)
    hull_area = cv2.contourArea(hull)
    features['convex_hull'] = hull
    features['hull_area'] = hull_area
    # A degenerate contour (single point, straight line) has a zero-area
    # hull; report 0.0 instead of raising ZeroDivisionError.
    features['solidity'] = float(area) / hull_area if hull_area else 0.0

    # Diameter of the circle with the same area as the contour.
    features['equivalent_diameter'] = np.sqrt(4 * area / np.pi)

    # Centroid from the spatial moments.
    M = cv2.moments(contour)
    if M['m00'] != 0:
        features['centroid'] = (int(M['m10'] / M['m00']), int(M['m01'] / M['m00']))
    else:
        features['centroid'] = (0, 0)
    return features
def analyze_contour_features(img):
    """Compute and draw features for every external contour with area >= 100.

    Args:
        img: BGR image; it is thresholded at 127 after grayscale conversion.

    Returns:
        ``(result, all_features)``: an annotated copy of ``img`` and the
        list of feature dicts produced by ``contour_features``, one per
        kept contour.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    result = img.copy()
    all_features = []
    for contour in contours:
        # Skip small contours.
        if cv2.contourArea(contour) < 100:
            continue

        info = contour_features(contour)
        all_features.append(info)

        # Outline, centroid dot, bounding rectangle, then the area label.
        cv2.drawContours(result, [contour], -1, (0, 255, 0), 2)
        cv2.circle(result, info['centroid'], 5, (255, 0, 0), -1)
        x, y, w, h = info['bounding_rect']
        cv2.rectangle(result, (x, y), (x + w, y + h), (255, 0, 0), 2)
        label = f"Area: {info['area']:.0f}"
        cv2.putText(result, label, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX,
                    0.5, (255, 255, 255), 1)
    return result, all_features
# 使用示例
# result, features_list = analyze_contour_features(img)
4. 模板匹配
基本模板匹配
def template_matching(img, template, threshold=0.8):
    """Mark every location where ``template`` matches ``img`` well enough.

    Matching uses ``cv2.TM_CCOEFF_NORMED`` on the grayscale versions of
    both images.

    Args:
        img: BGR image to search in.
        template: BGR template image.
        threshold: Minimum score for a location to count as a match.

    Returns:
        ``(result, res, locations)``: an annotated copy of ``img``, the raw
        score map, and the ``np.where`` tuple of matching (row, column)
        indices.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    template_gray = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)
    tpl_h, tpl_w = template_gray.shape

    res = cv2.matchTemplate(gray, template_gray, cv2.TM_CCOEFF_NORMED)
    locations = np.where(res >= threshold)

    result = img.copy()
    # np.where yields (rows, cols); drawing wants (x, y) = (col, row).
    rows, cols = locations
    for x, y in zip(cols, rows):
        cv2.rectangle(result, (x, y), (x + tpl_w, y + tpl_h), (0, 255, 0), 2)
    return result, res, locations
def multi_scale_template_matching(img, template, scale_range=(0.5, 2.0), scale_step=0.1):
    """Find the best template match over a range of template scales.

    The template is resized to each scale in ``scale_range`` (both ends
    included, spaced by ``scale_step``) and matched with
    ``cv2.TM_CCOEFF_NORMED``; the scale with the highest score wins.

    Args:
        img: BGR image to search in.
        template: BGR template image.
        scale_range: ``(min_scale, max_scale)`` of the scales to try.
        scale_step: Distance between consecutive scales; must be positive.

    Returns:
        ``(result, best_val, best_loc, best_scale)``: an annotated copy of
        ``img``, the best score (``-1`` if nothing was matched), its
        top-left location (``None`` if nothing was matched) and the
        winning scale.

    Raises:
        ValueError: If ``scale_step`` is not positive.
    """
    if scale_step <= 0:
        raise ValueError("scale_step must be positive")

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    template_gray = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)

    best_match = None
    best_val = -1
    best_loc = None
    best_scale = 1.0

    # Derive each scale from an integer index instead of accumulating
    # ``scale += scale_step``: accumulated float error pushed the last
    # value just past the upper bound, so the end of the range was skipped.
    low, high = scale_range
    n_scales = int(np.floor((high - low) / scale_step + 1e-9)) + 1
    for index in range(max(n_scales, 0)):
        scale = low + index * scale_step
        scaled_template = cv2.resize(template_gray, None, fx=scale, fy=scale)
        tpl_h, tpl_w = scaled_template.shape

        # A template larger than the image cannot be matched.
        if tpl_h > gray.shape[0] or tpl_w > gray.shape[1]:
            continue

        res = cv2.matchTemplate(gray, scaled_template, cv2.TM_CCOEFF_NORMED)
        _, max_val, _, max_loc = cv2.minMaxLoc(res)
        if max_val > best_val:
            best_val = max_val
            best_loc = max_loc
            best_scale = scale
            best_match = (tpl_w, tpl_h)

    result = img.copy()
    if best_match is not None:
        w, h = best_match
        cv2.rectangle(result, best_loc, (best_loc[0] + w, best_loc[1] + h), (0, 255, 0), 2)
        cv2.putText(result, f'Scale: {best_scale:.2f}, Conf: {best_val:.3f}',
                    (best_loc[0], best_loc[1] - 10), cv2.FONT_HERSHEY_SIMPLEX,
                    0.7, (255, 0, 0), 2)
    return result, best_val, best_loc, best_scale
# 使用示例
# result, res, locations = template_matching(img, template)
# ms_result, val, loc, scale = multi_scale_template_matching(img, template)
5. 霍夫变换
霍夫直线检测
def hough_line_detection(img, rho=1, theta=np.pi/180, threshold=100):
    """Detect straight lines with the standard Hough transform.

    Args:
        img: BGR image; Canny edges (50, 150) are computed from its
            grayscale version.
        rho: Distance resolution forwarded to ``cv2.HoughLines``.
        theta: Angle resolution forwarded to ``cv2.HoughLines``.
        threshold: Accumulator threshold forwarded to ``cv2.HoughLines``.

    Returns:
        ``(result, lines)``: a copy of ``img`` with each line drawn as a
        segment reaching 1000 px either side of its closest point to the
        origin, and the raw ``cv2.HoughLines`` output (``None`` if empty).
    """
    edges = cv2.Canny(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY), 50, 150)
    lines = cv2.HoughLines(edges, rho, theta, threshold)

    result = img.copy()
    if lines is None:
        return result, lines

    for entry in lines:
        line_rho, line_theta = entry[0]
        cos_t = np.cos(line_theta)
        sin_t = np.sin(line_theta)
        # (x0, y0) is the point on the line closest to the origin.
        x0 = cos_t * line_rho
        y0 = sin_t * line_rho
        # Walk 1000 px along the line direction (-sin, cos) both ways.
        start = (int(x0 + 1000 * (-sin_t)), int(y0 + 1000 * (cos_t)))
        end = (int(x0 - 1000 * (-sin_t)), int(y0 - 1000 * (cos_t)))
        cv2.line(result, start, end, (0, 0, 255), 2)
    return result, lines
def hough_line_segments_detection(img, min_line_length=50, max_line_gap=10):
    """Detect line segments with the probabilistic Hough transform.

    Args:
        img: BGR image; Canny edges (50, 150) are computed from its
            grayscale version.
        min_line_length: Forwarded to ``cv2.HoughLinesP`` as
            ``minLineLength``.
        max_line_gap: Forwarded to ``cv2.HoughLinesP`` as ``maxLineGap``.

    Returns:
        ``(result, lines)``: a copy of ``img`` with the segments drawn in
        ``(0, 255, 0)``, and the raw ``cv2.HoughLinesP`` output (``None``
        if nothing was found).
    """
    edges = cv2.Canny(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY), 50, 150)
    lines = cv2.HoughLinesP(
        edges, 1, np.pi/180, threshold=100,
        minLineLength=min_line_length, maxLineGap=max_line_gap,
    )

    result = img.copy()
    if lines is None:
        return result, lines

    for segment in lines:
        x1, y1, x2, y2 = segment[0]
        cv2.line(result, (x1, y1), (x2, y2), (0, 255, 0), 2)
    return result, lines
# 使用示例
# line_result, lines = hough_line_detection(img)
# segment_result, segments = hough_line_segments_detection(img)
霍夫圆检测
def hough_circle_detection(img, dp=1, min_dist=50, param1=50, param2=30,
                           min_radius=0, max_radius=0):
    """Detect circles with ``cv2.HoughCircles`` and draw them.

    Args:
        img: BGR image; its grayscale version is median-blurred (size 5)
            before detection.
        dp: Forwarded to ``cv2.HoughCircles``.
        min_dist: Forwarded to ``cv2.HoughCircles``.
        param1: Forwarded to ``cv2.HoughCircles``.
        param2: Forwarded to ``cv2.HoughCircles``.
        min_radius: Forwarded as ``minRadius``.
        max_radius: Forwarded as ``maxRadius``.

    Returns:
        ``(result, circles)``: an annotated copy of ``img`` and the circles
        as a rounded integer array of ``(x, y, r)`` rows, or ``None`` when
        nothing was detected.
    """
    # Median blur reduces noise before the transform.
    smoothed = cv2.medianBlur(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY), 5)

    circles = cv2.HoughCircles(
        smoothed, cv2.HOUGH_GRADIENT, dp, min_dist,
        param1=param1, param2=param2,
        minRadius=min_radius, maxRadius=max_radius,
    )

    result = img.copy()
    if circles is None:
        return result, circles

    circles = np.round(circles[0, :]).astype("int")
    for (x, y, r) in circles:
        cv2.circle(result, (x, y), r, (0, 255, 0), 4)   # outline
        cv2.circle(result, (x, y), 2, (0, 0, 255), 3)   # centre
    return result, circles
def adaptive_circle_detection(img):
    """Try a grid of Hough parameters and keep the run with most circles.

    The grid covers ``param1`` in (30, 50, 70), ``param2`` in (20, 30, 40)
    and ``min_dist`` in (30, 50, 70), with radii limited to 10..100. On a
    tie the earlier combination is kept.

    Args:
        img: BGR image; its grayscale version is median-blurred (size 5).

    Returns:
        ``(result, best_circles)``: an annotated copy of ``img`` and the
        unrounded ``cv2.HoughCircles`` output of the winning run
        (``None`` when no run found a circle).
    """
    smoothed = cv2.medianBlur(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY), 5)

    # Same traversal order as three nested loops: param1, param2, min_dist.
    grid = [
        (p1, p2, dist)
        for p1 in [30, 50, 70]
        for p2 in [20, 30, 40]
        for dist in [30, 50, 70]
    ]

    best_circles = None
    best_count = 0
    for p1, p2, dist in grid:
        found = cv2.HoughCircles(
            smoothed, cv2.HOUGH_GRADIENT, 1, dist,
            param1=p1, param2=p2, minRadius=10, maxRadius=100,
        )
        if found is not None and len(found[0]) > best_count:
            best_count = len(found[0])
            best_circles = found

    result = img.copy()
    if best_circles is not None:
        for (x, y, r) in np.round(best_circles[0, :]).astype("int"):
            cv2.circle(result, (x, y), r, (0, 255, 0), 4)
            cv2.circle(result, (x, y), 2, (0, 0, 255), 3)
    return result, best_circles
# 使用示例
# circle_result, circles = hough_circle_detection(img)
# adaptive_result, adaptive_circles = adaptive_circle_detection(img)
6. 光流检测
Lucas-Kanade光流
def lucas_kanade_optical_flow(video_path=None, img1=None, img2=None):
    """Track Shi-Tomasi corners with pyramidal Lucas-Kanade optical flow.

    Two modes are supported:

    * ``video_path`` given: corners from the first frame are tracked
      through the video, the tracks are shown in a window, and playback
      stops at end of stream, on ESC, or when no points remain.
    * ``img1`` and ``img2`` given: corners from ``img1`` are tracked into
      ``img2`` and drawn on a copy of ``img2``.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``; takes precedence
            over the image pair when truthy.
        img1: First BGR image of a pair.
        img2: Second BGR image of a pair.

    Returns:
        ``None`` in video mode or when no usable input is given. In
        image-pair mode, the annotated copy of ``img2`` (left unannotated
        when no corners are found or the flow cannot be computed).
    """
    corner_params = dict(maxCorners=100, qualityLevel=0.3, minDistance=7, blockSize=7)

    if video_path:
        cap = cv2.VideoCapture(video_path)
        # try/finally guarantees the capture is released on every exit
        # path, including the early return and unexpected exceptions.
        try:
            ret, frame1 = cap.read()
            if not ret:
                return None

            prev_gray = cv2.cvtColor(frame1, cv2.COLOR_BGR2GRAY)
            p0 = cv2.goodFeaturesToTrack(prev_gray, mask=None, **corner_params)
            # One random colour per potential track (at most 100 corners).
            colors = np.random.randint(0, 255, (100, 3))
            # Accumulates the track lines across frames.
            mask = np.zeros_like(frame1)

            # Without points there is nothing to track, and
            # calcOpticalFlowPyrLK rejects an empty point set.
            while p0 is not None and len(p0) > 0:
                ret, frame2 = cap.read()
                if not ret:
                    break
                frame_gray = cv2.cvtColor(frame2, cv2.COLOR_BGR2GRAY)

                p1, st, err = cv2.calcOpticalFlowPyrLK(prev_gray, frame_gray, p0, None)
                if p1 is None:
                    break

                # Keep only the points whose status flag is 1.
                good_new = p1[st == 1]
                good_old = p0[st == 1]

                for i, (tr, to) in enumerate(zip(good_new, good_old)):
                    a, b = (int(v) for v in tr.ravel())
                    c, d = (int(v) for v in to.ravel())
                    mask = cv2.line(mask, (a, b), (c, d), colors[i].tolist(), 2)
                    frame2 = cv2.circle(frame2, (a, b), 5, colors[i].tolist(), -1)

                cv2.imshow('Lucas-Kanade Optical Flow', cv2.add(frame2, mask))
                if (cv2.waitKey(30) & 0xff) == 27:  # ESC quits
                    break

                prev_gray = frame_gray.copy()
                p0 = good_new.reshape(-1, 1, 2)
        finally:
            cap.release()
            cv2.destroyAllWindows()
        return None

    if img1 is not None and img2 is not None:
        gray1 = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
        gray2 = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)
        result = img2.copy()

        p0 = cv2.goodFeaturesToTrack(gray1, mask=None, **corner_params)
        if p0 is None:
            return result

        p1, st, err = cv2.calcOpticalFlowPyrLK(gray1, gray2, p0, None)
        if p1 is None:
            return result

        for tr, to in zip(p1[st == 1], p0[st == 1]):
            a, b = (int(v) for v in tr.ravel())
            c, d = (int(v) for v in to.ravel())
            result = cv2.line(result, (a, b), (c, d), (0, 255, 0), 2)
            result = cv2.circle(result, (a, b), 5, (0, 0, 255), -1)
        return result

    return None
# 使用示例
# lucas_kanade_optical_flow(video_path='video.mp4')
# result = lucas_kanade_optical_flow(img1=frame1, img2=frame2)
Farneback稠密光流
def _flow_to_bgr(flow, hsv):
    """Render a dense flow field as BGR: hue = direction, value = magnitude.

    ``hsv`` is a preallocated image (saturation already set) that is
    updated in place and then converted.
    """
    mag, ang = cv2.cartToPolar(flow[..., 0], flow[..., 1])
    # Angle in radians -> degrees, halved before being stored as hue.
    hsv[..., 0] = ang * 180 / np.pi / 2
    hsv[..., 2] = cv2.normalize(mag, None, 0, 255, cv2.NORM_MINMAX)
    return cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)


def farneback_optical_flow(video_path=None, img1=None, img2=None):
    """Compute Farneback dense optical flow and colour-code it.

    Two modes are supported:

    * ``video_path`` given: the flow between consecutive frames is shown
      in a window until end of stream or ESC.
    * ``img1`` and ``img2`` given: the flow from ``img1`` to ``img2`` is
      computed once and returned.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``; takes precedence
            over the image pair when truthy.
        img1: First BGR image of a pair.
        img2: Second BGR image of a pair.

    Returns:
        ``None`` in video mode or when no usable input is given. In
        image-pair mode, ``(bgr, flow)``: the colour-coded visualisation
        and the raw flow array.
    """
    # Positional arguments passed to calcOpticalFlowFarneback after the
    # (unused) initial-flow slot; identical in both modes.
    farneback_args = (0.5, 3, 15, 3, 5, 1.2, 0)

    if video_path:
        cap = cv2.VideoCapture(video_path)
        # try/finally guarantees the capture is released on every exit
        # path, including the early return and unexpected exceptions.
        try:
            ret, frame1 = cap.read()
            if not ret:
                return None

            prev_gray = cv2.cvtColor(frame1, cv2.COLOR_BGR2GRAY)
            hsv = np.zeros_like(frame1)
            hsv[..., 1] = 255  # full saturation

            while True:
                ret, frame2 = cap.read()
                if not ret:
                    break
                gray = cv2.cvtColor(frame2, cv2.COLOR_BGR2GRAY)
                flow = cv2.calcOpticalFlowFarneback(prev_gray, gray, None, *farneback_args)

                cv2.imshow('Farneback Optical Flow', _flow_to_bgr(flow, hsv))
                if (cv2.waitKey(30) & 0xff) == 27:  # ESC quits
                    break
                prev_gray = gray
        finally:
            cap.release()
            cv2.destroyAllWindows()
        return None

    if img1 is not None and img2 is not None:
        gray1 = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
        gray2 = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)
        flow = cv2.calcOpticalFlowFarneback(gray1, gray2, None, *farneback_args)

        hsv = np.zeros_like(img1)
        hsv[..., 1] = 255  # full saturation
        bgr = _flow_to_bgr(flow, hsv)
        return bgr, flow

    return None
# 使用示例
# farneback_optical_flow(video_path='video.mp4')
# result, flow = farneback_optical_flow(img1=frame1, img2=frame2)
7. 人脸检测与识别
Haar级联分类器
def haar_face_detection(img):
    """Detect faces, and eyes inside each face, with Haar cascades.

    The cascades are loaded from ``cv2.data.haarcascades`` on every call.

    Args:
        img: BGR image.

    Returns:
        ``(result, faces)``: a copy of ``img`` with face and eye rectangles
        drawn, and the face rectangles from ``detectMultiScale``.
    """
    cascade_dir = cv2.data.haarcascades
    face_cascade = cv2.CascadeClassifier(cascade_dir + 'haarcascade_frontalface_default.xml')
    eye_cascade = cv2.CascadeClassifier(cascade_dir + 'haarcascade_eye.xml')

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(
        gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30)
    )

    result = img.copy()
    for (x, y, w, h) in faces:
        cv2.rectangle(result, (x, y), (x + w, y + h), (255, 0, 0), 2)

        # Eyes are searched only inside the face region. ``face_view`` is
        # a slice of ``result``, so drawing on it draws on ``result``.
        face_gray = gray[y:y + h, x:x + w]
        face_view = result[y:y + h, x:x + w]
        for (ex, ey, ew, eh) in eye_cascade.detectMultiScale(face_gray):
            cv2.rectangle(face_view, (ex, ey), (ex + ew, ey + eh), (0, 255, 0), 2)
    return result, faces
def enhanced_face_detection(img):
    """Detect frontal and profile faces with two Haar cascades.

    Args:
        img: BGR image.

    Returns:
        ``(result, frontal_faces, profile_faces)``: a copy of ``img`` with
        labelled rectangles, followed by the rectangles found by the
        frontal and the profile cascade respectively.
    """
    cascade_dir = cv2.data.haarcascades
    face_cascade = cv2.CascadeClassifier(cascade_dir + 'haarcascade_frontalface_default.xml')
    profile_cascade = cv2.CascadeClassifier(cascade_dir + 'haarcascade_profileface.xml')

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    frontal_faces = face_cascade.detectMultiScale(gray, 1.1, 5)
    profile_faces = profile_cascade.detectMultiScale(gray, 1.1, 5)

    result = img.copy()
    # Frontal faces first, then profile faces, each with its own colour.
    passes = (
        (frontal_faces, 'Frontal', (255, 0, 0)),
        (profile_faces, 'Profile', (0, 255, 0)),
    )
    for boxes, label, color in passes:
        for (x, y, w, h) in boxes:
            cv2.rectangle(result, (x, y), (x + w, y + h), color, 2)
            cv2.putText(result, label, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX,
                        0.9, color, 2)
    return result, frontal_faces, profile_faces
# 使用示例
# face_result, faces = haar_face_detection(img)
# enhanced_result, frontal, profile = enhanced_face_detection(img)
DNN人脸检测
def dnn_face_detection(img, confidence_threshold=0.5):
    """Detect faces with a TensorFlow model loaded through ``cv2.dnn``.

    The model files ``opencv_face_detector_uint8.pb`` and
    ``opencv_face_detector.pbtxt`` are read from the working directory on
    every call.

    Args:
        img: BGR image.
        confidence_threshold: Detections must score strictly above this
            value to be drawn.

    Returns:
        A copy of ``img`` with a box and a confidence percentage drawn for
        every accepted detection.
    """
    net = cv2.dnn.readNetFromTensorflow('opencv_face_detector_uint8.pb',
                                        'opencv_face_detector.pbtxt')
    height, width = img.shape[:2]

    # The network input is a 300x300 blob with per-channel mean subtraction.
    resized = cv2.resize(img, (300, 300))
    blob = cv2.dnn.blobFromImage(resized, 1.0, (300, 300), (104.0, 177.0, 123.0))
    net.setInput(blob)
    detections = net.forward()

    result = img.copy()
    # Box coordinates are scaled back by the original width and height.
    to_pixels = np.array([width, height, width, height])
    for detection in detections[0, 0]:
        confidence = detection[2]
        if not confidence > confidence_threshold:
            continue

        (startX, startY, endX, endY) = (detection[3:7] * to_pixels).astype("int")

        text = f"{confidence * 100:.1f}%"
        # Put the label above the box unless that would leave the image.
        y = startY - 10 if startY - 10 > 10 else startY + 10
        cv2.rectangle(result, (startX, startY), (endX, endY), (0, 0, 255), 2)
        cv2.putText(result, text, (startX, y), cv2.FONT_HERSHEY_SIMPLEX,
                    0.45, (0, 0, 255), 2)
    return result
# 注意:需要下载相应的模型文件
# 下载地址:https://github.com/opencv/opencv_3rdparty/tree/dnn_samples_face_detector_20170830
8. 实践项目
车牌识别系统
class LicensePlateDetector:
    """Locate licence-plate-like regions and rectify them.

    Pipeline: CLAHE + blur -> Sobel magnitude -> Otsu threshold ->
    horizontal closing -> contour filtering by area and aspect ratio ->
    perspective warp of each candidate.
    """

    def __init__(self):
        # Accepted contour area (exclusive bounds) and long/short side ratio.
        self.min_area = 1000
        self.max_area = 10000
        self.aspect_ratio_range = (2.0, 5.0)

    def preprocess_image(self, img):
        """Return the CLAHE-equalised, Gaussian-blurred grayscale of ``img``."""
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        enhanced = clahe.apply(gray)
        return cv2.GaussianBlur(enhanced, (5, 5), 0)

    @staticmethod
    def _scale_to_uint8(magnitude):
        """Scale a non-negative array so its maximum maps to 255 (uint8).

        An all-zero input yields an all-zero result instead of dividing by
        zero.
        """
        peak = magnitude.max()
        if peak == 0:
            return np.zeros(magnitude.shape, dtype=np.uint8)
        return np.uint8(magnitude / peak * 255)

    def detect_edges(self, img):
        """Return the Sobel gradient magnitude of ``img`` as ``uint8``."""
        sobel_x = cv2.Sobel(img, cv2.CV_64F, 1, 0, ksize=3)
        sobel_y = cv2.Sobel(img, cv2.CV_64F, 0, 1, ksize=3)
        magnitude = np.sqrt(sobel_x**2 + sobel_y**2)
        return self._scale_to_uint8(magnitude)

    def find_license_plate_candidates(self, img):
        """Find rotated rectangles that look like licence plates.

        Args:
            img: BGR image.

        Returns:
            ``(result, candidates)``: a copy of ``img`` with the candidate
            boxes drawn, and the list of 4x2 integer corner arrays.
        """
        processed = self.preprocess_image(img)
        edges = self.detect_edges(processed)

        _, binary = cv2.threshold(edges, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

        # A wide, flat kernel closes gaps along the horizontal direction.
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (17, 3))
        morph = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)

        contours, _ = cv2.findContours(morph, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        candidates = []
        result = img.copy()
        low_ratio, high_ratio = self.aspect_ratio_range
        for contour in contours:
            area = cv2.contourArea(contour)
            if not self.min_area < area < self.max_area:
                continue

            rect = cv2.minAreaRect(contour)
            (_, (width, height), _) = rect
            short_side = min(width, height)
            # A zero-width rectangle has no meaningful aspect ratio.
            if short_side == 0:
                continue

            aspect_ratio = max(width, height) / short_side
            if low_ratio <= aspect_ratio <= high_ratio:
                # np.int0 was an alias of np.intp and no longer exists in
                # NumPy 2.x.
                box = cv2.boxPoints(rect).astype(np.intp)
                cv2.drawContours(result, [box], 0, (0, 255, 0), 2)
                candidates.append(box)
        return result, candidates

    def extract_license_plate(self, img, box):
        """Warp the quadrilateral ``box`` of ``img`` to an upright rectangle.

        Args:
            img: Source image.
            box: 4x2 array of corner points, in the order produced by
                ``cv2.boxPoints``.

        Returns:
            The warped image; its size is the longer of each pair of
            opposite sides of ``box``.
        """
        width = int(max(np.linalg.norm(box[0] - box[1]), np.linalg.norm(box[2] - box[3])))
        height = int(max(np.linalg.norm(box[1] - box[2]), np.linalg.norm(box[3] - box[0])))
        dst_points = np.array([[0, 0], [width, 0], [width, height], [0, height]],
                              dtype=np.float32)

        M = cv2.getPerspectiveTransform(box.astype(np.float32), dst_points)
        return cv2.warpPerspective(img, M, (width, height))

    def detect_license_plate(self, img):
        """Run the full pipeline.

        Returns:
            ``(result, extracted_plates)``: the annotated image and one
            warped crop per candidate.
        """
        result, candidates = self.find_license_plate_candidates(img)
        extracted_plates = [self.extract_license_plate(img, box) for box in candidates]
        return result, extracted_plates
# 使用示例
# detector = LicensePlateDetector()
# result, plates = detector.detect_license_plate(car_image)
文档扫描器
class DocumentScanner:
    """Find a four-cornered document in a photo and flatten it.

    Pipeline: grayscale + blur -> Canny edges -> largest four-point
    contour -> perspective warp -> adaptive threshold.
    """

    def __init__(self):
        # Smallest contour area accepted as a document.
        self.min_area = 10000

    def preprocess_image(self, img):
        """Return the Gaussian-blurred (5x5) grayscale version of ``img``."""
        return cv2.GaussianBlur(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY), (5, 5), 0)

    def detect_edges(self, img):
        """Return the Canny edge map of ``img`` (thresholds 75 and 200)."""
        return cv2.Canny(img, 75, 200)

    def find_document_contour(self, img):
        """Return the 4-point outline of the document, or ``None``.

        Only the five largest contours are inspected; the first one whose
        polygonal approximation has four vertices and whose area exceeds
        ``self.min_area`` wins.
        """
        edges = self.detect_edges(self.preprocess_image(img))
        contours, _ = cv2.findContours(edges, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
        largest = sorted(contours, key=cv2.contourArea, reverse=True)[:5]

        for candidate in largest:
            # Approximation tolerance is 2% of the perimeter.
            perimeter = cv2.arcLength(candidate, True)
            polygon = cv2.approxPolyDP(candidate, 0.02 * perimeter, True)
            if len(polygon) == 4 and cv2.contourArea(candidate) > self.min_area:
                return polygon
        return None

    def order_points(self, pts):
        """Order four points as top-left, top-right, bottom-right, bottom-left.

        Args:
            pts: Array of shape (4, 2) holding ``(x, y)`` points.

        Returns:
            A ``float32`` array of shape (4, 2) in the order above.
        """
        sums = pts.sum(axis=1)          # x + y
        diffs = np.diff(pts, axis=1)    # y - x
        return np.array(
            [
                pts[np.argmin(sums)],   # top-left: smallest sum
                pts[np.argmin(diffs)],  # top-right: smallest difference
                pts[np.argmax(sums)],   # bottom-right: largest sum
                pts[np.argmax(diffs)],  # bottom-left: largest difference
            ],
            dtype="float32",
        )

    def perspective_transform(self, img, pts):
        """Warp the quadrilateral ``pts`` of ``img`` to an upright rectangle.

        The output size is the longer of each pair of opposite sides,
        truncated to integers.
        """
        def side(p, q):
            # Euclidean distance between two corner points.
            return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))

        rect = self.order_points(pts)
        (tl, tr, br, bl) = rect

        maxWidth = max(int(side(br, bl)), int(side(tr, tl)))
        maxHeight = max(int(side(tr, br)), int(side(tl, bl)))

        # Destination corners, in the same order as ``rect``.
        dst = np.array(
            [
                [0, 0],
                [maxWidth - 1, 0],
                [maxWidth - 1, maxHeight - 1],
                [0, maxHeight - 1],
            ],
            dtype="float32",
        )

        M = cv2.getPerspectiveTransform(rect, dst)
        return cv2.warpPerspective(img, M, (maxWidth, maxHeight))

    def enhance_document(self, img):
        """Binarise ``img`` with a Gaussian adaptive threshold (block 11, C 2)."""
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        return cv2.adaptiveThreshold(
            gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2
        )

    def scan_document(self, img):
        """Run the full scan pipeline.

        Returns:
            ``(contour_img, warped, enhanced)``: the input with the detected
            outline drawn, the flattened document, and its binarised
            version. ``None`` (after printing a message) when no document
            outline is found.
        """
        original = img.copy()

        contour = self.find_document_contour(img)
        if contour is None:
            print("未找到文档边缘")
            return None

        contour_img = original.copy()
        cv2.drawContours(contour_img, [contour], -1, (0, 255, 0), 2)

        warped = self.perspective_transform(original, contour.reshape(4, 2))
        enhanced = self.enhance_document(warped)
        return contour_img, warped, enhanced
# 使用示例
# scanner = DocumentScanner()
# contour_img, warped, enhanced = scanner.scan_document(document_image)
总结
OpenCV进阶技术包括:
特征检测与匹配
- 角点检测:Harris、Shi-Tomasi角点检测
- 特征描述子:SIFT、ORB特征检测与匹配(SURF属于同类描述子,但本笔记未给出示例)
- 应用场景:图像拼接、物体识别、跟踪
图像分割技术
- 区域分割:分水岭算法、GrabCut前背景分离
- 聚类分割:K-means聚类分割
- 应用场景:医学图像、遥感图像、工业检测
形状分析
- 轮廓检测:轮廓查找、特征计算、形状识别
- 几何变换:透视变换、仿射变换
- 应用场景:质量检测、文档处理、机器人视觉
运动分析
- 光流检测:Lucas-Kanade、Farneback光流
- 目标跟踪:特征点跟踪、稠密光流
- 应用场景:视频监控、运动分析、自动驾驶
模式识别
- 模板匹配:单尺度、多尺度模板匹配
- 霍夫变换:直线检测、圆检测
- 人脸检测:传统级联分类器、深度学习方法
实践要点
- 参数调优:根据具体应用调整算法参数
- 多算法结合:组合不同算法提高鲁棒性
- 性能优化:考虑实时性和准确性的平衡
- 领域适应:针对特定领域优化算法流程
这些进阶技术为解决复杂的计算机视觉问题提供了强大的工具和方法。