图像基本处理
1. 图像读取与显示
图像读取
import cv2
import numpy as np
import matplotlib.pyplot as plt
# Load the sample picture as a colour image (OpenCV stores channels as BGR).
img = cv2.imread('lenna.jpg', cv2.IMREAD_COLOR)
# Load the same file again, decoded to a single grayscale channel.
img_gray = cv2.imread('lenna.jpg', cv2.IMREAD_GRAYSCALE)
# Load a PNG exactly as stored, so an alpha channel (if any) is kept.
img_rgba = cv2.imread('image.png', cv2.IMREAD_UNCHANGED)
# A failed read is detected by comparing against None; only the colour
# image is checked here.
if img is not None:
    print(f"图像尺寸: {img.shape}")
    print(f"数据类型: {img.dtype}")
else:
    print("错误:无法读取图像文件")
图像显示
# 使用matplotlib显示图像(推荐在Jupyter中使用)
def show_images(images, titles, figsize=(15, 5)):
    """Show several images side by side in one matplotlib row.

    Args:
        images: Sequence of image arrays. 3-D arrays are treated as OpenCV
            BGR images and converted to RGB before display; all other
            arrays are drawn with the gray colormap.
        titles: Sequence of captions, indexed in parallel with ``images``.
        figsize: Figure size in inches, forwarded to ``plt.figure``.
    """
    plt.figure(figsize=figsize)
    count = len(images)
    for idx, image in enumerate(images):
        plt.subplot(1, count, idx + 1)
        if image.ndim == 3:
            # Colour image: matplotlib expects RGB channel order.
            plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        elif image.dtype == np.uint8:
            # Pin the display range to the full 8-bit scale. Without this,
            # imshow stretches every image to its own min/max, so a dim or
            # truncated result looks as bright as the original and a
            # constant image is drawn black.
            plt.imshow(image, cmap='gray', vmin=0, vmax=255)
        else:
            # Non-8-bit data has no fixed range here; keep autoscaling.
            plt.imshow(image, cmap='gray')
        plt.title(titles[idx])
        plt.axis('off')
    plt.tight_layout()
    plt.show()
# 使用示例
# Only display when both reads succeeded; show_images accesses array
# attributes and would fail on None.
if img is not None and img_gray is not None:
    show_images([img, img_gray], ['原始图像', '灰度图像'])
2. 图像几何变换
图像缩放
def resize_image_demo(img):
    """Demonstrate cv2.resize with scale factors, a fixed size and several interpolations.

    Two figures are shown: the original next to three resized versions, and
    a 200x200 comparison of nearest-neighbour, linear and cubic interpolation.

    Args:
        img: Image array to resize.

    Returns:
        Tuple ``(scale_50, scale_150, fixed_size)``: the half-size image, the
        1.5x enlargement and the 300x300 version.
    """
    # Relative scaling: the target size is None, so fx/fy decide the output size.
    scale_50 = cv2.resize(img, None, fx=0.5, fy=0.5, interpolation=cv2.INTER_AREA)
    scale_150 = cv2.resize(img, None, fx=1.5, fy=1.5, interpolation=cv2.INTER_CUBIC)
    # Absolute scaling to a fixed (width, height); aspect ratio is not preserved.
    fixed_size = cv2.resize(img, (300, 300), interpolation=cv2.INTER_LINEAR)

    show_images(
        [img, scale_50, scale_150, fixed_size],
        ['原图', '50%缩放', '150%放大', '固定尺寸300x300'],
    )

    # Same target size, different interpolation flags, for a visual comparison.
    methods = [
        (cv2.INTER_NEAREST, '最近邻插值'),
        (cv2.INTER_LINEAR, '线性插值'),
        (cv2.INTER_CUBIC, '三次插值'),
    ]
    interpolation_imgs = [
        cv2.resize(img, (200, 200), interpolation=flag) for flag, _ in methods
    ]
    show_images(interpolation_imgs, [label for _, label in methods])

    return scale_50, scale_150, fixed_size
# 使用示例
# scaled_imgs = resize_image_demo(img)
图像旋转
def rotation_demo(img):
    """Rotate the image about its centre by several angles and display the results.

    Every output keeps the input's width and height as its canvas size.

    Args:
        img: Image array to rotate.

    Returns:
        List of the images rotated by 30, 45, 90 and 180 degrees (scale 1.0).
        The extra 45-degree / 0.8-scale variant is displayed but not returned.
    """
    h, w = img.shape[:2]
    pivot = (w // 2, h // 2)
    angles = [30, 45, 90, 180]

    def rotate(angle, scale):
        # Build the 2x3 rotation matrix about the pivot, then warp onto a
        # canvas of the original size.
        matrix = cv2.getRotationMatrix2D(pivot, angle, scale)
        return cv2.warpAffine(img, matrix, (w, h))

    rotated_images = [rotate(angle, 1.0) for angle in angles]
    rotated_scaled = rotate(45, 0.8)

    panels = [img, *rotated_images, rotated_scaled]
    captions = ['原图', *[f'旋转{a}°' for a in angles], '旋转45°+缩放0.8']
    show_images(panels, captions, figsize=(20, 4))
    return rotated_images
# 使用示例
# rotated_imgs = rotation_demo(img)
仿射变换
def affine_transform_demo(img):
    """Apply one affine warp and one perspective warp and display both.

    The control points are hard-coded pixel coordinates (up to about 390),
    so the demo presumably targets an image at least that large.

    Args:
        img: Image array to transform.

    Returns:
        Tuple ``(dst, perspective_dst)``: the affine result (same size as the
        input) and the 300x300 perspective result.
    """
    rows, cols = img.shape[:2]

    # Affine: three point correspondences determine the 2x3 matrix.
    src_tri = np.float32([[50, 50], [200, 50], [50, 200]])
    dst_tri = np.float32([[10, 100], [200, 50], [100, 250]])
    affine = cv2.getAffineTransform(src_tri, dst_tri)
    dst = cv2.warpAffine(img, affine, (cols, rows))

    # Perspective: four correspondences map a quadrilateral onto a 300x300 square.
    src_quad = np.float32([[56, 65], [368, 52], [28, 387], [389, 390]])
    dst_quad = np.float32([[0, 0], [300, 0], [0, 300], [300, 300]])
    homography = cv2.getPerspectiveTransform(src_quad, dst_quad)
    perspective_dst = cv2.warpPerspective(img, homography, (300, 300))

    show_images([img, dst, perspective_dst], ['原图', '仿射变换', '透视变换'])
    return dst, perspective_dst
# 使用示例
# transformed_imgs = affine_transform_demo(img)
3. 图像增强
亮度和对比度调整
def brightness_contrast_demo(img):
    """Display brighter, darker, higher- and lower-contrast versions of an image.

    Args:
        img: Image array to adjust.

    Returns:
        Tuple ``(bright_img, high_contrast, enhanced)``.
    """
    def shift(offset):
        # addWeighted with weights (1, 0) against an all-zero image reduces
        # to adding a constant offset to img.
        blank = np.zeros(img.shape, img.dtype)
        return cv2.addWeighted(img, 1, blank, 0, offset)

    def rescale(gain, bias):
        # convertScaleAbs applies gain * img + bias.
        return cv2.convertScaleAbs(img, alpha=gain, beta=bias)

    bright_img = shift(50)
    dark_img = shift(-50)
    high_contrast = rescale(1.5, 0)
    low_contrast = rescale(0.5, 0)
    # Gain and offset combined in one call.
    enhanced = rescale(1.2, 30)

    show_images(
        [img, bright_img, dark_img, high_contrast, low_contrast, enhanced],
        ['原图', '增加亮度', '降低亮度', '增加对比度', '降低对比度', '亮度+对比度'],
        figsize=(18, 6),
    )
    return bright_img, high_contrast, enhanced
def gamma_correction_demo(img):
    """Apply gamma correction for several gamma values and display the results.

    Each output pixel is ``255 * (v / 255) ** (1 / gamma)`` truncated to uint8,
    so gamma below 1 darkens the image and gamma above 1 brightens it.

    Args:
        img: Image array passed to ``cv2.LUT``; presumably 8-bit, since the
            lookup table has 256 entries.

    Returns:
        List of corrected images, one per gamma in [0.5, 0.8, 1.0, 1.2, 2.0].
    """
    def adjust_gamma(image, gamma=1.0):
        exponent = 1.0 / gamma
        # One table entry per possible 8-bit intensity.
        lut = np.array(
            [((level / 255.0) ** exponent) * 255 for level in np.arange(0, 256)]
        ).astype("uint8")
        return cv2.LUT(image, lut)

    gamma_values = [0.5, 0.8, 1.0, 1.2, 2.0]
    gamma_images = [adjust_gamma(img, gamma=value) for value in gamma_values]
    show_images(gamma_images, [f'γ={value}' for value in gamma_values])
    return gamma_images
# 使用示例
# enhanced_imgs = brightness_contrast_demo(img)
# gamma_imgs = gamma_correction_demo(img)
直方图处理
def histogram_demo(img):
    """Compare global histogram equalization with CLAHE on the grayscale image.

    Shows the three grayscale variants, then plots the 256-bin intensity
    histogram of each in a second figure.

    Args:
        img: BGR image; it is converted with ``COLOR_BGR2GRAY`` first.

    Returns:
        Tuple ``(equ, cl1)``: the globally equalized image and the CLAHE result.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    equ = cv2.equalizeHist(gray)
    # CLAHE configured with an 8x8 tile grid and a clip limit of 2.0.
    cl1 = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8)).apply(gray)

    images = [gray, equ, cl1]
    titles = ['原始灰度图', '直方图均衡化', 'CLAHE']
    show_images(images, titles)

    # One histogram subplot per image, in the same order as above.
    plt.figure(figsize=(15, 4))
    for position, (image, title) in enumerate(zip(images, titles), start=1):
        plt.subplot(1, 3, position)
        plt.plot(cv2.calcHist([image], [0], None, [256], [0, 256]))
        plt.title(f'{title} - 直方图')
        plt.xlabel('像素值')
        plt.ylabel('频次')
        plt.xlim([0, 256])
    plt.tight_layout()
    plt.show()
    return equ, cl1
# 使用示例
# hist_imgs = histogram_demo(img)
4. 图像滤波
线性滤波
def linear_filtering_demo(img):
    """Display mean and Gaussian blurs of an image at several kernel sizes.

    Args:
        img: Image array to smooth.

    Returns:
        Tuple ``(blur_9x9, gaussian_9x9)``.
    """
    small, large = (3, 3), (9, 9)

    # Mean (box average) filter.
    blur_3x3 = cv2.blur(img, small)
    blur_9x9 = cv2.blur(img, large)

    # Gaussian filter; the third argument is sigma (0 and 2 are used here).
    gaussian_3x3 = cv2.GaussianBlur(img, small, 0)
    gaussian_9x9 = cv2.GaussianBlur(img, large, 0)
    gaussian_sigma = cv2.GaussianBlur(img, large, 2)

    # Normalized box filter, kept as an API illustration; its result is
    # neither displayed nor returned.
    box_filter = cv2.boxFilter(img, -1, large, normalize=True)

    show_images(
        [img, blur_3x3, blur_9x9, gaussian_3x3, gaussian_9x9, gaussian_sigma],
        ['原图', '均值滤波3x3', '均值滤波9x9', '高斯滤波3x3', '高斯滤波9x9', '高斯σ=2'],
        figsize=(18, 6),
    )
    return blur_9x9, gaussian_9x9
def custom_kernel_demo(img):
    """Convolve an image with hand-written 3x3 kernels via ``cv2.filter2D``.

    Args:
        img: Image array to filter.

    Returns:
        Tuple ``(sharpened, edges, embossed)``.
    """
    # Sharpening: coefficients sum to 1.
    sharpening_kernel = np.array([[-1, -1, -1],
                                  [-1, 9, -1],
                                  [-1, -1, -1]])
    # Edge detection: coefficients sum to 0, so flat regions map to 0.
    edge_kernel = np.array([[-1, -1, -1],
                            [-1, 8, -1],
                            [-1, -1, -1]])
    # Emboss: asymmetric along the main diagonal.
    emboss_kernel = np.array([[-2, -1, 0],
                              [-1, 1, 1],
                              [0, 1, 2]])

    def convolve(kernel):
        # ddepth=-1 keeps the output depth equal to the input depth.
        return cv2.filter2D(img, -1, kernel)

    sharpened = convolve(sharpening_kernel)
    edges = convolve(edge_kernel)
    embossed = convolve(emboss_kernel)

    show_images([img, sharpened, edges, embossed],
                ['原图', '锐化', '边缘检测', '浮雕效果'])
    return sharpened, edges, embossed
# 使用示例
# filtered_imgs = linear_filtering_demo(img)
# kernel_imgs = custom_kernel_demo(img)
非线性滤波
def nonlinear_filtering_demo(img):
    """Add synthetic noise to an image and display several non-linear filters.

    Gaussian noise is removed with a bilateral filter; salt-and-pepper noise
    is removed with a median filter and a morphological opening.

    Args:
        img: Image array; results are produced as uint8.

    Returns:
        Tuple ``(median_filtered, bilateral_filtered)``.
    """
    def add_noise(image, noise_type='gaussian'):
        """Return a noisy uint8 copy of ``image``.

        Raises:
            ValueError: If ``noise_type`` is not 'gaussian' or 'salt_pepper'.
        """
        if noise_type == 'gaussian':
            # Keep the noise signed and add it in floating point. Casting
            # the noise to uint8 first would wrap negative samples around
            # to large positive values and bias the image towards white.
            noise = np.random.normal(0, 25, image.shape)
            noisy = np.rint(image.astype(np.float64) + noise)
        elif noise_type == 'salt_pepper':
            noisy = image.copy()
            # Salt: roughly 1% of pixel positions become white.
            salt = np.random.random(image.shape[:2]) < 0.01
            noisy[salt] = 255
            # Pepper: roughly 1% of pixel positions become black.
            pepper = np.random.random(image.shape[:2]) < 0.01
            noisy[pepper] = 0
        else:
            raise ValueError(f"unsupported noise_type: {noise_type!r}")
        return np.clip(noisy, 0, 255).astype(np.uint8)

    noisy_gaussian = add_noise(img, 'gaussian')
    noisy_sp = add_noise(img, 'salt_pepper')

    # Median filter (aperture 5) on the salt-and-pepper image.
    median_filtered = cv2.medianBlur(noisy_sp, 5)
    # Bilateral filter (diameter 9, both sigmas 75) on the Gaussian-noise image.
    bilateral_filtered = cv2.bilateralFilter(noisy_gaussian, 9, 75, 75)

    # Morphological filtering with a 3x3 square element.
    kernel = np.ones((3, 3), np.uint8)
    opening = cv2.morphologyEx(noisy_sp, cv2.MORPH_OPEN, kernel)
    # Closing is kept as an API illustration; it is not displayed or returned.
    closing = cv2.morphologyEx(noisy_sp, cv2.MORPH_CLOSE, kernel)

    images = [img, noisy_gaussian, noisy_sp, median_filtered, bilateral_filtered, opening]
    titles = ['原图', '高斯噪声', '椒盐噪声', '中值滤波', '双边滤波', '开运算']
    show_images(images, titles, figsize=(18, 6))
    return median_filtered, bilateral_filtered
# 使用示例
# denoised_imgs = nonlinear_filtering_demo(img)
5. 颜色空间转换
常用颜色空间
def color_space_demo(img):
    """Convert a BGR image to several colour spaces and display them in one row.

    The HSV, LAB and YUV arrays are handed to ``imshow`` unchanged, so their
    three channels are rendered as if they were RGB (false colour).

    Args:
        img: BGR image array.

    Returns:
        Tuple ``(gray, hsv, lab)``.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)
    yuv = cv2.cvtColor(img, cv2.COLOR_BGR2YUV)
    # RGB channel order for matplotlib display.
    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    panels = [('RGB', rgb), ('Gray', gray), ('HSV', hsv), ('LAB', lab), ('YUV', yuv)]

    plt.figure(figsize=(20, 4))
    for position, (caption, picture) in enumerate(panels, start=1):
        plt.subplot(1, 5, position)
        # Only single-channel arrays need an explicit colormap.
        options = {} if len(picture.shape) == 3 else {'cmap': 'gray'}
        plt.imshow(picture, **options)
        plt.title(caption)
        plt.axis('off')
    plt.tight_layout()
    plt.show()
    return gray, hsv, lab
def hsv_channels_demo(img):
    """Split an image into H, S and V planes and display each plane.

    Each plane is shown both as a raw grayscale image and as a BGR rendering
    built by putting that plane back into an HSV image with fixed values for
    the other channels.

    Args:
        img: BGR image array.

    Returns:
        Tuple ``(h, s, v)`` of single-channel arrays.
    """
    h, s, v = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV))

    empty = np.zeros_like(h)
    full = np.ones_like(h) * 255

    def hsv_to_bgr(hue, sat, val):
        # Merge three planes into one HSV image and convert it for display.
        return cv2.cvtColor(cv2.merge([hue, sat, val]), cv2.COLOR_HSV2BGR)

    # Hue at full saturation and value.
    h_bgr = hsv_to_bgr(h, full, full)
    # Saturation at hue 0 and full value.
    s_bgr = hsv_to_bgr(empty, s, full)
    # Value with hue and saturation at 0.
    v_bgr = hsv_to_bgr(empty, empty, v)

    show_images(
        [img, h, s, v, h_bgr, s_bgr, v_bgr],
        ['原图', 'H通道', 'S通道', 'V通道', 'H彩色', 'S彩色', 'V彩色'],
        figsize=(21, 6),
    )
    return h, s, v
# 使用示例
# color_imgs = color_space_demo(img)
# hsv_channels = hsv_channels_demo(img)
6. 图像阈值处理
基本阈值
def threshold_demo(img):
    """Display the five fixed-threshold modes and Otsu's automatic threshold.

    Args:
        img: BGR image; it is converted with ``COLOR_BGR2GRAY`` first.

    Returns:
        Tuple ``(thresh1, thresh_otsu)``: the THRESH_BINARY result at
        threshold 127 and the Otsu-thresholded image.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Every global mode uses the same threshold (127) and max value (255).
    modes = [
        cv2.THRESH_BINARY,
        cv2.THRESH_BINARY_INV,
        cv2.THRESH_TRUNC,
        cv2.THRESH_TOZERO,
        cv2.THRESH_TOZERO_INV,
    ]
    fixed = [cv2.threshold(gray, 127, 255, mode)[1] for mode in modes]

    # With THRESH_OTSU the threshold argument (0) is a placeholder; the value
    # actually used is returned as the first element.
    ret_otsu, thresh_otsu = cv2.threshold(
        gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )

    images = [gray, *fixed, thresh_otsu]
    titles = ['原图', 'BINARY', 'BINARY_INV', 'TRUNC', 'TOZERO', 'TOZERO_INV',
              f'OTSU(T={ret_otsu:.0f})']
    show_images(images, titles, figsize=(21, 6))
    return fixed[0], thresh_otsu
def adaptive_threshold_demo(img):
    """Compare a global threshold with adaptive mean/Gaussian thresholds.

    Args:
        img: BGR image; it is converted with ``COLOR_BGR2GRAY`` first.

    Returns:
        Tuple ``(adaptive_mean, adaptive_gaussian)``, both computed with an
        11-pixel block and constant 2.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Baseline: one fixed threshold for the whole image.
    _, global_thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)

    def adaptive(method, block_size):
        # The constant 2 is passed as the C argument for every variant.
        return cv2.adaptiveThreshold(
            gray, 255, method, cv2.THRESH_BINARY, block_size, 2
        )

    adaptive_mean = adaptive(cv2.ADAPTIVE_THRESH_MEAN_C, 11)
    adaptive_gaussian = adaptive(cv2.ADAPTIVE_THRESH_GAUSSIAN_C, 11)
    # Same Gaussian method with a smaller and a larger neighbourhood.
    adaptive_small = adaptive(cv2.ADAPTIVE_THRESH_GAUSSIAN_C, 5)
    adaptive_large = adaptive(cv2.ADAPTIVE_THRESH_GAUSSIAN_C, 21)

    show_images(
        [gray, global_thresh, adaptive_mean, adaptive_gaussian,
         adaptive_small, adaptive_large],
        ['原图', '全局阈值', '自适应均值', '自适应高斯', '小窗口(5)', '大窗口(21)'],
        figsize=(18, 6),
    )
    return adaptive_mean, adaptive_gaussian
# 使用示例
# thresh_imgs = threshold_demo(img)
# adaptive_imgs = adaptive_threshold_demo(img)
7. 形态学操作
基本形态学操作
def morphology_demo(img):
    """Display the basic morphological operations on a binarized image.

    Args:
        img: BGR image; it is converted to grayscale and thresholded at 127.

    Returns:
        Tuple ``(erosion, dilation, opening, closing)``.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)

    # Structuring elements.
    kernel_3x3 = np.ones((3, 3), np.uint8)
    kernel_5x5 = np.ones((5, 5), np.uint8)
    # Elliptical element kept as an API illustration; no operation below uses it.
    kernel_ellipse = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))

    def morph(operation, kernel):
        return cv2.morphologyEx(binary, operation, kernel)

    erosion = cv2.erode(binary, kernel_3x3, iterations=1)
    dilation = cv2.dilate(binary, kernel_3x3, iterations=1)
    opening = morph(cv2.MORPH_OPEN, kernel_3x3)
    closing = morph(cv2.MORPH_CLOSE, kernel_3x3)
    gradient = morph(cv2.MORPH_GRADIENT, kernel_3x3)
    # Top-hat and black-hat use the larger 5x5 element.
    tophat = morph(cv2.MORPH_TOPHAT, kernel_5x5)
    blackhat = morph(cv2.MORPH_BLACKHAT, kernel_5x5)

    show_images(
        [binary, erosion, dilation, opening, closing, gradient, tophat, blackhat],
        ['二值图', '腐蚀', '膨胀', '开运算', '闭运算', '形态梯度', '顶帽', '黑帽'],
        figsize=(24, 6),
    )
    return erosion, dilation, opening, closing
def morphology_kernels_demo():
    """Build structuring elements of different shapes and display them.

    Returns:
        List ``[rect_kernel, ellipse_kernel, cross_kernel, custom_kernel]``;
        the first three are 5x5 and the custom plus-shaped one is 3x3.
    """
    rect_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
    ellipse_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
    cross_kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (5, 5))
    # Hand-written plus-shaped element.
    custom_kernel = np.array([[0, 1, 0],
                              [1, 1, 1],
                              [0, 1, 0]], dtype=np.uint8)

    kernels = [rect_kernel, ellipse_kernel, cross_kernel, custom_kernel]
    titles = ['矩形核', '椭圆核', '十字核', '自定义核']

    plt.figure(figsize=(12, 3))
    for position, (kernel, title) in enumerate(zip(kernels, titles), start=1):
        plt.subplot(1, len(kernels), position)
        # Fix the display range: the rectangular kernel is all ones, and a
        # constant image under imshow's default autoscaling is drawn black
        # instead of white.
        plt.imshow(kernel * 255, cmap='gray', vmin=0, vmax=255)
        plt.title(title)
        plt.axis('off')
    plt.tight_layout()
    plt.show()
    return kernels
# 使用示例
# morph_imgs = morphology_demo(img)
# kernels = morphology_kernels_demo()
8. 实用工具函数
图像质量评估
def image_quality_metrics(img1, img2):
    """Compute and print the MSE and PSNR between two images.

    Args:
        img1: First image (array-like of pixel values).
        img2: Second image, with a shape compatible with ``img1``.

    Returns:
        Tuple ``(mse, psnr)``. ``psnr`` is in dB with a peak value of 255 and
        is ``inf`` when the images are identical.
    """
    # Subtract in float64. With uint8 inputs, ``img1 - img2`` wraps modulo
    # 256 (e.g. 0 - 255 gives 1), which badly underestimates the error.
    diff = np.asarray(img1, dtype=np.float64) - np.asarray(img2, dtype=np.float64)

    # Mean squared error.
    mse = np.mean(diff ** 2)

    # Peak signal-to-noise ratio.
    if mse == 0:
        psnr = float('inf')
    else:
        max_pixel = 255.0
        psnr = 20 * np.log10(max_pixel / np.sqrt(mse))

    # Structural similarity (requires scikit-image). Recent releases use
    # ``channel_axis=-1`` instead of ``multichannel=True`` -- check the
    # installed version:
    # from skimage.metrics import structural_similarity as ssim
    # ssim_value = ssim(img1, img2, channel_axis=-1)
    print(f"MSE: {mse:.2f}")
    print(f"PSNR: {psnr:.2f} dB")
    # print(f"SSIM: {ssim_value:.4f}")
    return mse, psnr
def create_test_images():
    """Create and display three synthetic 256x256 uint8 test images.

    Returns:
        Tuple ``(gradient_img, checkerboard, circles)``: a horizontal
        intensity ramp, a checkerboard of 16-pixel squares, and concentric
        rings with a 20-pixel period.
    """
    # Horizontal gradient: one 0..255 ramp repeated on every row.
    gradient = np.linspace(0, 255, 256, dtype=np.uint8)
    gradient_img = np.tile(gradient, (256, 1))

    y, x = np.ogrid[:256, :256]

    # Checkerboard: a tile is white when its row and column tile indices have
    # an even sum. Strided assignment such as ``board[::32, ::32] = 255`` only
    # lights isolated pixels, not filled squares.
    tile = 16
    checkerboard = ((y // tile + x // tile) % 2 == 0).astype(np.uint8) * 255

    # Concentric rings: distance from the centre, alternating every 10 pixels.
    center_y, center_x = 128, 128
    radius = np.sqrt((x - center_x) ** 2 + (y - center_y) ** 2)
    circles = (radius % 20 < 10).astype(np.uint8) * 255

    images = [gradient_img, checkerboard, circles]
    titles = ['渐变图像', '棋盘图像', '同心圆']
    show_images(images, titles)
    return gradient_img, checkerboard, circles
# 使用示例
# test_imgs = create_test_images()
# 质量评估示例
# mse, psnr = image_quality_metrics(img, filtered_img)
9. 综合处理示例
图像预处理流水线
def image_preprocessing_pipeline(img, target_size=(224, 224)):
    """Run resize, denoise, contrast boost, sharpen and normalize, showing each stage.

    Args:
        img: Input image array.
        target_size: Size passed to ``cv2.resize``.

    Returns:
        The final, min-max normalized image.
    """
    stages = [('原图', img.copy())]

    # Step 1: resize.
    resized = cv2.resize(img, target_size, interpolation=cv2.INTER_AREA)
    stages.append(('调整尺寸', resized))

    # Step 2: denoise with a bilateral filter.
    denoised = cv2.bilateralFilter(resized, 9, 75, 75)
    stages.append(('去噪', denoised))

    # Step 3: contrast boost (gain 1.2, offset 10).
    enhanced = cv2.convertScaleAbs(denoised, alpha=1.2, beta=10)
    stages.append(('增强对比度', enhanced))

    # Step 4: sharpen with a 3x3 kernel whose coefficients sum to 1.
    sharpening_kernel = np.array([[-1, -1, -1],
                                  [-1, 9, -1],
                                  [-1, -1, -1]])
    sharpened = cv2.filter2D(enhanced, -1, sharpening_kernel)
    stages.append(('锐化', sharpened))

    # Step 5: min-max normalize to the 0..255 range.
    normalized = cv2.normalize(sharpened, None, 0, 255, cv2.NORM_MINMAX)
    stages.append(('归一化', normalized))

    show_images(
        [picture for _, picture in stages],
        [caption for caption, _ in stages],
        figsize=(18, 6),
    )
    return normalized
def _scale_and_saturate(image, alpha, beta):
    """Return ``alpha * image + beta`` rounded and clamped to uint8 (0..255)."""
    scaled = np.asarray(image, dtype=np.float64) * alpha + beta
    return np.clip(np.rint(scaled), 0, 255).astype(np.uint8)


def batch_image_processing(image_list, operations):
    """Apply the same sequence of operations to every image in a list.

    Args:
        image_list: Iterable of image arrays; the inputs are not modified.
        operations: Iterable of ``(name, params)`` pairs applied in order.
            Supported names and their ``params`` keys:
            'resize' (``size``), 'blur' (``kernel``, ``sigma``),
            'brightness' (``value``) and 'contrast' (``alpha``).
            Unrecognized names are skipped.

    Returns:
        List of processed images, in the same order as ``image_list``.
    """
    processed_images = []
    for img in image_list:
        processed_img = img.copy()
        for operation, params in operations:
            if operation == 'resize':
                processed_img = cv2.resize(processed_img, params['size'])
            elif operation == 'blur':
                processed_img = cv2.GaussianBlur(
                    processed_img, params['kernel'], params['sigma']
                )
            elif operation == 'brightness':
                # Clamp rather than take the absolute value: with a negative
                # offset, an abs-based scale turns 10 - 30 into 20 instead of
                # 0, so dark pixels get brighter when darkening.
                processed_img = _scale_and_saturate(processed_img, 1, params['value'])
            elif operation == 'contrast':
                processed_img = _scale_and_saturate(processed_img, params['alpha'], 0)
        processed_images.append(processed_img)
    return processed_images
# 使用示例
# processed_img = image_preprocessing_pipeline(img)
# 批量处理示例
# operations = [
# ('resize', {'size': (300, 300)}),
# ('blur', {'kernel': (5, 5), 'sigma': 0}),
# ('brightness', {'value': 30})
# ]
# batch_results = batch_image_processing([img1, img2, img3], operations)
总结
图像基本处理涵盖了计算机视觉的核心操作:
核心技术
- 图像IO:读取、显示图像文件(保存可使用 cv2.imwrite)
- 几何变换:缩放、旋转、仿射变换、透视变换
- 图像增强:亮度、对比度、伽马校正、直方图均衡化
- 滤波操作:线性滤波、非线性滤波、噪声去除
颜色处理
- 颜色空间:RGB、HSV、LAB、YUV转换
- 通道操作:分离、合并、单通道处理
- 阈值处理:全局阈值、自适应阈值、Otsu方法
形态学操作
- 基本操作:腐蚀、膨胀、开运算、闭运算
- 高级操作:形态梯度、顶帽、黑帽变换
- 结构元素:不同形状和大小的核