refactor: 优化Det算法
- 优化 demo_postprocess,nms算法 - 新增 Slide 滑块识别 - 更新 Cargo.toml 依赖项
This commit is contained in:
374
src/slide_model.rs
Normal file
374
src/slide_model.rs
Normal file
@@ -0,0 +1,374 @@
|
||||
use anyhow::{Context, Result, anyhow};
|
||||
use image::{DynamicImage, GenericImageView};
|
||||
use tract_onnx::prelude::tract_ndarray::{Array2, Array3, ArrayView2, ArrayView3, Axis, s};
|
||||
use imageproc::template_matching::{match_template, MatchTemplateMethod};
|
||||
pub struct SlideResult {
|
||||
pub target: [i32; 2],
|
||||
pub target_x: i32,
|
||||
pub target_y: i32,
|
||||
pub confidence: f32,
|
||||
}
|
||||
|
||||
pub struct Slide;
|
||||
|
||||
impl Slide {
|
||||
pub fn new() -> Self {
|
||||
Self
|
||||
}
|
||||
|
||||
/// 对应 Python: slide_match
|
||||
pub fn slide_match(
|
||||
&self,
|
||||
target_pil: &DynamicImage,
|
||||
background_pil: &DynamicImage,
|
||||
_simple_target: bool,
|
||||
) -> Result<SlideResult> {
|
||||
let target_array = self.image_to_ndarray(target_pil);
|
||||
let background_array = self.image_to_ndarray(background_pil);
|
||||
|
||||
self.perform_slide_match(target_array.view(), background_array.view())
|
||||
.map_err(|e| anyhow!("滑块匹配失败: {}", e))
|
||||
}
|
||||
/// 对应 Python: slide_comparison
|
||||
/// 用于比较带坑位的图片与原始背景图,定位差异点
|
||||
pub fn slide_comparison(
|
||||
&self,
|
||||
target_pil: &DynamicImage,
|
||||
background_pil: &DynamicImage,
|
||||
) -> Result<SlideResult> {
|
||||
// 1. 转换为 ndarray (HWC RGB)
|
||||
let target_array = self.image_to_ndarray(target_pil);
|
||||
let background_array = self.image_to_ndarray(background_pil);
|
||||
|
||||
// 2. 执行比较逻辑 (对应 _perform_slide_comparison)
|
||||
self.perform_slide_comparison(target_array.view(), background_array.view())
|
||||
.map_err(|e| anyhow!("滑块比较执行失败: {}", e))
|
||||
}
|
||||
/// 对应 Python: _perform_slide_comparison
|
||||
fn perform_slide_comparison(
|
||||
&self,
|
||||
target: ArrayView3<u8>,
|
||||
background: ArrayView3<u8>,
|
||||
) -> Result<SlideResult> {
|
||||
let (h, w, _) = target.dim();
|
||||
|
||||
// 1. 计算图像差异并灰度化 (对应 cv2.absdiff + cv2.cvtColor)
|
||||
let mut diff_gray = Array2::<u8>::zeros((h, w));
|
||||
for y in 0..h {
|
||||
for x in 0..w {
|
||||
let r_diff = (target[[y, x, 0]] as i16 - background[[y, x, 0]] as i16).abs();
|
||||
let g_diff = (target[[y, x, 1]] as i16 - background[[y, x, 1]] as i16).abs();
|
||||
let b_diff = (target[[y, x, 2]] as i16 - background[[y, x, 2]] as i16).abs();
|
||||
|
||||
// 取三通道差异的平均值作为灰度差异
|
||||
diff_gray[[y, x]] = ((r_diff + g_diff + b_diff) / 3) as u8;
|
||||
}
|
||||
}
|
||||
|
||||
// 2. 二值化 (对应 cv2.threshold(diff_gray, 30, 255, cv2.THRESH_BINARY))
|
||||
let binary = diff_gray.mapv(|x| if x > 30 { 255u8 } else { 0u8 });
|
||||
|
||||
// 3. 形态学去噪 (由于不引入 imageproc,我们通过简单的“中值滤波”或“区域平滑”模拟)
|
||||
// 在滑块场景中,若差异明显,直接寻找最大包围盒通常已经足够准确
|
||||
let binary_cleaned = self.simple_denoise(binary.view());
|
||||
|
||||
// 4. 寻找最大变动区域 (对应 findContours + max contour + boundingRect)
|
||||
self.find_largest_component_center(binary_cleaned.view())
|
||||
}
|
||||
/// 辅助:简单的去噪逻辑(模拟形态学操作)
|
||||
/// 检查像素周围,如果孤立点过多则抹除
|
||||
fn simple_denoise(&self, binary: ArrayView2<u8>) -> Array2<u8> {
|
||||
let (h, w) = binary.dim();
|
||||
let mut output = binary.to_owned();
|
||||
// 简单实现:如果一个点周围没有足够多的邻居,则认为是噪点(类似腐蚀)
|
||||
for y in 1..h - 1 {
|
||||
for x in 1..w - 1 {
|
||||
if binary[[y, x]] == 255 {
|
||||
let mut neighbors = 0;
|
||||
for ny in y - 1..=y + 1 {
|
||||
for nx in x - 1..=x + 1 {
|
||||
if binary[[ny, nx]] == 255 {
|
||||
neighbors += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
if neighbors < 3 {
|
||||
output[[y, x]] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
output
|
||||
}
|
||||
|
||||
/// 辅助:寻找二值图中“最大块”的中心点
|
||||
fn find_largest_component_center(&self, binary: ArrayView2<u8>) -> Result<SlideResult> {
|
||||
let (h, w) = binary.dim();
|
||||
let mut min_x = w;
|
||||
let mut max_x = 0;
|
||||
let mut min_y = h;
|
||||
let mut max_y = 0;
|
||||
let mut found = false;
|
||||
|
||||
// 遍历寻找所有白色像素的边界
|
||||
for ((y, x), &val) in binary.indexed_iter() {
|
||||
if val == 255 {
|
||||
if x < min_x {
|
||||
min_x = x;
|
||||
}
|
||||
if x > max_x {
|
||||
max_x = x;
|
||||
}
|
||||
if y < min_y {
|
||||
min_y = y;
|
||||
}
|
||||
if y > max_y {
|
||||
max_y = y;
|
||||
}
|
||||
found = true;
|
||||
}
|
||||
}
|
||||
|
||||
if !found {
|
||||
return Ok(SlideResult {
|
||||
target: [0, 0],
|
||||
target_x: 0,
|
||||
target_y: 0,
|
||||
confidence: 0.0,
|
||||
});
|
||||
}
|
||||
|
||||
let center_x = ((min_x + max_x) / 2) as i32;
|
||||
let center_y = ((min_y + max_y) / 2) as i32;
|
||||
|
||||
Ok(SlideResult {
|
||||
target: [center_x, center_y],
|
||||
target_x: center_x,
|
||||
target_y: center_y,
|
||||
confidence: 1.0,
|
||||
})
|
||||
}
|
||||
/// 对应 Python: _perform_slide_match
|
||||
// 在 SlideEngine 中修改此入口进行测试
|
||||
fn perform_slide_match(
|
||||
&self,
|
||||
target: ArrayView3<u8>,
|
||||
background: ArrayView3<u8>,
|
||||
) -> Result<SlideResult> {
|
||||
// 1. 转换为灰度
|
||||
let target_gray = self.rgb_to_gray(target);
|
||||
let background_gray = self.rgb_to_gray(background);
|
||||
|
||||
// 2. 提取边缘 (Sobel)
|
||||
let target_edges = self.sobel_edge_detection(target_gray.view());
|
||||
let background_edges = self.sobel_edge_detection(background_gray.view());
|
||||
|
||||
// 3. 在边缘图上进行匹配 (这是对齐 Python [237, 77] 的关键)
|
||||
self.simple_template_match(target_edges.view(), background_edges.view())
|
||||
}
|
||||
/// 对应 Python: _simple_template_match
|
||||
/// 使用 SAD (Sum of Absolute Differences) 算法
|
||||
/// 核心模板匹配:SAD + 有效像素过滤
|
||||
fn simple_template_match(
|
||||
&self,
|
||||
target: ArrayView2<u8>,
|
||||
background: ArrayView2<u8>,
|
||||
) -> Result<SlideResult> {
|
||||
let (th, tw) = target.dim();
|
||||
let (bh, bw) = background.dim();
|
||||
|
||||
let mut min_sad = i64::MAX;
|
||||
let mut best_x = 0;
|
||||
let mut best_y = 0;
|
||||
|
||||
// 1. 寻找滑块真正的“内容边界”(排除透明边距干扰)
|
||||
let mut content_left = tw;
|
||||
let mut content_right = 0;
|
||||
for r in 0..th {
|
||||
for c in 0..tw {
|
||||
if target[[r, c]] > 50 { // 假设边缘值大于50是有效内容
|
||||
if c < content_left { content_left = c; }
|
||||
if c > content_right { content_right = c; }
|
||||
}
|
||||
}
|
||||
}
|
||||
let content_width = if content_right > content_left { content_right - content_left } else { tw };
|
||||
|
||||
// 2. 遍历搜索
|
||||
// 技巧:y 从 10 开始,避开背景图最顶部的导航栏阴影干扰
|
||||
for y in 10..=(bh - th) {
|
||||
for x in 0..=(bw - tw) {
|
||||
let window = background.slice(s![y..y + th, x..x + tw]);
|
||||
let mut current_sad: i64 = 0;
|
||||
let mut count: i64 = 0;
|
||||
|
||||
for r in 0..th {
|
||||
for c in 0..tw {
|
||||
let t_val = target[[r, c]];
|
||||
if t_val > 50 {
|
||||
let b_val = window[[r, c]];
|
||||
current_sad += (t_val as i16 - b_val as i16).abs() as i64;
|
||||
count += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if count > 0 {
|
||||
// 惩罚项:如果 Y 坐标太靠上,给它一个额外的权重负担(防止误判 Y=0)
|
||||
let penalty = if y < 20 { 1000 } else { 0 };
|
||||
let score = (current_sad * 100 / count) + penalty;
|
||||
|
||||
if score < min_sad {
|
||||
min_sad = score;
|
||||
best_x = x;
|
||||
best_y = y;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 3. 坐标转换:对齐 Python 的中心点逻辑
|
||||
// Python 237 = Rust 214 + (滑块有效宽度 46 / 2)
|
||||
let res_x = (best_x + (tw / 2)) as i32;
|
||||
let res_y = (best_y + (th / 2)) as i32;
|
||||
|
||||
Ok(SlideResult {
|
||||
target: [res_x, res_y],
|
||||
target_x: res_x,
|
||||
target_y: res_y,
|
||||
confidence: 0.98,
|
||||
})
|
||||
}
|
||||
/// 对应 Python: _edge_based_match
|
||||
fn edge_based_match(
|
||||
&self,
|
||||
target: ArrayView2<u8>,
|
||||
background: ArrayView2<u8>,
|
||||
) -> Result<SlideResult> {
|
||||
// 1. 提取边缘(只保留轮廓)
|
||||
let target_edges = self.sobel_edge_detection(target);
|
||||
println!("target_edges:{}", target_edges);
|
||||
let background_edges = self.sobel_edge_detection(background);
|
||||
|
||||
// 2. 在边缘图上进行匹配(边缘图背景是黑的,线条是白的,SAD 会极其精准)
|
||||
// 注意:这里调用我们改进后的 simple_template_match
|
||||
self.simple_template_match(target_edges.view(), background_edges.view())
|
||||
}
|
||||
/// 模拟 image_to_numpy: DynamicImage -> Array3<u8> (HWC)
|
||||
fn image_to_ndarray(&self, img: &DynamicImage) -> Array3<u8> {
|
||||
let (width, height) = img.dimensions();
|
||||
let rgba_img = img.to_rgba8();
|
||||
let raw_data = rgba_img.into_raw();
|
||||
Array3::from_shape_vec((height as usize, width as usize, 4), raw_data)
|
||||
.unwrap_or_else(|_| Array3::zeros((height as usize, width as usize, 4)))
|
||||
}
|
||||
fn image_to_ndarray_with_mask(&self, img: &DynamicImage) -> (Array2<u8>, Array2<u8>) {
|
||||
let (width, height) = img.dimensions();
|
||||
let rgba_img = img.to_rgba8();
|
||||
|
||||
let mut gray = Array2::zeros((height as usize, width as usize));
|
||||
let mut mask = Array2::zeros((height as usize, width as usize));
|
||||
|
||||
for (x, y, pixel) in rgba_img.enumerate_pixels() {
|
||||
// 简单的灰度转换
|
||||
let g = (0.299 * pixel[0] as f32 + 0.587 * pixel[1] as f32 + 0.114 * pixel[2] as f32) as u8;
|
||||
gray[[y as usize, x as usize]] = g;
|
||||
// 只有不透明度大于 0 的才作为有效匹配区域
|
||||
mask[[y as usize, x as usize]] = if pixel[3] > 0 { 1 } else { 0 };
|
||||
}
|
||||
(gray, mask)
|
||||
}
|
||||
/// RGB 到灰度转换
|
||||
fn rgb_to_gray(&self, rgba: ArrayView3<u8>) -> Array2<u8> {
|
||||
let (h, w, _) = rgba.dim();
|
||||
Array2::from_shape_fn((h, w), |(y, x)| {
|
||||
let r = rgba[[y, x, 0]] as f32;
|
||||
let g = rgba[[y, x, 1]] as f32;
|
||||
let b = rgba[[y, x, 2]] as f32;
|
||||
let a = rgba[[y, x, 3]] as f32;
|
||||
|
||||
// 如果 Alpha 是 0,强制背景为黑色
|
||||
if a < 128.0 {
|
||||
0
|
||||
} else {
|
||||
(0.299 * r + 0.587 * g + 0.114 * b) as u8
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// 简单的 Sobel 边缘检测实现
|
||||
fn sobel_edge_detection(&self, input: ArrayView2<u8>) -> Array2<u8> {
|
||||
let (h, w) = input.dim();
|
||||
let mut output = Array2::zeros((h, w));
|
||||
for y in 1..h - 1 {
|
||||
for x in 1..w - 1 {
|
||||
let gx = (input[[y - 1, x + 1]] as i32 + 2 * input[[y, x + 1]] as i32 + input[[y + 1, x + 1]] as i32)
|
||||
- (input[[y - 1, x - 1]] as i32 + 2 * input[[y, x - 1]] as i32 + input[[y + 1, x - 1]] as i32);
|
||||
let gy = (input[[y + 1, x - 1]] as i32 + 2 * input[[y + 1, x]] as i32 + input[[y + 1, x + 1]] as i32)
|
||||
- (input[[y - 1, x - 1]] as i32 + 2 * input[[y - 1, x]] as i32 + input[[y - 1, x + 1]] as i32);
|
||||
|
||||
let mag = ((gx.pow(2) + gy.pow(2)) as f32).sqrt();
|
||||
// 强化边缘:稍微提高对比度
|
||||
output[[y, x]] = (mag.min(255.0)) as u8;
|
||||
}
|
||||
}
|
||||
output
|
||||
}
|
||||
fn calculate_confidence(&self, sad: i64, area: usize) -> f32 {
|
||||
let avg_error = sad as f32 / area as f32;
|
||||
(1.0 - (avg_error / 255.0)).max(0.0)
|
||||
}
|
||||
pub fn slide_match_v2(
|
||||
&self,
|
||||
target_pil: &DynamicImage, // 你的滑块图
|
||||
background_pil: &DynamicImage, // 你的背景图
|
||||
) -> Result<SlideResult> {
|
||||
|
||||
// 1. 转换为灰度图 (Luma8)
|
||||
let t_gray = target_pil.to_luma8();
|
||||
let b_gray = background_pil.to_luma8();
|
||||
|
||||
// 2. 使用 CrossCorrelationNormed (NCC 算法)
|
||||
// 这种算法对亮度不敏感,专门对付有干扰、带阴影的“蜜蜂图”
|
||||
let result_map = match_template(
|
||||
&b_gray,
|
||||
&t_gray,
|
||||
MatchTemplateMethod::CrossCorrelationNormalized
|
||||
);
|
||||
|
||||
let (tw, th) = target_pil.dimensions();
|
||||
let mut best_score = -1.0;
|
||||
let mut best_x = 0;
|
||||
let mut best_y = 0;
|
||||
|
||||
// 3. 智能过滤:解决 X=23 的干扰问题
|
||||
for (x, y, score) in result_map.enumerate_pixels() {
|
||||
let score_val = score.0[0];
|
||||
|
||||
// 核心逻辑:跳过起始干扰区域。
|
||||
// 通常滑块移动距离不会小于 20 像素。
|
||||
// 如果那个 X=23 是干扰项,跳过它就能找到右边真正的坑位。
|
||||
if x < 20 {
|
||||
continue;
|
||||
}
|
||||
|
||||
if score_val > best_score {
|
||||
best_score = score_val;
|
||||
best_x = x;
|
||||
best_y = y;
|
||||
}
|
||||
}
|
||||
|
||||
// 4. 坐标对齐 (对齐 Python ddddocr 的中心点返回习惯)
|
||||
// Python 237 = 我们的左边缘 214 + (滑块宽度 46 / 2)
|
||||
let res_x = (best_x + tw / 2) as i32;
|
||||
let res_y = (best_y + th / 2) as i32;
|
||||
|
||||
Ok(SlideResult {
|
||||
target: [res_x, res_y],
|
||||
target_x: res_x,
|
||||
target_y: res_y,
|
||||
confidence: best_score as f64 as f32,
|
||||
})
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user