feat: 优化 image_io.rs 模块

- 新增 base64_to_image等工具函数。
2026-05-08 22:35:17 +08:00
parent f0db625bd1
commit e8b365dced
3 changed files with 119 additions and 20 deletions
--- a/README.md
+++ b/README.md
@@ -37,7 +37,8 @@ CCOEFF (OpenCV 默认)：会自动进行“均值中心化”，在一定程度

 💡 开发者建议：

-如果识别结果在 $X$ 轴上有大约 $10px$ 左右的固定误差，通常是因为滑块原图自带了透明边距（留白）。此时请确保 simple_target=false。该模式会通过 Canny 边缘检测 提取轮廓特征，能自动锁定拼图实体并忽略背景留白的像素干扰。
+如果识别结果在 $X$ 轴上有大约 $10px$ 左右的固定误差，通常是因为滑块原图自带了透明边距（留白）。此时请确保
+simple_target=false。该模式会通过 Canny 边缘检测 提取轮廓特征，能自动锁定拼图实体并忽略背景留白的像素干扰。
 鸣谢 (Credits)

 - 本项目是 [ddddocr](https://github.com/sml2h3/ddddocr) 的 Rust 移植版本，原作者为 sml2h3。衷心感谢原作者对 OCR 社区做出的杰出贡献。
--- a/src/image_io.rs
+++ b/src/image_io.rs
@@ -12,16 +12,6 @@ pub enum ImageInput {
    Base64(String),
    DynamicImage(DynamicImage),
 }
-
-impl From<&str> for ImageInput {
-    fn from(s: &str) -> Self {
-        if Path::new(s).exists() {
-            ImageInput::Path(s.into())
-        } else {
-            ImageInput::Base64(s.to_string())
-        }
-    }
-}
 /// 模拟 Python 的 load_image_from_input
 #[allow(dead_code)]
 pub fn load_image_from_input(img_input: ImageInput) -> Result<DynamicImage> {
@@ -31,22 +21,22 @@ pub fn load_image_from_input(img_input: ImageInput) -> Result<DynamicImage> {
            image::load_from_memory(&bytes).context("Failed to load image from bytes")
        }
        // 1. 已经是 DynamicImage
-        ImageInput::DynamicImage(img) => Ok(img),
+        ImageInput::DynamicImage(i) => Ok(i),
        // 5. 处理 ndarray (Numpy-like)
        // 假设输入是 HWC 格式的 Array3<u8>
-        ImageInput::Array(arr) => numpy_to_pil_image(arr.view()),
+        ImageInput::Array(a) => numpy_to_pil_image(a.view()),
        // 4. 处理 Base64 字符串
-        ImageInput::Base64(b64_str) => base64_to_image(&b64_str),
+        ImageInput::Base64(b) => base64_to_image(&b),
        // 3. 处理文件路径 (Path)
-        ImageInput::Path(path) => image::open(path).context("Failed to open image from path"),
+        ImageInput::Path(p) => image::open(p).context("Failed to open image from path"),
    }
 }
-fn base64_to_image(img_base64: &str) -> Result<DynamicImage> {
+fn base64_to_image(b64_str: &str) -> Result<DynamicImage> {
    // 过滤掉可能存在的 base64 前缀，例如 "data:image/png;base64,"
-    let clean_b64 = if let Some(pos) = img_base64.find(",") {
-        &img_base64[pos + 1..]
+    let clean_b64 = if let Some(pos) = b64_str.find(",") {
+        &b64_str[pos + 1..]
    } else {
-        &img_base64
+        &b64_str
    };

    let bytes = general_purpose::STANDARD
@@ -55,6 +45,56 @@ fn base64_to_image(img_base64: &str) -> Result<DynamicImage> {

    image::load_from_memory(&bytes).context("Failed to load image from decoded base64")
 }
+
+/// 读取图片文件并转换为 base64 编码字符串
+/// 对应 Python 版 get_img_base64
+pub fn get_img_base64<P: AsRef<Path>>(image_path: P) -> Result<String> {
+    // 1. 读取文件原始字节流
+    // 使用 AsRef<Path> 泛型可以让函数同时支持 String, &str, PathBuf 等类型
+    let image_data = fs::read(&image_path)
+        .with_context(|| format!("Failed to read image file: {:?}", image_path.as_ref()))?;
+
+    // 2. 进行 Base64 编码
+    // 使用 STANDARD 引擎对齐 Python 的 base64.b64encode
+    let b64_string = general_purpose::STANDARD.encode(image_data);
+
+    Ok(b64_string)
+}
+/// 处理 PNG 图像的 RGBA 透明背景，将透明部分设置为白色
+///
+/// 对应 Python 版 png_rgba_black_preprocess
+pub fn png_rgba_black_preprocess(img: &DynamicImage) -> Result<DynamicImage> {
+    // 1. 获取原图尺寸
+    let (width, height) = (img.width(), img.height());
+
+    // 2. 创建一个等尺寸的纯白色 RGB 图像作为底色
+    // ImageBuffer::<Rgb<u8>, Vec<u8>>
+    let mut white_bg = ImageBuffer::from_fn(width, height, |_, _| {
+        Rgb([255, 255, 255])
+    });
+
+    // 3. 将原图复合到底色上
+    // 我们需要处理原图，将其转为 RGBA 确保有 alpha 通道可以参考
+    let rgba_img = img.to_rgba8();
+
+    // 遍历每一个像素进行复合（模拟 Python 的 paste 逻辑）
+    for (x, y, pixel) in rgba_img.enumerate_pixels() {
+        let alpha = pixel[3] as f32 / 255.0;
+        if alpha > 0.0 {
+            // 获取底色像素（白色）
+            let bg_pixel = white_bg.get_pixel_mut(x, y);
+
+            // 简单的 Alpha 复合公式：输出 = 源 * alpha + 背景 * (1 - alpha)
+            for i in 0..3 {
+                let fg = pixel[i] as f32;
+                let bg = bg_pixel[i] as f32;
+                bg_pixel[i] = (fg * alpha + bg * (1.0 - alpha)) as u8;
+            }
+        }
+    }
+
+    Ok(DynamicImage::ImageRgb8(white_bg))
+}
 /// 封装数组转图像的逻辑，对齐 Python 版 _numpy_to_pil_image
 fn numpy_to_pil_image(array: ArrayViewD<u8>) -> Result<DynamicImage> {
    let shape = array.shape();
@@ -139,7 +179,65 @@ pub fn png_rgba_white_preprocess(img: &DynamicImage) -> DynamicImage {

    DynamicImage::ImageRgb8(background)
 }
+pub fn image_to_numpy(image: &DynamicImage, target_mode: &str) -> Result<Array3<u8>> {
+    // 1. 模式转换 (对应 image.convert(target_mode))
+    // Rust image 库通过 to_rgb8, to_luma8 等方法实现转换
+    let (width, height) = image.dimensions();

+    match target_mode {
+        "RGB" => {
+            let rgb_img = image.to_rgb8();
+            let raw = rgb_img.into_raw();
+            // shape 为 [Height, Width, Channels] -> [H, W, 3]
+            Array3::from_shape_vec((height as usize, width as usize, 3), raw)
+                .map_err(|e| anyhow!("Failed to build ndarray: {}", e))
+        },
+        "L" | "GRAY" => {
+            let gray_img = image.to_luma8();
+            let raw = gray_img.into_raw();
+            // shape 为 [H, W, 1]
+            Array3::from_shape_vec((height as usize, width as usize, 1), raw)
+                .map_err(|e| anyhow!("Failed to build ndarray: {}", e))
+        },
+        "RGBA" => {
+            let rgba_img = image.to_rgba8();
+            let raw = rgba_img.into_raw();
+            // shape 为 [H, W, 4]
+            Array3::from_shape_vec((height as usize, width as usize, 4), raw)
+                .map_err(|e| anyhow!("Failed to build ndarray: {}", e))
+        },
+        _ => Err(anyhow!("Unsupported target_mode: {}", target_mode)),
+    }
+}
+
+pub fn numpy_to_image(array: ArrayViewD<u8>, mode: &str) -> Result<DynamicImage> {
+    let shape = array.shape();
+    // 确保数据连续性 (C-order)
+    let standard = array.as_standard_layout();
+    let (raw_data, _) = standard.to_owned().into_raw_vec_and_offset();
+
+    let height = shape[0] as u32;
+    let width = shape[1] as u32;
+
+    match mode {
+        "L" => {
+            ImageBuffer::<Luma<u8>, _>::from_raw(width, height, raw_data)
+                .map(DynamicImage::ImageLuma8)
+                .ok_or_else(|| anyhow!("Failed to create Luma image"))
+        },
+        "RGB" => {
+            ImageBuffer::<Rgb<u8>, _>::from_raw(width, height, raw_data)
+                .map(DynamicImage::ImageRgb8)
+                .ok_or_else(|| anyhow!("Failed to create RGB image"))
+        },
+        "RGBA" => {
+            ImageBuffer::<Rgba<u8>, _>::from_raw(width, height, raw_data)
+                .map(DynamicImage::ImageRgba8)
+                .ok_or_else(|| anyhow!("Failed to create RGBA image"))
+        },
+        _ => Err(anyhow!("Unsupported mode: {}", mode)),
+    }
+}
 pub fn image_to_ndarray(img: &DynamicImage) -> Array3<u8> {
    let (width, height) = img.dimensions();

--- a/tests/ocr_test.rs
+++ b/tests/ocr_test.rs
@@ -102,7 +102,7 @@ fn test_real_slide_match() {
    // 2. 执行匹配
    // 如果是那种带有明显阴影边缘的复杂滑块，建议 simple_target 传 false
    let start = std::time::Instant::now();
-    let result = engine.slide_match(&target_img, &bg_img, false)
+    let result = engine.slide_match(&target_img, &bg_img, true)
        .expect("Slide match 执行失败");
    let duration = start.elapsed();