From e8b365dced2b6c6dd1dd7ee5355fa758d8c5a718 Mon Sep 17 00:00:00 2001
From: CNWei <t6g888@163.com>
Date: Fri, 8 May 2026 22:35:17 +0800
Subject: [PATCH] =?UTF-8?q?feat:=20=E4=BC=98=E5=8C=96=20image=5Fio.rs=20?=
 =?UTF-8?q?=E6=A8=A1=E5=9D=97?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- 新增 get_img_base64、png_rgba_black_preprocess、image_to_numpy、numpy_to_image 等工具函数，并移除 ImageInput 的 From<&str> 实现。
---
 README.md         |   3 +-
 src/image_io.rs   | 134 +++++++++++++++++++++++++++++++++++++++-------
 tests/ocr_test.rs |   2 +-
 3 files changed, 119 insertions(+), 20 deletions(-)
diff --git a/README.md b/README.md
index b7e7b25..9ba3654 100644
--- a/README.md
+++ b/README.md
@@ -37,7 +37,8 @@ CCOEFF (OpenCV 默认)：会自动进行“均值中心化”，在一定程度
 
 💡 开发者建议：
 
-如果识别结果在 $X$ 轴上有大约 $10px$ 左右的固定误差，通常是因为滑块原图自带了透明边距（留白）。此时请确保 simple_target=false。该模式会通过 Canny 边缘检测 提取轮廓特征，能自动锁定拼图实体并忽略背景留白的像素干扰。
+如果识别结果在 $X$ 轴上有大约 $10px$ 左右的固定误差，通常是因为滑块原图自带了透明边距（留白）。此时请确保
+simple_target=false。该模式会通过 Canny 边缘检测 提取轮廓特征，能自动锁定拼图实体并忽略背景留白的像素干扰。
 鸣谢 (Credits)
 
 - 本项目是 [ddddocr](https://github.com/sml2h3/ddddocr) 的 Rust 移植版本，原作者为 sml2h3。衷心感谢原作者对 OCR 社区做出的杰出贡献。
diff --git a/src/image_io.rs b/src/image_io.rs
index 1373286..23d6017 100644
--- a/src/image_io.rs
+++ b/src/image_io.rs
@@ -12,16 +12,6 @@ pub enum ImageInput {
     Base64(String),
     DynamicImage(DynamicImage),
 }
-
-impl From<&str> for ImageInput {
-    fn from(s: &str) -> Self {
-        if Path::new(s).exists() {
-            ImageInput::Path(s.into())
-        } else {
-            ImageInput::Base64(s.to_string())
-        }
-    }
-}
 /// 模拟 Python 的 load_image_from_input
 #[allow(dead_code)]
 pub fn load_image_from_input(img_input: ImageInput) -> Result<DynamicImage> {
@@ -31,22 +21,22 @@ pub fn load_image_from_input(img_input: ImageInput) -> Result<DynamicImage> {
             image::load_from_memory(&bytes).context("Failed to load image from bytes")
         }
         // 1. 已经是 DynamicImage
-        ImageInput::DynamicImage(img) => Ok(img),
+        ImageInput::DynamicImage(i) => Ok(i),
         // 5. 处理 ndarray (Numpy-like)
         // 假设输入是 HWC 格式的 Array3<u8>
-        ImageInput::Array(arr) => numpy_to_pil_image(arr.view()),
+        ImageInput::Array(a) => numpy_to_pil_image(a.view()),
         // 4. 处理 Base64 字符串
-        ImageInput::Base64(b64_str) => base64_to_image(&b64_str),
+        ImageInput::Base64(b) => base64_to_image(&b),
         // 3. 处理文件路径 (Path)
-        ImageInput::Path(path) => image::open(path).context("Failed to open image from path"),
+        ImageInput::Path(p) => image::open(p).context("Failed to open image from path"),
     }
 }
-fn base64_to_image(img_base64: &str) -> Result<DynamicImage> {
+fn base64_to_image(b64_str: &str) -> Result<DynamicImage> {
     // 过滤掉可能存在的 base64 前缀，例如 "data:image/png;base64,"
-    let clean_b64 = if let Some(pos) = img_base64.find(",") {
-        &img_base64[pos + 1..]
+    let clean_b64 = if let Some(pos) = b64_str.find(",") {
+        &b64_str[pos + 1..]
     } else {
-        &img_base64
+        &b64_str
     };
 
     let bytes = general_purpose::STANDARD
@@ -55,6 +45,56 @@ fn base64_to_image(img_base64: &str) -> Result<DynamicImage> {
 
     image::load_from_memory(&bytes).context("Failed to load image from decoded base64")
 }
+
+/// Reads the file at `image_path` and returns its raw bytes as a Base64-encoded string.
+/// Counterpart of the Python `get_img_base64`; a read failure is returned as an error whose context names the path.
+pub fn get_img_base64<P: AsRef<Path>>(image_path: P) -> Result<String> {
+    // 1. Read the file's raw bytes (the file is not decoded as an image here).
+    // The `AsRef<Path>` bound lets callers pass `String`, `&str`, `PathBuf`, and similar types.
+    let image_data = fs::read(&image_path)
+        .with_context(|| format!("Failed to read image file: {:?}", image_path.as_ref()))?;
+
+    // 2. Base64-encode the bytes.
+    // The STANDARD engine is used to match Python's `base64.b64encode`.
+    let b64_string = general_purpose::STANDARD.encode(image_data);
+
+    Ok(b64_string)
+}
+/// Flattens an image onto a white background: each non-transparent pixel is alpha-blended over white and an RGB8 image is returned.
+///
+/// Counterpart of the Python `png_rgba_black_preprocess` (the backdrop is white despite the name); this function always returns `Ok`.
+pub fn png_rgba_black_preprocess(img: &DynamicImage) -> Result<DynamicImage> {
+    // 1. Take the dimensions of the source image.
+    let (width, height) = (img.width(), img.height());
+
+    // 2. Create a pure-white RGB image of the same size to serve as the backdrop.
+    // Resulting buffer type: ImageBuffer::<Rgb<u8>, Vec<u8>>
+    let mut white_bg = ImageBuffer::from_fn(width, height, |_, _| {
+        Rgb([255, 255, 255])
+    });
+
+    // 3. Composite the source image onto the backdrop.
+    // Convert the source to RGBA first so every pixel has an alpha channel to read.
+    let rgba_img = img.to_rgba8();
+
+    // Walk every pixel and blend it in (emulates Python's `paste` with the image as its own mask).
+    for (x, y, pixel) in rgba_img.enumerate_pixels() {
+        let alpha = pixel[3] as f32 / 255.0;
+        if alpha > 0.0 {
+            // Fetch the backdrop pixel (white unless already written) for in-place update.
+            let bg_pixel = white_bg.get_pixel_mut(x, y);
+
+            // Simple alpha compositing: out = src * alpha + background * (1 - alpha); the `as u8` cast truncates rather than rounds.
+            for i in 0..3 {
+                let fg = pixel[i] as f32;
+                let bg = bg_pixel[i] as f32;
+                bg_pixel[i] = (fg * alpha + bg * (1.0 - alpha)) as u8;
+            }
+        }
+    }
+
+    Ok(DynamicImage::ImageRgb8(white_bg))
+}
 /// 封装数组转图像的逻辑，对齐 Python 版 _numpy_to_pil_image
 fn numpy_to_pil_image(array: ArrayViewD<u8>) -> Result<DynamicImage> {
     let shape = array.shape();
@@ -139,7 +179,65 @@ pub fn png_rgba_white_preprocess(img: &DynamicImage) -> DynamicImage {
 
     DynamicImage::ImageRgb8(background)
 }
+pub fn image_to_numpy(image: &DynamicImage, target_mode: &str) -> Result<Array3<u8>> {
+    // Converts `image` to the pixel format named by `target_mode` and returns the pixels as an [H, W, C] array (mirrors Python's `image.convert(target_mode)`).
+    // The `image` crate performs the conversion via `to_rgb8`, `to_luma8` and `to_rgba8`; any other mode string yields an error.
+    let (width, height) = image.dimensions();
 
+    match target_mode {
+        "RGB" => {
+            let rgb_img = image.to_rgb8();
+            let raw = rgb_img.into_raw();
+            // Shape is [Height, Width, Channels] -> [H, W, 3]
+            Array3::from_shape_vec((height as usize, width as usize, 3), raw)
+                .map_err(|e| anyhow!("Failed to build ndarray: {}", e))
+        },
+        "L" | "GRAY" => {
+            let gray_img = image.to_luma8();
+            let raw = gray_img.into_raw();
+            // Shape is [H, W, 1] (the single channel axis is kept)
+            Array3::from_shape_vec((height as usize, width as usize, 1), raw)
+                .map_err(|e| anyhow!("Failed to build ndarray: {}", e))
+        },
+        "RGBA" => {
+            let rgba_img = image.to_rgba8();
+            let raw = rgba_img.into_raw();
+            // Shape is [H, W, 4]
+            Array3::from_shape_vec((height as usize, width as usize, 4), raw)
+                .map_err(|e| anyhow!("Failed to build ndarray: {}", e))
+        },
+        _ => Err(anyhow!("Unsupported target_mode: {}", target_mode)),
+    }
+}
+
+pub fn numpy_to_image(array: ArrayViewD<u8>, mode: &str) -> Result<DynamicImage> {
+    let shape = array.shape();
+    // Builds a `DynamicImage` from `array` as directed by `mode`; first get the data in standard (C-order) layout so it can be taken as one flat Vec.
+    let standard = array.as_standard_layout();
+    let (raw_data, _) = standard.to_owned().into_raw_vec_and_offset();
+
+    let height = shape[0] as u32; // NOTE(review): `shape[0]` / `shape[1]` indexing panics if the array has fewer than 2 dimensions
+    let width = shape[1] as u32; // axis 0 is taken as height, axis 1 as width
+
+    match mode { // NOTE(review): `mode` is not checked against the array's channel axis; only the `from_raw` result is checked
+        "L" => {
+            ImageBuffer::<Luma<u8>, _>::from_raw(width, height, raw_data)
+                .map(DynamicImage::ImageLuma8)
+                .ok_or_else(|| anyhow!("Failed to create Luma image"))
+        },
+        "RGB" => {
+            ImageBuffer::<Rgb<u8>, _>::from_raw(width, height, raw_data)
+                .map(DynamicImage::ImageRgb8)
+                .ok_or_else(|| anyhow!("Failed to create RGB image"))
+        },
+        "RGBA" => {
+            ImageBuffer::<Rgba<u8>, _>::from_raw(width, height, raw_data)
+                .map(DynamicImage::ImageRgba8)
+                .ok_or_else(|| anyhow!("Failed to create RGBA image"))
+        },
+        _ => Err(anyhow!("Unsupported mode: {}", mode)), // unlike `image_to_numpy`, "GRAY" is not accepted here
+    }
+}
 pub fn image_to_ndarray(img: &DynamicImage) -> Array3<u8> {
     let (width, height) = img.dimensions();
 
diff --git a/tests/ocr_test.rs b/tests/ocr_test.rs
index a915f22..9ca0b44 100644
--- a/tests/ocr_test.rs
+++ b/tests/ocr_test.rs
@@ -102,7 +102,7 @@ fn test_real_slide_match() {
     // 2. 执行匹配
     // 如果是那种带有明显阴影边缘的复杂滑块，建议 simple_target 传 false
     let start = std::time::Instant::now();
-    let result = engine.slide_match(&target_img, &bg_img, false)
+    let result = engine.slide_match(&target_img, &bg_img, true)
         .expect("Slide match 执行失败");
     let duration = start.elapsed();