From e8b365dced2b6c6dd1dd7ee5355fa758d8c5a718 Mon Sep 17 00:00:00 2001 From: CNWei Date: Fri, 8 May 2026 22:35:17 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E4=BC=98=E5=8C=96=20image=5Fio.rs=20?= =?UTF-8?q?=E6=A8=A1=E5=9D=97?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 新增 base64_to_image等工具函数。 --- README.md | 3 +- src/image_io.rs | 134 +++++++++++++++++++++++++++++++++++++++------- tests/ocr_test.rs | 2 +- 3 files changed, 119 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index b7e7b25..9ba3654 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,8 @@ CCOEFF (OpenCV 默认):会自动进行“均值中心化”,在一定程度 💡 开发者建议: -如果识别结果在 $X$ 轴上有大约 $10px$ 左右的固定误差,通常是因为滑块原图自带了透明边距(留白)。此时请确保 simple_target=false。该模式会通过 Canny 边缘检测 提取轮廓特征,能自动锁定拼图实体并忽略背景留白的像素干扰。 +如果识别结果在 $X$ 轴上有大约 $10px$ 左右的固定误差,通常是因为滑块原图自带了透明边距(留白)。此时请确保 +simple_target=false。该模式会通过 Canny 边缘检测 提取轮廓特征,能自动锁定拼图实体并忽略背景留白的像素干扰。 鸣谢 (Credits) - 本项目是 [ddddocr](https://github.com/sml2h3/ddddocr) 的 Rust 移植版本,原作者为 sml2h3。衷心感谢原作者对 OCR 社区做出的杰出贡献。 diff --git a/src/image_io.rs b/src/image_io.rs index 1373286..23d6017 100644 --- a/src/image_io.rs +++ b/src/image_io.rs @@ -12,16 +12,6 @@ pub enum ImageInput { Base64(String), DynamicImage(DynamicImage), } - -impl From<&str> for ImageInput { - fn from(s: &str) -> Self { - if Path::new(s).exists() { - ImageInput::Path(s.into()) - } else { - ImageInput::Base64(s.to_string()) - } - } -} /// 模拟 Python 的 load_image_from_input #[allow(dead_code)] pub fn load_image_from_input(img_input: ImageInput) -> Result { @@ -31,22 +21,22 @@ pub fn load_image_from_input(img_input: ImageInput) -> Result { image::load_from_memory(&bytes).context("Failed to load image from bytes") } // 1. 已经是 DynamicImage - ImageInput::DynamicImage(img) => Ok(img), + ImageInput::DynamicImage(i) => Ok(i), // 5. 处理 ndarray (Numpy-like) // 假设输入是 HWC 格式的 Array3 - ImageInput::Array(arr) => numpy_to_pil_image(arr.view()), + ImageInput::Array(a) => numpy_to_pil_image(a.view()), // 4. 处理 Base64 字符串 - ImageInput::Base64(b64_str) => base64_to_image(&b64_str), + ImageInput::Base64(b) => base64_to_image(&b), // 3. 处理文件路径 (Path) - ImageInput::Path(path) => image::open(path).context("Failed to open image from path"), + ImageInput::Path(p) => image::open(p).context("Failed to open image from path"), } } -fn base64_to_image(img_base64: &str) -> Result { +fn base64_to_image(b64_str: &str) -> Result { // 过滤掉可能存在的 base64 前缀,例如 "data:image/png;base64," - let clean_b64 = if let Some(pos) = img_base64.find(",") { - &img_base64[pos + 1..] + let clean_b64 = if let Some(pos) = b64_str.find(",") { + &b64_str[pos + 1..] } else { - &img_base64 + &b64_str }; let bytes = general_purpose::STANDARD @@ -55,6 +45,56 @@ fn base64_to_image(img_base64: &str) -> Result { image::load_from_memory(&bytes).context("Failed to load image from decoded base64") } + +/// 读取图片文件并转换为 base64 编码字符串 +/// 对应 Python 版 get_img_base64 +pub fn get_img_base64>(image_path: P) -> Result { + // 1. 读取文件原始字节流 + // 使用 AsRef 泛型可以让函数同时支持 String, &str, PathBuf 等类型 + let image_data = fs::read(&image_path) + .with_context(|| format!("Failed to read image file: {:?}", image_path.as_ref()))?; + + // 2. 进行 Base64 编码 + // 使用 STANDARD 引擎对齐 Python 的 base64.b64encode + let b64_string = general_purpose::STANDARD.encode(image_data); + + Ok(b64_string) +} +/// 处理 PNG 图像的 RGBA 透明背景,将透明部分设置为白色 +/// +/// 对应 Python 版 png_rgba_black_preprocess +pub fn png_rgba_black_preprocess(img: &DynamicImage) -> Result { + // 1. 获取原图尺寸 + let (width, height) = (img.width(), img.height()); + + // 2. 创建一个等尺寸的纯白色 RGB 图像作为底色 + // ImageBuffer::, Vec> + let mut white_bg = ImageBuffer::from_fn(width, height, |_, _| { + Rgb([255, 255, 255]) + }); + + // 3. 将原图复合到底色上 + // 我们需要处理原图,将其转为 RGBA 确保有 alpha 通道可以参考 + let rgba_img = img.to_rgba8(); + + // 遍历每一个像素进行复合(模拟 Python 的 paste 逻辑) + for (x, y, pixel) in rgba_img.enumerate_pixels() { + let alpha = pixel[3] as f32 / 255.0; + if alpha > 0.0 { + // 获取底色像素(白色) + let bg_pixel = white_bg.get_pixel_mut(x, y); + + // 简单的 Alpha 复合公式:输出 = 源 * alpha + 背景 * (1 - alpha) + for i in 0..3 { + let fg = pixel[i] as f32; + let bg = bg_pixel[i] as f32; + bg_pixel[i] = (fg * alpha + bg * (1.0 - alpha)) as u8; + } + } + } + + Ok(DynamicImage::ImageRgb8(white_bg)) +} /// 封装数组转图像的逻辑,对齐 Python 版 _numpy_to_pil_image fn numpy_to_pil_image(array: ArrayViewD) -> Result { let shape = array.shape(); @@ -139,7 +179,65 @@ pub fn png_rgba_white_preprocess(img: &DynamicImage) -> DynamicImage { DynamicImage::ImageRgb8(background) } +pub fn image_to_numpy(image: &DynamicImage, target_mode: &str) -> Result> { + // 1. 模式转换 (对应 image.convert(target_mode)) + // Rust image 库通过 to_rgb8, to_luma8 等方法实现转换 + let (width, height) = image.dimensions(); + match target_mode { + "RGB" => { + let rgb_img = image.to_rgb8(); + let raw = rgb_img.into_raw(); + // shape 为 [Height, Width, Channels] -> [H, W, 3] + Array3::from_shape_vec((height as usize, width as usize, 3), raw) + .map_err(|e| anyhow!("Failed to build ndarray: {}", e)) + }, + "L" | "GRAY" => { + let gray_img = image.to_luma8(); + let raw = gray_img.into_raw(); + // shape 为 [H, W, 1] + Array3::from_shape_vec((height as usize, width as usize, 1), raw) + .map_err(|e| anyhow!("Failed to build ndarray: {}", e)) + }, + "RGBA" => { + let rgba_img = image.to_rgba8(); + let raw = rgba_img.into_raw(); + // shape 为 [H, W, 4] + Array3::from_shape_vec((height as usize, width as usize, 4), raw) + .map_err(|e| anyhow!("Failed to build ndarray: {}", e)) + }, + _ => Err(anyhow!("Unsupported target_mode: {}", target_mode)), + } +} + +pub fn numpy_to_image(array: ArrayViewD, mode: &str) -> Result { + let shape = array.shape(); + // 确保数据连续性 (C-order) + let standard = array.as_standard_layout(); + let (raw_data, _) = standard.to_owned().into_raw_vec_and_offset(); + + let height = shape[0] as u32; + let width = shape[1] as u32; + + match mode { + "L" => { + ImageBuffer::, _>::from_raw(width, height, raw_data) + .map(DynamicImage::ImageLuma8) + .ok_or_else(|| anyhow!("Failed to create Luma image")) + }, + "RGB" => { + ImageBuffer::, _>::from_raw(width, height, raw_data) + .map(DynamicImage::ImageRgb8) + .ok_or_else(|| anyhow!("Failed to create RGB image")) + }, + "RGBA" => { + ImageBuffer::, _>::from_raw(width, height, raw_data) + .map(DynamicImage::ImageRgba8) + .ok_or_else(|| anyhow!("Failed to create RGBA image")) + }, + _ => Err(anyhow!("Unsupported mode: {}", mode)), + } +} pub fn image_to_ndarray(img: &DynamicImage) -> Array3 { let (width, height) = img.dimensions(); diff --git a/tests/ocr_test.rs b/tests/ocr_test.rs index a915f22..9ca0b44 100644 --- a/tests/ocr_test.rs +++ b/tests/ocr_test.rs @@ -102,7 +102,7 @@ fn test_real_slide_match() { // 2. 执行匹配 // 如果是那种带有明显阴影边缘的复杂滑块,建议 simple_target 传 false let start = std::time::Instant::now(); - let result = engine.slide_match(&target_img, &bg_img, false) + let result = engine.slide_match(&target_img, &bg_img, true) .expect("Slide match 执行失败"); let duration = start.elapsed();