feat: 优化 image_io.rs 模块

- 新增 base64_to_image等工具函数。
This commit is contained in:
2026-05-08 22:35:17 +08:00
parent f0db625bd1
commit e8b365dced
3 changed files with 119 additions and 20 deletions

View File

@@ -37,7 +37,8 @@ CCOEFF (OpenCV 默认):会自动进行“均值中心化”,在一定程度
💡 开发者建议: 💡 开发者建议:
如果识别结果在 $X$ 轴上有大约 $10px$ 左右的固定误差,通常是因为滑块原图自带了透明边距(留白)。此时请确保 simple_target=false。该模式会通过 Canny 边缘检测 提取轮廓特征,能自动锁定拼图实体并忽略背景留白的像素干扰。 如果识别结果在 $X$ 轴上有大约 $10px$ 左右的固定误差,通常是因为滑块原图自带了透明边距(留白)。此时请确保
simple_target=false。该模式会通过 Canny 边缘检测 提取轮廓特征,能自动锁定拼图实体并忽略背景留白的像素干扰。
鸣谢 (Credits) 鸣谢 (Credits)
- 本项目是 [ddddocr](https://github.com/sml2h3/ddddocr) 的 Rust 移植版本,原作者为 sml2h3。衷心感谢原作者对 OCR 社区做出的杰出贡献。 - 本项目是 [ddddocr](https://github.com/sml2h3/ddddocr) 的 Rust 移植版本,原作者为 sml2h3。衷心感谢原作者对 OCR 社区做出的杰出贡献。

View File

@@ -12,16 +12,6 @@ pub enum ImageInput {
Base64(String), Base64(String),
DynamicImage(DynamicImage), DynamicImage(DynamicImage),
} }
/// Builds an `ImageInput` from a string slice.
///
/// A string that names an existing filesystem entry becomes
/// `ImageInput::Path`; any other string is kept verbatim as
/// `ImageInput::Base64`.
impl From<&str> for ImageInput {
    fn from(s: &str) -> Self {
        // Anything that does not resolve to an existing path is assumed to be
        // Base64 text; no validation of the payload happens here.
        let candidate = Path::new(s);
        if !candidate.exists() {
            return ImageInput::Base64(s.to_string());
        }
        ImageInput::Path(s.into())
    }
}
/// 模拟 Python 的 load_image_from_input /// 模拟 Python 的 load_image_from_input
#[allow(dead_code)] #[allow(dead_code)]
pub fn load_image_from_input(img_input: ImageInput) -> Result<DynamicImage> { pub fn load_image_from_input(img_input: ImageInput) -> Result<DynamicImage> {
@@ -31,22 +21,22 @@ pub fn load_image_from_input(img_input: ImageInput) -> Result<DynamicImage> {
image::load_from_memory(&bytes).context("Failed to load image from bytes") image::load_from_memory(&bytes).context("Failed to load image from bytes")
} }
// 1. 已经是 DynamicImage // 1. 已经是 DynamicImage
ImageInput::DynamicImage(img) => Ok(img), ImageInput::DynamicImage(i) => Ok(i),
// 5. 处理 ndarray (Numpy-like) // 5. 处理 ndarray (Numpy-like)
// 假设输入是 HWC 格式的 Array3<u8> // 假设输入是 HWC 格式的 Array3<u8>
ImageInput::Array(arr) => numpy_to_pil_image(arr.view()), ImageInput::Array(a) => numpy_to_pil_image(a.view()),
// 4. 处理 Base64 字符串 // 4. 处理 Base64 字符串
ImageInput::Base64(b64_str) => base64_to_image(&b64_str), ImageInput::Base64(b) => base64_to_image(&b),
// 3. 处理文件路径 (Path) // 3. 处理文件路径 (Path)
ImageInput::Path(path) => image::open(path).context("Failed to open image from path"), ImageInput::Path(p) => image::open(p).context("Failed to open image from path"),
} }
} }
fn base64_to_image(img_base64: &str) -> Result<DynamicImage> { fn base64_to_image(b64_str: &str) -> Result<DynamicImage> {
// 过滤掉可能存在的 base64 前缀,例如 "data:image/png;base64," // 过滤掉可能存在的 base64 前缀,例如 "data:image/png;base64,"
let clean_b64 = if let Some(pos) = img_base64.find(",") { let clean_b64 = if let Some(pos) = b64_str.find(",") {
&img_base64[pos + 1..] &b64_str[pos + 1..]
} else { } else {
&img_base64 &b64_str
}; };
let bytes = general_purpose::STANDARD let bytes = general_purpose::STANDARD
@@ -55,6 +45,56 @@ fn base64_to_image(img_base64: &str) -> Result<DynamicImage> {
image::load_from_memory(&bytes).context("Failed to load image from decoded base64") image::load_from_memory(&bytes).context("Failed to load image from decoded base64")
} }
/// Reads an image file from disk and returns its raw bytes encoded as a
/// Base64 string.
///
/// Counterpart of the Python helper `get_img_base64`.
///
/// # Errors
///
/// Returns an error (with the offending path in the message) if the file
/// cannot be read.
pub fn get_img_base64<P: AsRef<Path>>(image_path: P) -> Result<String> {
    // Resolve the generic argument once so the same `&Path` is used for both
    // the read and the error message.
    let path = image_path.as_ref();
    let raw_bytes = fs::read(path)
        .with_context(|| format!("Failed to read image file: {:?}", path))?;

    // The STANDARD engine is used to line up with Python's `base64.b64encode`.
    Ok(general_purpose::STANDARD.encode(raw_bytes))
}
/// Flattens an image with transparency onto an opaque white background.
///
/// Counterpart of the Python helper `png_rgba_black_preprocess`: the input is
/// converted to RGBA and alpha-composited over a white RGB canvas of the same
/// size, so fully transparent pixels come out white.
///
/// # Returns
///
/// A `DynamicImage::ImageRgb8` with the same dimensions as `img`. The
/// function currently has no failure path; the `Result` wrapper is kept for
/// interface compatibility.
pub fn png_rgba_black_preprocess(img: &DynamicImage) -> Result<DynamicImage> {
    let (width, height) = (img.width(), img.height());

    // Start from a solid white RGB canvas.
    let mut canvas = ImageBuffer::from_pixel(width, height, Rgb([255u8, 255, 255]));

    // Normalise the source to RGBA so an alpha channel is always available.
    let rgba_img = img.to_rgba8();

    // Both buffers have identical dimensions and iterate in the same order,
    // so zipping pairs each destination pixel with its source pixel.
    for (dst, src) in canvas.pixels_mut().zip(rgba_img.pixels()) {
        let alpha = u32::from(src[3]);
        if alpha == 0 {
            // Fully transparent: the white background stays untouched.
            continue;
        }
        let inverse = 255 - alpha;
        for channel in 0..3 {
            // out = src * a + bg * (1 - a), evaluated in integers with
            // round-to-nearest. The previous f32 computation truncated on the
            // cast to u8, so e.g. white over white could yield 254.
            let blended =
                (u32::from(src[channel]) * alpha + u32::from(dst[channel]) * inverse + 127) / 255;
            // `blended` is at most 255 by construction, so the cast is lossless.
            dst[channel] = blended as u8;
        }
    }

    Ok(DynamicImage::ImageRgb8(canvas))
}
/// 封装数组转图像的逻辑,对齐 Python 版 _numpy_to_pil_image /// 封装数组转图像的逻辑,对齐 Python 版 _numpy_to_pil_image
fn numpy_to_pil_image(array: ArrayViewD<u8>) -> Result<DynamicImage> { fn numpy_to_pil_image(array: ArrayViewD<u8>) -> Result<DynamicImage> {
let shape = array.shape(); let shape = array.shape();
@@ -139,7 +179,65 @@ pub fn png_rgba_white_preprocess(img: &DynamicImage) -> DynamicImage {
DynamicImage::ImageRgb8(background) DynamicImage::ImageRgb8(background)
} }
/// Converts an image into a 3-D `u8` array laid out as
/// `[height, width, channels]`.
///
/// `target_mode` selects the pixel format the image is converted to first
/// (the analogue of `image.convert(target_mode)` in the Python version):
///
/// * `"RGB"` – 3 channels
/// * `"L"` or `"GRAY"` – 1 channel
/// * `"RGBA"` – 4 channels
///
/// # Errors
///
/// Returns an error for any other `target_mode`, or if the array cannot be
/// built from the converted pixel buffer.
pub fn image_to_numpy(image: &DynamicImage, target_mode: &str) -> Result<Array3<u8>> {
    // Pick the channel count and the matching raw pixel buffer in one step.
    let (channels, pixels) = match target_mode {
        "RGB" => (3, image.to_rgb8().into_raw()),
        "L" | "GRAY" => (1, image.to_luma8().into_raw()),
        "RGBA" => (4, image.to_rgba8().into_raw()),
        _ => return Err(anyhow!("Unsupported target_mode: {}", target_mode)),
    };

    let (width, height) = image.dimensions();

    // Shape is [H, W, C].
    Array3::from_shape_vec((height as usize, width as usize, channels), pixels)
        .map_err(|e| anyhow!("Failed to build ndarray: {}", e))
}
/// Builds a `DynamicImage` from a dynamic-dimensional `u8` array.
///
/// Axis 0 is taken as the image height and axis 1 as the width; any remaining
/// axes are treated as the per-pixel channels. `mode` selects the pixel
/// format:
///
/// * `"L"` – 1 channel (a plain `[H, W]` array is accepted)
/// * `"RGB"` – 3 channels
/// * `"RGBA"` – 4 channels
///
/// # Errors
///
/// Returns an error if `mode` is unsupported, if the array has fewer than two
/// axes, if its channel count does not match `mode`, if a dimension does not
/// fit in `u32`, or if the image buffer cannot be constructed.
pub fn numpy_to_image(array: ArrayViewD<u8>, mode: &str) -> Result<DynamicImage> {
    let expected_channels: usize = match mode {
        "L" => 1,
        "RGB" => 3,
        "RGBA" => 4,
        _ => return Err(anyhow!("Unsupported mode: {}", mode)),
    };

    let shape = array.shape();
    // Indexing shape[0] / shape[1] on a 0-D or 1-D array would panic.
    if shape.len() < 2 {
        return Err(anyhow!(
            "Expected an array with at least 2 dimensions, got shape {:?}",
            shape
        ));
    }

    // Everything after (H, W) is channel data; an empty product is 1, so a
    // 2-D array counts as single-channel.
    let channels: usize = shape[2..].iter().product();
    // `ImageBuffer::from_raw` only checks that the buffer is large enough, so
    // a mismatched channel count would otherwise produce a scrambled image
    // instead of an error.
    if channels != expected_channels {
        return Err(anyhow!(
            "Mode {} expects {} channel(s), but array shape {:?} has {}",
            mode,
            expected_channels,
            shape,
            channels
        ));
    }

    let height = u32::try_from(shape[0])
        .map_err(|_| anyhow!("Image height {} does not fit in u32", shape[0]))?;
    let width = u32::try_from(shape[1])
        .map_err(|_| anyhow!("Image width {} does not fit in u32", shape[1]))?;

    // Ensure contiguous C-order data, then take ownership of the buffer.
    // `into_owned` reuses the copy made by `as_standard_layout` when the
    // input was not already contiguous.
    let (raw_data, _) = array
        .as_standard_layout()
        .into_owned()
        .into_raw_vec_and_offset();

    match mode {
        "L" => ImageBuffer::<Luma<u8>, _>::from_raw(width, height, raw_data)
            .map(DynamicImage::ImageLuma8)
            .ok_or_else(|| anyhow!("Failed to create Luma image")),
        "RGB" => ImageBuffer::<Rgb<u8>, _>::from_raw(width, height, raw_data)
            .map(DynamicImage::ImageRgb8)
            .ok_or_else(|| anyhow!("Failed to create RGB image")),
        "RGBA" => ImageBuffer::<Rgba<u8>, _>::from_raw(width, height, raw_data)
            .map(DynamicImage::ImageRgba8)
            .ok_or_else(|| anyhow!("Failed to create RGBA image")),
        // Already rejected above; kept so the match stays exhaustive.
        _ => Err(anyhow!("Unsupported mode: {}", mode)),
    }
}
pub fn image_to_ndarray(img: &DynamicImage) -> Array3<u8> { pub fn image_to_ndarray(img: &DynamicImage) -> Array3<u8> {
let (width, height) = img.dimensions(); let (width, height) = img.dimensions();

View File

@@ -102,7 +102,7 @@ fn test_real_slide_match() {
// 2. 执行匹配 // 2. 执行匹配
// 如果是那种带有明显阴影边缘的复杂滑块,建议 simple_target 传 false // 如果是那种带有明显阴影边缘的复杂滑块,建议 simple_target 传 false
let start = std::time::Instant::now(); let start = std::time::Instant::now();
let result = engine.slide_match(&target_img, &bg_img, false) let result = engine.slide_match(&target_img, &bg_img, true)
.expect("Slide match 执行失败"); .expect("Slide match 执行失败");
let duration = start.elapsed(); let duration = start.elapsed();