feat: 优化 image_io.rs 模块
- 新增 base64_to_image 等工具函数。
This commit is contained in:
@@ -37,7 +37,8 @@ CCOEFF (OpenCV 默认):会自动进行“均值中心化”,在一定程度
|
||||
|
||||
💡 开发者建议:
|
||||
|
||||
如果识别结果在 $X$ 轴上有大约 $10px$ 左右的固定误差,通常是因为滑块原图自带了透明边距(留白)。此时请确保 simple_target=false。该模式会通过 Canny 边缘检测 提取轮廓特征,能自动锁定拼图实体并忽略背景留白的像素干扰。
|
||||
如果识别结果在 $X$ 轴上有大约 $10px$ 左右的固定误差,通常是因为滑块原图自带了透明边距(留白)。此时请确保
|
||||
simple_target=false。该模式会通过 Canny 边缘检测 提取轮廓特征,能自动锁定拼图实体并忽略背景留白的像素干扰。
|
||||
鸣谢 (Credits)
|
||||
|
||||
- 本项目是 [ddddocr](https://github.com/sml2h3/ddddocr) 的 Rust 移植版本,原作者为 sml2h3。衷心感谢原作者对 OCR 社区做出的杰出贡献。
|
||||
|
||||
134
src/image_io.rs
134
src/image_io.rs
@@ -12,16 +12,6 @@ pub enum ImageInput {
|
||||
Base64(String),
|
||||
DynamicImage(DynamicImage),
|
||||
}
|
||||
|
||||
impl From<&str> for ImageInput {
|
||||
fn from(s: &str) -> Self {
|
||||
if Path::new(s).exists() {
|
||||
ImageInput::Path(s.into())
|
||||
} else {
|
||||
ImageInput::Base64(s.to_string())
|
||||
}
|
||||
}
|
||||
}
|
||||
/// 模拟 Python 的 load_image_from_input
|
||||
#[allow(dead_code)]
|
||||
pub fn load_image_from_input(img_input: ImageInput) -> Result<DynamicImage> {
|
||||
@@ -31,22 +21,22 @@ pub fn load_image_from_input(img_input: ImageInput) -> Result<DynamicImage> {
|
||||
image::load_from_memory(&bytes).context("Failed to load image from bytes")
|
||||
}
|
||||
// 1. 已经是 DynamicImage
|
||||
ImageInput::DynamicImage(img) => Ok(img),
|
||||
ImageInput::DynamicImage(i) => Ok(i),
|
||||
// 5. 处理 ndarray (Numpy-like)
|
||||
// 假设输入是 HWC 格式的 Array3<u8>
|
||||
ImageInput::Array(arr) => numpy_to_pil_image(arr.view()),
|
||||
ImageInput::Array(a) => numpy_to_pil_image(a.view()),
|
||||
// 4. 处理 Base64 字符串
|
||||
ImageInput::Base64(b64_str) => base64_to_image(&b64_str),
|
||||
ImageInput::Base64(b) => base64_to_image(&b),
|
||||
// 3. 处理文件路径 (Path)
|
||||
ImageInput::Path(path) => image::open(path).context("Failed to open image from path"),
|
||||
ImageInput::Path(p) => image::open(p).context("Failed to open image from path"),
|
||||
}
|
||||
}
|
||||
fn base64_to_image(img_base64: &str) -> Result<DynamicImage> {
|
||||
fn base64_to_image(b64_str: &str) -> Result<DynamicImage> {
|
||||
// 过滤掉可能存在的 base64 前缀,例如 "data:image/png;base64,"
|
||||
let clean_b64 = if let Some(pos) = img_base64.find(",") {
|
||||
&img_base64[pos + 1..]
|
||||
let clean_b64 = if let Some(pos) = b64_str.find(",") {
|
||||
&b64_str[pos + 1..]
|
||||
} else {
|
||||
&img_base64
|
||||
&b64_str
|
||||
};
|
||||
|
||||
let bytes = general_purpose::STANDARD
|
||||
@@ -55,6 +45,56 @@ fn base64_to_image(img_base64: &str) -> Result<DynamicImage> {
|
||||
|
||||
image::load_from_memory(&bytes).context("Failed to load image from decoded base64")
|
||||
}
|
||||
|
||||
/// 读取图片文件并转换为 base64 编码字符串
|
||||
/// 对应 Python 版 get_img_base64
|
||||
pub fn get_img_base64<P: AsRef<Path>>(image_path: P) -> Result<String> {
|
||||
// 1. 读取文件原始字节流
|
||||
// 使用 AsRef<Path> 泛型可以让函数同时支持 String, &str, PathBuf 等类型
|
||||
let image_data = fs::read(&image_path)
|
||||
.with_context(|| format!("Failed to read image file: {:?}", image_path.as_ref()))?;
|
||||
|
||||
// 2. 进行 Base64 编码
|
||||
// 使用 STANDARD 引擎对齐 Python 的 base64.b64encode
|
||||
let b64_string = general_purpose::STANDARD.encode(image_data);
|
||||
|
||||
Ok(b64_string)
|
||||
}
|
||||
/// 处理 PNG 图像的 RGBA 透明背景,将透明部分设置为白色
|
||||
///
|
||||
/// 对应 Python 版 png_rgba_black_preprocess
|
||||
pub fn png_rgba_black_preprocess(img: &DynamicImage) -> Result<DynamicImage> {
|
||||
// 1. 获取原图尺寸
|
||||
let (width, height) = (img.width(), img.height());
|
||||
|
||||
// 2. 创建一个等尺寸的纯白色 RGB 图像作为底色
|
||||
// ImageBuffer::<Rgb<u8>, Vec<u8>>
|
||||
let mut white_bg = ImageBuffer::from_fn(width, height, |_, _| {
|
||||
Rgb([255, 255, 255])
|
||||
});
|
||||
|
||||
// 3. 将原图复合到底色上
|
||||
// 我们需要处理原图,将其转为 RGBA 确保有 alpha 通道可以参考
|
||||
let rgba_img = img.to_rgba8();
|
||||
|
||||
// 遍历每一个像素进行复合(模拟 Python 的 paste 逻辑)
|
||||
for (x, y, pixel) in rgba_img.enumerate_pixels() {
|
||||
let alpha = pixel[3] as f32 / 255.0;
|
||||
if alpha > 0.0 {
|
||||
// 获取底色像素(白色)
|
||||
let bg_pixel = white_bg.get_pixel_mut(x, y);
|
||||
|
||||
// 简单的 Alpha 复合公式:输出 = 源 * alpha + 背景 * (1 - alpha)
|
||||
for i in 0..3 {
|
||||
let fg = pixel[i] as f32;
|
||||
let bg = bg_pixel[i] as f32;
|
||||
bg_pixel[i] = (fg * alpha + bg * (1.0 - alpha)) as u8;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(DynamicImage::ImageRgb8(white_bg))
|
||||
}
|
||||
/// 封装数组转图像的逻辑,对齐 Python 版 _numpy_to_pil_image
|
||||
fn numpy_to_pil_image(array: ArrayViewD<u8>) -> Result<DynamicImage> {
|
||||
let shape = array.shape();
|
||||
@@ -139,7 +179,65 @@ pub fn png_rgba_white_preprocess(img: &DynamicImage) -> DynamicImage {
|
||||
|
||||
DynamicImage::ImageRgb8(background)
|
||||
}
|
||||
pub fn image_to_numpy(image: &DynamicImage, target_mode: &str) -> Result<Array3<u8>> {
|
||||
// 1. 模式转换 (对应 image.convert(target_mode))
|
||||
// Rust image 库通过 to_rgb8, to_luma8 等方法实现转换
|
||||
let (width, height) = image.dimensions();
|
||||
|
||||
match target_mode {
|
||||
"RGB" => {
|
||||
let rgb_img = image.to_rgb8();
|
||||
let raw = rgb_img.into_raw();
|
||||
// shape 为 [Height, Width, Channels] -> [H, W, 3]
|
||||
Array3::from_shape_vec((height as usize, width as usize, 3), raw)
|
||||
.map_err(|e| anyhow!("Failed to build ndarray: {}", e))
|
||||
},
|
||||
"L" | "GRAY" => {
|
||||
let gray_img = image.to_luma8();
|
||||
let raw = gray_img.into_raw();
|
||||
// shape 为 [H, W, 1]
|
||||
Array3::from_shape_vec((height as usize, width as usize, 1), raw)
|
||||
.map_err(|e| anyhow!("Failed to build ndarray: {}", e))
|
||||
},
|
||||
"RGBA" => {
|
||||
let rgba_img = image.to_rgba8();
|
||||
let raw = rgba_img.into_raw();
|
||||
// shape 为 [H, W, 4]
|
||||
Array3::from_shape_vec((height as usize, width as usize, 4), raw)
|
||||
.map_err(|e| anyhow!("Failed to build ndarray: {}", e))
|
||||
},
|
||||
_ => Err(anyhow!("Unsupported target_mode: {}", target_mode)),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn numpy_to_image(array: ArrayViewD<u8>, mode: &str) -> Result<DynamicImage> {
|
||||
let shape = array.shape();
|
||||
// 确保数据连续性 (C-order)
|
||||
let standard = array.as_standard_layout();
|
||||
let (raw_data, _) = standard.to_owned().into_raw_vec_and_offset();
|
||||
|
||||
let height = shape[0] as u32;
|
||||
let width = shape[1] as u32;
|
||||
|
||||
match mode {
|
||||
"L" => {
|
||||
ImageBuffer::<Luma<u8>, _>::from_raw(width, height, raw_data)
|
||||
.map(DynamicImage::ImageLuma8)
|
||||
.ok_or_else(|| anyhow!("Failed to create Luma image"))
|
||||
},
|
||||
"RGB" => {
|
||||
ImageBuffer::<Rgb<u8>, _>::from_raw(width, height, raw_data)
|
||||
.map(DynamicImage::ImageRgb8)
|
||||
.ok_or_else(|| anyhow!("Failed to create RGB image"))
|
||||
},
|
||||
"RGBA" => {
|
||||
ImageBuffer::<Rgba<u8>, _>::from_raw(width, height, raw_data)
|
||||
.map(DynamicImage::ImageRgba8)
|
||||
.ok_or_else(|| anyhow!("Failed to create RGBA image"))
|
||||
},
|
||||
_ => Err(anyhow!("Unsupported mode: {}", mode)),
|
||||
}
|
||||
}
|
||||
pub fn image_to_ndarray(img: &DynamicImage) -> Array3<u8> {
|
||||
let (width, height) = img.dimensions();
|
||||
|
||||
|
||||
@@ -102,7 +102,7 @@ fn test_real_slide_match() {
|
||||
// 2. 执行匹配
|
||||
// 如果是那种带有明显阴影边缘的复杂滑块,建议 simple_target 传 false
|
||||
let start = std::time::Instant::now();
|
||||
let result = engine.slide_match(&target_img, &bg_img, false)
|
||||
let result = engine.slide_match(&target_img, &bg_img, true)
|
||||
.expect("Slide match 执行失败");
|
||||
let duration = start.elapsed();
|
||||
|
||||
|
||||
Reference in New Issue
Block a user