feat: 实现 DdddOcr 核心推理流水线与图像预处理
- 封装 `preprocess_image` 方法,实现 PNG 透明背景修复、灰度化、比例缩放及 NCHW 张量转换。
- 提取 `inference` 逻辑,支持通过 tract-onnx 执行模型推理。
- 实现 `extract_indices` 解析输出张量,支持 I64 索引直接读取与 F32 概率矩阵的 Argmax 处理。
- 完善 `decode_ctc` 解码算法,支持标准 CTC 贪婪搜索与字符集映射。
- 重构 `classification` 主入口,将预处理、推理、解析、解码逻辑解耦,提升代码可维护性。
This commit is contained in:
62
src/image_io.rs
Normal file
62
src/image_io.rs
Normal file
@@ -0,0 +1,62 @@
|
||||
use anyhow::{Context, Result};
|
||||
use base64::{Engine as _, engine::general_purpose};
|
||||
use image::{DynamicImage, GenericImageView, ImageBuffer, Rgb, RgbImage};
|
||||
use std::path::{Path, PathBuf};
|
||||
use tract_onnx::prelude::tract_ndarray::Array3;
|
||||
|
||||
/// 定义支持的输入类型枚举
|
||||
pub enum ImageInput {
|
||||
Bytes(Vec<u8>),
|
||||
Array(Array3<u8>),
|
||||
Path(PathBuf),
|
||||
Base64(String),
|
||||
DynamicImage(DynamicImage),
|
||||
}
|
||||
|
||||
/// 模拟 Python 的 load_image_from_input
|
||||
#[allow(dead_code)]
|
||||
pub fn load_image_from_input(input: ImageInput) -> Result<DynamicImage> {
|
||||
match input {
|
||||
ImageInput::DynamicImage(img) => Ok(img),
|
||||
_ => todo!("后续补充"),
|
||||
}
|
||||
}
|
||||
|
||||
/// 对应 Python 的 png_rgba_black_preprocess
|
||||
/// 将带有透明通道的图片转换为白色背景的 RGB 图片
|
||||
#[allow(dead_code)]
|
||||
pub fn png_rgba_white_preprocess(img: &DynamicImage) -> DynamicImage {
|
||||
// 1. 检查是否包含透明通道,如果没有,直接克隆并返回
|
||||
if !img.color().has_alpha() {
|
||||
return img.clone();
|
||||
}
|
||||
|
||||
let (width, height) = img.dimensions();
|
||||
|
||||
// 2. 创建一个新的 RGB 图像缓冲,默认填充为白色 (255, 255, 255)
|
||||
let mut background = ImageBuffer::from_pixel(width, height, Rgb([255u8, 255u8, 255u8]));
|
||||
|
||||
// 3. 获取原图的 RGBA 视图
|
||||
let rgba_img = img.to_rgba8();
|
||||
|
||||
// 4. 遍历像素并手动进行 Alpha 混合
|
||||
// 对应 Python 的 image.paste(img, ..., mask=img)
|
||||
for (x, y, pixel) in rgba_img.enumerate_pixels() {
|
||||
let alpha = pixel[3] as f32 / 255.0;
|
||||
|
||||
if alpha >= 1.0 {
|
||||
// 完全不透明,直接覆盖
|
||||
background.put_pixel(x, y, Rgb([pixel[0], pixel[1], pixel[2]]));
|
||||
} else if alpha > 0.0 {
|
||||
// 半透明,执行 Alpha 混合公式: (src * alpha) + (dst * (1 - alpha))
|
||||
let bg_pixel = background.get_pixel(x, y);
|
||||
let r = (pixel[0] as f32 * alpha + bg_pixel[0] as f32 * (1.0 - alpha)) as u8;
|
||||
let g = (pixel[1] as f32 * alpha + bg_pixel[1] as f32 * (1.0 - alpha)) as u8;
|
||||
let b = (pixel[2] as f32 * alpha + bg_pixel[2] as f32 * (1.0 - alpha)) as u8;
|
||||
background.put_pixel(x, y, Rgb([r, g, b]));
|
||||
}
|
||||
// alpha == 0 的情况不需要处理,因为背景已经是白色了
|
||||
}
|
||||
|
||||
DynamicImage::ImageRgb8(background)
|
||||
}
|
||||
Reference in New Issue
Block a user