refactor: 移除 OpenCV 依赖并实现纯 Rust 图像处理流水线

- 替换 opencv 为 image 库以简化交叉编译
- 修正 nms 逻辑中的 ArrayView 借用问题
- 增加 save_debug_image 方法用于可视化检测框
- 更新 Cargo.toml 依赖项
This commit is contained in:
2026-05-06 17:37:38 +08:00
parent cfeb68ad04
commit 8fcfa2096e
8 changed files with 338 additions and 71 deletions

View File

@@ -1,3 +1,4 @@
use crate::base::ModelArgs;
use crate::image_io::png_rgba_white_preprocess;
use crate::image_processor::{convert_to_grayscale, resize_image};
use crate::model_loader::{ModelLoader, ModelSession, ModelType};
@@ -7,8 +8,6 @@ use tract_onnx::prelude::tract_ndarray::s;
use tract_onnx::prelude::{
DatumType, Graph, IntoTensor, RunnableModel, Tensor, TypedFact, TypedOp, tract_ndarray, tvec,
};
use crate::base::ModelArgs;
/// Custom range used for color filtering: (low RGB value, high RGB value).
pub type ColorRange = ((u8, u8, u8), (u8, u8, u8));
@@ -16,17 +15,17 @@ pub type ColorRange = ((u8, u8, u8), (u8, u8, u8));
// 字符集范围类型
#[derive(Debug, Clone)]
pub enum CharsetRange {
All, // 所有字符
Digit, // 数字
Letter, // 字母
Alphanumeric, // 字母数字
Single(String), // 单字符串
Multiple(Vec<String>), // 多个字符串
Range(char, char), // 字符范围
Custom(Vec<char>), // 自定义字符列表
All, // 所有字符
Digit, // 数字
Letter, // 字母
Alphanumeric, // 字母数字
Single(String), // 单字符串
Multiple(Vec<String>), // 多个字符串
Range(char, char), // 字符范围
Custom(Vec<char>), // 自定义字符列表
}
#[derive(Debug, Clone)]
pub struct PredictArgs{
pub struct PredictArgs {
/// 是否修复PNG格式问题
pub png_fix: bool,
/// 是否返回概率信息
@@ -100,7 +99,19 @@ pub struct Ocr {
charset: Vec<String>,
}
impl ModelSession for Ocr {
fn predict(&self, image: &DynamicImage, png_fix: bool) -> Result<String, anyhow::Error> {
fn get_model_type(&self) -> ModelType {
todo!()
}
/// Returns a fixed, human-readable message stating that the OCR model
/// was loaded successfully.
fn desc(&self) -> String {
    String::from("Ocr Model 加载成功")
}
}
impl Ocr {
/// Builds an [`Ocr`] from a model file and the character set used for decoding.
///
/// The model at `model_path` is loaded through `ModelLoader::load_model`
/// and its session is kept; `charset` is stored as given.
///
/// # Errors
///
/// Propagates whatever error `ModelLoader::load_model` returns.
pub fn new(model_path: String, charset: Vec<String>) -> Result<Self, anyhow::Error> {
    let loaded = ModelLoader::load_model(&model_path)?;
    Ok(Self {
        session: loaded.session,
        charset,
    })
}
pub fn predict(&self, image: &DynamicImage, png_fix: bool) -> Result<String, anyhow::Error> {
let tensor = self.preprocess_image(image, png_fix)?;
//
// let result = self.session.run(tvec!(tensor.into()))?;
@@ -108,19 +119,9 @@ impl ModelSession for Ocr {
// // let output = result[0].to_array_view::<i64>()?;
let output = self.inference(tensor)?;
let output2 = self.process_text_output(&output)?;
Ok(Self::ctc_decode_indices(&output2))
Ok(self.ctc_decode_indices(&output2))
// Ok("ocr result".to_string())
}
fn get_model_type(&self) -> ModelType {
ModelType::Ocr
}
}
impl Ocr {
pub fn new(model_path: String, charset: Vec<String>) -> Result<Self, anyhow::Error> {
let session = ModelLoader::load_model(&model_path)?.session;
Ok(Self { session, charset })
}
/// 对应 Python 的 _preprocess_image
/// 负责:透明背景修复 -> 灰度化 -> 按比例 Resize -> 归一化 -> 4维张量转换
fn preprocess_image(&self, img: &DynamicImage, png_fix: bool) -> anyhow::Result<Tensor> {
@@ -222,10 +223,9 @@ impl Ocr {
)),
}
}
fn ctc_decode_indices(predicted_indices: &[i64]) -> String {
fn ctc_decode_indices(&self, predicted_indices: &[i64]) -> String {
println!("indices模型输出原始数据: {:?}", predicted_indices);
use crate::charset::CHARSET_BETA;
// 对应 _ctc_decode_indices 的逻辑:去重、去 blank (0)
let mut res = String::new();
let mut prev_idx: i64 = -1;
@@ -235,8 +235,11 @@ impl Ocr {
// 2. 跳过 blank 字符 (假设索引 0 是 blank)
if idx != prev_idx && idx != 0 {
if let Ok(u_idx) = usize::try_from(idx) {
if let Some(&char_str) = CHARSET_BETA.get(u_idx) {
if let Some(char_str) = self.charset.get(u_idx) {
res.push_str(char_str);
} else {
// 保护逻辑:如果模型预测的索引超出了字符集范围
eprintln!("警告: 预测索引 {} 超出字符集范围", u_idx);
}
}
}