refactor: 移除 OpenCV 依赖并实现纯 Rust 图像处理流水线

- 替换 opencv 为 image 库以简化交叉编译
- 修正 nms 逻辑中的 ArrayView 借用问题
- 增加 save_debug_image 方法用于可视化检测框
- 更新 Cargo.toml 依赖项
2026-05-06 17:37:38 +08:00
parent cfeb68ad04
commit 8fcfa2096e
8 changed files with 338 additions and 71 deletions
--- a/src/ocr_model.rs
+++ b/src/ocr_model.rs
@@ -1,3 +1,4 @@
+use crate::base::ModelArgs;
 use crate::image_io::png_rgba_white_preprocess;
 use crate::image_processor::{convert_to_grayscale, resize_image};
 use crate::model_loader::{ModelLoader, ModelSession, ModelType};
@@ -7,8 +8,6 @@ use tract_onnx::prelude::tract_ndarray::s;
 use tract_onnx::prelude::{
    DatumType, Graph, IntoTensor, RunnableModel, Tensor, TypedFact, TypedOp, tract_ndarray, tvec,
 };
-use crate::base::ModelArgs;
-

 // 颜色过滤的自定义范围：(低值RGB, 高值RGB)
 pub type ColorRange = ((u8, u8, u8), (u8, u8, u8));
@@ -16,17 +15,17 @@ pub type ColorRange = ((u8, u8, u8), (u8, u8, u8));
 // 字符集范围类型
 #[derive(Debug, Clone)]
 pub enum CharsetRange {
-    All,                           // 所有字符
-    Digit,                         // 数字
-    Letter,                        // 字母
-    Alphanumeric,                  // 字母数字
-    Single(String),                // 单字符串
-    Multiple(Vec<String>),         // 多个字符串
-    Range(char, char),            // 字符范围
-    Custom(Vec<char>),            // 自定义字符列表
+    All,                   // 所有字符
+    Digit,                 // 数字
+    Letter,                // 字母
+    Alphanumeric,          // 字母数字
+    Single(String),        // 单字符串
+    Multiple(Vec<String>), // 多个字符串
+    Range(char, char),     // 字符范围
+    Custom(Vec<char>),     // 自定义字符列表
 }
 #[derive(Debug, Clone)]
-pub struct PredictArgs{
+pub struct PredictArgs {
    /// 是否修复PNG格式问题
    pub png_fix: bool,
    /// 是否返回概率信息
@@ -100,7 +99,19 @@ pub struct Ocr {
    charset: Vec<String>,
 }
 impl ModelSession for Ocr {
-    fn predict(&self, image: &DynamicImage, png_fix: bool) -> Result<String, anyhow::Error> {
+    fn get_model_type(&self) -> ModelType {
+        ModelType::Ocr // same value as before the refactor; `todo!()` here would panic on every call
+    }
+    fn desc(&self) -> String {
+        "Ocr Model 加载成功".to_string()
+    }
+}
+impl Ocr {
+    pub fn new(model_path: String, charset: Vec<String>) -> Result<Self, anyhow::Error> {
+        let session = ModelLoader::load_model(&model_path)?.session;
+        Ok(Self { session, charset })
+    }
+    pub fn predict(&self, image: &DynamicImage, png_fix: bool) -> Result<String, anyhow::Error> {
        let tensor = self.preprocess_image(image, png_fix)?;
        //
        // let result = self.session.run(tvec!(tensor.into()))?;
@@ -108,19 +119,9 @@ impl ModelSession for Ocr {
        // // let output = result[0].to_array_view::<i64>()?;
        let output = self.inference(tensor)?;
        let output2 = self.process_text_output(&output)?;
-        Ok(Self::ctc_decode_indices(&output2))
+        Ok(self.ctc_decode_indices(&output2))
        // Ok("ocr result".to_string())
    }
-
-    fn get_model_type(&self) -> ModelType {
-        ModelType::Ocr
-    }
-}
-impl Ocr {
-    pub fn new(model_path: String, charset: Vec<String>) -> Result<Self, anyhow::Error> {
-        let session = ModelLoader::load_model(&model_path)?.session;
-        Ok(Self { session, charset })
-    }
    /// 对应 Python 的 _preprocess_image
    /// 负责：透明背景修复 -> 灰度化 -> 按比例 Resize -> 归一化 -> 4维张量转换
    fn preprocess_image(&self, img: &DynamicImage, png_fix: bool) -> anyhow::Result<Tensor> {
@@ -222,10 +223,9 @@ impl Ocr {
            )),
        }
    }
-    fn ctc_decode_indices(predicted_indices: &[i64]) -> String {
+    fn ctc_decode_indices(&self, predicted_indices: &[i64]) -> String {
        println!("indices模型输出原始数据: {:?}", predicted_indices);

-        use crate::charset::CHARSET_BETA;
        // 对应 _ctc_decode_indices 的逻辑：去重、去 blank (0)
        let mut res = String::new();
        let mut prev_idx: i64 = -1;
@@ -235,8 +235,11 @@ impl Ocr {
            // 2. 跳过 blank 字符 (假设索引 0 是 blank)
            if idx != prev_idx && idx != 0 {
                if let Ok(u_idx) = usize::try_from(idx) {
-                    if let Some(&char_str) = CHARSET_BETA.get(u_idx) {
+                    if let Some(char_str) = self.charset.get(u_idx) {
                        res.push_str(char_str);
+                    } else {
+                        // 保护逻辑：如果模型预测的索引超出了字符集范围
+                        eprintln!("警告: 预测索引 {} 超出字符集范围", u_idx);
                    }
                }
            }