refactor: 移除 OpenCV 依赖并实现纯 Rust 图像处理流水线
- 替换 opencv 为 image 库以简化交叉编译
- 修正 nms 逻辑中的 ArrayView 借用问题
- 增加 save_debug_image 方法用于可视化检测框
- 更新 Cargo.toml 依赖项
This commit is contained in:
@@ -1,3 +1,4 @@
|
||||
use crate::base::ModelArgs;
|
||||
use crate::image_io::png_rgba_white_preprocess;
|
||||
use crate::image_processor::{convert_to_grayscale, resize_image};
|
||||
use crate::model_loader::{ModelLoader, ModelSession, ModelType};
|
||||
@@ -7,8 +8,6 @@ use tract_onnx::prelude::tract_ndarray::s;
|
||||
use tract_onnx::prelude::{
|
||||
DatumType, Graph, IntoTensor, RunnableModel, Tensor, TypedFact, TypedOp, tract_ndarray, tvec,
|
||||
};
|
||||
use crate::base::ModelArgs;
|
||||
|
||||
|
||||
// Custom range used for color filtering: (low RGB value, high RGB value).
|
||||
pub type ColorRange = ((u8, u8, u8), (u8, u8, u8));
|
||||
@@ -16,17 +15,17 @@ pub type ColorRange = ((u8, u8, u8), (u8, u8, u8));
|
||||
// 字符集范围类型
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum CharsetRange {
|
||||
All, // 所有字符
|
||||
Digit, // 数字
|
||||
Letter, // 字母
|
||||
Alphanumeric, // 字母数字
|
||||
Single(String), // 单字符串
|
||||
Multiple(Vec<String>), // 多个字符串
|
||||
Range(char, char), // 字符范围
|
||||
Custom(Vec<char>), // 自定义字符列表
|
||||
All, // 所有字符
|
||||
Digit, // 数字
|
||||
Letter, // 字母
|
||||
Alphanumeric, // 字母数字
|
||||
Single(String), // 单字符串
|
||||
Multiple(Vec<String>), // 多个字符串
|
||||
Range(char, char), // 字符范围
|
||||
Custom(Vec<char>), // 自定义字符列表
|
||||
}
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct PredictArgs{
|
||||
pub struct PredictArgs {
|
||||
/// 是否修复PNG格式问题
|
||||
pub png_fix: bool,
|
||||
/// 是否返回概率信息
|
||||
@@ -100,7 +99,19 @@ pub struct Ocr {
|
||||
charset: Vec<String>,
|
||||
}
|
||||
impl ModelSession for Ocr {
|
||||
fn predict(&self, image: &DynamicImage, png_fix: bool) -> Result<String, anyhow::Error> {
|
||||
fn get_model_type(&self) -> ModelType {
|
||||
todo!()
|
||||
}
|
||||
fn desc(&self) -> String {
|
||||
"Ocr Model 加载成功".to_string()
|
||||
}
|
||||
}
|
||||
impl Ocr {
|
||||
/// Builds an `Ocr` from a model file path and a character set.
///
/// Loads the model via `ModelLoader::load_model(&model_path)`, keeps the
/// `session` field of the loaded result, and stores it together with the
/// given `charset`.
///
/// # Errors
///
/// Propagates any error returned by `ModelLoader::load_model`.
pub fn new(model_path: String, charset: Vec<String>) -> Result<Self, anyhow::Error> {
|
||||
let session = ModelLoader::load_model(&model_path)?.session;
|
||||
Ok(Self { session, charset })
|
||||
}
|
||||
pub fn predict(&self, image: &DynamicImage, png_fix: bool) -> Result<String, anyhow::Error> {
|
||||
let tensor = self.preprocess_image(image, png_fix)?;
|
||||
//
|
||||
// let result = self.session.run(tvec!(tensor.into()))?;
|
||||
@@ -108,19 +119,9 @@ impl ModelSession for Ocr {
|
||||
// // let output = result[0].to_array_view::<i64>()?;
|
||||
let output = self.inference(tensor)?;
|
||||
let output2 = self.process_text_output(&output)?;
|
||||
Ok(Self::ctc_decode_indices(&output2))
|
||||
Ok(self.ctc_decode_indices(&output2))
|
||||
// Ok("ocr result".to_string())
|
||||
}
|
||||
|
||||
fn get_model_type(&self) -> ModelType {
|
||||
ModelType::Ocr
|
||||
}
|
||||
}
|
||||
impl Ocr {
|
||||
pub fn new(model_path: String, charset: Vec<String>) -> Result<Self, anyhow::Error> {
|
||||
let session = ModelLoader::load_model(&model_path)?.session;
|
||||
Ok(Self { session, charset })
|
||||
}
|
||||
/// 对应 Python 的 _preprocess_image
|
||||
/// 负责:透明背景修复 -> 灰度化 -> 按比例 Resize -> 归一化 -> 4维张量转换
|
||||
fn preprocess_image(&self, img: &DynamicImage, png_fix: bool) -> anyhow::Result<Tensor> {
|
||||
@@ -222,10 +223,9 @@ impl Ocr {
|
||||
)),
|
||||
}
|
||||
}
|
||||
fn ctc_decode_indices(predicted_indices: &[i64]) -> String {
|
||||
fn ctc_decode_indices(&self, predicted_indices: &[i64]) -> String {
|
||||
println!("indices模型输出原始数据: {:?}", predicted_indices);
|
||||
|
||||
use crate::charset::CHARSET_BETA;
|
||||
// 对应 _ctc_decode_indices 的逻辑:去重、去 blank (0)
|
||||
let mut res = String::new();
|
||||
let mut prev_idx: i64 = -1;
|
||||
@@ -235,8 +235,11 @@ impl Ocr {
|
||||
// 2. 跳过 blank 字符 (假设索引 0 是 blank)
|
||||
if idx != prev_idx && idx != 0 {
|
||||
if let Ok(u_idx) = usize::try_from(idx) {
|
||||
if let Some(&char_str) = CHARSET_BETA.get(u_idx) {
|
||||
if let Some(char_str) = self.charset.get(u_idx) {
|
||||
res.push_str(char_str);
|
||||
} else {
|
||||
// 保护逻辑:如果模型预测的索引超出了字符集范围
|
||||
eprintln!("警告: 预测索引 {} 超出字符集范围", u_idx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user