本文主要是介绍Paddleocr数据增强调用逻辑,希望对大家解决编程问题提供一定的参考价值,需要的开发者们随着小编来一起学习吧!
数据增强调用逻辑
以在ppocr/data/simple_dataset.py为例:
get_ext_data通过self.ops[:self.ext_op_transform_idx]获取配置文件中数据增强
self.ops在def __init__(self, config, mode, logger, seed=None):中通过解析配置文件中'transforms'内容获取数据增强操作:
self.ops = create_operators(dataset_config['transforms'], global_config)
然后调用 data = transform(data, load_data_ops)实现数据增强。
def get_ext_data(self):ext_data_num = 0for op in self.ops:if hasattr(op, 'ext_data_num'):ext_data_num = getattr(op, 'ext_data_num')breakload_data_ops = self.ops[:self.ext_op_transform_idx]ext_data = []while len(ext_data) < ext_data_num:file_idx = self.data_idx_order_list[np.random.randint(self.__len__())]data_line = self.data_lines[file_idx]data_line = data_line.decode('utf-8')substr = data_line.strip("\n").split(self.delimiter)file_name = substr[0]file_name = self._try_parse_filename_list(file_name)label = substr[1]img_path = os.path.join(self.data_dir, file_name)data = {'img_path': img_path, 'label': label}if not os.path.exists(img_path):continuewith open(data['img_path'], 'rb') as f:img = f.read()data['image'] = imgdata = transform(data, load_data_ops)if data is None:continueif 'polys' in data.keys():if data['polys'].shape[1] != 4:continueext_data.append(data)return ext_data
数据增强实现的细节:
代码来自ppocr/data/imaug/__init__.py
1、trainsform函数将数据增强数组逐个对数据输出数据列表
def transform(data, ops=None):""" transform """if ops is None:ops = []for op in ops:data = op(data)if data is None:return Nonereturn data
op(data)为什么能进行数据增强:
每一个op是eval(op_name)(**param),eval() 函数将字符串 expression 解析为 Python 表达式,并在指定的命名空间中执行它。
def create_operators(op_param_list, global_config=None):"""create operators based on the configArgs:params(list): a dict list, used to create some operators"""assert isinstance(op_param_list, list), ('operator config should be a list')ops = []for operator in op_param_list:assert isinstance(operator,dict) and len(operator) == 1, "yaml format error"op_name = list(operator)[0]param = {} if operator[op_name] is None else operator[op_name]if global_config is not None:param.update(global_config)op = eval(op_name)(**param)ops.append(op)return ops
例如配置文件中的:
CopyPaste: null
通过ppocr/data/imaug/__init__.py,eval()可以调用CopyPaste实现数据增强
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literalsfrom .iaa_augment import IaaAugment
from .make_border_map import MakeBorderMap
from .make_shrink_map import MakeShrinkMap
from .random_crop_data import EastRandomCropData, RandomCropImgMask
from .make_pse_gt import MakePseGtfrom .rec_img_aug import BaseDataAugmentation, RecAug, RecConAug, RecResizeImg, ClsResizeImg, \SRNRecResizeImg, GrayRecResizeImg, SARRecResizeImg, PRENResizeImg, \ABINetRecResizeImg, SVTRRecResizeImg, ABINetRecAug, VLRecResizeImg, SPINRecResizeImg, RobustScannerRecResizeImg, \RFLRecResizeImg, SVTRRecAug, ParseQRecAug
from .ssl_img_aug import SSLRotateResize
from .randaugment import RandAugment
from .copy_paste import CopyPaste
from .ColorJitter import ColorJitter
from .operators import *
from .label_ops import *from .east_process import *
from .sast_process import *
from .pg_process import *
from .table_ops import *from .vqa import *from .fce_aug import *
from .fce_targets import FCENetTargets
from .ct_process import *
from .drrg_targets import DRRGTargets
这篇关于Paddleocr数据增强调用逻辑的文章就介绍到这儿,希望我们推荐的文章对编程师们有所帮助!