Data preprocessing in PyTorch #474

Open
DaCheng1823 opened this issue Oct 14, 2024 · 0 comments


DaCheng1823 commented Oct 14, 2024

`""""by lyuwenyu
"""

import torch
import torch.nn as nn

import torchvision

torchvision.disable_beta_transforms_warning()
from torchvision import datapoints

import torchvision.transforms.v2 as T
import torchvision.transforms.v2.functional as F

from PIL import Image
from typing import Any, Dict, List, Optional

from src.core import register, GLOBAL_CONFIG

all = ['Compose', ]

RandomPhotometricDistort = register(T.RandomPhotometricDistort)
RandomZoomOut = register(T.RandomZoomOut)

RandomIoUCrop = register(T.RandomIoUCrop)

RandomHorizontalFlip = register(T.RandomHorizontalFlip)
Resize = register(T.Resize)
ToImageTensor = register(T.ToImageTensor)
ConvertDtype = register(T.ConvertDtype)
SanitizeBoundingBox = register(T.SanitizeBoundingBox)
RandomCrop = register(T.RandomCrop)
Normalize = register(T.Normalize)

@register
class Compose(T.Compose):
def init(self, ops) -> None:
transforms = []
if ops is not None:
for op in ops:
if isinstance(op, dict):
name = op.pop('type')
transfom = getattr(GLOBAL_CONFIG[name]['_pymodule'], name)(**op)
transforms.append(transfom)
# op['type'] = name
elif isinstance(op, nn.Module):
transforms.append(op)

            else:
                raise ValueError('')
    else:
        transforms = [EmptyTransform(), ]

    super().__init__(transforms=transforms)

@register
class EmptyTransform(T.Transform):
def init(self, ) -> None:
super().init()

def forward(self, *inputs):
    inputs = inputs if len(inputs) > 1 else inputs[0]
    return inputs

@register
class PadToSize(T.Pad):
_transformed_types = (
Image.Image,
datapoints.Image,
datapoints.Video,
datapoints.Mask,
datapoints.BoundingBox,
)

def _get_params(self, flat_inputs: List[Any]) -> Dict[str, Any]:
    sz = F.get_spatial_size(flat_inputs[0])
    h, w = self.spatial_size[0] - sz[0], self.spatial_size[1] - sz[1]
    self.padding = [0, 0, w, h]
    return dict(padding=self.padding)

def __init__(self, spatial_size, fill=0, padding_mode='constant') -> None:
    if isinstance(spatial_size, int):
        spatial_size = (spatial_size, spatial_size)

    self.spatial_size = spatial_size
    super().__init__(0, fill, padding_mode)

def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any:
    fill = self._fill[type(inpt)]
    padding = params['padding']
    return F.pad(inpt, padding=padding, fill=fill, padding_mode=self.padding_mode)  # type: ignore[arg-type]

def __call__(self, *inputs: Any) -> Any:
    outputs = super().forward(*inputs)
    if len(outputs) > 1 and isinstance(outputs[1], dict):
        outputs[1]['padding'] = torch.tensor(self.padding)
    return outputs

@register
class RandomIoUCrop(T.RandomIoUCrop):
def init(self, min_scale: float = 0.3, max_scale: float = 1, min_aspect_ratio: float = 0.5,
max_aspect_ratio: float = 2, sampler_options: Optional[List[float]] = None, trials: int = 40,
p: float = 1.0):
super().init(min_scale, max_scale, min_aspect_ratio, max_aspect_ratio, sampler_options, trials)
self.p = p

def __call__(self, *inputs: Any) -> Any:
    if torch.rand(1) >= self.p:
        return inputs if len(inputs) > 1 else inputs[0]

    return super().forward(*inputs)

@register
class ConvertBox(T.Transform):
_transformed_types = (
datapoints.BoundingBox,
)

def __init__(self, out_fmt='', normalize=False) -> None:
    super().__init__()
    self.out_fmt = out_fmt
    self.normalize = normalize

    self.data_fmt = {
        'xyxy': datapoints.BoundingBoxFormat.XYXY,
        'cxcywh': datapoints.BoundingBoxFormat.CXCYWH
    }

def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any:
    if self.out_fmt:
        spatial_size = inpt.spatial_size
        in_fmt = inpt.format.value.lower()
        inpt = torchvision.ops.box_convert(inpt, in_fmt=in_fmt, out_fmt=self.out_fmt)
        inpt = datapoints.BoundingBox(inpt, format=self.data_fmt[self.out_fmt], spatial_size=spatial_size)

    if self.normalize:
        inpt = inpt / torch.tensor(inpt.spatial_size[::-1]).tile(2)[None]

    return inpt`
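For orientation, here is a minimal sketch of what the eval-time chain registered above (Resize → ToImageTensor → ConvertDtype) does to a single image. It uses only the torchvision v2 beta API that this file already imports; the 640x640 size and the blank test image are assumptions for illustration, not values read out of the repo's configs:

```python
import torch
import torchvision

torchvision.disable_beta_transforms_warning()
import torchvision.transforms.v2 as T
from PIL import Image

# Illustrative eval-time chain; sizes are assumed, not taken from a repo config.
preprocess = T.Compose([
    T.Resize((640, 640)),           # resize while still a PIL image
    T.ToImageTensor(),              # PIL image -> uint8 CHW tensor (datapoints.Image)
    T.ConvertDtype(torch.float32),  # uint8 [0, 255] -> float32 [0, 1]
])

img = Image.new('RGB', (1280, 720))  # stand-in for a real photo
out = preprocess(img)
print(out.shape, out.dtype)  # torch.Size([3, 640, 640]) torch.float32
```

So the flow this sketch assumes is: resize first, convert to a CHW uint8 tensor, then cast to float32 in [0, 1]; transforms such as `ConvertBox` run alongside on the `datapoints.BoundingBox` targets.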

Hello author, and thanks for your work on RT-DETR. My coding skills aren't strong enough yet to sort out this data-preprocessing flow on my own. Could you walk through how the data preprocessing works in the PyTorch version? Is the image first resized to 640x640 and then converted to a tensor, and what method does the conversion use? Also, does using different preprocessing methods affect the results when running inference on images?
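On the last question: inference-time preprocessing generally has to match what was used during training, because even the choice of resize interpolation changes the pixel values the model sees. A minimal sketch demonstrating this, using the same torchvision v2 functional API the file above imports (the random image and the two interpolation modes are arbitrary choices for illustration):

```python
import numpy as np
import torch
import torchvision

torchvision.disable_beta_transforms_warning()
import torchvision.transforms.v2.functional as F
from torchvision.transforms import InterpolationMode
from PIL import Image

# A random stand-in image; any real photo would show the same effect.
img = Image.fromarray(np.random.randint(0, 256, (720, 1280, 3), dtype=np.uint8))

a = F.to_image_tensor(F.resize(img, [640, 640], interpolation=InterpolationMode.BILINEAR))
b = F.to_image_tensor(F.resize(img, [640, 640], interpolation=InterpolationMode.NEAREST))

# Nonzero mean difference: the two pipelines feed the model different tensors.
print((a.float() - b.float()).abs().mean())
```

If deployment code resizes or normalizes differently from the training configs, the input distribution shifts and detection quality can degrade.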
