RandomPerspective in PyTorch
Super Kai (Kazuya Ito)

Super Kai (Kazuya Ito) @hyperkai

About: I'm a web developer. Buy Me a Coffee: ko-fi.com/superkai SO: stackoverflow.com/users/3247006/super-kai-kazuya-ito X(Twitter): twitter.com/superkai_kazuya FB: facebook.com/superkai.kazuya

Joined:
Oct 21, 2021

RandomPerspective in PyTorch

Publish Date: Feb 22
0 0

Buy Me a Coffee

*Memos:

RandomPerspective() can do random perspective transformation for an image as shown below:

*Memos:

  • The 1st argument for initialization is distortion_scale(Optional-Default:0.5-Type:int or float): *Memos:
    • It's the degree of distortion of the perspective transformation.
    • It must be 0 <= x <= 1.
  • The 2nd argument for initialization is p(Optional-Default:0.5-Type:int or float): *Memos:
    • It's the probability that the perspective transformation is applied to an image.
    • It must be 0 <= x <= 1.
  • The 3rd argument for initialization is interpolation(Optional-Default:InterpolationMode.BILINEAR-Type:InterpolationMode): *Memos:
    • NEAREST, NEAREST_EXACT, BILINEAR and BICUBIC modes can be used.
    • My post explains InterpolationMode with and without anti-aliasing.
  • The 4th argument for initialization is fill(Optional-Default:0-Type:int, float or tuple/list(int or float)): *Memos:
    • It can change the background of an image. *The background can be seen when doing perspective transformation for an image.
    • A tuple/list must be 1D with 1 or 3 elements.
    • If all values are x <= 0, it's black.
    • If all values are 255 <= x, it's white.
  • The 1st argument is img(Required-Type:PIL Image or tensor(int/float/complex/bool)): *Memos:
    • A tensor must be 3D or higher-dimensional.
    • Don't use img=.
  • Using v2 is recommended, according to "V1 or V2? Which one should I use?".
from torchvision.datasets import OxfordIIITPet
from torchvision.transforms.v2 import RandomPerspective
from torchvision.transforms.functional import InterpolationMode

# The two constructions below are equivalent: the second one spells out the
# default value of every initialization argument.
rp = RandomPerspective()
rp = RandomPerspective(distortion_scale=0.5, p=0.5,
                       interpolation=InterpolationMode.BILINEAR, fill=0)

# Each bare expression below is followed by a comment showing the value it
# evaluates to.
rp
# RandomPerspective(p=0.5, distortion_scale=0.5,
#                   interpolation=InterpolationMode.BILINEAR, fill=0)

rp.distortion_scale
# 0.5

rp.p
# 0.5

rp.interpolation
# <InterpolationMode.BILINEAR: 'bilinear'>

rp.fill
# 0

# Dataset without any transform.
origin_data = OxfordIIITPet(
    root="data",
    transform=None
)

# The datasets below sweep distortion_scale from 0 to 1 in steps of 0.1.
# All of them use p=1; p is the probability of applying the transform.
ds0p1origin_data = OxfordIIITPet( # `ds` is distortion_scale.
    root="data",
    transform=RandomPerspective(distortion_scale=0, p=1)
)

ds01p1_data = OxfordIIITPet(
    root="data",
    transform=RandomPerspective(distortion_scale=0.1, p=1)
)

ds02p1_data = OxfordIIITPet(
    root="data",
    transform=RandomPerspective(distortion_scale=0.2, p=1)
)

ds03p1_data = OxfordIIITPet(
    root="data",
    transform=RandomPerspective(distortion_scale=0.3, p=1)
)

ds04p1_data = OxfordIIITPet(
    root="data",
    transform=RandomPerspective(distortion_scale=0.4, p=1)
)

ds05p1_data = OxfordIIITPet(
    root="data",
    transform=RandomPerspective(distortion_scale=0.5, p=1)
    # Equivalent, because 0.5 is the default distortion_scale:
    # transform=RandomPerspective(p=1)
)

ds06p1_data = OxfordIIITPet(
    root="data",
    transform=RandomPerspective(distortion_scale=0.6, p=1)
)

ds07p1_data = OxfordIIITPet(
    root="data",
    transform=RandomPerspective(distortion_scale=0.7, p=1)
)

ds08p1_data = OxfordIIITPet(
    root="data",
    transform=RandomPerspective(distortion_scale=0.8, p=1)
)

ds09p1_data = OxfordIIITPet(
    root="data",
    transform=RandomPerspective(distortion_scale=0.9, p=1)
)

ds1p1_data = OxfordIIITPet(
    root="data",
    transform=RandomPerspective(distortion_scale=1, p=1)
)

# The datasets below vary only p (the probability of applying the
# transform); distortion_scale is left at its default.
p0_data = OxfordIIITPet(
    root="data",
    transform=RandomPerspective(p=0)
)

p05_data = OxfordIIITPet(
    root="data",
    transform=RandomPerspective(p=0.5)
    # Equivalent, because 0.5 is the default p:
    # transform=RandomPerspective()
)

p1_data = OxfordIIITPet(
    root="data",
    transform=RandomPerspective(p=1)
)

# The datasets below use p=1 and vary only the fill value.
p1fgray_data = OxfordIIITPet( # `f` is fill.
    root="data",
    transform=RandomPerspective(p=1, fill=150)
    # Alternative spelling with a 1-element list:
    # transform=RandomPerspective(p=1, fill=[150])
)

p1fpurple_data = OxfordIIITPet(
    root="data",
    transform=RandomPerspective(p=1, fill=[160, 32, 240])
)

import matplotlib.pyplot as plt

def show_images1(data, main_title=None):
    """Display the first five images of `data` side by side in one figure.

    `data` is iterated as `(image, target)` pairs and only the image is
    drawn. `main_title` is used as the figure's super-title.
    """
    columns = 5
    plt.figure(figsize=[10, 5])
    plt.suptitle(t=main_title, y=0.8, fontsize=14)
    # Pairing with a 1-based range both numbers the subplots and stops the
    # iteration after `columns` samples.
    for slot, sample in zip(range(1, columns + 1), data):
        image, _ = sample
        plt.subplot(1, columns, slot)
        plt.imshow(X=image)
        plt.xticks(ticks=[])
        plt.yticks(ticks=[])
    plt.tight_layout()
    plt.show()

# Reference row: the dataset without any transform.
show_images1(data=origin_data, main_title="origin_data")
print()

# distortion_scale sweep, one figure per dataset.
for _title, _dataset in (
    ("ds0p1origin_data", ds0p1origin_data),
    ("ds01p1_data", ds01p1_data),
    ("ds02p1_data", ds02p1_data),
    ("ds03p1_data", ds03p1_data),
    ("ds04p1_data", ds04p1_data),
    ("ds05p1_data", ds05p1_data),
    ("ds06p1_data", ds06p1_data),
    ("ds07p1_data", ds07p1_data),
    ("ds08p1_data", ds08p1_data),
    ("ds09p1_data", ds09p1_data),
    ("ds1p1_data", ds1p1_data),
):
    show_images1(data=_dataset, main_title=_title)
print()

# Each p setting is drawn three times in a row (presumably to show the
# run-to-run variation of the random transform), followed by a blank line.
for _title, _dataset in (
    ("p0_data", p0_data),
    ("p05_data", p05_data),
    ("p1_data", p1_data),
):
    for _repeat in range(3):
        show_images1(data=_dataset, main_title=_title)
    print()

# fill variants.
show_images1(data=p1fgray_data, main_title="p1fgray_data")
show_images1(data=p1fpurple_data, main_title="p1fpurple_data")

# ↓ ↓ ↓ The code below is equivalent to the code above, but applies the transform at display time. ↓ ↓ ↓
def show_images2(data, main_title=None, ds=0.5, p=0.5,
                 ip=InterpolationMode.BILINEAR, f=0):
    """Show the first five images of `data` in one row, transformed on the fly.

    A `RandomPerspective` built from `ds`, `p`, `ip` and `f` is applied to
    each image before it is drawn, except when `main_title` is exactly
    "origin_data", in which case the images are drawn untouched.

    Args:
        data: Iterable of `(image, target)` pairs; only the image is used.
        main_title: Figure super-title. The value "origin_data" also
            disables the transform.
        ds: `distortion_scale` passed to `RandomPerspective`.
        p: `p` passed to `RandomPerspective`.
        ip: `interpolation` passed to `RandomPerspective`.
        f: `fill` passed to `RandomPerspective`.
    """
    plt.figure(figsize=[10, 5])
    plt.suptitle(t=main_title, y=0.8, fontsize=14)
    # Build the transform once instead of once per image: its arguments do
    # not change inside the loop, and the random perspective is sampled
    # when the transform is called, not when it is constructed.
    rp = None
    if main_title != "origin_data":
        rp = RandomPerspective(distortion_scale=ds, p=p,
                               interpolation=ip, fill=f)
    for i, (im, _) in zip(range(1, 6), data):
        plt.subplot(1, 5, i)
        plt.imshow(X=im if rp is None else rp(im))
        plt.xticks(ticks=[])
        plt.yticks(ticks=[])
    plt.tight_layout()
    plt.show()

# Reference row: the title "origin_data" makes show_images2 skip the transform.
show_images2(data=origin_data, main_title="origin_data")
print()

# distortion_scale sweep with p=1, one figure per value.
for _title, _ds in (
    ("ds0p1origin_data", 0),
    ("ds01p1_data", 0.1),
    ("ds02p1_data", 0.2),
    ("ds03p1_data", 0.3),
    ("ds04p1_data", 0.4),
    ("ds05p1_data", 0.5),
    ("ds06p1_data", 0.6),
    ("ds07p1_data", 0.7),
    ("ds08p1_data", 0.8),
    ("ds09p1_data", 0.9),
    ("ds1p1_data", 1),
):
    show_images2(data=origin_data, main_title=_title, ds=_ds, p=1)
print()

# Each p setting is drawn three times in a row, followed by a blank line.
for _title, _p in (
    ("p0_data", 0),
    ("p05_data", 0.5),
    ("p1_data", 1),
):
    for _repeat in range(3):
        show_images2(data=origin_data, main_title=_title, p=_p)
    print()

# fill variants.
show_images2(data=origin_data, main_title="p1fgray_data", p=1, f=150)
show_images2(data=origin_data, main_title="p1fpurple_data", p=1,
             f=[160, 32, 240])
Enter fullscreen mode Exit fullscreen mode

Image description


Image description

Image description

Image description

Image description

Image description

Image description

Image description

Image description

Image description

Image description

Image description


Image description

Image description

Image description


Image description

Image description

Image description


Image description

Image description

Image description


Image description

Image description

Comments 0 total

    Add comment