upsampling.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253
  1. from .module import Module
  2. from .. import functional as F
  3. from torch import Tensor
  4. from typing import Optional
  5. from ..common_types import _size_2_t, _ratio_2_t, _size_any_t, _ratio_any_t
  6. class Upsample(Module):
  7. r"""Upsamples a given multi-channel 1D (temporal), 2D (spatial) or 3D (volumetric) data.
  8. The input data is assumed to be of the form
  9. `minibatch x channels x [optional depth] x [optional height] x width`.
  10. Hence, for spatial inputs, we expect a 4D Tensor and for volumetric inputs, we expect a 5D Tensor.
  11. The algorithms available for upsampling are nearest neighbor and linear,
  12. bilinear, bicubic and trilinear for 3D, 4D and 5D input Tensor,
  13. respectively.
  14. One can either give a :attr:`scale_factor` or the target output :attr:`size` to
  15. calculate the output size. (You cannot give both, as it is ambiguous)
  16. Args:
  17. size (int or Tuple[int] or Tuple[int, int] or Tuple[int, int, int], optional):
  18. output spatial sizes
  19. scale_factor (float or Tuple[float] or Tuple[float, float] or Tuple[float, float, float], optional):
  20. multiplier for spatial size. Has to match input size if it is a tuple.
  21. mode (str, optional): the upsampling algorithm: one of ``'nearest'``,
  22. ``'linear'``, ``'bilinear'``, ``'bicubic'`` and ``'trilinear'``.
  23. Default: ``'nearest'``
  24. align_corners (bool, optional): if ``True``, the corner pixels of the input
  25. and output tensors are aligned, and thus preserving the values at
  26. those pixels. This only has effect when :attr:`mode` is
  27. ``'linear'``, ``'bilinear'``, ``'bicubic'``, or ``'trilinear'``.
  28. Default: ``False``
  29. recompute_scale_factor (bool, optional): recompute the scale_factor for use in the
  30. interpolation calculation. If `recompute_scale_factor` is ``True``, then
  31. `scale_factor` must be passed in and `scale_factor` is used to compute the
  32. output `size`. The computed output `size` will be used to infer new scales for
  33. the interpolation. Note that when `scale_factor` is floating-point, it may differ
  34. from the recomputed `scale_factor` due to rounding and precision issues.
  35. If `recompute_scale_factor` is ``False``, then `size` or `scale_factor` will
  36. be used directly for interpolation.
  37. Shape:
  38. - Input: :math:`(N, C, W_{in})`, :math:`(N, C, H_{in}, W_{in})` or :math:`(N, C, D_{in}, H_{in}, W_{in})`
  39. - Output: :math:`(N, C, W_{out})`, :math:`(N, C, H_{out}, W_{out})`
  40. or :math:`(N, C, D_{out}, H_{out}, W_{out})`, where
  41. .. math::
  42. D_{out} = \left\lfloor D_{in} \times \text{scale\_factor} \right\rfloor
  43. .. math::
  44. H_{out} = \left\lfloor H_{in} \times \text{scale\_factor} \right\rfloor
  45. .. math::
  46. W_{out} = \left\lfloor W_{in} \times \text{scale\_factor} \right\rfloor
  47. .. warning::
  48. With ``align_corners = True``, the linearly interpolating modes
  49. (`linear`, `bilinear`, `bicubic`, and `trilinear`) don't proportionally
  50. align the output and input pixels, and thus the output values can depend
  51. on the input size. This was the default behavior for these modes up to
  52. version 0.3.1. Since then, the default behavior is
  53. ``align_corners = False``. See below for concrete examples on how this
  54. affects the outputs.
  55. .. note::
  56. If you want downsampling/general resizing, you should use :func:`~nn.functional.interpolate`.
  57. Examples::
  58. >>> input = torch.arange(1, 5, dtype=torch.float32).view(1, 1, 2, 2)
  59. >>> input
  60. tensor([[[[ 1., 2.],
  61. [ 3., 4.]]]])
  62. >>> m = nn.Upsample(scale_factor=2, mode='nearest')
  63. >>> m(input)
  64. tensor([[[[ 1., 1., 2., 2.],
  65. [ 1., 1., 2., 2.],
  66. [ 3., 3., 4., 4.],
  67. [ 3., 3., 4., 4.]]]])
  68. >>> m = nn.Upsample(scale_factor=2, mode='bilinear') # align_corners=False
  69. >>> m(input)
  70. tensor([[[[ 1.0000, 1.2500, 1.7500, 2.0000],
  71. [ 1.5000, 1.7500, 2.2500, 2.5000],
  72. [ 2.5000, 2.7500, 3.2500, 3.5000],
  73. [ 3.0000, 3.2500, 3.7500, 4.0000]]]])
  74. >>> m = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
  75. >>> m(input)
  76. tensor([[[[ 1.0000, 1.3333, 1.6667, 2.0000],
  77. [ 1.6667, 2.0000, 2.3333, 2.6667],
  78. [ 2.3333, 2.6667, 3.0000, 3.3333],
  79. [ 3.0000, 3.3333, 3.6667, 4.0000]]]])
  80. >>> # Try scaling the same data in a larger tensor
  81. >>>
  82. >>> input_3x3 = torch.zeros(3, 3).view(1, 1, 3, 3)
  83. >>> input_3x3[:, :, :2, :2].copy_(input)
  84. tensor([[[[ 1., 2.],
  85. [ 3., 4.]]]])
  86. >>> input_3x3
  87. tensor([[[[ 1., 2., 0.],
  88. [ 3., 4., 0.],
  89. [ 0., 0., 0.]]]])
  90. >>> m = nn.Upsample(scale_factor=2, mode='bilinear') # align_corners=False
  91. >>> # Notice that values in top left corner are the same with the small input (except at boundary)
  92. >>> m(input_3x3)
  93. tensor([[[[ 1.0000, 1.2500, 1.7500, 1.5000, 0.5000, 0.0000],
  94. [ 1.5000, 1.7500, 2.2500, 1.8750, 0.6250, 0.0000],
  95. [ 2.5000, 2.7500, 3.2500, 2.6250, 0.8750, 0.0000],
  96. [ 2.2500, 2.4375, 2.8125, 2.2500, 0.7500, 0.0000],
  97. [ 0.7500, 0.8125, 0.9375, 0.7500, 0.2500, 0.0000],
  98. [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]]]])
  99. >>> m = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
  100. >>> # Notice that values in top left corner are now changed
  101. >>> m(input_3x3)
  102. tensor([[[[ 1.0000, 1.4000, 1.8000, 1.6000, 0.8000, 0.0000],
  103. [ 1.8000, 2.2000, 2.6000, 2.2400, 1.1200, 0.0000],
  104. [ 2.6000, 3.0000, 3.4000, 2.8800, 1.4400, 0.0000],
  105. [ 2.4000, 2.7200, 3.0400, 2.5600, 1.2800, 0.0000],
  106. [ 1.2000, 1.3600, 1.5200, 1.2800, 0.6400, 0.0000],
  107. [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]]]])
  108. """
  109. __constants__ = ['size', 'scale_factor', 'mode', 'align_corners', 'name', 'recompute_scale_factor']
  110. name: str
  111. size: Optional[_size_any_t]
  112. scale_factor: Optional[_ratio_any_t]
  113. mode: str
  114. align_corners: Optional[bool]
  115. recompute_scale_factor: Optional[bool]
  116. def __init__(self, size: Optional[_size_any_t] = None, scale_factor: Optional[_ratio_any_t] = None,
  117. mode: str = 'nearest', align_corners: Optional[bool] = None,
  118. recompute_scale_factor: Optional[bool] = None) -> None:
  119. super(Upsample, self).__init__()
  120. self.name = type(self).__name__
  121. self.size = size
  122. if isinstance(scale_factor, tuple):
  123. self.scale_factor = tuple(float(factor) for factor in scale_factor)
  124. else:
  125. self.scale_factor = float(scale_factor) if scale_factor else None
  126. self.mode = mode
  127. self.align_corners = align_corners
  128. self.recompute_scale_factor = recompute_scale_factor
  129. def forward(self, input: Tensor) -> Tensor:
  130. return F.interpolate(input, self.size, self.scale_factor, self.mode, self.align_corners,
  131. recompute_scale_factor=self.recompute_scale_factor)
  132. def extra_repr(self) -> str:
  133. if self.scale_factor is not None:
  134. info = 'scale_factor=' + str(self.scale_factor)
  135. else:
  136. info = 'size=' + str(self.size)
  137. info += ', mode=' + self.mode
  138. return info
  139. class UpsamplingNearest2d(Upsample):
  140. r"""Applies a 2D nearest neighbor upsampling to an input signal composed of several input
  141. channels.
  142. To specify the scale, it takes either the :attr:`size` or the :attr:`scale_factor`
  143. as it's constructor argument.
  144. When :attr:`size` is given, it is the output size of the image `(h, w)`.
  145. Args:
  146. size (int or Tuple[int, int], optional): output spatial sizes
  147. scale_factor (float or Tuple[float, float], optional): multiplier for
  148. spatial size.
  149. .. warning::
  150. This class is deprecated in favor of :func:`~nn.functional.interpolate`.
  151. Shape:
  152. - Input: :math:`(N, C, H_{in}, W_{in})`
  153. - Output: :math:`(N, C, H_{out}, W_{out})` where
  154. .. math::
  155. H_{out} = \left\lfloor H_{in} \times \text{scale\_factor} \right\rfloor
  156. .. math::
  157. W_{out} = \left\lfloor W_{in} \times \text{scale\_factor} \right\rfloor
  158. Examples::
  159. >>> input = torch.arange(1, 5, dtype=torch.float32).view(1, 1, 2, 2)
  160. >>> input
  161. tensor([[[[ 1., 2.],
  162. [ 3., 4.]]]])
  163. >>> m = nn.UpsamplingNearest2d(scale_factor=2)
  164. >>> m(input)
  165. tensor([[[[ 1., 1., 2., 2.],
  166. [ 1., 1., 2., 2.],
  167. [ 3., 3., 4., 4.],
  168. [ 3., 3., 4., 4.]]]])
  169. """
  170. def __init__(self, size: Optional[_size_2_t] = None, scale_factor: Optional[_ratio_2_t] = None) -> None:
  171. super(UpsamplingNearest2d, self).__init__(size, scale_factor, mode='nearest')
  172. class UpsamplingBilinear2d(Upsample):
  173. r"""Applies a 2D bilinear upsampling to an input signal composed of several input
  174. channels.
  175. To specify the scale, it takes either the :attr:`size` or the :attr:`scale_factor`
  176. as it's constructor argument.
  177. When :attr:`size` is given, it is the output size of the image `(h, w)`.
  178. Args:
  179. size (int or Tuple[int, int], optional): output spatial sizes
  180. scale_factor (float or Tuple[float, float], optional): multiplier for
  181. spatial size.
  182. .. warning::
  183. This class is deprecated in favor of :func:`~nn.functional.interpolate`. It is
  184. equivalent to ``nn.functional.interpolate(..., mode='bilinear', align_corners=True)``.
  185. Shape:
  186. - Input: :math:`(N, C, H_{in}, W_{in})`
  187. - Output: :math:`(N, C, H_{out}, W_{out})` where
  188. .. math::
  189. H_{out} = \left\lfloor H_{in} \times \text{scale\_factor} \right\rfloor
  190. .. math::
  191. W_{out} = \left\lfloor W_{in} \times \text{scale\_factor} \right\rfloor
  192. Examples::
  193. >>> input = torch.arange(1, 5, dtype=torch.float32).view(1, 1, 2, 2)
  194. >>> input
  195. tensor([[[[ 1., 2.],
  196. [ 3., 4.]]]])
  197. >>> m = nn.UpsamplingBilinear2d(scale_factor=2)
  198. >>> m(input)
  199. tensor([[[[ 1.0000, 1.3333, 1.6667, 2.0000],
  200. [ 1.6667, 2.0000, 2.3333, 2.6667],
  201. [ 2.3333, 2.6667, 3.0000, 3.3333],
  202. [ 3.0000, 3.3333, 3.6667, 4.0000]]]])
  203. """
  204. def __init__(self, size: Optional[_size_2_t] = None, scale_factor: Optional[_ratio_2_t] = None) -> None:
  205. super(UpsamplingBilinear2d, self).__init__(size, scale_factor, mode='bilinear', align_corners=True)