# A bit hacky way to make the encoder head obtain its input shape dynamically
2+ import tensorflow as tf
3+ import tensorflow .keras .layers as L
4+ from NN .utils import sMLP
5+ from NN .encoding import CCoordsGridLayer , CCoordsEncodingLayer
6+ from NN .layers import MixerConvLayer , Patches , TransformerBlock
7+ from Utils .utils import dumb_deepcopy
8+
def block_params_from_config(config):
  '''
  Derives the per-layer parameter list for a conv block from `config`.

  If `config` contains an explicit 'layers' list it is returned as-is.
  Otherwise the list is assembled from 'conv before' (an int repeat count,
  a list of channel counts, or a list of parameter dicts) plus one final
  layer whose settings may be overridden via the '... last' keys.
  '''
  explicitLayers = config.get('layers', None)
  if explicitLayers is not None: return explicitLayers

  baseParams = {
    'kernel size': config.get('kernel size', 3),
    'activation': config.get('activation', 'relu'),
    'name': config.get('name', 'Conv2D'),
  }
  before = config['conv before']
  # a plain integer means "repeat the same layer that many times"
  if isinstance(before, int):
    before = [{'channels': config['channels'], **baseParams}] * before
  assert isinstance(before, list), 'convBefore must be a list'
  # a list of integers means "one layer per entry, entry = channel count"
  if before and isinstance(before[0], int):
    before = [{'channels': channels, **baseParams} for channels in before]

  # the final layer is configured separately and always appended
  finalLayer = {
    'channels': config.get('channels last', config['channels']),
    'kernel size': config.get('kernel size last', baseParams['kernel size']),
    'activation': config.get('final activation', baseParams['activation']),
    'name': config.get('last name', 'Conv2D'),
  }
  return before + [finalLayer]
38+
def conv_block_from_config(data, config, defaults, name='CB'):
  '''
  Applies a configurable stack of layers to `data`.

  Args:
    data: input tensor (image-like for the Conv2D branches — TODO confirm rank).
    config: dict describing the block; merged over `defaults` (config wins).
      Layer list schema is produced by `block_params_from_config`.
    defaults: fallback values for `config`.
    name: prefix used for all created layer names.

  Returns:
    The transformed tensor.

  Raises:
    NotImplementedError: if a layer entry has an unknown 'name'.
  '''
  config = {**defaults, **config}  # merge defaults and config
  convParams = block_params_from_config(config)
  # apply the configured layers sequentially
  for i, parameters in enumerate(convParams):
    parameters = dumb_deepcopy(parameters)  # avoid mutating shared configs
    Name = parameters.get('name', 'Conv2D')
    if 'Conv2D' == Name:
      data = L.Conv2D(
        filters=parameters['channels'],
        padding='same',
        kernel_size=parameters['kernel size'],
        activation=parameters['activation'],
        name='%s/conv-%d' % (name, i)
      )(data)
      continue

    if 'MLP Mixer' == Name:
      data = MixerConvLayer(
        token_mixing=parameters.get('token mixing', 512),
        channel_mixing=parameters.get('channel mixing', 512),
        name='%s/conv-mixer-%d' % (name, i)
      )(data)
      continue

    if 'Patches' == Name:
      data = Patches(
        patch_size=parameters['patch size'],
        name='%s/patches-%d' % (name, i)
      )(data)
      continue

    if 'CoordsGrid' == Name:
      N = parameters.get('N', 32)
      # FIX: 'N' and 'name' are passed explicitly below; leaving them inside
      # `parameters` raised TypeError (duplicate keyword 'N') whenever the
      # layer config specified its own N
      parameters = {k: v for k, v in parameters.items() if k not in ['name', 'N']}
      data = CCoordsGridLayer(
        CCoordsEncodingLayer(
          N=N,
          # distinct sub-name, consistent with _withPositionConfig
          name='%s/coordsGrid-%d/encoding' % (name, i),
          **parameters
        ),
        name='%s/coordsGrid-%d' % (name, i)
      )(data)
      continue

    if 'Transformer' == Name:
      # `parameters` is already a private deep copy, safe to mutate in place
      parameters['name'] = '%s/transformer-%d' % (name, i)
      parameters['intermediate_dim'] = parameters.pop('intermediate dim', 512)
      parameters['num_heads'] = parameters.pop('num heads', 8)
      data = TransformerBlock(**parameters)(data)
      continue

    if 'Reshape' == Name:
      # sizes <= -2 are placeholders resolved from the current tensor shape
      # (-2 -> last dim of `data`, -3 -> second to last, ...); -1 keeps its
      # usual "infer" meaning for L.Reshape
      shape = [
        data.shape[sz + 1] if sz <= -2 else sz
        for sz in parameters['shape']
      ]
      data = L.Reshape(
        shape,
        name='%s/reshape-%d' % (name, i)
      )(data)
      continue

    if 'MLP' == Name:
      parameters['name'] = '%s/mlp-%d' % (name, i)
      data = sMLP(**parameters)(data)
      continue

    raise NotImplementedError('Unknown layer: {}'.format(Name))
  return data
111+
def _createGCMv2(dataShape, config, latentDim, name):
  '''
  Builds the 'v2' global-context backbone: a stack of strided (stride 2)
  downsampling convolutions, each followed by `layers` same-resolution
  convolutions, as described by config['downsample steps'].

  NOTE: `latentDim` is accepted for interface parity but is not used here.
  Returns a tf.keras.Model mapping an input of `dataShape` to the features.
  '''
  inputs = L.Input(shape=dataShape)
  x = inputs
  for stepIndex, step in enumerate(config['downsample steps'], start=1):
    filtersN = step['channels']
    kernel = step['kernel size']
    stagePrefix = name + '/downsample-%d' % (stepIndex,)
    # strided convolution halves the spatial resolution
    x = L.Conv2D(
      filters=filtersN, kernel_size=kernel,
      strides=2, padding='same', activation='relu',
      name=stagePrefix
    )(x)
    # extra convolutions at the new resolution
    for layerIndex in range(1, step['layers'] + 1):
      x = L.Conv2D(
        filters=filtersN, kernel_size=kernel,
        padding='same', activation='relu',
        name=stagePrefix + '/layer-%d' % (layerIndex,)
      )(x)

  return tf.keras.Model(inputs=[inputs], outputs=x, name=name)
139+
def _createGlobalContextModel(X, config, latentDim, name):
  '''
  Computes a global context vector of size `latentDim` from features `X`.
  config['name'] selects the variant: 'v1' (conv block applied directly)
  or 'v2' (a separate keras sub-model built by _createGCMv2).
  '''
  def toContext(features):
    # shared tail: flatten -> MLP mixer -> dense projection to latentDim
    flat = L.Flatten()(features)
    mixed = sMLP(sizes=config['mlp'], activation='relu', name=name + '/globalMixer')(flat)
    return L.Dense(latentDim, activation=config['final activation'], name=name + '/dense-latent')(mixed)

  kind = config.get('name', 'v1')
  if 'v1' == kind:  # simple convolutional model
    features = conv_block_from_config(
      data=X, config=config, defaults={
        'conv before': 0,  # by default, no convolutions before the last layer
      }
    )
    return toContext(features)

  if 'v2' == kind:  # wrapped in its own keras sub-model, then applied to X
    inp = L.Input(shape=X.shape[1:])
    features = _createGCMv2(inp.shape[1:], config, latentDim, name)(inp)
    wrapper = tf.keras.Model(inputs=[inp], outputs=toContext(features), name=name)
    return wrapper(X)

  raise NotImplementedError('Unknown global context model: {}'.format(kind))
165+
166+ def _withPositionConfig (config , name ):
167+ if config is None :
168+ print ('[Encoder] Positions: No' )
169+ return lambda x , _ : x
170+
171+ print ('[Encoder] Positions: Yes' )
172+
173+ if isinstance (config , bool ): config = { 'N' : 32 }
174+ assert isinstance (config , dict ), 'config must be a dictionary'
175+
176+ def withPosition (x , i ):
177+ if not config .get ('stage-%d' % i , True ): return x
178+
179+ encoding = config .get ('encoding' , {})
180+ encoding = dict (** encoding )
181+ encoding ['N' ] = config .get ('stage-%d N' % i , config .get ('N' , 32 ))
182+ return CCoordsGridLayer (
183+ CCoordsEncodingLayer (** encoding , name = '%s/coordsGrid-%d/encoding' % (name , i )),
184+ name = '%s/coordsGrid-%d' % (name , i )
185+ )(x )
186+ return withPosition
187+
188+ ##################
def createEncoderHead_full(
  imgWidth,
  config,
  channels, downsampleSteps, latentDim,
  ConvBeforeStage, ConvAfterStage,
  localContext, globalContext,
  positionsConfigs,
  name
):
  '''
  Builds the full encoder head as a keras Model over a square input image
  of shape (imgWidth, imgWidth, channels).

  Per downsampling stage: optional strided downsample, optional positional
  encoding, ConvBeforeStage convs, an optional per-stage local-context head,
  then ConvAfterStage convs. Outputs a dict with the per-stage
  'intermediate' tensors and a global 'context' vector (a dummy zero tensor
  of width 1 when globalContext is None, to keep the interface stable).
  '''
  assert config is not None, 'config must be a dictionary'
  assert isinstance(downsampleSteps, list) and (0 < len(downsampleSteps)), 'downsampleSteps must be a list of integers'
  imageInput = L.Input(shape=(imgWidth, imgWidth, channels))

  addPositions = _withPositionConfig(positionsConfigs, name)
  features = imageInput
  intermediate = []
  for stage, stageChannels in enumerate(downsampleSteps):
    if config.get('use downsampling', True):
      features = L.Conv2D(stageChannels, 3, strides=2, padding='same', activation='relu')(features)
    features = addPositions(features, stage)  # positional encoding, if configured
    for _ in range(ConvBeforeStage):
      features = L.Conv2D(stageChannels, 3, padding='same', activation='relu')(features)

    if localContext is not None:
      # per-stage local context head, projected to latentDim channels
      intermediate.append(
        conv_block_from_config(
          data=features, config=localContext, defaults={
            'channels': stageChannels,
            'channels last': latentDim,  # last layer should have latentDim channels
          },
          name='%s/intermediate-%d' % (name, stage)
        )
      )

    for _ in range(ConvAfterStage):
      features = L.Conv2D(stageChannels, 3, padding='same', activation='relu')(features)

  if globalContext is not None:  # global context branch
    features = addPositions(features, len(downsampleSteps))
    context = _createGlobalContextModel(features, globalContext, latentDim, name + '/globalContext')
  else:
    # dummy zero context keeps the output signature consistent for callers
    context = L.Lambda(
      lambda x: tf.zeros((tf.shape(x)[0], 1), dtype=features.dtype)
    )(features)

  return tf.keras.Model(
    inputs=[imageInput],
    outputs={
      'intermediate': intermediate,  # per-stage representations
      'context': context,  # global context vector
    },
    name=name
  )
245+
class CEncoderHead(tf.keras.Model):
  '''
  Keras model wrapper that defers construction of the actual encoder head
  until the input shape is known (see build), which is the "hacky" dynamic
  input-shape mechanism mentioned at the top of this file.
  '''
  def __init__(self,
    config,
    downsampleSteps, latentDim,
    ConvBeforeStage, ConvAfterStage,
    localContext, globalContext,
    positionsConfigs,
    **kwargs
  ):
    super().__init__(**kwargs)
    # stash every construction parameter as a private attribute;
    # the real sub-model is assembled lazily in build()
    params = dict(
      config=config,
      downsampleSteps=downsampleSteps,
      latentDim=latentDim,
      ConvBeforeStage=ConvBeforeStage,
      ConvAfterStage=ConvAfterStage,
      localContext=localContext,
      globalContext=globalContext,
      positionsConfigs=positionsConfigs,
    )
    for key, value in params.items():
      setattr(self, '_' + key, value)

  def build(self, inputShape):
    # inputShape is (batch, H, W, C); the head is built for a square image
    # using H as the width — assumes H == W, TODO confirm upstream
    height, _, channelsN = inputShape[1:]
    self._encoderHead = createEncoderHead_full(
      imgWidth=height, config=self._config,
      channels=channelsN, downsampleSteps=self._downsampleSteps, latentDim=self._latentDim,
      ConvBeforeStage=self._ConvBeforeStage, ConvAfterStage=self._ConvAfterStage,
      localContext=self._localContext, globalContext=self._globalContext,
      positionsConfigs=self._positionsConfigs,
      name=self.name + '/EncoderHead'
    )
    self._encoderHead.build(inputShape)
    return super().build(inputShape)

  def call(self, src, training=None):
    # delegate entirely to the lazily-built sub-model
    return self._encoderHead(src, training=training)
def createEncoderHead(
  config,
  downsampleSteps, latentDim,
  ConvBeforeStage, ConvAfterStage,
  localContext, globalContext,
  positionsConfigs,
  name
):
  '''
  Simple encoder that takes image as input and returns corresponding latent
  vector with intermediate representations.

  Thin factory around CEncoderHead; the spatial input shape is resolved
  lazily when the returned model is first built/called.
  (FIX: this text previously sat as a no-op string statement *before* the
  def, so it never became the function's __doc__.)
  '''
  return CEncoderHead(
    config=config,
    downsampleSteps=downsampleSteps,
    latentDim=latentDim,
    ConvBeforeStage=ConvBeforeStage,
    ConvAfterStage=ConvAfterStage,
    localContext=localContext,
    globalContext=globalContext,
    positionsConfigs=positionsConfigs,
    name=name
  )
0 commit comments