import tensorflow as tf 

def generator_gatedcnn(inputs, reuse=False, scope_name='generator_gatedcnn'):
    """Build the gated-CNN generator graph.

    The network is: one gated 1-D convolution, two stride-2 downsample
    blocks, six residual blocks, two pixel-shuffle upsample blocks and a
    final 24-filter 1-D convolution.

    Args:
        inputs: Tensor of shape [batch_size, num_features, time].
        reuse: If True, reuse the variables that already exist under
            ``scope_name`` instead of creating new ones.
        scope_name: Name of the variable scope that owns the weights.

    Returns:
        The output of the last convolution transposed back to
        [batch_size, 24, time_out].
        NOTE(review): time_out should equal the input time when it is a
        multiple of 4 (two stride-2 downsamples, two x2 shuffles) - confirm.
    """
    # 1-D convolution expects [batch_size, time, num_features].
    inputs = tf.transpose(inputs, perm=[0, 2, 1], name='input_transpose')

    with tf.variable_scope(scope_name) as scope:
        # The generator is built more than once in CycleGAN; later builds must
        # share the weights created by the first one. The previous code only
        # asserted on the scope here and never switched reuse on.
        if reuse:
            scope.reuse_variables()

        h1 = conv1d_layer(inputs=inputs, filters=128, kernel_size=15, strides=1, activation=None, name='h1_conv')
        h1_gates = conv1d_layer(inputs=inputs, filters=128, kernel_size=15, strides=1, activation=None, name='h1_conv_gates')
        h1_glu = gated_linear_layer(inputs=h1, gates=h1_gates, name='h1_glu')

        # Downsample
        d1 = downsample1d_block(inputs=h1_glu, filters=256, kernel_size=5, strides=2, name_prefix='downsample1d_block1_')
        d2 = downsample1d_block(inputs=d1, filters=512, kernel_size=5, strides=2, name_prefix='downsample1d_block2_')

        # Residual blocks
        r1 = residual1d_block(inputs=d2, filters=1024, kernel_size=3, strides=1, name_prefix='residual1d_block1_')
        r2 = residual1d_block(inputs=r1, filters=1024, kernel_size=3, strides=1, name_prefix='residual1d_block2_')
        r3 = residual1d_block(inputs=r2, filters=1024, kernel_size=3, strides=1, name_prefix='residual1d_block3_')
        r4 = residual1d_block(inputs=r3, filters=1024, kernel_size=3, strides=1, name_prefix='residual1d_block4_')
        r5 = residual1d_block(inputs=r4, filters=1024, kernel_size=3, strides=1, name_prefix='residual1d_block5_')
        r6 = residual1d_block(inputs=r5, filters=1024, kernel_size=3, strides=1, name_prefix='residual1d_block6_')

        # Upsample
        u1 = upsample1d_block(inputs=r6, filters=1024, kernel_size=5, strides=1, shuffle_size=2, name_prefix='upsample1d_block1_')
        u2 = upsample1d_block(inputs=u1, filters=512, kernel_size=5, strides=1, shuffle_size=2, name_prefix='upsample1d_block2_')

        # Output
        o1 = conv1d_layer(inputs=u2, filters=24, kernel_size=15, strides=1, activation=None, name='o1_conv')
        o2 = tf.transpose(o1, perm=[0, 2, 1], name='output_transpose')

    return o2


一维卷积层 conv1d_layer:对 tf.layers.conv1d 的简单封装。
def conv1d_layer(
    inputs,
    filters,
    kernel_size,
    strides=1,
    padding='same',
    activation=None,
    kernel_initializer=None,
    name=None):
    """Thin wrapper around ``tf.layers.conv1d``.

    The ``inputs``, ``filters`` and ``kernel_size`` parameters were missing
    from the signature even though the body and every caller in this file
    use them; they are restored here.

    Args:
        inputs: Input tensor forwarded to ``tf.layers.conv1d``.
        filters: Number of output filters.
        kernel_size: Length of the 1-D convolution window.
        strides: Stride of the convolution.
        padding: Padding mode forwarded to ``tf.layers.conv1d``.
        activation: Optional activation function.
        kernel_initializer: Optional initializer for the kernel.
        name: Optional layer name.

    Returns:
        The tensor produced by ``tf.layers.conv1d``.
    """
    return tf.layers.conv1d(
        inputs=inputs,
        filters=filters,
        kernel_size=kernel_size,
        strides=strides,
        padding=padding,
        activation=activation,
        kernel_initializer=kernel_initializer,
        name=name)


def gated_linear_layer(inputs, gates, name=None):
    """Gate ``inputs`` element-wise with the sigmoid of ``gates``.

    Args:
        inputs: Tensor carrying the values to be gated.
        gates: Tensor whose sigmoid is used as the multiplicative gate.
        name: Optional name for the multiply op.

    Returns:
        ``inputs * sigmoid(gates)`` as a tensor.
    """
    gate_values = tf.sigmoid(gates)
    return tf.multiply(x=inputs, y=gate_values, name=name)



import tensorflow as tf

# Build two simple 1-D tensors.
a, b = tf.constant([1, 2, 3]), tf.constant([4, 5, 6])

# Element-wise multiplication.
product = tf.multiply(x=a, y=b)

# Evaluate the product inside a TensorFlow session.
with tf.Session() as sess:
    result = sess.run(product)

print(result)  # Output: [ 4 10 18]

在这个例子中,a和b是两个一维张量,它们的形状都是[3]。tf.multiply(a, b)将返回一个新的张量,其中包含a和b中对应元素相乘的结果。

# Build two tensors with different shapes.
a, b = tf.constant([[1, 2], [3, 4]]), tf.constant([5, 6])

# Element-wise multiplication; b is broadcast to match the shape of a.
product = tf.multiply(x=a, y=b)

# Evaluate the product inside a TensorFlow session.
with tf.Session() as sess:
    result = sess.run(product)

print(result)  # Output: [[ 5 12], [15 24]]

在这个例子中,b的形状是[2],而a的形状是[2, 2]。广播时,b先被视为形状[1, 2],再沿第一个维度复制成[2, 2]以匹配a的形状,然后执行元素级乘法。

在TensorFlow 2.x中,通常推荐使用tf.multiply的简写形式*运算符来执行元素级乘法,因为TensorFlow 2.x默认启用了Eager Execution,这使得操作更加直观和易于使用,也更符合Python的语法习惯。


def downsample1d_block(
    inputs,
    filters,
    kernel_size,
    strides,
    name_prefix='downsample1d_block_'):
    """Gated, instance-normalized 1-D convolution block.

    Two convolutions with identical hyperparameters read ``inputs``. Each
    result is instance-normalized, and the first is gated by the sigmoid of
    the second. The ``inputs``, ``filters``, ``kernel_size`` and ``strides``
    parameters were missing from the signature although the body and the
    callers in this file use them; they are restored here.

    Args:
        inputs: Input tensor for both convolutions.
        filters: Number of filters of each convolution.
        kernel_size: Convolution window length.
        strides: Convolution stride.
        name_prefix: Prefix prepended to every layer name in the block.

    Returns:
        The gated output tensor.
    """
    h1 = conv1d_layer(inputs=inputs, filters=filters, kernel_size=kernel_size, strides=strides, activation=None, name=name_prefix + 'h1_conv')
    h1_norm = instance_norm_layer(inputs=h1, activation_fn=None, name=name_prefix + 'h1_norm')
    h1_gates = conv1d_layer(inputs=inputs, filters=filters, kernel_size=kernel_size, strides=strides, activation=None, name=name_prefix + 'h1_gates')
    h1_norm_gates = instance_norm_layer(inputs=h1_gates, activation_fn=None, name=name_prefix + 'h1_norm_gates')
    h1_glu = gated_linear_layer(inputs=h1_norm, gates=h1_norm_gates, name=name_prefix + 'h1_glu')

    return h1_glu


def instance_norm_layer(
    inputs,
    epsilon=1e-06,
    activation_fn=None,
    name=None):
    """Apply ``tf.contrib.layers.instance_norm`` to ``inputs``.

    The ``inputs`` parameter was missing from the signature although the
    body and every caller use it; it is restored here.

    Args:
        inputs: Tensor to normalize.
        epsilon: Small constant forwarded to the contrib layer.
        activation_fn: Optional activation forwarded to the contrib layer.
        name: Accepted for call-site symmetry with the other layers; it is
            not forwarded to the contrib layer.

    Returns:
        The normalized tensor.
    """
    # Use a distinct local name so the function name is not shadowed.
    normalized = tf.contrib.layers.instance_norm(
        inputs=inputs,
        epsilon=epsilon,
        activation_fn=activation_fn)

    return normalized

上面的instance_norm_layer函数使用tf.contrib.layers.instance_norm来实现实例归一化(Instance Normalization)。实例归一化是一种深度学习技术,用于对每个输入实例(样本)单独进行归一化,通常用于风格迁移和图像增强任务中。
需要注意的是,tf.contrib模块在TensorFlow 2.x版本中已经被移除,因为TensorFlow团队决定不再维护这个模块。如果您正在使用TensorFlow 2.x,您需要使用其他方式来实现实例归一化。

以下是使用TensorFlow 2.x中的核心API实现实例归一化的方法:

import tensorflow as tf

def instance_norm_layer(inputs, epsilon=1e-6, activation_fn=None, name=None):
    """Instance normalization built from core TensorFlow ops.

    Each sample is normalized per channel over its spatial axes, i.e. every
    axis except the first (batch) and the last (channel). Rank-4 inputs are
    therefore reduced over axes [1, 2] as before, while the rank-3 tensors
    produced by the 1-D blocks in this file are reduced over axis [1] only,
    rather than being normalized across channels as well.

    Args:
        inputs: Channels-last tensor of rank >= 3 with a known static rank.
        epsilon: Small constant added to the variance for stability.
        activation_fn: Optional callable applied to the normalized output.
        name: Optional name scope; a default scope is used when None.

    Returns:
        The normalized, scaled and shifted tensor (after ``activation_fn``
        when one is given).

    Raises:
        ValueError: If ``inputs`` has fewer than three dimensions.
    """
    rank = len(inputs.shape)
    if rank < 3:
        raise ValueError(
            'instance_norm_layer expects a tensor of rank >= 3, got rank %d' % rank)
    spatial_axes = list(range(1, rank - 1))

    # tf.name_scope rejects None in TensorFlow 2.x, so fall back to a fixed
    # scope name when the caller does not supply one.
    with tf.name_scope(name or 'instance_norm'):
        # Per-instance, per-channel mean and variance.
        mean, variance = tf.nn.moments(inputs, axes=spatial_axes, keepdims=True)
        normalized = (inputs - mean) / tf.sqrt(variance + epsilon)
        # Learnable per-channel scale and shift.
        # NOTE(review): these are plain tf.Variable objects created on every
        # call, so they are not shared through tf.variable_scope reuse -
        # confirm this is intended.
        channels = inputs.shape[-1]
        scale = tf.Variable(tf.ones(channels), name='scale')
        shift = tf.Variable(tf.zeros(channels), name='shift')
        normalized = scale * normalized + shift
        if activation_fn is not None:
            normalized = activation_fn(normalized)
        return normalized
def residual1d_block(
    inputs,
    filters=1024,
    kernel_size=3,
    strides=1,
    name_prefix='residule_block_'):
    """Residual block made of a gated convolution and a projection.

    A gated, instance-normalized convolution with ``filters`` channels is
    followed by a second convolution with ``filters // 2`` channels and
    instance normalization; the result is added to ``inputs``. The ``inputs``
    parameter was missing from the signature although the body and every
    caller use it; it is restored here.

    Args:
        inputs: Input tensor; it is added to the ``filters // 2``-channel
            output of the second convolution, so its channel count has to
            be compatible with that.
        filters: Number of filters of the gated convolution.
        kernel_size: Convolution window length.
        strides: Convolution stride.
        name_prefix: Prefix prepended to every layer name in the block.
            The default spelling is kept because it is part of the layer
            names.

    Returns:
        ``inputs`` plus the normalized output of the second convolution.
    """
    h1 = conv1d_layer(inputs=inputs, filters=filters, kernel_size=kernel_size, strides=strides, activation=None, name=name_prefix + 'h1_conv')
    h1_norm = instance_norm_layer(inputs=h1, activation_fn=None, name=name_prefix + 'h1_norm')
    h1_gates = conv1d_layer(inputs=inputs, filters=filters, kernel_size=kernel_size, strides=strides, activation=None, name=name_prefix + 'h1_gates')
    h1_norm_gates = instance_norm_layer(inputs=h1_gates, activation_fn=None, name=name_prefix + 'h1_norm_gates')
    h1_glu = gated_linear_layer(inputs=h1_norm, gates=h1_norm_gates, name=name_prefix + 'h1_glu')
    h2 = conv1d_layer(inputs=h1_glu, filters=filters // 2, kernel_size=kernel_size, strides=strides, activation=None, name=name_prefix + 'h2_conv')
    h2_norm = instance_norm_layer(inputs=h2, activation_fn=None, name=name_prefix + 'h2_norm')
    h3 = inputs + h2_norm

    return h3
def upsample1d_block(
    inputs,
    filters,
    kernel_size,
    strides,
    shuffle_size=2,
    name_prefix='upsample1d_block_'):
    """Gated upsampling block: convolution, pixel shuffle, instance norm.

    Two convolutions with identical hyperparameters read ``inputs``. Each
    result goes through ``pixel_shuffler`` and instance normalization, and
    the first branch is gated by the sigmoid of the second. The ``inputs``,
    ``filters``, ``kernel_size`` and ``strides`` parameters were missing
    from the signature although the body and the callers in this file use
    them; they are restored here.

    Args:
        inputs: Input tensor for both convolutions.
        filters: Number of filters of each convolution (before shuffling).
        kernel_size: Convolution window length.
        strides: Convolution stride.
        shuffle_size: Upsampling factor passed to ``pixel_shuffler``.
        name_prefix: Prefix prepended to every layer name in the block.

    Returns:
        The gated output tensor.
    """
    h1 = conv1d_layer(inputs=inputs, filters=filters, kernel_size=kernel_size, strides=strides, activation=None, name=name_prefix + 'h1_conv')
    h1_shuffle = pixel_shuffler(inputs=h1, shuffle_size=shuffle_size, name=name_prefix + 'h1_shuffle')
    h1_norm = instance_norm_layer(inputs=h1_shuffle, activation_fn=None, name=name_prefix + 'h1_norm')

    h1_gates = conv1d_layer(inputs=inputs, filters=filters, kernel_size=kernel_size, strides=strides, activation=None, name=name_prefix + 'h1_gates')
    h1_shuffle_gates = pixel_shuffler(inputs=h1_gates, shuffle_size=shuffle_size, name=name_prefix + 'h1_shuffle_gates')
    h1_norm_gates = instance_norm_layer(inputs=h1_shuffle_gates, activation_fn=None, name=name_prefix + 'h1_norm_gates')

    h1_glu = gated_linear_layer(inputs=h1_norm, gates=h1_norm_gates, name=name_prefix + 'h1_glu')

    return h1_glu


def pixel_shuffler(inputs, shuffle_size=2, name=None):
    """Reshape a rank-3 tensor so channels are traded for width.

    The batch size and width are read dynamically with ``tf.shape``; the
    channel count is read from the static shape.

    Args:
        inputs: Tensor indexed as [batch, width, channels].
        shuffle_size: Factor by which width grows and channels shrink.
        name: Optional name for the reshape op.

    Returns:
        ``inputs`` reshaped to
        [batch, width * shuffle_size, channels // shuffle_size].
    """
    batch = tf.shape(inputs)[0]
    width = tf.shape(inputs)[1]
    channels = inputs.get_shape().as_list()[2]

    target_shape = [batch, width * shuffle_size, channels // shuffle_size]
    return tf.reshape(tensor=inputs, shape=target_shape, name=name)

Pixel Shuffle操作通常用于图像超分辨率任务中,用于将低分辨率图像上采样到高分辨率图像。
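首先,让我们回顾一下Pixel Shuffle的基本原理。对于一维情形,给定形状为(batch_size, width, channels * shuffle_size)的输入张量inputs,Pixel Shuffle操作将其重新排列为(batch_size, width * shuffle_size, channels);对于二维情形,形状为(batch_size, height, width, channels * shuffle_size ^ 2)的输入会被重新排列为(batch_size, height * shuffle_size, width * shuffle_size, channels)。这里的shuffle_size是上采样因子,用于指定每个空间维度放大的倍数。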
在TensorFlow 1.x中,通常使用inputs.get_shape().as_list()来获取张量的静态形状;这个用法在TensorFlow 2.x中仍然可用,但更直接、更推荐的方式是使用inputs.shape。
在Pixel Shuffle操作中,你需要将输入张量的最后一个维度(通道维度)重新排列,以便将每个shuffle_size x shuffle_size的通道块重新排列到输出的空间维度上。这意味着你需要在最后一个维度上进行重塑和转置操作。
以下是一个修正后的pixel_shuffler函数,适用于TensorFlow 2.x:

import tensorflow as tf

def pixel_shuffler(inputs, shuffle_size=2, name=None):
    """Pixel-shuffle (sub-pixel) upsampling for channels-last tensors.

    Rank-4 inputs [batch, height, width, channels] become
    [batch, height * s, width * s, channels // s**2], with the same element
    layout as ``tf.nn.depth_to_space``. Rank-3 inputs
    [batch, width, channels] become [batch, width * s, channels // s] by a
    plain reshape, which is what the 1-D upsample block in this file needs.

    Batch and spatial sizes are read with ``tf.shape`` so they may be
    unknown at graph-construction time; only the rank and the channel
    count must be static.

    Args:
        inputs: Channels-last tensor of rank 3 or 4.
        shuffle_size: Upsampling factor ``s`` per spatial dimension.
        name: Optional name scope; a default scope is used when None.

    Returns:
        The upsampled tensor.

    Raises:
        ValueError: If the rank is not 3 or 4, the channel dimension is not
            statically known, or it is not divisible by
            ``shuffle_size ** (rank - 2)``.
    """
    static_shape = inputs.shape.as_list()
    rank = len(static_shape)
    if rank not in (3, 4):
        raise ValueError(
            'pixel_shuffler expects a rank-3 or rank-4 tensor, got rank %d' % rank)

    c = static_shape[-1]
    if c is None:
        raise ValueError('pixel_shuffler needs a statically known channel dimension')

    # One factor of shuffle_size is consumed per spatial dimension.
    group = shuffle_size ** (rank - 2)
    if c % group != 0:
        raise ValueError(
            'The number of channels (%d) must be divisible by %d' % (c, group))
    oc = c // group

    # tf.name_scope rejects None in TensorFlow 2.x, so fall back to a fixed
    # scope name when the caller does not supply one.
    with tf.name_scope(name or 'pixel_shuffler'):
        dynamic_shape = tf.shape(inputs)
        n = dynamic_shape[0]

        if rank == 3:
            w = dynamic_shape[1]
            return tf.reshape(inputs, [n, w * shuffle_size, oc])

        h = dynamic_shape[1]
        w = dynamic_shape[2]
        # Split channels into (row offset, column offset, output channel).
        blocks = tf.reshape(inputs, [n, h, w, shuffle_size, shuffle_size, oc])
        # Interleave the offsets with their spatial axes:
        # [n, h, row offset, w, column offset, oc].
        blocks = tf.transpose(blocks, [0, 1, 3, 2, 4, 5])
        outputs = tf.reshape(blocks, [n, h * shuffle_size, w * shuffle_size, oc])

    return outputs

在这个修正后的函数中,我们首先确保输入张量的通道数是shuffle_size * shuffle_size的倍数。然后,我们计算输出的通道数和宽度。接下来,我们使用tf.reshape和tf.transpose操作来重新排列输入张量的维度,以完成Pixel Shuffle操作。最后,我们返回重塑后的输出张量。

请注意,这个函数假设inputs是一个四维张量,其形状为(batch_size, height, width, channels)。如果你的输入张量有不同的形状,你可能需要调整代码以适应不同的形状。




def discriminator(inputs, reuse=False, scope_name='discriminator'):
    """Build the 2-D gated-CNN discriminator graph.

    The network is: one gated 2-D convolution, three 2-D downsample blocks
    and a single-unit dense layer with sigmoid activation.

    Args:
        inputs: Tensor of shape [batch_size, num_features, time].
        reuse: If True, reuse the variables that already exist under
            ``scope_name`` instead of creating new ones.
        scope_name: Name of the variable scope that owns the weights.

    Returns:
        The sigmoid output of the final dense layer, which maps the last
        axis of the third downsample block to one unit.
    """
    # 2-D convolution needs a channel axis: [batch_size, num_features, time, 1].
    inputs = tf.expand_dims(inputs, -1)

    with tf.variable_scope(scope_name) as scope:
        # The discriminator is built more than once in CycleGAN; later builds
        # must share the weights created by the first one. The previous code
        # only asserted on the scope here and never switched reuse on.
        if reuse:
            scope.reuse_variables()

        h1 = conv2d_layer(inputs=inputs, filters=128, kernel_size=[3, 3], strides=[1, 2], activation=None, name='h1_conv')
        h1_gates = conv2d_layer(inputs=inputs, filters=128, kernel_size=[3, 3], strides=[1, 2], activation=None, name='h1_conv_gates')
        h1_glu = gated_linear_layer(inputs=h1, gates=h1_gates, name='h1_glu')

        # Downsample
        d1 = downsample2d_block(inputs=h1_glu, filters=256, kernel_size=[3, 3], strides=[2, 2], name_prefix='downsample2d_block1_')
        d2 = downsample2d_block(inputs=d1, filters=512, kernel_size=[3, 3], strides=[2, 2], name_prefix='downsample2d_block2_')
        d3 = downsample2d_block(inputs=d2, filters=1024, kernel_size=[6, 3], strides=[1, 2], name_prefix='downsample2d_block3_')

        # Output
        o1 = tf.layers.dense(inputs=d3, units=1, activation=tf.nn.sigmoid)

        return o1



