# ICCV 2021: "Joint Topology-preserving and Feature-refinement Network for
# Curvilinear Structure Segmentation" (JTFN)
import torch
import torch.nn as nn
import torch.nn.functional as F

from .UNet_p import (MultiHeadAttention2D_Dual2_2, OneLayerRes, prompt_partition,
                     rand, window_partition, window_unpartition)

class SpatialAttention(nn.Module):
    def __init__(self):
        super(SpatialAttention, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(2, 1, kernel_size=(3, 3), padding=(1, 1)),
            nn.Conv2d(1, 1, kernel_size=(5, 5), padding=(2, 2)),
            nn.Sigmoid()
        )

    def forward(self, x):
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        x = torch.cat([avg_out, max_out], dim=1)
        x = self.conv(x)
        return x
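
# A hedged shape sketch (illustration only): SpatialAttention pools over the
# channel axis and returns a per-pixel gate in (0, 1), i.e. (B, C, H, W) -> (B, 1, H, W):
#   sa = SpatialAttention()
#   sa(torch.randn(2, 64, 32, 32)).shape  # -> torch.Size([2, 1, 32, 32])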

class ChannelAttention(nn.Module):
    def __init__(self, channel, reduction=2):
        super(ChannelAttention, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)
        self.fc1 = nn.Conv2d(channel, channel // reduction, 1, bias=False)
        self.fc2 = nn.Conv2d(channel // reduction, channel, 1, bias=False)
        self.activate = nn.Sigmoid()

    def forward(self, x):
        avg_out = self.fc2(self.fc1(self.avg_pool(x)))
        max_out = self.fc2(self.fc1(self.max_pool(x)))
        out = avg_out + max_out
        out = self.activate(out)
        return out
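
# A hedged shape sketch (illustration only): ChannelAttention squeezes the
# spatial axes and returns a per-channel gate, i.e. (B, C, H, W) -> (B, C, 1, 1):
#   ca = ChannelAttention(64)
#   ca(torch.randn(2, 64, 32, 32)).shape  # -> torch.Size([2, 64, 1, 1])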

class GAU(nn.Module):
    def __init__(self, in_channels, use_gau=True, reduce_dim=False, out_channels=None):
        super(GAU, self).__init__()
        self.use_gau = use_gau
        self.reduce_dim = reduce_dim
        if self.reduce_dim:
            self.down_conv = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True)
            )
            in_channels = out_channels
        if self.use_gau:
            self.sa = SpatialAttention()
            self.ca = ChannelAttention(in_channels)
            self.reset_gate = nn.Sequential(
                nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=2, dilation=2),
                # matches the conv above, which keeps in_channels channels
                nn.BatchNorm2d(in_channels),
                nn.ReLU(inplace=True),
            )

    def forward(self, x, y):
        if self.reduce_dim:
            x = self.down_conv(x)
        if self.use_gau:
            y = F.interpolate(y, x.shape[-2:], mode='bilinear', align_corners=True)
            comx = x * y
            resx = x * (1 - y)    # bs, c, h, w
            x_sa = self.sa(resx)  # bs, 1, h, w
            x_ca = self.ca(resx)  # bs, c, 1, 1
            O = self.reset_gate(comx)
            M = x_sa * x_ca
            RF = M * x + (1 - M) * O
        else:
            RF = x
        return RF
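
# A hedged usage sketch (illustration only): the GAU refines a skip feature x
# with the previous step's prediction y; y can be any spatial size, since it
# is resized internally, and broadcasts over channels when num_classes == 1:
#   gau = GAU(64)
#   rf = gau(torch.randn(2, 64, 32, 32), torch.zeros(2, 1, 128, 128))
#   rf.shape  # -> torch.Size([2, 64, 32, 32])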

class FIM(nn.Module):
    def __init__(self, in_channels, out_channels, f_channels, use_topo=True, up=True, bottom=False):
        super(FIM, self).__init__()
        self.use_topo = use_topo
        self.up = up
        self.bottom = bottom
        if self.up:
            self.up_s = nn.Sequential(
                nn.ConvTranspose2d(in_channels, out_channels, kernel_size=2, stride=2),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True)
            )
            self.up_t = nn.Sequential(
                nn.ConvTranspose2d(in_channels, out_channels, kernel_size=2, stride=2),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True)
            )
        else:
            self.up_s = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True)
            )
            self.up_t = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True)
            )
        self.decoder_s = nn.Sequential(
            nn.Conv2d(out_channels + f_channels, out_channels, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )
        '''self.inner_s = nn.Sequential(
            nn.Conv2d(out_channels, 1, kernel_size=3, padding=1, bias=False),
            nn.Sigmoid()
        )'''
        if self.bottom:
            self.st = nn.Sequential(
                nn.Conv2d(in_channels, in_channels, kernel_size=1, stride=1),
                nn.BatchNorm2d(in_channels),
                nn.ReLU(inplace=True)
            )
        if self.use_topo:
            self.decoder_t = nn.Sequential(
                nn.Conv2d(out_channels + out_channels, out_channels, kernel_size=3, stride=1, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True)
            )
            self.s_to_t = nn.Sequential(
                nn.Conv2d(out_channels, out_channels, kernel_size=1, stride=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True)
            )
            self.t_to_s = nn.Sequential(
                nn.Conv2d(out_channels, out_channels, kernel_size=1, stride=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True)
            )
            self.res_s = nn.Sequential(
                nn.Conv2d(out_channels * 2, out_channels, kernel_size=3, stride=1, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True)
            )
            '''self.inner_t = nn.Sequential(
                nn.Conv2d(out_channels, 1, kernel_size=3, padding=1, bias=False),
                nn.Sigmoid()
            )'''

    def forward(self, x_s, x_t, rf):
        if self.use_topo:
            if self.bottom:
                x_t = self.st(x_t)
            x_s = self.up_s(x_s)
            x_t = self.up_t(x_t)
            # pad both streams to the skip feature's spatial size
            diffY = rf.size()[2] - x_s.size()[2]
            diffX = rf.size()[3] - x_s.size()[3]
            x_s = F.pad(x_s, [diffX // 2, diffX - diffX // 2,
                              diffY // 2, diffY - diffY // 2])
            x_t = F.pad(x_t, [diffX // 2, diffX - diffX // 2,
                              diffY // 2, diffY - diffY // 2])
            rf_s = torch.cat((x_s, rf), dim=1)
            s = self.decoder_s(rf_s)
            s_t = self.s_to_t(s)
            t = torch.cat((x_t, s_t), dim=1)
            x_t = self.decoder_t(t)
            t_s = self.t_to_s(x_t)
            s_res = self.res_s(torch.cat((s, t_s), dim=1))
            x_s = s + s_res
            # t_cls = self.inner_t(x_t)
            # s_cls = self.inner_s(x_s)
        else:
            x_s = self.up_s(x_s)
            # pad to the skip feature's spatial size
            diffY = rf.size()[2] - x_s.size()[2]
            diffX = rf.size()[3] - x_s.size()[3]
            x_s = F.pad(x_s, [diffX // 2, diffX - diffX // 2,
                              diffY // 2, diffY - diffY // 2])
            rf_s = torch.cat((x_s, rf), dim=1)
            s = self.decoder_s(rf_s)
            x_s = s
            x_t = x_s
            # t_cls = None
            # s_cls = self.inner_s(x_s)
        return x_s, x_t
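
# A hedged usage sketch (illustration only): the FIM upsamples the segmentation
# stream x_s and topology stream x_t by 2x, fuses the refined skip feature rf,
# and exchanges information between the two streams:
#   fim = FIM(in_channels=128, out_channels=64, f_channels=64)
#   s, t = fim(torch.randn(2, 128, 8, 8), torch.randn(2, 128, 8, 8),
#              torch.randn(2, 64, 16, 16))
#   s.shape, t.shape  # both -> torch.Size([2, 64, 16, 16])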

class JTFNDecoder(nn.Module):
    def __init__(self, channels, use_topo) -> None:
        super().__init__()
        skip_blocks = []
        for i in range(5):
            skip_blocks.append(GAU(channels[i], use_gau=True, reduce_dim=False, out_channels=channels[i]))
        self.skip_blocks = nn.ModuleList(skip_blocks)
        fims = []
        bottom_index = 3
        for i in range(4):
            fims.append(FIM(channels[i + 1], channels[i], channels[i],
                            use_topo=use_topo, up=True, bottom=(i == bottom_index)))
        self.fims = nn.ModuleList(fims)

    def forward(self, x1, x2, x3, x4, x5, y):
        x1 = self.skip_blocks[0](x1, y)
        x2 = self.skip_blocks[1](x2, y)
        x3 = self.skip_blocks[2](x3, y)
        x4 = self.skip_blocks[3](x4, y)
        x5 = self.skip_blocks[4](x5, y)
        x5_seg, x5_bou = x5, x5
        x4_seg, x4_bou = self.fims[3](x5_seg, x5_bou, x4)
        x3_seg, x3_bou = self.fims[2](x4_seg, x4_bou, x3)
        x2_seg, x2_bou = self.fims[1](x3_seg, x3_bou, x2)
        x1_seg, x1_bou = self.fims[0](x2_seg, x2_bou, x1)
        return [x1_seg, x2_seg, x3_seg, x4_seg], [x1_bou, x2_bou, x3_bou, x4_bou]
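
# A hedged construction sketch (illustration only): one GAU per encoder stage,
# one FIM per decoder stage, and only the deepest FIM runs in 'bottom' mode:
#   decoder = JTFNDecoder(channels=[16, 32, 64, 128, 256], use_topo=True)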

class JTFN(nn.Module):
    def __init__(self, encoder, decoder, channels, num_classes, steps) -> None:
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.num_classes = num_classes
        self.steps = steps
        self.conv_seg1_head = nn.Conv2d(channels[0], num_classes, kernel_size=3, stride=1, padding=1, bias=False)
        self.conv_seg2_head = nn.Conv2d(channels[1], num_classes, kernel_size=3, stride=1, padding=1, bias=False)
        self.conv_seg3_head = nn.Conv2d(channels[2], num_classes, kernel_size=3, stride=1, padding=1, bias=False)
        self.conv_seg4_head = nn.Conv2d(channels[3], num_classes, kernel_size=3, stride=1, padding=1, bias=False)
        self.conv_bou1_head = nn.Conv2d(channels[0], num_classes, kernel_size=3, stride=1, padding=1, bias=False)
        self.conv_bou2_head = nn.Conv2d(channels[1], num_classes, kernel_size=3, stride=1, padding=1, bias=False)
        self.conv_bou3_head = nn.Conv2d(channels[2], num_classes, kernel_size=3, stride=1, padding=1, bias=False)
        self.conv_bou4_head = nn.Conv2d(channels[3], num_classes, kernel_size=3, stride=1, padding=1, bias=False)
        self.upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)

    def forward(self, x):
        B, C, H, W = x.shape
        y = torch.zeros([B, self.num_classes, H, W], device=x.device)
        x1, x2, x3, x4, x5 = self.encoder(x)
        outputs = {}
        for i in range(self.steps):
            segs, bous = self.decoder(x1, x2, x3, x4, x5, y)
            x1_seg, x2_seg, x3_seg, x4_seg = segs
            x1_bou, x2_bou, x3_bou, x4_bou = bous
            x1_seg = self.conv_seg1_head(x1_seg)
            x2_seg = self.conv_seg2_head(x2_seg)
            x3_seg = self.conv_seg3_head(x3_seg)
            x4_seg = self.conv_seg4_head(x4_seg)
            x1_bou = self.conv_bou1_head(x1_bou)
            x2_bou = self.conv_bou2_head(x2_bou)
            x3_bou = self.conv_bou3_head(x3_bou)
            x4_bou = self.conv_bou4_head(x4_bou)
            y = x1_seg
            outputs['step_{}_seg'.format(i)] = [x1_seg, x2_seg, x3_seg, x4_seg]
            outputs['step_{}_bou'.format(i)] = [x1_bou, x2_bou, x3_bou, x4_bou]
        y = self.upsample(y)
        outputs['output'] = y
        return outputs

    def encoder_forward(self, x, dataset_idx):
        # EfficientNet-style (timm) feature extraction; dataset_idx is forwarded
        # to the last block of each output stage
        x = self.encoder.conv_stem(x)
        x = self.encoder.bn1(x)
        features = []
        if 0 in self.encoder._stage_out_idx:
            features.append(x)  # add stem out
        for i in range(len(self.encoder.blocks)):
            for j, block in enumerate(self.encoder.blocks[i]):
                if j == len(self.encoder.blocks[i]) - 1 and i + 1 in self.encoder._stage_out_idx:
                    x = block(x, dataset_idx)
                else:
                    x = block(x)
            if i + 1 in self.encoder._stage_out_idx:
                features.append(x)
        return features
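
# A hedged note on the refinement loop above: at each step, the finest
# segmentation logits (x1_seg) become the guidance map y for the next decoder
# pass, and the final map is upsampled 2x back to input resolution, which
# assumes the encoder stem downsamples by 2. A runnable smoke-test sketch is
# given at the end of this file.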

class JTFN_DCP(JTFN):
    def __init__(self, encoder, decoder, channels, num_classes, steps, dataset_idx,
                 local_window_sizes, encoder_channels, pos_promot_channels, cha_promot_channels,
                 embed_ratio, strides, att_fusion, use_conv) -> None:
        super().__init__(encoder, decoder, channels, num_classes, steps)
        self.dataset_idx = dataset_idx
        self.local_window_sizes = local_window_sizes
        self.pos_promot_channels = pos_promot_channels
        pos_p1 = rand((1, encoder_channels[0], pos_promot_channels[0], local_window_sizes[0]), val=3. / encoder_channels[0] ** 0.5)
        pos_p2 = rand((1, encoder_channels[1], pos_promot_channels[1], local_window_sizes[1]), val=3. / encoder_channels[1] ** 0.5)
        pos_p3 = rand((1, encoder_channels[2], pos_promot_channels[2], local_window_sizes[2]), val=3. / encoder_channels[2] ** 0.5)
        pos_p4 = rand((1, encoder_channels[3], pos_promot_channels[3], local_window_sizes[3]), val=3. / encoder_channels[3] ** 0.5)
        pos_p5 = rand((1, encoder_channels[4], pos_promot_channels[4], local_window_sizes[4]), val=3. / encoder_channels[4] ** 0.5)
        self.pos_promot1 = nn.ParameterDict({str(k): nn.Parameter(pos_p1.detach().clone(), requires_grad=True) for k in dataset_idx})
        self.pos_promot2 = nn.ParameterDict({str(k): nn.Parameter(pos_p2.detach().clone(), requires_grad=True) for k in dataset_idx})
        self.pos_promot3 = nn.ParameterDict({str(k): nn.Parameter(pos_p3.detach().clone(), requires_grad=True) for k in dataset_idx})
        self.pos_promot4 = nn.ParameterDict({str(k): nn.Parameter(pos_p4.detach().clone(), requires_grad=True) for k in dataset_idx})
        self.pos_promot5 = nn.ParameterDict({str(k): nn.Parameter(pos_p5.detach().clone(), requires_grad=True) for k in dataset_idx})
        self.cha_promot_channels = cha_promot_channels
        cha_p1 = rand((1, cha_promot_channels[0], local_window_sizes[0], local_window_sizes[0]), val=3. / local_window_sizes[0])
        cha_p2 = rand((1, cha_promot_channels[1], local_window_sizes[1], local_window_sizes[1]), val=3. / local_window_sizes[1])
        cha_p3 = rand((1, cha_promot_channels[2], local_window_sizes[2], local_window_sizes[2]), val=3. / local_window_sizes[2])
        cha_p4 = rand((1, cha_promot_channels[3], local_window_sizes[3], local_window_sizes[3]), val=3. / local_window_sizes[3])
        cha_p5 = rand((1, cha_promot_channels[4], local_window_sizes[4], local_window_sizes[4]), val=3. / local_window_sizes[4])
        self.cha_promot1 = nn.ParameterDict({str(k): nn.Parameter(cha_p1.detach().clone(), requires_grad=True) for k in dataset_idx})
        self.cha_promot2 = nn.ParameterDict({str(k): nn.Parameter(cha_p2.detach().clone(), requires_grad=True) for k in dataset_idx})
        self.cha_promot3 = nn.ParameterDict({str(k): nn.Parameter(cha_p3.detach().clone(), requires_grad=True) for k in dataset_idx})
        self.cha_promot4 = nn.ParameterDict({str(k): nn.Parameter(cha_p4.detach().clone(), requires_grad=True) for k in dataset_idx})
        self.cha_promot5 = nn.ParameterDict({str(k): nn.Parameter(cha_p5.detach().clone(), requires_grad=True) for k in dataset_idx})
        self.strides = strides
        self.att1 = MultiHeadAttention2D_Dual2_2(
            dim_pos=encoder_channels[0], dim_cha=encoder_channels[0] + cha_promot_channels[0],
            embed_dim=encoder_channels[0], att_fusion=att_fusion, num_heads=8,
            embed_dim_ratio=embed_ratio, stride=strides[0], pos_slide=0, cha_slide=0, use_conv=use_conv)
        self.att2 = MultiHeadAttention2D_Dual2_2(
            dim_pos=encoder_channels[1], dim_cha=encoder_channels[1] + cha_promot_channels[1],
            embed_dim=encoder_channels[1], att_fusion=att_fusion, num_heads=8,
            embed_dim_ratio=embed_ratio, stride=strides[1], pos_slide=0, cha_slide=0, use_conv=use_conv)
        self.att3 = MultiHeadAttention2D_Dual2_2(
            dim_pos=encoder_channels[2], dim_cha=encoder_channels[2] + cha_promot_channels[2],
            embed_dim=encoder_channels[2], att_fusion=att_fusion, num_heads=8,
            embed_dim_ratio=embed_ratio, stride=strides[2], pos_slide=0, cha_slide=0, use_conv=use_conv)
        self.att4 = MultiHeadAttention2D_Dual2_2(
            dim_pos=encoder_channels[3], dim_cha=encoder_channels[3] + cha_promot_channels[3],
            embed_dim=encoder_channels[3], att_fusion=att_fusion, num_heads=8,
            embed_dim_ratio=embed_ratio, stride=strides[3], pos_slide=0, cha_slide=0, use_conv=use_conv)
        self.att5 = MultiHeadAttention2D_Dual2_2(
            dim_pos=encoder_channels[4], dim_cha=encoder_channels[4] + cha_promot_channels[4],
            embed_dim=encoder_channels[4], att_fusion=att_fusion, num_heads=8,
            embed_dim_ratio=embed_ratio, stride=strides[4], pos_slide=0, cha_slide=0, use_conv=use_conv)
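
    # A hedged note on prompt shapes: each dataset key owns a positional prompt
    # of shape (1, C_enc, P_pos, W_win), later concatenated along the height
    # axis, and a channel prompt of shape (1, P_cha, W_win, W_win), later
    # concatenated along the channel axis (see forward below); how they tile
    # across windows is delegated to prompt_partition from .UNet_p.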

    def get_cha_prompts(self, dataset_idx, batch_size):
        if len(dataset_idx) != batch_size:
            raise ValueError('need one dataset index per sample, got {} for batch size {} '
                             '(known indices: {})'.format(dataset_idx, batch_size, self.dataset_idx))
        promots1 = torch.cat([self.cha_promot1[str(i)] for i in dataset_idx], dim=0)
        promots2 = torch.cat([self.cha_promot2[str(i)] for i in dataset_idx], dim=0)
        promots3 = torch.cat([self.cha_promot3[str(i)] for i in dataset_idx], dim=0)
        promots4 = torch.cat([self.cha_promot4[str(i)] for i in dataset_idx], dim=0)
        promots5 = torch.cat([self.cha_promot5[str(i)] for i in dataset_idx], dim=0)
        return promots1, promots2, promots3, promots4, promots5

    def get_pos_prompts(self, dataset_idx, batch_size):
        if len(dataset_idx) != batch_size:
            raise ValueError('need one dataset index per sample, got {} for batch size {} '
                             '(known indices: {})'.format(dataset_idx, batch_size, self.dataset_idx))
        promots1 = torch.cat([self.pos_promot1[str(i)] for i in dataset_idx], dim=0)
        promots2 = torch.cat([self.pos_promot2[str(i)] for i in dataset_idx], dim=0)
        promots3 = torch.cat([self.pos_promot3[str(i)] for i in dataset_idx], dim=0)
        promots4 = torch.cat([self.pos_promot4[str(i)] for i in dataset_idx], dim=0)
        promots5 = torch.cat([self.pos_promot5[str(i)] for i in dataset_idx], dim=0)
        return promots1, promots2, promots3, promots4, promots5

    def forward(self, x, dataset_idx, return_features=False):
        if isinstance(dataset_idx, torch.Tensor):
            dataset_idx = dataset_idx.cpu().tolist()
        cha_promots1, cha_promots2, cha_promots3, cha_promots4, cha_promots5 = self.get_cha_prompts(dataset_idx=dataset_idx, batch_size=x.size(0))
        pos_promots1, pos_promots2, pos_promots3, pos_promots4, pos_promots5 = self.get_pos_prompts(dataset_idx=dataset_idx, batch_size=x.size(0))
        B, C, H, W = x.shape
        y = torch.zeros([B, self.num_classes, H, W], device=x.device)
        x1, x2, x3, x4, x5 = self.encoder(x)
        if return_features:
            pre_x1, pre_x2, pre_x3, pre_x4, pre_x5 = (
                x1.detach().clone(), x2.detach().clone(), x3.detach().clone(),
                x4.detach().clone(), x5.detach().clone())
        h1, w1 = x1.size()[2:]
        h2, w2 = x2.size()[2:]
        h3, w3 = x3.size()[2:]
        h4, w4 = x4.size()[2:]
        h5, w5 = x5.size()[2:]
        x1, (Hp1, Wp1), (h_win1, w_win1) = window_partition(x1, self.local_window_sizes[0])
        x2, (Hp2, Wp2), (h_win2, w_win2) = window_partition(x2, self.local_window_sizes[1])
        x3, (Hp3, Wp3), (h_win3, w_win3) = window_partition(x3, self.local_window_sizes[2])
        x4, (Hp4, Wp4), (h_win4, w_win4) = window_partition(x4, self.local_window_sizes[3])
        x5, (Hp5, Wp5), (h_win5, w_win5) = window_partition(x5, self.local_window_sizes[4])
        cha_promots1 = prompt_partition(cha_promots1, h_win1, w_win1)
        cha_promots2 = prompt_partition(cha_promots2, h_win2, w_win2)
        cha_promots3 = prompt_partition(cha_promots3, h_win3, w_win3)
        cha_promots4 = prompt_partition(cha_promots4, h_win4, w_win4)
        cha_promots5 = prompt_partition(cha_promots5, h_win5, w_win5)
        pos_promots1 = prompt_partition(pos_promots1, h_win1, w_win1)
        pos_promots2 = prompt_partition(pos_promots2, h_win2, w_win2)
        pos_promots3 = prompt_partition(pos_promots3, h_win3, w_win3)
        pos_promots4 = prompt_partition(pos_promots4, h_win4, w_win4)
        pos_promots5 = prompt_partition(pos_promots5, h_win5, w_win5)
        # channel prompts are appended along the channel axis, positional
        # prompts along the height axis of each window
        cha_x1 = torch.cat([x1, cha_promots1], dim=1)
        cha_x2 = torch.cat([x2, cha_promots2], dim=1)
        cha_x3 = torch.cat([x3, cha_promots3], dim=1)
        cha_x4 = torch.cat([x4, cha_promots4], dim=1)
        cha_x5 = torch.cat([x5, cha_promots5], dim=1)
        pos_x1 = torch.cat([pos_promots1, x1], dim=2)
        pos_x2 = torch.cat([pos_promots2, x2], dim=2)
        pos_x3 = torch.cat([pos_promots3, x3], dim=2)
        pos_x4 = torch.cat([pos_promots4, x4], dim=2)
        pos_x5 = torch.cat([pos_promots5, x5], dim=2)
        x1, x2, x3, x4, x5 = (self.att1(pos_x1, cha_x1), self.att2(pos_x2, cha_x2),
                              self.att3(pos_x3, cha_x3), self.att4(pos_x4, cha_x4),
                              self.att5(pos_x5, cha_x5))
        x1 = window_unpartition(x1, self.local_window_sizes[0], (Hp1, Wp1), (h1, w1))
        x2 = window_unpartition(x2, self.local_window_sizes[1], (Hp2, Wp2), (h2, w2))
        x3 = window_unpartition(x3, self.local_window_sizes[2], (Hp3, Wp3), (h3, w3))
        x4 = window_unpartition(x4, self.local_window_sizes[3], (Hp4, Wp4), (h4, w4))
        x5 = window_unpartition(x5, self.local_window_sizes[4], (Hp5, Wp5), (h5, w5))
        if return_features:
            pro_x1, pro_x2, pro_x3, pro_x4, pro_x5 = (
                x1.detach().clone(), x2.detach().clone(), x3.detach().clone(),
                x4.detach().clone(), x5.detach().clone())
            return (pre_x1, pre_x2, pre_x3, pre_x4, pre_x5), (pro_x1, pro_x2, pro_x3, pro_x4, pro_x5)
        outputs = {}
        for i in range(self.steps):
            segs, bous = self.decoder(x1, x2, x3, x4, x5, y)
            x1_seg, x2_seg, x3_seg, x4_seg = segs
            x1_bou, x2_bou, x3_bou, x4_bou = bous
            x1_seg = self.conv_seg1_head(x1_seg)
            x2_seg = self.conv_seg2_head(x2_seg)
            x3_seg = self.conv_seg3_head(x3_seg)
            x4_seg = self.conv_seg4_head(x4_seg)
            x1_bou = self.conv_bou1_head(x1_bou)
            x2_bou = self.conv_bou2_head(x2_bou)
            x3_bou = self.conv_bou3_head(x3_bou)
            x4_bou = self.conv_bou4_head(x4_bou)
            y = x1_seg
            outputs['step_{}_seg'.format(i)] = [x1_seg, x2_seg, x3_seg, x4_seg]
            outputs['step_{}_bou'.format(i)] = [x1_bou, x2_bou, x3_bou, x4_bou]
        y = self.upsample(y)
        outputs['output'] = y
        return outputs
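

if __name__ == "__main__":
    # A minimal smoke-test sketch, assuming a toy encoder; the real model is
    # expected to use a pretrained backbone (the encoder_forward hook above
    # suggests a timm EfficientNet), so DummyEncoder here is purely hypothetical.
    class DummyEncoder(nn.Module):
        def __init__(self, channels):
            super().__init__()
            stages, prev = [], 3
            for c in channels:
                # each stage halves the spatial resolution, mimicking a
                # stride-2 backbone stem plus four downsampling stages
                stages.append(nn.Sequential(
                    nn.Conv2d(prev, c, kernel_size=3, stride=2, padding=1),
                    nn.ReLU(inplace=True)))
                prev = c
            self.stages = nn.ModuleList(stages)

        def forward(self, x):
            feats = []
            for stage in self.stages:
                x = stage(x)
                feats.append(x)
            return feats  # five feature maps at strides 2, 4, 8, 16, 32

    channels = [16, 32, 64, 128, 256]
    model = JTFN(DummyEncoder(channels), JTFNDecoder(channels, use_topo=True),
                 channels, num_classes=1, steps=2)
    out = model(torch.randn(2, 3, 64, 64))
    print(out['output'].shape)  # expected: torch.Size([2, 1, 64, 64])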