RFDiffusion中的ContigMap类介绍
功能概述
在RFDiffusion中,ContigMap 类的主要作用是帮助管理和处理蛋白质设计中模板和目标序列的残基映射关系。它为用户提供了一种方式来描述蛋白质片段的拓扑结构,并将输入数据(如残基编号、链编号等)映射到模型中使用的内部索引系统。
源代码:
class ContigMap:
"""
Class for doing mapping.
Inherited from Inpainting. To update at some point.
Supports multichain or multiple crops from a single receptor chain.
Also supports indexing jump (+200) or not, based on contig input.
Default chain outputs are inpainted chains as A (and B, C etc if multiple chains), and all fragments of receptor chain on the next one (generally B)
Output chains can be specified. Sequence must be the same number of elements as in contig string
"""
def __init__(
self,
parsed_pdb,
contigs=None,
inpaint_seq=None,
inpaint_str=None,
length=None,
ref_idx=None,
hal_idx=None,
idx_rf=None,
inpaint_seq_tensor=None,
inpaint_str_tensor=None,
topo=False,
provide_seq=None,
inpaint_str_strand=None,
inpaint_str_helix=None,
inpaint_str_loop=None
):
# sanity checks
if contigs is None and ref_idx is None:
sys.exit("Must either specify a contig string or precise mapping")
if idx_rf is not None or hal_idx is not None or ref_idx is not None:
if idx_rf is None or hal_idx is None or ref_idx is None:
sys.exit(
"If you're specifying specific contig mappings, the reference and output positions must be specified, AND the indexing for RoseTTAFold (idx_rf)"
)
self.chain_order = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
if length is not None:
if "-" not in length:
self.length = [int(length), int(length) + 1]
else:
self.length = [int(length.split("-")[0]), int(length.split("-")[1]) + 1]
else:
self.length = None
self.ref_idx = ref_idx
self.hal_idx = hal_idx
self.idx_rf = idx_rf
parse_inpaint = lambda x: "/".join(x).split("/") if x is not None else None
self.inpaint_seq = parse_inpaint(inpaint_seq)
self.inpaint_str = parse_inpaint(inpaint_str)
self.inpaint_str_helix=parse_inpaint(inpaint_str_helix)
self.inpaint_str_strand=parse_inpaint(inpaint_str_strand)
self.inpaint_str_loop=parse_inpaint(inpaint_str_loop)
self.inpaint_seq_tensor = inpaint_seq_tensor
self.inpaint_str_tensor = inpaint_str_tensor
self.parsed_pdb = parsed_pdb
self.topo = topo
if ref_idx is None:
# using default contig generation, which outputs in rosetta-like format
self.contigs = contigs
(
self.sampled_mask,
self.contig_length,
self.n_inpaint_chains,
) = self.get_sampled_mask()
self.receptor_chain = self.chain_order[self.n_inpaint_chains]
(
self.receptor,
self.receptor_hal,
self.receptor_rf,
self.inpaint,
self.inpaint_hal,
self.inpaint_rf,
) = self.expand_sampled_mask()
self.ref = self.inpaint + self.receptor
self.hal = self.inpaint_hal + self.receptor_hal
self.rf = self.inpaint_rf + self.receptor_rf
else:
# specifying precise mappings
self.ref = ref_idx
self.hal = hal_idx
self.rf = idx_rf
self.mask_1d = [False if i == ("_", "_") else True for i in self.ref]
# take care of sequence and structure masking
if self.inpaint_seq_tensor is None:
if self.inpaint_seq is not None:
self.inpaint_seq = self.get_inpaint_seq_str(self.inpaint_seq)
else:
self.inpaint_seq = np.array(
[True if i != ("_", "_") else False for i in self.ref]
)
else:
self.inpaint_seq = self.inpaint_seq_tensor
if self.inpaint_str_tensor is None:
if self.inpaint_str is not None:
self.inpaint_str = self.get_inpaint_seq_str(se