统计PDB中各个链的氨基酸数量和辅助因子类型

yy951010 发表于 2024-9-25 08:36

from Bio.PDB import PDBParser
from collections import defaultdict
# Count, for every chain of a PDB structure, the number of standard amino-acid
# residues and the number/kinds of all other residues ("cofactors"), and write
# a plain-text report.

# Input PDB file and output report paths.
pdb_file = "C:/Users/Administrator/Desktop/8bcw.pdb"
output_file = "C:/Users/Administrator/Desktop/results.txt"

# Initialise the PDB parser and load the structure.
parser = PDBParser(QUIET=True)
structure = parser.get_structure("PDB_structure", pdb_file)

# Three-letter codes of the 20 standard amino acids; any residue whose name is
# not listed here is counted as a cofactor.
# NOTE(review): this means water (HOH) and modified residues (e.g. MSE) are
# reported as cofactors too -- confirm whether they should be filtered out.
amino_acids = ['ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLN', 'GLU', 'GLY', 'HIS', 'ILE', 'LEU', 'LYS', 'MET',
               'PHE', 'PRO', 'SER', 'THR', 'TRP', 'TYR', 'VAL']
# Set copy for O(1) membership tests inside the residue loop.
standard_residues = frozenset(amino_acids)

# Cofactor names seen in ANY chain; filled while iterating so the final
# summary covers the whole structure instead of only the last chain.
ligand_summary = set()

# The report contains non-ASCII text, so pin the encoding instead of relying
# on the platform default.
with open(output_file, 'w', encoding='utf-8') as f:
    # Walk every chain of every model and tally its residues.
    for model in structure:
        for chain in model:
            amino_acid_count = 0
            ligand_count = 0
            # Per-chain count of each cofactor residue name.
            ligand_types = defaultdict(int)

            for residue in chain:
                res_name = residue.get_resname().strip()
                # Classify the residue as amino acid or cofactor.
                if res_name in standard_residues:
                    amino_acid_count += 1
                else:
                    ligand_types[res_name] += 1
                    ligand_count += 1

            # Per-chain totals.
            f.write(f"链 {chain.id}: 氨基酸数量 = {amino_acid_count}, 辅助因子数量 = {ligand_count}\n")

            # Per-chain breakdown by cofactor type.
            if ligand_types:
                f.write(f"链 {chain.id} 的辅助因子分布：\n")
                for ligand, count in ligand_types.items():
                    f.write(f"{ligand}: {count}\n")
            else:
                f.write(f"链 {chain.id} 没有检测到辅助因子\n")

            f.write("\n")
            ligand_summary.update(ligand_types)

    # Structure-wide list of cofactor types, written while the file is still
    # open; sorted so the report is reproducible between runs.
    if ligand_summary:
        f.write(f"检测到的辅助因子种类: {', '.join(sorted(ligand_summary))}\n")
    else:
        f.write("未检测到任何辅助因子\n")

print(f"结果已写入 {output_file}")

batch2000 发表于 2024-9-25 08:46

属于“既……又……”的学霸了这是{:1_921:}

angelhunte 发表于 2024-9-25 13:35

可以用于什么方面的研究？

初音MIKU公主 发表于 2024-9-25 16:23

第一次看到来计算生物的，虽然看不懂{:1_925:}，楼主应该要贴个实例吧

yy951010 发表于 2024-9-26 08:34

angelhunte 发表于 2024-9-25 13:35
可以用于什么方面的研究？

做结构生物学的，有时候嫌麻烦，这样方便

yy951010 发表于 2024-9-26 08:35

初音MIKU公主 发表于 2024-9-25 16:23
第一次看到来计算生物的，虽然看不懂，楼主应该要贴个实例吧

只要设置好输入和输出路径，运行后就能得到结果；不做这方面研究的话，一般用不到。

页: [1]

吾爱破解 - 52pojie.cn's Archiver

统计PDB中各个链的氨基酸数量和辅助因子类型