def parse_fasta_files(self):
    """
    Parse all FASTA files and record which species each gene occurs in.

    Every path in ``self.fasta_files`` is opened in turn and labelled
    ``species_<k>``, with ``k`` being its 1-based position in that sequence.
    For each record that ``SeqIO.parse`` yields from the file, the label is
    added to the set stored under the record's ``id`` in
    ``self.gene_presence``.

    Returns:
        None. ``self.gene_presence`` is updated in place.
    """
    # Assumes each file holds exactly one species, so the position in the
    # list doubles as the species identity.
    for position, fasta_path in enumerate(self.fasta_files, start=1):
        species_name = f"species_{position}"
        with open(fasta_path, "r") as handle:
            for record in SeqIO.parse(handle, "fasta"):
                # setdefault keeps this working whether gene_presence is a
                # plain dict or a defaultdict(set).
                seen_in = self.gene_presence.setdefault(record.id, set())
                seen_in.add(species_name)
# 保存核心基因
with open(core_file, "w") as f:
f.write("\n".join(core_genes))
print(f"核心基因已保存到 {core_file}")
# 保存非核心基因
with open(non_core_file, "w") as f:
f.write("\n".join(non_core_genes))
print(f"非核心基因已保存到 {non_core_file}")
# 保存特有基因
with open(unique_file, "w") as f:
for species, genes in unique_genes.items():
f.write(f">{species}\n")
f.write("\n".join(genes) + "\n")
print(f"特有基因已保存到 {unique_file}")
2.__init__.py
# Re-export GeneFamilyAnalysis at the package root so callers can import it
# directly from the package instead of from the gene_family_analysis module.
from .gene_family_analysis import GeneFamilyAnalysis
# Names exported by a star-import of this package.
__all__ = ["GeneFamilyAnalysis"]
3.以下示例展示如何使用这个包来筛选基因家族中的核心基因、非核心基因和特有基因:
from mygenefamily import GeneFamilyAnalysis