# Forum note: post last edited by 苏紫方璇 on 2024-9-10 00:44
from Bio import SeqIO
import pandas as pd
# 读取FASTA文件并统计核苷酸数量
def count_nucleotides(fasta_file):
    """Return the sequence length of every record in a FASTA file.

    Args:
        fasta_file: FASTA source handed unchanged to ``SeqIO.parse`` with
            the ``"fasta"`` format.

    Returns:
        A list with one dict per record, in the order the parser yields
        them. Each dict stores the record's ``id`` under ``"Gene Name"``
        and ``len(record.seq)`` under ``"Nucleotide Count"``. The list is
        empty when the parser yields no records.
    """
    # len(record.seq) is the full sequence length, so every symbol in the
    # sequence is counted, not only A/C/G/T.
    return [
        {"Gene Name": record.id, "Nucleotide Count": len(record.seq)}
        for record in SeqIO.parse(fasta_file, "fasta")
    ]
# 将统计结果输出到Excel表格
def export_to_excel(data, output_file):
    """Write tabular records to an Excel file.

    Args:
        data: Input used to build a ``pd.DataFrame``.
        output_file: Destination handed to ``DataFrame.to_excel``.
    """
    df = pd.DataFrame(data)
    # index=False keeps the DataFrame's row index out of the sheet.
    df.to_excel(output_file, index=False)
# 主函数
if __name__ == "__main__":
    fasta_file = "path/to/your/genes.fasta"  # replace with the path to your FASTA file
    output_file = "gene_nucleotide_counts.xlsx"
    # Collect the sequence length of every record in the FASTA file.
    gene_counts = count_nucleotides(fasta_file)
    # Write the collected counts to the Excel workbook.
    export_to_excel(gene_counts, output_file)
    print(f"统计结果已输出到 {output_file}")
|