本帖最后由 kesai 于 2019-12-18 18:11 编辑
# @Description : 词频统计 (word-frequency count)
def getText():
    """Read ``hamlet.txt`` and return its text normalized for word counting.

    The text is lower-cased and every character in the punctuation set
    below is replaced by a single space, so that a later ``str.split()``
    yields bare words.

    Returns:
        str: The normalized contents of ``hamlet.txt``.

    Raises:
        OSError: If ``hamlet.txt`` cannot be opened or read (for example
            ``FileNotFoundError`` when it is not in the working directory).
    """
    # NOTE(review): the set contains a typographic quote (U+2018) where a
    # backtick or apostrophe was probably intended; kept unchanged so the
    # output stays the same -- confirm with the author.
    punctuation = '!"#$%&()*+,-./:;<=>?@[\\]^_‘{|}~'
    # The context manager closes the file handle even if read() raises.
    with open('hamlet.txt', 'r') as f:
        txt = f.read().lower()
    # One translate() pass maps each special character to a space.
    return txt.translate(str.maketrans(punctuation, " " * len(punctuation)))
def taleSecond(item):
    """Return the element at index 1 of ``item``.

    Usable as a ``key=`` function when sorting ``(word, count)`` pairs
    by their count.
    """
    second = item[1]
    return second
# Count how often each word occurs in the normalized text.
txt = getText()
words = txt.split()
counts = dict()
for word in words:
    counts[word] = counts.get(word, 0) + 1
print(counts)

# Order the (word, count) pairs from most to least frequent. list.sort() is
# stable, so words with equal counts keep their first-seen order.
items = list(counts.items())
items.sort(key=lambda x: x[1], reverse=True)

# Slice rather than index 0..9 so a text with fewer than ten distinct words
# prints what it has instead of raising IndexError.
for word, count in items[:10]:
    # Word left-aligned in 10 columns, count right-aligned in 5.
    print('{0:<10}{1:>5}'.format(word, count))