去除包含中文字符的代码行
本帖最后由 Mis16800 于 2024-5-11 09:23 编辑。去除包含中文字符的代码行:
```
```
import os
import re
def remove_chinese_code_and_comments_in_folder(folder_path):
    """Rewrite every .java/.js/.py file under *folder_path* in place.

    Walks the directory tree with ``os.walk``, reads each matching file as
    UTF-8, passes its text through ``remove_chinese_code_and_comments`` and
    overwrites the file with the cleaned text.

    Args:
        folder_path: Root directory to scan recursively.
    """
    for root, _dirs, files in os.walk(folder_path):
        for file_name in files:
            if not file_name.endswith((".java", ".js", ".py")):
                continue
            file_path = os.path.join(root, file_name)
            # "r+" is required: plain "r" is read-only, so the truncate()
            # and write() calls below would raise io.UnsupportedOperation.
            with open(file_path, "r+", encoding="utf-8") as file:
                code = file.read()
                cleaned_code = remove_chinese_code_and_comments(code)
                # Rewind and empty the file before writing, so that no tail
                # of the old (longer) content is left behind.
                file.seek(0)
                file.truncate()
                file.write(cleaned_code)
def remove_chinese_code_and_comments(code):
    """Strip Chinese text from source code, one line at a time.

    For each line:

    * If the line contains a comment marker (``//``, ``#`` or ``/*``,
      checked in that order) and the text after the first occurrence of
      that marker contains a CJK character (U+4E00-U+9FFF), the comment is
      dropped and only the code before the marker is kept, with trailing
      whitespace removed.
    * If the remaining line still contains a CJK character, the whole line
      is replaced by an empty string, so the line count is preserved.

    The scan is purely textual: markers inside string literals are treated
    as comment starts, and the interior lines of block comments or
    docstrings are handled only by the second rule.

    Args:
        code: Full source text, with lines separated by ``'\\n'``.

    Returns:
        The cleaned text, with the same number of lines as the input.
    """
    chinese_char = re.compile('[\u4e00-\u9fff]')
    result = []
    for line in code.split('\n'):
        # Drop a trailing comment that contains Chinese characters.
        for marker in ("//", "#", "/*"):
            if marker in line:
                code_part, _, comment = line.partition(marker)
                if chinese_char.search(comment):
                    # Keep the code part as a string; the previous version
                    # assigned the whole split list here, which made the
                    # regex search below raise TypeError.
                    line = code_part.rstrip()
                break  # only the first matching marker kind is considered
        # Blank out code lines that still contain Chinese characters.
        if chinese_char.search(line):
            line = ""
        result.append(line)
    return '\n'.join(result)
# Folder to process. NOTE(review): the value below looks like a placeholder;
# replace it with a real directory path before running.
folder_path = "文件夹路径文件夹路径"
remove_chinese_code_and_comments_in_folder(folder_path)
```
``` 感谢分享! 感谢,有用! 如果是上传代码到git就可以用来删除中文再上传。 或者可以加入中文字符转拼音或拼音首字母。 核心就是用[\u4e00-\u9fff]正则匹配中文字符的unicode编码。 可以用 r+ 实现读取并写入文件,没必要打开两次。 三滑稽甲苯 发表于 2024-5-10 15:01
可以用 r+ 实现读取并写入文件,没必要打开两次
感谢你的建议,已经修改了。 本帖最后由 Mis16800 于 2024-5-11 09:30 编辑。
```
之前的代码
def remove_chinese_code_and_comments_in_folder(folder_path):
    """Clean every .java/.js/.py file below *folder_path* in place.

    Each matching file is read as UTF-8, filtered through
    ``remove_chinese_code_and_comments`` and then reopened in write mode
    to store the filtered text.
    """
    wanted_suffixes = (".java", ".js", ".py")
    for current_dir, _subdirs, names in os.walk(folder_path):
        for name in names:
            if not name.endswith(wanted_suffixes):
                continue
            file_path = os.path.join(current_dir, name)
            # First pass: load the current contents.
            with open(file_path, "r", encoding="utf-8") as source:
                original_text = source.read()
            filtered_text = remove_chinese_code_and_comments(original_text)
            # Second pass: "w" truncates the file before the new text is written.
            with open(file_path, "w", encoding="utf-8") as target:
                target.write(filtered_text)
修改后的代码
def remove_chinese_code_and_comments_in_folder(folder_path):
    """Rewrite every .java/.js/.py file under *folder_path* in place.

    Single-open variant: each matching file is opened once for both reading
    and writing, its text is passed through
    ``remove_chinese_code_and_comments``, and the result replaces the
    file's contents.

    Args:
        folder_path: Root directory to scan recursively.
    """
    for root, _dirs, files in os.walk(folder_path):
        for file_name in files:
            if not file_name.endswith((".java", ".js", ".py")):
                continue
            file_path = os.path.join(root, file_name)
            # Mode must be "r+": with plain "r" the handle is read-only and
            # truncate()/write() raise io.UnsupportedOperation.
            with open(file_path, "r+", encoding="utf-8") as file:
                code = file.read()
                cleaned_code = remove_chinese_code_and_comments(code)
                file.seek(0)  # move the file pointer back to the start
                file.truncate()  # discard everything from the pointer onward
                file.write(cleaned_code)
```
页:
[1]