フォルダの中のファイルから日本語を抽出する
-
# This is a sample Python script.
import os
import re
import sys

# Target directory to scan <--- set this (or pass a path on the command line).
dir_path: str = "/Users/aoyamanozomu/PycharmProjects/test"

# Compiled once at import time instead of once per file. The character class
# covers CJK symbols/punctuation, hiragana, katakana and CJK unified ideographs.
_JAPANESE_PATTERN = re.compile(
    r'[\u3000-\u303F\u3040-\u309F\u30A0-\u30FF\u4E00-\u9FFF]+'
)


def findAll(dir_path: str) -> None:
    """Walk ``dir_path`` recursively and print the Japanese text in every file.

    For each directory visited, the directory itself, its sub-directories and
    its files are printed; every file is then passed to ``print_japanese``.

    Args:
        dir_path: Root directory of the walk.
    """
    for root, dirs, files in os.walk(dir_path):
        print('directory:', root)
        for subdir in dirs:
            print('sub directory:', os.path.join(root, subdir))
        for file in files:
            print('file :', os.path.join(root, file))
            try:
                # Pass the directory currently being walked: files in nested
                # folders do not live directly under the top-level path.
                print_japanese(file, root)
            except (UnicodeDecodeError, OSError) as err:
                # A binary or unreadable file must not abort the whole scan.
                print(f'skipped {os.path.join(root, file)}: {err}')


def print_japanese(filename, directory=None):
    """Print every run of Japanese characters found in one file.

    Args:
        filename: Name of the file, relative to ``directory``.
        directory: Directory containing the file. When omitted, the
            module-level ``dir_path`` is used, as in the original script.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    print(f'File: {filename}')
    if directory is None:
        directory = dir_path
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale default encoding.
    with open(os.path.join(directory, filename), 'r', encoding='utf-8') as f:
        content = f.read()
    # find japanese
    for match in _JAPANESE_PATTERN.findall(content):
        print(match)


# Press the green button in the gutter to run the script.
if __name__ == '__main__':
    # An optional first command-line argument overrides the hard-coded path.
    findAll(sys.argv[1] if len(sys.argv) > 1 else dir_path)
-
@yuhope ありがとう