I am trying to iterate through directories and subdirectories to find duplicate files but issue encountered here is script is giving some errors:
Traceback (most recent call last):
File "./fileDupchknew.py", line 29, in <module>
dup_fileremove(dirname)
File "./fileDupchknew.py", line 26, in dup_fileremove
os.remove(filepath)
OSError: [Errno 21] Is a directory: '/tmp/rishabh-test/new-test'
Script:
#!/usr/bin/python
import os
import hashlib
import sys
dirname = sys.argv[1] os.chdir(dirname)
def dup_fileremove(dir):
duplicate = set()
os.chdir(dir)
path=os.getcwd()
print ("The dir is: ", path)
for filename in os.listdir(dir):
filehash = None
filepath=os.path.join(dir, filename)
print("Current file path is: ", filepath)
if os.path.isdir(filepath):
dup_fileremove(filepath)
elif os.path.isfile(filepath):
filehash =hashlib.md5(file(filepath).read()).hexdigest()
if filehash not in duplicate:
duplicate.add(filehash)
else:
os.remove(filepath)
print("removed : ", filepath)
dup_fileremove(dirname)
def dup_fileremove(dir):commented out in your original code, or is that a transcription error from writing this post?diris not a good variable name since it's the name of a built-in function.