dennisro 5 years ago
commit
3398fccfc4
1 changed file, 100 additions and 0 deletions

wtd.py  +100 -0

@@ -0,0 +1,100 @@
+import os, sys
+import hashlib
+import argparse
+import humanfriendly
+
+parser = argparse.ArgumentParser(description='Find duplicate files')
+
+# Pop the default optional-arguments group so required arguments are listed first in --help
+optional = parser._action_groups.pop()
+required = parser.add_argument_group('required arguments')
+
+optional.add_argument('--size', type=str,
+                    help='Only output files larger than SIZE, e.g. 16, 16K, 16M, 16G, 16T')
+
+required.add_argument('--dir', type=str, nargs='?', required=True, action='append',
+                    help='Directory to scan. Can be used multiple times.')
+
+parser._action_groups.append(optional)
+args = parser.parse_args()
+
+# Minimum file size to report; 0 means no threshold ('bytes' would shadow a built-in name)
+if args.size:
+    min_size = humanfriendly.parse_size(args.size)
+else:
+    min_size = 0
+
+
+def findDup(parentFolder):
+    # Dups in format {hash:[names]}
+    dups = {}
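+    # os.walk yields (directory, subdirectories, filenames) for each folder under parentFolder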
+    for dirName, subdirs, fileList in os.walk(parentFolder):
+        print('Scanning %s...' % dirName)
+        for filename in fileList:
+            # Get the path to the file
+            path = os.path.join(dirName, filename)
+            # Calculate hash
+            if os.path.exists(path):
+                file_size = os.path.getsize(path)
+                if file_size > min_size:
+                    file_hash = hashfile(path)
+                    # Record the path, prefixed with a human-readable size
+                    entry = humanfriendly.format_size(file_size, binary=True) + ' ' + path
+                    if file_hash in dups:
+                        dups[file_hash].append(entry)
+                    else:
+                        dups[file_hash] = [entry]
+    return dups
+ 
+ 
+# Merge dict2 into dict1 in place, concatenating the lists of paths
+def joinDicts(dict1, dict2):
+    for key in dict2.keys():
+        if key in dict1:
+            dict1[key] = dict1[key] + dict2[key]
+        else:
+            dict1[key] = dict2[key]
+ 
+ 
+def hashfile(path, blocksize=65536):
+    # Hash in 64 KiB chunks so large files need not fit in memory
+    hasher = hashlib.sha1()
+    with open(path, 'rb') as afile:
+        buf = afile.read(blocksize)
+        while len(buf) > 0:
+            hasher.update(buf)
+            buf = afile.read(blocksize)
+    return hasher.hexdigest()
+ 
+ 
+def printResults(dict1):
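+    # Keep only hash buckets that contain more than one path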
+    results = list(filter(lambda x: len(x) > 1, dict1.values()))
+    if len(results) > 0:
+        print('\n\033[1m  Duplicates Found\033[0m\n')
+        print('  The following files are identical. The names may differ, but the content is identical')
+        print('______________\n')
+        for result in results:
+            for subresult in result:
+                print('  %s' % subresult)
+            print('______________\n')
+ 
+    else:
+        if min_size:
+            print('No duplicate files bigger than ' + humanfriendly.format_size(min_size, binary=True) + ' found.')
+        else:
+            print('No duplicate files found.')
+ 
+ 
+if __name__ == '__main__':
+    # --dir is required, so parse_args() has already exited with a usage
+    # message if no arguments were given; no manual sys.argv check is needed.
+    dups = {}
+    folders = args.dir
+    for i in folders:
+        if os.path.exists(i):
+            # Find duplicates in this folder and merge them into dups
+            joinDicts(dups, findDup(i))
+        else:
+            print('%s is not a valid path, please verify' % i)
+            sys.exit(1)
+    printResults(dups)
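A quick usage sketch of the flags defined above (the directory names are placeholders, not part of the commit):

    python wtd.py --dir /home/user/photos --dir /mnt/backup/photos --size 16M

Note that humanfriendly parses decimal suffixes by default, so '16M' means 16,000,000 bytes; use the IEC form '16MiB' for 16 × 1024 × 1024. The report itself formats sizes with binary=True, i.e. in KiB/MiB units.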