| 
					
				 | 
			
			
				@@ -0,0 +1,100 @@ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import os, sys 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import hashlib 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import argparse 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import humanfriendly 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+parser = argparse.ArgumentParser(description='Find dupes')  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+optional = parser._action_groups.pop() # Edited this line 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+required = parser.add_argument_group('required arguments') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+optional.add_argument('--size', type=str, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    help='Only output files greater than \'size\'. 16, 16K, 16M, 16G, 16T') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+required.add_argument('--dir', type=str, nargs='?', required=True, action='append', 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    help='Directory to scan. Can be used multiple times.') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+parser._action_groups.append(optional) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+args = parser.parse_args() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+if args.size: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    bytes = humanfriendly.parse_size(args.size) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+else: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    bytes = 0 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+def findDup(parentFolder): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    # Dups in format {hash:[names]} 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    dups = {} 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    for dirName, subdirs, fileList in os.walk(parentFolder): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        print('Scanning %s...' % dirName) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        for filename in fileList: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            # Get the path to the file 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            path = os.path.join(dirName, filename) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            # Calculate hash 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            if os.path.exists(path): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                file_size = os.path.getsize(path) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                if file_size > bytes: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    file_hash = hashfile(path) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    # Add or append the file path 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    all = str(humanfriendly.format_size(file_size, binary=True)) + ' ' + path 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    if file_hash in dups: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        dups[file_hash].append(all) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    else: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        dups[file_hash] = [all] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    return dups 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+# Joins two dictionaries 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+def joinDicts(dict1, dict2): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    for key in dict2.keys(): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        if key in dict1: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            dict1[key] = dict1[key] + dict2[key] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        else: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            dict1[key] = dict2[key] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+def hashfile(path, blocksize = 65536): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    afile = open(path, 'rb') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    hasher = hashlib.sha1() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    buf = afile.read(blocksize) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    while len(buf) > 0: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        hasher.update(buf) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        buf = afile.read(blocksize) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    afile.close() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    return hasher.hexdigest() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+def printResults(dict1): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    results = list(filter(lambda x: len(x) > 1, dict1.values())) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    if len(results) > 0: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        print('\n\033[1m  Duplicates Found\033[0m\n') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        print('  The following files are identical. The name could differ, but the content is identical') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        print('______________\n') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        for result in results: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            for subresult in result: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                print('  %s' % subresult) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            print('______________\n') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    else: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        if bytes: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            print('No duplicate files bigger than ' + str(humanfriendly.format_size(bytes, binary=True)) + ' found.') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        else: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            print('No duplicate files found.') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+if __name__ == '__main__': 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    if len(sys.argv) > 1: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        dups = {} 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        folders = args.dir 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        for i in folders: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            # Iterate the folders given 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            if os.path.exists(i): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                # Find the duplicated files and append them to the dups 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                joinDicts(dups, findDup(i)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            else: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                print('%s is not a valid path, please verify' % i) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                sys.exit() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        printResults(dups) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    else: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        print('Usage: python dupFinder.py folder or python dupFinder.py folder1 folder2 folder3') 
			 |