@@ -6,12 +6,15 @@ import humanfriendly

parser = argparse.ArgumentParser(description='What the dupe!?')

-optional = parser._action_groups.pop() # Edited this line
+optional = parser._action_groups.pop()
required = parser.add_argument_group('required arguments')

optional.add_argument('--threshold', type=str,
                      help='Only output files greater than \'size\', e.g. 100M')

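+# --exclude lets whole directory paths be skipped by findDup()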
+optional.add_argument('--exclude', type=str, nargs='?', action='append',
+                      help='Directory to exclude from the scan. Can be issued multiple times.')
+
required.add_argument('--dir', type=str, nargs='?', required=True, action='append',
                      help='Directory to scan. Can be issued multiple times.')

@@ -21,29 +24,37 @@ args = parser.parse_args()

sizes = ['10M', '50M', '100M', '1G', '5G', 'gt5GB']

+# Report which directories will be skipped during the scan
+if args.exclude:
+    print('Excluding: %s' % args.exclude)
+
if args.threshold:
-    bytes = humanfriendly.parse_size(args.threshold)
+    threshold = humanfriendly.parse_size(args.threshold)
else:
-    bytes = 0
+    threshold = 0

def findDup(parentFolder):
    # Dups in format {hash:[names]}
    dups = {}
    print()
    for dirName, subdirs, fileList in os.walk(parentFolder):
-        print(' Scanning %s...' % dirName)
-        for filename in fileList:
-            # Get the path to the file
-            path = os.path.join(dirName, filename)
-            # Calculate hash
-            if os.path.exists(path):
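+        # Skip any directory that was passed via --exclude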
+        if args.exclude and dirName in args.exclude:
+            continue
+        else:
+            print(' Scanning %s...' % dirName)
+            for filename in fileList:
+                # Get the path to the file
+                path = os.path.join(dirName, filename)
                # Calculate hash
+                if os.path.exists(path):
+                    # Calculate hash
+                    file_hash = hashfile(path)
+                    # Add or append the file path
+                    if file_hash in dups:
+                        dups[file_hash].append(path)
+                    else:
+                        dups[file_hash] = [path]
    return dups


@@ -57,17 +68,21 @@ def joinDicts(dict1, dict2):


def hashfile(path, blocksize = 65536):
-    try:
-        afile = open(path, 'rb')
-        hasher = hashlib.sha256()
-        buf = afile.read(blocksize)
-        while len(buf) > 0:
-            hasher.update(buf)
+    file_size = os.path.getsize(path)
+    # Only hash files larger than the threshold
+    if threshold == 0 or file_size > threshold:
+        try:
+            print('Hashing ' + path)
+            afile = open(path, 'rb')
+            hasher = hashlib.sha256()
            buf = afile.read(blocksize)
+            while len(buf) > 0:
+                hasher.update(buf)
+                buf = afile.read(blocksize)
+            afile.close()
+            return hasher.hexdigest()
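+        # Any error while opening or reading the file is ignored; hashfile() then returns None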
+        except:
+            pass


def printResults(dict1):
@@ -75,14 +90,14 @@ def printResults(dict1):
    for size in sizes:
        final[size] = []
    del size
-    if bytes > 0:
-        final[bytes] = []
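+    # A custom threshold gets its own result bucket, keyed by the parsed byte count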
+    if threshold > 0:
+        final[threshold] = []
    results = list(filter(lambda x: len(x) > 1, dict1.values()))
    for result in results:
        file_size = os.path.getsize(result[0])
-        if bytes > 0:
-            if file_size >= bytes:
-                final[bytes].append(result)
+        if threshold > 0:
+            if file_size >= threshold:
+                final[threshold].append(result)

        else:
            #0=10MB 1=50MB 2=100MB 3=1GB 4=5GB
@@ -98,15 +113,16 @@ def printResults(dict1):
                final[sizes[5]].append(result)
            else:
                final[sizes[0]].append(result)
-    if len(results) > 0 and not bytes:
+    # Print the report when there is something to show: any duplicates when no
+    # threshold was given, or at least one group that met the threshold.
+    if len(results) > 0 and (threshold == 0 or len(final[threshold]) > 0):
        print('___________________')
        print('\n\033[1;34m\033[1;34m\u25b6 Duplicates Found\033[0m\n')
        print(' The following files are identical. The name could differ, but the content is identical')
        print('___________________')
        new = ['0']
-        if bytes > 0:
-            print("\n\033[1;34m\u25b6 Files bigger than %s\033[0m" % humanfriendly.format_size(bytes, binary=True))
-            for dupe in final[bytes]:
+        if threshold > 0:
+            print("\n\033[1;34m\u25b6 Files bigger than %s\033[0m" % humanfriendly.format_size(threshold, binary=True))
+            for dupe in final[threshold]:
                print('___________________\n')
                for file in dupe:
                    print(' %s' % str(file))