dennisro 5 lat temu
rodzic
commit
b49969a0b2
1 zmieniony plik z 50 dodaniami i 34 usunięciami
  1. 50 34
      2.py

+ 50 - 34
2.py

@@ -6,12 +6,15 @@ import humanfriendly
 
 parser = argparse.ArgumentParser(description='What the dupe!?')
 
-optional = parser._action_groups.pop() # Edited this line
+optional = parser._action_groups.pop()
 required = parser.add_argument_group('required arguments')
 
 optional.add_argument('--threshold', type=str,
                     help='Only output files greater than \'size\', e.g. 100M')
 
+optional.add_argument('--exclude', type=str, nargs='?', action='append',
+                    help='Directory to exclude from the scan. Can be issued multiple times.')
+
 required.add_argument('--dir', type=str, nargs='?', required=True, action='append',
                     help='Directory to scan. Can be issued multiple times.')
 
@@ -21,29 +24,37 @@ args = parser.parse_args()
 
 sizes = ['10M', '50M', '100M', '1G', '5G', 'gt5GB']
 
+if args.exclude:
+    print('hi')
+    exclude=args.exclude
+    print(exclude)
+
 if args.threshold:
-    bytes = humanfriendly.parse_size(args.threshold)
+    threshold = humanfriendly.parse_size(args.threshold)
 else:
-    bytes = 0
+    threshold = 0
 
 def findDup(parentFolder):
     # Dups in format {hash:[names]}
     dups = {}
     print()
     for dirName, subdirs, fileList in os.walk(parentFolder):
-        print('  Scanning %s...' % dirName)
-        for filename in fileList:
-            # Get the path to the file
-            path = os.path.join(dirName, filename)
-            # Calculate hash
-            if os.path.exists(path):
+        if args.exclude and dirName in args.exclude:
+            continue
+        else:
+            print('  Scanning %s...' % dirName)
+            for filename in fileList:
+                # Get the path to the file
+                path = os.path.join(dirName, filename)
                 # Calculate hash
-                file_hash = hashfile(path)
-                # Add or append the file path
-                if file_hash in dups:
-                    dups[file_hash].append(path)
-                else:
-                    dups[file_hash] = [path]
+                if os.path.exists(path):
+                    # Calculate hash
+                    file_hash = hashfile(path)
+                    # Add or append the file path
+                    if file_hash in dups:
+                        dups[file_hash].append(path)
+                    else:
+                        dups[file_hash] = [path]
     return dups
 
 
@@ -57,17 +68,21 @@ def joinDicts(dict1, dict2):
 
 
 def hashfile(path, blocksize = 65536):
-    try:
-        afile = open(path, 'rb')
-        hasher = hashlib.sha256()
-        buf = afile.read(blocksize)
-        while len(buf) > 0:
-            hasher.update(buf)
+    file_size = os.path.getsize(path)
+    # Only hash files larger than threshold; a threshold of 0 hashes every file
+    if threshold == 0 or (threshold > 0 and file_size > threshold):
+        try:
+            print('Hashing '+path)
+            afile = open(path, 'rb')
+            hasher = hashlib.sha256()
             buf = afile.read(blocksize)
-        afile.close()
-        return hasher.hexdigest()
-    except:
-        pass
+            while len(buf) > 0:
+                hasher.update(buf)
+                buf = afile.read(blocksize)
+            afile.close()
+            return hasher.hexdigest()
+        except:
+            pass
 
 
 def printResults(dict1):
@@ -75,14 +90,14 @@ def printResults(dict1):
     for size in sizes:
         final[size] = []
     del size
-    if bytes > 0:
-        final[bytes] = []
+    if threshold > 0:
+        final[threshold] = []
     results = list(filter(lambda x: len(x) > 1, dict1.values()))
     for result in results:
         file_size = os.path.getsize(result[0])
-        if bytes > 0:
-            if file_size >= bytes:
-                final[bytes].append(result)
+        if threshold > 0:
+            if file_size >= threshold:
+                final[threshold].append(result)
 
         else:
             #0=10MB 1=50MB 2=100MB 3=1GB 4=5GB
@@ -98,15 +113,16 @@ def printResults(dict1):
                 final[sizes[5]].append(result)
             else:
                 final[sizes[0]].append(result)
-    if len(results) > 0 and not bytes:
+            final[threshold]=[False]
+    if len(results) > 0 and len(final[threshold]) > 0:
         print('___________________')
         print('\n\033[1;34m\033[1;34m\u25b6 Duplicates Found\033[0m\n')
         print('  The following files are identical. The name could differ, but the content is identical')
         print('___________________')
         new = ['0']
-        if bytes > 0:
-             print("\n\033[1;34m\u25b6 Files bigger than %s\033[0m" % humanfriendly.format_size(bytes, binary=True))
-             for dupe in final[bytes]:
+        if threshold > 0:
+             print("\n\033[1;34m\u25b6 Files bigger than %s\033[0m" % humanfriendly.format_size(threshold, binary=True))
+             for dupe in final[threshold]:
                  print('___________________\n')
                  for file in dupe:
                      print('  %s' % str(file))