Replicate a modified du command in python

2019-08-19 09:14发布

问题:

Here is the command I execute in the shell. I want to get the same results in Python. Can I do this using the os module, and if so, how? I'm using grep -v here because some file names also contain that pattern. Please note that I don't want to invoke this from the shell.

du -ah 2> >(grep -v "permission denied") |grep [1-9][0-9]G | grep -v [0-9][0-9]K|grep -v [0-9][0-9]M|sort -nr -k 1| head -50

回答1:

You can use this Python program. It does not spawn any child processes or invoke the shell.

 #!/usr/bin/env python

 from __future__ import absolute_import
 from __future__ import print_function
 import subprocess
 import os
 import argparse

 def files_larger_than_no_child_process(min_bytes, count):
     """Print the ``count`` largest files under the current directory.

     Walks the tree rooted at ``"."`` with ``os.walk`` and prints one line
     per file in the form ``<size>   <path>``, largest first.  Only files
     strictly larger than ``min_bytes`` are considered.

     Paths are resolved with ``os.path.realpath``, so a symbolic link is
     reported under (and sized as) the file it points to.

     Args:
         min_bytes: Size threshold in bytes; files of this size or smaller
             are discarded.
         count: Maximum number of files to print.  Zero or a negative value
             prints nothing.

     Returns:
         None.  The result is written to standard output.
     """
     # Imported locally so this function does not rely on the file-level
     # import list being extended.
     import heapq

     # Collected as (size, path) tuples.
     candidates = []
     for root, _dirs, files in os.walk("."):
         for name in files:
             # realpath already yields an absolute, symlink-free path.
             file_path = os.path.realpath(os.path.join(root, name))
             try:
                 size = os.path.getsize(file_path)
             except OSError:
                 # Best effort: unreadable or vanished files (e.g. permission
                 # denied, dangling symlink) are skipped on purpose.
                 continue
             # Discard everything at or below the threshold.
             if size > min_bytes:
                 candidates.append((size, file_path))

     # nlargest is equivalent to sorted(..., reverse=True)[:count] but avoids
     # fully sorting the list when only a few entries are wanted; ties keep
     # the order in which the files were discovered.
     largest = heapq.nlargest(count, candidates, key=lambda entry: entry[0])

     for size, file_path in largest:
         print(size, " ", file_path)

 def main():
     """Parse the command line and print the largest files.

     Expects two positional integer arguments, ``min_bytes`` and ``count``,
     and passes them to ``files_larger_than_no_child_process``.
     """
     # The text has to be given as ``description``: the first positional
     # parameter of ArgumentParser is ``prog``, which would place the whole
     # sentence in the usage line instead of the program name.
     parser = argparse.ArgumentParser(
         description="Prints the top files that are larger than the given "
                     "bytes in the current directory recursively.")
     parser.add_argument("min_bytes", help="Print files larger than this value",
         type=int)
     parser.add_argument("count", help="Print at most the given number of files",
         type=int)
     args = parser.parse_args()

     files_larger_than_no_child_process(args.min_bytes, args.count)

 # Run main() only when this file is executed as a script, not when it is
 # imported as a module.
 if __name__ == "__main__":
     main()