Why is the standard logging module so slow when disabled?

2019-08-16 16:53发布

问题:

I did this benchmark:

import logging

import copy
import pstats
import cProfile

from io import StringIO

def difference(first_stats, second_stats):
    """Return a new pstats.Stats representing `first_stats` minus `second_stats`.

    Neither input is modified (the first is deep-copied). For functions
    present in both profiles, every counter/timing is subtracted entry by
    entry; functions that only appear in `second_stats` are copied into the
    result with their function name prefixed by '* ' so they stand out in
    the printed report.
    """
    first_stats = copy.deepcopy( first_stats )
    first_stats.total_calls -= second_stats.total_calls
    first_stats.prim_calls -= second_stats.prim_calls
    first_stats.total_tt -= second_stats.total_tt

    common_funcs = set( first_stats.stats ) & set( second_stats.stats )

    for func in common_funcs:
        first_cc, first_nc, first_tt, first_ct, first_callers = first_stats.stats[func]
        second_cc, second_nc, second_tt, second_ct, _second_callers = second_stats.stats[func]

        # Keep the first profile's callers. Previously a single shadowed
        # `callers` variable silently stored the SECOND profile's callers.
        first_stats.stats[func] = ( first_cc - second_cc, first_nc - second_nc,
                first_tt - second_tt, first_ct - second_ct, first_callers )

    for func, source in second_stats.stats.items():

        if func not in first_stats.stats:
            # func is a (filename, lineno, name) tuple; mark second-only entries.
            marked_func = ( '* ' + func[0], ) + func[1:]
            first_stats.stats[marked_func] = source

    return first_stats

def print_difference(stats):
    """Render `stats` (sorted by internal time) into a string and return it."""
    capture = StringIO()
    stats.stream = capture  # redirect print_stats away from stdout
    stats.sort_stats( "time" )
    stats.print_stats()
    return capture.getvalue()

def average(stats, average_count):
    """Divide every aggregate counter/timing in `stats` by `average_count`.

    Mutates `stats` in place and returns it so calls can be chained.
    """
    stats.total_calls /= average_count
    stats.prim_calls /= average_count
    stats.total_tt /= average_count

    for func in list( stats.stats ):
        cc, nc, tt, ct, callers = stats.stats[func]
        averaged = ( cc / average_count, nc / average_count,
                tt / average_count, ct / average_count, callers )
        stats.stats[func] = averaged

    return stats

def profile_something(profile_function, average_count, iterations_count):
    """Profile `profile_function(iterations_count)` `average_count` times.

    Accumulates every run into one pstats.Stats, averages it over the run
    count, and returns a (report_text, stats) pair. Progress is printed
    after each run.
    """
    report_stream = StringIO()
    accumulated = pstats.Stats( stream=report_stream )

    for run_number in range( 1, average_count + 1 ):
        run_profiler = cProfile.Profile()
        run_profiler.enable()

        profile_function( iterations_count )
        run_profiler.disable()
        accumulated.add( run_profiler )

        print( 'Profiled', '{0:7.3f}'.format( accumulated.total_tt ), 'seconds at', '%3d' % run_number,
                'for', profile_function.__name__, flush=True )

    average( accumulated, average_count )
    accumulated.sort_stats( "time" )
    accumulated.print_stats()  # writes into report_stream

    return "\n Profile results for %s\n%s" % ( profile_function.__name__, report_stream.getvalue() ), accumulated


def run_profiling(first_function, second_function, average_count, iterations_count):
    """Profile both functions, diff their stats, and assemble a report.

    Returns ((time_delta, first_total, second_total, first_name,
    second_name, formatted_iteration_count), report_text).
    """
    first_report, first_stats = profile_something( first_function, average_count, iterations_count )
    second_report, second_stats = profile_something( second_function, average_count, iterations_count )
    delta_report = print_difference( difference( first_stats, second_stats ) )
    time_delta = first_stats.total_tt - second_stats.total_tt

    truncate_at = 2500  # keep only the head of each (potentially huge) report
    report_stream = StringIO()
    print( first_report[:truncate_at], file=report_stream )
    print( '\n', second_report[:truncate_at], file=report_stream )
    print( '\n\nTotal difference\n', delta_report[:truncate_at], file=report_stream )

    summary = ( time_delta, first_stats.total_tt, second_stats.total_tt,
            first_function.__name__, second_function.__name__, format( iterations_count, ',d' ) )
    return summary, report_stream.getvalue()

def get_loggging_debug():
    """Return a fresh "benchmark" logger with a single stream handler attached.

    The global logger registry is cleared first so that repeated benchmark
    runs always start from an unconfigured logger.
    """
    logging.Logger.manager.loggerDict.clear()
    benchmark_log = logging.getLogger( "benchmark" )

    if benchmark_log.hasHandlers():
        return benchmark_log

    handler = logging.StreamHandler()
    handler.setFormatter( logging.Formatter(
            "%(asctime)s:%(msecs)010.6f - %(name)s.%(funcName)s:%(lineno)d - %(message)s",
            "%H:%M:%S" ) )
    benchmark_log.addHandler( handler )
    return benchmark_log

def logging_mod_log_debug_off(iterations_count):
    """Issue `iterations_count` debug() calls that the WARNING level suppresses."""
    benchmark_log = get_loggging_debug()
    benchmark_log.setLevel( "WARNING" )  # DEBUG records are filtered out

    for _ in range( iterations_count ):
        benchmark_log.debug( 'Message' )

def standardlog_fastdebug_off(iterations_count):
    """Baseline: guard each print with a plain boolean attribute lookup."""

    class _FlagHolder(object):
        # Minimal stand-in for a logger: a single on/off flag.

        def __init__(self, is_debug):
            self.is_debug = is_debug

    flags = _FlagHolder( False )

    for _ in range( iterations_count ):

        if flags.is_debug:
            print( 'Message' )

# Run the two benchmark variants once each over 10 million iterations,
# then dump the detailed reports followed by a one-line summary per run.
results = [ run_profiling( standardlog_fastdebug_off, logging_mod_log_debug_off, 1, 10000000 ) ]

print('\n\nResults details:')
for position, (summary, details) in enumerate( results, start=1 ):
    print( "\n%2d. %s\n" % ( position, details ) )

print('Results resume:')
for position, (summary, details) in enumerate( results, start=1 ):
    print( '%2d. %10.5f %10.5f %10.5f  %s - %s  = %s iterations' % ( (position,) + summary ) )

Which produces the following result on my computer:

$ python3.7.3 benchmark_tests.py
Profiled   0.910 seconds at   1 for standardlog_fastdebug_off
Profiled  10.450 seconds at   1 for logging_mod_log_debug_off


Results details:

 1.
 Profile results for standardlog_fastdebug_off
         5.0 function calls in 0.910 seconds

   Ordered by: internal time

      ncalls     tottime     percall     cumtime     percall filename:lineno(function)
         1.0       0.910       0.910       0.910       0.910 benchmark_tests.py:150(standardlog_fastdebug_off)
         1.0       0.000       0.000       0.000       0.000 {built-in method builtins.__build_class__}
         1.0       0.000       0.000       0.000       0.000 benchmark_tests.py:152(ClassName)
         1.0       0.000       0.000       0.000       0.000 benchmark_tests.py:154(__init__)
         1.0       0.000       0.000       0.000       0.000 {method 'disable' of '_lsprof.Profiler' objects}





 Profile results for logging_mod_log_debug_off
         20,000,058.0 function calls in 10.450 seconds

   Ordered by: internal time

      ncalls     tottime     percall     cumtime     percall filename:lineno(function)
         1.0       4.157       4.157      10.450      10.450 benchmark_tests.py:143(logging_mod_log_debug_off)
10,000,000.0       3.968       0.000       6.293       0.000 F:\Python\lib\logging\__init__.py:1362(debug)
10,000,000.0       2.325       0.000       2.325       0.000 F:\Python\lib\logging\__init__.py:1620(isEnabledFor)
         1.0       0.000       0.000       0.000       0.000 benchmark_tests.py:129(get_loggging_debug)
         1.0       0.000       0.000       0.000       0.000 F:\Python\lib\logging\__init__.py:1222(getLogger)
         1.0       0.000       0.000       0.000       0.000 F:\Python\lib\logging\__init__.py:1342(__init__)
         1.0       0.000       0.000       0.000       0.000 F:\Python\lib\logging\__init__.py:1310(_clear_cache)
         1.0       0.000       0.000       0.000       0.000 F:\Python\lib\threading.py:75(RLock)
         5.0       0.000       0.000       0.000       0.000 F:\Python\lib\logging\__init__.py:212(_acquireLock)
         3.0       0.000       0.000       0.000       0.000 {method 'clear' of 'dict' objects}
         1.0       0.000       0.000       0.000       0.000 F:\Python\lib\logging\__init__.py:794(_addHandlerRef)
         1.0       0.000       0.000       0.000       0.000 F:\Python\lib\logging\__init__.py:813(__init__)
         5.0       0.000       0.000       0.000       0.000 F:\Python\lib\logging\__init__.py:221(_releaseLock)
         1.0       0.000       0.000       0.000       0.000 F:\Python\lib\logging\__init__.py:1355(setLevel)
         1.0       0.000       0.000       0.000       0.000 F:\Python\lib\logging\__init__.py:513(__init__)
         3.0       0.000       0.000       0.000       0.000 F:\Python\lib\logging\__init__.py:187(_checkLevel)
         1.0       0.000       0.000       0.000       0.000 F:\Python\lib\logging\__init__.py:842(createLock)
         1.0       0.000       0.000       0.000       0.000 F:\Python\lib\logging\__init__.py:1273(_fixupParents)
         5.0       0.000       0.000       0.000       0.000 {method 'acquire' of '_thread.RLock' objects}
         2.0       0.000       0.000       0.000       0.000 F:\Python\lib\logging\__init__.py:727(__init__)
         1.0       0.000       0.000       0.000       0.000 F:\Python\lib\logging\__init__


Total difference
          -20,000,053.0 function calls in -9.540 seconds

   Ordered by: internal time

      ncalls     tottime     percall     cumtime     percall filename:lineno(function)
         1.0       4.157       4.157      10.450      10.450 * benchmark_tests.py:143(logging_mod_log_debug_off)
10,000,000.0       3.968       0.000       6.293       0.000 * F:\Python\lib\logging\__init__.py:1362(debug)
10,000,000.0       2.325       0.000       2.325       0.000 * F:\Python\lib\logging\__init__.py:1620(isEnabledFor)
         1.0       0.910       0.910       0.910       0.910 benchmark_tests.py:150(standardlog_fastdebug_off)
         1.0       0.000       0.000       0.000       0.000 * benchmark_tests.py:129(get_loggging_debug)
         1.0       0.000       0.000       0.000       0.000 {built-in method builtins.__build_class__}
         1.0       0.000       0.000       0.000       0.000 * F:\Python\lib\logging\__init__.py:1222(getLogger)
         1.0       0.000       0.000       0.000       0.000 * F:\Python\lib\logging\__init__.py:1342(__init__)
         1.0       0.000       0.000       0.000       0.000 * F:\Python\lib\logging\__init__.py:1310(_clear_cache)
         1.0       0.000       0.000       0.000       0.000 * F:\Python\lib\threading.py:75(RLock)
         5.0       0.000       0.000       0.000       0.000 * F:\Python\lib\logging\__init__.py:212(_acquireLock)
         3.0       0.000       0.000       0.000       0.000 * ~:0(<method 'clear' of 'dict' objects>)
         1.0       0.000       0.000       0.000       0.000 * F:\Python\lib\logging\__init__.py:794(_addHandlerRef)
         1.0       0.000       0.000       0.000       0.000 * F:\Python\lib\logging\__init__.py:813(__init__)
         5.0       0.000       0.000       0.000       0.000 * F:\Python\lib\logging\__init__.py:221(_releaseLock)
         1.0       0.000       0.000       0.000       0.000 * F:\Python\lib\logging\__init__.py:1355(setLevel)
         1.0       0.000       0.000       0.000       0.000 * F:\Python\lib\logging\__init__.py:513(__init__)
         3.0       0.000       0.000       0.000       0.000 * F:\Python\lib\logging\__init__.py:187(_checkLevel)
         1.0       0.000       0.000       0.000       0.000 * F:\Python\lib\logging\__init__.py:842(createLock)
         1.0       0.000       0.000       0.000       0.000 * F:\Python\lib\logging\__init__.py:1273(_fixupParents)
         5.0       0.000       0.000       0.000       0.000 * ~:0(<method 'acquire' of '_thread.R


Results resume:
 1.   -9.53998    0.91016   10.45015  standardlog_fastdebug_off - logging_mod_log_debug_off  = 10,000,000 iterations

What matters most is the overall time highlighted on the last line of the benchmark report. It says that after 10 million calls, with both logging and self.is_debug disabled, the plain print-based version spent 0.91016 seconds while the logging module took 10.45015 seconds — more than 10 times slower.

I am not going to compare the logging module against the print statement when both are enabled, because the logging module offers tons of features; I am fine with it spending more time than a simple print.

However, when the logging module is disabled, I would not expect it to be so time-consuming. Or am I miscalculating, and letting the logging module take 10.45015 seconds instead of 0.91016 seconds per 10 million calls is reasonable?

回答1:

I guess it's not about the logging module; it's about Python in general. Consider the following code: four simple functions whose only difference is how many attribute accesses each performs. See the result below.

class A:
    """Root of the attribute chain: one flag plus a nested B instance."""

    def __init__(self):
        # `a` is the flag read by f_1; `a_b` extends the lookup chain.
        self.a_b = B()
        self.a = False

class B:
    """Middle link of the chain: one flag plus a nested C instance."""

    def __init__(self):
        # `b` is the flag read by f_2; `b_c` extends the lookup chain.
        self.b_c = C()
        self.b = False

class C:
    """Innermost link of the chain: just the flag read by f_3."""

    def __init__(self):
        self.c = False

import time
N = 10_000_000  # iterations per timed run
a = A()  # shared instance whose attribute chain f_1..f_3 traverse

def f_0(N):
    """Time N iterations branching on a constant False (zero attribute accesses)."""
    begin = time.time()
    for _ in range(N):
        if False:
            print(111)  # never printed
    end = time.time()
    return end - begin

def f_1(N):
    """Time N iterations with a one-level attribute access (a.a)."""
    begin = time.time()
    for _ in range(N):
        if a.a:
            print(111)
    end = time.time()
    return end - begin

def f_2(N):
    """Time N iterations with a two-level attribute access (a.a_b.b)."""
    begin = time.time()
    for _ in range(N):
        if a.a_b.b:
            print(111)
    end = time.time()
    return end - begin

def f_3(N):
    """Time N iterations with a three-level attribute access (a.a_b.b_c.c)."""
    begin = time.time()
    for _ in range(N):
        if a.a_b.b_c.c:
            print(111)
    end = time.time()
    return end - begin

# Time each function 10 times and scatter-plot the run times against the
# length of the attribute-access chain.
result = {'x': [], 'y': []}
for run in range(10):
    for chain_length, timed_function in enumerate([f_0, f_1, f_2, f_3]):
        result['x'].append(str(chain_length))
        result['y'].append(timed_function(N))
    print(run)  # progress indicator

import matplotlib.pyplot as plt
plt.figure()
plt.plot(result['x'], result['y'], '.')
plt.xlabel('argument access chain length')
plt.ylabel('time, seconds')
plt.show()

Result is (Python 3.6.8 / CPython / Win10 / CPU i5-7600):

The reason is well known — pure Python is slow when the CPU is the limiting factor. That's why Python is not really suitable as a number cruncher, for instance. From my experience, switching from custom classes and pandas to plain built-in dictionaries made my Python code (the part with heavy calculations) almost 10 times faster.

Also, you may have a look at https://hackernoon.com/yes-python-is-slow-and-i-dont-care-13763980b5a1 - applicable to the logging part of your code.



回答2:

I'm not really able to reproduce your results.

If the logging module is absolutely not configured whatsoever, it's... pretty fast.

test code

import logging
import argparse
import time
import sys


# Module-level logger used by step(); intentionally left unconfigured here.
log = logging.getLogger("blep")


def prepare_off():
    """Leave the logging system completely unconfigured ("off" mode)."""
    return None


def prepare_on():
    """Route DEBUG-and-above records from the root logger to stdout ("on" mode)."""
    logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)


def step(x):
    """Emit one INFO record for iteration x (lazy %-args formatting)."""
    template = "now running step %s"
    log.info(template, x)


def main():
    """Parse -n/--mode, run the chosen setup, then time n step() calls.

    Prints the mean ns/iteration to stderr so stdout can be redirected
    to /dev/null without losing the measurement.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-n", type=int, default=500_000)
    parser.add_argument(
        "--mode", required=True, choices=("off", "on")
    )
    args = parser.parse_args()

    # Dispatch to prepare_off / prepare_on by name.
    setup = globals()[f"prepare_{args.mode}"]
    setup()

    start_ns = time.perf_counter_ns()
    for x in range(args.n):
        step(x)
    end_ns = time.perf_counter_ns()

    print(
        vars(args),
        (end_ns - start_ns) / args.n,
        "ns/iter",
        file=sys.stderr,
    )


# Standard script guard: run the benchmark only when executed directly.
if __name__ == "__main__":
    main()

transcript

$ time python3 so54470102.py --mode off >/dev/null
{'n': 500000, 'mode': 'off'} 541.53571 ns/iter
        0.32 real         0.30 user         0.01 sys
$ time python3 so54470102.py --mode on >/dev/null
{'n': 500000, 'mode': 'on'} 26932.005352 ns/iter
       13.52 real        13.01 user         0.47 sys

Remember that running under a profiler adds significant overhead (much more when more calls are involved, e.g. when the logging module actually does work):

$ time python3 -m cProfile -o /dev/null so54470102.py --mode on >/dev/null
{'n': 500000, 'mode': 'on'} 45733.711544 ns/iter
       22.93 real        22.23 user         0.57 sys
$ time python3 -m cProfile -o /dev/null so54470102.py --mode off >/dev/null
{'n': 500000, 'mode': 'off'} 962.122468 ns/iter
        0.54 real         0.52 user         0.01 sys