From 781eb1937829a3f3f726d41e3a718c7139da212f Mon Sep 17 00:00:00 2001 From: Philippe Mathieu <71FIL@users.noreply.github.com> Date: Tue, 27 Jul 2021 13:54:36 -0400 Subject: [PATCH 1/4] Issue 61: add option allowing specifying gc collect generation --- HACKING.rst | 4 +-- docs/index.txt | 14 +++++++++ objgraph.py | 79 ++++++++++++++++++++++++++++++++++++-------------- tests.py | 37 +++++++++++++++++++++++ 4 files changed, 111 insertions(+), 23 deletions(-) diff --git a/HACKING.rst b/HACKING.rst index d6a756b..c8ccebf 100644 --- a/HACKING.rst +++ b/HACKING.rst @@ -3,7 +3,7 @@ Hacking on objgraph =================== -Start by geting the latest source with :: +Start by getting the latest source with :: git clone https://github.com/mgedmin/objgraph @@ -65,7 +65,7 @@ to see how incomplete they are with :: make coverage I use a `vim plugin `_ -to higlight lines not covered by tests while I edit :: +to highlight lines not covered by tests while I edit :: make coverage vim objgraph.py diff --git a/docs/index.txt b/docs/index.txt index e022ade..2c09df1 100644 --- a/docs/index.txt +++ b/docs/index.txt @@ -123,6 +123,20 @@ It is perhaps surprising to find :mod:`linecache` at the end of that chain (apparently :mod:`doctest` monkey-patches it), but the important things -- :func:`computate_something` and its cache dictionary -- are in there. +In some cases, something might look like a memory leak but actually isn't. +The case I'm interested in here is a manifestation of `python issue 39061 +<https://bugs.python.org/issue39061>`_. Objects that reach the garbage +collector's generation 2 are only rarely collected and might end up using more +and more memory. + +Using `objgraph` in such a scenario actually hides the issue, as a full garbage +collection is run to cut the noise as much as possible. + +To avoid a full collection being run and hiding that memory-leak look-alike, +an option has been added to most of the API functions to only collect up to +a specific generation. 
The option is `gc_collect_gen` and is set to 2 by +default, which results in the default gc behavior. + There are other tools, perhaps better suited for memory leak hunting: `heapy `_, `Dozer `_. diff --git a/objgraph.py b/objgraph.py index 6c2c941..ecacc48 100755 --- a/objgraph.py +++ b/objgraph.py @@ -276,7 +276,8 @@ def show_most_common_types( file.write('%-*s %i\n' % (width, name, count)) -def growth(limit=10, peak_stats={}, shortnames=True, filter=None): +def growth(limit=10, peak_stats={}, shortnames=True, filter=None, + gc_collect_gen=2): """Count the increase in peak object since last call. Returns a list of (type_name, total_count, increase_delta), @@ -293,6 +294,9 @@ def growth(limit=10, peak_stats={}, shortnames=True, filter=None): returning a boolean. Objects for which ``filter(obj)`` returns ``False`` will be ignored. + gc.collect() is called with ``gc_collect_gen``. The default is ``2``, thus + running a full collection. + The caveats documented in :func:`typestats` apply. Example: @@ -303,7 +307,7 @@ def growth(limit=10, peak_stats={}, shortnames=True, filter=None): .. versionadded:: 3.3.0 """ - gc.collect() + gc.collect(gc_collect_gen) stats = typestats(shortnames=shortnames, filter=filter) deltas = {} for name, count in iteritems(stats): @@ -320,7 +324,7 @@ def growth(limit=10, peak_stats={}, shortnames=True, filter=None): def show_growth(limit=10, peak_stats=None, shortnames=True, file=None, - filter=None): + filter=None, gc_collect_gen=2): """Show the increase in peak object counts since last call. if ``peak_stats`` is None, peak object counts will recorded in @@ -348,11 +352,15 @@ def show_growth(limit=10, peak_stats=None, shortnames=True, file=None, .. versionchanged:: 3.1.3 New parameter: ``filter``. + .. versionchanged:: 3.6 + New parameter: ``gc_collect_gen``. 
+ """ if peak_stats is None: - result = growth(limit, shortnames=shortnames, filter=filter) + result = growth(limit, shortnames=shortnames, filter=filter, + gc_collect_gen=gc_collect_gen) else: - result = growth(limit, peak_stats, shortnames, filter) + result = growth(limit, peak_stats, shortnames, filter, gc_collect_gen) if result: if file is None: file = sys.stdout @@ -362,7 +370,7 @@ def show_growth(limit=10, peak_stats=None, shortnames=True, file=None, def get_new_ids(skip_update=False, limit=10, sortby='deltas', - shortnames=None, file=None, _state={}): + shortnames=None, file=None, _state={}, gc_collect_gen=2): """Find and display new objects allocated since last call. Shows the increase in object counts since last call to this @@ -392,6 +400,9 @@ def get_new_ids(skip_update=False, limit=10, sortby='deltas', It is used by the function to store the internal state between calls. Never pass in this argument unless you know what you're doing. + ``gc_collect_gen`` (int from 0 to 2): used in the call to gc.collect() to + limit the collection to given generation and lower. + The caveats documented in :func:`growth` apply. When one gets new_ids from :func:`get_new_ids`, one can use @@ -435,7 +446,7 @@ def get_new_ids(skip_update=False, limit=10, sortby='deltas', shortnames = _state['shortnames'] else: _state['shortnames'] = shortnames - gc.collect() + gc.collect(gc_collect_gen) objects = gc.get_objects() for class_name in old_ids: old_ids[class_name].clear() @@ -494,7 +505,7 @@ def get_new_ids(skip_update=False, limit=10, sortby='deltas', return new_ids -def get_leaking_objects(objects=None): +def get_leaking_objects(objects=None, gc_collect_gen=2): """Return objects that do not have any referents. These could indicate reference-counting bugs in C code. Or they could @@ -505,7 +516,7 @@ def get_leaking_objects(objects=None): .. 
versionadded:: 1.7 """ if objects is None: - gc.collect() + gc.collect(gc_collect_gen) objects = gc.get_objects() try: ids = set(id(i) for i in objects) @@ -592,7 +603,8 @@ def at_addrs(address_set): return res -def find_ref_chain(obj, predicate, max_depth=20, extra_ignore=()): +def find_ref_chain(obj, predicate, max_depth=20, extra_ignore=(), + gc_collect_gen=2): """Find a shortest chain of references leading from obj. The end of the chain will be some object that matches your predicate. @@ -604,6 +616,9 @@ def find_ref_chain(obj, predicate, max_depth=20, extra_ignore=()): ``extra_ignore`` can be a list of object IDs to exclude those objects from your search. + ``gc_collect_gen`` specifies the generation to be collected in the call to + gc.collect(). The default is to run a full collection. + Example: >>> find_ref_chain(obj, lambda x: isinstance(x, MyClass)) @@ -614,10 +629,12 @@ def find_ref_chain(obj, predicate, max_depth=20, extra_ignore=()): .. versionadded:: 1.7 """ return _find_chain(obj, predicate, gc.get_referents, - max_depth=max_depth, extra_ignore=extra_ignore)[::-1] + max_depth=max_depth, extra_ignore=extra_ignore, + gc_collect_gen=gc_collect_gen)[::-1] -def find_backref_chain(obj, predicate, max_depth=20, extra_ignore=()): +def find_backref_chain(obj, predicate, max_depth=20, extra_ignore=(), + gc_collect_gen=2): """Find a shortest chain of references leading to obj. The start of the chain will be some object that matches your predicate. @@ -629,6 +646,9 @@ def find_backref_chain(obj, predicate, max_depth=20, extra_ignore=()): ``extra_ignore`` can be a list of object IDs to exclude those objects from your search. + ``gc_collect_gen`` specifies the generation to be collected in the call to + gc.collect(). The default is to run a full collection. 
+ Example: >>> find_backref_chain(obj, is_proper_module) @@ -641,13 +661,14 @@ def find_backref_chain(obj, predicate, max_depth=20, extra_ignore=()): """ return _find_chain(obj, predicate, gc.get_referrers, - max_depth=max_depth, extra_ignore=extra_ignore) + max_depth=max_depth, extra_ignore=extra_ignore, + gc_collect_gen=gc_collect_gen) def show_backrefs(objs, max_depth=3, extra_ignore=(), filter=None, too_many=10, highlight=None, filename=None, extra_info=None, refcounts=False, shortnames=True, output=None, - extra_node_attrs=None): + extra_node_attrs=None, gc_collect_gen=2): """Generate an object reference graph ending at ``objs``. The graph will show you what objects refer to ``objs``, directly and @@ -693,6 +714,9 @@ def show_backrefs(objs, max_depth=3, extra_ignore=(), filter=None, too_many=10, names ('package.module.ClassName'). By default you get to see only the class name part. + ``gc_collect_gen`` specifies the generation to be collected in the call to + gc.collect(). The default is to run a full collection. + Examples: >>> show_backrefs(obj) @@ -717,6 +741,9 @@ class name part. .. versionchanged:: 3.5 New parameter: ``extra_node_attrs``. + + .. versionchanged:: 3.6 + New parameter: ``gc_collect_gen``. """ # For show_backrefs(), it makes sense to stop when reaching a # module because you'll end up in sys.modules and explode the @@ -728,13 +755,14 @@ class name part. filename=filename, output=output, extra_info=extra_info, refcounts=refcounts, shortnames=shortnames, cull_func=is_proper_module, - extra_node_attrs=extra_node_attrs) + extra_node_attrs=extra_node_attrs, + gc_collect_gen=gc_collect_gen) def show_refs(objs, max_depth=3, extra_ignore=(), filter=None, too_many=10, highlight=None, filename=None, extra_info=None, refcounts=False, shortnames=True, output=None, - extra_node_attrs=None): + extra_node_attrs=None, gc_collect_gen=2): """Generate an object reference graph starting at ``objs``. 
The graph will show you what objects are reachable from ``objs``, directly @@ -774,6 +802,9 @@ def show_refs(objs, max_depth=3, extra_ignore=(), filter=None, too_many=10, Specify ``refcounts=True`` if you want to see reference counts. + ``gc_collect_gen`` specifies the generation to be collected in the call to + gc.collect(). The default is to run a full collection. + Examples: >>> show_refs(obj) @@ -801,13 +832,17 @@ def show_refs(objs, max_depth=3, extra_ignore=(), filter=None, too_many=10, .. versionchanged:: 3.5 New parameter: ``extra_node_attrs``. + + .. versionchanged:: 3.6 + New parameter: ``gc_collect_gen``. """ return _show_graph(objs, max_depth=max_depth, extra_ignore=extra_ignore, filter=filter, too_many=too_many, highlight=highlight, edge_func=gc.get_referents, swap_source_target=True, filename=filename, extra_info=extra_info, refcounts=refcounts, shortnames=shortnames, - output=output, extra_node_attrs=extra_node_attrs) + output=output, extra_node_attrs=extra_node_attrs, + gc_collect_gen=gc_collect_gen) def show_chain(*chains, **kw): @@ -881,7 +916,8 @@ def is_proper_module(obj): # Internal helpers # -def _find_chain(obj, predicate, edge_func, max_depth=20, extra_ignore=()): +def _find_chain(obj, predicate, edge_func, max_depth=20, extra_ignore=(), + gc_collect_gen=2): queue = [obj] depth = {id(obj): 0} parent = {id(obj): None} @@ -893,7 +929,7 @@ def _find_chain(obj, predicate, edge_func, max_depth=20, extra_ignore=()): ignore.add(id(ignore)) ignore.add(id(sys._getframe())) # this function ignore.add(id(sys._getframe(1))) # find_chain/find_backref_chain - gc.collect() + gc.collect(gc_collect_gen) while queue: target = queue.pop(0) if predicate(target): @@ -920,7 +956,8 @@ def _show_graph(objs, edge_func, swap_source_target, max_depth=3, extra_ignore=(), filter=None, too_many=10, highlight=None, filename=None, extra_info=None, refcounts=False, shortnames=True, output=None, - cull_func=None, extra_node_attrs=None): + cull_func=None, 
extra_node_attrs=None, + gc_collect_gen=2): if not _isinstance(objs, (list, tuple)): objs = [objs] @@ -963,7 +1000,7 @@ def _show_graph(objs, edge_func, swap_source_target, depth[id(obj)] = 0 queue.append(obj) del obj - gc.collect() + gc.collect(gc_collect_gen) nodes = 0 while queue: nodes += 1 diff --git a/tests.py b/tests.py index 27a325a..8cabfb9 100755 --- a/tests.py +++ b/tests.py @@ -339,6 +339,43 @@ def test_growth(self): self.assertEqual(1, len(cared)) self.assertEqual(1, cared[0][2]) + def test_growth_override_gc_collect_gen(self): + + """ + Inspiration taken from https://bugs.python.org/issue39061, attachment late_gc.py + """ + class ApparentlyLeakingObj: + """Object keeping references to itself""" + def __init__(self): + self.create_cycle = self + + def trigger_memory_leak_look_alike(): + for i in range(1000): + apparently_leaking = ApparentlyLeakingObj() + # Instantiate n objects to free via garbage collection while "working" on that heavy object + for i in range(90): + light_cyclical_object = list() + light_cyclical_object.append(light_cyclical_object) + del apparently_leaking + + # First, make sure that when using the default garbage collection generation + # parameter, there is no memory leak look alike: there should not be any + # ApparentlyLeakingObj in the growth info + objgraph.growth(limit=None) + trigger_memory_leak_look_alike() + growth_info = objgraph.growth(limit=None) + + assert not any(record[0] == 'ApparentlyLeakingObj' for record in growth_info) + + # Now, only collect up to generation 1, keeping the objects in generation 2 + # intact. 
There should be at least one ApparentlyLeakingObj in the growth + # info + objgraph.growth(limit=None, gc_collect_gen=1) + trigger_memory_leak_look_alike() + growth_info = objgraph.growth(limit=None, gc_collect_gen=1) + + assert any(record[0] == 'ApparentlyLeakingObj' for record in growth_info) + def test_show_growth_custom_peak_stats(self): ps = {} objgraph.show_growth(peak_stats=ps, file=StringIO()) From 1a5c82ece60fa1af6828306b54cd34e6f9af1301 Mon Sep 17 00:00:00 2001 From: Philippe Mathieu <71FIL@users.noreply.github.com> Date: Wed, 11 Aug 2021 13:45:05 -0400 Subject: [PATCH 2/4] flake8 --- tests.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/tests.py b/tests.py index 8cabfb9..1831811 100755 --- a/tests.py +++ b/tests.py @@ -342,7 +342,8 @@ def test_growth(self): def test_growth_override_gc_collect_gen(self): """ - Inspiration taken from https://bugs.python.org/issue39061, attachment late_gc.py + Inspiration taken from https://bugs.python.org/issue39061, attachment + late_gc.py """ class ApparentlyLeakingObj: """Object keeping references to itself""" @@ -352,29 +353,32 @@ def __init__(self): def trigger_memory_leak_look_alike(): for i in range(1000): apparently_leaking = ApparentlyLeakingObj() - # Instantiate n objects to free via garbage collection while "working" on that heavy object + # Instantiate n objects to free via garbage collection while + # "working" on that heavy object for i in range(90): light_cyclical_object = list() light_cyclical_object.append(light_cyclical_object) del apparently_leaking - # First, make sure that when using the default garbage collection generation - # parameter, there is no memory leak look alike: there should not be any - # ApparentlyLeakingObj in the growth info + # First, make sure that when using the default garbage collection + # generation parameter, there is no memory leak look alike: there + # should not be any ApparentlyLeakingObj in the growth info 
objgraph.growth(limit=None) trigger_memory_leak_look_alike() growth_info = objgraph.growth(limit=None) - assert not any(record[0] == 'ApparentlyLeakingObj' for record in growth_info) + assert not any(record[0] == 'ApparentlyLeakingObj' + for record in growth_info) - # Now, only collect up to generation 1, keeping the objects in generation 2 - # intact. There should be at least one ApparentlyLeakingObj in the growth - # info + # Now, only collect up to generation 1, keeping the objects in + # generation 2 intact. There should be at least one + # ApparentlyLeakingObj in the growth info objgraph.growth(limit=None, gc_collect_gen=1) trigger_memory_leak_look_alike() growth_info = objgraph.growth(limit=None, gc_collect_gen=1) - assert any(record[0] == 'ApparentlyLeakingObj' for record in growth_info) + assert any(record[0] == 'ApparentlyLeakingObj' + for record in growth_info) def test_show_growth_custom_peak_stats(self): ps = {} From 7ca0002338cfa0d0a944036fa4bd1365328e6961 Mon Sep 17 00:00:00 2001 From: Philippe Mathieu <71FIL@users.noreply.github.com> Date: Thu, 12 Aug 2021 13:50:11 -0400 Subject: [PATCH 3/4] Added missing versionchanged --- objgraph.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/objgraph.py b/objgraph.py index ecacc48..732a271 100755 --- a/objgraph.py +++ b/objgraph.py @@ -306,6 +306,9 @@ def growth(limit=10, peak_stats={}, shortnames=True, filter=None, .. versionadded:: 3.3.0 + .. versionchanged:: 3.6 + New parameter: ``gc_collect_gen``. + """ gc.collect(gc_collect_gen) stats = typestats(shortnames=shortnames, filter=filter) @@ -431,6 +434,10 @@ def get_new_ids(skip_update=False, limit=10, sortby='deltas', True .. versionadded:: 3.4 + + .. versionchanged:: 3.6 + New parameter: ``gc_collect_gen``. + """ if not _state: _state['old'] = collections.defaultdict(set) @@ -514,6 +521,10 @@ def get_leaking_objects(objects=None, gc_collect_gen=2): Note that the GC does not track simple objects like int or str. .. versionadded:: 1.7 + + .. 
versionchanged:: 3.6 + New parameter: ``gc_collect_gen``. + """ if objects is None: gc.collect(gc_collect_gen) From 7c380b08e6fb98fb47a0af8aa9f7e3b8a44e66f1 Mon Sep 17 00:00:00 2001 From: Philippe Mathieu <71FIL@users.noreply.github.com> Date: Thu, 12 Aug 2021 13:56:53 -0400 Subject: [PATCH 4/4] Added more missing versionchanged --- objgraph.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/objgraph.py b/objgraph.py index 732a271..3039611 100755 --- a/objgraph.py +++ b/objgraph.py @@ -638,6 +638,10 @@ def find_ref_chain(obj, predicate, max_depth=20, extra_ignore=(), Returns ``[obj]`` if such a chain could not be found. .. versionadded:: 1.7 + + .. versionchanged:: 3.6 + New parameter: ``gc_collect_gen``. + """ return _find_chain(obj, predicate, gc.get_referents, max_depth=max_depth, extra_ignore=extra_ignore, @@ -670,6 +674,9 @@ def find_backref_chain(obj, predicate, max_depth=20, extra_ignore=(), .. versionchanged:: 1.5 Returns ``obj`` instead of ``None`` when a chain could not be found. + .. versionchanged:: 3.6 + New parameter: ``gc_collect_gen``. + """ return _find_chain(obj, predicate, gc.get_referrers, max_depth=max_depth, extra_ignore=extra_ignore,