How to pretty print nested dictionaries?

2019-01-02 19:50发布

问题:

How can I pretty print a dictionary with depth of ~4 in Python? I tried pretty printing with pprint(), but it did not work:

import pprint 
pp = pprint.PrettyPrinter(indent=4)
pp.pprint(mydict)

I simply want an indentation ("\t") for each nesting, so that I get something like this:

key1
    value1
    value2
    key2
       value1
       value2

etc.

How can I do this?

回答1:

I'm not sure how exactly you want the formatting to look like, but you could start with a function like this:

def pretty(d, indent=0):
   for key, value in d.items():
      print('\t' * indent + str(key))
      if isinstance(value, dict):
         pretty(value, indent+1)
      else:
         print('\t' * (indent+1) + str(value))

(for python 2 user: import the print function from __future__)



回答2:

My first thought was that the JSON serializer is probably pretty good at nested dictionaries, so I'd cheat and use that:

>>> import json
>>> print json.dumps({'a':2, 'b':{'x':3, 'y':{'t1': 4, 't2':5}}},
...                  sort_keys=True, indent=4)
{
    "a": 2,
    "b": {
        "x": 3,
        "y": {
            "t1": 4,
            "t2": 5
        }
    }
}


回答3:

You could try YAML via PyYAML. Its output can be fine-tuned. I'd suggest starting with the following:

print yaml.dump(data, allow_unicode=True, default_flow_style=False)

The result is very readable; it can be also parsed back to Python if needed.

Edit:

Example:

>>> import yaml
>>> data = {'a':2, 'b':{'x':3, 'y':{'t1': 4, 't2':5}}}
>>> print yaml.dump(data, default_flow_style=False)
a: 2
b:
  x: 3
  y:
    t1: 4
    t2: 5


回答4:

As of what have been done, I don't see any pretty printer that at least mimics the output of the python interpreter with very simple formatting so here's mine :

class Formatter(object):
    def __init__(self):
        self.types = {}
        self.htchar = '\t'
        self.lfchar = '\n'
        self.indent = 0
        self.set_formater(object, self.__class__.format_object)
        self.set_formater(dict, self.__class__.format_dict)
        self.set_formater(list, self.__class__.format_list)
        self.set_formater(tuple, self.__class__.format_tuple)

    def set_formater(self, obj, callback):
        self.types[obj] = callback

    def __call__(self, value, **args):
        for key in args:
            setattr(self, key, args[key])
        formater = self.types[type(value) if type(value) in self.types else object]
        return formater(self, value, self.indent)

    def format_object(self, value, indent):
        return repr(value)

    def format_dict(self, value, indent):
        items = [
            self.lfchar + self.htchar * (indent + 1) + repr(key) + ': ' +
            (self.types[type(value[key]) if type(value[key]) in self.types else object])(self, value[key], indent + 1)
            for key in value
        ]
        return '{%s}' % (','.join(items) + self.lfchar + self.htchar * indent)

    def format_list(self, value, indent):
        items = [
            self.lfchar + self.htchar * (indent + 1) + (self.types[type(item) if type(item) in self.types else object])(self, item, indent + 1)
            for item in value
        ]
        return '[%s]' % (','.join(items) + self.lfchar + self.htchar * indent)

    def format_tuple(self, value, indent):
        items = [
            self.lfchar + self.htchar * (indent + 1) + (self.types[type(item) if type(item) in self.types else object])(self, item, indent + 1)
            for item in value
        ]
        return '(%s)' % (','.join(items) + self.lfchar + self.htchar * indent)

To initialize it :

pretty = Formatter()

It can support the addition of formatters for defined types, you simply need to make a function for that like this one and bind it to the type you want with set_formater :

from collections import OrderedDict

def format_ordereddict(self, value, indent):
    items = [
        self.lfchar + self.htchar * (indent + 1) +
        "(" + repr(key) + ', ' + (self.types[
            type(value[key]) if type(value[key]) in self.types else object
        ])(self, value[key], indent + 1) + ")"
        for key in value
    ]
    return 'OrderedDict([%s])' % (','.join(items) +
           self.lfchar + self.htchar * indent)
pretty.set_formater(OrderedDict, format_ordereddict)

For historical reasons, I keep the previous pretty printer which was a function instead of a class, but they both can be used the same way, the class version simply permit much more :

def pretty(value, htchar='\t', lfchar='\n', indent=0):
    nlch = lfchar + htchar * (indent + 1)
    if type(value) is dict:
        items = [
            nlch + repr(key) + ': ' + pretty(value[key], htchar, lfchar, indent + 1)
            for key in value
        ]
        return '{%s}' % (','.join(items) + lfchar + htchar * indent)
    elif type(value) is list:
        items = [
            nlch + pretty(item, htchar, lfchar, indent + 1)
            for item in value
        ]
        return '[%s]' % (','.join(items) + lfchar + htchar * indent)
    elif type(value) is tuple:
        items = [
            nlch + pretty(item, htchar, lfchar, indent + 1)
            for item in value
        ]
        return '(%s)' % (','.join(items) + lfchar + htchar * indent)
    else:
        return repr(value)

To use it :

>>> a = {'list':['a','b',1,2],'dict':{'a':1,2:'b'},'tuple':('a','b',1,2),'function':pretty,'unicode':u'\xa7',("tuple","key"):"valid"}
>>> a
{'function': <function pretty at 0x7fdf555809b0>, 'tuple': ('a', 'b', 1, 2), 'list': ['a', 'b', 1, 2], 'dict': {'a': 1, 2: 'b'}, 'unicode': u'\xa7', ('tuple', 'key'): 'valid'}
>>> print(pretty(a))
{
    'function': <function pretty at 0x7fdf555809b0>,
    'tuple': (
        'a',
        'b',
        1,
        2
    ),
    'list': [
        'a',
        'b',
        1,
        2
    ],
    'dict': {
        'a': 1,
        2: 'b'
    },
    'unicode': u'\xa7',
    ('tuple', 'key'): 'valid'
}

Compared to other versions :

  • This solution looks directly for object type, so you can pretty print almost everything, not only list or dict.
  • Doesn't have any dependancy.
  • Everything is put inside a string, so you can do whatever you want with it.
  • The class and the function has been tested and works with Python 2.7 and 3.4.
  • You can have all type of objects inside, this is their representations and not theirs contents that being put in the result (so string have quotes, Unicode string are fully represented ...).
  • With the class version, you can add formatting for every object type you want or change them for already defined ones.
  • key can be of any valid type.
  • Indent and Newline character can be changed for everything we'd like.
  • Dict, List and Tuples are pretty printed.


回答5:

Another option with yapf:

from pprint import pformat
from yapf.yapflib.yapf_api import FormatCode

dict_example = {'1': '1', '2': '2', '3': [1, 2, 3, 4, 5], '4': {'1': '1', '2': '2', '3': [1, 2, 3, 4, 5]}}
dict_string = pformat(dict_example)
formatted_code, _ = FormatCode(dict_string)

print(formatted_code)

Output:

{
    '1': '1',
    '2': '2',
    '3': [1, 2, 3, 4, 5],
    '4': {
        '1': '1',
        '2': '2',
        '3': [1, 2, 3, 4, 5]
    }
}


回答6:

I took sth's answer and modified it slightly to fit my needs of a nested dictionaries and lists:

def pretty(d, indent=0):
    if isinstance(d, dict):
        for key, value in d.iteritems():
            print '\t' * indent + str(key)
            if isinstance(value, dict) or isinstance(value, list):
                pretty(value, indent+1)
            else:
                print '\t' * (indent+1) + str(value)
    elif isinstance(d, list):
        for item in d:
            if isinstance(item, dict) or isinstance(item, list):
                pretty(item, indent+1)
            else:
                print '\t' * (indent+1) + str(item)
    else:
        pass

Which then gives me output like:

>>> 
xs:schema
    @xmlns:xs
        http://www.w3.org/2001/XMLSchema
    xs:redefine
        @schemaLocation
            base.xsd
        xs:complexType
            @name
                Extension
            xs:complexContent
                xs:restriction
                    @base
                        Extension
                    xs:sequence
                        xs:element
                            @name
                                Policy
                            @minOccurs
                                1
                            xs:complexType
                                xs:sequence
                                    xs:element
                                            ...


回答7:

As others have posted, you can use recursion/dfs to print the nested dictionary data and call recursively if it is a dictionary; otherwise print the data.

def print_json(data):
    if type(data) == dict:
            for k, v in data.items():
                    print k
                    print_json(v)
    else:
            print data


回答8:

Sth, i sink that's pretty ;)

def pretty(d, indent=0):
    for key, value in d.iteritems():
        if isinstance(value, dict):
            print '\t' * indent + (("%30s: {\n") % str(key).upper())
            pretty(value, indent+1)
            print '\t' * indent + ' ' * 32 + ('} # end of %s #\n' % str(key).upper())
        elif isinstance(value, list):
            for val in value:
                print '\t' * indent + (("%30s: [\n") % str(key).upper())
                pretty(val, indent+1)
                print '\t' * indent + ' ' * 32 + ('] # end of %s #\n' % str(key).upper())
        else:
            print '\t' * indent + (("%30s: %s") % (str(key).upper(),str(value)))


回答9:

This class prints out a complex nested dictionary with sub dictionaries and sub lists.  
##
## Recursive class to parse and print complex nested dictionary
##

class NestedDictionary(object):
    def __init__(self,value):
        self.value=value

    def print(self,depth):
        spacer="--------------------"
        if type(self.value)==type(dict()):
            for kk, vv in self.value.items():
                if (type(vv)==type(dict())):
                    print(spacer[:depth],kk)
                    vvv=(NestedDictionary(vv))
                    depth=depth+3
                    vvv.print(depth)
                    depth=depth-3
                else:
                    if (type(vv)==type(list())):
                        for i in vv:
                            vvv=(NestedDictionary(i))
                            depth=depth+3
                            vvv.print(depth)
                            depth=depth-3
                    else:
                        print(spacer[:depth],kk,vv) 

##
## Instatiate and execute - this prints complex nested dictionaries
## with sub dictionaries and sub lists
## 'something' is a complex nested dictionary

MyNest=NestedDictionary(weather_com_result)
MyNest.print(0)


回答10:

I wrote this simple code to print the general structure of a json object in Python.

def getstructure(data, tab = 0):
    if type(data) is dict:
        print ' '*tab + '{' 
        for key in data:
            print ' '*tab + '  ' + key + ':'
            getstructure(data[key], tab+4)
        print ' '*tab + '}'         
    elif type(data) is list and len(data) > 0:
        print ' '*tab + '['
        getstructure(data[0], tab+4)
        print ' '*tab + '  ...'
        print ' '*tab + ']'

the result for the following data

a = {'list':['a','b',1,2],'dict':{'a':1,2:'b'},'tuple':('a','b',1,2),'function':'p','unicode':u'\xa7',("tuple","key"):"valid"}
getstructure(a)

is very compact and looks like this:

{
  function:
  tuple:
  list:
    [
      ...
    ]
  dict:
    {
      a:
      2:
    }
  unicode:
  ('tuple', 'key'):
}


回答11:

Here's something that will print any sort of nested dictionary, while keeping track of the "parent" dictionaries along the way.

dicList = list()

def prettierPrint(dic, dicList):
count = 0
for key, value in dic.iteritems():
    count+=1
    if str(value) == 'OrderedDict()':
        value = None
    if not isinstance(value, dict):
        print str(key) + ": " + str(value)
        print str(key) + ' was found in the following path:',
        print dicList
        print '\n'
    elif isinstance(value, dict):
        dicList.append(key)
        prettierPrint(value, dicList)
    if dicList:
         if count == len(dic):
             dicList.pop()
             count = 0

prettierPrint(dicExample, dicList)

This is a good starting point for printing according to different formats, like the one specified in OP. All you really need to do is operations around the Print blocks. Note that it looks to see if the value is 'OrderedDict()'. Depending on whether you're using something from Container datatypes Collections, you should make these sort of fail-safes so the elif block doesn't see it as an additional dictionary due to its name. As of now, an example dictionary like

example_dict = {'key1': 'value1',
            'key2': 'value2',
            'key3': {'key3a': 'value3a'},
            'key4': {'key4a': {'key4aa': 'value4aa',
                               'key4ab': 'value4ab',
                               'key4ac': 'value4ac'},
                     'key4b': 'value4b'}

will print

key3a: value3a
key3a was found in the following path: ['key3']

key2: value2
key2 was found in the following path: []

key1: value1
key1 was found in the following path: []

key4ab: value4ab
key4ab was found in the following path: ['key4', 'key4a']

key4ac: value4ac
key4ac was found in the following path: ['key4', 'key4a']

key4aa: value4aa
key4aa was found in the following path: ['key4', 'key4a']

key4b: value4b
key4b was found in the following path: ['key4']

~altering code to fit the question's format~

lastDict = list()
dicList = list()
def prettierPrint(dic, dicList):
    global lastDict
    count = 0
    for key, value in dic.iteritems():
        count+=1
        if str(value) == 'OrderedDict()':
            value = None
        if not isinstance(value, dict):
            if lastDict == dicList:
                sameParents = True
            else:
                sameParents = False

            if dicList and sameParents is not True:
                spacing = ' ' * len(str(dicList))
                print dicList
                print spacing,
                print str(value)

            if dicList and sameParents is True:
                print spacing,
                print str(value)
            lastDict = list(dicList)

        elif isinstance(value, dict):
            dicList.append(key)
            prettierPrint(value, dicList)

        if dicList:
             if count == len(dic):
                 dicList.pop()
                 count = 0

Using the same example code, it will print the following:

['key3']
         value3a
['key4', 'key4a']
                  value4ab
                  value4ac
                  value4aa
['key4']
         value4b

This isn't exactly what is requested in OP. The difference is that a parent^n is still printed, instead of being absent and replaced with white-space. To get to OP's format, you'll need to do something like the following: iteratively compare dicList with the lastDict. You can do this by making a new dictionary and copying dicList's content to it, checking if i in the copied dictionary is the same as i in lastDict, and -- if it is -- writing whitespace to that i position using the string multiplier function.



回答12:

From this link:

def prnDict(aDict, br='\n', html=0,
            keyAlign='l',   sortKey=0,
            keyPrefix='',   keySuffix='',
            valuePrefix='', valueSuffix='',
            leftMargin=0,   indent=1 ):
    '''
return a string representive of aDict in the following format:
    {
     key1: value1,
     key2: value2,
     ...
     }

Spaces will be added to the keys to make them have same width.

sortKey: set to 1 if want keys sorted;
keyAlign: either 'l' or 'r', for left, right align, respectively.
keyPrefix, keySuffix, valuePrefix, valueSuffix: The prefix and
   suffix to wrap the keys or values. Good for formatting them
   for html document(for example, keyPrefix='<b>', keySuffix='</b>'). 
   Note: The keys will be padded with spaces to have them
         equally-wide. The pre- and suffix will be added OUTSIDE
         the entire width.
html: if set to 1, all spaces will be replaced with '&nbsp;', and
      the entire output will be wrapped with '<code>' and '</code>'.
br: determine the carriage return. If html, it is suggested to set
    br to '<br>'. If you want the html source code eazy to read,
    set br to '<br>\n'

version: 04b52
author : Runsun Pan
require: odict() # an ordered dict, if you want the keys sorted.
         Dave Benjamin 
         http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/161403
    '''

    if aDict:

        #------------------------------ sort key
        if sortKey:
            dic = aDict.copy()
            keys = dic.keys()
            keys.sort()
            aDict = odict()
            for k in keys:
                aDict[k] = dic[k]

        #------------------- wrap keys with ' ' (quotes) if str
        tmp = ['{']
        ks = [type(x)==str and "'%s'"%x or x for x in aDict.keys()]

        #------------------- wrap values with ' ' (quotes) if str
        vs = [type(x)==str and "'%s'"%x or x for x in aDict.values()] 

        maxKeyLen = max([len(str(x)) for x in ks])

        for i in range(len(ks)):

            #-------------------------- Adjust key width
            k = {1            : str(ks[i]).ljust(maxKeyLen),
                 keyAlign=='r': str(ks[i]).rjust(maxKeyLen) }[1]

            v = vs[i]        
            tmp.append(' '* indent+ '%s%s%s:%s%s%s,' %(
                        keyPrefix, k, keySuffix,
                        valuePrefix,v,valueSuffix))

        tmp[-1] = tmp[-1][:-1] # remove the ',' in the last item
        tmp.append('}')

        if leftMargin:
          tmp = [ ' '*leftMargin + x for x in tmp ]

        if html:
            return '<code>%s</code>' %br.join(tmp).replace(' ','&nbsp;')
        else:
            return br.join(tmp)     
    else:
        return '{}'

'''
Example:

>>> a={'C': 2, 'B': 1, 'E': 4, (3, 5): 0}

>>> print prnDict(a)
{
 'C'   :2,
 'B'   :1,
 'E'   :4,
 (3, 5):0
}

>>> print prnDict(a, sortKey=1)
{
 'B'   :1,
 'C'   :2,
 'E'   :4,
 (3, 5):0
}

>>> print prnDict(a, keyPrefix="<b>", keySuffix="</b>")
{
 <b>'C'   </b>:2,
 <b>'B'   </b>:1,
 <b>'E'   </b>:4,
 <b>(3, 5)</b>:0
}

>>> print prnDict(a, html=1)
<code>{
&nbsp;'C'&nbsp;&nbsp;&nbsp;:2,
&nbsp;'B'&nbsp;&nbsp;&nbsp;:1,
&nbsp;'E'&nbsp;&nbsp;&nbsp;:4,
&nbsp;(3,&nbsp;5):0
}</code>

>>> b={'car': [6, 6, 12], 'about': [15, 9, 6], 'bookKeeper': [9, 9, 15]}

>>> print prnDict(b, sortKey=1)
{
 'about'     :[15, 9, 6],
 'bookKeeper':[9, 9, 15],
 'car'       :[6, 6, 12]
}

>>> print prnDict(b, keyAlign="r")
{
        'car':[6, 6, 12],
      'about':[15, 9, 6],
 'bookKeeper':[9, 9, 15]
}
'''


回答13:

I'm just returning to this question after taking sth's answer and making a small but very useful modification. This function prints all keys in the JSON tree as well as the size of leaf nodes in that tree.

def print_JSON_tree(d, indent=0):
    for key, value in d.iteritems():
        print '    ' * indent + unicode(key),
        if isinstance(value, dict):
            print; print_JSON_tree(value, indent+1)
        else:
            print ":", str(type(d[key])).split("'")[1], "-", str(len(unicode(d[key])))

It's really nice when you have large JSON objects and want to figure out where the meat is. Example:

>>> print_JSON_tree(JSON_object)
key1
    value1 : int - 5
    value2 : str - 16
    key2
       value1 : str - 34
       value2 : list - 5623456

This would tell you that most of the data you care about is probably inside JSON_object['key1']['key2']['value2'] because the length of that value formatted as a string is very large.



回答14:

I'm a relative python newbie myself but I've been working with nested dictionaries for the past couple weeks and this is what I had came up with.

You should try using a stack. Make the keys from the root dictionary into a list of a list:

stack = [ root.keys() ]     # Result: [ [root keys] ]

Going in reverse order from last to first, lookup each key in the dictionary to see if its value is (also) a dictionary. If not, print the key then delete it. However if the value for the key is a dictionary, print the key then append the keys for that value to the end of the stack, and start processing that list in the same way, repeating recursively for each new list of keys.

If the value for the second key in each list were a dictionary you would have something like this after several rounds:

[['key 1','key 2'],['key 2.1','key 2.2'],['key 2.2.1','key 2.2.2'],[`etc.`]]

The upside to this approach is that the indent is just \t times the length of the stack:

indent = "\t" * len(stack)

The downside is that in order to check each key you need to hash through to the relevant sub-dictionary, though this can be handled easily with a list comprehension and a simple for loop:

path = [li[-1] for li in stack]
# The last key of every list of keys in the stack

sub = root
for p in path:
    sub = sub[p]


if type(sub) == dict:
    stack.append(sub.keys()) # And so on

Be aware that this approach will require you to cleanup trailing empty lists, and to delete the last key in any list followed by an empty list (which of course may create another empty list, and so on).

There are other ways to implement this approach but hopefully this gives you a basic idea of how to do it.

EDIT: If you don't want to go through all that, the pprint module prints nested dictionaries in a nice format.



回答15:

Here's a function I wrote based on what sth's comment. It's works the same as json.dumps with indent, but I'm using tabs instead of space for indents. In Python 3.2+ you can specify indent to be a '\t' directly, but not in 2.7.

def pretty_dict(d):
    def pretty(d, indent):
        for i, (key, value) in enumerate(d.iteritems()):
            if isinstance(value, dict):
                print '{0}"{1}": {{'.format( '\t' * indent, str(key))
                pretty(value, indent+1)
                if i == len(d)-1:
                    print '{0}}}'.format( '\t' * indent)
                else:
                    print '{0}}},'.format( '\t' * indent)
            else:
                if i == len(d)-1:
                    print '{0}"{1}": "{2}"'.format( '\t' * indent, str(key), value)
                else:
                    print '{0}"{1}": "{2}",'.format( '\t' * indent, str(key), value)
    print '{'
    pretty(d,indent=1)
    print '}'

Ex:

>>> dict_var = {'a':2, 'b':{'x':3, 'y':{'t1': 4, 't2':5}}}
>>> pretty_dict(dict_var)
{
    "a": "2",
    "b": {
        "y": {
            "t2": "5",
            "t1": "4"
        },
        "x": "3"
    }
}


标签: