I am dealing with a complex data structure made of nested dictionaries and lists. I need to flatten the data and bring all nested items up to level 0. See the example below for more clarity:
{a:1,b:2,c:{c1:[{c11:1,c12:2,c13:3},{c21:1,c22:2,c23:3}],d1:[{d11:1,d12:2,d13:3},{d21:1,d22:2,d23:3}]},x:1,y:2}
I need to flatten this to:
{a:1,b:2,c_c1_c11:1, c_c1_c12:2,c_c1_c13:3,c_c1_c21:1,c_c1_c22:2,c_c1_c23:3, c_d1_d11:1...and so on}
I used the first answer in this post as a reference, but it only works for nested dictionaries; it does not work when lists are nested within dictionaries and more dictionaries are nested within those lists.
I modified the code a bit to fit my use case, but this code doesn't work:
def flattenDict(d):
    """Flatten nested dictionaries and lists into a single-level dict.

    Keys of nested dictionaries are joined to their parents' keys with
    ``'_'``. Dictionaries found inside a list are flattened under the key
    that holds the list; no list index is added to the path.

    Args:
        d: The dictionary to flatten. Keys must be strings, because they
            are combined with ``'_'.join``.

    Returns:
        A new dict mapping each joined key path to its leaf value.

    Note:
        Because list positions are not part of the path, entries that
        resolve to the same joined key overwrite each other and the last
        one wins (e.g. several scalars in one list). Empty dicts and empty
        lists produce no entries.
    """
    node_map = {}

    def walk_list(items, node_path, key):
        # `key` is the dict key under which this (possibly nested) list
        # was found; every element is reported beneath it.
        for element in items:
            if isinstance(element, dict):
                walk_dict(element, node_path + [key])
            elif isinstance(element, list):
                # Recurse with the *path*; the original passed node_map
                # here, which raised TypeError for any nested list.
                walk_list(element, node_path, key)
            else:
                node_map['_'.join(node_path + [key])] = element

    def walk_dict(mapping, node_path):
        for key, val in mapping.items():
            if isinstance(val, dict):
                walk_dict(val, node_path + [key])
            elif isinstance(val, list):
                walk_list(val, node_path, key)
            else:
                node_map['_'.join(node_path + [key])] = val

    walk_dict(d, [])
    return node_map
The indentation gets messed up when I paste my code here, but I would really appreciate any help.
I think you're overcomplicating things. You start from a dictionary, with keys and values. Its values are either a dictionary or a list of dictionaries which you want to recurse down, or they're not, in which case you want to leave it alone. So:
def flatten(d):
    """Flatten a dict whose values may be dicts or lists of dicts.

    A dict value, or each dict in a list value, is flattened recursively
    and its keys are prefixed with the parent key and ``'_'``. Any other
    value is copied through unchanged.

    Args:
        d: The dictionary to flatten. Keys must be strings, because they
            are concatenated with ``'_'``.

    Returns:
        A new single-level dict.

    Note:
        List positions are not part of the generated keys, so dicts in the
        same list that share a key overwrite each other (last one wins).
        Empty dicts and empty lists produce no entries.
    """
    out = {}
    for key, val in d.items():
        if isinstance(val, dict):
            # Treat a lone dict as a one-element list of dicts.
            val = [val]
        # Only recurse into lists made up entirely of dicts; a list holding
        # scalars (or a mix) has no sub-keys to merge and is kept as-is.
        # Previously such a list raised AttributeError on `.items()`.
        is_dict_list = isinstance(val, list) and all(
            isinstance(item, dict) for item in val
        )
        if is_dict_list:
            for subdict in val:
                for key2, val2 in flatten(subdict).items():
                    out[key + '_' + key2] = val2
        else:
            out[key] = val
    return out
gives me
In [34]: nested = {'a': 1, 'b': 2, 'c': {'c1': [{'c11': 1, 'c12': 2, 'c13': 3}, {'c21': 1, 'c22': 2, 'c23': 3}], 'd1': [{'d11': 1, 'd12': 2, 'd13': 3}, {'d21': 1, 'd22': 2, 'd23': 3}]}, 'x': 1, 'y': 2}
In [35]: flatten(nested)
Out[35]:
{'a': 1,
'b': 2,
'c_c1_c11': 1,
'c_c1_c12': 2,
'c_c1_c13': 3,
'c_c1_c21': 1,
'c_c1_c22': 2,
'c_c1_c23': 3,
'c_d1_d11': 1,
'c_d1_d12': 2,
'c_d1_d13': 3,
'c_d1_d21': 1,
'c_d1_d22': 2,
'c_d1_d23': 3,
'x': 1,
'y': 2}
In my project, I am using an updated version of the function from DSM's answer to flatten a dict that may contain other dicts, lists, or lists of dicts. I hope it will be helpful.
def flatten(input_dict, separator='_', prefix=''):
    """Flatten nested dicts and lists into a single-level dict.

    Nested dict keys are joined to their parents with ``separator``. List
    elements are addressed by their 1-based position, so
    ``{'a': [{'b': 1}, 5]}`` becomes ``{'a_1_b': 1, 'a_2': 5}``.

    Args:
        input_dict: The dictionary to flatten. Keys must be strings,
            because they are concatenated with ``prefix`` and ``separator``.
        separator: String placed between path components.
        prefix: String prepended to every generated key; used by the
            recursion to carry the parent path.

    Returns:
        A new single-level dict.

    Note:
        Empty dicts and empty lists are stored unchanged as leaf values.
        Lists nested directly inside lists are stored as a single value
        under their indexed key rather than being expanded further.
    """
    output_dict = {}
    for key, value in input_dict.items():
        path = prefix + key
        if isinstance(value, dict) and value:
            # Keys returned by the recursive call already carry the prefix.
            output_dict.update(flatten(value, separator, path + separator))
        elif isinstance(value, list) and value:
            for index, item in enumerate(value, start=1):
                item_path = path + separator + str(index)
                if isinstance(item, dict) and item:
                    output_dict.update(
                        flatten(item, separator, item_path + separator)
                    )
                else:
                    # Store the element itself; the original stored the
                    # whole parent list under every indexed key.
                    output_dict[item_path] = item
        else:
            output_dict[path] = value
    return output_dict