python sort list of json by value

2020-05-21 08:46发布

I have a file consisting of JSON objects, one per line, and I want to sort the file by update_time in descending order.

sample JSON file:

{ "page": { "url": "url1", "update_time": "1415387875"}, "other_key": {} }
{ "page": { "url": "url2", "update_time": "1415381963"}, "other_key": {} }
{ "page": { "url": "url3", "update_time": "1415384938"}, "other_key": {} }

Desired output:

{ "page": { "url": "url1", "update_time": "1415387875"}, "other_key": {} }
{ "page": { "url": "url3", "update_time": "1415384938"}, "other_key": {} }
{ "page": { "url": "url2", "update_time": "1415381963"}, "other_key": {} }

my code:

#!/bin/env python
#coding: utf8

import sys
import os
import json
import operator

# Load the input: one JSON document per line on standard input.
# readline() returns '' only at end of input, which ends the iteration;
# a whitespace-only line is stripped to '' and makes json.loads raise.
lines = [json.loads(raw.strip()) for raw in iter(sys.stdin.readline, '')]

# Sort in place, largest update_time first. The values are compared exactly
# as stored (strings in the sample data), and a record without 'page' or
# 'update_time' raises KeyError here.
lines.sort(key=lambda record: record['page']['update_time'], reverse=True)

# Emit the result. Each record is printed as the repr of the parsed Python
# object, not re-serialized as JSON.
for record in lines:
    print(record)

The code works fine with the sample JSON file, but if a JSON object has no 'update_time' key, it raises a KeyError exception. Is there a way to do this without triggering an exception?

4条回答
神经病院院长
2楼-- · 2020-05-21 09:07
def get_sortest_key(a: dict, o: dict):
    """Move every entry of ``a`` into ``o`` in ascending order of value.

    Both arguments are mutated: on return ``a`` is empty and ``o`` has
    received each ``key: value`` pair of ``a``, inserted smallest value
    first. Entries with equal values are inserted in the order they had
    in ``a``. A key that already exists in ``o`` keeps its position there
    and only has its value overwritten.

    An empty ``a`` is a no-op, and the work is done iteratively, so the
    size of ``a`` is not limited by the interpreter's recursion depth.

    Args:
        a: Source mapping; emptied by this call. Its values must be
            mutually orderable.
        o: Destination mapping that receives the entries.

    Returns:
        None. The result is delivered through ``o``.

    Raises:
        TypeError: If two values of ``a`` cannot be compared. In that
            case neither ``a`` nor ``o`` has been modified.
    """
    # sorted() is stable, so ties keep the iteration order of ``a``.
    ordered = sorted(a.items(), key=lambda item: item[1])
    for key, value in ordered:
        o[key] = value
    a.clear()


def call(o):
    """Fill ``o`` from a fixed sample dict via get_sortest_key and print it.

    Args:
        o: Mapping passed to get_sortest_key as the destination; it is
            mutated by that call and then printed.
    """
    a = {'a': 9, 'b': 1, 'c': 3, 'k': 3, 'l': -1, 's': 100}
    # The result is delivered through ``o``, so the return value is not kept.
    get_sortest_key(a, o)
    print(o)


# Run the demo: hand call() a fresh, empty dict to fill and print.
o = dict()
call(o)
查看更多
再贱就再见
3楼-- · 2020-05-21 09:24
# sort json, largest update_time first.
# The timestamp is converted to int so that values compare numerically
# (as strings, "10" sorts before "2") and so that the integer default used
# for records without 'update_time' is comparable with the real values;
# Python 3 raises TypeError when ordering a str against an int.
# A non-numeric update_time raises ValueError.
lines = sorted(lines, key=lambda k: int(k['page'].get('update_time', 0)), reverse=True)
查看更多
欢心
4楼-- · 2020-05-21 09:25

Write a function that uses try...except to handle the KeyError, then use this as the key argument instead of your lambda.

def extract_time(json):
    """Return the record's page update_time as an int, for use as a sort key.

    Args:
        json: A parsed record, expected to look like
            ``{"page": {"update_time": "<digits>"}}``.

    Returns:
        The integer value of ``json['page']['update_time']``, or 0 when
        either the 'page' or the 'update_time' key is absent.
    """
    try:
        raw_time = json['page']['update_time']
    except KeyError:
        # Records without a timestamp get the fallback key 0.
        return 0
    # update_time is stored as a string; compare numerically, because as
    # strings "10" is smaller than "2".
    return int(raw_time)

# Sorting in place with lines.sort() avoids building the second list that
# lines = sorted(lines, ...) would create.
lines.sort(key=extract_time, reverse=True)
查看更多
一夜七次
5楼-- · 2020-05-21 09:26

You can use dict.get() with a default value:

# int() makes the timestamps compare numerically and keeps them comparable
# with the integer default used when 'update_time' is missing (Python 3
# raises TypeError when ordering a str against an int).
lines = sorted(lines, key=lambda k: int(k['page'].get('update_time', 0)), reverse=True)

Example:

>>> lines = [
...     {"page": {"url": "url1", "update_time": "1415387875"}, "other_key": {}},
...     {"page": {"url": "url2", "update_time": "1415381963"}, "other_key": {}},
...     {"page": {"url": "url3", "update_time": "1415384938"}, "other_key": {}},
...     {"page": {"url": "url4"}, "other_key": {}},
...     {"page": {"url": "url5"}, "other_key": {}}
... ]
>>> lines = sorted(lines, key=lambda k: k['page'].get('update_time', 0), reverse=True)
>>> for line in lines:
...     print line
... 
{'other_key': {}, 'page': {'url': 'url1', 'update_time': '1415387875'}}
{'other_key': {}, 'page': {'url': 'url3', 'update_time': '1415384938'}}
{'other_key': {}, 'page': {'url': 'url2', 'update_time': '1415381963'}}
{'other_key': {}, 'page': {'url': 'url4'}}
{'other_key': {}, 'page': {'url': 'url5'}}

Though, I would still follow the EAFP principle that Ferdinand suggested - this way you would also handle cases where the page key is missing as well. It is much easier to let the lookup fail and handle the exception than to check for all sorts of corner cases.

查看更多
登录 后发表回答