Optimal parsing of large JSON in background

2019-07-25 02:06发布

问题:

I am parsing a large JSON document similar to this one.

There are 3000+ chapter objects here. I need only the chapter objects with "lang_code":"gb" (around 1300 of them), plus some basic fields such as title and description. So roughly 55% of the JSON is of no use to me.

I am generating the classes for JSON parsing using https://app.quicktype.io/ which gives me correct classes but this method is too slow.

Any suggestions to speed it up?

回答1:

If most of the information that you need is sparse, it's probably best to pick it out in a targeted fashion, rather than create objects for everything.

You can't get around decoding the whole json string, which takes about 60ms on my laptop. Pruning the non-gb chapters takes just a few ms, and mapping what's left to some usable objects takes another few ms. Total time to something usable: <70ms.

import 'dart:convert';
import 'dart:io';

/// Measures how long it takes to decode `manga.json` and reduce it to the
/// chapters whose `lang_code` is `gb`.
///
/// Prints the elapsed time in whole milliseconds, followed by the resulting
/// map of [Chapter] objects. The file read itself is not part of the timing.
void main() {
  final manga = File('manga.json').readAsStringSync();

  // Stopwatch is the core-library facility for measuring elapsed time; it
  // replaces hand-written subtraction of epoch timestamps.
  final stopwatch = Stopwatch()..start();

  Map<String, dynamic> data = json.decode(manga);
  Map<String, dynamic> jChapters = data['chapter'];

  // Prune in place: only the 'gb' entries are converted to objects below.
  jChapters.removeWhere((_, m) => m['lang_code'] != 'gb');

  // NOTE(review): the result is keyed by the chapter number, so entries that
  // share a number end up under a single key - confirm that this is intended.
  Map<String, Chapter> chapters = jChapters.map((_, m) {
    String number = m['chapter'];
    return MapEntry(number, Chapter(number, m['title']));
  });

  stopwatch.stop();
  print(stopwatch.elapsedMilliseconds);

  print(chapters);
}

/// A chapter reduced to the two values this example needs.
class Chapter {
  /// The chapter number, kept as text.
  String number;

  /// The chapter title.
  String title;

  Chapter(this.number, this.title);

  /// Renders the chapter as `Chapter #<number>:<title>`.
  @override
  String toString() {
    return 'Chapter #$number:$title';
  }
}


回答2:

> I am generating the classes for JSON parsing using https://app.quicktype.io/ which gives me correct classes but this method is too slow.

This method is not so slow, even if you deserialize each object.

import 'dart:convert';
import 'dart:io';

import 'package:queries/collections.dart';

import 'json_objects.dart';

/// Times the three stages of handling `bin/manga.json`: decoding the text,
/// building the typed model, and setting up the `gb` chapter query.
///
/// Afterwards prints how many chapters matched and the language code of the
/// first match, if there is one.
void main() {
  final source = File('bin/manga.json').readAsStringSync();
  final timer = Stopwatch()..start();

  final decoded = jsonDecode(source) as Map<String, dynamic>;
  _print('Parsing', timer);

  final response = Response1.fromJson(decoded);
  _print('Deserializing', timer);

  // NOTE(review): if `where`/`select` are lazy, the "Searching" figure below
  // excludes the filtering work triggered later by `count()` - confirm.
  final gbChapters = Dictionary.fromMap(response.chapter)
      .where((entry) => entry.value.langCode == 'gb')
      .select((entry) => entry.value);
  _print('Searching', timer, false);

  print('Found: ${gbChapters.count()} chapter(s)');

  // Only the first matching chapter is printed.
  for (final first in gbChapters.asIterable().take(1)) {
    print('Lang code: ${first.langCode}');
  }
}

/// Stops [sw] and prints `<text>: <elapsed seconds> sec`.
///
/// Unless [restart] is false, [sw] is then reset and started again so that
/// the next stage is timed from zero. With [restart] false, [sw] stays
/// stopped and keeps its elapsed value.
void _print(String text, Stopwatch sw, [bool restart = true]) {
  sw.stop();
  final seconds = sw.elapsedMilliseconds / 1000;
  print('$text: $seconds sec');

  if (!restart) {
    return;
  }
  sw
    ..reset()
    ..start();
}

Results:

Parsing: 0.044 sec
Deserializing: 0.013 sec
Searching: 0.008 sec
Found: 1360 chapter(s)
Lang code: gb

JSON data models:

/// Root object of the decoded response.
///
/// Mirrors the `chapter`, `manga` and `status` keys of the JSON document.
class Response1 {
  /// Entries of the `chapter` object, under their original JSON keys.
  final Map<String, Response1Chapter> chapter;

  /// Contents of the `manga` object.
  final Response1Manga manga;

  /// Value of the `status` key.
  final String status;

  Response1({this.chapter, this.manga, this.status});

  /// Builds a [Response1] from an already decoded JSON map.
  ///
  /// A null `chapter` or `manga` value is passed through as null.
  factory Response1.fromJson(Map<String, dynamic> json) {
    final chapters = _toObjectMap<String, Response1Chapter>(
        json['chapter'], (raw) => Response1Chapter.fromJson(raw));
    final details = _toObject<Response1Manga>(
        json['manga'], (raw) => Response1Manga.fromJson(raw));
    final state = json['status'] as String;

    return Response1(chapter: chapters, manga: details, status: state);
  }

  /// Converts this object back into a JSON-style map with the same keys.
  Map<String, dynamic> toJson() => {
        'chapter': _fromMap(chapter, (e) => e.toJson()),
        'manga': manga?.toJson(),
        'status': status,
      };
}

/// One entry of the `chapter` object.
///
/// Each field corresponds to the snake_case JSON key read in
/// [Response1Chapter.fromJson] and written by [toJson].
class Response1Chapter {
  final String chapter;
  final int groupId;
  final int groupId2;
  final int groupId3;
  final String groupName;

  // The second and third group names are stored as [Object], without a cast.
  final Object groupName2;
  final Object groupName3;

  final String langCode;
  final int timestamp;
  final String title;
  final String volume;

  Response1Chapter({
    this.chapter,
    this.groupId,
    this.groupId2,
    this.groupId3,
    this.groupName,
    this.groupName2,
    this.groupName3,
    this.langCode,
    this.timestamp,
    this.title,
    this.volume,
  });

  /// Builds a chapter from its decoded JSON map, casting each typed value.
  factory Response1Chapter.fromJson(Map<String, dynamic> json) =>
      Response1Chapter(
        chapter: json['chapter'] as String,
        groupId: json['group_id'] as int,
        groupId2: json['group_id_2'] as int,
        groupId3: json['group_id_3'] as int,
        groupName: json['group_name'] as String,
        groupName2: json['group_name_2'],
        groupName3: json['group_name_3'],
        langCode: json['lang_code'] as String,
        timestamp: json['timestamp'] as int,
        title: json['title'] as String,
        volume: json['volume'] as String,
      );

  /// Converts this chapter back into a map using the original JSON keys.
  Map<String, dynamic> toJson() => {
        'chapter': chapter,
        'group_id': groupId,
        'group_id_2': groupId2,
        'group_id_3': groupId3,
        'group_name': groupName,
        'group_name_2': groupName2,
        'group_name_3': groupName3,
        'lang_code': langCode,
        'timestamp': timestamp,
        'title': title,
        'volume': volume,
      };
}

/// Contents of the `manga` object.
///
/// Each field corresponds to the snake_case JSON key read in
/// [Response1Manga.fromJson] and written by [toJson].
class Response1Manga {
  final String artist;
  final String author;
  final String coverUrl;
  final String description;
  final List<int> genres;
  final int hentai;
  final String langFlag;
  final String langName;
  final String lastChapter;
  final Response1MangaLinks links;
  final int status;
  final String title;

  Response1Manga({
    this.artist,
    this.author,
    this.coverUrl,
    this.description,
    this.genres,
    this.hentai,
    this.langFlag,
    this.langName,
    this.lastChapter,
    this.links,
    this.status,
    this.title,
  });

  /// Builds the manga details from their decoded JSON map.
  ///
  /// `genres` is converted element by element and `links` is turned into a
  /// [Response1MangaLinks]; a null value for either is kept as null.
  factory Response1Manga.fromJson(Map<String, dynamic> json) =>
      Response1Manga(
        artist: json['artist'] as String,
        author: json['author'] as String,
        coverUrl: json['cover_url'] as String,
        description: json['description'] as String,
        genres: _toList(json['genres'], (e) => e as int),
        hentai: json['hentai'] as int,
        langFlag: json['lang_flag'] as String,
        langName: json['lang_name'] as String,
        lastChapter: json['last_chapter'] as String,
        links: _toObject(
            json['links'], (e) => Response1MangaLinks.fromJson(e)),
        status: json['status'] as int,
        title: json['title'] as String,
      );

  /// Converts the manga details back into a map using the original JSON keys.
  Map<String, dynamic> toJson() => {
        'artist': artist,
        'author': author,
        'cover_url': coverUrl,
        'description': description,
        'genres': _fromList(genres, (e) => e),
        'hentai': hentai,
        'lang_flag': langFlag,
        'lang_name': langName,
        'last_chapter': lastChapter,
        'links': links?.toJson(),
        'status': status,
        'title': title,
      };
}

/// Contents of the `links` object inside `manga`.
///
/// Every field is a string stored under the JSON key of the same name.
class Response1MangaLinks {
  final String amz;
  final String cdj;
  final String ebj;
  final String engtl;
  final String mal;
  final String mu;
  final String raw;

  Response1MangaLinks({
    this.amz,
    this.cdj,
    this.ebj,
    this.engtl,
    this.mal,
    this.mu,
    this.raw,
  });

  /// Builds the links from their decoded JSON map.
  factory Response1MangaLinks.fromJson(Map<String, dynamic> json) =>
      Response1MangaLinks(
        amz: json['amz'] as String,
        cdj: json['cdj'] as String,
        ebj: json['ebj'] as String,
        engtl: json['engtl'] as String,
        mal: json['mal'] as String,
        mu: json['mu'] as String,
        raw: json['raw'] as String,
      );

  /// Converts the links back into a map using the original JSON keys.
  Map<String, dynamic> toJson() => {
        'amz': amz,
        'cdj': cdj,
        'ebj': ebj,
        'engtl': engtl,
        'mal': mal,
        'mu': mu,
        'raw': raw,
      };
}

/// Applies [toJson] to every non-null element of [data] and collects the
/// results in a new list, in the same order.
///
/// Null elements are kept as null. Returns null when [data] itself is null.
List _fromList(data, Function(dynamic) toJson) {
  if (data == null) return null;

  final encoded = [];
  for (final item in data) {
    encoded.add(item == null ? null : toJson(item));
  }
  return encoded;
}

/// Applies [toJson] to every non-null value of the map-like [data] and
/// returns the results in a new map under the same keys, cast to [K].
///
/// Null values are kept as null. Returns null when [data] itself is null.
Map<K, V> _fromMap<K, V>(data, V Function(dynamic) toJson) {
  if (data == null) return null;

  final encoded = <K, V>{};
  for (final key in data.keys) {
    final entry = data[key];
    // Convert the value before casting the key, so a failure in either
    // step surfaces in the same order as a statement-by-statement version.
    final V converted = entry == null ? null : toJson(entry);
    encoded[key as K] = converted;
  }
  return encoded;
}

/// Applies [fromJson] to every non-null element of [data] and collects the
/// results in a new `List<T>`, in the same order.
///
/// Null elements are kept as null. Returns null when [data] itself is null.
List<T> _toList<T>(data, T Function(dynamic) fromJson) {
  if (data == null) return null;

  final decoded = <T>[];
  for (final item in data) {
    final T converted = item == null ? null : fromJson(item);
    decoded.add(converted);
  }
  return decoded;
}

/// Casts [data] to a JSON map and passes it to [fromJson].
///
/// Returns null, without calling [fromJson], when [data] is null.
T _toObject<T>(data, T Function(Map<String, dynamic>) fromJson) =>
    data == null ? null : fromJson(data as Map<String, dynamic>);

/// Turns every non-null value of the map-like [data] into a [V] by casting
/// it to a JSON map and calling [fromJson], keeping the keys, cast to [K].
///
/// Null values are kept as null. Returns null when [data] itself is null.
Map<K, V> _toObjectMap<K, V>(data, V Function(Map<String, dynamic>) fromJson) {
  if (data == null) return null;

  final decoded = <K, V>{};
  for (final key in data.keys) {
    final entry = data[key];
    // Convert the value before casting the key, so a failure in either
    // step surfaces in the same order as a statement-by-statement version.
    final V converted =
        entry == null ? null : fromJson(entry as Map<String, dynamic>);
    decoded[key as K] = converted;
  }
  return decoded;
}

/*
Response1:
  "manga": Response1Manga
  "chapter": Map<String, Response1Chapter>
  "status": String

Response1Manga:
  "cover_url": String
  "description": String
  "title": String
  "artist": String
  "author": String
  "status": int
  "genres": List<int>
  "last_chapter": String
  "lang_name": String
  "lang_flag": String
  "hentai": int
  "links": Response1MangaLinks

Response1MangaLinks:
  "mu": String
  "amz": String
  "cdj": String
  "ebj": String
  "mal": String
  "raw": String
  "engtl": String

Response1Chapter:
  "volume": String
  "chapter": String
  "title": String
  "lang_code": String
  "group_id": int
  "group_name": String
  "group_id_2": int
  "group_name_2": Object
  "group_id_3": int
  "group_name_3": Object
  "timestamp": int
*/