How to convert json file into dataframe in R?

2019-02-18 15:04发布

问题:

I have a json text file which reads

{"type":"session.ended","v":2,"post.source":"1306210600-col001.sv1.movenetworks.com:2097#5","post.ip4":"75.114.187.146","post.rtime_secs":1371794661,"post.rtime_text":"2013-06-21 06:04:20","post.time_secs":1371794596,"post.time_text":"2013-06-21 06:03:16","post.time_date":"2013-06-21","post.time_hour":6,"post.late_secs":65,"id.session":"3625657","id.sub":"2370b726-b96e-11e2-b3eb-1231380e1adf","id.partner":"0CB48A664E514CA48D378D152574EDBB","id.service":"BBTV (CBD46B77)","device.make":"Roku","device.model":"3050X","device.info":"Roku;3050X","device.serial":"12C241003940","device.version":"2.5.0.7","device.uuid":"51ce2255-62ad-5778-b2d7-b9543c1476c6","device.os":"Linux","device.os_version":"2.6.35","device.platform":"Roku","device.platform_vendor":"Move Networks","device.platform_version":"1.0.0.20130329","device.licenses":[],"user.type":"Subscriber","ip.provider":"netacuity","ip.postal_code":"48154","ip.dma":505,"ip.dma_name":"unknown","ip.city":"livonia","ip.country":"united states","ip.region":"michigan","ip.continent":"north america","ip.isp":"bright house networks llc","ip.asn":0,"ip.asn_owner":"?","clip.id":"1338713","clip.pub":"CBD46B77","asset.id":524768,"asset.pub":"CBD46B77","asset.length_ms":1800000,"asset.guid":"b7c12c09dc5aec832e142a00b0f191fa","asset.title":"Diya Aur Bati Hum","asset.type":"captured","asset.adult":false,"asset.franchise_guid":"941496e1452b4fce9acfe7b3339924eb","asset.franchise_title":"Diya Aur Bati Hum","container.id":14,"container.pub":"CBD46B77","container.guid":"3caa6afd715e4c57ac4750d29e449a9c","container.title":"SPLUS","usage.elapsed_ms":2312,"usage.viewed_ms":392,"usage.stage":"mainVideo","exp.idle_ms":350,"exp.stalls":0,"exp.stalled_ms":0,"exp.frame_renders":0,"exp.frame_drops":0,"exp.ghost_session_ms":0,"exp.ghost_sessions":0,"exp.qmx_stale_ms":0,"exp.qmx_error_ms":0,"exp.qss_late_ms":0,"exp.qss_error_ms":0,"exp.fom":0,"exp.fom_weight":0,"data.dl_bytes":228,"data.ul_bytes":406,"http.oks":2,"http.errors":0,"http.timeouts":0,"net.throughput":8977,"http.slows":0,"data.bitrate_mean":1968,"data.bitrate_stddev":0,"data.bitrate_median":1950,"data.bitrate_modes":[1950],"data.streamlets":1,"data.late_streamlets":0,"data.all_streamlets":0,"data.bf_streamlets":0,"data.ab_streamlets":0}
{"type":"session.started","v":2,"post.source":"1306210600-col004.sv1.movenetworks.com:2183#6","post.ip4":"63.225.172.43","post.rtime_secs":1371794671,"post.rtime_text":"2013-06-21 06:04:31","post.time_secs":1371794660,"post.time_text":"2013-06-21 06:04:20","post.time_date":"2013-06-21","post.time_hour":6,"post.late_secs":11,"id.session":"232169818","id.sub":"55d514ba-3858-11e2-91a7-12313d08e01f","id.partner":"0CB48A664E514CA48D378D152574EDBB","id.service":"BBTV (CBD46B77)","device.make":"Roku","device.model":"3100X","device.info":"Roku;3100X","device.serial":"13C2AE061481","device.version":"2.5.0.37","device.uuid":"7f5654d5-3aa7-5a5f-bb2b-8084da358942","device.os":"Linux","device.os_version":"2.6.35","device.platform":"Roku","device.platform_vendor":"Move Networks","device.platform_version":"1.0.0.20130615","device.licenses":[],"user.type":"Subscriber","ip.provider":"netacuity","ip.postal_code":"98115","ip.dma":819,"ip.dma_name":"unknown","ip.city":"seattle","ip.country":"united states","ip.region":"washington","ip.continent":"north america","ip.isp":"qwest communications company llc","ip.asn":0,"ip.asn_owner":"?","clip.id":"1339170","clip.pub":"CBD46B77","asset.id":522015,"asset.pub":"CBD46B77","asset.length_ms":7200000,"asset.guid":"c6938cfa200a21e90dce41f5ed131cc2","asset.title":"Spark Top 20","asset.type":"captured","asset.adult":false,"container.id":277,"container.pub":"CBD46B77","container.guid":"03e3a689e245457bba2f98c30ef931fa","container.title":"BIGMGC","usage.stage":"mainVideo","exp.idle_ms":5772}

I want to load it in R and convert to a dataframe.Here field names are a part of the data and also we have unequal no of fields in each row (a total of 13 rows)

Any help will be appreciated.

回答1:

Here's one way to do it:

file <- '[
{"type":"session.ended","v":2,"post.source":"1306210600-col001.sv1.movenetworks.com:2097#5","post.ip4":"75.114.187.146","post.rtime_secs":1371794661,"post.rtime_text":"2013-06-21 06:04:20","post.time_secs":1371794596,"post.time_text":"2013-06-21 06:03:16","post.time_date":"2013-06-21","post.time_hour":6,"post.late_secs":65,"id.session":"3625657","id.sub":"2370b726-b96e-11e2-b3eb-1231380e1adf","id.partner":"0CB48A664E514CA48D378D152574EDBB","id.service":"BBTV (CBD46B77)","device.make":"Roku","device.model":"3050X","device.info":"Roku;3050X","device.serial":"12C241003940","device.version":"2.5.0.7","device.uuid":"51ce2255-62ad-5778-b2d7-b9543c1476c6","device.os":"Linux","device.os_version":"2.6.35","device.platform":"Roku","device.platform_vendor":"Move Networks","device.platform_version":"1.0.0.20130329","device.licenses":[],"user.type":"Subscriber","ip.provider":"netacuity","ip.postal_code":"48154","ip.dma":505,"ip.dma_name":"unknown","ip.city":"livonia","ip.country":"united states","ip.region":"michigan","ip.continent":"north america","ip.isp":"bright house networks llc","ip.asn":0,"ip.asn_owner":"?","clip.id":"1338713","clip.pub":"CBD46B77","asset.id":524768,"asset.pub":"CBD46B77","asset.length_ms":1800000,"asset.guid":"b7c12c09dc5aec832e142a00b0f191fa","asset.title":"Diya Aur Bati Hum","asset.type":"captured","asset.adult":false,"asset.franchise_guid":"941496e1452b4fce9acfe7b3339924eb","asset.franchise_title":"Diya Aur Bati Hum","container.id":14,"container.pub":"CBD46B77","container.guid":"3caa6afd715e4c57ac4750d29e449a9c","container.title":"SPLUS","usage.elapsed_ms":2312,"usage.viewed_ms":392,"usage.stage":"mainVideo","exp.idle_ms":350,"exp.stalls":0,"exp.stalled_ms":0,"exp.frame_renders":0,"exp.frame_drops":0,"exp.ghost_session_ms":0,"exp.ghost_sessions":0,"exp.qmx_stale_ms":0,"exp.qmx_error_ms":0,"exp.qss_late_ms":0,"exp.qss_error_ms":0,"exp.fom":0,"exp.fom_weight":0,"data.dl_bytes":228,"data.ul_bytes":406,"http.oks":2,"http.errors":0,"http.timeouts":0,"net.throughput":8977,"http.slows":0,"data.bitrate_mean":1968,"data.bitrate_stddev":0,"data.bitrate_median":1950,"data.bitrate_modes":[1950],"data.streamlets":1,"data.late_streamlets":0,"data.all_streamlets":0,"data.bf_streamlets":0,"data.ab_streamlets":0}
,{"type":"session.started","v":2,"post.source":"1306210600-col004.sv1.movenetworks.com:2183#6","post.ip4":"63.225.172.43","post.rtime_secs":1371794671,"post.rtime_text":"2013-06-21 06:04:31","post.time_secs":1371794660,"post.time_text":"2013-06-21 06:04:20","post.time_date":"2013-06-21","post.time_hour":6,"post.late_secs":11,"id.session":"232169818","id.sub":"55d514ba-3858-11e2-91a7-12313d08e01f","id.partner":"0CB48A664E514CA48D378D152574EDBB","id.service":"BBTV (CBD46B77)","device.make":"Roku","device.model":"3100X","device.info":"Roku;3100X","device.serial":"13C2AE061481","device.version":"2.5.0.37","device.uuid":"7f5654d5-3aa7-5a5f-bb2b-8084da358942","device.os":"Linux","device.os_version":"2.6.35","device.platform":"Roku","device.platform_vendor":"Move Networks","device.platform_version":"1.0.0.20130615","device.licenses":[],"user.type":"Subscriber","ip.provider":"netacuity","ip.postal_code":"98115","ip.dma":819,"ip.dma_name":"unknown","ip.city":"seattle","ip.country":"united states","ip.region":"washington","ip.continent":"north america","ip.isp":"qwest communications company llc","ip.asn":0,"ip.asn_owner":"?","clip.id":"1339170","clip.pub":"CBD46B77","asset.id":522015,"asset.pub":"CBD46B77","asset.length_ms":7200000,"asset.guid":"c6938cfa200a21e90dce41f5ed131cc2","asset.title":"Spark Top 20","asset.type":"captured","asset.adult":false,"container.id":277,"container.pub":"CBD46B77","container.guid":"03e3a689e245457bba2f98c30ef931fa","container.title":"BIGMGC","usage.stage":"mainVideo","exp.idle_ms":5772}
]'

You need the function fromJSON of the RJSONIO package:

library(RJSONIO)
json <- fromJSON(file, nullValue = NA)

Replace (empty) lists by NA and convert to data frames:

dat <- lapply(json, function(j) {
  as.data.frame(replace(j, sapply(j, is.list), NA))
})

Create a single data frame:

library(plyr)
res <- rbind.fill(dat)

The result (res):

             type v
1   session.ended 2
2 session.started 2
                                    post.source
1 1306210600-col001.sv1.movenetworks.com:2097#5
2 1306210600-col004.sv1.movenetworks.com:2183#6
        post.ip4 post.rtime_secs     post.rtime_text
1 75.114.187.146      1371794661 2013-06-21 06:04:20
2  63.225.172.43      1371794671 2013-06-21 06:04:31
  post.time_secs      post.time_text post.time_date
1     1371794596 2013-06-21 06:03:16     2013-06-21
2     1371794660 2013-06-21 06:04:20     2013-06-21
  post.time_hour post.late_secs id.session
1              6             65    3625657
2              6             11  232169818
                                id.sub
1 2370b726-b96e-11e2-b3eb-1231380e1adf
2 55d514ba-3858-11e2-91a7-12313d08e01f
                        id.partner      id.service
1 0CB48A664E514CA48D378D152574EDBB BBTV (CBD46B77)
2 0CB48A664E514CA48D378D152574EDBB BBTV (CBD46B77)
  device.make device.model device.info device.serial
1        Roku        3050X  Roku;3050X  12C241003940
2        Roku        3100X  Roku;3100X  13C2AE061481
  device.version                          device.uuid
1        2.5.0.7 51ce2255-62ad-5778-b2d7-b9543c1476c6
2       2.5.0.37 7f5654d5-3aa7-5a5f-bb2b-8084da358942
  device.os device.os_version device.platform
1     Linux            2.6.35            Roku
2     Linux            2.6.35            Roku
  device.platform_vendor device.platform_version
1          Move Networks          1.0.0.20130329
2          Move Networks          1.0.0.20130615
  device.licenses  user.type ip.provider ip.postal_code
1              NA Subscriber   netacuity          48154
2              NA Subscriber   netacuity          98115
  ip.dma ip.dma_name ip.city    ip.country  ip.region
1    505     unknown livonia united states   michigan
2    819     unknown seattle united states washington
   ip.continent                           ip.isp ip.asn
1 north america        bright house networks llc      0
2 north america qwest communications company llc      0
  ip.asn_owner clip.id clip.pub asset.id asset.pub
1            ? 1338713 CBD46B77   524768  CBD46B77
2            ? 1339170 CBD46B77   522015  CBD46B77
  asset.length_ms                       asset.guid
1         1800000 b7c12c09dc5aec832e142a00b0f191fa
2         7200000 c6938cfa200a21e90dce41f5ed131cc2
        asset.title asset.type asset.adult
1 Diya Aur Bati Hum   captured       FALSE
2      Spark Top 20   captured       FALSE
              asset.franchise_guid asset.franchise_title
1 941496e1452b4fce9acfe7b3339924eb     Diya Aur Bati Hum
2                             <NA>                  <NA>
  container.id container.pub
1           14      CBD46B77
2          277      CBD46B77
                    container.guid container.title
1 3caa6afd715e4c57ac4750d29e449a9c           SPLUS
2 03e3a689e245457bba2f98c30ef931fa          BIGMGC
  usage.elapsed_ms usage.viewed_ms usage.stage
1             2312             392   mainVideo
2               NA              NA   mainVideo
  exp.idle_ms exp.stalls exp.stalled_ms
1         350          0              0
2        5772         NA             NA
  exp.frame_renders exp.frame_drops exp.ghost_session_ms
1                 0               0                    0
2                NA              NA                   NA
  exp.ghost_sessions exp.qmx_stale_ms exp.qmx_error_ms
1                  0                0                0
2                 NA               NA               NA
  exp.qss_late_ms exp.qss_error_ms exp.fom
1               0                0       0
2              NA               NA      NA
  exp.fom_weight data.dl_bytes data.ul_bytes http.oks
1              0           228           406        2
2             NA            NA            NA       NA
  http.errors http.timeouts net.throughput http.slows
1           0             0           8977          0
2          NA            NA             NA         NA
  data.bitrate_mean data.bitrate_stddev
1              1968                   0
2                NA                  NA
  data.bitrate_median data.bitrate_modes data.streamlets
1                1950               1950               1
2                  NA                 NA              NA
  data.late_streamlets data.all_streamlets
1                    0                   0
2                   NA                  NA
  data.bf_streamlets data.ab_streamlets
1                  0                  0
2                 NA                 NA