Obtener datos json importados en un dataframe

Tengo un archivo que contiene más de 1500 objetos JSON con los que quiero trabajar en R. Pude importar los datos como una lista, pero tengo problemas para convertirlos a una estructura útil. Quiero crear un dataframe que contenga una fila por cada objeto JSON y una columna por cada par clave:valor.

He recreado mi situación con este conjunto de datos pequeño y falso:

[{"name":"Doe, John","group":"Red","age (y)":24,"height (cm)":182,"wieght (kg)":74.8,"score":null}, {"name":"Doe, Jane","group":"Green","age (y)":30,"height (cm)":170,"wieght (kg)":70.1,"score":500}, {"name":"Smith, Joan","group":"Yellow","age (y)":41,"height (cm)":169,"wieght (kg)":60,"score":null}, {"name":"Brown, Sam","group":"Green","age (y)":22,"height (cm)":183,"wieght (kg)":75,"score":865}, {"name":"Jones, Larry","group":"Green","age (y)":31,"height (cm)":178,"wieght (kg)":83.9,"score":221}, {"name":"Murray, Seth","group":"Red","age (y)":35,"height (cm)":172,"wieght (kg)":76.2,"score":413}, {"name":"Doe, Jane","group":"Yellow","age (y)":22,"height (cm)":164,"wieght (kg)":68,"score":902}] 

Algunas características de los datos:

  • Todos los objetos contienen el mismo número de pares clave: valor, aunque algunos de los valores son nulos
  • Hay dos columnas no numéricas por objeto (nombre y grupo)
  • nombre es el identificador único, hay 10 o más grupos
  • muchos de los nombres y grupos incluyen espacios, comas y otros signos de puntuación.

Basándome en esta pregunta, «R list(structure(list())) to data frame», intenté lo siguiente:

 json_file <- "test.json" json_data <- fromJSON(json_file) asFrame <- do.call("rbind.fill", lapply(json_data, as.data.frame)) 

Con mis datos reales y esta información falsa, la última línea me da este error:

 Error in data.frame(name = "Doe, John", group = "Red", `age (y)` = 24, : arguments imply differing number of rows: 1, 0 

Solo necesita reemplazar los valores NULL por NA:

 require(RJSONIO) json_file <- '[{"name":"Doe, John","group":"Red","age (y)":24,"height (cm)":182,"wieght (kg)":74.8,"score":null}, {"name":"Doe, Jane","group":"Green","age (y)":30,"height (cm)":170,"wieght (kg)":70.1,"score":500}, {"name":"Smith, Joan","group":"Yellow","age (y)":41,"height (cm)":169,"wieght (kg)":60,"score":null}, {"name":"Brown, Sam","group":"Green","age (y)":22,"height (cm)":183,"wieght (kg)":75,"score":865}, {"name":"Jones, Larry","group":"Green","age (y)":31,"height (cm)":178,"wieght (kg)":83.9,"score":221}, {"name":"Murray, Seth","group":"Red","age (y)":35,"height (cm)":172,"wieght (kg)":76.2,"score":413}, {"name":"Doe, Jane","group":"Yellow","age (y)":22,"height (cm)":164,"wieght (kg)":68,"score":902}]' json_file <- fromJSON(json_file) json_file <- lapply(json_file, function(x) { x[sapply(x, is.null)] <- NA unlist(x) }) 

Una vez que tiene un valor no nulo para cada elemento, puede llamar a rbind sin obtener un error:

 do.call("rbind", json_file) name group age (y) height (cm) wieght (kg) score [1,] "Doe, John" "Red" "24" "182" "74.8" NA [2,] "Doe, Jane" "Green" "30" "170" "70.1" "500" [3,] "Smith, Joan" "Yellow" "41" "169" "60" NA [4,] "Brown, Sam" "Green" "22" "183" "75" "865" [5,] "Jones, Larry" "Green" "31" "178" "83.9" "221" [6,] "Murray, Seth" "Red" "35" "172" "76.2" "413" [7,] "Doe, Jane" "Yellow" "22" "164" "68" "902" 

Esto es muy simple si usa library(jsonlite) y su función fromJSON. Además, maneja los valores null y los convierte a NA.

 json_file <- '[{"name":"Doe, John","group":"Red","age (y)":24,"height (cm)":182,"wieght (kg)":74.8,"score":null}, {"name":"Doe, Jane","group":"Green","age (y)":30,"height (cm)":170,"wieght (kg)":70.1,"score":500}, {"name":"Smith, Joan","group":"Yellow","age (y)":41,"height (cm)":169,"wieght (kg)":60,"score":null}, {"name":"Brown, Sam","group":"Green","age (y)":22,"height (cm)":183,"wieght (kg)":75,"score":865}, {"name":"Jones, Larry","group":"Green","age (y)":31,"height (cm)":178,"wieght (kg)":83.9,"score":221}, {"name":"Murray, Seth","group":"Red","age (y)":35,"height (cm)":172,"wieght (kg)":76.2,"score":413}, {"name":"Doe, Jane","group":"Yellow","age (y)":22,"height (cm)":164,"wieght (kg)":68,"score":902}]' library(jsonlite) fromJSON(json_file) # name group age (y) height (cm) wieght (kg) score # 1 Doe, John Red 24 182 74.8 NA # 2 Doe, Jane Green 30 170 70.1 500 # 3 Smith, Joan Yellow 41 169 60.0 NA # 4 Brown, Sam Green 22 183 75.0 865 # 5 Jones, Larry Green 31 178 83.9 221 # 6 Murray, Seth Red 35 172 76.2 413 # 7 Doe, Jane Yellow 22 164 68.0 902 str(fromJSON(json_file)) # 'data.frame': 7 obs. of 6 variables: # $ name : chr "Doe, John" "Doe, Jane" "Smith, Joan" "Brown, Sam" ... # $ group : chr "Red" "Green" "Yellow" "Green" ... # $ age (y) : int 24 30 41 22 31 35 22 # $ height (cm): int 182 170 169 183 178 172 164 # $ wieght (kg): num 74.8 70.1 60 75 83.9 76.2 68 # $ score : int NA 500 NA 865 221 413 902 
 library(rjson) Lines <- readLines("yelp_academic_dataset_business.json") business <- as.data.frame(t(sapply(Lines, fromJSON))) 

Puede intentar esto para cargar datos JSON en R

 dplyr::bind_rows(fromJSON(file_name)) 

Para sustituir los valores nulos por NA, use el parámetro nullValue de fromJSON (paquete RJSONIO):

 json_data <- fromJSON(json_file, nullValue = NA) asFrame <- do.call("rbind.fill", lapply(json_data, as.data.frame)) 

De esta manera no habrá comillas innecesarias en su salida.