Find nearest cities from the data frame to the specific location

2019-07-18 15:54发布

The data frame below contains the latitude, longitude, state and city of each location. I want to find the three nearest cities for every city in the data frame. For example, in the data frame below, Oklahoma City and Colorado Springs (spelled "Colarado Springs" in the data) are nearest to Albuquerque, so the three nearest cities to Albuquerque should be saved in another data frame named nearest_Al (I don't know how to get this result; that's why I tried to give an idea of it by creating a data frame by hand).

# Example input: one row per city with its longitude, latitude, state code
# and city name.
# NOTE(review): long and lat are written as quoted strings, so they are not
# numeric here; convert them (e.g. as.numeric(as.character(x))) before doing
# arithmetic on them.
dataframe<-data.frame(long=c("-106.61291","-81.97224","-84.42770","-72.68604","-97.60056","-104.70261"),
  lat=c("35.04333","33.37378","33.64073","41.93887","35.39305","38.80171"),
  state=c("NM","GA","GA","TX","OK","CO"),
  city=c("Albuquerque","Augusta","Atlanta","Windsor Locks","Oklahoma City","Colarado Springs")
)

# Hand-built illustration of the desired result for Albuquerque: the long,
# lat, state and city values of Oklahoma City and Colarado Springs.
nearest_Al<-data.frame(long=c("-97.60056","-104.70261"),
                      lat=c("35.39305","38.80171"),
                      state=c("OK","CO"),
                      city=c("Oklahoma City","Colarado Springs")
)

I have to perform the same operation on a data frame that contains about 500k rows and around 100 locations.

Thanks in advance!

标签: r spatial
3条回答
对你真心纯属浪费
2楼-- · 2019-07-18 16:45

This might be a little slow with all your data but it does the trick

# Example cities; longitude and latitude are stored as numeric columns.
dataframe <- data.frame(
  long = c(-106.61291, -81.97224, -84.42770, -72.68604, -97.60056, -104.70261),
  lat = c(35.04333, 33.37378, 33.64073, 41.93887, 35.39305, 38.80171),
  state = c("NM", "GA", "GA", "TX", "OK", "CO"),
  city = c(
    "Albuquerque", "Augusta", "Atlanta", "Windsor Locks",
    "Oklahoma City", "Colarado Springs"
  )
)

library(sp)
library(rgeos)


# Promote `dataframe` to a spatial points object, using long/lat as the
# coordinates.
coordinates(dataframe) <- ~ long + lat
# Pairwise distance matrix between all points (one row/column per city).
# NOTE(review): no CRS is set, so gDistance() works on the raw coordinates and
# the distances are planar, in coordinate units (degrees here), not metres.
dist_cities <- gDistance(dataframe, byid = TRUE)

# Rank the distances within each row of the distance matrix: row i holds, for
# city i, the rank of every city by distance (the city itself normally gets
# rank 1 because its distance is 0).
# Built in one step instead of growing a data frame with rbind() in a loop.
dist_cities_rank <- as.data.frame(t(apply(dist_cities, 1, rank)))

# For every city, collect the city itself (test_city) plus its three nearest
# neighbours together with their distances.
n_cities <- nrow(dist_cities)
three_close_cities <- vector("list", n_cities)
for (i in seq_len(n_cities)) {
  # Order the cities by distance and drop index i explicitly. Relying on the
  # city itself having "rank 1" breaks when distances are tied, because
  # rank() then returns fractional ranks.
  others <- setdiff(order(dist_cities[i, ]), i)
  # sort() keeps the selected neighbours in their original row order.
  nearest <- sort(head(others, 3))

  three_close_cities[[i]] <- list(
    test_city = dataframe[i, ],
    cbind(dataframe[nearest, ], dist_cities[i, nearest])
  )
}
查看更多
看我几分像从前
3楼-- · 2019-07-18 16:46

The following should work for you

I made a distance function that accepts x (longitude of current row in dataframe), y (latitude of current row in dataframe), and dataframe. It returns the top 2 nearest cities (excluding the target city)

 # Return the `n` cities in `z` that are nearest to the point (xi, yi).
 #
 # xi, yi: longitude and latitude of the target point (numeric, character or
 #         factor; converted via as.character()).
 # z:      data frame with `long` and `lat` columns.
 # n:      number of neighbours to return (default 2, the previous fixed
 #         value).
 #
 # Returns the matching rows of `z`, nearest first, with an added `dist`
 # column holding the straight-line (Euclidean) distance in coordinate units.
 # The closest row is dropped on the assumption that it is the target city
 # itself, so the target is expected to be one of the rows of `z`.
 # Note: while defined, this function masks stats::dist().
 dist <- function(xi, yi, z, n = 2) {
   to_num <- function(v) as.double(as.character(v))

   z$dist <- sqrt(
     (to_num(z$long) - to_num(xi))^2 + (to_num(z$lat) - to_num(yi))^2
   )
   z <- z[order(z$dist), , drop = FALSE]

   # Skip row 1 (the target itself) and keep at most `n` of the remaining
   # rows; fewer are returned when `z` is too short.
   neighbours <- head(seq_len(nrow(z))[-1], n)
   z <- z[neighbours, , drop = FALSE]
   rownames(z) <- NULL
   z
 }

tidyverse solution

 library(tidyverse)
 # For each row, attach (in the list column `data`) the data frame of its
 # nearest cities as returned by dist().
 # long/lat are passed straight to dist() with map2(). This avoids the
 # copy + nest() round trip (the unnamed nest(copylong, copylat) form is
 # deprecated since tidyr 1.0) and keeps one result per input row even when
 # rows are repeated.
 mod <- dataframe %>%
   as_tibble() %>%
   mutate(
     data = map2(
       as.character(long), as.character(lat),
       ~ dist(.x, .y, dataframe)
     )
   )

To save only the nearest cities as a separate data frame

 # Stack the per-city neighbour tables into one data frame.
 # bind_rows() on the list column replaces the index loop and is also safe
 # when `mod` has no rows (1:nrow(mod) would iterate over c(1, 0)).
 desired <- bind_rows(mod$data)

Output

         long      lat  state             city      dist
 1 -104.70261 38.80171     CO Colarado Springs  4.216001
 2  -97.60056 35.39305     OK    Oklahoma City  9.019133
 3  -84.42770 33.64073     GA          Atlanta  2.469928
 4  -72.68604 41.93887     TX    Windsor Locks 12.633063
 5  -81.97224 33.37378     GA          Augusta  2.469928
 6  -97.60056 35.39305     OK    Oklahoma City 13.288900
 # etc

Extra

If you want to keep the original database and the nearest cities

 # One row per (city, neighbour) pair: the city's own columns followed by the
 # neighbour's columns and the distance between them.
 # The neighbour columns are renamed with a trailing 1 (long1, lat1, state1,
 # city1) before unnesting, because unnest() in tidyr >= 1.0 stops on
 # duplicate column names instead of renaming them automatically.
 mod <- dataframe %>%
   as_tibble() %>%
   mutate(
     data = map2(
       as.character(long), as.character(lat),
       ~ dist(.x, .y, dataframe) %>%
         rename(long1 = long, lat1 = lat, state1 = state, city1 = city)
     )
   ) %>%
   unnest(data)
Extra output
         long      lat  state             city      long1     lat1 state1            city1      dist
 1 -106.61291 35.04333     NM      Albuquerque -104.70261 38.80171     CO Colarado Springs  4.216001
 2 -106.61291 35.04333     NM      Albuquerque  -97.60056 35.39305     OK    Oklahoma City  9.019133
 3  -81.97224 33.37378     GA          Augusta  -84.42770 33.64073     GA          Atlanta  2.469928
 4  -81.97224 33.37378     GA          Augusta  -72.68604 41.93887     TX    Windsor Locks 12.633063

Split into named list

 # split() already names each element after its city (groups are sorted by
 # name), so no renaming is needed. Assigning dataframe$city afterwards would
 # attach the names in data-frame row order and mislabel the elements.
 L <- split(mod, mod$city)
查看更多
劫难
4楼-- · 2019-07-18 16:51

Here is one idea. dataframe2 is the final output. The Near_City column shows the top three closest cities for each city in the city column.

library(dplyr)
library(sp)
library(rgdal)
library(sf)

# Create example data frame.
# long/lat are entered as quoted strings; stringsAsFactors = FALSE keeps all
# four columns as character vectors rather than factors.
dataframe<-data.frame(long=c("-106.61291","-81.97224","-84.42770","-72.68604","-97.60056","-104.70261"),
                      lat=c("35.04333","33.37378","33.64073","41.93887","35.39305","38.80171"),
                      state=c("NM","GA","GA","TX","OK","CO"),
                      city=c("Albuquerque","Augusta","Atlanta","Windsor Locks","Oklahoma City","Colarado Springs"),
                      stringsAsFactors = FALSE
)

# Build an sf point object directly from the data frame: convert the
# coordinates to numeric, use long/lat as the point geometry and set
# EPSG:4326 (WGS84 longitude/latitude) as the CRS. This needs only sf, without
# an intermediate sp object.
dataframe_sf <- dataframe %>%
  mutate(long = as.numeric(long), lat = as.numeric(lat)) %>%
  st_as_sf(coords = c("long", "lat"), crs = 4326)

# Calculate the pairwise distances between all cities (one row/column per
# city). With a geographic CRS, st_distance() returns geodesic distances in
# metres.
dist_m <- st_distance(dataframe_sf, dataframe_sf)

# Select the three closest cities for every city.
# Column j of the order() matrix lists the row numbers of all cities sorted by
# distance from city j. Row 1 is normally city j itself (distance 0), so rows
# 2 to 4 are the three nearest other cities.
index <- apply(dist_m, 1, order)[2:4, ]

# Repeat each city once per neighbour; `index` has one row per neighbour, so
# the repeat count follows it instead of being hard-coded.
dataframe2 <- dataframe[rep(seq_len(nrow(dataframe)), each = nrow(index)), ]

# Store the neighbour names in the Near_City column. as.vector() reads `index`
# column by column (all neighbours of city 1, then of city 2, ...), which
# matches the row order of dataframe2.
dataframe2$Near_City <- dataframe$city[as.vector(index)]

Update

We can further create the output the OP wants.

# One row per (target city, neighbour) pair: the neighbour's own columns plus
# the name of the city it is a neighbour of.
dataframe3 <- dataframe[as.vector(index), ]
dataframe3[["TargetCity"]] <- dataframe2[["city"]]

# One data frame of neighbours per target city, named after the target city.
nearest_city_list <- split(dataframe3, dataframe3[["TargetCity"]])

Now each target city is an element of the list nearest_city_list. To access the data, index the list by the target city's name. Here is an example that pulls out the results for Albuquerque:

nearest_city_list[["Albuquerque"]]
        long      lat state             city  TargetCity
6 -104.70261 38.80171    CO Colarado Springs Albuquerque
5  -97.60056 35.39305    OK    Oklahoma City Albuquerque
3  -84.42770 33.64073    GA          Atlanta Albuquerque
查看更多
登录 后发表回答