MaxMind's GeoLite2 is a wonderful database and is very useful if you want to map IP addresses to countries.
To do this efficiently, I want to import it into a MySQL database with a scheme like this:
I remember writing an import script for the CSV database long time ago, but the CSV as you can download it today has a very difficult to understand format, at least to me:
network,geoname_id,registered_country_geoname_id,represented_country_geoname_id,is_anonymous_proxy,is_satellite_provider
1.0.0.0/24,2077456,2077456,,0,0
1.0.1.0/24,1814991,1814991,,0,0
1.0.2.0/23,1814991,1814991,,0,0
1.0.4.0/22,2077456,2077456,,0,0
1.0.8.0/21,1814991,1814991,,0,0
1.0.16.0/20,1861060,1861060,,0,0
1.0.32.0/19,1814991,1814991,,0,0
1.0.64.0/18,1861060,1861060,,0,0
1.0.128.0/17,1605651,1605651,,0,0
I'm really stuck at the basics here. What is the most efficient and easiest way to import the database from its CSV representation into MySQL?
I'd suggest using MaxMind's GeoIP2 CSV Converter to put it in the format you want. There is an -include-integer-range
option that will provide from and to integer columns that I think you are looking for. Binaries are available for Windows, Linux (amd64), and OS X.
network,geoname_id,registered_country_geoname_id,represented_country_geoname_id,is_anonymous_proxy,is_satellite_provider
1.0.0.0/24,2077456,2077456,,0,0
1.0.1.0/24,1814991,1814991,,0,0
1.0.2.0/23,1814991,1814991,,0,0
1.0.4.0/22,2077456,2077456,,0,0
create table thing1
( network varchar(20) not null,
geoname_id varchar(20) not null,
registered_country_geoname_id varchar(20) not null,
represented_country_geoname_id varchar(20) not null,
is_anonymous_proxy varchar(20) not null,
is_satellite_provider varchar(20) not null
);
LOAD DATA INFILE 'c:\\dev\\ipaddr.txt'
INTO TABLE thing1
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n'
IGNORE 1 LINES
(@v1,@v2,@v3,@v4,@v5,@v6)
set network=ifnull(@v1,''),
geoname_id=ifnull(@v2,''),
registered_country_geoname_id=ifnull(@v3,''),
represented_country_geoname_id=ifnull(@v4,''),
is_anonymous_proxy=ifnull(@v5,''),
is_satellite_provider=ifnull(@v6,'');
The above came in fine for me.
Begin of Edits below
For the below, attempting to improve this answer after points made by stijn-de-witt in comments.
Note however that the referenced blog in the comments had an error in the update statement for going into ints. So for the time being until I sort that out, I found a varchar modification as seen below.
Edit1 (more to follow, see comments below answer):
Alter the table to get a "from ip to ip range"
alter table thing1 add column from_ip varchar(20), add column to_ip varchar(20);
-- note that those two are nullable at the moment. You can always change that later
Update table for the varchar's just added
update thing1
set from_ip= INET_NTOA(INET_ATON( SUBSTRING_INDEX(network, '/', 1))
& 0xffffffff ^ ((0x1 << ( 32 - SUBSTRING_INDEX(network, '/', -1)) ) -1 )),
to_ip= INET_NTOA(INET_ATON( SUBSTRING_INDEX(network, '/', 1))
| ((0x100000000 >> SUBSTRING_INDEX(network, '/', -1) ) -1 ))
select * from thing1;
(For the above update statement, Credit to Bernd Buffen in this answer )
Results of the above update statement:
mysql> select network,from_ip,to_ip from thing1;
+------------+---------+-----------+
| network | from_ip | to_ip |
+------------+---------+-----------+
| 1.0.1.0/24 | 1.0.1.0 | 1.0.1.255 |
| 1.0.2.0/23 | 1.0.2.0 | 1.0.3.255 |
| 1.0.4.0/22 | 1.0.4.0 | 1.0.7.255 |
+------------+---------+-----------+
From here, check out the MySQL Manual Page Miscellaneous Functions for INET_ATON(expr)
.
Edit2 (thanks to stijn-de-witt again):
alter table thing1 add column uint_from_ip int unsigned, add column uint_to_ip int unsigned;
UPDATE thing1 SET uint_from_ip = inet_aton(SUBSTRING(network, 1, LOCATE('/', network) - 1)),
uint_to_ip = (inet_aton(SUBSTRING(network, 1, LOCATE('/', network) - 1)) + (pow(2, (32-CONVERT(SUBSTRING(network, LOCATE('/', network) + 1), UNSIGNED INT)))-1));
Results:
select network,from_ip,to_ip,uint_from_ip,uint_to_ip from thing1;
+------------+---------+-----------+--------------+------------+
| network | from_ip | to_ip | uint_from_ip | uint_to_ip |
+------------+---------+-----------+--------------+------------+
| 1.0.1.0/24 | 1.0.1.0 | 1.0.1.255 | 16777472 | 16777727 |
| 1.0.2.0/23 | 1.0.2.0 | 1.0.3.255 | 16777728 | 16778239 |
| 1.0.4.0/22 | 1.0.4.0 | 1.0.7.255 | 16778240 | 16779263 |
+------------+---------+-----------+--------------+------------+
(the above credit to this S0BEIT blog after a few fixes mentioned)
It really doesn't seem possible to do this with a simple SQL script, so I've written one in C#. And since importing MySQL databases that are so big is not that simple, I implemented a direct INSERT INTO
into the script itself.
A table structure like the one on the sketch in the question is required for it to work.
using MySql.Data.MySqlClient;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
namespace GeoIPConvert
{
public static class Program
{
public static void Main(string[] args)
{
// https://dev.maxmind.com/geoip/geoip2/geolite2/
List<Country> countries = File.ReadAllLines("Countries.csv")
.Select(line => line.Split(','))
.Where(line => line[4] != "" && line[5] != "")
.Select((line, index) => new Country
{
ID = Convert.ToInt32(line[0]),
DatabaseID = index + 1,
Flag = line[4].ToLower(),
Name = line[5].Replace("\"", "")
})
.ToList();
List<IPRange> ipRanges = File.ReadAllLines("GeoIP.csv")
.Select(line => line.Split(','))
.Where(line => line[2] != "")
.Select(line => new IPRange
{
Country = countries.First(country => country.ID == Convert.ToInt32(line[2])),
From = ConvertCidrToRange(line[0]).Item1,
To = ConvertCidrToRange(line[0]).Item2,
})
.ToList();
//string sql =
// "INSERT INTO geoip_countries(Flag, Name) VALUES\r\n" +
// string.Join(",\r\n", countries.Select(country => "(\"" + country.Flag + "\", \"" + country.Name + "\")").ToArray()) + "\r\n" +
// "INSERT INTO geoip_ipranges(CountryID, `From`, `To`) VALUES\r\n" +
// string.Join(",\r\n", ipRanges.Select(iprange => "(\"" + iprange.Country.DatabaseID + "\", \"" + iprange.From + "\", \"" + iprange.To + "\")").ToArray());
//File.WriteAllText("Import.sql", sql);
using (MySqlConnection sql = new MySqlConnection("Server=localhost;Database=test_db;Uid=root;"))
{
sql.Open();
foreach (Country country in countries)
{
new MySqlCommand("INSERT INTO geoip_countries(Flag, Name) VALUES(\"" + country.Flag + "\", \"" + country.Name + "\")", sql).ExecuteNonQuery();
}
foreach (IPRange ipRange in ipRanges)
{
new MySqlCommand("INSERT INTO geoip_ipranges(CountryID, `From`, `To`) VALUES(\"" + ipRange.Country.DatabaseID + "\", \"" + ipRange.From + "\", \"" + ipRange.To + "\")", sql).ExecuteNonQuery();
Console.WriteLine(ipRange.To);
}
sql.Close();
}
}
private static Tuple<uint, uint> ConvertCidrToRange(string cidr)
{
string[] parts = cidr.Split('.', '/');
uint ipnum = Convert.ToUInt32(parts[0]) << 24 | Convert.ToUInt32(parts[1]) << 16 | Convert.ToUInt32(parts[2]) << 8 | Convert.ToUInt32(parts[3]);
uint mask = uint.MaxValue << (32 - Convert.ToInt32(parts[4]));
return Tuple.Create(ipnum & mask, ipnum | (mask ^ uint.MaxValue));
}
}
public class Country
{
public int ID { get; set; }
public int DatabaseID { get; set; }
public string Flag { get; set; }
public string Name { get; set; }
}
public class IPRange
{
public Country Country { get; set; }
public uint From { get; set; }
public uint To { get; set; }
}
}