-->

Importing a CSV file into a sqlite3 database table

2018-12-31 19:18发布

问题:

I have a CSV file and I want to bulk-import this file into my sqlite3 database using Python. the command is \".import .....\". but it seems that it cannot work like this. Can anyone give me an example of how to do it in sqlite3? I am using windows just in case. Thanks

回答1:

import csv, sqlite3

con = sqlite3.connect(\":memory:\")
cur = con.cursor()
cur.execute(\"CREATE TABLE t (col1, col2);\") # use your column names here

with open(\'data.csv\',\'rb\') as fin: # `with` statement available in 2.5+
    # csv.DictReader uses first line in file for column headings by default
    dr = csv.DictReader(fin) # comma is default delimiter
    to_db = [(i[\'col1\'], i[\'col2\']) for i in dr]

cur.executemany(\"INSERT INTO t (col1, col2) VALUES (?, ?);\", to_db)
con.commit()
con.close()


回答2:

Creating an sqlite connection to a file on disk is left as an exercise for the reader ... but there is now a two-liner made possible by the pandas library

df = pandas.read_csv(csvfile)
df.to_sql(table_name, conn, if_exists=\'append\', index=False)


回答3:

The .import command is a feature of the sqlite3 command-line tool. To do it in Python, you should simply load the data using whatever facilities Python has, such as the csv module, and inserting the data as per usual.

This way, you also have control over what types are inserted, rather than relying on sqlite3\'s seemingly undocumented behaviour.



回答4:

My 2 cents (more generic):

import csv, sqlite3
import logging

def _get_col_datatypes(fin):
    dr = csv.DictReader(fin) # comma is default delimiter
    fieldTypes = {}
    for entry in dr:
        feildslLeft = [f for f in dr.fieldnames if f not in fieldTypes.keys()]
        if not feildslLeft: break # We\'re done
        for field in feildslLeft:
            data = entry[field]

            # Need data to decide
            if len(data) == 0:
                continue

            if data.isdigit():
                fieldTypes[field] = \"INTEGER\"
            else:
                fieldTypes[field] = \"TEXT\"
        # TODO: Currently there\'s no support for DATE in sqllite

    if len(feildslLeft) > 0:
        raise Exception(\"Failed to find all the columns data types - Maybe some are empty?\")

    return fieldTypes


def escapingGenerator(f):
    for line in f:
        yield line.encode(\"ascii\", \"xmlcharrefreplace\").decode(\"ascii\")


def csvToDb(csvFile, outputToFile = False):
    # TODO: implement output to file

    with open(csvFile,mode=\'r\', encoding=\"ISO-8859-1\") as fin:
        dt = _get_col_datatypes(fin)

        fin.seek(0)

        reader = csv.DictReader(fin)

        # Keep the order of the columns name just as in the CSV
        fields = reader.fieldnames
        cols = []

        # Set field and type
        for f in fields:
            cols.append(\"%s %s\" % (f, dt[f]))

        # Generate create table statement:
        stmt = \"CREATE TABLE ads (%s)\" % \",\".join(cols)

        con = sqlite3.connect(\":memory:\")
        cur = con.cursor()
        cur.execute(stmt)

        fin.seek(0)


        reader = csv.reader(escapingGenerator(fin))

        # Generate insert statement:
        stmt = \"INSERT INTO ads VALUES(%s);\" % \',\'.join(\'?\' * len(cols))

        cur.executemany(stmt, reader)
        con.commit()

    return con


回答5:

Many thanks for bernie\'s answer! Had to tweak it a bit - here\'s what worked for me:

import csv, sqlite3
conn = sqlite3.connect(\"pcfc.sl3\")
curs = conn.cursor()
curs.execute(\"CREATE TABLE PCFC (id INTEGER PRIMARY KEY, type INTEGER, term TEXT, definition TEXT);\")
reader = csv.reader(open(\'PC.txt\', \'r\'), delimiter=\'|\')
for row in reader:
    to_db = [unicode(row[0], \"utf8\"), unicode(row[1], \"utf8\"), unicode(row[2], \"utf8\")]
    curs.execute(\"INSERT INTO PCFC (type, term, definition) VALUES (?, ?, ?);\", to_db)
conn.commit()

My text file (PC.txt) looks like this:

1 | Term 1 | Definition 1
2 | Term 2 | Definition 2
3 | Term 3 | Definition 3


回答6:

#!/usr/bin/python
# -*- coding: utf-8 -*-

import sys, csv, sqlite3

def main():
    con = sqlite3.connect(sys.argv[1]) # database file input
    cur = con.cursor()
    cur.executescript(\"\"\"
        DROP TABLE IF EXISTS t;
        CREATE TABLE t (COL1 TEXT, COL2 TEXT);
        \"\"\") # checks to see if table exists and makes a fresh table.

    with open(sys.argv[2], \"rb\") as f: # CSV file input
        reader = csv.reader(f, delimiter=\',\') # no header information with delimiter
        for row in reader:
            to_db = [unicode(row[0], \"utf8\"), unicode(row[1], \"utf8\")] # Appends data from CSV file representing and handling of text
            cur.execute(\"INSERT INTO neto (COL1, COL2) VALUES(?, ?);\", to_db)
            con.commit()
    con.close() # closes connection to database

if __name__==\'__main__\':
    main()


回答7:

You can do this using blaze & odo efficiently

import blaze
csv_path = \'data.csv\'
bz.odo(csv_path, \'sqlite:///data.db::data\')

Odo will store the csv file to data.db (sqlite database) under the schema data

Or you use odo directly, without blaze. Either ways is fine. Read this documentation



回答8:

Based on Guy L solution (Love it) but can handle escaped fields.

import csv, sqlite3

def _get_col_datatypes(fin):
    dr = csv.DictReader(fin) # comma is default delimiter
    fieldTypes = {}
    for entry in dr:
        feildslLeft = [f for f in dr.fieldnames if f not in fieldTypes.keys()]        
        if not feildslLeft: break # We\'re done
        for field in feildslLeft:
            data = entry[field]

            # Need data to decide
            if len(data) == 0:
                continue

            if data.isdigit():
                fieldTypes[field] = \"INTEGER\"
            else:
                fieldTypes[field] = \"TEXT\"
        # TODO: Currently there\'s no support for DATE in sqllite

    if len(feildslLeft) > 0:
        raise Exception(\"Failed to find all the columns data types - Maybe some are empty?\")

    return fieldTypes


def escapingGenerator(f):
    for line in f:
        yield line.encode(\"ascii\", \"xmlcharrefreplace\").decode(\"ascii\")


def csvToDb(csvFile,dbFile,tablename, outputToFile = False):

    # TODO: implement output to file

    with open(csvFile,mode=\'r\', encoding=\"ISO-8859-1\") as fin:
        dt = _get_col_datatypes(fin)

        fin.seek(0)

        reader = csv.DictReader(fin)

        # Keep the order of the columns name just as in the CSV
        fields = reader.fieldnames
        cols = []

        # Set field and type
        for f in fields:
            cols.append(\"\\\"%s\\\" %s\" % (f, dt[f]))

        # Generate create table statement:
        stmt = \"create table if not exists \\\"\" + tablename + \"\\\" (%s)\" % \",\".join(cols)
        print(stmt)
        con = sqlite3.connect(dbFile)
        cur = con.cursor()
        cur.execute(stmt)

        fin.seek(0)


        reader = csv.reader(escapingGenerator(fin))

        # Generate insert statement:
        stmt = \"INSERT INTO \\\"\" + tablename + \"\\\" VALUES(%s);\" % \',\'.join(\'?\' * len(cols))

        cur.executemany(stmt, reader)
        con.commit()
        con.close()


回答9:

import csv, sqlite3

def _get_col_datatypes(fin):
    dr = csv.DictReader(fin) # comma is default delimiter
    fieldTypes = {}
    for entry in dr:
        feildslLeft = [f for f in dr.fieldnames if f not in fieldTypes.keys()]        
        if not feildslLeft: break # We\'re done
        for field in feildslLeft:
            data = entry[field]

        # Need data to decide
        if len(data) == 0:
            continue

        if data.isdigit():
            fieldTypes[field] = \"INTEGER\"
        else:
            fieldTypes[field] = \"TEXT\"
    # TODO: Currently there\'s no support for DATE in sqllite

if len(feildslLeft) > 0:
    raise Exception(\"Failed to find all the columns data types - Maybe some are empty?\")

return fieldTypes


def escapingGenerator(f):
    for line in f:
        yield line.encode(\"ascii\", \"xmlcharrefreplace\").decode(\"ascii\")


def csvToDb(csvFile,dbFile,tablename, outputToFile = False):

    # TODO: implement output to file

    with open(csvFile,mode=\'r\', encoding=\"ISO-8859-1\") as fin:
        dt = _get_col_datatypes(fin)

        fin.seek(0)

        reader = csv.DictReader(fin)

        # Keep the order of the columns name just as in the CSV
        fields = reader.fieldnames
        cols = []

        # Set field and type
        for f in fields:
            cols.append(\"\\\"%s\\\" %s\" % (f, dt[f]))

        # Generate create table statement:
        stmt = \"create table if not exists \\\"\" + tablename + \"\\\" (%s)\" % \",\".join(cols)
        print(stmt)
        con = sqlite3.connect(dbFile)
        cur = con.cursor()
        cur.execute(stmt)

        fin.seek(0)


        reader = csv.reader(escapingGenerator(fin))

        # Generate insert statement:
        stmt = \"INSERT INTO \\\"\" + tablename + \"\\\" VALUES(%s);\" % \',\'.join(\'?\' * len(cols))

        cur.executemany(stmt, reader)
        con.commit()
        con.close()