The aim here is to adapt this answer to return array instead of setof datatype.
CREATE FUNCTION split_csvline(
line text, -- the input CSV string
delim_char char(1) = ',', -- can be also E'\t', ';', '|', etc.
quote_char char(1) = '"' -- field quotation
) RETURNS text[] AS $f$
import csv
row = csv.reader(
[line],
quotechar=quote_char,
delimiter=delim_char,
skipinitialspace=True,
escapechar='\\'
)
next(row)
$f$ IMMUTABLE language PLpythonU;
SELECT split_csvline('a,b'); -- empty!
EDIT
Notes
It is a question about "using Python with PostgreSQL".
I am using PLpythonU because the staff use Python and because CSV is complex and need reliable (years of test) algorithm.
Not need a workaround, because a simple workaround is in use:
CREATE FUNCTION split_csv_line(
text, char(1) DEFAULT ',', char(1) DEFAULT '"'
) RETURNS text[] AS $f$
SELECT x FROM split_csv($1,$2,$3) x LIMIT 1;
$f$ language SQL IMMUTABLE;
The csv.reader
function returns a reader
object. next
is a reader
object method:
create or replace function split_csvline(
_line text,
_delim_char char(1) = ',',
_quote_char char(1) = '"'
) returns text[] as $f$
import csv
reader = csv.reader(
[_line],
quotechar = _quote_char,
delimiter = _delim_char,
skipinitialspace = True,
escapechar = '\\'
)
return reader.next()
$f$ immutable language plpythonu;
select split_csvline('a,b');
split_csvline
---------------
{a,b}
You could use PL/pgSQL:
CREATE OR REPLACE FUNCTION split_csvline(
line text, -- the input CSV string
delim_char char(1) = ',', -- can be also E'\t', ';', '|', etc.
quote_char char(1) = '"', -- field quotation
OUT result text[]
) LANGUAGE plpgsql AS
$$DECLARE
i integer;
t text := '';
c char(1);
/*
* 0 means unquoted
* 1 means quoted
* 2 means quoted, and we just read a quote
*/
q integer := 0;
BEGIN
/* loop through the characters */
FOR i IN 1..length(line) LOOP
/* get i-th character */
c := substring(line FROM i FOR 1);
/* end of string is at an unquoted delimiter */
IF c = delim_char AND q <> 1 THEN
result := result || t;
t := '';
q := 0;
ELSIF c = quote_char THEN
CASE q
WHEN 0 THEN
q := 1;
WHEN 1 THEN
IF c = quote_char THEN
q := 2;
ELSE
t := t || c;
END IF;
WHEN 2 THEN
q := 1;
t := t || quote_char;
END CASE;
ELSE
IF q = 2 THEN
q := 0;
END IF;
t := t || c;
END IF;
END LOOP;
/* add the last string */
result := result || t;
END;$$;