Netezza SQL convert VARCHAR to binary string

2019-04-29 20:06发布

问题:

I have a bitmap stored as a VARCHAR in Netteza. Need to convert that VARCHAR to a binary string in Netezza.

Input (Netezza col value - VARCHAR ) = '0xFFFFFFFFFFFFFFFF'

Desired output (VARCHAR)->

'1111111111111111111111111111111111111111111111111111111111111111'

Is there a way to do this using Netezza query ?

I tried

SELECT CAST('0xFFFFFFFFFFFFFFFF' AS VARBINARY(64) ); 

but that throws an error

ERROR [HY000]ERROR: Cannot cast type 'VARCHAR' to 'VARBINARY'

回答1:

You can convert a hex string into binary data and store it in either a VARCHAR or VARBINARY column. I tend to prefer VARCHAR because of the rather limited CASTs that are available for VARBINARY.

To convert a hex string to binary and stored it in a VARCHAR, use the hextoraw function provided with the SQL Extension Toolkit. This is included with Netezza but must be configured and made available by your administrator.

To convert a hex string to binary and store it in a VARBINARY, use the hex_to_binary function included with Netezza (added in v 7.2).

drop table test_table if exists;
DROP TABLE
create table test_table (col1 varchar(50), col2 varbinary(50));
CREATE TABLE

insert into test_table values (hextoraw('464F4F'), hex_to_binary('464F4F'));
INSERT 0 1

select * from test_table;
 COL1 |   COL2
------+-----------
 FOO  | X'464F4F'
(1 row)

From there you'll need a UDF to handle the bit calculations that you want to do. I've put together three simple UDFs that I believe will suit your purpose.

FirstBit returns the position of the first non-zero bit. BitCount returns the total count of non-zero bits. CharToBase2 converts a binary values in a VARCHAR of 1s and 0s.

I think the first two get the end result that you need without the third, but in case you still wanted that, it's here.

select firstbit(hextoraw('0000')), bitcount(hextoraw('0000')), chartobase2(hextoraw('0000'));
FIRSTBIT | BITCOUNT |   CHARTOBASE2
----------+----------+------------------
       -1 |        0 | 0000000000000000
(1 row)

select firstbit(hextoraw('0001')), bitcount(hextoraw('0001')), chartobase2(hextoraw('0001'));
 FIRSTBIT | BITCOUNT |   CHARTOBASE2
----------+----------+------------------
       15 |        1 | 0000000000000001
(1 row)

select firstbit(hextoraw('FFFF')), bitcount(hextoraw('FFFF')), chartobase2(hextoraw('FFFF'));
 FIRSTBIT | BITCOUNT |   CHARTOBASE2
----------+----------+------------------
        0 |       16 | 1111111111111111
(1 row)

Here are the sources for each. Please note that I am a terrible C++ coder, and would likely be fired if that were my job, so caveat emptor.

BitCount.cpp

#include "udxinc.h"
#include <string.h>

using namespace nz::udx;

class BitCount : public Udf
{
    public:
        static Udf* instantiate();

    ReturnValue evaluate()
    {
        StringArg* str = stringArg(0);
        int32 retval = 0;


        for(int i=0; i< str->length; i++)
                {

                for (int y=7; y>=0 ; y--)
                        {

                        if ((str->data[i] & (unsigned char)pow(2,y)) > 0)
                                {
                                        retval++;

                                }

                        }
                }

        NZ_UDX_RETURN_INT32(retval);
    }

};

Udf* BitCount::instantiate()
{
    return new BitCount;
}

FirstBit.cpp

#include "udxinc.h"
#include <string.h>

using namespace nz::udx;

class FirstBit : public Udf
{
    public:
        static Udf* instantiate();

    ReturnValue evaluate()
    {
        StringArg* str = stringArg(0);
        int32 retval = -1;


        for(int i=0; i< str->length; i++) {

                for (int y=7; y>=0 ; y--) {

                        if ((str->data[i] & (unsigned char)pow(2,y)) > 0)
                                {
                                        retval = i*8 + 7 - y;

                                }

                        if (retval > -1) break;
                }
                if (retval > -1)         break;
                }

        NZ_UDX_RETURN_INT32(retval);
    }

};

Udf* FirstBit::instantiate()
{
    return new FirstBit;
}

CharToBase2.cpp

#include "udxinc.h"
#include <string.h>

using namespace nz::udx;

class CharToBase2 : public Udf
{
    public:
        static Udf* instantiate();

    ReturnValue evaluate()
    {
        StringArg* str = stringArg(0);
        StringReturn* result = stringReturnInfo();
        result->size = str->length*8;
        //unsigned char stringbyte = 0 ;


        for(int i=0; i< str->length; i++)
                {


                  for (int y=7; y>=0 ; y-- )
                        {

                         if ((str->data[i] & (unsigned char)pow(2,y)) == 0) {
                                        result->data[i*8 + 7 - y] = '0'; }
                                else {
                                        result->data[i*8 + 7 - y] = '1';        }
                        }

                }

        NZ_UDX_RETURN_STRING(result);
    }

    uint64 calculateSize() const
    {
        return sizerStringSizeValue(sizerStringArgSize(0)*8);
    }
};

Udf* CharToBase2::instantiate()
{
    return new CharToBase2;
}

Finally, here are the scripts I used to compile and install each.

install_firstbit.sh DBNAME

DB=$1
if [[ -z $DB ]]; then
 DB=$NZ_DATABASE
fi

if [[ -z $DB ]]; then
 print "Usage: install <database>"
 return 1
fi

export NZ_DATABASE="${DB}"

nzudxcompile FirstBit.cpp \
 --fenced \
 --sig   "FirstBit(varchar(any))" \
 --return  "integer" \
 --class  "FirstBit"

rm FirstBit.o_*

install_bitcount.sh DBNAME

DB=$1
if [[ -z $DB ]]; then
 DB=$NZ_DATABASE
fi

if [[ -z $DB ]]; then
 print "Usage: install <database>"
 return 1
fi

export NZ_DATABASE="${DB}"

nzudxcompile BitCount.cpp \
 --fenced \
 --sig   "BitCount(varchar(any))" \
 --return  "integer" \
 --class  "BitCount"

rm BitCount.o_*

install_chartobase2.sh DBNAME

DB=$1
if [[ -z $DB ]]; then
 DB=$NZ_DATABASE
fi

if [[ -z $DB ]]; then
 print "Usage: install <database>"
 return 1
fi

export NZ_DATABASE="${DB}"

nzudxcompile CharToBase2.cpp \
 --fenced \
 --sig   "CharToBase2(varchar(any))" \
 --return  "varchar(any)" \
 --class  "CharToBase2"

rm CharToBase2.o_*


回答2:

I think you'll need to define a UDF in C, register it with the database, and then call it on your column.

I'd start by looking at either this answer or this one. In both of those cases you'd likely have to strip the leading 0x.



回答3:

following suggestions from @ScottMcG - constructing a UDF to convert hex string to binary string.

input -> 'F0F' desired output -> '11110000111'

----------HexToBin.cpp------

#include "udxinc.h"
#include <string.h>

using namespace nz::udx;

class HexToBin : public Udf
{
    public:
        static Udf* instantiate();

    ReturnValue evaluate()
    {
        StringArg* str = stringArg(0);
        StringReturn* result = stringReturnInfo();
        result->size = str->length*4; // binary representation
        string quads[16] = {"0000", "0001", "0010", "0011", "0100", "0101",
                     "0110", "0111", "1000", "1001", "1010", "1011",
                     "1100", "1101", "1110", "1111"};


        // iterate through the string characters 
        for(int i = 0, len = str->length; i < len; i++)
        {
            char c = str->data[i];
            // concatenate quad to result->data based on character c
            if (c >= '0' && c <= '9') strcat(result->data,  quads[c - '0'].c_str());
            if (c >= 'A' && c <= 'F') strcat(result->data,  quads[10 + c - 'A'].c_str());
            if (c >= 'a' && c <= 'f') strcat(result->data,  quads[10 + c - 'a'].c_str());

        }        

        NZ_UDX_RETURN_STRING(result);
    }

    uint64 calculateSize() const
    {
        return sizerStringSizeValue(sizerStringArgSize(0)*4);
    }
};

Udf* HexToBin::instantiate()
{
    return new HexToBin;
}

--------install_hextobin.sh DBNAME-------

DB=$1
if [[ -z $DB ]]; then
 DB=$NZ_DATABASE
fi

if [[ -z $DB ]]; then
 print "Usage: install <database>"
 return 1
fi

export NZ_DATABASE="${DB}"

nzudxcompile HexToBin.cpp \
 --fenced \
 --sig   "HexToBin(varchar(any))" \
 --return  "varchar(any)" \
 --class  "HexToBin"

rm HexToBin.o_*

Sample results

select hextobin('F0F');
   HEXTOBIN
--------------
 111100001111
(1 row)

select hextobin('00F');
   HEXTOBIN
--------------
 000000001111
(1 row)

select hextobin('F00');
   HEXTOBIN
--------------
 111100000000
(1 row)


标签: sql netezza