all
Does anybody know the details of the QOMX_COLOR_FormatYUV420PackedSemiPlanar64x32Tile2m8ka format? It is the output format of the Qualcomm 7x30 hardware decoder. How is the data stored in this color format? Thanks.
all
Does anybody know the details of the QOMX_COLOR_FormatYUV420PackedSemiPlanar64x32Tile2m8ka format? It is the output format of the Qualcomm 7x30 hardware decoder. How is the data stored in this color format? Thanks.
This is my research on converting QOMX_COLOR_FormatYUV420PackedSemiPlanar64x32Tile2m8ka to YUV420Planar (I420). For the definition of the QOMX_COLOR_FormatYUV420PackedSemiPlanar64x32Tile2m8ka
format, you can refer to ($your_android_native_sdk_dir)/WORKING_DIRECTORY/hardware/qcom/media/mm-core/inc/QOMX_IVCommonExtensions.h
in the source code. This function should ideally support generic resolutions; however, I have only tested it at CIF size because no other source input was available. You can of course fake data of other sizes as input. I hope it is helpful if you are working on this.
I have seen that the colors are correct in my CIF test. The code is long (about 390 lines) and rather heavy. It consists of these steps: construct yTileToMb and uvTileToMb, then convert Y and U/V as follows.
///////////////////////////////////////////////////////////////////////
/**
* Enumeration defining the extended uncompressed image/video
* formats.
*
* ENUMS:
* YVU420PackedSemiPlanar : Buffer containing all Y, and then V and U
* interleaved.
* YVU420PackedSemiPlanar32m4ka : YUV planar format, similar to the
* YVU420PackedSemiPlanar format, but with the
* following restrictions:
*
* 1. The width and height of both plane must
* be a multiple of 32 texels.
*
* 2. The base address of both planes must be
* aligned to a 4kB boundary.
*
* YUV420PackedSemiPlanar16m2ka : YUV planar format, similar to the
* YUV420PackedSemiPlanar format, but with the
* following restrictions:
*
* 1. The width of the luma plane must be a
* multiple of 16 pixels.
*
* 2. The address of both planes must be
* aligned to a 2kB boundary.
*
* YUV420PackedSemiPlanar64x32Tile2m8ka : YUV planar format, similar to the
* YUV420PackedSemiPlanar format, but with the
* following restrictions:
*
* 1. The data is laid out in a 4x2 MB tiling
* memory structure
*
* 2. The width of each plane is a multiple of
* 2 4x2 MB tiles.
*
* 3. The height of each plane is a multiple of
* a 4x2 MB tile.
*
* 4. The base address of both planes must be
* aligned to an 8kB boundary.
*
* 5. The tiles are scanned in the order
* defined in the MFCV5.1 User's Manual.
*
* e.g., CIF size in physical location
*
* Luma order(4x2 MB = 64x32 pix): 54 tiles:
* 0 1 6 7 8 9
* 2 3 4 5 10 11
* 12 13 18 19 20 21
* 14 15 16 17 22 23
* 24 25 30 31 32 33
* 26 27 28 29 34 35
* 36 37 42 43 44 45
* 38 39 40 41 46 47
* 48 49 50 51 52 53
* MBs in a y tile:
* 0 1 2 3
* 22 23 24 25
* Chromas order(64x32 pix):
* 0 1 6 7 8 9
* 2 3 4 5 10 11
* 12 13 18 19 20 21
* 14 15 16 17 22 23
* 24 25 26 27 28 29
* MBs in a uv tile:
* 0 1 2 3
* 22 23 24 25
* 44 45 46 47
* 66 67 68 69
*
*/
// YUV420PackedSemiPlanar64x32Tile2m8ka, // 12 bit, yyyyyyyy vuvu
// YUV420Planar, 12 bit, yyyyyyyy uu vv
#pragma pack(1)
/* States of the tile-order walk used while building the tile -> macroblock tables. */
typedef enum {
    Scan_Init    = 0, /* no tile visited yet in the current scan unit */
    Scan_Hor     = 1, /* next tile is reached by stepping horizontally */
    Scan_VerDown = 2, /* next tile lies in the lower tile row of the unit */
    Scan_VerUp   = 3  /* next tile lies in the upper tile row of the unit */
} ScanMode;
// One entry of a tile -> macroblock lookup table: which macroblocks a given source tile holds.
typedef struct{
uint16_t startMbIndex; // raster index (row * macroblocks-per-row + column) of the tile's top-left macroblock
uint8_t numMBs; // number of macroblocks in the tile that carry picture data
// bool lastTileInHor;
bool lastTileInVer; // true for tiles of the trailing tile row that is filled in outside a two-row scan unit
} MbGroup;
#pragma pack()
// Round x up to the next multiple of a. The mask form is only correct when a is a power of two.
// Both arguments are evaluated more than once, so do not pass expressions with side effects.
#define ALIGN_B(x,a) (((x)+(a)-1) &(~((a)-1)))
// Largest picture size the fixed-size tile tables are dimensioned for.
#define MAX_RESOLUTION_X 1920 // 4096
#define MAX_RESOLUTION_Y 1088 // 3072
// Number of 64x32 tiles needed to cover MAX_RESOLUTION_X x MAX_RESOLUTION_Y (each dimension rounded up).
#define MAX_TILES_NUM (((MAX_RESOLUTION_X+63)>>6) * ((MAX_RESOLUTION_Y+31)>>5))
int32_t ToI420::YUV420PackedSemiPlanar64x32Tile2m8kaToYUV420Planar( uint8_t* src, uint8_t* dst_y, uint8_t* dst_u, uint8_t* dst_v,
const int32_t width, const int32_t height,
const int32_t stride_y, const int32_t stride_u, const int32_t stride_v )
{
const int32_t srcStrideY = ALIGN_B( width, 128 );
const int32_t srcHeightY = ALIGN_B( height, 32 );
const int32_t srcStrideUV = srcStrideY; // v/u interlaced
const int32_t srcHeightUV = ALIGN_B( (height>>1), 32 );
const int32_t srcSizeY = ALIGN_B( (srcStrideY * srcHeightY), 8192 );
const int32_t srcSizeUV = ALIGN_B( (srcStrideUV * srcHeightUV), 8192 );
uint8_t* src_y = src;
uint8_t* src_uv = src_y + srcSizeY;
const int32_t wTiles = (width+63)>>6;
const int32_t hTilesY = (height+31)>>5;
const int32_t hTilesUV = (height/2+31)>>5;
const int32_t numTilesY = wTiles*hTilesY;
const int32_t numTilesUV = wTiles*hTilesUV;
const int32_t wMacroblocks = (width+15)>>4;
const int32_t hMacroblocks = (height+15)>>4;
int32_t numMbInTile = 4*2; // y: 4*2; uv: 4*4
const int32_t mbOffsetTileHor = 4;
int32_t mbOffsetTileVer = (wMacroblocks<<1); // y: (wMacroblocks<<1); uv: (wMacroblocks<<2)
MbGroup yTileToMb[MAX_TILES_NUM] = {0}; // each Tile index storing according MB index
MbGroup uvTileToMb[MAX_TILES_NUM] = {0}; // each Tile index storing according MB index
assert( numTilesY <= MAX_TILES_NUM && numTilesUV <= MAX_TILES_NUM );
int32_t availableTilesY = numTilesY;
int32_t availableTilesUV = numTilesUV;
const int32_t numTilesYPerScanUnit = (wTiles<<1);
int32_t tileIndex = 0;
ScanMode preMode = Scan_Init;
ScanMode curMode = Scan_Hor;
int32_t scanedTiles = 0;
int32_t hMbMultiple = 0;
int32_t cntScanTimesInPeriod = 0; // maximal scan times is up to 4
int32_t cnt1stLineTiles = 0;
int32_t cnt2ndLineTiles = 0;
uint16_t mbPosition = 0;
uint16_t lastMbIdx = mbPosition;
uint16_t firstMbIdxUnit = mbPosition;
bool noEnoughMbInTile = false;
// construct yTileToMb table
while( availableTilesY > 0 ){
if ( availableTilesY >= numTilesYPerScanUnit ){
preMode = Scan_Init;
curMode = Scan_Hor;
lastMbIdx = mbPosition;
firstMbIdxUnit = mbPosition;
cntScanTimesInPeriod = 0; // maximal scan times is up to 4
cnt1stLineTiles = 0;
cnt2ndLineTiles = 0;
noEnoughMbInTile = false;
scanedTiles = 0;
while ( scanedTiles < numTilesYPerScanUnit ){
if ( (tileIndex & 3) == 0 ){
firstMbIdxUnit = mbPosition;
}
noEnoughMbInTile = false;
if ( curMode == Scan_Hor ){
if ( (preMode == Scan_VerUp && cnt1stLineTiles+1>= wTiles) ||
(preMode == Scan_VerDown && cnt2ndLineTiles+1>= wTiles) ){
noEnoughMbInTile = (ALIGN_B(width, 16) < srcStrideY);
// yTileToMb[tileIndex].lastTileInHor = true;
}
yTileToMb[tileIndex].startMbIndex = mbPosition;
yTileToMb[tileIndex].numMBs = noEnoughMbInTile ? ((4-((srcStrideY-ALIGN_B(width,16))>>4))<<1) : numMbInTile;
++ tileIndex;
++ cntScanTimesInPeriod;
if ( noEnoughMbInTile && cntScanTimesInPeriod == 1){
if ( preMode == Scan_VerDown ){
preMode = curMode; // scan mode change need upate preMode
curMode = Scan_VerUp;
++ cnt2ndLineTiles;
mbPosition = firstMbIdxUnit - mbOffsetTileVer;
}else if ( preMode == Scan_VerUp ){
preMode = curMode; // scan mode change need upate preMode
curMode = Scan_VerDown;
++ cnt1stLineTiles;
mbPosition = firstMbIdxUnit + mbOffsetTileVer;
}
}else if ( cntScanTimesInPeriod == 2 ){
if ( preMode == Scan_Init || preMode == Scan_VerUp ){
preMode = curMode; // scan mode change need upate preMode
curMode = Scan_VerDown;
++ cnt1stLineTiles;
mbPosition = firstMbIdxUnit + mbOffsetTileVer;
}else if ( preMode == Scan_VerDown ){
preMode = curMode; // scan mode change need upate preMode
curMode = Scan_VerUp;
++ cnt2ndLineTiles;
mbPosition = firstMbIdxUnit - mbOffsetTileVer;
}
}else if ( cntScanTimesInPeriod == 4 ){
if ( preMode == Scan_VerDown ){
++ cnt2ndLineTiles;
mbPosition += mbOffsetTileHor;
}else if ( preMode == Scan_VerUp ){
++ cnt1stLineTiles;
mbPosition += mbOffsetTileHor;
}
}else{
if ( preMode == Scan_Init ){
++ cnt1stLineTiles;
}else if ( preMode == Scan_VerDown ){
++ cnt2ndLineTiles;
}else if ( preMode == Scan_VerUp ){
++ cnt1stLineTiles;
}
if ( cnt2ndLineTiles >= wTiles && preMode == Scan_VerDown ){
preMode = curMode; // scan mode change need upate preMode
curMode = Scan_VerUp;
mbPosition = firstMbIdxUnit - mbOffsetTileVer;
}else if ( cnt1stLineTiles >= wTiles && preMode == Scan_VerUp ){
preMode = curMode; // scan mode change need upate preMode
curMode = Scan_VerDown;
mbPosition = firstMbIdxUnit + mbOffsetTileVer;
}else{
mbPosition += mbOffsetTileHor;
}
}
}else if ( curMode == Scan_VerUp ){
if ( cnt1stLineTiles+1 >= wTiles ){
noEnoughMbInTile = (ALIGN_B(width, 16) < srcStrideY);
// yTileToMb[tileIndex].lastTileInHor = true;
}
yTileToMb[tileIndex].startMbIndex= mbPosition;
yTileToMb[tileIndex].numMBs = noEnoughMbInTile ? ((4-((srcStrideY-ALIGN_B(width,16))>>4))<<1) : numMbInTile;
++ tileIndex;
mbPosition += mbOffsetTileHor;
++ cntScanTimesInPeriod;
++ cnt1stLineTiles;
preMode = curMode; // scan mode change need upate preMode
curMode = Scan_Hor;
}else if ( curMode == Scan_VerDown ){
if ( cnt2ndLineTiles+1 >= wTiles ){
noEnoughMbInTile = (ALIGN_B(width, 16) < srcStrideY);
// yTileToMb[tileIndex].lastTileInHor = true;
}
yTileToMb[tileIndex].startMbIndex= mbPosition;
yTileToMb[tileIndex].numMBs = noEnoughMbInTile ? ((4-((srcStrideY-ALIGN_B(width,16))>>4))<<1) : numMbInTile;
++ tileIndex;
mbPosition += mbOffsetTileHor;
++ cntScanTimesInPeriod;
++ cnt2ndLineTiles;
preMode = curMode; // scan mode change need upate preMode
curMode = Scan_Hor;
}
cntScanTimesInPeriod &= 0x03;
++ scanedTiles;
}
mbPosition = lastMbIdx + (mbOffsetTileVer<<1);
availableTilesY -= numTilesYPerScanUnit;
}else{
scanedTiles = 0;
hMbMultiple = hMacroblocks - (tileIndex/wTiles)*2;
noEnoughMbInTile = false;
while ( scanedTiles < wTiles ){
yTileToMb[tileIndex].startMbIndex = mbPosition;
yTileToMb[tileIndex].lastTileInVer= true;
if ( scanedTiles+1 == wTiles ){
noEnoughMbInTile = (ALIGN_B(width, 16) < srcStrideY);
// yTileToMb[tileIndex].lastTileInHor = true;
}
yTileToMb[tileIndex].numMBs = noEnoughMbInTile ? ((4-((srcStrideY-ALIGN_B(width,16))>>4))*hMbMultiple) : (4*hMbMultiple);
++ tileIndex;
mbPosition += mbOffsetTileHor;
++ scanedTiles;
}
availableTilesY -= wTiles;
}
}
numMbInTile = 4*4;
mbOffsetTileVer = (wMacroblocks<<2);
mbPosition = 0;
tileIndex = 0;
// construct uvTileToMb table
while( availableTilesUV > 0 ){
if ( availableTilesUV >= numTilesYPerScanUnit ){
preMode = Scan_Init;
curMode = Scan_Hor;
lastMbIdx = mbPosition;
firstMbIdxUnit = mbPosition;
cntScanTimesInPeriod= 0; // maximal scan times is up to 4
cnt1stLineTiles = 0;
cnt2ndLineTiles = 0;
noEnoughMbInTile = false;
scanedTiles = 0;
while ( scanedTiles < numTilesYPerScanUnit ){
if ( (tileIndex & 3) == 0 ){
firstMbIdxUnit = mbPosition;
}
noEnoughMbInTile = false;
if ( curMode == Scan_Hor ){
if ( (preMode == Scan_VerUp && cnt1stLineTiles+1>= wTiles) ||
(preMode == Scan_VerDown && cnt2ndLineTiles+1>= wTiles) ){
noEnoughMbInTile = (ALIGN_B(width, 16) < srcStrideY);
// uvTileToMb[tileIndex].lastTileInHor = true;
}
uvTileToMb[tileIndex].startMbIndex = mbPosition;
uvTileToMb[tileIndex].numMBs = noEnoughMbInTile ? ((4-((srcStrideY-ALIGN_B(width,16))>>4))<<2) : numMbInTile;
++ tileIndex;
++ cntScanTimesInPeriod;
if ( noEnoughMbInTile && cntScanTimesInPeriod == 1){
if ( preMode == Scan_VerDown ){
preMode = curMode; // scan mode change need upate preMode
curMode = Scan_VerUp;
++ cnt2ndLineTiles;
mbPosition = firstMbIdxUnit - mbOffsetTileVer;
}else if ( preMode == Scan_VerUp ){
preMode = curMode; // scan mode change need upate preMode
curMode = Scan_VerDown;
++ cnt1stLineTiles;
mbPosition = firstMbIdxUnit + mbOffsetTileVer;
}
}else if ( cntScanTimesInPeriod == 2 ){
if ( preMode == Scan_Init || preMode == Scan_VerUp ){
preMode = curMode; // scan mode change need upate preMode
curMode = Scan_VerDown;
++ cnt1stLineTiles;
mbPosition = firstMbIdxUnit + mbOffsetTileVer;
}else if ( preMode == Scan_VerDown ){
preMode = curMode; // scan mode change need upate preMode
curMode = Scan_VerUp;
++ cnt2ndLineTiles;
mbPosition = firstMbIdxUnit - mbOffsetTileVer;
}
}else if ( cntScanTimesInPeriod == 4 ){
if ( preMode == Scan_VerDown ){
++ cnt2ndLineTiles;
mbPosition += mbOffsetTileHor;
}else if ( preMode == Scan_VerUp ){
++ cnt1stLineTiles;
mbPosition += mbOffsetTileHor;
}
}else{
if ( preMode == Scan_Init ){
++ cnt1stLineTiles;
}else if ( preMode == Scan_VerDown ){
++ cnt2ndLineTiles;
}else if ( preMode == Scan_VerUp ){
++ cnt1stLineTiles;
}
if ( cnt2ndLineTiles >= wTiles && preMode == Scan_VerDown ){
preMode = curMode; // scan mode change need upate preMode
curMode = Scan_VerUp;
mbPosition = firstMbIdxUnit - mbOffsetTileVer;
}else if ( cnt1stLineTiles >= wTiles && preMode == Scan_VerUp ){
preMode = curMode; // scan mode change need upate preMode
curMode = Scan_VerDown;
mbPosition = firstMbIdxUnit + mbOffsetTileVer;
}else{
mbPosition += mbOffsetTileHor;
}
}
}else if ( curMode == Scan_VerUp ){
if ( cnt1stLineTiles+1 >= wTiles ){
noEnoughMbInTile = (ALIGN_B(width, 16) < srcStrideY);
// uvTileToMb[tileIndex].lastTileInHor = true;
}
uvTileToMb[tileIndex].startMbIndex = mbPosition;
uvTileToMb[tileIndex].numMBs = noEnoughMbInTile ? ((4-((srcStrideY-ALIGN_B(width,16))>>4))<<2) : numMbInTile;
++ tileIndex;
mbPosition += mbOffsetTileHor;
++ cntScanTimesInPeriod;
++ cnt1stLineTiles;
preMode = curMode; // scan mode change need upate preMode
curMode = Scan_Hor;
}else if ( curMode == Scan_VerDown ){
if ( cnt2ndLineTiles+1 >= wTiles ){
noEnoughMbInTile = (ALIGN_B(width, 16) < srcStrideY);
// uvTileToMb[tileIndex].lastTileInHor = true;
}
uvTileToMb[tileIndex].startMbIndex = mbPosition;
uvTileToMb[tileIndex].numMBs = noEnoughMbInTile ? ((4-((srcStrideY-ALIGN_B(width,16))>>4))<<2) : numMbInTile;
++ tileIndex;
mbPosition += mbOffsetTileHor;
++ cntScanTimesInPeriod;
++ cnt2ndLineTiles;
preMode = curMode; // scan mode change need upate preMode
curMode = Scan_Hor;
}
cntScanTimesInPeriod &= 0x03;
++ scanedTiles;
}
mbPosition = lastMbIdx + (mbOffsetTileVer<<1);
availableTilesUV -= numTilesYPerScanUnit;
}else{
scanedTiles = 0;
hMbMultiple = hMacroblocks - (tileIndex/wTiles)*4;
noEnoughMbInTile = false;
while ( scanedTiles < wTiles ){
uvTileToMb[tileIndex].startMbIndex = mbPosition;
uvTileToMb[tileIndex].lastTileInVer = true;
if ( scanedTiles+1 == wTiles ){
noEnoughMbInTile = (ALIGN_B(width, 16) < srcStrideY);
// uvTileToMb[tileIndex].lastTileInHor = true;
}
uvTileToMb[tileIndex].numMBs = noEnoughMbInTile ? ((4-((srcStrideY-ALIGN_B(width,16))>>4))*hMbMultiple) : (4*hMbMultiple);
++ tileIndex;
mbPosition += mbOffsetTileHor;
++ scanedTiles;
}
availableTilesUV -= wTiles;
}
}
uint8_t* py = src_y;
tileIndex = 0;
// converting luma componet with yTileToMb
while ( tileIndex < numTilesY ){
uint16_t startMbIndex = yTileToMb[tileIndex].startMbIndex;
const int32_t startMbX = (startMbIndex % wMacroblocks);
const int32_t startMbY = (startMbIndex / wMacroblocks);
int32_t mb_x = startMbX;
int32_t mb_y = startMbY;
const int32_t cntMbLines = yTileToMb[tileIndex].lastTileInVer ? (hMacroblocks - (tileIndex/wTiles)*2) : 2;
const int32_t numMbPerLine = yTileToMb[tileIndex].numMBs / cntMbLines;
const int32_t sizePixelLine = (numMbPerLine << 4);
int32_t mbLine = 0;
while ( mbLine < cntMbLines ){
assert( mb_y < hMacroblocks && mb_x < wMacroblocks );
const int32_t dstOffsetY = (mb_y * stride_y + mb_x)<<4;
int32_t _l = 0;
// luma
while( _l < 16 ){
memcpy( dst_y + dstOffsetY + _l * stride_y, py, sizePixelLine );
py += 64; // eliminate padding (64-sizePixelLine)
++ _l;
}
mb_x = startMbX;
++ mb_y;
++ mbLine;
}
++ tileIndex;
}
uint8_t* puv = src_uv;
tileIndex = 0;
// convering cb/cr componets with uvTileToMb
while ( tileIndex < numTilesUV ){
uint16_t startMbIndex = uvTileToMb[tileIndex].startMbIndex;
const int32_t startMbX = (startMbIndex % wMacroblocks);
const int32_t startMbY = (startMbIndex / wMacroblocks);
int32_t mb_x = startMbX;
int32_t mb_y = startMbY;
const int32_t cntMbLines = uvTileToMb[tileIndex].lastTileInVer ? (hMacroblocks - (tileIndex/wTiles)*4) : 4;
const int32_t numMbPerLine = uvTileToMb[tileIndex].numMBs / cntMbLines;
int32_t mbLine = 0;
while ( mbLine < cntMbLines ){
assert( mb_y < hMacroblocks && mb_x < wMacroblocks );
// cb/cr
int32_t mbIndex = 0;
while ( mbIndex < numMbPerLine ){
assert( mb_y < hMacroblocks && mb_x < wMacroblocks );
const int32_t dstOffsetUV = (mb_y * stride_u + mb_x)<<3;
int32_t _l = 0;
while( _l < 8 ){
const int32_t _offset = dstOffsetUV + _l * stride_u;
uint8_t* _u = dst_u + _offset;
uint8_t* _v = dst_v + _offset;
uint8_t* _src_vu = puv+(mbIndex<<4)+(_l<<6);
int32_t _interlace = 0;
for ( int32_t ichroma = 0; ichroma < 8; ++ichroma ){
_u[ichroma] = _src_vu[_interlace++];
_v[ichroma] = _src_vu[_interlace++];
}
++ _l;
}
++ mb_x;
++ mbIndex;
}
puv += 64*8;
mb_x = startMbX;
++ mb_y;
++ mbLine;
}
if ( cntMbLines < 4 ){
puv += 64*(4-cntMbLines)*8;
}
++ tileIndex;
}
return 0;
}
///////////////////////////////////////////////////////////////////////
Like the name suggests, the data is packed into 64x32 pixels "tiles".
You don't need to know the pixel format if you write the decoded picture into a compatible hardware surface.
I have reverse engineered the format (Luma only for now), at least for some video widths. I don't know (yet) how chroma samples are laid out however, and the code below is still buggy.
/**
 * Copies the luma plane of a decoded OMX buffer into a picture when the
 * decoder outputs QOMX_COLOR_FormatYUV420PackedSemiPlanar64x32Tile2m8ka;
 * the chroma planes are filled with 0x80. Other color formats are ignored.
 *
 * The tile order is generated for two tile rows at a time. The author's
 * notes describe this reverse-engineered order as still buggy for some
 * stream widths, so treat the output as experimental.
 *
 * Every 16th call the raw decoder output is also dumped to /sdcard/yuv/
 * (debug aid, enabled by the "#if 1 //dump" switch below).
 *
 * @param p_dec           decoder; its p_sys supplies the frame stride and color format
 * @param p_pic           destination picture
 * @param p_header        OMX buffer holding the decoded frame
 * @param i_slice_height  not used by this function
 */
void CopyOmxPicture( decoder_t *p_dec, picture_t *p_pic,
                     OMX_BUFFERHEADERTYPE *p_header, int i_slice_height )
{
    decoder_sys_t *p_sys = p_dec->p_sys;
    int i_src_stride = p_sys->out.i_frame_stride;
    /* start of the decoded data; kept untouched for the debug dump */
    uint8_t *p_frame = p_header->pBuffer + p_header->nOffset;
    uint8_t *p_src = p_frame;

    (void)i_slice_height;

    if( p_sys->out.definition.format.video.eColorFormat != QOMX_COLOR_FormatYUV420PackedSemiPlanar64x32Tile2m8ka )
        return;

    uint8_t *to = p_pic->p[0].p_pixels;
    int w = p_pic->p[0].i_visible_pitch;
    int h = p_pic->p[0].i_visible_lines;
    int pitch = p_pic->p[0].i_pitch;
    msg_Dbg(p_dec, "stride %d pitch %d w %d h %d", i_src_stride, pitch, w, h);

    /* An empty plane would give zero-length arrays below. */
    if (w <= 0 || h <= 0)
        return;

    //copy luma plane
    int wtiles = (w + 63) / 64; // number of tiles in horizontal direction
    int htiles = (h + 31) / 32; // number of tiles in vertical direction
    int tile = 0; // FIXME : order differs for other streams
    int tiles_max = 2 * wtiles; // tiles in one pair of tile rows

    /*
     * order[k] is the position, inside the source block of two tile rows, of
     * the k-th tile in picture order. Examples of the generated sequence:
     *   tiles_max ==  8: 0 1 6 7 2 3 4 5
     *   tiles_max == 28: 0 1 6 7 8 9 14 15 16 17 22 23 24 25
     *                    2 3 4 5 10 11 12 13 18 19 20 21 26 27
     */
    int order[tiles_max];
    uint8_t done[tiles_max];
    memset(done, 0, tiles_max);
    order[0] = 0;
    order[1] = 1;
    done[0] = done[1] = 1;
    /* start 4 tiles past the first pair; wrap so narrow pictures stay in bounds */
    int next = (2 + 4) % tiles_max;
    for (int k = 2; k < tiles_max;) {
        /* two consecutive free tiles */
        while (done[next]) { next++; next %= tiles_max; }
        done[next] = 1;
        order[k++] = next++; next %= tiles_max;
        while (done[next]) { next++; next %= tiles_max; }
        done[next] = 1;
        order[k++] = next++; next %= tiles_max;
        if (next == 0)
            continue;
        if (k == tiles_max)
            break;
        /* two more, then jump over the following 4 tiles */
        while (done[next]) { next++; next %= tiles_max; }
        done[next] = 1;
        order[k++] = next++; next %= tiles_max;
        while (done[next]) { next++; next %= tiles_max; }
        done[next] = 1;
        order[k++] = next++;
        next += 4;
        next %= tiles_max;
    }

    i_src_stride += 127; i_src_stride &= ~127; // width is aligned on 128 pixels
    int width_align = i_src_stride - ((wtiles + 1) & ~1) * 64;
    int soff = 0; // byte offset of the current pair of tile rows in the source
    for (int row = 0; row < htiles; row++) { // top to bottom
        int lines = 32;
        if ((row == htiles - 1) && (h & 31))
            lines = h & 31; // last tile row may be cut by the picture height
        for (int col = 0; col < wtiles; col++) { // left to right
            //copy one tile
            int tile_pitch = 64;
            if ((col == wtiles - 1) && (w & 63))
                tile_pitch = w & 63; // last tile column may be cut by the picture width
            int doff = pitch * row * 32 + col * 64;
            for (int l = 0; l < lines; l++) {
                memcpy(&to[doff + l * pitch],
                       &p_src[soff + 64 * 32 * order[tile % tiles_max] + l * 64],
                       tile_pitch);
            }
            if ((++tile % tiles_max) == 0) {
                soff += tiles_max * 64 * 32;
            }
        }
        p_src += width_align;
    }

    // black out chroma
    for (int i = 1; i < p_pic->i_planes; i++)
        memset(p_pic->p[i].p_pixels, 0x80,
               p_pic->p[i].i_pitch * p_pic->p[i].i_visible_lines);

#if 1 //dump
    static int x = 0;
    if ((++x & 15) == 0) {
        char mask[128]; // large enough for the path with four full-width integers
        int n = snprintf(mask, sizeof(mask), "/sdcard/yuv/out%dx%dxp%d-%.3d.yuv", w, h, pitch, x);
        if (n > 0 && (size_t)n < sizeof(mask)) {
            FILE *f = fopen(mask, "wb");
            if (f) {
#if 1
                // raw decoder output, from the start of the decoded data
                size_t s = p_header->nFilledLen;
                if (fwrite(p_frame, s, 1, f) != 1)
                    msg_Dbg(p_dec, "short write while dumping %s", mask);
#else
                if (fwrite(to, pitch*h*3 / 2, 1, f) != 1)
                    msg_Dbg(p_dec, "short write while dumping %s", mask);
#endif
                fclose(f);
            } else {
                msg_Dbg(p_dec, "cannot open dump file %s", mask);
            }
        }
    }
#endif
}