15 #include "reconstruct3d.h"
16 #include "visiontransfer/internal/alignedallocator.h"
27 #include <immintrin.h>
29 #include <emmintrin.h>
37 using namespace visiontransfer;
38 using namespace visiontransfer::internal;
40 namespace visiontransfer {
44 class Reconstruct3D::Pimpl {
48 float* createPointMap(
const unsigned short* dispMap,
int width,
int height,
49 int rowStride,
const float* q,
unsigned short minDisparity,
int subpixelFactor,
50 unsigned short maxDisparity);
52 float* createPointMap(
const ImageSet& imageSet,
unsigned short minDisparity,
unsigned short maxDisparity = 0xFFF);
54 float* createZMap(
const ImageSet& imageSet,
unsigned short minDisparity,
unsigned short maxDisparity);
56 void projectSinglePoint(
int imageX,
int imageY,
unsigned short disparity,
const float* q,
57 float& pointX,
float& pointY,
float& pointZ,
int subpixelFactor);
59 void writePlyFile(
const char* file,
const unsigned short* dispMap,
61 int dispRowStride,
int imageRowStride,
const float* q,
62 double maxZ,
bool binary,
int subpixelFactor,
unsigned short maxDisparity);
64 void writePlyFile(
const char* file,
const ImageSet& imageSet,
65 double maxZ,
bool binary, ColorSource colSource,
unsigned short maxDisparity);
68 std::vector<float, AlignedAllocator<float> > pointMap;
70 float* createPointMapFallback(
const unsigned short* dispMap,
int width,
int height,
71 int rowStride,
const float* q,
unsigned short minDisparity,
int subpixelFactor,
72 unsigned short maxDisparity);
74 float* createPointMapSSE2(
const unsigned short* dispMap,
int width,
int height,
75 int rowStride,
const float* q,
unsigned short minDisparity,
int subpixelFactor,
76 unsigned short maxDisparity);
78 float* createPointMapAVX2(
const unsigned short* dispMap,
int width,
int height,
79 int rowStride,
const float* q,
unsigned short minDisparity,
int subpixelFactor,
80 unsigned short maxDisparity);
82 float* createPointMapNEON(
const unsigned short* dispMap,
int width,
int height,
83 int rowStride,
const float* q,
unsigned short minDisparity,
int subpixelFactor,
84 unsigned short maxDisparity);
89 Reconstruct3D::Reconstruct3D()
93 Reconstruct3D::~Reconstruct3D() {
98 int rowStride,
const float* q,
unsigned short minDisparity,
int subpixelFactor,
99 unsigned short maxDisparity) {
100 return pimpl->createPointMap(dispMap, width, height, rowStride, q, minDisparity,
101 subpixelFactor, maxDisparity);
105 return pimpl->createPointMap(imageSet, minDisparity, maxDisparity);
109 unsigned short maxDisparity) {
110 return pimpl->createZMap(imageSet, minDisparity, maxDisparity);
114 const float* q,
float& pointX,
float& pointY,
float& pointZ,
int subpixelFactor) {
115 pimpl->projectSinglePoint(imageX, imageY, disparity, q, pointX, pointY, pointZ,
121 int imageRowStride,
const float* q,
double maxZ,
bool binary,
int subpixelFactor,
122 unsigned short maxDisparity) {
123 pimpl->writePlyFile(file, dispMap, image, width, height, format, dispRowStride,
124 imageRowStride, q, maxZ, binary, subpixelFactor, maxDisparity);
128 double maxZ,
bool binary, ColorSource colSource,
unsigned short maxDisparity) {
129 pimpl->writePlyFile(file, imageSet, maxZ, binary, colSource, maxDisparity);
134 Reconstruct3D::Pimpl::Pimpl() {
137 float* Reconstruct3D::Pimpl::createPointMap(
const unsigned short* dispMap,
int width,
138 int height,
int rowStride,
const float* q,
unsigned short minDisparity,
139 int subpixelFactor,
unsigned short maxDisparity) {
142 if(pointMap.size() <
static_cast<unsigned int>(4*width*height)) {
143 pointMap.resize(4*width*height);
148 bool angledCameraFallback = (q[15] != 0.0 && minDisparity == 0);
149 (void)angledCameraFallback;
152 if(!angledCameraFallback && maxDisparity <= 0x1000 && width % 16 == 0 && (uintptr_t)dispMap % 32 == 0) {
153 return createPointMapAVX2(dispMap, width, height, rowStride, q,
154 minDisparity, subpixelFactor, maxDisparity);
158 if(!angledCameraFallback && maxDisparity <= 0x1000 && width % 8 == 0 && (uintptr_t)dispMap % 16 == 0) {
159 return createPointMapSSE2(dispMap, width, height, rowStride, q,
160 minDisparity, subpixelFactor, maxDisparity);
164 if(!angledCameraFallback && maxDisparity <= 0x1000 && width % 8 == 0) {
165 return createPointMapNEON(dispMap, width, height, rowStride, q,
166 minDisparity, subpixelFactor, maxDisparity);
170 return createPointMapFallback(dispMap, width, height, rowStride, q,
171 minDisparity, subpixelFactor, maxDisparity);
175 float* Reconstruct3D::Pimpl::createPointMap(
const ImageSet& imageSet,
unsigned short minDisparity,
unsigned short maxDisparity) {
177 throw std::runtime_error(
"ImageSet does not contain a disparity map!");
181 throw std::runtime_error(
"Disparity map must have 12-bit pixel format!");
184 return createPointMap(
reinterpret_cast<unsigned short*
>(imageSet.
getPixelData(ImageSet::IMAGE_DISPARITY)), imageSet.
getWidth(),
189 float* Reconstruct3D::Pimpl::createPointMapFallback(
const unsigned short* dispMap,
int width,
190 int height,
int rowStride,
const float* q,
unsigned short minDisparity,
191 int subpixelFactor,
unsigned short maxDisparity) {
193 float* outputPtr = &pointMap[0];
194 int stride = rowStride / 2;
197 if(minDisparity == 0) {
200 dInvalid = std::numeric_limits<double>::infinity();
203 dInvalid = double(minDisparity) / double(subpixelFactor);
206 for(
int y = 0; y < height; y++) {
207 double qx = q[1]*y + q[3];
208 double qy = q[5]*y + q[7];
209 double qz = q[9]*y + q[11];
210 double qw = q[13]*y + q[15];
212 const unsigned short* dispRow = &dispMap[y*stride];
213 for(
int x = 0; x < width; x++) {
214 unsigned short intDisp = std::max(minDisparity, dispRow[x]);
217 if(intDisp >= maxDisparity) {
220 d = double(intDisp) / double(subpixelFactor);
223 double w = qw + q[14]*d;
225 *outputPtr =
static_cast<float>((qx + q[2]*d)/w);
228 *outputPtr =
static_cast<float>((qy + q[6]*d)/w);
231 *outputPtr =
static_cast<float>((qz + q[10]*d)/w);
243 float* Reconstruct3D::Pimpl::createZMap(
const ImageSet& imageSet,
unsigned short minDisparity,
244 unsigned short maxDisparity) {
246 if(pointMap.size() <
static_cast<unsigned int>(imageSet.
getWidth()*imageSet.
getHeight())) {
250 float* outputPtr = &pointMap[0];
251 int stride = imageSet.
getRowStride(ImageSet::IMAGE_DISPARITY) / 2;
252 const unsigned short* dispMap =
reinterpret_cast<const unsigned short*
>(imageSet.
getPixelData(ImageSet::IMAGE_DISPARITY));
257 if(minDisparity == 0) {
260 dInvalid = std::numeric_limits<double>::infinity();
263 dInvalid = double(minDisparity) / double(subpixelFactor);
266 for(
int y = 0; y < imageSet.
getHeight(); y++) {
267 double qz = q[9]*y + q[11];
268 double qw = q[13]*y + q[15];
270 const unsigned short* dispRow = &dispMap[y*stride];
271 for(
int x = 0; x < imageSet.
getWidth(); x++) {
272 unsigned short intDisp = std::max(minDisparity, dispRow[x]);
275 if(intDisp >= maxDisparity) {
278 d = double(intDisp) / double(subpixelFactor);
281 double w = qw + q[14]*d;
283 *outputPtr =
static_cast<float>((qz + q[10]*d)/w);
292 void Reconstruct3D::Pimpl::projectSinglePoint(
int imageX,
int imageY,
unsigned short disparity,
293 const float* q,
float& pointX,
float& pointY,
float& pointZ,
int subpixelFactor) {
295 double d = disparity / double(subpixelFactor);
296 double w = q[15] + q[14]*d;
297 pointX =
static_cast<float>((imageX*q[0] + q[3])/w);
298 pointY =
static_cast<float>((imageY*q[5] + q[7])/w);
299 pointZ =
static_cast<float>(q[11]/w);
303 float* Reconstruct3D::Pimpl::createPointMapAVX2(
const unsigned short* dispMap,
int width,
304 int height,
int rowStride,
const float* q,
unsigned short minDisparity,
305 int subpixelFactor,
unsigned short maxDisparity) {
308 const __m256 qCol0 = _mm256_setr_ps(q[0], q[4], q[8], q[12], q[0], q[4], q[8], q[12]);
309 const __m256 qCol1 = _mm256_setr_ps(q[1], q[5], q[9], q[13], q[1], q[5], q[9], q[13]);
310 const __m256 qCol2 = _mm256_setr_ps(q[2], q[6], q[10], q[14], q[2], q[6], q[10], q[14]);
311 const __m256 qCol3 = _mm256_setr_ps(q[3], q[7], q[11], q[15], q[3], q[7], q[11], q[15]);
314 const __m256i minDispVector = _mm256_set1_epi16(minDisparity);
315 const __m256i maxDispVector = _mm256_set1_epi16(maxDisparity);
316 const __m256 scaleVector = _mm256_set1_ps(1.0/
double(subpixelFactor));
317 const __m256i zeroVector = _mm256_set1_epi16(0);
319 float* outputPtr = &pointMap[0];
321 for(
int y = 0; y < height; y++) {
322 const unsigned char* rowStart = &
reinterpret_cast<const unsigned char*
>(dispMap)[y*rowStride];
323 const unsigned char* rowEnd = &
reinterpret_cast<const unsigned char*
>(dispMap)[y*rowStride + 2*width];
326 for(
const unsigned char* ptr = rowStart; ptr != rowEnd; ptr += 32) {
327 __m256i disparities = _mm256_load_si256(
reinterpret_cast<const __m256i*
>(ptr));
330 __m256i validMask = _mm256_cmpgt_epi16(maxDispVector, disparities);
331 disparities = _mm256_and_si256(validMask, disparities);
334 disparities = _mm256_max_epi16(disparities, minDispVector);
337 __m256i disparitiesMixup = _mm256_permute4x64_epi64(disparities, 0xd8);
340 __m256 floatDisp = _mm256_cvtepi32_ps(_mm256_unpacklo_epi16(disparitiesMixup, zeroVector));
341 __m256 dispScaled = _mm256_mul_ps(floatDisp, scaleVector);
345 __declspec(align(32)) float dispArray[16];
347 float dispArray[16]__attribute__((aligned(32)));
349 _mm256_store_ps(&dispArray[0], dispScaled);
352 floatDisp = _mm256_cvtepi32_ps(_mm256_unpackhi_epi16(disparitiesMixup, zeroVector));
353 dispScaled = _mm256_mul_ps(floatDisp, scaleVector);
354 _mm256_store_ps(&dispArray[8], dispScaled);
357 for(
int i=0; i<16; i+=2) {
359 __m256 vec = _mm256_setr_ps(x, y, dispArray[i], 1.0,
360 x+1, y, dispArray[i+1], 1.0);
363 __m256 u1 = _mm256_shuffle_ps(vec, vec, _MM_SHUFFLE(0,0,0,0));
364 __m256 u2 = _mm256_shuffle_ps(vec, vec, _MM_SHUFFLE(1,1,1,1));
365 __m256 u3 = _mm256_shuffle_ps(vec, vec, _MM_SHUFFLE(2,2,2,2));
366 __m256 u4 = _mm256_shuffle_ps(vec, vec, _MM_SHUFFLE(3,3,3,3));
368 __m256 prod1 = _mm256_mul_ps(u1, qCol0);
369 __m256 prod2 = _mm256_mul_ps(u2, qCol1);
370 __m256 prod3 = _mm256_mul_ps(u3, qCol2);
371 __m256 prod4 = _mm256_mul_ps(u4, qCol3);
373 __m256 multResult = _mm256_add_ps(_mm256_add_ps(prod1, prod2), _mm256_add_ps(prod3, prod4));
376 __m256 point = _mm256_div_ps(multResult,
377 _mm256_shuffle_ps(multResult,multResult, _MM_SHUFFLE(3,3,3,3)));
380 _mm256_store_ps(outputPtr, point);
393 float* Reconstruct3D::Pimpl::createPointMapSSE2(
const unsigned short* dispMap,
int width,
394 int height,
int rowStride,
const float* q,
unsigned short minDisparity,
395 int subpixelFactor,
unsigned short maxDisparity) {
398 const __m128 qCol0 = _mm_setr_ps(q[0], q[4], q[8], q[12]);
399 const __m128 qCol1 = _mm_setr_ps(q[1], q[5], q[9], q[13]);
400 const __m128 qCol2 = _mm_setr_ps(q[2], q[6], q[10], q[14]);
401 const __m128 qCol3 = _mm_setr_ps(q[3], q[7], q[11], q[15]);
404 const __m128i minDispVector = _mm_set1_epi16(minDisparity);
405 const __m128i maxDispVector = _mm_set1_epi16(maxDisparity);
406 const __m128 scaleVector = _mm_set1_ps(1.0/
double(subpixelFactor));
407 const __m128i zeroVector = _mm_set1_epi16(0);
409 float* outputPtr = &pointMap[0];
411 for(
int y = 0; y < height; y++) {
412 const unsigned char* rowStart = &
reinterpret_cast<const unsigned char*
>(dispMap)[y*rowStride];
413 const unsigned char* rowEnd = &
reinterpret_cast<const unsigned char*
>(dispMap)[y*rowStride + 2*width];
416 for(
const unsigned char* ptr = rowStart; ptr != rowEnd; ptr += 16) {
417 __m128i disparities = _mm_load_si128(
reinterpret_cast<const __m128i*
>(ptr));
420 __m128i validMask = _mm_cmplt_epi16(disparities, maxDispVector);
421 disparities = _mm_and_si128(validMask, disparities);
424 disparities = _mm_max_epi16(disparities, minDispVector);
427 __m128 floatDisp = _mm_cvtepi32_ps(_mm_unpacklo_epi16(disparities, zeroVector));
428 __m128 dispScaled = _mm_mul_ps(floatDisp, scaleVector);
432 __declspec(align(16)) float dispArray[8];
434 float dispArray[8]__attribute__((aligned(16)));
436 _mm_store_ps(&dispArray[0], dispScaled);
439 floatDisp = _mm_cvtepi32_ps(_mm_unpackhi_epi16(disparities, zeroVector));
440 dispScaled = _mm_mul_ps(floatDisp, scaleVector);
441 _mm_store_ps(&dispArray[4], dispScaled);
444 for(
int i=0; i<8; i++) {
446 __m128 u1 = _mm_set1_ps(
static_cast<float>(x));
447 __m128 u2 = _mm_set1_ps(
static_cast<float>(y));
448 __m128 u3 = _mm_set1_ps(dispArray[i]);
449 __m128 u4 = _mm_set1_ps(1.0f);
451 __m128 prod1 = _mm_mul_ps(u1, qCol0);
452 __m128 prod2 = _mm_mul_ps(u2, qCol1);
453 __m128 prod3 = _mm_mul_ps(u3, qCol2);
454 __m128 prod4 = _mm_mul_ps(u4, qCol3);
456 __m128 multResult = _mm_add_ps(_mm_add_ps(prod1, prod2), _mm_add_ps(prod3, prod4));
459 __m128 point = _mm_div_ps(multResult,
460 _mm_shuffle_ps(multResult,multResult, _MM_SHUFFLE(3,3,3,3)));
463 _mm_store_ps(outputPtr, point);
476 float* Reconstruct3D::Pimpl::createPointMapNEON(
const unsigned short* dispMap,
int width,
477 int height,
int rowStride,
const float* q,
unsigned short minDisparity,
478 int subpixelFactor,
unsigned short maxDisparity) {
481 float32x4_t qCol0 = {q[0], q[4], q[8], q[12]};
482 float32x4_t qCol1 = {q[1], q[5], q[9], q[13]};
483 float32x4_t qCol2 = {q[2], q[6], q[10], q[14]};
484 float32x4_t qCol3 = {q[3], q[7], q[11], q[15]};
487 uint16x8_t minDispVector = vdupq_n_u16(minDisparity);
488 uint16x8_t maxDispVector = vdupq_n_u16(maxDisparity);
489 float32x4_t scaleVector = vdupq_n_f32(1.0f /
static_cast<float>(subpixelFactor));
491 float* outputPtr = &pointMap[0];
493 for(
int y = 0; y < height; y++) {
494 const unsigned char* rowStart = &
reinterpret_cast<const unsigned char*
>(dispMap)[y*rowStride];
495 const unsigned char* rowEnd = &
reinterpret_cast<const unsigned char*
>(dispMap)[y*rowStride + 2*width];
498 for(
const unsigned char* ptr = rowStart; ptr != rowEnd; ptr += 16) {
499 uint16x8_t disparities = vld1q_u16(
reinterpret_cast<const uint16_t*
>(ptr));
502 uint16x8_t validMask = vcltq_u16(disparities, maxDispVector);
503 disparities = vandq_u16(validMask, disparities);
506 disparities = vmaxq_u16(disparities, minDispVector);
509 float32x4_t floatDisp = vcvtq_f32_u32(vmovl_u16(vget_low_u16(disparities)));
510 float32x4_t dispScaled = vmulq_f32(floatDisp, scaleVector);
514 vst1q_f32(&dispArray[0], dispScaled);
517 floatDisp = vcvtq_f32_u32(vmovl_u16(vget_high_u16(disparities)));
518 dispScaled = vmulq_f32(floatDisp, scaleVector);
519 vst1q_f32(&dispArray[4], dispScaled);
522 for(
int i=0; i<8; i++) {
524 float32x4_t u1 = vdupq_n_f32(
static_cast<float>(x));
525 float32x4_t u2 = vdupq_n_f32(
static_cast<float>(y));
526 float32x4_t u3 = vdupq_n_f32(dispArray[i]);
527 float32x4_t u4 = vdupq_n_f32(1.0f);
529 float32x4_t prod1 = vmulq_f32(u1, qCol0);
530 float32x4_t prod2 = vmulq_f32(u2, qCol1);
531 float32x4_t prod3 = vmulq_f32(u3, qCol2);
532 float32x4_t prod4 = vmulq_f32(u4, qCol3);
534 float32x4_t multResult = vaddq_f32(vaddq_f32(prod1, prod2), vaddq_f32(prod3, prod4));
537 float32x4_t point = vdivq_f32(multResult, vdupq_laneq_f32(multResult, 3));
540 vst1q_f32(outputPtr, point);
552 void Reconstruct3D::Pimpl::writePlyFile(
const char* file,
const unsigned short* dispMap,
554 int imageRowStride,
const float* q,
double maxZ,
bool binary,
int subpixelFactor,
555 unsigned short maxDisparity) {
557 float* pointMap = createPointMap(dispMap, width, height, dispRowStride,
558 q, 0, subpixelFactor, maxDisparity);
563 for(
int i=0; i<width*height; i++) {
564 if(pointMap[4*i+2] <= maxZ && pointMap[4*i+2] > 0) {
569 pointsCount = width*height;
573 fstream strm(file, binary ? (ios::out | ios::binary) : ios::out);
574 strm <<
"ply" << endl;
577 strm <<
"format binary_little_endian 1.0" << endl;
579 strm <<
"format ascii 1.0" << endl;
582 strm <<
"element vertex " << pointsCount << endl
583 <<
"property float x" << endl
584 <<
"property float y" << endl
585 <<
"property float z" << endl;
586 if (image !=
nullptr) {
588 strm <<
"property uchar red" << endl
589 <<
"property uchar green" << endl
590 <<
"property uchar blue" << endl;
592 strm <<
"end_header" << endl;
595 for(
int i=0; i<width*height; i++) {
599 if((maxZ < 0 || pointMap[4*i+2] <= maxZ) && pointMap[4*i+2] > 0) {
602 strm.write(
reinterpret_cast<char*
>(&pointMap[4*i]),
sizeof(
float)*3);
603 if (image ==
nullptr) {
606 const unsigned char* col = &image[y*imageRowStride + 3*x];
607 strm.write(
reinterpret_cast<const char*
>(col), 3*
sizeof(*col));
609 const unsigned char* col = &image[y*imageRowStride + x];
610 unsigned char writeData[3] = {*col, *col, *col};
611 strm.write(
reinterpret_cast<const char*
>(writeData),
sizeof(writeData));
613 const unsigned short* col =
reinterpret_cast<const unsigned short*
>(&image[y*imageRowStride + 2*x]);
614 unsigned char writeData[3] = {
615 (
unsigned char)(*col >> 4),
616 (
unsigned char)(*col >> 4),
617 (
unsigned char)(*col >> 4)
619 strm.write(
reinterpret_cast<const char*
>(writeData),
sizeof(writeData));
623 if(std::isfinite(pointMap[4*i + 2])) {
624 strm << pointMap[4*i]
625 <<
" " << pointMap[4*i + 1]
626 <<
" " << pointMap[4*i + 2];
628 strm <<
"NaN NaN NaN";
631 if (image ==
nullptr) {
635 const unsigned char* col = &image[y*imageRowStride + 3*x];
636 strm <<
" " <<
static_cast<int>(col[0])
637 <<
" " <<
static_cast<int>(col[1])
638 <<
" " <<
static_cast<int>(col[2]) << endl;
640 const unsigned char* col = &image[y*imageRowStride + x];
641 strm <<
" " <<
static_cast<int>(*col)
642 <<
" " <<
static_cast<int>(*col)
643 <<
" " <<
static_cast<int>(*col) << endl;
645 const unsigned short* col =
reinterpret_cast<const unsigned short*
>(&image[y*imageRowStride + 2*x]);
646 strm <<
" " <<
static_cast<int>(*col >> 4)
647 <<
" " <<
static_cast<int>(*col >> 4)
648 <<
" " <<
static_cast<int>(*col >> 4) << endl;
655 void Reconstruct3D::Pimpl::writePlyFile(
const char* file,
const ImageSet& imageSet,
656 double maxZ,
bool binary, ColorSource colSource,
unsigned short maxDisparity) {
660 int indexDisp = imageSet.
getIndexOf(ImageSet::IMAGE_DISPARITY);
662 if(indexDisp == -1) {
663 throw std::runtime_error(
"No disparity channel present, cannot create point map!");
666 throw std::runtime_error(
"Disparity map must have 12-bit pixel format!");
670 writePlyFile(file,
reinterpret_cast<unsigned short*
>(imageSet.
getPixelData(indexDisp)),
671 (indexImg == -1) ?
nullptr : imageSet.
getPixelData(indexImg),