gaussian splatting 中的数学公式

这里主要标记一部分cuda代码,和一些参数的说明。

参考资料

https://zhuanlan.zhihu.com/p/680669616

EWA Splatting

3D Gaussian Splatting for Real-Time Radiance Field Rendering

splatting 的流程

Gaussian 球表示的三维模型,具有颜色(球谐参数),世界系下的位置 means3D ,姿态 rotations(四元数), 沿着轴的尺度 scales。渲染过程会先计算每个 Gaussian 球在像素平面下的投影,计算投影的覆盖区域,然后建立每个像素点和覆盖它的高斯球的索引,最后对每个像素点进行渲染的计算过程。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
// Forward rendering procedure for differentiable rasterization
// of Gaussians.
int CudaRasterizer::Rasterizer::forward(
std::function<char* (size_t)> geometryBuffer,
std::function<char* (size_t)> binningBuffer,
std::function<char* (size_t)> imageBuffer,
const int P, //高斯球的数量
int D, //球谐函数展开的维度
int M,
const float* background,
const int width, int height,
const float* means3D,
const float* shs, //球谐系数,每个高斯的系数数量和 D 有关
const float* colors_precomp,
const float* opacities,
const float* scales,
const float scale_modifier,
const float* rotations,
const float* transMat_precomp,
const float* viewmatrix,
const float* projmatrix,
const float* cam_pos,
const float tan_fovx, float tan_fovy,
const bool prefiltered,
float* out_color,
float* out_others,
int* radii,
bool debug)

focal

1
2
3
// int CudaRasterizer::Rasterizer::forward(
const float focal_y = height / (2.0f * tan_fovy);
const float focal_x = width / (2.0f * tan_fovx);

forward 调用 preprocess

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
__global__ void preprocessCUDA(int P, int D, int M,
const float* orig_points,
const glm::vec2* scales,
const float scale_modifier,
const glm::vec4* rotations,
const float* opacities,
const float* shs,
bool* clamped,
const float* transMat_precomp,
const float* colors_precomp,
const float* viewmatrix,
const float* projmatrix,
const glm::vec3* cam_pos,
const int W, int H,
const float tan_fovx, const float tan_fovy,
const float focal_x, const float focal_y,
int* radii,
float2* points_xy_image,
float* depths,
float* transMats,
float* rgb,
float4* normal_opacity,
const dim3 grid,
uint32_t* tiles_touched,
bool prefiltered)

preprocess 首先通过 in_frustum 检查高斯球是否位于相机前方(视图系深度 p_view.z 是否超过阈值 0.2,即是否过近或在相机后方),同时顺带计算了其在 NDC 系下的坐标 p_proj(本版本中该坐标并未参与判断)。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
	// if (!in_frustum(idx, orig_points, viewmatrix, projmatrix, prefiltered, p_view))
// return;
// Device-side visibility test used by preprocess: returns true when the
// Gaussian center at index `idx` lies in front of the camera (view-space
// depth > 0.2). The full NDC frustum check is not performed in this version.
// p_view (out): the point transformed into camera/view space.
__forceinline__ __device__ bool in_frustum(int idx,
const float* orig_points,
const float* viewmatrix,
const float* projmatrix,
bool prefiltered,
float3& p_view)
{
// World-space center of Gaussian `idx` (xyz packed contiguously per point).
float3 p_orig = { orig_points[3 * idx], orig_points[3 * idx + 1], orig_points[3 * idx + 2] };

// Bring points to screen space
float4 p_hom = transformPoint4x4(p_orig, projmatrix);
// Perspective divide; tiny epsilon guards against w == 0.
float p_w = 1.0f / (p_hom.w + 0.0000001f);
// NOTE(review): p_proj (NDC coordinates) is computed but never read below —
// the frustum test against it appears to have been disabled upstream.
float3 p_proj = { p_hom.x * p_w, p_hom.y * p_w, p_hom.z * p_w };
p_view = transformPoint4x3(p_orig, viewmatrix);

// Cull points at or behind the near-depth threshold (view-space z <= 0.2).
if (p_view.z <= 0.2f)
{
if (prefiltered)
{
// Caller claimed points were already pre-filtered, yet one failed the
// depth test — report the inconsistency and abort the kernel.
printf("Point is filtered although prefiltered is set. This shouldn't happen!");
__trap();
}
return false;
}
return true;
}
1
2
3
4
5
6
7
8
9
// Affine transform of a 3D point by a 4x4 matrix, dropping the homogeneous
// row: returns the xyz part of M * [p, 1]. Element (row r, col c) of M is
// stored at matrix[4*c + r] (column-major), so matrix[12..14] is translation.
__forceinline__ __device__ float3 transformPoint4x3(const float3& p, const float* matrix)
{
	const float tx = matrix[0] * p.x + matrix[4] * p.y + matrix[8]  * p.z + matrix[12];
	const float ty = matrix[1] * p.x + matrix[5] * p.y + matrix[9]  * p.z + matrix[13];
	const float tz = matrix[2] * p.x + matrix[6] * p.y + matrix[10] * p.z + matrix[14];
	return { tx, ty, tz };
}

$$
p_{view}=\begin{pmatrix}1&0&0&0\\0&1&0&0\\0&0&1&0\end{pmatrix}\begin{pmatrix}m_{0}&m_{4}&m_{8}&m_{12}\\m_{1}&m_{5}&m_{9}&m_{13}\\m_{2}&m_{6}&m_{10}&m_{14}\\m_{3}&m_{7}&m_{11}&m_{15}\end{pmatrix}\begin{pmatrix}p_{0}\\p_{1}\\p_{2}\\1\end{pmatrix}
$$

这里的projmatrix是什么呢?一路往外找。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
def setup_camera(w, h, k, w2c, near=0.01, far=100):
    """Build a Camera for the CUDA rasterizer.

    Args:
        w, h: image width/height in pixels.
        k: 3x3 pinhole intrinsics (fx, fy on the diagonal; cx, cy in the last column).
        w2c: 4x4 world-to-camera extrinsic matrix.
        near, far: clip-plane depths for the projection matrix.

    Returns:
        A Camera whose projmatrix is the combined world-to-clip transform
        (w2c followed by an OpenGL-style perspective projection), stored
        transposed with a leading batch dimension, as the rasterizer expects.
    """
    fx, fy = k[0][0], k[1][1]
    cx, cy = k[0][2], k[1][2]
    w2c = torch.tensor(w2c).cuda().float()
    # Camera center in world coordinates = translation column of c2w.
    cam_center = torch.inverse(w2c)[:3, 3]
    # Add batch dim and transpose: rasterizer consumes row-vector convention.
    w2c = w2c.unsqueeze(0).transpose(1, 2)
    proj_rows = [
        [2 * fx / w, 0.0, -(w - 2 * cx) / w, 0.0],
        [0.0, 2 * fy / h, -(h - 2 * cy) / h, 0.0],
        [0.0, 0.0, far / (far - near), -(far * near) / (far - near)],
        [0.0, 0.0, 1.0, 0.0],
    ]
    opengl_proj = torch.tensor(proj_rows).cuda().float().unsqueeze(0).transpose(1, 2)
    # Full projection: view transform then perspective projection.
    full_proj = w2c.bmm(opengl_proj)
    return Camera(
        image_height=h,
        image_width=w,
        tanfovx=w / (2 * fx),
        tanfovy=h / (2 * fy),
        bg=torch.tensor([0, 0, 0], dtype=torch.float32, device="cuda"),
        scale_modifier=1.0,
        viewmatrix=w2c,
        projmatrix=full_proj,
        sh_degree=0,
        campos=cam_center,
        prefiltered=False,
    )