From aacc4d7278a62ad226a4aa3556df84ee42d59fc6 Mon Sep 17 00:00:00 2001 From: "Dileep V. Reddy" Date: Mon, 9 Sep 2024 19:24:15 -0600 Subject: [PATCH] PointToPseudoPitch() was being called even when viewpoint was not allowed out of bounds. And its sqrt was slow even when it was necessary (thanks dpjudas for the speedup code). --- src/rendering/hwrenderer/scene/hw_bsp.cpp | 61 +++++++++++-------- src/rendering/hwrenderer/scene/hw_clipper.cpp | 15 +++++ 2 files changed, 50 insertions(+), 26 deletions(-) diff --git a/src/rendering/hwrenderer/scene/hw_bsp.cpp b/src/rendering/hwrenderer/scene/hw_bsp.cpp index fafa680d47f..0904a0207a6 100644 --- a/src/rendering/hwrenderer/scene/hw_bsp.cpp +++ b/src/rendering/hwrenderer/scene/hw_bsp.cpp @@ -324,25 +324,28 @@ void HWDrawInfo::AddLine (seg_t *seg, bool portalclip) return; } - auto &clipperv = *vClipper; - angle_t startPitch = clipperv.PointToPseudoPitch(seg->v1->fX(), seg->v1->fY(), currentsector->floorplane.ZatPoint(seg->v1)); - angle_t endPitch = clipperv.PointToPseudoPitch(seg->v1->fX(), seg->v1->fY(), currentsector->ceilingplane.ZatPoint(seg->v1)); - angle_t startPitch2 = clipperv.PointToPseudoPitch(seg->v2->fX(), seg->v2->fY(), currentsector->floorplane.ZatPoint(seg->v2)); - angle_t endPitch2 = clipperv.PointToPseudoPitch(seg->v2->fX(), seg->v2->fY(), currentsector->ceilingplane.ZatPoint(seg->v2)); - angle_t temp; - // Wall can be tilted from viewpoint perspective. 
Find vertical extent on screen in psuedopitch units (0 to 2, bottom to top) - if(int(startPitch) > int(startPitch2)) // Handle zero crossing + if (Viewpoint.IsAllowedOoB()) // No need for vertical clipping if viewpoint not allowed out of bounds { - temp = startPitch; startPitch = startPitch2; startPitch2 = temp; // exchange - } - if(int(endPitch) > int(endPitch2)) // Handle zero crossing - { - temp = endPitch; endPitch = endPitch2; endPitch2 = temp; // exchange - } + auto &clipperv = *vClipper; + angle_t startPitch = clipperv.PointToPseudoPitch(seg->v1->fX(), seg->v1->fY(), currentsector->floorplane.ZatPoint(seg->v1)); + angle_t endPitch = clipperv.PointToPseudoPitch(seg->v1->fX(), seg->v1->fY(), currentsector->ceilingplane.ZatPoint(seg->v1)); + angle_t startPitch2 = clipperv.PointToPseudoPitch(seg->v2->fX(), seg->v2->fY(), currentsector->floorplane.ZatPoint(seg->v2)); + angle_t endPitch2 = clipperv.PointToPseudoPitch(seg->v2->fX(), seg->v2->fY(), currentsector->ceilingplane.ZatPoint(seg->v2)); + angle_t temp; + // Wall can be tilted from viewpoint perspective. 
Find vertical extent on screen in psuedopitch units (0 to 2, bottom to top) + if(int(startPitch) > int(startPitch2)) // Handle zero crossing + { + temp = startPitch; startPitch = startPitch2; startPitch2 = temp; // exchange + } + if(int(endPitch) > int(endPitch2)) // Handle zero crossing + { + temp = endPitch; endPitch = endPitch2; endPitch2 = temp; // exchange + } - if (!clipperv.SafeCheckRange(startPitch, endPitch2)) - { - return; + if (!clipperv.SafeCheckRange(startPitch, endPitch2)) + { + return; + } } if (!r_radarclipper || (Level->flags3 & LEVEL3_NOFOGOFWAR) || clipperr.SafeCheckRange(startAngleR, endAngleR)) @@ -734,7 +737,7 @@ void HWDrawInfo::DoSubsector(subsector_t * sub) int count = sub->numlines; seg_t * seg = sub->firstline; bool anglevisible = false; - bool pitchvisible = false; + bool pitchvisible = !(Viewpoint.IsAllowedOoB()); // No vertical clipping if viewpoint is not allowed out of bounds bool radarvisible = false; angle_t pitchtemp; angle_t pitchmin = ANGLE_90; @@ -751,15 +754,21 @@ void HWDrawInfo::DoSubsector(subsector_t * sub) angle_t endAngleR = clipperr.PointToPseudoAngle(seg->v1->fX(), seg->v1->fY()); if (startAngleR-endAngleR >= ANGLE_180) radarvisible |= (clipperr.SafeCheckRange(startAngleR, endAngleR) || (Level->flags3 & LEVEL3_NOFOGOFWAR) || ((sub->flags & SSECMF_DRAWN) && !deathmatch)); - pitchmin = clipperv.PointToPseudoPitch(seg->v1->fX(), seg->v1->fY(), sector->floorplane.ZatPoint(seg->v1)); - pitchmax = clipperv.PointToPseudoPitch(seg->v1->fX(), seg->v1->fY(), sector->ceilingplane.ZatPoint(seg->v1)); - pitchvisible |= clipperv.SafeCheckRange(pitchmin, pitchmax); + if (!pitchvisible) + { + pitchmin = clipperv.PointToPseudoPitch(seg->v1->fX(), seg->v1->fY(), sector->floorplane.ZatPoint(seg->v1)); + pitchmax = clipperv.PointToPseudoPitch(seg->v1->fX(), seg->v1->fY(), sector->ceilingplane.ZatPoint(seg->v1)); + pitchvisible |= clipperv.SafeCheckRange(pitchmin, pitchmax); + } if (pitchvisible && anglevisible && radarvisible) break; - 
pitchtemp = clipperv.PointToPseudoPitch(seg->v2->fX(), seg->v2->fY(), sector->floorplane.ZatPoint(seg->v2)); - if (int(pitchmin) > int(pitchtemp)) pitchmin = pitchtemp; - pitchtemp = clipperv.PointToPseudoPitch(seg->v2->fX(), seg->v2->fY(), sector->ceilingplane.ZatPoint(seg->v2)); - if (int(pitchmax) < int(pitchtemp)) pitchmax = pitchtemp; - pitchvisible |= clipperv.SafeCheckRange(pitchmin, pitchmax); + if (!pitchvisible) + { + pitchtemp = clipperv.PointToPseudoPitch(seg->v2->fX(), seg->v2->fY(), sector->floorplane.ZatPoint(seg->v2)); + if (int(pitchmin) > int(pitchtemp)) pitchmin = pitchtemp; + pitchtemp = clipperv.PointToPseudoPitch(seg->v2->fX(), seg->v2->fY(), sector->ceilingplane.ZatPoint(seg->v2)); + if (int(pitchmax) < int(pitchtemp)) pitchmax = pitchtemp; + pitchvisible |= clipperv.SafeCheckRange(pitchmin, pitchmax); + } if (pitchvisible && anglevisible && radarvisible) break; } seg++; diff --git a/src/rendering/hwrenderer/scene/hw_clipper.cpp b/src/rendering/hwrenderer/scene/hw_clipper.cpp index 94713f107d5..6b256851b4f 100644 --- a/src/rendering/hwrenderer/scene/hw_clipper.cpp +++ b/src/rendering/hwrenderer/scene/hw_clipper.cpp @@ -39,6 +39,11 @@ #include "g_levellocals.h" #include "basics.h" +#if defined(__SSE2__) || defined(_M_X64) +#include <emmintrin.h> +#define USE_SSE2 +#endif + unsigned Clipper::starttime; Clipper::Clipper() @@ -440,7 +445,17 @@ angle_t Clipper::PointToPseudoPitch(double x, double y, double z) } else { +#ifdef USE_SSE2 + __m128 mvecx = _mm_set_ss(vecx); + __m128 mvecy = _mm_set_ss(vecy); + __m128 mvecz = _mm_set_ss(vecz); + __m128 dot = _mm_add_ss(_mm_mul_ss(mvecx, mvecx), _mm_mul_ss(mvecy, mvecy)); + __m128 notsignbit = _mm_castsi128_ps(_mm_cvtsi32_si128(~(1 << 31))); + __m128 fabsvecz = _mm_and_ps(mvecz, notsignbit); + double result = _mm_cvtss_f32((_mm_div_ss(mvecz, _mm_add_ss(_mm_sqrt_ss(dot), fabsvecz)))); +#else double result = vecz / (g_sqrt(vecx*vecx + vecy*vecy) + fabs(vecz)); // -ffast-math compile flag applies to this file, yes? 
+#endif if ((vecx * viewpoint->TanCos + vecy * viewpoint->TanSin) <= 0.0) // Point is behind viewpoint { result = 2.0 - result;