From 2d24a50191d711dec08d13dcc2534fe8a008f7f3 Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Tue, 22 Oct 2024 11:54:19 +0200 Subject: [PATCH] Add support for cacheless FPU configurations --- src/main/scala/vexiiriscv/Param.scala | 1 + .../execute/lsu/LsuCachelessPlugin.scala | 49 ++++++++++++++----- .../vexiiriscv/execute/lsu/LsuPlugin.scala | 8 +-- .../vexiiriscv/test/WhiteboxerPlugin.scala | 7 ++- .../scala/vexiiriscv/tester/Regression.scala | 2 +- 5 files changed, 48 insertions(+), 19 deletions(-) diff --git a/src/main/scala/vexiiriscv/Param.scala b/src/main/scala/vexiiriscv/Param.scala index b4fd8490..1610eaee 100644 --- a/src/main/scala/vexiiriscv/Param.scala +++ b/src/main/scala/vexiiriscv/Param.scala @@ -458,6 +458,7 @@ class ParamSimple(){ opt[Unit]("stressed-src") action { (v, c) => relaxedSrc = false } opt[Unit]("with-mul") unbounded() action { (v, c) => withMul = true } opt[Unit]("with-div") unbounded() action { (v, c) => withDiv = true } + opt[Unit]("with-rvm") action { (v, c) => withMul = true; withDiv = true } opt[Unit]("with-rva") action { (v, c) => withRva = true } opt[Unit]("with-rvf") action { (v, c) => withRvf = true } opt[Unit]("with-rvd") action { (v, c) => withRvd = true; withRvf = true } diff --git a/src/main/scala/vexiiriscv/execute/lsu/LsuCachelessPlugin.scala b/src/main/scala/vexiiriscv/execute/lsu/LsuCachelessPlugin.scala index d4efd0fe..8847ff3a 100644 --- a/src/main/scala/vexiiriscv/execute/lsu/LsuCachelessPlugin.scala +++ b/src/main/scala/vexiiriscv/execute/lsu/LsuCachelessPlugin.scala @@ -4,7 +4,7 @@ import spinal.core._ import spinal.lib._ import spinal.lib.misc.plugin.FiberPlugin import vexiiriscv.{Global, riscv} -import vexiiriscv.riscv.{CSR, Const, IntRegFile, MicroOp, RS1, RS2, Riscv, Rvi} +import vexiiriscv.riscv.{CSR, Const, FloatRegFile, IntRegFile, MicroOp, RS1, RS2, Riscv, Rvi} import AguPlugin._ import spinal.core.fiber.{Handle, Retainer} import spinal.core.sim.SimDataPimper @@ -12,7 +12,7 @@ import vexiiriscv.decode.Decode 
import vexiiriscv.fetch.FetchPipelinePlugin import vexiiriscv.memory.{AddressTranslationPortUsage, AddressTranslationService, DBusAccessService, PmaLoad, PmaLogic, PmaPort, PmaStore} import vexiiriscv.misc.{AddressToMask, LsuTriggerService, PerformanceCounterService, TrapArg, TrapReason, TrapService} -import vexiiriscv.riscv.Riscv.{LSLEN, XLEN} +import vexiiriscv.riscv.Riscv.{FLEN, LSLEN, XLEN} import spinal.lib.misc.pipeline._ import spinal.lib.system.tag.PmaRegion import vexiiriscv.decode.Decode.{INSTRUCTION_SLICE_COUNT_WIDTH, UOP} @@ -52,13 +52,14 @@ class LsuCachelessPlugin(var layer : LaneLayer, val logic = during setup new Area{ val elp = host.find[ExecuteLanePlugin](_ == layer.lane) val ifp = host.find[IntFormatPlugin](_.lane == layer.lane) + val fpwbp = host.findOption[WriteBackPlugin](p => p.lane == layer.lane && p.rf == FloatRegFile) val srcp = host.find[SrcPlugin](_.layer == layer) val ats = host[AddressTranslationService] val ts = host[TrapService] val ss = host[ScheduleService] val buildBefore = retains(elp.pipelineLock, ats.portsLock) val atsStorageLock = retains(ats.storageLock) - val retainer = retains(elp.uopLock, srcp.elaborationLock, ifp.elaborationLock, ts.trapLock, ss.elaborationLock) + val retainer = retains(List(elp.uopLock, srcp.elaborationLock, ifp.elaborationLock, ts.trapLock, ss.elaborationLock) ++ fpwbp.map(_.elaborationLock)) awaitBuild() Riscv.RVA.set(withAmo) @@ -69,17 +70,22 @@ class LsuCachelessPlugin(var layer : LaneLayer, val flushPort = ss.newFlushPort(layer.lane.getExecuteAge(forkAt), laneAgeWidth = Execute.LANE_AGE_WIDTH, withUopId = true) val frontend = new AguFrontend(layer, host) - // IntFormatPlugin specification val iwb = ifp.access(wbAt) + val fpwb = fpwbp.map(_.createPort(wbAt)) val amos = Riscv.RVA.get.option(frontend.amos.uops).toList.flatten for(load <- frontend.writingRf ++ amos){ - val spec = Rvi.loadSpec(load) val op = layer(load) - ifp.addMicroOp(iwb, op) - spec.signed match { - case false => ifp.zeroExtend(iwb, 
op, spec.width) - case true => ifp.signExtend(iwb, op, spec.width) + + Rvi.loadSpec.get(load) match { + case Some(spec) => + ifp.addMicroOp(iwb, op) + spec.signed match { + case false => ifp.zeroExtend(iwb, op, spec.width) + case true => ifp.signExtend(iwb, op, spec.width) + } + case None => } + op.mayFlushUpTo(forkAt) // page fault / trap withSpeculativeLoadFlush match { case true => @@ -87,6 +93,12 @@ class LsuCachelessPlugin(var layer : LaneLayer, } } + fpwbp.foreach(_.addMicroOp(fpwb.get, layer, frontend.writeRfFloat)) + for(fp <- frontend.writeRfFloat) { + val spec = layer(fp) + spec.setCompletion(wbAt) + } + for(store <- frontend.writingMem ++ amos){ val op = layer(store) op.mayFlushUpTo(forkAt) @@ -118,7 +130,12 @@ class LsuCachelessPlugin(var layer : LaneLayer, accessRetainer.await() val onFirst = new elp.Execute(0){ - val WRITE_DATA = insert(up(elp(IntRegFile, riscv.RS2))) //Workaround for op.addRsSpec(RS2, 0) (TODO) + val WRITE_DATA = Payload(Bits(LSLEN bits)) + WRITE_DATA.assignDontCare() + WRITE_DATA(0, XLEN bits) := up(elp(IntRegFile, riscv.RS2)) //Workaround for op.addRsSpec(RS2, 0) (TODO) ? 
+ if(Riscv.withFpu) when(FLOAT){ + WRITE_DATA(0, FLEN bits) := up(elp(FloatRegFile, riscv.RS2)) + } } val onAddress = new addressCtrl.Area{ @@ -361,13 +378,21 @@ class LsuCachelessPlugin(var layer : LaneLayer, rspShifted(i * 8, 8 bits) := src.read(sel) } - iwb.valid := SEL - iwb.payload := rspShifted + iwb.valid := SEL && !FLOAT + iwb.payload := rspShifted.resized if (withAmo) when(ATOMIC && !LOAD) { iwb.payload(0) := onJoin.SC_MISS iwb.payload(7 downto 1) := 0 } + + fpwb.foreach{p => + p.valid := SEL && FLOAT + p.payload := rspShifted.resized + if(Riscv.RVD) when(SIZE === 2) { + p.payload(63 downto 32).setAll() + } + } } buildBefore.release() diff --git a/src/main/scala/vexiiriscv/execute/lsu/LsuPlugin.scala b/src/main/scala/vexiiriscv/execute/lsu/LsuPlugin.scala index 498cf381..6b85b900 100644 --- a/src/main/scala/vexiiriscv/execute/lsu/LsuPlugin.scala +++ b/src/main/scala/vexiiriscv/execute/lsu/LsuPlugin.scala @@ -14,7 +14,7 @@ import vexiiriscv.decode.Decode import vexiiriscv.decode.Decode.UOP import vexiiriscv.memory.{AddressTranslationPortUsage, AddressTranslationService, DBusAccessService, PmaLoad, PmaLogic, PmaPort, PmaStore} import vexiiriscv.misc.{AddressToMask, LsuTriggerService, PerformanceCounterService, TrapArg, TrapReason, TrapService} -import vexiiriscv.riscv.Riscv.{LSLEN, XLEN} +import vexiiriscv.riscv.Riscv.{FLEN, LSLEN, XLEN} import vexiiriscv.riscv._ import vexiiriscv.schedule.{DispatchPlugin, ScheduleService} import vexiiriscv.{Global, riscv} @@ -478,10 +478,10 @@ class LsuPlugin(var layer : LaneLayer, val lsuTrap = False val writeData = Bits(Riscv.LSLEN bits) - writeData := elp(IntRegFile, riscv.RS2).resized + writeData.assignDontCare() + writeData(0, XLEN bits) := up(elp(IntRegFile, riscv.RS2)) if(Riscv.withFpu) when(FLOAT){ - val value = elp(FloatRegFile, riscv.RS2) - writeData(value.bitsRange) := value + writeData(0, FLEN bits) := up(elp(FloatRegFile, riscv.RS2)) } val scMiss = Bool() diff --git 
a/src/main/scala/vexiiriscv/test/WhiteboxerPlugin.scala b/src/main/scala/vexiiriscv/test/WhiteboxerPlugin.scala index 89fa3f64..9cdd38e5 100644 --- a/src/main/scala/vexiiriscv/test/WhiteboxerPlugin.scala +++ b/src/main/scala/vexiiriscv/test/WhiteboxerPlugin.scala @@ -161,7 +161,10 @@ class WhiteboxerPlugin(withOutputs : Boolean) extends FiberPlugin{ uopId := c(Decode.UOP_ID) size := c(AguPlugin.SIZE).resized address := c(p.logic.tpk.TRANSLATED) - data := host.find[IntFormatPlugin](_.lane == p.layer.lane).logic.stages.find(_.ctrlLink == c.ctrlLink).get.wb.payload + data := host.find[IntFormatPlugin](_.lane == p.layer.lane).logic.stages.find(_.ctrlLink == c.ctrlLink).get.wb.payload.resized + if(p.logic.fpwb.nonEmpty) when(p.logic.fpwb.get.valid){ + data := p.logic.fpwb.get.payload.asSInt.resize(widthOf(data)).asBits.resized + } }) @@ -174,7 +177,7 @@ class WhiteboxerPlugin(withOutputs : Boolean) extends FiberPlugin{ address := c(LsuL1.PHYSICAL_ADDRESS) data := host.find[IntFormatPlugin](_.lane == p.layer.lane).logic.stages.find(_.ctrlLink == c.ctrlLink).get.wb.payload.resized if(p.logic.fpwb.nonEmpty) when(p.logic.fpwb.get.valid){ - data := p.logic.fpwb.get.payload.asSInt.resize(widthOf(data)).asBits + data := p.logic.fpwb.get.payload.asSInt.resize(widthOf(data)).asBits.resized } }) } diff --git a/src/test/scala/vexiiriscv/tester/Regression.scala b/src/test/scala/vexiiriscv/tester/Regression.scala index bf340b6d..9e8f2238 100644 --- a/src/test/scala/vexiiriscv/tester/Regression.scala +++ b/src/test/scala/vexiiriscv/tester/Regression.scala @@ -623,7 +623,7 @@ class Regression extends MultithreadedFunSuite(sys.env.getOrElse("VEXIIRISCV_REG addDim("btbParam", List("--btb-sets 512 --btb-hash-width 16", "--btb-sets 128 --btb-hash-width 6")) dimensions += new Dimensions[ParamSimple]("fpu") { override def getRandomPosition(state : ParamSimple, random: Random): String = { - if(!state.lsuL1Enable) return "" //Don't support the FPU yet TODO + if(!state.withMul || 
!state.withDiv) return "" return List("", "--with-rvf", "--with-rvf --with-rvd").randomPick(random) } }