diff --git a/crates/xenia-cpu/src/interpreter.rs b/crates/xenia-cpu/src/interpreter.rs
index 3bb3d56..a205dcf 100644
--- a/crates/xenia-cpu/src/interpreter.rs
+++ b/crates/xenia-cpu/src/interpreter.rs
@@ -2982,16 +2982,21 @@ fn execute(ctx: &mut PpcContext, mem: &dyn MemoryAccess, instr: &DecodedInstr) -
             ctx.pc += 4;
         }
         PpcOpcode::stvewx128 => {
-            let ea = ea_indexed(ctx, instr) & !0xF;
-            // TODO PPCBUG-510: stvewx128 currently writes 16 bytes at ea & !0xF; the EA scope is
-            // wrong (should be word-aligned, 4 bytes only). When P3 fixes EA, this invalidate's
-            // range narrows automatically.
-            // PPCBUG-512: stvewx128 was missing invalidate_for_write.
+            // Mirror of stvewx: word-align EA, extract one 32-bit lane, write 4 bytes only.
+            // Previous code used & !0xF (16-byte) and wrote all 16 bytes, corrupting 12
+            // adjacent bytes on every execution (PPCBUG-510).
+            let ea_unaligned = ea_indexed(ctx, instr);
+            let ea = ea_unaligned & !0x3u32;
             if let Some(t) = ctx.reservation_table.as_ref().filter(|t| t.is_enabled()) {
                 if t.has_active_reservers() { t.invalidate_for_write(ea); }
             }
+            let slot = ((ea_unaligned & 0xF) >> 2) as usize;
             let bytes = ctx.vr[instr.vs128()].as_bytes();
-            for i in 0..16 { mem.write_u8(ea + i as u32, bytes[i]); }
+            let w = ((bytes[slot * 4] as u32) << 24)
+                  | ((bytes[slot * 4 + 1] as u32) << 16)
+                  | ((bytes[slot * 4 + 2] as u32) << 8)
+                  | (bytes[slot * 4 + 3] as u32);
+            mem.write_u32(ea, w);
             ctx.pc += 4;
         }
 
@@ -6463,4 +6468,47 @@ mod tests {
         assert_eq!(r[2], 0x3333_3333, "pack=1 shift=3: lane 2 from prev");
         assert_eq!(r[3], 0x4444_4444, "pack=1 shift=3: lane 3 from prev");
     }
+
+    // ---- PPCBUG-510: stvewx128 should write one word (4 bytes), not 16 ----
+
+    fn encode_stvewx128(vs_lo: u32, ra: u32, rb: u32) -> u32 {
+        // Builds a raw stvewx128 word (VMX128 load/store, op6 = 4) for the tests.
+        // decode_op4 key1 = (bits21-27 << 4) | bits30-31 = 0b00110000011 for stvewx128.
+        // Register fields: vs_lo at host bits 25-21, ra at host bits 20-16, rb at 15-11.
+        // NOTE(review): an earlier note placed VS128[5] at host bit 3 and VS128[6] at host
+        // bit 1; host bit 1 is set by the fixed pattern below, so confirm with the decoder.
+        const PRIMARY_OPCODE: u32 = 4 << 26;
+        const KEY_HIGH: u32 = 0b0011000 << 4; // bits 21-27 of key1 pattern (host bits 10-4)
+        const KEY_LOW: u32 = 0b11; // bits 30-31 of key1 pattern (host bits 1-0)
+        // Operands are shifted into place unmasked; the visible caller passes 5-bit values.
+        let register_fields = (vs_lo << 21) | (ra << 16) | (rb << 11);
+        PRIMARY_OPCODE | register_fields | KEY_HIGH | KEY_LOW
+    }
+
+    #[test]
+    fn stvewx128_writes_one_word_at_word_aligned_ea() {
+        // PPCBUG-510: old code wrote all 16 bytes at ea & !0xF, corrupting 12 adjacent bytes.
+        // Fix: word-align EA, extract lane from (ea & 0xF) >> 2, write 4 bytes only.
+        let mut ctx = PpcContext::new();
+        let mem = TestMem::new();
+        // encode_stvewx128 puts vs_lo in host bits 25-21 and sets only the fixed opcode
+        // pattern elsewhere, so the instruction is expected to select vs128 = vs_lo = 3.
+        // NOTE(review): this relies on vs128() taking its upper bits from positions the
+        // pattern leaves clear — confirm against the decoder.
+        let raw = encode_stvewx128(3, 1, 2);
+        ctx.vr[3] = xenia_types::Vec128::from_u32x4(0x1111_1111, 0x2222_2222, 0x3333_3333, 0x4444_4444);
+        ctx.gpr[1] = 0x1000; // base
+        // Unaligned offset on purpose: EA = 0x100A → aligned EA = 0x1008, slot = (0xA & 0xF)>>2 = 2
+        ctx.gpr[2] = 0x00A;
+        write_instr(&mem, 0, raw);
+        ctx.pc = 0;
+        step(&mut ctx, &mem);
+        assert_eq!(ctx.pc, 4, "PC must advance");
+        // Slot 2 → lane 2 = 0x3333_3333
+        assert_eq!(mem.read_u32(0x1008), 0x3333_3333, "only lane 2 word at ea");
+        // The other three words of the 16-byte line must be untouched (mem is zero-init)
+        assert_eq!(mem.read_u32(0x1000), 0x0000_0000, "word at 0x1000 must be untouched");
+        assert_eq!(mem.read_u32(0x1004), 0x0000_0000, "word at 0x1004 must be untouched");
+        assert_eq!(mem.read_u32(0x100C), 0x0000_0000, "word at 0x100C must be untouched");
+    }
 }