Playing on the Nintendo Switch with a generic USB-HID controller (specifically, a fake PS3 controller)

Update: You can also make this work with real PS3 controllers

In a previous post, we did some warmups — playing MSX games using a fake PS3 controller. In this post, we’ll be using GP2040-CE to control a Nintendo Switch. (No analog stick support implemented at the moment, but it wouldn’t be hard.) At the time of writing, most of the code to do this is already there! And I’m sure there will be support for USB-HID controllers in no time, so this post will probably be outdated soon.
Update: Analog is implemented too, and the diff below has been updated.

Anyway, you just need to put GP2040-CE on your Pico and get into the web configuration. In add-ons, you enable keyboard support, and then set up the “Keyboard Host Configuration”, which looks like this:

GP2040-CE keyboard mapping and other configuration

Then you can connect a generic USB keyboard to the Raspberry Pi Pico, and connect the Pico to the Nintendo Switch. (For electrical reasons, I do not recommend setting a pin for 5V power here, and just putting the host USB +5 on the VBUS pin of the Pico.)

Things that could come in handy: Breadboard, Raspberry Pi Pico, USB port that can be connected to one of the Pico’s GPIO pins

If everything works and you can control Sonic using your keyboard, great, you can move on to the next step! If it didn’t work, it probably won’t magically get better from here on out, so make sure to check those connections. The green/white wires can be D+/D- or D-/D+!

Now we’ll perform a small modification to the existing code. I’m basing my work on commit 961c49d5b969ee749ae17bd4cbb2f0bad2380e71. Beware, this may or may not work with your controller. I’d recommend taking a look at the above-mentioned previous post where we modify a Pico-PIO-USB example and to check if your controller behaves the same way. I have only two controllers to test with, and I only tested with one! Anyway, here’s the diff:

diff --git a/headers/addons/keyboard_host.h b/headers/addons/keyboard_host.h
index af9c61b..74fb628 100644
--- a/headers/addons/keyboard_host.h
+++ b/headers/addons/keyboard_host.h
@@ -53,6 +53,7 @@ private:
 	bool _keyboard_host_enabled;
 	uint8_t getKeycodeFromModifier(uint8_t modifier);
 	void process_kbd_report(uint8_t dev_addr, hid_keyboard_report_t const *report);
+	void process_usb_gamepad_report(uint8_t dev_addr, const uint8_t *report);
 	GamepadState _keyboard_host_state;
 	KeyboardButtonMapping _keyboard_host_mapDpadUp;
 	KeyboardButtonMapping _keyboard_host_mapDpadDown;
@@ -74,4 +75,4 @@ private:
 	KeyboardButtonMapping _keyboard_host_mapButtonA2;
 };
 
-#endif  // _KeyboardHost_H_
\ No newline at end of file
+#endif  // _KeyboardHost_H_
diff --git a/src/addons/keyboard_host.cpp b/src/addons/keyboard_host.cpp
index a5294e9..8f59f4a 100644
--- a/src/addons/keyboard_host.cpp
+++ b/src/addons/keyboard_host.cpp
@@ -63,12 +63,15 @@ void KeyboardHostAddon::setup() {
 
 void KeyboardHostAddon::preprocess() {
   Gamepad *gamepad = Storage::getInstance().GetGamepad();
+  gamepad->setDpadMode(DpadMode::DPAD_MODE_DIGITAL);
+  gamepad->hasLeftAnalogStick = true;
+  gamepad->hasRightAnalogStick = true;
   gamepad->state.dpad     |= _keyboard_host_state.dpad;
   gamepad->state.buttons  |= _keyboard_host_state.buttons;
-  gamepad->state.lx       |= _keyboard_host_state.lx;
-  gamepad->state.ly       |= _keyboard_host_state.ly;
-  gamepad->state.rx       |= _keyboard_host_state.rx;
-  gamepad->state.ry       |= _keyboard_host_state.ry;
+  gamepad->state.lx       = _keyboard_host_state.lx;
+  gamepad->state.ly       = _keyboard_host_state.ly;
+  gamepad->state.rx       = _keyboard_host_state.rx;
+  gamepad->state.ry       = _keyboard_host_state.ry;
   gamepad->state.lt       |= _keyboard_host_state.lt;
   gamepad->state.rt       |= _keyboard_host_state.rt;
 }
@@ -89,10 +92,11 @@ void KeyboardHostAddon::report_received(uint8_t dev_addr, uint8_t instance, uint
   uint8_t const itf_protocol = tuh_hid_interface_protocol(dev_addr, instance);
 
   // tuh_hid_report_received_cb() will be invoked when report is available
-  if (itf_protocol != HID_ITF_PROTOCOL_KEYBOARD)
-    return;
-
-  process_kbd_report(dev_addr, (hid_keyboard_report_t const*) report );
+  if (itf_protocol == HID_ITF_PROTOCOL_KEYBOARD) {
+    process_kbd_report(dev_addr, (hid_keyboard_report_t const*) report );
+  } else {
+    process_usb_gamepad_report(dev_addr, report);
+  }
 }
 
 uint8_t KeyboardHostAddon::getKeycodeFromModifier(uint8_t modifier) {
@@ -161,4 +165,96 @@ void KeyboardHostAddon::process_kbd_report(uint8_t dev_addr, hid_keyboard_report
         _keyboard_host_state.rt = 0;
     }
   }
-}
\ No newline at end of file
+}
+
+// 0x1: L2
+// 0x2: R2
+// 0x4: L1
+// 0x8: R1
+// 0x10: Triangle
+// 0x20: Circle
+// 0x40: X
+// 0x80: Square?
+// 0x100: ?
+// 0x200: ?
+// 0x400: R3
+// 0x800: Start
+// 0x1000: Up
+// 0x2000: Right
+// 0x4000: Down?
+// 0x8000: Left?
+
+#define BUTTON_L2 0x1
+#define BUTTON_R2 0x2
+#define BUTTON_L1 0x4
+#define BUTTON_R1 0x8
+#define BUTTON_SQUARE 0x10
+#define BUTTON_CROSS 0x20
+#define BUTTON_CIRCLE 0x40
+#define BUTTON_TRIANGLE 0x80
+
+#define BUTTON_SELECT 0x100 // ?
+#define BUTTON_L3 0x200 // ?
+#define BUTTON_R3 0x400 // ?
+
+#define BUTTON_START 0x800
+#define BUTTON_UP 0x1000
+#define BUTTON_RIGHT 0x2000
+#define BUTTON_DOWN 0x4000
+#define BUTTON_LEFT 0x8000
+
+void KeyboardHostAddon::process_usb_gamepad_report(uint8_t dev_addr, const uint8_t *report)
+{
+  _keyboard_host_state.dpad = 0;
+  _keyboard_host_state.buttons = 0;
+  _keyboard_host_state.lx = GAMEPAD_JOYSTICK_MID;
+  _keyboard_host_state.ly = GAMEPAD_JOYSTICK_MID;
+  _keyboard_host_state.rx = GAMEPAD_JOYSTICK_MID;
+  _keyboard_host_state.ry = GAMEPAD_JOYSTICK_MID;
+  _keyboard_host_state.lt = 0;
+  _keyboard_host_state.rt = 0;
+
+  uint16_t button_state = report[2] << 8 | report[3];
+  uint8_t left_analog_x = report[6];
+  uint8_t left_analog_y = report[7];
+  uint8_t right_analog_x = report[8];
+  uint8_t right_analog_y = report[9];
+
+  const GamepadOptions& gamepadOptions = Storage::getInstance().getGamepadOptions();
+
+  _keyboard_host_state.dpad |=
+            ((button_state & BUTTON_UP)    ? (gamepadOptions.invertYAxis ? _keyboard_host_mapDpadDown.buttonMask : _keyboard_host_mapDpadUp.buttonMask) : _keyboard_host_state.dpad)
+          | ((button_state & BUTTON_DOWN)  ? (gamepadOptions.invertYAxis ? _keyboard_host_mapDpadUp.buttonMask : _keyboard_host_mapDpadDown.buttonMask) : _keyboard_host_state.dpad)
+          | ((button_state & BUTTON_LEFT)  ? _keyboard_host_mapDpadLeft.buttonMask  : _keyboard_host_state.dpad)
+          | ((button_state & BUTTON_RIGHT) ? _keyboard_host_mapDpadRight.buttonMask : _keyboard_host_state.dpad)
+        ;
+
+  _keyboard_host_state.buttons |=
+      ((button_state & BUTTON_CROSS)  ? _keyboard_host_mapButtonB1.buttonMask  : _keyboard_host_state.buttons)
+    | ((button_state & BUTTON_CIRCLE)  ? _keyboard_host_mapButtonB2.buttonMask  : _keyboard_host_state.buttons)
+    | ((button_state & BUTTON_SQUARE)  ? _keyboard_host_mapButtonB3.buttonMask  : _keyboard_host_state.buttons)
+    | ((button_state & BUTTON_TRIANGLE)  ? _keyboard_host_mapButtonB4.buttonMask  : _keyboard_host_state.buttons)
+    | ((button_state & BUTTON_L1)  ? _keyboard_host_mapButtonL1.buttonMask  : _keyboard_host_state.buttons)
+    | ((button_state & BUTTON_R1)  ? _keyboard_host_mapButtonR1.buttonMask  : _keyboard_host_state.buttons)
+    | ((button_state & BUTTON_L2)  ? _keyboard_host_mapButtonL2.buttonMask  : _keyboard_host_state.buttons)
+    | ((button_state & BUTTON_R2)  ? _keyboard_host_mapButtonR2.buttonMask  : _keyboard_host_state.buttons)
+    | ((button_state & BUTTON_SELECT)  ? _keyboard_host_mapButtonS1.buttonMask  : _keyboard_host_state.buttons)
+    | ((button_state & BUTTON_START)  ? _keyboard_host_mapButtonS2.buttonMask  : _keyboard_host_state.buttons)
+    | ((button_state & BUTTON_L3)  ? _keyboard_host_mapButtonL3.buttonMask  : _keyboard_host_state.buttons)
+    | ((button_state & BUTTON_R3)  ? _keyboard_host_mapButtonR3.buttonMask  : _keyboard_host_state.buttons)
+  ;
+
+  /*
+   * #define GAMEPAD_JOYSTICK_MIN 0
+   * #define GAMEPAD_JOYSTICK_MID 0x7FFF
+   * #define GAMEPAD_JOYSTICK_MAX 0xFFFF
+   * lx, ly, rx, ry are 16-bit values, but our joystick produces 8-bit values
+   * our joystick's middle is at 0x80, so it would probably be slightly better to adjust that.
+   */
+  _keyboard_host_state.lx = (left_analog_x == 0x80) ? GAMEPAD_JOYSTICK_MID : (left_analog_x << 8 | left_analog_x);
+  _keyboard_host_state.ly = (left_analog_y == 0x80) ? GAMEPAD_JOYSTICK_MID : (left_analog_y << 8 | left_analog_y);
+  _keyboard_host_state.rx = (right_analog_x == 0x80) ? GAMEPAD_JOYSTICK_MID : (right_analog_x << 8 | right_analog_x);
+  _keyboard_host_state.ry = (right_analog_y == 0x80) ? GAMEPAD_JOYSTICK_MID : (right_analog_y << 8 | right_analog_y);
+  _keyboard_host_state.lt = 0;
+  _keyboard_host_state.rt = 0;
+}

Good luck. Miraculously, everything worked perfectly for me. The keyboard worked immediately, the above code modification worked immediately without having to do any debugging, and I’ve gotta say, my fake PS3 controller feels quite okay! (Note that you will have to press the PlayStation button after connecting your PS3 controller.)

The red part just hides some clutter.

PS3 controller repair log

Symptoms on real PS3: probably “crazy behavior”, I didn’t actually test on a real PS3.
Symptoms when connected to a computer with a program open that displays the gamepad status: random button presses, button “flickering”, buttons going on and off randomly, possibly depending on how the controller is held.

Cause in my case: rubber cushion is worn out, and/or physical damage to the controller’s case and/or loss of one of the screws. The rubber cushion sits on a piece of plastic, and a flex cable is sandwiched between the rubber cushion and the PCB. If the rubber cushion loses some of its original height, for example due to wear, or if one of the controller’s screws are lost and the PCB isn’t pressed as hard against the rubber cushion as it used to, buttons will randomly appear pressed or unpressed. (When there is absolutely no connection between the flex cable and the PCB, all buttons will appear pressed. When the connection is flaky, buttons may appear pressed all the time, or go on and off.)

Fix: adding a little height to the cushion fixed the problem for me.

In this pic, I’m holding the flex cable with one of my fingers. The tube (it’s a piece of heat shrink, actually) is what I added to improve contact between the flex cable and PCB. The original rubber is still there.

Using a USB-HID game controller on the MSX, using a Raspberry Pi Pico for signal conversion

There is very little code that I wrote myself in this project, but it’s the first time I’m looking at USB-HID on the protocol level and the end result is something mildly useful. So possibly worth a post? (Note: code is at the bottom of this post.)

All we’ll be doing here is modify an example from https://github.com/sekigon-gonnoc/Pico-PIO-USB, namely, the capture_hid_report example. You’ll need a good way to connect a USB-A port to your Raspberry Pi Pico. I’m using an old piece of hardware that was meant for use in desktop PCs to add USB ports on the back, internally connected to motherboard headers. (In the demo we’re using, USB D+ and D- are GPIO pins 0 and 1, respectively.)

Once you have your Pico flashed, open a serial terminal, and then connect a USB-HID gamepad. I’m using a fake PS3 controller. It looks very similar to a Sony PS3 controller but bears a “P3” mark instead of the PlayStation logo on the middle button. (I don’t know if it works with a real PS3.)
(I’m leaving out the wiring details here, but it’s all exactly as explained in the README.md in the repo.) I get the following messages when I plug in my controller. The “EP 0x81” messages are sent continuously.

Device 0 Connected
control in[complete]
Enumerating 054c:0268, class:0, address:1
control out[complete]
control in[complete]
control in[complete]
Manufacture:GASIA CORP.
control in[complete]
control in[complete]
Product:PLAYSTATION(R)3 Controller
control in[complete]
control in[complete]
control out[complete]
inum:0, altsetting:0, numep:2, iclass:3, isubclass:0, iprotcol:0, iface:0
        bcdHID:1.11, country:0, desc num:1, desc_type:34, desc_size:148
control out[error]
control in[complete]
                Report descriptor:05 01 09 04 a1 01 a1 02 85 01 75 08 95 01 15 00 26 ff 00 81 03 75 01 95 13 15 00 25 01 35 00 45 01 05 09 19 01 29 13 81 02 75 01 95 0d 06 00 ff 81 03 15 00 26 ff 00 05 01 09 01 a1 00 75 08  
                        epaddr:0x02, attr:3, size:64, interval:1
                        epaddr:0x81, attr:3, size:64, interval:1
...
054c:0268 EP 0x81:      01 00 00 00 00 00 80 80 80 80 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 03 ef 10 00 00 00 00 23 6d 77 01 80 02 00 02 00 01 80 02 00 
054c:0268 EP 0x81:      01 00 00 00 00 00 80 80 80 80 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 03 ef 10 00 00 00 00 23 6d 77 01 80 01 ff 01 ff 01 7f 01 ff 
054c:0268 EP 0x81:      01 00 00 00 00 00 80 80 80 80 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 03 ef 10 00 00 00 00 23 6d 77 01 80 01 fe 01 fe 01 7e 01 fe 
054c:0268 EP 0x81:      01 00 00 00 00 00 80 80 80 80 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 03 ef 10 00 00 00 00 23 6d 77 01 80 01 ff 01 ff 01 7f 01 ff 
054c:0268 EP 0x81:      01 00 00 00 00 00 80 80 80 80 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 03 ef 10 00 00 00 00 23 6d 77 01 80 02 00 02 00 01 80 02 00
...

You can also use Linux tools such as usbhid-dump to get reports, but for some reason they look quite different from what I get here. Don’t know if the driver is doing something (odd, or not so odd) or if the Pico is doing something odd. (It’s quite likely that there’s a driver in Linux that configures the controller automatically. On the Pico you have to press the P3 button every time after plugging in, on Linux it lights up the first player LED immediately.)

Decoding the USB HID report descriptor

First of all, we don’t actually need to do all this. Pressing buttons and looking at the output makes it quite obvious what we have to do. But for let’s edify outselves anyway. Here’s a great tutorial on USB HID report descriptors: https://eleccelerator.com/tutorial-about-usb-hid-report-descriptors/. The same guy has a tool on their website that allows us to quickly decode the descriptor: https://eleccelerator.com/usbdescreqparser/.

When we paste in our descriptor, we get the following output:

0x05, 0x01,        // Usage Page (Generic Desktop Ctrls)
0x09, 0x04,        // Usage (Joystick)
0xA1, 0x01,        // Collection (Application)
0xA1, 0x02,        //   Collection (Logical)
0x85, 0x01,        //     Report ID (1)
0x75, 0x08,        //     Report Size (8)
0x95, 0x01,        //     Report Count (1)
0x15, 0x00,        //     Logical Minimum (0)
0x26, 0xFF, 0x00,  //     Logical Maximum (255)
0x81, 0x03,        //     Input (Const,Var,Abs,No Wrap,Linear,Preferred State,No Null Position)
0x75, 0x01,        //     Report Size (1)
0x95, 0x13,        //     Report Count (19)
0x15, 0x00,        //     Logical Minimum (0)
0x25, 0x01,        //     Logical Maximum (1)
0x35, 0x00,        //     Physical Minimum (0)
0x45, 0x01,        //     Physical Maximum (1)
0x05, 0x09,        //     Usage Page (Button)
0x19, 0x01,        //     Usage Minimum (0x01)
0x29, 0x13,        //     Usage Maximum (0x13)
0x81, 0x02,        //     Input (Data,Var,Abs,No Wrap,Linear,Preferred State,No Null Position)
0x75, 0x01,        //     Report Size (1)
0x95, 0x0D,        //     Report Count (13)
0x06, 0x00, 0xFF,  //     Usage Page (Vendor Defined 0xFF00)
0x81, 0x03,        //     Input (Const,Var,Abs,No Wrap,Linear,Preferred State,No Null Position)
0x15, 0x00,        //     Logical Minimum (0)
0x26, 0xFF, 0x00,  //     Logical Maximum (255)
0x05, 0x01,        //     Usage Page (Generic Desktop Ctrls)
0x09, 0x01,        //     Usage (Pointer)
0xA1, 0x00,        //     Collection (Physical)
0x75, 0x08,        //       Report Size (8)

// 63 bytes

This doesn’t look quite exactly the same as the examples in the tutorial. Below is my understanding, which may not be 100% correct, but makes sense to me at least. Let’s look at some output lines from the capture_hid_report example. The first line was:

054c:0268 EP 0x81:      01 00 00 00 00 00 80 80 80 80 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 03 ef 10 00 00 00 00 23 6d 77 01 80 02 00 02 00 01 80 02 00

First of all, all lines start with “054c:0268 EP 0x81”. That’s just added by the example code. Let’s go through the remaining numbers in the line (actually, just the bolded ones) and see how they relate to the descriptor we saw earlier.

01: report ID, same as the report ID defined in the descriptor.

00: this is the entire report that was described in the following extract from the descriptor, exactly 1 byte (8 bits):

0x75, 0x08,        //     Report Size (8)
0x95, 0x01,        //     Report Count (1)
0x15, 0x00,        //     Logical Minimum (0)
0x26, 0xFF, 0x00,  //     Logical Maximum (255)
0x81, 0x03,        //     Input (Const,Var,Abs,No Wrap,Linear,Preferred State,No Null Position)

00 00 01 00: the (two) reports referenced in the following parts of the descriptor:

0x75, 0x01,        //     Report Size (1)
0x95, 0x13, // Report Count (19)
0x15, 0x00, // Logical Minimum (0)
0x25, 0x01, // Logical Maximum (1)
0x35, 0x00, // Physical Minimum (0)
0x45, 0x01, // Physical Maximum (1)
0x05, 0x09, // Usage Page (Button)
0x19, 0x01, // Usage Minimum (0x01)
0x29, 0x13, // Usage Maximum (0x13)
0x81, 0x02, // Input (Data,Var,Abs,No Wrap,Linear,Preferred State,No Null Position)
0x75, 0x01, // Report Size (1)
0x95, 0x0D, // Report Count (13)
0x06, 0x00, 0xFF, // Usage Page (Vendor Defined 0xFF00)
0x81, 0x03, // Input (Const,Var,Abs,No Wrap,Linear,Preferred State,No Null Position)

This is actually two reports. One is 19 bits, the other is 13 bits. Together that’s exactly 32 bits. (This is similar to the three mouse buttons example in the tutorial, where we only have three buttons and therefore want to pad this to 8 bits.) Here, we have 19 buttons (though the controller only sports 16 physical buttons, unless I’m much mistaken), each of which is 1 bit, i.e., either pressed, or not. The remaining 13 bits are just to pad the report to make it easier to handle on the software side, or perhaps the padding is required somehow. (Looks like it is on Windows at least: https://stackoverflow.com/questions/65846159/usb-hid-report-descriptor-multiple-reports.)

And indeed, these four bytes (well, the first 19, er, 16 bits) do change when buttons are pressed. This is basically all we need to figure out how to use this controller with an MSX. We just need to figure out which button is responsible for which bit in the third and fourth bytes. (Unless we also wanted to translate analog stick input.)

What about the rest of the bytes? I don’t know how they relate to our descriptor above, but pressing buttons and moving the analog sticks makes it pretty obvious what they mean. We can see that the analog sticks are two bytes each, and there’s another byte for each button, and the value that comes up depends on how hard the button was pressed. I never knew that the △□○☓ buttons were pressure-sensitive on the PS3! Anyway, perhaps it’s possible that our descriptor is incomplete, as the frame they are transmitted in is limited to 64 (or maybe 63?) bytes?

So without going into any hardware details yet, here’s how we could modify the example code to translate our button presses to electric pulses on a GPIO pin. Here’s the part of the code that we’ll replace:

if (len > 0) {
  printf("%04x:%04x EP 0x%02x:\t", device->vid, device->pid,
        ep->ep_num);
  for (int i = 0; i < len; i++) {
    printf("%02x ", temp[i]);
  }
  printf("\n");
}

And here’s some of the code we could maybe replace the above block with. Note that this doesn’t actually consider the hardware details of the actual MSX interface yet, it’s just an example, so don’t copy this code. The actual code is in the last section of this blog post.

if (len > 0) {
  if (temp[0] == GAMEPAD_REPORT_ID) { // GAMEPAD_REPORT_ID is 1 in our case
    uint16_t button_state = (temp[2] << 8) | temp[3];
    if (button_state & BUTTON_UP) {
      gpio_put(BUTTON_UP_PIN, 1);
    } else {
      gpio_put(BUTTON_UP_PIN, 0);
    }
    if (button_state & BUTTON_DOWN) {
...
  }
}

My controller’s button mappings are as follows:

#define BUTTON_UP 0x1000
#define BUTTON_DOWN 0x4000
#define BUTTON_LEFT 0x8000
#define BUTTON_RIGHT 0x2000
#define BUTTON_A 0x40 // X or O, can't remember
#define BUTTON_B 0x20 // X or O, can't remember

MSX interface

Good news: the MSX joystick ports come with 5V and GND pins. According to https://www.msx.org/wiki/General_Purpose_port, we can draw up to 50 mA from these pins. Is that enough for the Pico and a controller? It is for mine! According to my measurements, the Pico + fake PS3 controller together draw about 40 mA. (Instrument’s display precision is 10 mA, so it could be up to 45 mA, or more if the instrument is inaccurate.) Note that many modern controllers (including real PS3 controllers) contain a battery, and will attempt to charge it. That will most likely take the current way above 50 mA.
(Note: I don’t think that drawing slightly more than 50 mA would be a huge problem at least on my MSX, which I’ve disassembled and reassembled a couple times.)

Slightly bad news (1): pressing buttons on MSX joysticks connects pins to pin 8, which is not GND. Pin 8 is “strobe” and can be GND or 5V. (Button pins are normally pulled high.)

Slightly bad news (2): on the MSX side, the joystick ports are actually GPIO ports and can be configured for both input (normal) and output! We don’t want the Pico to output when the MSX’s PSG is outputting too!

1: addressing this in software might be possible, albeit with a (very) short time lag. I don’t really know anything about this feature and have no easy way to test a software-based solution with MSX software that actually uses this feature.
2: I don’t think regular MSX joysticks worry about this. Some MSX systems may have safeguards in place.

Some measurements and wiring details

Using my multimeter’s rarely used ammeter mode, with the probes between STROBE and any button pin, I see a current of ~0.7 mA. (Two button pins, and the ammeter shows 1.5 mA. It probably goes up linearly the more buttons you press.) That’s quite a lot by modern standards!

On my MSX, there are two 74LS157 chips that implement joystick selection. The 74LS157’s outputs are directly connected to the PSG’s inputs. (Only one joystick’s state is visible to software at a time; a PSG register change is required to switch to the other one.) If we change the PSG’s I/O port’s direction, I think we’ll be pitting the 74LS157’s outputs against the PSG’s outputs. Doesn’t sound so great, eh.
Except, the 74LS157 has an \ENABLE pin! When the \ENABLE pin on a 74LS157 is disabled, the outputs will be high-Z, potentially allowing a signal coming from the PSG to make its way somewhere. Is there such functionality on the MB-H2? Answer after some probing: no. \ENABLE on these two chips is tied to GND.

While most button pins have 10K pull-up resistors (some seem to have 3.3K pull-up resistors), the STROBE pins are connected directly to PSG pins 8 and 9.

Deciding how to hook up the Pico to the joystick port

In real joysticks, when you press a button, you short the STROBE pin to the button pin, and STROBE can be HIGH or LOW. (When STROBE is HIGH, we short HIGH to HIGH, and nothing happens. Since on my MSX, as discussed above, the button pins are connected to 74LS157 inputs, they should always be pulled high, and never go low during device operation.) In real joysticks, when nothing is pressed, there is no connection, period. So that’s more like a tri-state affair, so instead of producing a HIGH level when nothing is pressed, we should set our Pico’s GPIO to high-Z (which we can do by setting it to INPUT).

Armed with the measurements above, I think we can hook up the Pico directly (let’s ignore 5V vs 3.3V for now), producing a LOW level when a button is pressed. We’ll have the Pico’s GPIO pin sink a little bit of current, but not too much. Even if all 6 buttons are pressed, the Pico shouldn’t sink more than around 4.5 mA across multiple pins. That’s okay, really. Even if we decided to make the Pico interface with two controllers, that’s still comfortably under our limit.

Now we could start thinking about reducing the 5V on the button pins to 3.3V on the Pico’s GPIO pins. But according to many accounts, 5V is okay if it’s sufficiently current-limited, which it is, in our case. So let’s ignore this for now.

Other MSX machines

(I will probably expand this section at some point.)

YIS-503

The YIS-503 (schematics, look at the circuits near the JOY1 and JOY2 connectors on the left) has 22K pull-up resistors and an MSX Engine. I doubt that MSX Engine chips (which have the PSG integrated) can be configured to destruct themselves.

Kuninet’s homebrew MSX

10k pull-up resistors. The rest of the wiring looks exactly like on the Hitachi MB-H2 as far as I can tell.

So does it actually work?

Yes. Here’s a pic of my trusty Hitachi MB-H2 MSX running its built-in sketch program, controlled through the fake PS3 controller. (Sorry, the computer’s still open from the probing I did earlier.)

The USB port is a PC part that I found at my local Hard Off. The RS232 cable is also from Hard Off. In fact, the fake PS3 controller is also from Hard Off! The USB cable is from my own cable collection.

Totally off-topic, but there’s a spot in this pic that has been cleaned up using my Pixel phone’s magic eraser. Can you see where it is? (I didn’t touch it up in an image editor afterwards.)

Pacman <3

Problems

Plugging our contraption into the joystick port while the MSX is running crashes the machine! The screen goes very slightly dark for a split second too. Probably in-rush current. I’m pretty sure I blew my multimeter’s (200 mA) fuse trying to measure the current. Laff. (Having the controller already plugged in before powering on the computer works fine.)

Source code

Replace your Pico-PIO-USB/examples/capture_hid_report.c with the following code, (re-)make, flash, and you’re set. (I’m basing my modifications on git commit d00a10a8c425d0d40f81b87169102944b01f3bb3.)

#include <stdio.h>
#include <string.h>

#include "pico/stdlib.h"
#include "pico/multicore.h"
#include "pico/bootrom.h"

#include "pio_usb.h"

#define GAMEPAD_REPORT_ID 1

// 0x1: L2
// 0x2: R2
// 0x4: L1
// 0x8: R1
// 0x10: Triangle
// 0x20: Circle
// 0x40: X
// 0x80: Square?
// 0x100: ?
// 0x200: ?
// 0x400: R3
// 0x800: Start
// 0x1000: Up
// 0x2000: Right
// 0x4000: Down?
// 0x8000: Left?

#define BUTTON_UP 0x1000
#define BUTTON_DOWN 0x4000
#define BUTTON_LEFT 0x8000
#define BUTTON_RIGHT 0x2000
#define BUTTON_A 0x20
#define BUTTON_B 0x40
#define BUTTON_A_ALT 0x10
#define BUTTON_B_ALT 0x80

#define BUTTON_UP_PIN 16
#define BUTTON_DOWN_PIN 17
#define BUTTON_LEFT_PIN 18
#define BUTTON_RIGHT_PIN 19
#define BUTTON_A_PIN 20
#define BUTTON_B_PIN 21

static usb_device_t *usb_device = NULL;

void core1_main() {
  sleep_ms(10);

  // To run USB SOF interrupt in core1, create alarm pool in core1.
  static pio_usb_configuration_t config = PIO_USB_DEFAULT_CONFIG;
  config.alarm_pool = (void*)alarm_pool_create(2, 1);
  usb_device = pio_usb_host_init(&config);

  //// Call pio_usb_host_add_port to use multi port
  // const uint8_t pin_dp2 = 8;
  // pio_usb_host_add_port(pin_dp2);

  while (true) {
    pio_usb_host_task();
  }
}

static void gpio_setup()
{
  gpio_init(BUTTON_UP_PIN);
  gpio_init(BUTTON_DOWN_PIN);
  gpio_init(BUTTON_LEFT_PIN);
  gpio_init(BUTTON_RIGHT_PIN);
  gpio_init(BUTTON_A_PIN);
  gpio_init(BUTTON_B_PIN);

  gpio_set_dir(BUTTON_UP_PIN, GPIO_IN);
  gpio_set_dir(BUTTON_DOWN_PIN, GPIO_IN);
  gpio_set_dir(BUTTON_LEFT_PIN, GPIO_IN);
  gpio_set_dir(BUTTON_RIGHT_PIN, GPIO_IN);
  gpio_set_dir(BUTTON_A_PIN, GPIO_IN);
  gpio_set_dir(BUTTON_B_PIN, GPIO_IN);

  gpio_put(BUTTON_UP_PIN, 0);
  gpio_put(BUTTON_DOWN_PIN, 0);
  gpio_put(BUTTON_LEFT_PIN, 0);
  gpio_put(BUTTON_RIGHT_PIN, 0);
  gpio_put(BUTTON_A_PIN, 0);
  gpio_put(BUTTON_B_PIN, 0);
}

int main() {
  // default 125MHz is not appropreate. Sysclock should be multiple of 12MHz.
  set_sys_clock_khz(120000, true);

  stdio_init_all();
  printf("hello!");

  sleep_ms(10);

  multicore_reset_core1();
  // all USB task run in core1
  multicore_launch_core1(core1_main);

  gpio_setup();

  while (true) {
    if (usb_device != NULL) {
      for (int dev_idx = 0; dev_idx < PIO_USB_DEVICE_CNT; dev_idx++) {
        usb_device_t *device = &usb_device[dev_idx];
        if (!device->connected) {
          continue;
        }

        // Print received packet to EPs
        for (int ep_idx = 0; ep_idx < PIO_USB_DEV_EP_CNT; ep_idx++) {
          endpoint_t *ep = pio_usb_get_endpoint(device, ep_idx);

          if (ep == NULL) {
            break;
          }

          uint8_t temp[64];
          int len = pio_usb_get_in_data(ep, temp, sizeof(temp));

          if (len > 0) {
            if (temp[0] == GAMEPAD_REPORT_ID) {
              uint16_t button_state = temp[2] << 8 | temp[3];
              if (button_state & BUTTON_UP) {
                gpio_put(BUTTON_UP_PIN, 0);
                gpio_set_dir(BUTTON_UP_PIN, GPIO_OUT);
                printf("BUTTON_UP_PIN\n");
              } else {
                gpio_set_dir(BUTTON_UP_PIN, GPIO_IN);
              }

              if (button_state & BUTTON_DOWN) {
                gpio_put(BUTTON_DOWN_PIN, 0);
                gpio_set_dir(BUTTON_DOWN_PIN, GPIO_OUT);
                printf("BUTTON_DOWN_PIN\n");
              } else {
                gpio_set_dir(BUTTON_DOWN_PIN, GPIO_IN);
              }

              if (button_state & BUTTON_LEFT) {
                gpio_put(BUTTON_LEFT_PIN, 0);
                gpio_set_dir(BUTTON_LEFT_PIN, GPIO_OUT);
                printf("BUTTON_LEFT_PIN\n");
              } else {
                gpio_set_dir(BUTTON_LEFT_PIN, GPIO_IN);
              }

              if (button_state & BUTTON_RIGHT) {
                gpio_put(BUTTON_RIGHT_PIN, 0);
                gpio_set_dir(BUTTON_RIGHT_PIN, GPIO_OUT);
                printf("BUTTON_RIGHT_PIN\n");
              } else {
                gpio_set_dir(BUTTON_RIGHT_PIN, GPIO_IN);
              }

              if (button_state & BUTTON_A || button_state & BUTTON_A_ALT) {
                gpio_put(BUTTON_A_PIN, 0);
                gpio_set_dir(BUTTON_A_PIN, GPIO_OUT);
                printf("BUTTON_A_PIN\n");
              } else {
                gpio_set_dir(BUTTON_A_PIN, GPIO_IN);
              }

              if (button_state & BUTTON_B || button_state & BUTTON_B_ALT) {
                gpio_put(BUTTON_B_PIN, 0);
                gpio_set_dir(BUTTON_B_PIN, GPIO_OUT);
                printf("BUTTON_B_PIN\n");
              } else {
                gpio_set_dir(BUTTON_B_PIN, GPIO_IN);
              }
            }
          }
        }
      }
    }
    stdio_flush();
    sleep_us(10);
  }
}

「DETEKER」の「電子工作でよく使う電子部品セット」の トランジスターについて

アマゾンで売られている「電子工作でよく使う電子部品セット」に18種のトランジスタが入っています。これらのトランジスターはどう違うのか、初心者は気になることでしょう。

トランジスターには色々な特性があります。例えば、電圧の限界値、許容電流、周波数の限界、ノイズ、ゲイン。これらの特性によって、そのトランジスターの用途が決まってきます。これらの特性は、トランジスターの「データシート」で簡単に調べられます。ネットで、例えば「2n2222 datasheet」のような検索をすればすぐに出てきます。

DETEKER のキットに含まれているトランジスターは18種もありますが、残念ながらすべての用途がカバーできるわけではないです。キットに含まれているすべてのトランジスターを調べた結果、これらのカテゴリーのトランジスターがありました。

  • オーディオ
  • 汎用
  • 電波
  • 高電圧

また、NPN トランジスターと同じ特性を持った PNP 版のトランジスターも多く含まれています。

個人的には残念だと思うのは、モーターの駆動に使えるトランジスターがないこと。(小型モーターなら、2n2222 あたりでいけなくはないかもしれませんが。)

さて、表を作ったのでご確認ください。載せていない属性もたくさんあります。データシートも、用途によっては、ノイズ値などはあったりなかったりします。(PC でご覧の場合は Shift+マウスホイールなどで横にスクロールができます。)

The following table shows several important properties of the transistors that come in DETEKER’s “Basic Electronic Components Kit” that is available on Amazon. (Use Shift+mouse wheel to scroll horizontally)

トランジスター名C-E 耐電圧C 電流ゲイン (最低は)
低電流時 (1 mA)
ゲイン (最大)
低電流時 (1 mA)
ゲイン (標準値)
低電流時 (1 mA)
ゲイン (最低は)
高電流時 (50 mA)
周波数特性種類データシートに書いてある用途カテゴリーデータシートの URL
Transistor nameMax C-E VoltageC currentGain (min)
at low current
(e.g. 1 mA)
Gain (max)
at low current
(e.g. 1 mA)
Gain (typical)
at low current
(e.g. 1 mA)
Gain (min)
(at high current)
FrequencyTypeDatasheets explicitly mentionAuthor’s interpretationDatasheet URL
S805020700 mA10040011040NPNClass B push-pull audio amplifier
General purpose
Complementary to S8550
Audio (オーディオ)
General purpose (汎用)
https://pdf1.alldatasheet.jp/datasheet-pdf/view/172696/UTC/S8050.html
S855020700 mA10040011040PNPSee S8050Audio
General purpose
https://pdf1.alldatasheet.jp/datasheet-pdf/view/172698/UTC/S8550.html
S901240500 mA6440040150 MHzPNPAudiohttps://pdf1.alldatasheet.jp/datasheet-pdf/view/433169/MCC/S9012-I.html
S9013NPNDesigned for use in 1W output amplifier of portable radios in class B push-pull operation
Complementary to S9012
Audiohttps://pdf1.alldatasheet.jp/datasheet-pdf/view/447547/TGS/S9013.html
S901445100 mA601000280NPNPre-amplifier, low level & low noise
Complementary to S9015
Audiohttps://pdf1.alldatasheet.jp/datasheet-pdf/view/54742/FAIRCHILD/S9014.html
S9015PNPSee S9014Audio
S90183050 mA28198100GBWP
700 MHz min, 1100 typ
NPNAM/FM amplifier, local oscillator of FM/VHF tunerRF (電波)https://pdf1.alldatasheet.jp/datasheet-pdf/view/54744/FAIRCHILD/S9018.html
A1015-50-150 mA7040025
(at -150 mA)
PNPLow-frequency amplifier
Complementary to C1815
Audiohttps://pdf1.alldatasheet.net/datasheet-pdf/view-marking/1161111/ONSEMI/KSA1015GRTA.html
C1815NPNSee A1015Audio
A42305200 mA2540
(at 30 mA)
50 MHz
20V, 10 mA
NPNHigh voltageHigh voltage (高電圧)https://pdf1.alldatasheet.com/datasheet-pdf/view/1312096/LUGUANG/A42.html
A92-305-200 mA2540
(at 30 mA)
50 MHz
20V, 10 mA
PNPHigh voltageHigh voltagehttps://pdf1.alldatasheet.com/datasheet-pdf/view/1312098/LUGUANG/A92.html
A733-50-100 mA90600200Current Gain Bandwidth
100 MHz min, 180 typ
PNPAF output amplifierAudiohttps://pdf1.alldatasheet.jp/datasheet-pdf/view/143737/STANSON/A733.html
C94550100 mA90600200Current Gain Bandwidth
100 MHz min, 180 typ
NPNAF output amplifierAudiohttps://pdf1.alldatasheet.jp/datasheet-pdf/view/143746/STANSON/C945.html
2N222230800 mA50300250 MHzNPNGeneral purposehttps://pdf1.alldatasheet.jp/datasheet-pdf/view/21676/STMICROELECTRONICS/2N2221-2N2222.html
2N3906-40200 mA80300250 MHzPNPSwitching and amplifier
Complementary to 2N3904
General purposehttps://pdf1.alldatasheet.jp/datasheet-pdf/view/61873/GE/2N3906.html
2N3904NPNHigh-speed switching
Complementary to 2N3906
General purposehttps://pdf1.alldatasheet.jp/datasheet-pdf/view/15077/PHILIPS/2N3904.html
2N5401150200 mA50240Current gain bandwidth product
100 MHz min, 300 max
PNPGeneral purposeGeneral purposehttps://pdf1.alldatasheet.jp/datasheet-pdf/view/50038/FAIRCHILD/2N5401.html
2N5551160600 mA8030
(at 50 mA)
NPNHigh voltagehttps://pdf1.alldatasheet.jp/datasheet-pdf/view/11488/ONSEMI/2N5551.html

Another Hitachi MB-H2 MSX repair

Introduction and conclusion

I bought another Hitachi H2 MSX last year, mostly because I wanted the manual, which I’ve scanned. Unfortunately for my free time but fortunately for my, um, education in retro computing, this computer had issues with its video RAM. Often, the computer would boot up with a garbled screen. Resetting after a couple minutes would usually fix the issue. The video RAM is made by Toshiba, and is called TMM416P-2 (also marked 4116-2). If you have this memory, I’d recommend you look out for issues, because all eight ICs had the same issue, namely: crazy-ass noise on the -5V line. (How much noise is “crazy-ass” noise? In this case, it’s +-3V.) The noise sort of comes and goes, or at least gets stronger and weaker, randomly, which made it too hard for me to find a combination of capacitors to tame it. (Though it’s more likely to be present after turning the computer on after a long while.) I ended up socketing them all, replacing one that unfortunately died during the very professional desoldering process, and added 103 ceramic capacitors to (almost) every one, between the -5 and GND pins, which seems to have a slight positive effect. (The bottom part of the case has a hook that requires some clearance and prevents two of the chips from getting their capacitor.) I also replaced the zener diode with a 7905, which fit perfectly after bending the legs a little bit.

Details

The -5V rail for the 4116 VRAM chips is generated using a zener diode. Replacing this, or the capacitor on the rail, unfortunately didn’t have any effect. Hmm, odd!

Next, I decided to desolder the -5V pin on the first 4116 IC, and drive it using my own known good -5V supply (using a standard 7905 regulator). Result: noise both on the first chip and all the others. Hmm, odd!

Next, I did this for the rest of the 4116 ICs, and was able to see that each and every one generates noise.

Next, I decided to desolder all of them and individually test them on my 4116 tester. (They still produced the noise while in the tester.) I decided to desolder all of them because the H2 seemed to support 4416 ICs for the video RAM, and I happened to have some of those that were waiting to be put to use. I.e., there are holes of the right size, right next to the VDP, and the silkscreen on those holes says “TMS4416”. ;)

Well, today’s lesson is, do not necessarily trust the silkscreen. The TMS9928A doesn’t even support 4416 VRAM! The holes where the data pins go didn’t even have any traces on them.
The TMS9928A can be made to support 4416 RAM using a custom circuit, though. Maybe I should have implemented this circuit. I even bought the two required parts! But then decided against it for complexity management reasons.

Unfortunately, expecting to be able to use the 4416 slots, I had desoldered the original VRAM ICs in a rather brutish manner, losing vias and traces in the process, which meant that I needed to add a bunch of bodge wires to get them to work again. At least the bodge wires aren’t too complex to figure out, if at some point one of them decides to become loose again. I ended up keeping the original, noisy, RAM chips. But since they’re now all socketed, it shouldn’t be too hard to replace them at some point, if necessary.

Pictures

Garbled screen
Hitachi MB-H2 logic board from above. The misleading silkscreen is in the top left, above the TMS9928ANL VDP.
Example with a lot of noise
And an example with a lot less. (This picture is from six months ago. It’s entirely possible that I had extra capacitors for this shot.)
TMM416P-2 noise closeup. Intensity varies. Here it’s about 3V peak-to-peak.
TMM416P-2 noise closeup, one more example.

After

Noise after “completion” of this repair (note: using AC coupling here). Note that noise intensity has always been a bit random, so I this can’t be taken as proof that adding 103 capacitors to each chip is going to help in any case, and I am not too interested in performing rigorous testing. Anecdotally, I haven’t seen any garbled screens yet after the “repair”!
Noise closeup (note: using AC coupling here)
Check out this 7905’s limbo dance moves

Before looking at the picture of the bodge wires below, please keep in mind that it is rude to stare.

Ahem

Memory-friendly monochrome WebP encoding

Hi, just a quick thing that may be useful if you’re trying to save memory while using libwebp to encode a monochrome image.

We assume that you already have the monochrome data, and you just want to encode it. Your data size is width*height*1, and is 100% equivalent to the Y channel in YUV.

Allocating a bunch of memory (width*height/2) for the UV part would be silly, but looks like it’s required, right?

Well, we can actually get away with just width/2, by doing this:

    picture.y = y_data; // straightforward
    picture.y_stride = width; // straightforward
    picture.u = dummy_uv_data; // array of length width/2 full of 0x80 bytes
    picture.v = dummy_uv_data; // it's the same array!
    picture.uv_stride = 0; // stride is 0! this means we'll always read UV from the same location for every single pixel row

BTW, you can generate your y_data with the following ImageMagick command:

convert test.jpg y:test.y

Below is the full code demonstrating the use of this trick. Note that the code is about 90% generated by ChatGPT, and I haven’t cleaned it up beyond the minimum necessary to get it to work. Don’t forget to adjust the width and height variables to match your input image.

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <webp/encode.h>

int main() {
    int width = 3024;
    int height = 4032;
    int stride = width;
    uint8_t *y_data = malloc(width*height);
    uint8_t *dummy_uv_data = malloc(width/2);
    memset(dummy_uv_data, 0x80, width/2);

    FILE *infile = fopen("test.y", "r");
    if (!infile) {
        fprintf(stderr, "Error opening output file.\n");
        return 1;
    }
    fread(y_data, width*height, 1, infile);
    fclose(infile);

    FILE* outfile = fopen("output.webp", "wb");
    if (!outfile) {
        fprintf(stderr, "Error opening output file.\n");
        return 1;
    }

    WebPConfig config;
    WebPPicture picture;
    WebPMemoryWriter writer;

    WebPConfigInit(&config);
    config.lossless = 0;  // Set to 1 for lossless encoding
    config.quality = 75;  // Set the desired quality value (0-100)

    WebPMemoryWriterInit(&writer);

    if (!WebPPictureInit(&picture)) {
        fprintf(stderr, "Error initializing WebP structures.\n");
        return 1;
    }

    picture.width = width;
    picture.height = height;
    picture.use_argb = 0;
    picture.y = y_data;
    picture.u = dummy_uv_data;
    picture.v = dummy_uv_data;
    picture.y_stride = width;
    picture.uv_stride = 0;
    picture.writer = WebPMemoryWrite;
    picture.custom_ptr = &writer;

    if (!WebPEncode(&config, &picture)) {
        fprintf(stderr, "Error encoding WebP.\n");
        return 1;
    }

    fwrite(writer.mem, 1, writer.size, outfile);

    free(y_data);
    WebPMemoryWriterClear(&writer);
    WebPPictureFree(&picture);
    fclose(outfile);

    return 0;
}

Compile with: cc -o encode_webp encode_webp.c -lwebp

Execute with: ./encode_webp

Printf debugging helper

Ah yes, printf debugging. If you’re like me and occasionally need to place a dozen “got here”s at once, you may find this, or something like this helpful.

You need some kind of facility to create global shortcuts. If you, like many sensible people in the world, are a KDE user, you’ll find such a facility right in the settings:

Define two shortcuts, perhaps name them “Next” and “Redefine”. Perhaps Meta+Ctrl+Alt+Shift isn’t very ergonomic, but it’s probably unique at least.

Next, we’ll add actions. “Next” should type something like ‘printf(“Got here23”);’, and “redefine” allows you to change the ‘printf(“Got here’ prefix and the ‘”);’ suffix.

Here are two example shell scripts to accomplish this. Dependencies: xclip, xdotool. (Note: these scripts probably won’t work on Wayland, but I’d assume there are Wayland-compatible replacements for these two programs.)

next.sh:

#!/bin/bash

cd $(dirname -- "${BASH_SOURCE[0]}")
touch prefix
touch suffix
touch next_i
i=$(cat next_i)
string_to_type=$(cat prefix; echo -n $i; cat suffix)
sleep 0.8 && xdotool type "$string_to_type"
((i++))
echo -n $i > next_i

reset.sh:

#!/bin/bash

cd $(dirname -- "${BASH_SOURCE[0]}")
touch prefix_or_suffix
prefix_or_suffix=$(cat prefix_or_suffix)
if [ "$prefix_or_suffix" == 1 ]; then
    xclip -o -selection primary > suffix
    prefix_or_suffix=0
else # 0 or blank or junk
    xclip -o -selection primary > prefix
    prefix_or_suffix=1
fi
echo -n $prefix_or_suffix > prefix_or_suffix
echo -n 1 > next_i

If your system is kind of slow and xdotool’s output gets chopped up somehow, maybe try xdotool key –delay 50. You could also do echo $string_to_type | xclip, and then xdotool to send Ctrl-V in order to paste. That might be a little faster for long strings.

Here’s a short video clip that shows how this works:

By the way, this is the 100th post on this blog. :O

Testing 4164/4116 DRAM ICs in-circuit/live with a Raspberry Pi Pico, without removing them (WIP)

Hi! My sabbatical ended and I’ve been working again since two months ago. Boo. However there’s this thing I just wanted to get off my chest, so I spent a few hours that I did not really have and wrote some code and this blog post about it!

Last year, I made a 4164/4116 DRAM tester for the Raspberry Pi Pico, which works just as you would expect, you program the Pico, place it on a breadboard, add some wires and something to drop the 5V to 3.3V for the Q output, place the 4164 or 4116 chip you’d like to test on the breadboard, connect a USB cable from the Pico to a computer, and look at the terminal output (or just the on-board LED). This is useful if you have already extracted a 4164 chip that you have determined to be bad. (I have written previously how you could determine whether a 4164 chip is bad, here and here.)

I previously made a “live” chip tester for the Arduino, capable of checking whether simple logic chips are doing what they’re supposed to be doing in a powered on system. I used the Arduino rather than the Pico because the Arduino is 5V tolerant. In this post, we’re not doing simple logic chips, but DRAM chips. And since we need to be super-fast, we’re using a Pico.

Executive summary

We “allocate” 64 KB of RAM on the Pico. We need to read in two 8-bit addresses, and combine them to a 16-bit address. If a write is being attempted, we write the same bit value into the appropriate address of Pico’s RAM. If a read is being attempted, we check if the DRAM’s output is the same as what we have in the Pico’s RAM. (We could also use 64 Kb of RAM on the Pico at a minimum, but as we’ll see in the next section we do not really have a lot of time for such shenanigans.)

To use this software, having an IC test clip would probably be very beneficial. I use these: https://www.kandh.com.tw/ic-test-clips-ic-test-clips.html. These are available in Akihabara from Akizuki: https://akizukidenshi.com/catalog/g/gC-04753/. If they aren’t available in your market, perhaps these somewhat expensive test clips from 3M might be more available: https://www.digikey.jp/ja/products/detail/3m/923700/3852.

Note: to use this software, you need to at least mostly know what you’re doing.

Example usage
Test clip close-up
Wiring closeup

One note on hooking up the Pico directly to 5V components, as seen in the above pictures

Not guaranteed to not fry your Pico (note the double negation), do this at your own risk. Your Pico will possibly also draw more current than a normal TTL chip when driven above 3.6V or so, which could easily damage your precious hardware! (The current on the address pins will likely be supplied by a pair of 74LS157 chips, on the Q pins by the RAM chip, and the current on the RAM’s data in pin by the CPU or any other. \RAS and \CAS probably by custom logic chips.) Use a 74HCT245 between the Pico and the device you’d like to test.

Caveat 1

The Pico is very fast when compared to an 8-bit computer from the 1980s, but 4164 transition times are extremely fast too. If you look at a timing diagram for the 4164 (which you will find in any 4164 datasheet), you will notice that all transition times listed are on the order of <ten, tens, or low hundreds of nanoseconds. Most 4164 chips have a -20, -15, -12, or -10 suffix in their part number. This indicates the minimum allowable number of nanoseconds × 10 for the sum of all transitions. (If the DRAM is driven faster, it probably won’t work correctly. However, if it’s driven slower, most things will generally work out, though if your system e.g. reads the DRAM’s output too slowly it might be too late and not work out.)

The stock Pico runs at 125 MHz, which means that one CPU cycle is 8 ns. From hearsay, you can probably overclock any Pico to 200 MHz (clock cycles are 5 ns), and many people report that their Pico runs fine even at 400 MHz (clock cycles are 2.5 ns). For -15 DRAMs, you have 150/8 = 18.75 CPU cycles per transition if the DRAM is driven at its max speed. (Note: it isn’t on MSX machines, at least.) 18 CPU cycles isn’t a lot. Remember, we need to convert two 8-bit addresses to a 16-bit address, and then check if the newly read value matches our previously recorded value. Is that doable in 18 CPU cycles? I don’t think so, but I’m not an ARM assembly expert.

So, did I get it to work? Well… sort of but not quite.

Caveat 2

Note that there are many failure modes for DRAM chips. For example, if the chip gets super-hot within a few seconds, it’s probably shorted. I’d expect there to be a very low resistance between ground and another pin. Before hooking up the “live” tester I’m going to explain on this page, check for that kind of stuff. I would not recommend using the live tester on a chip that gets super-hot within seconds. You could risk melting your connectors, and if the short is not between VCC and GND, potentially also risk your Pico due to excessive current on a pin driven by the Pico.

Caveat 3

Untested with 4116 chips, only tested with my MSX’s main RAM.

Current status

The live tester successfully verifies that a TMS4164-15 DRAM chip under test in my stand-alone 4164 RAM tester is outputting the correct values. (There is no reason why it shouldn’t work with a 4116 chip. The tester certainly does! You just need to re-wire slightly. Also, 5V on the Pico, yeah, it doesn’t seem to “explode immediately.” But -5V or 12V? You’d better leave those pins unconnected!)

On a real system (my trusty Hitachi MB-H2 MSX) with TMS4164-15NL DRAM chips, the live tester manages just fine from power up, up until the first ~11000 comparisons (which is a split second), but at some point reads a 0 when it should have been a 1, and prints an error. Printing errors takes a long time at 115200 bps, so we go completely off the rails once we’ve encountered the first error. (That’s slightly configurable though, see “Lnobs” section for details.)

However, in the live tester’s “DEBUG” mode, it just collects a lot of samples into memory, and prints them out when the sample memory is full. Using a simple script (the Perl script included in the repo), I can then verify that all the samples check out. Note that the DEBUG code also prints out who many times it had to wait until it got data from the PIO. The answer is 0 times every time, which means that we’re too slow or almost too slow. (Sometimes there is a handful of mismatches, I’ll look into those at some point. Could be that we were just too slow, or the 5V is messing with the system ;D)

There’s a lot that could be improved, hence the “WIP” attribute in title. The first obvious improvement would be to try a little harder in the non-debug mode. The Pico has two CPU cores, and we’re only using one. We could attain more throughput by running according to the following scheme:

Core 1:
Wait for sample 1
Tell core 2 to wait for sample 2
Process sample 1
Wait for sample 3
Tell core 2 to wait for sample 4
Process sample 3

Core 2:
Wait for instructions from CPU 1
Wait for sample 2
Process sample 2
Wait for instructions from CPU 1
Wait for sample 4
Process sample 2

Another potential optimization would be to write the processing code in ARM assembly. (My experience with ARM assembly is mostly read-only, so not sure how much better I can get without spending way too much effort.)

Also I haven’t tried overclocking yet. Probably should!

Some more technical details

We use two PIO state machines. One waits for RAS high→low (“RAS SM”, and the other one waits for CAS high→low (“CAS SM”, which comes after the RAS transition.

Not all RAS transitions are followed by CAS transitions. For example, refresh is mostly RAS-only. In addition, though perhaps not used on the MSX(?), RAS transitions may be followed by multiple CAS transitions.

In the C code, we wait for events on the CAS SM, and then read from both the RAS SM’s FIFO and the CAS SM’s FIFO. In the PIO code, the CAS SM tells the RAS SM whether to push its address or not. (We could alternatively (maybe) always push and have the CPU make sure the FIFO never gets full, but my experiments in that regard didn’t go that well.)

There are a lot of defines that change the way the system works.

Knobs

  • Setting PRINT_ERROR_THRESHOLD to something above 0 only starts printing errors after encountering that many errors.
  • CORRECT_ERRORS causes the Pico’s memory to be updated when we encounter a mismatch
  • VERBOSE_STATUS_LEDS causes the Pico to perform GPIO writes at GPIO16+ (or so, I recommend you check the source to find the exact GPIO pin number) to indicate whether we’re reading or writing. This isn’t very beneficial performance-wise.
  • SWAP_RAS_AND_CAS_ADDRESSES: my Hitachi MB-H2 MSX applies the CPU’s A0-A7 to the RAM pins at RAS time, and A8-A15 at CAS time. When thinking “rows” and “columns”, most people would probably assume that “rows” use the more significant bits, but that is not necessarily the case, and it doesn’t matter. When operating in DEBUG mode, you’ll see accesses that are mostly linear if this define is set correctly. Otherwise each access will be 256 apart.

The code is at https://github.com/qiqitori/pico_live_chip_tester.

So… is this likely to find a fault?

I haven’t seen any DRAM chip failures (except on YouTube) where only some addresses were broken and the chip appeared to work otherwise. (SRAM chips are different story. I’m sort of planning on doing an SRAM tester too, but probably not too soon.) Most DRAM chips I’ve seen are an all-or-nothing affair. For all-or-nothing affairs, this chip tester is very likely to find the problem immediately, especially if you compare all 8 chips and only one is weird. For hypothetical chips with just a single address problem (or perhaps, a single broken row), either it’ll be difficult with the code not 100% working right now, or it might take several attempts and statistics.

Playing .psg tunes (or even .asc/.pt3/… tunes) on the MSX (part 2)

(I do not recommend watching this demo on a smartphone. It’s quite flashy and exacerbated my headache. Also, the WebMSX code will ask you to go fullscreen, but I don’t think you can start the demo without going fullscreen and then back again.)

In part 1, we constructed a 48 KB ROM to play a tune called “Popsa 2”. We didn’t apply any real compression algorithms but implemented a set of scripts to find repeated sections in a binary file and added an instruction to the .psg file format to “call” repeated sections. Using real compression algorithms we could achieve much better compression, and each tune would just occupy a couple KBs. Using our method, we _just_ manage to fit the tune into a single cartridge. Popsa 2 fit into 48 KB, and the tune we’re going to do today is going to require a 64 KB ROM. If the previous track didn’t quite do it for you, I think it might be worth giving this one a chance. It’s a very complex piece of wonder-inducing music in my opinion. (Press the power button and in the menu that pops up, choose “Power” to boot the ROM.)

64 KB ROMs still require a header at 0x4000 or 0x8000. This means that we need to add a header and some entrypoint code to set up the slots right in the middle of our data. That’s inconvenient, but I didn’t feel like changing the structure of the program, so I just added one check each at the beginning and end of the main loop to see if the HL register has gone above a certain value. If yes: before the main loop, it adds an offset; after the main loop, it subtracts the same offset again. This way, we don’t have to do anything too complex when jumping to a previous section of the track.

Mic – Dreamless is #2 on the top 300 of ZX Spectrum music. There’s a bunch of other nice stuff on that list!

In part 1, I mentioned a problem in WebMSX that prevented the 48 KB ROM from working. 64 KB ROMs are not affected by this problem. The WebMSX player at the top of this article page plays the ROM linked to above. The ROM also works on real hardware (the Hitachi MB-H2 MSX1 I repaired a while ago).

Aside: disabling WebMSX’ auto-scroll

In the unlikely event that you have read this blog’s front page sometime in the last few months, you might have noticed that it scrolled automatically to this WebMSX player, even though this post is now very much not the newest post on this blog! I only noticed this a short while ago and decided to fix it, because it’s quite annoying. The below code snippets are taken from the WebMSX commit with the tag “v6.0.4”. Older or newer versions may look different.

All you need to do is remove the “this.focus()” line in the powerOn function in CanvasDisplay.js:

    this.powerOn = function() {
        this.setDefaults();
        updateLogo();
        document.documentElement.classList.add("wmsx-started");
        setPageVisibilityHandling();
        this.focus(); // <-- this is the line you need to remove or comment out
        if (WMSXFullScreenSetup.shouldStartInFullScreen()) {
            setFullscreenState(true);
            if (FULLSCREEN_MODE !== 2 & isMobileDevice) setEnterFullscreenByAPIOnFirstTouch();       // Not if mode = 2 (Windowed)
        }
    };

If you prefer to just edit the minified version, search for the call to setPageVisibilityHandling() and then edit out the “this.focus(),” bit.

Before:

documentElement.classList.add("wmsx-started"),setPageVisibilityHandling(),this.focus(),WMSXFullScreenSetup.shouldStar

After:

documentElement.classList.add("wmsx-started"),setPageVisibilityHandling(),WMSXFullScreenSetup.shouldStartInFullScreen

Playing .psg tunes (or even .asc/.pt3/… tunes) on the MSX (part 1)

This article is somewhat technical. If you just want to listen to a chip tune on WebMSX, maybe go for part 2 instead.

In previous articles I explored the YM2151 and the VGM file format. In this article, we’ll go back a generation and listen to some tunes written for the DSG (doorbell sound generator) PSG (programmable sound generator, i.e., the General Instruments AY-3-8910, or compatibly, Yamaha’s YM2149). PSG files (and particularly ASC files) are mainly used for ZX Spectrum chip tunes (I think), but the MSX has the same sound chip so why not play some chip tunes on the MSX?

Well, before we spend time working on something just slightly above PC beeper music… are there even any decent PSG tunes? Well, I’ve found at least one that like, “Popsa 2”, as is included in the below mix (scroll down a bit) on YouTube for example, and some of the commenters on this video seem to like “Illusion”.

Unfortunately, it doesn’t work in WebMSX (after 20 seconds or so). But it works in all three (NTSC) openMSX machines I bothered to test with, and it also works on my real MSX1 (Hitachi MB-H2). To get it to run in WebMSX, you have to “Set ROM format” -> “KonamiSCC”, but even then it’ll crash after a few minutes (vs. 20 seconds for e.g. ASCII8). For some reason it doesn’t let me choose “Normal”. I’m quite sure it would work with that setting if it were available. :p I’ll look into the matter at some point, probably. Looks like WebMSX will require a patch to work. Patch is submitted and will probably make it into the next version.

This machine produces NTSC color artifacts like there is no tomorrow.

Caution: writing to certain PSG registers is unsafe on certain MSX machines. I don’t think my code writes to these registers, but I didn’t make 100% sure. (However, openMSX gives you a warning when it notices unsafe writes, and I didn’t get a warning.)

“Popsa 2” was made in a program called ASC Sound Master. The “.asc” file can be downloaded here: https://zxart.ee/eng/authors/d/dreamer/popsa-2/. These .asc files are pretty small. They can be converted to PSG using ZXTune (https://bitbucket.org/zxtune/zxtune/) (and from PSG they can easily be converted to e.g. VGM, see bottom of this post), but the resulting files are too large to fit on a regular MSX1 cartridge.

ZXTune compilation and conversion:

git clone https://bitbucket.org/zxtune/zxtune.git
cd zxtune
make platform=linux system.zlib=1 -C apps/zxtune123/ -j4

bin/linux/release/zxtune123 --convert mode=psg,filename=foo.psg -- Dreamer\ -\ POPSA-2\ \(1994\).asc

The original .asc file is 3720 bytes. The resulting .psg is 129028 bytes. If you convert that to VGM, the resulting size is 187342 bytes.

The PSG file format

The PSG file format is very similar in concept to the VGM file format, except that only one chip is supported, the PSG. It seems it’s primarily used for ZX Spectrum chip tunes. As only one chip is supported, you don’t need the “command byte” that indicates what chip is to be written to. So you only have pairs of “register address” and “register value to write”.

There’s also a header in the first 16 bytes. The first three bytes are “PSG”, dunno about the rest.

The PSG only has 16 (IIRC) registers, and some of those aren’t even relevant for sound. In other words, the registers 0x10 to 0xff don’t exist and the designers of this file format used that opportunity to fit in a “wait” command at 0xff (one raster scan, so 1/50s or 1/60s depending on whether the system is PAL or NTSC). There’s also a command that waits multiple raster intervals, 0xfe, and a command that ends the tune, 0xfd. Ignoring the header, here are the first few bytes of the Popsa 2 PSG file:

ff
00 41
01 05
02 0b
03 01
04 e0
05 00
06 1f
07 31
08 0f
09 0f
0a 0a
ff
02 e0
03 00
07 38
09 0d
0a 0c
ff
...

All this means: wait 1 raster interval, then write to registers 00 through 0a with values 41, 05, 0b, 01, …, respectively, wait 1 raster interval, write to registers 02, 03, 07, 09, 0a, with values e0, 00, 38, 0d, 0c, respectively, wait 1 raster interval. (As you can see the 0xff command doesn’t take any parameters.)

Now that we know mostly how this file format works, it’s time to think about how to fit roughly 126 KB of data into my 48 KB cartridge. We could easily use an off-the-shelf compression library, but where’s the fun in that? That’s like… modern programming, ew.

We’ll invent another command for PSG, 0xfc, which takes a two-byte parameter that tells it to jump back somewhere (for a while, and then returns to its original location). We also need to write a program that identifies repetitive sections in the music (of which there are plenty). The former is pretty easy, so let’s talk about the latter program first.

  1. Compute MD5 sums of a 100-byte window for every byte in the file. So we end up with 129028-100=128928 MD5 sums. Easy and fast on modern hardware. See code snippet below.
  2. Check if we even have repeated chunks, e.g. by executing: md5sum chunks/* | awk ‘{print $1}’ | sort -n | uniq -c
  3. We may want to check a couple other window sizes to see if we can get better results. A lower window size means we’ll find more repetition, but we need 3 bytes to encode a jump in our PSG file.
  4. Re-assemble PSG file using a quick-and-dirty and probably somewhat buggy script. (See below.)

The resulting data length is 42158 bytes for the Popsa 2 song.

For task (1) we first convert the PSG file into hex, and later into tokens:

xxd -p Dreamer\ -\ POPSA-2\ \(1994\).psg | sed -r -e 's/(..)/\1 /g' | tr -d '\n' > Dreamer\ -\ POPSA-2\ \(1994\).psg.hex
# Then remove 16-byte header using a standard text editor
cat Dreamer\ -\ POPSA-2\ \(1994\).psg.hex | perl -e '$_ = <STDIN>; while (s/(..) //) { $cmd = $1; if ($cmd eq "ff" or $cmd eq "fd") { print("$cmd\n") } else { s/(..) //; $param = $1; print("$cmd $param\n") } }' > tokens

Then divide the tokens into chunks using the below script, divide_tokens_into_chunks.sh:

#!/bin/bash

N=100 # sliding window length
mkdir -p chunks_N$N
line_count=$(cat tokens | wc -l)
for ((i=0; i<$((line_count-N)); i++)); do
    tail -n +$i tokens | head -n $N > chunks_N$N/chunk_$i
done
rm chunks_N$N/chunk_0 # same as chunk_1

You know, looking back at this code for the first time in a while, I see there’s a nice off-by-1 error and a nice rm command to fix half of the problem. But the great thing about this being a hobby is that I don’t need to care. :)

Next, we have a Perl script that creates our PSG file. It needs some help though, so we do this first:

find chunks_N100/ | xargs md5sum > chunks_N100_md5sums

(We can’t do md5sum chunks_N100/* because that expands to a tad too many arguments in our case. xargs automatically cuts down the number of arguments to a more reasonable value.) This is the main program. Usage: ./compress_aggressive_but_convert_to_psg.pl < chunks_N100_md5sums > foo.psg

#!/usr/bin/perl

# dependencies:
# chunks_N$N/ (directory)
# chunks_N$10_md5sums (file) # example generation: find chunks_N10/ | xargs md5sum > chunks_N10_md5sums

use strict;
use warnings;
use feature "switch";

my $N = 100;

my $md5s = {};
my @chunks;

my $md5;
my $file;

my $debug_logged = 0;
my $lines = [];
my $current_output_byte_number = 0;
for (my $chunk_number = 0; <>; $chunk_number++) {
    /([a-z0-9]+)\s+([a-zA-Z0-9_\/]+)/;
    $md5 = $1;
    $file = $2;
    if (exists $md5s->{$md5}) {
        # can't call chunks that already contain a call because that call would take us beyond the N token window that we can see from where we are
        # that means it's likely we'd generate wrong code
        # so we'll just move on and maybe we'll find a nicer block

        my $target_chunk_number = $md5s->{$md5}->{chunk_number};
        my $concatted_chunks = join('', @chunks[max(0, $target_chunk_number-$N)..min($#chunks, $target_chunk_number+$N)]);
        if (($concatted_chunks =~ /; call/) or # NOTE "call wait_for_raster" is allowed
            ($chunk_number - $target_chunk_number < $N)) {
            # 1) can't convert due to existing call; nothing to be done here, or
            # 2) we can't call something right behind us
            # DANGER let's head back to the non-exists path
            goto NON_EXIST_PATH;
        } else {
            if (!$md5s->{$md5}->{converted_to_call}) {
                convert_to_callable_sub($target_chunk_number);
                $md5s->{$md5}->{converted_to_call} = 1;
            }
            my $output_byte_number_high = int($md5s->{$md5}->{output_byte_number} / 256);
            my $output_byte_number_low = $md5s->{$md5}->{output_byte_number} % 256;
            $chunks[$chunk_number] = sprintf("fc %02x %02x ; call " . $md5s->{$md5}->{output_byte_number} . " ($md5)\n", $output_byte_number_high, $output_byte_number_low);
            $current_output_byte_number += 3;

            # skip next N-1 rows
            for (0..$N-1) {
                my $foo = <>;
                $chunk_number++;
                $chunks[$chunk_number] = "";
            }
        }
    } else {
        $md5s->{$md5} = {};
        $md5s->{$md5}->{chunk_number} = $chunk_number;
        $md5s->{$md5}->{converted_to_call} = 0;
NON_EXIST_PATH:
        open my $fh, '<', $file or die "Can't open \"$file\": $!";
        my $token = <$fh>;
        close $fh;
        my $asm = convert_to_asm($token);
        $md5s->{$md5}->{output_byte_number} = $current_output_byte_number;
        $current_output_byte_number += (scalar(split(" ", $asm)));
        $chunks[$chunk_number] = $asm;
    }
}

print foreach @chunks;

print "infloop:
    jr infloop\n";

# no changes needed
sub convert_to_callable_sub($) {
    my $block_number = shift;
}

# don't actually do anything here
sub convert_to_asm($) {
    my $string = shift;
    return "$string";
}

sub min($$) {
    my ($a, $b) = @_;
    return $a if ($a < $b);
    return $b;
}
sub max($$) {
    my ($a, $b) = @_;
    return $a if ($a > $b);
    return $b;
}

The output of this program is in hex. Now we just need some assembly code to read the data and put it into the PSG registers. Here’s the core part:

ld hl,psg_begin
main_loop:
    ld a,(hl)
    cp 0xff
    jr z,wait
    cp 0xfe
    jr z,wait_n_times
    cp 0xfd
    jr z,end
    cp 0xfc
    jr z,jump
    jr register_write
inc_loop:
    inc hl
    jr loop

wait:
    call wait_for_raster
    jr inc_loop

register_write:
    ld a,(hl)
    out (0xa0),a
    inc hl
    ld a,(hl)
    out (0xa1),a
    jr inc_loop

wait_for_raster:
    in a,(0x99)
    and 128
    cp 128
    jr nz,wait_for_raster
    ret

psg_begin:
    include "foo.psg"
ds 010000h-$ ; fill rest with 0s

Understanding the above should help understanding the full implementation. (The above doesn’t include the code for the 0xfe, 0xfd, and 0xfc commands.) Note that we can’t use the above wait_for_raster on NTSC machines because the tune assumes 50 Hz. So we’ll instead emulate the 50 Hz interval using a busy loop.

For 0xfd (end of song), we just enter an infinite loop. For 0xfe, we just call wait_for_raster multiple times. For 0xfc, we need to store where we left off, then set hl to the address in the parameter, then execute exactly 100 main loop runs, then set hl back to its previous address and continue as normal.

Here’s the code, which also includes some VRAM writes to visualize the music a little bit. Does it look good? Eh, I dunno. It was an experiment. I changed the registers to be displayed because some registers don’t see updates very often. The overall visuals are a bit noisy, but there is one section that looks good in my opinion, and it’s also the section that I like best in the tune, right at the end. You can clearly see one of the registers changing right in sync with the doorbell sound. (It looks even more in sync in openMSX.)

N: equ 100

org 4000H
db "AB"
dw entry_point
db 00,00,00,00,00,00,00,00,00,00,00,00

SetVdpWrite: macro high low ; from http://map.grauw.nl/articles/vdp_tut.php
    ld a,low
    out (0x99),a
    ld a,high
    add 0x40
    out (0x99),a
endm

vpoke: macro value
    ld a,value
    out (0x98),a
endm

entry_point:
    ; copy cart rom (c000-f000) to ram
    in a,(0a8h)
    and 11000000b ; we want to know which slot is RAM, and AFAIK RAM should be mapped in at 0xc000-0xffff.
    ld c,a ; save value for later
    in a,(0a8h)
    and 00001100b ; we are executing from cartridge ROM at 0x4000~0x7fff, so the 2-bit value for this region is known correct. we just have to make the slots above this one the same value.
    ld b,a ; save a
    rla ; << 1 (now have 000xx000b)
    rla ; << 1 (now have 00xx0000b)
    or b ; | saved b (now have 00xxxx00b)
    rla ; << 1 (now have 0xxxx000b)
    rla ; << 1 (now have xxxx0000b)
    or b ; | saved b (now have xxxxxx00b)
;     ld a,01010100b ; set pages 0: rom 1: rom 2: cart 3: cart
    out (0a8h),a
copy_c000_f000:
    ld hl,0c000h ; start at c000
copy_c000_f000_loop:
    ld a,(hl) ; read from ROM address (hl)
    ld d,a
    in a,(0a8h)
    ld b,a ; store original value
    and 00111111b ; only keep settings for lower three slots
    or c ; add in setting for top slot (saved earlier)
;     ld a,011010100b
    out (0a8h),a ; set port
    ld (hl),d ; store value read from ROM address (hl) to RAM address (also hl of course)
    ld a,b ; load a with original value
    out (0a8h),a ; set port back
    inc hl
    ld a,h
    cp 0f0h
    jp z,other_init ; done with this copy
    jp copy_c000_f000_loop

; entry_point:
;     ld a,0xd4
;     out (0xa8),a ; set slots
other_init:
    ; set ports to bios:cart:cart:ram
    in a,(0a8h)
    and 00111111b ; only keep settings for lower three slots
    or c ; add in setting for top slot (saved earlier)
    out (0a8h),a ; set port
    ; set colors
    ld a,011110000b ; set data to be written into register (white on black)
    out (099h),a
    ld a,010000111b ; set register number (7)
    out (099h),a
    SetVdpWrite 0x20 0x05
    vpoke 0x0f ; set white on black for some part of the screen
    vpoke 0x0f ; set white on black for some other part of the screen
video_init:
    ; put chars /0123456789 into 0x1800-0x1AFF
    SetVdpWrite 0x18 0x00
    ld b,64 ; 64 chars
video_loop_1:
    vpoke 0x2f
    djnz video_loop_1
    ld b,64 ; 64 chars
video_loop_2:
    vpoke 0x2f
    djnz video_loop_2
    ld b,64 ; 64 chars
video_loop_3:
    vpoke 0x33
    djnz video_loop_3
    ld b,64 ; 64 chars
video_loop_4:
    vpoke 0x33
    djnz video_loop_4
    ld b,64 ; 64 chars
video_loop_5:
    vpoke 0x36
    djnz video_loop_5
    ld b,64 ; 64 chars
video_loop_6:
    vpoke 0x36
    djnz video_loop_6
    ld b,64 ; 64 chars
video_loop_7:
    vpoke 0x31
    djnz video_loop_7
    ld b,64 ; 64 chars
video_loop_8:
    vpoke 0x31
    djnz video_loop_8
    ld b,64 ; 64 chars
video_loop_9:
    vpoke 0x35
    djnz video_loop_9
    ld b,64 ; 64 chars
video_loop_10:
    vpoke 0x35
    djnz video_loop_10
    ld b,64 ; 64 chars
video_loop_11:
    vpoke 0x37
    djnz video_loop_11
    ld b,64 ; 64 chars
video_loop_12:
    vpoke 0x37
    djnz video_loop_12
    ld b,64 ; 64 chars
    
    ld b,0 ; flag to indicate whether we are jumping around at the moment (0 means we aren't) (NOTE: nested jumping isn't supported)
    ld c,0xa0 ; first PSG port
    ld hl,psg_begin
    jr main_loop

loop:
    ld a,b
    cp 0
    jr z,main_loop ; b isn't set so just head back to the loop
    pop af
    dec a
    cp -1
    jr z,restore_hl
    push af ; don't need this on the stack if we go to restore_hl, so place it after the jump
    jr main_loop
restore_hl:
    ld b,0 ; unset flag
    pop hl
    inc hl
    ; and continue executing into loop
main_loop:
    ld a,(hl)
    cp 0xff
    jr z,wait
    cp 0xfe
    jr z,wait_n_times
    cp 0xfd
    jr z,end
    cp 0xfc
    jr z,jump
    jr register_write
inc_loop:
    inc hl
    jr loop

wait:
    call wait_for_raster_50hz_emu
    jr inc_loop

wait_n_times: ; safe to assume that param isn't 0
    push bc
    inc hl
    ld b,(hl)
wait_n_times_loop:
    call wait_for_raster_50hz_emu
    djnz wait_n_times_loop
    pop bc
    jr inc_loop

end:
    jr end ; infinite loop

jump:
    inc hl
    ld d,(hl)
    inc hl
    ld e,(hl)
    push hl
    ld b,1 ; signal that we're calling a previous segment
    ld a,N ; we want to execute N instructions before going back to where we left off
    push af
    ld hl,psg_begin
    add hl,de
    jr loop

register_write:
    ld a,(hl)
    out (c),a

    ; really we only need ld a,(hl) and out (0xa1),a, but let's poke around in the VRAM to make this program slightly less boring
    ; we'll modify the tile definitions of characters /, 0, ..., 9 (8 bytes each starting at 0x178) and just put in the same value we're writing to the PSG register
    or a ; clear carry flag to make rla behave
    ; a = a*8 for vram write address
    rla ; *2
    rla ; *2 (*2*2 == *4)
    rla ; *2 (*2*2*2 == *8)
    ld d,a ; vram write address
    inc hl
    ld a,(hl)
    ld e,a ; vram write value
    out (0xa1),a
    ld a,0x78
    add a,d ; vram address low byte is 0x78 + (psg register)*8

    ; color change code currently commented out because it's not very pleasant to look at
;     ; let's also change some colors when register 5 is written to, which doesn't appear to happen very often
;     ; for register 5 a is 5*8 + 0x78 = 0xa0
;     cp 0xa0
;     jr nz,skip_color_change
;     ld d,a
;     ld a,e
;     out (099h),a
;     ld a,010000111b ; set register number (7)
;     out (099h),a
;     ld a,d
skip_color_change:
    SetVdpWrite 1 a ; vram address high byte is 1 (full address: 0x178)
    vpoke e
    vpoke e
    vpoke e
    vpoke e
    vpoke e
    vpoke e
    vpoke e
    vpoke e

    jr inc_loop

wait_for_raster:
    in a,(0x99)
    and 128
    cp 128
    jr nz,wait_for_raster
    ret

wait_for_raster_50hz_emu:
    ; CPU clock is 3579545 Hz
    ; decrement and loop routine takes 36 instructions per loop run (wait_for_raster_50hz_emu_loop up to (not including) low_0)
    ; (https://www.overtakenbyevents.com/tstates/)
    ; want routine to finish in 1/50 or a second, so:
    ; 3579545/50/36=1988.636111111111, let's very scientifically, er, let's throw out that whole calculation and say 1650 because we have overhead and I have experimentally determined that to sound close enough to the original :p
    ; our overhead varies depending on code path. some rhythm problems are audible, but not _too_ terrible
    ld de,1650
wait_for_raster_50hz_emu_loop:
    dec de
    ld a,e
    cp 0
    jr z,low_0
    jr wait_for_raster_50hz_emu_loop
low_0:
    ld a,d
    cp 0
    jr z,high_low_0
    jr wait_for_raster_50hz_emu_loop
high_low_0:
    ret

psg_begin:
include "foo.psg"

ds 010000h-$

Compiles with z80asm. Other assemblers might need some tweaks.

Bonus: converting PSG files to VGM

This is implemented in straight-forward C.
Compilation: cc -o psg2vgm psg2vgm.c
Execution: ./psg2vgm Dreamer\ -\ POPSA-2\ \(1994\).psg | xxd -r -p > foo.vgm

#include <stdio.h>
#include <stdlib.h>

int try_fgetc(FILE *file)
{
    int param;
    if ((param = fgetc(file)) == EOF) {
        fprintf(stderr, "Unexpected EOF");
        exit(1);
    }
    return param;
}

int main(int argc, char **argv)
{
    FILE *file = fopen(argv[1], "r");
    int c, param;
    int i;

    // header with 3579545/2 = 1789772 Hz clock rate for AY8910, fake value for length (should be long enough for anyone)
    printf("56 67 6d 20 f0 f0 f0 f0 50 01 00 00 00 00 00 00 00 00 00 00 36 09 01 00 00 dd 00 00 24 00 00 00 00 dd 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 cc 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 4c 4f 1b 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00");
    
    for (i = 0; i < 16; i++) { // bulldoze over header
        try_fgetc(file);
    }

    while ((c = fgetc(file)) != EOF) {
        switch (c) {
            case 0xff:
                printf("%02x ", 0x63); // wait 882 samples (50th of a second), a shortcut for 0x61 0x72 0x03
                break;
            case 0xfe:
                param = try_fgetc(file);
                for (i = 0; i < param; i++) {
                    printf("%02x ", 0x63); // wait 882 samples (50th of a second), a shortcut for 0x61 0x72 0x03
                }
                break;
            case 0xfd:
                printf("%02x ", 0x66); // end of sound data
                break;
            default:
                param = try_fgetc(file);
                printf("%02x %02x %02x ", 0xa0, c, param);
        }
    }

    printf("\n");
}