add adapter handling for HiROM
[flashkit-mdc.git] / flashkit.c
index e6d5785..fb64293 100644 (file)
@@ -53,6 +53,20 @@ enum dev_cmd {
 #define PAR_SINGE  (1 << 6)
 #define PAR_INC    (1 << 7)
 
+static struct flash_info { // what read_info() learned about the flash chip
+       uint32_t prog_addr; // first unlock/command address: 0xaaa (CFI chips), 0x5555 (SST), 0 = not identified
+       uint16_t mid; // manufacturer ID
+       uint16_t did; // device ID
+       uint32_t size; // total size of the flash
+       uint16_t region_cnt; // number of valid entries in region[], at most 4
+       struct {
+               uint32_t block_size; // size of one block in this region
+               uint32_t block_count; // number of blocks in this region
+               uint32_t start; // offset of this region (sum of the preceding region sizes)
+               uint32_t size; // block_size * block_count
+       } region[4];
+} info;
+
 static int setup(int fd)
 {
        struct termios tty;
@@ -229,25 +243,106 @@ static void read_block16(int fd, void *dst, uint32_t size)
 
 static void flash_seq_write8(int fd, uint32_t addr, const uint8_t *d)
 {
+       uint8_t cmd[] = { // whole program sequence for one byte, sent as a single serial packet
+               // unlock
+               CMD_ADDR, info.prog_addr >> 16, // address is sent as three bytes, most significant first
+               CMD_ADDR, info.prog_addr >> 8,
+               CMD_ADDR, info.prog_addr >> 0,
+               CMD_WR | PAR_SINGE | PAR_MODE8, 0xaa,
+               CMD_ADDR, info.prog_addr >> 17, // second unlock address is prog_addr >> 1 (0x555 for 0xaaa)
+               CMD_ADDR, info.prog_addr >> 9,
+               CMD_ADDR, info.prog_addr >> 1,
+               CMD_WR | PAR_SINGE | PAR_MODE8, 0x55,
+               // program setup
+               CMD_ADDR, info.prog_addr >> 16,
+               CMD_ADDR, info.prog_addr >> 8,
+               CMD_ADDR, info.prog_addr >> 0,
+               CMD_WR | PAR_SINGE | PAR_MODE8, 0xa0,
+               // program data
+               CMD_ADDR, addr >> 16,
+               CMD_ADDR, addr >> 8,
+               CMD_ADDR, addr >> 0,
+               CMD_WR | PAR_SINGE | PAR_MODE8, *d, // the single data byte to program
+               CMD_RY // NOTE(review): presumably makes the device wait for the flash to become ready - confirm
+       };
+
+       assert(info.prog_addr); // prog_addr stays 0 until read_info() has identified a chip
+       write_serial(fd, cmd, sizeof(cmd));
+}
+
+static void flash_seq_write16(int fd, uint32_t addr, const uint8_t *d) // like flash_seq_write8, but programs d[0], d[1] in one write
+{
+       uint8_t cmd[] = { // every address is shifted one bit further right than in flash_seq_write8
+               // unlock
+               CMD_ADDR, info.prog_addr >> 17, // NOTE(review): presumably the 16bit bus takes word addresses - confirm
+               CMD_ADDR, info.prog_addr >> 9,
+               CMD_ADDR, info.prog_addr >> 1,
+               CMD_WR | PAR_SINGE | PAR_MODE8, 0xaa,
+               CMD_ADDR, info.prog_addr >> 18, // second unlock address: (prog_addr >> 1), again shifted by one
+               CMD_ADDR, info.prog_addr >> 10,
+               CMD_ADDR, info.prog_addr >> 2,
+               CMD_WR | PAR_SINGE | PAR_MODE8, 0x55,
+               // program setup
+               CMD_ADDR, info.prog_addr >> 17,
+               CMD_ADDR, info.prog_addr >> 9,
+               CMD_ADDR, info.prog_addr >> 1,
+               CMD_WR | PAR_SINGE | PAR_MODE8, 0xa0,
+               // program data
+               CMD_ADDR, addr >> 17,
+               CMD_ADDR, addr >> 9,
+               CMD_ADDR, addr >> 1,
+               CMD_WR | PAR_SINGE, d[0], d[1], // no PAR_MODE8 here: two data bytes follow
+               CMD_RY
+       };
+
+       assert(info.prog_addr); // prog_addr stays 0 until read_info() has identified a chip
+       write_serial(fd, cmd, sizeof(cmd));
+}
+
+// -- 8bit+LoROM --
+
+static uint32_t lorom_rom_addr(uint32_t a) // linear ROM offset -> LoROM bus address
+{
+       return ((a & 0x7f8000) << 1) | 0x8000 | (a & 0x7fff); // bits 15-22 move up to 16-23, bit 15 is forced on, low 15 bits are kept
+}
+
+static void set_addr8l(int fd, uint32_t a)
+{
+       set_addr8(fd, lorom_rom_addr(a)); // set_addr8 on the LoROM-translated address
+}
+
+static uint16_t read_bus8l(int fd, uint32_t a)
+{
+       return read_bus8(fd, lorom_rom_addr(a)); // read_bus8 on the LoROM-translated address
+}
+
+static void write_bus8l(int fd, uint32_t a, uint16_t d)
+{
+       write_bus8(fd, lorom_rom_addr(a), d); // write_bus8 on the LoROM-translated address
+}
+
+static void flash_seq_write8l(int fd, uint32_t a, const uint8_t *d)
+{
+       a = lorom_rom_addr(a);
        uint8_t cmd[] = {
                // unlock
                CMD_ADDR, 0,
-               CMD_ADDR, 0x0a,
+               CMD_ADDR, 0x8a,
                CMD_ADDR, 0xaa,
                CMD_WR | PAR_SINGE | PAR_MODE8, 0xaa,
                CMD_ADDR, 0,
-               CMD_ADDR, 0x05,
+               CMD_ADDR, 0x85,
                CMD_ADDR, 0x55,
                CMD_WR | PAR_SINGE | PAR_MODE8, 0x55,
                // program setup
                CMD_ADDR, 0,
-               CMD_ADDR, 0x0a,
+               CMD_ADDR, 0x8a,
                CMD_ADDR, 0xaa,
                CMD_WR | PAR_SINGE | PAR_MODE8, 0xa0,
                // program data
-               CMD_ADDR, addr >> 16,
-               CMD_ADDR, addr >> 8,
-               CMD_ADDR, addr >> 0,
+               CMD_ADDR, a >> 16,
+               CMD_ADDR, a >> 8,
+               CMD_ADDR, a >> 0,
                CMD_WR | PAR_SINGE | PAR_MODE8, *d,
                CMD_RY
        };
@@ -255,36 +350,174 @@ static void flash_seq_write8(int fd, uint32_t addr, const uint8_t *d)
        write_serial(fd, cmd, sizeof(cmd));
 }
 
-static void flash_seq_write16(int fd, uint32_t addr, const uint8_t *d)
+// -- 8bit+LoROM+adapter --
+
+static uint32_t do_flipflops(int fd, uint32_t a) // sends A23-A21 of a to the adapter when they change; returns a with those bits removed
 {
+       static uint32_t abits_now = ~0u; // last A23, A22, A21 written; ~0u never matches a 3-bit value, so the first call always writes
+       uint32_t abits = (a >> 21) & 7; // the three address bits above A20
+
+       if (abits != abits_now) {
+               // printf("flipflops: %x->%x\n", abits_now, abits);
+               write_bus16(fd, 0xa13000, abits); // NOTE(review): 0xa13000 is presumably the adapter's flipflop register - confirm
+               abits_now = abits; // remember the value to skip redundant writes
+       }
+       return a & 0x1fffff; // A20..A0, for the caller to put on the bus
+}
+
+static void set_addr8la(int fd, uint32_t a)
+{
+       set_addr8(fd, do_flipflops(fd, lorom_rom_addr(a))); // LoROM translation first, then A23-A21 go through do_flipflops
+}
+
+static uint16_t read_bus8la(int fd, uint32_t a)
+{
+       return read_bus8(fd, do_flipflops(fd, lorom_rom_addr(a))); // LoROM translation first, then A23-A21 go through do_flipflops
+}
+
+static void write_bus8la(int fd, uint32_t a, uint16_t d)
+{
+       write_bus8(fd, do_flipflops(fd, lorom_rom_addr(a)), d); // LoROM translation first, then A23-A21 go through do_flipflops
+}
+
+static void flash_seq_write8la(int fd, uint32_t a, const uint8_t *d)
+{
+       // Strictly, the flipflops should be cleared before the flash
+       // commands are sent. This is skipped because the flash chip
+       // seems to ignore the upper address bits when matching
+       // commands, and the extra clearing would slow things down.
+       a = do_flipflops(fd, lorom_rom_addr(a)); // from here on a is the translated bus address (A20..A0)
         uint8_t cmd[] = {
                 // unlock
                 CMD_ADDR, 0,
-               CMD_ADDR, 0x05,
-               CMD_ADDR, 0x55,
+               CMD_ADDR, 0x8a, // 0x008aaa is lorom_rom_addr(0xaaa)
+               CMD_ADDR, 0xaa,
                 CMD_WR | PAR_SINGE | PAR_MODE8, 0xaa,
                 CMD_ADDR, 0,
-               CMD_ADDR, 0x02,
-               CMD_ADDR, 0xaa,
+               CMD_ADDR, 0x85, // 0x008555 is lorom_rom_addr(0x555)
+               CMD_ADDR, 0x55,
                 CMD_WR | PAR_SINGE | PAR_MODE8, 0x55,
                 // program setup
                 CMD_ADDR, 0,
-               CMD_ADDR, 0x05,
+               CMD_ADDR, 0x8a,
+               CMD_ADDR, 0xaa,
+               CMD_WR | PAR_SINGE | PAR_MODE8, 0xa0,
+               // program data
+               CMD_ADDR, a >> 16,
+               CMD_ADDR, a >> 8,
+               CMD_ADDR, a >> 0,
+               CMD_WR | PAR_SINGE | PAR_MODE8, *d, // the single data byte to program
+               CMD_RY
+       };
+
+       write_serial(fd, cmd, sizeof(cmd)); // unlike flash_seq_write8, info.prog_addr is not consulted here
+}
+
+// -- 8bit+LoROM+adapter+sram --
+
+static uint32_t lorom_sram_addr(uint32_t a)
+{
+       return a | 0x600000; // sets A22 and A21; no other translation is applied
+}
+
+static void set_addr8las(int fd, uint32_t a)
+{
+       set_addr8(fd, do_flipflops(fd, lorom_sram_addr(a))); // SRAM address bits are ORed in, then A23-A21 go through do_flipflops
+}
+
+static uint16_t read_bus8las(int fd, uint32_t a)
+{
+       return read_bus8(fd, do_flipflops(fd, lorom_sram_addr(a))); // SRAM address bits are ORed in, then A23-A21 go through do_flipflops
+}
+
+static void write_bus8las(int fd, uint32_t a, uint16_t d)
+{
+       write_bus8(fd, do_flipflops(fd, lorom_sram_addr(a)), d); // SRAM address bits are ORed in, then A23-A21 go through do_flipflops
+}
+
+static void flash_seq_write8las(int fd, uint32_t a, const uint8_t *d)
+{ // empty: nothing is sent in this mode; the function only fills this mapper's slot in io_ops[]
+}
+
+// -- 8bit+adapter --
+
+static void set_addr8a(int fd, uint32_t a)
+{
+       set_addr8(fd, do_flipflops(fd, a)); // no LoROM translation; only A23-A21 are diverted through do_flipflops
+}
+
+static uint16_t read_bus8a(int fd, uint32_t a)
+{
+       return read_bus8(fd, do_flipflops(fd, a)); // no LoROM translation; only A23-A21 are diverted through do_flipflops
+}
+
+static void write_bus8a(int fd, uint32_t a, uint16_t d)
+{
+       write_bus8(fd, do_flipflops(fd, a), d); // no LoROM translation; only A23-A21 are diverted through do_flipflops
+}
+
+static void flash_seq_write8a(int fd, uint32_t a, const uint8_t *d)
+{
+       // the flipflops are not cleared for the commands, see flash_seq_write8la
+       a = do_flipflops(fd, a); // a is NOT LoROM-translated in this mode
+       uint8_t cmd[] = {
+               // unlock
+               CMD_ADDR, 0,
+               CMD_ADDR, 0x8a, // NOTE(review): same 0x8aaa/0x8555 as the LoROM variants although a is untranslated - presumably harmless if the chip ignores A15; confirm
+               CMD_ADDR, 0xaa,
+               CMD_WR | PAR_SINGE | PAR_MODE8, 0xaa,
+               CMD_ADDR, 0,
+               CMD_ADDR, 0x85,
                 CMD_ADDR, 0x55,
+               CMD_WR | PAR_SINGE | PAR_MODE8, 0x55,
+               // program setup
+               CMD_ADDR, 0,
+               CMD_ADDR, 0x8a,
+               CMD_ADDR, 0xaa,
                 CMD_WR | PAR_SINGE | PAR_MODE8, 0xa0,
                 // program data
-               CMD_ADDR, addr >> 17,
-               CMD_ADDR, addr >> 9,
-               CMD_ADDR, addr >> 1,
-               CMD_WR | PAR_SINGE, d[0], d[1],
+               CMD_ADDR, a >> 16, // byte address, most significant byte first
+               CMD_ADDR, a >> 8,
+               CMD_ADDR, a >> 0,
+               CMD_WR | PAR_SINGE | PAR_MODE8, *d, // the single data byte to program
                 CMD_RY
         };
 
         write_serial(fd, cmd, sizeof(cmd));
 }
 
+// -- 8bit+adapter+nocart --
+
+static void set_addr8an(int fd, uint32_t a)
+{
+       set_addr8(fd, do_flipflops(fd, a) | 0x200000); // do_flipflops clears bits 21 and up, then bit 21 is forced on in the bus address
+}
+
+static uint16_t read_bus8an(int fd, uint32_t a)
+{
+       return read_bus8(fd, do_flipflops(fd, a) | 0x200000); // do_flipflops clears bits 21 and up, then bit 21 is forced on in the bus address
+}
+
+static void write_bus8an(int fd, uint32_t a, uint16_t d)
+{
+       write_bus8(fd, do_flipflops(fd, a) | 0x200000, d); // do_flipflops clears bits 21 and up, then bit 21 is forced on in the bus address
+}
+
+static void flash_seq_write8an(int fd, uint32_t a, const uint8_t *d)
+{ // empty: nothing is sent in this mode; the function only fills this mapper's slot in io_ops[]
+}
+
+#define N0 "" // name of io_ops[0], the 16bit default; usage() starts listing at index 1, so it is never printed
+#define N1 "8bit" // N1..N6 are the names usage() prints for '-m 1' .. '-m 6'
+#define N2 "8bit+LoROM"
+#define N3 "8bit+LoROM+adapter"
+#define N4 "8bit+LoROM+adapter+sram"
+#define N5 "8bit+adapter (use '-a 0x400000' for HiROM)"
+#define N6 "8bit+adapter+nocart"
 static const struct iof
 {
+       const char *name;
+       int      addrs_remapped;
        void     (*set_addr)(int fd, uint32_t addr);
        uint16_t (*read_bus)(int fd, uint32_t addr);
        void     (*write_bus)(int fd, uint32_t addr, uint16_t d);
@@ -293,32 +526,49 @@ static const struct iof
 }
 io_ops[] =
 {
-       { set_addr8,  read_bus8,  write_bus8,  read_block8,  flash_seq_write8  },
-       { set_addr16, read_bus16, write_bus16, read_block16, flash_seq_write16 },
+       { N0, 0, set_addr16,   read_bus16,   write_bus16,   read_block16, flash_seq_write16   },
+       { N1, 0, set_addr8,    read_bus8,    write_bus8,    read_block8,  flash_seq_write8    },
+       { N2, 1, set_addr8l,   read_bus8l,   write_bus8l,   read_block8,  flash_seq_write8l   },
+       { N3, 1, set_addr8la,  read_bus8la,  write_bus8la,  read_block8,  flash_seq_write8la  },
+       { N4, 0, set_addr8las, read_bus8las, write_bus8las, read_block8,  flash_seq_write8las },
+       { N5, 0, set_addr8a,   read_bus8a,   write_bus8a,   read_block8,  flash_seq_write8a   },
+       { N6, 0, set_addr8an,  read_bus8an,  write_bus8an,  read_block8,  flash_seq_write8an  },
 };
 
-static const struct iof *io = &io_ops[1];
+static const struct iof *io = &io_ops[0];
 
 static uint16_t flash_seq_r(int fd, uint8_t cmd, uint32_t addr)
 {
        // unlock
-       io->write_bus(fd, 0xaaa, 0xaa);
-       io->write_bus(fd, 0x555, 0x55);
+       assert(info.prog_addr); // callers must set info.prog_addr before issuing a command
+       io->write_bus(fd, info.prog_addr >> 0, 0xaa); // first unlock address is prog_addr itself
+       io->write_bus(fd, info.prog_addr >> 1, 0x55); // second unlock address is prog_addr >> 1
 
-       io->write_bus(fd, 0xaaa, cmd);
+       io->write_bus(fd, info.prog_addr >> 0, cmd); // the command byte goes to prog_addr, then addr is read back
        return io->read_bus(fd, addr);
 }
 
-static void flash_seq_erase(int fd, uint32_t addr)
+static void flash_seq_erase_d(int fd, uint32_t addr, uint8_t d) // shared erase sequence; the final write puts d at addr
 {
        // printf("erase %06x\n", addr);
-       io->write_bus(fd, 0xaaa, 0xaa);
-       io->write_bus(fd, 0x555, 0x55);
-       io->write_bus(fd, 0xaaa, 0x80);
+       assert(info.prog_addr); // prog_addr stays 0 until read_info() has identified a chip
+       io->write_bus(fd, info.prog_addr >> 0, 0xaa); // first unlock pair
+       io->write_bus(fd, info.prog_addr >> 1, 0x55);
+       io->write_bus(fd, info.prog_addr >> 0, 0x80); // 0x80 is sent before the second unlock pair
+
+       io->write_bus(fd, info.prog_addr >> 0, 0xaa); // second unlock pair
+       io->write_bus(fd, info.prog_addr >> 1, 0x55);
+       io->write_bus(fd, addr, d); // caller-selected command byte at the caller-selected address
+}
 
-       io->write_bus(fd, 0xaaa, 0xaa);
-       io->write_bus(fd, 0x555, 0x55);
-       io->write_bus(fd, addr, 0x30);
+static void flash_seq_erase(int fd, uint32_t addr)
+{
+       flash_seq_erase_d(fd, addr, 0x30); // erase sequence ending with 0x30 written to addr
+}
+
+static void flash_seq_erase_full(int fd)
+{
+       flash_seq_erase_d(fd, info.prog_addr, 0x10); // erase sequence ending with 0x10 written to prog_addr
 }
 
 // status wait + dummy read to cause a wait?
@@ -339,59 +589,82 @@ static void set_delay(int fd, uint8_t delay)
        write_serial(fd, cmd, sizeof(cmd));
 }
 
-static struct flash_info {
-       uint16_t mid;
-       uint16_t did;
-       uint32_t size;
-       uint16_t region_cnt;
-       struct {
-               uint32_t block_size;
-               uint32_t block_count;
-               uint32_t start;
-               uint32_t size;
-       } region[4];
-} info;
-
 static void read_info(int fd)
 {
        static const uint16_t qry[3] = { 'Q', 'R', 'Y' };
+       uint16_t resp_cfi[3], sst_mid = ~0;
        uint32_t total = 0;
-       uint16_t resp[3];
        uint32_t i, a;
 
-       info.mid = flash_seq_r(fd, 0x90, 0); // autoselect
-       info.did = io->read_bus(fd, 2);
 
-       // could enter CFI directly, but there seems to be a "stack"
-       // of modes, so 2 exits would be needed
-       io->write_bus(fd, 0, 0xf0);
+       // see if this chip understands CFI (common flash interface)
+       io->write_bus(fd, 0xaa, 0x98);
+       resp_cfi[0] = io->read_bus(fd, 0x20);
+       resp_cfi[1] = io->read_bus(fd, 0x22);
+       resp_cfi[2] = io->read_bus(fd, 0x24);
+       if (memcmp(resp_cfi, qry, sizeof(resp_cfi)) == 0)
+       {
+               info.size = 1u << io->read_bus(fd, 0x4e);
+               info.region_cnt = io->read_bus(fd, 0x58);
+               assert(0 < info.region_cnt && info.region_cnt <= 4);
+               for (i = 0, a = 0x5a; i < info.region_cnt; i++, a += 8) {
+                       info.region[i].block_count = io->read_bus(fd, a + 0) + 1;
+                       info.region[i].block_count += io->read_bus(fd, a + 2) << 8;
+                       info.region[i].block_size = io->read_bus(fd, a + 4) << 8;
+                       info.region[i].block_size |= io->read_bus(fd, a + 6) << 16;
+                       info.region[i].start = total;
+                       info.region[i].size =
+                               info.region[i].block_size * info.region[i].block_count;
+                       assert(info.region[i].size);
+                       total += info.region[i].size;
+               }
+               if (info.size != total)
+                       fprintf(stderr, "warning: total is %u, bad CFI?\n", total);
 
-       io->write_bus(fd, 0xaa, 0x98); // CFI Query
-       resp[0] = io->read_bus(fd, 0x20);
-       resp[1] = io->read_bus(fd, 0x22);
-       resp[2] = io->read_bus(fd, 0x24);
-       if (memcmp(resp, qry, sizeof(resp))) {
-               fprintf(stderr, "unexpected CFI response: %04x %04x %04x\n",
-                       resp[0], resp[1], resp[2]);
-               exit(1);
+               info.prog_addr = 0xaaa;
+               io->write_bus(fd, 0, 0xf0);
+               info.mid = flash_seq_r(fd, 0x90, 0); // autoselect
+               info.did = io->read_bus(fd, 2);
        }
-       info.size = 1u << io->read_bus(fd, 0x4e);
-       info.region_cnt = io->read_bus(fd, 0x58);
-       assert(0 < info.region_cnt && info.region_cnt <= 4);
-       for (i = 0, a = 0x5a; i < info.region_cnt; i++, a += 8) {
-               info.region[i].block_count = io->read_bus(fd, a + 0) + 1;
-               info.region[i].block_count += io->read_bus(fd, a + 2) << 8;
-               info.region[i].block_size = io->read_bus(fd, a + 4) << 8;
-               info.region[i].block_size |= io->read_bus(fd, a + 6) << 16;
-               info.region[i].start = total;
-               info.region[i].size =
-                       info.region[i].block_size * info.region[i].block_count;
-               assert(info.region[i].size);
-               total += info.region[i].size;
+       else
+       {
+               // try the SST protocol
+               info.prog_addr = 0x5555;
+               sst_mid = flash_seq_r(fd, 0x90, 0);
+               if (sst_mid == 0xbf)
+               {
+                       info.mid = sst_mid;
+                       info.did = io->read_bus(fd, 0x01);
+                       switch (info.did) {
+                       case 0xb5:
+                       case 0xb6:
+                       case 0xb7:
+                               info.size = 128 * 1024 << (info.did - 0xb5);
+                               break;
+                       default:
+                               fprintf(stderr, "unrecognized SST device %02x\n", info.did);
+                               exit(1);
+                       }
+                       info.region_cnt = 1;
+                       info.region[0].block_count = info.size / 4096;
+                       info.region[0].block_size = 4096;
+                       info.region[0].start = 0;
+                       info.region[0].size = info.size;
+               }
+               else
+                       info.prog_addr = 0;
        }
 
        io->write_bus(fd, 0, 0xf0); // flash reset
 
+       if (info.prog_addr == 0) {
+               fprintf(stderr, "unable to identify the flash chip :(\n");
+               fprintf(stderr, "CFI response: %02x %02x %02x\n",
+                       resp_cfi[0], resp_cfi[1], resp_cfi[2]);
+               fprintf(stderr, "SST MID: %02x\n", sst_mid);
+               exit(1);
+       }
+
        printf("Flash info:\n");
        printf("Manufacturer ID: %04x\n", info.mid);
        printf("Device ID: %04x\n", info.did);
@@ -400,8 +673,6 @@ static void read_info(int fd)
        for (i = 0; i < info.region_cnt; i++)
                printf("  %5u x %u\n", info.region[i].block_size,
                        info.region[i].block_count);
-       if (info.size != total)
-               fprintf(stderr, "warning: total is %u, bad CFI?\n", total);
 }
 
 static uint32_t get_block_addr(uint32_t addr, uint32_t blk_offset)
@@ -475,18 +746,23 @@ static const char *portname =
 
 static void usage(const char *argv0)
 {
+       size_t i;
+
        printf("usage:\n"
                "%s [options]\n"
                "  -d <ttydevice>      (default %s)\n"
                "  -r <file> [size]    dump the cart (default 4MB)\n"
                "  -w <file> [size]    program the flash (def. file size)\n"
                "  -s <file> [size]    simple write (SRAM, etc, def. file size)\n"
-               "  -e <size>           erase (rounds to block size)\n"
+               "  -e <size>           erase (rounds to block size); can specify 'full'\n"
                "  -a <start_address>  read/write start address (default 0)\n"
-               "  -8                  8bit flash\n"
-               "  -v                  verify written data\n"
-               "  -i                  get info about the flash chip\n"
+               "  -m <n>              use an address mapper n, one of:\n"
                , argv0, portname);
+       for (i = 1; i < sizeof(io_ops) / sizeof(io_ops[0]); i++)
+               printf(
+               "                       %zd: %s\n", i, io_ops[i].name);
+       printf( "  -v                  verify written data\n"
+               "  -i                  get info about the flash chip\n");
        exit(1);
 }
 
@@ -506,8 +782,22 @@ static void *getarg(int argc, char *argv[], int arg)
        return argv[arg];
 }
 
-static uint8_t g_block[0x10000];
-static uint8_t g_block2[0x10000];
+static long getarg_l(int argc, char *argv[], int arg) // parses argv[arg] as one complete integer (base auto-detected by strtol)
+{
+       char *endp = NULL;
+       long r;
+
+       if (arg >= argc) // the option was given without its value
+               invarg(argc, argv, arg);
+       r = strtol(argv[arg], &endp, 0);
+       if (endp == NULL || endp == argv[arg] || *endp != 0) // reject an empty string (no digits consumed) and trailing junk
+               invarg(argc, argv, arg);
+       return r;
+}
+
+// 32K to easily handle SNES LoROM
+static uint8_t g_block[0x8000];
+static uint8_t g_block2[sizeof(g_block)];
 
 int main(int argc, char *argv[])
 {
@@ -544,7 +834,7 @@ int main(int argc, char *argv[])
                if (!strcmp(argv[arg], "-r")) {
                        fname_r = getarg(argc, argv, ++arg);
                        if (arg + 1 < argc && argv[arg + 1][0] != '-') {
-                               size_r = strtol(argv[++arg], NULL, 0);
+                               size_r = getarg_l(argc, argv, ++arg);
                                if (size_r <= 0)
                                        invarg(argc, argv, arg);
                        }
@@ -553,7 +843,7 @@ int main(int argc, char *argv[])
                if (!strcmp(argv[arg], "-w")) {
                        fname_w = getarg(argc, argv, ++arg);
                        if (arg + 1 < argc && argv[arg + 1][0] != '-') {
-                               size_w = strtol(argv[++arg], NULL, 0);
+                               size_w = getarg_l(argc, argv, ++arg);
                                if (size_w <= 0)
                                        invarg(argc, argv, arg);
                        }
@@ -562,27 +852,36 @@ int main(int argc, char *argv[])
                if (!strcmp(argv[arg], "-s")) {
                        fname_ws = getarg(argc, argv, ++arg);
                        if (arg + 1 < argc && argv[arg + 1][0] != '-') {
-                               size_ws = strtol(argv[++arg], NULL, 0);
+                               size_ws = getarg_l(argc, argv, ++arg);
                                if (size_ws <= 0)
                                        invarg(argc, argv, arg);
                        }
                        continue;
                }
                if (!strcmp(argv[arg], "-a")) {
-                       address_in = strtol(getarg(argc, argv, ++arg), NULL, 0);
-                       if (address_in < 0 || (address_in & 1))
+                       address_in = getarg_l(argc, argv, ++arg);
+                       if (address_in < 0)
                                invarg(argc, argv, arg);
                        continue;
                }
                if (!strcmp(argv[arg], "-e")) {
-                       size_e = strtol(getarg(argc, argv, ++arg), NULL, 0);
-                       if (size_e <= 0)
-                               invarg(argc, argv, arg);
+                       arg++;
+                       if (!strcmp(getarg(argc, argv, arg), "full"))
+                               size_e = -1;
+                       else {
+                               size_e = getarg_l(argc, argv, arg);
+                               if (size_e <= 0)
+                                       invarg(argc, argv, arg);
+                       }
                        continue;
                }
-               if (!strcmp(argv[arg], "-8")) {
-                       io = &io_ops[0];
-                       write_step = 1;
+               if (!strcmp(argv[arg], "-m")) {
+                       long v = getarg_l(argc, argv, ++arg);
+                       if ((size_t)v >= sizeof(io_ops) / sizeof(io_ops[0]))
+                               invarg(argc, argv, arg);
+                       io = &io_ops[v];
+                       if (v != 0)
+                               write_step = 1;
                        continue;
                }
                if (!strcmp(argv[arg], "-v")) {
@@ -601,7 +900,9 @@ int main(int argc, char *argv[])
 
        if (fname_w) {
                f_w = open_prep_read(fname_w, &size_w);
-               if (size_e < size_w)
+               if (size_e == 0 && io->addrs_remapped)
+                       size_e = -1;
+               if (size_e != -1 && size_e < size_w)
                        size_e = size_w;
                if (do_verify)
                        size_v = size_w;
@@ -629,12 +930,24 @@ int main(int argc, char *argv[])
        printf("flashkit id: %02x\n", id[0]);
 
        set_delay(fd, 1);
-       io->write_bus(fd, 0, 0xf0); // flash reset
+
+       if (do_info || size_e || f_w)
+               io->write_bus(fd, 0, 0xf0); // flash reset
 
        if (do_info || size_e)
                read_info(fd);
 
-       if (size_e) {
+       if (size_e == -1) {
+               printf("performing full erase..."); fflush(stdout);
+               flash_seq_erase_full(fd);
+               rv = ry_read(fd);
+               if (rv != 0xffff) {
+                       fprintf(stderr, "\nerase error: %04x\n", rv);
+                       return 1;
+               }
+               printf(" done.\n");
+       }
+       else if (size_e) {
                // set_delay(fd, 0); // ?
                a_blk = get_block_addr(address_in, 0);
                end = address_in + size_e;
@@ -647,6 +960,7 @@ int main(int argc, char *argv[])
                        if (rv != 0xffff) {
                                fprintf(stderr, "\nerase error: %lx %04x\n",
                                        a_blk, rv);
+                               return 1;
                        }
 
                        a_blk = get_block_addr(a_blk, 1);
@@ -720,8 +1034,13 @@ int main(int argc, char *argv[])
 
                printf("reading %ld bytes:\n", max(size_r, size_v));
                print_progress(0, blks * sizeof(g_block));
-               io->set_addr(fd, address_in);
+               a_blk = -1;
                for (done = 0; done < size_r || done < size_v; ) {
+                       a = address_in + done;
+                       if (io->addrs_remapped || (a >> 21) != a_blk) {
+                               a_blk = a >> 21;
+                               io->set_addr(fd, a);
+                       }
                        io->read_block(fd, g_block, sizeof(g_block));
                        if (f_r && done < size_r) {
                                len = min(size_r - done, sizeof(g_block));