[ia32rtools.git] / tools / cvt_data.c

#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "my_assert.h"
#include "my_str.h"

// number of elements in an array object (not meaningful for a pointer)
#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
// non-zero when strings w and y compare equal
#define IS(w, y) !strcmp(w, y)
// non-zero when string w begins with string y
#define IS_START(w, y) !strncmp(w, y, strlen(y))

#include "protoparse.h"

// path of the .asm input; printed as the file name by aerr()
static const char *asmfn;
// number of .asm lines read so far; printed as the line number by aerr()
static int asmln;
// header file stream opened by main()
static FILE *g_fhdr;

// element type selected by a data definition directive
enum dx_type {
  DXT_UNSPEC, // not a recognized data directive
  DXT_BYTE,   // "db", 1 byte
  DXT_WORD,   // "dw", 2 bytes
  DXT_DWORD,  // "dd", 4 bytes
  DXT_QUAD,   // "dq", 8 bytes
  DXT_TEN,    // "dt", 10 bytes
};

// Report a fatal error at the current .asm position (asmfn:asmln) on stdout,
// close every open stream and terminate the process with exit status 1.
// fmt is a printf-style format; it must be a string literal because it is
// concatenated with the message prefix.
#define aerr(fmt, ...) do { \
	printf("%s:%d: error: " fmt, asmfn, asmln, ##__VA_ARGS__); \
  fcloseall(); \
	exit(1); \
} while (0)

#include "masm_tools.h"

/*
 * Copy the next word of s into w, treating '...' quoted text as part of
 * the word.
 *
 * Leading blanks are skipped with sskip().  The word ends at the end of
 * the string or, outside of single quotes, at the first blank or comma.
 * At most wsize - 1 characters are stored and w is always NUL terminated,
 * so wsize must be at least 1.
 *
 * A warning is printed when the word did not fit into w.
 *
 * Returns a pointer to the first character of s that was not consumed.
 */
static char *next_word_s(char *w, size_t wsize, char *s)
{
  int quote = 0;
  size_t i;

  s = sskip(s);

  for (i = 0; i < wsize - 1; i++) {
    if (s[i] == '\'')
      quote ^= 1;
    if (s[i] == 0 || (!quote && (my_isblank(s[i]) || s[i] == ',')))
      break;
    w[i] = s[i];
  }
  w[i] = 0;

  // a blank or comma only ends the word outside of quotes, so a cut that
  // lands on one inside a quoted string is still a truncation
  if (s[i] != 0 && (quote || (!my_isblank(s[i]) && s[i] != ',')))
    printf("warning: '%s' truncated\n", w);

  return s + i;
}

/*
 * Advance fasm to the next "<name> segment" line.
 *
 * Blank lines, ';' comment lines and every line whose second word is not
 * "segment" are skipped; asmln is incremented for each fgets() call that
 * starts a line.
 *
 * On success the segment name (first word of the line) is copied to name,
 * which must have room for 256 bytes.  When the input ends before another
 * segment line is found, name is left as an empty string.
 */
static void next_section(FILE *fasm, char *name)
{
  char words[2][256];
  char line[256];
  size_t len;
  int wordc;
  char *p;

  name[0] = 0;

  while (fgets(line, sizeof(line), fasm))
  {
    asmln++;

    p = sskip(line);
    if (*p == 0)
      continue;

    if (*p == ';') {
      // one of those long comment lines..
      // A full buffer only means "more to come" when it does not already
      // end in a newline; otherwise the next line would be eaten too.
      for (;;) {
        len = strlen(line);
        if (len != sizeof(line) - 1 || line[len - 1] == '\n')
          break;
        if (!fgets(line, sizeof(line), fasm))
          break;
      }
      continue;
    }

    for (wordc = 0; wordc < (int)ARRAY_SIZE(words); wordc++) {
      p = sskip(next_word(words[wordc], sizeof(words[0]), p));
      if (*p == 0 || *p == ';') {
        wordc++;
        break;
      }
    }

    if (wordc < 2)
      continue;

    if (!IS(words[1], "segment"))
      continue;

    strcpy(name, words[0]);
    break;
  }
}

static enum dx_type parse_dx_directive(const char *name)
{
  if (IS(name, "dd"))
    return DXT_DWORD;
  if (IS(name, "dw"))
    return DXT_WORD;
  if (IS(name, "db"))
    return DXT_BYTE;
  if (IS(name, "dq"))
    return DXT_QUAD;
  if (IS(name, "dt"))
    return DXT_TEN;

  return DXT_UNSPEC;
}

/*
 * Return the output directive used for elements of the given type
 * (".byte", ".word", ".long", ".quad" or ".tfloat").
 *
 * DXT_UNSPEC and any other value yield "<bad>".
 */
static const char *type_name(enum dx_type type)
{
  const char *directive = "<bad>";

  if (type == DXT_BYTE)
    directive = ".byte";
  else if (type == DXT_WORD)
    directive = ".word";
  else if (type == DXT_DWORD)
    directive = ".long";
  else if (type == DXT_QUAD)
    directive = ".quad";
  else if (type == DXT_TEN)
    directive = ".tfloat";

  return directive;
}

/*
 * Return the size in bytes of one element of the given type:
 * 1, 2, 4, 8 or 10.
 *
 * DXT_UNSPEC and any other value yield -1.
 */
static int type_size(enum dx_type type)
{
  int bytes;

  switch (type) {
  case DXT_BYTE:
    bytes = 1;
    break;
  case DXT_WORD:
    bytes = 2;
    break;
  case DXT_DWORD:
    bytes = 4;
    break;
  case DXT_QUAD:
    bytes = 8;
    break;
  case DXT_TEN:
    bytes = 10;
    break;
  default:
    bytes = -1;
    break;
  }

  return bytes;
}

/*
 * Escape a string in place so it can be emitted between double quotes in
 * a GNU as .ascii/.asciz directive: '"' becomes \" and '\' becomes \\.
 *
 * The escaped text is built in a 256 byte scratch buffer and copied back
 * over s, so the buffer behind s must have room for the escaped result
 * (at most 256 bytes including the terminator).  If the escaped text does
 * not fit, it is cut at the last complete character/escape and a warning
 * is printed.
 *
 * Returns s.
 */
static char *escape_string(char *s)
{
  char buf[256];
  const char *in;
  size_t need;
  size_t n = 0;

  for (in = s; *in != 0; in++) {
    // both escapes are a backslash followed by the character itself
    need = (*in == '"' || *in == '\\') ? 2 : 1;

    // always keep one byte for the terminator, never split an escape
    if (n + need >= sizeof(buf)) {
      printf("warning: escaped string truncated\n");
      break;
    }

    if (need == 2)
      buf[n++] = '\\';
    buf[n++] = *in;
  }
  buf[n] = 0;

  return strcpy(s, buf);
}

/*
 * Convert the data segments of a MASM-style .asm listing to GNU as syntax.
 *
 * usage: <prog> <out.s> <in.asm> <hdrf>
 *
 * Every "<name> segment" block of the input is visited.  The first
 * character of the segment name is ignored; the rest selects the action:
 * "text" is skipped, "rdata" becomes ".section .rodata", "data" becomes
 * ".data", anything else is a fatal error.  Inside a data segment each
 * db/dw/dd/dq/dt line (optionally preceded by a symbol name) is rewritten
 * as .byte/.word/.long/.quad/.tfloat, .ascii/.asciz, .fill or .double,
 * and symbols are emitted with a '_' prefix.
 *
 * NOTE(review): hdrfn, proto_parse, sskip(), next_word(), parse_number()
 * and my_assert_not() are not defined in this file; they presumably come
 * from the included project headers.  The header file is only opened
 * here, nothing is read from it.
 *
 * Returns 0 on success and 1 on a usage error; parse errors terminate
 * the process through aerr().
 */
int main(int argc, char *argv[])
{
  FILE *fout, *fasm;
  char words[20][256];
  char word[256];
  char line[256];
  char comment[256];
  unsigned long val;
  unsigned long cnt;
  const char *sym;
  enum dx_type type;
  int is_label;
  int wordc;
  int first;
  int arg_out;
  int arg = 1;
  int len;
  int w;
  char *p;
  char *p2;

  if (argc != 4) {
    printf("usage:\n%s <.s> <.asm> <hdrf>\n",
      argv[0]);
    return 1;
  }

  arg_out = arg++;

  asmfn = argv[arg++];
  fasm = fopen(asmfn, "r");
  my_assert_not(fasm, NULL);

  hdrfn = argv[arg++];
  g_fhdr = fopen(hdrfn, "r");
  my_assert_not(g_fhdr, NULL);

  fout = fopen(argv[arg_out], "w");
  my_assert_not(fout, NULL);

  comment[0] = 0;

  for (;;) {
    next_section(fasm, line);
    // an empty name means the input ended without another segment;
    // testing feof() before the read would miss this and report a
    // bogus "unhandled section" at the end of the file
    if (line[0] == 0)
      break;

    if (IS(line + 1, "text"))
      continue;

    if (IS(line + 1, "rdata"))
      fprintf(fout, "\n.section .rodata\n");
    else if (IS(line + 1, "data"))
      fprintf(fout, "\n.data\n");
    else
      aerr("unhandled section: '%s'\n", line);

    fprintf(fout, ".align 4\n");

    while (fgets(line, sizeof(line), fasm))
    {
      sym = NULL;
      asmln++;

      p = sskip(line);
      if (*p == 0 || *p == ';')
        continue;

      // split the line into words; commas between words are dropped
      for (wordc = 0; wordc < (int)ARRAY_SIZE(words); wordc++) {
        p = sskip(next_word_s(words[wordc], sizeof(words[0]), p));
        if (*p == 0 || *p == ';') {
          wordc++;
          break;
        }
        if (*p == ',')
          p = sskip(p + 1);
      }

      // "<name> ends" closes the current segment
      if (wordc == 2 && IS(words[1], "ends"))
        break;
      if (wordc < 2)
        aerr("unhandled: '%s'\n", words[0]);

      // don't cares
      if (IS(words[0], "assume"))
        continue;

      if (IS(words[0], "align")) {
        val = parse_number(words[1]);
        fprintf(fout, "\t\t  .align %lu", val);
        goto fin;
      }

      // either "<directive> <values>" or "<symbol> <directive> <values>"
      w = 1;
      type = parse_dx_directive(words[0]);
      if (type == DXT_UNSPEC) {
        type = parse_dx_directive(words[1]);
        sym = words[0];
        w = 2;
      }
      if (type == DXT_UNSPEC)
        aerr("unhandled decl: '%s %s'\n", words[0], words[1]);
      // everything below looks at words[w], so it has to exist
      if (w >= wordc)
        aerr("missing operand: '%s %s'\n", words[0], words[1]);

      if (sym != NULL) {
        len = strlen(sym);
        fprintf(fout, "_%s:", sym);

        // pad so that the directive lines up with unlabeled lines
        len += 2;
        if (len < 8)
          fprintf(fout, "\t");
        if (len < 16)
          fprintf(fout, "\t");
        if (len <= 16)
          fprintf(fout, "  ");
        else
          fprintf(fout, " ");
      }
      else {
        fprintf(fout, "\t\t  ");
      }

      if (type == DXT_BYTE && words[w][0] == '\'') {
        // string; use asciz for most common case
        if (w == wordc - 2 && IS(words[w + 1], "0")) {
          fprintf(fout, ".asciz \"");
          wordc--;
        }
        else
          fprintf(fout, ".ascii \"");

        for (; w < wordc; w++) {
          if (words[w][0] == '\'') {
            p = words[w] + 1;
            p2 = strchr(p, '\'');
            if (p2 == NULL)
              aerr("unterminated string? '%s'\n", p);
            memcpy(word, p, p2 - p);
            word[p2 - p] = 0;
            fprintf(fout, "%s", escape_string(word));
          }
          else {
            val = parse_number(words[w]);
            if (val & ~0xfful)
              aerr("bad string trailing byte?\n");
            // fixed-width octal: a \x escape would also swallow any
            // hex-digit characters that follow it in the string
            fprintf(fout, "\\%03lo", val);
          }
        }
        fprintf(fout, "\"");
        goto fin;
      }

      if (w == wordc - 2) {
        // "<count> dup(<value>)"
        if (IS_START(words[w + 1], "dup(")) {
          cnt = parse_number(words[w]);
          p = words[w + 1] + 4;
          p2 = strchr(p, ')');
          if (p2 == NULL)
            aerr("bad dup?\n");
          memmove(word, p, p2 - p);
          word[p2 - p] = 0;
          val = parse_number(word);

          fprintf(fout, ".fill 0x%02lx,%d,0x%02lx",
            cnt, type_size(type), val);
          goto fin;
        }
      }

      // dword written as a four character constant, e.g. 'abcd';
      // the length is checked first so words[w][5] is inside the word
      if (type == DXT_DWORD && words[w][0] == '\''
        && strlen(words[w]) == 6 && words[w][5] == '\'')
      {
        if (w != wordc - 1)
          aerr("TODO\n");

        p = words[w];
        // go through unsigned char so bytes >= 0x80 are not sign extended
        val = ((unsigned long)(unsigned char)p[1] << 24)
            | ((unsigned long)(unsigned char)p[2] << 16)
            | ((unsigned long)(unsigned char)p[3] << 8)
            |  (unsigned long)(unsigned char)p[4];
        fprintf(fout, ".long 0x%lx", val);
        snprintf(comment, sizeof(comment), "%s", words[w]);
        goto fin;
      }

      // floating point literal
      if ((type == DXT_QUAD || type == DXT_TEN)
          && strchr(words[w], '.'))
      {
        if (w != wordc - 1)
          aerr("TODO\n");

        fprintf(fout, "%s%s",
          type == DXT_TEN ? ".tfloat " : ".double ", words[w]);
        goto fin;
      }

      // plain list of numbers and/or labels
      first = 1;
      fprintf(fout, "%s ", type_name(type));
      for (; w < wordc; w++)
      {
        if (!first)
          fprintf(fout, ", ");

        is_label = 0;
        // "offset" must be followed by the label it refers to
        if (w <= wordc - 2 && IS(words[w], "offset")) {
          is_label = 1;
          w++;
        }
        else if (type == DXT_DWORD
                 && !('0' <= words[w][0] && words[w][0] <= '9'))
        {
          // assume label
          is_label = 1;
        }

        if (is_label) {
          p = words[w];
          if (IS_START(p, "loc_") || strchr(p, '?') || strchr(p, '@'))
          {
            // emitted as 0; keep the name in the trailing comment and
            // carry on so the remaining values of the line are not lost
            fprintf(fout, "0");
            len = (int)strlen(comment);
            snprintf(comment + len, sizeof(comment) - len, "%s%s",
              len > 0 ? ", " : "", p);
          }
          else
            fprintf(fout, "_%s", p);
        }
        else {
          val = parse_number(words[w]);
          if (val < 10)
            fprintf(fout, "%lu", val);
          else
            fprintf(fout, "0x%lx", val);
        }

        first = 0;
      }

fin:
      if (comment[0] != 0) {
        fprintf(fout, "\t\t# %s", comment);
        comment[0] = 0;
      }
      fprintf(fout, "\n");
      // reference proto_parse so it does not count as unused
      (void)proto_parse;
    }
  }

  fclose(fout);
  fclose(fasm);
  fclose(g_fhdr);

  return 0;
}

// vim:ts=2:shiftwidth=2:expandtab
Commit	Line	Data
	1	#define _GNU_SOURCE
	2	#include <stdio.h>
	3	#include <stdlib.h>
	4	#include <string.h>
	5
	6	#include "my_assert.h"
	7	#include "my_str.h"
	8
	9	#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
	10	#define IS(w, y) !strcmp(w, y)
	11	#define IS_START(w, y) !strncmp(w, y, strlen(y))
	12
	13	#include "protoparse.h"
	14
	15	static const char *asmfn;
	16	static int asmln;
	17	static FILE *g_fhdr;
	18
	19	enum dx_type {
	20	DXT_UNSPEC,
	21	DXT_BYTE,
	22	DXT_WORD,
	23	DXT_DWORD,
	24	DXT_QUAD,
	25	DXT_TEN,
	26	};
	27
	28	#define aerr(fmt, ...) do { \
	29	printf("%s:%d: error: " fmt, asmfn, asmln, ##__VA_ARGS__); \
	30	fcloseall(); \
	31	exit(1); \
	32	} while (0)
	33
	34	#include "masm_tools.h"
	35
	36	static char next_word_s(char w, size_t wsize, char *s)
	37	{
	38	int quote = 0;
	39	size_t i;
	40
	41	s = sskip(s);
	42
	43	for (i = 0; i < wsize - 1; i++) {
	44	if (s[i] == '\'')
	45	quote ^= 1;
	46	if (s[i] == 0 \|\| (!quote && (my_isblank(s[i]) \|\| s[i] == ',')))
	47	break;
	48	w[i] = s[i];
	49	}
	50	w[i] = 0;
	51
	52	if (s[i] != 0 && !my_isblank(s[i]) && s[i] != ',')
	53	printf("warning: '%s' truncated\n", w);
	54
	55	return s + i;
	56	}
	57
	58	static void next_section(FILE fasm, char name)
	59	{
	60	char words[2][256];
	61	char line[256];
	62	int wordc;
	63	char *p;
	64
	65	name[0] = 0;
	66
	67	while (fgets(line, sizeof(line), fasm))
	68	{
	69	wordc = 0;
	70	asmln++;
	71
	72	p = sskip(line);
	73	if (*p == 0)
	74	continue;
	75
	76	if (*p == ';') {
	77	while (strlen(line) == sizeof(line) - 1) {
	78	// one of those long comment lines..
	79	if (!fgets(line, sizeof(line), fasm))
	80	break;
	81	}
	82	continue;
	83	}
	84
	85	for (wordc = 0; wordc < ARRAY_SIZE(words); wordc++) {
	86	p = sskip(next_word(words[wordc], sizeof(words[0]), p));
	87	if (p == 0 \|\| p == ';') {
	88	wordc++;
	89	break;
	90	}
	91	}
	92
	93	if (wordc < 2)
	94	continue;
	95
	96	if (!IS(words[1], "segment"))
	97	continue;
	98
	99	strcpy(name, words[0]);
	100	break;
	101	}
	102	}
	103
	104	static enum dx_type parse_dx_directive(const char *name)
	105	{
	106	if (IS(name, "dd"))
	107	return DXT_DWORD;
	108	if (IS(name, "dw"))
	109	return DXT_WORD;
	110	if (IS(name, "db"))
	111	return DXT_BYTE;
	112	if (IS(name, "dq"))
	113	return DXT_QUAD;
	114	if (IS(name, "dt"))
	115	return DXT_TEN;
	116
	117	return DXT_UNSPEC;
	118	}
	119
	120	static const char *type_name(enum dx_type type)
	121	{
	122	switch (type) {
	123	case DXT_BYTE:
	124	return ".byte";
	125	case DXT_WORD:
	126	return ".word";
	127	case DXT_DWORD:
	128	return ".long";
	129	case DXT_QUAD:
	130	return ".quad";
	131	case DXT_TEN:
	132	return ".tfloat";
	133	case DXT_UNSPEC:
	134	break;
	135	}
	136	return "<bad>";
	137	}
	138
	139	static int type_size(enum dx_type type)
	140	{
	141	switch (type) {
	142	case DXT_BYTE:
	143	return 1;
	144	case DXT_WORD:
	145	return 2;
	146	case DXT_DWORD:
	147	return 4;
	148	case DXT_QUAD:
	149	return 8;
	150	case DXT_TEN:
	151	return 10;
	152	case DXT_UNSPEC:
	153	break;
	154	}
	155	return -1;
	156	}
	157
	158	static char escape_string(char s)
	159	{
	160	char buf[256];
	161	char *t = buf;
	162
	163	for (; *s != 0; s++) {
	164	if (*s == '"') {
	165	strcpy(t, "\\22");
	166	t += strlen(t);
	167	continue;
	168	}
	169	if (*s == '\\') {
	170	strcpy(t, "\\\\");
	171	t += strlen(t);
	172	continue;
	173	}
	174	t++ = s;
	175	}
	176	t = s;
	177	return strcpy(s, buf);
	178	}
	179
	180	int main(int argc, char *argv[])
	181	{
	182	FILE fout, fasm;
	183	char words[20][256];
	184	//int sep_after[20];
	185	char word[256];
	186	char line[256];
	187	char comment[256];
	188	unsigned long val;
	189	unsigned long cnt;
	190	const char *sym;
	191	enum dx_type type;
	192	int is_label;
	193	int wordc;
	194	int first;
	195	int arg_out;
	196	int arg = 1;
	197	int len;
	198	int w;
	199	char *p;
	200	char *p2;
	201
	202	if (argc != 4) {
	203	printf("usage:\n%s <.s> <.asm> <hdrf>\n",
	204	argv[0]);
	205	return 1;
	206	}
	207
	208	arg_out = arg++;
	209
	210	asmfn = argv[arg++];
	211	fasm = fopen(asmfn, "r");
	212	my_assert_not(fasm, NULL);
	213
	214	hdrfn = argv[arg++];
	215	g_fhdr = fopen(hdrfn, "r");
	216	my_assert_not(g_fhdr, NULL);
	217
	218	fout = fopen(argv[arg_out], "w");
	219	my_assert_not(fout, NULL);
	220
	221	comment[0] = 0;
	222
	223	while (!feof(fasm)) {
	224	next_section(fasm, line);
	225	if (IS(line + 1, "text"))
	226	continue;
	227
	228	if (IS(line + 1, "rdata"))
	229	fprintf(fout, "\n.section .rodata\n");
	230	else if (IS(line + 1, "data"))
	231	fprintf(fout, "\n.data\n");
	232	else
	233	aerr("unhandled section: '%s'\n", line);
	234
	235	fprintf(fout, ".align 4\n");
	236
	237	while (fgets(line, sizeof(line), fasm))
	238	{
	239	sym = NULL;
	240	asmln++;
	241
	242	p = sskip(line);
	243	if (p == 0 \|\| p == ';')
	244	continue;
	245
	246	for (wordc = 0; wordc < ARRAY_SIZE(words); wordc++) {
	247	//sep_after[wordc] = 0;
	248	p = sskip(next_word_s(words[wordc], sizeof(words[0]), p));
	249	if (p == 0 \|\| p == ';') {
	250	wordc++;
	251	break;
	252	}
	253	if (*p == ',') {
	254	//sep_after[wordc] = 1;
	255	p = sskip(p + 1);
	256	}
	257	}
	258
	259	if (wordc == 2 && IS(words[1], "ends"))
	260	break;
	261	if (wordc < 2)
	262	aerr("unhandled: '%s'\n", words[0]);
	263
	264	// don't cares
	265	if (IS(words[0], "assume"))
	266	continue;
	267
	268	if (IS(words[0], "align")) {
	269	val = parse_number(words[1]);
	270	fprintf(fout, "\t\t .align %ld", val);
	271	goto fin;
	272	}
	273
	274	w = 1;
	275	type = parse_dx_directive(words[0]);
	276	if (type == DXT_UNSPEC) {
	277	type = parse_dx_directive(words[1]);
	278	sym = words[0];
	279	w = 2;
	280	}
	281	if (type == DXT_UNSPEC)
	282	aerr("unhandled decl: '%s %s'\n", words[0], words[1]);
	283
	284	if (sym != NULL) {
	285	len = strlen(sym);
	286	fprintf(fout, "_%s:", sym);
	287
	288	len += 2;
	289	if (len < 8)
	290	fprintf(fout, "\t");
	291	if (len < 16)
	292	fprintf(fout, "\t");
	293	if (len <= 16)
	294	fprintf(fout, " ");
	295	else
	296	fprintf(fout, " ");
	297	}
	298	else {
	299	fprintf(fout, "\t\t ");
	300	}
	301
	302	if (type == DXT_BYTE && words[w][0] == '\'') {
	303	// string; use asciz for most common case
	304	if (w == wordc - 2 && IS(words[w + 1], "0")) {
	305	fprintf(fout, ".asciz \"");
	306	wordc--;
	307	}
	308	else
	309	fprintf(fout, ".ascii \"");
	310
	311	for (; w < wordc; w++) {
	312	if (words[w][0] == '\'') {
	313	p = words[w] + 1;
	314	p2 = strchr(p, '\'');
	315	if (p2 == NULL)
	316	aerr("unterminated string? '%s'\n", p);
	317	memcpy(word, p, p2 - p);
	318	word[p2 - p] = 0;
	319	fprintf(fout, "%s", escape_string(word));
	320	}
	321	else {
	322	val = parse_number(words[w]);
	323	if (val & ~0xff)
	324	aerr("bad string trailing byte?\n");
	325	fprintf(fout, "\\x%02lx", val);
	326	}
	327	}
	328	fprintf(fout, "\"");
	329	goto fin;
	330	}
	331
	332	if (w == wordc - 2) {
	333	if (IS_START(words[w + 1], "dup(")) {
	334	cnt = parse_number(words[w]);
	335	p = words[w + 1] + 4;
	336	p2 = strchr(p, ')');
	337	if (p2 == NULL)
	338	aerr("bad dup?\n");
	339	memmove(word, p, p2 - p);
	340	word[p2 - p] = 0;
	341	val = parse_number(word);
	342
	343	fprintf(fout, ".fill 0x%02lx,%d,0x%02lx",
	344	cnt, type_size(type), val);
	345	goto fin;
	346	}
	347	}
	348
	349	if (type == DXT_DWORD && words[w][0] == '\''
	350	&& words[w][5] == '\'' && strlen(words[w]) == 6)
	351	{
	352	if (w != wordc - 1)
	353	aerr("TODO\n");
	354
	355	p = words[w];
	356	val = (p[1] << 24) \| (p[2] << 16) \| (p[3] << 8) \| p[4];
	357	fprintf(fout, ".long 0x%lx", val);
	358	snprintf(comment, sizeof(comment), "%s", words[w]);
	359	goto fin;
	360	}
	361
	362	if ((type == DXT_QUAD \|\| type == DXT_TEN)
	363	&& strchr(words[w], '.'))
	364	{
	365	if (w != wordc - 1)
	366	aerr("TODO\n");
	367
	368	fprintf(fout, type == DXT_TEN ? ".tfloat " : ".double ");
	369	fprintf(fout, "%s", words[w]);
	370	goto fin;
	371	}
	372
	373	first = 1;
	374	fprintf(fout, "%s ", type_name(type));
	375	for (; w < wordc; w++)
	376	{
	377	if (!first)
	378	fprintf(fout, ", ");
	379
	380	is_label = 0;
	381	if (w >= wordc - 2 && IS(words[w], "offset")) {
	382	is_label = 1;
	383	w++;
	384	}
	385	else if (type == DXT_DWORD
	386	&& !('0' <= words[w][0] && words[w][0] <= '9'))
	387	{
	388	// assume label
	389	is_label = 1;
	390	}
	391
	392	if (is_label) {
	393	p = words[w];
	394	if (IS_START(p, "loc_") \|\| strchr(p, '?') \|\| strchr(p, '@'))
	395	{
	396	fprintf(fout, "0");
	397	snprintf(comment, sizeof(comment), "%s", words[w + 1]);
	398	goto fin;
	399	}
	400	fprintf(fout, "_%s", p);
	401	}
	402	else {
	403	val = parse_number(words[w]);
	404	if (val < 10)
	405	fprintf(fout, "%ld", val);
	406	else
	407	fprintf(fout, "0x%lx", val);
	408	}
	409
	410	first = 0;
	411	}
	412
	413	fin:
	414	if (comment[0] != 0) {
	415	fprintf(fout, "\t\t# %s", comment);
	416	comment[0] = 0;
	417	}
	418	fprintf(fout, "\n");
	419	(void)proto_parse;
	420	}
	421	}
	422
	423	fclose(fout);
	424	fclose(fasm);
	425	fclose(g_fhdr);
	426
	427	return 0;
	428	}
	429
	430	// vim:ts=2:shiftwidth=2:expandtab