[ia32rtools.git] / tools / cvt_data.c

#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "my_assert.h"
#include "my_str.h"

// Number of elements in x; x must be a real array, not a pointer.
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
// Non-zero when the strings w and y compare equal.
#define IS(w, y) (!strcmp((w), (y)))
// Non-zero when string w begins with the prefix y.
#define IS_START(w, y) (!strncmp((w), (y), strlen(y)))

#include "protoparse.h"

// Path of the .asm input; assigned in main() and printed by aerr().
static const char *asmfn;
// Number of input lines read so far (bumped after each successful fgets);
// printed by aerr() as the error position.
static int asmln;
// Header file stream; main() opens and closes it. Nothing else visible here
// reads from it -- presumably it is consumed through protoparse.h (TODO confirm).
static FILE *g_fhdr;

// Element type selected by a data-definition directive
// (see parse_dx_directive() for the keyword mapping).
enum dx_type {
  DXT_UNSPEC, // not a recognized directive
  DXT_BYTE,   // "db", 1 byte
  DXT_WORD,   // "dw", 2 bytes
  DXT_DWORD,  // "dd", 4 bytes
  DXT_QUAD,   // "dq", 8 bytes
  DXT_TEN,    // "dt", 10 bytes
};

// Fatal error: print "<asmfn>:<asmln>: error: " followed by the formatted
// message to stdout, close every open stream and exit with status 1.
// fmt must be a string literal because it is pasted onto the prefix.
#define aerr(fmt, ...) do { \
	printf("%s:%d: error: " fmt, asmfn, asmln, ##__VA_ARGS__); \
  fcloseall(); \
	exit(1); \
} while (0)

#include "masm_tools.h"

// Copy the next word of s into w, treating single-quoted runs as part of
// the word (blanks and commas inside quotes do not end it).
//
// w     - destination buffer
// wsize - size of w in bytes; at most wsize - 1 characters are stored,
//         followed by a terminating NUL
// s     - input; leading characters are first skipped with sskip()
//
// Returns a pointer to the first character of s that was not copied.
// A warning is printed when the word did not fit into w.
static char *next_word_s(char *w, size_t wsize, char *s)
{
  int quote = 0;
  size_t i;

  s = sskip(s);

  // no room even for the terminator; wsize - 1 below would wrap around
  if (wsize == 0)
    return s;

  for (i = 0; i < wsize - 1; i++) {
    if (s[i] == '\'')
      quote ^= 1;
    if (s[i] == 0 || (!quote && (my_isblank(s[i]) || s[i] == ',')))
      break;
    w[i] = s[i];
  }
  w[i] = 0;

  // After a regular stop s[i] is NUL, or an unquoted blank/comma.
  // Anything else means the buffer filled up first; inside a quote even
  // a blank or comma is still part of the (now cut off) word.
  if (s[i] != 0 && (quote || (!my_isblank(s[i]) && s[i] != ',')))
    printf("warning: '%s' truncated\n", w);

  return s + i;
}

// Advance fasm to the next "<name> segment" line.
//
// fasm - input stream, read line by line; asmln is bumped for every line
// name - receives the segment name (first word of the matching line);
//        must have room for at least 256 bytes. Left as an empty string
//        when the input runs out before another segment line is found.
static void next_section(FILE *fasm, char *name)
{
  char words[2][256];
  char line[256];
  int wordc;
  char *p;

  name[0] = 0;

  while (fgets(line, sizeof(line), fasm))
  {
    asmln++;

    p = sskip(line);
    if (*p == 0)
      continue;

    if (*p == ';') {
      // One of those long comment lines: keep reading chunks until the
      // one that carries the newline. A full buffer that already ends in
      // '\n' is a complete line, so the next line must not be swallowed.
      while (strlen(line) == sizeof(line) - 1
             && line[sizeof(line) - 2] != '\n')
      {
        if (!fgets(line, sizeof(line), fasm))
          break;
      }
      continue;
    }

    // pick up to two words; wordc ends up as the number collected
    for (wordc = 0; wordc < (int)ARRAY_SIZE(words); wordc++) {
      p = sskip(next_word(words[wordc], sizeof(words[0]), p));
      if (*p == 0 || *p == ';') {
        wordc++;
        break;
      }
    }

    if (wordc < 2)
      continue;

    if (!IS(words[1], "segment"))
      continue;

    strcpy(name, words[0]);
    break;
  }
}

// Map a data-definition keyword ("db", "dw", "dd", "dq", "dt") to its
// dx_type; any other string yields DXT_UNSPEC.
static enum dx_type parse_dx_directive(const char *name)
{
  // every known keyword is exactly 'd' plus one size letter
  if (name[0] != 'd' || name[1] == 0 || name[2] != 0)
    return DXT_UNSPEC;

  switch (name[1]) {
  case 'd':
    return DXT_DWORD;
  case 'w':
    return DXT_WORD;
  case 'b':
    return DXT_BYTE;
  case 'q':
    return DXT_QUAD;
  case 't':
    return DXT_TEN;
  default:
    return DXT_UNSPEC;
  }
}

static const char *type_name(enum dx_type type)
{
  switch (type) {
  case DXT_BYTE:
    return ".byte";
  case DXT_WORD:
    return ".word";
  case DXT_DWORD:
    return ".long";
  case DXT_QUAD:
    return ".quad";
  case DXT_TEN:
    return ".tfloat";
  case DXT_UNSPEC:
    break;
  }
  return "<bad>";
}

// Size in bytes of one element of the given type;
// -1 for DXT_UNSPEC or any value outside the enum.
static int type_size(enum dx_type type)
{
  int bytes = -1;

  if (type == DXT_BYTE)
    bytes = 1;
  else if (type == DXT_WORD)
    bytes = 2;
  else if (type == DXT_DWORD)
    bytes = 4;
  else if (type == DXT_QUAD)
    bytes = 8;
  else if (type == DXT_TEN)
    bytes = 10;

  return bytes;
}

// Escape s in place so it can be emitted between double quotes in an
// assembler string directive: '"' becomes \" and '\' becomes \\.
//
// s - NUL-terminated string in a writable buffer of at least 256 bytes
//     (the escaped text may be longer than the input).
//
// Returns s. If the escaped text would not fit into 255 characters, an
// error is printed and the program exits with status 1 instead of
// writing past the buffers.
static char *escape_string(char *s)
{
  char buf[256];
  size_t n = 0;
  const char *p;

  for (p = s; *p != 0; p++) {
    // both special characters are escaped by a preceding backslash;
    // a numeric escape like \22 would be read as octal, not as '"'
    int esc = (*p == '"' || *p == '\\');

    // room for the optional backslash, the character and the final NUL
    if (n + esc + 1 >= sizeof(buf)) {
      printf("error: escaped string too long: '%s'\n", s);
      exit(1);
    }
    if (esc)
      buf[n++] = '\\';
    buf[n++] = *p;
  }
  buf[n] = 0;

  return strcpy(s, buf);
}

// Convert the data segments of an assembly listing into GNU-as style
// data directives.
//
// argv[1] - output file (.s), created/truncated
// argv[2] - input listing (.asm)
// argv[3] - header file; only opened and closed here
//
// Segments named "?text" are skipped, "?rdata" and "?data" are converted
// (the first character of the segment name is ignored); any other
// segment is a fatal error. Returns 0 on success, 1 on bad usage;
// conversion errors exit with status 1 through aerr().
int main(int argc, char *argv[])
{
  FILE *fout, *fasm;
  char words[20][256];
  char word[256];
  char line[256];
  char comment[256];
  unsigned long val;
  unsigned long cnt;
  const char *sym;
  enum dx_type type;
  size_t clen;
  int is_label;
  int wordc;
  int first;
  int arg_out;
  int arg = 1;
  int len;
  int w;
  char *p;
  char *p2;

  if (argc != 4) {
    printf("usage:\n%s <.s> <.asm> <hdrf>\n",
      argv[0]);
    return 1;
  }

  arg_out = arg++;

  asmfn = argv[arg++];
  fasm = fopen(asmfn, "r");
  my_assert_not(fasm, NULL);

  hdrfn = argv[arg++];
  g_fhdr = fopen(hdrfn, "r");
  my_assert_not(g_fhdr, NULL);

  fout = fopen(argv[arg_out], "w");
  my_assert_not(fout, NULL);

  comment[0] = 0;

  for (;;) {
    next_section(fasm, line);
    // next_section() leaves the name empty when the input ran out;
    // that is the normal end of the conversion, not an unknown segment
    if (line[0] == 0)
      break;

    if (IS(line + 1, "text"))
      continue;

    if (IS(line + 1, "rdata"))
      fprintf(fout, "\n.section .rodata\n");
    else if (IS(line + 1, "data"))
      fprintf(fout, "\n.data\n");
    else
      aerr("unhandled section: '%s'\n", line);

    fprintf(fout, ".align 4\n");

    while (fgets(line, sizeof(line), fasm))
    {
      sym = NULL;
      asmln++;

      p = sskip(line);
      if (*p == 0 || *p == ';')
        continue;

      // split the line into words; commas only act as separators
      for (wordc = 0; wordc < (int)ARRAY_SIZE(words); wordc++) {
        p = sskip(next_word_s(words[wordc], sizeof(words[0]), p));
        if (*p == 0 || *p == ';') {
          wordc++;
          break;
        }
        if (*p == ',')
          p = sskip(p + 1);
      }

      // the loop ran out of slots with input left over: refuse rather
      // than silently dropping the remaining values
      if (wordc == (int)ARRAY_SIZE(words) && *p != 0 && *p != ';')
        aerr("too many words\n");

      if (wordc == 2 && IS(words[1], "ends"))
        break;
      if (wordc < 2)
        aerr("unhandled: '%s'\n", words[0]);

      // don't cares
      if (IS(words[0], "assume"))
        continue;

      if (IS(words[0], "align")) {
        val = parse_number(words[1]);
        fprintf(fout, "\t\t  .align %lu", val);
        goto fin;
      }

      // "<directive> values..." or "<symbol> <directive> values..."
      w = 1;
      type = parse_dx_directive(words[0]);
      if (type == DXT_UNSPEC) {
        type = parse_dx_directive(words[1]);
        sym = words[0];
        w = 2;
      }
      if (type == DXT_UNSPEC)
        aerr("unhandled decl: '%s %s'\n", words[0], words[1]);

      if (sym != NULL) {
        len = strlen(sym);
        fprintf(fout, "_%s:", sym);

        // pad so that the directive column lines up for short labels
        len += 2;
        if (len < 8)
          fprintf(fout, "\t");
        if (len < 16)
          fprintf(fout, "\t");
        if (len <= 16)
          fprintf(fout, "  ");
        else
          fprintf(fout, " ");
      }
      else {
        fprintf(fout, "\t\t  ");
      }

      if (type == DXT_BYTE && words[w][0] == '\'') {
        // string; use asciz for most common case
        if (w == wordc - 2 && IS(words[w + 1], "0")) {
          fprintf(fout, ".asciz \"");
          wordc--;
        }
        else
          fprintf(fout, ".ascii \"");

        for (; w < wordc; w++) {
          if (words[w][0] == '\'') {
            p = words[w] + 1;
            p2 = strchr(p, '\'');
            if (p2 == NULL)
              aerr("unterminated string? '%s'\n", p);
            memcpy(word, p, p2 - p);
            word[p2 - p] = 0;
            fprintf(fout, "%s", escape_string(word));
          }
          else {
            val = parse_number(words[w]);
            if (val & ~0xff)
              aerr("bad string trailing byte?\n");
            // fixed 3-digit octal escape: a \x escape would also absorb
            // any hex digits that happen to follow in the string
            fprintf(fout, "\\%03lo", val);
          }
        }
        fprintf(fout, "\"");
        goto fin;
      }

      if (w == wordc - 2) {
        // "<count> dup(<value>)"
        if (IS_START(words[w + 1], "dup(")) {
          cnt = parse_number(words[w]);
          p = words[w + 1] + 4;
          p2 = strchr(p, ')');
          if (p2 == NULL)
            aerr("bad dup?\n");
          memmove(word, p, p2 - p);
          word[p2 - p] = 0;
          val = parse_number(word);

          fprintf(fout, ".fill 0x%02lx,%d,0x%02lx",
            cnt, type_size(type), val);
          goto fin;
        }
      }

      // 4-character literal like 'abcd'; the length is checked before
      // indexing so no byte past the terminator is inspected
      if (type == DXT_DWORD && words[w][0] == '\''
        && strlen(words[w]) == 6 && words[w][5] == '\'')
      {
        if (w != wordc - 1)
          aerr("TODO\n");

        p = words[w];
        // go through unsigned char so bytes >= 0x80 are not sign-extended
        val = ((unsigned long)(unsigned char)p[1] << 24)
            | ((unsigned long)(unsigned char)p[2] << 16)
            | ((unsigned long)(unsigned char)p[3] << 8)
            | (unsigned long)(unsigned char)p[4];
        fprintf(fout, ".long 0x%lx", val);
        snprintf(comment, sizeof(comment), "%s", words[w]);
        goto fin;
      }

      // floating point constant
      if ((type == DXT_QUAD || type == DXT_TEN)
          && strchr(words[w], '.'))
      {
        if (w != wordc - 1)
          aerr("TODO\n");

        fprintf(fout, "%s %s",
          type == DXT_TEN ? ".tfloat" : ".double", words[w]);
        goto fin;
      }

      // generic list of numbers and/or labels
      first = 1;
      fprintf(fout, "%s ", type_name(type));
      for (; w < wordc; w++)
      {
        if (!first)
          fprintf(fout, ", ");

        is_label = 0;
        // "offset <label>": only when a word actually follows
        if (w <= wordc - 2 && IS(words[w], "offset")) {
          is_label = 1;
          w++;
        }
        else if (type == DXT_DWORD
                 && !('0' <= words[w][0] && words[w][0] <= '9'))
        {
          // assume label
          is_label = 1;
        }

        if (is_label) {
          p = words[w];
          if (IS_START(p, "loc_") || strchr(p, '?') || strchr(p, '@'))
          {
            // emit a placeholder so the following values keep their
            // offsets, and remember the original name for the comment
            fprintf(fout, "0");
            clen = strlen(comment);
            snprintf(comment + clen, sizeof(comment) - clen, "%s%s",
              clen != 0 ? ", " : "", p);
          }
          else
            fprintf(fout, "_%s", p);
        }
        else {
          val = parse_number(words[w]);
          if (val < 10)
            fprintf(fout, "%lu", val);
          else
            fprintf(fout, "0x%lx", val);
        }

        first = 0;
      }

fin:
      if (comment[0] != 0) {
        fprintf(fout, "\t\t# %s", comment);
        comment[0] = 0;
      }
      fprintf(fout, "\n");
      // reference proto_parse without calling it (it is not used here)
      (void)proto_parse;
    }
  }

  fclose(fout);
  fclose(fasm);
  fclose(g_fhdr);

  return 0;
}

// vim:ts=2:shiftwidth=2:expandtab
Commit	Line	Data
054f95b2	1	#define _GNU_SOURCE
	2	#include <stdio.h>
	3	#include <stdlib.h>
	4	#include <string.h>
	5
	6	#include "my_assert.h"
	7	#include "my_str.h"
	8
	9	#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
	10	#define IS(w, y) !strcmp(w, y)
	11	#define IS_START(w, y) !strncmp(w, y, strlen(y))
	12
	13	#include "protoparse.h"
	14
	15	static const char *asmfn;
	16	static int asmln;
	17	static FILE *g_fhdr;
	18
	19	enum dx_type {
	20	DXT_UNSPEC,
	21	DXT_BYTE,
	22	DXT_WORD,
	23	DXT_DWORD,
	24	DXT_QUAD,
	25	DXT_TEN,
	26	};
	27
	28	#define aerr(fmt, ...) do { \
	29	printf("%s:%d: error: " fmt, asmfn, asmln, ##__VA_ARGS__); \
	30	fcloseall(); \
	31	exit(1); \
	32	} while (0)
	33
	34	#include "masm_tools.h"
	35
	36	static char next_word_s(char w, size_t wsize, char *s)
	37	{
	38	int quote = 0;
	39	size_t i;
	40
	41	s = sskip(s);
	42
	43	for (i = 0; i < wsize - 1; i++) {
	44	if (s[i] == '\'')
	45	quote ^= 1;
	46	if (s[i] == 0 \|\| (!quote && (my_isblank(s[i]) \|\| s[i] == ',')))
	47	break;
	48	w[i] = s[i];
	49	}
	50	w[i] = 0;
	51
	52	if (s[i] != 0 && !my_isblank(s[i]) && s[i] != ',')
	53	printf("warning: '%s' truncated\n", w);
	54
	55	return s + i;
	56	}
	57
	58	static void next_section(FILE fasm, char name)
	59	{
	60	char words[2][256];
	61	char line[256];
	62	int wordc;
	63	char *p;
	64
65	name[0] = 0;
66
67	while (fgets(line, sizeof(line), fasm))
68	{
69	wordc = 0;
70	asmln++;
71
72	p = sskip(line);
73	if (*p == 0)
74	continue;
75
76	if (*p == ';') {
77	while (strlen(line) == sizeof(line) - 1) {
78	// one of those long comment lines..
79	if (!fgets(line, sizeof(line), fasm))
80	break;
81	}
82	continue;
83	}
84
85	for (wordc = 0; wordc < ARRAY_SIZE(words); wordc++) {
86	p = sskip(next_word(words[wordc], sizeof(words[0]), p));
87	if (p == 0 \|\| p == ';') {
88	wordc++;
89	break;
90	}
91	}
92
93	if (wordc < 2)
94	continue;
95
96	if (!IS(words[1], "segment"))
97	continue;
98
99	strcpy(name, words[0]);
100	break;
101	}
102	}
103
104	static enum dx_type parse_dx_directive(const char *name)
105	{
106	if (IS(name, "dd"))
107	return DXT_DWORD;
108	if (IS(name, "dw"))
109	return DXT_WORD;
110	if (IS(name, "db"))
111	return DXT_BYTE;
112	if (IS(name, "dq"))
113	return DXT_QUAD;
114	if (IS(name, "dt"))
115	return DXT_TEN;
116
117	return DXT_UNSPEC;
118	}
119
120	static const char *type_name(enum dx_type type)
121	{
122	switch (type) {
123	case DXT_BYTE:
124	return ".byte";
125	case DXT_WORD:
126	return ".word";
127	case DXT_DWORD:
128	return ".long";
129	case DXT_QUAD:
130	return ".quad";
131	case DXT_TEN:
132	return ".tfloat";
133	case DXT_UNSPEC:
134	break;
135	}
136	return "<bad>";
137	}
138
139	static int type_size(enum dx_type type)
140	{
141	switch (type) {
142	case DXT_BYTE:
143	return 1;
144	case DXT_WORD:
145	return 2;
146	case DXT_DWORD:
147	return 4;
148	case DXT_QUAD:
149	return 8;
150	case DXT_TEN:
151	return 10;
152	case DXT_UNSPEC:
153	break;
154	}
155	return -1;
156	}
157
158	static char escape_string(char s)
159	{
160	char buf[256];
161	char *t = buf;
162
163	for (; *s != 0; s++) {
164	if (*s == '"') {
165	strcpy(t, "\\22");
166	t += strlen(t);
167	continue;
168	}
169	if (*s == '\\') {
170	strcpy(t, "\\\\");
171	t += strlen(t);
172	continue;
173	}
174	t++ = s;
175	}
176	t = s;
177	return strcpy(s, buf);
178	}
179
180	int main(int argc, char *argv[])
181	{
182	FILE fout, fasm;
183	char words[20][256];
184	//int sep_after[20];
185	char word[256];
186	char line[256];
187	char comment[256];
188	unsigned long val;
189	unsigned long cnt;
190	const char *sym;
191	enum dx_type type;
192	int is_label;
193	int wordc;
194	int first;
195	int arg_out;
196	int arg = 1;
197	int len;
198	int w;
199	char *p;
200	char *p2;
201
202	if (argc != 4) {
203	printf("usage:\n%s <.s> <.asm> <hdrf>\n",
204	argv[0]);
205	return 1;
206	}
207
208	arg_out = arg++;
209
210	asmfn = argv[arg++];
211	fasm = fopen(asmfn, "r");
212	my_assert_not(fasm, NULL);
213
214	hdrfn = argv[arg++];
215	g_fhdr = fopen(hdrfn, "r");
216	my_assert_not(g_fhdr, NULL);
217
218	fout = fopen(argv[arg_out], "w");
219	my_assert_not(fout, NULL);
220
221	comment[0] = 0;
222
223	while (!feof(fasm)) {
224	next_section(fasm, line);
225	if (IS(line + 1, "text"))
226	continue;
227
228	if (IS(line + 1, "rdata"))
229	fprintf(fout, "\n.section .rodata\n");
230	else if (IS(line + 1, "data"))
231	fprintf(fout, "\n.data\n");
232	else
233	aerr("unhandled section: '%s'\n", line);
234
235	fprintf(fout, ".align 4\n");
236
237	while (fgets(line, sizeof(line), fasm))
238	{
239	sym = NULL;
240	asmln++;
241
242	p = sskip(line);
243	if (p == 0 \|\| p == ';')
244	continue;
245
246	for (wordc = 0; wordc < ARRAY_SIZE(words); wordc++) {
247	//sep_after[wordc] = 0;
248	p = sskip(next_word_s(words[wordc], sizeof(words[0]), p));
249	if (p == 0 \|\| p == ';') {
250	wordc++;
251	break;
252	}
253	if (*p == ',') {
254	//sep_after[wordc] = 1;
255	p = sskip(p + 1);
256	}
257	}
258
259	if (wordc == 2 && IS(words[1], "ends"))
260	break;
261	if (wordc < 2)
262	aerr("unhandled: '%s'\n", words[0]);
263
264	// don't cares
265	if (IS(words[0], "assume"))
266	continue;
267
268	if (IS(words[0], "align")) {
269	val = parse_number(words[1]);
270	fprintf(fout, "\t\t .align %ld", val);
271	goto fin;
272	}
273
274	w = 1;
275	type = parse_dx_directive(words[0]);
276	if (type == DXT_UNSPEC) {
277	type = parse_dx_directive(words[1]);
278	sym = words[0];
279	w = 2;
280	}
281	if (type == DXT_UNSPEC)
282	aerr("unhandled decl: '%s %s'\n", words[0], words[1]);
283
284	if (sym != NULL) {
285	len = strlen(sym);
286	fprintf(fout, "_%s:", sym);
287
288	len += 2;
289	if (len < 8)
290	fprintf(fout, "\t");
291	if (len < 16)
292	fprintf(fout, "\t");
293	if (len <= 16)
294	fprintf(fout, " ");
295	else
296	fprintf(fout, " ");
297	}
298	else {
299	fprintf(fout, "\t\t ");
300	}
301
302	if (type == DXT_BYTE && words[w][0] == '\'') {
303	// string; use asciz for most common case
304	if (w == wordc - 2 && IS(words[w + 1], "0")) {
305	fprintf(fout, ".asciz \"");
306	wordc--;
307	}
308	else
309	fprintf(fout, ".ascii \"");
310
311	for (; w < wordc; w++) {
312	if (words[w][0] == '\'') {
313	p = words[w] + 1;
314	p2 = strchr(p, '\'');
315	if (p2 == NULL)
316	aerr("unterminated string? '%s'\n", p);
317	memcpy(word, p, p2 - p);
318	word[p2 - p] = 0;
319	fprintf(fout, "%s", escape_string(word));
320	}
321	else {
322	val = parse_number(words[w]);
323	if (val & ~0xff)
324	aerr("bad string trailing byte?\n");
325	fprintf(fout, "\\x%02lx", val);
326	}
327	}
328	fprintf(fout, "\"");
329	goto fin;
330	}
331
332	if (w == wordc - 2) {
333	if (IS_START(words[w + 1], "dup(")) {
334	cnt = parse_number(words[w]);
335	p = words[w + 1] + 4;
336	p2 = strchr(p, ')');
337	if (p2 == NULL)
338	aerr("bad dup?\n");
339	memmove(word, p, p2 - p);
340	word[p2 - p] = 0;
341	val = parse_number(word);
342
343	fprintf(fout, ".fill 0x%02lx,%d,0x%02lx",
344	cnt, type_size(type), val);
345	goto fin;
346	}
347	}
348
349	if (type == DXT_DWORD && words[w][0] == '\''
350	&& words[w][5] == '\'' && strlen(words[w]) == 6)
351	{
352	if (w != wordc - 1)
353	aerr("TODO\n");
354
355	p = words[w];
356	val = (p[1] << 24) \| (p[2] << 16) \| (p[3] << 8) \| p[4];
357	fprintf(fout, ".long 0x%lx", val);
358	snprintf(comment, sizeof(comment), "%s", words[w]);
359	goto fin;
360	}
361
362	if ((type == DXT_QUAD \|\| type == DXT_TEN)
363	&& strchr(words[w], '.'))
364	{
365	if (w != wordc - 1)
366	aerr("TODO\n");
367
368	fprintf(fout, type == DXT_TEN ? ".tfloat " : ".double ");
369	fprintf(fout, "%s", words[w]);
370	goto fin;
371	}
372
373	first = 1;
374	fprintf(fout, "%s ", type_name(type));
375	for (; w < wordc; w++)
376	{
377	if (!first)
378	fprintf(fout, ", ");
379
380	is_label = 0;
381	if (w >= wordc - 2 && IS(words[w], "offset")) {
382	is_label = 1;
383	w++;
384	}
385	else if (type == DXT_DWORD
386	&& !('0' <= words[w][0] && words[w][0] <= '9'))
387	{
388	// assume label
389	is_label = 1;
390	}
391
392	if (is_label) {
393	p = words[w];
394	if (IS_START(p, "loc_") \|\| strchr(p, '?') \|\| strchr(p, '@'))
395	{
396	fprintf(fout, "0");
397	snprintf(comment, sizeof(comment), "%s", words[w + 1]);
398	goto fin;
399	}
400	fprintf(fout, "_%s", p);
401	}
402	else {
403	val = parse_number(words[w]);
404	if (val < 10)
405	fprintf(fout, "%ld", val);
406	else
407	fprintf(fout, "0x%lx", val);
408	}
409
410	first = 0;
411	}
412
413	fin:
414	if (comment[0] != 0) {
415	fprintf(fout, "\t\t# %s", comment);
416	comment[0] = 0;
417	}
418	fprintf(fout, "\n");
419	(void)proto_parse;
420	}
421	}
422
423	fclose(fout);
424	fclose(fasm);
425	fclose(g_fhdr);
426
427	return 0;
428	}
429
430	// vim:ts=2:shiftwidth=2:expandtab