/*
 * file: utfcode.c
 *
 * (c) Peter Kleiweg 2000
 *
 * This is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2,
 * or (at your option) any later version.
 */

/* Program version string; pasted into the usage text printed by syntax (). */
#define UTFcodeVERSION "1.0"

#ifdef __MSDOS__
#ifndef __COMPACT__
#error Memory model COMPACT required
#endif  /* __COMPACT__  */
#include <dir.h>
#endif  /* __MSDOS__  */
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Encoder state and options.
 */

/* Count of bits (0, 2 or 4) left over in `octal` between outbyte () calls. */
int obits = 0;

/* Output selectors: both encodings are printed unless -7 or -8 is given. */
int utf7 = 1;
int utf8 = 1;

/* Number of entries in `lu`. */
int nr;

/* The 6-bit group that outoct () turns into one base64 character. */
unsigned octal;

/* The values parsed from the command line, in argument order. */
long unsigned *lu;

/* Name used in the usage text and in error messages. */
char *programname;

/* Spare block allocated in main (); freed before out-of-memory is reported. */
char *no_mem_buffer;

/* Message reported when an allocation fails. */
char out_of_memory [] = "Out of memory";

/*
 * Functions defined further down in this file.
 */
void utf_7 (void);
void utf_8 (void);
void outbyte (unsigned i);
void outoct (void);
void get_programname (char const *argv0);
void errit (char const *format, ...);
void syntax (void);
void *s_malloc (size_t size);
void *s_realloc (void *block, size_t size);
char *s_strdup (char const *s);

/*
 * Entry point.
 *
 * Leading "-7" / "-8" options restrict the output to one encoding; by
 * default both are printed, UTF-7 first.  Every remaining argument is one
 * value to encode, written either as a C-style number (decimal, 0x.. hex,
 * 0.. octal) or as "U+hex" / "u+hex".  Without any value the usage text is
 * printed and the program exits.
 *
 * An argument that is not a complete number is reported through errit ()
 * instead of being silently encoded as whatever prefix strtoul () managed
 * to read (previously e.g. "abc" was encoded as 0).
 *
 * Returns 0 on success; all errors terminate the program with status 1.
 */
int main (int argc, char *argv [])
{
    int
        i;
    char
        *arg,
        *start,
        *end;

    /* reserve block, released again just before out-of-memory is reported */
    no_mem_buffer = (char *) malloc (1024);

    /* argv [0] may be missing when the program is started with an empty
       argument vector */
    get_programname (argc > 0 && argv [0] ? argv [0] : "utfcode");

    while (argc > 1) {
        if (! strcmp (argv [1], "-7")) {
            utf7 = 1;
            utf8 = 0;
        } else if (! strcmp (argv [1], "-8")) {
            utf7 = 0;
            utf8 = 1;
        } else
            break;
        argv++;
        argc--;
    }

    /* "<= 1" rather than "== 1": nr below must never become negative */
    if (argc <= 1)
        syntax ();

    nr = argc - 1;

    lu = (long unsigned *) s_malloc (nr * sizeof (long unsigned));
    for (i = 0; i < nr; i++) {
        arg = argv [i + 1];
        if ((arg [0] == 'U' || arg [0] == 'u') && arg [1] == '+') {
            /* U+XXXX notation: the part after "U+" is hexadecimal */
            start = arg + 2;
            lu [i] = strtoul (start, &end, 16);
        } else {
            start = arg;
            lu [i] = strtoul (start, &end, 0);
        }
        /* nothing consumed, or trailing junk: not a number */
        if (end == start || *end != '\0')
            errit ("Invalid number: %s", arg);
    }

    if (utf7)
        utf_7 ();

    if (utf8)
        utf_8 ();

    return 0;
}

/* Send one 16-bit UTF-16 code unit to the base64 packer, high byte first. */
static void utf7_unit (unsigned unit)
{
    outbyte (unit >> 8);
    outbyte (unit & 0xFF);
}

/*
 * Print the values lu [0 .. nr-1] as a single UTF-7 shifted sequence:
 * a '+', the base64 characters for their big-endian UTF-16 code units
 * (no padding), and a closing "-" followed by a newline.
 *
 * Values below 0x10000 are one code unit.  Values from 0x10000 up to
 * 0x10FFFF are written as a UTF-16 surrogate pair, each half being packed
 * like any other 16-bit unit.  Larger values have no UTF-16 form and are
 * reported through errit (), which terminates the program.
 */
void utf_7 ()
{
    int
        i;
    long unsigned
        cp;

    fputc ('+', stdout);
    for (i = 0; i < nr; i++) {
        cp = lu [i];
        if (cp < 0x10000) {
            utf7_unit ((unsigned) cp);
        } else if (cp <= 0x10FFFF) {
            /* split the 20 bits above 0x10000 into high and low surrogate */
            cp -= 0x10000;
            utf7_unit ((unsigned) (0xD800 | (cp >> 10)));
            utf7_unit ((unsigned) (0xDC00 | (cp & 0x3FF)));
        } else
            errit ("Too large for UTF-7: 0x%lX", lu [i]);
    }

    /* bits still waiting in the packer form a last, zero-filled character */
    if (obits)
        outoct ();

    fputs ("-\n", stdout);
}

/*
 * Feed one byte into the base64 packer.
 *
 * `obits` says how many bits (0, 2 or 4) of an unfinished 6-bit group are
 * waiting in `octal`.  The top bits of `u` complete that group, which is
 * printed with outoct (); the rest of `u` starts the next group.  When the
 * rest is itself a full group (4 bits were waiting) it is printed at once
 * and the packer is empty again.  Any other value of `obits` makes the
 * call a no-op.
 */
void outbyte (unsigned u)
{
    int
        pending = obits,
        rest;

    if (pending != 0 && pending != 2 && pending != 4)
        return;

    /* complete and print the current group */
    if (pending == 0)
        octal = u >> 2;
    else
        octal |= u >> (pending + 2);
    outoct ();

    /* the low `rest` bits of u open the next group, left-aligned in 6 bits */
    rest = pending + 2;
    octal = (u & ((1u << rest) - 1u)) << (6 - rest);

    if (rest == 6) {
        outoct ();
        obits = 0;
    } else
        obits = rest;
}

/*
 * Print the base64 character for the value in `octal` on stdout:
 * 0-25 give 'A'-'Z', 26-51 give 'a'-'z', 52-61 give '0'-'9', 62 gives '+',
 * and every other value gives '/'.
 */
void outoct ()
{
    static char const
        alphabet [] =
            "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
            "abcdefghijklmnopqrstuvwxyz"
            "0123456789"
            "+";

    /* the table covers 0..62; everything beyond that is '/' */
    fputc (octal < 63 ? alphabet [octal] : '/', stdout);
}

/*
 * Print the values lu [0 .. nr-1] as UTF-8 on stdout, followed by a
 * newline.  Sequences of one to six bytes are produced, so every value
 * below 0x80000000 can be encoded; anything larger is reported through
 * errit (), which terminates the program.
 */
void utf_8 ()
{
    /* one row per sequence length: values below `limit` use lead-byte
       marker `lead` followed by `tail` continuation bytes */
    static const struct {
        long unsigned limit;
        unsigned lead;
        int tail;
    } forms [] = {
        { 0x80UL,       0x00, 0 },
        { 0x800UL,      0xC0, 1 },
        { 0x10000UL,    0xE0, 2 },
        { 0x200000UL,   0xF0, 3 },
        { 0x4000000UL,  0xF8, 4 },
        { 0x80000000UL, 0xFC, 5 }
    };
    enum { NFORMS = sizeof forms / sizeof forms [0] };

    long unsigned
        cp;
    int
        idx,
        f,
        k;

    for (idx = 0; idx < nr; idx++) {
        cp = lu [idx];

        /* first row whose limit lies above the value */
        for (f = 0; f < NFORMS && cp >= forms [f].limit; f++)
            ;

        if (f == NFORMS) {
            errit ("Too large for UTF-8: 0x%lX", cp);
        } else {
            /* lead byte: marker plus the bits above all continuation bytes */
            fputc ((int) (forms [f].lead | (cp >> (6 * forms [f].tail))), stdout);

            /* continuation bytes, six bits each, most significant first */
            for (k = forms [f].tail - 1; k >= 0; k--)
                fputc ((int) (0x80 | ((cp >> (6 * k)) & 0x3F)), stdout);
        }
    }
    fputc ('\n', stdout);
}

/*
 * Report a fatal error and terminate the program.
 *
 * format, ...: printf-style format string and its arguments.
 *
 * Writes a newline, "Error <programname>: ", the formatted message and a
 * blank line to stderr, then calls exit (1).  Does not return.
 */
void errit (char const *format, ...)
{
    va_list
	list;

    fprintf (stderr, "\nError %s: ", programname);

    va_start (list, format);
    vfprintf (stderr, format, list);
    /* each va_start must be paired with va_end in the same function */
    va_end (list);

    fprintf (stderr, "\n\n");

    exit (1);
}

/*
 * Set the global `programname` from the program's invocation path.
 *
 * argv0: the path the program was started with (argv [0]); may be NULL.
 *
 * Under MSDOS the file-name component is taken with fnsplit (); elsewhere
 * the text after the last '/' is used, or all of argv0 when it contains no
 * '/'.  The name is stored in a freshly allocated copy.  When argv0 is
 * NULL or the copy cannot be allocated, a built-in default name is used,
 * so that `programname` is never NULL when it is later printed with "%s".
 */
void get_programname (char const *argv0)
{
    static char
        fallback [] = "utfcode";
    char
        *copy = NULL;
#ifdef __MSDOS__
    char
        name [MAXFILE];

    if (argv0) {
        fnsplit (argv0, NULL, NULL, name, NULL);
        copy = strdup (name);
    }
#else   /* unix */
    char const
        *base;

    if (argv0) {
        base = strrchr (argv0, '/');
        base = base ? base + 1 : argv0;
        copy = strdup (base);
    }
#endif

    programname = copy ? copy : fallback;
}

/*
 * malloc () wrapper that does not return on failure.
 *
 * size: number of bytes wanted.  A request for 0 bytes is served as a
 *       1-byte block: malloc (0) is allowed to return NULL, and that must
 *       not be mistaken for memory exhaustion.
 *
 * Returns the new block.  When the allocation fails, the reserve block
 * `no_mem_buffer` is freed and the out-of-memory message is reported
 * through errit (), which terminates the program.
 */
void *s_malloc (size_t size)
{
    void
	*p;

    p = malloc (size ? size : 1);
    if (! p) {
        free (no_mem_buffer);
	/* pass the message as data, not as the format string */
	errit ("%s", out_of_memory);
    }
    return p;
}

/*
 * realloc () wrapper that does not return on failure.
 *
 * block: block to resize, or NULL to allocate a new one.
 * size:  new size in bytes.  A request for 0 bytes is served as 1 byte:
 *        realloc (block, 0) may return NULL without being out of memory,
 *        and that must not be reported as memory exhaustion.
 *
 * Returns the resized block.  When the reallocation fails, the reserve
 * block `no_mem_buffer` is freed and the out-of-memory message is reported
 * through errit (), which terminates the program.
 */
void *s_realloc (void *block, size_t size)
{
    void
	*p;

    p = realloc (block, size ? size : 1);
    if (! p) {
        free (no_mem_buffer);
	/* pass the message as data, not as the format string */
	errit ("%s", out_of_memory);
    }
    return p;
}

/*
 * Return a newly allocated copy of the string `s`; a NULL `s` yields a
 * newly allocated empty string.  The memory comes from s_malloc ().
 */
char *s_strdup (char const *s)
{
    char const
	*src = s ? s : "";
    size_t
	bytes = strlen (src) + 1;    /* text plus terminating '\0' */
    char
	*copy = (char *) s_malloc (bytes);

    memcpy (copy, src, bytes);
    return copy;
}

void syntax ()
{
    fprintf (
	stderr,
	"\n"
	"This is utfcode, version " UTFcodeVERSION "\n"
	"\n"
        "Usage: %s [-7|-8] [number...]\n"
        "\n"
        " -7: utf-7 only\n"
        " -8: utf-8 only\n"
        "\n",
	programname
    );
    exit (1);
}