
/* Written by Aidan Kehoe, kehoea at parhasard dot net, September
   2001. Public Domain--No warranty of any kind. */

#include <sys/types.h>
#include <machine/endian.h>
#include <sys/uio.h>
#include <unistd.h>

#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

		/* Return codes: 0, all OK; 1, read error; 2, write error. */
#define		RETURN_OK		0
#define		RETURN_READ_ERROR	1
#define		RETURN_WRITE_ERROR	2

/* `Byte Ordering Read 32 bits,'--dependent on whether we're dealing
   with big-endian or host order. Do set_host_byteorder(0) to
   guarantee big-endian format.

   When host_ordering is non-zero the value is passed through
   untouched; otherwise it is converted with ntohl/htonl (or
   ntohs/htons for the 16-bit forms).  The argument is fully
   parenthesised in the expansion, but note that it appears twice in
   the macro body, so avoid arguments with side effects.  */

#define 	bo_read32(X) 	(host_ordering ? (X) : ntohl((X)))
#define 	bo_write32(X) 	(host_ordering ? (X) : htonl((X)))

#define 	bo_read16(X) 	(host_ordering ? (X) : ntohs((X)))
#define 	bo_write16(X) 	(host_ordering ? (X) : htons((X)))

/* Non-zero: streams are in host byte order.  Zero (the default):
   streams are big-endian. */
static int 	host_ordering = 0;

/*
 * set_host_byteorder: Select the byte order used by the bo_* macros.
 * A non-zero j means multi-byte units are read and written in host
 * order; zero means they are converted to and from network
 * (big-endian) order. The value just stored is returned.
 */

int
set_host_byteorder(int j)
{
	host_ordering = j;

	return host_ordering;
}

/*
 * u4char_to_u8string: Return a UTF-8 representation of the character
 * `input,' which is encoded in 32-bit UCS4. If output is non-null,
 * write the answer to output; else use a static buffer (so that form
 * is not reentrant: the next call with a null output overwrites the
 * previous answer).  In either case, a pointer to the result is
 * returned. The length of the buffer output must be at least 7, and
 * the resulting string is nul-terminated.
 *
 * The one-to-six byte form of UTF-8 is produced, which covers values
 * up to 0x7FFFFFFF. Bit 31 of input has no place in a six byte
 * sequence and is discarded.
 */

unsigned char *
u4char_to_u8string(uint32_t input, unsigned char *output)
{
	static unsigned char sbuf[7]; /* Six bytes of UTF-8 plus the nul. */
	unsigned char *ochars = output ? output : sbuf;
	unsigned char lead;
	size_t len, i;

	if (input < 0x80) {
		ochars[0] = (unsigned char)input;
		ochars[1] = '\0';
		return ochars;
	}

	/* Pick the sequence length and the marker bits of its lead byte. */
	if (input < 0x0800) {
		len = 2;
		lead = 0xC0;
	} else if (input < 0x00010000) {
		len = 3;
		lead = 0xE0;
	} else if (input < 0x00200000) {
		len = 4;
		lead = 0xF0;
	} else if (input < 0x04000000) {
		len = 5;
		lead = 0xF8;
	} else {
		len = 6;
		lead = 0xFC;
	}

	ochars[len] = '\0';

	/* Fill the continuation bytes from the end, six bits apiece. */
	for (i = len - 1; i > 0; --i) {
		ochars[i] = (unsigned char)(0x80 | (input & 0x3F));
		input >>= 6;
	}

	/* A lead byte of an N byte sequence carries 7 - N payload bits:
	   0x1F, 0x0F, 0x07, 0x03 and 0x01 for N = 2..6. */
	ochars[0] = (unsigned char)(lead | (input & (0x7Fu >> len)));
	return ochars;
}

/*
 * code_to_u8string; Given a number encoded as an ASCII
 * string(codestr), return the nul-terminated UTF-8 encoding of that
 * value. If the number starts with 'x' or 'X', it's interpreted as
 * hexadecimal; else it's interpreted as decimal.
 *
 * NULL is returned if codestr is NULL, contains no digits, has
 * trailing characters after the number, overflows an unsigned long,
 * or is larger than 0x7FFFFFFF (the largest value UTF-8 can carry).
 * On success the result is whatever u4char_to_u8string() returns for
 * a null output argument, so it is only valid until the next such
 * call.
 */

unsigned char *
code_to_u8string(char *codestr)
{
	unsigned long value;
	char *end;
	int base = 10;

	if (codestr == NULL)
		return NULL;

	if (*codestr == 'x' || *codestr == 'X') {
		base = 16;
		++codestr;
	}

	/* strtoul only reports overflow through errno, so clear it first. */
	errno = 0;
	value = strtoul(codestr, &end, base);

	/* end == codestr means not a single digit was consumed. */
	if (end == codestr || *end != '\0')
		return NULL;

	/* Keep the full-width result for the range test; narrowing to 32
	   bits first would hide out-of-range input on LP64 systems. */
	if (errno == ERANGE || value > 0x7FFFFFFFUL)
		return NULL;

	return u4char_to_u8string((uint32_t)value, NULL);
}

/*
 * ucs4toutf8: Read a UCS4 encoded stream from in, and write the
 * equivalent UTF-8 encoded stream to out. Returns 0 on success, 1 on
 * read error, and 2 on write error.
 *
 * Each 32-bit unit is passed through bo_read32(), so the input byte
 * order follows set_host_byteorder(). A null in or out is reported
 * as a read error. A trailing fragment of fewer than four bytes at
 * end of file is ignored. The one-to-six byte form of UTF-8 is
 * written; bit 31 of a unit cannot be represented and is dropped.
 */

int
ucs4toutf8(FILE *in, FILE *out)
{
	uint32_t raw, c;
	unsigned char ochars[6];
	unsigned char lead;
	size_t len, i;

	if (in == NULL || out == NULL)
		return RETURN_READ_ERROR;

	for (;;) {

		if (fread(&raw, 1, sizeof(raw), in) != sizeof(raw))
			return feof(in) ? RETURN_OK : RETURN_READ_ERROR;

		c = bo_read32(raw);

		if (c < 0x80) {
			if (fputc((int)c, out) == EOF)
				return RETURN_WRITE_ERROR;
			continue;
		}

		/* Sequence length and lead byte marker for this value. */
		if (c < 0x0800) {
			len = 2;
			lead = 0xC0;
		} else if (c < 0x00010000) {
			len = 3;
			lead = 0xE0;
		} else if (c < 0x00200000) {
			len = 4;
			lead = 0xF0;
		} else if (c < 0x04000000) {
			len = 5;
			lead = 0xF8;
		} else {
			len = 6;
			lead = 0xFC;
		}

		/* Continuation bytes, last one first, six bits each. */
		for (i = len - 1; i > 0; --i) {
			ochars[i] = (unsigned char)(0x80 | (c & 0x3F));
			c >>= 6;
		}

		/* The lead byte of an N byte sequence holds 7 - N bits. */
		ochars[0] = (unsigned char)(lead | (c & (0x7Fu >> len)));

		if (fwrite(ochars, 1, len, out) != len)
			return RETURN_WRITE_ERROR;
	}
}

/*
 * utf16toutf8: Read a UTF-16 encoded stream from in, and write the
 * equivalent UTF-8 encoded stream to out. Returns 0 on success, 1 on
 * read error, and 2 on write error. Invalid characters are replaced
 * with the UTF-8 encoding of U+FFFD.
 *
 * Each 16-bit unit is passed through bo_read16(), so the input byte
 * order follows set_host_byteorder(). A null in or out is reported
 * as a read error. "Invalid" means a high surrogate that is not
 * followed by a low surrogate (including one cut off by end of
 * file), or a low surrogate that does not follow a high one.
 */

int
utf16toutf8(FILE *in, FILE *out)
{
	uint16_t input, scratch16;
	uint32_t scratch32;

	unsigned char ochars[4];
#define		FFFD_UTF8		"\xef\xbf\xbd"
	if (in == NULL || out == NULL)
		return RETURN_READ_ERROR;

	for (;;) {

		if (fread(&scratch16, 1, sizeof(scratch16), in)
		    != sizeof(scratch16))
			return feof(in) ? RETURN_OK : RETURN_READ_ERROR;
		input = bo_read16(scratch16);

		/* The `badpair,' goto is preferable to an extra
		   variable for performance reasons (i.e. an extra if,
		   executed even for ASCII characters, would be
		   required before the fread above). */
	badpair:
		if (input < 0x80) {
			if (fputc(input, out) == EOF)
				return RETURN_WRITE_ERROR;
			continue;
		}

		if (input < 0x0800) {
			/* Top 5 bits, then the last 6. */
			ochars[0] = (unsigned char)(0xC0 | (input >> 6));
			ochars[1] = (unsigned char)(0x80 | (input & 0x3F));

			if (fwrite(ochars, 1, 2, out) != 2)
				return RETURN_WRITE_ERROR;
			continue;
		}

		if (input >= 0xD800 && input <= 0xDBFF) {
			/* High surrogate. Subtracting 0xD7C0 rather
			   than 0xD800 folds in the 0x10000 offset. */
			scratch32 = (uint32_t)(input - 0xD7C0) << 10;

			if (fread(&scratch16, 1, sizeof(scratch16), in)
			    != sizeof(scratch16)) {
				/* Stream ended mid-pair. The fread at
				   the top of the loop will report the
				   EOF or the error. */
				if (fwrite(FFFD_UTF8, 1,
					   sizeof(FFFD_UTF8) - 1, out)
				    != sizeof(FFFD_UTF8) - 1)
					return RETURN_WRITE_ERROR;
				continue;
			}

			input = bo_read16(scratch16);
			if (input < 0xDC00 || input > 0xDFFF) {
				/* Single one of a surrogate pair. Bad
				   character; the unit just read is
				   then processed in its own right. */
				if (fwrite(FFFD_UTF8, 1,
					   sizeof(FFFD_UTF8) - 1, out)
				    != sizeof(FFFD_UTF8) - 1)
					return RETURN_WRITE_ERROR;
				goto badpair;
			}

			/* scratch32 is now in 0x10000..0x10FFFF. */
			scratch32 |= (uint32_t)(input & 0x03FF);

			ochars[0] = (unsigned char)(0xF0 | (scratch32 >> 18));
			ochars[1] = (unsigned char)
			    (0x80 | ((scratch32 >> 12) & 0x3F));
			ochars[2] = (unsigned char)
			    (0x80 | ((scratch32 >> 6) & 0x3F));
			ochars[3] = (unsigned char)(0x80 | (scratch32 & 0x3F));

			if (fwrite(ochars, 1, 4, out) != 4)
				return RETURN_WRITE_ERROR;
			continue;
		}

		if (input >= 0xDC00 && input <= 0xDFFF) {
			/* Low surrogate with no high surrogate before
			   it. The test is on the byte-swapped value,
			   not on the raw unit as read. */
			if (fwrite(FFFD_UTF8, 1, sizeof(FFFD_UTF8) - 1, out)
			    != sizeof(FFFD_UTF8) - 1)
				return RETURN_WRITE_ERROR;
			continue;
		}

		/* Everything else in the BMP: top 4 bits, 6, and 6. */
		ochars[0] = (unsigned char)(0xE0 | (input >> 12));
		ochars[1] = (unsigned char)(0x80 | ((input >> 6) & 0x3F));
		ochars[2] = (unsigned char)(0x80 | (input & 0x3F));

		if (fwrite(ochars, 1, 3, out) != 3)
			return RETURN_WRITE_ERROR;
	}
}

/*
 * read_sequence; Pull count bytes from stream f into buf, accepting
 * only UTF-8 continuation bytes (those of the form 10xxxxxx).
 * Returns count once all of them have been stored. Returns -1 if
 * getc(3) yields EOF, or if a byte is not a continuation byte; in
 * the latter case the offending byte is handed back with ungetc(3)
 * so the caller sees it on its next read. Bytes stored before a
 * failure are left in buf.
 */

static __inline__ int
read_sequence(FILE *f, size_t count, unsigned char *buf)
{
	unsigned char *dst = buf;
	unsigned char *const stop = buf + count;

	while (dst < stop) {
		const int c = getc(f);

		if (c == EOF)
			return -1;

		/* Anything other than 10xxxxxx starts a new character. */
		if ((c & 0xC0) != 0x80) {
			ungetc(c, f);
			return -1;
		}

		*dst++ = (unsigned char)c;
	}

	return (int)count;
}

/*
 * utf8toucs4: Read a UTF-8 encoded stream from in, writing the
 * equivalent UCS-4 encoded stream to out. Returns 0 on success, 1 on
 * read error, and 2 on write error. Overlong UTF-8 sequences are
 * mapped to 0xFFFD, as are values corresponding to UTF-16 surrogates.
 *
 * Stray continuation bytes, the bytes 0xFE and 0xFF, and sequences
 * cut short by a non-continuation byte or by end of file also
 * produce 0xFFFD. Sequences of up to six bytes are accepted, so
 * values up to 0x7FFFFFFF can be written. Each output unit goes
 * through bo_write32(), so the output byte order follows
 * set_host_byteorder(). A null in or out is reported as a read
 * error.
 */

int
utf8toucs4(FILE *in, FILE *out)
{
	/* min_for[n] is the smallest value that really needs n
	   continuation bytes; anything lower is an overlong form. */
	static const uint32_t min_for[6] = {
		0x0, 0x80, 0x0800, 0x00010000, 0x00200000, 0x04000000
	};
	unsigned char ichars[5] = "";
	uint32_t c, word;
	size_t ntrail, k;
	int lead;

	if (in == NULL || out == NULL)
		return RETURN_READ_ERROR;

	for (;;) {

		lead = fgetc(in);
		if (lead == EOF)
			return feof(in) ? RETURN_OK : RETURN_READ_ERROR;

		/* The number of leading one bits gives the sequence
		   length; what follows the first zero is payload. */
		if ((lead & 0x80) == 0) {
			ntrail = 0;
			c = (uint32_t)lead;
		} else if ((lead & 0x40) == 0) {
			/* Highest bit set, second highest
			   not--a continuation byte on its own. */
			ntrail = 0;
			c = 0xFFFD;
		} else if ((lead & 0x20) == 0) {
			ntrail = 1;
			c = (uint32_t)(lead & 0x1F);
		} else if ((lead & 0x10) == 0) {
			ntrail = 2;
			c = (uint32_t)(lead & 0x0F);
		} else if ((lead & 0x08) == 0) {
			ntrail = 3;
			c = (uint32_t)(lead & 0x07);
		} else if ((lead & 0x04) == 0) {
			ntrail = 4;
			c = (uint32_t)(lead & 0x03);
		} else if ((lead & 0x02) == 0) {
			ntrail = 5;
			c = (uint32_t)(lead & 0x01);
		} else {
			/* 0xFE or 0xFF: never valid in UTF-8. */
			ntrail = 0;
			c = 0xFFFD;
		}

		if (ntrail > 0) {
			if (read_sequence(in, ntrail, ichars) == -1) {
				/* Read error or bad sequence. Any
				   offending byte was pushed back, so
				   the next pass picks it up. */
				c = 0xFFFD;
			} else {
				/* Append six bits per continuation
				   byte, most significant first. */
				for (k = 0; k < ntrail; ++k)
					c = (c << 6) | (ichars[k] & 0x3Fu);

				/* Reject overlong forms and UTF-16
				   surrogates. */
				if (c < min_for[ntrail]
				    || (c >= 0xD800 && c <= 0xDFFF))
					c = 0xFFFD;
			}
		}

		word = bo_write32(c);
		if (fwrite(&word, 1, sizeof(word), out) != sizeof(word))
			return RETURN_WRITE_ERROR;
	}
}

/*
 * utf8toutf16: Read a UTF-8 encoded stream from in, writing the
 * equivalent UTF-16 encoded stream to out. Return 0 on success, 1 on
 * read error, or 2 on write error. CAUTION; this may lose data: the
 * UTF-8 read here can carry far larger values than UTF-16 can.
 * Values above U+10FFFF are mapped to 0xFFFD, as are overlong
 * sequences, encoded surrogates, stray continuation bytes, the bytes
 * 0xFE and 0xFF, and sequences cut short by a non-continuation byte
 * or end of file. Values from U+10000 up are written as a surrogate
 * pair. Each output unit goes through bo_write16(), so the output
 * byte order follows set_host_byteorder(). A null in or out is
 * reported as a read error.
 */

int
utf8toutf16(FILE *in, FILE *out)
{
	unsigned char trail[5] = "";
	uint32_t cp;
	uint16_t unit;
	size_t extra, k;
	int first, ok;

	if (in == NULL || out == NULL)
		return RETURN_READ_ERROR;

	for (;;) {

		first = fgetc(in);
		if (first == EOF)
			return feof(in) ? RETURN_OK : RETURN_READ_ERROR;

		/* Step one: classify the first byte. extra is the
		   number of continuation bytes it announces. */
		if (first < 0x80) {
			extra = 0;
			cp = (uint32_t)first;
		} else if (first < 0xC0) {
			/* A continuation byte with nothing to continue. */
			extra = 0;
			cp = 0xFFFD;
		} else if (first < 0xE0) {
			extra = 1;
			cp = (uint32_t)(first & 0x1F);
		} else if (first < 0xF0) {
			extra = 2;
			cp = (uint32_t)(first & 0x0F);
		} else if (first < 0xF8) {
			extra = 3;
			cp = (uint32_t)(first & 0x07);
		} else if (first < 0xFC) {
			extra = 4;
			cp = 0;
		} else if (first < 0xFE) {
			extra = 5;
			cp = 0;
		} else {
			/* 0xFE or 0xFF; just a bad byte. */
			extra = 0;
			cp = 0xFFFD;
		}

		/* Step two: consume the continuation bytes and work
		   out the value, or settle on U+FFFD. */
		if (extra != 0) {
			if (read_sequence(in, extra, trail) == -1) {
				/* Read error or bad sequence; whatever
				   broke it is seen on the next pass. */
				cp = 0xFFFD;
			} else if (extra > 3) {
				/* Five and six byte forms are swallowed
				   whole; UTF-16 cannot hold them. */
				cp = 0xFFFD;
			} else {
				for (k = 0; k < extra; ++k)
					cp = (cp << 6) | (trail[k] & 0x3Fu);

				switch (extra) {
				case 1:
					ok = cp > 0x7F;
					break;
				case 2:
					ok = cp > 0x07FF
					    && (cp < 0xD800 || cp > 0xDFFF);
					break;
				default:
					ok = cp > 0xFFFF && cp < 0x110000;
					break;
				}

				if (!ok)
					cp = 0xFFFD;
			}
		}

		/* Step three: emit one unit, or a surrogate pair. */
		if (cp > 0xFFFF) {
			unit = (uint16_t)(0xD7C0 + (cp >> 10));
			unit = bo_write16(unit);
			if (fwrite(&unit, 1, 2, out) != 2)
				return RETURN_WRITE_ERROR;

			unit = (uint16_t)(0xDC00 | (cp & 0x3FF));
		} else {
			unit = (uint16_t)cp;
		}

		unit = bo_write16(unit);
		if (fwrite(&unit, 1, 2, out) != 2)
			return RETURN_WRITE_ERROR;
	}
}
