// generated html version of ttfinvread.c

#include "stdio.h"
#include "stdint.h"
#include "string.h"

/*
 * Encode the code point U as a NUL-terminated UTF-8 byte sequence.
 *
 * Lengths follow the original (pre-RFC 3629) UTF-8 scheme, so values up to
 * 0x7FFFFFFF are encoded using 1 to 6 bytes:
 *   < 0x80        1 byte      < 0x200000    4 bytes
 *   < 0x800       2 bytes     < 0x4000000   5 bytes
 *   < 0x10000     3 bytes     < 0x80000000  6 bytes
 *
 * Returns a pointer to a static buffer that is overwritten by the next call,
 * so the result must be consumed (or copied) before calling again.
 * U == 0 and U >= 0x80000000 both yield an empty string.
 */
char *int2u8(unsigned U){
	static char out[7];
	int i=0;
	/* Each bound is exclusive: the first value needing n+1 bytes is 2^k,
	   so 0x80, 0x800, 0x10000, ... must fall through to the next branch. */
	if(U<0x80)out[i++]=U;
	else if(U<0x800){
		out[i++]=(U>>6|0xC0);
		out[i++]=(U&0x3f|0x80);
	}else if(U<0x10000){
		out[i++]=(U>>12|0xE0);
		out[i++]=(U>>6&0x3f|0x80);
		out[i++]=(U&0x3f|0x80);
	}else if(U<0x200000){
		out[i++]=(U>>18|0xF0);
		out[i++]=(U>>12&0x3f|0x80);
		out[i++]=(U>>6&0x3f|0x80);
		out[i++]=(U&0x3f|0x80);
	}else if(U<0x4000000){
		out[i++]=(U>>24|0xF8);
		out[i++]=(U>>18&0x3f|0x80);
		out[i++]=(U>>12&0x3f|0x80);
		out[i++]=(U>>6&0x3f|0x80);
		out[i++]=(U&0x3f|0x80);
	}else if(U<0x80000000){
		out[i++]=(U>>30|0xFC);
		out[i++]=(U>>24&0x3f|0x80);
		out[i++]=(U>>18&0x3f|0x80);
		out[i++]=(U>>12&0x3f|0x80);
		out[i++]=(U>>6&0x3f|0x80);
		out[i++]=(U&0x3f|0x80);
	}
	out[i]=0;
	return out;
}

/*
 * Read a 32-bit big-endian unsigned integer from f.
 *
 * The four bytes are combined explicitly, so the result is the same on
 * little- and big-endian hosts. If four bytes cannot be read the function
 * returns 0; callers that need to tell this apart from a stored zero
 * should check feof(f) / ferror(f) afterwards.
 */
uint32_t read32(FILE *f){
	unsigned char b[4];
	if(fread(b,1,sizeof b,f) != sizeof b)return 0;
	return (uint32_t)b[0] << 24
	     | (uint32_t)b[1] << 16
	     | (uint32_t)b[2] << 8
	     | (uint32_t)b[3];
}

/*
 * Read a 16-bit big-endian unsigned integer from f.
 *
 * The two bytes are combined explicitly, so the result is the same on
 * little- and big-endian hosts. If two bytes cannot be read the function
 * returns 0; callers that need to tell this apart from a stored zero
 * should check feof(f) / ferror(f) afterwards.
 */
uint16_t read16(FILE *f){
	unsigned char b[2];
	if(fread(b,1,sizeof b,f) != sizeof b)return 0;
	return (uint16_t)((unsigned)b[0] << 8 | (unsigned)b[1]);
}

/*
 * Approximate number of terminal columns occupied by code point ch:
 * 0 for control and combining characters, 2 for wide (CJK-style)
 * characters, 1 for everything else.
 */
int u8charwid(int ch){
	/* Ordered by ascending upper bound; an entry applies to every code
	   point below `below` that no earlier entry has already claimed. */
	static const struct { int below; int cols; } spans[] = {
		{ ' ',     0 },
		{ 0x300,   1 },
		{ 0x370,   0 }, /* combining */
		{ 0x1100,  1 },
		{ 0x1200,  2 },
		{ 0x2E80,  1 },
		{ 0xA000,  2 },
		{ 0xAC00,  1 },
		{ 0xD800,  2 },
		{ 0xF900,  1 },
		{ 0xFB00,  2 },
		{ 0xFE00,  1 },
		{ 0xFE10,  0 }, /* variation selectors */
		{ 0xFE20,  2 },
		{ 0xFE30,  0 }, /* combining */
		{ 0xFE70,  2 },
		{ 0xFF00,  1 },
		{ 0xFF61,  2 }, /* fullwidth ascii */
		{ 0xFFE0,  1 }, /* halfwidth kana/jamo */
		{ 0xFFE7,  2 }, /* fullwidth symbols */
		{ 0x20000, 1 }, /* up to the end of plane 1 */
		{ 0x30000, 2 }, /* supplementary ideographic plane */
	};
	const int count = (int)(sizeof spans / sizeof spans[0]);
	int k;

	for(k = 0; k < count; k++){
		if(ch < spans[k].below){
			return spans[k].cols;
		}
	}
	/* anything at or beyond 0x30000 */
	return 1;
}

/* Nonzero once f has hit end-of-file or an I/O error. */
static int ttf_stream_bad(FILE *f){
	return feof(f) || ferror(f);
}

/* Number of code points shared by [sta,end] and [lo,hi]; all bounds inclusive. */
static unsigned ttf_span_overlap(uint32_t sta, uint32_t end, uint32_t lo, uint32_t hi){
	uint32_t first = sta > lo ? sta : lo;
	uint32_t last = end < hi ? end : hi;
	return first <= last ? (unsigned)(last - first + 1) : 0;
}

/*
 * Locate the format-12 UCS-4 cmap subtable in the font open on f, then either
 * print every covered character (onlyranges == 0) or list the raw ranges
 * (onlyranges != 0), followed by summary counts.
 * Returns 0 on success, 1 if the table cannot be found or the file is truncated.
 */
static int ttf_dump_cmap(FILE *f, int onlyranges){
	/* Scan 32-bit words for the 'cmap' tag; stop at EOF instead of spinning. */
	for(;;){
		uint32_t word = read32(f);
		if(ttf_stream_bad(f)){
			fprintf(stderr,"no cmap table found\n");
			return 1;
		}
		if(word == 0x636d6170)break;
	}
	read32(f);// table checksum, not verified
	uint32_t cmap_loc = read32(f);
	if(ttf_stream_bad(f) || fseek(f,(long)cmap_loc,SEEK_SET) != 0){
		fprintf(stderr,"cannot seek to cmap table\n");
		return 1;
	}

	/* cmap header: 16-bit version, then 16-bit number of encoding records.
	   Each record is a platform/encoding id pair followed by a 32-bit offset. */
	uint32_t n_records = read32(f) & 0xFFFF;
	uint32_t sub_off = 0;
	int found = 0;
	for(uint32_t r = 0; r < n_records; r++){
		uint32_t ids = read32(f);
		uint32_t off = read32(f);
		if(ttf_stream_bad(f))break;
		if(ids == 0x3000A){// platform 3, encoding 10: UCS-4
			sub_off = off;
			found = 1;
			break;
		}
	}
	if(!found){
		fprintf(stderr,"no UCS-4 cmap subtable found\n");
		return 1;
	}

	uint32_t ucs4_loc = cmap_loc + sub_off;
	if(fseek(f,(long)ucs4_loc,SEEK_SET) != 0){
		fprintf(stderr,"cannot seek to UCS-4 subtable\n");
		return 1;
	}
	uint16_t format = read16(f);
	if(ttf_stream_bad(f) || format != 12)return(printf("ucs4 table not format 12"),1);

	/* The group count sits 12 bytes into a format-12 subtable. */
	if(fseek(f,(long)ucs4_loc + 12,SEEK_SET) != 0){
		fprintf(stderr,"cannot seek to UCS-4 group count\n");
		return 1;
	}
	uint32_t n_ranges = read32(f);
	if(ttf_stream_bad(f)){
		fprintf(stderr,"UCS-4 subtable is truncated\n");
		return 1;
	}

	int status = 0;
	unsigned n = 0;// columns used so far on the current output line
	unsigned countkanji = 0;
	unsigned counthangul = 0;
	unsigned total = 0;
	for(uint32_t i = 0; i < n_ranges; i++){
		uint32_t sta = read32(f);
		uint32_t end = read32(f);
		read32(f);// starting glyph id, not needed here
		if(ttf_stream_bad(f)){
			fprintf(stderr,"UCS-4 subtable truncated after %u of %u ranges\n",
				(unsigned)i,(unsigned)n_ranges);
			status = 1;
			break;
		}
		if(onlyranges)
			printf("range %d U%x U%x\n",(int)i,(unsigned)sta,(unsigned)end);
		if(end < sta)continue;// malformed range contributes nothing

		/* Counts are derived from the range bounds so they are also
		   correct in -r mode, where no characters are printed. */
		total += end - sta + 1;
		countkanji += ttf_span_overlap(sta,end,0x4E00,0x9FFF);
		counthangul += ttf_span_overlap(sta,end,0xAC00,0xD7AF);
		if(onlyranges)continue;

		/* Test for the last code point after handling it, so a range
		   ending at 0xFFFFFFFF cannot wrap around and loop forever. */
		for(uint32_t cp = sta;;cp++){
			fputs(int2u8(cp),stdout);
			n += u8charwid(cp);
			if(n >= 64){putchar('\n');n=0;}
			if(cp == end)break;
		}
	}
	printf("\nthere are %d characters supported\n",(int)total);
	printf("there are %d hangul precomposed characters supported\n",(int)counthangul);
	printf("there are %d non-hangul characters supported\n",(int)(total-counthangul));
	printf("there are %d chinese characters supported\n",(int)countkanji);
	return status;
}

/*
 * Usage: prog [-r] fontfile
 *
 * Prints the characters covered by the font's UCS-4 (format 12) cmap
 * subtable, wrapped at roughly 64 columns, or with -r one line per range.
 * Exit status is 0 on success and 1 on any error.
 */
int main(int argc, char **argv){
	int a = 1;
	int onlyranges = 0;
	if(a >= argc)return (printf ("need a filename"),1);
	if(!strcmp(argv[a],"-r")){
		onlyranges=1;
		a++;
	}
	if(a >= argc)return (printf("need a filename"),1);

	/* Read-only binary mode: the font is never written to. */
	FILE *f = fopen(argv[a],"rb");
	if(!f){
		perror(argv[a]);
		return 1;
	}
	int status = ttf_dump_cmap(f,onlyranges);
	fclose(f);
	return status;
}