%{
/*-
 * Copyright (c) 2005, 2006 intron <intron@intron.ac>.  All rights reserved.
 * Copyright (c) 2005, 2006 The FreeBSD Simplified Chinese Project.
 * All rights reserved.
 *
 * This code is derived from software contributed to The FreeBSD Simplified
 * Chinese Project by intron.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * From CNPROJ: doc/zh_CN.GB2312/share/mk/fixrtf.lex,v 1.1.1000.20 2006/02/19 10:21:40 intron Exp
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: /tmp/pcvs/ports/textproc/fixrtf/src/fixrtf.l,v 1.1 2006-03-16 07:50:15 delphij Exp $");

#include <err.h>
#include <assert.h>
#include <stdio.h>
#include <string.h>
#include <sys/param.h>
#include <stdlib.h>
#include <unistd.h>
#include <time.h>
#include <png.h>

/*
 * This program is used to fix RTF:
 * 1. Embed PNGs into RTF.
 * 2. Embed FreeBSD-specific information into RTF, such as organization name
 *    and build time. Unfortunately, so far only Microsoft Word can read
 *    it. Microsoft Word Viewer and OpenOffice cannot read this kind of
 *    information, even from RTF created by Microsoft Word or OpenOffice.
 *    (Option: -i)
 * 3. Do some locale-specific fixing. (Option: -e <encoding>)
 *
 * See also Rich Text Format (RTF) Specification:
 * 1. Version 1.8 (Microsoft Word 2003)
 *    http://www.microsoft.com/downloads/details.aspx?familyid=ac57de32-17f0-4b46-9e4e-467ef9bc5540&displaylang=en
 * 2. Version 1.7 (Microsoft Word 2002)
 *    http://support.microsoft.com/kb/q86999/
 * 3. Version 1.6 (Microsoft Word 2000)
 *    http://msdn.microsoft.com/library/en-us/dnrtfspec/html/rtfspec.asp
 */


/* Non-zero when linked PNG images are to be embedded (option -p). */
int embedpng_enable=0;

/*
 * Encodings selectable with option -e.
 * NOTE(review): the values look like Windows code page identifiers - confirm.
 * See also http://msdn.microsoft.com/library/en-us/intl/unicode_81rn.asp
 */
#define	ENCODING_UNKNOWN	0
#define	ENCODING_GB2312		936
#define	ENCODING_GB18030	54936
#define	ENCODING_BIG5		950

/* Encoding chosen on the command line; ENCODING_UNKNOWN disables the fix. */
int encoding=ENCODING_UNKNOWN;


/*
 * Non-zero when book/article information is to be fetched (option -i).
 * It is cleared again as soon as fetching has started, so it acts only once.
 */
int fetchinfo_enable=0; /* FALSE */


#define MY_BUFFER_SIZE		3072
#define MY_BUFFER_LIMIT		2048

/*
 * MY_BUFFER_LIMIT is smaller than MY_BUFFER_SIZE: the buffers are declared
 * with MY_BUFFER_SIZE bytes but are only filled up to MY_BUFFER_LIMIT,
 * which leaves some spare room.
 */

/*
 * "mybuffer" caches the RTF stream while book/article information is being
 * fetched. "mybufferlength" is the number of bytes currently cached; the
 * cached bytes are not NUL-terminated.
 */
size_t mybufferlength=0;
char mybuffer[MY_BUFFER_SIZE];


/* Indices into "infobuf". */
#define	INFO_TITLE	0
#define	INFO_AUTHOR	1

/*
 * Storage for the fetched book/article information: one NUL-terminated
 * text and its length per item. "pinfobuf" points to the item that is
 * currently being collected, or is NULL when nothing is being collected.
 */
struct
{
	size_t length;
	char text[MY_BUFFER_SIZE];
} *pinfobuf=NULL,infobuf[]=
{
	{0,""},
	{0,""}
};

/*
 * Replace an RTF INCLUDEPICTURE field group by an embedded \pict group.
 * See also the section "Pictures" in RTF specification.
 *
 * field: NUL-terminated text of the whole RTF field group.
 *
 * The file name is the first double-quoted string after "INCLUDEPICTURE".
 * If it ends in ".png" and the file can be read as a PNG, a
 * {\pict\pngblip ...} group containing the hex-encoded file is written to
 * stdout. In every other case a warning is issued and "field" is written
 * to stdout unchanged. A file name that does not fit into PATH_MAX bytes
 * terminates the program with exit status 1.
 */
void
embedpng(char *field)
{
	char *p1,*p2,fn[PATH_MAX];
	unsigned char buf[256];
	FILE *fp;
	size_t l,i,nret;
	png_structp png_ptr;
	png_infop info_ptr,end_info;
	png_uint_32 width,height;

	/* Locate the quoted file name; never dereference a failed search. */
	p1=strcasestr(field,"INCLUDEPICTURE");
	if(p1!=NULL) p1=strchr(p1+14,'"'); /* String after "INCLUDEPICTURE" */
	p2=(p1!=NULL)?strchr(p1+1,'"'):NULL;
	if(p2==NULL)
	{
		warnx("No quoted file name found after INCLUDEPICTURE. Keep untouched.");
		goto embedpng_exit_1;
	}

	l=(size_t)(p2-(p1+1)); /* Substantial length of file name */
	if(l>sizeof(fn)-1)
	{
		warnx("*** Buffer Overflow Attack Detected !!! ***");
		exit(1);
	}
	memcpy(fn,p1+1,l);
	fn[l]=0;

	if(l<4) /* It should be longer than ".png". */
	{
		warnx("File name '%s' is too short!",fn);
		goto embedpng_exit_1;
	}

	if(strcasecmp(fn+(l-4),".png")!=0)
	{
		warnx("File name '%s' has not a suffix '.png'. Keep untouched.",fn);
		goto embedpng_exit_1;
	}

	if((fp=fopen(fn,"rb"))==NULL)
	{
		warnx("Failed to open '%s'!",fn);
		goto embedpng_exit_1;
	}

	/*
	 * A file shorter than the 8-byte PNG signature cannot be a PNG;
	 * checking the read count keeps png_sig_cmp() away from bytes that
	 * were never filled in.
	 */
	if(fread(buf,1,8,fp)!=8 || png_sig_cmp(buf,0,8))
	{
		warnx("The file '%s' is NOT in PNG format!",fn);
		goto embedpng_exit_2;
	}
	png_ptr=png_create_read_struct(PNG_LIBPNG_VER_STRING,NULL,NULL,NULL);
	if(!png_ptr)
	{
		warnx("Unable to create PNG read struct(*png_ptr)!");
		goto embedpng_exit_2;
	}
	info_ptr=png_create_info_struct(png_ptr);
	if(!info_ptr)
	{
		warnx("Unable to create PNG info struct(*info_ptr)!");
		png_destroy_read_struct(&png_ptr,(png_infopp)NULL,(png_infopp)NULL);
		goto embedpng_exit_2;
	}
	end_info=png_create_info_struct(png_ptr);
	if(!end_info)
	{
		warnx("Unable to create PNG info struct(*end_info)!");
		png_destroy_read_struct(&png_ptr,&info_ptr,(png_infopp)NULL);
		goto embedpng_exit_2;
	}
	/* libpng reports errors by jumping back to this point. */
	if(setjmp(png_jmpbuf(png_ptr)))
	{
		warnx("LibPNG crashed!");
		png_destroy_read_struct(&png_ptr,&info_ptr,&end_info);
		goto embedpng_exit_2;
	}
	/* Start over so that libpng sees the signature itself. */
	rewind(fp);
	png_init_io(png_ptr,fp);
	png_read_info(png_ptr,info_ptr);
	width=png_get_image_width(png_ptr,info_ptr);
	height=png_get_image_height(png_ptr,info_ptr);

	/* Only a warning: the picture is embedded regardless of its size. */
	if(width>1024 || height>768) warnx("Picture is too large!");

	/*
	 * According to Microsoft's RTF specification, \picwN and \pichN is
	 * mandatory for \pict group. Actually, in both Microsoft Word Viewer
	 * and OpenOffice, these two control words take no effect for PNG.
	 */
	printf("{\\pict\\pngblip\\picscalex100\\picscaley100\\picw%u\\pich%u",
		(unsigned int)width,(unsigned int)height);

	/* Dump the whole file as hexadecimal text, 64 bytes per line. */
	rewind(fp);
	while((nret=fread(buf,1,64,fp))>0)
	{
		printf("\n");
		for(i=0;i<nret;i++)
			printf("%02x",(unsigned int)buf[i]);
	}

	printf("}");

	warnx("'%s' (%ux%u) embedded.",fn,(unsigned int)width,(unsigned int)height);

	png_destroy_read_struct(&png_ptr,&info_ptr,&end_info);
	fclose(fp);
	goto embedpng_exit_0;

embedpng_exit_2:;
	fclose(fp);
embedpng_exit_1:;
	printf("%s",field); /* Keep link in RTF untouched */
embedpng_exit_0:;
	return;
}

/*
 * Write the \fcharsetN control word that corresponds to the selected
 * encoding to stdout, in place of the control word found in the input.
 * See also the section "Font Table" in RTF specification.
 *
 * fcharset: the control word found in the input; it is only used in the
 *           diagnostic message.
 */
void
modifycharset(char *fcharset)
{
	char *replacement="\\fcharset1"; /* "Default" */

	/* GB18030 is not supported in RTF so far, so it shares GB2312's value. */
	if(encoding==ENCODING_GB2312 || encoding==ENCODING_GB18030)
		replacement="\\fcharset134";
	else if(encoding==ENCODING_BIG5)
		replacement="\\fcharset136";

	printf("%s",replacement);

	warnx("Charset control word modified: %s -> %s",fcharset,replacement);
}

/*
 * (init|addto|flush)mybuffer maintain buffer to cache RTF stream
 * while fetching book/article information.
 *
 * initmybuffer empties the cache and resets every entry of "infobuf"
 * to an empty string.
 */
void initmybuffer()
{
	size_t idx=sizeof(infobuf)/sizeof(infobuf[0]);

	while(idx-->0)
	{
		infobuf[idx].text[0]=0;
		infobuf[idx].length=0;
	}
	mybufferlength=0;
}

/*
 * Append "leng" bytes of "text" to the cache.
 * Returns 0 on success, or -1 (leaving the cache unchanged) when the
 * cache would grow beyond MY_BUFFER_LIMIT bytes.
 */
int addtomybuffer(char *text, size_t leng)
{
	size_t newlength=mybufferlength+leng;

	if(newlength>MY_BUFFER_LIMIT) return -1;

	memcpy(mybuffer+mybufferlength,text,leng);
	mybufferlength=newlength; /* No terminator '\0' */
	return 0;
}

/* Write the cached bytes to yyout and leave the cache empty. */
void flushmybuffer()
{
	size_t pending=mybufferlength;

	mybufferlength=0;
	fwrite(mybuffer,1,pending,yyout);
}

/*
 * Append the current token (yytext, yyleng) to the cache.
 * When the cache is full, fetching is given up: haltfetch() writes the
 * information collected so far followed by the cached stream, the current
 * token is echoed, the scanner returns to start condition 0, a warning is
 * issued, and YY_BREAK is executed so that the remainder of the rule action
 * is skipped (presumably a plain "break", flex's default - confirm).
 * For use inside rule actions only.
 */
#define	ADDTOBUF { \
	if(addtomybuffer(yytext,yyleng)) \
	{ \
		haltfetch(); \
		ECHO; \
		BEGIN(0); \
		warnx("Had been fetching book/article information until buffer was full!"); \
		YY_BREAK; \
	} \
   }


/*
 * Append "leng" bytes of "text" to the information item "pinfobuf" points
 * to and keep the item NUL-terminated. "pinfobuf" must not be NULL.
 * When the item would reach MY_BUFFER_LIMIT bytes, the text is dropped
 * and two warnings are issued instead.
 */
void collectinfo(char *text, size_t leng)
{
	size_t used;

	assert(pinfobuf!=NULL);
	used=pinfobuf->length;

	if(used+leng<MY_BUFFER_LIMIT) /* Consider terminator '\0' */
	{
		memcpy(pinfobuf->text+used,text,leng);
		used+=leng;
		pinfobuf->text[used]=0;
		pinfobuf->length=used;
		return;
	}

	/* Information item buffer is full. */
	warnx("*** Too long text for title or author !!! ***");
	warnx("*** Buffer Overflow Attack To Be Considered !!! ***");
}

/*
 * Identify a RTF control word.
 *
 * text: the token to examine; only its first "leng" bytes are looked at.
 * leng: length of the token in bytes (may be 0).
 * key:  NUL-terminated control word to compare with, e.g. "\\page".
 *
 * A single trailing space in the token is treated as the delimiter of the
 * control word and is not part of the comparison.
 * Returns 1 when the token is exactly "key", otherwise 0.
 */
int identifyctrlword(char *text, size_t leng, char *key)
{
	size_t wordlen=leng;

	/* Checking the length first keeps an empty token from reading text[-1]. */
	if(wordlen>0 && text[wordlen-1]==' ')
		wordlen--; /* Tailed by a space as delimiter */

	if(strlen(key)!=wordlen) return 0;

	return memcmp(text,key,wordlen)==0;
}

/*
 * Write an RTF {\info ...} group to stdout. It holds the fetched title
 * and author and the current local time as creation time.
 * See also the section "Information Group" in RTF specification.
 */
void outputinfo()
{
	char stamp[128];
	time_t now;

	printf("{\\info\\uc0{\\title %s}{\\author %s}",
		infobuf[INFO_TITLE].text,infobuf[INFO_AUTHOR].text);

	now=time(NULL);
	strftime(stamp,sizeof(stamp),"\\yr%Y\\mo%m\\dy%d\\hr%H\\min%M\\sec%S",localtime(&now));

	/* Creation time, then the brace that closes the \info group. */
	printf("{\\creatim%s}}",stamp);
}

/*
 * Finish fetching: report the title and author found, write the
 * information group, then write out the cached RTF stream.
 */
void haltfetch()
{
	const char *title=infobuf[INFO_TITLE].text;
	const char *author=infobuf[INFO_AUTHOR].text;

	warnx("Title: %s",title);
	warnx("Author: %s",author);

	/* The information group has to precede the cached stream. */
	outputinfo();
	flushmybuffer();
}

%}

%option noyywrap

%s	fetchinfo

 /*
  * Named patterns used by the rules.
  *
  * pnglink       a complete \field group whose instruction contains
  *               INCLUDEPICTURE followed by a double-quoted file name.
  *               The file name must not contain a double quote or a
  *               newline, so that a match can never run on into a later
  *               quoted string (and thus a later field) on the same line.
  * sjischarset   the control word \fcharset128.
  * stylesheet    the opening of the style sheet group.
  * titlebegin    \pard, 1 to 25 arbitrary characters, then \fs49.
  * authorbegin   \pard, 1 to 25 arbitrary characters, then \fs34.
  * rtfhexvalue   \' followed by two hexadecimal digits.
  * rtfctrlword   backslash, lower-case letters, an optional signed number
  *               and an optional delimiting space.
  * rtfctrlsymbol backslash followed by one character that is not a
  *               lower-case letter.
  */
pnglink		\{\\field[^{}]*\{[^{}]*INCLUDEPICTURE[^{}]*\"[^\"\n]+\"[^{}]*\}\{[^{}]*\}[^{}]*\}
sjischarset	\\fcharset128
stylesheet	\{\\stylesheet[ ]?
titlebegin	\\pard.{1,25}\\fs49[ ]?
authorbegin	\\pard.{1,25}\\fs34[ ]?
rtfhexvalue	\\\'[0-9A-Fa-f]{2}
rtfctrlword	\\[a-z]+([-]?[0-9]+)?[ ]?
rtfctrlsymbol	\\[^a-z]

%%

{pnglink}  { /*
	      * Substitute RTF \pict group for RTF field group.
	      * An example generated by Jade/OpenJade:
	      * {\field\flddirty{\*\fldinst INCLUDEPICTURE "sockets/layers.png" }{\fldrslt }}
	      *
	      * NOTE(review): this rule, {sjischarset} and {stylesheet} carry
	      * no start condition and "fetchinfo" is declared with %s, so
	      * they presumably also fire while information is being fetched.
	      * Their output would then bypass the cache and precede the
	      * cached text - confirm whether such input occurs in practice.
	      */
		if(embedpng_enable) embedpng(yytext);
		else { ECHO; }
	   }

{sjischarset}  {
	      /*
	       * Jade/OpenJade mis-mark Chinese as Shift-JIS encoded Japanese.
	       * This may cause RTF viewer to display Chinese with Japanese font.
	       */
		if(encoding!=ENCODING_UNKNOWN) modifycharset(yytext);
		else { ECHO; }
	   }

{stylesheet}  { /* Insert book/article information just before style sheet. */
		if(fetchinfo_enable)
		{ /* Begin fetching book/article information. */
			initmybuffer();
			BEGIN(fetchinfo);
			fetchinfo_enable=0; /* FALSE, one-off */
			ADDTOBUF;
		}
		else
		{
			ECHO;
		}
	   }

<fetchinfo>{titlebegin}  { /* Beginning of title, hacked by font size. */
		ADDTOBUF;
		pinfobuf=&(infobuf[INFO_TITLE]);
		/* A further title is appended to the first, separated by ", ". */
		if(pinfobuf->length>0) collectinfo(", ",2); /* Duplicated */
	   }

<fetchinfo>{authorbegin}  { /* Beginning of author, hacked by font size. */
		ADDTOBUF;
		pinfobuf=&(infobuf[INFO_AUTHOR]);
		/* A further author is appended to the first, separated by ", ". */
		if(pinfobuf->length>0) collectinfo(", ",2); /* Duplicated */
	   }

<fetchinfo>{rtfhexvalue}  { /* A hexadecimal value, ignore. */
		ADDTOBUF;
	   }

<fetchinfo>\\~  { /* Nonbreaking space, a control symbol, collect */
		ADDTOBUF;
		if(pinfobuf!=NULL) collectinfo(" ",1);
	   }

<fetchinfo>\\[-_]  { /* Optional/nonbreaking hyphen, a control symbol, collect */
		ADDTOBUF;
		if(pinfobuf!=NULL) collectinfo("-",1);
	   }

<fetchinfo>{rtfctrlsymbol}  { /* Other control symbols, ignore */
		ADDTOBUF;
	   }

<fetchinfo>{rtfctrlword}  { /* Control word */
		ADDTOBUF;

		if(identifyctrlword(yytext,yyleng,"\\keepn"))
		{ /* End of title or author, actually a hack */
			pinfobuf=NULL;
		}
		else if(yytext[0]=='\\' && yytext[1]=='u' &&
			((yytext[2]>='0' && yytext[2]<='9') || yytext[2]=='-') )
		{ /* Unicode Character, collect */
			if(pinfobuf!=NULL)
			{
				collectinfo(yytext,yyleng);
				/* Make sure the control word is delimited. */
				if(yytext[yyleng-1]!=' ') collectinfo(" ",1);
			}
		}
		else if(identifyctrlword(yytext,yyleng,"\\page"))
		{ /* Accomplished !!!  */
			haltfetch();
			BEGIN(0);
		}
	   }

<fetchinfo>[\n{}]  {  /* Ignore */
		ADDTOBUF;
	   }

<fetchinfo>.  { /* Collect */
		ADDTOBUF;
		if(pinfobuf!=NULL) collectinfo(yytext,yyleng);
	   }

<fetchinfo><<EOF>>  { /*
	      * The input ended before "\page" was seen. Write the information
	      * collected so far and the cached stream, otherwise everything
	      * cached since the style sheet would be missing from the output.
	      */
		haltfetch();
		BEGIN(0);
		yyterminate();
	   }

%%

/* Write the usage message to stderr. */
void printusage()
{
	static const char usage[]=
		"Usage: fixrtf [-e encoding] [-i] [-p] < inputfile > outputfile\n"
		"     Fix RTF file generated by Jade/OpenJade.\n"
		"Options:\n"
		"       -e encoding\n"
		"             Specify encoding to do specific fixing. (GB2312|BIG5)\n"
		"       -i\n"
		"             Fill RTF file information, such as title and author,\n"
		"             hacked from RTF file generated by Jade/OpenJade.\n"
		"       -p\n"
		"             Embed linked PNG images into RTF file.\n";

	fputs(usage,stderr);
}

/*
 * Parse the command line, then run the scanner over the input.
 *
 * Options:
 *   -e encoding  select encoding-specific fixing (GB2312, GBK, GB18030 or
 *                BIG5, case-insensitive); any other name is reported and
 *                ignored
 *   -i           fetch title/author and write an information group
 *   -p           embed linked PNG images
 *
 * Returns 0 on success, or 1 when no argument was given, an unknown option
 * was used, or writing to stdout failed.
 */
int
main(int argc, char *argv[])
{
	int ch;

	if(argc<=1)
	{
		warnx("You should indicate at least one kind of fixing.");
		printusage();
		return 1;
	}

	while ((ch = getopt(argc, argv, "e:ip")) != -1)
	{
		switch (ch)
		{
		case 'e':
			if(strcasecmp(optarg,"GB2312")==0 ||
				strcasecmp(optarg,"GBK")==0)
			{
				encoding=ENCODING_GB2312;
			}
			else if(strcasecmp(optarg,"GB18030")==0)
			{
				encoding=ENCODING_GB18030;
			}
			else if(strcasecmp(optarg,"BIG5")==0)
			{
				encoding=ENCODING_BIG5;
			}
			else
			{
				/* Tell the user instead of dropping the option silently. */
				warnx("Unsupported encoding '%s' ignored.",optarg);
			}
			break;
		case 'i':
			fetchinfo_enable=1; /* One-off */
			break;
		case 'p':
			embedpng_enable=1;
			break;
		default:
			printusage();
			return 1;
		}
	}

	yylex();

	/*
	 * The result is written to stdout; a failed write must not end in
	 * exit status 0, or a build would go on with a truncated file.
	 */
	if(fflush(stdout)!=0 || ferror(stdout))
	{
		warnx("Failed to write output!");
		return 1;
	}

	return 0;
}