/*
 * Galatea Dialog Manager:
 * (c)2003 Takuya NISHIMOTO (nishimoto [atmark] m.ieice.org)
 *
 * $Id: GrammarMaker.java,v 1.12 2009/09/09 09:51:49 nishimoto Exp $
 */
package galatea.io.julius;

import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;

import galatea.logger.Logger;
import galatea.relaxer.vxml20.VxmlGrammar;
import galatea.relaxer.vxml20.VxmlItem;
import galatea.relaxer.vxml20.VxmlOneOf;
import galatea.relaxer.vxml20.VxmlRule;
import galatea.relaxer.vxml20.VxmlRuleref;
import galatea.relaxer.vxml20.VxmlToken;
import galatea.util.Getopt;
import galatea.util.Util;

/**
 * Rewrites a {@code grammar version="1.0"} XML grammar into the flattened
 * "IPA-SRG-XML" rule form and, from the command line, hands the result to
 * {@link GrammarTransformer} for transformation and compilation.
 *
 * <p>
 * The conversion works in two steps: references to rules in other files are
 * first pulled into a single grammar ({@code resolveExtRuleRef}), then every
 * rule is rewritten into rules that contain either a token or a list of
 * rulerefs ({@code makeIpaXml}). In the output, several rules may share one
 * id.
 */
public class GrammarMaker {
	private final Logger logger_ = new Logger(this.getClass());

	/** Debug trace hook; output is currently disabled. */
	private static void debug(String msg) {
		// System.err.println(msg);
	}

	/** Emits an empty debug line. */
	private static void debug() {
		debug("");
	}

	/** Prints a message line on standard output. */
	private static void info(String msg) {
		System.out.println(msg);
	}

	/** Prints an empty line on standard output. */
	private static void info() {
		info("");
	}

	public GrammarMaker() {
	}

	/** Counter behind {@link #_getUniqueId()}; incremented before each use. */
	private int unique_ = 0;

	/**
	 * Returns a fresh rule id of the form {@code grmN}; N starts at 1 and
	 * grows by one per call on this instance.
	 */
	private String _getUniqueId() {
		unique_++;
		return "grm" + unique_;
	}

	/**
	 * Removes every '#' from a ruleref uri.
	 *
	 * @param uri the uri to clean, may be null
	 * @return the uri without '#', or null when {@code uri} is null so that a
	 *         ruleref carrying no uri attribute is passed through untouched
	 *         instead of raising a NullPointerException
	 */
	private static String _removeHash(String uri) {
		if (uri == null) {
			return null;
		}
		// literal replacement; no regular expression is needed here
		return uri.replace("#", "");
	}

	/**
	 * Returns the ruleref of a one-of that consists of exactly one item whose
	 * only content is a ruleref, or null for any other shape.
	 */
	private static VxmlRuleref _getSoleRuleref(VxmlOneOf oneof) {
		if (oneof.sizeItem() != 1) {
			return null;
		}
		VxmlItem item = oneof.getItem(0);
		if (item.sizeContent() != 1) {
			return null;
		}
		if (!(item.getContent(0) instanceof VxmlRuleref)) {
			return null;
		}
		return (VxmlRuleref) (item.getContent(0));
	}

	/**
	 * Tells whether the item is handled as "item/ruleref+".
	 *
	 * @return true when the item has no token child. Note that an item with
	 *         neither tokens nor rulerefs also yields true.
	 */
	private boolean _isRulerefOnly(VxmlItem item) {
		for (int i = 0; i < item.sizeContent(); i++) {
			if (item.getContent(i) instanceof VxmlToken) {
				return false;
			}
		}
		return true;
	}

	/**
	 * Tells whether the item is handled as "item/token": exactly one token
	 * and no ruleref. Other children (such as tag) are allowed.
	 */
	private boolean _isToken(VxmlItem item) {
		int ntoken = 0;
		int nruleref = 0;
		for (int i = 0; i < item.sizeContent(); i++) {
			if (item.getContent(i) instanceof VxmlToken) {
				ntoken++;
			} else if (item.getContent(i) instanceof VxmlRuleref) {
				nruleref++;
			}
		}
		return ntoken == 1 && nruleref == 0;
	}

	// rule/token case:
	// the token is copied as-is into an IPA-SRG-XML rule.
	//
	// <rule id="no"><token sym="の">の</token></rule>
	//
	/**
	 * Appends to {@code ipasrg} a new rule with id {@code ruleid} that holds
	 * the given token.
	 */
	private void _addRuleOfToken(VxmlGrammar ipasrg, String ruleid,
			VxmlToken token) {
		VxmlRule rule = new VxmlRule();
		rule.setId(ruleid);
		rule.addContent(token);
		ipasrg.addContent(rule);
	}

	/**
	 * Appends to {@code ipasrg} a new rule with id {@code ruleid} that holds
	 * the first token found in {@code item}. The token's text is passed
	 * through {@code Util.removeSpaces} first (the token object itself is
	 * modified). When the item has no token an empty rule is appended.
	 */
	private void _addRuleOfToken(VxmlGrammar ipasrg, String ruleid,
			VxmlItem item) {
		VxmlRule rule = new VxmlRule();
		rule.setId(ruleid);
		for (int i = 0; i < item.sizeContent(); i++) {
			if (!(item.getContent(i) instanceof VxmlToken))
				continue;
			VxmlToken token = (VxmlToken) (item.getContent(i));
			token.setContent(Util.removeSpaces(token.getContent()));
			rule.addContent(token);
			break; // only the first token is used
		}
		ipasrg.addContent(rule);
	}

	// item/ruleref+ case:
	// a rule with the same id is generated.
	//
	// <item> <ruleref uri="#pre"/> <ruleref uri="#place"/> </item>
	//
	// <rule id="root"> <ruleref uri="pre"/> <ruleref uri="place"/> </rule>
	//
	/**
	 * Appends to {@code ipasrg} a new rule with id {@code ruleid} that holds
	 * every ruleref of {@code item}, in order, with '#' removed from each
	 * uri. The ruleref objects of {@code item} are modified and reused.
	 */
	private void _addRuleOfRuleref(VxmlGrammar ipasrg, String ruleid,
			VxmlItem item) {
		debug("_addRuleOfRuleref() with items " + item.sizeContent()); // DEBUG
		VxmlRule rule = new VxmlRule();
		rule.setId(ruleid);
		for (int i = 0; i < item.sizeContent(); i++) {
			if (!(item.getContent(i) instanceof VxmlRuleref))
				continue;

			VxmlRuleref ref = (VxmlRuleref) (item.getContent(i));
			String uri = _removeHash(ref.getUri());
			ref.setUri(uri);
			rule.addContent(ref);
			debug("_addRuleOfRuleref() added : " + uri + " " + ref.toString()); // DEBUG
		}
		ipasrg.addContent(rule);
	}

	/**
	 * Rewrites one source rule into IPA-SRG-XML rules appended to
	 * {@code ipasrg}. A rule without id gets a generated one.
	 *
	 * <p>
	 * Three shapes are recognised, tried in this order:
	 * <ol>
	 * <li>one-of and token children mixed at rule level: a single rule that
	 * is a sequence of rulerefs; each token is moved into its own generated
	 * rule;</li>
	 * <li>two or more one-of children that each hold exactly one item with
	 * exactly one ruleref: a single rule that is the sequence of those
	 * rulerefs;</li>
	 * <li>anything else: every child is converted on its own, producing one
	 * rule (all with the same id) per token / one-of item.</li>
	 * </ol>
	 */
	private void _makeIpaXmlFromRule(VxmlGrammar ipasrg, VxmlRule rule) {
		String ruleid = rule.getId();
		if (ruleid == null)
			ruleid = _getUniqueId();

		debug();
		debug("***** ipasrg " + ipasrg.toString());
		debug("***** rule " + rule.toString());
		debug("***** ruleid " + ruleid);

		int oneof_count = 0, token_count = 0;
		for (int i = 0; i < rule.sizeContent(); i++) {
			if (rule.getContent(i) instanceof VxmlOneOf) {
				oneof_count++;
			}
			if (rule.getContent(i) instanceof VxmlToken) {
				token_count++;
			}
		}
		if (oneof_count > 0 && token_count > 0) {
			debug("***** handle mixture of 'one-of' and 'token' elements");
			VxmlRule newrule = new VxmlRule();
			newrule.setId(ruleid);
			for (int i = 0; i < rule.sizeContent(); i++) {
				if (rule.getContent(i) instanceof VxmlOneOf) {
					// only a one-of wrapping a single ruleref is carried
					// over; any other one-of is dropped here
					VxmlRuleref newref = _getSoleRuleref((VxmlOneOf) (rule
							.getContent(i)));
					if (newref != null) {
						newref.setUri(_removeHash(newref.getUri()));
						newrule.addContent(newref);
					}
				} else if (rule.getContent(i) instanceof VxmlToken) {
					// move the token into a rule of its own and refer to it
					VxmlRule new_token_rule = new VxmlRule();
					String new_token_rule_id = _getUniqueId();
					new_token_rule.setId(new_token_rule_id);
					new_token_rule.addContent((VxmlToken) (rule.getContent(i)));
					ipasrg.addContent(new_token_rule);
					//
					VxmlRuleref newref = new VxmlRuleref();
					newref.setUri(new_token_rule_id);
					newrule.addContent(newref);
				}
			}
			ipasrg.addContent(newrule);
			return;
		}

		debug("***** find all 'one-of item ruleref' elements");
		ArrayList<VxmlRuleref> a = new ArrayList<VxmlRuleref>();
		for (int i = 0; i < rule.sizeContent(); i++) {
			if (rule.getContent(i) instanceof VxmlOneOf) {
				VxmlRuleref sole = _getSoleRuleref((VxmlOneOf) (rule
						.getContent(i)));
				if (sole != null) {
					a.add(sole);
				}
			}
		}
		if (a.size() >= 2) {
			debug("***** sequence of ruleref : size=" + a.size());
			VxmlRule newrule = new VxmlRule();
			newrule.setId(ruleid);
			for (int i = 0; i < a.size(); i++) {
				VxmlRuleref newref = a.get(i);
				newref.setUri(_removeHash(newref.getUri()));
				newrule.addContent(newref);
			}
			debug("***** newrule " + newrule.toString());
			ipasrg.addContent(newrule);
			return;
		}

		for (int i = 0; i < rule.sizeContent(); i++) {
			debug("**** " + i + " of " + rule.sizeContent());

			if (rule.getContent(i) instanceof VxmlToken) {
				// rule/token case:
				// copied as-is into IPA-SRG-XML-1.0
				//
				// TODO: interpret a <tag> that follows the <token>
				VxmlToken token = (VxmlToken) (rule.getContent(i));
				debug("*** token " + token.toString());
				_addRuleOfToken(ipasrg, ruleid, token);

			} else if (rule.getContent(i) instanceof VxmlOneOf) {
				VxmlOneOf oneof = (VxmlOneOf) (rule.getContent(i));

				debug("*** one-of " + oneof.makeTextDocument());

				// foreach item+
				for (int j = 0; j < oneof.sizeItem(); j++) {

					VxmlItem item = oneof.getItem(j);
					debug("** one-of item " + item.makeTextDocument());

					if (_isRulerefOnly(item)) {
						// rule/one-of/item+/ruleref+ case:
						// several rules with the same id are generated
						_addRuleOfRuleref(ipasrg, ruleid, item);

					} else if (_isToken(item)) {
						// rule/one-of/item+/token case:
						// a new id is generated per item and the pair
						// rule/ruleref(@new-id)
						// rule(@new-id)/token
						// is emitted.
						//
						// <rule id="etto"><ruleref uri="grm1_etto"/></rule>
						// <rule id="grm1_etto"><token sym="あ">あ</token></rule>
						// <rule id="etto"><ruleref uri="grm2_etto"/></rule>
						// <rule id="grm2_etto"><token
						// sym="あの">あの</token></rule>

						String newid = _getUniqueId() + "_" + ruleid;

						VxmlRuleref nref = new VxmlRuleref();
						nref.setUri(newid);

						VxmlRule nrule = new VxmlRule();
						nrule.setId(ruleid);
						nrule.addContent(nref);
						debug("adding " + nrule.toString());
						ipasrg.addContent(nrule);

						_addRuleOfToken(ipasrg, newid, item);
					}
					// items mixing tokens with rulerefs, or holding several
					// tokens, are not converted
				}

			} else if (rule.getContent(i) instanceof VxmlItem) {
				// rule/item/token: one rule per token of the item
				VxmlItem item = (VxmlItem) (rule.getContent(i));
				for (int j = 0; j < item.sizeContent(); j++) {
					if (item.getContent(j) instanceof VxmlToken) {
						debug("<rule> <item> <token>");
						VxmlToken token = (VxmlToken) (item.getContent(j));
						_addRuleOfToken(ipasrg, ruleid, token);
					}
				}
			}
		}
	}

	/**
	 * Converts every rule of {@code gram} and returns the result as a new
	 * grammar. Non-rule content of {@code gram} is not copied.
	 *
	 * <p>
	 * Difference between grammar version="1.0" and version="IPA-SRG-XML-1.0":
	 * in version 1.0 no two elements share the same id, and a uri may point
	 * into an external file.
	 *
	 * @param gram the source grammar; its rule contents may be modified and
	 *            reused in the result
	 * @return a new grammar holding only the generated rules
	 */
	public VxmlGrammar makeIpaXml(VxmlGrammar gram) {
		VxmlGrammar ipasrg = new VxmlGrammar();
		debug("***** makeIpaXml(gram) gram " + gram.toString());

		for (int i = 0; i < gram.sizeContent(); i++) {
			if (gram.getContent(i) instanceof VxmlRule) {
				VxmlRule rule = (VxmlRule) (gram.getContent(i));
				_makeIpaXmlFromRule(ipasrg, rule);
			}
		}
		return ipasrg;
	}

	/**
	 * Loads the grammar addressed by {@code uri} and returns a copy of one of
	 * its rules.
	 *
	 * <ul>
	 * <li>uri without fragment: the grammar's {@code root} attribute names
	 * the rule to return;</li>
	 * <li>uri with fragment: the rule with that id is returned.</li>
	 * </ul>
	 *
	 * When no rule matches, an empty rule carrying the looked-up id is
	 * returned. Rules without id are skipped.
	 *
	 * @param uri address of the grammar file, optionally followed by a
	 *            fragment
	 * @return a new rule object whose id is the looked-up id
	 * @throws Exception when the grammar cannot be loaded
	 */
	private VxmlRule _getRuleByUri(String uri) throws Exception {
		logger_.print("_getRuleByUri() " + uri);

		String subfile = Util.getUriWithoutFragment(uri);
		String fragment = Util.getUriFragment(uri);

		logger_.print("new VxmlGrammar(" + subfile + ")");
		VxmlGrammar src = new VxmlGrammar(subfile);
		VxmlRule newrule = new VxmlRule();

		if (fragment == null || fragment.length() == 0) {
			// no fragment given: fall back to the grammar's root rule
			String root = src.getRoot();
			if (root != null && root.length() > 0) {
				fragment = root;
			}
		}

		if (fragment != null) {
			for (int i = 0; i < src.sizeContent(); i++) {
				if (src.getContent(i) instanceof VxmlRule) {
					VxmlRule rule = (VxmlRule) (src.getContent(i));
					// compare from the known non-null side; a rule may lack
					// an id
					if (fragment.equals(rule.getId())) {
						logger_.print("_getRuleByUri() " + fragment + " found");
						newrule.setup(rule);
					}
				}
			}
		}

		newrule.setId(fragment);
		return newrule;
	}

	/**
	 * Makes sure the external rule addressed by {@code uri} (and everything
	 * it refers to) has been copied into {@code dest}, and returns the id
	 * under which it was copied.
	 *
	 * @param imported resolved uri to rule id, for every external rule
	 *            handled so far in the current resolution run. The entry is
	 *            recorded before descending into the rule so that recursive
	 *            or mutually recursive external rules terminate, and so that
	 *            a rule referenced several times is loaded and copied once.
	 */
	private String _importExternalRule(VxmlGrammar dest, String uri,
			String org, Map<String, String> imported) throws Exception {
		if (imported.containsKey(uri)) {
			return imported.get(uri);
		}
		VxmlRule subrule = _getRuleByUri(uri);
		String id = subrule.getId();
		imported.put(uri, id);
		_resolveExtRuleRef(dest, subrule, uri, org, imported);
		return id;
	}

	/**
	 * Resolves the external references of one grammar version="1.0" rule.
	 *
	 * <p>
	 * A copy of {@code rule} is appended to {@code dest}. Every ruleref found
	 * directly under the rule or under rule/one-of/item that points outside
	 * of {@code org} is rewritten to a local {@code #id} reference, and the
	 * rule it points to is loaded and appended to {@code dest} as well,
	 * recursively. A ruleref directly under the rule is additionally wrapped
	 * into one-of/item. Rulerefs without uri are left alone.
	 *
	 * <p>
	 * Within one call each external rule is imported at most once, so
	 * recursive external rules do not cause endless recursion.
	 *
	 * @param dest grammar receiving the rewritten rule(s)
	 * @param rule rule to process
	 * @param adrs address against which relative uris in {@code rule} are
	 *            resolved
	 * @param org address of the grammar being converted; references into
	 *            this file are not imported
	 * @throws Exception when a referenced grammar cannot be loaded
	 */
	public void resolveExtRuleRef(VxmlGrammar dest, VxmlRule rule, String adrs,
			String org) throws Exception {
		_resolveExtRuleRef(dest, rule, adrs, org,
				new HashMap<String, String>());
	}

	/**
	 * Worker behind both public {@code resolveExtRuleRef} methods; see there
	 * for the contract. {@code imported} carries the external rules already
	 * handled in this run (see {@link #_importExternalRule}).
	 */
	private void _resolveExtRuleRef(VxmlGrammar dest, VxmlRule rule,
			String adrs, String org, Map<String, String> imported)
			throws Exception {
		// debug("***** resolveExtRuleRef(dest,rule,adrs,org) adrs=" + adrs +
		// " org=" + org);
		debug("***** resolveExtRuleRef(dest,rule,adrs,org) rule="
				+ rule.toString());
		VxmlRule newrule = new VxmlRule();
		newrule.setId(rule.getId());

		for (int j = 0; j < rule.sizeContent(); j++) {
			if (rule.getContent(j) instanceof VxmlOneOf) {
				VxmlOneOf oneof = (VxmlOneOf) (rule.getContent(j));
				VxmlOneOf newoneof = new VxmlOneOf();
				newoneof.setup(oneof);

				for (int k = 0; k < oneof.sizeItem(); k++) {
					VxmlItem item = oneof.getItem(k);

					for (int m = 0; m < item.sizeContent(); m++) {
						if (!(item.getContent(m) instanceof VxmlRuleref))
							continue;
						VxmlRuleref ref = (VxmlRuleref) (item.getContent(m));

						String uri = ref.getUri();

						if (uri == null)
							continue;
						logger_.print("adrs=" + adrs + " uri=" + uri);
						uri = Util.resolveAdrs(adrs, uri);

						logger_.print("org=" + org + " resolved=" + uri);

						if (!Util.isSameFile(org, uri)) {
							String id = _importExternalRule(dest, uri, org,
									imported);

							// patch the ruleref at the same position of the
							// copied one-of
							VxmlRuleref r = (VxmlRuleref) (newoneof
									.getItem(k).getContent(m));
							r.setUri("#" + id);
						}
					}
				}
				newrule.addContent(newoneof);

			} else if (rule.getContent(j) instanceof VxmlRuleref) {
				// from : <ruleref uri="dialog.grxml#pref"/>
				// to : <one-of> <item> <ruleref uri="#pref"/>
				// </item> </one-of>
				VxmlRuleref ref = (VxmlRuleref) (rule.getContent(j));
				String uri = ref.getUri();

				logger_.print("adrs=" + adrs);
				logger_.print("uri=" + uri);

				if (uri == null)
					continue;
				uri = Util.resolveAdrs(adrs, uri);

				logger_.print("uri org:" + org + " resolved:" + uri);

				VxmlOneOf newoneof = new VxmlOneOf();

				VxmlItem newitem = new VxmlItem();
				newoneof.addItem(newitem);

				VxmlRuleref newruleref = new VxmlRuleref();
				newitem.addContent(newruleref);

				String targetId;
				if (!Util.isSameFile(org, uri)) {
					targetId = _importExternalRule(dest, uri, org, imported);
				} else {
					targetId = Util.getUriFragment(uri);
				}
				newruleref.setUri("#" + targetId);

				newrule.addContent(newoneof);

			} else {
				newrule.addContent(rule.getContent(j));
			}
		}
		dest.addContent(newrule);
	}

	/**
	 * Resolves the external references of a whole grammar version="1.0".
	 *
	 * <p>
	 * Every rule of {@code src} is processed as described for
	 * {@link #resolveExtRuleRef(VxmlGrammar, VxmlRule, String, String)};
	 * every other content of {@code src} is appended to {@code dest}
	 * unchanged. An external rule referenced from several rules of
	 * {@code src} is imported once.
	 *
	 * <p>
	 * Example of a rule with an external reference:
	 * {@code <rule id="korega"> <one-of> <item> <ruleref uri="dialog.grxml#kore"/>}
	 *
	 * @param dest grammar receiving the result
	 * @param src grammar to process
	 * @param adrs address against which relative uris in {@code src} are
	 *            resolved
	 * @param org address of the grammar being converted
	 * @throws Exception when a referenced grammar cannot be loaded
	 */
	public void resolveExtRuleRef(VxmlGrammar dest, VxmlGrammar src,
			String adrs, String org) throws Exception {
		Map<String, String> imported = new HashMap<String, String>();
		for (int i = 0; i < src.sizeContent(); i++) {
			if (src.getContent(i) instanceof VxmlRule) {
				VxmlRule rule = (VxmlRule) (src.getContent(i));
				_resolveExtRuleRef(dest, rule, adrs, org, imported);
			} else {
				dest.addContent(src.getContent(i));
			}
		}
	}

	/**
	 * Full conversion of a grammar loaded from {@code file}: external
	 * references are resolved, the rules are rewritten, tokens are converted
	 * with {@code GrammarUtil.convertToken}, and the result is stamped with
	 * version "IPA-XML-1.0" and the root of {@code gramXml}.
	 *
	 * @param gramXml the source grammar
	 * @param file address of {@code gramXml}, used both as base address and
	 *            as the "own file" when resolving references
	 * @return the converted grammar
	 * @throws Exception when a referenced grammar cannot be loaded
	 */
	public VxmlGrammar makeIpaXml(VxmlGrammar gramXml, String file)
			throws Exception {
		VxmlGrammar src = new VxmlGrammar();
		debug("***** makeIpaXML(gramXml,file) gramXml " + gramXml.toString());
		resolveExtRuleRef(src, gramXml, file, file);
		debug("***** makeIpaXML(gramXml,file) src " + src.toString());
		VxmlGrammar ipasrg = makeIpaXml(src);
		GrammarUtil.convertToken(ipasrg);
		ipasrg.setVersion("IPA-XML-1.0");
		ipasrg.setRoot(gramXml.getRoot());
		return ipasrg;
	}

	/**
	 * Command line entry point: {@code [opt] gram dest}.
	 *
	 * <p>
	 * Options: {@code -d dir} (location of the mkdfa tools), {@code -e enc}
	 * (output encoding), {@code -V} (verbose). The converted grammar is
	 * written as UTF-8 to {@code dest + ".ipaxml"} and then transformed and
	 * compiled through {@link GrammarTransformer}. With a wrong number of
	 * arguments the usage text is printed. When transformation or
	 * compilation fails, the collected errors and outputs are printed and
	 * the JVM exits with status -1; on success "ok" is printed.
	 */
	public static void main(String argv[]) throws Exception {
		String mkdfaDir = null;
		String encoding = null;
		boolean verbose = false;
		Getopt g = new Getopt("", argv, "d:e:V" /* "c:vpD:" */);
		g.setOpterr(false);
		int c;
		while ((c = g.getopt()) != -1) {
			switch (c) {
			case 'd':
				mkdfaDir = g.getOptarg();
				break;
			case 'e':
				encoding = g.getOptarg();
				break;
			case 'V':
				verbose = true;
				break;
			}
		}
		if (argv.length - g.getOptind() != 2) {
			info(galatea.dialog.DialogStudioVersion.COPYRIGHT);
			info(galatea.dialog.DialogStudioVersion.TSTAMP);
			info("GrammarMaker for Julius/Julian");
			info("Usage : java -cp gdm.jar galatea.io.julius.GrammarMaker [opt] gram dest");
			info("gram : xml grammar file");
			info("dest : target path + file_name without extentions");
			info("[opt]");
			info("-d dir : location of mkdfa.pl / mkfa etc.");
			info("-e enc : encoding for output");
			info("         default linux:euc-jp / windows:shift_jis");
			return;
		}
		String file = argv[g.getOptind()];
		String location = argv[g.getOptind() + 1];
		VxmlGrammar vg = new VxmlGrammar();
		if (verbose) {
			info(file);
		}
		vg.setup(file);
		String document = GrammarUtil.getJulianXmlGrammar(vg, file);
		// put rule / token / closing grammar tags on lines of their own
		document = document.replaceAll("<rule", "\n<rule").replaceAll(
				"\\<\\/rule", "\n</rule").replaceAll("\\<token", "\n<token")
				.replaceAll("\\<\\/grammar", "\n</grammar");
		Util.writeToFile(location + ".ipaxml", document, Charset
				.forName("utf-8"));
		if (verbose) {
			info(document);
		}
		GrammarTransformer gt = new GrammarTransformer();
		if (verbose) {
			gt.setVerbose(true);
		}
		if (mkdfaDir != null) {
			gt.setMkdfaDir(mkdfaDir);
		}
		if (encoding != null) {
			gt.setEncoding(encoding);
		}
		// debug("doTransform...");
		if (!gt.doTransform(document, location)) {
			String msg = "GrammarTransformer doTransform " + location + "\n"
					+ "[err]\n" + gt.getErrors() + "[out]\n" + gt.getOutputs();
			System.out.print(msg);
			System.exit(-1);
		}
		if (!gt.doCompile(document, location)) {
			String msg = "GrammarTransformer doCompile " + location + "\n"
					+ "[err]\n" + gt.getErrors() + "[out]\n" + gt.getOutputs();
			System.out.print(msg);
			System.exit(-1);
		}
		info("ok");
	}

}
