package smart_gs.transcription_tool.tesseract;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * A line element made up of an ordered list of {@link HocrWord}s.
 *
 * <p>The line's text is the text of its words joined by single spaces, and its
 * zero-based line number is parsed from the inherited {@code id} field.
 */
public class HocrLine extends HocrElement{

	/** Extracts the digits that follow {@code line_} in an element id. */
	private static final Pattern LINE_ID_PATTERN = Pattern.compile("line_(\\d+)");

	/** Words of this line, in order; {@code null} until set or until the first word is added. */
	private List<HocrWord> words;

	/**
	 * Returns the text of all words in this line, separated by single spaces.
	 *
	 * @return the joined word text, or an empty string if no words have been set
	 */
	@Override
	public String getText() {
		if (this.words == null) {
			return "";
		}
		StringBuilder buf = new StringBuilder();
		for (HocrWord w : this.words) {
			// Compare by length: the previous `toString() != ""` check compared
			// references, so a leading space could be emitted before the first word.
			if (buf.length() > 0) {
				buf.append(" ");
			}
			buf.append(w.getText());
		}
		return buf.toString();
	}

	/**
	 * Returns the list of words backing this line.
	 *
	 * @return the word list (not a copy); {@code null} if none has been set or added
	 */
	public List<HocrWord> getWords() {
		return words;
	}

	/**
	 * Replaces the list of words backing this line.
	 *
	 * @param words the new word list; it is stored as-is, not copied
	 */
	public void setWords(List<HocrWord> words) {
		this.words = words;
	}

	/**
	 * Parses the line number out of this element's id.
	 *
	 * <p>The id is searched for {@code line_<digits>}; the number found is
	 * decremented by one before being returned.
	 *
	 * @return the parsed number minus one, or {@code 0} if the id is
	 *         {@code null} or contains no {@code line_<digits>} sequence
	 * @throws NumberFormatException if the digit sequence does not fit in an {@code int}
	 */
	public int getLineNum () {
		if (this.id == null) {
			return 0;
		}
		Matcher m = LINE_ID_PATTERN.matcher(this.id);
		if (m.find()) {
			return Integer.parseInt(m.group(1)) - 1;
		}
		return 0;
	}

	/**
	 * Appends a word to the end of this line, creating the word list on first use.
	 *
	 * @param word the word to append
	 */
	public void addWord(HocrWord word) {
		if (this.words == null) {
			this.words = new ArrayList<HocrWord>();
		}
		this.words.add(word);
	}

	/**
	 * Returns a debug representation containing this line's text.
	 *
	 * @return a string of the form {@code HocrLine[text=...]}
	 */
	@Override
	public String toString() {
		return "HocrLine[text=" + this.getText() + "]";
	}

}
