/*
 * Decompiled with CFR 0.152.
 */
package org.apache.ctakes.core.ae;

import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.ctakes.core.ae.TokenConverter;
import org.apache.ctakes.core.nlp.tokenizer.Token;
import org.apache.ctakes.core.nlp.tokenizer.Tokenizer;
import org.apache.ctakes.core.pipeline.PipeBitInfo;
import org.apache.ctakes.core.resource.StringIntegerMapResource;
import org.apache.ctakes.core.util.ParamUtil;
import org.apache.ctakes.typesystem.type.syntax.BaseToken;
import org.apache.ctakes.typesystem.type.textspan.Segment;
import org.apache.log4j.Logger;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.JFSIndexRepository;
import org.apache.uima.resource.ResourceAccessException;
import org.apache.uima.resource.ResourceInitializationException;

@PipeBitInfo(name="Tokenizer", description="Annotates Document Tokens.", dependencies={PipeBitInfo.TypeProduct.SECTION}, products={PipeBitInfo.TypeProduct.BASE_TOKEN})
public class TokenizerAnnotator
extends JCasAnnotator_ImplBase {
    private Logger logger = Logger.getLogger((String)((Object)((Object)this)).getClass().getName());
    public static final int TOKEN_CAP_NONE = 0;
    public static final int TOKEN_CAP_FIRST_ONLY = 1;
    public static final int TOKEN_CAP_MIXED = 2;
    public static final int TOKEN_CAP_ALL = 3;
    public static final int TOKEN_NUM_POS_NONE = 0;
    public static final int TOKEN_NUM_POS_FIRST = 1;
    public static final int TOKEN_NUM_POS_MIDDLE = 2;
    public static final int TOKEN_NUM_POS_LAST = 3;
    public static final int TOKEN_NUM_TYPE_UNKNOWN = 0;
    public static final int TOKEN_NUM_TYPE_INTEGER = 1;
    public static final int TOKEN_NUM_TYPE_DECIMAL = 2;
    public static final String PARAM_SEGMENTS_TO_SKIP = "SegmentsToSkip";
    private final String HYPH_FREQ_TABLE_RESRC_KEY = "HyphFreqTable";
    private UimaContext context;
    private Set<String> skipSegmentsSet;
    private Tokenizer tokenizer;
    private int tokenCount = 0;

    public void initialize(UimaContext aContext) throws ResourceInitializationException {
        super.initialize(aContext);
        this.context = aContext;
        try {
            this.configInit();
        }
        catch (ResourceAccessException ace) {
            throw new ResourceInitializationException((Throwable)ace);
        }
    }

    private void configInit() throws ResourceAccessException {
        this.skipSegmentsSet = ParamUtil.getStringParameterValuesSet(PARAM_SEGMENTS_TO_SKIP, this.context);
        int freqCutoff = (Integer)this.context.getConfigParameterValue("FreqCutoff");
        StringIntegerMapResource strIntMapResrc = (StringIntegerMapResource)this.context.getResourceObject("HyphFreqTable");
        if (strIntMapResrc == null) {
            this.logger.warn((Object)"Unable to locate resource with key=HyphFreqTable.  Proceeding without hyphenation support.");
            this.tokenizer = new Tokenizer();
        } else {
            this.logger.info((Object)("Hyphen dictionary: " + strIntMapResrc.toString()));
            Map<String, Integer> hyphMap = strIntMapResrc.getMap();
            this.tokenizer = new Tokenizer(hyphMap, freqCutoff);
        }
    }

    public void process(JCas jcas) throws AnalysisEngineProcessException {
        this.logger.info((Object)"process(JCas)");
        this.tokenCount = 0;
        JFSIndexRepository indexes = jcas.getJFSIndexRepository();
        for (Segment sa : indexes.getAnnotationIndex(Segment.type)) {
            String segmentID = sa.getId();
            if (this.skipSegmentsSet.contains(segmentID)) continue;
            this.annotateRange(jcas, sa.getBegin(), sa.getEnd());
        }
    }

    protected void annotateRange(JCas jcas, int beginPos, int endPos) throws AnalysisEngineProcessException {
        String text = jcas.getDocumentText().substring(beginPos, endPos);
        List<Token> tokens = null;
        try {
            tokens = this.tokenizer.tokenizeAndSort(text);
        }
        catch (Exception e) {
            throw new AnalysisEngineProcessException((Throwable)e);
        }
        for (Token token : tokens) {
            BaseToken bta = TokenConverter.convert(token, jcas, beginPos);
            bta.setTokenNumber(this.tokenCount);
            bta.addToIndexes();
            ++this.tokenCount;
        }
    }
}

