/*
 * Decompiled with CFR 0.152.
 */
package org.apache.ctakes.dictionary.lookup2.ae;

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.ctakes.core.pipeline.PipeBitInfo;
import org.apache.ctakes.core.util.collection.CollectionMap;
import org.apache.ctakes.dictionary.lookup2.ae.AbstractJCasTermAnnotator;
import org.apache.ctakes.dictionary.lookup2.dictionary.RareWordDictionary;
import org.apache.ctakes.dictionary.lookup2.term.RareWordTerm;
import org.apache.ctakes.dictionary.lookup2.textspan.DefaultTextSpan;
import org.apache.ctakes.dictionary.lookup2.textspan.MultiTextSpan;
import org.apache.ctakes.dictionary.lookup2.textspan.TextSpan;
import org.apache.ctakes.dictionary.lookup2.util.FastLookupToken;
import org.apache.log4j.Logger;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.factory.AnalysisEngineFactory;
import org.apache.uima.resource.ResourceInitializationException;

@PipeBitInfo(name="Dictionary Lookup (Overlap)", description="Annotates clinically-relevant terms.  Terms can overlap dictionary entries.", dependencies={PipeBitInfo.TypeProduct.SENTENCE, PipeBitInfo.TypeProduct.BASE_TOKEN}, products={PipeBitInfo.TypeProduct.IDENTIFIED_ANNOTATION})
public final class OverlapJCasTermAnnotator
extends AbstractJCasTermAnnotator {
    private final Logger _logger = Logger.getLogger((String)"OverlapJCasTermAnnotator");
    private static final String CONS_SKIP_PRP_KEY = "consecutiveSkips";
    private static final String TOTAL_SKIP_PRP_KEY = "totalTokenSkips";
    @ConfigurationParameter(name="consecutiveSkips", mandatory=false, description="Number of consecutive non-comma tokens that can be skipped")
    private int _consecutiveSkipMax = 2;
    @ConfigurationParameter(name="totalTokenSkips", mandatory=false, description="Number of total tokens that can be skipped")
    private int _totalSkipMax = 4;

    @Override
    public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
        super.initialize(uimaContext);
        this._logger.info((Object)("Maximum consecutive tokens that can be skipped: " + this._consecutiveSkipMax));
        this._logger.info((Object)("Maximum tokens that can be skipped: " + this._totalSkipMax));
    }

    @Override
    public void findTerms(RareWordDictionary dictionary, List<FastLookupToken> allTokens, List<Integer> lookupTokenIndices, CollectionMap<TextSpan, Long, ? extends Collection<Long>> termsFromDictionary) {
        for (Integer lookupTokenIndex : lookupTokenIndices) {
            FastLookupToken lookupToken = allTokens.get(lookupTokenIndex);
            Collection<RareWordTerm> rareWordHits = dictionary.getRareWordHits(lookupToken);
            if (rareWordHits == null || rareWordHits.isEmpty()) continue;
            for (RareWordTerm rareWordHit : rareWordHits) {
                TextSpan overlapSpan;
                if (rareWordHit.getText().length() < this._minimumLookupSpan) continue;
                if (rareWordHit.getTokenCount() == 1) {
                    termsFromDictionary.placeValue((Object)lookupToken.getTextSpan(), (Object)rareWordHit.getCuiCode());
                    continue;
                }
                int termStartIndex = lookupTokenIndex - rareWordHit.getRareWordIndex();
                if (termStartIndex < 0 || termStartIndex + rareWordHit.getTokenCount() > allTokens.size() || (overlapSpan = OverlapJCasTermAnnotator.getOverlapTerm(allTokens, lookupTokenIndex, rareWordHit, this._consecutiveSkipMax, this._totalSkipMax)) == null) continue;
                termsFromDictionary.placeValue((Object)overlapSpan, (Object)rareWordHit.getCuiCode());
            }
        }
    }

    private static TextSpan getOverlapTerm(List<FastLookupToken> allTokens, int lookupTokenIndex, RareWordTerm rareWordHit, int consecutiveSkipMax, int totalSkipMax) {
        String[] hitTokens = rareWordHit.getTokens();
        ArrayList<TextSpan> missingSpanKeys = new ArrayList<TextSpan>();
        int consecutiveSkips = 0;
        int totalSkips = 0;
        int firstWordIndex = -1;
        if (rareWordHit.getRareWordIndex() == 0) {
            firstWordIndex = lookupTokenIndex;
        } else {
            int nextRareWordIndex = rareWordHit.getRareWordIndex() - 1;
            for (int allTokensIndex = lookupTokenIndex - 1; allTokensIndex >= 0; --allTokensIndex) {
                if (hitTokens[nextRareWordIndex].equals(allTokens.get(allTokensIndex).getText()) || hitTokens[nextRareWordIndex].equals(allTokens.get(allTokensIndex).getVariant())) {
                    if (--nextRareWordIndex < 0) {
                        firstWordIndex = allTokensIndex;
                        break;
                    }
                    consecutiveSkips = 0;
                    continue;
                }
                missingSpanKeys.add(allTokens.get(allTokensIndex).getTextSpan());
                if (!allTokens.get(allTokensIndex).getText().equals(",") && ++consecutiveSkips > consecutiveSkipMax || ++totalSkips > totalSkipMax) break;
            }
            if (firstWordIndex == -1) {
                return null;
            }
        }
        int lastWordIndex = -1;
        if (rareWordHit.getRareWordIndex() == rareWordHit.getTokenCount() - 1) {
            lastWordIndex = lookupTokenIndex;
        } else {
            consecutiveSkips = 0;
            int nextRareWordIndex = rareWordHit.getRareWordIndex() + 1;
            for (int allTokensIndex = lookupTokenIndex + 1; allTokensIndex < allTokens.size(); ++allTokensIndex) {
                if (hitTokens[nextRareWordIndex].equals(allTokens.get(allTokensIndex).getText()) || hitTokens[nextRareWordIndex].equals(allTokens.get(allTokensIndex).getVariant())) {
                    if (++nextRareWordIndex >= rareWordHit.getTokenCount()) {
                        lastWordIndex = allTokensIndex;
                        break;
                    }
                    consecutiveSkips = 0;
                    continue;
                }
                missingSpanKeys.add(allTokens.get(allTokensIndex).getTextSpan());
                if (++consecutiveSkips > consecutiveSkipMax || ++totalSkips > totalSkipMax) break;
            }
            if (lastWordIndex == -1) {
                return null;
            }
        }
        if (missingSpanKeys.isEmpty()) {
            return new DefaultTextSpan(allTokens.get(firstWordIndex).getStart(), allTokens.get(lastWordIndex).getEnd());
        }
        return new MultiTextSpan(allTokens.get(firstWordIndex).getStart(), allTokens.get(lastWordIndex).getEnd(), missingSpanKeys);
    }

    public static AnalysisEngineDescription createAnnotatorDescription() throws ResourceInitializationException {
        return AnalysisEngineFactory.createEngineDescription(OverlapJCasTermAnnotator.class, (Object[])new Object[0]);
    }

    public static AnalysisEngineDescription createAnnotatorDescription(String descriptorPath) throws ResourceInitializationException {
        return AnalysisEngineFactory.createEngineDescription(OverlapJCasTermAnnotator.class, (Object[])new Object[]{"DictionaryDescriptor", descriptorPath});
    }

    public static AnalysisEngineDescription createAnnotatorDescription(String descriptorPath, int consecutiveSkipMax, int totalSkipMax) throws ResourceInitializationException {
        return AnalysisEngineFactory.createEngineDescription(OverlapJCasTermAnnotator.class, (Object[])new Object[]{"DictionaryDescriptor", descriptorPath, CONS_SKIP_PRP_KEY, consecutiveSkipMax, TOTAL_SKIP_PRP_KEY, totalSkipMax});
    }
}

