/*
 * Decompiled with CFR 0.152.
 */
package org.apache.ctakes.coreference.ae.features;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
import org.apache.ctakes.typesystem.type.syntax.BaseToken;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.cleartk.ml.Feature;

/**
 * Produces boolean string-matching features for a pair of mentions.
 *
 * <p>The features compare the covered text of the two mentions (exact match,
 * first-word match, last-word match, match after stripping a leading determiner)
 * and the sets of lower-cased token strings they cover (shared token, token
 * substring). No features are produced when either mention is a single token
 * tagged as a pronoun or determiner (see {@link #isPronoun(IdentifiedAnnotation)}).
 */
public class StringMatchingFeatureExtractor
implements RelationFeaturesExtractor<IdentifiedAnnotation, IdentifiedAnnotation> {
    /** Number of characters compared by the start/end match when a mention contains no space. */
    private static final int AFFIX_LENGTH = 5;

    /**
     * Lower-case determiner prefixes (including the trailing space) removed by
     * {@link #nonDetSubstr(String)}. No entry is a prefix of another, so at most one can match.
     */
    private static final String[] DETERMINER_PREFIXES = {"the ", "a ", "this ", "that ", "these ", "those "};

    /**
     * Builds the string-matching features for the mention pair.
     *
     * @param jCas the CAS containing both mentions (not read directly by this method)
     * @param arg1 the first mention
     * @param arg2 the second mention
     * @return an empty list if either mention is a pronoun/determiner token; otherwise the six
     *         features MATCH_EXACT, MATCH_START, MATCH_END, MATCH_SOON, MATCH_OVERLAP and
     *         MATCH_SUBSTRING, in that order, each with a boolean value
     * @throws AnalysisEngineProcessException declared by the implemented interface
     */
    public List<Feature> extract(JCas jCas, IdentifiedAnnotation arg1, IdentifiedAnnotation arg2) throws AnalysisEngineProcessException {
        List<Feature> feats = new ArrayList<Feature>();
        if (isPronoun(arg1) || isPronoun(arg2)) {
            return feats;
        }
        String s1 = arg1.getCoveredText();
        String s2 = arg2.getCoveredText();
        Set<String> words1 = contentWords(arg1);
        Set<String> words2 = contentWords(arg2);
        feats.add(new Feature("MATCH_EXACT", s1.equalsIgnoreCase(s2)));
        feats.add(new Feature("MATCH_START", startMatch(s1, s2)));
        feats.add(new Feature("MATCH_END", endMatch(s1, s2)));
        feats.add(new Feature("MATCH_SOON", soonMatch(s1, s2)));
        feats.add(new Feature("MATCH_OVERLAP", wordOverlap(words1, words2)));
        feats.add(new Feature("MATCH_SUBSTRING", wordSubstring(words1, words2)));
        return feats;
    }

    /**
     * Compares the leading parts of two strings, ignoring case.
     *
     * <p>The leading part of a string is everything before its first space; if it has no
     * space, it is the first five characters (or the whole string when shorter).
     *
     * @param a the first string
     * @param b the second string
     * @return {@code true} if the two leading parts are equal ignoring case
     */
    public static boolean startMatch(String a, String b) {
        return leadingPart(a).equalsIgnoreCase(leadingPart(b));
    }

    /**
     * Compares the trailing parts of two strings, ignoring case.
     *
     * <p>The trailing part of a string is everything after its last space; if it has no
     * space, it is the last five characters (or the whole string when shorter).
     *
     * @param a the first string
     * @param b the second string
     * @return {@code true} if the two trailing parts are equal ignoring case
     */
    public static boolean endMatch(String a, String b) {
        return trailingPart(a).equalsIgnoreCase(trailingPart(b));
    }

    /**
     * Tests whether two strings are equal after lower-casing and removing one leading
     * determiner from each (see {@link #nonDetSubstr(String)}).
     *
     * @param s1 the first string
     * @param s2 the second string
     * @return {@code true} if the normalized strings are equal
     */
    public static boolean soonMatch(String s1, String s2) {
        // Locale.ROOT keeps the lower-casing independent of the JVM's default locale.
        String normalized1 = nonDetSubstr(s1.toLowerCase(Locale.ROOT));
        String normalized2 = nonDetSubstr(s2.toLowerCase(Locale.ROOT));
        return normalized1.equals(normalized2);
    }

    /**
     * Removes a single leading determiner from a string.
     *
     * <p>Only the lower-case prefixes "the ", "a ", "this ", "that ", "these " and "those "
     * are recognized; the comparison is case-sensitive, so callers are expected to pass
     * lower-cased text. Note that "an " is not stripped.
     *
     * @param s the string to strip
     * @return {@code s} without the matching prefix, or {@code s} itself if none matches
     */
    public static String nonDetSubstr(String s) {
        for (String determiner : DETERMINER_PREFIXES) {
            if (s.startsWith(determiner)) {
                return s.substring(determiner.length());
            }
        }
        return s;
    }

    /**
     * Tests whether two word sets share at least one element.
     *
     * @param t1 the first word set
     * @param t2 the second word set
     * @return {@code true} if some element of {@code t2} is contained in {@code t1}
     */
    public static boolean wordOverlap(Set<String> t1, Set<String> t2) {
        for (String word : t2) {
            if (t1.contains(word)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Tests whether any word of one set is a substring of any word of the other set.
     *
     * @param t1 the first word set
     * @param t2 the second word set
     * @return {@code true} if there is a pair (one word from each set) in which either word
     *         contains the other
     */
    public static boolean wordSubstring(Set<String> t1, Set<String> t2) {
        for (String w1 : t1) {
            for (String w2 : t2) {
                if (w1.contains(w2) || w2.contains(w1)) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Collects the lower-cased covered text of every {@link BaseToken} covered by an annotation.
     *
     * <p>Despite the name, no filtering is applied: every covered token is included.
     *
     * @param a1 the annotation whose covered tokens are collected
     * @return the set of lower-cased token strings
     */
    public static Set<String> contentWords(Annotation a1) {
        Set<String> words = new HashSet<String>();
        for (BaseToken tok : JCasUtil.selectCovered(BaseToken.class, a1)) {
            // Locale.ROOT keeps the lower-casing independent of the JVM's default locale.
            words.add(tok.getCoveredText().toLowerCase(Locale.ROOT));
        }
        return words;
    }

    /**
     * Tests whether a mention is a single token tagged as a pronoun or determiner.
     *
     * @param a1 the mention to test
     * @return {@code true} if the mention covers exactly one {@link BaseToken} whose
     *         part-of-speech tag is non-null and either starts with "PRP" or equals "DT"
     */
    public static boolean isPronoun(IdentifiedAnnotation a1) {
        List<BaseToken> tokens = JCasUtil.selectCovered(BaseToken.class, a1);
        if (tokens.size() != 1) {
            return false;
        }
        String pos = tokens.get(0).getPartOfSpeech();
        if (pos == null) {
            return false;
        }
        return pos.startsWith("PRP") || pos.equals("DT");
    }

    /**
     * Heuristically tests whether an annotation sits between double quotes on its own line.
     *
     * <p>Returns {@code true} when a {@code "} character occurs between the start of the
     * annotation's line and the annotation's begin offset, and another occurs between the
     * annotation's end offset and the end of that line. Quote pairing is not verified.
     *
     * @param jcas the CAS whose document text is searched
     * @param a the annotation to test
     * @return {@code true} if a double quote is found on both sides of the annotation within
     *         its line; {@code false} otherwise, including when the document text is null
     */
    public static boolean inQuote(JCas jcas, Annotation a) {
        String docText = jcas.getDocumentText();
        if (docText == null) {
            return false;
        }
        int begin = a.getBegin();
        int end = a.getEnd();

        // lastIndexOf/indexOf return -1 (not 0) when nothing is found; -1 + 1 conveniently
        // yields offset 0 as the line start for a mention on the first line.
        int lineStart = docText.lastIndexOf('\n', begin - 1) + 1;
        int lineEnd = docText.indexOf('\n', end);
        if (lineEnd == -1) {
            lineEnd = docText.length();
        }

        int quoteBefore = docText.lastIndexOf('"', begin - 1);
        if (quoteBefore < lineStart) {
            // Either no quote at all (-1) or the nearest one is on an earlier line.
            return false;
        }
        int quoteAfter = docText.indexOf('"', end);
        return quoteAfter != -1 && quoteAfter < lineEnd;
    }

    /** Returns the text before the first space, or the first AFFIX_LENGTH characters if there is no space. */
    private static String leadingPart(String s) {
        int space = s.indexOf(' ');
        int end = space == -1 ? Math.min(s.length(), AFFIX_LENGTH) : space;
        return s.substring(0, end);
    }

    /** Returns the text after the last space, or the last AFFIX_LENGTH characters if there is no space. */
    private static String trailingPart(String s) {
        int space = s.lastIndexOf(' ');
        int start = space == -1 ? Math.max(s.length() - AFFIX_LENGTH, 0) : space + 1;
        return s.substring(start);
    }
}

