/*
 * Decompiled with CFR 0.152.
 */
package org.cleartk.corpus.conll2003;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.List;
import org.apache.uima.UimaContext;
import org.apache.uima.collection.CollectionException;
import org.apache.uima.fit.component.JCasCollectionReader_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.SofaCapability;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.cas.FSArray;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.Progress;
import org.apache.uima.util.ProgressImpl;
import org.cleartk.ne.type.Chunk;
import org.cleartk.ne.type.NamedEntity;
import org.cleartk.ne.type.NamedEntityMention;
import org.cleartk.token.type.Sentence;
import org.cleartk.token.type.Token;
import org.cleartk.util.ViewUriUtil;

@SofaCapability(outputSofas={"UriView"})
public class Conll2003GoldReader
extends JCasCollectionReader_ImplBase {
    public static final String PARAM_DATA_FILE_NAME = "dataFileName";
    @ConfigurationParameter(name="dataFileName", mandatory=true, description="Points to CoNLL data (e.g. ner/eng.train).")
    private String dataFileName;
    public static final String PARAM_LOAD_NAMED_ENTITIES = "loadNamedEntities";
    @ConfigurationParameter(name="loadNamedEntities", mandatory=true, description="determines if the named entities are loaded (i.e. named entity mention annotations are created) or if just plain text from the files is loaded.", defaultValue={"true"})
    private boolean loadNamedEntities;
    public static final String DOCSTART = "-DOCSTART-";
    BufferedReader reader;
    boolean hasNext = true;
    int documentIndex = 0;
    int entityIdIndex = 0;
    List<String> documentData;
    StringBuffer documentText;
    int sentenceStart;
    List<Token> sentenceTokens;
    List<Chunk> sentenceChunks;
    int tokenPosition;
    int chunkStart;
    String currentChunkType;
    List<Token> chunkTokens;
    int namedEntityStart;
    String currentNamedEntityType;
    List<Token> namedEntityTokens;

    public void initialize(UimaContext context) throws ResourceInitializationException {
        try {
            String line;
            File conllFile = new File(this.dataFileName);
            this.reader = new BufferedReader(new FileReader(conllFile));
            while ((line = this.reader.readLine()) != null) {
                if (!line.trim().startsWith(DOCSTART)) continue;
                this.reader.readLine();
                break;
            }
            this.sentenceTokens = new ArrayList<Token>();
            this.sentenceChunks = new ArrayList<Chunk>();
            this.chunkTokens = new ArrayList<Token>();
            this.namedEntityTokens = new ArrayList<Token>();
        }
        catch (FileNotFoundException fnfe) {
            throw new ResourceInitializationException((Throwable)fnfe);
        }
        catch (IOException ioe) {
            throw new ResourceInitializationException((Throwable)ioe);
        }
    }

    public void getNext(JCas jCas) throws IOException, CollectionException {
        URI uri;
        String line;
        this.documentData = new ArrayList<String>();
        while ((line = this.reader.readLine()) != null && !line.startsWith(DOCSTART)) {
            this.documentData.add(line.trim());
        }
        if (line == null) {
            this.hasNext = false;
        } else {
            line = this.reader.readLine().trim();
        }
        this.documentText = new StringBuffer();
        this.initSentence();
        this.tokenPosition = 0;
        this.chunkStart = 0;
        this.currentChunkType = "";
        this.chunkTokens.clear();
        this.namedEntityStart = 0;
        this.currentNamedEntityType = "";
        this.namedEntityTokens.clear();
        for (String dataLine : this.documentData) {
            if (dataLine.trim().equals("")) {
                this.createChunk(jCas);
                this.currentChunkType = "";
                this.createNamedEntity(jCas);
                this.currentNamedEntityType = "";
                Sentence sentence = new Sentence(jCas, this.sentenceStart, this.documentText.length());
                sentence.addToIndexes();
                this.initSentence();
                continue;
            }
            String[] dataPieces = dataLine.split(" ");
            String tok = dataPieces[0];
            String pos = dataPieces[1];
            String chunkType = dataPieces[2];
            if (this.currentChunkType.equals("")) {
                this.initChunk(chunkType);
            }
            String namedEntityType = dataPieces[3];
            if (this.currentNamedEntityType.equals("")) {
                this.initNamedEntity(namedEntityType);
            }
            Token token = new Token(jCas, this.documentText.length(), this.documentText.length() + tok.length());
            token.setPos(pos);
            token.addToIndexes();
            boolean chunkStartsWithB = this.startsWithB(this.currentChunkType, chunkType);
            if (!chunkType.equals(this.currentChunkType) && !chunkStartsWithB) {
                this.createChunk(jCas);
                this.initChunk(chunkType);
            }
            boolean namedEntityStartsWithB = this.startsWithB(this.currentNamedEntityType, namedEntityType);
            if (!namedEntityType.equals(this.currentNamedEntityType) && !namedEntityStartsWithB) {
                this.createNamedEntity(jCas);
                this.initNamedEntity(namedEntityType);
            }
            this.sentenceTokens.add(token);
            this.chunkTokens.add(token);
            this.namedEntityTokens.add(token);
            this.documentText.append(tok + " ");
        }
        jCas.setDocumentText(this.documentText.toString());
        URI fileURI = new File(this.dataFileName).toURI();
        String fragment = String.valueOf(this.documentIndex);
        try {
            uri = new URI(fileURI.getScheme(), fileURI.getHost(), fileURI.getPath(), fragment);
        }
        catch (URISyntaxException e) {
            throw new RuntimeException(e);
        }
        ViewUriUtil.setURI((JCas)jCas, (URI)uri);
        ++this.documentIndex;
    }

    private void initSentence() {
        this.sentenceStart = this.documentText.length();
        this.sentenceTokens.clear();
        this.sentenceChunks.clear();
    }

    private void createChunk(JCas jCas) {
        if (!this.currentChunkType.equals("O")) {
            Chunk chunk = new Chunk(jCas, this.chunkStart, this.documentText.length() - 1);
            chunk.setChunkType(this.currentChunkType.substring(2));
            chunk.addToIndexes();
            this.sentenceChunks.add(chunk);
        }
    }

    private void initChunk(String chunkType) {
        this.chunkStart = this.documentText.length();
        this.chunkTokens.clear();
        this.currentChunkType = chunkType;
    }

    private void createNamedEntity(JCas jCas) {
        if (!this.currentNamedEntityType.equals("O") && this.loadNamedEntities) {
            NamedEntity ne = new NamedEntity(jCas);
            ne.setEntityClass("SPC");
            ne.setEntityId("" + this.entityIdIndex++);
            ne.setEntityType(this.currentNamedEntityType.substring(2));
            ne.setEntitySubtype(this.currentNamedEntityType.substring(2));
            ne.addToIndexes();
            NamedEntityMention nem = new NamedEntityMention(jCas, this.namedEntityStart, this.documentText.length() - 1);
            nem.setMentionType("NAM");
            Annotation annotation = new Annotation(jCas, this.namedEntityStart, this.documentText.length() - 1);
            annotation.addToIndexes();
            nem.setAnnotation(annotation);
            nem.setHead(annotation);
            nem.setMentionedEntity(ne);
            nem.addToIndexes();
            ne.setMentions(new FSArray(jCas, 1));
            ne.setMentions(0, nem);
        }
    }

    private void initNamedEntity(String namedEntityType) {
        this.namedEntityStart = this.documentText.length();
        this.namedEntityTokens.clear();
        this.currentNamedEntityType = namedEntityType;
    }

    private boolean startsWithB(String bType, String iType) {
        return bType.startsWith("B") && iType.startsWith("I") && iType.substring(1).equals(bType.substring(1));
    }

    public void close() throws IOException {
        this.reader.close();
    }

    public Progress[] getProgress() {
        return new Progress[]{new ProgressImpl(this.documentIndex, 5000, "entities")};
    }

    public boolean hasNext() throws IOException, CollectionException {
        return this.hasNext;
    }

    public void setDataFileName(String dataFileName) {
        this.dataFileName = dataFileName;
    }

    public void setLoadNamedEntities(boolean loadNamedEntities) {
        this.loadNamedEntities = loadNamedEntities;
    }
}

