/*
 * Decompiled with CFR 0.152.
 */
package com.hankcs.hanlp.mining.word2vec;

import com.hankcs.hanlp.mining.word2vec.Config;
import com.hankcs.hanlp.mining.word2vec.VocabWord;
import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.Arrays;
import java.util.Map;

/**
 * Base class for a word2vec training corpus: it owns the vocabulary
 * ({@link #vocab} / {@link #vocabIndexMap}), the running count of training
 * words, and the bookkeeping needed to assign tree codes to every word.
 *
 * <p>Subclasses supply the actual token stream through {@link #nextWord()}.
 * This class does not allocate {@link #vocab}, {@link #vocabIndexMap} or
 * {@link #cacheFile}; a subclass is expected to do so before calling the
 * methods that use them.
 *
 * <p>Sentinel values used by this class:
 * <ul>
 *   <li>{@code -1} - word not present in the vocabulary ({@link #searchVocab(String)})</li>
 *   <li>{@code -2} - {@link #nextWord()} returned {@code null} and {@link #eoc} is set</li>
 *   <li>{@code -3} - {@link #nextWord()} returned {@code null} and {@link #eoc} is not set</li>
 *   <li>{@code -4} - entry in {@link #table} for a word pruned by {@link #sortVocab()}</li>
 * </ul>
 */
public abstract class Corpus {
    /** File removed by {@link #close()}; never assigned in this class. */
    protected File cacheFile;
    /** Training configuration; only set by {@link #Corpus(Config)}. */
    protected Config config;
    /** Sum of the counts of all words kept by {@link #sortVocab()}. */
    protected int trainWords = 0;
    /** Number of valid entries at the front of {@link #vocab}. */
    protected int vocabSize;
    /** Capacity that {@link #addWordToVocab(String)} grows {@link #vocab} to, in steps of 1000. */
    protected int vocabMaxSize = 1000;
    /** Vocabulary entries; only the first {@link #vocabSize} slots are meaningful. */
    protected VocabWord[] vocab;
    /** Word to index-in-{@link #vocab} lookup; set to {@code null} by {@link #sortVocab()}. */
    protected Map<String, Integer> vocabIndexMap;
    /** End-of-corpus flag; starts {@code true} and is cleared by {@link #rewind(int, int)}. */
    protected boolean eoc = true;
    /** Text encoding; not read anywhere in this class (presumably for subclasses). */
    protected Charset encoding = Charset.forName("UTF-8");
    /**
     * Built by {@link #sortVocab()}: indexed by a word's pre-sort vocabulary
     * index, holding its post-sort index or {@code -4} if the word was pruned.
     */
    protected int[] table;

    /**
     * Creates a corpus bound to the given configuration.
     *
     * @param config training configuration
     * @throws IOException declared for subclasses; never thrown here
     */
    public Corpus(Config config) throws IOException {
        this.config = config;
    }

    /**
     * Creates a corpus that shares (by reference, not by copy) the vocabulary
     * state of another corpus. {@code config} and {@code cacheFile} are not
     * carried over.
     *
     * @param cloneSrc corpus whose vocabulary state is shared
     * @throws IOException declared for subclasses; never thrown here
     */
    public Corpus(Corpus cloneSrc) throws IOException {
        this.trainWords = cloneSrc.trainWords;
        this.vocabSize = cloneSrc.vocabSize;
        this.vocab = cloneSrc.vocab;
        this.vocabIndexMap = cloneSrc.vocabIndexMap;
        this.table = cloneSrc.table;
    }

    /**
     * @return the current value of the end-of-corpus flag
     */
    public boolean endOfCorpus() {
        return this.eoc;
    }

    /**
     * Appends a new word to the vocabulary and registers it in the index map.
     * No duplicate check is made; callers are expected to look the word up first.
     *
     * @param word the word to add
     * @return the index assigned to the word
     */
    protected int addWordToVocab(String word) {
        int index = this.vocabSize;
        this.vocab[index] = new VocabWord(word);
        this.vocabSize = index + 1;
        // Grow ahead of need so the next write always has a free slot.
        if (this.vocabSize + 2 >= this.vocabMaxSize) {
            this.vocabMaxSize += 1000;
            VocabWord[] grown = new VocabWord[this.vocabMaxSize];
            System.arraycopy(this.vocab, 0, grown, 0, this.vocabSize);
            this.vocab = grown;
        }
        this.vocabIndexMap.put(word, index);
        return index;
    }

    /**
     * @return the total count of training words
     */
    public int getTrainWords() {
        return this.trainWords;
    }

    /**
     * @return the number of valid vocabulary entries
     */
    public int getVocabSize() {
        return this.vocabSize;
    }

    /**
     * @return the internal vocabulary array (not a copy)
     */
    public VocabWord[] getVocab() {
        return this.vocab;
    }

    /**
     * @return the internal word-to-index map (not a copy); {@code null} after {@link #sortVocab()}
     */
    public Map<String, Integer> getVocabIndexMap() {
        return this.vocabIndexMap;
    }

    /**
     * Prepares the corpus for another pass by clearing the end-of-corpus flag.
     * Both parameters are unused in this base implementation.
     *
     * @param numThreads presumably the number of reader threads - confirm against subclasses
     * @param id         presumably the id of the calling thread - confirm against subclasses
     * @throws IOException declared for subclasses; never thrown here
     */
    public void rewind(int numThreads, int id) throws IOException {
        this.eoc = false;
    }

    /**
     * Reads the next word and translates it to a vocabulary index.
     *
     * @return the word's vocabulary index, {@code -1} if the word is unknown,
     *         {@code -2} if no word was returned and the end-of-corpus flag is set,
     *         or {@code -3} if no word was returned and the flag is not set
     * @throws IOException if {@link #nextWord()} fails
     */
    public int readWordIndex() throws IOException {
        String word = this.nextWord();
        if (word != null) {
            return this.searchVocab(word);
        }
        return this.eoc ? -2 : -3;
    }

    /**
     * Returns the next token of the corpus.
     *
     * @return the next word, or {@code null} when no word is available
     * @throws IOException on read failure
     */
    public abstract String nextWord() throws IOException;

    /**
     * Shuts the corpus down and deletes the cache file, if one was set.
     * The result of the delete is ignored (best effort).
     *
     * @throws IOException if {@link #shutdown()} fails
     */
    public void close() throws IOException {
        this.shutdown();
        // cacheFile is never assigned in this class and is not copied by the
        // clone constructor, so it may legitimately be null here.
        if (this.cacheFile != null) {
            this.cacheFile.delete();
        }
    }

    /**
     * Releases the index translation table.
     *
     * @throws IOException declared for subclasses; never thrown here
     */
    public void shutdown() throws IOException {
        this.table = null;
    }

    /**
     * Looks a word up in the vocabulary.
     *
     * @param word the word to find; may be {@code null}
     * @return the word's index, or {@code -1} if it is {@code null} or unknown
     */
    int searchVocab(String word) {
        if (word == null) {
            return -1;
        }
        Integer pos = this.vocabIndexMap.get(word);
        return pos == null ? -1 : pos;
    }

    /**
     * Sorts the vocabulary, prunes words whose count is below the configured
     * minimum, recomputes {@link #trainWords}, fills {@link #table} with the
     * old-index to new-index mapping, and trims {@link #vocab} to the kept words.
     *
     * <p>After this call {@link #vocabIndexMap} is {@code null}, so
     * {@link #searchVocab(String)} and {@link #addWordToVocab(String)} can no
     * longer be used.
     *
     * <p>NOTE(review): the prefix trim and the indices stored in {@link #table}
     * are only consistent if {@code VocabWord}'s natural order puts higher
     * counts first, so that every pruned word sorts after every kept word -
     * confirm against {@code VocabWord.compareTo}.
     */
    void sortVocab() {
        Arrays.sort(this.vocab, 0, this.vocabSize);
        int size = this.vocabSize;
        this.trainWords = 0;
        this.table = new int[size];
        for (int i = 0; i < size; ++i) {
            VocabWord word = this.vocab[i];
            // The map still holds the index the word had before sorting.
            int oldIndex = this.vocabIndexMap.get(word.word);
            if (word.cn < this.config.getMinCount()) {
                this.table[oldIndex] = -4;
                --this.vocabSize;
                continue;
            }
            this.table[oldIndex] = i;
            this.setVocabIndexMap(word, i);
        }
        // The word-to-index map is no longer needed; drop it to free memory.
        this.vocabIndexMap = null;
        // Keep only the surviving prefix. Previously the trimmed copy was
        // built and then discarded, leaving pruned entries in the array.
        this.vocab = Arrays.copyOf(this.vocab, this.vocabSize);
    }

    /**
     * Accounts for a kept word by adding its count to {@link #trainWords}.
     * Despite the name, the index map itself is not modified.
     *
     * @param src the kept vocabulary entry
     * @param pos the entry's post-sort index (unused)
     */
    void setVocabIndexMap(VocabWord src, int pos) {
        this.trainWords += src.cn;
    }

    /**
     * Builds a Huffman-style binary tree over the vocabulary by repeatedly
     * merging the two lowest-count nodes, then stores each word's code bits
     * and the path of inner-node indices in its {@code VocabWord}.
     *
     * <p>Leaves occupy node indices {@code [0, vocabSize)} and inner nodes
     * {@code [vocabSize, 2 * vocabSize - 1)}; the root is node
     * {@code 2 * vocabSize - 2}. The scratch buffers hold at most 40 entries,
     * so a code longer than that raises {@link ArrayIndexOutOfBoundsException}.
     *
     * <p>NOTE(review): the two-cursor merge below assumes {@code vocab} is
     * ordered by descending count (i.e. {@link #sortVocab()} ran first) - confirm.
     */
    void createBinaryTree() {
        int[] point = new int[40];
        char[] code = new char[40];
        int nodeSlots = this.vocabSize * 2 + 1;
        int[] count = new int[nodeSlots];
        char[] binary = new char[nodeSlots];
        int[] parentNode = new int[nodeSlots];
        for (int leaf = 0; leaf < this.vocabSize; ++leaf) {
            count[leaf] = this.vocab[leaf].cn;
        }
        // Inner nodes not yet created carry a sentinel so they never win a comparison.
        for (int inner = this.vocabSize; inner < this.vocabSize * 2; ++inner) {
            count[inner] = Integer.MAX_VALUE;
        }
        // pos1 walks the leaves from the end downwards; pos2 walks the inner
        // nodes upwards in creation order.
        int pos1 = this.vocabSize - 1;
        int pos2 = this.vocabSize;
        for (int merge = 0; merge < this.vocabSize - 1; ++merge) {
            int min1i = pos1 >= 0 ? (count[pos1] < count[pos2] ? pos1-- : pos2++) : pos2++;
            int min2i = pos1 >= 0 ? (count[pos1] < count[pos2] ? pos1-- : pos2++) : pos2++;
            int parent = this.vocabSize + merge;
            count[parent] = count[min1i] + count[min2i];
            parentNode[min1i] = parent;
            parentNode[min2i] = parent;
            // The second-picked child is the '1' branch; the first keeps the default 0.
            binary[min2i] = '\u0001';
        }
        int root = this.vocabSize * 2 - 2;
        for (int j = 0; j < this.vocabSize; ++j) {
            // Climb from the leaf towards the root, recording bits and nodes leaf-first.
            int node = j;
            int len = 0;
            do {
                code[len] = binary[node];
                point[len] = node;
                ++len;
                node = parentNode[node];
            } while (node != root);
            VocabWord entry = this.vocab[j];
            entry.codelen = len;
            // First path element is the root, expressed relative to the first inner node.
            entry.point[0] = this.vocabSize - 2;
            // Reverse into root-first order; inner-node indices are rebased by vocabSize.
            for (int k = 0; k < len; ++k) {
                entry.code[len - k - 1] = code[k];
                entry.point[len - k] = point[k] - this.vocabSize;
            }
        }
    }
}

