/*
 * Decompiled with CFR 0.152.
 */
package org.musicbrainz.search.index;

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.util.Arrays;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import org.apache.commons.compress.archivers.ArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.similarities.Similarity;
import org.mozilla.universalchardet.UniversalDetector;
import org.musicbrainz.search.MbDocument;
import org.musicbrainz.search.index.DatabaseIndex;
import org.musicbrainz.search.index.FreeDBIndexField;
import org.musicbrainz.search.index.Index;
import org.musicbrainz.search.index.IndexField;
import org.musicbrainz.search.index.MetaIndexField;

/**
 * Builds search documents from a FreeDB dump: a bzip2-compressed tar archive
 * whose directories are categories and whose files are individual disc entries.
 *
 * <p>Each entry is decoded with the charset reported by {@link UniversalDetector};
 * when no usable charset is detected the entry is tried as UTF-8 and then as
 * ISO-8859-1. All decoders are strict (malformed or unmappable input is reported
 * as an error) so that a wrong charset guess is noticed rather than silently
 * producing garbage.
 *
 * <p>Instances keep mutable counters and reuse {@link CharsetDecoder} objects, so
 * a single instance must not be used from several threads at once.
 */
public class FreeDBIndex
implements Index {
    private static final String INDEX_SUFFIX = "_index";
    /** Key of the first fallback decoder in {@link #decoderMap}. */
    private static final String UTF8 = "UTF8";
    /** Key of the second fallback decoder in {@link #decoderMap}. */
    private static final String LATIN1 = "ISO-8859-1";
    private static final String DTITLE_PREFIX = "DTITLE=";
    private static final String DISCID_PREFIX = "DISCID=";
    private static final String DYEAR_PREFIX = "DYEAR=";
    private static final String TTITLE_PREFIX = "TTITLE";

    /** Number of entries whose content was empty or could not be read completely. */
    private int emptyCount = 0;
    /** Number of entries that could not be turned into a document. */
    private int failedCount = 0;
    /** Detected charset names that the JVM rejected; remembered so they are looked up only once. */
    private final Set<String> unknownCharsets = new HashSet<String>();
    /** Strict decoders keyed by the charset name they were registered under. */
    private final Map<String, CharsetDecoder> decoderMap = new HashMap<String, CharsetDecoder>();
    /** Number of successfully parsed entries per charset name (sorted for reporting). */
    private final Map<String, Integer> countMap = new TreeMap<String, Integer>();
    /** Names of the archive directories that are indexed; every other directory is skipped. */
    protected static String[] CATEGORIES = new String[]{"data", "folk", "jazz", "misc", "rock", "country", "blues", "newage", "reggae", "classical", "soundtrack"};
    /** The bzip2-compressed tar dump read by {@link #indexData(IndexWriter)}. */
    protected File dumpFile;

    /**
     * Creates a strict decoder for {@code charset} and registers it, together
     * with a zeroed counter, under {@code charsetName}.
     *
     * @param charsetName key used in {@link #decoderMap} and {@link #countMap}
     * @param charset     charset to decode with
     * @return the newly registered decoder
     */
    private CharsetDecoder registerDecoder(String charsetName, Charset charset) {
        CharsetDecoder decoder = charset.newDecoder();
        // REPORT makes a wrong charset surface as an IOException while reading.
        decoder.onMalformedInput(CodingErrorAction.REPORT);
        decoder.onUnmappableCharacter(CodingErrorAction.REPORT);
        this.decoderMap.put(charsetName, decoder);
        this.countMap.put(charsetName, 0);
        return decoder;
    }

    /** Registers the two fallback decoders (UTF-8 and ISO-8859-1). */
    private void initDecoders() {
        this.registerDecoder(UTF8, Charset.forName(UTF8));
        this.registerDecoder(LATIN1, Charset.forName(LATIN1));
    }

    /** Creates an index with the fallback decoders already registered. */
    public FreeDBIndex() {
        this.initDecoders();
    }

    /**
     * Adds a single document holding the current time as the
     * {@link MetaIndexField#LAST_UPDATED} value.
     *
     * @param indexWriter writer that receives the meta document
     * @throws IOException if the writer fails to add the document
     */
    @Override
    public void addMetaInformation(IndexWriter indexWriter) throws IOException {
        MbDocument doc = new MbDocument();
        doc.addNumericField((IndexField)MetaIndexField.LAST_UPDATED, new Date().getTime());
        indexWriter.addDocument(doc.getLuceneDocument());
    }

    /** @return the dump file that will be indexed, possibly {@code null} if never set */
    public File getDumpFile() {
        return this.dumpFile;
    }

    /** @param dumpFile the bzip2-compressed tar dump to index */
    public void setDumpFile(File dumpFile) {
        this.dumpFile = dumpFile;
    }

    /** @return the analyzer that {@link DatabaseIndex} provides for {@link FreeDBIndexField} */
    @Override
    public Analyzer getAnalyzer() {
        return DatabaseIndex.getAnalyzer(FreeDBIndexField.class);
    }

    /** @return the fixed index name {@code "freedb"} */
    @Override
    public String getName() {
        return "freedb";
    }

    /** @return the index name followed by {@code "_index"} */
    @Override
    public String getFilename() {
        return this.getName() + INDEX_SUFFIX;
    }

    /**
     * Reads the dump file and adds one document per parseable entry found in
     * one of the {@link #CATEGORIES} directories, then prints per-charset,
     * empty and failed counts to standard output.
     *
     * @param indexWriter writer that receives the documents
     * @throws IOException if the dump cannot be opened or read, or the writer fails
     */
    public void indexData(IndexWriter indexWriter) throws IOException {
        InputStream fileInput = new BufferedInputStream(new FileInputStream(this.dumpFile));
        try {
            TarArchiveInputStream tarIn = new TarArchiveInputStream(new BZip2CompressorInputStream(fileInput));
            String category = "";
            boolean indexCategory = false;
            ArchiveEntry entry;
            while ((entry = tarIn.getNextEntry()) != null) {
                if (entry.isDirectory()) {
                    // A directory entry switches the category for the file entries that follow it.
                    category = entry.getName().replace("/", "");
                    indexCategory = Arrays.asList(CATEGORIES).contains(category);
                    if (indexCategory) {
                        System.out.println("  Indexing category: " + category);
                    } else if (!".".equals(category)) {
                        System.out.println("  Skipping category: " + category);
                    }
                    continue;
                }
                if (!indexCategory) {
                    continue;
                }
                byte[] content = FreeDBIndex.readEntry(tarIn, entry.getSize());
                if (content == null) {
                    ++this.emptyCount;
                    continue;
                }
                Document doc = this.documentFromFreeDBEntry(entry.getName(), category, content);
                if (doc != null) {
                    indexWriter.addDocument(doc);
                }
            }
        }
        finally {
            // Closing the file stream releases the only OS resource held by the stream chain.
            fileInput.close();
        }
        for (Map.Entry<String, Integer> charsetCounter : this.countMap.entrySet()) {
            System.out.println("No of " + charsetCounter.getKey() + " entries " + charsetCounter.getValue());
        }
        System.out.println("  No of empty entries " + this.emptyCount);
        System.out.println("  No of failed entries " + this.failedCount);
    }

    /**
     * Reads exactly {@code size} bytes of the current archive entry.
     *
     * <p>A single {@code read} call may legitimately return fewer bytes than
     * requested, so this loops until the buffer is full or the stream ends.
     *
     * @param in   stream positioned at the start of the entry's content
     * @param size declared size of the entry in bytes
     * @return the entry content, or {@code null} if the entry is empty, too
     *         large for a byte array, or ends before {@code size} bytes were read
     * @throws IOException if reading fails
     */
    private static byte[] readEntry(InputStream in, long size) throws IOException {
        if (size <= 0L || size > (long)Integer.MAX_VALUE) {
            return null;
        }
        byte[] content = new byte[(int)size];
        int total = 0;
        while (total < content.length) {
            int read = in.read(content, total, content.length - total);
            if (read < 0) {
                return null;
            }
            total += read;
        }
        return content;
    }

    /**
     * Asks {@link UniversalDetector} which charset {@code content} is encoded in.
     *
     * @param content raw bytes of one entry
     * @return the detected charset name, or {@code null} if the detector reports none
     */
    private String detectCharset(byte[] content) {
        UniversalDetector detector = new UniversalDetector(null);
        detector.handleData(content, 0, content.length);
        detector.dataEnd();
        String charSet = detector.getDetectedCharset();
        detector.reset();
        return charSet;
    }

    /**
     * Decodes one entry with the given decoder and builds its document.
     *
     * <p>The artist and release come from the concatenated {@code DTITLE=}
     * lines split on {@code " / "}; the track count is the index found in the
     * last {@code TTITLEn=} line plus one. On success the counter for
     * {@code charsetName} is incremented.
     *
     * @param entryName   archive entry name, used only in error messages
     * @param category    category stored in the document
     * @param content     raw bytes of the entry
     * @param cd          strict decoder to read {@code content} with
     * @param charsetName key of {@code cd} in {@link #countMap}
     * @return the document, or {@code null} if the content cannot be decoded
     *         with {@code cd} or the track number is not an integer
     */
    private Document parseEntryAndCreateDocument(String entryName, String category, byte[] content, CharsetDecoder cd, String charsetName) {
        // Decoders are shared between entries, so clear any state left by a previous failure.
        cd.reset();
        BufferedReader reader = new BufferedReader(new InputStreamReader((InputStream)new ByteArrayInputStream(content), cd));
        String title = "";
        String discid = "";
        String year = "";
        String lastTrack = "";
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.startsWith(DTITLE_PREFIX)) {
                    // Long titles continue on further DTITLE lines.
                    title = title + line.substring(DTITLE_PREFIX.length());
                } else if (line.startsWith(DISCID_PREFIX)) {
                    discid = line.substring(DISCID_PREFIX.length());
                    // Several ids may be listed separated by commas; keep the whole first one.
                    int comma = discid.indexOf(',');
                    if (comma >= 0) {
                        discid = discid.substring(0, comma);
                    }
                } else if (line.startsWith(DYEAR_PREFIX)) {
                    year = line.substring(DYEAR_PREFIX.length());
                } else if (line.startsWith(TTITLE_PREFIX)) {
                    lastTrack = line;
                }
            }
        }
        catch (IOException e) {
            // The strict decoder rejected the bytes: wrong charset for this entry.
            return null;
        }
        String artist = "";
        String release = "";
        String[] titleParts = title.split(" / ");
        if (titleParts.length >= 2) {
            artist = titleParts[0].trim();
            release = titleParts[1].trim();
        }
        int numTracks = 0;
        int equalsAt = lastTrack.indexOf('=');
        String trackKey = equalsAt < 0 ? lastTrack : lastTrack.substring(0, equalsAt);
        if (trackKey.length() > TTITLE_PREFIX.length()) {
            try {
                // Track indices are zero-based, so the last index plus one is the track count.
                numTracks = Integer.parseInt(trackKey.substring(TTITLE_PREFIX.length())) + 1;
            }
            catch (NumberFormatException e) {
                System.err.println("  " + entryName + " Unable to determine no of tracks from " + lastTrack);
                e.printStackTrace();
                return null;
            }
        } else {
            System.err.println("Value of lastTrack cannot be parsed is:" + lastTrack);
        }
        MbDocument doc = new MbDocument();
        doc.addField((IndexField)FreeDBIndexField.ARTIST, artist);
        doc.addField((IndexField)FreeDBIndexField.TITLE, release);
        doc.addField((IndexField)FreeDBIndexField.DISCID, discid);
        doc.addField((IndexField)FreeDBIndexField.CATEGORY, category);
        doc.addField((IndexField)FreeDBIndexField.YEAR, year);
        doc.addField((IndexField)FreeDBIndexField.TRACKS, Integer.toString(numTracks));
        Integer parsedSoFar = this.countMap.get(charsetName);
        this.countMap.put(charsetName, parsedSoFar == null ? 1 : parsedSoFar + 1);
        return doc.getLuceneDocument();
    }

    /**
     * Returns the strict decoder for a detected charset name, creating and
     * registering it on first use.
     *
     * @param charsetName name reported by the detector, may be {@code null}
     * @return the decoder, or {@code null} if {@code charsetName} is
     *         {@code null} or is not a charset this JVM supports
     */
    private CharsetDecoder decoderFor(String charsetName) {
        if (charsetName == null) {
            return null;
        }
        CharsetDecoder known = this.decoderMap.get(charsetName);
        if (known != null || this.unknownCharsets.contains(charsetName)) {
            return known;
        }
        try {
            return this.registerDecoder(charsetName, Charset.forName(charsetName));
        }
        catch (IllegalArgumentException e) {
            // Charset.forName signals illegal or unsupported names with IllegalArgumentException
            // subclasses; remember the name so the lookup is not repeated for every entry.
            this.unknownCharsets.add(charsetName);
            return null;
        }
    }

    /**
     * Builds the document for one FreeDB entry.
     *
     * <p>If a supported charset is detected, only that charset is tried.
     * Otherwise the entry is tried as UTF-8 and then as ISO-8859-1. Every
     * entry for which no document could be built is counted as failed.
     *
     * @param entryName archive entry name, used in error messages
     * @param category  category the entry belongs to
     * @param content   raw bytes of the entry
     * @return the document, or {@code null} if the entry could not be parsed
     */
    protected Document documentFromFreeDBEntry(String entryName, String category, byte[] content) {
        String detectedName = this.detectCharset(content);
        CharsetDecoder detectedDecoder = this.decoderFor(detectedName);
        if (detectedDecoder != null) {
            Document doc = this.parseEntryAndCreateDocument(entryName, category, content, detectedDecoder, detectedName);
            if (doc == null) {
                ++this.failedCount;
            }
            return doc;
        }
        String[] fallbacks = new String[]{UTF8, LATIN1};
        for (String fallback : fallbacks) {
            Document doc = this.parseEntryAndCreateDocument(entryName, category, content, this.decoderMap.get(fallback), fallback);
            if (doc != null) {
                return doc;
            }
        }
        ++this.failedCount;
        return null;
    }

    /** @return always {@code null}; this index supplies no custom similarity */
    @Override
    public Similarity getSimilarity() {
        return null;
    }
}

