package generalCluster;

import clustering.ClusterData;
import clustering.PatternSimple;
import clustering.SDnorData;
import java.io.File;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import libs.IO;
import libs.Write;
import org.apache.commons.collections.primitives.ArrayIntList;
import sequences.FastaByteUC;
import sequences.FastaByteUtil;
import sequences.ReverseBases;
import stat.BasicStat;

/* loaded from: input_file:generalCluster/GeneralCluster.class */
public class GeneralCluster {
    /** Path of the CVnor output file; assigned by {@link #patternInput()} before any result is written. */
    public static String sdNorFile;

    /**
     * Command-line entry point.
     *
     * <p>Parses the arguments through {@code Vars.getParameters}. When a pattern was supplied,
     * the distance mode is forced to 1 and the pattern analysis is run; otherwise nothing happens.
     *
     * @param strArr raw command-line arguments
     */
    public static void main(String[] strArr) {
        Vars.getParameters(strArr);
        if (Vars.pattern != null) {
            IO.log(Vars.log, 1, "Start word-cluster analysis with pattern:" + Vars.pattern, false);
            Vars.distMode = 1;
            IO.log(Vars.log, 1, "Set distance mode to 1 (start-start)", true);
            patternInput();
        }
    }

    /**
     * Runs the pattern analysis for every chromosome of {@code Vars.genome}.
     *
     * <p>Steps, in order: build the pattern set, create one {@link PatternSimple} per chromosome
     * (optionally writing per-chromosome statistics to {@code stat.txt}), derive distance
     * thresholds and probabilities via {@link #setDistanceProb(Map)}, then write the clusters of
     * every chromosome to {@code cluster.txt} and, if requested, the per-chromosome distance
     * distributions.
     */
    public static void patternInput() {
        Set<String> patternSet = getPatternSet(Vars.pattern.split(":"), Vars.strand);
        // Hashtable is kept (rather than another Map) so the iteration order, and therefore the
        // order of the records in the output files, stays what it always was.
        Hashtable<String, PatternSimple> patternsByChrom = new Hashtable<String, PatternSimple>();
        Set<String> chromosomes = FastaByteUtil.getChromosomes(Vars.genome);
        sdNorFile = outputPath("CVnor.txt");
        String statFile = outputPath("stat.txt");
        // Remove results of a previous run; the return value is ignored because the files
        // may legitimately not exist yet.
        new File(sdNorFile).delete();
        new File(statFile).delete();
        if (Vars.chromStat) {
            Write.writeString(statFile, "chrom\tGC-content\tOE\tlength\ttotalContigLength\tpatternCount\tprobability\tanalysedBases", false);
        }
        for (String chrom : chromosomes) {
            FastaByteUC sequence = FastaByteUtil.getFastaByteUCZip(Vars.genome, chrom);
            PatternSimple patternSimple = new PatternSimple(sequence, patternSet, Vars.distMode);
            // Honour the user option. The previous code assigned 'true' to Vars.chromStat here,
            // which produced statistic rows without a header whenever the option was switched off.
            if (Vars.chromStat) {
                double gc = sequence.getGC();
                double oe = sequence.getOE(patternSet);
                Write.writeString(statFile, chrom + "\t" + gc + "\t" + oe + "\t" + sequence.getSequenceLength() + "\t" + sequence.getContigsLength() + "\t" + patternSimple.getCount() + "\t" + patternSimple.getProb() + "\t" + patternSimple.getAnalysedBases(), true);
                IO.log(Vars.log, 1, "Chromosome statistics for " + chrom + ": %GC: " + gc + "; OE: " + oe + "; length: " + sequence.getSeqLength() + "; total contig length: " + sequence.getContigsLength(), true);
            }
            patternsByChrom.put(chrom, patternSimple);
        }
        setDistanceProb(patternsByChrom);
        String clusterFile = outputPath("cluster.txt");
        ClusterData.writeHeader(clusterFile);
        for (Map.Entry<String, PatternSimple> entry : patternsByChrom.entrySet()) {
            String chrom = entry.getKey();
            PatternSimple patternSimple = entry.getValue();
            List<ClusterData> clusterList = patternSimple.getClusterList(patternSimple.getDistanceThreshold(), Vars.plimit);
            // The sequence is loaded again instead of being cached during the first loop.
            ClusterData.getComposition(clusterList, FastaByteUtil.getFastaByteUCZip(Vars.genome, chrom), patternSet);
            ClusterData.writeFile(clusterList, clusterFile, chrom);
            // In genome mode a single genome-wide distribution is written by the setGenomic* methods.
            if (Vars.writedistribution && !Vars.genomeDist) {
                patternSimple.writeOutDistanceDistribution(outputPath(chrom.split(":")[0] + ".distr"), Vars.maxDistance);
            }
        }
    }

    /**
     * Sets the distance threshold (and, in genome mode, the probability) of every entry.
     *
     * <p>In genome mode ({@code Vars.genomeDist}) the work is delegated to one of the
     * {@code setGenomic*} methods. Otherwise each chromosome gets its own threshold: the fixed
     * distance, the requested percentile, or the first element of
     * {@code PatternSimple.getMinMaxDistance()}, in that order of precedence. The per-chromosome
     * SDnor record is appended to {@link #sdNorFile}.
     *
     * @param map chromosome name to pattern data
     */
    public static void setDistanceProb(Map<String, PatternSimple> map) {
        if (Vars.genomeDist) {
            // NOTE(review): genome mode requires fixedDist > 0 while the per-chromosome branch
            // below accepts fixedDist >= 0 - confirm whether this asymmetry is intended.
            if (Vars.fixedDist > 0) {
                setGenomicFixedDist(map);
            } else if (Vars.percentil >= 0.0d) {
                setGenomicPercentile(map);
            } else {
                setGenomicIntersectionDistance(map);
            }
            return;
        }
        for (Map.Entry<String, PatternSimple> entry : map.entrySet()) {
            String chrom = entry.getKey();
            PatternSimple patternSimple = entry.getValue();
            if (Vars.fixedDist >= 0) {
                patternSimple.setDistance(Vars.fixedDist);
            } else if (Vars.percentil >= 0.0d) {
                patternSimple.setDistance(patternSimple.getDistPercentile(Vars.percentil));
            } else {
                patternSimple.setDistance(patternSimple.getMinMaxDistance()[0]);
            }
            IO.log(Vars.log, 1, "For chromosome " + chrom + " found total number of patterns: " + patternSimple.getCount(), true);
            IO.log(Vars.log, 1, "For chromosome " + chrom + " found total number of analysed bases: " + patternSimple.getAnalysedBases(), true);
            IO.log(Vars.log, 1, "For chromosome " + chrom + " found probability " + patternSimple.getProb() + "  and distance threshold " + patternSimple.getDistanceThreshold(), true);
            writeSDnor(patternSimple.getDistanceInt(), patternSimple.getProb(), chrom);
        }
    }

    /**
     * Applies one probability and one distance to every entry of the map.
     *
     * @param map chromosome name to pattern data
     * @param i   distance to set; negative values leave the distance untouched
     * @param d   probability to set; negative values leave the probability untouched
     */
    public static void setDistProb(Map<String, PatternSimple> map, int i, double d) {
        for (PatternSimple patternSimple : map.values()) {
            if (d >= 0.0d) {
                patternSimple.setProbability(d);
            }
            if (i >= 0) {
                patternSimple.setDistance(i);
            }
        }
    }

    /**
     * Genome mode with a fixed distance: the genome-wide probability and {@code Vars.fixedDist}
     * are applied to all chromosomes, and the genome-wide SDnor record and (optionally) distance
     * distribution are written.
     *
     * @param map chromosome name to pattern data
     */
    public static void setGenomicFixedDist(Map<String, PatternSimple> map) {
        GenomeTotals totals = collectGenomeTotals(map);
        double probability = totals.probability();
        IO.log(Vars.log, 1, "For genome-mode and fixed distance " + Vars.fixedDist + ": found total number of patterns: " + totals.patternCount, true);
        IO.log(Vars.log, 1, "For genome-mode and fixed distance " + Vars.fixedDist + ": found total number of analysed bases: " + totals.analysedBases, true);
        IO.log(Vars.log, 1, "For genome-mode and fixed distance " + Vars.fixedDist + ": found probability of " + probability, true);
        setDistProb(map, Vars.fixedDist, probability);
        writeSDnor(convertListToArray(totals.distances), probability, "genome");
        writeGenomeDistribution(totals.distances, probability);
    }

    /**
     * Genome mode without fixed distance or percentile: the threshold is the first element of
     * {@code PatternSimple.getMinMaxDistance(distances, probability)} computed over the pooled
     * distances of all chromosomes.
     *
     * @param map chromosome name to pattern data
     */
    public static void setGenomicIntersectionDistance(Map<String, PatternSimple> map) {
        GenomeTotals totals = collectGenomeTotals(map);
        double probability = totals.probability();
        int[] minMaxDistance = PatternSimple.getMinMaxDistance(totals.distances, probability);
        IO.log(Vars.log, 1, "For genome-mode (intersection) : found total number of patterns: " + totals.patternCount, true);
        IO.log(Vars.log, 1, "For genome-mode (intersection) : found total number of analysed bases: " + totals.analysedBases, true);
        IO.log(Vars.log, 1, "For genome-mode (intersection): found probability of " + probability + " and threshold distance of " + minMaxDistance[0], true);
        setDistProb(map, minMaxDistance[0], probability);
        writeSDnor(convertListToArray(totals.distances), probability, "genome");
        writeGenomeDistribution(totals.distances, probability);
    }

    /**
     * Genome mode with a percentile: the threshold is the {@code Vars.percentil} percentile of the
     * pooled distances, rounded to the nearest integer.
     *
     * @param map chromosome name to pattern data
     */
    public static void setGenomicPercentile(Map<String, PatternSimple> map) {
        GenomeTotals totals = collectGenomeTotals(map);
        int[] distances = convertListToArray(totals.distances);
        double probability = totals.probability();
        // Adding 0.5 before the cast rounds to nearest instead of truncating.
        int percentile = (int) (new BasicStat(distances).percentile(Vars.percentil) + 0.5d);
        IO.log(Vars.log, 1, "For genome-mode and percentil " + Vars.percentil + ": found total number of patterns: " + totals.patternCount, true);
        IO.log(Vars.log, 1, "For genome-mode and percentil " + Vars.percentil + ": found total number of analysed bases: " + totals.analysedBases, true);
        IO.log(Vars.log, 1, "For genome-mode and percentil " + Vars.percentil + ": found probability of " + probability + " and threshold distance of " + percentile, true);
        setDistProb(map, percentile, probability);
        writeSDnor(distances, probability, "genome");
        writeGenomeDistribution(totals.distances, probability);
    }

    /**
     * Builds the set of patterns to search for.
     *
     * @param strArr the individual patterns
     * @param str    strand option; when it equals {@code "-"} the result of
     *               {@code ReverseBases.getRevSeq} is added for every pattern as well.
     *               A {@code null} strand is treated like any other value that is not {@code "-"}.
     * @return the patterns, plus their {@code getRevSeq} counterparts if requested
     */
    public static Set<String> getPatternSet(String[] strArr, String str) {
        Set<String> patterns = new HashSet<String>();
        // Constant-first comparison: a missing strand option must not raise a NullPointerException.
        boolean addReverse = "-".equals(str);
        for (String pattern : strArr) {
            patterns.add(pattern);
            if (addReverse) {
                patterns.add(ReverseBases.getRevSeq(pattern));
            }
        }
        return patterns;
    }

    /**
     * Computes the SDnor record for a set of distances.
     *
     * <p>The distances are divided by their mean, their standard deviation is divided by
     * {@code sqrt(1 - d)}, and the result is passed to {@link SDnorData} together with a second
     * value derived from it and from {@code n = iArr.length + 1}.
     *
     * @param iArr distances
     * @param d    pattern probability
     * @param str  pattern label stored in the record
     * @return the record, or {@code null} when {@code iArr} is {@code null} or empty
     */
    public static SDnorData calcSDnor(int[] iArr, double d, String str) {
        if (iArr == null || iArr.length <= 0) {
            return null;
        }
        BasicStat basicStat = new BasicStat(normalize(iArr));
        double stdDev = basicStat.stdDev() / Math.sqrt(1.0d - d);
        int length = iArr.length + 1;
        return new SDnorData(str, length, Double.valueOf(stdDev), Double.valueOf((stdDev - (((2.0d * length) - 1.0d) / ((2.0d * length) + 2.0d))) / (1.0d / (Math.sqrt(length) * (1.0d + (2.8d * Math.pow(length, -2.865d)))))), d, basicStat.mean());
    }

    /** Returns a copy of {@code iArr} in which every value is divided by the mean of the array. */
    private static double[] normalize(int[] iArr) {
        double mean = new BasicStat(iArr).mean();
        double[] normalized = new double[iArr.length];
        for (int i = 0; i < iArr.length; i++) {
            normalized[i] = iArr[i] / mean;
        }
        return normalized;
    }

    /** Copies the content of the primitive list into a new {@code int[]}. */
    private static int[] convertListToArray(ArrayIntList arrayIntList) {
        int size = arrayIntList.size();
        int[] values = new int[size];
        for (int i = 0; i < size; i++) {
            values[i] = arrayIntList.get(i);
        }
        return values;
    }

    /** Joins {@code Vars.output} and {@code fileName} with the platform file separator. */
    private static String outputPath(String fileName) {
        return String.valueOf(Vars.output) + File.separator + fileName;
    }

    /** Sums pattern counts and analysed bases and pools the distances of all entries of the map. */
    private static GenomeTotals collectGenomeTotals(Map<String, PatternSimple> map) {
        GenomeTotals totals = new GenomeTotals();
        for (PatternSimple patternSimple : map.values()) {
            totals.patternCount += patternSimple.getCount();
            totals.analysedBases += patternSimple.getAnalysedBases();
            totals.distances.addAll(patternSimple.getDistance());
        }
        return totals;
    }

    /**
     * Computes the SDnor record and appends it to {@link #sdNorFile} under the given label.
     * {@link #calcSDnor} yields {@code null} for an empty distance set; that case is logged and
     * skipped instead of failing with a NullPointerException.
     */
    private static void writeSDnor(int[] distances, double probability, String label) {
        SDnorData sdNor = calcSDnor(distances, probability, Vars.pattern);
        if (sdNor == null) {
            IO.log(Vars.log, 1, "No distances available for " + label + ": SDnor record not written", true);
            return;
        }
        sdNor.writeToFile(sdNorFile, label);
    }

    /** Writes the pooled distance distribution to {@code genome.distr} when {@code Vars.writedistribution} is set. */
    private static void writeGenomeDistribution(ArrayIntList distances, double probability) {
        if (Vars.writedistribution) {
            PatternSimple.writeOutDistanceDistribution(outputPath("genome.distr"), Vars.maxDistance, distances, probability);
        }
    }

    /** Genome-wide sums gathered over all chromosomes. */
    private static final class GenomeTotals {
        /** Distances of all chromosomes, concatenated in map iteration order. */
        final ArrayIntList distances = new ArrayIntList();
        /** Sum of {@code PatternSimple.getCount()} over all chromosomes. */
        int patternCount;
        /** Sum of {@code PatternSimple.getAnalysedBases()} over all chromosomes. */
        double analysedBases;

        /** Returns pattern count divided by analysed bases. */
        double probability() {
            return patternCount / analysedBases;
        }
    }
}
