package com.appian.documentunderstanding.prediction.keyvalue;

import com.appian.documentunderstanding.boundingbox.JaccardIndexComparer;
import com.appian.documentunderstanding.common.DocumentExtractionFeatureToggles;
import com.appian.documentunderstanding.common.DocumentExtractionMetricConstants;
import com.appian.documentunderstanding.function.OcrJobContext;
import com.appian.documentunderstanding.populate.KeyData;
import com.appian.documentunderstanding.prediction.DocumentUnderstandingAbstractEsPredictionService;
import com.appian.documentunderstanding.prediction.PredictionType;
import com.appian.documentunderstanding.prediction.SearchRequestExecutor;
import com.appian.documentunderstanding.prediction.datatypes.CustomDatatype;
import com.appian.documentunderstanding.prediction.datatypes.CustomFieldType;
import com.appian.documentunderstanding.prediction.keyvalue.ReconciledEntryEsBridge;
import com.appian.documentunderstanding.prediction.metrics.DocExtractPredictionMetricsCollector;
import com.appiancorp.common.monitoring.ProductMetricsAggregatedDataCollector;
import com.appiancorp.core.expr.portable.assertions.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.gson.Gson;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Function;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.common.unit.Fuzziness;
import org.elasticsearch.index.query.DisMaxQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.sort.SortBuilders;
import org.elasticsearch.search.sort.SortOrder;

/* loaded from: input_file:com/appian/documentunderstanding/prediction/keyvalue/DocumentUnderstandingKvpPlusPositionEsPredictionService.class */
/**
 * Prediction service that proposes, for each field of a custom datatype, the detected key
 * ({@link KeyData}) that best matches a previously reconciled entry found through the
 * {@link SearchRequestExecutor}.
 *
 * <p>Reconciled entries are looked up by key text (exact terms first, optionally fuzzy
 * afterwards). For every returned hit the stored {@link KeyValueAnnotation} is compared against
 * the detected keys that carry the same text and sit on the same page; the candidate with the
 * highest Jaccard index of the two bounding boxes wins, provided the index exceeds
 * {@link OcrJobContext#DEFAULT_CONFIDENCE_THRESHOLD}.
 */
public class DocumentUnderstandingKvpPlusPositionEsPredictionService extends DocumentUnderstandingAbstractEsPredictionService {
    /** Longest key text that is still sent to the exact (terms) search. */
    private static final int EXACT_MATCH_TEXT_MAX_LENGTH = 500;
    /** Maximum number of detected keys considered for the exact search. */
    private static final int EXACT_MATCH_TEXTS_LIMIT = 1000;
    /** Maximum number of detected keys considered for the fuzzy search. */
    private static final int FUZZY_MATCH_TEXTS_LIMIT = 1000;
    /** Longest key text that is still sent to the fuzzy search. */
    private static final int FUZZY_MATCH_TEXT_MAX_LENGTH = 50;
    /** Shortest key text that is still sent to the fuzzy search. */
    private static final int FUZZY_MATCH_TEXT_MIN_LENGTH = 3;
    /** Source fields fetched for every top hit. */
    private static final String[] SOURCE_FETCH_INCLUDES = {ReconciledEntryEsBridge.Field.cdtFieldName.name(), ReconciledEntryEsBridge.Field.reconciledKey.name(), ReconciledEntryEsBridge.Field.counter.name(), ReconciledEntryEsBridge.Field.jsonBlob.name()};
    /** Shared JSON parser for the stored annotation blob; avoids allocating a parser per hit. */
    private static final Gson GSON = new Gson();

    private final SearchRequestExecutor searchRequestExecutor;

    /**
     * Creates the service.
     *
     * @param searchRequestExecutor builds and executes the search requests issued by this service
     * @param docExtractPredictionMetricsCollector forwarded to the superclass
     * @param documentExtractionFeatureToggles forwarded to the superclass
     */
    public DocumentUnderstandingKvpPlusPositionEsPredictionService(SearchRequestExecutor searchRequestExecutor, DocExtractPredictionMetricsCollector docExtractPredictionMetricsCollector, DocumentExtractionFeatureToggles documentExtractionFeatureToggles) {
        super(PredictionType.KVP_POSITION, docExtractPredictionMetricsCollector, documentExtractionFeatureToggles);
        this.searchRequestExecutor = searchRequestExecutor;
    }

    /**
     * Computes predictions for the fields of {@code customDatatype}.
     *
     * @param customDatatype the datatype whose fields are to be predicted; must not be null
     * @param set field names to predict, or {@code null} to use the datatype's STRING and BOOLEAN
     *     fields; the passed set is never modified
     * @param collection the detected keys; must not be null
     * @param z when {@code true}, fields left unmatched by the exact search are retried with a
     *     fuzzy search
     * @return a map from field name to a single-element collection holding the chosen key; empty
     *     when there are no fields or no eligible keys
     */
    @Override // com.appian.documentunderstanding.prediction.PredictionService
    public Map<String, Collection<KeyData>> getPredictions(CustomDatatype customDatatype, Set<String> set, Collection<KeyData> collection, boolean z) {
        Preconditions.checkNotNull(customDatatype, "type");
        Preconditions.checkNotNull(collection, "detectedKeys");
        Set<String> requestedFieldNames = set != null ? set : customDatatype.getFieldNames(CustomFieldType.STRING, CustomFieldType.BOOLEAN);
        // Always work on a private copy: matched fields are removed from this set further down,
        // which must not leak into the caller's set or into the datatype's own field-name set.
        Set<String> unmatchedFieldNames = new HashSet<>(requestedFieldNames);
        if (unmatchedFieldNames.isEmpty()) {
            return Collections.emptyMap();
        }
        Set<String> exactMatchKeys = filterKeysForExactMatch(collection).stream()
                .map(KeyData::getKey)
                .collect(Collectors.toSet());
        if (exactMatchKeys.isEmpty()) {
            return Collections.emptyMap();
        }
        ImmutableMap.Builder<String, Collection<KeyData>> builder = ImmutableMap.builder();
        String unversionedQName = customDatatype.getUnversionedQName();
        Map<String, Set<KeyData>> keyToKeyData = getMapOfKeyToKeyDataList(collection);
        getExactMatches(builder, unversionedQName, unmatchedFieldNames, exactMatchKeys, keyToKeyData);
        if (z && !unmatchedFieldNames.isEmpty()) {
            Set<String> fuzzyMatchKeys = filterKeysForFuzzyMatch(collection, exactMatchKeys);
            if (!fuzzyMatchKeys.isEmpty()) {
                getFuzzyMatches(builder, unversionedQName, unmatchedFieldNames, keyToKeyData, fuzzyMatchKeys);
            }
        }
        return builder.build();
    }

    /**
     * Runs the fuzzy search and records the resulting matches and the match-count metric. The
     * detected key of a hit is the name of the first query it matched (each fuzzy clause is named
     * after the key text it was built from).
     */
    private void getFuzzyMatches(ImmutableMap.Builder<String, Collection<KeyData>> builder, String cdtQName, Set<String> unmatchedFieldNames, Map<String, Set<KeyData>> keyToKeyData, Set<String> fuzzyMatchKeys) {
        SearchResponse response = searchFuzzy(cdtQName, unmatchedFieldNames, fuzzyMatchKeys);
        int matched = collectBestMatches(response, DocumentUnderstandingKvpPlusPositionEsPredictionService::firstMatchedQueryName, builder, keyToKeyData, unmatchedFieldNames);
        ProductMetricsAggregatedDataCollector.recordData(DocumentExtractionMetricConstants.DE_MAPPING_KEYS_ESMATCH_KVP_PLUS_FUZZY, matched);
    }

    /**
     * Runs the exact search and records the resulting matches and the match-count metric. The
     * detected key of a hit is the {@code reconciledKey} value stored in its source.
     */
    private void getExactMatches(ImmutableMap.Builder<String, Collection<KeyData>> builder, String cdtQName, Set<String> unmatchedFieldNames, Set<String> exactMatchKeys, Map<String, Set<KeyData>> keyToKeyData) {
        SearchResponse response = searchExact(cdtQName, unmatchedFieldNames, exactMatchKeys);
        int matched = collectBestMatches(response, hit -> (String) hit.getSourceAsMap().get(ReconciledEntryEsBridge.Field.reconciledKey.name()), builder, keyToKeyData, unmatchedFieldNames);
        ProductMetricsAggregatedDataCollector.recordData(DocumentExtractionMetricConstants.DE_MAPPING_KEYS_ESMATCH_KVP_PLUS_EXACT, matched);
    }

    /**
     * Walks the top hits of {@code response} in {@link #compareSearchHits} order and assigns, per
     * field name, the first hit for which a positional best match exists.
     *
     * @param detectedKeyResolver extracts from a hit the detected-key text used to look up candidates
     * @return the number of fields that received a match
     */
    private int collectBestMatches(SearchResponse response, Function<SearchHit, String> detectedKeyResolver, ImmutableMap.Builder<String, Collection<KeyData>> builder, Map<String, Set<KeyData>> keyToKeyData, Set<String> unmatchedFieldNames) {
        // Sort into a fresh list so the list handed out by the superclass is left untouched.
        List<SearchHit> rankedHits = getAllTopHits(response).stream()
                .sorted((first, second) -> compareSearchHits(first.getSourceAsMap(), second.getSourceAsMap()))
                .collect(Collectors.toList());
        Set<String> assignedFieldNames = new HashSet<>();
        int matched = 0;
        for (SearchHit hit : rankedHits) {
            Map<String, Object> source = hit.getSourceAsMap();
            String fieldName = (String) source.get(ReconciledEntryEsBridge.Field.cdtFieldName.name());
            if (assignedFieldNames.contains(fieldName)) {
                // The builder rejects duplicate keys, so each field is assigned at most once.
                continue;
            }
            String detectedKey = detectedKeyResolver.apply(hit);
            Object jsonBlob = source.get(ReconciledEntryEsBridge.Field.jsonBlob.name());
            KeyValueAnnotation annotation = jsonBlob != null ? GSON.fromJson(jsonBlob.toString(), KeyValueAnnotation.class) : null;
            if (addBestMatchIfAvailable(fieldName, builder, keyToKeyData, detectedKey, annotation, unmatchedFieldNames)) {
                assignedFieldNames.add(fieldName);
                matched++;
            }
        }
        return matched;
    }

    /** Returns the name of the first query matched by {@code hit}, or {@code null} if none is reported. */
    private static String firstMatchedQueryName(SearchHit hit) {
        String[] matchedQueries = hit.getMatchedQueries();
        return matchedQueries != null && matchedQueries.length > 0 ? matchedQueries[0] : null;
    }

    /**
     * Orders two hit sources by their {@code counter} value, highest first. A source with a
     * numeric counter sorts before one without; two sources without a counter compare equal.
     *
     * @param map source map of the first hit
     * @param map2 source map of the second hit
     * @return negative, zero or positive following the {@link Comparator} convention
     */
    int compareSearchHits(Map<String, Object> map, Map<String, Object> map2) {
        Number firstCounter = counterOf(map);
        Number secondCounter = counterOf(map2);
        if (firstCounter != null && secondCounter != null) {
            // Compare as long: a parsed source map may hold the counter as Integer or Long.
            return Long.compare(secondCounter.longValue(), firstCounter.longValue());
        }
        if (firstCounter != null) {
            return -1;
        }
        return secondCounter != null ? 1 : 0;
    }

    /** Returns the numeric {@code counter} entry of {@code source}, or {@code null} when absent or not a number. */
    private static Number counterOf(Map<String, Object> source) {
        Object counter = source.get(ReconciledEntryEsBridge.Field.counter.name());
        return counter instanceof Number ? (Number) counter : null;
    }

    /**
     * Picks, among the detected keys stored under {@code detectedKey} that are on the annotation's
     * page, the one whose bounding box has the highest Jaccard index with the annotation.
     *
     * @return the best candidate, or {@code null} when there is no annotation, no candidate, or
     *     the best index does not exceed {@link OcrJobContext#DEFAULT_CONFIDENCE_THRESHOLD}
     */
    private KeyData findBestMatch(Map<String, Set<KeyData>> keyToKeyData, String detectedKey, KeyValueAnnotation annotation) {
        if (annotation == null) {
            return null;
        }
        Set<KeyData> candidates = keyToKeyData.get(detectedKey);
        if (candidates == null) {
            // The search returned a key text that is not among the detected keys.
            return null;
        }
        KeyData best = null;
        double bestOverlap = 0.0d;
        for (KeyData candidate : candidates) {
            if (!candidate.getPage().equals(annotation.getPage())) {
                continue;
            }
            double overlap = JaccardIndexComparer.computeJaccardIndex(annotation.getAnnotationCoordinates(), candidate.getAnnotationBoundingBox());
            if (best == null || overlap > bestOverlap) {
                best = candidate;
                bestOverlap = overlap;
            }
        }
        return bestOverlap > OcrJobContext.DEFAULT_CONFIDENCE_THRESHOLD ? best : null;
    }

    /**
     * Registers the best positional match for {@code fieldName}, if one exists. On success the
     * field is removed from {@code unmatchedFieldNames} and the chosen key is removed from the
     * candidate pool so it cannot be assigned to another field.
     *
     * @return {@code true} when a match was added to {@code builder}
     */
    private boolean addBestMatchIfAvailable(String fieldName, ImmutableMap.Builder<String, Collection<KeyData>> builder, Map<String, Set<KeyData>> keyToKeyData, String detectedKey, KeyValueAnnotation annotation, Set<String> unmatchedFieldNames) {
        KeyData best = findBestMatch(keyToKeyData, detectedKey, annotation);
        if (best == null) {
            return false;
        }
        builder.put(fieldName, ImmutableList.of(best));
        unmatchedFieldNames.remove(fieldName);
        // findBestMatch only returns non-null when the candidate set for detectedKey exists.
        keyToKeyData.get(detectedKey).remove(best);
        return true;
    }

    /** Groups the detected keys by their key text. */
    private Map<String, Set<KeyData>> getMapOfKeyToKeyDataList(Collection<KeyData> detectedKeys) {
        Map<String, Set<KeyData>> keyToKeyData = new HashMap<>();
        for (KeyData keyData : detectedKeys) {
            keyToKeyData.computeIfAbsent(keyData.getKey(), key -> new HashSet<>()).add(keyData);
        }
        return keyToKeyData;
    }

    /**
     * Searches reconciled entries whose {@code reconciledKey} equals one of {@code set2}, grouped
     * by field name with top hits sorted by {@code counter} descending.
     *
     * @param str the datatype's qualified name to filter on
     * @param set the field names to filter on; also sizes the terms aggregation, so it must not be empty
     * @param set2 the key texts to match exactly
     */
    SearchResponse searchExact(String str, Set<String> set, Set<String> set2) {
        QueryBuilder query = filter(str, set, QueryBuilders.termsQuery(ReconciledEntryEsBridge.Field.reconciledKey.name(), set2));
        SearchSourceBuilder source = new SearchSourceBuilder()
                .query(query)
                .aggregation(AggregationBuilders.terms("group_by_field_names")
                        .field(ReconciledEntryEsBridge.Field.cdtFieldName.name())
                        .size(set.size())
                        .subAggregation(AggregationBuilders.topHits("frequently_choosen")
                                .from(0)
                                .sort(SortBuilders.fieldSort(ReconciledEntryEsBridge.Field.counter.name()).order(SortOrder.DESC))
                                .fetchSource(SOURCE_FETCH_INCLUDES, new String[0])));
        return this.searchRequestExecutor.execute(this.searchRequestExecutor.buildSearchRequest(source));
    }

    /**
     * Searches reconciled entries whose {@code reconciledKeyNormalized} fuzzily matches one of
     * {@code set2} (fuzziness one, prefix length two, transpositions allowed), grouped by field
     * name with top hits sorted by score.
     *
     * @param str the datatype's qualified name to filter on
     * @param set the field names to filter on; also sizes the terms aggregation, so it must not be empty
     * @param set2 the key texts to match fuzzily
     */
    SearchResponse searchFuzzy(String str, Set<String> set, Set<String> set2) {
        DisMaxQueryBuilder disMaxQuery = QueryBuilders.disMaxQuery();
        for (String key : set2) {
            // Each clause is named after the original key text so a hit can be traced back to it.
            // NOTE(review): toLowerCase() uses the default locale — confirm this matches how
            // reconciledKeyNormalized is produced at index time.
            disMaxQuery.add(QueryBuilders.fuzzyQuery(ReconciledEntryEsBridge.Field.reconciledKeyNormalized.name(), StringUtils.normalizeSpace(key).toLowerCase())
                    .fuzziness(Fuzziness.ONE)
                    .prefixLength(2)
                    .transpositions(true)
                    .queryName(key));
        }
        SearchSourceBuilder source = new SearchSourceBuilder()
                .query(filter(str, set, disMaxQuery))
                .aggregation(AggregationBuilders.terms("group_by_field_names")
                        .field(ReconciledEntryEsBridge.Field.cdtFieldName.name())
                        .size(set.size())
                        .subAggregation(AggregationBuilders.topHits("frequently_choosen")
                                .from(0)
                                .sort(SortBuilders.scoreSort())
                                .fetchSource(SOURCE_FETCH_INCLUDES, new String[0])));
        return this.searchRequestExecutor.execute(this.searchRequestExecutor.buildSearchRequest(source));
    }

    /** Wraps {@code keyQuery} in a bool query restricted to reconciled entries of the given datatype and field names. */
    private QueryBuilder filter(String cdtQName, Set<String> fieldNames, QueryBuilder keyQuery) {
        return QueryBuilders.boolQuery()
                .filter(QueryBuilders.termQuery("_t", ReconciledEntryEsBridge.RECONCILE_ENTRY_TYPE))
                .filter(QueryBuilders.termQuery(ReconciledEntryEsBridge.Field.cdtQName.name(), cdtQName))
                .filter(QueryBuilders.termsQuery(ReconciledEntryEsBridge.Field.cdtFieldName.name(), fieldNames))
                .must(keyQuery);
    }

    /** Keeps detected keys short enough for the exact search, capped at {@link #EXACT_MATCH_TEXTS_LIMIT} entries. */
    private Set<KeyData> filterKeysForExactMatch(Collection<KeyData> detectedKeys) {
        return detectedKeys.stream()
                .filter(keyData -> keyData.getKey().length() <= EXACT_MATCH_TEXT_MAX_LENGTH)
                .limit(EXACT_MATCH_TEXTS_LIMIT)
                .collect(Collectors.toSet());
    }

    /**
     * Selects the key texts for the fuzzy search: keys that were eligible for the exact search and
     * whose length lies within the fuzzy bounds, taken in page order and capped at
     * {@link #FUZZY_MATCH_TEXTS_LIMIT} entries.
     */
    private Set<String> filterKeysForFuzzyMatch(Collection<KeyData> detectedKeys, Set<String> exactMatchKeys) {
        return detectedKeys.stream()
                .filter(keyData -> exactMatchKeys.contains(keyData.getKey()))
                .filter(keyData -> keyData.getKey().length() >= FUZZY_MATCH_TEXT_MIN_LENGTH)
                .filter(keyData -> keyData.getKey().length() <= FUZZY_MATCH_TEXT_MAX_LENGTH)
                .sorted(Comparator.comparingInt(KeyData::getPage))
                .limit(FUZZY_MATCH_TEXTS_LIMIT)
                .map(KeyData::getKey)
                .collect(Collectors.toSet());
    }
}
