package com.appian.documentunderstanding.prediction.keyvalue;

import com.appian.documentunderstanding.common.DocumentExtractionFeatureToggles;
import com.appian.documentunderstanding.common.DocumentExtractionMetricConstants;
import com.appian.documentunderstanding.populate.KeyData;
import com.appian.documentunderstanding.prediction.DocumentUnderstandingAbstractEsPredictionService;
import com.appian.documentunderstanding.prediction.PredictionType;
import com.appian.documentunderstanding.prediction.SearchRequestExecutor;
import com.appian.documentunderstanding.prediction.datatypes.CustomDatatype;
import com.appian.documentunderstanding.prediction.datatypes.CustomFieldType;
import com.appian.documentunderstanding.prediction.keyvalue.ReconciledEntryEsBridge;
import com.appian.documentunderstanding.prediction.metrics.DocExtractPredictionMetricsCollector;
import com.appiancorp.common.monitoring.ProductMetricsAggregatedDataCollector;
import com.appiancorp.core.expr.portable.assertions.Preconditions;
import com.google.common.collect.ImmutableMap;
import java.util.AbstractMap;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.common.unit.Fuzziness;
import org.elasticsearch.index.query.DisMaxQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.sort.SortBuilders;
import org.elasticsearch.search.sort.SortOrder;

/* loaded from: input_file:com/appian/documentunderstanding/prediction/keyvalue/DocumentUnderstandingKvpEsPredictionService.class */
/**
 * Elasticsearch-backed prediction service for key-value-pair ({@link PredictionType#KVP}) mappings.
 *
 * <p>Given the keys detected in a document, the service queries the reconciled entries stored for a
 * custom datatype and predicts which detected key belongs to which field of that datatype. An exact
 * match on the reconciled key is attempted first; when requested, fields that are still unmatched are
 * then searched with a fuzzy query.
 */
public class DocumentUnderstandingKvpEsPredictionService extends DocumentUnderstandingAbstractEsPredictionService {
    /** Exclusive lower bound on the length of a key that takes part in exact matching. */
    private static final int EXACT_MATCH_TEXT_MIN_LENGTH = 0;
    /** Inclusive upper bound on the length of a key that takes part in exact matching. */
    private static final int EXACT_MATCH_TEXT_MAX_LENGTH = 500;
    /** Maximum number of detected keys considered for the exact-match query. */
    private static final int EXACT_MATCH_TEXTS_LIMIT = 1000;
    /** Maximum number of detected keys considered for the fuzzy-match query. */
    private static final int FUZZY_MATCH_TEXTS_LIMIT = 1000;
    /** Inclusive upper bound on the length of a key that takes part in fuzzy matching. */
    private static final int FUZZY_MATCH_TEXT_MAX_LENGTH = 50;
    /** Inclusive lower bound on the length of a key that takes part in fuzzy matching. */
    private static final int FUZZY_MATCH_TEXT_MIN_LENGTH = 3;
    /** Source fields fetched for each top hit of the fuzzy search. */
    private static final String[] SOURCE_FETCH_INCLUDES = {ReconciledEntryEsBridge.Field.cdtFieldName.name(), ReconciledEntryEsBridge.Field.reconciledKey.name()};
    private final SearchRequestExecutor searchRequestExecutor;

    public DocumentUnderstandingKvpEsPredictionService(SearchRequestExecutor searchRequestExecutor, DocExtractPredictionMetricsCollector docExtractPredictionMetricsCollector, DocumentExtractionFeatureToggles documentExtractionFeatureToggles) {
        super(PredictionType.KVP, docExtractPredictionMetricsCollector, documentExtractionFeatureToggles);
        this.searchRequestExecutor = searchRequestExecutor;
    }

    /**
     * Predicts, for the fields of a custom datatype, which of the detected keys maps to each field.
     *
     * @param customDatatype the datatype whose fields are to be mapped; must not be null
     * @param set the field names to predict, or {@code null} to use the datatype's STRING and BOOLEAN fields
     * @param collection the keys detected in the document; must not be null
     * @param z whether a fuzzy search should be run for fields left unmatched by the exact search
     * @return a map from field name to the detected key data predicted for it; empty when there are no
     *     fields to predict or no detected key qualifies for matching
     */
    @Override // com.appian.documentunderstanding.prediction.PredictionService
    public Map<String, Collection<KeyData>> getPredictions(CustomDatatype customDatatype, Set<String> set, Collection<KeyData> collection, boolean z) {
        Preconditions.checkNotNull(customDatatype, "type");
        Preconditions.checkNotNull(collection, "detectedKeys");
        ImmutableMap.Builder<String, String> fieldToKey = ImmutableMap.builder();
        String cdtQName = customDatatype.getUnversionedQName();
        // Always work on a private copy: matched fields are removed from this set below, and neither the
        // caller's set nor the set handed out by the datatype may be modified as a side effect.
        Collection<String> requestedFields = set != null ? set : customDatatype.getFieldNames(CustomFieldType.STRING, CustomFieldType.BOOLEAN);
        Set<String> unmatchedFields = new HashSet<>(requestedFields);
        if (unmatchedFields.isEmpty()) {
            return Collections.emptyMap();
        }
        Set<String> exactKeys = filterKeysForExactMatch(collection);
        if (exactKeys.isEmpty()) {
            return Collections.emptyMap();
        }

        AtomicInteger exactMatches = new AtomicInteger();
        getTopHits(searchExact(cdtQName, unmatchedFields, exactKeys)).forEach(hit -> {
            Map<String, Object> source = hit.getSourceAsMap();
            String fieldName = (String) source.get(ReconciledEntryEsBridge.Field.cdtFieldName.name());
            fieldToKey.put(fieldName, (String) source.get(ReconciledEntryEsBridge.Field.reconciledKey.name()));
            // A field matched exactly is excluded from the fuzzy pass.
            unmatchedFields.remove(fieldName);
            exactMatches.getAndIncrement();
        });
        ProductMetricsAggregatedDataCollector.recordData(DocumentExtractionMetricConstants.DE_MAPPING_KEYS_ESMATCH_EXACT, exactMatches.get());

        if (z && !unmatchedFields.isEmpty()) {
            Set<String> fuzzyKeys = filterKeysForFuzzyMatch(collection, exactKeys);
            if (!fuzzyKeys.isEmpty()) {
                AtomicInteger fuzzyMatches = new AtomicInteger();
                getTopHits(searchFuzzy(cdtQName, unmatchedFields, fuzzyKeys)).forEach(hit -> {
                    // Each fuzzy clause is named after the detected key it was built from, so the first
                    // matched query name identifies the detected key. Without it the hit is unusable.
                    String[] matchedQueries = hit.getMatchedQueries();
                    if (matchedQueries == null || matchedQueries.length == 0) {
                        return;
                    }
                    String fieldName = (String) hit.getSourceAsMap().get(ReconciledEntryEsBridge.Field.cdtFieldName.name());
                    fieldToKey.put(fieldName, matchedQueries[0]);
                    unmatchedFields.remove(fieldName);
                    fuzzyMatches.getAndIncrement();
                });
                ProductMetricsAggregatedDataCollector.recordData(DocumentExtractionMetricConstants.DE_MAPPING_KEYS_ESMATCH_FUZZY, fuzzyMatches.get());
            }
        }
        return buildPredictionResults(collection, fieldToKey);
    }

    /**
     * Reports whether at least one reconciled entry is stored for the given datatype.
     *
     * @param customDatatype the datatype to look up; must not be null
     * @return {@code true} when the search reports a total hit count greater than zero
     */
    @Override // com.appian.documentunderstanding.prediction.DocumentUnderstandingAbstractEsPredictionService, com.appian.documentunderstanding.prediction.PredictionService
    public boolean doReconciliationsExist(CustomDatatype customDatatype) {
        Preconditions.checkNotNull(customDatatype, "type");
        QueryBuilder entriesOfDatatype = QueryBuilders.boolQuery()
            .filter(QueryBuilders.termQuery("_t", ReconciledEntryEsBridge.RECONCILE_ENTRY_TYPE))
            .filter(QueryBuilders.termQuery(ReconciledEntryEsBridge.Field.cdtQName.name(), customDatatype.getUnversionedQName()));
        // Only existence matters: fetch no documents and let the search stop after the first match.
        SearchSourceBuilder searchSource = new SearchSourceBuilder()
            .query(entriesOfDatatype)
            .trackTotalHits(true)
            .fetchSource(false)
            .trackScores(false)
            .size(0)
            .terminateAfter(1);
        SearchResponse response = this.searchRequestExecutor.execute(this.searchRequestExecutor.buildSearchRequest(searchSource));
        return response.getHits().getTotalHits().value > 0;
    }

    /** Builds the final field-name-to-key-data result from the accumulated field-to-key mapping. */
    private Map<String, Collection<KeyData>> buildPredictionResults(Collection<KeyData> detectedKeys, ImmutableMap.Builder<String, String> fieldToKey) {
        return getFieldNamesToKeyData(detectedKeys, fieldToKey.build());
    }

    /**
     * Searches reconciled entries whose reconciled key equals one of the given keys.
     *
     * <p>Results are grouped by field name; within each group the single entry with the highest counter
     * is returned as the top hit.
     *
     * @param str the unversioned qualified name of the datatype
     * @param set the field names to restrict the search to
     * @param set2 the detected keys to match exactly
     * @return the search response
     */
    SearchResponse searchExact(String str, Set<String> set, Set<String> set2) {
        String[] fetchedFields = {
            ReconciledEntryEsBridge.Field.cdtFieldName.name(),
            ReconciledEntryEsBridge.Field.reconciledKey.name(),
            ReconciledEntryEsBridge.Field.counter.name()
        };
        SearchSourceBuilder searchSource = new SearchSourceBuilder()
            .query(filter(str, set, QueryBuilders.termsQuery(ReconciledEntryEsBridge.Field.reconciledKey.name(), set2)))
            .aggregation(AggregationBuilders.terms("group_by_field_names")
                .field(ReconciledEntryEsBridge.Field.cdtFieldName.name())
                .size(set.size())
                .subAggregation(AggregationBuilders.topHits("frequently_choosen")
                    .size(1)
                    .from(0)
                    .sort(SortBuilders.fieldSort(ReconciledEntryEsBridge.Field.counter.name()).order(SortOrder.DESC))
                    .fetchSource(fetchedFields, new String[0])));
        return this.searchRequestExecutor.execute(this.searchRequestExecutor.buildSearchRequest(searchSource));
    }

    /**
     * Searches reconciled entries whose normalized reconciled key fuzzily matches one of the given keys.
     *
     * <p>One fuzzy clause is added per key and named after that key, so a hit's matched query names
     * identify the detected key(s) that matched. Results are grouped by field name; within each group the
     * best-scoring entry is returned as the top hit.
     *
     * @param str the unversioned qualified name of the datatype
     * @param set the field names to restrict the search to
     * @param set2 the detected keys to match fuzzily
     * @return the search response
     */
    SearchResponse searchFuzzy(String str, Set<String> set, Set<String> set2) {
        DisMaxQueryBuilder anyKeyMatches = QueryBuilders.disMaxQuery();
        for (String detectedKey : set2) {
            // NOTE(review): toLowerCase() uses the JVM default locale; confirm this agrees with how the
            // reconciledKeyNormalized field is produced at index time.
            String normalizedKey = StringUtils.normalizeSpace(detectedKey).toLowerCase();
            anyKeyMatches.add(QueryBuilders.fuzzyQuery(ReconciledEntryEsBridge.Field.reconciledKeyNormalized.name(), normalizedKey)
                .fuzziness(Fuzziness.ONE)
                .prefixLength(2)
                .transpositions(true)
                .queryName(detectedKey));
        }
        SearchSourceBuilder searchSource = new SearchSourceBuilder()
            .query(filter(str, set, anyKeyMatches))
            .aggregation(AggregationBuilders.terms("group_by_field_names")
                .field(ReconciledEntryEsBridge.Field.cdtFieldName.name())
                .size(set.size())
                .subAggregation(AggregationBuilders.topHits("frequently_choosen")
                    .size(1)
                    .from(0)
                    .sort(SortBuilders.scoreSort())
                    .fetchSource(SOURCE_FETCH_INCLUDES, new String[0])));
        return this.searchRequestExecutor.execute(this.searchRequestExecutor.buildSearchRequest(searchSource));
    }

    /**
     * Wraps a matching query so that it only applies to reconciled entries of the given datatype and
     * field names.
     */
    private QueryBuilder filter(String cdtQName, Set<String> fieldNames, QueryBuilder matchQuery) {
        return QueryBuilders.boolQuery()
            .filter(QueryBuilders.termQuery("_t", ReconciledEntryEsBridge.RECONCILE_ENTRY_TYPE))
            .filter(QueryBuilders.termQuery(ReconciledEntryEsBridge.Field.cdtQName.name(), cdtQName))
            .filter(QueryBuilders.termsQuery(ReconciledEntryEsBridge.Field.cdtFieldName.name(), fieldNames))
            .must(matchQuery);
    }

    /**
     * Selects the detected keys eligible for exact matching: non-empty, at most
     * {@link #EXACT_MATCH_TEXT_MAX_LENGTH} characters, and at most {@link #EXACT_MATCH_TEXTS_LIMIT} of
     * them. The limit is applied before de-duplication, so repeated keys count towards it.
     */
    private Set<String> filterKeysForExactMatch(Collection<KeyData> detectedKeys) {
        return detectedKeys.stream()
            .map(KeyData::getKey)
            .filter(key -> key.length() > EXACT_MATCH_TEXT_MIN_LENGTH)
            .filter(key -> key.length() <= EXACT_MATCH_TEXT_MAX_LENGTH)
            .limit(EXACT_MATCH_TEXTS_LIMIT)
            .collect(Collectors.toSet());
    }

    /**
     * Selects the detected keys eligible for fuzzy matching: keys that were eligible for exact matching
     * and whose length lies between {@link #FUZZY_MATCH_TEXT_MIN_LENGTH} and
     * {@link #FUZZY_MATCH_TEXT_MAX_LENGTH} inclusive. Keys are taken in ascending page order, up to
     * {@link #FUZZY_MATCH_TEXTS_LIMIT} of them.
     */
    private Set<String> filterKeysForFuzzyMatch(Collection<KeyData> detectedKeys, Set<String> exactKeys) {
        return detectedKeys.stream()
            .filter(keyData -> exactKeys.contains(keyData.getKey()))
            .filter(keyData -> keyData.getKey().length() >= FUZZY_MATCH_TEXT_MIN_LENGTH)
            .filter(keyData -> keyData.getKey().length() <= FUZZY_MATCH_TEXT_MAX_LENGTH)
            .sorted(Comparator.comparingInt(KeyData::getPage))
            .limit(FUZZY_MATCH_TEXTS_LIMIT)
            .map(KeyData::getKey)
            .collect(Collectors.toSet());
    }

    /**
     * Resolves each predicted key text back to the detected key data it came from.
     *
     * @param collection the keys detected in the document
     * @param map predicted mapping from field name to key text
     * @return a map from field name to a single-element list holding the first detected key whose text
     *     equals the predicted key; fields whose predicted key is not among the detected keys are omitted
     */
    Map<String, Collection<KeyData>> getFieldNamesToKeyData(Collection<KeyData> collection, Map<String, String> map) {
        return map.entrySet().stream()
            .map(prediction -> new AbstractMap.SimpleEntry<String, Optional<KeyData>>(
                prediction.getKey(),
                collection.stream()
                    .filter(keyData -> keyData.getKey().equals(prediction.getValue()))
                    .findFirst()))
            .filter(resolved -> resolved.getValue().isPresent())
            .collect(Collectors.toMap(
                Map.Entry::getKey,
                resolved -> Collections.singletonList(resolved.getValue().get())));
    }
}
