package com.appian.documentunderstanding.client.appianocr;

import com.appian.documentunderstanding.client.ClientResponse;
import com.appian.documentunderstanding.client.service.AppianClient;
import com.appian.documentunderstanding.common.DocumentExtractionMetricConstants;
import com.appian.documentunderstanding.common.DocumentUnderstandingContentServiceAdapter;
import com.appian.documentunderstanding.exception.DocExtractionException;
import com.appian.documentunderstanding.exception.DocExtractionGenericException;
import com.appian.documentunderstanding.exception.InvalidAppianDocumentException;
import com.appian.documentunderstanding.exception.InvalidDocumentAttributesException;
import com.appian.documentunderstanding.exception.PermissionsException;
import com.appian.documentunderstanding.pdf.PdfInspector;
import com.appian.documentunderstanding.populate.InterpretedCheckbox;
import com.appian.documentunderstanding.populate.InterpretedDocKeyValuePair;
import com.appian.documentunderstanding.populate.InterpretedPage;
import com.appian.documentunderstanding.populate.InterpretedPoint;
import com.appian.documentunderstanding.populate.InterpretedToken;
import com.appian.documentunderstanding.populate.OcrResult;
import com.appian.documentunderstanding.queue.kafka.DownloadMessageToken;
import com.appiancorp.common.monitoring.ProductMetricsAggregatedDataCollector;
import com.appiancorp.documentunderstanding.persistence.DocExtractJob;
import com.appiancorp.documentunderstanding.persistence.Vendor;
import com.appiancorp.suiteapi.common.exceptions.AppianException;
import com.appiancorp.suiteapi.knowledge.Document;
import java.awt.geom.Rectangle2D;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.commons.lang.StringUtils;
import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm;
import org.apache.pdfbox.pdmodel.interactive.form.PDCheckBox;
import org.apache.pdfbox.pdmodel.interactive.form.PDField;
import org.apache.pdfbox.pdmodel.interactive.form.PDNonTerminalField;
import org.apache.pdfbox.util.Matrix;

/* loaded from: input_file:com/appian/documentunderstanding/client/appianocr/AppianOcrClient.class */
/**
 * Document-understanding client for {@link Vendor#APPIAN} that opens a PDF with PDFBox and builds an
 * {@link OcrResult} from the page annotations (text/choice fields and checkboxes) together with the
 * tokens returned by {@code AppianOcrGetWordsFromPdf}.
 */
public class AppianOcrClient extends AppianClient {
    /** Confidence attached to every key/value pair, checkbox and field token produced by this client. */
    private static final int MAX_CONFIDENCE = 1;
    private final DocumentUnderstandingContentServiceAdapter contentServiceAdapter;

    /**
     * @param documentUnderstandingContentServiceAdapter adapter used to look up documents and open
     *        their content streams
     */
    public AppianOcrClient(DocumentUnderstandingContentServiceAdapter documentUnderstandingContentServiceAdapter) {
        this.contentServiceAdapter = documentUnderstandingContentServiceAdapter;
    }

    /**
     * Resolves the job's Appian document through the content service adapter.
     *
     * @param docExtractJob job whose {@code appianDocId} identifies the document
     * @param str unused by this implementation
     * @return a response wrapping a single-element list with the document, or a retry response
     *         carrying the exception when the document is invalid or not accessible
     */
    @Override // com.appian.documentunderstanding.client.DocumentUnderstandingClient
    public ClientResponse<List<Document>> downloadToContent(DocExtractJob docExtractJob, String str) throws DocExtractionGenericException, AppianException {
        List<Document> documents = new ArrayList<>();
        try {
            documents.add(this.contentServiceAdapter.getDocument(docExtractJob.getAppianDocId()));
            return ClientResponse.buildResponse(documents);
        } catch (InvalidAppianDocumentException | InvalidDocumentAttributesException | PermissionsException e) {
            return ClientResponse.retryResponseWithException(e);
        }
    }

    /**
     * Loads the first document of {@code list} as a PDF and interprets every page.
     *
     * <p>For each page the annotations are split into text fields (key/value pairs), checkboxes and
     * populated fields that are additionally appended to the page's token list. Coordinates are scaled
     * by the page's media-box width and height.
     *
     * @param list documents to interpret; only the first element is read
     * @return an {@link OcrResult} keyed by 1-based page number
     * @throws IOException if the PDF cannot be loaded, read or closed
     */
    @Override // com.appian.documentunderstanding.client.DocumentUnderstandingClient
    public OcrResult interpret(List<Document> list) throws IOException, DocExtractionException {
        HashMap<Integer, InterpretedPage> pagesByNumber = new HashMap<>();
        // try-with-resources: the PDDocument must be closed once extraction is finished, otherwise its
        // buffers are leaked.
        try (PDDocument pdf = PDDocument.load(this.contentServiceAdapter.getDocumentInputStream(Long.valueOf(list.get(0).getId().longValue())))) {
            PDAcroForm acroForm = pdf.getDocumentCatalog().getAcroForm();
            AppianOcrGetWordsFromPdf wordExtractor = new AppianOcrGetWordsFromPdf();
            int pageCount = pdf.getNumberOfPages();
            for (int pageIndex = 0; pageIndex < pageCount; pageIndex++) {
                int pageNumber = pageIndex + 1;
                PDPage page = pdf.getPages().get(pageIndex);

                // Scale matrix that maps media-box coordinates into the 0..1 range.
                PDRectangle mediaBox = page.getMediaBox();
                float width = mediaBox.getWidth();
                float height = mediaBox.getHeight();
                Matrix normalizer = new Matrix();
                normalizer.scale(1.0f / width, 1.0f / height);

                List<PDAnnotation> annotations = page.getAnnotations();
                List<PDAnnotation> textFields = getTextFields(annotations);
                List<PDAnnotation> checkBoxes = getCheckBoxes(annotations);
                List<PDAnnotation> fieldsToTreatAsTokens = getFieldsToTreatAsTokens(annotations);
                List<InterpretedToken> tokens = wordExtractor.getInterpretedTokens(pdf, pageNumber);

                List<InterpretedDocKeyValuePair> keyValuePairs = new ArrayList<>();
                for (PDAnnotation textField : textFields) {
                    keyValuePairs.add(processTextPDAnnotation(textField, normalizer));
                }
                for (PDAnnotation field : fieldsToTreatAsTokens) {
                    tokens.add(processFieldAsToken(field, normalizer));
                }
                List<InterpretedCheckbox> checkboxes = new ArrayList<>();
                // Checkbox state is resolved through the AcroForm, so without one there is nothing to read.
                if (acroForm != null) {
                    for (PDAnnotation checkBox : checkBoxes) {
                        checkboxes.addAll(processCheckboxPDAnnotation(acroForm, checkBox, normalizer));
                    }
                }
                logPdfFieldMetrics(annotations);
                pagesByNumber.put(Integer.valueOf(pageNumber), InterpretedPage.builder(width, height).setKeyValuePairs(keyValuePairs).setCheckboxes(checkboxes).setTokens(tokens).build());
            }
        }
        return new OcrResult(pagesByNumber);
    }

    /** Records one product metric per annotation whose {@code /FT} entry is a name. */
    private void logPdfFieldMetrics(List<PDAnnotation> annotations) {
        for (PDAnnotation annotation : annotations) {
            // getCOSName yields null when /FT is absent or is not a name object.
            COSName fieldType = annotation.getCOSObject().getCOSName(COSName.FT);
            if (fieldType != null) {
                ProductMetricsAggregatedDataCollector.recordData(DocumentExtractionMetricConstants.DE_START_EXTRACTION_DOC_FIELD_TYPE_COUNT + fieldType.getName());
            }
        }
    }

    /** Returns the annotations whose field type is {@code /Btn}. */
    private List<PDAnnotation> getCheckBoxes(List<PDAnnotation> annotations) {
        return annotations.stream()
            .filter(annotation -> isEqualsFieldType(annotation, COSName.BTN))
            .collect(Collectors.toList());
    }

    /** Returns the visible, populated annotations whose field type is {@code /Tx} or {@code /Ch}. */
    private List<PDAnnotation> getFieldsToTreatAsTokens(List<PDAnnotation> annotations) {
        return annotations.stream()
            .filter(annotation -> isVisible(annotation)
                && (isEqualsFieldType(annotation, COSName.TX) || isEqualsFieldType(annotation, COSName.CH))
                && isPopulated(annotation))
            .collect(Collectors.toList());
    }

    /**
     * Returns the populated, visible annotations whose {@code /FT} value is contained in
     * {@link PdfInspector#ALLOWED_PDF_TEXT_FIELD_TYPES}.
     */
    private List<PDAnnotation> getTextFields(List<PDAnnotation> annotations) {
        return annotations.stream()
            .filter(annotation -> PdfInspector.ALLOWED_PDF_TEXT_FIELD_TYPES.contains(annotation.getCOSObject().getDictionaryObject(COSName.FT))
                && isPopulated(annotation)
                && isVisible(annotation))
            .collect(Collectors.toList());
    }

    /**
     * Converts a populated field annotation into a token whose text is the field value and whose
     * bounds are the annotation rectangle.
     */
    private InterpretedToken processFieldAsToken(PDAnnotation annotation, Matrix normalizer) {
        COSDictionary dictionary = annotation.getCOSObject();
        List<InterpretedPoint> points = getInterpretedPoints(annotation, normalizer);
        String value = dictionary.getString(COSName.V);
        // A choice field with no plain string value may store its selection as an array under /V.
        if (isEqualsFieldType(annotation, COSName.CH) && StringUtils.isBlank(value)) {
            value = getStringFromMultiValuedElement(dictionary);
        }
        return new InterpretedToken(value, MAX_CONFIDENCE, points);
    }

    /**
     * Joins the string entries of the {@code /V} array with single spaces.
     *
     * @return the joined values, or an empty string when {@code /V} is not an array
     */
    private String getStringFromMultiValuedElement(COSDictionary dictionary) {
        COSArray values = dictionary.getCOSArray(COSName.V);
        if (values == null) {
            return "";
        }
        List<String> parts = new ArrayList<>();
        for (int i = 0; i < values.size(); i++) {
            parts.add(values.getString(i));
        }
        return parts.stream().collect(Collectors.joining(" "));
    }

    /**
     * Builds a key/value pair from a text or choice annotation: {@code /T} is the key, {@code /V} the
     * value, and the annotation rectangle is used for the key, value and key-value bounds alike.
     */
    private InterpretedDocKeyValuePair processTextPDAnnotation(PDAnnotation annotation, Matrix normalizer) {
        COSDictionary dictionary = annotation.getCOSObject();
        String key = dictionary.getString(COSName.T);
        String value = dictionary.getString(COSName.V);
        // A choice field with no plain string value may store its selection as an array under /V.
        if (isEqualsFieldType(annotation, COSName.CH) && StringUtils.isBlank(value)) {
            value = getStringFromMultiValuedElement(dictionary);
        }
        List<InterpretedPoint> points = getInterpretedPoints(annotation, normalizer);
        return new InterpretedDocKeyValuePair.InterpretedDocKeyValuePairBuilder()
            .setConfidence(MAX_CONFIDENCE)
            .setKeyText(key)
            .setValueText(value)
            .setExactAnnotation(true)
            .setKeyAnnotation(points)
            .setValueAnnotation(points)
            .setKeyValueAnnotation(points)
            .build();
    }

    /**
     * Looks up the form field named by the annotation's {@code /T} entry and returns one interpreted
     * checkbox per widget of every terminal checkbox found under that field.
     */
    private List<InterpretedCheckbox> processCheckboxPDAnnotation(PDAcroForm acroForm, PDAnnotation annotation, Matrix normalizer) {
        PDField field = acroForm.getField(annotation.getCOSObject().getString(COSName.T));
        return getTerminalCheckboxes(field).stream()
            .flatMap(checkBox -> mapPdCheckboxToInterpretedCheckboxes(checkBox, normalizer).stream())
            .collect(Collectors.toList());
    }

    /** Creates one {@link InterpretedCheckbox} per widget of {@code checkBox}, all sharing its name and state. */
    private List<InterpretedCheckbox> mapPdCheckboxToInterpretedCheckboxes(PDCheckBox checkBox, Matrix normalizer) {
        boolean checked = checkBox.isChecked();
        String name = checkBox.getCOSObject().getString(COSName.T);
        return checkBox.getWidgets().stream()
            .map(widget -> getInterpretedPoints(widget, normalizer))
            .map(points -> new InterpretedCheckbox(name, checked, MAX_CONFIDENCE, points, points))
            .collect(Collectors.toList());
    }

    /**
     * Recursively collects the checkboxes under {@code field}.
     *
     * @return the field itself when it is a checkbox, the checkboxes among its descendants when it is
     *         a non-terminal field, otherwise (including {@code null}) an empty list
     */
    private List<PDCheckBox> getTerminalCheckboxes(PDField field) {
        if (field instanceof PDCheckBox) {
            return Collections.singletonList((PDCheckBox) field);
        }
        if (field instanceof PDNonTerminalField) {
            return ((PDNonTerminalField) field).getChildren().stream()
                .map(this::getTerminalCheckboxes)
                .flatMap(List::stream)
                .collect(Collectors.toList());
        }
        return new ArrayList<>();
    }

    /**
     * Returns the four corners of the annotation rectangle after applying {@code normalizer}.
     *
     * <p>The y coordinate is inverted ({@code 1 - y}); presumably this converts from PDF's bottom-left
     * origin to a top-left origin. Order: (minX, maxY), (maxX, maxY), (maxX, minY), (minX, minY) of the
     * transformed bounds, each with y inverted.
     */
    private List<InterpretedPoint> getInterpretedPoints(PDAnnotation annotation, Matrix normalizer) {
        Rectangle2D bounds = annotation.getRectangle().transform(normalizer).getBounds2D();
        double top = 1.0d - bounds.getMaxY();
        double bottom = 1.0d - bounds.getMinY();
        List<InterpretedPoint> corners = new ArrayList<>();
        corners.add(new InterpretedPoint(bounds.getMinX(), top));
        corners.add(new InterpretedPoint(bounds.getMaxX(), top));
        corners.add(new InterpretedPoint(bounds.getMaxX(), bottom));
        corners.add(new InterpretedPoint(bounds.getMinX(), bottom));
        return corners;
    }

    /**
     * Tells whether the annotation carries a value.
     *
     * @return {@code true} when {@code /V} is a non-blank string, or when the dictionary also has
     *         {@code /Opt} and {@code /V} is a non-empty array; {@code false} otherwise
     */
    boolean isPopulated(PDAnnotation pDAnnotation) {
        COSDictionary dictionary = pDAnnotation.getCOSObject();
        if (!dictionary.containsKey(COSName.V)) {
            return false;
        }
        if (StringUtils.isNotBlank(dictionary.getString(COSName.V))) {
            return true;
        }
        if (!dictionary.containsKey(COSName.OPT)) {
            return false;
        }
        COSArray values = dictionary.getCOSArray(COSName.V);
        return values != null && values.size() > 0;
    }

    /** An annotation is visible when neither its hidden nor its no-view flag is set. */
    private boolean isVisible(PDAnnotation annotation) {
        return !annotation.isHidden() && !annotation.isNoView();
    }

    /** Tells whether the annotation's {@code /FT} name equals {@code fieldType}. */
    private boolean isEqualsFieldType(PDAnnotation annotation, COSName fieldType) {
        return fieldType.equals(annotation.getCOSObject().getCOSName(COSName.FT));
    }

    /** Builds a download token from this client's vendor and the job id (passed as both string and {@code Long}). */
    @Override // com.appian.documentunderstanding.client.DocumentUnderstandingClient
    public DownloadMessageToken buildDownloadMessageTokenForJob(DocExtractJob docExtractJob) {
        Long id = docExtractJob.getId();
        return new DownloadMessageToken(getVendor(), String.valueOf(id), id);
    }

    /** @return {@link Vendor#APPIAN} */
    @Override // com.appian.documentunderstanding.client.DocumentUnderstandingClient
    public Vendor getVendor() {
        return Vendor.APPIAN;
    }
}
