001package ca.uhn.fhir.jpa.batch.reader;
002
003/*-
004 * #%L
005 * HAPI FHIR JPA Server
006 * %%
007 * Copyright (C) 2014 - 2022 Smile CDR, Inc.
008 * %%
009 * Licensed under the Apache License, Version 2.0 (the "License");
010 * you may not use this file except in compliance with the License.
011 * You may obtain a copy of the License at
012 *
013 *      http://www.apache.org/licenses/LICENSE-2.0
014 *
015 * Unless required by applicable law or agreed to in writing, software
016 * distributed under the License is distributed on an "AS IS" BASIS,
017 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
018 * See the License for the specific language governing permissions and
019 * limitations under the License.
020 * #L%
021 */
022
023import org.slf4j.Logger;
024import org.slf4j.LoggerFactory;
025
026import java.util.Date;
027import java.util.List;
028import java.util.Set;
029import java.util.function.Function;
030
/**
 * Helper for batch jobs that walk resource pids ordered by date in several passes.  After each pass it
 * derives the date threshold for the next pass and maintains the set of pids that share that threshold
 * date, so the caller can exclude them and avoid processing the same resource twice.
 */
public class BatchDateThresholdUpdater {
        /** Resolves the date of a resource from its pid; must be set before {@link #updateThresholdAndCache} is called with a non-empty list. */
        private Function<Long, Date> myDateFromPid;

        /**
         * Creates an updater without a date function; one must be supplied via {@link #setDateFromPid(Function)}.
         */
        public BatchDateThresholdUpdater() {
        }

        /**
         * @param theDateFromPid the function used to look up the date of a resource from its pid
         */
        public BatchDateThresholdUpdater(Function<Long, Date> theDateFromPid) {
                myDateFromPid = theDateFromPid;
        }

        /**
         * This method is used by batch jobs that process resource pids by date in multiple passes.  It's used to ensure
         * the same resource isn't processed twice.  After a pass of processing pids, it takes the threshold date for the
         * next pass from the last resource on the list and collects all of the resources that have that date into the
         * supplied cache so that the caller can exclude those from the next pass.
         * <p>
         * Dates are compared by value, so the cache is only cleared when the threshold date actually changes, not
         * merely because the date function returned a different {@link Date} instance for the same instant.
         *
         * @param thePrevThreshold                         the date threshold from the previous pass
         * @param theAlreadyProcessedPidsWithThresholdDate the set to load pids into that have the new threshold; it is
         *                                                 cleared first when the threshold date changes
         * @param theProcessedPidsOrderedByDate            the pids ordered by date (can be ascending or descending)
         * @return the new date threshold (can be the same as the old threshold if all pids on the list share the same
         * date); {@code thePrevThreshold} itself is returned when the list is empty
         */
        public Date updateThresholdAndCache(Date thePrevThreshold, Set<Long> theAlreadyProcessedPidsWithThresholdDate, List<Long> theProcessedPidsOrderedByDate) {
                if (theProcessedPidsOrderedByDate.isEmpty()) {
                        // Nothing was processed in this pass, so neither the threshold nor the cache changes
                        return thePrevThreshold;
                }

                // The new threshold is the date of the last resource in the batch we found
                int lastIndex = theProcessedPidsOrderedByDate.size() - 1;
                Long pidOfLatestResourceInBatch = theProcessedPidsOrderedByDate.get(lastIndex);
                Date latestUpdatedDate = myDateFromPid.apply(pidOfLatestResourceInBatch);

                // Only when the threshold date has really changed (by value, not by reference) do the pids cached
                // for the previous threshold become irrelevant
                if (!isSameDate(latestUpdatedDate, thePrevThreshold)) {
                        theAlreadyProcessedPidsWithThresholdDate.clear();
                }
                theAlreadyProcessedPidsWithThresholdDate.add(pidOfLatestResourceInBatch);

                // Walk backwards from the second-to-last pid and collect every pid sharing the threshold date.
                // The list is assumed to be ordered by date, so the first different date ends the scan.
                for (int index = lastIndex - 1; index >= 0; --index) {
                        Long pid = theProcessedPidsOrderedByDate.get(index);
                        if (!isSameDate(latestUpdatedDate, myDateFromPid.apply(pid))) {
                                break;
                        }
                        theAlreadyProcessedPidsWithThresholdDate.add(pid);
                }

                return latestUpdatedDate;
        }

        /**
         * @param theDateFromPid this is a Function to extract a date from a resource id
         * @return this updater, to allow call chaining
         */
        public BatchDateThresholdUpdater setDateFromPid(Function<Long, Date> theDateFromPid) {
                myDateFromPid = theDateFromPid;
                return this;
        }

        /**
         * Null-safe value comparison of two dates: two nulls are the same, a null and a non-null are not.
         */
        private static boolean isSameDate(Date theFirst, Date theSecond) {
                if (theFirst == null) {
                        return theSecond == null;
                }
                return theFirst.equals(theSecond);
        }
}