Browse Source

More efficiently schedule sync runs for files that need delayed sync runs

Signed-off-by: Claudio Cambra <claudio.cambra@gmail.com>
Claudio Cambra 3 years ago
parent
commit
bbbfe3f1c4

+ 3 - 3
src/common/syncjournaldb.cpp

@@ -1379,9 +1379,9 @@ bool SyncJournalDb::updateLocalMetadata(const QString &filename,
     query->bindValue(3, modtime);
     query->bindValue(4, size);
     query->bindValue(5, lockInfo._locked ? 1 : 0);
-    query->bindValue(6, lockInfo._lockOwnerDisplayName);
-    query->bindValue(7, lockInfo._lockOwnerId);
-    query->bindValue(8, lockInfo._lockOwnerType);
+    query->bindValue(6, lockInfo._lockOwnerType);
+    query->bindValue(7, lockInfo._lockOwnerDisplayName);
+    query->bindValue(8, lockInfo._lockOwnerId);
     query->bindValue(9, lockInfo._lockEditorApp);
     query->bindValue(10, lockInfo._lockTime);
     query->bindValue(11, lockInfo._lockTimeout);

+ 7 - 3
src/libsync/discovery.cpp

@@ -398,11 +398,15 @@ void ProcessDirectoryJob::processFile(PathTuple path,
     if(serverEntry.locked == SyncFileItem::LockStatus::LockedItem) {
         const auto lockExpirationTime = serverEntry.lockTime + serverEntry.lockTimeout;
         const auto timeRemaining = QDateTime::currentDateTime().secsTo(QDateTime::fromSecsSinceEpoch(lockExpirationTime));
-        const auto timerInterval = qMax(5LL, timeRemaining);
+        // Add on a second as a precaution, sometimes we catch the server before it has had a chance to update
+        const auto lockExpirationTimeout = qMax(5LL, timeRemaining + 1);
+
+        qCInfo(lcDisco) << "File:" << path._original << "is locked."
+                        << "Lock expires in:" << lockExpirationTimeout << "seconds."
+                        << "A sync run will be scheduled for around that time.";
 
-        qCInfo(lcDisco) << "Will re-check lock status for:" << path._original << "in:" << timerInterval << "seconds.";
         _discoveryData->_anotherSyncNeeded = true;
-        _discoveryData->_scheduleSyncInSecs = timerInterval;
+        _discoveryData->_filesNeedingScheduledSync.insert(path._original, lockExpirationTimeout);
     }
 
     // VFS suffixed files on the server are ignored

+ 1 - 1
src/libsync/discoveryphase.h

@@ -283,7 +283,7 @@ public:
     // output
     QByteArray _dataFingerprint;
     bool _anotherSyncNeeded = false;
-    int _scheduleSyncInSecs = -1;
+    QHash<QString, long long> _filesNeedingScheduledSync;
 
 signals:
     void fatalError(const QString &errorString);

+ 167 - 5
src/libsync/syncengine.cpp

@@ -463,7 +463,6 @@ void SyncEngine::startSync()
     }
 
     if (s_anySyncRunning || _syncRunning) {
-        ASSERT(false)
         return;
     }
 
@@ -699,10 +698,8 @@ void SyncEngine::slotDiscoveryFinished()
             restoreOldFiles(_syncItems);
         }
 
-        if (_discoveryPhase->_anotherSyncNeeded && _discoveryPhase->_scheduleSyncInSecs > 0) {
-            QTimer::singleShot(_discoveryPhase->_scheduleSyncInSecs * 1000, this, [this]{
-                this->startSync();
-            });
+        if (_discoveryPhase->_anotherSyncNeeded && !_discoveryPhase->_filesNeedingScheduledSync.empty()) {
+            slotScheduleFilesDelayedSync();
         } else if (_discoveryPhase->_anotherSyncNeeded && _anotherSyncNeeded == NoFollowUpSync) {
             _anotherSyncNeeded = ImmediateFollowUp;
         }
@@ -1130,4 +1127,169 @@ void SyncEngine::slotInsufficientRemoteStorage()
     emit syncError(msg, ErrorCategory::InsufficientRemoteStorage);
 }
 
+void SyncEngine::slotScheduleFilesDelayedSync()
+{
+    if (!_discoveryPhase || _discoveryPhase->_filesNeedingScheduledSync.empty()) {
+        return;
+    }
+
+    // The latest sync of the interval bucket is the one that goes through and is used in the timer.
+    // By running the sync run as late as possible in the selected interval, we try to strike a
+    // balance between updating the needed file in a timely manner while also syncing late enough
+    // to cover all the files in the interval bucket.
+
+    static constexpr long long intervalSecs = 60;
+    const auto scheduledSyncBuckets = groupNeededScheduledSyncRuns(intervalSecs);
+
+    qCDebug(lcEngine) << "Active scheduled sync run timers:" << _scheduledSyncTimers.count();
+
+    for (const auto &[scheduledSyncTimerSecs, filesAffected] : scheduledSyncBuckets) {
+
+        const auto currentSecsSinceEpoch = QDateTime::currentSecsSinceEpoch();
+        const auto scheduledSyncTimerTime = QDateTime::fromSecsSinceEpoch(currentSecsSinceEpoch + scheduledSyncTimerSecs);
+        const auto scheduledSyncTimerMsecs = std::chrono::milliseconds(scheduledSyncTimerSecs * 1000);
+
+        // We want to make sure that this bucket won't schedule a sync near a pre-existing sync run,
+        // as we often get, for example, locked file notifications one by one as the user interacts
+        // through the web.
+
+        if (nearbyScheduledSyncTimerUsable(scheduledSyncTimerSecs, intervalSecs)) {
+            qCInfo(lcEngine) << "Already have a nearby scheduled sync run at:" << scheduledSyncTimerTime
+                             << "which will be used for files:" << filesAffected;
+
+            // We still want to remove these files as files scheduled for sync. We schedule another timer
+            // for the same time the nearby scheduled sync run completes, remocing these files from our
+            // hash of files already scheduled for later sync.
+
+            QTimer::singleShot(scheduledSyncTimerMsecs, this, [this, filesAffected = filesAffected] {
+                qCInfo(lcEngine) << "Following files:" << filesAffected
+                                 << "were scheduled for a sync run that already had a nearby run scheduled.";
+
+                for (const auto &file : filesAffected) {
+                    _filesScheduledForLaterSync.remove(file);
+                }
+            });
+
+            continue;
+        }
+
+        qCInfo(lcEngine) << "Will have a new sync run in" << scheduledSyncTimerSecs
+                         << "seconds, at" << scheduledSyncTimerTime
+                         << "for files:" << filesAffected;
+
+        QSharedPointer<QTimer> newTimer(new QTimer);
+        newTimer->setSingleShot(true);
+
+        // In C++17 structured bindings (used above) cannot be captured, so we need an init capture.
+        // This is because structured bindings are never names of variables, thus uncapturable.
+        // This has been changed in C++20 -- feel free to change once we use it
+
+        newTimer->callOnTimeout(this, [this, filesAffected = filesAffected] {
+            qCInfo(lcEngine) << "Rescanning now that delayed sync run is scheduled for:" << filesAffected;
+
+            for (const auto &file : filesAffected) {
+                this->_filesScheduledForLaterSync.remove(file);
+            }
+
+            this->startSync();
+            this->slotCleanupScheduledSyncTimers();
+        });
+
+        newTimer->start(scheduledSyncTimerMsecs);
+        _scheduledSyncTimers.append(newTimer);
+    }
+}
+
+QHash<long long, SyncEngine::ScheduledSyncBucket> SyncEngine::groupNeededScheduledSyncRuns(const int interval)
+{
+    QHash<long long, ScheduledSyncBucket> intervalSyncBuckets;
+
+    for (auto it = _discoveryPhase->_filesNeedingScheduledSync.cbegin();
+         it != _discoveryPhase->_filesNeedingScheduledSync.cend();
+         ++it) {
+
+        const auto file = it.key();
+        const auto syncScheduledSecs = it.value();
+
+        // We don't want to schedule syncs again for files we have already discovered needing a
+        // scheduled sync, unless the files have been re-locked or had their lock expire time
+        // extended
+
+        if (_filesScheduledForLaterSync.contains(file) &&
+            _filesScheduledForLaterSync.value(file) > syncScheduledSecs) {
+
+            continue;
+        }
+
+        _filesScheduledForLaterSync.insert(file, syncScheduledSecs);
+        // Both long long so division results in floor-ed result
+        const auto intervalBucketKey = syncScheduledSecs / interval;
+
+        if (!intervalSyncBuckets.contains(intervalBucketKey)) {
+            intervalSyncBuckets.insert(intervalBucketKey, {syncScheduledSecs, {file}});
+            continue;
+        }
+
+        auto bucketValue = intervalSyncBuckets.value(intervalBucketKey);
+        bucketValue.scheduledSyncTimerSecs = qMax(bucketValue.scheduledSyncTimerSecs, syncScheduledSecs);
+        bucketValue.files.append(file);
+        intervalSyncBuckets.insert(intervalBucketKey, bucketValue);
+    }
+
+    return intervalSyncBuckets;
+}
+
+bool SyncEngine::nearbyScheduledSyncTimerUsable(const long long scheduledSyncTimerSecs,
+                                                const long long intervalSecs) const
+{
+    const auto scheduledSyncTimerMsecs = scheduledSyncTimerSecs * 1000;
+    const auto halfIntervalMsecs = (intervalSecs * 1000) / 2;
+    auto nearbyScheduledSync = false;
+
+    for (const auto &scheduledTimer : _scheduledSyncTimers) {
+
+        const auto timerRemainingMsecs = scheduledTimer->remainingTime();
+        const auto differenceMsecs = timerRemainingMsecs - scheduledSyncTimerMsecs;
+
+        nearbyScheduledSync = differenceMsecs > -halfIntervalMsecs && differenceMsecs < halfIntervalMsecs;
+
+        // Iterated timer is going to fire slightly before we need it to for the parameter timer,
+        // delay it.
+        if (differenceMsecs > -halfIntervalMsecs && differenceMsecs < 0) {
+            const auto scheduledSyncTimerTimeoutMsecs = std::chrono::milliseconds(scheduledSyncTimerMsecs);
+
+            nearbyScheduledSync = true;
+            scheduledTimer->start(scheduledSyncTimerTimeoutMsecs);
+
+            qCInfo(lcEngine) << "Delayed sync timer with remaining time" << timerRemainingMsecs / 1000
+                             << "by" << (differenceMsecs * -1) / 1000
+                             << "seconds due to nearby new sync run needed.";
+        }
+
+        if (nearbyScheduledSync) {
+            break;
+        }
+    }
+
+    return nearbyScheduledSync;
+}
+
+void SyncEngine::slotCleanupScheduledSyncTimers()
+{
+    qCDebug(lcEngine) << "Beginning scheduled sync timer cleanup.";
+
+    auto it = _scheduledSyncTimers.begin();
+
+    while(it != _scheduledSyncTimers.end()) {
+        const auto &timerPtr = *it;
+
+        if(timerPtr.isNull() || !timerPtr->isActive()) {
+            qCDebug(lcEngine) << "Erasing an expired scheduled sync timer.";
+            it = _scheduledSyncTimers.erase(it);
+        } else {
+            ++it;
+        }
+    }
+}
+
 } // namespace OCC

+ 51 - 0
src/libsync/syncengine.h

@@ -213,7 +213,35 @@ private slots:
     void slotInsufficientLocalStorage();
     void slotInsufficientRemoteStorage();
 
+    void slotScheduleFilesDelayedSync();
+    void slotCleanupScheduledSyncTimers();
+
 private:
+    // Some files need a sync run to be executed at a specified time after
+    // their status is scheduled to change (e.g. lock status will expire in
+    // 20 minutes.)
+    //
+    // Rather than execute a sync run for each file that needs one, we want
+    // to schedule as few sync runs as possible, trying to have the state of
+    // these files updated in a timely manner without scheduling runs too
+    // frequently. We can therefore group files into a bucket.
+    //
+    // A bucket contains a group of files requiring a sync run in close
+    // proximity to each other, with an assigned sync timer interval that can
+    // be used to schedule a sync run which will update all the files in the
+    // bucket at the time their state is scheduled to change.
+    //
+    // In the pair, first is the actual time at which the bucket is going to
+    // have its sync scheduled. Second is the vector of all the (paths of)
+    // files that fall into this bucket.
+    //
+    // See SyncEngine::groupNeededScheduledSyncRuns and
+    // SyncEngine::slotScheduleFilesDelayedSync for usage.
+    struct ScheduledSyncBucket {
+        long long scheduledSyncTimerSecs;
+        QVector<QString> files;
+    };
+
     bool checkErrorBlacklisting(SyncFileItem &item);
 
     // Cleans up unnecessary downloadinfo entries in the journal as well
@@ -232,6 +260,24 @@ private:
     // cleanup and emit the finished signal
     void finalize(bool success);
 
+    // Aggregate scheduled sync runs into interval buckets. Can be used to
+    // schedule a sync run per bucket instead of per file, reducing load.
+    //
+    // Bucket classification is done by simply dividing the seconds until
+    // scheduled sync time by the interval (note -- integer division!)
+    QHash<long long, ScheduledSyncBucket> groupNeededScheduledSyncRuns(const int interval);
+
+    // Checks if there is already a scheduled sync run timer active near the
+    // time provided as the parameter.
+    //
+    // If this timer will expire within the interval provided, the return is
+    // true.
+    //
+    // If this expiration occurs before the scheduled sync run provided as the
+    // parameter, it is rescheduled to expire at the time of the parameter.
+    bool nearbyScheduledSyncTimerUsable(const long long scheduledSyncTimerSecs,
+                                        const long long intervalSecs) const;
+
     static bool s_anySyncRunning; //true when one sync is running somewhere (for debugging)
 
     // Must only be acessed during update and reconcile
@@ -303,6 +349,11 @@ private:
     std::set<QString> _localDiscoveryPaths;
 
     QStringList _leadingAndTrailingSpacesFilesAllowed;
+
+    // Hash of files we have scheduled for later sync runs, along with their lock expire times
+    // NOTE: not necessarily the time at which their sync run will take place
+    QHash<QString, long long> _filesScheduledForLaterSync;
+    QVector<QSharedPointer<QTimer>> _scheduledSyncTimers;
 };
 }