csync_reconcile.cpp 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470
  1. /*
  2. * libcsync -- a library to sync a directory with another
  3. *
  4. * Copyright (c) 2008-2013 by Andreas Schneider <asn@cryptomilk.org>
  5. *
  6. * This library is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * This library is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with this library; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19. */
  20. #include "config_csync.h"
  21. #include <assert.h>
  22. #include "csync_private.h"
  23. #include "csync_reconcile.h"
  24. #include "csync_util.h"
  25. #include "csync_rename.h"
  26. #include "common/c_jhash.h"
  27. #include "common/asserts.h"
  28. #include "common/syncjournalfilerecord.h"
  29. #include <QLoggingCategory>
  30. Q_LOGGING_CATEGORY(lcReconcile, "nextcloud.sync.csync.reconciler", QtInfoMsg)
  31. // Needed for PRIu64 on MinGW in C++ mode.
  32. #define __STDC_FORMAT_MACROS
  33. #include "inttypes.h"
  34. /* Check if a file is ignored because one parent is ignored.
  35. * return the node of the ignored directoy if it's the case, or \c nullptr if it is not ignored */
  36. static csync_file_stat_t *_csync_check_ignored(csync_s::FileMap *tree, const ByteArrayRef &path)
  37. {
  38. /* compute the size of the parent directory */
  39. int parentlen = path.size() - 1;
  40. while (parentlen > 0 && path.at(parentlen) != '/') {
  41. parentlen--;
  42. }
  43. if (parentlen <= 0) {
  44. return nullptr;
  45. }
  46. ByteArrayRef parentPath = path.left(parentlen);
  47. csync_file_stat_t *fs = tree->findFile(parentPath);
  48. if (fs) {
  49. if (fs->instruction == CSYNC_INSTRUCTION_IGNORE) {
  50. /* Yes, we are ignored */
  51. return fs;
  52. } else {
  53. /* Not ignored */
  54. return nullptr;
  55. }
  56. } else {
  57. /* Try if the parent itself is ignored */
  58. return _csync_check_ignored(tree, parentPath);
  59. }
  60. }
  61. /**
  62. * The main function in the reconcile pass.
  63. *
  64. * It's called for each entry in the local and remote files by
  65. * csync_reconcile()
  66. *
  67. * Before the reconcile phase the trees already know about changes
  68. * relative to the sync journal. This function's job is to spot conflicts
  69. * between local and remote changes and adjust the nodes accordingly.
  70. *
  71. * See doc/dev/sync-algorithm.md for an overview.
  72. *
  73. *
  74. * Older detail comment:
  75. *
  76. * We merge replicas at the file level. The merged replica contains the
  77. * superset of files that are on the local machine and server copies of
  78. * the replica. In the case where the same file is in both the local
  79. * and server copy, the file that was modified most recently is used.
  80. * This means that new files are not deleted, and updated versions of
  81. * existing files are not overwritten.
  82. *
  83. * When a file is updated, the merge algorithm compares the destination
  84. * file with the the source file. If the destination file is newer
  85. * (timestamp is newer), it is not overwritten. If both files, on the
  86. * source and the destination, have been changed, the newer file wins.
  87. */
  88. static void _csync_merge_algorithm_visitor(csync_file_stat_t *cur, CSYNC * ctx) {
  89. csync_s::FileMap *our_tree = nullptr;
  90. csync_s::FileMap *other_tree = nullptr;
  91. /* we need the opposite tree! */
  92. switch (ctx->current) {
  93. case LOCAL_REPLICA:
  94. our_tree = &ctx->local.files;
  95. other_tree = &ctx->remote.files;
  96. break;
  97. case REMOTE_REPLICA:
  98. our_tree = &ctx->remote.files;
  99. other_tree = &ctx->local.files;
  100. break;
  101. default:
  102. break;
  103. }
  104. csync_file_stat_t *other = other_tree->findFile(cur->path);
  105. if (!other) {
  106. if (ctx->current == REMOTE_REPLICA) {
  107. // The file was not found and the other tree is the local one
  108. // check if the path doesn't match a mangled file name
  109. other = other_tree->findFileMangledName(cur->path);
  110. } else {
  111. other = other_tree->findFile(cur->e2eMangledName);
  112. }
  113. }
  114. if (!other) {
  115. /* Check the renamed path as well. */
  116. other = other_tree->findFile(csync_rename_adjust_parent_path(ctx, cur->path));
  117. }
  118. if (!other) {
  119. /* Check if it is ignored */
  120. other = _csync_check_ignored(other_tree, cur->path);
  121. /* If it is ignored, other->instruction will be IGNORE so this one will also be ignored */
  122. }
  123. /* file only found on current replica */
  124. if (!other) {
  125. switch(cur->instruction) {
  126. /* file has been modified */
  127. case CSYNC_INSTRUCTION_EVAL:
  128. cur->instruction = CSYNC_INSTRUCTION_NEW;
  129. break;
  130. /* file has been removed on the opposite replica */
  131. case CSYNC_INSTRUCTION_NONE:
  132. case CSYNC_INSTRUCTION_UPDATE_METADATA:
  133. if (cur->has_ignored_files) {
  134. /* Do not remove a directory that has ignored files */
  135. break;
  136. }
  137. if (cur->child_modified) {
  138. /* re-create directory that has modified contents */
  139. cur->instruction = CSYNC_INSTRUCTION_NEW;
  140. break;
  141. }
  142. cur->instruction = CSYNC_INSTRUCTION_REMOVE;
  143. break;
  144. case CSYNC_INSTRUCTION_EVAL_RENAME: {
  145. // By default, the EVAL_RENAME decays into a NEW
  146. cur->instruction = CSYNC_INSTRUCTION_NEW;
  147. bool processedRename = false;
  148. auto renameCandidateProcessing = [&](const QByteArray &basePath) {
  149. if (processedRename)
  150. return;
  151. if (basePath.isEmpty())
  152. return;
  153. /* First, check that the file is NOT in our tree (another file with the same name was added) */
  154. if (our_tree->findFile(basePath)) {
  155. other = nullptr;
  156. qCInfo(lcReconcile, "Origin found in our tree : %s", basePath.constData());
  157. } else {
  158. /* Find the potential rename source file in the other tree.
  159. * If the renamed file could not be found in the opposite tree, that is because it
  160. * is not longer existing there, maybe because it was renamed or deleted.
  161. * The journal is cleaned up later after propagation.
  162. */
  163. other = other_tree->findFile(basePath);
  164. qCInfo(lcReconcile, "Rename origin in other tree (%s) %s",
  165. basePath.constData(), other ? "found" : "not found");
  166. }
  167. if(!other) {
  168. // Stick with the NEW
  169. return;
  170. } else if (other->instruction == CSYNC_INSTRUCTION_RENAME) {
  171. // Some other EVAL_RENAME already claimed other.
  172. // We do nothing: maybe a different candidate for
  173. // other is found as well?
  174. qCInfo(lcReconcile, "Other has already been renamed to %s",
  175. other->rename_path.constData());
  176. } else if (cur->type == ItemTypeDirectory
  177. // The local replica is reconciled first, so the remote tree would
  178. // have either NONE or UPDATE_METADATA if the remote file is safe to
  179. // move.
  180. // In the remote replica, REMOVE is also valid (local has already
  181. // been reconciled). NONE can still happen if the whole parent dir
  182. // was set to REMOVE by the local reconcile.
  183. || other->instruction == CSYNC_INSTRUCTION_NONE
  184. || other->instruction == CSYNC_INSTRUCTION_UPDATE_METADATA
  185. || other->instruction == CSYNC_INSTRUCTION_REMOVE) {
  186. qCInfo(lcReconcile, "Switching %s to RENAME to %s",
  187. other->path.constData(), cur->path.constData());
  188. other->instruction = CSYNC_INSTRUCTION_RENAME;
  189. other->rename_path = cur->path;
  190. if( !cur->file_id.isEmpty() ) {
  191. other->file_id = cur->file_id;
  192. }
  193. if (ctx->current == LOCAL_REPLICA) {
  194. // Keep the local mtime.
  195. other->modtime = cur->modtime;
  196. }
  197. other->inode = cur->inode;
  198. cur->instruction = CSYNC_INSTRUCTION_NONE;
  199. // We have consumed 'other': exit this loop to not consume another one.
  200. processedRename = true;
  201. } else if (our_tree->findFile(csync_rename_adjust_parent_path(ctx, other->path)) == cur) {
  202. // If we're here, that means that the other side's reconcile will be able
  203. // to work against cur: The filename itself didn't change, only a parent
  204. // directory was renamed! In that case it's safe to ignore the rename
  205. // since the parent directory rename will already deal with it.
  206. // Local: The remote reconcile will be able to deal with this.
  207. // Remote: The local replica has already dealt with this.
  208. // See the EVAL_RENAME case when other was found directly.
  209. qCInfo(lcReconcile, "File in a renamed directory, other side's instruction: %d",
  210. other->instruction);
  211. cur->instruction = CSYNC_INSTRUCTION_NONE;
  212. } else {
  213. // This can, for instance, happen when there was a local change in other
  214. // and the instruction in the local tree is NEW while cur has EVAL_RENAME
  215. // due to a remote move of the same file. In these scenarios we just
  216. // want the instruction to stay NEW.
  217. qCInfo(lcReconcile, "Other already has instruction %d",
  218. other->instruction);
  219. }
  220. };
  221. if (ctx->current == LOCAL_REPLICA) {
  222. /* use the old name to find the "other" node */
  223. OCC::SyncJournalFileRecord base;
  224. qCInfo(lcReconcile, "Finding rename origin through inode %" PRIu64 "",
  225. cur->inode);
  226. ctx->statedb->getFileRecordByInode(cur->inode, &base);
  227. renameCandidateProcessing(base._path);
  228. } else {
  229. ASSERT(ctx->current == REMOTE_REPLICA);
  230. // The update phase has already mapped out all dir->dir renames, check the
  231. // path that is consistent with that first. Otherwise update mappings and
  232. // reconcile mappings might disagree, leading to odd situations down the
  233. // line.
  234. auto basePath = csync_rename_adjust_full_path_source(ctx, cur->path);
  235. if (basePath != cur->path) {
  236. qCInfo(lcReconcile, "Trying rename origin by csync_rename mapping %s",
  237. basePath.constData());
  238. // We go through getFileRecordsByFileId to ensure the basePath
  239. // computed in this way also has the expected fileid.
  240. ctx->statedb->getFileRecordsByFileId(cur->file_id,
  241. [&](const OCC::SyncJournalFileRecord &base) {
  242. if (base._path == basePath)
  243. renameCandidateProcessing(basePath);
  244. });
  245. }
  246. // Also feed all the other files with the same fileid if necessary
  247. if (!processedRename) {
  248. qCInfo(lcReconcile, "Finding rename origin through file ID %s",
  249. cur->file_id.constData());
  250. ctx->statedb->getFileRecordsByFileId(cur->file_id,
  251. [&](const OCC::SyncJournalFileRecord &base) { renameCandidateProcessing(base._path); });
  252. }
  253. }
  254. break;
  255. }
  256. default:
  257. break;
  258. }
  259. } else {
  260. bool is_conflict = true;
  261. /*
  262. * file found on the other replica
  263. */
  264. switch (cur->instruction) {
  265. case CSYNC_INSTRUCTION_UPDATE_METADATA:
  266. if (other->instruction == CSYNC_INSTRUCTION_UPDATE_METADATA && ctx->current == LOCAL_REPLICA) {
  267. // Remote wins, the SyncEngine will pick relevant local metadata since the remote tree is walked last.
  268. cur->instruction = CSYNC_INSTRUCTION_NONE;
  269. }
  270. break;
  271. case CSYNC_INSTRUCTION_EVAL_RENAME:
  272. /* If the file already exist on the other side, we have a conflict.
  273. Abort the rename and consider it is a new file. */
  274. cur->instruction = CSYNC_INSTRUCTION_NEW;
  275. /* fall through */
  276. /* file on current replica is changed or new */
  277. case CSYNC_INSTRUCTION_EVAL:
  278. case CSYNC_INSTRUCTION_NEW:
  279. switch (other->instruction) {
  280. /* file on other replica is changed or new */
  281. case CSYNC_INSTRUCTION_NEW:
  282. case CSYNC_INSTRUCTION_EVAL:
  283. if (other->type == ItemTypeDirectory &&
  284. cur->type == ItemTypeDirectory) {
  285. // Folders of the same path are always considered equals
  286. is_conflict = false;
  287. } else {
  288. // If the size or mtime is different, it's definitely a conflict.
  289. is_conflict = ((other->size != cur->size) || (other->modtime != cur->modtime));
  290. // It could be a conflict even if size and mtime match!
  291. //
  292. // In older client versions we always treated these cases as a
  293. // non-conflict. This behavior is preserved in case the server
  294. // doesn't provide a content checksum.
  295. //
  296. // When it does have one, however, we do create a job, but the job
  297. // will compare hashes and avoid the download if possible.
  298. QByteArray remoteChecksumHeader =
  299. (ctx->current == REMOTE_REPLICA ? cur->checksumHeader : other->checksumHeader);
  300. if (!remoteChecksumHeader.isEmpty()) {
  301. is_conflict = true;
  302. // Do we have an UploadInfo for this?
  303. // Maybe the Upload was completed, but the connection was broken just before
  304. // we recieved the etag (Issue #5106)
  305. auto up = ctx->statedb->getUploadInfo(cur->path);
  306. if (up._valid && up._contentChecksum == remoteChecksumHeader) {
  307. // Solve the conflict into an upload, or nothing
  308. auto remoteNode = ctx->current == REMOTE_REPLICA ? cur : other;
  309. auto localNode = ctx->current == REMOTE_REPLICA ? other : cur;
  310. remoteNode->instruction = CSYNC_INSTRUCTION_NONE;
  311. localNode->instruction = up._modtime == localNode->modtime ? CSYNC_INSTRUCTION_UPDATE_METADATA : CSYNC_INSTRUCTION_SYNC;
  312. // Update the etag and other server metadata in the journal already
  313. // (We can't use a typical CSYNC_INSTRUCTION_UPDATE_METADATA because
  314. // we must not store the size/modtime from the file system)
  315. OCC::SyncJournalFileRecord rec;
  316. if (ctx->statedb->getFileRecord(remoteNode->path, &rec)) {
  317. rec._path = remoteNode->path;
  318. rec._etag = remoteNode->etag;
  319. rec._fileId = remoteNode->file_id;
  320. rec._modtime = remoteNode->modtime;
  321. rec._type = remoteNode->type;
  322. rec._fileSize = remoteNode->size;
  323. rec._remotePerm = remoteNode->remotePerm;
  324. rec._checksumHeader = remoteNode->checksumHeader;
  325. ctx->statedb->setFileRecordMetadata(rec);
  326. }
  327. break;
  328. }
  329. }
  330. // SO: If there is no checksum, we can have !is_conflict here
  331. // even though the files have different content! This is an
  332. // intentional tradeoff. Downloading and comparing files would
  333. // be technically correct in this situation but leads to too
  334. // much waste.
  335. // In particular this kind of NEW/NEW situation with identical
  336. // sizes and mtimes pops up when the local database is lost for
  337. // whatever reason.
  338. }
  339. if (ctx->current == REMOTE_REPLICA) {
  340. // If the files are considered equal, only update the DB with the etag from remote
  341. cur->instruction = is_conflict ? CSYNC_INSTRUCTION_CONFLICT : CSYNC_INSTRUCTION_UPDATE_METADATA;
  342. other->instruction = CSYNC_INSTRUCTION_NONE;
  343. } else {
  344. cur->instruction = CSYNC_INSTRUCTION_NONE;
  345. other->instruction = is_conflict ? CSYNC_INSTRUCTION_CONFLICT : CSYNC_INSTRUCTION_UPDATE_METADATA;
  346. }
  347. break;
  348. /* file on the other replica has not been modified */
  349. case CSYNC_INSTRUCTION_NONE:
  350. case CSYNC_INSTRUCTION_UPDATE_METADATA:
  351. if (cur->type != other->type) {
  352. // If the type of the entity changed, it's like NEW, but
  353. // needs to delete the other entity first.
  354. cur->instruction = CSYNC_INSTRUCTION_TYPE_CHANGE;
  355. other->instruction = CSYNC_INSTRUCTION_NONE;
  356. } else if (cur->type == ItemTypeDirectory) {
  357. cur->instruction = CSYNC_INSTRUCTION_UPDATE_METADATA;
  358. other->instruction = CSYNC_INSTRUCTION_NONE;
  359. } else {
  360. cur->instruction = CSYNC_INSTRUCTION_SYNC;
  361. other->instruction = CSYNC_INSTRUCTION_NONE;
  362. }
  363. break;
  364. case CSYNC_INSTRUCTION_IGNORE:
  365. cur->instruction = CSYNC_INSTRUCTION_IGNORE;
  366. break;
  367. default:
  368. break;
  369. }
  370. // Ensure we're not leaving discovery-only instructions
  371. // in place. This can happen, for instance, when other's
  372. // instruction is EVAL_RENAME because the parent dir was renamed.
  373. // NEW is safer than EVAL because it will end up with
  374. // propagation unless it's changed by something, and EVAL and
  375. // NEW are treated equivalently during reconcile.
  376. if (cur->instruction == CSYNC_INSTRUCTION_EVAL)
  377. cur->instruction = CSYNC_INSTRUCTION_NEW;
  378. break;
  379. default:
  380. break;
  381. }
  382. }
  383. //hide instruction NONE messages when log level is set to debug,
  384. //only show these messages on log level trace
  385. const char *repo = ctx->current == REMOTE_REPLICA ? "server" : "client";
  386. if(cur->instruction ==CSYNC_INSTRUCTION_NONE)
  387. {
  388. if(cur->type == ItemTypeDirectory)
  389. {
  390. qCDebug(lcReconcile,
  391. "%-30s %s dir: %s",
  392. csync_instruction_str(cur->instruction),
  393. repo,
  394. cur->path.constData());
  395. }
  396. else
  397. {
  398. qCDebug(lcReconcile,
  399. "%-30s %s file: %s",
  400. csync_instruction_str(cur->instruction),
  401. repo,
  402. cur->path.constData());
  403. }
  404. }
  405. else
  406. {
  407. if(cur->type == ItemTypeDirectory)
  408. {
  409. qCInfo(lcReconcile,
  410. "%-30s %s dir: %s",
  411. csync_instruction_str(cur->instruction),
  412. repo,
  413. cur->path.constData());
  414. }
  415. else
  416. {
  417. qCInfo(lcReconcile,
  418. "%-30s %s file: %s",
  419. csync_instruction_str(cur->instruction),
  420. repo,
  421. cur->path.constData());
  422. }
  423. }
  424. }
  425. void csync_reconcile_updates(CSYNC *ctx) {
  426. csync_s::FileMap *tree = nullptr;
  427. switch (ctx->current) {
  428. case LOCAL_REPLICA:
  429. tree = &ctx->local.files;
  430. break;
  431. case REMOTE_REPLICA:
  432. tree = &ctx->remote.files;
  433. break;
  434. default:
  435. break;
  436. }
  437. for (auto &pair : *tree) {
  438. _csync_merge_algorithm_visitor(pair.second.get(), ctx);
  439. }
  440. }
  441. /* vim: set ts=8 sw=2 et cindent: */