csync_exclude.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415
  1. /*
  2. * libcsync -- a library to sync a directory with another
  3. *
  4. * Copyright (c) 2008-2013 by Andreas Schneider <asn@cryptomilk.org>
  5. *
  6. * This library is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * This library is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with this library; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19. */
  20. #include "config_csync.h"
  21. #ifndef _GNU_SOURCE
  22. #define _GNU_SOURCE
  23. #endif
  24. #include <stdio.h>
  25. #include <sys/types.h>
  26. #include <sys/stat.h>
  27. #include <fcntl.h>
  28. #include <unistd.h>
  29. #include "c_lib.h"
  30. #include "c_private.h"
  31. #include "csync_private.h"
  32. #include "csync_exclude.h"
  33. #include "csync_misc.h"
  34. #define CSYNC_LOG_CATEGORY_NAME "csync.exclude"
  35. #include "csync_log.h"
  36. #ifndef WITH_UNIT_TESTING
  37. static
  38. #endif
  39. int _csync_exclude_add(c_strlist_t **inList, const char *string) {
  40. size_t i = 0;
  41. // We never want duplicates, so check whether the string is already
  42. // in the list first.
  43. if (*inList) {
  44. for (i = 0; i < (*inList)->count; ++i) {
  45. char *pattern = (*inList)->vector[i];
  46. if (c_streq(pattern, string)) {
  47. return 1;
  48. }
  49. }
  50. }
  51. return c_strlist_add_grow(inList, string);
  52. }
  /**
   * Expands C-like escape sequences in a string.
   *
   * A backslash followed by one of ' " ? \ a b f n r t v is replaced by the
   * single character that escape denotes in C. A backslash followed by any
   * other character (including the end of the string) is copied through
   * unchanged together with that character.
   *
   * The returned string is heap-allocated (via c_malloc) and owned by the
   * caller.
   */
  static const char *csync_exclude_expand_escapes(const char * input)
  {
      /* Length including the terminating NUL; the loop copies the NUL too. */
      const size_t total = strlen(input) + 1;
      /* Each escape turns two input bytes into at most two output bytes, so
       * the result can never be longer than the input. */
      char *expanded = c_malloc(total);
      size_t src = 0;
      size_t dst = 0;

      while (src < total) {
          if (input[src] != '\\') {
              expanded[dst] = input[src];
              dst += 1;
              src += 1;
              continue;
          }

          /* At worst input[src + 1] is the terminating NUL. */
          const char code = input[src + 1];
          char replacement = '\0';
          int recognised = 1;

          switch (code) {
          case '\'': replacement = '\''; break;
          case '"':  replacement = '"';  break;
          case '?':  replacement = '?';  break;
          case '\\': replacement = '\\'; break;
          case 'a':  replacement = '\a'; break;
          case 'b':  replacement = '\b'; break;
          case 'f':  replacement = '\f'; break;
          case 'n':  replacement = '\n'; break;
          case 'r':  replacement = '\r'; break;
          case 't':  replacement = '\t'; break;
          case 'v':  replacement = '\v'; break;
          default:   recognised = 0;     break;
          }

          if (recognised) {
              expanded[dst++] = replacement;
          } else {
              /* Unknown escape: keep the backslash and the following byte. */
              expanded[dst++] = '\\';
              expanded[dst++] = code;
          }
          src += 2; /* consumed the backslash and the character after it */
      }

      return expanded;
  }
  90. int csync_exclude_load(const char *fname, c_strlist_t **list) {
  91. int fd = -1;
  92. int i = 0;
  93. int rc = -1;
  94. int64_t size;
  95. char *buf = NULL;
  96. char *entry = NULL;
  97. mbchar_t *w_fname;
  98. if (fname == NULL) {
  99. return -1;
  100. }
  101. #ifdef _WIN32
  102. _fmode = _O_BINARY;
  103. #endif
  104. w_fname = c_utf8_path_to_locale(fname);
  105. if (w_fname == NULL) {
  106. return -1;
  107. }
  108. fd = _topen(w_fname, O_RDONLY);
  109. c_free_locale_string(w_fname);
  110. if (fd < 0) {
  111. return -1;
  112. }
  113. size = lseek(fd, 0, SEEK_END);
  114. if (size < 0) {
  115. rc = -1;
  116. goto out;
  117. }
  118. lseek(fd, 0, SEEK_SET);
  119. if (size == 0) {
  120. rc = 0;
  121. goto out;
  122. }
  123. buf = c_malloc(size + 1);
  124. if (read(fd, buf, size) != size) {
  125. rc = -1;
  126. goto out;
  127. }
  128. buf[size] = '\0';
  129. /* FIXME: Use fgets and don't add duplicates */
  130. entry = buf;
  131. for (i = 0; i < size; i++) {
  132. if (buf[i] == '\n' || buf[i] == '\r') {
  133. if (entry != buf + i) {
  134. buf[i] = '\0';
  135. if (*entry != '#') {
  136. const char *unescaped = csync_exclude_expand_escapes(entry);
  137. rc = _csync_exclude_add(list, unescaped);
  138. if( rc == 0 ) {
  139. CSYNC_LOG(CSYNC_LOG_PRIORITY_TRACE, "Adding entry: %s", unescaped);
  140. }
  141. SAFE_FREE(unescaped);
  142. if (rc < 0) {
  143. goto out;
  144. }
  145. }
  146. }
  147. entry = buf + i + 1;
  148. }
  149. }
  150. rc = 0;
  151. out:
  152. SAFE_FREE(buf);
  153. close(fd);
  154. return rc;
  155. }
  // See http://support.microsoft.com/kb/74496 and
  // https://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx
  // Additionally, we ignore '$Recycle.Bin', see https://github.com/owncloud/client/issues/2955
  static const char* win_reserved_words[] = {"CON", "PRN", "AUX", "NUL", "COM1", "COM2", "COM3", "COM4", "COM5",
                                             "COM6", "COM7", "COM8", "COM9", "LPT1", "LPT2", "LPT3", "LPT4",
                                             "LPT5", "LPT6", "LPT7", "LPT8", "LPT9", "CLOCK$", "$Recycle.Bin" };

  /**
   * Tells whether a file name is one of the names in win_reserved_words.
   *
   * A name is reported as reserved when it is
   *  - exactly two characters long and of the form "<ASCII letter>:", or
   *  - equal (ignoring case) to an entry of win_reserved_words, or
   *  - such an entry followed by a '.', e.g. "COM1.txt".
   *
   * @param filename  NUL-terminated name to check (must not be NULL).
   * @return true if the name is reserved, false otherwise.
   */
  bool csync_is_windows_reserved_word(const char* filename) {
      const size_t word_count = sizeof(win_reserved_words) / sizeof(win_reserved_words[0]);
      /* The file name does not change, so measure it once up front. */
      const size_t len_filename = strlen(filename);
      size_t j;

      /* A bare drive designator such as "C:" or "c:". */
      if (len_filename == 2 && filename[1] == ':') {
          const char drive = filename[0];
          if ((drive >= 'a' && drive <= 'z') || (drive >= 'A' && drive <= 'Z')) {
              return true;
          }
      }

      for (j = 0; j < word_count; j++) {
          const char *word = win_reserved_words[j];
          const size_t len_word = strlen(word);

          if (c_strncasecmp(filename, word, len_word) != 0) {
              continue;
          }
          /* The prefix matches: reserved if that is the whole name ... */
          if (len_filename == len_word) {
              return true;
          }
          /* ... or if the reserved word is followed by an extension. */
          if (len_filename > len_word && filename[len_word] == '.') {
              return true;
          }
      }

      return false;
  }
  187. static CSYNC_EXCLUDE_TYPE _csync_excluded_common(c_strlist_t *excludes, const char *path, int filetype, bool check_leading_dirs) {
  188. size_t i = 0;
  189. const char *bname = NULL;
  190. size_t blen = 0;
  191. char *conflict = NULL;
  192. int rc = -1;
  193. CSYNC_EXCLUDE_TYPE match = CSYNC_NOT_EXCLUDED;
  194. CSYNC_EXCLUDE_TYPE type = CSYNC_NOT_EXCLUDED;
  195. /* split up the path */
  196. bname = strrchr(path, '/');
  197. if (bname) {
  198. bname += 1; // don't include the /
  199. } else {
  200. bname = path;
  201. }
  202. blen = strlen(bname);
  203. rc = csync_fnmatch(".csync_journal.db*", bname, 0);
  204. if (rc == 0) {
  205. match = CSYNC_FILE_SILENTLY_EXCLUDED;
  206. goto out;
  207. }
  208. // check the strlen and ignore the file if its name is longer than 254 chars.
  209. // whenever changing this also check createDownloadTmpFileName
  210. if (blen > 254) {
  211. match = CSYNC_FILE_EXCLUDE_LONG_FILENAME;
  212. goto out;
  213. }
  214. #ifdef _WIN32
  215. // Windows cannot sync files ending in spaces (#2176). It also cannot
  216. // distinguish files ending in '.' from files without an ending,
  217. // as '.' is a separator that is not stored internally, so let's
  218. // not allow to sync those to avoid file loss/ambiguities (#416)
  219. if (blen > 1) {
  220. if (bname[blen-1]== ' ') {
  221. match = CSYNC_FILE_EXCLUDE_TRAILING_SPACE;
  222. goto out;
  223. } else if (bname[blen-1]== '.' ) {
  224. match = CSYNC_FILE_EXCLUDE_INVALID_CHAR;
  225. goto out;
  226. }
  227. }
  228. if (csync_is_windows_reserved_word(bname)) {
  229. match = CSYNC_FILE_EXCLUDE_INVALID_CHAR;
  230. goto out;
  231. }
  232. // Filter out characters not allowed in a filename on windows
  233. const char *p = NULL;
  234. for (p = path; *p; p++) {
  235. switch (*p) {
  236. case '\\':
  237. case ':':
  238. case '?':
  239. case '*':
  240. case '"':
  241. case '>':
  242. case '<':
  243. case '|':
  244. match = CSYNC_FILE_EXCLUDE_INVALID_CHAR;
  245. goto out;
  246. default:
  247. break;
  248. }
  249. }
  250. #endif
  251. rc = csync_fnmatch(".owncloudsync.log*", bname, 0);
  252. if (rc == 0) {
  253. match = CSYNC_FILE_SILENTLY_EXCLUDED;
  254. goto out;
  255. }
  256. /* Always ignore conflict files, not only via the exclude list */
  257. rc = csync_fnmatch("*_conflict-*", bname, 0);
  258. if (rc == 0) {
  259. match = CSYNC_FILE_SILENTLY_EXCLUDED;
  260. goto out;
  261. }
  262. if (getenv("CSYNC_CONFLICT_FILE_USERNAME")) {
  263. rc = asprintf(&conflict, "*_conflict_%s-*", getenv("CSYNC_CONFLICT_FILE_USERNAME"));
  264. if (rc < 0) {
  265. goto out;
  266. }
  267. rc = csync_fnmatch(conflict, path, 0);
  268. if (rc == 0) {
  269. match = CSYNC_FILE_SILENTLY_EXCLUDED;
  270. SAFE_FREE(conflict);
  271. goto out;
  272. }
  273. SAFE_FREE(conflict);
  274. }
  275. if( ! excludes ) {
  276. goto out;
  277. }
  278. c_strlist_t *path_components = NULL;
  279. if (check_leading_dirs) {
  280. /* Build a list of path components to check. */
  281. path_components = c_strlist_new(32);
  282. char *path_split = strdup(path);
  283. size_t len = strlen(path_split);
  284. for (i = len; ; --i) {
  285. // read backwards until a path separator is found
  286. if (i != 0 && path_split[i-1] != '/') {
  287. continue;
  288. }
  289. // check 'basename', i.e. for "/foo/bar/fi" we'd check 'fi', 'bar', 'foo'
  290. if (path_split[i] != 0) {
  291. c_strlist_add_grow(&path_components, path_split + i);
  292. }
  293. if (i == 0) {
  294. break;
  295. }
  296. // check 'dirname', i.e. for "/foo/bar/fi" we'd check '/foo/bar', '/foo'
  297. path_split[i-1] = '\0';
  298. c_strlist_add_grow(&path_components, path_split);
  299. }
  300. SAFE_FREE(path_split);
  301. }
  302. /* Loop over all exclude patterns and evaluate the given path */
  303. for (i = 0; match == CSYNC_NOT_EXCLUDED && i < excludes->count; i++) {
  304. bool match_dirs_only = false;
  305. char *pattern = excludes->vector[i];
  306. type = CSYNC_FILE_EXCLUDE_LIST;
  307. if (!pattern[0]) { /* empty pattern */
  308. continue;
  309. }
  310. /* Excludes starting with ']' means it can be cleanup */
  311. if (pattern[0] == ']') {
  312. ++pattern;
  313. if (filetype == CSYNC_FTW_TYPE_FILE) {
  314. type = CSYNC_FILE_EXCLUDE_AND_REMOVE;
  315. }
  316. }
  317. /* Check if the pattern applies to pathes only. */
  318. if (pattern[strlen(pattern)-1] == '/') {
  319. if (!check_leading_dirs && filetype == CSYNC_FTW_TYPE_FILE) {
  320. continue;
  321. }
  322. match_dirs_only = true;
  323. pattern[strlen(pattern)-1] = '\0'; /* Cut off the slash */
  324. }
  325. /* check if the pattern contains a / and if, compare to the whole path */
  326. if (strchr(pattern, '/')) {
  327. rc = csync_fnmatch(pattern, path, FNM_PATHNAME);
  328. if( rc == 0 ) {
  329. match = type;
  330. }
  331. /* if the pattern requires a dir, but path is not, its still not excluded. */
  332. if (match_dirs_only && filetype != CSYNC_FTW_TYPE_DIR) {
  333. match = CSYNC_NOT_EXCLUDED;
  334. }
  335. }
  336. /* if still not excluded, check each component and leading directory of the path */
  337. if (match == CSYNC_NOT_EXCLUDED && check_leading_dirs) {
  338. size_t j = 0;
  339. if (match_dirs_only && filetype == CSYNC_FTW_TYPE_FILE) {
  340. j = 1; // skip the first entry, which is bname
  341. }
  342. for (; j < path_components->count; ++j) {
  343. rc = csync_fnmatch(pattern, path_components->vector[j], 0);
  344. if (rc == 0) {
  345. match = type;
  346. break;
  347. }
  348. }
  349. } else if (match == CSYNC_NOT_EXCLUDED && !check_leading_dirs) {
  350. rc = csync_fnmatch(pattern, bname, 0);
  351. if (rc == 0) {
  352. match = type;
  353. }
  354. }
  355. if (match_dirs_only) {
  356. /* restore the '/' */
  357. pattern[strlen(pattern)] = '/';
  358. }
  359. }
  360. c_strlist_destroy(path_components);
  361. out:
  362. return match;
  363. }
  364. CSYNC_EXCLUDE_TYPE csync_excluded_traversal(c_strlist_t *excludes, const char *path, int filetype) {
  365. return _csync_excluded_common(excludes, path, filetype, false);
  366. }
  367. CSYNC_EXCLUDE_TYPE csync_excluded_no_ctx(c_strlist_t *excludes, const char *path, int filetype) {
  368. return _csync_excluded_common(excludes, path, filetype, true);
  369. }