blob: e2c8bd16e273f98a3fafe8cbe1d9263c63f5ad75 [file] [log] [blame]
From 6b2dc72a5ee758f00eed2be641104c8420b29dc9 Mon Sep 17 00:00:00 2001
From: Johannes Schindelin <johannes.schindelin@gmx.de>
Date: Fri, 11 May 2018 16:03:54 +0200
Subject: is_ntfs_dotgit: match other .git files
commit e7cb0b4455c85b53aeba40f88ffddcf6d4002498 upstream.
When we started to catch NTFS short names that clash with .git, we only
looked for GIT~1. This is sufficient because we only ever clone into an
empty directory, so .git is guaranteed to be the first subdirectory or
file in that directory.
However, even with a fresh clone, .gitmodules is *not* necessarily the
first file to be written that would want the NTFS short name GITMOD~1: a
malicious repository can add .gitmodul0000 and friends, which sorts
before `.gitmodules` and is therefore checked out *first*. For that
reason, we have to test not only for ~1 short names, but for others,
too.
It's hard to just adapt the existing checks in is_ntfs_dotgit(): since
Windows 2000 (i.e., in all Windows versions still supported by Git),
NTFS short names are only generated in the <prefix>~<number> form up to
number 4. After that, a *different* prefix is used, calculated from the
long file name using an undocumented, but stable algorithm.
For example, the short name of .gitmodules would be GITMOD~1, but if it
is taken, and all of ~2, ~3 and ~4 are taken, too, the short name
GI7EBA~1 will be used. From there, collisions are handled by
incrementing the number, shortening the prefix as needed (until ~9999999
is reached, in which case NTFS will not allow the file to be created).
We'd also want to handle .gitignore and .gitattributes, which suffer
from a similar problem, using the fall-back short names GI250A~1 and
GI7D29~1, respectively.
To accommodate for that, we could reimplement the hashing algorithm, but
it is just safer and simpler to provide the known prefixes. This
algorithm has been reverse-engineered and described at
https://usn.pw/blog/gen/2015/06/09/filenames/, which is defunct but
still available via https://web.archive.org/.
These can be recomputed by running the following Perl script:
-- snip --
#! /usr/bin/perl
use warnings;
use strict;
# Does *not* work for non-ASCII file names
sub compute_short_name_hash ($) {
my $checksum = 0;
foreach (split('', $_[0])) {
$checksum = ($checksum * 0x25 + ord($_)) & 0xffff;
}
$checksum = ($checksum * 314159269) & 0xffffffff;
$checksum = 1 + (~$checksum & 0x7fffffff) if ($checksum & 0x80000000);
$checksum -= (($checksum * 1152921497) >> 60) * 1000000007;
return scalar reverse sprintf("%x", $checksum & 0xffff);
}
print compute_short_name_hash($ARGV[0]);
-- snap --
E.g., running that with the argument ".gitignore" will
result in "250a" (which then becomes "gi250a" in the code).
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
---
cache.h | 10 ++++++-
path.c | 84 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 93 insertions(+), 1 deletion(-)
diff --git a/cache.h b/cache.h
index 4a537d686d..98144baa25 100644
--- a/cache.h
+++ b/cache.h
@@ -1074,7 +1074,15 @@ int normalize_path_copy(char *dst, const char *src);
int longest_ancestor_length(const char *path, struct string_list *prefixes);
char *strip_path_suffix(const char *path, const char *suffix);
int daemon_avoid_alias(const char *path);
-extern int is_ntfs_dotgit(const char *name);
+
+/*
+ * These functions match their is_hfs_dotgit() counterparts; see utf8.h for
+ * details.
+ */
+int is_ntfs_dotgit(const char *name);
+int is_ntfs_dotgitmodules(const char *name);
+int is_ntfs_dotgitignore(const char *name);
+int is_ntfs_dotgitattributes(const char *name);
/*
* Returns true iff "str" could be confused as a command-line option when
diff --git a/path.c b/path.c
index 2f12d8eac6..98c35e0e9c 100644
--- a/path.c
+++ b/path.c
@@ -1252,6 +1252,90 @@ int is_ntfs_dotgit(const char *name)
}
}
+static int is_ntfs_dot_generic(const char *name,
+ const char *dotgit_name,
+ size_t len,
+ const char *dotgit_ntfs_shortname_prefix)
+{
+ int saw_tilde;
+ size_t i;
+
+ if ((name[0] == '.' && !strncasecmp(name + 1, dotgit_name, len))) {
+ i = len + 1;
+only_spaces_and_periods:
+ for (;;) {
+ char c = name[i++];
+ if (!c)
+ return 1;
+ if (c != ' ' && c != '.')
+ return 0;
+ }
+ }
+
+ /*
+ * Is it a regular NTFS short name, i.e. shortened to 6 characters,
+ * followed by ~1, ... ~4?
+ */
+ if (!strncasecmp(name, dotgit_name, 6) && name[6] == '~' &&
+ name[7] >= '1' && name[7] <= '4') {
+ i = 8;
+ goto only_spaces_and_periods;
+ }
+
+ /*
+ * Is it a fall-back NTFS short name (for details, see
+ * https://en.wikipedia.org/wiki/8.3_filename?
+ */
+ for (i = 0, saw_tilde = 0; i < 8; i++)
+ if (name[i] == '\0')
+ return 0;
+ else if (saw_tilde) {
+ if (name[i] < '0' || name[i] > '9')
+ return 0;
+ } else if (name[i] == '~') {
+ if (name[++i] < '1' || name[i] > '9')
+ return 0;
+ saw_tilde = 1;
+ } else if (i >= 6)
+ return 0;
+ else if (name[i] < 0) {
+ /*
+ * We know our needles contain only ASCII, so we clamp
+ * here to make the results of tolower() sane.
+ */
+ return 0;
+ } else if (tolower(name[i]) != dotgit_ntfs_shortname_prefix[i])
+ return 0;
+
+ goto only_spaces_and_periods;
+}
+
+/*
+ * Inline helper to make sure compiler resolves strlen() on literals at
+ * compile time.
+ */
+static inline int is_ntfs_dot_str(const char *name, const char *dotgit_name,
+ const char *dotgit_ntfs_shortname_prefix)
+{
+ return is_ntfs_dot_generic(name, dotgit_name, strlen(dotgit_name),
+ dotgit_ntfs_shortname_prefix);
+}
+
+int is_ntfs_dotgitmodules(const char *name)
+{
+ return is_ntfs_dot_str(name, "gitmodules", "gi7eba");
+}
+
+int is_ntfs_dotgitignore(const char *name)
+{
+ return is_ntfs_dot_str(name, "gitignore", "gi250a");
+}
+
+int is_ntfs_dotgitattributes(const char *name)
+{
+ return is_ntfs_dot_str(name, "gitattributes", "gi7d29");
+}
+
int looks_like_command_line_option(const char *str)
{
return str && str[0] == '-';
--
2.17.0.921.gf22659ad46