(0000948)
eblake (manager)
2011-08-31 19:03
|
I've completed my assigned research assignment.
The Linux implementation of locks maintains a single circular list of
all locks held for a file description. The list is kept grouped by
process id, and for the portion of the list associated with a given
process, each lock is arranged in ascending byte order (since a single
process cannot have overlapping lock ranges), which makes it possible
to split or merge locks as new locks are created by the same process.
When adding a new lock for a process not already in the list, the
addition is done according to the sequence in which the lock was
requested. And when searching for a conflict from other processes,
the search ends on the first hit - this hit will necessarily be the
lowest conflicting locked byte range of the given l_pid returned,
but is not necessarily the lowest overall byte range of any other
conflict from processes later in the list. This also implies that
the lock returned is not necessarily the chronologically oldest
conflicting lock, as the list is not maintained by lock age (age
plays a role to some degree, in that locks tied to one process
appear in the age that the process grabbed its first lock, but
the process' first lock is not necessarily the conflicting lock).
Given this fact, I don't think we can reliably impose any constraints
on which lock is returned, and the Desired Action is as good as we
can do (portable applications cannot be relying on any particular
criteria for which lock is returned, whether that be lowest byte
range, lowest pid, oldest lock, or other mechanism).
I used the following program to demonstrate that on Linux, the lock
returned is not always the lowest byte range, nor always belonging
to the lowest pid. As written, the test only creates one lock per
process, so the results does show the chronologically oldest lock,
but I imagine that further tweaks to the test could also disprove
that pattern. Meanwhile, the same program on Cygwin gives different
results on the conditions for when the higher byte range is given,
and is not always the chronologically oldest lock.
#include <fcntl.h>
#include <errno.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <sys/wait.h>
int main(int argc, char **argv) {
if (argc < 3)
return 1;
/* Create three processes, then sort by pid in each. The parent and
the second child can easily learn all three pids, but the first
child only knows two of the pids, so we pass the third over a
pipe. */
pid_t pids[3] = { 0 }; // All three pids, eventually sorted.
pid_t parent = pids[0] = getpid(); // Valid in all processes
pid_t child1 = 0; // Valid only in parent
pid_t child2 = 0; // Valid only in parent
int fds[2]; // Communicate from parent to child1
char name[] = "lockXXXXXX";
int file = mkstemp(name);
struct flock fl;
if (file < 0)
return 2;
if (pipe(fds) < 0)
return 3;
pid_t pid = fork();
if (pid < 0)
return 4;
if (pid) { // parent
child1 = pids[1] = pid;
if (close(fds[0]) < 0)
return 5;
pid = fork();
if (pid < 0)
return 6;
if (pid) { // still parent
child2 = pids[2] = pid;
if (dprintf(fds[1], "%ld\n", (long)pid) < 0)
return 7;
if (close(fds[1]) < 0)
return 8;
} else { // child 2
pids[2] = getpid();
if (close(fds[1]) < 0)
return 9;
}
} else { // child 1
pids[1] = getpid();
if (close(fds[1]) < 0)
return 10;
char buf[20];
if (read(fds[0], buf, sizeof(buf)) < 0)
return 11;
pids[2] = strtoull(buf, NULL, 10);
if (!pids[2])
return 12;
if (close(fds[0]) < 0)
return 13;
}
/* All three processes, start by sorting pids. */
if (pids[1] < pids[0]) {
pid = pids[1];
pids[1] = pids[0];
pids[0] = pid;
}
if (pids[2] < pids[1]) {
pid = pids[2];
pids[2] = pids[1];
pids[1] = pid;
}
if (pids[1] < pids[0]) {
pid = pids[1];
pids[1] = pids[0];
pids[0] = pid;
}
/* Now for the test, based on argv. The test uses three steps,
where two processes create distinct read locks, then the third
process queries whether a write lock that overlaps both read
locks would succeed. argv[1] controls which order the processes
run in (6 possibilities), and argv[2] controls whether the higher
or lower read lock will be created first (2 possibilities). */
switch (atoi(argv[1])) {
case 0: case 1: pid = pids[0]; break;
case 2: case 3: pid = pids[1]; break;
case 4: case 5: pid = pids[2]; break;
}
if (getpid() == pid) {
// create read lock 1
fl.l_type = F_RDLCK;
fl.l_whence = SEEK_SET;
fl.l_start = atoi(argv[2]) ? 2 : 1;
fl.l_len = 1;
if (fcntl(file, F_SETLK, &fl) < 0)
return 14;
printf("pid %ld got read lock\n", (long) pid);
} else {
sleep(1);
}
switch (atoi(argv[1])) {
case 2: case 4: pid = pids[0]; break;
case 0: case 5: pid = pids[1]; break;
case 1: case 3: pid = pids[2]; break;
}
if (getpid() == pid) {
// create read lock 2
fl.l_type = F_RDLCK;
fl.l_whence = SEEK_SET;
fl.l_start = atoi(argv[2]) ? 1 : 2;
fl.l_len = 1;
if (fcntl(file, F_SETLK, &fl) < 0)
return 15;
printf("pid %ld got read lock\n", (long) pid);
} else {
sleep(1);
}
switch (atoi(argv[1])) {
case 3: case 5: pid = pids[0]; break;
case 1: case 4: pid = pids[1]; break;
case 0: case 2: pid = pids[2]; break;
}
if (getpid() == pid) {
// probe write lock
fl.l_type = F_WRLCK;
fl.l_whence = SEEK_SET;
fl.l_start = 1;
fl.l_len = 2;
if (fcntl(file, F_GETLK, &fl) < 0)
return 16;
if (fl.l_type == F_UNLCK)
return 17;
printf("pid %ld would block on offset %ld from pid %ld\n",
(long) pid, (long) fl.l_start, (long) fl.l_pid);
} else {
sleep(2);
}
/* Parent-specific cleanup (reflect non-zero status from either
child into the parent). */
if (getpid() == parent) {
int status;
if (child1) {
if (waitpid(child1, &status, 0) != child1)
return 18;
if (status) {
if (!WIFEXITED(status))
return 19;
return WEXITSTATUS(status);
}
}
if (child2) {
if (waitpid(child2, &status, 0) != child2)
return 20;
if (status) {
if (!WIFEXITED(status))
return 21;
return WEXITSTATUS(status);
}
}
if (close(file) < 0)
return 22;
if (unlink(name) < 0)
return 23;
}
/* We got here, test is complete. */
return 0;
}
On Linux:
$ for i in 0 1 2 3 4 5 ; do for j in 0 1 ; do
echo " => $i $j"; ./foo $i $j || echo "died status $?"; done ; done
=> 0 0
pid 20196 got read lock
pid 20197 got read lock
pid 20198 would block on offset 1 from pid 20196
=> 0 1
pid 20202 got read lock
pid 20203 got read lock
pid 20204 would block on offset 2 from pid 20202
=> 1 0
pid 20209 got read lock
pid 20211 got read lock
pid 20210 would block on offset 1 from pid 20209
=> 1 1
pid 20215 got read lock
pid 20217 got read lock
pid 20216 would block on offset 2 from pid 20215
=> 2 0
pid 20222 got read lock
pid 20221 got read lock
pid 20223 would block on offset 1 from pid 20222
=> 2 1
pid 20228 got read lock
pid 20227 got read lock
pid 20229 would block on offset 2 from pid 20228
=> 3 0
pid 20240 got read lock
pid 20241 got read lock
pid 20239 would block on offset 1 from pid 20240
=> 3 1
pid 20246 got read lock
pid 20247 got read lock
pid 20245 would block on offset 2 from pid 20246
=> 4 0
pid 20253 got read lock
pid 20251 got read lock
pid 20252 would block on offset 1 from pid 20253
=> 4 1
pid 20259 got read lock
pid 20257 got read lock
pid 20258 would block on offset 2 from pid 20259
=> 5 0
pid 20265 got read lock
pid 20264 got read lock
pid 20263 would block on offset 1 from pid 20265
=> 5 1
pid 20271 got read lock
pid 20270 got read lock
pid 20269 would block on offset 2 from pid 20271
On Cygwin:
$ for i in 0 1 2 3 4 5 ; do for j in 0 1 ; do
echo " => $i $j"; ./foo $i $j || echo "died status $?"; done ; done
=> 0 0
pid 1104 got read lock
pid 2452 got read lock
pid 3968 would block on offset 2 from pid 2452
=> 0 1
pid 2428 got read lock
pid 2632 got read lock
pid 2868 would block on offset 1 from pid 2632
=> 1 0
pid 2580 got read lock
pid 4024 got read lock
pid 3528 would block on offset 1 from pid 2580
=> 1 1
pid 900 got read lock
pid 3516 got read lock
pid 932 would block on offset 2 from pid 900
=> 2 0
pid 2288 got read lock
pid 1240 got read lock
pid 3840 would block on offset 2 from pid 1240
=> 2 1
pid 2660 got read lock
pid 312 got read lock
pid 3232 would block on offset 2 from pid 2660
=> 3 0
pid 3028 got read lock
pid 3864 got read lock
pid 1852 would block on offset 2 from pid 3864
=> 3 1
pid 1356 got read lock
pid 3408 got read lock
pid 552 would block on offset 2 from pid 1356
=> 4 0
pid 4080 got read lock
pid 1376 got read lock
pid 2176 would block on offset 2 from pid 1376
=> 4 1
pid 3676 got read lock
pid 692 got read lock
pid 2732 would block on offset 1 from pid 692
=> 5 0
pid 3144 got read lock
pid 2804 got read lock
pid 2784 would block on offset 2 from pid 2804
=> 5 1
pid 3912 got read lock
pid 1340 got read lock
pid 1328 would block on offset 1 from pid 1340 |