diff --git a/src/file.c b/src/file.c
index 765037507a..3f134ac08f 100644
--- a/src/file.c
+++ b/src/file.c
@@ -36,6 +36,177 @@ this program.  If not, see <http://www.gnu.org/licenses/>.  */
    only work on files which have not yet been snapped. */
 int snapped_deps = 0;
 
+/* State for .WAIT support.  a_file_wait holds the names of all files for
+ * which new_job() must wait until their commands have finished (as it does
+ * for .NOTPARALLEL:); it is created when the first such name is added.
+ * Each .WAIT barrier that has names after it also queues an
+ * a_file_wait_late record, which snap_deps() resolves later. */
+struct a_file_wait_late
+{
+  struct a_file_wait_late *last; /* Record queued before this one, or NULL. */
+  char **targets; /* NULL-terminated names listed after the barrier. */
+  char **deps; /* NULL-terminated names listed before the barrier. */
+};
+
+static struct hash_table *a_file_wait; /* Names to wait for, made lazily. */
+static struct a_file_wait_late *a_file_wait_late; /* Newest queued record. */
+static int a_file_wait_snapping; /* Set to 1 when snap_deps() starts. */
+
+static unsigned long
+a_file_wait_hash_1 (const void *key) /* First hash_init() hash callback. */
+{
+  return_ISTRING_HASH_1 ((const char *) key); /* KEY is used as a C string. */
+}
+
+static unsigned long
+a_file_wait_hash_2 (const void *key) /* Second hash_init() hash callback. */
+{
+  return_ISTRING_HASH_2 ((const char *) key); /* KEY is used as a C string. */
+}
+
+static int
+a_file_wait_hash_cmp (const void *x, const void *y) /* hash_init() compare. */
+{
+  return_ISTRING_COMPARE ((const char *) x, (const char *) y); /* C strings. */
+}
+
+/* Add CVP to *HTPP (created on demand); return non-zero if newly added.  */
+static int
+a_file_wait_add(struct hash_table **htpp, const void *cvp)
+{
+  int rv;
+  void **slot;
+
+  if (*htpp == NULL) /* Initialise the table we insert into, not a global. */
+    {
+      *htpp = xmalloc (sizeof (**htpp));
+      hash_init (*htpp, 256,
+        &a_file_wait_hash_1, &a_file_wait_hash_2, &a_file_wait_hash_cmp);
+    }
+
+  if ((rv = (*(slot = hash_find_slot (*htpp, cvp)) == NULL))) /* Vacant? */
+    hash_insert_at (*htpp, cvp, slot);
+  return rv;
+}
+
+static void /* Recursive worker of a_file_wait_late_recur().  */
+a_file_wait__late_recur (const struct a_file_wait_late *wlp, const char *dep,
+    struct hash_table *htp, size_t level) /* HTP: names already entered. */
+{
+  struct dep *dp;
+  struct file *fp;
+
+  fp = lookup_file (dep);
+  assert (fp != NULL); /* DEP must already be a known file. */
+
+  /* Level 0 got its rule via split_prereqs(); only prerequisites need one. */
+  if (level > 0)
+    {
+      char *buf, *cp, **cpp, *nxt_dep;
+      size_t l, buf_len, any;
+
+      l = strlen (dep);
+
+      cp = buf = xmalloc (buf_len = l + 1 +1); /* DEP, ':' and NUL. */
+
+      memcpy (cp, dep, l);
+      cp += l;
+      *cp++ = ':'; /* BUF now starts a rule line "DEP:". */
+
+      for (any = 0, cpp = wlp->deps; (nxt_dep = *cpp) != NULL; ++cpp)
+        {
+          size_t x;
+          struct file *nxt_fp;
+
+          /* No self-references. */
+          ISTRING_COMPARE (dep, nxt_dep, x);
+          if (!x)
+            continue;
+
+          nxt_fp = lookup_file (nxt_dep);
+          assert (nxt_fp != NULL);
+
+          /* Skip NXT_DEP if it already lists DEP as a prerequisite. */
+          for (dp = nxt_fp->deps; dp != NULL; dp = dp->next)
+            {
+              assert (dp->file != NULL);
+              ISTRING_COMPARE (dep, dp->file->name, x);
+              if (!x)
+                {
+                  nxt_fp = NULL; /* Marks NXT_DEP as rejected. */
+                  break;
+                }
+            }
+          if (nxt_fp == NULL)
+            continue;
+
+          l = strlen (nxt_dep);
+
+          /* Grow the buffer if a separator, NXT_DEP and NUL might not fit. */
+          x = (size_t) (cp - buf);
+          if (buf_len - x <= l + 1 +1)
+            {
+              buf_len += l;
+              buf_len <<= 1; /* xxx unchecked overflow */
+              buf = xrealloc (buf, buf_len);
+              cp = &buf[x]; /* Re-anchor the write cursor in the new block. */
+            }
+
+          if (any++ > 0)
+            *cp++ = ' '; /* Separate from the previously appended name. */
+          memcpy (cp, nxt_dep, l);
+          cp += l;
+        }
+
+      if (any) /* At least one prerequisite name was appended. */
+        {
+          *cp++ = '\0';
+          eval_buffer (buf, NULL); /* Evaluate the generated "DEP:..." line. */
+        }
+
+      free (buf);
+    }
+
+  /* Recurse only into prerequisites newly entered into HTP. */
+  for (dp = fp->deps; dp != NULL; dp = dp->next)
+    if (dp->file == NULL)
+      continue;
+    else if (a_file_wait_add (&htp, dp->file->name))
+      a_file_wait__late_recur (wlp, dp->file->name, htp, level + 1);
+}
+
+static void /* Resolve one queued record, starting from each of its targets. */
+a_file_wait_late_recur (const struct a_file_wait_late *wlp)
+{
+  struct hash_table ht; /* Names entered while walking this record. */
+  const char **cpp;
+
+  hash_init (&ht, 256,
+    &a_file_wait_hash_1, &a_file_wait_hash_2, &a_file_wait_hash_cmp);
+
+  for (cpp = (const char **) wlp->targets; *cpp != NULL; ++cpp)
+    a_file_wait__late_recur (wlp, *cpp, &ht, 0); /* Targets are level 0. */
+
+  hash_free (&ht, 0);
+}
+
+static void /* Visit FNAME and, through the dep names, its prerequisites. */
+a_file_wait_recur (const char *fname, size_t level)
+{
+  struct dep *dp;
+  struct file *fp;
+
+  fp = lookup_file (fname);
+  assert (fp != NULL); /* FNAME must already be a known file. */
+
+  if (level == 0) /* NOTE(review): the caller in snap_deps() passes 1, */
+    file_wait_add (fname); /* so this looks unreachable -- confirm intent. */
+
+  for (dp = fp->deps; dp != NULL; dp = dp->next)
+    if (dp->name != NULL) /* Deps without a name are not followed. */
+      a_file_wait_recur (dp->name, level + 1);
+}
+
 /* Hash table of files the makefile knows how to make.  */
 
 static unsigned long
@@ -442,9 +613,11 @@ remove_intermediates (int sig)
 
 /* Given a string containing prerequisites (fully expanded), break it up into
    a struct dep list.  Enter each of these prereqs into the file database.
+   DEP_TARGETS_TO_EVAL must be NULL except on the first expansion pass, where
+   it collects the rule text that creates the dependencies needed for .WAIT.
  */
 struct dep *
-split_prereqs (char *p)
+split_prereqs (char *p, char **dep_targets_to_eval)
 {
   struct dep *new = PARSE_FILE_SEQ (&p, struct dep, MAP_PIPE, NULL,
                                     PARSEFS_NONE);
@@ -472,6 +645,117 @@ split_prereqs (char *p)
         ood->ignore_mtime = 1;
     }
 
+  /* At each occurrence of .WAIT, place a barrier (first pass only). */
+  if (dep_targets_to_eval != NULL && !a_file_wait_snapping) {
+    struct dep *curr, *tail;
+
+    for (tail = NULL, curr = new; curr != NULL;)
+      {
+         if (strcmp (curr->name, ".WAIT") != 0)
+           {
+             tail = curr;
+             curr = curr->next;
+           }
+         else if (tail == NULL) /* xxx warn user of nonsense? */
+           new = curr = curr->next;
+         else
+           {
+             char *buf;
+             struct dep *tmp;
+             size_t all_dep_no, all_dep_len, l, all_len, late_no, late_len;
+
+             /* Enforce wait(2) for jobs for targets before barrier. */
+             all_dep_no = all_dep_len = 0;
+             for (tmp = new; tmp != curr; tmp = tmp->next)
+               {
+                 ++all_dep_no;
+                 file_wait_add (tmp->name);
+                 all_dep_len += strlen (tmp->name) + 1; /* ':' / ' ' / '\0' */
+               }
+             assert (all_dep_no > 0);
+
+             /* Take out .WAIT, no one else can deal with it. */
+             tail->next = curr = curr->next;
+
+             /* Make the targets after the barrier depend on those before
+              * it, so the normal dependency mechanism kicks in.
+              * Later we need to recurse into these in order to create such
+              * dependencies for their prerequisites too; this can happen no
+              * sooner than when we "snap": these late ones are expensive. */
+             buf = *dep_targets_to_eval;
+             all_len = (buf != NULL) ? strlen (buf) : 0;
+             late_no = late_len = 0;
+
+             for (tmp = curr; tmp != NULL; tmp = tmp->next)
+               if (strcmp (tmp->name, ".WAIT") != 0)
+                 {
+                   ++late_no;
+                   assert (strcache_iscached (tmp->name));
+                   l = strlen (tmp->name) +1;
+                   late_len += l;
+                   buf = xrealloc (buf, all_len + 1 + l);
+                   if (tmp != curr)
+                     buf[all_len++] = ' ';
+                   else if (all_len != 0)
+                     buf[all_len++] = '\n';
+                   memcpy (&buf[all_len], tmp->name, l);
+                   all_len += --l;
+                 }
+
+             if (late_no++ > 0)
+               {
+                 char **cpp, *cp;
+                 struct a_file_wait_late *wlp;
+
+                 ++all_dep_no;
+
+                 wlp = xmalloc (sizeof (struct a_file_wait_late) +
+                     (late_no * sizeof (char *)) + late_len +
+                     (all_dep_no * sizeof (char *) + all_dep_len));
+                 wlp->last = a_file_wait_late;
+                 a_file_wait_late = wlp;
+
+                 wlp->targets = cpp = (char **) &wlp[1];
+                 wlp->deps = &cpp[late_no];
+
+                 cp = (char *) &cpp[late_no + all_dep_no];
+
+                 for (tmp = curr; tmp != NULL; tmp = tmp->next)
+                   if (strcmp (tmp->name, ".WAIT") != 0)
+                     {
+                       l = strlen(tmp->name) +1;
+                       *cpp++ = cp;
+                       memcpy(cp, tmp->name, l);
+                       cp += l;
+                     }
+                 *cpp++ = NULL;
+                 assert (cpp == wlp->deps);
+
+                 buf = xrealloc (buf, all_len + all_dep_len +1);
+
+                 for (tmp = new; tmp != curr; tmp = tmp->next)
+                   {
+                     l = strlen (tmp->name) +1;
+
+                     if (tmp != new)
+                         buf[all_len++] = ' ';
+                     else
+                         buf[all_len++] = ':';
+                     memcpy (&buf[all_len], tmp->name, l);
+                     all_len += l -1;
+
+                     *cpp++ = cp;
+                     memcpy (cp, tmp->name, l);
+                     cp += l;
+                   }
+                 *cpp++ = NULL;
+               }
+
+             *dep_targets_to_eval = buf;
+           }
+      }
+  }
+
   return new;
 }
 
@@ -618,7 +902,7 @@ expand_deps (struct file *f)
       free (name);
 
       /* Parse the prerequisites and enter them into the file database.  */
-      new = enter_prereqs (split_prereqs (p), d->stem);
+      new = enter_prereqs (split_prereqs (p, NULL), d->stem);
 
       /* If there were no prereqs here (blank!) then throw this one out.  */
       if (new == 0)
@@ -645,7 +929,7 @@ struct dep *
 expand_extra_prereqs (const struct variable *extra)
 {
   struct dep *d;
-  struct dep *prereqs = extra ? split_prereqs (variable_expand (extra->value)) : NULL;
+  struct dep *prereqs = extra ? split_prereqs (variable_expand (extra->value), NULL) : NULL;
 
   for (d = prereqs; d; d = d->next)
     {
@@ -718,6 +1002,22 @@ snap_deps (void)
   struct file *f2;
   struct dep *d;
 
+  /* If we have targets to be .WAITed for, dive into the tree of prerequisites
+   * of the targets which have to .WAIT for those, as we need to create
+   * dependencies for all of these too, just as we did for the top level. */
+  a_file_wait_snapping = 1;
+  while (a_file_wait_late != NULL)
+    {
+      struct a_file_wait_late *wlp;
+
+      wlp = a_file_wait_late;
+      a_file_wait_late = wlp->last;
+
+      a_file_wait_late_recur (wlp);
+
+      free (wlp);
+    }
+
   /* Remember that we've done this.  Once we start snapping deps we can no
      longer define new targets.  */
   snapped_deps = 1;
@@ -753,6 +1053,18 @@ snap_deps (void)
       free (file_slot_0);
     }
 
+  /* If we have targets which require .WAITing, dive into their prerequisites,
+   * they need to be marked like so as well, to get cross-dependencies right */
+  if (a_file_wait != NULL)
+    {
+      char **cppx, **cpp = (char **) hash_dump (a_file_wait, 0, 0);
+
+      for (cppx = cpp; *cppx != NULL; ++cppx)
+        a_file_wait_recur (*cppx, 1);
+
+      free (cpp);
+    }
+
   /* Now manage all the special targets.  */
 
   for (f = lookup_file (".PRECIOUS"); f != 0; f = f->prev)
@@ -1203,4 +1515,18 @@ init_hash_files (void)
   hash_init (&files, 1000, file_hash_1, file_hash_2, file_hash_cmp);
 }
 
+void /* Enter NAME into a_file_wait, the table file_wait_is_needed() tests. */
+file_wait_add (const char *name)
+{
+  assert (strcache_iscached (name)); /* NAME must be a strcache string. */
+  (void) a_file_wait_add(&a_file_wait, name); /* Already present is fine. */
+}
+
+int /* Non-zero if FILE->name is found in the a_file_wait table. */
+file_wait_is_needed (const struct file *file)
+{
+  return (a_file_wait != NULL && /* No table yet: nothing to wait for. */
+      hash_find_item (a_file_wait, file->name) != NULL);
+}
+
 /* EOF */
diff --git a/src/filedef.h b/src/filedef.h
index 972f85331c..fa06e34cce 100644
--- a/src/filedef.h
+++ b/src/filedef.h
@@ -113,7 +113,7 @@ extern struct file *default_file;
 
 struct file *lookup_file (const char *name);
 struct file *enter_file (const char *name);
-struct dep *split_prereqs (char *prereqstr);
+struct dep *split_prereqs (char *prereqstr, char **dep_targets_to_eval);
 struct dep *enter_prereqs (struct dep *prereqs, const char *stem);
 struct dep *expand_extra_prereqs (const struct variable *extra);
 void remove_intermediates (int sig);
@@ -216,3 +216,7 @@ FILE_TIMESTAMP f_mtime (struct file *file, int search);
 
 /* Have we snapped deps yet?  */
 extern int snapped_deps;
+
+/* .WAIT logic: register a name to wait for / test whether FILE is one.  */
+void file_wait_add (const char *name);
+int file_wait_is_needed (const struct file *file);
diff --git a/src/job.c b/src/job.c
index ee59b95b68..987ca33318 100644
--- a/src/job.c
+++ b/src/job.c
@@ -1909,7 +1909,7 @@ new_job (struct file *file)
      (This will notice if there is in fact no recipe.)  */
   start_waiting_job (c);
 
-  if (job_slots == 1 || not_parallel)
+  if (job_slots == 1 || not_parallel || file_wait_is_needed (file))
     /* Since there is only one job slot, make things run linearly.
        Wait for the child to die, setting the state to 'cs_finished'.  */
     while (file->command_state == cs_running)
diff --git a/src/read.c b/src/read.c
index c0e3315f41..56fac53ae1 100644
--- a/src/read.c
+++ b/src/read.c
@@ -148,7 +148,8 @@ static void record_files (struct nameseq *filenames, int are_also_makes,
                           const char *pattern_percent, char *depstr,
                           unsigned int cmds_started, char *commands,
                           size_t commands_idx, int two_colon,
-                          char prefix, const floc *flocp);
+                          char prefix, const floc *flocp,
+                          char **dep_targets_to_eval);
 static void record_target_var (struct nameseq *filenames, char *defn,
                                enum variable_origin origin,
                                struct vmodifiers *vmod,
@@ -566,7 +567,7 @@ parse_var_assignment (const char *line, int targvar, struct vmodifiers *vmod)
 static void
 eval (struct ebuffer *ebuf, int set_default)
 {
-  char *collapsed = 0;
+  char *collapsed = 0, *dep_targets_to_eval = 0;
   size_t collapsed_length = 0;
   size_t commands_len = 200;
   char *commands;
@@ -595,7 +596,7 @@ eval (struct ebuffer *ebuf, int set_default)
           record_files (filenames, also_make_targets, pattern,                \
                         pattern_percent, depstr,                              \
                         cmds_started, commands, commands_idx, two_colon,      \
-                        prefix, &fi);                                         \
+                        prefix, &fi, &dep_targets_to_eval);                   \
           filenames = 0;                                                      \
         }                                                                     \
       commands_idx = 0;                                                       \
@@ -1327,6 +1328,13 @@ eval (struct ebuffer *ebuf, int set_default)
 
 #undef word1eq
 
+  /* Insert possibly auto-generated target dependencies */
+  if (dep_targets_to_eval != NULL)
+    {
+      eval_buffer (dep_targets_to_eval, fstart);
+      free (dep_targets_to_eval);
+    }
+
   if (conditionals->if_cmds)
     O (fatal, fstart, _("missing 'endif'"));
 
@@ -1980,7 +1988,7 @@ record_files (struct nameseq *filenames, int are_also_makes,
               const char *pattern_percent, char *depstr,
               unsigned int cmds_started, char *commands,
               size_t commands_idx, int two_colon,
-              char prefix, const floc *flocp)
+              char prefix, const floc *flocp, char **dep_targets_to_eval)
 {
   struct commands *cmds;
   struct dep *deps;
@@ -2031,7 +2039,7 @@ record_files (struct nameseq *filenames, int are_also_makes,
         }
       else
         {
-          deps = split_prereqs (depstr);
+          deps = split_prereqs (depstr, dep_targets_to_eval);
           free (depstr);
 
           /* We'll enter static pattern prereqs later when we have the stem.
