/* elt-dirs.c: Translate a path element to its corresponding director{y,ies}.

   Copyright 1993, 1994, 1995, 1996, 1997, 2008, 2009, 2010 Karl Berry.
   Copyright 1997, 1998, 1999, 2000, 2005 Olaf Weber.

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public License
   along with this library; if not, see <http://www.gnu.org/licenses/>.  */

#include <kpathsea/config.h>

#include <kpathsea/c-pathch.h>
#include <kpathsea/expand.h>
#include <kpathsea/fn.h>
#include <kpathsea/pathsearch.h>
#include <kpathsea/xopendir.h>

/* To avoid giving prototypes for all the routines and then their real
   definitions, we give all the subroutines first.  The entry point is
   the last routine in the file.  */

/* Make a copy of DIR (unless it's null) and save it in L.  Ensure that
   DIR ends with a DIR_SEP for the benefit of later searches.  */

static void
dir_list_add (str_llist_type *l,  const_string dir)
{
  char last_char = dir[strlen (dir) - 1];
  string saved_dir
    = IS_DIR_SEP (last_char) || IS_DEVICE_SEP (last_char)
      ? xstrdup (dir)
      : concat (dir, DIR_SEP_STRING);

  str_llist_add (l, saved_dir);
}


/* If DIR is a directory, add it to the list L.  */

static void
checked_dir_list_add (kpathsea kpse, str_llist_type *l,  const_string dir)
{
    if (kpathsea_dir_p (kpse, dir))
    dir_list_add (l, dir);
}

/* The cache.  Typically, several paths have the same element; for
   example, /usr/local/lib/texmf/fonts//.  We don't want to compute the
   expansion of such a thing more than once.  Even though we also cache
   the dir_links call, that's not enough -- without this path element
   caching as well, the execution time doubles.  */

/* Associate KEY with VALUE.  We implement the cache as a simple linear
   list, since it's unlikely to ever be more than a dozen or so elements
   long.  We don't bother to check here if PATH has already been saved;
   we always add it to our list.  We copy KEY but not VALUE; not sure
   that's right, but it seems to be all that's needed.  */

static void
cache (kpathsea kpse, const_string key,  str_llist_type *value)
{
  kpse->cache_length++;
  XRETALLOC (kpse->the_cache, kpse->cache_length, cache_entry);
  kpse->the_cache[kpse->cache_length - 1].key = xstrdup (key);
  kpse->the_cache[kpse->cache_length - 1].value = value;
}


/* To retrieve, just check the list in order.  */

static str_llist_type *
cached (kpathsea kpse, const_string key)
{
  unsigned p;

  for (p = 0; p < kpse->cache_length; p++)
    {
      if (FILESTRCASEEQ (kpse->the_cache[p].key, key))
        return kpse->the_cache[p].value;
    }

  return NULL;
}

/* Handle the magic path constructs.  */

/* Declare recursively called routine.  */
static void expand_elt (kpathsea, str_llist_type *, const_string, unsigned);


/* POST is a pointer into the original element (which may no longer be
   ELT) to just after the doubled DIR_SEP, perhaps to the null.  Append
   subdirectories of ELT (up to ELT_LENGTH, which must be a /) to
   STR_LIST_PTR.  */

#ifdef WIN32
/* Shared across recursive calls, it acts like a stack. */
static char dirname[MAX_PATH];
#endif

static void
do_subdir (kpathsea kpse, str_llist_type *str_list_ptr,  const_string elt,
              unsigned elt_length,  const_string post)
{
#ifdef WIN32
  WIN32_FIND_DATA find_file_data;
  HANDLE hnd;
  int proceed;
  int nlinks = 2;
#else
  DIR *dir;
  struct dirent *e;
#endif /* not WIN32 */
  fn_type name;

  /* Some old compilers don't allow aggregate initialization.  */
  name = fn_copy0 (elt, elt_length);

  assert (IS_DIR_SEP (elt[elt_length - 1])
          || IS_DEVICE_SEP (elt[elt_length - 1]));

#if defined (WIN32)
  strcpy(dirname, FN_STRING(name));
  strcat(dirname, "/*.*");         /* "*.*" or "*" -- seems equivalent. */
  hnd = FindFirstFile(dirname, &find_file_data);

  if (hnd == INVALID_HANDLE_VALUE) {
    fn_free(&name);
    return;
  }

  /* Include top level before subdirectories, if nothing to match.  */
  if (*post == 0)
    dir_list_add (str_list_ptr, FN_STRING (name));
  else {
    /* If we do have something to match, see if it exists.  For
       example, POST might be `pk/ljfour', and they might have a
       directory `$TEXMF/fonts/pk/ljfour' that we should find.  */
    fn_str_grow (&name, post);
    expand_elt (kpse, str_list_ptr, FN_STRING (name), elt_length);
    fn_shrink_to (&name, elt_length);
  }
  proceed = 1;
  while (proceed) {
    if (find_file_data.cFileName[0] != '.') {
      int links;

      /* Construct the potential subdirectory name.  */
      fn_str_grow (&name, find_file_data.cFileName);

      /* Maybe we have cached the leafness of this directory.
                 The function will return 0 if unknown,
                 else the actual (Unix-like) value. */
      links = kpathsea_dir_links (kpse, FN_STRING (name), 0);

      if (find_file_data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
        unsigned potential_len = FN_LENGTH (name);
        /* in any case, compute the leafness */
        nlinks++;

        /* It's a directory, so append the separator.  */
        fn_str_grow (&name, DIR_SEP_STRING);
        if (*post != 0) {
          fn_str_grow (&name, post);
          /* Unfortunately we can't check if the new element is
             a leaf directory, because we don't have a directory
             name here, we just have a path spec. This means we
             may descend into a leaf directory cm/pk, if the
             spec is ...fonts//pk//.  */
          expand_elt (kpse, str_list_ptr, FN_STRING (name), potential_len);
          fn_shrink_to (&name, potential_len);
        }
        /* Should we recurse?  To see if the subdirectory is a
           leaf, check if it has two links (one for . and one for
           ..).  This means that symbolic links to directories do
           not affect the leaf-ness.  This is arguably wrong, but
           the only alternative I know of is to stat every entry
           in the directory, and that is unacceptably slow. */

        if (links == 0 || links > 2)
          /* All criteria are met; find subdirectories.  */
        do_subdir (kpse, str_list_ptr, FN_STRING (name),
                     potential_len, post);
        else if (*post == 0)
          /* Nothing to match, no recursive subdirectories to
             look for: we're done with this branch.  Add it.  */
          dir_list_add (str_list_ptr, FN_STRING (name));
      }
      fn_shrink_to (&name, elt_length);
    }
    proceed = FindNextFile (hnd, &find_file_data);
  }
  /* Update the leafness of name. */
  kpathsea_dir_links(kpse, FN_STRING(name), nlinks);
  fn_free (&name);
  FindClose(hnd);

#else /* not WIN32 */

  /* If we can't open it, quit.  */
  dir = opendir (FN_STRING (name));
  if (dir == NULL)
    {
      fn_free (&name);
      return;
    }

  /* Include top level before subdirectories, if nothing to match.  */
  if (*post == 0)
    dir_list_add (str_list_ptr, FN_STRING (name));
  else
    { /* If we do have something to match, see if it exists.  For
         example, POST might be `pk/ljfour', and they might have a
         directory `$TEXMF/fonts/pk/ljfour' that we should find.  */
      fn_str_grow (&name, post);
      expand_elt (kpse, str_list_ptr, FN_STRING (name), elt_length);
      fn_shrink_to (&name, elt_length);
    }

  while ((e = readdir (dir)) != NULL)
    { /* If it begins with a `.', never mind.  (This allows ``hidden''
         directories that the algorithm won't find.)  */
      if (e->d_name[0] != '.')
        {
          int links;

          /* Construct the potential subdirectory name.  */
          fn_str_grow (&name, e->d_name);

          /* If we can't stat it, or if it isn't a directory, continue.  */
          links = kpathsea_dir_links (kpse, FN_STRING (name), 0);

          if (links >= 0)
            {
              unsigned potential_len = FN_LENGTH (name);

              /* It's a directory, so append the separator.  */
              fn_str_grow (&name, DIR_SEP_STRING);

              if (*post != 0)
                {
                  fn_str_grow (&name, post);
                  /* Unfortunately we can't check if the new element is
                     a leaf directory, because we don't have a directory
                     name here, we just have a path spec. This means we
                     may descend into a leaf directory cm/pk, if the
                     spec is ...fonts//pk//.  */
                  expand_elt (kpse, str_list_ptr, FN_STRING (name),
                              potential_len);
                  fn_shrink_to (&name, potential_len);
                }

              /* Should we recurse?  To see if the subdirectory is a
                 leaf, check if it has two links (one for . and one for
                 ..).  This means that symbolic links to directories do
                 not affect the leaf-ness.  This is arguably wrong, but
                 the only alternative I know of is to stat every entry
                 in the directory, and that is unacceptably slow.

                 The #ifdef here makes all this configurable at
                 compile-time, so that if we're using VMS directories or
                 some such, we can still find subdirectories, even if it
                 is much slower.  */
#ifdef ST_NLINK_TRICK
              /* With SAS/C++ 6.55 on the Amiga, stat sets the st_nlink
                 field to -1 for a file, or to 1 for a directory.
                 Cygwin 1.7 also leaves st_nlink as 1:
                 http://cygwin.com/ml/cygwin-developers/2008-04/msg00110.html
                 */
              if (links != 2)
#endif /* ST_NLINK_TRICK */
                /* All criteria are met; find subdirectories.  */
                  do_subdir (kpse, str_list_ptr, FN_STRING (name),
                           potential_len, post);
#ifdef ST_NLINK_TRICK
              else if (*post == 0)
                /* Nothing to match, no recursive subdirectories to
                   look for: we're done with this branch.  Add it.  */
                dir_list_add (str_list_ptr, FN_STRING (name));
#endif
            }

          /* Remove the directory entry we just checked from `name'.  */
          fn_shrink_to (&name, elt_length);
        }
    }

  fn_free (&name);
  xclosedir (dir);
#endif /* not WIN32 */
}


/* Assume ELT is non-empty and non-NULL.  Return list of corresponding
   directories (with no terminating NULL entry) in STR_LIST_PTR.  Start
   looking for magic constructs at START.  */

static void
expand_elt (kpathsea kpse, str_llist_type * str_list_ptr,  const_string elt,
               unsigned start)
{
  const_string dir = elt + start, post;

  while (*dir != 0)
    {
      if (IS_DIR_SEP (*dir))
        {
          /* If two or more consecutive /'s, find subdirectories.  */
          if (IS_DIR_SEP (dir[1]))
            {
              for (post = dir + 1; IS_DIR_SEP (*post); post++) ;
            do_subdir (kpse, str_list_ptr, elt, dir - elt + 1, post);
              return;
            }

          /* No special stuff at this slash.  Keep going.  */
        }

      dir++;
    }

  /* When we reach the end of ELT, it will be a normal filename.  */
  checked_dir_list_add (kpse, str_list_ptr, elt);
}

/* The first bits of a path element can be problematic because they
   look like a request to expand a whole disk, rather than a subtree.
   - It can contain a drive specification.
   - It can be a UNC path (w32, but they are part of the single
     Unix specification as well).
   The argument is a string as the function can diddle into the argument
   to canonicalize it, which tends to matter on windows platforms.
   - Always lower-case drive letters a-z, even those filesystem that
     preserve case in filenames do not care about the case of the drive
     letters.
   - Remove unneeded double slashes. The problem is Windows does not
     handle well filenames like c://dir/foo. So canonicalize the names.
     The resulting name will always be shorter than the one passed, so no
     problem.
   - If possible, we merely skip multiple leading slashes to prevent
     expanding from the root of a UNIX filesystem tree.  */

unsigned
kpathsea_normalize_path (kpathsea kpse, string elt)
{
  unsigned ret;
  unsigned i;

  if (NAME_BEGINS_WITH_DEVICE(elt)) {
    if (*elt >= 'A' && *elt <= 'Z')
      *elt += 'a' - 'A';
    for (i = 2; IS_DIR_SEP(elt[i]); ++i)
      ;
    if (i > 3)
      memmove (elt+3, elt+i, strlen(elt+i) + 1);
    ret = 2;

  } else if (IS_UNC_NAME(elt)) {
    for (ret = 2; elt[ret] && !IS_DIR_SEP(elt[ret]); ++ret)
      ;
    for (i = ret; elt[i] && IS_DIR_SEP(elt[i]); ++i)
      ;
    if (i > ret+1)
      memmove (elt+ret+1, elt+i, strlen(elt+i) + 1);

  } else {
    for (ret = 0; IS_DIR_SEP(elt[ret]); ++ret)
      ;
  }

#ifdef KPSE_DEBUG
  if (KPATHSEA_DEBUG_P (KPSE_DEBUG_STAT) && ret != 1)
    DEBUGF2 ("kpse_normalize_path (%s) => %u\n", elt, ret);
#endif /* KPSE_DEBUG */

  return ret;
}

#if defined(KPSE_COMPAT_API)
unsigned
kpse_normalize_path (string elt)
{
    return kpathsea_normalize_path(kpse_def, elt);
}
#endif

/* Here is the entry point.  Returns directory list for ELT.  */

str_llist_type *
kpathsea_element_dirs (kpathsea kpse, string elt)
{
  str_llist_type *ret;

  /* If given nothing, return nothing.  */
  if (!elt || !*elt)
    return NULL;

  /* If we've already cached the answer for ELT, return it.  */
  ret = cached (kpse, elt);
  if (ret)
    return ret;

  /* We're going to have a real directory list to return.  */
  ret = XTALLOC1 (str_llist_type);
  *ret = NULL;

  /* We handle the hard case in a subroutine.  */
  expand_elt (kpse, ret, elt, kpathsea_normalize_path (kpse, elt));

  /* Remember the directory list we just found, in case future calls are
     made with the same ELT.  */
  cache (kpse, elt, ret);

#ifdef KPSE_DEBUG
  if (KPATHSEA_DEBUG_P (KPSE_DEBUG_EXPAND))
    {
      DEBUGF1 ("path element %s =>", elt);
      if (ret)
        {
          str_llist_elt_type *e;
          for (e = *ret; e; e = STR_LLIST_NEXT (*e))
            fprintf (stderr, " %s", STR_LLIST (*e));
        }
      putc ('\n', stderr);
      fflush (stderr);
    }
#endif /* KPSE_DEBUG */

  return ret;
}

#if defined(KPSE_COMPAT_API)
str_llist_type *
kpse_element_dirs (string elt)
{
    return kpathsea_element_dirs(kpse_def, elt);
}
#endif

#ifdef TEST

void
print_element_dirs (const_string elt)
{
  str_llist_type *dirs;

  printf ("Directories of %s:\t", elt ? elt : "(nil)");
  fflush (stdout);

  dirs = kpse_element_dirs (elt);

  if (!dirs)
    printf ("(nil)");
  else
    {
      str_llist_elt_type *dir;
      for (dir = *dirs; dir; dir = STR_LLIST_NEXT (*dir))
        {
          string d = STR_LLIST (*dir);
          printf ("%s ", *d ? d : "`'");
        }
    }

  putchar ('\n');
}

int
main ()
{
  /* DEBUG_SET (DEBUG_STAT); */
  /* All lists end with NULL.  */
  print_element_dirs (NULL);    /* */
  print_element_dirs ("");      /* ./ */
  print_element_dirs ("/k");    /* */
  print_element_dirs (".//");   /* ./ ./archive/ */
  print_element_dirs (".//archive");    /* ./ ./archive/ */
#ifdef AMIGA
  print_element_dirs ("TeXMF:AmiWeb2c/texmf/fonts//"); /* lots */
  print_element_dirs ("TeXMF:AmiWeb2c/share/texmf/fonts//bakoma"); /*just one*/
  print_element_dirs ("TeXMF:AmiWeb2c/texmf/fonts//"); /* lots again [cache] */
  print_element_dirs ("TeXMF:");        /* TeXMF: */
  print_element_dirs ("TeXMF:/");       /* TeXMF: and all subdirs */
#else /* not AMIGA */
  print_element_dirs ("/tmp/fonts//");  /* no need to stat anything */
  print_element_dirs ("/usr/local/lib/tex/fonts//");      /* lots */
  print_element_dirs ("/usr/local/lib/tex/fonts//times"); /* just one */
  print_element_dirs ("/usr/local/lib/tex/fonts//"); /* lots again [cache] */
  print_element_dirs ("~karl");         /* tilde expansion */
  print_element_dirs ("$karl");         /* variable expansion */
  print_element_dirs ("~${LOGNAME}");   /* both */
#endif /* not AMIGA */
  return 0;
}

#endif /* TEST */


/*
Local variables:
standalone-compile-command: "gcc -g -I. -I.. -DTEST elt-dirs.c kpathsea.a"
End:
*/