/* * qfind - a quick! "find . -type f" like program optimized for Windows. * * Description: * 1. By default, only prints non-directory file names. * 2. Prints all a directory's files, before processing its subdirectories. * 3. Does not prefix filenames with redundant "./". * * Background: * It is not currently possible to write efficient directory traversal code in * Perl on Windows. This is because you need to stat() each file returned by * readdir() to test if it is a directory when this information has already * been obtained but not used. The extra unnecessary stat() calls severely * degrade performance. This program was written to be run from a Perl script * to provide it with a faster method of finding all the files beneath a * directory than (for example) File::Find::find(). See "peg.pl" for details. * * To compile: * % gcc -O2 qfind.c * * Copyright (c) 2007-2012 Alex Davies. All rights reserved. This program * is free software; you can redistribute it and/or modify it under the * same terms as Perl itself. */ /******************************************************************************/ #define QFIND_VERSION "1.19" #define QFIND_COPYRIGHT "Copyright (c) 2007-2012 Alex Davies." /* Are we on a MS Windows platform? */ #if defined(_WIN32) && !defined(WIN32) #define WIN32 #endif #if defined(_WIN64) && !defined(WIN64) #define WIN64 #endif #if (defined(WIN32) || defined(WIN64)) && !defined(WINDOWS) #define WINDOWS #endif #if defined(WINDOWS) && defined(UNICODE) #error A UNICODE build is not supported #endif /* There are two versions of qfind: one uses Find(First|Next)File, the other * uses opendir/readdir/stat. Both can be built on Windows (the former is more * efficient and hence faster), while only the latter will work on *nix. */ #if defined(WINDOWS) && !defined(QFINDX) #define USE_FINDFILE 1 #else #define USE_FINDFILE 0 #endif /******************************************************************************/ #include #include #include #include #include #if defined(WINDOWS) #include #include #endif #if USE_FINDFILE #include #else #include #include #include #include #include #include #include #endif /******************************************************************************/ #if USE_FINDFILE #define QCODE(a, b) a #else #define QCODE(a, b) b #endif #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L #define STDC99 #endif #if !(defined(STDC99) || defined(__GNUC__) || defined(__cplusplus)) #define inline #endif #if defined(__GNUC__) #define NOINLINE __attribute__ ((noinline)) #else #define NOINLINE #endif #if defined(__cplusplus) #define CPP_CAST(type) (type) #else #define CPP_CAST(type) #endif #ifdef _DIRENT_HAVE_D_TYPE #define HAS_D_TYPE #endif #if defined(HAS_D_TYPE) && defined(WINDOWS) && !USE_FINDFILE /* Allow test compiling the HAS_D_TYPE build on Windows. */ #define d_type d_ino #define DT_UNKNOWN 0 /* NB. d_ino is always 0 on Windows. */ #define DT_FIFO 1 #define DT_CHR 2 #define DT_DIR 4 #define DT_BLK 6 #define DT_REG 8 #define DT_LNK 10 #define DT_SOCK 12 #define DT_WHT 14 #endif /******************************************************************************/ /* Encapsulate a simple stack of strings. */ struct strstack { struct { /* A single buffer containing all the strings. */ char *buf; size_t size; } str; struct { /* Store each string's offset into the buffer. */ size_t *list; size_t max; size_t next; } offset; size_t total; /* # of strings stored. */ }; /* Initial stack sizes. */ #define DIR_STACK_ELEMENTS (1 * 1024) #define DIR_STACK_BUFSIZE (8 * DIR_STACK_ELEMENTS) #define FILE_STACK_ELEMENTS (4 * 1024) #define FILE_STACK_BUFSIZE (8 * FILE_STACK_ELEMENTS) /* Use a stack of directories to allow us to print all the files * in a directory before processing its subdirectories. */ static struct strstack dir_stack; /* A stack of filenames which is sorted prior to printing. */ static struct strstack file_stack; /* A function for comparing strings. */ typedef int (*strcmp_fn_t)(const char *s1, const char *s2); /* The comparison function used to sort filenames. * We don't sort the filenames before printing by default when using * FindNextFile() because it already returns sorted results on NTFS. */ static strcmp_fn_t filename_cmp = QCODE(NULL, strcmp); #if USE_FINDFILE /* => WINDOWS */ static char slash = '/'; static char slosh = '\\'; /* The other slash. */ #else static const char slash = '/'; #endif /* Store various boolean options in a single variable. */ static unsigned int flags = 0; #if USE_FINDFILE /* Print classic 8.3 filenames eg. "filename.txt". */ #define TRUNCATED_FILENAMES (1u << 0) /* -t */ /* Skip SYSTEM|HIDDEN files & directories. */ #define SKIP_HIDDEN_FILES (1u << 1) /* -y */ #else /* By default the opendir/readdir build only prints regular files. * ie. skip sockets, character/block special files & named pipes. */ #define ANY_FILE (1u << 2) /* -a */ /* Skip non-readable files. */ #define SKIP_NON_READABLE_FILES_ALWAYS (1u << 3) /* -r */ #define SKIP_NON_READABLE_FILES_IF_FREE (1u << 4) /* -R */ #define SKIP_NON_READABLE_FILES \ (SKIP_NON_READABLE_FILES_ALWAYS | SKIP_NON_READABLE_FILES_IF_FREE) #endif /* Skip dot files & directories. */ #define SKIP_DOT_FILES (1u << 5) /* -o */ /* Silence messages to STDERR. */ #define SILENT (1u << 6) /* -s */ /* Print files matching given size restraints. */ #define MAX_SIZE_TEST (1u << 7) /* -K */ #define MIN_SIZE_TEST_ALWAYS (1u << 8) /* -z or -J */ #define MIN_SIZE_TEST_IF_FREE (1u << 9) /* -Z */ #define MIN_SIZE_TEST \ (MIN_SIZE_TEST_ALWAYS | MIN_SIZE_TEST_IF_FREE) #if USE_FINDFILE struct high_low { DWORD high, low; }; static struct high_low min_size_hl, max_size_hl; #else typedef unsigned long filesize_t; static filesize_t min_size, max_size; #endif /* Modification time tests. */ #define SKIP_NEW_FILES (1u << 10) /* -O */ #define SKIP_OLD_FILES (1u << 11) /* -N */ static time_t opt_N_time; static time_t opt_O_time; #define MTIME_TEST (SKIP_OLD_FILES | SKIP_NEW_FILES) #define CONSIDER_CTIME (1u << 12) /* -c */ /* Do we need to call is_exclude_extension() on each filename. */ #define EXTENSION_TEST (1u << 13) /* -e or -p */ /* Match/ignore files belonging to a given user/group id. */ #if !USE_FINDFILE #define UID_TEST_MATCH (1u << 14) /* -u */ #define UID_TEST_SKIP (1u << 15) /* -U */ #define GID_TEST_MATCH (1u << 16) /* -g */ #define GID_TEST_SKIP (1u << 17) /* -G */ /* One or more of -u/-U/-g/-G. */ #define ID_TEST (0 \ | UID_TEST_MATCH \ | UID_TEST_SKIP \ | GID_TEST_MATCH \ | GID_TEST_SKIP) typedef unsigned int id_t; static id_t opt_uid; static id_t opt_gid; #endif #if !USE_FINDFILE /* By default qfind ignores symolic links to directories. The -l/-L options * make qfind follow them; the difference between them is that -l will skip * directories it has already been in, while -L follows them all except where * that leads to recursion. */ #define FOLLOW_LINKS_NO_REPS (1u << 18) /* -l */ #define FOLLOW_LINKS_ALL (1u << 19) /* -L */ #define FOLLOW_LINKS (FOLLOW_LINKS_NO_REPS | FOLLOW_LINKS_ALL) /* The idea of not chdir()'ing came from GNU grep. Unfortunately It will fail * with ENAMETOOLONG if the pathname exceeds PATH_MAX. To handle arbitarily * deep directory trees means we have to chdir(). * It may still be useful on mounted filesystems. */ #define NO_CHDIR (1u << 20) /* -X */ /* Equivalent to GNU find's -xdev/-mount option. */ #define XDEV (1u << 21) /* -m */ #endif /* Qfind is really all about printing (regular) files and not directories. * However, since it is occasionally useful to have a list of directories, and * since it fairly simple to tack this capability onto qfind, here it is... * NB. The mnemonic for -+ is the "+"s seen in file/directory tree views. */ #define PRINT_DIRS_ALSO (1u << 22) /* -+ */ #define PRINT_DIRS_ONLY (1u << 23) /* -: */ #define PRINT_DIRS (PRINT_DIRS_ALSO | PRINT_DIRS_ONLY) #define DEPTH_TEST (1u << 24) /* -E */ static int depth; static int max_depth; #ifdef HAS_D_TYPE /* It is not always necessary to stat() the file if its d_type is DT_REG. */ #define NEED_TO_CALL_STAT (0 \ | MAX_SIZE_TEST \ | MIN_SIZE_TEST_ALWAYS \ | SKIP_NON_READABLE_FILES_ALWAYS \ | MTIME_TEST \ | ID_TEST) #endif /* We need to determine if a given string is among a list of other strings. * To speed lookup, after initialization the strings are sorted and then we * build a table keyed by the first character of member strings which gives the * first index in the strstack of strings with that first character. */ struct lookup_list { struct strstack ss; size_t firstchar[UCHAR_MAX + 1]; /* A primitive hash table. */ }; /* Value of firstchar[UCHAR(c)] indicating no string in the lookup_list with * a first character of 'c'. */ #define NONE ((size_t) -1) #define EMPTY_STACK ((size_t) -1) /* A list of directories to ignore eg. ".git". */ static struct lookup_list xd; /* A list of file extensions to ignore eg. "obj". */ static struct lookup_list xe; /* A list of file extensions to match. This overrides xe. */ static struct lookup_list xp; /* A list of directory paths to ignore. */ static struct lookup_list xD; /* A list of filenames to ignore. */ static struct lookup_list xv; /* Data needed by cmp_ss(), the qsort comparison function. */ static struct { struct strstack *ssp; strcmp_fn_t cmp; } sort_data; /* The name of the directory currently being processed. */ static char *dirname = NULL; static size_t dirname_bufsize; static size_t dirname_len; #define DIRNAME_BUFSIZE 280 #if !USE_FINDFILE /* The name of the starting directory. This is only needed if multiple DIR * command line arguments are specified. */ static char *start_dir = NULL; #define START_DIR_BUFSIZE 280 /* If symbolic links to directories are followed, it is possible to enter an * infinite loop if a link points back to a directory containing that link. * To avoid this we store ids of the directories processed. */ struct dirid { ino_t ino; dev_t dev; }; struct dirids { struct dirid *list; size_t size; size_t total; }; /* To reduce lookup time, use a fixed size array of lists keyed by the ino. */ #define DIRID_LIST_SIZE 31 #define DIRID_LIST_IDX(ino, dev) (((unsigned int) ino) % DIRID_LIST_SIZE) static struct dirids dirid_list[DIRID_LIST_SIZE]; /* Data stored after directory names in dir_stack. */ struct dir_info { struct dirid di; int is_a_link; }; /* To create a pointer to an arbitrary type at or beyond a given pointer 'ptr' * in a char buffer of sufficient size, we add ALIGN_PADDING(ptr) to it to give * a pointer lying on an 8 byte boundary. */ #define ALIGN_SIZE 8 #define ALIGN_PADDING(ptr) \ ((ALIGN_SIZE - (((uintptr_t) (ptr)) % ALIGN_SIZE)) % ALIGN_SIZE) /* A function type representing stat() and lstat(). */ typedef int (*stat_fn_t)(const char *f, struct stat *statp); /* The device number for the current top directory. */ static dev_t start_dev; #endif /******************************************************************************/ typedef QCODE(DWORD, int) err_t; /* GetLastError() or errno. */ static void setup(void); static int process_argv(int argc, char *argv[]); static int process_option(char *arg); static void prepare(void); static void print_help(void); static void scan_dir(void); #if USE_FINDFILE #define scan_dir2(d, cwdp) scan_dir() #else static void scan_dir2(const char *d, int *cwd_fdp); #endif static void print_files(void) NOINLINE; static void print_file_stack(void); static void flush_output(void); static void found_file(const char *f); static void write_dirname(void); static void print_directory(void); static void dirwarn(const char *msg, err_t err); static char * remove_trailing_slash(void); static char * get_error_msg(err_t err); #ifdef QFIND_CLEAN_UP static void clean_up(void); static void free_ss(struct strstack *ssp); static void free_ll(struct lookup_list *llp); #if !USE_FINDFILE static void free_dirid_list(void); #endif #endif static void terminate(int status); static size_t get_alloc_size(size_t needed); static void * Malloc(size_t size); static void * Realloc(void *ptr, size_t size); static void init_ss(struct strstack *ssp); static void alloc_ss(struct strstack *ssp, size_t elements, size_t bufsize); static void init_ll(struct lookup_list *xptr); static void init_ll_firstchar(struct lookup_list *xptr); static void push_ss(struct strstack *ssp, const char *str); static void grow_ss(struct strstack *ssp, size_t elements, size_t size); static inline char * get_str(const struct strstack *ssp, size_t i); static void clear_ss(struct strstack *ssp); static void sort_ss(struct strstack *ssp, size_t from, size_t n, strcmp_fn_t cmp); static void sort_filenames(struct strstack *ssp, size_t from, size_t n); static void remove_reps(struct lookup_list *xptr); static int stri2eq(const char *s1, const char *s2); static int cmp_ss(const void *elem1, const void *elem2); static int mystricmp(const char *s1, const char *s2); static int strnumcmp(const char *s1, const char *s2); static void parse_name_list(const char *arg, struct lookup_list *xptr); static void parse_ext_arg(char *arg, struct lookup_list *xptr); static void parse_time_arg(const char *arg, char opt); static void parse_size_arg(const char *arg, char opt); static void parse_depth_arg(const char *arg); static void fix_xD(void); #if !USE_FINDFILE static char * get_cwd(size_t bufsize); static void init_start_dir(void); static void chdir_to_start_dir(void); static void parse_id_arg(const char *arg, char opt); static void stat_failed(const char *f); static void lstat_failed(const char *f); static time_t get_mtime_from_stat_data(const struct stat *statp); static int open_cwd(void); static void chdir_up(void); static void dirdie(const char *msg, err_t err); static DIR * do_opendir(void); static int do_stat(const char *f, struct stat *statp); static int do_lstat(const char *f, struct stat *statp); static int do_stat2(const char *f, struct stat *statp, stat_fn_t stat_func); static void found_dir(const char *str, const struct stat *statp, int is_a_link); static void push_dir_ss(const char *str, const struct stat *statp, int is_a_link); static void init_dirid_list(void); static void clear_dirid_list(void); static void add_dirid_list(ino_t ino, dev_t dev); static int in_dirid_list(ino_t ino, dev_t dev); static struct dir_info * get_dir_info(const char *d); #endif static void skip_zero_size_files(const char opt); static int lookup(const struct lookup_list *xptr, const char *name); static int lookup_i(const struct lookup_list *xptr, const char *name); static int is_exclude_dir(const char *f); static int is_exclude_extension(const char *f); static int is_dirname_excluded(void); static int is_filename_excluded(const char *f); static void alloc_dirname(size_t bufsize); static void append_dirname(const char *d, size_t old_dirname_len); static int set_dirname(const char *dir); static void restore_dirname(size_t len); #if USE_FINDFILE static HANDLE get_first_file(WIN32_FIND_DATA *fDatap); static time_t filetime_to_timet(const FILETIME *filetimep); static BOOL filetime_from_time(PFILETIME pFileTime, time_t Time); static time_t get_mtime_from_find_data(const WIN32_FIND_DATA *fDatap); static time_t to_utc_time(time_t t); static void consistent_slashes(char *path); static inline int high_low_cmp(DWORD ah, DWORD al, DWORD bh, DWORD bl); #define high_low_gt(ah, al, bh, bl) (high_low_cmp(ah, al, bh, bl) > 0) #define high_low_lt(ah, al, bh, bl) (high_low_cmp(ah, al, bh, bl) < 0) #endif static inline int to_lower(int c); static inline int is_digit(int c); #define strEQ(s1, s2) (strcmp(s1, s2) == 0) #define strNE(s1, s2) (strcmp(s1, s2) != 0) /******************************************************************************/ #define Swap(T, a, b) \ do { \ T temp = a; \ a = b; \ b = temp; \ } while (0) /******************************************************************************/ #if defined(WINDOWS) #define lstat stat /* There is no lstat() on Windows. */ #endif /******************************************************************************/ #if UCHAR_MAX != 255 #undef QFIND_ASCII #endif #ifdef QFIND_ASCII /* Provide fast equivalents for 's tolower() and isdigit() assuming * the ASCII character set. */ /* Map ASCII characters to their lower case versions. */ static const unsigned char lc[256] = { 0, 1, 2, 3, 4, 5, 6, 7, /* 0-7 */ 8, 9, 10, 11, 12, 13, 14, 15, /* 8-15 */ 16, 17, 18, 19, 20, 21, 22, 23, /* 16-23 */ 24, 25, 26, 27, 28, 29, 30, 31, /* 24-31 */ 32, 33, 34, 35, 36, 37, 38, 39, /* 32-39 */ 40, 41, 42, 43, 44, 45, 46, 47, /* 40-47 */ 48, 49, 50, 51, 52, 53, 54, 55, /* 48-55 */ 56, 57, 58, 59, 60, 61, 62, 63, /* 56-63 */ 64, 97, 98, 99, 100, 101, 102, 103, /* 64-71 */ 104, 105, 106, 107, 108, 109, 110, 111, /* 72-79 */ 112, 113, 114, 115, 116, 117, 118, 119, /* 80-87 */ 120, 121, 122, 91, 92, 93, 94, 95, /* 88-95 */ 96, 97, 98, 99, 100, 101, 102, 103, /* 96-103 */ 104, 105, 106, 107, 108, 109, 110, 111, /* 104-111 */ 112, 113, 114, 115, 116, 117, 118, 119, /* 112-119 */ 120, 121, 122, 123, 124, 125, 126, 127, /* 120-127 */ 128, 129, 130, 131, 132, 133, 134, 135, /* 128-135 */ 136, 137, 138, 139, 140, 141, 142, 143, /* 136-143 */ 144, 145, 146, 147, 148, 149, 150, 151, /* 144-151 */ 152, 153, 154, 155, 156, 157, 158, 159, /* 152-159 */ 160, 161, 162, 163, 164, 165, 166, 167, /* 160-167 */ 168, 169, 170, 171, 172, 173, 174, 175, /* 168-175 */ 176, 177, 178, 179, 180, 181, 182, 183, /* 176-183 */ 184, 185, 186, 187, 188, 189, 190, 191, /* 184-191 */ 192, 193, 194, 195, 196, 197, 198, 199, /* 192-199 */ 200, 201, 202, 203, 204, 205, 206, 207, /* 200-207 */ 208, 209, 210, 211, 212, 213, 214, 215, /* 208-215 */ 216, 217, 218, 219, 220, 221, 222, 223, /* 216-223 */ 224, 225, 226, 227, 228, 229, 230, 231, /* 224-231 */ 232, 233, 234, 235, 236, 237, 238, 239, /* 232-239 */ 240, 241, 242, 243, 244, 245, 246, 247, /* 240-247 */ 248, 249, 250, 251, 252, 253, 254, 255, /* 248-255 */ }; #else #include #include #endif static inline int to_lower(int c) { #ifdef QFIND_ASCII return lc[c]; #else return tolower(c); #endif } static inline int is_digit(int c) { #ifdef QFIND_ASCII return c <= '9' && c >= '0'; #else return isdigit(c); #endif } #define UCHAR(c) ((unsigned char) (c)) #define lowercase(c) (to_lower(UCHAR(c))) #define izdigit(c) (is_digit(UCHAR(c))) /******************************************************************************/ int main(int argc, char *argv[]) { int i, first_dir_arg_idx, num_dir_args; setup(); num_dir_args = process_argv(argc, argv); prepare(); first_dir_arg_idx = argc - num_dir_args; if (!num_dir_args) argv[argc++] = (char *) ""; /* Fake an extra command line argument. */ #if !USE_FINDFILE if (num_dir_args > 1 && !(flags & NO_CHDIR)) init_start_dir(); #endif for (i = first_dir_arg_idx; i < argc; ++i) { const char *dir = argv[i]; if (!set_dirname(dir)) continue; scan_dir(); } terminate(EXIT_SUCCESS); return 0; } /******************************************************************************/ /* Initialize the stacks etc. */ static void setup(void) { init_ll(&xd); init_ll(&xD); init_ll(&xe); init_ll(&xp); init_ll(&xv); init_ss(&dir_stack); init_ss(&file_stack); #if !USE_FINDFILE init_dirid_list(); #endif #ifndef QFIND_ASCII /* Use the current locale's tolower(). */ setlocale(LC_CTYPE, ""); #endif #if defined(WINDOWS) /* Write LF newlines, not CRLFs. */ if (setmode(fileno(stdout), O_BINARY) == -1) { fprintf(stderr, "qfind: failed to binmode output: %d\n", errno); terminate(EXIT_FAILURE); } #endif } /******************************************************************************/ /* Process the command line arguments. */ static int process_argv(int argc, char *argv[]) { if (argc <= 0) return 0; ++argv; --argc; /* Skip program name (assume it is "qfind"). */ while (argc > 0) { if (argv[0][0] == '-') { char *arg = argv[0]; ++argv; --argc; if (process_option(arg)) continue; } break; } return argc; /* # of remaining non option (ie. DIR) arguments. */ } /******************************************************************************/ /* Process an option argument. */ static int process_option(char *arg) { char opt; int eoarg = 0; int in_options = 1; if (*arg++ != '-') return 0; while ((opt = *arg++) != '\0') { switch (opt) { #if USE_FINDFILE case 'b': slash = '\\'; slosh = '/'; break; case 't': flags |= TRUNCATED_FILENAMES; break; case 'y': flags |= SKIP_HIDDEN_FILES; break; #else case 'a': flags |= ANY_FILE; break; case 'g': case 'G': case 'u': case 'U': parse_id_arg(arg, opt); eoarg = 1; break; case 'l': flags |= FOLLOW_LINKS_NO_REPS; flags &= ~FOLLOW_LINKS_ALL; /* cf. -Ll */ break; case 'L': flags |= FOLLOW_LINKS_ALL; flags &= ~FOLLOW_LINKS_NO_REPS; /* cf. -lL */ break; case 'm': flags |= XDEV; break; case 'r': flags |= SKIP_NON_READABLE_FILES_ALWAYS; break; case 'R': flags |= SKIP_NON_READABLE_FILES_IF_FREE; break; #endif case 'c': flags |= CONSIDER_CTIME; break; case 'd': parse_name_list(arg, &xd); eoarg = 1; break; case 'e': case 'p': parse_ext_arg(arg, opt == 'e' ? &xe : &xp); eoarg = 1; break; case 'h': print_help(); terminate(EXIT_SUCCESS); break; case 'i': filename_cmp = mystricmp; break; case 'n': filename_cmp = strnumcmp; break; case 'o': flags |= SKIP_DOT_FILES; break; case 's': flags |= SILENT; break; case 'v': parse_name_list(arg, &xv); eoarg = 1; break; case 'x': filename_cmp = NULL; break; case 'z': case 'Z': skip_zero_size_files(opt); break; case 'D': parse_name_list(arg, &xD); eoarg = 1; break; case 'E': parse_depth_arg(arg); eoarg = 1; break; case 'J': case 'K': parse_size_arg(arg, opt); eoarg = 1; break; case 'N': case 'O': parse_time_arg(arg, opt); eoarg = 1; break; case 'S': filename_cmp = strcmp; break; case 'V': printf("qfind v%s %s\n", QFIND_VERSION, QFIND_COPYRIGHT); terminate(EXIT_SUCCESS); break; case 'X': #if !USE_FINDFILE flags |= NO_CHDIR; #endif break; case ':': flags |= PRINT_DIRS_ONLY; flags &= ~PRINT_DIRS_ALSO; /* cf. -+: */ break; case '+': flags |= PRINT_DIRS_ALSO; flags &= ~PRINT_DIRS_ONLY; /* cf. -:+ */ break; case '-': in_options = 0; break; default: fprintf(stderr, "qfind: unknown option '%c'\n", opt); terminate(EXIT_FAILURE); break; } /* end of switch */ if (eoarg) break; } return in_options; } /******************************************************************************/ /* Do any post command line argument processing setup here. */ static void prepare(void) { /* If both -N & -O, or -J & -K, are specified then assume the ranges are * intended to be between the two given values. */ if ((flags & MTIME_TEST) == MTIME_TEST) if (opt_N_time > opt_O_time) Swap(time_t, opt_N_time, opt_O_time); if ((flags & MIN_SIZE_TEST) && (flags & MAX_SIZE_TEST)) { #if USE_FINDFILE if (high_low_gt(min_size_hl.high, min_size_hl.low, max_size_hl.high, max_size_hl.low)) Swap(struct high_low, max_size_hl, min_size_hl); #else if (min_size > max_size) Swap(filesize_t, max_size, min_size); #endif } alloc_dirname(DIRNAME_BUFSIZE); alloc_ss(&dir_stack, DIR_STACK_ELEMENTS, DIR_STACK_BUFSIZE); if (filename_cmp) alloc_ss(&file_stack, FILE_STACK_ELEMENTS, FILE_STACK_BUFSIZE); if (xp.ss.total || xe.ss.total) { init_ll_firstchar(xp.ss.total ? &xp : &xe); /* xp overrides xe */ flags |= EXTENSION_TEST; } init_ll_firstchar(&xd); init_ll_firstchar(&xv); fix_xD(); init_ll_firstchar(&xD); } /******************************************************************************/ /* Print the (regular) files in a directory, then recurse on its subdirectories. * NB. dirname is either "" or terminated with a slash. */ static void scan_dir(void) { size_t i, end_total, num_dirs, this_dirname_len; int cwd_fd = -1; /* Only used if FOLLOW_LINKS. */ /* Save the state of the dir_stack. */ const size_t next_offset = dir_stack.offset.next; const size_t start_total = dir_stack.total; print_directory(); print_files(); end_total = dir_stack.total; num_dirs = end_total - start_total; if (num_dirs == 0) return; sort_filenames(&dir_stack, start_total, num_dirs); this_dirname_len = dirname_len; ++depth; for (i = start_total; i < end_total; ++i) { const char *d = get_str(&dir_stack, i); append_dirname(d, this_dirname_len); if (is_dirname_excluded()) continue; scan_dir2(d, &cwd_fd); /* recurse */ } --depth; if (cwd_fd != -1) close(cwd_fd); restore_dirname(this_dirname_len); /* Restore dir_stack to its previous state. */ dir_stack.offset.next = next_offset; dir_stack.total = start_total; } /******************************************************************************/ /* Print the files in a directory according to the command line options. * NB. dirname is either "" or terminated with a slash. */ static void print_files(void) { #if USE_FINDFILE const char *f, *s; WIN32_FIND_DATA fData; HANDLE find_han; if ((find_han = get_first_file(&fData)) == INVALID_HANDLE_VALUE) { dirwarn("", GetLastError()); return; } while (1) { if (!*fData.cAlternateFileName) f = fData.cFileName; else if (flags & TRUNCATED_FILENAMES) f = fData.cAlternateFileName; else { /* Any "?"s in cFileName indicate characters not in current code * page; in which case use cAlternateFileName instead. */ f = fData.cFileName; for (s = f; *s; ++s) { if (*s == '?') { f = fData.cAlternateFileName; break; } } } if ((flags & SKIP_DOT_FILES) && f[0] == '.') goto next_file; if ((flags & SKIP_HIDDEN_FILES) && (fData.dwFileAttributes & (FILE_ATTRIBUTE_HIDDEN | FILE_ATTRIBUTE_SYSTEM))) goto next_file; if (fData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) { if (f[0] == '.' && (!f[1] || (f[1] == '.' && !f[2]))) ; /* Ignore "." and ".." directories. */ else if ((flags & DEPTH_TEST) && depth >= max_depth) ; else if (is_exclude_dir(f)) ; else push_ss(&dir_stack, f); goto next_file; } if (flags & PRINT_DIRS_ONLY) goto next_file; if ((flags & MIN_SIZE_TEST) && high_low_lt(fData.nFileSizeHigh, fData.nFileSizeLow, min_size_hl.high, min_size_hl.low)) goto next_file; if ((flags & MAX_SIZE_TEST) && high_low_gt(fData.nFileSizeHigh, fData.nFileSizeLow, max_size_hl.high, max_size_hl.low)) goto next_file; if (flags & MTIME_TEST) { time_t mtime = get_mtime_from_find_data(&fData); if ( ((flags & SKIP_OLD_FILES) && mtime < opt_N_time) || ((flags & SKIP_NEW_FILES) && mtime > opt_O_time)) goto next_file; } if ((flags & EXTENSION_TEST) && is_exclude_extension(f)) goto next_file; if (is_filename_excluded(f)) goto next_file; found_file(f); next_file: if (!FindNextFile(find_han, &fData)) { const DWORD err = GetLastError(); if (err != ERROR_NO_MORE_FILES) dirwarn("FindNextFile error on ", err); break; } } if (FindClose(find_han) == 0) dirwarn("FindClose error on ", GetLastError()); #else DIR *dirp; struct dirent *dp; struct stat statbuf; const char *f; int is_a_link; if ((dirp = do_opendir()) == NULL) { dirwarn("can't opendir ", errno); return; } while (1) { errno = 0; if ((dp = readdir(dirp)) == NULL) { if (errno) dirwarn("can't readdir ", errno); break; } f = dp->d_name; /* Ignore "." and ".." directories. */ if (f[0] == '.') if (!f[1] || (f[1] == '.' && !f[2]) || (flags & SKIP_DOT_FILES)) continue; #ifdef HAS_D_TYPE /* Use d_type to save calling stat() and lstat() when possible. */ switch (dp->d_type) { case DT_DIR: if (!(flags & (FOLLOW_LINKS | XDEV))) goto f_is_dir; break; case DT_FIFO: case DT_CHR: case DT_BLK: case DT_SOCK: if (!(flags & ANY_FILE)) /* We don't show non regular files. */ continue; /* No break; drop through to regular file logic. */ case DT_REG: if (!(flags & NEED_TO_CALL_STAT)) goto non_statbuf_tests; break; case DT_LNK: /* Must do a stat() to see what type of file it links to. */ case DT_UNKNOWN: default: /* Must do a stat() to see what type of file it is. */ break; case DT_WHT: /* "A whiteout is a directory entry that hides any identically named * objects in the lower layers. The semantics of a whiteout for a * given name is that a lookup on that name should return ENOENT." * So, qfind should (probably?) ignore them. */ continue; } #endif /* NB. In the case of symbolic links, qfind's file tests relate to * the file *linked to*, so use stat() and not lstat(). */ if (do_stat(f, &statbuf)) { stat_failed(f); continue; } if (S_ISDIR(statbuf.st_mode)) { if ((flags & XDEV) && statbuf.st_dev != start_dev) continue; #ifdef HAS_D_TYPE f_is_dir: #endif if ((flags & DEPTH_TEST) && depth >= max_depth) continue; if (is_exclude_dir(f)) continue; /* Need to determine if it's a link to a directory. */ #ifdef HAS_D_TYPE if (dp->d_type == DT_LNK) is_a_link = 1; else if (dp->d_type == DT_DIR) is_a_link = 0; else #endif { /* NB. we can reuse statbuf since if both stat() and lstat() * say it's a directory then they will both give the same * values of its ino/dev. */ if (do_lstat(f, &statbuf)) { lstat_failed(f); continue; } is_a_link = !S_ISDIR(statbuf.st_mode); } if (is_a_link && !(flags & FOLLOW_LINKS)) continue; found_dir(f, &statbuf, is_a_link); continue; } if (!(S_ISREG(statbuf.st_mode) || (flags & ANY_FILE))) continue; if ((flags & MIN_SIZE_TEST) && (filesize_t) statbuf.st_size < min_size) continue; if ((flags & MAX_SIZE_TEST) && (filesize_t) statbuf.st_size > max_size) continue; if ((flags & SKIP_NON_READABLE_FILES) && !(statbuf.st_mode & S_IREAD)) continue; if (flags & MTIME_TEST) { time_t mtime = get_mtime_from_stat_data(&statbuf); if ( ((flags & SKIP_OLD_FILES) && mtime < opt_N_time) || ((flags & SKIP_NEW_FILES) && mtime > opt_O_time)) continue; } if (flags & ID_TEST) { id_t uid = statbuf.st_uid; id_t gid = statbuf.st_gid; if ( ((flags & UID_TEST_MATCH) && uid != opt_uid) || ((flags & UID_TEST_SKIP) && uid == opt_uid) || ((flags & GID_TEST_MATCH) && gid != opt_gid) || ((flags & GID_TEST_SKIP) && gid == opt_gid)) continue; } #ifdef HAS_D_TYPE non_statbuf_tests: #endif if (flags & PRINT_DIRS_ONLY) continue; if ((flags & EXTENSION_TEST) && is_exclude_extension(f)) continue; if (is_filename_excluded(f)) continue; found_file(f); } if (closedir(dirp) == -1) dirwarn("can't closedir ", errno); #endif if (file_stack.total) { if (file_stack.total != EMPTY_STACK) print_file_stack(); clear_ss(&file_stack); flush_output(); } } /******************************************************************************/ /* NB. We reuse the dirname buffer for writing "{dirname}{f}\n". */ static void print_file_stack(void) { size_t i, this_dirname_len = dirname_len; /* NB. filename_cmp will be non-NULL here. */ sort_ss(&file_stack, 0, file_stack.total, filename_cmp); for (i = 0; i < file_stack.total; ++i) { const char *f = get_str(&file_stack, i); append_dirname(f, this_dirname_len); write_dirname(); } restore_dirname(this_dirname_len); } /******************************************************************************/ /* We want output to be flushed to the caller asap; but calling fflush() * after every set of fwrite()s can be unnecessary and wasteful. * This is a compromise that ensures "peg -l +1 -dR_M 1 /" shows results * in the root directory immediately. */ static void flush_output(void) { static int n = 5; if (n > 0) { fflush(stdout); --n; } } /******************************************************************************/ /* In directories containing many thousands of files it will be faster to turn * off sorting and write the filenames as we get them. */ static void found_file(const char *f) { if (filename_cmp) { push_ss(&file_stack, f); } else { size_t this_dirname_len = dirname_len; append_dirname(f, this_dirname_len); write_dirname(); restore_dirname(this_dirname_len); file_stack.total = EMPTY_STACK; /* Indicate files found & printed. */ } } /******************************************************************************/ /* Print dirname (minus its trailing slash) to stdout. */ static void write_dirname(void) { size_t written; /* Replace trailing slash with newline. */ dirname[dirname_len - 1] = '\n'; written = fwrite(dirname, sizeof(char), dirname_len, stdout); /* Check for an output error cf. "qfind | more". */ if (written != dirname_len) terminate(EXIT_FAILURE); } /******************************************************************************/ static void print_directory(void) { if (flags & PRINT_DIRS) { size_t this_dirname_len = dirname_len; if (dirname_len == 0) { /* "" -> "./" */ dirname_len = 2; dirname[0] = '.'; dirname[1] = '/'; /* NB. no need for a terminating '\0' here. */ } ++dirname_len; /* To keep the trailing slash. */ write_dirname(); restore_dirname(this_dirname_len); } } /******************************************************************************/ #if USE_FINDFILE static HANDLE get_first_file(WIN32_FIND_DATA *fDatap) { char *dirname_end = dirname + dirname_len; HANDLE find_han; /* Append a "*" to dirname so it can be used as a glob pattern. */ dirname_end[0] = '*'; dirname_end[1] = '\0'; find_han = FindFirstFile(dirname, fDatap); /* Remove temporary "*" from dirname. */ dirname_end[0] = '\0'; return find_han; } #endif /******************************************************************************/ #if !USE_FINDFILE static DIR * do_opendir(void) { const char *d; if ((flags & NO_CHDIR) && dirname_len > 0) d = dirname; else d = "."; return opendir(d); } /******************************************************************************/ static int do_stat(const char *f, struct stat *statp) { return do_stat2(f, statp, stat); } /******************************************************************************/ static int do_lstat(const char *f, struct stat *statp) { return do_stat2(f, statp, lstat); } /******************************************************************************/ static int do_stat2(const char *f, struct stat *statp, stat_fn_t stat_func) { int result; if (flags & NO_CHDIR) { size_t this_dirname_len = dirname_len; append_dirname(f, this_dirname_len); dirname[dirname_len - 1] = '\0'; /* Erase trailing slash. */ result = stat_func(dirname, statp); restore_dirname(this_dirname_len); } else { result = stat_func(f, statp); } return result; } /******************************************************************************/ /* When the stat() fails, give a different error message for the case of a * symbolic link pointing to a nonexistent file. */ static void stat_failed(const char *f) { if (!(flags & SILENT)) { int saved_errno = errno; int dangling_link = 0; if (errno == ENOENT) { struct stat statbuf; if (!do_lstat(f, &statbuf)) dangling_link = 1; /* stat() failed, but lstat() successful. */ } if (dangling_link) fprintf(stderr, "qfind: dangling link %s%s\n", dirname, f); else fprintf(stderr, "qfind: can't stat %s%s: %s\n", dirname, f, strerror(saved_errno)); } } /******************************************************************************/ static void lstat_failed(const char *f) { if (!(flags & SILENT)) fprintf(stderr, "qfind: can't lstat %s%s: %s\n", dirname, f, strerror(errno)); } /******************************************************************************/ /* There are a number of reasons why a file's ctime is different to its mtime: * - the file has been the argument of one of various system calls that * affect the inode, but not the file contents eg. chmod(), link(), * rename(), unlink() etc. * - various file archive extraction programs such as unzip create files with * the mtime of the original archived file, but sets the ctime to the time * when it was created. */ static time_t get_mtime_from_stat_data(const struct stat *statp) { time_t mt = statp->st_mtime; if (flags & CONSIDER_CTIME) { time_t ct = statp->st_ctime; if (ct > mt) mt = ct; } return mt; } /******************************************************************************/ static char * get_cwd(size_t bufsize) { char *buf = NULL; while (1) { buf = CPP_CAST(char *) Realloc(buf, bufsize); if (getcwd(buf, bufsize)) return buf; if (errno != ERANGE) { fprintf(stderr, "qfind: getcwd failed: %s\n", strerror(errno)); free(buf); terminate(EXIT_FAILURE); } bufsize *= 2; } } /******************************************************************************/ static void init_start_dir(void) { start_dir = get_cwd(START_DIR_BUFSIZE); } /******************************************************************************/ static void chdir_to_start_dir(void) { if (chdir(start_dir)) { fprintf(stderr, "qfind: can't chdir back to %s: %s\n", start_dir, strerror(errno)); terminate(EXIT_FAILURE); } } /******************************************************************************/ static void scan_dir2(const char *d, int *cwd_fdp) { ino_t ino = 0; dev_t dev = 0; if (flags & FOLLOW_LINKS) { const struct dir_info *dip = get_dir_info(d); ino = dip->di.ino; dev = dip->di.dev; if (in_dirid_list(ino, dev)) return; if (!(flags & NO_CHDIR) && dip->is_a_link && *cwd_fdp == -1) { if ((*cwd_fdp = open_cwd()) == -1) { dirwarn("can't open parent directory ", errno); return; } } add_dirid_list(ino, dev); } if (!(flags & NO_CHDIR) && chdir(d)) { dirwarn("can't chdir into ", errno); return; } scan_dir(); if (flags & FOLLOW_LINKS_ALL) /* Remove current ino/dev pair from dirid_list. */ --dirid_list[ DIRID_LIST_IDX(ino, dev) ].total; if (!(flags & NO_CHDIR)) { if (*cwd_fdp != -1) { /* NB. d not necessarily a link here. */ errno = 0; #if !defined(WINDOWS) /* No fchdir() on Windows. */ if (fchdir(*cwd_fdp)) dirdie("can't fchdir back to parent directory", errno); #endif } else chdir_up(); } } /******************************************************************************/ static int open_cwd(void) { errno = 0; return open(".", O_RDONLY /* XXX Not sure if O_LARGEFILE is necessary, but it is what GNU find does. */ #ifdef O_LARGEFILE | O_LARGEFILE #endif ); } /******************************************************************************/ static void chdir_up(void) { if (chdir("..")) dirdie("can't chdir(\"..\") from ", errno); /* XXX Have we returned to the original parent directory? * May need to perform a stat(".") and check the device and inode are the * same as those before we chdir'd into dirname. */ } /******************************************************************************/ static void dirdie(const char *msg, err_t err) { flags &= ~SILENT; dirwarn(msg, err); terminate(EXIT_FAILURE); } #endif /******************************************************************************/ /* NB. dirname is either "" (which corresponds to ".") or ends in a slash. * If the latter, we remove the slash prior to printing. */ static void dirwarn(const char *msg, err_t err) { char *slashp = NULL; const char *dir; if (flags & SILENT) return; if (*dirname) { slashp = remove_trailing_slash(); dir = dirname; } else dir = "."; if (err) fprintf(stderr, "qfind: %s%s: %s\n", msg, dir, get_error_msg(err)); else fprintf(stderr, "qfind: %s%s\n", msg, dir); if (slashp) *slashp = slash; } /******************************************************************************/ /* Remove any trailing slash from the dirname (except for the case where it's * a root directory). */ static char * remove_trailing_slash(void) { char *end = dirname + dirname_len - 1; int root_path = (dirname_len == 1 && dirname[0] == slash); char *slashp = NULL; #if defined(WINDOWS) if (dirname_len == 3 && dirname[1] == ':' && dirname[2] == slash) root_path = 1; #endif if (!root_path && *end == slash) { slashp = end; *slashp = '\0'; } return slashp; } /******************************************************************************/ static char * get_error_msg(err_t err) { #if USE_FINDFILE static char buf[200]; if (FormatMessage( /* NB. returns 0 on error. */ FORMAT_MESSAGE_FROM_SYSTEM, NULL, err, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), /* Default language. */ buf, sizeof(buf) - 1, NULL )) { /* Chomp any trailing newline from the error message. Needed! */ char *end = buf + strlen(buf) - 1; if (*end == '\n') *end = '\0'; } else { sprintf(buf, "error code %lu (FormatMessage error %lu)", (unsigned long) err, (unsigned long) GetLastError()); } return (char *) buf; #else return strerror(err); #endif } /******************************************************************************/ /* By default we leave the OS to reclaim alloc'd memory on program exit. */ static void terminate(int status) { #ifdef QFIND_CLEAN_UP clean_up(); #endif exit(status); } /******************************************************************************/ #ifdef QFIND_CLEAN_UP static void clean_up(void) { free_ll(&xd); free_ll(&xD); free_ll(&xe); free_ll(&xp); free_ll(&xv); free_ss(&dir_stack); free_ss(&file_stack); free(dirname); #if !USE_FINDFILE free(start_dir); free_dirid_list(); #endif } /******************************************************************************/ static void free_ll(struct lookup_list *llp) { free_ss(&llp->ss); } /******************************************************************************/ static void free_ss(struct strstack *ssp) { free(ssp->str.buf); free(ssp->offset.list); } /******************************************************************************/ #if !USE_FINDFILE static void free_dirid_list(void) { size_t i; for (i = 0; i < DIRID_LIST_SIZE; ++i) { free(dirid_list[i].list); } } #endif #endif /******************************************************************************/ #if USE_FINDFILE /* Need to convert between FILETIME's and time_t's. */ /* Taken from Perl's "win32/win32.c". */ #ifdef __GNUC__ #define Const64(x) x##LL #else #define Const64(x) x##i64 #endif /* Number of 100 nanosecond units from 1/1/1601 to 1/1/1970 */ #define EPOCH_BIAS Const64(116444736000000000) static time_t filetime_to_timet(const FILETIME *filetimep) { __int64 t; t = filetimep->dwHighDateTime; t <<= 32; t |= filetimep->dwLowDateTime; t -= EPOCH_BIAS; t /= 10000000; return (time_t) t; } /******************************************************************************/ /* NB. it's possible to have a file whose LastWriteTime is older than its * CreationTime eg. a file extracted from an archive retains the 'lwt' from * the original file while its 'ct' is when it was extracted. */ static time_t get_mtime_from_find_data(const WIN32_FIND_DATA *fDatap) { time_t lwt = filetime_to_timet(&fDatap->ftLastWriteTime); if (flags & CONSIDER_CTIME) { time_t ct = filetime_to_timet(&fDatap->ftCreationTime); if (ct > lwt) lwt = ct; } return lwt; } /******************************************************************************/ static BOOL filetime_from_time(PFILETIME pFileTime, time_t Time) { struct tm *pTM = localtime(&Time); SYSTEMTIME SystemTime; FILETIME LocalTime; if (pTM == NULL) return FALSE; SystemTime.wYear = pTM->tm_year + 1900; SystemTime.wMonth = pTM->tm_mon + 1; SystemTime.wDay = pTM->tm_mday; SystemTime.wHour = pTM->tm_hour; SystemTime.wMinute = pTM->tm_min; SystemTime.wSecond = pTM->tm_sec; SystemTime.wMilliseconds = 0; return SystemTimeToFileTime(&SystemTime, &LocalTime) && LocalFileTimeToFileTime(&LocalTime, pFileTime); } /******************************************************************************/ /* See Win32::UTCFileTime for the horrors of working with UTC times and the * time_t's returned by Win32's stat(). */ static time_t to_utc_time(time_t t) { FILETIME ft; time_t utc; if (!filetime_from_time(&ft, t)) { fprintf(stderr, "qfind: failed to convert time '%ld'\n", t); terminate(EXIT_FAILURE); } utc = filetime_to_timet(&ft); return utc; } #endif /******************************************************************************/ static void init_ss(struct strstack *ssp) { ssp->str.buf = NULL; ssp->str.size = 0; ssp->offset.list = NULL; ssp->offset.max = 0; ssp->offset.next = 0; ssp->total = 0; } /******************************************************************************/ static void alloc_ss(struct strstack *ssp, size_t elements, size_t bufsize) { ssp->offset.max = elements; ssp->offset.list = CPP_CAST(size_t *) Malloc( ssp->offset.max * sizeof(size_t)); ssp->str.size = bufsize; ssp->str.buf = CPP_CAST(char *) Malloc(ssp->str.size); } /******************************************************************************/ /* Place a string on the end of a strstack. */ static void push_ss(struct strstack *ssp, const char *str) { const size_t size = strlen(str) + 1; grow_ss(ssp, 1, size); memcpy(ssp->str.buf + ssp->offset.next, str, size); ssp->offset.list[ssp->total++] = ssp->offset.next; ssp->offset.next += size; } /******************************************************************************/ /* Ensure there is space in a strstack's buffers for additional strings. */ static void grow_ss(struct strstack *ssp, size_t elements, size_t size) { size_t needed; needed = elements + ssp->total; if (ssp->offset.max < needed) { ssp->offset.max = get_alloc_size(needed); ssp->offset.list = CPP_CAST(size_t *) Realloc(ssp->offset.list, ssp->offset.max * sizeof(size_t)); } needed = size + ssp->offset.next; if (ssp->str.size < needed) { ssp->str.size = get_alloc_size(needed); ssp->str.buf = CPP_CAST(char *) Realloc(ssp->str.buf, ssp->str.size); } } /******************************************************************************/ static inline char * get_str(const struct strstack *ssp, size_t i) { return ssp->str.buf + ssp->offset.list[i]; } /******************************************************************************/ static void clear_ss(struct strstack *ssp) { ssp->total = 0; ssp->offset.next = 0; } /******************************************************************************/ static void sort_ss(struct strstack *ssp, size_t from, size_t n, strcmp_fn_t cmp) { if (n > 1) { sort_data.ssp = ssp; sort_data.cmp = cmp; qsort(ssp->offset.list + from, n, sizeof(ssp->offset.list[0]), cmp_ss); } } /******************************************************************************/ static void init_ll(struct lookup_list *xptr) { const size_t n = sizeof(xptr->firstchar) / sizeof(xptr->firstchar[0]); size_t i; init_ss(&xptr->ss); for (i = 0; i < n; ++i) xptr->firstchar[i] = NONE; } /******************************************************************************/ /* Organize the strings in the stack to make their lookup in the main loop * of print_files() as efficient as possible. */ static void init_ll_firstchar(struct lookup_list *xptr) { size_t i; sort_ss(&xptr->ss, 0, xptr->ss.total, strcmp); remove_reps(xptr); for (i = 0; i < xptr->ss.total; ++i) { const char *s = get_str(&xptr->ss, i); const unsigned char uc = s[0]; if (xptr->firstchar[uc] == NONE) xptr->firstchar[uc] = i; } xptr->firstchar[0] = NONE; /* Ensure "" is not a match. */ } /******************************************************************************/ /* Remove repeated names by 'collapsing' the offset.list[]. * NB. Since the strings are sorted, repetitions will be next to each other. */ static void remove_reps(struct lookup_list *xptr) { if (xptr->ss.total > 1) { size_t i, new_total = 1; const char *cur_str = get_str(&xptr->ss, 0); for (i = 1; i < xptr->ss.total; ++i) { const size_t offset = xptr->ss.offset.list[i]; const char *str = xptr->ss.str.buf + offset; if (strNE(cur_str, str)) { xptr->ss.offset.list[new_total++] = offset; cur_str = str; } } xptr->ss.total = new_total; /* Update total to reflect new contents. */ } } /******************************************************************************/ /* Is a name in a lookup_list? */ static int lookup(const struct lookup_list *xptr, const char *name) { int result = 0; const char name_0 = name[0]; const size_t first_try = xptr->firstchar[UCHAR(name_0)]; size_t i; if (first_try != NONE) { const char name_1 = name[1]; for (i = first_try; i < xptr->ss.total; ++i) { const char *s = get_str(&xptr->ss, i); if (s[0] != name_0) break; if (s[1] == name_1) { if (!s[1] || strEQ(s + 2, name + 2)) { result = 1; break; } } else if (UCHAR(s[1]) > UCHAR(name_1)) break; } } return result; } /******************************************************************************/ /* Is a name in a lookup_list (ignoring case distinctions)? * Assumes all strings in the lookup_list are already in lowercase. */ static int lookup_i(const struct lookup_list *xptr, const char *name) { int result = 0; const char lc_name_0 = lowercase(name[0]); const size_t first_try = xptr->firstchar[UCHAR(lc_name_0)]; size_t i; if (first_try != NONE) { const char lc_name_1 = lowercase(name[1]); for (i = first_try; i < xptr->ss.total; ++i) { const char *s = get_str(&xptr->ss, i); /* NB. is lowercase */ if (s[0] != lc_name_0) break; if (s[1] == lc_name_1) { if (!lc_name_1 || stri2eq(s + 2, name + 2)) { result = 1; break; } } else if (UCHAR(s[1]) > UCHAR(lc_name_1)) break; } } return result; } /******************************************************************************/ /* Should the given filename be excluded because of its extension? * Treats the extension case insensitively. NB. since all the names in * xp & xe are already in lower case, we only need to lower case * the file extension to compare them. */ static int is_exclude_extension(const char *f) { const char *s, *ext = NULL; int found; for (s = f; *s; ++s) if (*s == '.') ext = s + 1; found = ext && lookup_i(xp.ss.total ? &xp : &xe, ext); return xp.ss.total ? !found : found; } /******************************************************************************/ /* Is the given name in the list of excluded directories? */ static int is_exclude_dir(const char *f) { return xd.ss.total && lookup(&xd, f); } /******************************************************************************/ /* Is dirname in the list of excluded paths? */ static int is_dirname_excluded(void) { return xD.ss.total && lookup(&xD, dirname); } /******************************************************************************/ /* Is the given filename in the list of excluded files? */ static int is_filename_excluded(const char *f) { return xv.ss.total && lookup(&xv, f); } /******************************************************************************/ /* Treating file extensions as case insensitive is probably the correct * approach even on platforms that are case sensitive. * Consider: "readme.txt" vs "readme.TXT" vs "readme.Txt". */ static void parse_ext_arg(char *arg, struct lookup_list *xptr) { char *s; /* NB. 'multipart' extensions such as "tar.gz" are not supported. * We could cater for "-p=.c:.h", but frankly it does not seem worth it * when it's easier to just type "-p=c:h". */ if (strchr(arg, '.')) { fprintf(stderr, "qfind: dotted extensions not supported\n"); terminate(EXIT_FAILURE); } for (s = arg; *s; ++s) *s = lowercase(*s); parse_name_list(arg, xptr); } /******************************************************************************/ /* Process -d/e/p's argument (eg. "=.git:CVS:RCS") and place the info in the * relevant lookup_list. */ static void parse_name_list(const char *arg, struct lookup_list *xptr) { size_t elements = 1, size = 1; const char *s; char *d; if (arg[0] == '=') ++arg; for (s = arg; *s; ++s) { ++size; if (*s == ':') ++elements; } /* Ensure the buffers in the xptr's strstack are large enough. */ grow_ss(&xptr->ss, elements, size); /* Copy the strings into the buffer and store their offsets. */ d = xptr->ss.str.buf + xptr->ss.offset.next; for (s = arg; 1; ++s) { if (*s == ':' || !*s) { *d++ = '\0'; xptr->ss.offset.list[xptr->ss.total++] = xptr->ss.offset.next; xptr->ss.offset.next = d - xptr->ss.str.buf; if (!*s) break; } else *d++ = *s; } } /******************************************************************************/ static void parse_time_arg(const char *arg, char opt) { static time_t time_now = 0; int time_is_since_now = 0; char *end = NULL; time_t t; if (arg[0] == '=') ++arg; /* Provide a syntax to specify times relative to now. * So "-N=#60" filters files older than 60 secs since now. */ if (arg[0] == '#') { time_is_since_now = 1; ++arg; if (!time_now) time_now = time(NULL); } errno = 0; t = strtol(arg, &end, 10); if (!*arg || errno || !end || *end) { fprintf(stderr, "qfind: bad time argument '%s'\n", arg); terminate(EXIT_FAILURE); } if (time_is_since_now) t = time_now - t; #if USE_FINDFILE else { /* Must convert the time_t from Win32's stat() into a UTC correct time_t * so comparisons's to the time values in a WIN32_FIND_DATA's are valid. * NB. to ensure comparisons are made at the 1 second granularity, * we compare time_t's and not FILETIMEs. */ t = to_utc_time(t); } #endif if (opt == 'N') { flags |= SKIP_OLD_FILES; opt_N_time = t; } else { /* 'O' */ flags |= SKIP_NEW_FILES; opt_O_time = t; } } /******************************************************************************/ /* Parse a string containing a file size. */ static void parse_size_arg(const char *arg, char opt) { #if USE_FINDFILE DWORD high, low; #else filesize_t size; #endif char *end = NULL; if (arg[0] == '=') ++arg; errno = 0; #if USE_FINDFILE { #if defined(STDC99) unsigned long long sz64 = strtoull(arg, &end, 10); high = sz64 >> 32; low = sz64 & MAXDWORD; #else unsigned long sz32 = strtoul(arg, &end, 10); high = 0; low = sz32; #endif } #else size = (filesize_t) strtoul(arg, &end, 10); #endif if (!*arg || errno || !end || *end) { fprintf(stderr, "qfind: bad size argument '%s'\n", arg); terminate(EXIT_FAILURE); } if (opt == 'J') { flags |= MIN_SIZE_TEST_ALWAYS; #if USE_FINDFILE min_size_hl.high = high; min_size_hl.low = low; #else min_size = size; #endif } else { /* 'K' */ flags |= MAX_SIZE_TEST; #if USE_FINDFILE max_size_hl.high = high; max_size_hl.low = low; #else max_size = size; #endif } } /******************************************************************************/ /* Parse a string containing an integer >= 0. */ static void parse_depth_arg(const char *arg) { char *end = NULL; if (arg[0] == '=') ++arg; errno = 0; max_depth = (int) strtol(arg, &end, 10); if (!*arg || errno || !end || *end || max_depth < 0) { fprintf(stderr, "qfind: bad depth argument '%s'\n", arg); terminate(EXIT_FAILURE); } flags |= DEPTH_TEST; } /******************************************************************************/ #if !USE_FINDFILE static void parse_id_arg(const char *arg, char opt) { char *end = NULL; id_t id; if (arg[0] == '=') ++arg; errno = 0; id = (id_t) strtoul(arg, &end, 10); if (!*arg || errno || !end || *end) { fprintf(stderr, "qfind: bad id argument '%s'\n", arg); terminate(EXIT_FAILURE); } if (opt == 'u' || opt == 'U') { flags |= (opt == 'u' ? UID_TEST_MATCH : UID_TEST_SKIP); opt_uid = id; } else { /* 'g' or 'G' */ flags |= (opt == 'g' ? GID_TEST_MATCH : GID_TEST_SKIP); opt_gid = id; } } /******************************************************************************/ /* When following links, we need to store the directory's ino/dev. */ static void found_dir(const char *str, const struct stat *statp, int is_a_link) { if (flags & FOLLOW_LINKS) push_dir_ss(str, statp, is_a_link); else push_ss(&dir_stack, str); } /******************************************************************************/ /* We store the directory's dir_info after its name in the dir_stack. */ static void push_dir_ss(const char *str, const struct stat *statp, int is_a_link) { const size_t str_size = strlen(str) + 1; const size_t needed = str_size + sizeof(struct dir_info) + ALIGN_SIZE - 1; struct dir_info *dip; size_t padding; char *s; grow_ss(&dir_stack, 1, needed); s = dir_stack.str.buf + dir_stack.offset.next; memcpy(s, str, str_size); s += str_size; /* First free byte after str. */ padding = ALIGN_PADDING(s); dir_stack.offset.list[dir_stack.total++] = dir_stack.offset.next; dir_stack.offset.next += str_size + padding + sizeof(struct dirid); dip = (struct dir_info *) (s + padding); dip->di.ino = statp->st_ino; dip->di.dev = statp->st_dev; dip->is_a_link = is_a_link; } /******************************************************************************/ /* Return a pointer to the dir_info stored after the directory name. */ static struct dir_info * get_dir_info(const char *d) { size_t padding; struct dir_info *dip; d += strlen(d) + 1; /* Advance to first free byte. */ padding = ALIGN_PADDING(d); dip = (struct dir_info *) (d + padding); return dip; } /******************************************************************************/ static void init_dirid_list(void) { size_t i; for (i = 0; i < DIRID_LIST_SIZE; ++i) { dirid_list[i].list = NULL; dirid_list[i].size = 0; dirid_list[i].total = 0; } } /******************************************************************************/ static void clear_dirid_list(void) { size_t i; for (i = 0; i < DIRID_LIST_SIZE; ++i) { dirid_list[i].total = 0; } } /******************************************************************************/ static void add_dirid_list(ino_t ino, dev_t dev) { struct dirids *disp = &dirid_list[ DIRID_LIST_IDX(ino, dev) ]; size_t needed = 1 + disp->total; if (disp->size < needed) { disp->size = get_alloc_size(needed); disp->list = CPP_CAST(struct dirid *) Realloc(disp->list, disp->size * sizeof(struct dirid)); } disp->list[disp->total].ino = ino; disp->list[disp->total].dev = dev; ++disp->total; } /******************************************************************************/ static int in_dirid_list(ino_t ino, dev_t dev) { const struct dirids *disp = &dirid_list[ DIRID_LIST_IDX(ino, dev) ]; int found = 0; size_t i; #if defined(WINDOWS) /* Windows' stat() does not return a useful ino. For testing purposes, * it's best to pretend we didn't find a match. */ return 0; #endif for (i = 0; i < disp->total; ++i) { if (disp->list[i].ino == ino && disp->list[i].dev == dev) { found = 1; break; } } return found; } #endif /******************************************************************************/ /* The directory path names in xD are compared to dirname which will have a * trailing slash. This ensures the names in xD also have a trailing slash. */ static void fix_xD(void) { const size_t total = xD.ss.total; /* The total before adding more. */ char *buf; size_t i; if (!total) return; /* Use a temporary buffer to build 'fixed' path names. */ buf = CPP_CAST(char *) Malloc(xD.ss.offset.next + 2); for (i = 0; i < total; ++i) { char *orig = get_str(&xD.ss, i); char *s, *d = buf; for (s = orig; *s; ++s) { #if USE_FINDFILE if (*s == slosh) *s = slash; #endif *d++ = *s; } if (d > buf && *(d - 1) != slash) { *d++ = slash; *d = '\0'; push_ss(&xD.ss, buf); orig[0] = '\0'; /* Erase original string. */ } } free(buf); } /******************************************************************************/ /* Skip zero size files by setting a minimum size requirement of 1. */ static void skip_zero_size_files(const char opt) { if (flags & MIN_SIZE_TEST_ALWAYS) /* cf. "qfind -G=22 -z". */ return; flags |= (opt == 'z' ? MIN_SIZE_TEST_ALWAYS : MIN_SIZE_TEST_IF_FREE); #if USE_FINDFILE min_size_hl.high = 0; min_size_hl.low = 1; #else min_size = 1; #endif } /******************************************************************************/ #if USE_FINDFILE /* Compare a high/low integer cf. the file size in a WIN32_FIND_DATA. * Returns 1 if a > b, -1 if a < b, and 0 if a == b. */ static inline int high_low_cmp(DWORD ah, DWORD al, DWORD bh, DWORD bl) { int result; if (ah == bh) { if (al > bl) result = 1; else if (al < bl) result = -1; else result = 0; } else if (ah > bh) result = 1; else result = -1; return result; } #endif /******************************************************************************/ static void sort_filenames(struct strstack *ssp, size_t from, size_t n) { if (filename_cmp) sort_ss(ssp, from, n, filename_cmp); } /******************************************************************************/ /* Test if two strings are the same (ignoring case differences) where the first * is known to be in lower case. */ static int stri2eq(const char *s1, const char *s2) { while (1) { const int c1 = UCHAR(*s1); const int c2 = lowercase(*s2); if (c1 != c2) return 0; if (!c1) return 1; ++s1; ++s2; } } /******************************************************************************/ /* Qsort comparison function for the strings inside a strstack. */ static int cmp_ss(const void *elem1, const void *elem2) { const size_t offset1 = *((const size_t *) elem1); const size_t offset2 = *((const size_t *) elem2); const char *s1 = sort_data.ssp->str.buf + offset1; const char *s2 = sort_data.ssp->str.buf + offset2; return sort_data.cmp(s1, s2); } /******************************************************************************/ #if !(UCHAR_MAX <= INT_MAX) #error The code assumes that UCHAR_MAX <= INT_MAX. /* ...in particular on machines where char and int are types of the same size, * the difference of two unsigned char values (including the sign bit) doesn't * fit in an int. */ #endif /* Perform a string comparison ignoring case except in a tie. * NB. This is different to the standard stricmp() and should not be used for * testing equivalence eg. mystricmp("ABC", "abc") != 0. */ static int mystricmp(const char *s1, const char *s2) { int result = 0; while (1) { const int c1 = UCHAR(*s1); const int c2 = UCHAR(*s2); if (c1 == c2) { if (!c1) break; } else { const int lc1 = to_lower(c1); const int lc2 = to_lower(c2); if (lc1 == lc2) { if (!result) result = c1 - c2; } else { result = lc1 - lc2; break; } } ++s1; ++s2; } return result; } /******************************************************************************/ /* A string comparison that considers numbers embedded within them. */ static int strnumcmp(const char *s1, const char *s2) { int case_diff = 0; while (1) { if (izdigit(*s1) && izdigit(*s2)) { int num_diff = 0, zeros = 0; while (*s1 == '0' && izdigit(s1[1])) { ++s1; --zeros; } while (*s2 == '0' && izdigit(s2[1])) { ++s2; ++zeros; } while (1) { /* Both *s1 and *s2 are digits. */ if (!num_diff) num_diff = UCHAR(*s1) - UCHAR(*s2); ++s1; ++s2; if (!izdigit(*s1)) { if (izdigit(*s2)) /* # in s1 has less digits, so is < # in s2. */ return -1; if (num_diff) return num_diff; if (zeros) return zeros; break; } else if (!izdigit(*s2)) /* # in s1 has more digits, so is > # in s2. */ return 1; } } { const int c1 = UCHAR(*s1); const int c2 = UCHAR(*s2); if (c1 == c2) { if (!c1) return case_diff; } else { const int lc1 = to_lower(c1); const int lc2 = to_lower(c2); if (lc1 == lc2) { if (!case_diff) case_diff = c1 - c2; } else return lc1 - lc2; } ++s1; ++s2; } } /* Unreachable. */ } /******************************************************************************/ static void alloc_dirname(size_t bufsize) { dirname_bufsize = bufsize; dirname = CPP_CAST(char *) Malloc(dirname_bufsize); } /******************************************************************************/ /* Append to dirname the given string at the given position, plus add a * trailing slash. */ static void append_dirname(const char *d, size_t old_dirname_len) { const size_t d_len = strlen(d); size_t needed; dirname_len = old_dirname_len + d_len + 1; /* We also need space for the NUL char and get_first_file() * assumes additional space to append a "*". */ needed = dirname_len + 2; if (dirname_bufsize < needed) { dirname_bufsize = get_alloc_size(needed); dirname = CPP_CAST(char *) Realloc(dirname, dirname_bufsize); } memcpy(dirname + old_dirname_len, d, d_len); dirname[dirname_len - 1] = slash; dirname[dirname_len] = '\0'; } /******************************************************************************/ #if USE_FINDFILE static void consistent_slashes(char *path) { char *s; for (s = path; *s; ++s) if (*s == slosh) *s = slash; } #endif /******************************************************************************/ /* An empty DIR argument is taken to imply scan the start directory. */ static int set_dirname(const char *dir) { #if !USE_FINDFILE if (start_dir) /* NB. => (num_dir_args > 1 && !(flags & NO_CHDIR)) */ chdir_to_start_dir(); /* Ensures relative paths work. */ if (flags & (FOLLOW_LINKS | XDEV)) { const char *d = dir[0] ? dir : "."; struct stat st; if (stat(d, &st)) { fprintf(stderr, "qfind: can't stat %s: %s\n", d, strerror(errno)); terminate(EXIT_FAILURE); } if (flags & FOLLOW_LINKS) { if (flags & FOLLOW_LINKS_ALL) clear_dirid_list(); else /* FOLLOW_LINKS_NO_REPS */ if (in_dirid_list(st.st_ino, st.st_dev)) return 0; add_dirid_list(st.st_ino, st.st_dev); } start_dev = st.st_dev; } #endif if (!dir[0]) { restore_dirname(0); } else { #if USE_FINDFILE /* Ensure no wildcard characters. */ if (strpbrk(dir, "*?")) { fprintf(stderr, "qfind: wildcards not supported: %s\n", dir); terminate(EXIT_FAILURE); } #endif append_dirname(dir, 0); #if USE_FINDFILE consistent_slashes(dirname); #endif /* Since append_dirname() appends a trailing slash, if the given dir * path already ends in a slash we need to chomp dirname's final slash. * NB. dirname_len >= 2. */ if (dirname[dirname_len - 2] == slash) dirname[--dirname_len] = '\0'; #if !USE_FINDFILE if (!(flags & NO_CHDIR) && chdir(dir)) { dirwarn("can't chdir into ", errno); return 0; } #endif } depth = 0; return 1; } /******************************************************************************/ static void restore_dirname(size_t len) { dirname[len] = '\0'; dirname_len = len; } /******************************************************************************/ /* XXX In theory, the arithmetic below could overflow. In practise, if this did * occur your filesystem would be so clogged as to make this bug in qfind the * least of your problems :-) */ static size_t get_alloc_size(size_t needed) { size_t alloc; if (needed == 0) { alloc = 8; } else { alloc = 2 * needed; if (alloc & 7) { alloc &= ~7u; alloc += 8; } } return alloc; } /******************************************************************************/ static void * Malloc(size_t size) { void *result = malloc(size); if (!result) { fprintf(stderr, "qfind: malloc(%lu) failed\n", (unsigned long) size); terminate(EXIT_FAILURE); } return result; } /******************************************************************************/ static void * Realloc(void *ptr, size_t size) { void *result = realloc(ptr, size); if (!result) { fprintf(stderr, "qfind: realloc(%lu) failed\n", (unsigned long) size); terminate(EXIT_FAILURE); } return result; } /******************************************************************************/ #if USE_FINDFILE #define F(s) s #define P(s) #else #define F(s) #define P(s) s #endif static void print_help(void) { fprintf(stderr, "Usage: qfind [OPTION...] [DIR...]\n" " Recursively prints out the regular files beneath a directory.\n" " If no DIR is specified, the current directory is assumed.\n" "OPTIONs:\n" P(" -a Print all non-directory files.\n") F(" -b Print backslashed path names.\n") " -c -N/-O uses most-recent(ctime, mtime).\n" " -d=DIR Ignore files beneath the directory DIR.\n" " Multiple directories can be specified:\n" " qfind -d=DIR1:DIR2:DIR3 -d=DIR4\n" " -D=PATH Ignore files beneath the pathname PATH.\n" " Multiple paths can be specified:\n" " qfind -D=PATH1:PATH2 -D=PATH3\n" " -e=EXT Ignore files whose extension is EXT.\n" " Multiple extensions can be specified:\n" " qfind -e=EXT1:EXT2:EXT3 -e=EXT4\n" P(" -g=GID Only print files belonging to group GID.\n") P(" -G=GID Ignore files belonging to group GID.\n") " -i Sort the filenames ignoring case distinctions.\n" P(" -l Follow links (no repetitions).\n") P(" -L Follow links.\n") P(" -m Ignore directories on other filesystems.\n") " -n Sort the filenames wrt. embedded numbers.\n" " -o Ignore 'dot' files/directories.\n" " -p=EXT Only print files with extension EXT.\n" " This option overrides -e.\n" " Multiple extensions can be specified:\n" " qfind -p=EXT1:EXT2:EXT3 -p=EXT4\n" #ifdef HAS_D_TYPE P(" -r Only print readable files.\n") P(" -R Only print readable files (if stat info available).\n") #else P(" -r/R Only print readable files.\n") #endif " -s Silent. No error messages to STDERR.\n" F(" -t Truncate filenames ie. the classic 8.3 format.\n") P(" -u=UID Only print files belonging to user UID.\n") P(" -U=UID Ignore files belonging to user UID.\n") " -v=NAME Ignore files with the given NAME.\n" " Multiple filenames can be specified:\n" " qfind -v=NAME1:NAME2:NAME3 -v=NAME4\n" " -x Do not sort filenames.\n" F(" -y Ignore HIDDEN|SYSTEM files/directories.\n") #ifdef HAS_D_TYPE " -z Ignore zero size files.\n" " -Z Ignore zero size files (if stat info available).\n" #else " -z/Z Ignore zero size files.\n" #endif " -E=DEPTH Ignore directories deeper than DEPTH.\n" " -J=SIZE Ignore files smaller than SIZE bytes.\n" " -K=SIZE Ignore files larger than SIZE bytes.\n" " -N=TIME Ignore files older than TIME (as a time_t).\n" " -O=TIME Ignore files newer than TIME (as a time_t).\n" " -S Sort the filenames prior to printing.\n" P(" -X Do not chdir.\n") " -: Print directories but not files.\n" " -+ Print both files and directories.\n" " -V Print version information and exit.\n" " -h Show this help and exit.\n" " -- End options.\n" "\n" ); } #undef F #undef P /******************************************************************************/