realpath() without resolving symlinks?

2019-01-18 23:41发布

问题:

I already read about realpath(), but is there a function that I can pass a base directory and a filename that would give me the following result without resolving symlinks or checking whether files actually exist? Or do I have to use a modified realpath()?

"/var/", "../etc///././/passwd" => "/etc/passwd"

回答1:

Here is a normalize_path() function:

If the given path is relative, the function starts by prepending the current working directory to it.

Then the special path components (.., . and empty components) are handled, and the normalized path is returned.

For .., the last component is removed if there is one (/.. will just return /).
For . or an empty component (caused by a double /), the component is simply skipped.

The function never returns an empty path (/ is returned instead).

#define _GNU_SOURCE /* memrchr() */

#include <errno.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/*
 * Return the current working directory in a freshly malloc()ed buffer, or
 * NULL if it cannot be determined or memory runs out. The buffer is doubled
 * for as long as getcwd() reports ERANGE, so no fixed-size (PATH_MAX) buffer
 * is required.
 */
static char * current_dir(void) {
        size_t cap = 256;
        char * buf = NULL;

        for (;;) {
                char * grown = realloc(buf, cap);
                if (grown == NULL) {
                        free(buf);
                        return NULL;
                }
                buf = grown;

                if (getcwd(buf, cap) != NULL) {
                        return buf;
                }
                if (errno != ERANGE || cap > (size_t)-1 / 2) {
                        free(buf);
                        return NULL;
                }
                cap *= 2;
        }
}

/*
 * Lexically normalize a path without touching the file system (no symlink
 * resolution, no existence checks).
 *
 * src      path to normalize; need not be NUL-terminated
 * src_len  number of bytes of src to consider
 *
 * A path that is empty or does not start with '/' is interpreted relative to
 * the current working directory. "." and empty components are dropped, and
 * ".." removes the previously emitted component (".." at the root stays at
 * the root). The result never ends in '/' unless it is exactly "/".
 *
 * Returns a NUL-terminated string allocated with malloc() that the caller
 * must free(), or NULL if the working directory could not be obtained or an
 * allocation failed.
 */
char * normalize_path(const char * src, size_t src_len) {

        char * res;
        size_t res_len;

        const char * ptr = src;
        const char * end = src + src_len;

        if (src_len == 0 || src[0] != '/') {

                // relative path: start from the working directory

                char * pwd = current_dir();
                size_t pwd_len;

                if (pwd == NULL) {
                        return NULL;
                }

                // Drop trailing slashes so that a cwd of "/" becomes the empty
                // prefix; otherwise the first component would yield "//name".
                pwd_len = strlen(pwd);
                while (pwd_len > 0 && pwd[pwd_len - 1] == '/') {
                        pwd_len--;
                }

                // prefix + '/' + src + NUL is the longest possible result
                res = malloc(pwd_len + 1 + src_len + 1);
                if (res == NULL) {
                        free(pwd);
                        return NULL;
                }
                memcpy(res, pwd, pwd_len);
                free(pwd);
                res_len = pwd_len;
        } else {
                // An absolute path never grows during normalization.
                res = malloc(src_len + 1);
                if (res == NULL) {
                        return NULL;
                }
                res_len = 0;
        }

        while (ptr < end) {
                const char * comp = ptr;
                const char * next = memchr(comp, '/', (size_t)(end - comp));
                size_t len;

                if (next == NULL) {
                        next = end;
                }
                len = (size_t)(next - comp);

                // Never advance beyond one-past-the-end of src.
                ptr = (next < end) ? next + 1 : end;

                if (len == 0 || (len == 1 && comp[0] == '.')) {
                        continue;
                }

                if (len == 2 && comp[0] == '.' && comp[1] == '.') {
                        // Cut the result back to just before its last '/'.
                        size_t cut = res_len;
                        while (cut > 0 && res[cut - 1] != '/') {
                                cut--;
                        }
                        if (cut > 0) {
                                res_len = cut - 1;
                        }
                        continue;
                }

                res[res_len++] = '/';
                memcpy(&res[res_len], comp, len);
                res_len += len;
        }

        if (res_len == 0) {
                res[res_len++] = '/';
        }
        res[res_len] = '\0';
        return res;
}


回答2:

/**
 * Lexically normalize a path: no symlink resolution, no existence checks.
 *
 * "." and empty components are dropped; ".." removes the preceding component
 * and is ignored once the root has been reached. The result always starts
 * with "/" and is exactly "/" when no component remains.
 *
 * @param string $path Path to normalize. If it is empty or does not start
 *                     with "/", it is resolved against a base directory.
 * @param string $pwd  Base directory for relative paths. It is used only when
 *                     passed explicitly; with a single argument the base is
 *                     getcwd(), as before.
 * @return string The normalized absolute path.
 */
function normalize_path($path, $pwd = '/') {
        if (!isset($path[0]) || $path[0] !== '/') {
                // Honour an explicitly supplied base; fall back to the process
                // working directory so one-argument callers see no change.
                $base = func_num_args() > 1 ? $pwd : getcwd();
                $path = $base . '/' . $path;
        }
        // The leading "/" is re-added on return, so the root can never be
        // popped off by a surplus "..".
        $result = array();
        foreach (explode('/', $path) as $part) {
            if ($part === '' || $part === '.') {
                    continue;
            }
            if ($part === '..') {
                    array_pop($result);
            } else {
                    $result[] = $part;
            }
        }
        return '/' . implode('/', $result);
}

(The question was tagged PHP at the time I wrote this.)

Anyway, here is a regex version:

/**
 * Regex-based variant: normalizes a path by deleting every match of one
 * pattern in a single preg_replace() call.
 *
 * @param string $path Path to normalize. If it is empty or does not start
 *                     with "/", "$pwd/" is prepended first.
 * @param string $pwd  Base directory used for relative paths (default "/").
 * @return string|null Whatever preg_replace() returns: $path with all pattern
 *                     matches removed.
 *                     NOTE(review): a path that reduces to the root appears
 *                     to come back as "" rather than "/" - confirm.
 */
function normalize_path($path, $pwd = '/') {
        if (!isset($path[0]) || $path[0] !== '/') {
                $path = "$pwd/$path";
        }
        // Pattern flags: "x" lets the pattern span several lines, "s" makes "."
        // match newlines. The pattern has three alternatives:
        //   1. anchored at the start: an optional sdotdot match followed by any
        //      number of "/.." segments (each optionally preceded by sdot);
        //   2. sdotdot: one or more "/name ... /.." groups, where name is not
        //      "..", referenced recursively via (?P>sdotdot);
        //   3. sdot: one or more "/" or "/." that are followed by "/" or the end.
        return preg_replace('~
                ^(?P>sdotdot)?(?:(?P>sdot)*/\.\.)*
                |(?<sdotdot>(?:(?P>sdot)*/(?!\.\.)(?:[^/]+)(?P>sdotdot)?(?P>sdot)*/\.\.)+)
                |(?<sdot>/\.?(?=/|$))+
        ~sx', '', $path);
}


回答3:

I use Hardex's solution:

#include <string.h>

/*
 * Lexically normalize src into the caller-supplied buffer res, without
 * touching the file system (no symlink resolution, no existence checks).
 *
 * pwd  base directory used when src is empty or does not start with '/';
 *      trailing slashes are ignored. It is not read when src is absolute.
 * src  NUL-terminated path to normalize.
 * res  output buffer; it must hold at least strlen(pwd) + strlen(src) + 2
 *      bytes. No bounds checking is performed.
 *
 * "." and empty components are dropped, and ".." removes the previously
 * emitted component (".." at the root stays at the root). If nothing
 * remains, the result is "/".
 *
 * Returns res.
 */
char * normalizePath(char* pwd, const char * src, char* res) {
    size_t res_len = 0;
    size_t src_len = strlen(src);

    const char * ptr = src;
    const char * end = src + src_len;

    if (src_len == 0 || src[0] != '/') {
        // relative path: seed the result with the base directory.
        // Trailing slashes are stripped so that "/" becomes the empty prefix
        // and "/var/" behaves like "/var" when ".." follows.
        res_len = strlen(pwd);
        while (res_len > 0 && pwd[res_len - 1] == '/') {
            res_len--;
        }
        memcpy(res, pwd, res_len);
    }

    while (ptr < end) {
        const char * comp = ptr;
        const char * next = (const char *)memchr(comp, '/', (size_t)(end - comp));
        size_t len;

        if (next == NULL) {
            next = end;
        }
        len = (size_t)(next - comp);

        // Never advance beyond one-past-the-end of src.
        ptr = (next < end) ? next + 1 : end;

        if (len == 0 || (len == 1 && comp[0] == '.')) {
            continue;
        }

        if (len == 2 && comp[0] == '.' && comp[1] == '.') {
            // Portable backward scan: cut the result just before its last '/'.
            size_t cut = res_len;
            while (cut > 0 && res[cut - 1] != '/') {
                cut--;
            }
            if (cut > 0) {
                res_len = cut - 1;
            }
            continue;
        }

        res[res_len++] = '/';
        memcpy(&res[res_len], comp, len);
        res_len += len;
    }

    if (res_len == 0) {
        res[res_len++] = '/';
    }
    res[res_len] = '\0';
    return res;
}

Example:

#include <stdio.h>

/*
 * Demo: normalize "./../../../" against "/usr/share/local/apps" and print
 * the result surrounded by newlines.
 */
int main(){
    char base[] = "/usr/share/local/apps";
    char path[FILENAME_MAX+1];
    const char * normalized = normalizePath(base, "./../../../", path);

    printf("\n%s\n", normalized);
    return 0;
}

Output:

/usr


Note:

  1. The first argument is the directory path (absolute path) relative to which other paths will be normalized. It is generally the absolute path of the current directory.
  2. The second argument is the string to be normalized without resolving symlinks.
  3. The third argument is a char* which must have the required memory/capacity to contain the normalized path.