I already read about realpath()
, but is there a function that I can pass a base directory and a filename that would give me the following result without resolving symlinks or checking whether files actually exist? Or do I have to use a modified realpath()
?
"/var/", "../etc///././/passwd" => "/etc/passwd"
Here is a normalize_path() function:
If the given path is relative, the function starts by prepending the current working directory to it.
Then the special path components like ..
, .
or empty components are handled, and the result is returned.
For ..
, the last component is removed if there is one (/..
will just return /
).
For .
or empty components (double /
), the component is simply skipped.
The function takes care never to return an empty path (/
is returned instead).
#define _GNU_SOURCE /* memrchr() */
#include <errno.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
/* Return a pointer to the last '/' in buf[0..len), or NULL if there is none.
 * Portable replacement for the GNU-only memrchr(). */
static const char *np_last_slash(const char *buf, size_t len) {
    while (len > 0) {
        if (buf[--len] == '/') {
            return &buf[len];
        }
    }
    return NULL;
}

/* Fetch the current working directory into a heap buffer that has at least
 * `extra` spare bytes after the directory text. Stores the directory length
 * in *cwd_len. Returns NULL on allocation or getcwd() failure. */
static char *np_cwd_with_room(size_t extra, size_t *cwd_len) {
    size_t cap = 256;
    char *buf = NULL;

    /* Grow the buffer until getcwd() fits; avoids relying on PATH_MAX. */
    for (;;) {
        char *grown = realloc(buf, cap);
        if (grown == NULL) {
            free(buf);
            return NULL;
        }
        buf = grown;
        if (getcwd(buf, cap) != NULL) {
            break;
        }
        if (errno != ERANGE || cap > (size_t)-1 / 2) {
            free(buf);
            return NULL;
        }
        cap *= 2;
    }

    size_t len = strlen(buf);
    if (extra > (size_t)-1 - len) {
        free(buf);
        return NULL;
    }
    if (len + extra > cap) {
        char *grown = realloc(buf, len + extra);
        if (grown == NULL) {
            free(buf);
            return NULL;
        }
        buf = grown;
    }
    *cwd_len = len;
    return buf;
}

/*
 * Lexically normalize a path without touching the filesystem (no symlink
 * resolution, no existence checks).
 *
 * src      Path to normalize; need not be NUL-terminated.
 * src_len  Number of bytes of src to consider.
 *
 * A path that is empty or does not start with '/' is interpreted relative to
 * the current working directory. "." and empty components are dropped, and
 * ".." removes the previous component (at the root it is a no-op).
 *
 * Returns a newly malloc()ed, NUL-terminated absolute path that the caller
 * must free(), or NULL if memory allocation or getcwd() fails. The result is
 * never empty: "/" is returned when no component remains.
 */
char * normalize_path(const char * src, size_t src_len) {
    char *res;
    size_t res_len;

    if (src == NULL) {
        src = "";
        src_len = 0;
    }
    if (src_len > (size_t)-1 - 2) {
        return NULL;
    }

    if (src_len == 0 || src[0] != '/') {
        /* Relative path: start from the cwd, leaving room for "/" + src + NUL. */
        res = np_cwd_with_room(src_len + 2, &res_len);
        if (res == NULL) {
            return NULL;
        }
        /* Each component appended below brings its own leading '/', so a
         * trailing slash in the prefix (cwd == "/") must not be kept. */
        while (res_len > 0 && res[res_len - 1] == '/') {
            res_len--;
        }
    } else {
        /* The output of an absolute path is never longer than the input. */
        res = malloc(src_len + 1);
        if (res == NULL) {
            return NULL;
        }
        res_len = 0;
    }

    const char *ptr = src;
    const char *end = src + src_len;

    while (ptr < end) {
        const char *next = memchr(ptr, '/', (size_t)(end - ptr));
        if (next == NULL) {
            next = end;
        }
        size_t len = (size_t)(next - ptr);

        if (len == 0 || (len == 1 && ptr[0] == '.')) {
            /* Empty component ("//") or ".": nothing to emit. */
        } else if (len == 2 && ptr[0] == '.' && ptr[1] == '.') {
            /* "..": cut the result back to before its last component. */
            const char *slash = np_last_slash(res, res_len);
            if (slash != NULL) {
                res_len = (size_t)(slash - res);
            }
        } else {
            res[res_len++] = '/';
            memcpy(&res[res_len], ptr, len);
            res_len += len;
        }

        if (next == end) {
            break; /* do not step past one-past-the-end of src */
        }
        ptr = next + 1;
    }

    if (res_len == 0) {
        res[res_len++] = '/';
    }
    res[res_len] = '\0';
    return res;
}
/**
 * Lexically normalize a path: no symlink resolution, no existence checks.
 *
 * "." and empty components are dropped; ".." removes the previous component
 * and is a no-op at the root. The result always starts with "/" and is "/"
 * when no component remains.
 *
 * @param string $path Path to normalize. If it is empty or does not start
 *                     with "/", it is taken relative to a base directory.
 * @param string $pwd  Base directory for relative paths. It is used only when
 *                     passed explicitly; otherwise getcwd() is the base, as
 *                     before.
 * @return string Normalized absolute path.
 */
function normalize_path($path, $pwd = '/') {
    if (!isset($path[0]) || $path[0] !== '/') {
        // Honor an explicitly supplied base; keep the historical getcwd()
        // behavior for callers that pass only $path.
        $base = func_num_args() > 1 ? $pwd : getcwd();
        $path = $base . '/' . $path;
    }

    $result = array();
    foreach (explode('/', $path) as $part) {
        if ($part === '' || $part === '.') {
            continue;
        }
        if ($part === '..') {
            // array_pop() on an empty array is harmless, so ".." at the
            // root simply stays at the root.
            array_pop($result);
        } else {
            $result[] = $part;
        }
    }

    // The leading slash is added here rather than stored as an empty first
    // element, so it can never be popped by "..".
    return '/' . implode('/', $result);
}
(The question was tagged PHP at the time I wrote this.)
Anyway, here is a regex version:
/**
 * Regex-based variant of path normalization.
 *
 * A path that is empty or does not begin with "/" is first prefixed with
 * "$pwd/". Every match of the pattern below is then deleted from the string
 * and what remains is returned. Only isset() and preg_replace() are called;
 * the filesystem is not consulted.
 *
 * @param string $path Path to normalize.
 * @param string $pwd  Prefix applied to relative paths (default "/").
 * @return mixed The value returned by preg_replace().
 */
function normalize_path($path, $pwd = '/') {
if (!isset($path[0]) || $path[0] !== '/') {
// Empty or relative input: anchor it under $pwd.
$path = "$pwd/$path";
}
// The pattern uses the x flag (free-spacing) and has three alternatives:
//   1. anchored at the start: an optional sdotdot run followed by any number
//      of "/.." segments (each optionally preceded by sdot matches);
//   2. sdotdot: one or more groups of a component not starting with ".."
//      followed, possibly after nested sdotdot/sdot matches, by "/..";
//   3. sdot: a "/" optionally followed by ".", where the next character is
//      "/" or the end of the string.
// NOTE(review): the (?!\.\.) lookahead appears to reject any component that
// merely starts with ".." (e.g. "..foo") - confirm this is intended.
// NOTE(review): a path consisting only of "/" looks like it matches the sdot
// alternative and would come back as an empty string - confirm.
return preg_replace('~
^(?P>sdotdot)?(?:(?P>sdot)*/\.\.)*
|(?<sdotdot>(?:(?P>sdot)*/(?!\.\.)(?:[^/]+)(?P>sdotdot)?(?P>sdot)*/\.\.)+)
|(?<sdot>/\.?(?=/|$))+
~sx', '', $path);
}
I use Hardex's solution:
#include <string.h>
/* Return a pointer to the last '/' in buf[0..len), or NULL if there is none.
 * Portable replacement for memrchr(), which is a GNU extension and is not
 * declared by <string.h> unless _GNU_SOURCE is defined. */
static const char *find_last_slash(const char *buf, size_t len) {
    while (len > 0) {
        if (buf[--len] == '/') {
            return &buf[len];
        }
    }
    return NULL;
}

/*
 * Lexically normalize `src` into the caller-supplied buffer `res`, without
 * resolving symlinks or checking that anything exists.
 *
 * pwd  Absolute base directory used when src is empty or does not start with
 *      '/'. It is only read. Trailing slashes are ignored; a NULL pwd is
 *      treated as the root.
 * src  NUL-terminated path to normalize.
 * res  Output buffer. It must not overlap pwd or src and must have room for
 *      strlen(pwd) + strlen(src) + 2 bytes; the function cannot check this.
 *
 * "." and empty components are dropped; ".." removes the previous component
 * and is a no-op at the root. The result is never empty ("/" at minimum).
 *
 * Returns res, or NULL if src or res is NULL.
 */
char * normalizePath(char* pwd, const char * src, char* res) {
    if (src == NULL || res == NULL) {
        return NULL;
    }

    size_t src_len = strlen(src);
    size_t res_len = 0;

    if ((src_len == 0 || src[0] != '/') && pwd != NULL) {
        /* Relative path: seed the result with the base directory. */
        res_len = strlen(pwd);
        memcpy(res, pwd, res_len);
        /* Each component appended below brings its own leading '/', so a
         * trailing slash on the base ("/", "/usr/") must be dropped; left in
         * place it yields "//" and makes ".." remove nothing. */
        while (res_len > 0 && res[res_len - 1] == '/') {
            res_len--;
        }
    }

    const char *ptr = src;
    const char *end = src + src_len;

    while (ptr < end) {
        const char *next = memchr(ptr, '/', (size_t)(end - ptr));
        if (next == NULL) {
            next = end;
        }
        size_t len = (size_t)(next - ptr);

        if (len == 0 || (len == 1 && ptr[0] == '.')) {
            /* Empty component ("//") or ".": nothing to emit. */
        } else if (len == 2 && ptr[0] == '.' && ptr[1] == '.') {
            /* "..": cut the result back to before its last component. */
            const char *slash = find_last_slash(res, res_len);
            if (slash != NULL) {
                res_len = (size_t)(slash - res);
            }
        } else {
            res[res_len++] = '/';
            memcpy(&res[res_len], ptr, len);
            res_len += len;
        }

        if (next == end) {
            break; /* do not step past one-past-the-end of src */
        }
        ptr = next + 1;
    }

    if (res_len == 0) {
        res[res_len++] = '/';
    }
    res[res_len] = '\0';
    return res;
}
Example:
#include <stdio.h>
/* Demo: normalize "./../../../" against "/usr/share/local/apps" and print
 * the result on its own line, surrounded by newlines. */
int main(){
    char normalized[FILENAME_MAX+1];
    char *base = (char*)"/usr/share/local/apps";
    char *relative = (char*)"./../../../";
    char *shown = normalizePath(base, relative, normalized);

    printf("\n%s\n", shown);
    return 0;
}
Output:
/usr
Note:
- The first argument is the directory path (absolute path) relative to which other paths will be normalized. It is generally the absolute path of the current directory.
- The second argument is the string to be normalized without resolving symlinks.
- The third argument is a
char*
which must point to a buffer large enough to hold the normalized path, including its terminating null byte.