将文件中的非连续块映射到连续的内存地址（Mapping non-contiguous blocks from a file into contiguous memory addresses）

我对使用内存映射 I/O 很感兴趣——最好能利用 boost::interprocess 提供的设施来获得跨平台支持——希望把文件中不连续的、系统页大小的块映射到内存中一段连续的地址空间。

一个简化的具体情况：

我有若干种“普通旧数据”（POD）结构，每种都是固定长度（小于系统页大小）。这些结构被串接成一条（很长的）流，流中每个结构的类型和位置由排在它前面的结构的值决定。我的目标是在要求苛刻的并发环境中尽量降低延迟并最大化吞吐量。

我可以非常高效地读取这些数据：以至少两倍系统页大小的块对其进行内存映射……并在读到一个延伸超过倒数第二个系统页边界的结构之后，立即建立新的映射。这样，与这些 POD 结构交互的代码完全不需要知道它们是内存映射的……例如，可以直接用 memcmp() 比较两个不同的结构，而不必关心页边界。

事情变得有趣的地方在于更新这些数据流……而且是在它们正被（并发）读取的时候。我想采用的策略受到以系统页大小为粒度的“写时复制”的启发……本质上就是写入“覆盖页”——让一个进程读取旧数据，而另一个进程读取更新后的数据。

管理使用哪些覆盖页以及何时使用，虽然不一定简单……但这不是我主要关心的问题。我主要担心的是：可能有一个结构横跨第 4 页和第 5 页，随后我更新了一个完全位于第 5 页内的结构……把新页写到位置 6……而第 5 页则在确定不再可达之后被“垃圾回收”。这意味着，如果我把第 4 页映射到位置 M，就需要把第 6 页映射到内存位置 M + PAGE_SIZE……这样才能用现有的（不感知内存映射的）函数可靠地处理跨页边界的结构。

我正在设法确定最佳策略，但我觉得文档不完整，这妨碍了我。本质上，我需要把地址空间的分配与向该地址空间进行内存映射这两件事解耦。对于 mmap()，我知道如果想显式控制映射位置可以使用 MAP_FIXED……但我不清楚为了安全地做到这一点，应该怎样预留地址空间。我能否先不带 MAP_FIXED 映射两页 /dev/zero，然后两次使用 MAP_FIXED，把两页映射到这段已分配空间内的显式虚拟地址上？如果可以，我是否也应该调用三次 munmap()？这样会不会泄漏资源和/或带来其他不良开销？让问题更复杂的是，我希望在 Windows 上也有类似的行为……有办法做到吗？如果我在跨平台的目标上让步，是否有更简洁的解决方案？

谢谢你的回答，Mahmoud……我已经读过那段代码，并且认为自己理解了……我在 Linux 下编译了它，其行为与你所说的一致。

我主要的顾虑在第 62 行——使用 MAP_FIXED。它对 mmap 做了一些假设，而我在能找到的文档中无法确认这些假设。你把“更新”页映射到了 mmap() 最初返回的同一段地址空间中——我假设这是“正确”的，也就是说，并非只是碰巧在 Linux 上可行？我还需要假设它对文件映射和匿名映射都能跨平台工作。

这个示例确实让我向前迈进了一步……它表明我最终需要的东西很可能可以用 mmap() 实现——至少在 Linux 上如此。我真正想要的是一份文档的出处，能证明 MAP_FIXED 那一行会像示例演示的那样工作……以及，理想情况下，一种把 Linux/Unix 特有的 mmap() 转换为平台无关（boost::interprocess）做法的方法。

Answer 1:

你的问题有点让人困惑。按照我的理解，下面这段代码可以满足你的需要：

#define PAGESIZE 4096

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <errno.h>
#include <sys/types.h>
#include <fcntl.h>
#include <unistd.h>
#include <assert.h>

/* Fixed-size plain-old-data record that the samples write directly into the mapped file. */
struct StoredObject
{
    int IntVal; /* numeric payload */
    char StrVal[25]; /* NUL-terminated text; the 24-character demo strings plus NUL fill it exactly */
};

/*
 * POSIX demonstration of a page-granular, copy-on-write-like update of a
 * memory-mapped file.
 *
 *   1. Create "mmapfile" (two pages long) and map it shared (mapPtr).
 *   2. Store two records: mary1 straddles the page boundary, john1 lies
 *      entirely in the second page.
 *   3. Map the file a second time (mapPtr2) and replace the second page of
 *      that mapping with an anonymous page via MAP_FIXED.
 *   4. Copy the old second page into the overlay and modify john there;
 *      the original mapping keeps showing the old data.
 *
 * Returns 0 on success and EXIT_FAILURE if any system call fails.
 */
int main(int argc, char **argv)
{
    (void) argc;
    (void) argv;

    //All locals are declared up front so the error paths can jump to the common cleanup code
    int rc = EXIT_FAILURE;
    unsigned char *mapPtr = (unsigned char *) MAP_FAILED;
    unsigned char *mapPtr2 = (unsigned char *) MAP_FAILED;
    unsigned char *temp;
    struct StoredObject controlObject;
    struct StoredObject *mary1;
    struct StoredObject *john1;
    struct StoredObject *mary2;
    struct StoredObject *john2;

    int fd = open("mmapfile", O_RDWR | O_CREAT | O_TRUNC, (mode_t) 0600);
    if (fd == -1)
    {
        fprintf(stderr, "open failed. %s\n", strerror(errno));
        return rc;
    }

    //Set the file to the size of our data (2 pages) by writing its final byte
    if (lseek(fd, PAGESIZE * 2 - 1, SEEK_SET) == (off_t) -1 || write(fd, "", 1) != 1)
    {
        fprintf(stderr, "Sizing the file failed. %s\n", strerror(errno));
        goto cleanup;
    }

    mapPtr = (unsigned char *) mmap(NULL, PAGESIZE * 2, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    if (mapPtr == MAP_FAILED)
    {
        fprintf(stderr, "Initial map failed. %s\n", strerror(errno));
        goto cleanup;
    }

    //Zero the whole record first so no uninitialised padding bytes end up in the file
    memset(&controlObject, 0, sizeof controlObject);
    controlObject.IntVal = 12;
    strcpy(controlObject.StrVal, "Mary had a little lamb.\n");

    mary1 = (struct StoredObject *)(mapPtr + PAGESIZE - 4); //Will fall on the boundary between first and second page
    memcpy(mary1, &controlObject, sizeof(struct StoredObject));

    printf("%d, %s", mary1->IntVal, mary1->StrVal);
    //Should print "12, Mary had a little lamb.\n"

    john1 = mary1 + 1; //Comes immediately after mary1 in memory; will start and end in the second page
    memcpy(john1, &controlObject, sizeof(struct StoredObject));

    john1->IntVal = 42;
    strcpy(john1->StrVal, "John had a little lamb.\n");

    printf("%d, %s", john1->IntVal, john1->StrVal);
    //Should print "42, John had a little lamb.\n"

    //Make sure the data's on the disk, as this is the initial, "read-only" data
    if (msync(mapPtr, PAGESIZE * 2, MS_SYNC) == -1)
    {
        fprintf(stderr, "msync failed. %s\n", strerror(errno));
        goto cleanup;
    }

    //This is the initial data set, now in memory, loaded across two pages
    //At this point, someone could be reading from there. We don't know or care.
    //We want to modify john1, but don't want to write over the existing data

    //This is the shadow map. COW-like optimization will take place:
    //we'll map the entire address space from the shared source, then overlap with a new map to modify
    //This is mapped anywhere, letting the system decide what address we'll be using for the new data pointer
    mapPtr2 = (unsigned char *) mmap(NULL, PAGESIZE * 2, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    if (mapPtr2 == MAP_FAILED)
    {
        fprintf(stderr, "Shadow map failed. %s\n", strerror(errno));
        goto cleanup;
    }

    //Map the second page on top of the shadow mapping; this is the one that we're modifying. It is *not* backed by disk
    //fd is -1 because some implementations require that for anonymous mappings
    temp = (unsigned char *) mmap(mapPtr2 + PAGESIZE, PAGESIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED | MAP_ANON, -1, 0);
    if (temp == MAP_FAILED)
    {
        //Do not continue: the memcpy below would otherwise write through to the file
        fprintf(stderr, "Fixed map failed. %s\n", strerror(errno));
        goto cleanup;
    }
    assert(temp == mapPtr2 + PAGESIZE);

    //Make a copy of the old data that will later be changed
    memcpy(mapPtr2 + PAGESIZE, mapPtr + PAGESIZE, PAGESIZE);

    //The two address spaces should still be identical until this point
    assert(memcmp(mapPtr, mapPtr2, PAGESIZE * 2) == 0);

    //We can now make our changes to the second page as needed
    mary2 = (struct StoredObject *)(((unsigned char *)mary1 - mapPtr) + mapPtr2);
    john2 = (struct StoredObject *)(((unsigned char *)john1 - mapPtr) + mapPtr2);

    john2->IntVal = 52;
    strcpy(john2->StrVal, "Mike had a little lamb.\n");

    //Test that everything worked OK
    assert(memcmp(mary1, mary2, sizeof(struct StoredObject)) == 0);
    printf("%d, %s", john2->IntVal, john2->StrVal);
    //Should print "52, Mike had a little lamb.\n"

    //Now assume our garbage collection routine has detected that no one is using the original copy of the data
    munmap(mapPtr, PAGESIZE * 2);

    //The shadow mapping becomes the current one; ownership moves to mapPtr
    mapPtr = mapPtr2;
    mapPtr2 = (unsigned char *) MAP_FAILED;

    rc = 0;

cleanup:
    //Now we're done with all our work and want to completely clean up
    if (mapPtr2 != MAP_FAILED)
    {
        munmap(mapPtr2, PAGESIZE * 2);
    }
    if (mapPtr != MAP_FAILED)
    {
        munmap(mapPtr, PAGESIZE * 2);
    }
    close(fd);

    return rc;
}

我修改后的回答应该能解决你对安全性的顾虑。只在第二次 mmap 调用中使用 MAP_FIXED（就像我上面做的那样）。MAP_FIXED 很酷的一点是，它允许你覆盖一段已有的 mmap 地址区间：它会解除你所重叠范围的映射，并用新映射的内容取而代之：

 MAP_FIXED
              [...] If the memory
              region specified by addr and len overlaps pages of any existing
              mapping(s), then the overlapped part of the existing mapping(s) will be
              discarded. [...]

这样一来，你就让操作系统负责为你找到一块数百兆的连续内存（绝不要对一个你无法确定是否空闲的地址使用 MAP_FIXED）。然后，再对这段已映射的大空间中将要修改数据的那一小部分使用 MAP_FIXED。搞定。

在 Windows 上，类似下面的代码应该可以工作（我目前用的是 Mac，所以未经测试）：

int main(int argc, char **argv)
{
    HANDLE hFile = CreateFile(L"mmapfile", GENERIC_READ | GENERIC_WRITE, FILE_SHARE_READ, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
    //Set the file to the size of our data (2 pages)
    SetFilePointer(hFile, PAGESIZE*2 - 1, 0, FILE_BEGIN);
    DWORD bytesWritten = -1;
    WriteFile(hFile, "", 1, &bytesWritten, NULL);

    HANDLE hMap = CreateFileMapping(hFile, NULL, PAGE_READWRITE, 0, PAGESIZE * 2, NULL);
    unsigned char *mapPtr = (unsigned char *) MapViewOfFile(hMap, FILE_MAP_READ | FILE_MAP_WRITE, 0, 0, PAGESIZE * 2);

    struct StoredObject controlObject;
    controlObject.IntVal = 12;
    strcpy(controlObject.StrVal, "Mary had a little lamb.\n");

    struct StoredObject *mary1;
    mary1 = (struct StoredObject *)(mapPtr + PAGESIZE - 4); //Will fall on the boundary between first and second page
    memcpy(mary1, &controlObject, sizeof(StoredObject));

    printf("%d, %s", mary1->IntVal, mary1->StrVal);
    //Should print "12, Mary had a little lamb.\n"

    struct StoredObject *john1;
    john1 = mary1 + 1; //Comes immediately after mary1 in memory; will start and end in the second page
    memcpy(john1, &controlObject, sizeof(StoredObject));

    john1->IntVal = 42;
    strcpy(john1->StrVal, "John had a little lamb.\n");

    printf("%d, %s", john1->IntVal, john1->StrVal);
    //Should print "12, Mary had a little lamb.\n"

    //Make sure the data's on the disk, as this is the initial, "read-only" data
    //msync(mapPtr, PAGESIZE * 2, MS_SYNC);

    //This is the inital data set, now in memory, loaded across two pages
    //At this point, someone could be reading from there. We don't know or care.
    //We want to modify john1, but don't want to write over the existing data
    //Easy as pie.

    //This is the shadow map. COW-like optimization will take place: 
    //we'll map the entire address space from the shared source, then overlap with a new map to modify
    //This is mapped anywhere, letting the system decide what address we'll be using for the new data pointer
    unsigned char *reservedMem = (unsigned char *) VirtualAlloc(NULL, PAGESIZE * 2, MEM_RESERVE, PAGE_READWRITE);
    HANDLE hMap2 = CreateFileMapping(hFile, NULL, PAGE_READWRITE, 0, PAGESIZE, NULL);
    unsigned char *mapPtr2 = (unsigned char *) MapViewOfFileEx(hMap2, FILE_MAP_READ | FILE_MAP_WRITE, 0, 0, PAGESIZE, reservedMem);

    //Map the second page on top of the first mapping; this is the one that we're modifying. It is *not* backed by disk
    unsigned char *temp = (unsigned char *) MapViewOfFileEx(hMap2, FILE_MAP_READ | FILE_MAP_WRITE, 0, 0, PAGESIZE, reservedMem + PAGESIZE);
    if (temp == NULL)
    {
        printf("Fixed map failed. 0x%x\n", GetLastError());
        return -1;
    }
    assert(temp == mapPtr2 + PAGESIZE);

    //Make a copy of the old data that will later be changed
    memcpy(mapPtr2 + PAGESIZE, mapPtr + PAGESIZE, PAGESIZE);

    //The two address spaces should still be identical until this point
    assert(memcmp(mapPtr, mapPtr2, PAGESIZE * 2) == 0);

    //We can now make our changes to the second page as needed
    struct StoredObject *mary2 = (struct StoredObject *)(((unsigned char *)mary1 - mapPtr) + mapPtr2);
    struct StoredObject *john2 = (struct StoredObject *)(((unsigned char *)john1 - mapPtr) + mapPtr2);

    john2->IntVal = 52;
    strcpy(john2->StrVal, "Mike had a little lamb.\n");

    //Test that everything worked OK
    assert(memcmp(mary1, mary2, sizeof(struct StoredObject)) == 0);
    printf("%d, %s", john2->IntVal, john2->StrVal);
    //Should print "52, Mike had a little lamb.\n"

    //Now assume our garbage collection routine has detected that no one is using the original copy of the data
    //munmap(mapPtr, PAGESIZE * 2);

    mapPtr = mapPtr2;

    //Now we're done with all our work and want to completely clean up
    //munmap(mapPtr2, PAGESIZE * 2);

    //close(fd);

    return 0;
}

Answer 2:

但我不清楚为了安全地做到这一点，我应该如何保留地址空间

这因操作系统而异，不过在 MSDN 上稍微搜索一下 mmap（我是从在 MSDN 上搜索“xp mmap”开始的）就会发现，微软照例用它那些 VerboseAndHelpfullyCapitalizedNames 风格的名字提供了（许多个）实现 mmap 各部分功能的函数。文件映射和匿名映射都能像任何 POSIX-2001 系统那样处理固定地址请求，也就是说，如果你的地址空间里还有别的东西在和内核打交道，就得由你自己去协调。我不会去碰“安全”这个词——对于想移植到未指定平台的代码，根本不存在“安全”这回事。你必须自己建立一个预先映射的匿名内存池，之后在自己的控制下取消映射并分配出去。

Answer 3:

我测试了 @Mahmoud 的 Windows 代码——确切地说，我测试的是下面这段类似的代码——它不能工作（Linux 代码可以工作）。如果取消对 VirtualFree 的注释，它就能工作。正如我在上面的评论中提到的，在 Windows 上可以用 VirtualAlloc 预留地址空间，但不能对已被占用的地址使用 MapViewOfFileEx，所以需要先用 VirtualFree 释放它。这样就会出现竞争条件：在你完成映射之前，另一个线程可能抢占该内存地址，因此必须把整个过程放在循环里，例如最多尝试 1000 次，然后放弃。

package main

import (
    "fmt"
    "os"
    "syscall"
)

// main reproduces the Windows fixed-address mapping problem: it allocates a
// region with virtualAlloc and then asks mapViewOfFileEx to place a view of
// foo.dat at that same address. As written (virtualFree commented out) the
// mapping is expected to fail, because the target address is still occupied;
// uncommenting the virtualFree call releases the region first.
func main() {
    const size = 1024 * 1024

    file, err := os.Create("foo.dat")
    if err != nil {
        panic(err)
    }
    defer file.Close()

    if err := file.Truncate(size); err != nil {
        panic(err)
    }

    const MEM_COMMIT = 0x1000

    addr, err := virtualAlloc(0, size, MEM_COMMIT, protReadWrite)
    if err != nil {
        panic(err)
    }

    fd, err := syscall.CreateFileMapping(
        syscall.Handle(file.Fd()),
        nil,
        uint32(protReadWrite),
        0,
        uint32(size),
        nil,
    )
    // Without this check a failed CreateFileMapping would only surface later
    // as a confusing mapViewOfFileEx failure on an invalid handle.
    if err != nil {
        panic(err)
    }
    defer syscall.CloseHandle(fd)

    // Releasing the region here is what makes the mapping below succeed.
    // Note the race: another thread may claim addr between the release and
    // the map call, so production code has to retry in a loop.
    //if err := virtualFree(addr); err != nil {
    //  panic(err)
    //}

    base, err := mapViewOfFileEx(fd, syscall.FILE_MAP_READ|syscall.FILE_MAP_WRITE, 0, 0, size, addr)
    // Report the system error first; it is more informative than the bare
    // zero-address message kept below as a fallback.
    if err != nil {
        panic(err)
    }
    if base == 0 {
        panic("mapViewOfFileEx returned 0")
    }

    fmt.Println("success!")
}

// memProtect carries the protection flags that this file passes to
// VirtualAlloc, CreateFileMapping and MapViewOfFileEx.
type memProtect uint32

// Protection values. The numbers coincide with the Win32 page-protection
// constants noted beside them.
// NOTE(review): 0x20 is PAGE_EXECUTE_READ and 0x40 is PAGE_EXECUTE_READWRITE
// in the Win32 headers — confirm that the names below express the intent.
const protReadOnly memProtect = 0x02  // PAGE_READONLY
const protReadWrite memProtect = 0x04 // PAGE_READWRITE
const protExecute memProtect = 0x20
const protAll memProtect = 0x40

// kernel32 is loaded eagerly; MustLoadDLL panics at start-up when it is missing.
var modkernel32 = syscall.MustLoadDLL("kernel32.dll")

// Entry points resolved once at package initialisation; MustFindProc panics
// if a procedure cannot be found.
var procMapViewOfFileEx = modkernel32.MustFindProc("MapViewOfFileEx")
var procVirtualAlloc = modkernel32.MustFindProc("VirtualAlloc")
var procVirtualFree = modkernel32.MustFindProc("VirtualFree")
var procVirtualProtect = modkernel32.MustFindProc("VirtualProtect")

// mapViewOfFileEx calls the kernel32 MapViewOfFileEx procedure to map a view
// of the file mapping handle at the requested target address.
//
// offsetHigh/offsetLow give the file offset of the view, length its size in
// bytes, and target the desired base address (0 lets the system choose).
// It returns the base address of the view. When the call yields a zero
// address, err carries the system error, or EINVAL if none was reported.
func mapViewOfFileEx(handle syscall.Handle, prot memProtect, offsetHigh uint32, offsetLow uint32, length uintptr, target uintptr) (addr uintptr, err error) {
    r0, _, e1 := syscall.Syscall6(procMapViewOfFileEx.Addr(), 6, uintptr(handle), uintptr(prot), uintptr(offsetHigh), uintptr(offsetLow), length, target)
    addr = uintptr(r0)
    if addr != 0 {
        return addr, nil
    }
    if e1 != 0 {
        err = error(e1)
    } else {
        err = syscall.EINVAL
    }
    // Hand the error back to the caller; previously it was computed and then
    // replaced by nil, which hid every mapping failure.
    return addr, err
}

// virtualAlloc calls the kernel32 VirtualAlloc procedure.
//
// addr is the desired base address (0 lets the system choose), size the
// length in bytes, allocType the allocation flags and prot the protection.
// It returns the base address of the allocated region. When the call yields
// a zero address, err carries the system error, or EINVAL if none was
// reported.
func virtualAlloc(addr, size uintptr, allocType uint32, prot memProtect) (mem uintptr, err error) {
    r0, _, e1 := syscall.Syscall6(procVirtualAlloc.Addr(), 4, addr, size, uintptr(allocType), uintptr(prot), 0, 0)
    mem = uintptr(r0)
    // Failure is signalled by the zero return value. The last-error value on
    // its own may be left over from an earlier call, so it is only consulted
    // once the call is known to have failed.
    if mem == 0 {
        if e1 != 0 {
            return 0, error(e1)
        }
        return 0, syscall.EINVAL
    }
    return mem, nil
}

// virtualFree releases the whole region that starts at addr by calling the
// kernel32 VirtualFree procedure with MEM_RELEASE and a size of 0.
// It returns the system error when the call reports failure, or EINVAL if
// no error code was provided.
func virtualFree(addr uintptr) error {
    const MEM_RELEASE = 0x8000
    r0, _, e1 := syscall.Syscall(procVirtualFree.Addr(), 3, addr, 0, MEM_RELEASE)
    // VirtualFree returns zero on failure. The last-error value on its own
    // may be left over from an earlier call, so check the result first.
    if r0 == 0 {
        if e1 != 0 {
            return error(e1)
        }
        return syscall.EINVAL
    }
    return nil
}

文章来源: Mapping non-contiguous blocks from a file into contiguous memory addresses