2013 lines
56 KiB
C++
2013 lines
56 KiB
C++
/* mmap.cc
|
|
|
|
This file is part of Cygwin.
|
|
|
|
This software is a copyrighted work licensed under the terms of the
|
|
Cygwin license. Please consult the file "CYGWIN_LICENSE" for
|
|
details. */
|
|
|
|
#include "winsup.h"
|
|
#include "miscfuncs.h"
|
|
#include <unistd.h>
|
|
#include <stdlib.h>
|
|
#include <sys/mman.h>
|
|
#include <sys/param.h>
|
|
#include "cygerrno.h"
|
|
#include "security.h"
|
|
#include "path.h"
|
|
#include "fhandler.h"
|
|
#include "dtable.h"
|
|
#include "cygheap.h"
|
|
#include "ntdll.h"
|
|
#include <sys/queue.h>
|
|
|
|
/* __PROT_ATTACH indicates an anonymous mapping which is supposed to be
|
|
attached to a file mapping for pages beyond the file's EOF. The idea
|
|
is to support mappings longer than the file, without the file growing
|
|
to mapping length (POSIX semantics). */
|
|
#define __PROT_ATTACH 0x8000000
|
|
/* Filler pages are the pages from the last file backed page to the next
|
|
64K boundary. These pages are created as anonymous pages, but with
|
|
the same page protection as the file's pages, since POSIX applications
|
|
expect to be able to access this part the same way as the file pages. */
|
|
#define __PROT_FILLER 0x4000000
|
|
|
|
/* Stick with 4K pages for bookkeeping, otherwise we just get confused
|
|
when trying to do file mappings with trailing filler pages correctly. */
|
|
#define PAGE_CNT(bytes) howmany((bytes), wincap.page_size())

#define PGBITS          (sizeof (DWORD)*8)
#define MAPSIZE(pages)  howmany ((pages), PGBITS)

/* page_map is an array of DWORDs holding one bit per page.  The bit mask
   is built from a DWORD-typed 1 so that selecting the topmost bit of a
   word never shifts into the sign bit of a signed type. */
#define MAP_SET(n)      (page_map[(n)/PGBITS] |= ((DWORD) 1 << ((n) % PGBITS)))
#define MAP_CLR(n)      (page_map[(n)/PGBITS] &= ~((DWORD) 1 << ((n) % PGBITS)))
#define MAP_ISSET(n)    (page_map[(n)/PGBITS] & ((DWORD) 1 << ((n) % PGBITS)))
|
|
|
|
/* Used for anonymous mappings. */
|
|
static fhandler_dev_zero fh_anonymous;
|
|
|
|
/* Used for thread synchronization while accessing mmap bookkeeping lists. */
|
|
static NO_COPY muto mmap_guard;
|
|
#define LIST_LOCK() (mmap_guard.init ("mmap_guard")->acquire ())
|
|
#define LIST_UNLOCK() (mmap_guard.release ())
|
|
|
|
/* Small helpers to avoid having lots of flag bit tests in the code. */
|
|
/* Return true if all MAP_PRIVATE bits are set in FLAGS. */
static inline bool
priv (int flags)
{
  const int mask = MAP_PRIVATE;
  return mask == (flags & mask);
}
|
|
|
|
/* Return true if all MAP_FIXED bits are set in FLAGS. */
static inline bool
fixed (int flags)
{
  const int mask = MAP_FIXED;
  return mask == (flags & mask);
}
|
|
|
|
/* Return true if all MAP_ANONYMOUS bits are set in FLAGS. */
static inline bool
anonymous (int flags)
{
  const int mask = MAP_ANONYMOUS;
  return mask == (flags & mask);
}
|
|
|
|
/* Return true if all MAP_NORESERVE bits are set in FLAGS. */
static inline bool
noreserve (int flags)
{
  const int mask = MAP_NORESERVE;
  return mask == (flags & mask);
}
|
|
|
|
static inline bool
|
|
autogrow (int flags)
|
|
{
|
|
return (flags & MAP_AUTOGROW) == MAP_AUTOGROW;
|
|
}
|
|
|
|
static inline bool
|
|
attached (int prot)
|
|
{
|
|
return (prot & __PROT_ATTACH) == __PROT_ATTACH;
|
|
}
|
|
|
|
static inline bool
|
|
filler (int prot)
|
|
{
|
|
return (prot & __PROT_FILLER) == __PROT_FILLER;
|
|
}
|
|
|
|
/* Generate the Windows page protection used when creating a section or
   mapping a view, derived from the access mask the file was opened with
   (OPENFLAGS) and the mmap FLAGS.  Private mappings always start out as
   PAGE_WRITECOPY; otherwise write access on the handle selects
   PAGE_READWRITE over PAGE_READONLY. */
static inline DWORD
gen_create_protect (DWORD openflags, int flags)
{
  DWORD protect;

  if (priv (flags))
    protect = PAGE_WRITECOPY;
  else
    protect = (openflags & GENERIC_WRITE) ? PAGE_READWRITE : PAGE_READONLY;

  /* Shifting left by 4 turns the value into its executable variant. */
  if (openflags & GENERIC_EXECUTE)
    protect <<= 4;

  return protect;
}
|
|
|
|
/* Generate Windows protection flags from mmap prot and flag values. */
|
|
static inline DWORD
|
|
gen_protect (int prot, int flags)
|
|
{
|
|
DWORD ret = PAGE_NOACCESS;
|
|
|
|
/* Attached pages are only reserved, but the protection must be a
|
|
valid value, so we just return PAGE_READWRITE. */
|
|
if (attached (prot))
|
|
return PAGE_EXECUTE_READWRITE;
|
|
|
|
if (prot & PROT_WRITE)
|
|
ret = (priv (flags) && (!anonymous (flags) || filler (prot)))
|
|
? PAGE_WRITECOPY : PAGE_READWRITE;
|
|
else if (prot & PROT_READ)
|
|
ret = PAGE_READONLY;
|
|
|
|
if (prot & PROT_EXEC)
|
|
ret <<= 4;
|
|
|
|
return ret;
|
|
}
|
|
|
|
/* Create a section object for a mapping.

   FHDL is the file handle to map, or INVALID_HANDLE_VALUE for an
   anonymous (pagefile backed) section.  LEN and OFF are the requested
   mapping length and file offset, OPENFLAGS the access mask of the file
   handle, PROT and FLAGS the (internal) mmap protection and flags.

   Returns the section handle, or NULL on failure, in which case the
   Windows last error is set from the NT status. */
static HANDLE
CreateMapping (HANDLE fhdl, size_t len, off_t off, DWORD openflags,
               int prot, int flags)
{
  HANDLE h;
  NTSTATUS status;

  LARGE_INTEGER sectionsize = { QuadPart: (LONGLONG) len };
  ULONG protect = gen_create_protect (openflags, flags);
  /* Attached mappings only reserve their pages. */
  ULONG attributes = attached (prot) ? SEC_RESERVE : SEC_COMMIT;

  OBJECT_ATTRIBUTES oa;
  InitializeObjectAttributes (&oa, NULL, OBJ_INHERIT, NULL,
                              sec_none.lpSecurityDescriptor);

  if (fhdl == INVALID_HANDLE_VALUE)
    {
      /* Standard anonymous mapping needs non-zero len. */
      status = NtCreateSection (&h, SECTION_ALL_ACCESS, &oa, &sectionsize,
                                protect, attributes, NULL);
    }
  else if (autogrow (flags))
    {
      /* Auto-grow only works if the protection is PAGE_READWRITE.  So,
         first we call NtCreateSection with PAGE_READWRITE, then, if the
         requested protection is different, we close the mapping and
         reopen it again with the correct protection, if auto-grow worked. */
      sectionsize.QuadPart += off;
      status = NtCreateSection (&h, SECTION_ALL_ACCESS, &oa, &sectionsize,
                                PAGE_READWRITE, attributes, fhdl);
      if (NT_SUCCESS (status) && protect != PAGE_READWRITE)
        {
          NtClose (h);
          status = NtCreateSection (&h, SECTION_ALL_ACCESS, &oa, &sectionsize,
                                    protect, attributes, fhdl);
        }
    }
  else
    {
      /* Zero len creates mapping for whole file and allows
         AT_EXTENDABLE_FILE mapping, if we ever use it... */
      sectionsize.QuadPart = 0;
      status = NtCreateSection (&h, SECTION_ALL_ACCESS, &oa, &sectionsize,
                                protect, attributes, fhdl);
    }
  if (!NT_SUCCESS (status))
    {
      h = NULL;
      SetLastError (RtlNtStatusToDosError (status));
    }
  return h;
}
|
|
|
|
static void *
|
|
MapView (HANDLE h, void *addr, size_t len, DWORD openflags,
|
|
int prot, int flags, off_t off)
|
|
{
|
|
NTSTATUS status;
|
|
LARGE_INTEGER offset = { QuadPart:off };
|
|
DWORD protect = gen_create_protect (openflags, flags);
|
|
void *base = addr;
|
|
SIZE_T commitsize = attached (prot) ? 0 : len;
|
|
SIZE_T viewsize = len;
|
|
#ifdef __x86_64__ /* AT_ROUND_TO_PAGE isn't supported on 64 bit systems. */
|
|
ULONG alloc_type = MEM_TOP_DOWN;
|
|
#else
|
|
ULONG alloc_type = (base && !wincap.is_wow64 () ? AT_ROUND_TO_PAGE : 0)
|
|
| MEM_TOP_DOWN;
|
|
#endif
|
|
|
|
/* Try mapping using the given address first, even if it's NULL.
|
|
If it failed, and addr was not NULL and flags is not MAP_FIXED,
|
|
try again with NULL address.
|
|
|
|
Note: Retrying the mapping might be unnecessary, now that mmap64 checks
|
|
for a valid memory area first. */
|
|
status = NtMapViewOfSection (h, NtCurrentProcess (), &base, 0, commitsize,
|
|
&offset, &viewsize, ViewShare, alloc_type,
|
|
protect);
|
|
if (!NT_SUCCESS (status) && addr && !fixed (flags))
|
|
{
|
|
base = NULL;
|
|
status = NtMapViewOfSection (h, NtCurrentProcess (), &base, 0, commitsize,
|
|
&offset, &viewsize, ViewShare, 0, protect);
|
|
}
|
|
if (!NT_SUCCESS (status))
|
|
{
|
|
base = NULL;
|
|
SetLastError (RtlNtStatusToDosError (status));
|
|
}
|
|
debug_printf ("%p (status %p) = NtMapViewOfSection (h:%p, addr:%p, len:%lu,"
|
|
" off:%Y, protect:%y, type:%y)",
|
|
base, status, h, addr, len, off, protect, 0);
|
|
return base;
|
|
}
|
|
|
|
/* Class structure used to keep a record of all current mmap areas
|
|
in a process. Needed for bookkeeping all mmaps in a process and
|
|
for duplicating all mmaps after fork() since mmaps are not propagated
|
|
to child processes by Windows. All information must be duplicated
|
|
by hand, see fixup_mmaps_after_fork().
|
|
|
|
The class structure:
|
|
|
|
One member of class map per process, global variable mmapped_areas.
|
|
Contains a singly-linked list of type class mmap_list. Each mmap_list
|
|
entry represents all mapping to a file, keyed by file descriptor and
|
|
file name hash.
|
|
Each list entry contains a singly-linked list of type class mmap_record.
|
|
Each mmap_record represents exactly one mapping. For each mapping, there's
|
|
an additional so called `page_map'. It's an array of bits, one bit
|
|
per mapped memory page. The bit is set if the page is accessible,
|
|
unset otherwise. */
|
|
|
|
#pragma pack(push, 4)
|
|
class mmap_record
|
|
{
|
|
public:
|
|
LIST_ENTRY (mmap_record) mr_next;
|
|
|
|
private:
|
|
/* 4 byte on 32 bit, 8 byte on 64 bit */
|
|
HANDLE mapping_hdl;
|
|
SIZE_T len;
|
|
caddr_t base_address;
|
|
/* Always 8 bytes */
|
|
off_t offset;
|
|
/* Always 4 bytes */
|
|
int fd;
|
|
DWORD openflags;
|
|
int prot;
|
|
int flags;
|
|
dev_t dev;
|
|
DWORD page_map[0];
|
|
|
|
public:
|
|
mmap_record (int nfd, HANDLE h, DWORD of, int p, int f, off_t o, SIZE_T l,
|
|
caddr_t b) :
|
|
mapping_hdl (h),
|
|
len (l),
|
|
base_address (b),
|
|
offset (o),
|
|
fd (nfd),
|
|
openflags (of),
|
|
prot (p),
|
|
flags (f)
|
|
{
|
|
dev = 0;
|
|
if (fd >= 0 && !cygheap->fdtab.not_open (fd))
|
|
dev = cygheap->fdtab[fd]->dev ();
|
|
else if (fd == -1)
|
|
dev = FH_ZERO;
|
|
}
|
|
|
|
int get_fd () const { return fd; }
|
|
HANDLE get_handle () const { return mapping_hdl; }
|
|
int get_device () { return dev; }
|
|
int get_prot () const { return prot; }
|
|
int get_openflags () const { return openflags; }
|
|
int get_flags () const { return flags; }
|
|
bool priv () const { return ::priv (flags); }
|
|
bool fixed () const { return ::fixed (flags); }
|
|
bool anonymous () const { return ::anonymous (flags); }
|
|
bool noreserve () const { return ::noreserve (flags); }
|
|
bool autogrow () const { return ::autogrow (flags); }
|
|
bool attached () const { return ::attached (prot); }
|
|
bool filler () const { return ::filler (prot); }
|
|
off_t get_offset () const { return offset; }
|
|
SIZE_T get_len () const { return len; }
|
|
caddr_t get_address () const { return base_address; }
|
|
|
|
void init_page_map (mmap_record &r);
|
|
|
|
SIZE_T find_unused_pages (SIZE_T pages) const;
|
|
bool match (caddr_t addr, SIZE_T len, caddr_t &m_addr, SIZE_T &m_len);
|
|
off_t map_pages (SIZE_T len);
|
|
bool map_pages (caddr_t addr, SIZE_T len);
|
|
bool unmap_pages (caddr_t addr, SIZE_T len);
|
|
int access (caddr_t address);
|
|
|
|
fhandler_base *alloc_fh ();
|
|
void free_fh (fhandler_base *fh);
|
|
|
|
DWORD gen_create_protect () const
|
|
{ return ::gen_create_protect (get_openflags (), get_flags ()); }
|
|
DWORD gen_protect () const
|
|
{ return ::gen_protect (get_prot (), get_flags ()); }
|
|
bool compatible_flags (int fl) const;
|
|
};
|
|
#pragma pack(pop)
|
|
|
|
class mmap_list
|
|
{
|
|
public:
|
|
LIST_ENTRY (mmap_list) ml_next;
|
|
LIST_HEAD (, mmap_record) recs;
|
|
|
|
private:
|
|
int fd;
|
|
ino_t hash;
|
|
|
|
public:
|
|
int get_fd () const { return fd; }
|
|
ino_t get_hash () const { return hash; }
|
|
|
|
bool anonymous () const { return fd == -1; }
|
|
void set (int nfd, struct stat *st);
|
|
mmap_record *add_record (mmap_record &r);
|
|
bool del_record (mmap_record *rec);
|
|
caddr_t try_map (void *addr, size_t len, int flags, off_t off);
|
|
};
|
|
|
|
class mmap_areas
|
|
{
|
|
public:
|
|
LIST_HEAD (, mmap_list) lists;
|
|
|
|
mmap_list *get_list_by_fd (int fd, struct stat *st);
|
|
mmap_list *add_list (int fd, struct stat *st);
|
|
void del_list (mmap_list *ml);
|
|
};
|
|
|
|
/* This is the global map structure pointer. */
|
|
static mmap_areas mmapped_areas;
|
|
|
|
bool
|
|
mmap_record::compatible_flags (int fl) const
|
|
{
|
|
#define MAP_COMPATMASK (MAP_TYPE | MAP_NORESERVE)
|
|
return (get_flags () & MAP_COMPATMASK) == (fl & MAP_COMPATMASK);
|
|
}
|
|
|
|
/* Search the page_map for the first run of PAGES consecutive pages whose
   bits are clear.  Returns the index of the first page of the run, or
   (SIZE_T) -1 if there is no such run. */
SIZE_T
mmap_record::find_unused_pages (SIZE_T pages) const
{
  const SIZE_T total = PAGE_CNT (get_len ());

  if (pages > total)
    return (SIZE_T) -1;

  /* A run of PAGES pages can't start behind this index. */
  const SIZE_T last_start = total - pages;
  for (SIZE_T first = 0; first <= last_start; ++first)
    {
      if (MAP_ISSET (first))
        continue;
      SIZE_T run = 0;
      while (run < pages && !MAP_ISSET (first + run))
        ++run;
      if (run >= pages)
        return first;
    }
  return (SIZE_T) -1;
}
|
|
|
|
bool
|
|
mmap_record::match (caddr_t addr, SIZE_T len, caddr_t &m_addr, SIZE_T &m_len)
|
|
{
|
|
caddr_t low = (addr >= get_address ()) ? addr : get_address ();
|
|
caddr_t high = get_address ();
|
|
if (filler ())
|
|
high += get_len ();
|
|
else
|
|
high += (PAGE_CNT (get_len ()) * wincap.page_size ());
|
|
high = (addr + len < high) ? addr + len : high;
|
|
if (low < high)
|
|
{
|
|
m_addr = low;
|
|
m_len = high - low;
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
void
|
|
mmap_record::init_page_map (mmap_record &r)
|
|
{
|
|
*this = r;
|
|
DWORD start_protect = gen_create_protect ();
|
|
DWORD real_protect = gen_protect ();
|
|
if (real_protect != start_protect && !noreserve ()
|
|
&& !VirtualProtect (get_address (), get_len (),
|
|
real_protect, &start_protect))
|
|
system_printf ("Warning: VirtualProtect (addr: %p, len: %ly, "
|
|
"new_prot: %y, old_prot: %y), %E",
|
|
get_address (), get_len (),
|
|
real_protect, start_protect);
|
|
SIZE_T len = PAGE_CNT (get_len ());
|
|
while (len-- > 0)
|
|
MAP_SET (len);
|
|
}
|
|
|
|
/* Claim LEN bytes worth of unused pages inside this record.

   Used ONLY if this mapping matches into the chunk of another already
   performed mapping in a special case of MAP_ANON|MAP_PRIVATE.
   Otherwise its job is now done by init_page_map().

   Returns the byte offset of the claimed pages relative to the base
   address of the record, or (off_t) -1 with errno set on failure. */
off_t
mmap_record::map_pages (SIZE_T len)
{
  DWORD old_prot;
  debug_printf ("map_pages (fd=%d, len=%lu)", get_fd (), len);
  len = PAGE_CNT (len);

  /* Check the sentinel in the type find_unused_pages returns.  Comparing
     after the conversion to off_t never matches where SIZE_T is narrower
     than off_t. */
  SIZE_T first_page = find_unused_pages (len);
  if (first_page == (SIZE_T) -1)
    {
      /* No free run of pages.  Report a failure rather than offset 0,
         which the caller would take for a valid mapping at the base
         address. */
      set_errno (ENOMEM);
      return (off_t) -1;
    }

  off_t off = (off_t) first_page;
  if (!noreserve ()
      && !VirtualProtect (get_address () + off * wincap.page_size (),
                          len * wincap.page_size (), gen_protect (),
                          &old_prot))
    {
      __seterrno ();
      return (off_t) -1;
    }

  while (len-- > 0)
    MAP_SET (off + len);
  return off * wincap.page_size ();
}
|
|
|
|
bool
|
|
mmap_record::map_pages (caddr_t addr, SIZE_T len)
|
|
{
|
|
debug_printf ("map_pages (addr=%p, len=%lu)", addr, len);
|
|
DWORD old_prot;
|
|
off_t off = addr - get_address ();
|
|
off /= wincap.page_size ();
|
|
len = PAGE_CNT (len);
|
|
/* First check if the area is unused right now. */
|
|
for (SIZE_T l = 0; l < len; ++l)
|
|
if (MAP_ISSET (off + l))
|
|
{
|
|
set_errno (EINVAL);
|
|
return false;
|
|
}
|
|
if (!noreserve ()
|
|
&& !VirtualProtect (get_address () + off * wincap.page_size (),
|
|
len * wincap.page_size (), gen_protect (),
|
|
&old_prot))
|
|
{
|
|
__seterrno ();
|
|
return false;
|
|
}
|
|
for (; len-- > 0; ++off)
|
|
MAP_SET (off);
|
|
return true;
|
|
}
|
|
|
|
bool
|
|
mmap_record::unmap_pages (caddr_t addr, SIZE_T len)
|
|
{
|
|
DWORD old_prot;
|
|
SIZE_T off = addr - get_address ();
|
|
if (noreserve ()
|
|
&& !VirtualFree (get_address () + off, len, MEM_DECOMMIT))
|
|
debug_printf ("VirtualFree in unmap_pages () failed, %E");
|
|
else if (!VirtualProtect (get_address () + off, len, PAGE_NOACCESS,
|
|
&old_prot))
|
|
debug_printf ("VirtualProtect in unmap_pages () failed, %E");
|
|
|
|
off /= wincap.page_size ();
|
|
len = PAGE_CNT (len);
|
|
for (; len-- > 0; ++off)
|
|
MAP_CLR (off);
|
|
/* Return TRUE if all pages are free'd which may result in unmapping
|
|
the whole chunk. */
|
|
for (len = MAPSIZE (PAGE_CNT (get_len ())); len > 0; )
|
|
if (page_map[--len])
|
|
return false;
|
|
return true;
|
|
}
|
|
|
|
int
|
|
mmap_record::access (caddr_t address)
|
|
{
|
|
if (address < get_address () || address >= get_address () + get_len ())
|
|
return 0;
|
|
SIZE_T off = (address - get_address ()) / wincap.page_size ();
|
|
return MAP_ISSET (off);
|
|
}
|
|
|
|
/* Return a fhandler suitable for operating on this record.  Anonymous
   records get the shared static fh_anonymous; for all others a fhandler
   is built from the recorded device number.  May return NULL if
   build_fh_dev does.  Hand the result back to free_fh. */
fhandler_base *
mmap_record::alloc_fh ()
{
  if (!anonymous ())
    {
      /* The file descriptor could have been closed or, even
         worse, could have been reused for another file before
         the call to fork().  This requires creating a fhandler
         of the correct type to be sure to call the method of the
         correct class. */
      device fdev;
      fdev.name ("");
      fdev.native ("");
      fdev.parse (get_device ());
      fhandler_base *fh = build_fh_dev (fdev);
      if (fh)
        fh->set_access (get_openflags ());
      return fh;
    }

  fh_anonymous.set_io_handle (INVALID_HANDLE_VALUE);
  fh_anonymous.set_access (GENERIC_READ | GENERIC_WRITE | GENERIC_EXECUTE);
  return &fh_anonymous;
}
|
|
|
|
void
|
|
mmap_record::free_fh (fhandler_base *fh)
|
|
{
|
|
if (!anonymous ())
|
|
delete fh;
|
|
}
|
|
|
|
/* Allocate a copy of R with room for its trailing page_map, initialize
   the page_map and insert the record at the head of this list.  Returns
   the new record, or NULL if the allocation failed. */
mmap_record *
mmap_list::add_record (mmap_record &r)
{
  /* The page_map needs one DWORD per PGBITS pages. */
  size_t map_bytes = MAPSIZE (PAGE_CNT (r.get_len ())) * sizeof (DWORD);
  mmap_record *rec = (mmap_record *) ccalloc (HEAP_MMAP,
                                              sizeof (mmap_record) + map_bytes,
                                              1);
  if (rec)
    {
      rec->init_page_map (r);
      LIST_INSERT_HEAD (&recs, rec, mr_next);
    }
  return rec;
}
|
|
|
|
void
|
|
mmap_list::set (int nfd, struct stat *st)
|
|
{
|
|
fd = nfd;
|
|
if (!anonymous ())
|
|
{
|
|
/* The fd isn't sufficient since it could already be the fd of another
|
|
file. So we use the inode number as evaluated by fstat to identify
|
|
the file. */
|
|
hash = st ? st->st_ino : (ino_t) 0;
|
|
}
|
|
LIST_INIT (&recs);
|
|
}
|
|
|
|
bool
|
|
mmap_list::del_record (mmap_record *rec)
|
|
{
|
|
LIST_REMOVE (rec, mr_next);
|
|
cfree (rec);
|
|
/* Return true if the list is empty which allows the caller to remove
|
|
this list from the list of lists. */
|
|
return !LIST_FIRST(&recs);
|
|
}
|
|
|
|
/* Try to satisfy a mapping request from the unused pages of a record
   already on this list.

   Returns the address of the recycled pages on success, MAP_FAILED on a
   fatal error, or NULL if no record on this list can be used and the
   caller has to create a new mapping. */
caddr_t
mmap_list::try_map (void *addr, size_t len, int flags, off_t off)
{
  mmap_record *rec;

  if (off == 0 && !fixed (flags))
    {
      /* If MAP_FIXED isn't given, check if this mapping matches into the
         chunk of another already performed mapping. */
      SIZE_T plen = PAGE_CNT (len);
      LIST_FOREACH (rec, &recs, mr_next)
        if (rec->find_unused_pages (plen) != (SIZE_T) -1)
          break;
      if (rec && rec->compatible_flags (flags))
        {
          if ((off = rec->map_pages (len)) == (off_t) -1)
            return (caddr_t) MAP_FAILED;
          return (caddr_t) rec->get_address () + off;
        }
    }
  else if (fixed (flags))
    {
      /* If MAP_FIXED is given, test if the requested area is in an
         unmapped part of an still active mapping.  This can happen
         if a memory region is unmapped and remapped with MAP_FIXED. */
      caddr_t u_addr;
      SIZE_T u_len;

      LIST_FOREACH (rec, &recs, mr_next)
        if (rec->match ((caddr_t) addr, len, u_addr, u_len))
          break;
      if (rec)
        {
          /* The request is fully covered only if the overlap starts at
             addr and is at least len bytes long.  Testing the overlap
             length is required to catch requests reaching beyond the end
             of the record; comparing u_addr + len against addr + len can
             never detect that once u_addr equals addr. */
          if (u_addr > (caddr_t) addr || u_len < len
              || !rec->compatible_flags (flags))
            {
              /* Partial match only, or access mode doesn't match. */
              /* FIXME: Handle partial mappings gracefully if adjacent
                 memory is available. */
              set_errno (EINVAL);
              return (caddr_t) MAP_FAILED;
            }
          if (!rec->map_pages ((caddr_t) addr, len))
            return (caddr_t) MAP_FAILED;
          return (caddr_t) addr;
        }
    }
  return NULL;
}
|
|
|
|
/* Find the list for descriptor FD.  FD == -1 selects the anonymous list.
   Any other FD is looked up by the inode number in ST; if ST is NULL no
   file list matches.  Returns NULL if there is no matching list. */
mmap_list *
mmap_areas::get_list_by_fd (int fd, struct stat *st)
{
  mmap_list *ml;
  LIST_FOREACH (ml, &lists, ml_next)
    {
      if (ml->anonymous ())
        {
          /* The anonymous list carries no inode number, so it must never
             be matched against a file. */
          if (fd == -1)
            return ml;
          continue;
        }
      /* The fd isn't sufficient since it could already be the fd of another
         file.  So we use the inode number as evaluated by fstat to identify
         the file. */
      if (fd != -1 && st && ml->get_hash () == st->st_ino)
        return ml;
    }
  return NULL;
}
|
|
|
|
/* Allocate a new list for descriptor FD / stat info ST and insert it at
   the head of the list of lists.  Returns the new list, or NULL if the
   allocation failed. */
mmap_list *
mmap_areas::add_list (int fd, struct stat *st)
{
  mmap_list *new_list = (mmap_list *) cmalloc (HEAP_MMAP, sizeof (mmap_list));
  if (new_list)
    {
      new_list->set (fd, st);
      LIST_INSERT_HEAD (&lists, new_list, ml_next);
    }
  return new_list;
}
|
|
|
|
void
|
|
mmap_areas::del_list (mmap_list *ml)
|
|
{
|
|
LIST_REMOVE (ml, ml_next);
|
|
cfree (ml);
|
|
}
|
|
|
|
/* This function allows an external function to test if a given memory
|
|
region is part of an mmapped memory region. */
|
|
bool
|
|
is_mmapped_region (caddr_t start_addr, caddr_t end_address)
|
|
{
|
|
size_t len = end_address - start_addr;
|
|
|
|
LIST_LOCK ();
|
|
mmap_list *map_list = mmapped_areas.get_list_by_fd (-1, NULL);
|
|
|
|
if (!map_list)
|
|
{
|
|
LIST_UNLOCK ();
|
|
return false;
|
|
}
|
|
|
|
mmap_record *rec;
|
|
caddr_t u_addr;
|
|
SIZE_T u_len;
|
|
bool ret = false;
|
|
|
|
LIST_FOREACH (rec, &map_list->recs, mr_next)
|
|
{
|
|
if (rec->match (start_addr, len, u_addr, u_len))
|
|
{
|
|
ret = true;
|
|
break;
|
|
}
|
|
}
|
|
LIST_UNLOCK ();
|
|
return ret;
|
|
}
|
|
|
|
/* This function is called from exception_handler when a segmentation
   violation has occurred.  It should also be called from all Cygwin
   functions that want to support passing noreserve mmap page addresses
   to Windows system calls.  In that case, it should be called only after
   a system call indicates that the application buffer passed had an
   invalid virtual address to avoid any performance impact in non-noreserve
   cases.

   Check if the address range is all within noreserve mmap regions.  If so,
   call VirtualAlloc to commit the pages and return MMAP_NORESERVE_COMMITED
   on success.  If the page has __PROT_ATTACH (SUSv3 memory protection
   extension), or if VirtualAlloc fails, return MMAP_RAISE_SIGBUS.
   Otherwise, return MMAP_NONE if the address range is not covered by an
   attached or noreserve map.

   On MMAP_NORESERVE_COMMITED, the exception handler should return 0 to
   allow the application to retry the memory access, or the calling Cygwin
   function should retry the Windows system call. */
|
|
|
|
/* Walk the anonymous mapping list and commit the noreserve pages covering
   [ADDR, ADDR + LEN), after widening the range to allocation granularity
   boundaries.  Returns MMAP_NORESERVE_COMMITED once the whole range has
   been committed, MMAP_RAISE_SIGBUS if a matching record is attached or
   VirtualAlloc fails, and MMAP_NONE otherwise. */
mmap_region_status
mmap_is_attached_or_noreserve (void *addr, size_t len)
{
  mmap_region_status result = MMAP_NONE;

  LIST_LOCK ();
  mmap_list *anon_list = mmapped_areas.get_list_by_fd (-1, NULL);

  /* Align the start down and the length up to allocation granularity. */
  size_t granularity = wincap.allocation_granularity ();
  caddr_t cur_addr = (caddr_t) rounddown ((uintptr_t) addr, granularity);
  len += ((caddr_t) addr - cur_addr);
  len = roundup2 (len, granularity);

  if (anon_list != NULL)
    {
      mmap_record *rec;
      caddr_t u_addr;
      SIZE_T u_len;

      LIST_FOREACH (rec, &anon_list->recs, mr_next)
        {
          if (!rec->match (cur_addr, len, u_addr, u_len))
            continue;
          if (rec->attached ())
            {
              result = MMAP_RAISE_SIGBUS;
              break;
            }
          if (!rec->noreserve ())
            break;

          /* Commit no more than what is still left of the request. */
          size_t commit_len = u_len - (cur_addr - u_addr);
          if (commit_len > len)
            commit_len = len;

          if (!VirtualAlloc (cur_addr, commit_len, MEM_COMMIT,
                             rec->gen_protect ()))
            {
              result = MMAP_RAISE_SIGBUS;
              break;
            }

          cur_addr += commit_len;
          len -= commit_len;
          if (!len)
            {
              result = MMAP_NORESERVE_COMMITED;
              break;
            }
        }
    }

  LIST_UNLOCK ();
  return result;
}
|
|
|
|
/* Create the mapping through FH and record it in the bookkeeping lists.
   If MAP_LIST is NULL, the list for FD / ST is looked up and, if it does
   not exist yet, created.  On any bookkeeping failure the mapping is
   undone via fh->munmap.  Returns the base address of the mapping, or
   NULL on failure. */
static caddr_t
mmap_worker (mmap_list *map_list, fhandler_base *fh, caddr_t base, size_t len,
             int prot, int flags, int fd, off_t off, struct stat *st)
{
  HANDLE h = fh->mmap (&base, len, prot, flags, off);
  if (h == INVALID_HANDLE_VALUE)
    return NULL;

  if (!map_list)
    map_list = mmapped_areas.get_list_by_fd (fd, st);
  if (!map_list)
    map_list = mmapped_areas.add_list (fd, st);
  if (!map_list)
    {
      fh->munmap (h, base, len);
      return NULL;
    }

  mmap_record mmap_rec (fd, h, fh->get_access (), prot, flags, off, len, base);
  if (!map_list->add_record (mmap_rec))
    {
      fh->munmap (h, base, len);
      return NULL;
    }
  return base;
}
|
|
|
|
#ifdef __x86_64__
|
|
|
|
/* The memory region used for memory maps */
|
|
#define MMAP_STORAGE_LOW 0x00800000000L /* Leave 8 Gigs for heap. */
|
|
#define MMAP_STORAGE_HIGH 0x70000000000L /* Leave enough room for OS. */
|
|
|
|
/* FIXME?  Unfortunately the OS doesn't support a top down allocation with
           a ceiling value.  The ZeroBits mechanism only works for
           NtMapViewOfSection and it only evaluates the high bit of ZeroBits
           on 64 bit, so it's pretty much useless for our purposes.

           If the below simple mechanism to perform top-down allocations
           turns out to be too dumb, we need something else.  One idea is to
           divide the space into (3835) 4 Gig chunks and simply store the
           available free space per slot.  Then we can go top down from slot
           to slot and only try slots which are supposed to have enough space.
           Bookkeeping would be very simple and fast. */
|
|
class mmap_allocator
|
|
{
|
|
caddr_t mmap_current_low;
|
|
|
|
public:
|
|
mmap_allocator () : mmap_current_low ((caddr_t) MMAP_STORAGE_HIGH) {}
|
|
|
|
PVOID alloc (PVOID in_addr, SIZE_T in_size, bool fixed)
|
|
{
|
|
MEMORY_BASIC_INFORMATION mbi;
|
|
|
|
SIZE_T size = roundup2 (in_size, wincap.allocation_granularity ());
|
|
/* First check for the given address. */
|
|
if (in_addr)
|
|
{
|
|
/* If it points to a free area, big enough to fulfill the request,
|
|
return the address. */
|
|
if (VirtualQuery (in_addr, &mbi, sizeof mbi)
|
|
&& mbi.State == MEM_FREE
|
|
&& mbi.RegionSize >= size)
|
|
return in_addr;
|
|
/* Otherwise, if MAP_FIXED was given, give up. */
|
|
if (fixed)
|
|
return NULL;
|
|
/* Otherwise, fall through to the usual free space search mechanism. */
|
|
}
|
|
/* Start with the last allocation start address - requested size. */
|
|
caddr_t addr = mmap_current_low - size;
|
|
bool merry_go_round = false;
|
|
do
|
|
{
|
|
/* Did we hit the lower ceiling? If so, restart from the upper
|
|
ceiling, but note that we did it. */
|
|
if (addr < (caddr_t) MMAP_STORAGE_LOW)
|
|
{
|
|
addr = (caddr_t) MMAP_STORAGE_HIGH - size;
|
|
merry_go_round = true;
|
|
}
|
|
/* Shouldn't fail, but better test. */
|
|
if (!VirtualQuery ((PVOID) addr, &mbi, sizeof mbi))
|
|
return NULL;
|
|
/* If the region is free... */
|
|
if (mbi.State == MEM_FREE)
|
|
{
|
|
/* ...and the region is big enough to fulfill the request... */
|
|
if (mbi.RegionSize >= size)
|
|
{
|
|
/* ...note the address as next start address for our simple
|
|
merry-go-round and return the address. */
|
|
mmap_current_low = addr;
|
|
return (PVOID) addr;
|
|
}
|
|
/* Otherwise, subtract what's missing in size and try again. */
|
|
addr -= size - mbi.RegionSize;
|
|
}
|
|
/* If the region isn't free, skip to address below AllocationBase
|
|
and try again. */
|
|
else
|
|
addr = (caddr_t) mbi.AllocationBase - size;
|
|
}
|
|
/* Repeat until we had a full ride on the merry_go_round. */
|
|
while (!merry_go_round || addr >= mmap_current_low);
|
|
return NULL;
|
|
}
|
|
};
|
|
|
|
static mmap_allocator mmap_alloc; /* Inherited by forked child. */
|
|
#endif
|
|
|
|
extern "C" void *
|
|
mmap64 (void *addr, size_t len, int prot, int flags, int fd, off_t off)
|
|
{
|
|
syscall_printf ("addr %p, len %lu, prot %y, flags %y, fd %d, off %Y",
|
|
addr, len, prot, flags, fd, off);
|
|
|
|
caddr_t ret = (caddr_t) MAP_FAILED;
|
|
fhandler_base *fh = NULL;
|
|
fhandler_disk_file *fh_disk_file = NULL; /* Used for reopening a disk file
|
|
when necessary. */
|
|
mmap_list *map_list = NULL;
|
|
size_t orig_len = 0;
|
|
caddr_t base = NULL;
|
|
struct stat st;
|
|
|
|
size_t pagesize = wincap.allocation_granularity ();
|
|
|
|
fh_anonymous.set_io_handle (INVALID_HANDLE_VALUE);
|
|
fh_anonymous.set_access (GENERIC_READ | GENERIC_WRITE | GENERIC_EXECUTE);
|
|
|
|
/* EINVAL error conditions. */
|
|
if (off % pagesize
|
|
|| ((prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC)))
|
|
|| ((flags & MAP_TYPE) != MAP_SHARED
|
|
&& (flags & MAP_TYPE) != MAP_PRIVATE)
|
|
|| (fixed (flags) && ((uintptr_t) addr % pagesize))
|
|
|| !len)
|
|
{
|
|
set_errno (EINVAL);
|
|
goto out;
|
|
}
|
|
|
|
if (!anonymous (flags) && fd != -1)
|
|
{
|
|
/* Ensure that fd is open */
|
|
cygheap_fdget cfd (fd);
|
|
if (cfd < 0)
|
|
goto out;
|
|
|
|
fh = cfd;
|
|
|
|
/* mmap /dev/zero is like MAP_ANONYMOUS. */
|
|
if (fh->get_device () == FH_ZERO)
|
|
flags |= MAP_ANONYMOUS;
|
|
|
|
/* The autoconf mmap test maps a file of size 1 byte. It then tests
|
|
every byte of the entire mapped page of 64K for 0-bytes since that's
|
|
what POSIX requires. The problem is, we can't create that mapping on
|
|
64 bit systems. The file mapping will be only a single page, 4K, and
|
|
since 64 bit systems don't support the AT_ROUND_TO_PAGE flag, the
|
|
remainder of the 64K slot will result in a SEGV when accessed.
|
|
|
|
So, what we do here is cheating for the sake of the autoconf test
|
|
on 64 bit systems. The justification is that there's very likely
|
|
no application actually utilizing the map beyond EOF, and we know that
|
|
all bytes beyond EOF are set to 0 anyway. If this test doesn't work
|
|
on 64 bit systems, it will result in not using mmap at all in a
|
|
package. But we want that mmap is treated as usable by autoconf,
|
|
regardless whether the autoconf test runs on a 32 bit or a 64 bit
|
|
system.
|
|
|
|
Ok, so we know exactly what autoconf is doing. The file is called
|
|
"conftest.txt", it has a size of 1 byte, the mapping size is the
|
|
pagesize, the requested protection is PROT_READ | PROT_WRITE, the
|
|
mapping is MAP_SHARED, the offset is 0.
|
|
|
|
If all these requirements are given, we just return an anonymous map.
|
|
This will help to get over the autoconf test even on 64 bit systems.
|
|
The tests are ordered for speed. */
|
|
#ifdef __x86_64__
|
|
if (1)
|
|
#else
|
|
if (wincap.is_wow64 ())
|
|
#endif
|
|
{
|
|
UNICODE_STRING fname;
|
|
IO_STATUS_BLOCK io;
|
|
FILE_STANDARD_INFORMATION fsi;
|
|
|
|
if (len == pagesize
|
|
&& prot == (PROT_READ | PROT_WRITE)
|
|
&& flags == MAP_SHARED
|
|
&& off == 0
|
|
&& (RtlSplitUnicodePath (fh->pc.get_nt_native_path (), NULL,
|
|
&fname),
|
|
wcscmp (fname.Buffer, L"conftest.txt") == 0)
|
|
&& NT_SUCCESS (NtQueryInformationFile (fh->get_handle (), &io,
|
|
&fsi, sizeof fsi,
|
|
FileStandardInformation))
|
|
&& fsi.EndOfFile.QuadPart == 1LL)
|
|
flags |= MAP_ANONYMOUS;
|
|
}
|
|
}
|
|
|
|
if (anonymous (flags) || fd == -1)
|
|
{
|
|
fh = &fh_anonymous;
|
|
fd = -1;
|
|
flags |= MAP_ANONYMOUS;
|
|
/* Anonymous mappings are always forced to pagesize length with
|
|
no offset. */
|
|
len = roundup2 (len, pagesize);
|
|
off = 0;
|
|
}
|
|
else if (fh->get_device () == FH_FS)
|
|
{
|
|
/* EACCES error conditions according to SUSv3. File must be opened
|
|
for reading, regardless of the requested protection, and file must
|
|
be opened for writing when PROT_WRITE together with MAP_SHARED
|
|
is requested. */
|
|
if (!(fh->get_access () & GENERIC_READ)
|
|
|| (!(fh->get_access () & GENERIC_WRITE)
|
|
&& (prot & PROT_WRITE) && !priv (flags)))
|
|
{
|
|
set_errno (EACCES);
|
|
goto out;
|
|
}
|
|
|
|
/* You can't create mappings with PAGE_EXECUTE protection if
|
|
the file isn't explicitely opened with EXECUTE access. */
|
|
OBJECT_ATTRIBUTES attr;
|
|
NTSTATUS status;
|
|
HANDLE h;
|
|
IO_STATUS_BLOCK io;
|
|
|
|
InitializeObjectAttributes (&attr, &ro_u_empty, fh->pc.objcaseinsensitive (),
|
|
fh->get_handle (), NULL);
|
|
status = NtOpenFile (&h,
|
|
fh->get_access () | GENERIC_EXECUTE | SYNCHRONIZE,
|
|
&attr, &io, FILE_SHARE_VALID_FLAGS,
|
|
FILE_SYNCHRONOUS_IO_NONALERT
|
|
| FILE_OPEN_FOR_BACKUP_INTENT);
|
|
if (NT_SUCCESS (status))
|
|
{
|
|
fh_disk_file = new (ccalloc (HEAP_FHANDLER, 1, sizeof *fh_disk_file))
|
|
fhandler_disk_file;
|
|
fh_disk_file->set_name (fh->pc);
|
|
fh_disk_file->set_io_handle (h);
|
|
fh_disk_file->set_access (fh->get_access () | GENERIC_EXECUTE);
|
|
fh = fh_disk_file;
|
|
}
|
|
else if (prot & PROT_EXEC)
|
|
{
|
|
/* TODO: To be or not to be... I'm opting for refusing this
|
|
mmap request rather than faking it, but that might break
|
|
some non-portable code. */
|
|
set_errno (EACCES);
|
|
goto out;
|
|
}
|
|
|
|
if (fh->fstat_fs (&st))
|
|
{
|
|
__seterrno ();
|
|
goto out;
|
|
}
|
|
off_t fsiz = st.st_size;
|
|
|
|
/* Don't allow file mappings beginning beyond EOF since Windows can't
|
|
handle that POSIX like, unless MAP_AUTOGROW flag is set, which
|
|
mimics Windows behaviour. */
|
|
if (off >= fsiz && !autogrow (flags))
|
|
{
|
|
/* Instead, it seems suitable to return an anonymous mapping of
|
|
the given size instead. Mapped addresses beyond EOF aren't
|
|
written back to the file anyway, so the handling is identical
|
|
to other pages beyond EOF. */
|
|
fh = &fh_anonymous;
|
|
len = roundup2 (len, pagesize);
|
|
prot = PROT_READ | PROT_WRITE | __PROT_ATTACH;
|
|
flags &= MAP_FIXED;
|
|
flags |= MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
|
|
fd = -1;
|
|
off = 0;
|
|
goto go_ahead;
|
|
}
|
|
fsiz -= off;
|
|
/* We're creating the pages beyond EOF as reserved, anonymous pages.
|
|
Note that this isn't done in 64 bit environments since apparently
|
|
64 bit systems don't support the AT_ROUND_TO_PAGE flag, which is
|
|
required to get this right. Too bad. */
|
|
#ifndef __x86_64__
|
|
if (!wincap.is_wow64 ()
|
|
&& (((off_t) len > fsiz && !autogrow (flags))
|
|
|| roundup2 (len, wincap.page_size ())
|
|
< roundup2 (len, pagesize)))
|
|
orig_len = len;
|
|
#endif
|
|
if ((off_t) len > fsiz)
|
|
{
|
|
if (autogrow (flags))
|
|
{
|
|
/* Allow mapping beyond EOF if MAP_AUTOGROW flag is set.
|
|
Check if file has been opened for writing, otherwise
|
|
MAP_AUTOGROW is invalid. */
|
|
if (!(fh->get_access () & GENERIC_WRITE))
|
|
{
|
|
set_errno (EINVAL);
|
|
goto out;
|
|
}
|
|
}
|
|
else
|
|
/* Otherwise, don't map beyond EOF, since Windows would change
|
|
the file to the new length, in contrast to POSIX. */
|
|
len = fsiz;
|
|
}
|
|
|
|
/* If the requested offset + len is <= file size, drop MAP_AUTOGROW.
|
|
This simplifies fhandler::mmap's job. */
|
|
if (autogrow (flags) && (off + (off_t) len) <= fsiz)
|
|
flags &= ~MAP_AUTOGROW;
|
|
}
|
|
|
|
go_ahead:
|
|
|
|
/* MAP_NORESERVE is only supported on private anonymous mappings.
|
|
Remove that bit from flags so that later code doesn't have to
|
|
test all bits. */
|
|
if (noreserve (flags) && (!anonymous (flags) || !priv (flags)))
|
|
flags &= ~MAP_NORESERVE;
|
|
|
|
LIST_LOCK ();
|
|
map_list = mmapped_areas.get_list_by_fd (fd, &st);
|
|
|
|
/* Test if an existing anonymous mapping can be recycled. */
|
|
if (map_list && anonymous (flags))
|
|
{
|
|
caddr_t tried = map_list->try_map (addr, len, flags, off);
|
|
/* try_map returns NULL if no map matched, otherwise it returns
|
|
a valid address, or MAP_FAILED in case of a fatal error. */
|
|
if (tried)
|
|
{
|
|
ret = tried;
|
|
goto out_with_unlock;
|
|
}
|
|
}
|
|
|
|
#ifdef __x86_64__
|
|
addr = mmap_alloc.alloc (addr, orig_len ?: len, fixed (flags));
|
|
#else
|
|
if (orig_len)
|
|
{
|
|
/* If the requested length is bigger than the file size, we try to
|
|
allocate an area of the full size first. This area is immediately
|
|
deallocated and the address we got is used as base address for the
|
|
subsequent real mappings. This ensures that we have enough space
|
|
for the whole thing. */
|
|
orig_len = roundup2 (orig_len, pagesize);
|
|
PVOID newaddr = VirtualAlloc (addr, orig_len, MEM_TOP_DOWN | MEM_RESERVE,
|
|
PAGE_READWRITE);
|
|
if (!newaddr)
|
|
{
|
|
/* If addr is not NULL, but MAP_FIXED isn't given, allow the OS
|
|
to choose. */
|
|
if (addr && !fixed (flags))
|
|
newaddr = VirtualAlloc (NULL, orig_len, MEM_TOP_DOWN | MEM_RESERVE,
|
|
PAGE_READWRITE);
|
|
if (!newaddr)
|
|
{
|
|
__seterrno ();
|
|
goto out_with_unlock;
|
|
}
|
|
}
|
|
if (!VirtualFree (newaddr, 0, MEM_RELEASE))
|
|
{
|
|
__seterrno ();
|
|
goto out_with_unlock;
|
|
}
|
|
addr = newaddr;
|
|
}
|
|
#endif
|
|
|
|
base = mmap_worker (map_list, fh, (caddr_t) addr, len, prot, flags, fd, off,
|
|
&st);
|
|
if (!base)
|
|
goto out_with_unlock;
|
|
|
|
if (orig_len)
|
|
{
|
|
/* If the requested length is bigger than the file size, the
|
|
remainder is created as anonymous mapping. Actually two
|
|
mappings are created, first the remainder from the file end to
|
|
the next 64K boundary as accessible pages with the same
|
|
protection as the file's pages, then as many pages as necessary
|
|
to accommodate the requested length, but as reserved pages which
|
|
raise a SIGBUS when trying to access them. AT_ROUND_TO_PAGE
|
|
and page protection on shared pages is only supported by 32 bit NT,
|
|
so don't even try on WOW64. This is accomplished by not setting
|
|
orig_len on WOW64 above. */
|
|
#if 0
|
|
orig_len = roundup2 (orig_len, pagesize);
|
|
#endif
|
|
len = roundup2 (len, wincap.page_size ());
|
|
if (orig_len - len)
|
|
{
|
|
orig_len -= len;
|
|
size_t valid_page_len = orig_len % pagesize;
|
|
size_t sigbus_page_len = orig_len - valid_page_len;
|
|
|
|
caddr_t at_base = base + len;
|
|
if (valid_page_len)
|
|
{
|
|
prot |= __PROT_FILLER;
|
|
flags &= MAP_SHARED | MAP_PRIVATE;
|
|
flags |= MAP_ANONYMOUS | MAP_FIXED;
|
|
at_base = mmap_worker (NULL, &fh_anonymous, at_base,
|
|
valid_page_len, prot, flags, -1, 0, NULL);
|
|
if (!at_base)
|
|
{
|
|
fh->munmap (fh->get_handle (), base, len);
|
|
set_errno (ENOMEM);
|
|
goto out_with_unlock;
|
|
}
|
|
at_base += valid_page_len;
|
|
}
|
|
if (sigbus_page_len)
|
|
{
|
|
prot = PROT_READ | PROT_WRITE | __PROT_ATTACH;
|
|
flags = MAP_ANONYMOUS | MAP_NORESERVE | MAP_FIXED;
|
|
at_base = mmap_worker (NULL, &fh_anonymous, at_base,
|
|
sigbus_page_len, prot, flags, -1, 0, NULL);
|
|
if (!at_base)
|
|
debug_printf ("Warning: Mapping beyond EOF failed, %E");
|
|
}
|
|
}
|
|
}
|
|
|
|
ret = base;
|
|
|
|
out_with_unlock:
|
|
LIST_UNLOCK ();
|
|
|
|
out:
|
|
|
|
if (fh_disk_file)
|
|
{
|
|
NtClose (fh_disk_file->get_handle ());
|
|
delete fh;
|
|
}
|
|
|
|
syscall_printf ("%p = mmap() ", ret);
|
|
return ret;
|
|
}
|
|
|
|
#ifdef __x86_64__
|
|
EXPORT_ALIAS (mmap64, mmap)
|
|
#else
|
|
extern "C" void *
|
|
mmap (void *addr, size_t len, int prot, int flags, int fd, _off_t off)
|
|
{
|
|
return mmap64 (addr, len, prot, flags, fd, (off_t)off);
|
|
}
|
|
#endif
|
|
|
|
/* munmap () removes all mmapped pages between addr and addr+len. */
|
|
|
|
extern "C" int
|
|
munmap (void *addr, size_t len)
|
|
{
|
|
syscall_printf ("munmap (addr %p, len %lu)", addr, len);
|
|
|
|
/* Error conditions according to SUSv3 */
|
|
if (!addr || !len || check_invalid_virtual_addr (addr, len))
|
|
{
|
|
set_errno (EINVAL);
|
|
return -1;
|
|
}
|
|
size_t pagesize = wincap.allocation_granularity ();
|
|
if (((uintptr_t) addr % pagesize) || !len)
|
|
{
|
|
set_errno (EINVAL);
|
|
return -1;
|
|
}
|
|
len = roundup2 (len, pagesize);
|
|
|
|
LIST_LOCK ();
|
|
|
|
/* Iterate over maps, unmap pages between addr and addr+len in all maps. */
|
|
mmap_list *map_list, *next_map_list;
|
|
LIST_FOREACH_SAFE (map_list, &mmapped_areas.lists, ml_next, next_map_list)
|
|
{
|
|
mmap_record *rec, *next_rec;
|
|
caddr_t u_addr;
|
|
SIZE_T u_len;
|
|
|
|
LIST_FOREACH_SAFE (rec, &map_list->recs, mr_next, next_rec)
|
|
{
|
|
if (!rec->match ((caddr_t) addr, len, u_addr, u_len))
|
|
continue;
|
|
if (rec->unmap_pages (u_addr, u_len))
|
|
{
|
|
/* The whole record has been unmapped, so we now actually
|
|
unmap it from the system in full length... */
|
|
fhandler_base *fh = rec->alloc_fh ();
|
|
fh->munmap (rec->get_handle (),
|
|
rec->get_address (),
|
|
rec->get_len ());
|
|
rec->free_fh (fh);
|
|
|
|
/* ...and delete the record. */
|
|
if (map_list->del_record (rec))
|
|
{
|
|
/* Yay, the last record has been removed from the list,
|
|
we can remove the list now, too. */
|
|
mmapped_areas.del_list (map_list);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
LIST_UNLOCK ();
|
|
syscall_printf ("0 = munmap(): %p", addr);
|
|
return 0;
|
|
}
|
|
|
|
/* Sync file with memory. Ignore flags for now. */
|
|
|
|
extern "C" int
|
|
msync (void *addr, size_t len, int flags)
|
|
{
|
|
int ret = -1;
|
|
mmap_list *map_list;
|
|
|
|
syscall_printf ("msync (addr: %p, len %lu, flags %y)", addr, len, flags);
|
|
|
|
pthread_testcancel ();
|
|
|
|
LIST_LOCK ();
|
|
|
|
if (((uintptr_t) addr % wincap.allocation_granularity ())
|
|
|| (flags & ~(MS_ASYNC | MS_SYNC | MS_INVALIDATE))
|
|
|| ((flags & (MS_ASYNC | MS_SYNC)) == (MS_ASYNC | MS_SYNC)))
|
|
{
|
|
set_errno (EINVAL);
|
|
goto out;
|
|
}
|
|
#if 0 /* If I only knew why I did that... */
|
|
len = roundup2 (len, wincap.allocation_granularity ());
|
|
#endif
|
|
|
|
/* Iterate over maps, looking for the mmapped area. Error if not found. */
|
|
LIST_FOREACH (map_list, &mmapped_areas.lists, ml_next)
|
|
{
|
|
mmap_record *rec;
|
|
LIST_FOREACH (rec, &map_list->recs, mr_next)
|
|
{
|
|
if (rec->access ((caddr_t) addr))
|
|
{
|
|
/* Check whole area given by len. */
|
|
for (SIZE_T i = wincap.allocation_granularity ();
|
|
i < len;
|
|
i += wincap.allocation_granularity ())
|
|
if (!rec->access ((caddr_t) addr + i))
|
|
{
|
|
set_errno (ENOMEM);
|
|
goto out;
|
|
}
|
|
fhandler_base *fh = rec->alloc_fh ();
|
|
ret = fh->msync (rec->get_handle (), (caddr_t) addr, len, flags);
|
|
rec->free_fh (fh);
|
|
goto out;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* No matching mapping exists. */
|
|
set_errno (ENOMEM);
|
|
|
|
out:
|
|
LIST_UNLOCK ();
|
|
syscall_printf ("%R = msync()", ret);
|
|
return ret;
|
|
}
|
|
|
|
/* Set memory protection */
|
|
|
|
extern "C" int
|
|
mprotect (void *addr, size_t len, int prot)
|
|
{
|
|
bool in_mapped = false;
|
|
bool ret = false;
|
|
DWORD old_prot;
|
|
DWORD new_prot = 0;
|
|
|
|
syscall_printf ("mprotect (addr: %p, len %lu, prot %y)", addr, len, prot);
|
|
|
|
/* See comment in mmap64 for a description. */
|
|
size_t pagesize = wincap.allocation_granularity ();
|
|
if ((uintptr_t) addr % pagesize)
|
|
{
|
|
set_errno (EINVAL);
|
|
goto out;
|
|
}
|
|
len = roundup2 (len, pagesize);
|
|
|
|
LIST_LOCK ();
|
|
|
|
/* Iterate over maps, protect pages between addr and addr+len in all maps. */
|
|
mmap_list *map_list;
|
|
LIST_FOREACH (map_list, &mmapped_areas.lists, ml_next)
|
|
{
|
|
mmap_record *rec;
|
|
caddr_t u_addr;
|
|
SIZE_T u_len;
|
|
|
|
LIST_FOREACH (rec, &map_list->recs, mr_next)
|
|
{
|
|
if (!rec->match ((caddr_t) addr, len, u_addr, u_len))
|
|
continue;
|
|
in_mapped = true;
|
|
if (rec->attached ())
|
|
continue;
|
|
new_prot = gen_protect (prot, rec->get_flags ());
|
|
if (rec->noreserve ())
|
|
{
|
|
if (new_prot == PAGE_NOACCESS)
|
|
ret = VirtualFree (u_addr, u_len, MEM_DECOMMIT);
|
|
else
|
|
ret = !!VirtualAlloc (u_addr, u_len, MEM_COMMIT, new_prot);
|
|
}
|
|
else
|
|
ret = VirtualProtect (u_addr, u_len, new_prot, &old_prot);
|
|
if (!ret)
|
|
{
|
|
__seterrno ();
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
LIST_UNLOCK ();
|
|
|
|
if (!in_mapped)
|
|
{
|
|
int flags = 0;
|
|
MEMORY_BASIC_INFORMATION mbi;
|
|
|
|
ret = VirtualQuery (addr, &mbi, sizeof mbi);
|
|
if (ret)
|
|
{
|
|
/* If write protection is requested, check if the page was
|
|
originally protected writecopy. In this case call VirtualProtect
|
|
requesting PAGE_WRITECOPY, otherwise the VirtualProtect will fail
|
|
on NT version >= 5.0 */
|
|
if (prot & PROT_WRITE)
|
|
{
|
|
if (mbi.AllocationProtect == PAGE_WRITECOPY
|
|
|| mbi.AllocationProtect == PAGE_EXECUTE_WRITECOPY)
|
|
flags = MAP_PRIVATE;
|
|
}
|
|
new_prot = gen_protect (prot, flags);
|
|
if (new_prot != PAGE_NOACCESS && mbi.State == MEM_RESERVE)
|
|
ret = VirtualAlloc (addr, len, MEM_COMMIT, new_prot);
|
|
else
|
|
ret = VirtualProtect (addr, len, new_prot, &old_prot);
|
|
}
|
|
if (!ret)
|
|
__seterrno ();
|
|
}
|
|
|
|
out:
|
|
|
|
syscall_printf ("%R = mprotect ()", ret ? 0 : -1);
|
|
return ret ? 0 : -1;
|
|
}
|
|
|
|
extern "C" int
|
|
mlock (const void *addr, size_t len)
|
|
{
|
|
int ret = -1;
|
|
|
|
/* Align address and length values to page size. */
|
|
size_t pagesize = wincap.allocation_granularity ();
|
|
PVOID base = (PVOID) rounddown((uintptr_t) addr, pagesize);
|
|
SIZE_T size = roundup2 (((uintptr_t) addr - (uintptr_t) base) + len,
|
|
pagesize);
|
|
NTSTATUS status = 0;
|
|
do
|
|
{
|
|
status = NtLockVirtualMemory (NtCurrentProcess (), &base, &size,
|
|
MAP_PROCESS);
|
|
if (status == STATUS_WORKING_SET_QUOTA)
|
|
{
|
|
/* The working set is too small, try to increase it so that the
|
|
requested locking region fits in. Unfortunately I don't know
|
|
any function which would return the currently locked pages of
|
|
a process (no go with NtQueryVirtualMemory).
|
|
|
|
So, except for the border cases, what we do here is something
|
|
really embarrassing. We raise the working set by 64K at a time
|
|
and retry, until either we fail to raise the working set size
|
|
further, or until NtLockVirtualMemory returns successfully (or
|
|
with another error). */
|
|
SIZE_T min, max;
|
|
if (!GetProcessWorkingSetSize (GetCurrentProcess (), &min, &max))
|
|
{
|
|
set_errno (ENOMEM);
|
|
break;
|
|
}
|
|
if (min < size)
|
|
min = size + pagesize;
|
|
else if (size < pagesize)
|
|
min += size;
|
|
else
|
|
min += pagesize;
|
|
if (max < min)
|
|
max = min;
|
|
if (!SetProcessWorkingSetSize (GetCurrentProcess (), min, max))
|
|
{
|
|
set_errno (ENOMEM);
|
|
break;
|
|
}
|
|
}
|
|
else if (!NT_SUCCESS (status))
|
|
__seterrno_from_nt_status (status);
|
|
else
|
|
ret = 0;
|
|
}
|
|
while (status == STATUS_WORKING_SET_QUOTA);
|
|
|
|
syscall_printf ("%R = mlock(%p, %lu)", ret, addr, len);
|
|
return ret;
|
|
}
|
|
|
|
extern "C" int
|
|
munlock (const void *addr, size_t len)
|
|
{
|
|
int ret = -1;
|
|
|
|
/* Align address and length values to page size. */
|
|
size_t pagesize = wincap.allocation_granularity ();
|
|
PVOID base = (PVOID) rounddown((uintptr_t) addr, pagesize);
|
|
SIZE_T size = roundup2 (((uintptr_t) addr - (uintptr_t) base) + len,
|
|
pagesize);
|
|
NTSTATUS status = NtUnlockVirtualMemory (NtCurrentProcess (), &base, &size,
|
|
MAP_PROCESS);
|
|
if (!NT_SUCCESS (status))
|
|
__seterrno_from_nt_status (status);
|
|
else
|
|
ret = 0;
|
|
|
|
syscall_printf ("%R = munlock(%p, %lu)", ret, addr, len);
|
|
return ret;
|
|
}
|
|
|
|
/* This is required until Mingw-w64 catches up with newer functions. */
|
|
extern "C" WINAPI DWORD DiscardVirtualMemory (PVOID, SIZE_T);
|
|
|
|
extern "C" int
|
|
posix_madvise (void *addr, size_t len, int advice)
|
|
{
|
|
int ret = 0;
|
|
/* Check parameters. */
|
|
if (advice < POSIX_MADV_NORMAL || advice > POSIX_MADV_DONTNEED
|
|
|| !len)
|
|
{
|
|
ret = EINVAL;
|
|
goto out;
|
|
}
|
|
|
|
/* Check requested memory area. */
|
|
MEMORY_BASIC_INFORMATION m;
|
|
char *p, *endp;
|
|
|
|
for (p = (char *) addr, endp = p + len;
|
|
p < endp;
|
|
p = (char *) m.BaseAddress + m.RegionSize)
|
|
{
|
|
if (!VirtualQuery (p, &m, sizeof m) || m.State == MEM_FREE)
|
|
{
|
|
ret = ENOMEM;
|
|
break;
|
|
}
|
|
}
|
|
if (ret)
|
|
goto out;
|
|
switch (advice)
|
|
{
|
|
case POSIX_MADV_WILLNEED:
|
|
{
|
|
/* Align address and length values to page size. */
|
|
size_t pagesize = wincap.allocation_granularity ();
|
|
PVOID base = (PVOID) rounddown ((uintptr_t) addr, pagesize);
|
|
SIZE_T size = roundup2 (((uintptr_t) addr - (uintptr_t) base)
|
|
+ len, pagesize);
|
|
WIN32_MEMORY_RANGE_ENTRY me = { base, size };
|
|
if (!PrefetchVirtualMemory (GetCurrentProcess (), 1, &me, 0)
|
|
&& GetLastError () != ERROR_PROC_NOT_FOUND)
|
|
{
|
|
/* FIXME 2015-08-27: On W10 build 10240 under WOW64,
|
|
PrefetchVirtualMemory always returns ERROR_INVALID_PARAMETER
|
|
for some reason. If we're running on W10 WOW64, ignore this
|
|
error. This has been fixed in W10 1511. */
|
|
if (!wincap.has_broken_prefetchvm ()
|
|
|| GetLastError () != ERROR_INVALID_PARAMETER)
|
|
ret = EINVAL;
|
|
}
|
|
}
|
|
break;
|
|
case POSIX_MADV_DONTNEED:
|
|
{
|
|
/* Align address and length values to page size. */
|
|
size_t pagesize = wincap.allocation_granularity ();
|
|
PVOID base = (PVOID) rounddown ((uintptr_t) addr, pagesize);
|
|
SIZE_T size = roundup2 (((uintptr_t) addr - (uintptr_t) base)
|
|
+ len, pagesize);
|
|
DWORD err = DiscardVirtualMemory (base, size);
|
|
/* DiscardVirtualMemory is unfortunately pretty crippled:
|
|
On copy-on-write pages it returns ERROR_INVALID_PARAMETER, on
|
|
any file-backed memory map it returns ERROR_USER_MAPPED_FILE.
|
|
Since POSIX_MADV_DONTNEED is advisory only anyway, let them
|
|
slip through. */
|
|
switch (err)
|
|
{
|
|
case ERROR_PROC_NOT_FOUND:
|
|
case ERROR_USER_MAPPED_FILE:
|
|
case 0:
|
|
break;
|
|
case ERROR_INVALID_PARAMETER:
|
|
{
|
|
ret = EINVAL;
|
|
/* Check if the region contains copy-on-write pages.*/
|
|
for (p = (char *) addr, endp = p + len;
|
|
p < endp;
|
|
p = (char *) m.BaseAddress + m.RegionSize)
|
|
{
|
|
if (VirtualQuery (p, &m, sizeof m)
|
|
&& m.State == MEM_COMMIT
|
|
&& m.Protect
|
|
& (PAGE_EXECUTE_WRITECOPY | PAGE_WRITECOPY))
|
|
{
|
|
/* Yes, let this slip. */
|
|
ret = 0;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
default:
|
|
ret = geterrno_from_win_error (err);
|
|
break;
|
|
}
|
|
}
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
out:
|
|
syscall_printf ("%d = posix_madvise(%p, %lu, %d)", ret, addr, len, advice);
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
* Base implementation:
|
|
*
|
|
* `mmap' returns ENODEV as documented in SUSv2.
|
|
* In contrast to the global function implementation, the member function
|
|
* `mmap' has to return the mapped base address in `addr' and the handle to
|
|
* the mapping object as return value. In case of failure, the fhandler
|
|
* mmap has to close that handle by itself and return INVALID_HANDLE_VALUE.
|
|
*
|
|
* `munmap' and `msync' get the handle to the mapping object as first parameter
|
|
* additionally.
|
|
*/
|
|
/* Base-class mmap: always fails.  Sets errno to ENODEV and returns
   INVALID_HANDLE_VALUE; none of the arguments are used. */
HANDLE
fhandler_base::mmap (caddr_t *addr, size_t len, int prot, int flags, off_t off)
{
  HANDLE no_mapping = INVALID_HANDLE_VALUE;

  set_errno (ENODEV);
  return no_mapping;
}
|
|
|
|
int
|
|
fhandler_base::munmap (HANDLE h, caddr_t addr, size_t len)
|
|
{
|
|
set_errno (ENODEV);
|
|
return -1;
|
|
}
|
|
|
|
int
|
|
fhandler_base::msync (HANDLE h, caddr_t addr, size_t len, int flags)
|
|
{
|
|
set_errno (ENODEV);
|
|
return -1;
|
|
}
|
|
|
|
bool
|
|
fhandler_base::fixup_mmap_after_fork (HANDLE h, int prot, int flags,
|
|
off_t offset, SIZE_T size,
|
|
void *address)
|
|
{
|
|
set_errno (ENODEV);
|
|
return -1;
|
|
}
|
|
|
|
/* Implementation for anonymous maps. Using fhandler_dev_zero looks
|
|
quite the natural way. */
|
|
/* Create an anonymous mapping of len bytes.

   *addr is the requested address on entry (NULL lets the system choose)
   and receives the address of the mapping on success.  Private mappings
   which are not filler pages are allocated with VirtualAlloc and are
   identified by the fake handle value 1; everything else uses
   CreateMapping/MapView and returns the handle of the mapping object.

   On failure errno is set and INVALID_HANDLE_VALUE is returned; nothing
   stays allocated.  With MAP_FIXED, getting any address other than *addr
   counts as failure (EINVAL). */
HANDLE
fhandler_dev_zero::mmap (caddr_t *addr, size_t len, int prot,
                         int flags, off_t off)
{
  HANDLE h;
  void *base;

  if (priv (flags) && !filler (prot))
    {
      /* Private anonymous maps are now implemented using VirtualAlloc.
         This has two advantages:

         - VirtualAlloc has a smaller footprint than a copy-on-write
           anonymous map.

         - It supports decommitting using VirtualFree, in contrast to
           section maps.  This allows minimum footprint private maps,
           when using the (non-POSIX, yay-Linux) MAP_NORESERVE flag.
      */
      DWORD protect = gen_protect (prot, flags);
      DWORD alloc_type = MEM_TOP_DOWN | MEM_RESERVE
                         | (noreserve (flags) ? 0 : MEM_COMMIT);
      base = VirtualAlloc (*addr, len, alloc_type, protect);
      /* If a non-NULL address hint was given without MAP_FIXED, allow the
         OS to choose.  The test has to look at the hint itself (*addr);
         addr is the never-NULL pointer to it, so testing addr would retry
         the identical NULL-address request a second time. */
      if (!base && *addr && !fixed (flags))
        base = VirtualAlloc (NULL, len, alloc_type, protect);
      if (!base || (fixed (flags) && base != *addr))
        {
          if (!base)
            __seterrno ();
          else
            {
              VirtualFree (base, 0, MEM_RELEASE);
              set_errno (EINVAL);
              debug_printf ("VirtualAlloc: address shift with MAP_FIXED given");
            }
          return INVALID_HANDLE_VALUE;
        }
      h = (HANDLE) 1; /* Fake handle to indicate success. */
    }
  else
    {
      h = CreateMapping (get_handle (), len, off, get_access (), prot, flags);
      if (!h)
        {
          __seterrno ();
          debug_printf ("CreateMapping failed with %E");
          return INVALID_HANDLE_VALUE;
        }

      base = MapView (h, *addr, len, get_access(), prot, flags, off);
      if (!base || (fixed (flags) && base != *addr))
        {
          if (!base)
            __seterrno ();
          else
            {
              NtUnmapViewOfSection (NtCurrentProcess (), base);
              set_errno (EINVAL);
              debug_printf ("MapView: address shift with MAP_FIXED given");
            }
          NtClose (h);
          return INVALID_HANDLE_VALUE;
        }
    }
  *addr = (caddr_t) base;
  return h;
}
|
|
|
|
int
|
|
fhandler_dev_zero::munmap (HANDLE h, caddr_t addr, size_t len)
|
|
{
|
|
if (h == (HANDLE) 1) /* See fhandler_dev_zero::mmap. */
|
|
VirtualFree (addr, 0, MEM_RELEASE);
|
|
else
|
|
{
|
|
NtUnmapViewOfSection (NtCurrentProcess (), addr);
|
|
NtClose (h);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int
|
|
fhandler_dev_zero::msync (HANDLE h, caddr_t addr, size_t len, int flags)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
bool
|
|
fhandler_dev_zero::fixup_mmap_after_fork (HANDLE h, int prot, int flags,
|
|
off_t offset, SIZE_T size,
|
|
void *address)
|
|
{
|
|
/* Re-create the map */
|
|
void *base;
|
|
if (priv (flags) && !filler (prot))
|
|
{
|
|
DWORD alloc_type = MEM_RESERVE | (noreserve (flags) ? 0 : MEM_COMMIT);
|
|
/* Always allocate R/W so that ReadProcessMemory doesn't fail
|
|
due to a non-writable target address. The protection is
|
|
set to the correct one anyway in the fixup loop. */
|
|
base = VirtualAlloc (address, size, alloc_type, PAGE_READWRITE);
|
|
}
|
|
else
|
|
base = MapView (h, address, size, get_access (), prot, flags, offset);
|
|
if (base != address)
|
|
{
|
|
MEMORY_BASIC_INFORMATION m;
|
|
VirtualQuery (address, &m, sizeof (m));
|
|
system_printf ("requested %p != %p mem alloc base %p, state %y, "
|
|
"size %lu, %E", address, base, m.AllocationBase, m.State,
|
|
m.RegionSize);
|
|
}
|
|
return base == address;
|
|
}
|
|
|
|
/* Implementation for disk files and anonymous mappings. */
|
|
/* Map len bytes of this file starting at offset off.

   *addr is the requested address on entry and receives the address of the
   view on success; the return value is then the handle of the mapping
   object.  On failure errno is set, the mapping object is closed and
   INVALID_HANDLE_VALUE is returned.  With MAP_FIXED, a view at any address
   other than *addr is unmapped again and reported as EINVAL. */
HANDLE
fhandler_disk_file::mmap (caddr_t *addr, size_t len, int prot,
                          int flags, off_t off)
{
  HANDLE section = CreateMapping (get_handle (), len, off, get_access (),
                                  prot, flags);
  if (!section)
    {
      __seterrno ();
      debug_printf ("CreateMapping failed with %E");
      return INVALID_HANDLE_VALUE;
    }

  void *view = MapView (section, *addr, len, get_access (), prot, flags, off);

  /* Success: a view exists and, if MAP_FIXED was given, it sits exactly
     at the requested address. */
  if (view && (!fixed (flags) || view == *addr))
    {
      *addr = (caddr_t) view;
      return section;
    }

  if (view)
    {
      NtUnmapViewOfSection (NtCurrentProcess (), view);
      set_errno (EINVAL);
      debug_printf ("MapView: address shift with MAP_FIXED given");
    }
  else
    __seterrno ();
  NtClose (section);
  return INVALID_HANDLE_VALUE;
}
|
|
|
|
int
|
|
fhandler_disk_file::munmap (HANDLE h, caddr_t addr, size_t len)
|
|
{
|
|
NtUnmapViewOfSection (NtCurrentProcess (), addr);
|
|
NtClose (h);
|
|
return 0;
|
|
}
|
|
|
|
int
|
|
fhandler_disk_file::msync (HANDLE h, caddr_t addr, size_t len, int flags)
|
|
{
|
|
const int retry = 100;
|
|
/* The wisdom of google tells us that FlushViewOfFile may fail with
|
|
ERROR_LOCK_VIOLATION if "if the memory system is writing dirty
|
|
pages to disk". And, we've seen reports of this happening in the
|
|
cygwin list. So retry 99 times and hope we get lucky. */
|
|
for (int i = 0; i < retry; i++)
|
|
if (FlushViewOfFile (addr, len))
|
|
{
|
|
/* FlushViewOfFile just triggers the action and returns immediately,
|
|
so it's equivalent to MS_ASYNC. MS_SYNC requires another call to
|
|
FlushFileBuffers. */
|
|
if (flags & MS_SYNC)
|
|
FlushFileBuffers (h);
|
|
return 0;
|
|
}
|
|
else if (GetLastError () != ERROR_LOCK_VIOLATION)
|
|
break;
|
|
else if (i < (retry - 1))
|
|
yield ();
|
|
|
|
__seterrno ();
|
|
return -1;
|
|
}
|
|
|
|
bool
|
|
fhandler_disk_file::fixup_mmap_after_fork (HANDLE h, int prot, int flags,
|
|
off_t offset, SIZE_T size,
|
|
void *address)
|
|
{
|
|
/* Re-create the map */
|
|
void *base = MapView (h, address, size, get_access (), prot, flags, offset);
|
|
if (base != address)
|
|
{
|
|
MEMORY_BASIC_INFORMATION m;
|
|
VirtualQuery (address, &m, sizeof (m));
|
|
system_printf ("requested %p != %p mem alloc base %p, state %y, "
|
|
"size %lu, %E", address, base, m.AllocationBase, m.State,
|
|
m.RegionSize);
|
|
}
|
|
return base == address;
|
|
}
|
|
|
|
/* Call to re-create all the file mappings in a forked child. Called from
|
|
the child in initialization. At this point we are passed a valid
|
|
mmapped_areas map, and all the HANDLE's are valid for the child, but
|
|
none of the mapped areas are in our address space. We need to iterate
|
|
through the map, doing the MapViewOfFile calls. */
|
|
|
|
int __stdcall
|
|
fixup_mmaps_after_fork (HANDLE parent)
|
|
{
|
|
/* Iterate over maps */
|
|
mmap_list *map_list;
|
|
LIST_FOREACH (map_list, &mmapped_areas.lists, ml_next)
|
|
{
|
|
mmap_record *rec;
|
|
LIST_FOREACH (rec, &map_list->recs, mr_next)
|
|
{
|
|
debug_printf ("fd %d, h %p, address %p, len %ly, prot: %y, "
|
|
"flags: %y, offset %Y",
|
|
rec->get_fd (), rec->get_handle (), rec->get_address (),
|
|
rec->get_len (), rec->get_prot (), rec->get_flags (),
|
|
rec->get_offset ());
|
|
|
|
fhandler_base *fh = rec->alloc_fh ();
|
|
bool ret = fh->fixup_mmap_after_fork (rec->get_handle (),
|
|
rec->get_prot (),
|
|
rec->get_flags () | MAP_FIXED,
|
|
rec->get_offset (),
|
|
rec->get_len (),
|
|
rec->get_address ());
|
|
rec->free_fh (fh);
|
|
|
|
if (!ret)
|
|
{
|
|
if (rec->attached ())
|
|
{
|
|
system_printf ("Warning: Fixup mapping beyond EOF failed");
|
|
continue;
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
MEMORY_BASIC_INFORMATION mbi;
|
|
DWORD old_prot;
|
|
|
|
for (char *address = rec->get_address ();
|
|
address < rec->get_address () + rec->get_len ();
|
|
address += mbi.RegionSize)
|
|
{
|
|
if (!VirtualQueryEx (parent, address, &mbi, sizeof mbi))
|
|
{
|
|
system_printf ("VirtualQueryEx failed for MAP_PRIVATE "
|
|
"address %p, %E", address);
|
|
return -1;
|
|
}
|
|
/* Just skip reserved pages. */
|
|
if (mbi.State == MEM_RESERVE)
|
|
continue;
|
|
/* Copy-on-write pages must be copied to the child to circumvent
|
|
a strange notion how copy-on-write is supposed to work. */
|
|
if (rec->priv ())
|
|
{
|
|
if (rec->noreserve ()
|
|
&& !VirtualAlloc (address, mbi.RegionSize,
|
|
MEM_COMMIT, PAGE_READWRITE))
|
|
{
|
|
system_printf ("VirtualAlloc failed for MAP_PRIVATE "
|
|
"address %p, %E", address);
|
|
return -1;
|
|
}
|
|
if (mbi.Protect == PAGE_NOACCESS
|
|
&& !VirtualProtectEx (parent, address, mbi.RegionSize,
|
|
PAGE_READONLY, &old_prot))
|
|
{
|
|
system_printf ("VirtualProtectEx failed for MAP_PRIVATE "
|
|
"address %p, %E", address);
|
|
return -1;
|
|
}
|
|
else if ((mbi.AllocationProtect == PAGE_WRITECOPY
|
|
|| mbi.AllocationProtect == PAGE_EXECUTE_WRITECOPY)
|
|
&& (mbi.Protect == PAGE_READWRITE
|
|
|| mbi.Protect == PAGE_EXECUTE_READWRITE))
|
|
/* A WRITECOPY page which has been written to is set to
|
|
READWRITE, but that's an incompatible protection to
|
|
set the page to. Convert the protection to WRITECOPY
|
|
so that the below VirtualProtect doesn't fail. */
|
|
mbi.Protect <<= 1;
|
|
|
|
if (!ReadProcessMemory (parent, address, address,
|
|
mbi.RegionSize, NULL))
|
|
{
|
|
system_printf ("ReadProcessMemory failed for MAP_PRIVATE "
|
|
"address %p, %E", address);
|
|
return -1;
|
|
}
|
|
if (mbi.Protect == PAGE_NOACCESS
|
|
&& !VirtualProtectEx (parent, address, mbi.RegionSize,
|
|
PAGE_NOACCESS, &old_prot))
|
|
{
|
|
system_printf ("WARNING: VirtualProtectEx to return to "
|
|
"PAGE_NOACCESS state in parent failed for "
|
|
"MAP_PRIVATE address %p, %E", address);
|
|
return -1;
|
|
}
|
|
}
|
|
/* Set child page protection to parent protection */
|
|
if (!VirtualProtect (address, mbi.RegionSize,
|
|
mbi.Protect, &old_prot))
|
|
{
|
|
MEMORY_BASIC_INFORMATION m;
|
|
VirtualQuery (address, &m, sizeof m);
|
|
system_printf ("VirtualProtect failed for "
|
|
"address %p, "
|
|
"parentstate: %y, "
|
|
"state: %y, "
|
|
"parentprot: %y, "
|
|
"prot: %y, %E",
|
|
address, mbi.State, m.State,
|
|
mbi.Protect, m.Protect);
|
|
return -1;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
debug_printf ("succeeded");
|
|
return 0;
|
|
}
|