eygle.com   eygle.com
eygle.com eygle
eygle.com  
 

« 并购狂潮-EMC的动作 | Blog首页 | 给自己的礼物-谁是谁的麦琪? »

如何判断Solaris上是否支持异步IO

Solaris Internals这本书上,作者提到一段代码用以检测系统是否支持异步I/O.
感觉很有助于理解Solaris系统异步IO的实现以及系统调用。
列在这里和大家分享.

1.源码:

/*
* Quick kaio test. Read 1k bytes from a file using async I/O.
* To compile:
* cc -o aio aio.c -laio
* To run:
* aio file_name
*/
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/fcntl.h>
#include <sys/aio.h>
#include <sys/asynch.h>

#define BSIZE 1024

/*
 * Entry point for the kaio probe.
 *
 * Opens the file named by argv[1] read-only, queues one BSIZE-byte
 * asynchronous read at offset 0 with aioread(), blocks in aiowait()
 * until a request completes, and reports the outcome.
 *
 * Returns EXIT_SUCCESS when exactly BSIZE bytes were read; EXIT_FAILURE
 * on a missing argument, a failed open/aioread/aiowait, an I/O error
 * reported through the result structure, or a short read.
 */
int
main(int argc, char *argv[])
{
        aio_result_t res;
        char buf[BSIZE];
        int fd;
        int status = EXIT_FAILURE;

        /* argv[1] is dereferenced below; refuse to run without it. */
        if (argc < 2) {
                fprintf(stderr, "usage: %s file_name\n",
                    argc > 0 ? argv[0] : "aio");
                return EXIT_FAILURE;
        }

        fd = open(argv[1], O_RDONLY);
        if (fd == -1) {
                perror("open");
                return EXIT_FAILURE;
        }

        if (aioread(fd, buf, BSIZE, 0L, SEEK_SET, &res) == -1) {
                /* The request was never queued; errno describes why. */
                perror("aioread");
        } else if (aiowait(NULL) == (aio_result_t *)-1) {
                /* A NULL timeout blocks until a request completes. */
                perror("aiowait");
        } else if (res.aio_return == BSIZE) {
                printf("aio succeeded\n");
                status = EXIT_SUCCESS;
        } else if (res.aio_return == -1) {
                /*
                 * The completed request's error code lives in the result
                 * structure, not in errno, so copy it over before perror().
                 */
                errno = res.aio_errno;
                perror("aio");
        } else {
                /* Fewer than BSIZE bytes is not an errno-style failure. */
                fprintf(stderr, "aio: short read (%ld of %d bytes)\n",
                    (long)res.aio_return, BSIZE);
        }

        close(fd);
        return status;
}

2.编译:
# cc -o aio aio.c -laio
3.运行:
首先测试裸设备:
# truss -t kaio,lwp_create ./aio /dev/rdsk/c0t0d0s1
            kaio(5, 0xFFBEF640, 0x00000000, 0xFF21FB68, 0x00000000, 0xFFBEF648, 0x00000000) = 0
            lwp_create(0xFFBEF640, 0, 0xFF21FF5C)           = 2
            lwp_create()    (returning as new lwp ...)      = 0
            kaio(AIOREAD, 3, 0xFFBEF9C0, 1024, 0, 0xFFBEFDC0) = 0
            kaio(AIOWAIT, 0x00000000)                       = 4290706880
            aio succeeded
            
我们看到,Solaris对于裸设备,最终异步IO调用通过AIOREAD来实现,异步操作成功。
而对于常规文件系统文件的读取:
# truss -t kaio,lwp_create ./aio /a.sh
            kaio(5, 0xFFBEF650, 0x00000000, 0xFF21FB68, 0x00000000, 0xFFBEF658, 0x00000000) = 0
            lwp_create(0xFFBEF650, 0, 0xFF21FF5C)           = 2
            lwp_create()    (returning as new lwp ...)      = 0
            kaio(AIOREAD, 3, 0xFFBEF9D0, 1024, 0, 0xFFBEFDD0) Err#48 ENOTSUP
            lwp_create(0xFFBEF5F0, 0, 0xFF20DF5C)           = 3
            lwp_create()    (returning as new lwp ...)      = 0
            lwp_create(0xFFBEF5F0, 0, 0xFF1FBF5C)           = 4
            lwp_create()    (returning as new lwp ...)      = 0
            lwp_create(0xFFBEF5F0, 0, 0xFF1E9F5C)           = 5
            lwp_create()    (returning as new lwp ...)      = 0
            lwp_create(0xFFBEF5F0, 0, 0xFF1D7F5C)           = 6
            lwp_create()    (returning as new lwp ...)      = 0
            lwp_create(0xFFBEF5F0, 0, 0xFF1C5F5C)           = 7
            lwp_create()    (returning as new lwp ...)      = 0
            lwp_create(0xFFBEF5F0, 0, 0xFF1B3F5C)           = 8
            lwp_create()    (returning as new lwp ...)      = 0
            lwp_create(0xFFBEF5F0, 0, 0xFF1A1F5C)           = 9
            lwp_create()    (returning as new lwp ...)      = 0
            lwp_create(0xFFBEF5F0, 0, 0xFF18FF5C)           = 10
            lwp_create()    (returning as new lwp ...)      = 0
            lwp_create(0xFFBEF5F0, 0, 0xFF17DF5C)           = 11
            lwp_create()    (returning as new lwp ...)      = 0
            kaio(AIOWAIT, 0x00000000)                       = 1
            kaio(AIONOTIFY, -13008896)                      = 0
            aio succeeded
            
我们注意到,首先对于AIOREAD系统给出了一个Err#48错误,表明对于文件系统的异步IO不被支持。
最后操作成功,实际上是系统把异步IO调用转化为pread读取。
在完整的truss输出中你可以看到如下一行:
pread64(3, "7F E L F010201\0\0\0\0\0".., 1024, 0) = 1024
而成功的异步IO是通过如下一行来完成读取的:
kaio(AIOREAD, 3, 0xFFBEF9C0, 1024, 0, 0xFFBEFDC0) = 0
下面是完整的truss输出供参考:
# truss /aio a.sh
            execve("/aio", 0xFFBEFE4C, 0xFFBEFE58)  argc = 2
            resolvepath("/usr/lib/ld.so.1", "/usr/lib/ld.so.1", 1023) = 16
            open("/var/ld/ld.config", O_RDONLY)             Err#2 ENOENT
            stat("/usr/lib/libaio.so.1", 0xFFBEF56C)        = 0
            open("/usr/lib/libaio.so.1", O_RDONLY)          = 3
            fstat(3, 0xFFBEF56C)                            = 0
            mmap(0x00000000, 8192, PROT_READ|PROT_EXEC, MAP_PRIVATE, 3, 0) = 0xFF3A0000
            mmap(0x00000000, 106496, PROT_READ|PROT_EXEC, MAP_PRIVATE, 3, 0) = 0xFF380000
            mmap(0xFF398000, 1584, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_FIXED, 3, 32768) = 0xFF398000
            munmap(0xFF388000, 65536)                       = 0
            memcntl(0xFF380000, 7184, MC_ADVISE, MADV_WILLNEED, 0, 0) = 0
            close(3)                                        = 0
            stat("/usr/lib/libc.so.1", 0xFFBEF56C)          = 0
            open("/usr/lib/libc.so.1", O_RDONLY)            = 3
            fstat(3, 0xFFBEF56C)                            = 0
            mmap(0xFF3A0000, 8192, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED, 3, 0) = 0xFF3A0000
            mmap(0x00000000, 802816, PROT_READ|PROT_EXEC, MAP_PRIVATE, 3, 0) = 0xFF280000
            mmap(0xFF33C000, 24764, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_FIXED, 3, 704512) = 0xFF33C000
            munmap(0xFF32C000, 65536)                       = 0
            memcntl(0xFF280000, 113504, MC_ADVISE, MADV_WILLNEED, 0, 0) = 0
            close(3)                                        = 0
            stat("/usr/lib/libdl.so.1", 0xFFBEF56C)         = 0
            open("/usr/lib/libdl.so.1", O_RDONLY)           = 3
            fstat(3, 0xFFBEF56C)                            = 0
            mmap(0xFF3A0000, 8192, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED, 3, 0) = 0xFF3A0000
            close(3)                                        = 0
            stat("/usr/platform/SUNW,Ultra-4/lib/libc_psr.so.1", 0xFFBEF37C) = 0
            open("/usr/platform/SUNW,Ultra-4/lib/libc_psr.so.1", O_RDONLY) = 3
            fstat(3, 0xFFBEF37C)                            = 0
            mmap(0x00000000, 8192, PROT_READ|PROT_EXEC, MAP_PRIVATE, 3, 0) = 0xFF370000
            mmap(0x00000000, 8192, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_ANON, -1, 0) = 0xFF360000
            mmap(0x00000000, 16384, PROT_READ|PROT_EXEC, MAP_PRIVATE, 3, 0) = 0xFF350000
            close(3)                                        = 0
            munmap(0xFF370000, 8192)                        = 0
            open("a.sh", O_RDONLY)                          = 3
            sysconfig(_CONFIG_PAGESIZE)                     = 8192
            open("/dev/zero", O_RDWR)                       = 4
            mmap(0x00000000, 1179648, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_NORESERVE, 4, 0) = 0xFF100000
            close(4)                                        = 0
            mprotect(0xFF100000, 8192, PROT_NONE)           = 0
            mprotect(0xFF112000, 8192, PROT_NONE)           = 0
            mprotect(0xFF124000, 8192, PROT_NONE)           = 0
            mprotect(0xFF136000, 8192, PROT_NONE)           = 0
            mprotect(0xFF148000, 8192, PROT_NONE)           = 0
            mprotect(0xFF15A000, 8192, PROT_NONE)           = 0
            mprotect(0xFF16C000, 8192, PROT_NONE)           = 0
            mprotect(0xFF17E000, 8192, PROT_NONE)           = 0
            mprotect(0xFF190000, 8192, PROT_NONE)           = 0
            mprotect(0xFF1A2000, 8192, PROT_NONE)           = 0
            mprotect(0xFF1B4000, 8192, PROT_NONE)           = 0
            mprotect(0xFF1C6000, 8192, PROT_NONE)           = 0
            mprotect(0xFF1D8000, 8192, PROT_NONE)           = 0
            mprotect(0xFF1EA000, 8192, PROT_NONE)           = 0
            mprotect(0xFF1FC000, 8192, PROT_NONE)           = 0
            mprotect(0xFF20E000, 8192, PROT_NONE)           = 0
            sigfillset(0xFF3428C8)                          = 0
            kaio(5, 0xFFBEF650, 0x00000000, 0xFF21FB68, 0x00000000, 0xFFBEF658, 0x00000000) = 0
            lwp_create(0xFFBEF650, 0, 0xFF21FF5C)           = 2
            lwp_create()    (returning as new lwp ...)      = 0
            kaio(AIOREAD, 3, 0xFFBEF9D0, 1024, 0, 0xFFBEFDD0) Err#48 ENOTSUP
            getpid()                                        = 22727 [22726]
            sigaction(SIGPROF, 0xFFBEF798, 0xFF399768)      = 0
            brk(0x00020B20)                                 = 0
            brk(0x0002AB20)                                 = 0
            lwp_create(0xFFBEF5F0, 0, 0xFF20DF5C)           = 3
            lwp_create()    (returning as new lwp ...)      = 0
            lwp_self()                                      = 3
            lwp_create(0xFFBEF5F0, 0, 0xFF1FBF5C)           = 4
            lwp_create()    (returning as new lwp ...)      = 0
            lwp_self()                                      = 4
            lwp_create(0xFFBEF5F0, 0, 0xFF1E9F5C)           = 5
            lwp_create()    (returning as new lwp ...)      = 0
            lwp_self()                                      = 5
            lwp_create(0xFFBEF5F0, 0, 0xFF1D7F5C)           = 6
            lwp_create()    (returning as new lwp ...)      = 0
            lwp_self()                                      = 6
            lwp_create(0xFFBEF5F0, 0, 0xFF1C5F5C)           = 7
            lwp_create()    (returning as new lwp ...)      = 0
            lwp_self()                                      = 7
            lwp_create(0xFFBEF5F0, 0, 0xFF1B3F5C)           = 8
            lwp_create()    (returning as new lwp ...)      = 0
            lwp_self()                                      = 8
            lwp_create(0xFFBEF5F0, 0, 0xFF1A1F5C)           = 9
            lwp_create()    (returning as new lwp ...)      = 0
            lwp_self()                                      = 9
            lwp_create(0xFFBEF5F0, 0, 0xFF18FF5C)           = 10
            lwp_create()    (returning as new lwp ...)      = 0
            lwp_self()                                      = 10
            lwp_create(0xFFBEF5F0, 0, 0xFF17DF5C)           = 11
            lwp_create()    (returning as new lwp ...)      = 0
            lwp_self()                                      = 11
            brk(0x0002AB20)                                 = 0
            brk(0x0014AB20)                                 = 0
            lwp_cond_broadcast(0xFF3997C0)                  = 0
            lwp_cond_signal(0xFF20DFA0)                     = 0
            lwp_cond_wait(0xFF20DFA0, 0xFF20DFB0, 0x00000000) = 0
            pread64(3, "7F E L F010201\0\0\0\0\0".., 1024, 0) = 1024
            ioctl(1, TCGETA, 0xFFBEEBD4)                    = 0
            aio succeeded
            write(1, " a i o   s u c c e e d e".., 14)      = 14
            close(3)                                        = 0
            llseek(0, 0, SEEK_CUR)                          = 14361
            _exit(0)
            # truss ./aio /dev/rdsk/c0t2d0s5
            execve("/aio", 0xFFBEFE3C, 0xFFBEFE48)  argc = 2
            resolvepath("/usr/lib/ld.so.1", "/usr/lib/ld.so.1", 1023) = 16
            open("/var/ld/ld.config", O_RDONLY)             Err#2 ENOENT
            stat("/usr/lib/libaio.so.1", 0xFFBEF55C)        = 0
            open("/usr/lib/libaio.so.1", O_RDONLY)          = 3
            fstat(3, 0xFFBEF55C)                            = 0
            mmap(0x00000000, 8192, PROT_READ|PROT_EXEC, MAP_PRIVATE, 3, 0) = 0xFF3A0000
            mmap(0x00000000, 106496, PROT_READ|PROT_EXEC, MAP_PRIVATE, 3, 0) = 0xFF380000
            mmap(0xFF398000, 1584, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_FIXED, 3, 32768) = 0xFF398000
            munmap(0xFF388000, 65536)                       = 0
            memcntl(0xFF380000, 7184, MC_ADVISE, MADV_WILLNEED, 0, 0) = 0
            close(3)                                        = 0
            stat("/usr/lib/libc.so.1", 0xFFBEF55C)          = 0
            open("/usr/lib/libc.so.1", O_RDONLY)            = 3
            fstat(3, 0xFFBEF55C)                            = 0
            mmap(0xFF3A0000, 8192, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED, 3, 0) = 0xFF3A0000
            mmap(0x00000000, 802816, PROT_READ|PROT_EXEC, MAP_PRIVATE, 3, 0) = 0xFF280000
            mmap(0xFF33C000, 24764, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_FIXED, 3, 704512) = 0xFF33C000
            munmap(0xFF32C000, 65536)                       = 0
            memcntl(0xFF280000, 113504, MC_ADVISE, MADV_WILLNEED, 0, 0) = 0
            close(3)                                        = 0
            stat("/usr/lib/libdl.so.1", 0xFFBEF55C)         = 0
            open("/usr/lib/libdl.so.1", O_RDONLY)           = 3
            fstat(3, 0xFFBEF55C)                            = 0
            mmap(0xFF3A0000, 8192, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED, 3, 0) = 0xFF3A0000
            close(3)                                        = 0
            stat("/usr/platform/SUNW,Ultra-4/lib/libc_psr.so.1", 0xFFBEF36C) = 0
            open("/usr/platform/SUNW,Ultra-4/lib/libc_psr.so.1", O_RDONLY) = 3
            fstat(3, 0xFFBEF36C)                            = 0
            mmap(0x00000000, 8192, PROT_READ|PROT_EXEC, MAP_PRIVATE, 3, 0) = 0xFF370000
            mmap(0x00000000, 8192, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_ANON, -1, 0) = 0xFF360000
            mmap(0x00000000, 16384, PROT_READ|PROT_EXEC, MAP_PRIVATE, 3, 0) = 0xFF350000
            close(3)                                        = 0
            munmap(0xFF370000, 8192)                        = 0
            open("/dev/rdsk/c0t2d0s5", O_RDONLY)            = 3
            sysconfig(_CONFIG_PAGESIZE)                     = 8192
            open("/dev/zero", O_RDWR)                       = 4
            mmap(0x00000000, 1179648, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_NORESERVE, 4, 0) = 0xFF100000
            close(4)                                        = 0
            mprotect(0xFF100000, 8192, PROT_NONE)           = 0
            mprotect(0xFF112000, 8192, PROT_NONE)           = 0
            mprotect(0xFF124000, 8192, PROT_NONE)           = 0
            mprotect(0xFF136000, 8192, PROT_NONE)           = 0
            mprotect(0xFF148000, 8192, PROT_NONE)           = 0
            mprotect(0xFF15A000, 8192, PROT_NONE)           = 0
            mprotect(0xFF16C000, 8192, PROT_NONE)           = 0
            mprotect(0xFF17E000, 8192, PROT_NONE)           = 0
            mprotect(0xFF190000, 8192, PROT_NONE)           = 0
            mprotect(0xFF1A2000, 8192, PROT_NONE)           = 0
            mprotect(0xFF1B4000, 8192, PROT_NONE)           = 0
            mprotect(0xFF1C6000, 8192, PROT_NONE)           = 0
            mprotect(0xFF1D8000, 8192, PROT_NONE)           = 0
            mprotect(0xFF1EA000, 8192, PROT_NONE)           = 0
            mprotect(0xFF1FC000, 8192, PROT_NONE)           = 0
            mprotect(0xFF20E000, 8192, PROT_NONE)           = 0
            sigfillset(0xFF3428C8)                          = 0
            kaio(5, 0xFFBEF640, 0x00000000, 0xFF21FB68, 0x00000000, 0xFFBEF648, 0x00000000) = 0
            lwp_create(0xFFBEF640, 0, 0xFF21FF5C)           = 2
            lwp_create()    (returning as new lwp ...)      = 0
            kaio(AIOREAD, 3, 0xFFBEF9C0, 1024, 0, 0xFFBEFDC0) = 0
            kaio(AIOWAIT, 0x00000000)                       = 4290706880
            ioctl(1, TCGETA, 0xFFBEEBC4)                    = 0
            aio succeeded
            write(1, " a i o   s u c c e e d e".., 14)      = 14
            close(3)                                        = 0
            llseek(0, 0, SEEK_CUR)                          = 18744
            _exit(0)
            #
            

历史上的今天...
    >> 2015-12-29文章:
    >> 2012-12-29文章:
    >> 2010-12-29文章:
           EXPDP测试:性能数据记录
    >> 2008-12-29文章:
    >> 2007-12-29文章:
    >> 2006-12-29文章:
    >> 2005-12-29文章:
           遭遇Referer Spam Too
           spam留言知几何?

无觅

By eygle on 2004-12-29 14:06 | Comments (7) | System | 134 |

7 Comments

厉害。不过如果能将include中的头文件
补全的话会更好,谢谢

哦,没注意到。
MT网页展现时,把include部分作为代码隐藏了。
你查看网页源代码就看到了.

-eygle

如上是kaio和aio的差别了;kaio是kernel模式,aio是user-mode;kaio在solaris上只支持raw device,对文件系统使用aio.
我的看法:应该把文章中的“异步IO”改为“核心异步IO”;man aiowait中的一段话 aiowait() suspends the calling process until one of its outstanding asynchronous I/O operations completes. This provides a synchronous method of notification.
If timeout is a non-zero pointer, it specifies a maximum interval to wait for the completion of an asynchronous I/O operation. If timeout is a zero pointer, then aiowait() blocks indefinitely. To effect a poll, the timeout parameter should be non-zero, pointing to a zero-valued timeval structure.
如上调用的aiowait传递的参数是0,应该“不确定性”等待信号;实际上仍然有异步的效果。

实际上,文中想探讨的就是KAIO,对于user mode的AIO,后面的过程我没有详细说明,实际上OS通过lwp_create创建了大量的LWPS可以模拟实现AIO;而这种方式通常会有额外的负担,不会带来性能提升;只不过因为KAIO的调用对于用户是透明的,OS自动执行了一系列的判断和过程而已:

lwp_create(0xFFBEF5F0, 0, 0xFF20DF5C) = 3
lwp_create() (returning as new lwp ...) = 0
lwp_self() = 3
lwp_create(0xFFBEF5F0, 0, 0xFF1FBF5C) = 4
lwp_create() (returning as new lwp ...) = 0

当然KAIO不止支持裸设备,对于使用Veritas Volume Manager或者DiskSuite等卷管理工具创建的卷,KAIO也被支持。

假设系统只有aio,dbwr执行写dirty buffer的命令,dbwr将blocks 分配给多个lwp_ 线程 完成i/o任务,这时候dbwr是空闲等待lwp_通知i/o结束?还是扫描buffer cache寻找下一批写回磁盘的其他buffer? 如果是后一种情况,理论上aio要比同步I/O性能要好; 如果是前一种,则aio实际上和同步I/O 一样了。
不清楚oracle是哪一种情况???

这可以分为两类情况:
1.对于Oracle来说
a.如果操作系统支持异步IO,Oracle推荐使用多个dbwr进程来解决IO负载过重的情况
b.如果操作系统不支持异步IO,Oracle推荐使用dbwr_io_slaves来缓解负载过重的情况

而不管是使用多个dbwr还是使用io_slave进程,Oracle都不会等待写完成。

2.对于操作系统来说
如果支持异步IO,那么不管是user mode还是kaio,只要Oracle应用调用了aio接口,那么OS就会按照以上两种模式执行。
对于user mode的aio,因为对于用户透明,如果kaio不被支持,那么势必需要一系列额外的动作来实现,这些额外的动作就是额外的负担,性能固然及不上kaio,对于频繁的小量写操作等,甚至及不上同步IO。

"1.对于Oracle来说
a.如果操作系统支持异步IO,Oracle推荐使用多个dbwr进程来解决IO负载过重的情况"

我认为如果操作系统支持异步IO,应该是设置DISK_ASYNCH_IO=true允许oracle使用AIO,单个DBWR进程也是能够利用AIO的,而多个dbwr进程应该在多CPU的情况下使用,单CPU下使用多个dbwr进程的意义不大


CopyRight © 2004~2020 云和恩墨,成就未来!, All rights reserved.
数据恢复·紧急救援·性能优化 云和恩墨 24x7 热线电话:400-600-8755 业务咨询:010-59007017-7040 or 7037 业务合作: marketing@enmotech.com