DSエミュレータ(DeSmuME)のパフォーマンス調査
■ 使用したバージョン:
desmume-0.9.10 (linux版)
■ スレッドについて:
・CPU実行用とレンダリング用の、少なくとも2種類のスレッドがある模様。
・スレッドは複数あるものの、CPU実行用スレッドの負荷だけが異常に高い。
・レンダリング用スレッドのバックトレース
RasterizerUnit<true>::runscanlines<true>()
RasterizerUnit<true>::shape_engine()
mainLoop<true>()
execRasterizerUnit()
taskProc()
start_thread()
clone()
・CPU実行用スレッドのバックトレース
armcpu_exec<0>()
armInnerLoop<true,true,false>()
NDS_exec<false>()
desmume_cycle()
EmuLoop()
?? ()
g_main_context_dispatch()
?? ()
g_main_loop_run()
?? ()
main()
■ /procの情報
Name: desmume
State: R (running)
Tgid: 11992
Ngid: 0
Pid: 11992
PPid: 11990
TracerPid: 11990
Uid: 1000 1000 1000 1000
Gid: 1000 1000 1000 1000
FDSize: 32
Groups: 20 24 25 29 40 44 46 111 116 1000
VmPeak: 1212040 kB
VmSize: 1190968 kB
VmLck: 0 kB
VmPin: 0 kB
VmHWM: 210960 kB
VmRSS: 201388 kB
VmData: 781932 kB
VmStk: 1172 kB
VmExe: 3504 kB
VmLib: 29536 kB
VmPTE: 632 kB
VmSwap: 0 kB
Threads: 12
SigQ: 0/23540
SigPnd: 0000000000000000
ShdPnd: 0000000000000000
SigBlk: 0000000000000000
SigIgn: 0000000001003000
SigCgt: 00000001800044c6
CapInh: 0000000000000000
CapPrm: 0000000000000000
CapEff: 0000000000000000
CapBnd: 0000001fffffffff
Seccomp: 0
Cpus_allowed: f
Cpus_allowed_list: 0-3
Mems_allowed: 01
Mems_allowed_list: 0
voluntary_ctxt_switches: 2091
nonvoluntary_ctxt_switches: 13038
■ psの結果など
root@debian-r8 /proc/11992# ps auxww -L | egrep "desmu|PID" | sed -e 's@desmume-0.9.10-gprof/@@g'
USER PID LWP %CPU NLWP %MEM VSZ RSS TTY STAT START TIME COMMAND
sakaihdt 11983 11983 1.1 3 1.6 143088 48440 pts/2 Sl+ 15:33 0:20 kdbg ./src/gtk/desmume
sakaihdt 11983 11984 0.0 3 1.6 143088 48440 pts/2 Sl+ 15:33 0:00 kdbg ./src/gtk/desmume
sakaihdt 11983 12017 0.0 3 1.6 143088 48440 pts/2 Sl+ 15:36 0:00 kdbg ./src/gtk/desmume
sakaihdt 11992 11992 21.4 12 6.6 1190968 201404 ? Rsl 15:33 5:59 src/gtk/desmume
sakaihdt 11992 11995 0.8 12 6.6 1190968 201404 ? Ssl 15:33 0:14 src/gtk/desmume
sakaihdt 11992 11996 0.8 12 6.6 1190968 201404 ? Ssl 15:33 0:14 src/gtk/desmume
sakaihdt 11992 11997 0.6 12 6.6 1190968 201404 ? Ssl 15:33 0:10 src/gtk/desmume
sakaihdt 11992 11998 0.6 12 6.6 1190968 201404 ? Ssl 15:33 0:10 src/gtk/desmume
sakaihdt 11992 11999 0.2 12 6.6 1190968 201404 ? Ssl 15:33 0:04 src/gtk/desmume
sakaihdt 11992 12001 0.2 12 6.6 1190968 201404 ? Ssl 15:33 0:04 src/gtk/desmume
sakaihdt 11992 12002 0.3 12 6.6 1190968 201404 ? Ssl 15:33 0:06 src/gtk/desmume
sakaihdt 11992 12004 0.1 12 6.6 1190968 201404 ? Ssl 15:33 0:02 src/gtk/desmume
sakaihdt 11992 12007 2.0 12 6.6 1190968 201404 ? Ssl 15:33 0:33 src/gtk/desmume
sakaihdt 11992 12008 1.9 12 6.6 1190968 201404 ? Ssl 15:33 0:33 src/gtk/desmume
sakaihdt 11992 12022 0.1 12 6.6 1190968 201404 ? Ssl 15:36 0:02 src/gtk/desmume
■ gprofした結果:
sakaihdt@debian-r8 ~/src/desmume/desmume-0.9.10-gprof$ cat gprof.log | head -100 [16:06:49]
Flat profile:
Each sample counts as 0.01 seconds.
% cumulative self self total
time seconds seconds calls ms/call ms/call name
14.11 1.02 1.02 43661634 0.00 0.00 unsigned int armcpu_exec<0>()
11.34 1.84 0.82 31259 0.03 0.03 hq2xS_32_def(unsigned int*, unsigned int*, unsigned int const*, unsigned int const*, unsigned int const*, unsigned int)
9.82 2.55 0.71 52632 0.01 0.01 void RasterizerUnit::runscanlines(edge_fx_fl*, edge_fx_fl*, bool, bool)
7.75 3.11 0.56 3989547 0.00 0.00 std::pair<int, int> armInnerLoop<true, true, false>(unsigned long long, int, int, int)
4.15 3.41 0.30 61458 0.00 0.00 void renderline_textBG(GPU*, unsigned short, unsigned short, unsigned short)
4.08 3.71 0.30 3989712 0.00 0.00 Sequencer::execHardware()
3.04 3.93 0.22 5666530 0.00 0.00 unsigned int armcpu_exec<1>()
2.35 4.10 0.17 57611 0.00 0.01 gpu_SetRotateScreen(unsigned short)
2.07 4.25 0.15 ExposeDrawingArea(_GtkWidget*, _GdkEventExpose*, void*)
1.94 4.39 0.14 4997559 0.00 0.00 unsigned int OP_LDR_P_IMM_OFF<0>(unsigned int)
1.66 4.51 0.12 1861608 0.00 0.00 void FetchADPCMData<(SPUInterpolationMode)1>(channel_struct*, int*)
1.52 4.62 0.11 6182731 0.00 0.00 _MMU_ARM7_read32(unsigned int)
1.45 4.72 0.11 3989563 0.00 0.00 Sequencer::findNext()
1.38 4.82 0.10 43661630 0.00 0.00 unsigned int armcpu_exec<0, false>()
1.24 4.91 0.09 155 0.58 32.45 void NDS_exec(int)
1.24 5.00 0.09 2 45.00 45.00 GPU_InitFadeColors()
1.11 5.08 0.08 2205393 0.00 0.00 unsigned int OP_CMP_IMM_VAL<0>(unsigned int)
0.97 5.15 0.07 981011 0.00 0.00 unsigned int OP_LDMIA_W<0>(unsigned int)
0.97 5.22 0.07 19200 0.00 0.00 void GPU::_spriteRender<(GPU::SpriteRenderMode)0>(unsigned char*, unsigned char*, unsigned char*, unsigned char*)
0.97 5.29 0.07 unsigned int OP_LDR_M_IMM_OFF_PREIND<0>(unsigned int)
0.83 5.35 0.06 648523 0.00 0.00 MatrixMultVec4x4(int const*, int*)
0.76 5.41 0.06 2296900 0.00 0.00 _MMU_ARM9_write32(unsigned int, unsigned int)
0.69 5.46 0.05 711973 0.00 0.00 unsigned int OP_STMDB_W<0>(unsigned int)
0.69 5.51 0.05 190379 0.00 0.00 unsigned int OP_LDMIA<0>(unsigned int)
0.69 5.56 0.05 TRAPUNDEF(armcpu_t*)
0.62 5.60 0.05 2827870 0.00 0.00 triggerDma(EDMAMode)
0.55 5.64 0.04 1620376 0.00 0.00 GFX_PIPErecv(unsigned char*, unsigned int*)
0.55 5.68 0.04 65 0.62 2.00 SoftRastRender()
0.55 5.72 0.04 65 0.62 0.62 SoftRasterizerEngine::initFramebuffer(int, int, bool)
0.41 5.75 0.03 4096439 0.00 0.00 unsigned int MMU_struct::gen_IF<0>()
0.41 5.78 0.03 3897095 0.00 0.00 unsigned int OP_B<0>(unsigned int)
0.41 5.81 0.03 1805404 0.00 0.00 bool validateIORegsWrite<(unsigned char)0>(unsigned int, unsigned char, unsigned int)
0.41 5.84 0.03 1577773 0.00 0.00 unsigned int OP_CMP_LSL_IMM<0>(unsigned int)
0.41 5.87 0.03 1525055 0.00 0.00 _MMU_ARM9_read32(unsigned int)
0.41 5.90 0.03 497578 0.00 0.00 unsigned int OP_LDR_P_IMM_OFF<1>(unsigned int)
0.41 5.93 0.03 205790 0.00 0.00 unsigned int OP_STMIA_W<0>(unsigned int)
0.41 5.96 0.03 585 0.05 0.05 unsigned int OP_LDMIB2<1>(unsigned int)
0.41 5.99 0.03 156 0.19 0.19 gfx3d_VBlankSignal()
0.41 6.02 0.03 unsigned int armcpu_exec<0, true>()
0.41 6.05 0.03 unsigned int OP_TEQ_IMM_VAL<0>(unsigned int)
0.28 6.07 0.02 6182743 0.00 0.00 bool slot2_read<(unsigned char)1, unsigned int>(unsigned int, unsigned int&)
0.28 6.09 0.02 5666530 0.00 0.00 unsigned int armcpu_exec<1, false>()
0.28 6.11 0.02 2608466 0.00 0.00 WIFI_usTrigger()
0.28 6.13 0.02 1324726 0.00 0.00 gfx3d_sendCommandToFIFO(unsigned int)
0.28 6.15 0.02 1077816 0.00 0.00 unsigned int OP_BX<0>(unsigned int)
0.28 6.17 0.02 741944 0.00 0.00 unsigned int OP_STRB_M_IMM_OFF_PREIND<0>(unsigned int)
0.28 6.19 0.02 654867 0.00 0.00 unsigned int OP_LDRH_P_IMM_OFF<0>(unsigned int)
0.28 6.21 0.02 524530 0.00 0.00 gfx3d_execute3D()
0.28 6.23 0.02 471990 0.00 0.00 armcpu_switchMode(armcpu_t*, unsigned char)
0.28 6.25 0.02 371797 0.00 0.00 _MMU_ARM7_write32(unsigned int, unsigned int)
0.28 6.27 0.02 324308 0.00 0.00 SetVertex()
0.28 6.29 0.02 300006 0.00 0.00 _MMU_ARM9_read16(unsigned int)
0.28 6.31 0.02 120385 0.00 0.00 unsigned int OP_STMIA<0>(unsigned int)
0.28 6.33 0.02 117200 0.00 0.00 unsigned int OP_LDRB_REG_OFF<0>(unsigned int)
0.28 6.35 0.02 96884 0.00 0.00 void GFX3D_Clipper::clipPoly(POLY*, VERT**)
0.28 6.37 0.02 78497 0.00 0.00 unsigned int OP_MVN_IMM_VAL<0>(unsigned int)
0.28 6.39 0.02 76803 0.00 0.00 unsigned int OP_STRH_P_IMM_OFF<1>(unsigned int)
0.28 6.41 0.02 16016 0.00 0.00 unsigned int OP_RSB_IMM_VAL<0>(unsigned int)
0.28 6.43 0.02 15720 0.00 0.00 unsigned int OP_STR_IMM_OFF<0>(unsigned int)
0.28 6.45 0.02 10560 0.00 0.01 void DmaController::doCopy<0>()
0.28 6.47 0.02 4889 0.00 0.00 TexCacheItem* TexCache::scan<(TexCache_TexFormat)2>(unsigned int, unsigned int)
0.28 6.49 0.02 unsigned int OP_ORR_ROR_REG<0>(unsigned int)
0.28 6.51 0.02 unsigned int OP_TST_ROR_REG<0>(unsigned int)
0.28 6.53 0.02 unsigned int OP_LDRB_M_IMM_OFF<0>(unsigned int)
0.14 6.54 0.01 3271649 0.00 0.00 NDS_RescheduleGXFIFO(unsigned int)
0.14 6.55 0.01 2206590 0.00 0.00 unsigned int OP_ADD_IMM_VAL<0>(unsigned int)
0.14 6.56 0.01 1430243 0.00 0.00 unsigned int OP_MOV_IMM_VAL<0>(unsigned int)
0.14 6.57 0.01 1414126 0.00 0.00 unsigned int OP_STR_P_IMM_OFF<0>(unsigned int)
0.14 6.58 0.01 1104065 0.00 0.00 GFX_FIFOsend(unsigned char, unsigned int)
0.14 6.59 0.01 1102514 0.00 0.00 unsigned int OP_SUB_S_IMM_VAL<0>(unsigned int)
0.14 6.60 0.01 763780 0.00 0.00 unsigned int OP_AND_S_IMM_VAL<0>(unsigned int)
0.14 6.61 0.01 698449 0.00 0.00 unsigned int OP_ADD_LSL_IMM<0>(unsigned int)
0.14 6.62 0.01 682332 0.00 0.00 unsigned int OP_B<1>(unsigned int)
0.14 6.63 0.01 681462 0.00 0.00 SNDDummyPostProcessSamples(short*, unsigned int, ESynchMode, ISynchronizingAudioBuffer*)
0.14 6.64 0.01 659606 0.00 0.00 unsigned int OP_LDRB_P_LSL_IMM_OFF<0>(unsigned int)
0.14 6.65 0.01 625906 0.00 0.00 unsigned int OP_ADD_IMM3<0>(unsigned int)
0.14 6.66 0.01 569864 0.00 0.00 unsigned int OP_AND_IMM_VAL<0>(unsigned int)
0.14 6.67 0.01 503940 0.00 0.00 unsigned int OP_CMP_IMM_VAL<1>(unsigned int)
0.14 6.68 0.01 217068 0.00 0.00 unsigned int OP_MOV_LSL_REG<0>(unsigned int)
0.14 6.69 0.01 201466 0.00 0.00 _MMU_ARM7_read16(unsigned int)
0.14 6.70 0.01 194542 0.00 0.00 unsigned int OP_ORR_IMM_VAL<0>(unsigned int)
0.14 6.71 0.01 192345 0.00 0.00 unsigned int OP_BLX_REG<0>(unsigned int)
0.14 6.72 0.01 185745 0.00 0.00 bool slot2_read<(unsigned char)1, unsigned short>(unsigned int, unsigned short&)
0.14 6.73 0.01 184219 0.00 0.00 _MMU_ARM9_write16(unsigned int, unsigned short)
0.14 6.74 0.01 184218 0.00 0.00 bool slot2_write<(unsigned char)0, unsigned short>(unsigned int, unsigned short)
0.14 6.75 0.01 179963 0.00 0.00 ClipperPlane<2, 1, ClipperOutput>::clipVert(bool, VERT*)
0.14 6.76 0.01 161919 0.00 0.00 unsigned int OP_LDR_IMM_OFF<0>(unsigned int)
0.14 6.77 0.01 159479 0.00 0.00 armcp15_t::moveARM2CP(unsigned int, unsigned char, unsigned char, unsigned char, unsigned char)
0.14 6.78 0.01 141539 0.00 0.00 unsigned int OP_LDMIA_W<1>(unsigned int)
0.14 6.79 0.01 138056 0.00 0.00 unsigned int OP_MOV_LSR_REG<0>(unsigned int)
0.14 6.80 0.01 115153 0.00 0.00 unsigned int OP_LDR_PCREL<0>(unsigned int)
0.14 6.81 0.01 107648 0.00 0.00 Slot1Comp_Rom::read()
0.14 6.82 0.01 92561 0.00 0.00 unsigned int OP_LDRB_M_LSL_IMM_OFF<0>(unsigned int)
0.14 6.83 0.01 81816 0.00 0.00 unsigned int OP_MOV_IMM8<0>(unsigned int)
0.14 6.84 0.01 79811 0.00 0.00 unsigned int OP_STR_P_IMM_OFF_POSTIND<0>(unsigned int)
PR