Reading Profile files in profile.* NODE 0;CONTEXT 0;THREAD 0: --------------------------------------------------------------------------------------- %Time Exclusive Inclusive #Call #Subrs Inclusive Name msec total msec usec/call --------------------------------------------------------------------------------------- 100.0 3 57,030 1 15 57030319 applu 95.7 2,297 54,603 3 37517 18201094 bcast_inputs 95.7 2,297 54,602 2 37508 27301284 applu => bcast_inputs 28.8 13,980 16,417 301 602 54544 rhs 28.8 13,980 16,417 301 602 54544 bcast_inputs => rhs 20.2 9,109 11,494 9300 18600 1236 bcast_inputs => buts 20.2 9,109 11,494 9300 18600 1236 buts 17.0 8,368 9,699 9300 18600 1043 bcast_inputs => blts 17.0 8,368 9,699 9300 18600 1043 blts 14.2 8,091 8,091 9300 0 870 jacld 14.2 8,091 8,091 9300 0 870 bcast_inputs => jacld 11.6 6,597 6,597 9300 0 709 jacu 11.6 6,597 6,597 9300 0 709 bcast_inputs => jacu 6.5 1,212 3,715 37200 37200 100 exchange_1 4.3 527 2,448 604 1812 4054 exchange_3 4.3 525 2,437 602 1806 4049 rhs => exchange_3 4.2 604 2,384 18600 18600 128 buts => exchange_1 3.1 1,780 1,780 18600 0 96 exchange_1 => MPI_Recv() 3.1 1,780 1,780 18600 0 96 MPI_Recv() 2.8 1,622 1,622 608 0 2668 MPI_Wait() 2.8 1,621 1,621 604 0 2685 exchange_3 => MPI_Wait() 2.3 607 1,330 18600 18600 72 blts => exchange_1 1.8 890 1,015 1 47616 1015911 setiv 1.8 890 1,015 1 47616 1015911 applu => setiv 1.8 1,009 1,009 1 0 1009780 MPI_Finalize() 1.8 1,009 1,009 1 0 1009780 applu => MPI_Finalize() 1.8 1,003 1,003 19204 0 52 MPI_Send() 1.3 722 722 18600 0 39 exchange_1 => MPI_Send() 0.5 281 281 604 0 466 exchange_3 => MPI_Send() 0.3 151 172 1 7937 172427 applu => error 0.3 151 172 1 7937 172427 error 0.3 157 157 57252 0 3 exact 0.2 125 125 47616 0 3 setiv => exact 0.2 1 102 1 4 102427 applu => init_comm 0.2 1 102 1 4 102427 init_comm 0.2 101 101 1 0 101373 init_comm => MPI_Init() 0.2 101 101 1 0 101373 MPI_Init() 0.1 62 73 1 2 73574 applu => erhs 0.1 62 73 1 2 73574 erhs 0.1 32 44 1 1700 44314 applu => setbv 0.1 32 44 1 1700 44314 setbv 0.0 20 20 7936 0 3 error => exact 0.0 17 17 608 0 29 MPI_Irecv() 0.0 17 17 604 0 29 exchange_3 => MPI_Irecv() 0.0 11 11 1700 0 7 setbv => exact 0.0 2 11 2 6 5583 erhs => exchange_3 0.0 3 4 3 3 1613 bcast_inputs => l2norm 0.0 3 4 3 3 1613 l2norm 0.0 3 3 1 2 3776 read_input 0.0 3 3 1 2 3776 applu => read_input 0.0 1 1 8 0 215 MPI_Allreduce() 0.0 0.655 1 1 6 1454 applu => pintgr 0.0 0.655 1 1 6 1454 pintgr 0.0 0.914 0.914 3 0 305 l2norm => MPI_Allreduce() 0.0 0.23 0.713 1 9 713 read_input => bcast_inputs 0.0 0.553 0.553 1 0 553 bcast_inputs => MPI_Allreduce() 0.0 0.483 0.483 9 0 54 bcast_inputs => MPI_Bcast() 0.0 0.483 0.483 9 0 54 MPI_Bcast() 0.0 0.165 0.374 1 4 374 exchange_4 0.0 0.165 0.374 1 4 374 pintgr => exchange_4 0.0 0.315 0.315 1 0 315 sethyper 0.0 0.315 0.315 1 0 315 applu => sethyper 0.0 0.181 0.181 2 0 91 exchange_4 => MPI_Wait() 0.0 0.101 0.149 1 2 149 exchange_5 0.0 0.101 0.149 1 2 149 pintgr => exchange_5 0.0 0.145 0.145 3 0 48 pintgr => MPI_Allreduce() 0.0 0.0941 0.131 1 2 131 exchange_6 0.0 0.0941 0.131 1 2 131 pintgr => exchange_6 0.0 0.111 0.111 1 0 111 error => MPI_Allreduce() 0.0 0.0788 0.0788 1 0 79 MPI_Barrier() 0.0 0.0788 0.0788 1 0 79 bcast_inputs => MPI_Barrier() 0.0 0.0447 0.0447 2 0 22 bcast_inputs => MPI_Wtime() 0.0 0.0447 0.0447 2 0 22 MPI_Wtime() 0.0 0.0365 0.0365 1 0 37 exchange_5 => MPI_Wait() 0.0 0.0306 0.0306 2 0 15 MPI_Comm_size() 0.0 0.0272 0.0272 1 0 27 exchange_6 => MPI_Wait() 0.0 0.027 0.027 2 0 13 exchange_4 => MPI_Irecv() 0.0 0.0218 0.0218 1 0 22 init_comm => MPI_Comm_size() 0.0 0.0112 0.0112 1 0 11 exchange_5 => MPI_Irecv() 0.0 0.0106 0.0106 1 0 11 subdomain 0.0 0.0106 0.0106 1 0 11 applu => subdomain 0.0 0.00962 0.00962 1 0 10 exchange_6 => MPI_Irecv() 0.0 0.00901 0.00901 1 0 9 setcoeff 0.0 0.00901 0.00901 1 0 9 applu => setcoeff 0.0 0.0088 0.0088 1 0 9 read_input => MPI_Comm_size() 0.0 0.00449 0.00449 1 0 4 MPI_Comm_rank() 0.0 0.00449 0.00449 1 0 4 init_comm => MPI_Comm_rank() 0.0 0.00258 0.00258 1 0 3 nodedim 0.0 0.00258 0.00258 1 0 3 init_comm => nodedim 0.0 0.0025 0.0025 1 0 2 neighbors 0.0 0.0025 0.0025 1 0 2 applu => neighbors 0.0 0.00214 0.00214 1 0 2 proc_grid 0.0 0.00214 0.00214 1 0 2 applu => proc_grid --------------------------------------------------------------------------------------- USER EVENTS Profile :NODE 0, CONTEXT 0, THREAD 0 --------------------------------------------------------------------------------------- NumSamples MaxValue MinValue MeanValue Std. Dev. Event Name --------------------------------------------------------------------------------------- 1.92E+04 4.488E+04 640 2031 7721 Message size sent to all nodes 0 0 0 0 0 Message size sent to node 0 9602 4.488E+04 640 2031 7721 Message size sent to node 1 9602 4.488E+04 640 2031 7721 Message size sent to node 2 0 0 0 0 0 Message size sent to node 3 --------------------------------------------------------------------------------------- NODE 1;CONTEXT 0;THREAD 0: --------------------------------------------------------------------------------------- %Time Exclusive Inclusive #Call #Subrs Inclusive Name msec total msec usec/call --------------------------------------------------------------------------------------- 100.0 2 57,030 1 14 57030273 applu 95.8 1,824 54,611 2 37517 27305511 bcast_inputs 95.7 1,824 54,604 1 37508 54604899 applu => bcast_inputs 30.4 13,169 17,349 301 602 57640 rhs 30.4 13,169 17,349 301 602 57640 bcast_inputs => rhs 21.5 7,727 12,242 9300 18600 1316 blts 21.5 7,727 12,242 9300 18600 1316 bcast_inputs => blts 17.3 8,200 9,880 9300 18600 1062 buts 17.3 8,200 9,880 9300 18600 1062 bcast_inputs => buts 12.7 7,268 7,268 9300 0 782 bcast_inputs => jacld 12.7 7,268 7,268 9300 0 782 jacld 10.9 930 6,193 37200 37200 166 exchange_1 10.6 6,018 6,018 9300 0 647 jacu 10.6 6,018 6,018 9300 0 647 bcast_inputs => jacu 8.2 4,671 4,671 18600 0 251 MPI_Recv() 8.2 4,671 4,671 18600 0 251 exchange_1 => MPI_Recv() 7.9 464 4,514 18600 18600 243 blts => exchange_1 7.5 512 4,256 604 1812 7048 exchange_3 7.3 510 4,180 602 1806 6945 rhs => exchange_3 5.9 3,376 3,376 606 0 5572 MPI_Wait() 5.9 3,376 3,376 604 0 5591 exchange_3 => MPI_Wait() 2.9 465 1,679 18600 18600 90 buts => exchange_1 1.8 1,013 1,013 1 0 1013033 applu => MPI_Finalize() 1.8 1,013 1,013 1 0 1013033 MPI_Finalize() 1.7 832 949 1 44640 949877 setiv 1.7 832 949 1 44640 949877 applu => setiv 1.6 920 920 19206 0 48 MPI_Send() 1.0 591 591 18600 0 32 exchange_1 => MPI_Send() 0.6 328 328 604 0 544 exchange_3 => MPI_Send() 0.3 144 172 1 7441 172431 error 0.3 144 172 1 7441 172431 applu => error 0.3 148 148 53713 0 3 exact 0.2 60 136 1 2 136841 erhs 0.2 60 136 1 2 136841 applu => erhs 0.2 117 117 44640 0 3 setiv => exact 0.2 1 100 1 4 100989 applu => init_comm 0.2 1 100 1 4 100989 init_comm 0.2 99 99 1 0 99748 MPI_Init() 0.2 99 99 1 0 99748 init_comm => MPI_Init() 0.1 2 76 2 6 38178 erhs => exchange_3 0.1 30 42 1 1633 42285 applu => setbv 0.1 30 42 1 1633 42285 setbv 0.1 38 38 606 0 64 MPI_Irecv() 0.1 38 38 604 0 64 exchange_3 => MPI_Irecv() 0.0 25 25 8 0 3250 MPI_Allreduce() 0.0 3 20 3 3 6866 l2norm 0.0 3 20 3 3 6866 bcast_inputs => l2norm 0.0 19 19 7440 0 3 error => exact 0.0 16 16 3 0 5591 l2norm => MPI_Allreduce() 0.0 11 11 1633 0 7 setbv => exact 0.0 8 8 1 0 8383 error => MPI_Allreduce() 0.0 0.0576 6 1 1 6181 read_input 0.0 0.0576 6 1 1 6181 applu => read_input 0.0 0.232 6 1 9 6123 read_input => bcast_inputs 0.0 5 5 9 0 655 bcast_inputs => MPI_Bcast() 0.0 5 5 9 0 655 MPI_Bcast() 0.0 0.662 1 1 6 1438 pintgr 0.0 0.662 1 1 6 1438 applu => pintgr 0.0 0.55 0.55 1 0 550 bcast_inputs => MPI_Allreduce() 0.0 0.293 0.293 3 0 98 pintgr => MPI_Allreduce() 0.0 0.155 0.287 1 3 287 pintgr => exchange_4 0.0 0.155 0.287 1 3 287 exchange_4 0.0 0.245 0.245 1 0 245 sethyper 0.0 0.245 0.245 1 0 245 applu => sethyper 0.0 0.0917 0.129 1 2 129 pintgr => exchange_6 0.0 0.0917 0.129 1 2 129 exchange_6 0.0 0.085 0.085 1 0 85 MPI_Barrier() 0.0 0.085 0.085 1 0 85 bcast_inputs => MPI_Barrier() 0.0 0.0445 0.0676 1 1 68 exchange_5 0.0 0.0445 0.0676 1 1 68 pintgr => exchange_5 0.0 0.0607 0.0607 1 0 61 exchange_4 => MPI_Send() 0.0 0.0456 0.0456 1 0 46 exchange_4 => MPI_Irecv() 0.0 0.0442 0.0442 2 0 22 MPI_Wtime() 0.0 0.0442 0.0442 2 0 22 bcast_inputs => MPI_Wtime() 0.0 0.0295 0.0295 1 0 29 init_comm => MPI_Comm_size() 0.0 0.0295 0.0295 1 0 29 MPI_Comm_size() 0.0 0.0269 0.0269 1 0 27 exchange_6 => MPI_Wait() 0.0 0.0258 0.0258 1 0 26 exchange_4 => MPI_Wait() 0.0 0.0231 0.0231 1 0 23 exchange_5 => MPI_Send() 0.0 0.0126 0.0126 1 0 13 applu => setcoeff 0.0 0.0126 0.0126 1 0 13 setcoeff 0.0 0.0105 0.0105 1 0 11 exchange_6 => MPI_Irecv() 0.0 0.00907 0.00907 1 0 9 subdomain 0.0 0.00907 0.00907 1 0 9 applu => subdomain 0.0 0.00407 0.00407 1 0 4 init_comm => MPI_Comm_rank() 0.0 0.00407 0.00407 1 0 4 MPI_Comm_rank() 0.0 0.0037 0.0037 1 0 4 nodedim 0.0 0.0037 0.0037 1 0 4 init_comm => nodedim 0.0 0.00319 0.00319 1 0 3 applu => proc_grid 0.0 0.00319 0.00319 1 0 3 proc_grid 0.0 0.00204 0.00204 1 0 2 applu => neighbors 0.0 0.00204 0.00204 1 0 2 neighbors --------------------------------------------------------------------------------------- USER EVENTS Profile :NODE 1, CONTEXT 0, THREAD 0 --------------------------------------------------------------------------------------- NumSamples MaxValue MinValue MeanValue Std. Dev. Event Name --------------------------------------------------------------------------------------- 1.921E+04 4.488E+04 264 1970 7498 Message size sent to all nodes 9604 4.488E+04 264 2031 7721 Message size sent to node 0 0 0 0 0 0 Message size sent to node 1 0 0 0 0 0 Message size sent to node 2 9602 4.224E+04 600 1910 7268 Message size sent to node 3 --------------------------------------------------------------------------------------- NODE 2;CONTEXT 0;THREAD 0: --------------------------------------------------------------------------------------- %Time Exclusive Inclusive #Call #Subrs Inclusive Name msec total msec usec/call --------------------------------------------------------------------------------------- 100.0 2 57,030 1 14 57030229 applu 95.8 1,742 54,608 2 37517 27304427 bcast_inputs 95.7 1,742 54,603 1 37508 54603982 applu => bcast_inputs 31.9 12,954 18,182 301 602 60406 rhs 31.9 12,954 18,182 301 602 60406 bcast_inputs => rhs 20.9 7,477 11,933 9300 18600 1283 blts 20.9 7,477 11,933 9300 18600 1283 bcast_inputs => blts 16.7 7,951 9,512 9300 18600 1023 buts 16.7 7,951 9,512 9300 18600 1023 bcast_inputs => buts 12.7 7,220 7,220 9300 0 776 bcast_inputs => jacld 12.7 7,220 7,220 9300 0 776 jacld 10.5 935 6,016 37200 37200 162 exchange_1 10.5 5,993 5,993 9300 0 644 jacu 10.5 5,993 5,993 9300 0 644 bcast_inputs => jacu 9.3 517 5,308 604 1812 8789 exchange_3 9.2 515 5,227 602 1806 8683 rhs => exchange_3 8.0 4,565 4,565 18600 0 245 MPI_Recv() 8.0 4,565 4,565 18600 0 245 exchange_1 => MPI_Recv() 7.8 483 4,455 18600 18600 240 blts => exchange_1 7.7 4,415 4,415 606 0 7286 MPI_Wait() 7.7 4,415 4,415 604 0 7310 exchange_3 => MPI_Wait() 2.7 451 1,560 18600 18600 84 buts => exchange_1 1.8 1,013 1,013 1 0 1013015 applu => MPI_Finalize() 1.8 1,013 1,013 1 0 1013015 MPI_Finalize() 1.7 830 947 1 44640 947178 setiv 1.7 830 947 1 44640 947178 applu => setiv 1.5 863 863 19206 0 45 MPI_Send() 0.9 515 515 18600 0 28 exchange_1 => MPI_Send() 0.6 347 347 604 0 576 exchange_3 => MPI_Send() 0.3 143 172 1 7441 172438 error 0.3 143 172 1 7441 172438 applu => error 0.3 148 148 53713 0 3 exact 0.2 59 140 1 2 140726 erhs 0.2 59 140 1 2 140726 applu => erhs 0.2 117 117 44640 0 3 setiv => exact 0.2 1 102 1 4 102110 applu => init_comm 0.2 1 102 1 4 102110 init_comm 0.2 100 100 1 0 100973 MPI_Init() 0.2 100 100 1 0 100973 init_comm => MPI_Init() 0.1 2 81 2 6 40556 erhs => exchange_3 0.1 30 42 1 1633 42056 applu => setbv 0.1 30 42 1 1633 42056 setbv 0.0 27 27 606 0 45 MPI_Irecv() 0.0 27 27 604 0 45 exchange_3 => MPI_Irecv() 0.0 24 24 8 0 3109 MPI_Allreduce() 0.0 19 19 7440 0 3 error => exact 0.0 3 19 3 3 6338 l2norm 0.0 3 19 3 3 6338 bcast_inputs => l2norm 0.0 15 15 3 0 5007 l2norm => MPI_Allreduce() 0.0 11 11 1633 0 7 setbv => exact 0.0 8 8 1 0 8963 error => MPI_Allreduce() 0.0 0.0567 4 1 1 4928 applu => read_input 0.0 0.0567 4 1 1 4928 read_input 0.0 0.239 4 1 9 4871 read_input => bcast_inputs 0.0 4 4 9 0 515 bcast_inputs => MPI_Bcast() 0.0 4 4 9 0 515 MPI_Bcast() 0.0 0.645 1 1 6 1439 applu => pintgr 0.0 0.645 1 1 6 1439 pintgr 0.0 0.563 0.563 1 0 563 bcast_inputs => MPI_Allreduce() 0.0 0.327 0.327 3 0 109 pintgr => MPI_Allreduce() 0.0 0.161 0.266 1 3 266 pintgr => exchange_4 0.0 0.161 0.266 1 3 266 exchange_4 0.0 0.241 0.241 1 0 241 sethyper 0.0 0.241 0.241 1 0 241 applu => sethyper 0.0 0.0911 0.128 1 2 128 pintgr => exchange_5 0.0 0.0911 0.128 1 2 128 exchange_5 0.0 0.0994 0.0994 1 0 99 MPI_Barrier() 0.0 0.0994 0.0994 1 0 99 bcast_inputs => MPI_Barrier() 0.0 0.0494 0.0735 1 1 73 pintgr => exchange_6 0.0 0.0494 0.0735 1 1 73 exchange_6 0.0 0.0495 0.0495 1 0 50 exchange_4 => MPI_Send() 0.0 0.0417 0.0417 2 0 21 MPI_Wtime() 0.0 0.0417 0.0417 2 0 21 bcast_inputs => MPI_Wtime() 0.0 0.0395 0.0395 1 0 40 exchange_4 => MPI_Irecv() 0.0 0.0259 0.0259 1 0 26 exchange_5 => MPI_Wait() 0.0 0.0254 0.0254 1 0 25 init_comm => MPI_Comm_size() 0.0 0.0254 0.0254 1 0 25 MPI_Comm_size() 0.0 0.0241 0.0241 1 0 24 exchange_6 => MPI_Send() 0.0 0.0164 0.0164 1 0 16 setcoeff 0.0 0.0164 0.0164 1 0 16 applu => setcoeff 0.0 0.0158 0.0158 1 0 16 exchange_4 => MPI_Wait() 0.0 0.0108 0.0108 1 0 11 exchange_5 => MPI_Irecv() 0.0 0.01 0.01 1 0 10 subdomain 0.0 0.01 0.01 1 0 10 applu => subdomain 0.0 0.00384 0.00384 1 0 4 applu => proc_grid 0.0 0.00384 0.00384 1 0 4 proc_grid 0.0 0.00368 0.00368 1 0 4 MPI_Comm_rank() 0.0 0.00368 0.00368 1 0 4 init_comm => MPI_Comm_rank() 0.0 0.00256 0.00256 1 0 3 init_comm => nodedim 0.0 0.00256 0.00256 1 0 3 nodedim 0.0 0.00206 0.00206 1 0 2 neighbors 0.0 0.00206 0.00206 1 0 2 applu => neighbors --------------------------------------------------------------------------------------- USER EVENTS Profile :NODE 2, CONTEXT 0, THREAD 0 --------------------------------------------------------------------------------------- NumSamples MaxValue MinValue MeanValue Std. Dev. Event Name --------------------------------------------------------------------------------------- 1.921E+04 4.488E+04 264 1970 7498 Message size sent to all nodes 9604 4.488E+04 264 2031 7721 Message size sent to node 0 0 0 0 0 0 Message size sent to node 1 0 0 0 0 0 Message size sent to node 2 9602 4.224E+04 600 1910 7268 Message size sent to node 3 --------------------------------------------------------------------------------------- NODE 3;CONTEXT 0;THREAD 0: --------------------------------------------------------------------------------------- %Time Exclusive Inclusive #Call #Subrs Inclusive Name msec total msec usec/call --------------------------------------------------------------------------------------- 100.0 2 57,031 1 14 57031078 applu 95.8 1,664 54,614 2 37517 27307162 bcast_inputs 95.8 1,664 54,607 1 37508 54607864 applu => bcast_inputs 31.6 12,045 18,034 301 602 59915 rhs 31.6 12,045 18,034 301 602 59915 bcast_inputs => rhs 24.8 7,102 14,119 9300 18600 1518 blts 24.8 7,102 14,119 9300 18600 1518 bcast_inputs => blts 14.8 7,521 8,466 9300 18600 910 bcast_inputs => buts 14.8 7,521 8,466 9300 18600 910 buts 14.0 909 7,961 37200 37200 214 exchange_1 12.3 492 7,016 18600 18600 377 blts => exchange_1 11.8 6,701 6,701 9300 0 721 jacld 11.8 6,701 6,701 9300 0 721 bcast_inputs => jacld 11.4 6,523 6,523 18600 0 351 MPI_Recv() 11.4 6,523 6,523 18600 0 351 exchange_1 => MPI_Recv() 10.7 480 6,128 604 1812 10146 exchange_3 10.5 477 5,989 602 1806 9949 rhs => exchange_3 9.8 5,589 5,589 9300 0 601 jacu 9.8 5,589 5,589 9300 0 601 bcast_inputs => jacu 9.4 5,386 5,386 604 0 8917 exchange_3 => MPI_Wait() 9.4 5,386 5,386 604 0 8917 MPI_Wait() 1.8 1,013 1,013 1 0 1013048 applu => MPI_Finalize() 1.8 1,013 1,013 1 0 1013048 MPI_Finalize() 1.7 416 945 18600 18600 51 buts => exchange_1 1.6 779 889 1 41850 889031 applu => setiv 1.6 779 889 1 41850 889031 setiv 1.3 740 740 19208 0 39 MPI_Send() 0.9 528 528 18600 0 28 exchange_1 => MPI_Send() 0.4 211 211 604 0 351 exchange_3 => MPI_Send() 0.3 57 196 1 2 196359 applu => erhs 0.3 57 196 1 2 196359 erhs 0.3 134 172 1 6976 172435 applu => error 0.3 134 172 1 6976 172435 error 0.2 139 139 50393 0 3 exact 0.2 2 138 2 6 69425 erhs => exchange_3 0.2 109 109 41850 0 3 setiv => exact 0.2 2 101 1 4 101472 init_comm 0.2 2 101 1 4 101472 applu => init_comm 0.2 99 99 1 0 99261 init_comm => MPI_Init() 0.2 99 99 1 0 99261 MPI_Init() 0.1 49 49 604 0 83 MPI_Irecv() 0.1 49 49 604 0 83 exchange_3 => MPI_Irecv() 0.1 48 48 8 0 6046 MPI_Allreduce() 0.1 29 40 1 1568 40616 applu => setbv 0.1 29 40 1 1568 40616 setbv 0.1 3 31 3 3 10411 bcast_inputs => l2norm 0.1 3 31 3 3 10411 l2norm 0.0 27 27 3 0 9176 l2norm => MPI_Allreduce() 0.0 19 19 1 0 19793 error => MPI_Allreduce() 0.0 18 18 6975 0 3 error => exact 0.0 11 11 1568 0 7 setbv => exact 0.0 0.0584 6 1 1 6518 read_input 0.0 0.0584 6 1 1 6518 applu => read_input 0.0 0.228 6 1 9 6460 read_input => bcast_inputs 0.0 6 6 9 0 692 bcast_inputs => MPI_Bcast() 0.0 6 6 9 0 692 MPI_Bcast() 0.0 0.607 1 1 6 1455 applu => pintgr 0.0 0.607 1 1 6 1455 pintgr 0.0 0.551 0.551 3 0 184 pintgr => MPI_Allreduce() 0.0 0.498 0.498 1 0 498 bcast_inputs => MPI_Allreduce() 0.0 0.233 0.233 1 0 233 applu => sethyper 0.0 0.233 0.233 1 0 233 sethyper 0.0 0.0806 0.163 1 2 163 exchange_4 0.0 0.0806 0.163 1 2 163 pintgr => exchange_4 0.0 0.0822 0.0822 2 0 41 exchange_4 => MPI_Send() 0.0 0.0777 0.0777 1 0 78 MPI_Barrier() 0.0 0.0777 0.0777 1 0 78 bcast_inputs => MPI_Barrier() 0.0 0.0467 0.07 1 1 70 exchange_6 0.0 0.0467 0.07 1 1 70 pintgr => exchange_6 0.0 0.0441 0.0649 1 1 65 exchange_5 0.0 0.0441 0.0649 1 1 65 pintgr => exchange_5 0.0 0.0492 0.0492 2 0 25 bcast_inputs => MPI_Wtime() 0.0 0.0492 0.0492 2 0 25 MPI_Wtime() 0.0 0.0315 0.0315 1 0 32 MPI_Comm_size() 0.0 0.0315 0.0315 1 0 32 init_comm => MPI_Comm_size() 0.0 0.0233 0.0233 1 0 23 exchange_6 => MPI_Send() 0.0 0.0208 0.0208 1 0 21 exchange_5 => MPI_Send() 0.0 0.0134 0.0134 1 0 13 setcoeff 0.0 0.0134 0.0134 1 0 13 applu => setcoeff 0.0 0.00953 0.00953 1 0 10 subdomain 0.0 0.00953 0.00953 1 0 10 applu => subdomain 0.0 0.00582 0.00582 1 0 6 MPI_Comm_rank() 0.0 0.00582 0.00582 1 0 6 init_comm => MPI_Comm_rank() 0.0 0.00326 0.00326 1 0 3 applu => proc_grid 0.0 0.00326 0.00326 1 0 3 proc_grid 0.0 0.00199 0.00199 1 0 2 applu => neighbors 0.0 0.00199 0.00199 1 0 2 neighbors 0.0 0.00187 0.00187 1 0 2 init_comm => nodedim 0.0 0.00187 0.00187 1 0 2 nodedim --------------------------------------------------------------------------------------- USER EVENTS Profile :NODE 3, CONTEXT 0, THREAD 0 --------------------------------------------------------------------------------------- NumSamples MaxValue MinValue MeanValue Std. Dev. Event Name --------------------------------------------------------------------------------------- 1.921E+04 4.224E+04 256 1909 7267 Message size sent to all nodes 0 0 0 0 0 Message size sent to node 0 9604 4.224E+04 256 1909 7267 Message size sent to node 1 9604 4.224E+04 264 1909 7267 Message size sent to node 2 0 0 0 0 0 Message size sent to node 3 --------------------------------------------------------------------------------------- FUNCTION SUMMARY (total): --------------------------------------------------------------------------------------- %Time Exclusive Inclusive #Call #Subrs Inclusive Name msec total msec usec/call --------------------------------------------------------------------------------------- 100.0 9 3:48.121 4 57 57030475 applu 95.8 7,529 3:38.437 9 150068 24270831 bcast_inputs 95.7 7,528 3:38.419 5 150032 43683863 applu => bcast_inputs 30.7 52,149 1:09.983 1204 2408 58126 rhs 30.7 52,149 1:09.983 1204 2408 58126 bcast_inputs => rhs 21.0 30,677 47,993 37200 74400 1290 blts 21.0 30,677 47,993 37200 74400 1290 bcast_inputs => blts 17.3 32,783 39,353 37200 74400 1058 buts 17.3 32,783 39,353 37200 74400 1058 bcast_inputs => buts 12.8 29,281 29,281 37200 0 787 bcast_inputs => jacld 12.8 29,281 29,281 37200 0 787 jacld 10.6 24,199 24,199 37200 0 651 jacu 10.6 24,199 24,199 37200 0 651 bcast_inputs => jacu 10.5 3,987 23,886 148800 148800 161 exchange_1 8.0 2,038 18,142 2416 7248 7509 exchange_3 7.8 2,029 17,834 2408 7224 7406 rhs => exchange_3 7.7 17,540 17,540 74400 0 236 MPI_Recv() 7.7 17,540 17,540 74400 0 236 exchange_1 => MPI_Recv() 7.6 2,048 17,316 74400 74400 233 blts => exchange_1 6.5 14,800 14,800 2424 0 6106 MPI_Wait() 6.5 14,800 14,800 2416 0 6126 exchange_3 => MPI_Wait() 2.9 1,938 6,570 74400 74400 88 buts => exchange_1 1.8 4,048 4,048 4 0 1012219 applu => MPI_Finalize() 1.8 4,048 4,048 4 0 1012219 MPI_Finalize() 1.7 3,332 3,801 4 178746 950499 setiv 1.7 3,332 3,801 4 178746 950499 applu => setiv 1.5 3,528 3,528 76824 0 46 MPI_Send() 1.0 2,358 2,358 74400 0 32 exchange_1 => MPI_Send() 0.5 1,169 1,169 2416 0 484 exchange_3 => MPI_Send() 0.3 574 689 4 29795 172433 error 0.3 574 689 4 29795 172433 applu => error 0.3 594 594 215071 0 3 exact 0.2 240 547 4 8 136875 erhs 0.2 240 547 4 8 136875 applu => erhs 0.2 469 469 178746 0 3 setiv => exact 0.2 5 406 4 16 101750 applu => init_comm 0.2 5 406 4 16 101750 init_comm 0.2 401 401 4 0 100339 MPI_Init() 0.2 401 401 4 0 100339 init_comm => MPI_Init() 0.1 9 307 8 24 38436 erhs => exchange_3 0.1 122 169 4 6534 42318 applu => setbv 0.1 122 169 4 6534 42318 setbv 0.1 133 133 2424 0 55 MPI_Irecv() 0.1 133 133 2416 0 55 exchange_3 => MPI_Irecv() 0.0 100 100 32 0 3155 MPI_Allreduce() 0.0 78 78 29791 0 3 error => exact 0.0 15 75 12 12 6307 l2norm 0.0 15 75 12 12 6307 bcast_inputs => l2norm 0.0 60 60 12 0 5020 l2norm => MPI_Allreduce() 0.0 46 46 6534 0 7 setbv => exact 0.0 37 37 4 0 9312 error => MPI_Allreduce() 0.0 3 21 4 5 5351 applu => read_input 0.0 3 21 4 5 5351 read_input 0.0 0.928 18 4 36 4542 read_input => bcast_inputs 0.0 17 17 36 0 479 bcast_inputs => MPI_Bcast() 0.0 17 17 36 0 479 MPI_Bcast() 0.0 2 5 4 24 1447 applu => pintgr 0.0 2 5 4 24 1447 pintgr 0.0 2 2 4 0 541 bcast_inputs => MPI_Allreduce() 0.0 1 1 12 0 110 pintgr => MPI_Allreduce() 0.0 0.562 1 4 12 272 exchange_4 0.0 0.562 1 4 12 272 pintgr => exchange_4 0.0 1 1 4 0 259 sethyper 0.0 1 1 4 0 259 applu => sethyper 0.0 0.281 0.409 4 6 102 pintgr => exchange_5 0.0 0.281 0.409 4 6 102 exchange_5 0.0 0.282 0.404 4 6 101 pintgr => exchange_6 0.0 0.282 0.404 4 6 101 exchange_6 0.0 0.341 0.341 4 0 85 MPI_Barrier() 0.0 0.341 0.341 4 0 85 bcast_inputs => MPI_Barrier() 0.0 0.223 0.223 4 0 56 exchange_4 => MPI_Wait() 0.0 0.192 0.192 4 0 48 exchange_4 => MPI_Send() 0.0 0.18 0.18 8 0 22 MPI_Wtime() 0.0 0.18 0.18 8 0 22 bcast_inputs => MPI_Wtime() 0.0 0.117 0.117 5 0 23 MPI_Comm_size() 0.0 0.112 0.112 4 0 28 exchange_4 => MPI_Irecv() 0.0 0.108 0.108 4 0 27 init_comm => MPI_Comm_size() 0.0 0.0625 0.0625 2 0 31 exchange_5 => MPI_Wait() 0.0 0.0541 0.0541 2 0 27 exchange_6 => MPI_Wait() 0.0 0.0514 0.0514 4 0 13 setcoeff 0.0 0.0514 0.0514 4 0 13 applu => setcoeff 0.0 0.0474 0.0474 2 0 24 exchange_6 => MPI_Send() 0.0 0.0439 0.0439 2 0 22 exchange_5 => MPI_Send() 0.0 0.0392 0.0392 4 0 10 subdomain 0.0 0.0392 0.0392 4 0 10 applu => subdomain 0.0 0.022 0.022 2 0 11 exchange_5 => MPI_Irecv() 0.0 0.0202 0.0202 2 0 10 exchange_6 => MPI_Irecv() 0.0 0.0181 0.0181 4 0 5 init_comm => MPI_Comm_rank() 0.0 0.0181 0.0181 4 0 5 MPI_Comm_rank() 0.0 0.0124 0.0124 4 0 3 applu => proc_grid 0.0 0.0124 0.0124 4 0 3 proc_grid 0.0 0.0107 0.0107 4 0 3 init_comm => nodedim 0.0 0.0107 0.0107 4 0 3 nodedim 0.0 0.0088 0.0088 1 0 9 read_input => MPI_Comm_size() 0.0 0.00859 0.00859 4 0 2 neighbors 0.0 0.00859 0.00859 4 0 2 applu => neighbors FUNCTION SUMMARY (mean): --------------------------------------------------------------------------------------- %Time Exclusive Inclusive #Call #Subrs Inclusive Name msec total msec usec/call --------------------------------------------------------------------------------------- 100.0 2 57,030 1 14.25 57030475 applu 95.8 1,882 54,609 2.25 37517 24270831 bcast_inputs 95.7 1,882 54,604 1.25 37508 43683863 applu => bcast_inputs 30.7 13,037 17,495 301 602 58126 bcast_inputs => rhs 30.7 13,037 17,495 301 602 58126 rhs 21.0 7,669 11,998 9300 18600 1290 bcast_inputs => blts 21.0 7,669 11,998 9300 18600 1290 blts 17.3 8,195 9,838 9300 18600 1058 buts 17.3 8,195 9,838 9300 18600 1058 bcast_inputs => buts 12.8 7,320 7,320 9300 0 787 jacld 12.8 7,320 7,320 9300 0 787 bcast_inputs => jacld 10.6 6,049 6,049 9300 0 651 bcast_inputs => jacu 10.6 6,049 6,049 9300 0 651 jacu 10.5 996 5,971 37200 37200 161 exchange_1 8.0 509 4,535 604 1812 7509 exchange_3 7.8 507 4,458 602 1806 7406 rhs => exchange_3 7.7 4,385 4,385 18600 0 236 MPI_Recv() 7.7 4,385 4,385 18600 0 236 exchange_1 => MPI_Recv() 7.6 512 4,329 18600 18600 233 blts => exchange_1 6.5 3,700 3,700 606 0 6106 MPI_Wait() 6.5 3,700 3,700 604 0 6126 exchange_3 => MPI_Wait() 2.9 484 1,642 18600 18600 88 buts => exchange_1 1.8 1,012 1,012 1 0 1012219 MPI_Finalize() 1.8 1,012 1,012 1 0 1012219 applu => MPI_Finalize() 1.7 833 950 1 44686.5 950499 applu => setiv 1.7 833 950 1 44686.5 950499 setiv 1.5 882 882 19206 0 46 MPI_Send() 1.0 589 589 18600 0 32 exchange_1 => MPI_Send() 0.5 292 292 604 0 484 exchange_3 => MPI_Send() 0.3 143 172 1 7448.75 172433 applu => error 0.3 143 172 1 7448.75 172433 error 0.3 148 148 53767.8 0 3 exact 0.2 60 136 1 2 136875 applu => erhs 0.2 60 136 1 2 136875 erhs 0.2 117 117 44686.5 0 3 setiv => exact 0.2 1 101 1 4 101750 init_comm 0.2 1 101 1 4 101750 applu => init_comm 0.2 100 100 1 0 100339 init_comm => MPI_Init() 0.2 100 100 1 0 100339 MPI_Init() 0.1 2 76 2 6 38436 erhs => exchange_3 0.1 30 42 1 1633.5 42318 setbv 0.1 30 42 1 1633.5 42318 applu => setbv 0.1 33 33 606 0 55 MPI_Irecv() 0.1 33 33 604 0 55 exchange_3 => MPI_Irecv() 0.0 25 25 8 0 3155 MPI_Allreduce() 0.0 19 19 7447.75 0 3 error => exact 0.0 3 18 3 3 6307 bcast_inputs => l2norm 0.0 3 18 3 3 6307 l2norm 0.0 15 15 3 0 5020 l2norm => MPI_Allreduce() 0.0 11 11 1633.5 0 7 setbv => exact 0.0 9 9 1 0 9312 error => MPI_Allreduce() 0.0 0.807 5 1 1.25 5351 read_input 0.0 0.807 5 1 1.25 5351 applu => read_input 0.0 0.232 4 1 9 4542 read_input => bcast_inputs 0.0 4 4 9 0 479 MPI_Bcast() 0.0 4 4 9 0 479 bcast_inputs => MPI_Bcast() 0.0 0.642 1 1 6 1447 applu => pintgr 0.0 0.642 1 1 6 1447 pintgr 0.0 0.541 0.541 1 0 541 bcast_inputs => MPI_Allreduce() 0.0 0.329 0.329 3 0 110 pintgr => MPI_Allreduce() 0.0 0.14 0.272 1 3 272 pintgr => exchange_4 0.0 0.14 0.272 1 3 272 exchange_4 0.0 0.259 0.259 1 0 259 applu => sethyper 0.0 0.259 0.259 1 0 259 sethyper 0.0 0.0702 0.102 1 1.5 102 pintgr => exchange_5 0.0 0.0702 0.102 1 1.5 102 exchange_5 0.0 0.0705 0.101 1 1.5 101 exchange_6 0.0 0.0705 0.101 1 1.5 101 pintgr => exchange_6 0.0 0.0852 0.0852 1 0 85 MPI_Barrier() 0.0 0.0852 0.0852 1 0 85 bcast_inputs => MPI_Barrier() 0.0 0.0558 0.0558 1 0 56 exchange_4 => MPI_Wait() 0.0 0.0481 0.0481 1 0 48 exchange_4 => MPI_Send() 0.0 0.0449 0.0449 2 0 22 bcast_inputs => MPI_Wtime() 0.0 0.0449 0.0449 2 0 22 MPI_Wtime() 0.0 0.0293 0.0293 1.25 0 23 MPI_Comm_size() 0.0 0.028 0.028 1 0 28 exchange_4 => MPI_Irecv() 0.0 0.0271 0.0271 1 0 27 init_comm => MPI_Comm_size() 0.0 0.0156 0.0156 0.5 0 31 exchange_5 => MPI_Wait() 0.0 0.0135 0.0135 0.5 0 27 exchange_6 => MPI_Wait() 0.0 0.0128 0.0128 1 0 13 applu => setcoeff 0.0 0.0128 0.0128 1 0 13 setcoeff 0.0 0.0118 0.0118 0.5 0 24 exchange_6 => MPI_Send() 0.0 0.011 0.011 0.5 0 22 exchange_5 => MPI_Send() 0.0 0.00981 0.00981 1 0 10 applu => subdomain 0.0 0.00981 0.00981 1 0 10 subdomain 0.0 0.00551 0.00551 0.5 0 11 exchange_5 => MPI_Irecv() 0.0 0.00504 0.00504 0.5 0 10 exchange_6 => MPI_Irecv() 0.0 0.00452 0.00452 1 0 5 MPI_Comm_rank() 0.0 0.00452 0.00452 1 0 5 init_comm => MPI_Comm_rank() 0.0 0.00311 0.00311 1 0 3 proc_grid 0.0 0.00311 0.00311 1 0 3 applu => proc_grid 0.0 0.00268 0.00268 1 0 3 nodedim 0.0 0.00268 0.00268 1 0 3 init_comm => nodedim 0.0 0.0022 0.0022 0.25 0 9 read_input => MPI_Comm_size() 0.0 0.00215 0.00215 1 0 2 neighbors 0.0 0.00215 0.00215 1 0 2 applu => neighbors