#include <stack>
#include "AXI_master_if.h"
#include "Memory_map.h"
// ---------------------------------------------------------------------------
// Build-mode switches.
// Only COMPILE_DMAC_INTERRUPT is enabled. The COMPILE_* switches are not
// consumed in the part of the file shown here (presumably they select the
// copy_result() variant -- TODO confirm).
// ---------------------------------------------------------------------------
//#define COMPILE_CPU_COPY
//#define COMPILE_DMAC_POLLING
#define COMPILE_DMAC_INTERRUPT
// The BROT_* switches select which body of render() is compiled (render() is
// split into #ifdef sections, one per switch). All three are disabled here.
//#define BROT_SOFTWARE
//#define BROT_HARDWARE_POLLING
//#define BROT_HARDWARE_INTERRUPT
//#define CLCDC_BASE SLAVE_BASE_ADDR_02
// Transfer-size codes passed as the `size` argument of master_read/master_write.
// The value is log2 of the byte count named by the macro.
#define DATA_SIZE_1BYTE 0b000
#define DATA_SIZE_2BYTE 0b001
#define DATA_SIZE_4BYTE 0b010
// Burst-type codes passed as the `burst` argument of master_read/master_write.
// NOTE: "INCRESE" is a misspelling of "INCREASE"; the name is used throughout
// the file, so it is kept as is.
#define BURST_FIXED 0b00
#define BURST_INCRESE 0b01
// Burst-length codes passed as the `len` argument: BURST_LEN_nn encodes a
// burst of nn beats as (nn - 1).
#define BURST_LEN_01 0x0
#define BURST_LEN_02 0x1
#define BURST_LEN_03 0x2
#define BURST_LEN_04 0x3
#define BURST_LEN_05 0x4
#define BURST_LEN_06 0x5
#define BURST_LEN_07 0x6
#define BURST_LEN_08 0x7
#define BURST_LEN_09 0x8
#define BURST_LEN_10 0x9
#define BURST_LEN_11 0xA
#define BURST_LEN_12 0xB
#define BURST_LEN_13 0xC
#define BURST_LEN_14 0xD
#define BURST_LEN_15 0xE
#define BURST_LEN_16 0xF
/*! Fixed point arithmetic shift. */
#define SHIFT 13
/*! Fixed point co-ord shift. */
#define COORD_SHIFT 20
// Multiplier applied to dx in render() to find the starting x coordinate
// (sx = cx - dx * CONST_VALUE).
#define CONST_VALUE (256 / 2)
// Cache model used by mem_write(): an access is charged the miss penalty once
// the running hit/(hit+miss) ratio exceeds CACHE_HIT_RATIO.
#define CACHE_HIT_RATIO 0.95
// Number of clock rising edges mem_write() waits for on a modelled miss.
#define CACHE_MISS_PENALTY 100
// Per-instruction-class cycle costs. They are not referenced in the part of
// the file shown here (presumably consumed by software_delay() -- TODO confirm).
#define SOFTWARE_JUMP_DELAY 5
#define SOFTWARE_MUL_DELAY 4
#define SOFTWARE_DIV_DELAY 10
#define SOFTWARE_ELSE_DELAY 1
// Number of render/copy iterations executed by zoom_loop().
#define FRACTAL_LOOP 1
SC_MODULE(fractal_processor) {
private:
// Master
// Identifier passed as the first argument of every master_read/master_write
// issued by this module.
sc_uint<2> Master_ID;
// Not referenced in the part of the file shown here.
sc_uint<32>* data_buffer;
// Cache-model counters: read by mem_write() to compute the running hit ratio
// and printed by processor_execute() when the software finishes.
unsigned int hit;
unsigned int miss;
// Tested inside render()'s interrupt-mode wait loops; it is set outside the
// part of the file shown here (presumably by an interrupt handler -- TODO confirm).
bool state_interrupt;
// Not referenced in the part of the file shown here (presumably the DMA
// counterpart of wait_brot_interrupt -- TODO confirm).
bool wait_dmac_interrupt;
// Set to true by render() before it starts the accelerator; render() then
// spins until something else clears it.
bool wait_brot_interrupt;
public:
// Bus interface used for all register/memory accesses; also supplies the
// clock (m_interface->clk) that mem_write() waits on.
AXI_master_if* m_interface;
// Interrupt input ports; their handlers are outside the part of the file
// shown here.
sc_in <bool> brot_interrupt;
sc_in <bool> dmac_interrupt;
/**
 * Timing record for one invocation of a profiled function.
 *
 * sw_profiling() default-constructs one of these on function entry and copies
 * it into FUC_INFO::single_info, so every member needs a defined initial
 * value. `num` previously had none, which made that copy read an
 * indeterminate value.
 */
typedef struct single_func_info {
    sc_time start;          // simulation time at entry (profiling state 0)
    sc_time end;            // simulation time at exit (profiling state 3)
    sc_time duration;       // not written by sw_profiling(); kept for layout compatibility
    sc_time net_duration;   // accumulated self time, excluding sub-function calls
    bool sub_func = false;  // not written by sw_profiling()
    unsigned int num = 0;   // not written by sw_profiling(); zero-initialised so copies are well defined
    sc_time tmp_time;       // time at which the current self-time interval began
}SINGLE_FUC_INFO;
/**
 * Aggregated profiling data for one function name.
 *
 * One instance exists per distinct name seen by sw_profiling(); it owns the
 * list of per-call records for that name.
 */
struct total_func_info {
    std::string function_name;            // key under which the entry is stored
    unsigned int number_of_calls = 0;     // bumped on every function entry
    sc_time total_duration;               // not written by sw_profiling()
    //sc_time self_duration;
    sc_time avg_duration;                 // not written by sw_profiling()
    sc_time start;                        // time of the first recorded entry
    sc_time end;                          // time of the most recent recorded exit
    //SINGLE_FUC_INFO** single_info;
    std::vector <SINGLE_FUC_INFO> single_info;  // one record per call, in call order
};
typedef total_func_info FUC_INFO;
//std::stack<std::string, FUC_INFO*> sw_profile;
// Stack of profiled functions that have been entered but not yet exited.
// Each entry is (function name, index of that call's record inside the
// function's FUC_INFO::single_info vector).
std::stack<std::pair<std::string, size_t>> func_stack;
// Profiling data per function name. The FUC_INFO objects are allocated with
// `new` in sw_profiling() and are not freed in the part of the file shown here.
std::map<std::string, FUC_INFO*> sw_profile;
void sw_profiling(const char* func, int state) {
//////////////////////////////////
// About state
// 0: start
// 1: sub-function call
// 2: sub-function return
// 3: end
// 4: simulation end
//////////////////////////////////
std::string current_funct = func;
std::map<std::string, FUC_INFO*>::iterator it = sw_profile.find(current_funct);
std::vector <SINGLE_FUC_INFO>::iterator it2;
if (state == 0) {
if (sw_profile.empty()) {
FUC_INFO* new_info = new FUC_INFO;
new_info->function_name = current_funct;
new_info->start = sc_time_stamp();
sw_profile[current_funct] = new_info;
it = sw_profile.begin();
}
else if (it == sw_profile.end()) {
FUC_INFO* new_info = new FUC_INFO;
new_info->function_name = current_funct;
new_info->start = sc_time_stamp();
sw_profile[current_funct] = new_info;
it = sw_profile.find(current_funct);
}
SINGLE_FUC_INFO new_single_info;
new_single_info.start = sc_time_stamp();
new_single_info.net_duration *= 0;
new_single_info.tmp_time = sc_time_stamp();
func_stack.push(std::make_pair(current_funct, it->second->single_info.size()));
it->second->single_info.push_back(new_single_info);
it->second->number_of_calls++;
}
if (state == 1) {
if (func_stack.top().first == current_funct) {
it->second->single_info[func_stack.top().second].net_duration +=
sc_time_stamp() - it->second->single_info[func_stack.top().second].tmp_time;
}
else {
cout << "stack is not matched" << endl;
}
}
if (state == 2) {
if (func_stack.top().first == current_funct) {
it->second->single_info[func_stack.top().second].tmp_time = sc_time_stamp();
}
else {
cout << "stack is not matched" << endl;
cout << func_stack.top().first << endl;
cout << current_funct << endl;
}
}
if (state == 3) {
it = sw_profile.find(current_funct);
if (func_stack.top().first == current_funct) {
it->second->single_info[func_stack.top().second].end = sc_time_stamp();
it->second->end = sc_time_stamp();
it->second->single_info[func_stack.top().second].net_duration +=
sc_time_stamp() - it->second->single_info[func_stack.top().second].tmp_time;
func_stack.pop();
}
else {
cout << "stack is not matched" << endl;
cout << func_stack.top().first << endl;
cout << current_funct << endl;
}
}
if (state == 4) {
ofstream outFile("software profiling.csv");
if (!outFile.is_open()) {
cout << sc_time_stamp() << "File is not opend in software profiling" << endl;
sc_stop();
}
outFile << "Function name, " << "No. Calls, " << "start, " << "end, " << "total_duration, " << "avg_duration, " << "Self_duration, "
<< "Ave_self_duation" << endl;
for (it = sw_profile.begin(); it != sw_profile.end(); it++) {
sc_time total_duration = it->second->end - it->second->start;
outFile << it->first << ", " << it->second->number_of_calls << ", " << it->second->start << ", " << it->second->end << ", "
<< total_duration << ", " << total_duration / it->second->number_of_calls;
sc_time total_self_duation;
for (it2 = it->second->single_info.begin(); it2 != it->second->single_info.end(); it2++) {
//cout << it2->net_duration << endl;
total_self_duation += it2->net_duration;
//cout << it2->start << it2->end << endl;
}
outFile << ", " << total_self_duation << ", " << total_self_duation / it->second->number_of_calls << endl;
}
outFile.close();
}
}
/**
 * Write access routed through the simple cache model.
 *
 * When the running hit ratio hit/(hit+miss) is above CACHE_HIT_RATIO the
 * access is treated as a miss: the process waits CACHE_MISS_PENALTY rising
 * clock edges and then issues the write on the bus. Otherwise it is treated
 * as a hit and only one rising clock edge is consumed.
 *
 * NOTE(review): on the hit path nothing is forwarded to m_interface, and
 * neither `hit` nor `miss` is updated here -- confirm both are intended.
 *
 * @param ID    Master identifier forwarded to master_write.
 * @param addr  Target address.
 * @param data  Pointer to the data to write.
 * @param size  Transfer-size code (DATA_SIZE_*).
 * @param burst Burst-type code (BURST_*).
 * @param len   Burst-length code (BURST_LEN_*).
 * @return Always 0. The function used to fall off its end without returning,
 *         which is undefined behaviour for a non-void function.
 */
int mem_write(sc_uint<2> ID, sc_uint<32> addr, sc_uint<32> * data, sc_uint<3> size, sc_uint<2> burst, sc_uint<4> len) {
    const double accesses = static_cast<double>(hit) + static_cast<double>(miss);
    // With no recorded accesses the old 0/0 division produced NaN, which
    // compared false and selected the hit path; keep that outcome explicitly.
    const double hit_ratio = (accesses > 0.0) ? static_cast<double>(hit) / accesses : 0.0;

    if (hit_ratio > CACHE_HIT_RATIO) {
        // miss
        for (int i = 0; i < CACHE_MISS_PENALTY; i++) {
            wait(m_interface->clk.posedge_event());
        }
        m_interface->master_write(ID, addr, data, size, burst, len);
    }
    else {
        wait(m_interface->clk.posedge_event());
    }
    return 0;
}
/**
 * Process body of the modelled CPU.
 *
 * After an initial wait() it runs software_main() once, prints the cache-model
 * counters and asks the kernel to stop the simulation. The trailing wait()
 * suspends the process again, so the loop only repeats if it is resumed.
 * How the process is registered and what it is sensitive to is not visible
 * in this part of the file.
 */
void processor_execute() {
    wait();
    for (;;) {
        // software_main() was once called with (argc, argv); it now takes no arguments.
        software_main();
        cout << "hit: " << hit << endl
             << "miss: " << miss << endl;
        sc_stop();
        wait();
    }
}
//int master_write(sc_uint<2> ID, sc_uint<32> addr, sc_uint<32> * data, sc_uint<3> size, sc_uint<2> burst, sc_uint<4> len) {
/**
 * LCD controller initialisation.
 *
 * The register writes are currently disabled, so the function only records
 * its own entry and exit with the software profiler.
 */
void initLCDC() {
    const char* const self = __func__;
    sw_profiling(self, 0);  // profiler: function entry
    // Disabled register programming, kept for reference:
    //m_interface->master_write(Master_ID, (CLCDC_BASE + 0), &sc_uint<32>(SCRN_WIDTH), DATA_SIZE_4BYTE, BURST_INCRESE, BURST_LEN_01);
    //m_interface->master_write(Master_ID, (CLCDC_BASE + 1), &sc_uint<32>(SCRN_HEIGHT), DATA_SIZE_4BYTE, BURST_INCRESE, BURST_LEN_01);
    //m_interface->master_write(Master_ID, (CLCDC_BASE + 2), &sc_uint<32>(1), DATA_SIZE_4BYTE, BURST_INCRESE, BURST_LEN_01);
    //m_interface->master_write(Master_ID, (CLCDC_BASE + 3), &sc_uint<32>(1), DATA_SIZE_4BYTE, BURST_INCRESE, BURST_LEN_01);
    //m_interface->master_write(Master_ID, (CLCDC_BASE + 4), &sc_uint<32>(1), DATA_SIZE_4BYTE, BURST_INCRESE, BURST_LEN_01);
    //m_interface->master_write(Master_ID, (CLCDC_BASE + 5), &sc_uint<32>(1), DATA_SIZE_4BYTE, BURST_INCRESE, BURST_LEN_01);
    sw_profiling(self, 3);  // profiler: function exit
}
/**
 * Top-level fractal loop: renders a frame, copies it out, then zooms in and
 * recentres slightly, FRACTAL_LOOP times.
 *
 * Coordinates are fixed point with COORD_SHIFT fractional bits. Between the
 * real work, software_delay() calls charge the cycle cost of the instruction
 * sequences quoted in the comments (disassembly of the target build), and
 * sw_profiling() brackets the function and each sub-function call.
 *
 * The centre x coordinate was previously computed as `(-1) << COORD_SHIFT`;
 * left-shifting a negative value is undefined before C++20, so it is now
 * written as a negated positive shift, which yields the same value
 * (0xfff00000 in the quoted literal pool).
 */
void zoom_loop(void) {
    sw_profiling(__func__, 0);

    // Centre the view on (-1, 0)
    long cx = -(1L << COORD_SHIFT);
    long cy = 0;
    // Calculate dx/dy values for initial view 4 units wide and high,
    // given that screen is 256 dx units across and 512 dy units high
    long dx = 4L << (COORD_SHIFT - 8); // = (4/256) * pow(2,COORD_SHIFT)
    long dy = 4L << (COORD_SHIFT - 9); // = (4/512) * pow(2,COORD_SHIFT)
    /*
    push {r7, lr}
    sub sp, #24
    add r7, sp, #0
    ldr r3, [pc, #108] ; (33c <_Z9zoom_loopv+0x74>)
    str r3, [r7, #20]
    movs r3, #0
    str r3, [r7, #4]
    mov.w r3, #16384 ; 0x4000
    str r3, [r7, #16]
    mov.w r3, #8192 ; 0x2000
    str r3, [r7, #12]
    movs r3, #0
    str r3, [r7, #8]
    ldr r3, [r7, #8]
    total 14
    load 2
    store 6
    else instruction 6
    */
    // NOTE(review): unlike render(), no software_delay() is charged for this
    // prologue -- confirm whether that is intentional.
    for (int count = 0; count < FRACTAL_LOOP; count++) {
        /*
        cmp r3, #99 ; 0x63
        bgt.n 332 <_Z9zoom_loopv+0x6a> // count < 100 compare here
        2ec: 68fb ldr r3, [r7, #12]
        2ee: 693a ldr r2, [r7, #16]
        2f0: 6879 ldr r1, [r7, #4]
        2f2: 6978 ldr r0, [r7, #20]
        2f4: f7ff fffe bl 8c <_Z6renderiiii>
        total 16
        load 2
        store 5
        jump 1
        else instruction 5
        */
        software_delay(5, 2, 1, 0, 0, 5);
        sw_profiling(__func__, 1);   // pause self time around the sub-call
        render(cx, cy, dx, dy);
        sw_profiling(__func__, 2);   // resume self time
        /*
        2f8: f7ff fffe bl 238 <_Z11copy_resultv>
        */
        software_delay(0, 0, 1, 0, 0, 0);
        sw_profiling(__func__, 1);
        copy_result();
        sw_profiling(__func__, 2);
        // Zoom in and recentre slightly.
        cx -= 2 * dx;
        dx = dx * 16 / 17;
        dy = dy * 16 / 17;
        /*
        2fc: 693b ldr r3, [r7, #16]
        2fe: 005b lsls r3, r3, #1
        300: 697a ldr r2, [r7, #20]
        302: 1ad3 subs r3, r2, r3
        304: 617b str r3, [r7, #20]
        306: 693b ldr r3, [r7, #16]
        308: 011b lsls r3, r3, #4
        30a: 4a0d ldr r2, [pc, #52] ; (340 <_Z9zoom_loopv+0x78>)
        30c: fb82 1203 smull r1, r2, r2, r3 // 2 cycles
        310: 10d2 asrs r2, r2, #3
        312: 17db asrs r3, r3, #31
        314: 1ad3 subs r3, r2, r3
        316: 613b str r3, [r7, #16]
        318: 68fb ldr r3, [r7, #12]
        31a: 011b lsls r3, r3, #4
        31c: 4a08 ldr r2, [pc, #32] ; (340 <_Z9zoom_loopv+0x78>)
        31e: fb82 1203 smull r1, r2, r2, r3 // 2 cycles
        322: 10d2 asrs r2, r2, #3
        324: 17db asrs r3, r3, #31
        326: 1ad3 subs r3, r2, r3
        328: 60fb str r3, [r7, #12]
        32a: 68bb ldr r3, [r7, #8]
        32c: 3301 adds r3, #1
        32e: 60bb str r3, [r7, #8]
        330: e7d9 b.n 2e6 <_Z9zoom_loopv+0x1e>
        total 25
        load 7
        store 4
        jump 1
        else instruction (mul 2) + 11
        */
        // NOTE(review): the other calls in this file pass the store count
        // first and the load count second; here 7 (loads) comes first --
        // confirm the intended argument order.
        software_delay(7, 4, 1, 2, 0, 11);
    }
    /*
    332: bf00 nop
    334: 3718 adds r7, #24
    336: 46bd mov sp, r7
    338: bd80 pop {r7, pc}
    33a: bf00 nop
    33c: fff00000 .word 0xfff00000
    340: 78787879 .word 0x78787879
    total 5
    load 1
    store 0
    jump 0
    else instruction 4
    */
    software_delay(0, 1, 0, 0, 0, 4);
    sw_profiling(__func__, 3);
}
void render(int cx, int cy, int dx, int dy) {
sw_profiling(__func__, 0);
#ifdef BROT_HARDWARE_POLLING
int sx = cx - dx * CONST_VALUE;
int sy = 0 * dy;
int py = sy;
int x, y;
int nYStart = SCRN_HEIGHT / 2;
/*
8c: b580 push {r7, lr}
8e: b08e sub sp, #56 ; 0x38
90: af00 add r7, sp, #0
92: 60f8 str r0, [r7, #12]
94: 60b9 str r1, [r7, #8]
96: 607a str r2, [r7, #4]
98: 603b str r3, [r7, #0]
9a: 687b ldr r3, [r7, #4]
9c: 01db lsls r3, r3, #7
9e: 68fa ldr r2, [r7, #12]
a0: 1ad3 subs r3, r2, r3
a2: 627b str r3, [r7, #36] ; 0x24
a4: 2300 movs r3, #0
a6: 623b str r3, [r7, #32]
a8: 6a3b ldr r3, [r7, #32]
aa: 637b str r3, [r7, #52] ; 0x34
ac: 2332 movs r3, #50 ; 0x32
ae: 61fb str r3, [r7, #28]
b0: 69fb ldr r3, [r7, #28]
b2: 3b01 subs r3, #1
b4: 62fb str r3, [r7, #44] ; 0x2c
total 24
store 10
load 4
jump 0
else instruction 7
*/
software_delay(10, 4, 0, 0, 0, 7);
for (y = nYStart - 1; y >= 0; y--) {
int px = sx;
/*
b6: 6afb ldr r3, [r7, #44] ; 0x2c
b8: 2b00 cmp r3, #0 y>=0
ba: f2c0 80ae blt.w 21a <_Z6renderiiii+0x18e> //loop finish
be: 6a7b ldr r3, [r7, #36] ; 0x24
c0: 62bb str r3, [r7, #40] ; 0x28
c2: 2300 movs r3, #0
c4: 633b str r3, [r7, #48] ; 0x30
c6: 6b3b ldr r3, [r7, #48] ; 0x30
total 8
load 3
store 2
jump 1
else instruction 2
*/
software_delay(2, 3, 1, 0, 0, 2);
for (x = 0; x < SCRN_WIDTH; x += 2) {
/*
c8: 2b9f cmp r3, #159 ; 0x9f
ca: f300 8083 bgt.w 1d4 <_Z6renderiiii+0x148>
store 0
load 0
jump 1
else instruction 1
*/
software_delay(0, 0, 1, 0, 0, 1);
int offset;
int py_temp;
int col;
unsigned char pixelR, pixelG, pixelB;
/*
ce: 6afa ldr r2, [r7, #44] ; 0x2c
d0: 4613 mov r3, r2
d2: 009b lsls r3, r3, #2
d4: 4413 add r3, r2
d6: 015b lsls r3, r3, #5
d8: 461a mov r2, r3
da: 6b3b ldr r3, [r7, #48] ; 0x30
dc: 441a add r2, r3
de: 4613 mov r3, r2
e0: 005b lsls r3, r3, #1
e2: 4413 add r3, r2
e4: 61bb str r3, [r7, #24]
e6: 6abb ldr r3, [r7, #40] ; 0x28
e8: 11da asrs r2, r3, #7
ea: 6b7b ldr r3, [r7, #52] ; 0x34
ec: 11db asrs r3, r3, #7
ee: 4619 mov r1, r3
f0: 4610 mov r0, r2
total 19
load 4
store 1
jump 0
else instruction 13
*/
software_delay(4, 4, 0, 0, 0, 13);
while (!m_interface->master_read(Master_ID, (BROT_BASE + BROT_REG_OFFSET_STATE), DATA_SIZE_4BYTE, BURST_INCRESE, BURST_LEN_01));
m_interface->master_write(Master_ID, (BROT_BASE + BROT_REG_OFFSET_INT_ENABLE), &sc_uint<32>(0), DATA_SIZE_4BYTE, BURST_INCRESE, BURST_LEN_01);
m_interface->master_write(Master_ID, (BROT_BASE + BROT_REG_OFFSET_PX), &sc_uint<32>(px), DATA_SIZE_4BYTE, BURST_INCRESE, BURST_LEN_01);
m_interface->master_write(Master_ID, (BROT_BASE + BROT_REG_OFFSET_PY), &sc_uint<32>(py), DATA_SIZE_4BYTE, BURST_INCRESE, BURST_LEN_01);
m_interface->master_write(Master_ID, (BROT_BASE + BROT_REG_OFFSET_DX), &sc_uint<32>(dx), DATA_SIZE_4BYTE, BURST_INCRESE, BURST_LEN_01);
m_interface->master_write(Master_ID, (BROT_BASE + BROT_REG_OFFSET_OPSTART), &sc_uint<32>(1), DATA_SIZE_4BYTE, BURST_INCRESE, BURST_LEN_01);
while (!m_interface->master_read(Master_ID, (BROT_BASE + BROT_REG_OFFSET_STATE), DATA_SIZE_4BYTE, BURST_INCRESE, BURST_LEN_01));
col = m_interface->master_read(Master_ID, (BROT_BASE + BROT_REG_OFFSET_COL00), DATA_SIZE_4BYTE, BURST_INCRESE, BURST_LEN_01)[0];
pixelR = (col) << 1;
pixelG = (col * 9) << 1;
pixelB = (col * 7) << 1;
offset = (y * SCRN_WIDTH + x) * 3;
/*
f6: 6178 str r0, [r7, #20]
f8: 697b ldr r3, [r7, #20]
fa: b2db uxtb r3, r3 ;UXTB extends an 8-bit value to a 32-bit value
fc: 005b lsls r3, r3, #1
fe: 74fb strb r3, [r7, #19] ; pixelR
100: 697b ldr r3, [r7, #20]
102: b2db uxtb r3, r3
104: 461a mov r2, r3
106: 00d2 lsls r2, r2, #3
108: 4413 add r3, r2
10a: b2db uxtb r3, r3
10c: 005b lsls r3, r3, #1
10e: 74bb strb r3, [r7, #18] ; pixelG
110: 697b ldr r3, [r7, #20]
112: b2db uxtb r3, r3
114: 461a mov r2, r3
116: 00d2 lsls r2, r2, #3
118: 1ad3 subs r3, r2, r3
11a: b2db uxtb r3, r3
11c: 005b lsls r3, r3, #1
11e: 747b strb r3, [r7, #17] ; pixelB
120: 69bb ldr r3, [r7, #24]
122: 3303 adds r3, #3
124: 4940 ldr r1, [pc, #256] ; (228 <_Z6renderiiii+0x19c>)
126: 7cfa ldrb r2, [r7, #19]
128: 54ca strb r2, [r1, r3]
12a: 4a3f ldr r2, [pc, #252] ; (228 <_Z6renderiiii+0x19c>)
12c: 5cd1 ldrb r1, [r2, r3]
12e: 4a3e ldr r2, [pc, #248] ; (228 <_Z6renderiiii+0x19c>)
130: 69bb ldr r3, [r7, #24]
132: 4413 add r3, r2
134: 460a mov r2, r1
136: 701a strb r2, [r3, #0]
total 33
load 12
store 4
jump 0
else instruction 17
*/
software_delay(12, 4, 0, 0, 0, 17);
m_interface->master_write(Master_ID, (RESULT_BUFF_BASE + offset + 0), &sc_uint<32>(pixelR), DATA_SIZE_1BYTE, BURST_INCRESE, BURST_LEN_01);
m_interface->master_write(Master_ID, (RESULT_BUFF_BASE + offset + 3), &sc_uint<32>(pixelR), DATA_SIZE_1BYTE, BURST_INCRESE, BURST_LEN_01);
m_interface->master_write(Master_ID, (RESULT_BUFF_BASE + offset + 1), &sc_uint<32>(pixelG), DATA_SIZE_1BYTE, BURST_INCRESE, BURST_LEN_01);
m_interface->master_write(Master_ID, (RESULT_BUFF_BASE + offset + 4), &sc_uint<32>(pixelG), DATA_SIZE_1BYTE, BURST_INCRESE, BURST_LEN_01);
m_interface->master_write(Master_ID, (RESULT_BUFF_BASE + offset + 2), &sc_uint<32>(pixelB), DATA_SIZE_1BYTE, BURST_INCRESE, BURST_LEN_01);
m_interface->master_write(Master_ID, (RESULT_BUFF_BASE + offset + 5), &sc_uint<32>(pixelB), DATA_SIZE_1BYTE, BURST_INCRESE, BURST_LEN_01);
offset = ((SCRN_HEIGHT - 1 - y) * SCRN_WIDTH + x) * 3;
/*
164: 6afb ldr r3, [r7, #44] ; 0x2c
166: f1c3 0263 rsb r2, r3, #99 ; 0x63
16a: 4613 mov r3, r2
16c: 009b lsls r3, r3, #2
16e: 4413 add r3, r2
170: 015b lsls r3, r3, #5
172: 461a mov r2, r3
174: 6b3b ldr r3, [r7, #48] ; 0x30
176: 441a add r2, r3
178: 4613 mov r3, r2
17a: 005b lsls r3, r3, #1
17c: 4413 add r3, r2
17e: 61bb str r3, [r7, #24]
180: 69bb ldr r3, [r7, #24]
182: 3303 adds r3, #3
total 15
load 3
store 1
jump 0
else instruction 11
*/
software_delay(3, 1, 0, 0, 0, 11);
m_interface->master_write(Master_ID, (RESULT_BUFF_BASE + offset + 0), &sc_uint<32>(pixelR), DATA_SIZE_1BYTE, BURST_INCRESE, BURST_LEN_01);
m_interface->master_write(Master_ID, (RESULT_BUFF_BASE + offset + 3), &sc_uint<32>(pixelR), DATA_SIZE_1BYTE, BURST_INCRESE, BURST_LEN_01);
m_interface->master_write(Master_ID, (RESULT_BUFF_BASE + offset + 1), &sc_uint<32>(pixelG), DATA_SIZE_1BYTE, BURST_INCRESE, BURST_LEN_01);
m_interface->master_write(Master_ID, (RESULT_BUFF_BASE + offset + 4), &sc_uint<32>(pixelG), DATA_SIZE_1BYTE, BURST_INCRESE, BURST_LEN_01);
m_interface->master_write(Master_ID, (RESULT_BUFF_BASE + offset + 2), &sc_uint<32>(pixelB), DATA_SIZE_1BYTE, BURST_INCRESE, BURST_LEN_01);
m_interface->master_write(Master_ID, (RESULT_BUFF_BASE + offset + 5), &sc_uint<32>(pixelB), DATA_SIZE_1BYTE, BURST_INCRESE, BURST_LEN_01);
px += dx;
/*
1b8: 69ba ldr r2, [r7, #24]
1ba: 3202 adds r2, #2
1bc: 491a ldr r1, [pc, #104] ; (228 <_Z6renderiiii+0x19c>)
1be: 5cc9 ldrb r1, [r1, r3]
1c0: 4b19 ldr r3, [pc, #100] ; (228 <_Z6renderiiii+0x19c>)
1c2: 5499 strb r1, [r3, r2]
1c4: 6aba ldr r2, [r7, #40] ; 0x28
1c6: 687b ldr r3, [r7, #4]
1c8: 4413 add r3, r2
1ca: 62bb str r3, [r7, #40] ; 0x28
1cc: 6b3b ldr r3, [r7, #48] ; 0x30
1ce: 3302 adds r3, #2
1d0: 633b str r3, [r7, #48] ; 0x30
1d2: e778 b.n c6 <_Z6renderiiii+0x3a>
total 14
load 7
store 3
jump 1
else instruction 3
*/
software_delay(7, 3, 1, 0, 0, 3);
}
py -= dy;
/*
1d4: 6b7a ldr r2, [r7, #52] ; 0x34
1d6: 683b ldr r3, [r7, #0]
1d8: 1ad3 subs r3, r2, r3
1da: 637b str r3, [r7, #52] ; 0x34
1dc: 6af9 ldr r1, [r7, #44] ; 0x2c
1de: 4b13 ldr r3, [pc, #76] ; (22c <_Z6renderiiii+0x1a0>)
1e0: fb83 2301 smull r2, r3, r3, r1 // 2 cycles
1e4: 109a asrs r2, r3, #2
1e6: 17cb asrs r3, r1, #31
1e8: 1ad2 subs r2, r2, r3
1ea: 4613 mov r3, r2
1ec: 009b lsls r3, r3, #2
1ee: 4413 add r3, r2
1f0: 005b lsls r3, r3, #1
1f2: 1aca subs r2, r1, r3
1f4: 2a00 cmp r2, #0
1f6: d10c bne.n 212 <_Z6renderiiii+0x186>
total 17
load 4
store 1
jump 1
else instruction 1 (mul) + 11
*/
software_delay(1, 4, 1, 0, 1, 11);
if ((y % 10) == 0) {
/*
1f8: 69fa ldr r2, [r7, #28]
1fa: 6afb ldr r3, [r7, #44] ; 0x2c
1fc: 1ad3 subs r3, r2, r3
1fe: 2264 movs r2, #100 ; 0x64
200: fb02 f203 mul.w r2, r2, r3
204: 69fb ldr r3, [r7, #28]
206: fb92 f3f3 sdiv r3, r2, r3
20a: 4619 mov r1, r3
20c: 4808 ldr r0, [pc, #32] ; (230 <_Z6renderiiii+0x1a4>)
20e: f7ff fffe bl 0 <printf>
total 10
load 4
store 0
jump 1
else instruction 1 (mul) + 1 (sdiv) + 3
*/
software_delay(0, 4, 1, 1, 1, 3);
// prinf delat = 1026 cycles
wait(1026);
printf("Calc. - %d%%\n", (nYStart - y) * 100 / (nYStart));
}
/*
212: 6afb ldr r3, [r7, #44] ; 0x2c
214: 3b01 subs r3, #1
216: 62fb str r3, [r7, #44] ; 0x2c
218: e74d b.n b6 <_Z6renderiiii+0x2a> // jump to loop
total 4
load 1
store 1
jump 1
else instruction 1
*/
software_delay(1, 1, 1, 0, 0, 1);
}
wait(1026);
printf("Calc. completed.\n");
#endif //BROT_HARDWARE
#ifdef BROT_HARDWARE_INTERRUPT
int cnt = 0;
int sx = cx - dx * CONST_VALUE;
int sy = 0 * dy;
int py = sy;
int x, y;
int nYStart = SCRN_HEIGHT / 2;
/*
8c: b580 push {r7, lr}
8e: b08e sub sp, #56 ; 0x38
90: af00 add r7, sp, #0
92: 60f8 str r0, [r7, #12]
94: 60b9 str r1, [r7, #8]
96: 607a str r2, [r7, #4]
98: 603b str r3, [r7, #0]
9a: 687b ldr r3, [r7, #4]
9c: 01db lsls r3, r3, #7
9e: 68fa ldr r2, [r7, #12]
a0: 1ad3 subs r3, r2, r3
a2: 627b str r3, [r7, #36] ; 0x24
a4: 2300 movs r3, #0
a6: 623b str r3, [r7, #32]
a8: 6a3b ldr r3, [r7, #32]
aa: 637b str r3, [r7, #52] ; 0x34
ac: 2332 movs r3, #50 ; 0x32
ae: 61fb str r3, [r7, #28]
b0: 69fb ldr r3, [r7, #28]
b2: 3b01 subs r3, #1
b4: 62fb str r3, [r7, #44] ; 0x2c
total 24
store 10
load 4
jump 0
else instruction 7
*/
software_delay(10, 4, 0, 0, 0, 7);
for (y = nYStart - 1; y >= 0; y--) {
int px = sx;
/*
b6: 6afb ldr r3, [r7, #44] ; 0x2c
b8: 2b00 cmp r3, #0 y>=0
ba: f2c0 80ae blt.w 21a <_Z6renderiiii+0x18e> //loop finish
be: 6a7b ldr r3, [r7, #36] ; 0x24
c0: 62bb str r3, [r7, #40] ; 0x28
c2: 2300 movs r3, #0
c4: 633b str r3, [r7, #48] ; 0x30
c6: 6b3b ldr r3, [r7, #48] ; 0x30
total 8
load 3
store 2
jump 1
else instruction 2
*/
software_delay(2, 3, 1, 0, 0, 2);
for (x = 0; x < SCRN_WIDTH; x += 2) {
/*
c8: 2b9f cmp r3, #159 ; 0x9f
ca: f300 8083 bgt.w 1d4 <_Z6renderiiii+0x148>
store 0
load 0
jump 1
else instruction 1
*/
int offset;
int py_temp;
int col;
unsigned char pixelR, pixelG, pixelB;
/*
ce: 6afa ldr r2, [r7, #44] ; 0x2c
d0: 4613 mov r3, r2
d2: 009b lsls r3, r3, #2
d4: 4413 add r3, r2
d6: 015b lsls r3, r3, #5
d8: 461a mov r2, r3
da: 6b3b ldr r3, [r7, #48] ; 0x30
dc: 441a add r2, r3
de: 4613 mov r3, r2
e0: 005b lsls r3, r3, #1
e2: 4413 add r3, r2
e4: 61bb str r3, [r7, #24]
e6: 6abb ldr r3, [r7, #40] ; 0x28
e8: 11da asrs r2, r3, #7
ea: 6b7b ldr r3, [r7, #52] ; 0x34
ec: 11db asrs r3, r3, #7
ee: 4619 mov r1, r3
f0: 4610 mov r0, r2
total 18
load 4
store 1
jump 0
else instruction 13
*/
software_delay(4, 4, 0, 0, 0, 13);
while (wait_brot_interrupt) {
wait();
if (!state_interrupt)
cnt++;//other job
}
wait_brot_interrupt = true;
m_interface->master_write(Master_ID, (BROT_BASE + BROT_REG_OFFSET_INT_ENABLE), &sc_uint<32>(1), DATA_SIZE_4BYTE, BURST_INCRESE, BURST_LEN_01);
m_interface->master_write(Master_ID, (BROT_BASE + BROT_REG_OFFSET_PX), &sc_uint<32>(px), DATA_SIZE_4BYTE, BURST_INCRESE, BURST_LEN_01);
m_interface->master_write(Master_ID, (BROT_BASE + BROT_REG_OFFSET_PY), &sc_uint<32>(py), DATA_SIZE_4BYTE, BURST_INCRESE, BURST_LEN_01);
m_interface->master_write(Master_ID, (BROT_BASE + BROT_REG_OFFSET_DX), &sc_uint<32>(dx), DATA_SIZE_4BYTE, BURST_INCRESE, BURST_LEN_01);
m_interface->master_write(Master_ID, (BROT_BASE + BROT_REG_OFFSET_OPSTART), &sc_uint<32>(1), DATA_SIZE_4BYTE, BURST_INCRESE, BURST_LEN_01);
while (wait_brot_interrupt) {//operation done check
wait();
if(!state_interrupt)
cnt++;//other job
}
col = m_interface->master_read(Master_ID, (BROT_BASE + BROT_REG_OFFSET_COL00), DATA_SIZE_4BYTE, BURST_INCRESE, BURST_LEN_01)[0];
pixelR = (col) << 1;
pixelG = (col * 9) << 1;
pixelB = (col * 7) << 1;
offset = (y * SCRN_WIDTH + x) * 3;
/*
f6: 6178 str r0, [r7, #20]
f8: 697b ldr r3, [r7, #20]
fa: b2db uxtb r3, r3 ;UXTB extends an 8-bit value to a 32-bit value
fc: 005b lsls r3, r3, #1
fe: 74fb strb r3, [r7, #19] ; pixelR
100: 697b ldr r3, [r7, #20]
102: b2db uxtb r3, r3
104: 461a mov r2, r3
106: 00d2 lsls r2, r2, #3
108: 4413 add r3, r2
10a: b2db uxtb r3, r3
10c: 005b lsls r3, r3, #1
10e: 74bb strb r3, [r7, #18] ; pixelG
110: 697b ldr r3, [r7, #20]
112: b2db uxtb r3, r3
114: 461a mov r2, r3
116: 00d2 lsls r2, r2, #3
118: 1ad3 subs r3, r2, r3
11a: b2db uxtb r3, r3
11c: 005b lsls r3, r3, #1
11e: 747b strb r3, [r7, #17] ; pixelB
120: 69bb ldr r3, [r7, #24]
122: 3303 adds r3, #3
124: 4940 ldr r1, [pc, #256] ; (228 <_Z6renderiiii+0x19c>)
126: 7cfa ldrb r2, [r7, #19]
128: 54ca strb r2, [r1, r3]
12a: 4a3f ldr r2, [pc, #252] ; (228 <_Z6renderiiii+0x19c>)
12c: 5cd1 ldrb r1, [r2, r3]
12e: 4a3e ldr r2, [pc, #248] ; (228 <_Z6renderiiii+0x19c>)
130: 69bb ldr r3, [r7, #24]
132: 4413 add r3, r2
134: 460a mov r2, r1
136: 701a strb r2, [r3, #0]
total 33
load 12
store 4
jump 0
else instruction 17
*/
software_delay(12, 4, 0, 0, 0, 17);
m_interface->master_write(Master_ID, (RESULT_BUFF_BASE + offset + 0), &sc_uint<32>(pixelR), DATA_SIZE_1BYTE, BURST_INCRESE, BURST_LEN_01);
m_interface->master_write(Master_ID, (RESULT_BUFF_BASE + offset + 3), &sc_uint<32>(pixelR), DATA_SIZE_1BYTE, BURST_INCRESE, BURST_LEN_01);
m_interface->master_write(Master_ID, (RESULT_BUFF_BASE + offset + 1), &sc_uint<32>(pixelG), DATA_SIZE_1BYTE, BURST_INCRESE, BURST_LEN_01);
m_interface->master_write(Master_ID, (RESULT_BUFF_BASE + offset + 4), &sc_uint<32>(pixelG), DATA_SIZE_1BYTE, BURST_INCRESE, BURST_LEN_01);
m_interface->master_write(Master_ID, (RESULT_BUFF_BASE + offset + 2), &sc_uint<32>(pixelB), DATA_SIZE_1BYTE, BURST_INCRESE, BURST_LEN_01);
m_interface->master_write(Master_ID, (RESULT_BUFF_BASE + offset + 5), &sc_uint<32>(pixelB), DATA_SIZE_1BYTE, BURST_INCRESE, BURST_LEN_01);
/*
164: 6afb ldr r3, [r7, #44] ; 0x2c
166: f1c3 0263 rsb r2, r3, #99 ; 0x63
16a: 4613 mov r3, r2
16c: 009b lsls r3, r3, #2
16e: 4413 add r3, r2
170: 015b lsls r3, r3, #5
172: 461a mov r2, r3
174: 6b3b ldr r3, [r7, #48] ; 0x30
176: 441a add r2, r3
178: 4613 mov r3, r2
17a: 005b lsls r3, r3, #1
17c: 4413 add r3, r2
17e: 61bb str r3, [r7, #24]
180: 69bb ldr r3, [r7, #24]
182: 3303 adds r3, #3
total 15
load 3
store 1
jump 0
else instruction 11
*/
software_delay(3, 1, 0, 0, 0, 11);
offset = ((SCRN_HEIGHT - 1 - y) * SCRN_WIDTH + x) * 3;
m_interface->master_write(Master_ID, (RESULT_BUFF_BASE + offset + 0), &sc_uint<32>(pixelR), DATA_SIZE_1BYTE, BURST_INCRESE, BURST_LEN_01);
m_interface->master_write(Master_ID, (RESULT_BUFF_BASE + offset + 3), &sc_uint<32>(pixelR), DATA_SIZE_1BYTE, BURST_INCRESE, BURST_LEN_01);
m_interface->master_write(Master_ID, (RESULT_BUFF_BASE + offset + 1), &sc_uint<32>(pixelG), DATA_SIZE_1BYTE, BURST_INCRESE, BURST_LEN_01);
m_interface->master_write(Master_ID, (RESULT_BUFF_BASE + offset + 4), &sc_uint<32>(pixelG), DATA_SIZE_1BYTE, BURST_INCRESE, BURST_LEN_01);
m_interface->master_write(Master_ID, (RESULT_BUFF_BASE + offset + 2), &sc_uint<32>(pixelB), DATA_SIZE_1BYTE, BURST_INCRESE, BURST_LEN_01);
m_interface->master_write(Master_ID, (RESULT_BUFF_BASE + offset + 5), &sc_uint<32>(pixelB), DATA_SIZE_1BYTE, BURST_INCRESE, BURST_LEN_01);
px += dx;
/*
1b8: 69ba ldr r2, [r7, #24]
1ba: 3202 adds r2, #2
1bc: 491a ldr r1, [pc, #104] ; (228 <_Z6renderiiii+0x19c>)
1be: 5cc9 ldrb r1, [r1, r3]
1c0: 4b19 ldr r3, [pc, #100] ; (228 <_Z6renderiiii+0x19c>)
1c2: 5499 strb r1, [r3, r2]
1c4: 6aba ldr r2, [r7, #40] ; 0x28
1c6: 687b ldr r3, [r7, #4]
1c8: 4413 add r3, r2
1ca: 62bb str r3, [r7, #40] ; 0x28
1cc: 6b3b ldr r3, [r7, #48] ; 0x30
1ce: 3302 adds r3, #2
1d0: 633b str r3, [r7, #48] ; 0x30
1d2: e778 b.n c6 <_Z6renderiiii+0x3a>
total 14
load 7
store 3
jump 1
else instruction 3
*/
software_delay(7, 3, 1, 0, 0, 3);
}
py -= dy;
/*
1d4: 6b7a ldr r2, [r7, #52] ; 0x34
1d6: 683b ldr r3, [r7, #0]
1d8: 1ad3 subs r3, r2, r3
1da: 637b str r3, [r7, #52] ; 0x34
1dc: 6af9 ldr r1, [r7, #44] ; 0x2c
1de: 4b13 ldr r3, [pc, #76] ; (22c <_Z6renderiiii+0x1a0>)
1e0: fb83 2301 smull r2, r3, r3, r1 // 2 cycles
1e4: 109a asrs r2, r3, #2
1e6: 17cb asrs r3, r1, #31
1e8: 1ad2 subs r2, r2, r3
1ea: 4613 mov r3, r2
1ec: 009b lsls r3, r3, #2
1ee: 4413 add r3, r2
1f0: 005b lsls r3, r3, #1
1f2: 1aca subs r2, r1, r3
1f4: 2a00 cmp r2, #0
1f6: d10c bne.n 212 <_Z6renderiiii+0x186>
total 17
load 4
store 1
jump 1
else instruction 1 (mul) + 11
*/
software_delay(1, 4, 1, 0, 1, 11);
if ((y % 10) == 0) {
/*
1f8: 69fa ldr r2, [r7, #28]
1fa: 6afb ldr r3, [r7, #44] ; 0x2c
1fc: 1ad3 subs r3, r2, r3
1fe: 2264 movs r2, #100 ; 0x64
200: fb02 f203 mul.w r2, r2, r3
204: 69fb ldr r3, [r7, #28]
206: fb92 f3f3 sdiv r3, r2, r3
20a: 4619 mov r1, r3
20c: 4808 ldr r0, [pc, #32] ; (230 <_Z6renderiiii+0x1a4>)
20e: f7ff fffe bl 0 <printf>
total 10
load 4
store 0
jump 1
else instruction 1 (mul) + 1 (sdiv) + 3
*/
software_delay(0, 4, 1, 1, 1, 3);
// prinf delat = 1026 cycles
wait(1026);
printf("Calc. - %d%%\n", (nYStart - y) * 100 / (nYStart));
}
/*
212: 6afb ldr r3, [r7, #44] ; 0x2c
214: 3b01 subs r3, #1
216: 62fb str r3, [r7, #44] ; 0x2c
218: e74d b.n b6 <_Z6renderiiii+0x2a> // jump to loop
total 4
load 1
store 1
jump 1
else instruction 1
*/
software_delay(1, 1, 1, 0, 0, 1);
}
wait(1026);
printf("Calc. completed. brot other jobs: %d\n",cnt);
#endif //BROT_HARDWARE
#ifdef BROT_SOFTWARE
int sx = cx - dx * CONST_VALUE;
int sy = 0 * dy;
int py = sy;
int x, y;
int nYStart = SCRN_HEIGHT / 2;
/*
8c: b580 push {r7, lr}
8e: b08e sub sp, #56 ; 0x38
90: af00 add r7, sp, #0
92: 60f8 str r0, [r7, #12]
94: 60b9 str r1, [r7, #8]
96: 607a str r2, [r7, #4]
98: 603b str r3, [r7, #0]
9a: 687b ldr r3, [r7, #4]
9c: 01db lsls r3, r3, #7
9e: 68fa ldr r2, [r7, #12]
a0: 1ad3 subs r3, r2, r3
a2: 627b str r3, [r7, #36] ; 0x24
a4: 2300 movs r3, #0
a6: 623b str r3, [r7, #32]
a8: 6a3b ldr r3, [r7, #32]
aa: 637b str r3, [r7, #52] ; 0x34
ac: 2332 movs r3, #50 ; 0x32
ae: 61fb str r3, [r7, #28]
b0: 69fb ldr r3, [r7, #28]
b2: 3b01 subs r3, #1
b4: 62fb str r3, [r7, #44] ; 0x2c
total 24
store 10
load 4
jump 0
else instruction 7
*/
software_delay(10, 4, 0, 0, 0, 7);
for (y = nYStart - 1; y >= 0; y--) {
int px = sx;
/*
b6: 6afb ldr r3, [r7, #44] ; 0x2c
b8: 2b00 cmp r3, #0 y>=0
ba: f2c0 80ae blt.w 21a <_Z6renderiiii+0x18e> //loop finish
be: 6a7b ldr r3, [r7, #36] ; 0x24
c0: 62bb str r3, [r7, #40] ; 0x28
c2: 2300 movs r3, #0
c4: 633b str r3, [r7, #48] ; 0x30
c6: 6b3b ldr r3, [r7, #48] ; 0x30
total 8
load 3
store 2
jump 1
else instruction 2
*/
software_delay(2, 3, 1, 0, 0, 2);
for (x = 0; x < SCRN_WIDTH; x += 2) {
/*
c8: 2b9f cmp r3, #159 ; 0x9f
ca: f300 8083 bgt.w 1d4 <_Z6renderiiii+0x148>
store 0
load 0
jump 1
else instruction 1
*/
software_delay(0, 0, 1, 0, 0, 1);
int offset = (y * SCRN_WIDTH + x) * 3;
int col;
//unsigned short pixel;
unsigned char pixelR, pixelG, pixelB;
/*
ce: 6afa ldr r2, [r7, #44] ; 0x2c
d0: 4613 mov r3, r2
d2: 009b lsls r3, r3, #2
d4: 4413 add r3, r2
d6: 015b lsls r3, r3, #5
d8: 461a mov r2, r3
da: 6b3b ldr r3, [r7, #48] ; 0x30
dc: 441a add r2, r3
de: 4613 mov r3, r2
e0: 005b lsls r3, r3, #1
e2: 4413 add r3, r2
e4: 61bb str r3, [r7, #24]
e6: 6abb ldr r3, [r7, #40] ; 0x28
e8: 11da asrs r2, r3, #7
ea: 6b7b ldr r3, [r7, #52] ; 0x34
ec: 11db asrs r3, r3, #7
ee: 4619 mov r1, r3
f0: 4610 mov r0, r2
f2: f7ff fffe bl 14 <_Z4brotll>
total 19
load 4
store 1
jump 1
else instruction 13
*/
software_delay(4, 4, 1, 0, 0, 13);
// Get iteration count.
sw_profiling(__func__, 1);
col = brot(px >> (COORD_SHIFT - SHIFT),
py >> (COORD_SHIFT - SHIFT));
sw_profiling(__func__, 2);
// Random colour mapping:
pixelR = (col) << 1;
pixelG = (col * 9) << 1;
pixelB = (col * 7) << 1;
// Plot first pair of pixels.
m_interface->master_write(Master_ID, (RESULT_BUFF_BASE + offset + 0), &sc_uint<32>(pixelR), DATA_SIZE_1BYTE, BURST_INCRESE, BURST_LEN_01);
m_interface->master_write(Master_ID, (RESULT_BUFF_BASE + offset + 3), &sc_uint<32>(pixelR), DATA_SIZE_1BYTE, BURST_INCRESE, BURST_LEN_01);
m_interface->master_write(Master_ID, (RESULT_BUFF_BASE + offset + 1), &sc_uint<32>(pixelG), DATA_SIZE_1BYTE, BURST_INCRESE, BURST_LEN_01);
m_interface->master_write(Master_ID, (RESULT_BUFF_BASE + offset + 4), &sc_uint<32>(pixelG), DATA_SIZE_1BYTE, BURST_INCRESE, BURST_LEN_01);
m_interface->master_write(Master_ID, (RESULT_BUFF_BASE + offset + 2), &sc_uint<32>(pixelB), DATA_SIZE_1BYTE, BURST_INCRESE, BURST_LEN_01);
m_interface->master_write(Master_ID, (RESULT_BUFF_BASE + offset + 5), &sc_uint<32>(pixelB), DATA_SIZE_1BYTE, BURST_INCRESE, BURST_LEN_01);
// Plot mirror image pixels:
offset = ((SCRN_HEIGHT - 1 - y) * SCRN_WIDTH + x) * 3;
m_interface->master_write(Master_ID, (RESULT_BUFF_BASE + offset + 0), &sc_uint<32>(pixelR), DATA_SIZE_1BYTE, BURST_INCRESE, BURST_LEN_01);
m_interface->master_write(Master_ID, (RESULT_BUFF_BASE + offset + 3), &sc_uint<32>(pixelR), DATA_SIZE_1BYTE, BURST_INCRESE, BURST_LEN_01);
m_interface->master_write(Master_ID, (RESULT_BUFF_BASE + offset + 1), &sc_uint<32>(pixelG), DATA_SIZE_1BYTE, BURST_INCRESE, BURST_LEN_01);
m_interface->master_write(Master_ID, (RESULT_BUFF_BASE + offset + 4), &sc_uint<32>(pixelG), DATA_SIZE_1BYTE, BURST_INCRESE, BURST_LEN_01);
m_interface->master_write(Master_ID, (RESULT_BUFF_BASE + offset + 2), &sc_uint<32>(pixelB), DATA_SIZE_1BYTE, BURST_INCRESE, BURST_LEN_01);
m_interface->master_write(Master_ID, (RESULT_BUFF_BASE + offset + 5), &sc_uint<32>(pixelB), DATA_SIZE_1BYTE, BURST_INCRESE, BURST_LEN_01);
px += dx;
/*
f6: 6178 str r0, [r7, #20]
f8: 697b ldr r3, [r7, #20]
fa: b2db uxtb r3, r3
fc: 005b lsls r3, r3, #1
fe: 74fb strb r3, [r7, #19]
100: 697b ldr r3, [r7, #20]
102: b2db uxtb r3, r3
104: 461a mov r2, r3
106: 00d2 lsls r2, r2, #3
108: 4413 add r3, r2
10a: b2db uxtb r3, r3
10c: 005b lsls r3, r3, #1
10e: 74bb strb r3, [r7, #18]
110: 697b ldr r3, [r7, #20]
112: b2db uxtb r3, r3
114: 461a mov r2, r3
116: 00d2 lsls r2, r2, #3
118: 1ad3 subs r3, r2, r3
11a: b2db uxtb r3, r3
11c: 005b lsls r3, r3, #1
11e: 747b strb r3, [r7, #17]
120: 69bb ldr r3, [r7, #24]
122: 3303 adds r3, #3
124: 4940 ldr r1, [pc, #256] ; (228 <_Z6renderiiii+0x19c>)
126: 7cfa ldrb r2, [r7, #19]
128: 54ca strb r2, [r1, r3]
12a: 4a3f ldr r2, [pc, #252] ; (228 <_Z6renderiiii+0x19c>)
12c: 5cd1 ldrb r1, [r2, r3]
12e: 4a3e ldr r2, [pc, #248] ; (228 <_Z6renderiiii+0x19c>)
130: 69bb ldr r3, [r7, #24]
132: 4413 add r3, r2
134: 460a mov r2, r1
136: 701a strb r2, [r3, #0]
138: 69bb ldr r3, [r7, #24]
13a: 3304 adds r3, #4
13c: 493a ldr r1, [pc, #232] ; (228 <_Z6renderiiii+0x19c>)
13e: 7cba ldrb r2, [r7, #18]
140: 54ca strb r2, [r1, r3]
142: 69ba ldr r2, [r7, #24]
144: 3201 adds r2, #1
146: 4938 ldr r1, [pc, #224] ; (228 <_Z6renderiiii+0x19c>)
148: 5cc9 ldrb r1, [r1, r3]
14a: 4b37 ldr r3, [pc, #220] ; (228 <_Z6renderiiii+0x19c>)
14c: 5499 strb r1, [r3, r2]
14e: 69bb ldr r3, [r7, #24]
150: 3305 adds r3, #5
152: 4935 ldr r1, [pc, #212] ; (228 <_Z6renderiiii+0x19c>)
154: 7c7a ldrb r2, [r7, #17]
156: 54ca strb r2, [r1, r3]
158: 69ba ldr r2, [r7, #24]
15a: 3202 adds r2, #2
15c: 4932 ldr r1, [pc, #200] ; (228 <_Z6renderiiii+0x19c>)
15e: 5cc9 ldrb r1, [r1, r3]
160: 4b31 ldr r3, [pc, #196] ; (228 <_Z6renderiiii+0x19c>)
162: 5499 strb r1, [r3, r2]
164: 6afb ldr r3, [r7, #44] ; 0x2c
166: f1c3 0263 rsb r2, r3, #99 ; 0x63
16a: 4613 mov r3, r2
16c: 009b lsls r3, r3, #2
16e: 4413 add r3, r2
170: 015b lsls r3, r3, #5
172: 461a mov r2, r3
174: 6b3b ldr r3, [r7, #48] ; 0x30
176: 441a add r2, r3
178: 4613 mov r3, r2
17a: 005b lsls r3, r3, #1
17c: 4413 add r3, r2
17e: 61bb str r3, [r7, #24]
180: 69bb ldr r3, [r7, #24]
182: 3303 adds r3, #3
184: 4928 ldr r1, [pc, #160] ; (228 <_Z6renderiiii+0x19c>)
186: 7cfa ldrb r2, [r7, #19]
188: 54ca strb r2, [r1, r3]
18a: 4a27 ldr r2, [pc, #156] ; (228 <_Z6renderiiii+0x19c>)
18c: 5cd1 ldrb r1, [r2, r3]
18e: 4a26 ldr r2, [pc, #152] ; (228 <_Z6renderiiii+0x19c>)
190: 69bb ldr r3, [r7, #24]
192: 4413 add r3, r2
194: 460a mov r2, r1
196: 701a strb r2, [r3, #0]
198: 69bb ldr r3, [r7, #24]
19a: 3304 adds r3, #4
19c: 4922 ldr r1, [pc, #136] ; (228 <_Z6renderiiii+0x19c>)
19e: 7cba ldrb r2, [r7, #18]
1a0: 54ca strb r2, [r1, r3]
1a2: 69ba ldr r2, [r7, #24]
1a4: 3201 adds r2, #1
1a6: 4920 ldr r1, [pc, #128] ; (228 <_Z6renderiiii+0x19c>)
1a8: 5cc9 ldrb r1, [r1, r3]
1aa: 4b1f ldr r3, [pc, #124] ; (228 <_Z6renderiiii+0x19c>)
1ac: 5499 strb r1, [r3, r2]
1ae: 69bb ldr r3, [r7, #24]
1b0: 3305 adds r3, #5
1b2: 491d ldr r1, [pc, #116] ; (228 <_Z6renderiiii+0x19c>)
1b4: 7c7a ldrb r2, [r7, #17]
1b6: 54ca strb r2, [r1, r3]
1b8: 69ba ldr r2, [r7, #24]
1ba: 3202 adds r2, #2
1bc: 491a ldr r1, [pc, #104] ; (228 <_Z6renderiiii+0x19c>)
1be: 5cc9 ldrb r1, [r1, r3]
1c0: 4b19 ldr r3, [pc, #100] ; (228 <_Z6renderiiii+0x19c>)
1c2: 5499 strb r1, [r3, r2]
1c4: 6aba ldr r2, [r7, #40] ; 0x28
1c6: 687b ldr r3, [r7, #4]
1c8: 4413 add r3, r2
1ca: 62bb str r3, [r7, #40] ; 0x28
1cc: 6b3b ldr r3, [r7, #48] ; 0x30
1ce: 3302 adds r3, #2
1d0: 633b str r3, [r7, #48] ; 0x30
1d2: e778 b.n c6 <_Z6renderiiii+0x3a>
total 110
load 50
store 19
jump 1
else instruction 40
Note: the pixel data here is stored to actual memory, so the modeled store count is reduced (strb r2, [r1, r3] == bus transaction)
*/
software_delay(13, 50, 1, 0, 0, 40);
}
py -= dy;
/*
1d4: 6b7a ldr r2, [r7, #52] ; 0x34
1d6: 683b ldr r3, [r7, #0]
1d8: 1ad3 subs r3, r2, r3
1da: 637b str r3, [r7, #52] ; 0x34
1dc: 6af9 ldr r1, [r7, #44] ; 0x2c
1de: 4b13 ldr r3, [pc, #76] ; (22c <_Z6renderiiii+0x1a0>)
1e0: fb83 2301 smull r2, r3, r3, r1 // 2 cycles
1e4: 109a asrs r2, r3, #2
1e6: 17cb asrs r3, r1, #31
1e8: 1ad2 subs r2, r2, r3
1ea: 4613 mov r3, r2
1ec: 009b lsls r3, r3, #2
1ee: 4413 add r3, r2
1f0: 005b lsls r3, r3, #1
1f2: 1aca subs r2, r1, r3
1f4: 2a00 cmp r2, #0
1f6: d10c bne.n 212 <_Z6renderiiii+0x186>
total 17
load 4
store 1
jump 1
else instruction 1 (mul) + 11
*/
software_delay(1, 4, 1, 0, 1, 11);
if ((y % 10) == 0) {
/*
1f8: 69fa ldr r2, [r7, #28]
1fa: 6afb ldr r3, [r7, #44] ; 0x2c
1fc: 1ad3 subs r3, r2, r3
1fe: 2264 movs r2, #100 ; 0x64
200: fb02 f203 mul.w r2, r2, r3
204: 69fb ldr r3, [r7, #28]
206: fb92 f3f3 sdiv r3, r2, r3
20a: 4619 mov r1, r3
20c: 4808 ldr r0, [pc, #32] ; (230 <_Z6renderiiii+0x1a4>)
20e: f7ff fffe bl 0 <printf>
total 10
load 4
store 0
jump 1
else instruction 1 (mul) + 1 (sdiv) + 3
*/
software_delay(0, 4, 1, 1, 1, 3);
wait(1026);
printf("Calc. - %d%%\n", (nYStart - y) * 100 / (nYStart));
}
/*
212: 6afb ldr r3, [r7, #44] ; 0x2c
214: 3b01 subs r3, #1
216: 62fb str r3, [r7, #44] ; 0x2c
218: e74d b.n b6 <_Z6renderiiii+0x2a> // jump to loop
total 4
load 1
store 1
jump 1
else instruction 1
*/
software_delay(1, 1, 1, 0, 0, 1);
}
wait(1026);
printf("Calc. completed.\n");
#endif //BROT_software
/*
228: 00000000 .word 0x00000000
22c: 66666667 .word 0x66666667
230: 00000004 .word 0x00000004
234: 00000014 .word 0x00000014
*/
sw_profiling(__func__, 3);
}
/**
 * Fixed-point escape-time iteration for one sample point (the update has the
 * z -> z^2 + c shape: x' = x^2 - y^2 + sx, y' = scaled(x*y) + sy).
 *
 * Besides computing the result, every step charges the modeled CPU cost of the
 * corresponding compiled ARM code through software_delay(store, load, jump,
 * mul, div, else_inst), so the order of those calls is part of the behavior.
 *
 * @param sx  fixed-point x coordinate of the sample (added back every iteration)
 * @param sy  fixed-point y coordinate of the sample (added back every iteration)
 * @return    iteration index at which x^2 + y^2 exceeded (4 << SHIFT), or -1
 *            when that never happened within CONST_VALUE iterations
 */
int brot(long sx, long sy) {
    sw_profiling(__func__, 0);
    long re = sx;
    long im = sy;
    const long escape_limit = 4 << SHIFT;
    /*
    Prologue / local initialisation:
    store 6+1 (str, push)
    load 3
    jump 0
    else instruction 4
    */
    software_delay(7, 3, 0, 0, 0, 4);
    int iter = 0;
    while (iter < CONST_VALUE) {
        /*
        Loop condition check (charged only for checks that enter the body)
        32: 2b7f cmp r3, #127 ; 0x7f
        34: dc23 bgt.n 7e <_Z4brotll+0x6a>
        jump 1
        else instruction 1
        */
        software_delay(0, 0, 1, 0, 0, 1);
        const long re_sq = (re * re) >> (SHIFT + 1);
        const long im_sq = (im * im) >> (SHIFT + 1);
        /*
        Squares and escape comparison:
        36: 69bb ldr r3, [r7, #24]
        38: fb03 f303 mul.w r3, r3, r3
        3c: 139b asrs r3, r3, #14
        3e: 60fb str r3, [r7, #12]
        40: 697b ldr r3, [r7, #20]
        42: fb03 f303 mul.w r3, r3, r3
        46: 139b asrs r3, r3, #14
        48: 60bb str r3, [r7, #8]
        4a: 68fa ldr r2, [r7, #12]
        4c: 68bb ldr r3, [r7, #8]
        4e: 4413 add r3, r2
        50: 693a ldr r2, [r7, #16]
        52: 429a cmp r2, r3
        54: da01 bge.n 5a <_Z4brotll+0x46>
        store 2
        load 5
        jump 1
        else instruction 2 (mul) + 4
        */
        software_delay(2, 5, 1, 2, 0, 4);
        /*
        56: 69fb ldr r3, [r7, #28]
        58: e013 b.n 82 <_Z4brotll+0x6e>
        load 5
        jump 1
        NOTE(review): this cost is charged on every iteration, before the
        escape test below - confirm that is the intended model.
        */
        software_delay(0, 5, 1, 0, 0, 0);
        const bool escaped = (re_sq + im_sq) > escape_limit;
        if (escaped) {
            /*
            Early-return epilogue:
            82: 4618 mov r0, r3
            84: 3724 adds r7, #36 ; 0x24
            86: 46bd mov sp, r7
            88: bc80 pop {r7}
            8a: 4770 bx lr
            store 0
            load 1
            jump 1
            else instruction 3
            */
            software_delay(0, 1, 1, 0, 0, 3);
            sw_profiling(__func__, 3);
            return iter;
        }
        /*
        Coordinate update and loop increment:
        5a: 69bb ldr r3, [r7, #24]
        5c: 697a ldr r2, [r7, #20]
        5e: fb02 f303 mul.w r3, r2, r3
        62: 131b asrs r3, r3, #12
        64: 683a ldr r2, [r7, #0]
        66: 4413 add r3, r2
        68: 617b str r3, [r7, #20]
        6a: 68fa ldr r2, [r7, #12]
        6c: 68bb ldr r3, [r7, #8]
        6e: 1ad3 subs r3, r2, r3
        70: 687a ldr r2, [r7, #4]
        72: 4413 add r3, r2
        74: 61bb str r3, [r7, #24]
        76: 69fb ldr r3, [r7, #28]
        78: 3301 adds r3, #1
        7a: 61fb str r3, [r7, #28]
        7c: e7d8 b.n 30 <_Z4brotll+0x1c>
        store 3
        load 7
        jump 1
        else instruction 1 (mul) + 5
        */
        software_delay(3, 7, 1, 1, 0, 5);
        // im must be updated from the old re before re itself is overwritten.
        im = ((re * im) >> (SHIFT - 1)) + sy;
        re = re_sq - im_sq + sx;
        ++iter;
    }
    //7e: f04f 33ff mov.w r3, #4294967295
    sw_profiling(__func__, 3);
    return -1;
}
/**
 * Copies SCRN_HEIGHT * SCRN_WIDTH * 3 bytes from RESULT_BUFF_BASE to
 * FRAME_BUFF_BASE using the strategy selected at compile time:
 *   - COMPILE_CPU_COPY:       the CPU moves the data one 4-byte word at a time.
 *   - COMPILE_DMAC_POLLING:   the DMAC is programmed and its state register polled.
 *   - COMPILE_DMAC_INTERRUPT: the DMAC is programmed and completion is signalled
 *                             by interrupt_handler() clearing wait_dmac_interrupt.
 *
 * software_delay(store, load, jump, mul, div, else_inst) charges the modeled
 * CPU cost of the compiled ARM code quoted in the comments.
 *
 * Register values are passed through named locals because master_write takes a
 * pointer to the data; taking the address of a temporary is not valid ISO C++.
 */
void copy_result() {
    sw_profiling(__func__, 0);
    /*
    238: b580 push {r7, lr}
    23a: b084 sub sp, #16
    23c: af00 add r7, sp, #0
    23e: 2300 movs r3, #0
    240: 60bb str r3, [r7, #8]
    */
    software_delay(1, 0, 0, 0, 0, 4);
#ifdef COMPILE_CPU_COPY
    int x, y;
    for (y = 0; y < SCRN_HEIGHT; ++y) {
        //loop start
        /*
        242: 68bb ldr r3, [r7, #8]
        244: 2b63 cmp r3, #99 ; 0x63
        246: dc31 bgt.n 2ac <_Z11copy_resultv+0x74>
        248: 2300 movs r3, #0
        24a: 60fb str r3, [r7, #12]
        */
        software_delay(1, 1, 1, 0, 0, 2);
        //for (x = 0; x < SCRN_WIDTH; x = x + ((1 << DATA_SIZE_4BYTE) * (BURST_LEN_16 + 1))) {
        for (x = 0; x < SCRN_WIDTH * 3; x = x + 4) {
            //loop start
            /*
            24c: 68fb ldr r3, [r7, #12]
            24e: 2b9f cmp r3, #159 ; 0x9f
            250: dc28 bgt.n 2a4 <_Z11copy_resultv+0x6c>
            252: 68ba ldr r2, [r7, #8]
            254: 4613 mov r3, r2
            256: 009b lsls r3, r3, #2
            258: 4413 add r3, r2
            25a: 015b lsls r3, r3, #5
            25c: 461a mov r2, r3
            25e: 68fb ldr r3, [r7, #12]
            260: 441a add r2, r3
            262: 4613 mov r3, r2
            264: 005b lsls r3, r3, #1
            266: 4413 add r3, r2
            268: 607b str r3, [r7, #4]
            26a: 4a14 ldr r2, [pc, #80] ; (2bc <_Z11copy_resultv+0x84>)
            26c: 687b ldr r3, [r7, #4]
            26e: 4413 add r3, r2
            270: 7819 ldrb r1, [r3, #0]
            272: 4a13 ldr r2, [pc, #76] ; (2c0 <_Z11copy_resultv+0x88>)
            274: 687b ldr r3, [r7, #4]
            276: 4413 add r3, r2
            278: 460a mov r2, r1
            27a: 701a strb r2, [r3, #0]
            27c: 687b ldr r3, [r7, #4]
            27e: 1c5a adds r2, r3, #1
            280: 687b ldr r3, [r7, #4]
            282: 3301 adds r3, #1
            284: 490d ldr r1, [pc, #52] ; (2bc <_Z11copy_resultv+0x84>)
            286: 5c89 ldrb r1, [r1, r2]
            288: 4a0d ldr r2, [pc, #52] ; (2c0 <_Z11copy_resultv+0x88>)
            28a: 54d1 strb r1, [r2, r3]
            28c: 687b ldr r3, [r7, #4]
            28e: 1c9a adds r2, r3, #2
            290: 687b ldr r3, [r7, #4]
            292: 3302 adds r3, #2
            294: 4909 ldr r1, [pc, #36] ; (2bc <_Z11copy_resultv+0x84>)
            296: 5c89 ldrb r1, [r1, r2]
            298: 4a09 ldr r2, [pc, #36] ; (2c0 <_Z11copy_resultv+0x88>)
            29a: 54d1 strb r1, [r2, r3]
            29c: 68fb ldr r3, [r7, #12]
            29e: 3301 adds r3, #1
            2a0: 60fb str r3, [r7, #12]
            2a2: e7d3 b.n 24c <_Z11copy_resultv+0x14>
            2a4: 68bb ldr r3, [r7, #8]
            2a6: 3301 adds r3, #1
            2a8: 60bb str r3, [r7, #8]
            2aa: e7ca b.n 242 <_Z11copy_resultv+0xa>
            total 48
            store 6
            load 20
            jump 3
            else 19
            */
            software_delay(6, 20, 3, 0, 0, 19);
            int offset = (y * SCRN_WIDTH * 3 + x);
            // Use the buffer returned by master_read directly. The previous
            // code allocated a one-element array first and then overwrote the
            // pointer, leaking that allocation on every word copied.
            // NOTE(review): ownership of the returned buffer is not visible
            // here - confirm whether the caller is expected to release it.
            sc_uint<32>* tmp_data = m_interface->master_read(Master_ID, (RESULT_BUFF_BASE + offset + 0), DATA_SIZE_4BYTE, BURST_INCRESE, BURST_LEN_01);
            m_interface->master_write(Master_ID, (FRAME_BUFF_BASE + offset + 0), tmp_data, DATA_SIZE_4BYTE, BURST_INCRESE, BURST_LEN_01);
        }
    }
    wait(1026);
    printf("Copy. completed.\n");
#endif //end of COMPILE_CPU_COPY
#ifdef COMPILE_DMAC_POLLING
    {
        // The CPU busy-polls in this mode, so no "other job" work is counted;
        // cnt exists so the completion message below is well-formed.
        int cnt = 0;
        while (m_interface->master_read(Master_ID, (DMAC_BASE + DMAC_OFF_STATE), DATA_SIZE_4BYTE, BURST_INCRESE, BURST_LEN_01)[0] != 1); //idle check
        int nDataSize = SCRN_HEIGHT * SCRN_WIDTH * 3;
        sc_uint<32> int_enable(0);                // interrupts disabled: completion is polled
        sc_uint<32> src_addr(RESULT_BUFF_BASE);
        sc_uint<32> dst_addr(FRAME_BUFF_BASE);
        sc_uint<32> data_size(nDataSize);
        sc_uint<32> op_start(1);
        m_interface->master_write(Master_ID, (DMAC_BASE + DMAC_OFF_INT_ENABLE), &int_enable, DATA_SIZE_4BYTE, BURST_INCRESE, BURST_LEN_01);
        m_interface->master_write(Master_ID, (DMAC_BASE + DMAC_OFF_SRC_ADDR), &src_addr, DATA_SIZE_4BYTE, BURST_INCRESE, BURST_LEN_01);
        m_interface->master_write(Master_ID, (DMAC_BASE + DMAC_OFF_DST_ADDR), &dst_addr, DATA_SIZE_4BYTE, BURST_INCRESE, BURST_LEN_01);
        m_interface->master_write(Master_ID, (DMAC_BASE + DMAC_OFF_DATASIZE), &data_size, DATA_SIZE_4BYTE, BURST_INCRESE, BURST_LEN_01);
        m_interface->master_write(Master_ID, (DMAC_BASE + DMAC_OFF_OPSTART), &op_start, DATA_SIZE_4BYTE, BURST_INCRESE, BURST_LEN_01);
        while (m_interface->master_read(Master_ID, (DMAC_BASE + DMAC_OFF_STATE), DATA_SIZE_4BYTE, BURST_INCRESE, BURST_LEN_01)[0] != 1); //operation done check
        printf("Copy. completed. Other job: %d\n", cnt);
    }
#endif //end of COMPILE_DMAC_POLLING
#ifdef COMPILE_DMAC_INTERRUPT
    {
        int cnt = 0;
        // Wait until no earlier DMA request is still pending.
        while (wait_dmac_interrupt) {
            wait();
            if (!state_interrupt)
                cnt++;//other job
        }
        // Mark the request as pending before starting the transfer;
        // interrupt_handler() clears the flag.
        wait_dmac_interrupt = true;
        int nDataSize = SCRN_HEIGHT * SCRN_WIDTH * 3;
        sc_uint<32> int_enable(1);                // completion is reported by interrupt
        sc_uint<32> src_addr(RESULT_BUFF_BASE);
        sc_uint<32> dst_addr(FRAME_BUFF_BASE);
        sc_uint<32> data_size(nDataSize);
        sc_uint<32> op_start(1);
        m_interface->master_write(Master_ID, (DMAC_BASE + DMAC_OFF_INT_ENABLE), &int_enable, DATA_SIZE_4BYTE, BURST_INCRESE, BURST_LEN_01);
        m_interface->master_write(Master_ID, (DMAC_BASE + DMAC_OFF_SRC_ADDR), &src_addr, DATA_SIZE_4BYTE, BURST_INCRESE, BURST_LEN_01);
        m_interface->master_write(Master_ID, (DMAC_BASE + DMAC_OFF_DST_ADDR), &dst_addr, DATA_SIZE_4BYTE, BURST_INCRESE, BURST_LEN_01);
        m_interface->master_write(Master_ID, (DMAC_BASE + DMAC_OFF_DATASIZE), &data_size, DATA_SIZE_4BYTE, BURST_INCRESE, BURST_LEN_01);
        m_interface->master_write(Master_ID, (DMAC_BASE + DMAC_OFF_OPSTART), &op_start, DATA_SIZE_4BYTE, BURST_INCRESE, BURST_LEN_01);
        while (wait_dmac_interrupt) {//operation done check
            // Cycles spent here while no interrupt is being serviced are
            // counted as time available for other work.
            if (!state_interrupt) {
                cnt++;//other job
            }
            wait();
        }
        printf("Copy. completed. Other job: %d\n", cnt);
    }
#endif //end of COMPILE_DMAC_INTERRUPT
    sw_profiling(__func__, 3);
}
void interrupt_handler() {
wait();
while (1) {
state_interrupt = true;
if (dmac_interrupt.read() == 1) {
sw_profiling("copy_result", 1);
sw_profiling(__func__, 0);
// Overhead == Miss penalty
for (int i = 0; i < CACHE_MISS_PENALTY; i++) {
wait(m_interface->clk.posedge_event());// interrupt overhead
}
m_interface->master_write(Master_ID, (DMAC_BASE + DMAC_OFF_INTERRUPT), &(sc_uint<32>)0, DATA_SIZE_4BYTE, BURST_INCRESE, BURST_LEN_01);
do {
wait(m_interface->clk.posedge_event());
} while (dmac_interrupt.read() == 1);
for (int i = 0; i < 1; i++) {
wait(m_interface->clk.posedge_event());// interrupt overhead
}
sw_profiling(__func__, 3);
sw_profiling("copy_result", 2);
}
else if (brot_interrupt.read() == 1) {
sw_profiling("render", 1);
sw_profiling(__func__, 0);
for (int i = 0; i < CACHE_MISS_PENALTY; i++) {
wait(m_interface->clk.posedge_event());// interrupt overhead
}
m_interface->master_write(Master_ID, (BROT_BASE + BROT_REG_OFFSET_INTERRUPT), &(sc_uint<32>)0, DATA_SIZE_4BYTE, BURST_INCRESE, BURST_LEN_01);
do {
wait(m_interface->clk.posedge_event());
} while (brot_interrupt.read() == 1);
for (int i = 0; i < 1; i++) {
wait(m_interface->clk.posedge_event());// interrupt overhead
}
sw_profiling(__func__, 3);
sw_profiling("render", 2);
}
wait_dmac_interrupt = false;
wait_brot_interrupt = false;
state_interrupt = false;
wait();
}
}
//int software_main(int argc, char* argv[]) {
// Top-level routine of the modeled software: prints the start banner, runs
// zoom_loop(), and returns 0.
// Each software_delay(store, load, jump, mul, div, else_inst) call charges the
// modeled CPU cost of the compiled ARM instructions quoted in the comment
// directly above it.
// NOTE(review): the sw_profiling phase codes look like 0 = function entry,
// 1 = about to call a sub-function, 2 = sub-function returned, 3 = function
// exit - confirm against sw_profiling.
int software_main() {
sw_profiling(__func__, 0);
/*
344: b580 push {r7, lr}
346: b082 sub sp, #8
348: af00 add r7, sp, #0
34a: 6078 str r0, [r7, #4]
34c: 6039 str r1, [r7, #0]
34e: 4805 ldr r0, [pc, #20] ; (364 <main+0x20>)
350: f7ff fffe bl 0 <puts>
store 3
load 1
jump 1
else instruction 1
*/
// Cost of the prologue plus the call that prints the banner.
software_delay(3, 1, 1, 0, 0, 1);
printf("Fractal Display Start!\n");
/*
354: f7ff fffe bl 2c8 <_Z9zoom_loopv>
jump 1
*/
//initLCDC();
// Cost of the branch into zoom_loop.
software_delay(0, 0, 1, 0, 0, 0);
// The zoom_loop() call is bracketed by phase 1 / phase 2 profiling marks.
sw_profiling(__func__, 1);
zoom_loop();
sw_profiling(__func__, 2);
/*
358: 2300 movs r3, #0
35a: 4618 mov r0, r3
35c: 3708 adds r7, #8
35e: 46bd mov sp, r7
360: bd80 pop {r7, pc}
362: bf00 nop
364: 0000003c .word 0x0000003c
store 0
load 1
jump 0
else instruction 4
*/
// Cost of the epilogue.
software_delay(0, 1, 0, 0, 0, 4);
sw_profiling(__func__, 3);
// NOTE(review): phase 4 with a blank name presumably finalizes or reports the
// collected profile - confirm against sw_profiling.
sw_profiling(" ", 4);
return 0;
}
/**
 * Advances simulated time by the modeled cost of a group of CPU instructions.
 *
 * Non-memory instructions cost a fixed number of clock cycles each
 * (SOFTWARE_JUMP_DELAY, SOFTWARE_MUL_DELAY, SOFTWARE_DIV_DELAY,
 * SOFTWARE_ELSE_DELAY). Clock edges that arrive while state_interrupt is set
 * do not count towards that total, so the delay stretches while an interrupt
 * is being serviced. Each store and each load is then charged through
 * mem_access().
 *
 * @param store      number of store instructions
 * @param load       number of load instructions
 * @param jump       number of branch instructions
 * @param mul        number of multiply instructions
 * @param div        number of divide instructions
 * @param else_inst  number of all remaining instructions
 */
void software_delay(int store, int load, int jump, int mul, int div, int else_inst) {
    unsigned int cpu_cycles = 0;
    cpu_cycles += jump * SOFTWARE_JUMP_DELAY;
    cpu_cycles += mul * SOFTWARE_MUL_DELAY;
    cpu_cycles += div * SOFTWARE_DIV_DELAY;
    cpu_cycles += else_inst * SOFTWARE_ELSE_DELAY;

    // Burn the compute cycles; a cycle only counts when no interrupt is active
    // after the clock edge.
    unsigned int elapsed = 0;
    while (elapsed < cpu_cycles) {
        wait(m_interface->clk.posedge_event());
        if (!state_interrupt) {
            ++elapsed;
        }
    }

    // One modeled memory access per load/store instruction.
    const int accesses = store + load;
    for (int remaining = accesses; remaining > 0; --remaining) {
        mem_access();
    }
}
void mem_access() {
if ((double)hit / ((double)hit + (double)miss) > (double) CACHE_HIT_RATIO) {
// miss
miss++;
for (int i = 0; i < CACHE_MISS_PENALTY; i++) {
if (state_interrupt) {
i++;
}
wait(m_interface->clk.posedge_event());
}
//m_interface->master_write(ID, addr, data, size, burst, len);
}
else {
hit++;
wait(m_interface->clk.posedge_event());
while (state_interrupt) {
wait(m_interface->clk.posedge_event());
}
}
}
SC_HAS_PROCESS(fractal_processor);
/**
 * Builds the processor model.
 *
 * @param _name  SystemC module name
 * @param ID     bus master ID, stored in the 2-bit Master_ID
 *
 * Creates the AXI master interface and a 16-word data buffer, resets the cache
 * hit/miss counters and the interrupt bookkeeping flags, and registers two
 * threads: processor_execute (sensitive to the rising edge of the interface
 * clock) and interrupt_handler (sensitive to the rising edges of
 * dmac_interrupt and brot_interrupt).
 */
fractal_processor(sc_module_name _name, int ID)
    : Master_ID(ID),
      data_buffer(new sc_uint<32>[16]),
      hit(0),
      miss(0),
      state_interrupt(false),
      wait_dmac_interrupt(false),
      wait_brot_interrupt(false) {
    // Created in the body rather than the initializer list so the child module
    // is still constructed after this module's own members.
    m_interface = new AXI_master_if("process_interface");

    SC_THREAD(processor_execute);
    sensitive << m_interface->clk.pos();

    SC_THREAD(interrupt_handler);
    sensitive << dmac_interrupt.pos() << brot_interrupt.pos();
}
};