% path: root/application-devel/profiling/pres_app_profiling_en.tex
% blob: aa26bbed38e78c3eafccaefc705fdcc43ba4d372 (plain)
\input{configpres}

\subsection{Profiling}

\title{\lq Profiling and code coverage\rq}
\maketitle

\begin{frame}
\frametitle{Overview}
\tableofcontents
\end{frame}

\begin{frame}[fragile]
\frametitle{Using gprof}
Compile AND link with profile information:
\begin{verbatim}
gcc -Wall -no-pie -fno-pie -pg -o gprof_test gprof_test.c
\end{verbatim}
\end{frame}

\begin{frame}[fragile]
\frametitle{Using gprof}
Run the program:
\begin{verbatim}
# Run the program

$ ./gprof_test

# that should produce a gmon.out

$ ls gmon.out
gmon.out
\end{verbatim}
\end{frame}

\begin{frame}[fragile]
\frametitle{Using gprof}
\begin{alertblock}{When and where will gmon.out be written?}
gmon.out is written just before your application exits normally
(by returning from main() or by calling exit()).
It is written to the current working directory of the process
at the time it exits.
\end{alertblock}
\end{frame}

\begin{frame}[fragile]
\frametitle{Using gprof}
Specifying a custom filename for gmon.out:
\begin{verbatim}
export GMON_OUT_PREFIX=custom_gmon

# The resulting file will be called ${GMON_OUT_PREFIX}.PID
# (where PID is the process ID of the profiled process)
\end{verbatim}
\end{frame}

\begin{frame}[fragile]
\frametitle{Using gprof}
Analyze the result:
\begin{verbatim}
# Analyze the result

$ gprof gprof_test gmon.out > analysis.txt

$ less analysis.txt
\end{verbatim}
\end{frame}

\begin{frame}[fragile]
\frametitle{Using gprof}
\begin{verbatim}
Flat profile:

Each sample counts as 0.01 seconds.
  %   cumulative   self              self     total
 time   seconds   seconds    calls  ms/call  ms/call  name
 40.00      0.04     0.04        1    40.00    40.00  f2
 30.00      0.07     0.03        1    30.00    30.00  f1
 30.00      0.10     0.03                             main
\end{verbatim}
\end{frame}

\begin{frame}[fragile]
\frametitle{Using gprof}
\begin{verbatim}
             Call graph (explanation follows)

granularity: each sample hit covers 4 byte(s) for 10.00% of 0.10 seconds

index % time    self  children    called     name
                                                 <spontaneous>
[1]    100.0    0.03    0.07                 main [1]
                0.04    0.00       1/1           f2 [2]
                0.03    0.00       1/1           f1 [3]
-----------------------------------------------
                0.04    0.00       1/1           main [1]
[2]     40.0    0.04    0.00       1         f2 [2]
-----------------------------------------------
                0.03    0.00       1/1           main [1]
[3]     30.0    0.03    0.00       1         f1 [3]
-----------------------------------------------

\end{verbatim}
\end{frame}

\begin{frame}[fragile]
\frametitle{Using gprof}
Format gprof output:
\begin{verbatim}
# Suppress printing of statically declared functions

gprof -a gprof_test gmon.out
gprof --no-static gprof_test gmon.out

# Don't be verbose

gprof -b gprof_test gmon.out
gprof --brief gprof_test gmon.out

# Only print the flat profile

gprof -p gprof_test gmon.out
gprof --flat-profile gprof_test gmon.out

# DON'T print the flat profile

gprof -P gprof_test gmon.out
gprof --no-flat-profile gprof_test gmon.out
\end{verbatim}
\end{frame}

\begin{frame}[fragile]
\frametitle{Callgrind}
\begin{verbatim}
$ gcc -Wall -o gprof_test gprof_test.c
$ valgrind --tool=callgrind ./gprof_test

$ ls callgrind*
callgrind.out.4804

$ callgrind_annotate callgrind.out.4804 gprof_test
\end{verbatim}
\end{frame}

\begin{frame}[fragile]
\frametitle{Callgrind and pthreads}
\begin{verbatim}
$ gcc -Wall -o gprof_test gprof_test.c
$ valgrind --tool=callgrind --separate-threads=yes ./gprof_test

$ ls callgrind*
callgrind.out.4804-01 callgrind.out.4804-02

$ callgrind_annotate callgrind.out.4804-01 gprof_test
$ callgrind_annotate callgrind.out.4804-02 gprof_test
\end{verbatim}
\end{frame}

\begin{frame}[fragile]
\frametitle{Profiling with perf}
\begin{verbatim}
$ perf record ./pthread_example

$ ls perf.data
perf.data

$ perf report
\end{verbatim}
\end{frame}

\subsection{Code coverage}

\begin{frame}[fragile]
\frametitle{Code coverage with gcov}
\begin{verbatim}
$ gcc -Wall -fprofile-arcs -ftest-coverage -o gcov_test gcov_test.c

$ ./gcov_test

$ gcov gcov_test.c 
File 'gcov_test.c'
Lines executed:100.00% of 9
gcov_test.c:creating 'gcov_test.c.gcov'

\end{verbatim}
\end{frame}

\begin{frame}[fragile]
\frametitle{Code coverage with gcov}
\begin{verbatim}
$ cat gcov_test.c.gcov
[...]
        1:    3:int main (void)
        -:    4:{
        -:    5:        int i;
        -:    6:
      100:    7:        for (i = 1; i < 100; i++) {
       99:    8:                if (i % 2 == 0)
       49:    9:                        printf("A");
       99:   10:                if (i % 3 == 0)
       33:   11:                        printf("B");
       99:   12:                if (i % 4 == 0)
       24:   13:                        printf("C");
        -:   14:        }
[...]
\end{verbatim}
\end{frame}
\input{tailpres}