|
5 | 5 | "metadata": {},
|
6 | 6 | "source": [
|
7 | 7 | "# Introduction to DataFrames\n",
|
8 |
| - "**[Bogumił Kamiński](http://bogumilkaminski.pl/about/), May 23, 2018**" |
| 8 | + "**[Bogumił Kamiński](http://bogumilkaminski.pl/about/), July 25, 2018**" |
9 | 9 | ]
|
10 | 10 | },
|
11 | 11 | {
|
12 | 12 | "cell_type": "code",
|
13 | 13 | "execution_count": 1,
|
14 |
| - "metadata": { |
15 |
| - "collapsed": true |
16 |
| - }, |
| 14 | + "metadata": {}, |
17 | 15 | "outputs": [],
|
18 | 16 | "source": [
|
19 | 17 | "using DataFrames # load package"
|
|
122 | 120 | "metadata": {},
|
123 | 121 | "outputs": [
|
124 | 122 | {
|
125 |
| - "name": "stdout", |
126 |
| - "output_type": "stream", |
127 |
| - "text": [ |
128 |
| - "A\n", |
129 |
| - "Summary Stats:\n", |
130 |
| - "Mean: 1.500000\n", |
131 |
| - "Minimum: 1.000000\n", |
132 |
| - "1st Quartile: 1.250000\n", |
133 |
| - "Median: 1.500000\n", |
134 |
| - "3rd Quartile: 1.750000\n", |
135 |
| - "Maximum: 2.000000\n", |
136 |
| - "Length: 2\n", |
137 |
| - "Type: Int64\n", |
138 |
| - "\n", |
139 |
| - "B\n", |
140 |
| - "Summary Stats:\n", |
141 |
| - "Mean: 1.000000\n", |
142 |
| - "Minimum: 1.000000\n", |
143 |
| - "1st Quartile: 1.000000\n", |
144 |
| - "Median: 1.000000\n", |
145 |
| - "3rd Quartile: 1.000000\n", |
146 |
| - "Maximum: 1.000000\n", |
147 |
| - "Length: 2\n", |
148 |
| - "Type: Union{Float64, Missings.Missing}\n", |
149 |
| - "Number Missing: 1\n", |
150 |
| - "% Missing: 50.000000\n", |
151 |
| - "\n", |
152 |
| - "C\n", |
153 |
| - "Summary Stats:\n", |
154 |
| - "Length: 2\n", |
155 |
| - "Type: String\n", |
156 |
| - "Number Unique: 2\n", |
157 |
| - "\n" |
158 |
| - ] |
| 123 | + "data": { |
| 124 | + "text/html": [ |
| 125 | + "<table class=\"data-frame\"><thead><tr><th></th><th>variable</th><th>mean</th><th>min</th><th>median</th><th>max</th><th>nunique</th><th>nmissing</th><th>eltype</th></tr></thead><tbody><tr><th>1</th><td>A</td><td>1.5</td><td>1</td><td>1.5</td><td>2</td><td></td><td></td><td>Int64</td></tr><tr><th>2</th><td>B</td><td>1.0</td><td>1.0</td><td>1.0</td><td>1.0</td><td></td><td>1</td><td>Float64</td></tr><tr><th>3</th><td>C</td><td></td><td>a</td><td></td><td>b</td><td>2</td><td></td><td>String</td></tr></tbody></table>" |
| 126 | + ], |
| 127 | + "text/plain": [ |
| 128 | + "3×8 DataFrames.DataFrame\n", |
| 129 | + "│ Row │ variable │ mean │ min │ median │ max │ nunique │ nmissing │ eltype │\n", |
| 130 | + "├─────┼──────────┼──────┼─────┼────────┼─────┼─────────┼──────────┼─────────┤\n", |
| 131 | + "│ 1 │ A │ 1.5 │ 1 │ 1.5 │ 2 │ │ │ Int64 │\n", |
| 132 | + "│ 2 │ B │ 1.0 │ 1.0 │ 1.0 │ 1.0 │ │ 1 │ Float64 │\n", |
| 133 | + "│ 3 │ C │ │ a │ │ b │ 2 │ │ String │" |
| 134 | + ] |
| 135 | + }, |
| 136 | + "execution_count": 5, |
| 137 | + "metadata": {}, |
| 138 | + "output_type": "execute_result" |
159 | 139 | }
|
160 | 140 | ],
|
161 | 141 | "source": [
|
162 | 142 | "describe(x)"
|
163 | 143 | ]
|
164 | 144 | },
|
165 |
| - { |
166 |
| - "cell_type": "markdown", |
167 |
| - "metadata": {}, |
168 |
| - "source": [ |
169 |
| - "Use `showcols` to get information about columns stored in a DataFrame." |
170 |
| - ] |
171 |
| - }, |
172 |
| - { |
173 |
| - "cell_type": "code", |
174 |
| - "execution_count": 6, |
175 |
| - "metadata": {}, |
176 |
| - "outputs": [ |
177 |
| - { |
178 |
| - "name": "stdout", |
179 |
| - "output_type": "stream", |
180 |
| - "text": [ |
181 |
| - "2×3 DataFrames.DataFrame\n", |
182 |
| - "│ Col # │ Name │ Eltype │ Missing │ Values │\n", |
183 |
| - "├───────┼──────┼──────────────────────────────────┼─────────┼─────────────────┤\n", |
184 |
| - "│ 1 │ A │ Int64 │ 0 │ 1 … 2 │\n", |
185 |
| - "│ 2 │ B │ Union{Float64, Missings.Missing} │ 1 │ 1.0 … missing │\n", |
186 |
| - "│ 3 │ C │ String │ 0 │ a … b │" |
187 |
| - ] |
188 |
| - } |
189 |
| - ], |
190 |
| - "source": [ |
191 |
| - "showcols(x)" |
192 |
| - ] |
193 |
| - }, |
194 | 145 | {
|
195 | 146 | "cell_type": "markdown",
|
196 | 147 | "metadata": {},
|
|
200 | 151 | },
|
201 | 152 | {
|
202 | 153 | "cell_type": "code",
|
203 |
| - "execution_count": 7, |
| 154 | + "execution_count": 6, |
204 | 155 | "metadata": {},
|
205 | 156 | "outputs": [
|
206 | 157 | {
|
|
212 | 163 | " :C"
|
213 | 164 | ]
|
214 | 165 | },
|
215 |
| - "execution_count": 7, |
| 166 | + "execution_count": 6, |
216 | 167 | "metadata": {},
|
217 | 168 | "output_type": "execute_result"
|
218 | 169 | }
|
|
221 | 172 | "names(x)"
|
222 | 173 | ]
|
223 | 174 | },
|
| 175 | + { |
| 176 | + "cell_type": "markdown", |
| 177 | + "metadata": {}, |
| 178 | + "source": [ |
| 179 | + "Future tip: In Julia 0.7 `propertynames` is also supported." |
| 180 | + ] |
| 181 | + }, |
224 | 182 | {
|
225 | 183 | "cell_type": "markdown",
|
226 | 184 | "metadata": {},
|
|
230 | 188 | },
|
231 | 189 | {
|
232 | 190 | "cell_type": "code",
|
233 |
| - "execution_count": 8, |
| 191 | + "execution_count": 7, |
234 | 192 | "metadata": {},
|
235 | 193 | "outputs": [
|
236 | 194 | {
|
|
242 | 200 | " String "
|
243 | 201 | ]
|
244 | 202 | },
|
245 |
| - "execution_count": 8, |
| 203 | + "execution_count": 7, |
246 | 204 | "metadata": {},
|
247 | 205 | "output_type": "execute_result"
|
248 | 206 | }
|
|
260 | 218 | },
|
261 | 219 | {
|
262 | 220 | "cell_type": "code",
|
263 |
| - "execution_count": 9, |
264 |
| - "metadata": { |
265 |
| - "collapsed": true |
266 |
| - }, |
| 221 | + "execution_count": 8, |
| 222 | + "metadata": {}, |
267 | 223 | "outputs": [],
|
268 | 224 | "source": [
|
269 | 225 | "y = DataFrame(rand(1:10, 1000, 10));"
|
|
278 | 234 | },
|
279 | 235 | {
|
280 | 236 | "cell_type": "code",
|
281 |
| - "execution_count": 10, |
| 237 | + "execution_count": 9, |
282 | 238 | "metadata": {},
|
283 | 239 | "outputs": [
|
284 | 240 | {
|
285 | 241 | "data": {
|
286 | 242 | "text/html": [
|
287 |
| - "<table class=\"data-frame\"><thead><tr><th></th><th>x1</th><th>x2</th><th>x3</th><th>x4</th><th>x5</th><th>x6</th><th>x7</th><th>x8</th><th>x9</th><th>x10</th></tr></thead><tbody><tr><th>1</th><td>8</td><td>6</td><td>1</td><td>2</td><td>7</td><td>10</td><td>5</td><td>1</td><td>5</td><td>10</td></tr><tr><th>2</th><td>8</td><td>9</td><td>6</td><td>6</td><td>10</td><td>4</td><td>9</td><td>3</td><td>10</td><td>9</td></tr><tr><th>3</th><td>5</td><td>1</td><td>4</td><td>3</td><td>10</td><td>5</td><td>1</td><td>10</td><td>5</td><td>9</td></tr><tr><th>4</th><td>2</td><td>9</td><td>2</td><td>2</td><td>5</td><td>7</td><td>7</td><td>9</td><td>9</td><td>5</td></tr><tr><th>5</th><td>4</td><td>8</td><td>4</td><td>10</td><td>8</td><td>5</td><td>1</td><td>2</td><td>1</td><td>10</td></tr><tr><th>6</th><td>8</td><td>6</td><td>6</td><td>8</td><td>3</td><td>3</td><td>3</td><td>6</td><td>8</td><td>6</td></tr></tbody></table>" |
| 243 | + "<table class=\"data-frame\"><thead><tr><th></th><th>x1</th><th>x2</th><th>x3</th><th>x4</th><th>x5</th><th>x6</th><th>x7</th><th>x8</th><th>x9</th><th>x10</th></tr></thead><tbody><tr><th>1</th><td>4</td><td>1</td><td>2</td><td>8</td><td>10</td><td>7</td><td>5</td><td>1</td><td>8</td><td>3</td></tr><tr><th>2</th><td>5</td><td>6</td><td>6</td><td>3</td><td>2</td><td>4</td><td>9</td><td>10</td><td>10</td><td>4</td></tr><tr><th>3</th><td>3</td><td>5</td><td>4</td><td>8</td><td>4</td><td>4</td><td>4</td><td>6</td><td>6</td><td>9</td></tr><tr><th>4</th><td>8</td><td>8</td><td>2</td><td>6</td><td>7</td><td>3</td><td>6</td><td>4</td><td>10</td><td>6</td></tr><tr><th>5</th><td>1</td><td>2</td><td>6</td><td>10</td><td>4</td><td>7</td><td>7</td><td>7</td><td>5</td><td>4</td></tr><tr><th>6</th><td>5</td><td>7</td><td>9</td><td>10</td><td>5</td><td>1</td><td>6</td><td>2</td><td>3</td><td>4</td></tr></tbody></table>" |
288 | 244 | ],
|
289 | 245 | "text/plain": [
|
290 | 246 | "6×10 DataFrames.DataFrame\n",
|
291 | 247 | "│ Row │ x1 │ x2 │ x3 │ x4 │ x5 │ x6 │ x7 │ x8 │ x9 │ x10 │\n",
|
292 | 248 | "├─────┼────┼────┼────┼────┼────┼────┼────┼────┼────┼─────┤\n",
|
293 |
| - "│ 1 │ 8 │ 6 │ 1 │ 2 │ 7 │ 10 │ 5 │ 1 │ 5 │ 10 │\n", |
294 |
| - "│ 2 │ 8 │ 9 │ 6 │ 6 │ 10 │ 4 │ 9 │ 3 │ 10 │ 9 │\n", |
295 |
| - "│ 3 │ 5 │ 1 │ 4 │ 3 │ 10 │ 5 │ 1 │ 10 │ 5 │ 9 │\n", |
296 |
| - "│ 4 │ 2 │ 9 │ 2 │ 2 │ 5 │ 7 │ 7 │ 9 │ 9 │ 5 │\n", |
297 |
| - "│ 5 │ 4 │ 8 │ 4 │ 10 │ 8 │ 5 │ 1 │ 2 │ 1 │ 10 │\n", |
298 |
| - "│ 6 │ 8 │ 6 │ 6 │ 8 │ 3 │ 3 │ 3 │ 6 │ 8 │ 6 │" |
| 249 | + "│ 1 │ 4 │ 1 │ 2 │ 8 │ 10 │ 7 │ 5 │ 1 │ 8 │ 3 │\n", |
| 250 | + "│ 2 │ 5 │ 6 │ 6 │ 3 │ 2 │ 4 │ 9 │ 10 │ 10 │ 4 │\n", |
| 251 | + "│ 3 │ 3 │ 5 │ 4 │ 8 │ 4 │ 4 │ 4 │ 6 │ 6 │ 9 │\n", |
| 252 | + "│ 4 │ 8 │ 8 │ 2 │ 6 │ 7 │ 3 │ 6 │ 4 │ 10 │ 6 │\n", |
| 253 | + "│ 5 │ 1 │ 2 │ 6 │ 10 │ 4 │ 7 │ 7 │ 7 │ 5 │ 4 │\n", |
| 254 | + "│ 6 │ 5 │ 7 │ 9 │ 10 │ 5 │ 1 │ 6 │ 2 │ 3 │ 4 │" |
299 | 255 | ]
|
300 | 256 | },
|
301 |
| - "execution_count": 10, |
| 257 | + "execution_count": 9, |
302 | 258 | "metadata": {},
|
303 | 259 | "output_type": "execute_result"
|
304 | 260 | }
|
|
316 | 272 | },
|
317 | 273 | {
|
318 | 274 | "cell_type": "code",
|
319 |
| - "execution_count": 11, |
| 275 | + "execution_count": 10, |
320 | 276 | "metadata": {},
|
321 | 277 | "outputs": [
|
322 | 278 | {
|
323 | 279 | "data": {
|
324 | 280 | "text/html": [
|
325 |
| - "<table class=\"data-frame\"><thead><tr><th></th><th>x1</th><th>x2</th><th>x3</th><th>x4</th><th>x5</th><th>x6</th><th>x7</th><th>x8</th><th>x9</th><th>x10</th></tr></thead><tbody><tr><th>1</th><td>1</td><td>10</td><td>5</td><td>7</td><td>8</td><td>6</td><td>1</td><td>2</td><td>3</td><td>6</td></tr><tr><th>2</th><td>1</td><td>1</td><td>2</td><td>7</td><td>9</td><td>7</td><td>3</td><td>3</td><td>3</td><td>3</td></tr><tr><th>3</th><td>4</td><td>6</td><td>1</td><td>2</td><td>1</td><td>1</td><td>4</td><td>7</td><td>9</td><td>4</td></tr></tbody></table>" |
| 281 | + "<table class=\"data-frame\"><thead><tr><th></th><th>x1</th><th>x2</th><th>x3</th><th>x4</th><th>x5</th><th>x6</th><th>x7</th><th>x8</th><th>x9</th><th>x10</th></tr></thead><tbody><tr><th>1</th><td>2</td><td>8</td><td>3</td><td>7</td><td>6</td><td>4</td><td>3</td><td>8</td><td>5</td><td>4</td></tr><tr><th>2</th><td>7</td><td>1</td><td>5</td><td>5</td><td>3</td><td>6</td><td>1</td><td>8</td><td>5</td><td>1</td></tr><tr><th>3</th><td>8</td><td>1</td><td>10</td><td>9</td><td>4</td><td>2</td><td>10</td><td>2</td><td>6</td><td>6</td></tr></tbody></table>" |
326 | 282 | ],
|
327 | 283 | "text/plain": [
|
328 | 284 | "3×10 DataFrames.DataFrame\n",
|
329 | 285 | "│ Row │ x1 │ x2 │ x3 │ x4 │ x5 │ x6 │ x7 │ x8 │ x9 │ x10 │\n",
|
330 | 286 | "├─────┼────┼────┼────┼────┼────┼────┼────┼────┼────┼─────┤\n",
|
331 |
| - "│ 1 │ 1 │ 10 │ 5 │ 7 │ 8 │ 6 │ 1 │ 2 │ 3 │ 6 │\n", |
332 |
| - "│ 2 │ 1 │ 1 │ 2 │ 7 │ 9 │ 7 │ 3 │ 3 │ 3 │ 3 │\n", |
333 |
| - "│ 3 │ 4 │ 6 │ 1 │ 2 │ 1 │ 1 │ 4 │ 7 │ 9 │ 4 │" |
| 287 | + "│ 1 │ 2 │ 8 │ 3 │ 7 │ 6 │ 4 │ 3 │ 8 │ 5 │ 4 │\n", |
| 288 | + "│ 2 │ 7 │ 1 │ 5 │ 5 │ 3 │ 6 │ 1 │ 8 │ 5 │ 1 │\n", |
| 289 | + "│ 3 │ 8 │ 1 │ 10 │ 9 │ 4 │ 2 │ 10 │ 2 │ 6 │ 6 │" |
334 | 290 | ]
|
335 | 291 | },
|
336 |
| - "execution_count": 11, |
| 292 | + "execution_count": 10, |
337 | 293 | "metadata": {},
|
338 | 294 | "output_type": "execute_result"
|
339 | 295 | }
|
|
353 | 309 | },
|
354 | 310 | {
|
355 | 311 | "cell_type": "code",
|
356 |
| - "execution_count": 12, |
| 312 | + "execution_count": 11, |
357 | 313 | "metadata": {},
|
358 | 314 | "outputs": [
|
359 | 315 | {
|
|
362 | 318 | "([1, 2], [1, 2], [1, 2])"
|
363 | 319 | ]
|
364 | 320 | },
|
365 |
| - "execution_count": 12, |
| 321 | + "execution_count": 11, |
366 | 322 | "metadata": {},
|
367 | 323 | "output_type": "execute_result"
|
368 | 324 | }
|
|
371 | 327 | "x[1], x[:A], x[:, 1]"
|
372 | 328 | ]
|
373 | 329 | },
|
| 330 | + { |
| 331 | + "cell_type": "markdown", |
| 332 | + "metadata": {}, |
| 333 | + "source": [ |
| 334 | + "Future tip: In Julia 0.7, accessing a column using the `x.A` syntax (`getproperty`/`setproperty!`) is also supported." |
| 335 | + ] |
| 336 | + }, |
374 | 337 | {
|
375 | 338 | "cell_type": "markdown",
|
376 | 339 | "metadata": {},
|
|
380 | 343 | },
|
381 | 344 | {
|
382 | 345 | "cell_type": "code",
|
383 |
| - "execution_count": 13, |
| 346 | + "execution_count": 12, |
384 | 347 | "metadata": {},
|
385 | 348 | "outputs": [
|
386 | 349 | {
|
|
395 | 358 | "│ 1 │ 1 │ 1.0 │ a │"
|
396 | 359 | ]
|
397 | 360 | },
|
398 |
| - "execution_count": 13, |
| 361 | + "execution_count": 12, |
399 | 362 | "metadata": {},
|
400 | 363 | "output_type": "execute_result"
|
401 | 364 | }
|
|
413 | 376 | },
|
414 | 377 | {
|
415 | 378 | "cell_type": "code",
|
416 |
| - "execution_count": 14, |
| 379 | + "execution_count": 13, |
417 | 380 | "metadata": {},
|
418 | 381 | "outputs": [
|
419 | 382 | {
|
|
422 | 385 | "1"
|
423 | 386 | ]
|
424 | 387 | },
|
425 |
| - "execution_count": 14, |
| 388 | + "execution_count": 13, |
426 | 389 | "metadata": {},
|
427 | 390 | "output_type": "execute_result"
|
428 | 391 | }
|
|
440 | 403 | },
|
441 | 404 | {
|
442 | 405 | "cell_type": "code",
|
443 |
| - "execution_count": 15, |
| 406 | + "execution_count": 14, |
444 | 407 | "metadata": {},
|
445 | 408 | "outputs": [
|
446 | 409 | {
|
|
456 | 419 | "│ 2 │ 1 │ 1.0 │ b │"
|
457 | 420 | ]
|
458 | 421 | },
|
459 |
| - "execution_count": 15, |
| 422 | + "execution_count": 14, |
460 | 423 | "metadata": {},
|
461 | 424 | "output_type": "execute_result"
|
462 | 425 | }
|
|
475 | 438 | },
|
476 | 439 | {
|
477 | 440 | "cell_type": "code",
|
478 |
| - "execution_count": 16, |
| 441 | + "execution_count": 15, |
479 | 442 | "metadata": {},
|
480 | 443 | "outputs": [
|
481 | 444 | {
|
|
491 | 454 | "│ 2 │ 2 │ 2.0 │ b │"
|
492 | 455 | ]
|
493 | 456 | },
|
494 |
| - "execution_count": 16, |
| 457 | + "execution_count": 15, |
495 | 458 | "metadata": {},
|
496 | 459 | "output_type": "execute_result"
|
497 | 460 | }
|
|
510 | 473 | },
|
511 | 474 | {
|
512 | 475 | "cell_type": "code",
|
513 |
| - "execution_count": 17, |
| 476 | + "execution_count": 16, |
514 | 477 | "metadata": {},
|
515 | 478 | "outputs": [
|
516 | 479 | {
|
|
526 | 489 | "│ 2 │ 7 │ 8.0 │ b │"
|
527 | 490 | ]
|
528 | 491 | },
|
529 |
| - "execution_count": 17, |
| 492 | + "execution_count": 16, |
530 | 493 | "metadata": {},
|
531 | 494 | "output_type": "execute_result"
|
532 | 495 | }
|
|
539 | 502 | ],
|
540 | 503 | "metadata": {
|
541 | 504 | "kernelspec": {
|
542 |
| - "display_name": "Julia 0.6.0", |
| 505 | + "display_name": "Julia 0.6.2", |
543 | 506 | "language": "julia",
|
544 | 507 | "name": "julia-0.6"
|
545 | 508 | },
|
546 | 509 | "language_info": {
|
547 | 510 | "file_extension": ".jl",
|
548 | 511 | "mimetype": "application/julia",
|
549 | 512 | "name": "julia",
|
550 |
| - "version": "0.6.0" |
| 513 | + "version": "0.6.3" |
551 | 514 | }
|
552 | 515 | },
|
553 | 516 | "nbformat": 4,
|
|
0 commit comments