MRN-Code.github.io/index.html at master · MRN-Code/MRN-Code.github.io · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
<!DOCTYPE html>
<html>
<head>
  <meta charset="utf-8">

  <title>MRN-Code</title>
  <meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1">
  <meta name="description" content="The Neuroinformatics team at the Mind Research Network develops the Collaborative Informatics and Neuroimaging Suite (COINS): a web application for managing neuroimaging data. This site will document">
<meta property="og:type" content="website">
<meta property="og:title" content="MRN-Code">
<meta property="og:url" content="https://MRN-Code.github.io/index.html">
<meta property="og:site_name" content="MRN-Code">
<meta property="og:description" content="The Neuroinformatics team at the Mind Research Network develops the Collaborative Informatics and Neuroimaging Suite (COINS): a web application for managing neuroimaging data. This site will document">
<meta name="twitter:card" content="summary">
<meta name="twitter:title" content="MRN-Code">
<meta name="twitter:description" content="The Neuroinformatics team at the Mind Research Network develops the Collaborative Informatics and Neuroimaging Suite (COINS): a web application for managing neuroimaging data. This site will document">

    <link rel="alternative" href="/atom.xml" title="MRN-Code" type="application/atom+xml">


    <link rel="shortcut icon" type="image/x-icon" href="/favicon.png">

  <link href="//fonts.googleapis.com/css?family=Inconsolata:400,700|Open+Sans:700,400" rel="stylesheet" type="text/css">
  <link rel="stylesheet" href="/css/style.css">


</head>
<body>
  <div id="container">
    <div id="wrap">
      <div id="header">
  <div id="header-outer" class="outer">
    <div id="header-inner" class="inner">
      <div id="header-title">
        <h1 id="logo-wrap">
          <a href="/" id="logo">MRN-Code

              <span id="subtitle">Technical musings from the MRN NI team</span>

          </a>
        </h1>
      </div>
      <nav id="sub-nav">

          <a id="nav-rss-link" class="nav-icon" href="/atom.xml" title="RSS Feed"></a>

        <a id="nav-search-btn" class="nav-icon" title="Search"></a>
      </nav>
      <nav id="main-nav">
        <a id="main-nav-toggle" class="nav-icon"></a>

          <a class="main-nav-link" href="/">Home</a>

          <a class="main-nav-link" href="/archives">Archives</a>

      </nav>
      <div id="search-form-wrap">
        <form action="//google.com/search" method="get" accept-charset="UTF-8" class="search-form"><input type="search" name="q" results="0" class="search-form-input" placeholder="Search"><button type="submit" class="search-form-submit">&#xF002;</button><input type="hidden" name="sitesearch" value="https://MRN-Code.github.io"></form>
      </div>
    </div>
  </div>
</div>
      <div class="outer">
        <section id="main">

    <article id="post-Resizing-a-Numeric-Column-in-a-PostgreSQL-Table-without-Changing-Data" class="article article-type-post" itemscope itemprop="blogPost">
  <div class="article-meta">
    <a href="/2016/09/14/Resizing-a-Numeric-Column-in-a-PostgreSQL-Table-without-Changing-Data/" class="article-date">
  <time datetime="2016-09-14T18:02:59.000Z" itemprop="datePublished">2016-09-14</time>
</a>

  </div>
  <div class="article-inner">


      <header class="article-header">


    <h1 itemprop="name">
      <a class="article-title" href="/2016/09/14/Resizing-a-Numeric-Column-in-a-PostgreSQL-Table-without-Changing-Data/">Resizing a Numeric Column in a PostgreSQL Table without Changing Data</a>
    </h1>


    <a href="/author/dlandis"><h4>Drew Landis</h4></a>


      </header>

    <div class="article-entry" itemprop="articleBody">

        <p>While using PostgreSQL, you may find yourself in a situation where you have a column whose data type is now too small and the length needs to be increased.  Updating a column type in PostgreSQL can, at times, be nothing short of very painful.</p>
<p>Let’s say you have a column of type <code>varchar(25)</code>.  When you first created the column, you decided there was absolutely no way you could ever need more than 25 characters in that column.  Fast forward months or years later and you now realize that column requires 40 characters.  This would be fine, except that (A) the table is huge - which means it could take a significant amount of time for this command to finish - and (B) there are views and rules that depend on that column - which generates errors when you try the standard <code>ALTER TABLE my_table ALTER COLUMN my_column TYPE varchar(40);</code>.  There is no good solution to (A) other than waiting.  Which may or may not be allowed based on your business needs.  The solution to (B) is painfully manual.  You need to drop all dependent views and rules (e.g. Primary Key, etc), make the column data type change, then recreate all dependent views and rules.  This sucks.</p>
<p>Luckily, the folks over at <a href="http://sniptools.com" target="_blank" rel="external">sniptools.com</a> solved this exact problem in this <a href="http://sniptools.com/databases/resize-a-column-in-a-postgresql-table-without-changing-data" target="_blank" rel="external">post</a>.  I won’t go into the details (you should look at the post directly), but suffice it to say, I have used their solution multiple times on a production database and it has worked amazingly well.</p>
<p>Great.  Problem solved.  …Except that now we have the exact same problem with columns of type <code>numeric(precision, scale)</code>.  I have a column of type <code>numeric(2,0)</code> and I really need it to be <code>numeric(4,0)</code>.  I’m running into all of the same problems as the <code>varchar</code> issue above.</p>
<p>Thankfully, there is a very similar solution!  To demonstrate this, let’s start by creating a fake table of varying numeric types:<br><figure class="highlight lsl"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div><div class="line">3</div><div class="line">4</div><div class="line">5</div><div class="line">6</div><div class="line">7</div><div class="line">8</div><div class="line">9</div><div class="line">10</div><div class="line">11</div><div class="line">12</div></pre></td><td class="code"><pre><div class="line">CREATE TABLE my_numeric_test(</div><div class="line">   numeric_one_zero numeric(<span class="number">1</span>,<span class="number">0</span>),</div><div class="line">   numeric_one_one numeric(<span class="number">1</span>,<span class="number">1</span>),</div><div class="line">   numeric_two_zero numeric(<span class="number">2</span>,<span class="number">0</span>),</div><div class="line">   numeric_two_one numeric(<span class="number">2</span>,<span class="number">1</span>),</div><div class="line">   numeric_three_zero numeric(<span class="number">3</span>,<span class="number">0</span>),</div><div class="line">   numeric_three_one numeric(<span class="number">3</span>,<span class="number">1</span>),</div><div class="line">   numeric_four_zero numeric(<span class="number">4</span>,<span class="number">0</span>),</div><div class="line">   numeric_four_one numeric(<span class="number">4</span>,<span class="number">1</span>),</div><div class="line">   numeric_fortyfive_fifteen numeric(<span class="number">45</span>,<span class="number">15</span>),</div><div class="line">   numeric_sixhundredeightythree_threehundred numeric(<span class="number">683</span>,<span class="number">300</span>)</div><div class="line">);</div></pre></td></tr></table></figure></p>
<p>Next, inspect the <code>atttypmod</code> of the different columns:<br><figure class="highlight sql"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div><div class="line">3</div><div class="line">4</div><div class="line">5</div><div class="line">6</div></pre></td><td class="code"><pre><div class="line"><span class="keyword">SELECT</span> atttypmod, attname</div><div class="line"><span class="keyword">FROM</span> pg_attribute</div><div class="line"><span class="keyword">WHERE</span> <span class="number">1</span> = <span class="number">1</span></div><div class="line"><span class="keyword">AND</span> attrelid = <span class="string">'my_numeric_test'</span>::regclass</div><div class="line"><span class="keyword">AND</span> attname <span class="keyword">LIKE</span> (<span class="string">'numeric_%'</span>)</div><div class="line"><span class="keyword">ORDER</span> <span class="keyword">BY</span> atttypmod;</div></pre></td></tr></table></figure></p>
<p>Notice, there is a pattern here:<br><code>atttypmod</code> = <code>precision</code> * 65,536 + <code>scale</code> + 4</p>
<p>Let’s say we want to update column <code>numeric_four_zero</code> to have type <code>numeric(9,0)</code>.  A couple of tests:<br><figure class="highlight sql"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div></pre></td><td class="code"><pre><div class="line"><span class="keyword">INSERT</span> <span class="keyword">INTO</span> my_numeric_test (numeric_four_zero) <span class="keyword">VALUES</span> (<span class="number">1234</span>);  <span class="comment">-- works!</span></div><div class="line"><span class="keyword">INSERT</span> <span class="keyword">INTO</span> my_numeric_test (numeric_four_zero) <span class="keyword">VALUES</span> (<span class="number">123456789</span>);  <span class="comment">-- ERROR:  numeric field overflow</span></div></pre></td></tr></table></figure></p>
<p>Using the algorithm from above, for <code>numeric(9,0)</code> we see <code>atttypmod</code> = 9 * 65,536 + 0 + 4 = 589,828.  Here is how we can update the column type:<br><figure class="highlight sql"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div><div class="line">3</div><div class="line">4</div><div class="line">5</div><div class="line">6</div><div class="line">7</div><div class="line">8</div></pre></td><td class="code"><pre><div class="line"><span class="comment">-- Regular way to update column type (don't use this for this example...)</span></div><div class="line"><span class="comment">--ALTER TABLE my_numeric_test ALTER COLUMN numeric_four_zero TYPE numeric(9,0);</span></div><div class="line"></div><div class="line"><span class="comment">-- Hack way to update column type</span></div><div class="line"><span class="keyword">UPDATE</span> pg_attribute</div><div class="line"><span class="keyword">SET</span> atttypmod = <span class="number">589828</span></div><div class="line"><span class="keyword">WHERE</span> attrelid = <span class="string">'my_numeric_test'</span>::regclass</div><div class="line"><span class="keyword">AND</span> attname = <span class="string">'numeric_four_zero'</span>;</div></pre></td></tr></table></figure></p>
<p>We can run the same test as above and see that it works:<br><figure class="highlight sql"><table><tr><td class="gutter"><pre><div class="line">1</div></pre></td><td class="code"><pre><div class="line"><span class="keyword">INSERT</span> <span class="keyword">INTO</span> my_numeric_test (numeric_four_zero) <span class="keyword">VALUES</span> (<span class="number">123456789</span>);  <span class="comment">-- works!</span></div></pre></td></tr></table></figure></p>
<p>We can also select the column from the table and see that the column type has changed:<br><figure class="highlight sql"><table><tr><td class="gutter"><pre><div class="line">1</div></pre></td><td class="code"><pre><div class="line"><span class="keyword">SELECT</span> * <span class="keyword">FROM</span> my_numeric_test;</div></pre></td></tr></table></figure></p>
<p>Finally, cleanup:<br><figure class="highlight sql"><table><tr><td class="gutter"><pre><div class="line">1</div></pre></td><td class="code"><pre><div class="line"><span class="keyword">DROP</span> <span class="keyword">TABLE</span> my_numeric_test;</div></pre></td></tr></table></figure></p>
<p>Warning: I’m not sure if there are any side effects of doing this on your own code.  I <em>think</em> it should work, but give no guarantees implicitly nor explicitly that it will not turn your database into a smoking, ruined heap.</p>
<p>Again, many thanks to this sniptools <a href="http://sniptools.com/databases/resize-a-column-in-a-postgresql-table-without-changing-data" target="_blank" rel="external">post</a>.  Without them, it would not have been possible.</p>
<p>Hope that helps!</p>
<h4 id="Update-Sept-19-2016"><a href="#Update-Sept-19-2016" class="headerlink" title="Update (Sept 19, 2016):"></a>Update (Sept 19, 2016):</h4><p>I was too worried about potential side effects of using this hack and opted to <em>not</em> use it on a production environment.  Instead, I dropped 80 views, updated about 65 column data types, and then recreated the 80 views.  It required lots more work, but this way, I’m more confident in the final product.  As stated before, if you do use this hack, do so at your own risk.</p>


    </div>
    <footer class="article-footer">
      <a data-url="https://MRN-Code.github.io/2016/09/14/Resizing-a-Numeric-Column-in-a-PostgreSQL-Table-without-Changing-Data/" data-id="citahhnij0008zhw73nq716f6" class="article-share-link">Share</a>


  <ul class="article-tag-list"><li class="article-tag-list-item"><a class="article-tag-list-link" href="/tags/PostgreSQL-Postgres-SQL-Databases/">PostgreSQL, Postgres, SQL, Databases</a></li></ul>

    </footer>
  </div>

</article>


    <article id="post-SQL-Lateral-Joins" class="article article-type-post" itemscope itemprop="blogPost">
  <div class="article-meta">
    <a href="/2016/07/12/SQL-Lateral-Joins/" class="article-date">
  <time datetime="2016-07-12T16:44:40.000Z" itemprop="datePublished">2016-07-12</time>
</a>

  </div>
  <div class="article-inner">


      <header class="article-header">


    <h1 itemprop="name">
      <a class="article-title" href="/2016/07/12/SQL-Lateral-Joins/">SQL Lateral Joins</a>
    </h1>


    <a href="/author/dlandis"><h4>Drew Landis</h4></a>


      </header>

    <div class="article-entry" itemprop="articleBody">

        <p>Recently, I heard about a relatively new (as of 9.3) feature in Postgres called a LATERAL JOIN.  A LATERAL JOIN enables a subquery in the <em>from</em> part of a clause to reference columns from preceding items in the <em>from</em> list (quoted from <a href="https://wiki.postgresql.org/wiki/What%27s_new_in_PostgreSQL_9.3#LATERAL_JOIN" target="_blank" rel="external">here</a>).  Without LATERAL, each subquery is evaluated independently and so cannot cross-reference any other <em>from</em> item (quoted from <a href="https://www.postgresql.org/docs/9.4/static/queries-table-expressions.html" target="_blank" rel="external">here</a>).</p>
<p>I’ve been looking for a good way to use this feature in our internal code and I finally found one.  In our specific instance, using a LATERAL JOIN sped up a query by an order of magnitude!  However, our example was relatively complex and specific to us, so here is a generic (very contrived) example.</p>
<h2 id="Setup"><a href="#Setup" class="headerlink" title="Setup:"></a>Setup:</h2><figure class="highlight sql"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div><div class="line">3</div><div class="line">4</div><div class="line">5</div><div class="line">6</div><div class="line">7</div><div class="line">8</div><div class="line">9</div><div class="line">10</div><div class="line">11</div><div class="line">12</div><div class="line">13</div><div class="line">14</div><div class="line">15</div><div class="line">16</div><div class="line">17</div><div class="line">18</div><div class="line">19</div></pre></td><td class="code"><pre><div class="line"><span class="comment">-- Create a table with 10 million rows of 6 character random strings of numbers</span></div><div class="line"><span class="comment">-- takes about 30 sec to create</span></div><div class="line"><span class="keyword">CREATE</span> <span class="keyword">TABLE</span> rand_table <span class="keyword">AS</span></div><div class="line">   <span class="keyword">SELECT</span></div><div class="line">      <span class="keyword">id</span>,</div><div class="line">      <span class="keyword">LPAD</span>(<span class="keyword">FLOOR</span>(RANDOM()*<span class="number">1000000</span>)::<span class="built_in">text</span>, <span class="number">6</span>, <span class="string">'0'</span>) <span class="keyword">AS</span> rand_string</div><div class="line">   <span class="keyword">FROM</span> GENERATE_SERIES(<span class="number">1</span>,<span class="number">10000000</span>) <span class="keyword">id</span>;</div><div class="line"></div><div class="line"><span class="comment">-- Create table with 999,999 rows of 6 character strings of numbers (from 1 to 999,999)</span></div><div class="line"><span class="comment">-- takes about 1 sec to create</span></div><div class="line"><span class="keyword">CREATE</span> <span class="keyword">TABLE</span> series_table <span class="keyword">AS</span></div><div class="line">   <span class="keyword">SELECT</span></div><div class="line">      <span class="keyword">id</span>,</div><div class="line">      <span class="keyword">LPAD</span>(<span class="keyword">id</span>::<span class="built_in">text</span>, <span class="number">6</span>, <span class="string">'0'</span>) <span class="keyword">AS</span> series</div><div class="line">   <span class="keyword">FROM</span> GENERATE_SERIES(<span class="number">1</span>,<span class="number">999999</span>) <span class="keyword">id</span></div><div class="line"></div><div class="line"><span class="comment">-- Vacuum analyze both tables</span></div><div class="line">VACUUM <span class="keyword">ANALYZE</span> rand_table;</div><div class="line">VACUUM <span class="keyword">ANALYZE</span> series_table;</div></pre></td></tr></table></figure>
<h2 id="Test"><a href="#Test" class="headerlink" title="Test:"></a>Test:</h2><p>Let’s count how many instances of <code>&#39;010170&#39;</code> there are in <code>rand_table</code>.  Then we’ll LEFT JOIN that to the <code>series_table</code>.  Like, I said, super contrived…<br><figure class="highlight vbnet"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div><div class="line">3</div><div class="line">4</div><div class="line">5</div><div class="line">6</div><div class="line">7</div><div class="line">8</div><div class="line">9</div><div class="line">10</div><div class="line">11</div><div class="line">12</div><div class="line">13</div><div class="line">14</div><div class="line">15</div><div class="line">16</div><div class="line">17</div><div class="line">18</div><div class="line">19</div><div class="line">20</div><div class="line">21</div><div class="line">22</div><div class="line">23</div><div class="line">24</div><div class="line">25</div><div class="line">26</div><div class="line">27</div><div class="line">28</div><div class="line">29</div></pre></td><td class="code"><pre><div class="line"><span class="keyword">SELECT</span></div><div class="line">   st.id <span class="keyword">AS</span> series_id,</div><div class="line">   st.series,</div><div class="line">   rt.count</div><div class="line"><span class="keyword">FROM</span> series_table st</div><div class="line"></div><div class="line">-- Fast query (<span class="keyword">using</span> a LATERAL <span class="keyword">JOIN</span>) ~<span class="number">2</span> seconds</div><div class="line">/*</div><div class="line">LEFT <span class="keyword">JOIN</span> LATERAL (</div><div class="line">   <span class="keyword">SELECT</span></div><div class="line">      rand_string,</div><div class="line">      COUNT(rand_string)</div><div class="line">   <span class="keyword">FROM</span> rand_table</div><div class="line">   <span class="keyword">WHERE</span> rand_string = st.series -- this <span class="keyword">is</span> the lateral magic!</div><div class="line">   <span class="keyword">GROUP</span> <span class="keyword">BY</span> rand_string</div><div class="line">) rt <span class="keyword">ON</span> st.series = rt.rand_string</div><div class="line">*/</div><div class="line"></div><div class="line">-- Slow query (<span class="keyword">using</span> a regular <span class="keyword">JOIN</span>) ~<span class="number">10</span> seconds</div><div class="line">/*</div><div class="line">LEFT <span class="keyword">JOIN</span> (</div><div class="line">   <span class="keyword">SELECT</span></div><div class="line">      rand_string,</div><div class="line">      COUNT(rand_string)</div><div class="line">   <span class="keyword">FROM</span> rand_table</div><div class="line">   <span class="keyword">GROUP</span> <span class="keyword">BY</span> rand_string</div><div class="line">) rt <span class="keyword">ON</span> st.series = rt.rand_string</div><div class="line">*/</div><div class="line"><span class="keyword">WHERE</span> st.id = <span class="number">10170</span></div></pre></td></tr></table></figure></p>
<h2 id="Clean-Up"><a href="#Clean-Up" class="headerlink" title="Clean Up:"></a>Clean Up:</h2><figure class="highlight sql"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div></pre></td><td class="code"><pre><div class="line"><span class="keyword">DROP</span> <span class="keyword">TABLE</span> rand_table;</div><div class="line"><span class="keyword">DROP</span> <span class="keyword">TABLE</span> series_table;</div></pre></td></tr></table></figure>
<h2 id="Results"><a href="#Results" class="headerlink" title="Results:"></a>Results:</h2><p>The LATERAL JOIN returns results in about 2 seconds, while the regular JOIN takes about 10 seconds.</p>
<p>In the slow (LEFT JOIN) query, the subquery is forced to return <em>all</em> data, and then JOIN on that entire result set.  It takes a long time to grab and count the entire result set of the subquery, which considerably increases the overall query time.  In the fast (LEFT JOIN LATERAL) query, the subquery is able to reference the st.series column from a preceding item, and pare down the subquery result set to only include data that will ultimately be JOINed upon.</p>
<p>As stated, this example is super contrived and there are definitely other ways to rewrite and improve it, but hopefully this will give you the gist of how a LATERAL JOIN should look and function.  Also note, this query speed only improved by about 5 times, however for our internal query, we were able to improve query time by an entire order of magnitude.  Depending where you use LATERAL JOINs, some queries will improve more than others.</p>
<p>Hope that helps!</p>
<h4 id="Update-July-15-2016"><a href="#Update-July-15-2016" class="headerlink" title="Update (July 15, 2016):"></a>Update (July 15, 2016):</h4><p>I updated the above test so that <code>rand_table</code> has 100 million rows of 7 character numeric strings, and <code>series_table</code> has 9,999,999 rows of 7 character numeric strings, then re-ran the test.  This time, the LATERAL JOIN finished in about 44 seconds and the regular JOIN finished in about 20 minutes and 45 seconds (–&gt; 1,245 seconds).  This means that the LATERAL JOIN completed the query 28 times faster than the regular JOIN!</p>


    </div>
    <footer class="article-footer">
      <a data-url="https://MRN-Code.github.io/2016/07/12/SQL-Lateral-Joins/" data-id="citahhnip000azhw7yj8kzavs" class="article-share-link">Share</a>


    </footer>
  </div>

</article>


    <article id="post-Monitoring-PostgreSQL-Replication-Lag-with-Monit" class="article article-type-post" itemscope itemprop="blogPost">
  <div class="article-meta">
    <a href="/2015/12/29/Monitoring-PostgreSQL-Replication-Lag-with-Monit/" class="article-date">
  <time datetime="2015-12-29T17:45:55.000Z" itemprop="datePublished">2015-12-29</time>
</a>

  </div>
  <div class="article-inner">


      <header class="article-header">


    <h1 itemprop="name">
      <a class="article-title" href="/2015/12/29/Monitoring-PostgreSQL-Replication-Lag-with-Monit/">Monitoring PostgreSQL Replication Lag with Monit</a>
    </h1>


    <a href="/author/dwood"><h4>Dylan Wood</h4></a>


      </header>

    <div class="article-entry" itemprop="articleBody">

        <p>For some time, we have been utilizing PostgreSQL’s hot standby replication feature in both our staging and production environments. Currently, the hot standby serves three functions:</p>
<ol>
<li>Standby server for maximum uptime if the master fails.</li>
<li>Disaster recovery if the master fails completely.</li>
<li>Read-only batch operations like taking nightly backups.</li>
</ol>
<p>All three of these functions are critical to the safety of our data, so we need to be sure that the master and slave are properly communicating at all times. We use <a href="https://mmonit.com/monit/" target="_blank" rel="external">Monit</a>Monit and <a href="https://mmonit.com" target="_blank" rel="external">M/Monit</a> for most of our application and server monitoring. Monit is a daemon that runs on each of our servers, and performs checks at regular intervals. M/Monit is a centralized dashboard and alert service to which all of the Monit instances report. To help ensure that we get alerts even if our network is completely offline, our M/Monit host is hosted by AWS.</p>
<p>Because replication is so important, I have taken a belt and suspenders approach to monitoring the replication lag. This means that Monit is checking the replication status on both the master and the slave servers. The approach uses Monit’s <code>check program</code> functionality to run a simple python script. If the script exits with an error (non-zero) status, then Monit will send an alert to our M/Monit server. M/Monit will then send emails and slack notifications to us.</p>
<p>On to the code:</p>
<h3 id="On-the-master-server"><a href="#On-the-master-server" class="headerlink" title="On the master server:"></a>On the master server:</h3><h4 id="etc-monit-conf-d-pg-master-replication-check"><a href="#etc-monit-conf-d-pg-master-replication-check" class="headerlink" title="/etc/monit/conf.d/pg-master-replication-check"></a><code>/etc/monit/conf.d/pg-master-replication-check</code></h4><figure class="highlight sql"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div><div class="line">3</div><div class="line">4</div><div class="line">5</div></pre></td><td class="code"><pre><div class="line"><span class="keyword">check</span> program replication_check</div><div class="line">        <span class="keyword">with</span> <span class="keyword">path</span> <span class="string">"/coins/pg-monitoring/master-replication-check.py"</span></div><div class="line">        <span class="keyword">as</span> uid <span class="string">"&#123;&#123;postgresql_service_user&#125;&#125;"</span></div><div class="line">        <span class="keyword">and</span> gid <span class="string">"webadmins"</span></div><div class="line">    <span class="keyword">if</span> <span class="keyword">status</span> != <span class="number">0</span> <span class="keyword">for</span> <span class="number">3</span> cycles <span class="keyword">then</span> alert</div></pre></td></tr></table></figure>
<h3 id="coins-pg-monitoring-master-replication-check-py"><a href="#coins-pg-monitoring-master-replication-check-py" class="headerlink" title="/coins/pg-monitoring/master-replication-check.py"></a><code>/coins/pg-monitoring/master-replication-check.py</code></h3><p>This script queries the database to ascertain that it is in the right state (WAL streaming), and that the replication position reported by the slave is in line with that expected by the master.</p>
<figure class="highlight zephir"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div><div class="line">3</div><div class="line">4</div><div class="line">5</div><div class="line">6</div><div class="line">7</div><div class="line">8</div><div class="line">9</div><div class="line">10</div><div class="line">11</div><div class="line">12</div><div class="line">13</div><div class="line">14</div><div class="line">15</div><div class="line">16</div><div class="line">17</div><div class="line">18</div><div class="line">19</div><div class="line">20</div><div class="line">21</div><div class="line">22</div><div class="line">23</div><div class="line">24</div><div class="line">25</div><div class="line">26</div><div class="line">27</div><div class="line">28</div></pre></td><td class="code"><pre><div class="line"><span class="comment">#!/usr/bin/python</span></div><div class="line">import subprocess</div><div class="line"></div><div class="line">repLagBytesLimit = <span class="number">128</span></div><div class="line"></div><div class="line"><span class="keyword">try</span>:</div><div class="line">    repModeRes = subprocess.check_output(<span class="string">'psql -t -p &#123;&#123;postgresql_port&#125;&#125; -c "SELECT state FROM pg_stat_replication"'</span>, shell=<span class="keyword">True</span>)</div><div class="line"></div><div class="line">    isInRepMode = repModeRes.strip() == <span class="string">'streaming'</span></div><div class="line"></div><div class="line">    repLagRes = subprocess.check_output(<span class="string">'psql -t -p &#123;&#123;postgresql_port&#125;&#125; -c "SELECT pg_xlog_location_diff(sent_location, replay_location) FROM pg_stat_replication"'</span>, shell=<span class="keyword">True</span>)</div><div class="line"></div><div class="line">    repLagBytes = <span class="keyword">float</span>(repLagRes)</div><div class="line"></div><div class="line">except subprocess.CalledProcessError <span class="keyword">as</span> e:</div><div class="line">    <span class="keyword">print</span> <span class="string">"Error retrieving stats: &#123;0&#125;"</span>.format(e)</div><div class="line">    <span class="keyword">exit</span>(<span class="number">1</span>)</div><div class="line"></div><div class="line"><span class="keyword">if</span> isInRepMode != <span class="keyword">True</span>:</div><div class="line">    <span class="keyword">print</span> (<span class="string">'Master server is not streaming to standby'</span>)</div><div class="line">    <span class="keyword">exit</span>(<span class="number">1</span>)</div><div class="line"></div><div class="line"><span class="keyword">if</span> repLagBytes &gt; repLagBytesLimit:</div><div class="line">    <span class="keyword">print</span> <span class="string">'Slave replay is lagging behind by %f bytes'</span> % repLagBytes</div><div class="line">    <span class="keyword">exit</span>(<span class="number">1</span>)</div><div class="line"></div><div class="line"><span class="keyword">print</span>(<span class="string">'All clear!'</span>)</div><div class="line"><span class="keyword">exit</span>(<span class="number">0</span>)</div></pre></td></tr></table></figure>
<h3 id="On-the-slave-server"><a href="#On-the-slave-server" class="headerlink" title="On the slave server"></a>On the slave server</h3><h4 id="etc-monit-conf-d-pg-slave-replication-check"><a href="#etc-monit-conf-d-pg-slave-replication-check" class="headerlink" title="/etc/monit/conf.d/pg-slave-replication-check"></a><code>/etc/monit/conf.d/pg-slave-replication-check</code></h4><figure class="highlight sql"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div><div class="line">3</div><div class="line">4</div><div class="line">5</div></pre></td><td class="code"><pre><div class="line"><span class="keyword">check</span> program replication_check</div><div class="line">        <span class="keyword">with</span> <span class="keyword">path</span> <span class="string">"/coins/pg-monitoring/slave-replication-check.py"</span></div><div class="line">        <span class="keyword">as</span> uid <span class="string">"&#123;&#123;postgresql_service_user&#125;&#125;"</span></div><div class="line">        <span class="keyword">and</span> gid <span class="string">"webadmins"</span></div><div class="line">    <span class="keyword">if</span> <span class="keyword">status</span> != <span class="number">0</span> <span class="keyword">for</span> <span class="number">3</span> cycles <span class="keyword">then</span> alert</div></pre></td></tr></table></figure>
<h4 id="coins-pg-monitoring-slave-replication-check-py"><a href="#coins-pg-monitoring-slave-replication-check-py" class="headerlink" title="/coins/pg-monitoring/slave-replication-check.py"></a><code>/coins/pg-monitoring/slave-replication-check.py</code></h4><p>This script queries the database to ascertain that it is in the right<br>state (recovery). It also queries the current xlog position <strong>from the master</strong>,<br>and compares it to the last reply location of the slave.</p>
<figure class="highlight bash"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div><div class="line">3</div><div class="line">4</div><div class="line">5</div><div class="line">6</div><div class="line">7</div><div class="line">8</div><div class="line">9</div><div class="line">10</div><div class="line">11</div><div class="line">12</div><div class="line">13</div><div class="line">14</div><div class="line">15</div><div class="line">16</div><div class="line">17</div><div class="line">18</div><div class="line">19</div><div class="line">20</div><div class="line">21</div><div class="line">22</div><div class="line">23</div><div class="line">24</div><div class="line">25</div><div class="line">26</div><div class="line">27</div><div class="line">28</div></pre></td><td class="code"><pre><div class="line"><span class="comment">#!/usr/bin/python</span></div><div class="line">import subprocess</div><div class="line"></div><div class="line">slaveX<span class="built_in">log</span>DiffLimitBytes = 128</div><div class="line"></div><div class="line">try:</div><div class="line">    repModeRes = subprocess.check_output(<span class="string">'psql -t -p &#123;&#123;postgresql_port&#125;&#125; -c "SELECT pg_is_in_recovery()"'</span>, shell=True)</div><div class="line">    isInRepMode = repModeRes.strip() == <span class="string">'t'</span></div><div class="line"></div><div class="line">    masterX<span class="built_in">log</span>LocationRes = subprocess.check_output(<span class="string">'psql -t -p &#123;&#123;postgresql_port&#125;&#125; -h &#123;&#123;postgres_basebackup_host&#125;&#125; -U &#123;&#123;postgres_basebackup_user&#125;&#125; &#123;&#123;postgres_db_name&#125;&#125; -c "select pg_current_xlog_location();"'</span>, shell=True)</div><div class="line">    masterX<span class="built_in">log</span>LocationStr = masterX<span class="built_in">log</span>LocationRes.strip()</div><div class="line"></div><div class="line">    slaveX<span class="built_in">log</span>DiffRes = subprocess.check_output(<span class="string">'psql -t -p &#123;&#123;postgresql_port&#125;&#125; &#123;&#123;postgres_db_name&#125;&#125; -c "select pg_xlog_location_diff(pg_last_xlog_replay_location(), \'</span><span class="string">' + masterXlogLocationStr + '</span>\<span class="string">'::pg_lsn);"'</span>, shell=True)</div><div class="line">    slaveX<span class="built_in">log</span>DiffBytes = <span class="built_in">float</span>(slaveX<span class="built_in">log</span>DiffRes.strip())</div><div class="line">except subprocess.CalledProcessError as e:</div><div class="line">    <span class="built_in">print</span> <span class="string">"Error retrieving stats: &#123;0&#125;"</span>.format(e)</div><div class="line">    <span class="built_in">exit</span>(1)</div><div class="line"></div><div class="line"><span class="keyword">if</span> isInRepMode != True:</div><div class="line">    <span class="built_in">print</span> (<span class="string">'Slave server is not in recovery mode'</span>)</div><div class="line">    <span class="built_in">exit</span>(1)</div><div class="line"></div><div class="line"><span class="keyword">if</span> slaveX<span class="built_in">log</span>DiffBytes &gt; slaveX<span class="built_in">log</span>DiffLimitBytes:</div><div class="line">    <span class="built_in">print</span> <span class="string">"Slave server replication is behind master by %f bytes"</span> % slaveX<span class="built_in">log</span>DiffBytes</div><div class="line">    <span class="built_in">exit</span>(1)</div><div class="line"></div><div class="line"><span class="built_in">print</span>(<span class="string">'All clear!'</span>)</div><div class="line"><span class="built_in">exit</span>(0)</div></pre></td></tr></table></figure>
<p>You may wonder why I chose python instead of Bash or my usual favorite: Node.js. Python is installed in our base server image, while Node is not, and I want to keep out database servers as <em>stock</em> as possible. I chose python over bash because I find that bash scripts are brittle and difficult to debug.</p>


    </div>
    <footer class="article-footer">
      <a data-url="https://MRN-Code.github.io/2015/12/29/Monitoring-PostgreSQL-Replication-Lag-with-Monit/" data-id="citahhnif0007zhw70kzfc6t8" class="article-share-link">Share</a>


  <ul class="article-tag-list"><li class="article-tag-list-item"><a class="article-tag-list-link" href="/tags/postgres-postgresql-monit-m-monit-monitoring-COINS3-0/">postgres postgresql monit m/monit monitoring COINS3.0</a></li></ul>

    </footer>
  </div>

</article>


    <article id="post-Launch-Console-in-VMWare-Web-Client-on-Windows-10-Chrome-42" class="article article-type-post" itemscope itemprop="blogPost">
  <div class="article-meta">
    <a href="/2015/07/04/Launch-Console-in-VMWare-Web-Client-on-Windows-10-Chrome-42/" class="article-date">
  <time datetime="2015-07-04T17:26:21.000Z" itemprop="datePublished">2015-07-04</time>
</a>

  </div>
  <div class="article-inner">


      <header class="article-header">


    <h1 itemprop="name">
      <a class="article-title" href="/2015/07/04/Launch-Console-in-VMWare-Web-Client-on-Windows-10-Chrome-42/">Launching a Console in VMware Web Client on Windows 10, Chrome 42+</a>
    </h1>


    <a href="/author/dwood"><h4>Dylan Wood</h4></a>


      </header>

    <div class="article-entry" itemprop="articleBody">

        <p>Maybe it is just me, but I had a difficult time launching the console of my<br>guest VMs using the VMware Web Client. Here is how I eventually got it working<br>on Windows 10 and Chrome 43.x.</p>
<img src="/2015/07/04/Launch-Console-in-VMWare-Web-Client-on-Windows-10-Chrome-42/launch.png" alt="Launch VMWare Web Client" title="Launch VMWare Web Client">
<h2 id="Background"><a href="#Background" class="headerlink" title="Background"></a>Background</h2><p>I am a huge fan of VMware’s plan to replace their Windows-only vSphere desktop<br>client with a web client. Using the web client, I am able to perform<br><em>most</em> tasks directly from my Mac, thus saving the time of booting a Windows VM.<br>The only task which cannot be performed in the web client from OS-X is launching<br>the guest OS console.</p>
<p>In order to open the console in the web client, it is necessary to install the<br><em>VMware Client Integration Plugin</em>, which VMWare claims is only available for<br>64/32-bit Windows and 32-bit Linux. I was unable to get the Client Integration<br>Plugin to install on Ubuntu 14.04, so it looks like I am still stuck using a<br>Windows VM to manage our VMware cluster.</p>
<p>Even on Windows, it took some time to get things configured such that I could<br>access a guest VM’s console via the web client. Here is how I eventually did it.</p>
<h2 id="Environment"><a href="#Environment" class="headerlink" title="Environment"></a>Environment</h2><ul>
<li>OS: Windows 10 Evaluation Copy</li>
<li>Browser: Google Chrome 43.0.2357.130 (Tried IE Edge and Firefox with no luck)</li>
</ul>
<h2 id="Install-Client-Integration-Plugin"><a href="#Install-Client-Integration-Plugin" class="headerlink" title="Install Client Integration Plugin"></a>Install Client Integration Plugin</h2><ol>
<li>Navigate to your VMware Web Client login page in your browser. <strong>Do not log in.</strong></li>
<li>Click the link at the bottom left of the page entitled ‘Download Client Integration Plugin’.</li>
<li>Run the downloaded installer, accepting all defaults.</li>
</ol>
<h2 id="Enable-NPAPI-plugins"><a href="#Enable-NPAPI-plugins" class="headerlink" title="Enable NPAPI plugins:"></a>Enable NPAPI plugins:</h2><ol>
<li>Paste <code>chrome://flags/#enable-npapi</code> into your address bar and press return.</li>
<li>Click <code>Enable</code> below <em>Enable NPAPI</em>.</li>
<li><strong>Click <code>Relaunch Now</code> at the bottom left of the page</strong>.</li>
</ol>
<h2 id="Allow-VMware-plugins-to-run"><a href="#Allow-VMware-plugins-to-run" class="headerlink" title="Allow VMware plugins to run"></a>Allow VMware plugins to run</h2><ol>
<li>Paste <code>chrome://plugins/</code> into the address bar and press return.</li>
<li>Check the box next to ‘Always allow to run’ below <strong>both</strong> VMware plugins.</li>
</ol>
<h2 id="Verify-plugins"><a href="#Verify-plugins" class="headerlink" title="Verify plugins"></a>Verify plugins</h2><ol>
<li>Restart the windows machine for good measure.</li>
<li>Open Chrome and navigate back to your VMware Web Client login page.<br>You should see two notifications from chrome at the top of the page (see image below).<br>These notifications can be disregarded (for now, see discussion further below).<img src="/2015/07/04/Launch-Console-in-VMWare-Web-Client-on-Windows-10-Chrome-42/plugin_warnings.png" alt="plugin_warnings.png" title="">
</li>
</ol>
<h2 id="Still-does-not-work"><a href="#Still-does-not-work" class="headerlink" title="Still does not work?"></a>Still does not work?</h2><p>If you do not see the warnings from Chrome, try this:</p>
<ol>
<li>Navigate to chrome://settings/content</li>
<li>Scroll to ‘Unsandboxed Plugins’</li>
<li>Select ‘Allow all sites …’</li>
<li>Click ‘Done’</li>
<li>Repeat the <em>Verify plugins</em> steps above.</li>
</ol>
<h2 id="Open-the-console-for-a-VM"><a href="#Open-the-console-for-a-VM" class="headerlink" title="Open the console for a VM"></a>Open the console for a VM</h2><ol>
<li>Log in to the VMware Web Client</li>
<li>Locate a VM whos console you want to open</li>
<li>Click the <code>Settings</code> tab</li>
<li>Near the top of the center pane, you should see a black square with the text <code>Launch Console</code> beneath it. If you see a link to <code>Download plugin</code> instead, something<br>is wrong. Try repeating the steps above.</li>
</ol>
<h2 id="Discussion-about-NPAPI-plugin-support"><a href="#Discussion-about-NPAPI-plugin-support" class="headerlink" title="Discussion about NPAPI plugin support"></a>Discussion about NPAPI plugin support</h2><p>Google has promised to completely remove NPAPI plugin support from Chrome with<br>version 45. Given the approximate 5-week release schedule that Google has been on,<br>this means that you will only be able to use the most recent version of Chrome<br>with the VMware Client Integration Plugin for another couple of months.</p>
<p>With this in mind, I am going to keep my vSphere desktop application installed.<br>Hopefully, VMware has already begun work on a truly cross platform Web Client<br>that supports launching a console.</p>


    </div>
    <footer class="article-footer">
      <a data-url="https://MRN-Code.github.io/2015/07/04/Launch-Console-in-VMWare-Web-Client-on-Windows-10-Chrome-42/" data-id="citahhnhz0003zhw7xudqsp1s" class="article-share-link">Share</a>


  <ul class="article-tag-list"><li class="article-tag-list-item"><a class="article-tag-list-link" href="/tags/Chrome/">Chrome</a></li><li class="article-tag-list-item"><a class="article-tag-list-link" href="/tags/VMware/">VMware</a></li><li class="article-tag-list-item"><a class="article-tag-list-link" href="/tags/vSphere/">vSphere</a></li></ul>

    </footer>
  </div>

</article>


    <article id="post-Managing-Application-Dates-and-Times" class="article article-type-post" itemscope itemprop="blogPost">
  <div class="article-meta">
    <a href="/2015/06/24/Managing-Application-Dates-and-Times/" class="article-date">
  <time datetime="2015-06-24T16:37:55.000Z" itemprop="datePublished">2015-06-24</time>
</a>

  </div>
  <div class="article-inner">


      <header class="article-header">


    <h1 itemprop="name">
      <a class="article-title" href="/2015/06/24/Managing-Application-Dates-and-Times/">Managing Application Dates and Times</a>
    </h1>


    <a href="/author/cdieringer"><h4>Christopher Dieringer</h4></a>


      </header>

    <div class="article-entry" itemprop="articleBody">

        <p>Note: this is a repost from <a href="http://cdaringe.com/managing-application-dates/" target="_blank" rel="external">cdaringe.net</a></p>
<p>Managing date and times has long been trouble for every application developer.  In many cases, a simple app only cares about datetime resolution at the <code>day</code> level.  However, to many applications, higher time resolution is critical.  In these applications, a finer, more granular time-unit resolution may be highly desirable.  The difficulties in managing time emerge in the realm of relativity.   If an application, its users, and its dependent infrastructure are spread across timezones, synchronizing a chronological history of events may prove difficult if you haven’t designed your system to manage time full well.  This is discussion may be old hat for many, but a painful reality for many apps.</p>
<img src="/2015/06/24/Managing-Application-Dates-and-Times/Universal_Dial_Plate_or_Times_of_all_Nations_1854.png" alt="Time After Time..." title="Time After Time...">
<h2 id="why-is-it-difficult"><a href="#why-is-it-difficult" class="headerlink" title="why is it difficult?"></a>why is it difficult?</h2><p>It doesn’t have to be, actually.  The “difficult” aspects of managing time are generally designer oversight.  Two common oversights that I am personally guilty of are:</p>
<ul>
<li>Time is often captured incompletely. Application services consuming the incomplete time fill in the missing data with assumptions.<ul>
<li>ex: in js, <code>(new Date()).getTime() //=&gt; 1435089516878</code>.  What happens if you log this time on a server in a different timezone?  Most likely, the server uses its timezone or UTC, <em>not</em> the user’s time zone.</li>
</ul>
</li>
<li>Time is transferred in varying formats, generating sub-system overhead (or errors!)<ul>
<li>How do you serialize your <code>date</code> or <code>time</code> objects for sending over the wire?  Is your serialization lossy?  Do your services require knowledge of each others’ formats?</li>
</ul>
</li>
</ul>
<h2 id="how-do-we-fix-it"><a href="#how-do-we-fix-it" class="headerlink" title="how do we fix it"></a>how do we fix it</h2><p>Before we discuss how these issues manifest themselves in an application, let’s quickly discuss the general solution.  We need a solution to represent time that does so reliably across:</p>
<ul>
<li>distributed application environments (e.g. languages, operating systems, clients)</li>
<li>distributed application hardware</li>
<li>client time zones</li>
</ul>
<p>My preferred strategy is to <strong>store, transfer, and manipulate complete timestamps only</strong>.  What’s a complete timestamp?  It’s simply an absolute time with visual representation of timezone.  It’s a string or composite datatype specifying time with my application’s required time-unit resolution or finer, + TZ.  Practically speaking, in my app I will:</p>
<ul>
<li>store all database times as timestamp with timezone (or equivalent)</li>
<li><p>transfer all times as fully defined time strings <strong>with timezones</strong> in a standardized format (e.g. <a href="https://en.wikipedia.org/?title=ISO_8601" target="_blank" rel="external">ISO 8601</a>).  <strong>Know your application’s</strong> time-wise resolution needs, and adhere to them throughout the app.  Suppose you need <code>second</code> level resolution:</p>
<ul>
<li><strong>bad</strong>: ‘10/25/2010’</li>
<li><strong>bad</strong>: ‘10/25/2010 08:23:22’</li>
<li><strong>good</strong>: ‘10/25/2010 08:23:22-07’</li>
<li><strong>good</strong>: ‘10/25/2010 08:23:22.2324-07’ (note timezone offset always included)</li>
</ul>
</li>
<li><p>perform time operations only through utilities that can parse and understand the complete time strings.  avoid manually extracting time components out of strings.</p>
</li>
</ul>
<h2 id="application-date-and-time-oversights"><a href="#application-date-and-time-oversights" class="headerlink" title="application date and time oversights"></a>application <code>date</code> and <code>time</code> oversights</h2><p>We already discussed these above.  Let’s dive a bit deeper.</p>
<h4 id="time-captured-incompletely"><a href="#time-captured-incompletely" class="headerlink" title="time captured incompletely"></a>time captured incompletely</h4><p>Earlier, we examined computing unix time in the browser, using javascript.<br><figure class="highlight js"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div></pre></td><td class="code"><pre><div class="line"><span class="keyword">var</span> myDate = <span class="keyword">new</span> <span class="built_in">Date</span>();</div><div class="line">myDate.getTime(); <span class="comment">//=&gt; 1435089516878`</span></div></pre></td></tr></table></figure></p>
<p>The above is an easy way to get a time.  Let us use this in our app, so long as that time data doesn’t leave this client, or this machine doesn’t change timezones.  Can you assert that your user’s don’t travel?  Can you assert that your time or time calculations won’t be sent somewhere, beyond the client?  If you cannot, sending time in a basic integer format drops critical data.  Specifically, you lose timezone relativity and, in rare cases, a known base-time reference value.  For instance, does that integer reflect the # of seconds from unix-time-start in UTC time, or the # of seconds from unix-time-start, offset for your region?</p>
<p>You could, as some do, use the above integer time value in conjunction with a timezone string.  However, you’ve introduced generally 1 to 2 steps of extra parse complication on all services consuming your time values, and an unstated assumption that the unix time provided is already aligned with UTC (it generally is).  These are all simple concepts that stack up to be a complicated when you have many services in different languages.  JS (node and browser), for instance, default to milliseconds.  PHP likes seconds.</p>
<p>Managing this complication is generally unnecessary.  In order to convey a clear, accurate, and complete timestamp, one of which that you can interchange safely across services,  serialize your apps’ and services’ timestamps in a complete string during I/O, and parse via language natives or time helper libraries as required.</p>
<figure class="highlight php"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div><div class="line">3</div><div class="line">4</div><div class="line">5</div><div class="line">6</div><div class="line">7</div><div class="line">8</div><div class="line">9</div><div class="line">10</div><div class="line">11</div><div class="line">12</div><div class="line">13</div><div class="line">14</div><div class="line">15</div><div class="line">16</div><div class="line">17</div><div class="line">18</div></pre></td><td class="code"><pre><div class="line"><span class="comment">// client makes xhr GET request to server</span></div><div class="line">xhr.get(..., cb);</div><div class="line"></div><div class="line"><span class="comment">// server responds (ex. php DateTime)</span></div><div class="line">$datetime = <span class="keyword">new</span> DateTime(<span class="string">'now'</span>, <span class="keyword">new</span> DateTimeZone(<span class="string">'America/Los_Angeles'</span>));</div><div class="line"><span class="keyword">echo</span> $datetime-&gt;format(<span class="string">'c'</span>); <span class="comment">// 2015-06-24T09:45:06-07:00</span></div><div class="line"> (ISO <span class="number">8601</span>)</div><div class="line"></div><div class="line"><span class="comment">// client parses. (ex. javascript moment.js)</span></div><div class="line"><span class="keyword">var</span> cb = <span class="function"><span class="keyword">function</span><span class="params">(response)</span> </span>&#123;</div><div class="line">    <span class="comment">// response =&gt; 2015-06-24T09:45:06-07:00</span></div><div class="line">    <span class="keyword">var</span> myDate = moment(response);</div><div class="line">    myDate.format(); <span class="comment">// 2015-06-24T09:45:06-07:00</span></div><div class="line">    myDate.toISOString(); <span class="comment">// 2015-06-24T16:45:06.000Z</span></div><div class="line">    <span class="comment">// how refreshing!  all I/O could use `.format()`</span></div><div class="line">    <span class="comment">// as your serialized version, instead of `.toISOString()`</span></div><div class="line">    <span class="comment">// so as to not drop user TZ</span></div><div class="line">&#125;</div></pre></td></tr></table></figure>
<p>This example leads us directly to our next topic!</p>
<h4 id="time-is-transferred-in-varying-formats"><a href="#time-is-transferred-in-varying-formats" class="headerlink" title="time is transferred in varying formats"></a>time is transferred in varying formats</h4><p>Look at your own applications.  How have you shared times between services?  Have you echoed time values directly out of your database?  Have your API’s used programming-language specific formatting functions to make time “look” standard to your liking?</p>
<p>Apps I have worked in have done all sorts of variants in php:<br><figure class="highlight php"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div><div class="line">3</div><div class="line">4</div></pre></td><td class="code"><pre><div class="line"><span class="keyword">echo</span> date(<span class="string">"Ymd"</span>); <span class="comment">// or</span></div><div class="line"><span class="keyword">echo</span> date(DATE_RFC2822); <span class="comment">// or</span></div><div class="line"><span class="keyword">echo</span> date(<span class="string">"Y-m-d H:i:s"</span>); <span class="comment">// very prevalent in codebases i've used</span></div><div class="line"><span class="keyword">echo</span> date(<span class="string">"c"</span>); <span class="comment">// my favorite :), e.g. 2004-02-12T15:19:21+00:00</span></div></pre></td></tr></table></figure></p>
<p><a href="http://php.net/manual/en/function.date.php" target="_blank" rel="external">date(“c”)?</a></p>
<p>Use a standard.  8601 is my personal preference.  Using a standard is generally the safest, as most languages have a toolset that can parse and manipulate dates/times from a standardized string.  It is ideal to do date/time I/O in the same string format on <strong>every transfer</strong> to make your interfaces predictable!</p>
<p>A consideration that <strong>must not be overlooked is whether or not the timestamp serializer normalizes to UTC or not</strong>.  In the server example directly above, we used <code>date(&quot;c&quot;)</code>.  This does <em>not</em> normalize to UTC time.  In the client example, we advised against using <code>myDate.toISOString()</code> in favor of <code>myDate.format()</code>, where <code>.toISOString()</code> normalized to UTC.  Again, all of the above variations are 8601 compliant, but <code>.toISOString()</code> drops user +TZ data.</p>
<p>It can be OK for servers to send outbound timestamps normalized to UTC time if:</p>
<ul>
<li>we have a centralized server model (because we tend to normalize internally against UTC anyway)  AND,</li>
<li>our client apps/services don’t care about client locale history</li>
</ul>
<p>Those are tough bullets to gamble over.  You may have not know how your app or ecosystem will change in time.  In a distributed server model, where server activity also needs to be tracked against other servers, UTC normalization may lead to bad consequences!  <strong>Don’t normalize to UTC if you have rich TZ data to begin with</strong> and there is possibility that you will want to maintain client locale time in any part of your app!</p>
<h4 id="summary"><a href="#summary" class="headerlink" title="summary"></a>summary</h4><p>It’s easy to drop critical time data.  It’s also very easy to maintain good timestamp data integrity.  When possible,</p>
<ul>
<li>clear understanding your app’s timestamp requirements,</li>
<li>use a timestamp standard,</li>
<li>avoid time normalization, and</li>
<li>practice lossless timestamp serialization and parsing.</li>
</ul>
<p>These tips will help yield a healthy app and good time intgrity.  It’s a bland topic–thanks for reading!</p>
<p><a href="http://momentjs.com/" target="_blank" rel="external">moment.js</a><br><br><a href="http://php.net/manual/en/class.datetime.php" target="_blank" rel="external">php DateTime</a><br><br>Note: <a href="https://bugzilla.mozilla.org/buglist.cgi?quicksearch=Date.parse&amp;list_id=12345854" target="_blank" rel="external">FF bug: Date.parse doesn’t honor valid ISO str</a>, hence moment.js usage for unified x-browser time-parsing experience!</p>


    </div>
    <footer class="article-footer">
      <a data-url="https://MRN-Code.github.io/2015/06/24/Managing-Application-Dates-and-Times/" data-id="citahhni20005zhw75nwvs3vc" class="article-share-link">Share</a>


    </footer>
  </div>

</article>


    <article id="post-How-to-partially-retreat-from-a-database-upgrade" class="article article-type-post" itemscope itemprop="blogPost">
  <div class="article-meta">
    <a href="/2015/06/21/How-to-partially-retreat-from-a-database-upgrade/" class="article-date">
  <time datetime="2015-06-22T00:00:17.000Z" itemprop="datePublished">2015-06-21</time>
</a>

  </div>
  <div class="article-inner">


      <header class="article-header">


    <h1 itemprop="name">
      <a class="article-title" href="/2015/06/21/How-to-partially-retreat-from-a-database-upgrade/">How to [partially] retreat from a database upgrade</a>
    </h1>


    <a href="/author/dwood"><h4>Dylan Wood</h4></a>


      </header>

    <div class="article-entry" itemprop="articleBody">

        <p>This post has turned into a bit of a long story.<br>If you are just looking for how to perform a <code>pg_restore</code> from a newer version of PostgreSQL to an older version of PostgreSQL, look down toward the bottom.</p>
<p>We recently upgraded our worn-out PostgreSQL 8.4 database running on a Cents 5.5 VM to a shiny new PostgreSQL 9.4 database on top of Ubuntu 14.04.<br>During a three week testing period, we encountered and fixed a coulple of upgrade-induced bugs in our staging environment.<br>At the end of three weeks of testing, we felt confident that the upgrade would go smoothly in production… and it did (mostly).</p>
<p>The day after the upgrade, users started to submit tickets complaining that our data export tool was running very slowly in some cases, and just hanging in other cases.<br>Myself and two other engineers spent the next day and a half benchmarking the new database servers over and over, and looking at <code>Explain Analyze</code> plans.<br>Eventually, we convinced ourselves that the issue was not with the underlying virtual machine, or the OS, but with our configuration of postgres.</p>
<p>To better debug, we restarted our old database server, and ran the offending queries there as well as in the new server in our staging environment.<br>We were able to gain some insights into the issue by comparing the <code>Explain Analyze</code> output from both servers:<br>The new database was not using the same indices that the old database was. This resulted in more nested loops and analyzing more rows than necessary.</p>
<p>By increasing the <code>random_page_cost</code> from 4 to 15, we were able to get the query explain plans to look more similar, but performance did not improve.<br>The new database was still choosing different indices to scan.</p>
<p>At this point, our users had been without a useful query-building-export tool for two business days, so it was time to switch tactics and implement a work-around solution.<br>I decided that it would be easiest to direct the queries used by our export tool to a copy of our old production database.<br>We would be able to keep the copy relatively up to date by loading nightly backups from our production infrastructure.</p>
<p>Modifying the application layer to send requests to the old database server was trivial, since there was a dedicated endpoint just for the low-performig export tool.<br>Getting the old database to refresh from a backup of the new database was a little trickier. </p>
<p>First, I set up a cron job to run a <code>pg_dump</code> on our hot standby database server every night, and store the dump on our network storage.<br>I have always used the <em>custom</em> format (<code>-Fc</code>) for pg_dumps, as they allow a lot of flexibility when performing the restore.<br>This was not an option in this case because I received the following error when trying to restore on the PG 8.4 server: <code>pg_restore: [archiver] unsupported version (1.12) in file header</code>.</p>
<p>My initial attempts to circumvent this included running the pg_dump of the new database remotely from the old database server unsuccessfully, and attempting to upgrade only <em>postgres-contrib</em> on the old database server.<br>Neither of these solutions worked out, so I decided to use the <em>plain</em> pg_dump format (<code>-Fp</code>). This outputs plain SQL statements to rebuild the schema and data.<br>There are still a few errors during the restore, because the <code>CREATE EXTENSION</code> functionality does not exist in PG 8.4, but I can simply rebuild the necessary extensions manually after the rebuild.</p>
<p>To reduce the time taken by the dump and restore process, I only dump the schema used by the export tool.<br>In addition, I omit all history tables (a construct we use to track changes made to data in the database) and some of the larger tables not used by the query tool.<br>This also reduces the size of the restored database considerably, and allows me to restore into a temporary database while the primary database is still running, allowing for near-zero downtime.</p>
<p>A simplified diagram of the current system is shown below:<br><img src="/2015/06/21/How-to-partially-retreat-from-a-database-upgrade/QBPG84.png" alt="QBPG84.png" title=""></p>
<p>Here is the cron task that dumps the data. This is placed in its own file in <code>/etc/cron.d</code><br><figure class="highlight basic"><table><tr><td class="gutter"><pre><div class="line">1</div></pre></td><td class="code"><pre><div class="line"><span class="symbol">30 </span><span class="number">23</span> * * * postgres pg_dump -vFp -p <span class="number">6117</span> -T <span class="comment">'mrsdba.mrs_series_data' -T '*_hist'  -T mrsdba.mrs_series  -T mrsdba.mrs_analysis_files -T mrsdba.mrs_assessment_events -T mrsdba.mrs_asmt_resp_hist_new -n 'mrsdba' postgres &gt; '/coins/mn    t/ni/prodrepdbcoin.sql' &gt; /tmp/rep_dump.log 2&gt;&amp;1</span></div></pre></td></tr></table></figure></p>
<p>Here is the script that creates a new Postgres 8.4 DB from the dump of the Postgres 9.4 database.<br><figure class="highlight bash"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div><div class="line">3</div><div class="line">4</div><div class="line">5</div><div class="line">6</div><div class="line">7</div><div class="line">8</div><div class="line">9</div><div class="line">10</div><div class="line">11</div><div class="line">12</div><div class="line">13</div><div class="line">14</div><div class="line">15</div><div class="line">16</div><div class="line">17</div><div class="line">18</div><div class="line">19</div><div class="line">20</div><div class="line">21</div><div class="line">22</div><div class="line">23</div><div class="line">24</div><div class="line">25</div><div class="line">26</div><div class="line">27</div><div class="line">28</div><div class="line">29</div><div class="line">30</div><div class="line">31</div><div class="line">32</div><div class="line">33</div><div class="line">34</div><div class="line">35</div><div class="line">36</div><div class="line">37</div><div class="line">38</div><div class="line">39</div><div class="line">40</div><div class="line">41</div><div class="line">42</div><div class="line">43</div><div class="line">44</div><div class="line">45</div><div class="line">46</div><div class="line">47</div><div class="line">48</div><div class="line">49</div><div class="line">50</div><div class="line">51</div><div class="line">52</div><div class="line">53</div><div class="line">54</div><div class="line">55</div><div class="line">56</div><div class="line">57</div><div class="line">58</div><div class="line">59</div><div class="line">60</div><div class="line">61</div><div class="line">62</div><div class="line">63</div><div class="line">64</div></pre></td><td class="code"><pre><div class="line"></div><div class="line"><span class="comment"># Author: Dylan Wood</span></div><div class="line"><span class="comment"># Date: June.20.2015</span></div><div class="line"><span class="comment"># Script is called from /etc/cron.d/coins_query_restore</span></div><div class="line"></div><div class="line"><span class="built_in">echo</span> <span class="string">'Starting COINS DB refresh as user:'</span></div><div class="line"><span class="built_in">echo</span> `id`</div><div class="line"><span class="built_in">echo</span> `date`</div><div class="line"></div><div class="line"><span class="comment"># Define variables</span></div><div class="line">ARCHIVE_DIR=<span class="string">"/export/ni/prodrepdbcoin.sql"</span></div><div class="line">ARCHIVE_FILE=`ls -1t <span class="variable">$ARCHIVE_DIR</span> | head -1`</div><div class="line">DBNAME=<span class="string">"postgres"</span></div><div class="line">DBPORT=6117</div><div class="line"></div><div class="line"><span class="comment"># Create temp database</span></div><div class="line"><span class="comment"># Create empty DB</span></div><div class="line"><span class="built_in">echo</span> <span class="string">'Creating empty DB'</span></div><div class="line">createdb -p <span class="variable">$DBPORT</span> <span class="variable">$&#123;DBNAME&#125;</span>_temp</div><div class="line"></div><div class="line"><span class="comment"># Create lang</span></div><div class="line"><span class="built_in">echo</span> <span class="string">'create plpgsql lang'</span></div><div class="line">psql -p <span class="variable">$DBPORT</span> <span class="_">-d</span> <span class="variable">$&#123;DBNAME&#125;</span>_temp -c <span class="string">'CREATE LANGUAGE plpgsql'</span></div><div class="line"></div><div class="line"><span class="comment"># Restore DB</span></div><div class="line"><span class="built_in">echo</span> <span class="string">'restoring db from latest dump'</span></div><div class="line">psql -p <span class="variable">$DBPORT</span> <span class="_">-d</span> <span class="variable">$&#123;DBNAME&#125;</span>_temp <span class="_">-f</span> <span class="variable">$ARCHIVE_FILE</span></div><div class="line"></div><div class="line"><span class="comment"># Edit default search path</span></div><div class="line"><span class="built_in">echo</span> <span class="string">'Setting default search path'</span></div><div class="line">psql -p <span class="variable">$DBPORT</span> <span class="_">-d</span> <span class="variable">$&#123;DBNAME&#125;</span>_temp -c <span class="string">"ALTER DATABASE <span class="variable">$&#123;DBNAME&#125;</span>_temp SET search_path=mrsdba, casdba, public;"</span></div><div class="line"></div><div class="line"><span class="comment"># Truncate qb temp tables</span></div><div class="line"><span class="built_in">echo</span> <span class="string">'Truncating QB temp tables'</span></div><div class="line">psql -p <span class="variable">$DBPORT</span> <span class="_">-d</span> <span class="variable">$&#123;DBNAME&#125;</span>_temp -c <span class="string">"TRUNCATE TABLE mrsdba.mrs_qb_asmt_data_sort_temp; TRUNCATE TABLE mrsdba.mrs_qb_asmt_pivot_categories_temp;"</span></div><div class="line"></div><div class="line"><span class="comment"># Add empty schemas</span></div><div class="line"><span class="built_in">echo</span> <span class="string">'Adding casdba, dxdba and dtdba'</span></div><div class="line">psql -p <span class="variable">$DBPORT</span> <span class="_">-d</span> <span class="variable">$&#123;DBNAME&#125;</span>_temp -c <span class="string">"CREATE schema casdba; CREATE schema dxdba; CREATE schema dtdba;"</span></div><div class="line"></div><div class="line"><span class="comment"># Create tablefunc extension</span></div><div class="line"><span class="built_in">echo</span> <span class="string">'Create tablefunc extension'</span></div><div class="line">psql -p <span class="variable">$DBPORT</span> <span class="_">-d</span> <span class="variable">$&#123;DBNAME&#125;</span>_temp <span class="_">-f</span> /usr/share/pgsql/contrib/tablefunc.sql</div><div class="line"></div><div class="line"><span class="comment"># VACUUM ANALYZE THE DB</span></div><div class="line"><span class="built_in">echo</span> <span class="string">'VACUUM ANALYZE'</span></div><div class="line">psql -p <span class="variable">$DBPORT</span> <span class="_">-d</span> <span class="variable">$&#123;DBNAME&#125;</span>_temp -c <span class="string">"VACUUM ANALYZE"</span></div><div class="line"></div><div class="line"><span class="comment"># Drop database</span></div><div class="line"></div><div class="line"><span class="comment"># First, disconnect all connections</span></div><div class="line"><span class="built_in">echo</span> <span class="string">'Terminating connections to DB'</span></div><div class="line">psql <span class="_">-d</span> <span class="variable">$DBNAME</span> -p <span class="variable">$DBPORT</span> -c <span class="string">"SELECT pg_terminate_backend(procpid) FROM pg_stat_activity WHERE procpid &lt;&gt; pg_backend_pid() AND datname = '<span class="variable">$DBNAME</span>';"</span></div><div class="line"></div><div class="line"><span class="comment"># Drop DB</span></div><div class="line"><span class="built_in">echo</span> <span class="string">'Dropping DB'</span></div><div class="line">dropdb -p <span class="variable">$DBPORT</span> <span class="variable">$DBNAME</span></div><div class="line"></div><div class="line"><span class="comment"># Rename temp DB</span></div><div class="line"><span class="built_in">echo</span> <span class="string">'Renaming temp database'</span></div><div class="line">psql <span class="_">-d</span> habaridb -p <span class="variable">$DBPORT</span> -c <span class="string">"ALTER DATABASE <span class="variable">$&#123;DBNAME&#125;</span>_temp RENAME TO <span class="variable">$&#123;DBNAME&#125;</span>;"</span></div><div class="line"></div><div class="line"><span class="built_in">echo</span> <span class="string">'Finished with COINS DB refresh'</span></div><div class="line"><span class="built_in">echo</span> `date`</div></pre></td></tr></table></figure></p>


    </div>
    <footer class="article-footer">
      <a data-url="https://MRN-Code.github.io/2015/06/21/How-to-partially-retreat-from-a-database-upgrade/" data-id="citahhnhv0001zhw7ejwyw6yy" class="article-share-link">Share</a>


    </footer>
  </div>

</article>


    <article id="post-Standardizing-PostgresSQL-Instances" class="article article-type-post" itemscope itemprop="blogPost">
  <div class="article-meta">
    <a href="/2015/06/13/Standardizing-PostgresSQL-Instances/" class="article-date">
  <time datetime="2015-06-13T19:57:29.000Z" itemprop="datePublished">2015-06-13</time>
</a>

  </div>
  <div class="article-inner">


      <header class="article-header">


    <h1 itemprop="name">
      <a class="article-title" href="/2015/06/13/Standardizing-PostgresSQL-Instances/">Standardizing PostgresSQL Instances</a>
    </h1>


    <a href="/author/dwood"><h4>Dylan Wood</h4></a>


      </header>

    <div class="article-entry" itemprop="articleBody">

        <p>COINS uses a centralized PostgreSQL database. We have been so busy developing new features that we have not upgraded the database used by the COINS production application since 2010!<br>New feature requirements and a need for increased disk space on our production server are finally motivating us to upgrade to PostgreSQL 9.4.<br>While we are at it, we will upgrade the underlying virtual host to Ubuntu Server 14.04, with enough RAM to fit our rapidly growing datbase in memory.<br>Finally, it makes sense to lay some ground work to clean up our inconsistent use of database names and ports.</p>
<h2 id="Summary-of-changes"><a href="#Summary-of-changes" class="headerlink" title="Summary of changes:"></a>Summary of changes:</h2><table>
<thead>
<tr>
<th></th>
<th>Current Value</th>
<th>New Value </th>
</tr>
</thead>
<tbody>
<tr>
<td>OS</td>
<td>CentOS 5.5</td>
<td>Ubuntu Server 14.04</td>
</tr>
<tr>
<td>DBMS</td>
<td>PostgreSQL 8.4.5</td>
<td>PostgreSQL 9.4.2</td>
</tr>
<tr>
<td>RAM</td>
<td>16GB</td>
<td>48GB</td>
</tr>
<tr>
<td>CPU Cores</td>
<td>4</td>
<td>4</td>
</tr>
<tr>
<td>Recovery</td>
<td>Nightly pg_dump</td>
<td>WAL archiving for PITR (managed by <a href="http://pgbarman.org" target="_blank" rel="external">PG Barman</a></td>
</tr>
<tr>
<td>Replication</td>
<td>Daily pg_restore from nightly pg_dump</td>
<td>Hot Standby w/ WAL shipping</td>
</tr>
<tr>
<td>COINS DB name</td>
<td>postgres</td>
<td>coins</td>
</tr>
<tr>
<td>Port</td>
<td>6117</td>
<td>5432</td>
</tr>
<tr>
<td>Hostname</td>
<td>tesla.mind.unm.edu</td>
<td>proddbcoin.mind.unm.edu</td>
</tr>
<tr>
<td>Connection Pooling</td>
<td>none</td>
<td>pgbouncer</td>
</tr>
</tbody>
</table>
<h2 id="Justification"><a href="#Justification" class="headerlink" title="Justification"></a>Justification</h2><h3 id="Operating-System"><a href="#Operating-System" class="headerlink" title="Operating System"></a>Operating System</h3><p>CentOS has served us well for many years, but we have found that Ubuntu’s more up-to-date repositories allow us to stay with the herd as new features are released in packages we depend on (e.g. PostgreSQL, PHP5, Node.js, etc…)</p>
<h3 id="Postgres"><a href="#Postgres" class="headerlink" title="Postgres"></a>Postgres</h3><p>Simple: 8.4 is no longer supported. Also, new JSON functionality is really nice (e.g. row_to_json).</p>
<h3 id="RAM"><a href="#RAM" class="headerlink" title="RAM"></a>RAM</h3><p>When our current production database server was provisioned, 16GB was enough ram to hold two copies of the COINS database. The database is currently 24GB on disk, and growing fast. 48GB should buy us a little time.</p>
<h3 id="CPU-Cores"><a href="#CPU-Cores" class="headerlink" title="CPU Cores"></a>CPU Cores</h3><p>Postgres does not do anything special for multi-core environments. Instead, it relies on the operating system to destribute its child processes across the cores evenly. Our database has never been CPU bound, so we see no need to increase the number of cores at this point.</p>
<h3 id="Disaster-Recovery-and-Backups"><a href="#Disaster-Recovery-and-Backups" class="headerlink" title="Disaster Recovery and Backups"></a>Disaster Recovery and Backups</h3><p>We currently have a cron which performs a <code>pg_dump</code> of the production database every night, and stores the dump on our internal network storage at MRN. In the event of a total loss of our database server, we would be able to recover all changes made before midnight on the day of the failure. Utilizing WAL archiving will allow for Point in Time recovery, and could allow us to salvage data and changes made only minutes or seconds before the outage. In addition, it lays the ground work for a geographically distributed recovery system.</p>
<h3 id="Replication"><a href="#Replication" class="headerlink" title="Replication"></a>Replication</h3><p>In order to get COINS running as soon as possible after an outage, we have another production-ready database server running at all times. This database is refreshed every morning from the previous night’s <code>pg_dump</code> of production. Unfortunately, if the production database were to fail, users would loose access to data entered after the last <code>pg_dump</code>. Further, if we were able to salvage the data entered between the last <code>pg_dump</code> and the outage, we would need to somehow merge all of that data with data entered into the replication database after the outage. </p>
<p>The new system uses WAL streaming to replicate all changes made in production (even schema changes!). In the event that the production database were to fail, the replication database would likely be only a few records behind the production database. Aside from loosing much less data in the event of a failover, there are other benefits to having a nearly up-to-date copy of production lying around at all times:</p>
<ul>
<li>Backups can be made of the replication database, thus reducing the load on the production server during backup times</li>
<li>The replication database can be configured to handle read-only queries, further reducing the load on the master production database, and decreasing query time.</li>
</ul>
<h3 id="Database-name"><a href="#Database-name" class="headerlink" title="Database name"></a>Database name</h3><p>Back in 2010, the COINS team migrated from an Oracle Database to PostgreSQL. Our understanding of Postgres was still very limited, and we made some poor design decisions. One of these decisions was to use the default maintenance database as our primary DB. This does not directly cause any problems, but is generally a bad practice. </p>
<p>There are at least one dozen devices spread across the United States that connect to our production database, and rely on the current database name. Changing all of these touch points in a single go would be stressful and very risky (even after practice, and lots of planning, we could still miss or break a few devices). Updating the devices one at a time to utilize the new database name is therefore much more favorable. Doing so will allow us to cooperate with the device owners to come up with a time that will not negatively impact their work. </p>
<p><strong>The problem is:</strong> a Postgres database can only have one name and one port. We can overcome this by using a connection pooling tool called <a href="https://pgbouncer.github.io" target="_blank" rel="external">PGBouncer</a>. In addition to reducing the overhead involved with creating connections inside of PostgreSQL, PGBouncer also allows aliasing the database name. This means that some devices can connect to our database using the database name <em>postgres</em> while others can connect using the database name <em>coins</em>. </p>
<h3 id="Database-port"><a href="#Database-port" class="headerlink" title="Database port"></a>Database port</h3><p>Another one of the poor design decisions from back in 2010 was to use a non-standard port for PG. I believe that this was a <em>security through obscurity</em> decision. Not only does the obscurity cause issues with our own configuration, it pprovides no additional security against anyone who is able to port-scan on our network. Any security benefit that it might have given us is void as soon as I publish this article. </p>
<p>Changing the port is subject to the same pitfalls mentioned above, so we need a way to support both the legacy port and the new port simultaneously while we carefully update all devices. This can be accomplished using port forwarding within the database server</p>
<h3 id="Hostname"><a href="#Hostname" class="headerlink" title="Hostname"></a>Hostname</h3><p>Just to be consistent with our latest server naming convention, the production database should be called <strong>proddbcoin</strong>. Since we use static IP assignments in our DNS, this should be easy: We can direct all legacy and current hostnames to the same IP, allowing us to slowly migrate devices to the new hostname. In fact, most devices uses a public IP address to connect to our database, since they are not within our private network.</p>
<h3 id="Connection-Pooling"><a href="#Connection-Pooling" class="headerlink" title="Connection Pooling"></a>Connection Pooling</h3><p>I went over this a little in the database name section. The connection pooling approach prevents the overhead involved in creating a new connection each time a device, or our PHP server needs one. I also allows us to alias the database name for a smooth transition away from using the <em>postgres</em> database. Finally, it offers the benefits typically associated with a reverse proxy: the possibility of load ballancing across multiple servers, or switching the servers out behind the connection pool without interrupting service at all.</p>
<h2 id="Summary"><a href="#Summary" class="headerlink" title="Summary"></a>Summary</h2><p>The new COINS production database setup may seem a bit more complex than the one it is replacing, and it is. However, all of these complex pieces are being provisioned and configured using Ansible, so the steps can easily be repeated and tweaked. </p>
<p>Here is a diagram showing how both old and new connections strings can reach the same databse: <img src="/2015/06/13/Standardizing-PostgresSQL-Instances/network.png" alt="network.png" title=""></p>
<p><em>The database is slated to be replaced on Wednesday, June 17th. I will be practicing the deployment using Ansible in our staging environment until then</em></p>


    </div>
    <footer class="article-footer">
      <a data-url="https://MRN-Code.github.io/2015/06/13/Standardizing-PostgresSQL-Instances/" data-id="citahhniq000bzhw7ac0ft5kw" class="article-share-link">Share</a>


  <ul class="article-tag-list"><li class="article-tag-list-item"><a class="article-tag-list-link" href="/tags/postgres-postgresql-pgbarman-pgbouncer-COINS3-0/">postgres postgresql pgbarman pgbouncer COINS3.0</a></li></ul>

    </footer>
  </div>

</article>


    <article id="post-Why-a-blog" class="article article-type-post" itemscope itemprop="blogPost">
  <div class="article-meta">
    <a href="/2015/06/13/Why-a-blog/" class="article-date">
  <time datetime="2015-06-13T19:01:14.000Z" itemprop="datePublished">2015-06-13</time>
</a>

  </div>
  <div class="article-inner">


      <header class="article-header">


    <h1 itemprop="name">
      <a class="article-title" href="/2015/06/13/Why-a-blog/">Why a blog</a>
    </h1>


    <a href="/author/dwood"><h4>Dylan Wood</h4></a>


      </header>

    <div class="article-entry" itemprop="articleBody">

        <p>“<strong>Why am I doing this?</strong>“ I try to ask myself this question at least once per hour while I am working to be sure that I am doing something that will contribute to our team’s goals.<br>Consequently, it makes sense to ask (and answer) this question now. There are a few justifications that come to mind.</p>
<h2 id="Internal-communication-and-continuity"><a href="#Internal-communication-and-continuity" class="headerlink" title="Internal communication and continuity"></a>Internal communication and continuity</h2><p>When working on a new project or feature, one of our team members will usually create a Google Doc, presentation or simple email that explains the design decisions made.<br>However, because there are a variety of venues for sharing this information (Gmail, Google Drive, Gitter, to name a few), it can be difficult to find it again later.<br>If this blog idea catches on with the rest of the team, this could become the centralized place to document design decisions.</p>
<h2 id="Sharing-with-the-community"><a href="#Sharing-with-the-community" class="headerlink" title="Sharing with the community"></a>Sharing with the community</h2><p>Many of the problems that we solve every day are not neuroimaging-specific. Instead, they are problems that web application engineers from all sorts of industries are faced with daily.<br>By placing our best practices and lessons learned in a public place, we may be able to help others avoid pitfals that we’ve already succumb to.<br>Further, as COINS becomes more open source, this blog may be the place that community contributers will look to find COINS-specific technical information.</p>
<p>The team that I work with does some pretty cool stuff, and I am excited that we will all be able to share it with anyone who is interested.</p>
<h2 id="Reflection"><a href="#Reflection" class="headerlink" title="Reflection"></a>Reflection</h2><p>Writing things down is a great way to process them. I have uncovered countless bugs by just documenting and justifying my changes.</p>
<p>Ok, now on to the first real post (Standardizing our PostgreSQL instances)…</p>


    </div>
    <footer class="article-footer">
      <a data-url="https://MRN-Code.github.io/2015/06/13/Why-a-blog/" data-id="citahhnis000czhw7oqj9s4ao" class="article-share-link">Share</a>


    </footer>
  </div>

</article>


</section>


            <aside id="sidebar">


  <div class="widget-wrap">
    <h3 class="widget-title">Recents</h3>
    <div class="widget recents">
      <ul>

          <li>
            <a href="/2016/09/14/Resizing-a-Numeric-Column-in-a-PostgreSQL-Table-without-Changing-Data/">Resizing a Numeric Column in a PostgreSQL Table without Changing Data</a>
          </li>

          <li>
            <a href="/2016/07/12/SQL-Lateral-Joins/">SQL Lateral Joins</a>
          </li>

          <li>
            <a href="/2015/12/29/Monitoring-PostgreSQL-Replication-Lag-with-Monit/">Monitoring PostgreSQL Replication Lag with Monit</a>
          </li>

          <li>
            <a href="/2015/07/04/Launch-Console-in-VMWare-Web-Client-on-Windows-10-Chrome-42/">Launching a Console in VMware Web Client on Windows 10, Chrome 42+</a>
          </li>

          <li>
            <a href="/2015/06/24/Managing-Application-Dates-and-Times/">Managing Application Dates and Times</a>
          </li>

      </ul>
    </div>
  </div>


  <div class="widget-wrap">
    <h3 class="widget-title">Tag Cloud</h3>
    <div class="widget tagcloud">
      <a href="/tags/Chrome/" style="font-size: 10px;">Chrome</a> <a href="/tags/PostgreSQL-Postgres-SQL-Databases/" style="font-size: 10px;">PostgreSQL, Postgres, SQL, Databases</a> <a href="/tags/VMware/" style="font-size: 10px;">VMware</a> <a href="/tags/postgres-postgresql-monit-m-monit-monitoring-COINS3-0/" style="font-size: 10px;">postgres postgresql monit m/monit monitoring COINS3.0</a> <a href="/tags/postgres-postgresql-pgbarman-pgbouncer-COINS3-0/" style="font-size: 10px;">postgres postgresql pgbarman pgbouncer COINS3.0</a> <a href="/tags/vSphere/" style="font-size: 10px;">vSphere</a>
    </div>
  </div>


  <div class="widget-wrap">
    <h3 class="widget-title">Archives</h3>
    <div class="widget">
      <ul class="archive-list"><li class="archive-list-item"><a class="archive-list-link" href="/archives/2016/09/">September 2016</a><span class="archive-list-count">1</span></li><li class="archive-list-item"><a class="archive-list-link" href="/archives/2016/07/">July 2016</a><span class="archive-list-count">1</span></li><li class="archive-list-item"><a class="archive-list-link" href="/archives/2015/12/">December 2015</a><span class="archive-list-count">1</span></li><li class="archive-list-item"><a class="archive-list-link" href="/archives/2015/07/">July 2015</a><span class="archive-list-count">1</span></li><li class="archive-list-item"><a class="archive-list-link" href="/archives/2015/06/">June 2015</a><span class="archive-list-count">4</span></li></ul>
    </div>
  </div>


</aside>


      </div>
      <div id="footer">

  <div class="outer">
    <div id="footer-info" class="inner">
      <a rel="license" href="http://creativecommons.org/licenses/by-nc/4.0/"><img alt="Creative Commons License" style="border-width:0" src="https://i.creativecommons.org/l/by-nc/4.0/88x31.png" /></a></br>
      &copy; 2016 Dylan Wood<br>
      Powered by <a href="http://hexo.io/" target="_blank">Hexo</a>
    </div>
  </div>
</div>
    </div>
    <nav id="mobile-nav">

    <a href="/" class="mobile-nav-link">Home</a>

    <a href="/archives" class="mobile-nav-link">Archives</a>

</nav>


<script src="//ajax.googleapis.com/ajax/libs/jquery/2.0.3/jquery.min.js"></script>


  <link rel="stylesheet" href="/fancybox/jquery.fancybox.css">
  <script src="/fancybox/jquery.fancybox.pack.js"></script>


<script src="/js/script.js"></script>

  </div>
</body>
</html>