-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdeltasJSON.py
More file actions
171 lines (110 loc) · 4.54 KB
/
deltasJSON.py
File metadata and controls
171 lines (110 loc) · 4.54 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
#!/usr/bin/python
import sys,gzip
tableName =sys.argv[1]
fileName =sys.argv[2]
headerFileName =sys.argv[3]
localStoreId={}
thisAction={}
localStoreVals={}
localStoreOldVals={}
writeEvery=5
rowHeader="\n "
#rowHeader=" "
dataFieldSeparator=','
headerSeparator=','
################################################################################
### READING PART ###############################################################
################################################################################
# Delta file example:
# -"123","bla bla bla OLD","22"
# +"123","bla bla bla NEW","22"
fileIn = gzip.open(fileName, 'r')
for thisLine in fileIn:
thisSign=thisLine[0]
# Get rid of leading [+-] sign
thisLine=thisLine[1:]
# Get rid of first two lines (+++ and ---, resulting from diff)
if thisLine[0] == '-' or thisLine[0]=='+':
continue
# Get rid of non-ID lines
if not thisLine[1].isdigit():
continue
thisLine=thisLine.rstrip('\n')
thisId=thisLine.split(dataFieldSeparator)[0]
# New Id, default action
if not localStoreId.has_key(thisId):
if thisSign == '-':
thisAction[thisId]='D'
if thisSign == '+':
thisAction[thisId]='I'
# If i already have this id, it is an updated
else:
thisAction[thisId]='U'
# If minus sign, i save old values
if thisSign == '-':
localStoreOldVals[thisId]=thisLine
localStoreId[thisId]=thisId
localStoreVals[thisId]=thisLine
fileIn.close()
# Header, write once execute everywhere :-D
headerFile = open(headerFileName, 'r')
headerString=headerFile.readline().rstrip('\n')
headerFile.close()
headerList=headerString.split(headerSeparator)
headerLen=len(headerList)
################################################################################
### WRITING PART ###############################################################
################################################################################
processedRows=0
writeStr=""
for thisKey, thisValue in localStoreVals.items():
updStr=""
opType=""
writeStr = writeStr + "{"
writeStr = writeStr + rowHeader + "\"tableName\" :\""+tableName+"\""
writeStr = writeStr + rowHeader + ",\"sourceTablePk\": "+thisKey
writeStr = writeStr + rowHeader + ",\"headerCsv\": \""+headerString+"\""
writeStr = writeStr + rowHeader + ",\"operationType\": \""+thisAction[thisKey]+"\""
# updStr[:-1] to get rid of last ","
# Old Values Part
if thisAction[thisKey] == 'D' or thisAction[thisKey] == 'U':
oldValues=list(localStoreOldVals[thisKey].replace('"','').split(dataFieldSeparator))
writeStr = writeStr + rowHeader + ",\"oldValuesCsv\":["+localStoreOldVals[thisKey]+"]"
updStr=""
for fieldIdx in range(0,headerLen):
updStr= updStr+ "{\""+headerList[fieldIdx].lower()+"\": \""+ oldValues[fieldIdx] +"\"},"
writeStr = writeStr + rowHeader + ",\"oldValuesKv\": ["+ updStr[:-1]+"]"
updStr=""
for fieldIdx in range(0,headerLen):
updStr= updStr+ "\""+headerList[fieldIdx].lower()+"\": \""+ oldValues[fieldIdx] +"\","
writeStr = writeStr + rowHeader + ",\"oldValuesJson\": {"+ updStr[:-1]+"}"
# New Values here
if thisAction[thisKey] == 'I' or thisAction[thisKey] == 'U':
newValues=list(thisValue.replace('"','').split(dataFieldSeparator))
writeStr = writeStr + rowHeader + ",\"newValuesCsv\": ["+localStoreVals[thisKey]+"]"
updStr=""
for fieldIdx in range(0,headerLen):
updStr= updStr+ "{\""+headerList[fieldIdx].lower()+"\": \""+ newValues[fieldIdx] +"\"},"
writeStr = writeStr + rowHeader + ",\"newValuesKv\": ["+ updStr[:-1]+"]"
updStr=""
for fieldIdx in range(0,headerLen):
updStr= updStr+ "\""+headerList[fieldIdx].lower()+"\": \""+ newValues[fieldIdx] +"\","
writeStr = writeStr + rowHeader + ",\"newValuesJson\": {"+ updStr[:-1]+"}"
# Just for updates
if thisAction[thisKey] == 'U':
updStr=""
updStrOldNew=""
for fieldIdx in range(0,headerLen):
if newValues[fieldIdx] != oldValues[fieldIdx]:
updStr= updStr+ "\""+headerList[fieldIdx].lower()+"\":\""+ newValues[fieldIdx] +"\","
updStrOldNew= updStrOldNew+ "{\""+headerList[fieldIdx].lower()+"\": {\"is\": \""+ newValues[fieldIdx] +"\", \"was\": \""+oldValues[fieldIdx]+"\" }},"
writeStr = writeStr + rowHeader + ",\"updateJsonNewVals\":{"+updStr[:-1]+"}"
writeStr = writeStr + rowHeader + ",\"updateJsonOldNewVals\": ["+updStrOldNew[:-1]+"]"
writeStr = writeStr + "\n}\n"
processedRows = processedRows+1
#print writeStr
if processedRows > writeEvery:
print writeStr
processedRows=0
writeStr=""
print writeStr