-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathprocess_csv.py
More file actions
102 lines (85 loc) · 3.63 KB
/
process_csv.py
File metadata and controls
102 lines (85 loc) · 3.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
#!/usr/bin/env python3
import sys
import os
import csv
from datetime import datetime, timedelta
def process_csv(folder, location, month, processed_file_name, mode='year'):
    """Filter a CSV file down to the rows that fall in one calendar month.

    The input is read from ``folder/processed_file_name``. The second column
    of every data row is parsed as a ``YYYY-MM-DD`` date; rows whose date
    cannot be parsed (or that have fewer than two columns) are skipped. The
    header row plus all matching rows are written to
    ``folder/<location>_<MM>_<year>_access_logs.csv``, where ``<year>`` is the
    latest year seen among the matching rows.

    NOTE: rows are matched on the month number only, so an input spanning
    several years contributes the matching month of every year.

    Args:
        folder: Directory holding the input file; the output is written here.
        location: Prefix used in the output file name.
        month: Month number 1-12. A value below 1 selects the month before
            the current one (used by the automation).
        processed_file_name: Name of the input CSV inside ``folder``.
        mode: ``'filename'`` prints the output file name to stdout; any other
            value (default ``'year'``) prints the latest year instead.

    Exits:
        ``sys.exit(0)`` with nothing on stdout when the input is empty or no
        row matches; ``sys.exit(1)`` when reading or writing fails. In all of
        these cases the temporary output file is removed.
    """
    input_path = os.path.join(folder, processed_file_name)
    temp_output_path = os.path.join(folder, "temp_filtered.csv")
    header = None
    latest_year = None
    rows_written = 0

    # Handle previous month calculation for automation
    if month < 1:
        today = datetime.now()
        # Stepping back one day from the 1st lands in the previous month,
        # including the January -> December rollover.
        first_day_this_month = today.replace(day=1)
        last_day_prev_month = first_day_this_month - timedelta(days=1)
        month = last_day_prev_month.month
        sys.stderr.write(f"📊 Filtering logs for: {last_day_prev_month.strftime('%B %Y')} (previous month)\n")
    else:
        sys.stderr.write(f"📊 Filtering logs for: Month {month}\n")

    try:
        with open(input_path, 'r', newline='', encoding='utf-8') as infile, \
                open(temp_output_path, 'w', newline='', encoding='utf-8') as outfile:
            # Strip NUL characters before the lines reach the csv parser.
            reader = csv.reader(line.replace('\0', '') for line in infile)
            writer = csv.writer(outfile)

            header = next(reader, None)
            if header is not None:
                writer.writerow(header)
                for row in reader:
                    # Only the date parse may legitimately fail for a bad row;
                    # write errors must reach the outer handler.
                    try:
                        date_obj = datetime.strptime(row[1], '%Y-%m-%d')
                    except (IndexError, ValueError):
                        continue  # Skip invalid rows
                    if date_obj.month != month:
                        continue
                    writer.writerow(row)
                    rows_written += 1
                    if latest_year is None or date_obj.year > latest_year:
                        latest_year = date_obj.year
    except Exception as e:
        # Do not leave a partial temp file behind.
        _remove_quietly(temp_output_path)
        # Send errors to stderr so they don't get captured by runner.sh
        sys.stderr.write(f"Error processing CSV: {e}\n")
        sys.exit(1)

    if header is None:
        # Empty input: exit gracefully, printing nothing, and clean up the
        # temp file that was created when the output was opened.
        _remove_quietly(temp_output_path)
        sys.exit(0)

    if rows_written == 0:
        os.remove(temp_output_path)
        sys.stderr.write("⚠️ No log entries found for this period\n")
        # Print nothing if no file was created
        sys.exit(0)

    # Final rename. os.replace overwrites an existing file on every platform,
    # so re-running for the same month does not fail on Windows.
    final_name = f"{location}_{month:02d}_{latest_year}_access_logs.csv"
    final_path = os.path.join(folder, final_name)
    os.replace(temp_output_path, final_path)

    sys.stderr.write(f"✅ Found {rows_written} log entries for uploading\n")

    if mode == 'filename':
        print(final_name)
    else:
        print(latest_year)


def _remove_quietly(path):
    """Delete ``path`` if possible, ignoring the case where it cannot be removed."""
    try:
        os.remove(path)
    except OSError:
        pass


if __name__ == "__main__":
    # Command-line entry point: validate the arguments, then hand off to
    # process_csv. Expects four positional arguments plus an optional mode.
    argc = len(sys.argv)
    if argc < 5 or argc > 6:
        sys.stderr.write("Usage: python process_csv.py <folder> <location> <month> <processed_file_name> [mode]\n")
        sys.exit(1)

    folder, location, month_arg, processed_file_name = sys.argv[1:5]

    # 0 requests the previous month (automation); 1-12 is an explicit month.
    try:
        month = int(month_arg)
    except ValueError:
        month = None
    if month is None or not 0 <= month <= 12:
        sys.stderr.write("Error: Month must be 0 (for previous month) or an integer between 1 and 12.\n")
        sys.exit(1)

    # Only the literal 'filename' switches the output mode; anything else
    # (or no fifth argument) keeps the default 'year'.
    mode = 'filename' if sys.argv[5:] == ['filename'] else 'year'

    process_csv(folder, location, month, processed_file_name, mode)