Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,6 @@ venv/

.DS_Store

# claude
CLAUDE.md
/.claude
11 changes: 11 additions & 0 deletions backend/DbProvider.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,17 @@ function DbProvider({ children, startDate }) {
// Create db connection
const newConn = await newDb.connect();

// Create views so tables can be queried as requests_<year>
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is 100% the right move to make, but it has some implications when using Hugging Face as the dataset source. I believe it will try to download all datasets into memory before the user gets a chance to filter the dataset. This is what the network tab shows on your branch (regardless of which data source is specified in .env):

Click to see 311 map + network tab

Image

(Note: the failed fetches for pre-2024 dataset years are likely a separate problem; I'll try to surface that issue elsewhere.)

I think ultimately we should be creating the views without actually loading data. We are only using `... AS SELECT * FROM requestsYYYY.parquet` so that we can automatically retrieve the column names. Maybe we can change the query so that it reads only the schema rather than the rows? Or we can try an approach that is independent of Hugging Face and simply store the relevant columns (for each year...sigh) in a local file (as a JavaScript object, or just put a small file in the data folder).

for (let year = 2020; year <= currentYear; year++) {
try {
await newConn.query(
`CREATE VIEW requests_${year} AS SELECT * FROM 'requests${year}.parquet'`,
);
} catch (err) {
console.warn(`Failed to create view for year ${year}:`, err);
}
}

setDb(newDb);
setConn(newConn);
setWorker(newWorker);
Expand Down
10 changes: 7 additions & 3 deletions src/features/Map/index.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,7 @@ class MapContainer extends React.Component {
endDate
);
} else {
requests = await getServiceRequestSocrata();
requests = await getServiceRequestSocrata(startDate, endDate);
}
return requests;
}
Expand Down Expand Up @@ -373,8 +373,12 @@ class MapContainer extends React.Component {
const typeId = getTypeIdFromTypeName(
request.RequestType ?? request.requesttype
);
const closedDate =
request.ClosedDate ?? moment(request.closeddate).valueOf();

// request.ClosedDate is undefined for Socrata data source (Socrata uses lowercase, PascalCase field doesn't exist)
// For open requests: request.closeddate is also undefined (absent from JSON when no close date)
// For closed requests: request.closeddate is a Date object (yup coerced it), so closedDate = moment(Date).valueOf() — also not null
const rawClosedDate = request.ClosedDate ?? request.closeddate;
const closedDate = rawClosedDate != null ? moment(rawClosedDate).valueOf() : null;
const createdDateMs = moment(
request.CreatedDate ?? request.createddate
).valueOf();
Expand Down
34 changes: 26 additions & 8 deletions src/utils/DataService.js
Original file line number Diff line number Diff line change
Expand Up @@ -60,17 +60,30 @@ const socrataServiceRequestSchema = object({

const srArraySchema = array().of(socrataServiceRequestSchema);

export async function getServiceRequestSocrata() {
export async function getServiceRequestSocrata(startDate, endDate) {
const dataLoadStartTime = performance.now();

try {
// Fetch current year SR data through Socrata API
const currentYear = String(new Date().getFullYear());
const currentYearFilename = `https://data.lacity.org/resource/${dataResources[currentYear]}.json`
const response = await fetch(
currentYearFilename
// Build list of years covered by the date range
const startYear = moment(startDate).year();
const endYear = moment(endDate).year();
const years = [];
for (let year = startYear; year <= endYear; year++) {
years.push(String(year));
}

// Fetch data for each year filtered by the requested date range.
// Without a $where clause, Socrata returns only 1000 records in internal-ID
// order (i.e. the oldest records first), which would all fail the client-side
// Mapbox date filter. We also raise $limit well above the default 1000 so that
// the full date range is covered.
const unvalidatedByYear = await Promise.all(
years.map((year) => {
const where = `createddate >= '${startDate}T00:00:00.000' AND createddate <= '${endDate}T23:59:59.999'`;
const url = `https://data.lacity.org/resource/${dataResources[year]}.json?$where=${encodeURIComponent(where)}&$limit=1000`;
return fetch(url).then((res) => res.json());
})
);
const unvalidatedSrs = await response.json();

const dataLoadEndTime = performance.now();
console.log(
Expand All @@ -80,7 +93,12 @@ export async function getServiceRequestSocrata() {
);

const mapLoadStartTime = performance.now();
const validatedSrs = await srArraySchema.validate(unvalidatedSrs);
const validatedByYear = await Promise.all(
unvalidatedByYear.map((unvalidatedSrs) =>
srArraySchema.validate(unvalidatedSrs)
)
);
const validatedSrs = validatedByYear.flat();
const mapLoadEndTime = performance.now();
console.log(
`Socrata map preparation time: ${Math.floor(
Expand Down
Loading