Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 38 additions & 1 deletion .github/workflows/data-processing.yml
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,24 @@ jobs:
- name: Install Python dependencies
run: python3 -m pip install -r ./requirements.txt



#========================================
# Setup Node.js for bibliography processing
#========================================
- name: Setup Node.js
uses: actions/setup-node@v4
with:
node-version: '18'
cache: 'npm'
cache-dependency-path: bibtex_to_apa/package-lock.json
#========================================
# Install Node.js dependencies for bibliography processing
#========================================
- name: Install Node.js dependencies
run: |
cd bibtex_to_apa
npm install

#========================================
# Process contributor data using Tenzing script
#========================================
Expand Down Expand Up @@ -204,6 +221,15 @@ jobs:
fi
done

#========================================
# Generate APA lookup from bibliography
#========================================
- name: Generate APA lookup
continue-on-error: true # Continue even if this step fails
run: |
cd bibtex_to_apa
node bibtex_to_apa.js -o '../content/glossary/apa_lookup.json'

#========================================
# Process and generate glossary files
#========================================
Expand All @@ -213,6 +239,17 @@ jobs:
run: python3 content/glossary/_create_glossaries.py
# Execute the glossary script that generates glossary markdown files

- name: Check for missing references
if: always()
run: |
if [ -f "content/glossary/missing_references.txt" ]; then
echo "Missing references found:"
cat content/glossary/missing_references.txt
# Optionally fail the workflow or create an issue
else
echo "All references resolved successfully"
fi

#========================================
# Download Google Analytics data and validate
#========================================
Expand Down
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -268,3 +268,6 @@ gha-creds-*.json

# Tenzing failure reports (temporary files for CI)
scripts/forrt_contribs/tenzing_failures.json

# Bibtex to APA converter output
bibtex_to_apa/node_modules/
91 changes: 91 additions & 0 deletions bibtex_to_apa/bibtex_to_apa.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
const { Cite } = require('@citation-js/core');
require('@citation-js/plugin-bibtex');
require('@citation-js/plugin-csl');
const fs = require('fs');

// Default source: public Google Doc holding the bibliography in BibTeX form.
const DEFAULT_INPUT = 'https://docs.google.com/document/d/1-KKsOYZWJ3LdgdO2b2uJsOG2AmUDaQBNqWVVTY2W4W8/edit?tab=t.0';
// Default destination for the generated { citationKey: apaString } JSON map.
const DEFAULT_OUTPUT = 'apa_lookup.json';

/**
 * Load BibTeX content from a local file path or an http(s) URL.
 *
 * Google Docs URLs are rewritten to the plain-text export endpoint, and the
 * exported text has Google Docs comment markers (e.g. "[a]") stripped out.
 *
 * @param {string} input - File path or URL to read BibTeX from.
 * @returns {Promise<string>} Raw BibTeX content.
 * @throws {Error} On an invalid Google Doc URL or a non-OK HTTP response.
 */
async function fetchBibtex(input) {
  // Anything that does not look like a URL is treated as a local file path.
  if (!input.startsWith('http')) {
    return fs.readFileSync(input, 'utf-8');
  }

  const isGoogleDoc = input.includes('docs.google.com');
  let url = input;
  if (isGoogleDoc) {
    const idMatch = input.match(/\/d\/([a-zA-Z0-9_-]+)/);
    if (!idMatch) throw new Error('Invalid Google Doc URL');
    url = `https://docs.google.com/document/d/${idMatch[1]}/export?format=txt`;
  }

  const response = await fetch(url);
  if (!response.ok) throw new Error(`Failed to fetch: ${response.status}`);
  const text = await response.text();

  // Remove Google Docs comment markers
  return isGoogleDoc ? text.replace(/\[[a-z]+\]/gi, '') : text;
}

/**
 * Pull a URL out of a CSL-JSON entry.
 *
 * Prefers the explicit `URL` field; otherwise scans the free-text `note`
 * field for the first http(s) link.
 *
 * @param {object} entry - CSL-JSON entry produced by citation-js.
 * @returns {string|null} The URL, or null when none is present.
 */
function extractUrl(entry) {
  if (entry.URL) return entry.URL;
  const found = entry.note?.match(/https?:\/\/[^\s]+/);
  return found ? found[0] : null;
}

/**
 * Convert BibTeX content into a { citationKey: apaReference } lookup object.
 *
 * Each entry is formatted as an APA bibliography string via citation-js.
 * When `includeUrl` is set, a URL found on the entry (explicit URL field or
 * one embedded in the note) is appended as "Retrieved from <url>" unless it
 * is a DOI link or already present in the formatted reference.
 *
 * @param {string} bibtexContent - Raw BibTeX source.
 * @param {boolean} [includeUrl=true] - Append non-DOI URLs to references.
 * @returns {Object<string, string>} Citation-key to APA-string map.
 */
function bibtexToApaJson(bibtexContent, includeUrl = true) {
  const cite = new Cite(bibtexContent);
  const lookup = {};

  for (const entry of cite.data) {
    const key = entry.id || entry['citation-key'];
    let ref = new Cite(entry)
      .format('bibliography', { format: 'text', template: 'apa', lang: 'en-US' })
      .trim();

    if (includeUrl) {
      const url = extractUrl(entry);
      if (url && !url.includes('doi.org') && !ref.includes(url)) {
        // If the reference already ends in a URL, gluing a period onto it
        // would corrupt that URL, so append with a plain space instead.
        const endsWithUrl = /https?:\/\/[^\s]+$/.test(ref);
        ref = endsWithUrl
          ? `${ref} Retrieved from ${url}`
          : ref.replace(/\.?$/, `. Retrieved from ${url}`);
      }
    }

    lookup[key] = ref;
  }

  return lookup;
}

/**
 * CLI entry point: parse arguments, fetch BibTeX, convert it to an APA
 * lookup table and write the result to a JSON file.
 *
 * Flags:
 *   -i, --input   Input BibTeX (URL or file). Default: the Google Doc.
 *   -o, --output  Output JSON file. Default: apa_lookup.json.
 *   --no-url      Don't append URLs to references.
 *
 * @throws {Error} When a value-taking flag is given without a value.
 */
async function main() {
  const args = process.argv.slice(2);
  let input = DEFAULT_INPUT;
  let output = DEFAULT_OUTPUT;
  let includeUrl = true;

  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    if (arg === '-i' || arg === '--input' || arg === '-o' || arg === '--output') {
      const value = args[++i];
      // Fail fast on a dangling flag instead of silently using `undefined`
      // (which would, e.g., write a file literally named "undefined").
      if (value === undefined) throw new Error(`Missing value for ${arg}`);
      if (arg === '-i' || arg === '--input') input = value;
      else output = value;
    } else if (arg === '--no-url') {
      includeUrl = false;
    } else if (arg === '-h' || arg === '--help') {
      console.log(`Usage: node bibtex_to_apa.js [-i INPUT] [-o OUTPUT] [--no-url]
Options:
-i, --input Input BibTeX (URL or file). Default: Google Doc
-o, --output Output JSON file. Default: apa_lookup.json
--no-url Don't append URLs to references`);
      process.exit(0);
    }
  }

  const bibtex = await fetchBibtex(input);
  const apaJson = bibtexToApaJson(bibtex, includeUrl);
  fs.writeFileSync(output, JSON.stringify(apaJson, null, 2));
  console.log(`Wrote ${Object.keys(apaJson).length} references to ${output}`);
}

main().catch((err) => {
  // Report the failure AND exit non-zero so CI can detect it; the original
  // `.catch(console.error)` left the exit status at 0 on failure.
  console.error(err);
  process.exitCode = 1;
});
224 changes: 224 additions & 0 deletions bibtex_to_apa/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading