diff --git a/.gitignore b/.gitignore
index 705cdad..abdd45d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,10 +6,7 @@
 2025-06-24_COSdata_validated.xlsx
 2025-06-23_COSdata_validated.xlsx
 testing.R
-<<<<<<< HEAD
 misc
 process_replicationnetwork_data.R
 2025-10-16_COSdata_validated.xlsx
-=======
 ~$*
->>>>>>> d40aa040e23190d65994a36b3c35bf9ba9afccce
diff --git a/fred-data-validation/.gitignore b/fred-data-validation/.gitignore
new file mode 100644
index 0000000..a4376bc
--- /dev/null
+++ b/fred-data-validation/.gitignore
@@ -0,0 +1,4 @@
+fred_venv/
+fred_xml_output/
+fred_pdfs/
+**/**/__pycache__/
diff --git a/fred-data-validation/README.md b/fred-data-validation/README.md
new file mode 100644
index 0000000..7127514
--- /dev/null
+++ b/fred-data-validation/README.md
@@ -0,0 +1,330 @@
+# FRED Data Validation Pipeline
+
+Automated validation pipeline for replication studies using LLMs, the Crossref API, and GROBID for PDF processing.
+
+## Overview
+
+This project validates replication studies by:
+1. **Reference Matching**: Comparing original study references (`ref_o`) with replication references (`ref_r`) to determine whether the replication explicitly, implicitly, or unclearly addresses the original study
+2. **Abstract Extraction**: Fetching abstracts from the Crossref API, or extracting them from PDFs with GROBID when Crossref data is unavailable
+3. **Central Claim Validation**: Using LLMs to determine whether claims from original studies are central to the research, based on the title and abstract
+
+## Features
+
+- **Parallel Processing**: Multi-threaded execution for efficient processing of large datasets
+- **Multiple Abstract Sources**: Prioritizes the Crossref API, falls back to PDF extraction via GROBID
+- **LLM-Powered Validation**: Uses GPT models with structured output for consistent validation results
+- **Comprehensive Logging**: Detailed progress tracking and error reporting
+- **CSV Export**: Saves abstracts separately for reuse
+
+## Prerequisites
+
+### Required Software
+- Python 3.12.2 or higher
+- pip (Python package manager)
+
+### Required Accounts/APIs
+- OpenAI API key (for GPT models)
+- GROBID server access (default: https://kermitt2-grobid.hf.space/)
+
+## Installation
+
+### 1. Clone the Repository
+
+```bash
+git clone https://github.com/forrtproject/FReD-data
+cd FReD-data/fred-data-validation
+```
+
+### 2. Create Virtual Environment
+
+```bash
+python3.12 -m venv fred_venv
+source fred_venv/bin/activate  # On macOS/Linux
+# OR
+# fred_venv\Scripts\activate   # On Windows
+```
+
+### 3. Install Dependencies
+
+```bash
+pip install -r requirements.txt
+```
+
+**Required packages** (see `requirements.txt`):
+```txt
+pandas
+openpyxl
+crossrefapi
+grobid-client-python
+langchain-core
+langchain-openai
+pydantic
+```
+
+### 4. Set Up API Keys
+
+Set your OpenAI API key as an environment variable:
+
+```bash
+export OPENAI_API_KEY="your-api-key-here"
+```
+
+Or add it to your shell profile (`~/.zshrc`, `~/.bashrc`):
+
+```bash
+echo 'export OPENAI_API_KEY="your-api-key-here"' >> ~/.zshrc
+source ~/.zshrc
+```
+
+## Project Structure
+
+```
+fred-data-validation/
+├── main.py                              # Pipeline entry point
+├── requirements.txt                     # Python dependencies
+├── README.md                            # This file
+├── utils/                               # Row processing and XML helpers
+├── validators/                          # LLM validation chains
+├── 2025-10-22_COSdata_validated.xlsx    # Input data file
+├── fred_mini.csv                        # Input data file with titles
+├── fred_pdfs/                           # Directory with PDF files
+│   └── *.pdf                            # Original study PDFs
+├── fred_xml_output/                     # GROBID XML output
+│   └── *.grobid.tei.xml                 # Extracted XML files
+└── fred_venv/                           # Python virtual environment
+```
+
+## Input Data Format
+
+The Excel file (`2025-10-22_COSdata_validated.xlsx`) should contain:
+
+### Required Columns
+- `ref_o`: Original study reference (author, year, title)
+- `ref_r`: Replication study reference
+- `doi_o`: DOI of the original study
+- `claim_text_o`: Claim text from the original study
+- `file_o`: PDF filename (optional, for PDF processing)
+
+### Optional Columns
+All other columns from your dataset will be preserved in the output.
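+
+Before launching a long parallel run, it can be worth checking that the required columns are present. A minimal sketch, assuming the default input filename and sheet name used by `main.py`:
+
+```python
+import pandas as pd
+
+REQUIRED = ["ref_o", "ref_r", "doi_o", "claim_text_o"]
+
+df = pd.read_excel("2025-10-22_COSdata_validated.xlsx", sheet_name="Sheet 1")
+missing = [col for col in REQUIRED if col not in df.columns]
+if missing:
+    raise SystemExit(f"Missing required columns: {missing}")
+print(f"OK: {len(df)} rows, columns: {df.columns.tolist()}")
+```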
+
+## Usage
+
+### Basic Usage
+
+```bash
+python main.py
+```
+
+### Processing Configuration
+
+Edit the script to adjust processing parameters:
+
+```python
+# Number of parallel workers (adjust based on API rate limits)
+max_workers = min(10, os.cpu_count() or 1)
+
+# Model selection
+llm = ChatOpenAI(
+    model="gpt-5-mini",  # or "gpt-4o-mini", "gpt-4", etc.
+    temperature=0.0
+)
+```
+
+### Test Mode
+
+To process only a subset of records for testing, truncate the DataFrame before the parallel run:
+
+```python
+# Process only the first 10 records
+df = df.iloc[:10]
+```
+
+## Output Files
+
+The script generates three output files:
+
+### 1. Main Results Excel File
+**Filename**: `2025-10-22_COSdata_combined_validation_parallel.xlsx`
+
+**New Columns Added:**
+- `reference_match`: Classification (explicit/implicit/unclear)
+- `ref_match_confidence`: Confidence score (0.0-1.0)
+- `ref_match_evidence`: Supporting evidence from text
+- `ref_match_explanation`: Detailed reasoning
+- `abstract_source`: Where the abstract was found (crossref/pdf/none)
+- `has_abstract`: Boolean flag
+- `abstract_text`: Full abstract text
+- `is_central_claim`: Whether the claim is central to the article
+- `claim_confidence`: Confidence score (0.0-1.0)
+- `claim_match_type`: How the claim maps (exact/construct_mapping/peripheral/unclear)
+- `claim_key_evidence`: Supporting evidence from the abstract
+- `claim_concerns`: Methodological concerns
+- `claim_explanation`: Detailed reasoning
+
+### 2. Abstracts CSV
+**Filename**: `2025-10-22_abstracts_parallel.csv`
+
+**Columns:**
+- `doi_o`: DOI of the original study
+- `abstract`: Full abstract text
+- `source`: Source of the abstract (crossref/pdf)
+
+### 3. Console Output
+Real-time progress and statistics printed to the console.
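+
+For a quick look at the results, the combined Excel file can be loaded and summarized with pandas. A small sketch assuming the default output filename:
+
+```python
+import pandas as pd
+
+# Assumes the default output filename written by main.py
+results = pd.read_excel("2025-10-22_COSdata_combined_validation_parallel.xlsx")
+
+# Distribution of reference-match classifications
+print(results["reference_match"].value_counts())
+
+# For non-explicit matches, how often was the original claim judged central?
+non_explicit = results[results["reference_match"].isin(["implicit", "unclear"])]
+print(non_explicit["is_central_claim"].value_counts())
+```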
+
+## Validation Logic
+
+### Reference Match Classification
+
+1. **EXPLICIT**: `ref_r` contains author names AND/OR the publication year from `ref_o`
+   - Example: "Replication of Finucane et al.'s (2000) study"
+
+2. **IMPLICIT**: `ref_r` mentions a specific topic/construct from `ref_o` but not the author/year
+   - Example: Original topic "implicit threat-related bias" → Replication "attention bias to social threat"
+
+3. **UNCLEAR**: `ref_r` mentions neither the authors/year nor the specific topic
+   - Example: Generic titles like "Many Labs 2"
+
+### Central Claim Validation
+
+Claims are evaluated as central if they:
+- Target the main research question mentioned in the title/abstract
+- Map to core constructs described in the abstract
+- Could be tested by methods consistent with the abstract
+- Are emphasized in the title and/or abstract
+
+**Note**: Claim validation only runs for non-explicit reference matches, to save API calls.
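+
+The reference-matching chain can also be called directly, which is useful for spot-checking single pairs. A minimal sketch, run from the project root with dependencies installed and `OPENAI_API_KEY` set; the reference strings below are illustrative only:
+
+```python
+from validators.reference_validation import (
+    setup_reference_validation_chain,
+    validate_reference_match,
+)
+
+chain = setup_reference_validation_chain()
+
+# Illustrative example pair; substitute real ref_o / ref_r strings from the dataset
+match, confidence, evidence, explanation = validate_reference_match(
+    chain,
+    ref_o="Finucane, M. L., et al. (2000). The affect heuristic in judgments of risks and benefits.",
+    ref_r="Replication of Finucane et al.'s (2000) affect heuristic study.",
+)
+print(match, confidence)  # e.g. "explicit", 0.95
+```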
+
+## Performance Optimization
+
+### Parallel Processing
+- Uses `ThreadPoolExecutor` for concurrent API calls
+- Default: up to 10 worker threads (configurable)
+- Thread-safe statistics tracking with locks
+
+### Caching
+- Checks for existing GROBID XML files before reprocessing PDFs
+- Reuses abstracts from previous runs if XML files exist
+
+### API Rate Limiting
+- Configurable worker count to respect API limits
+- Per-row error handling, so a single failed request does not stop the run
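+
+If a PDF has already been processed, its abstract can be pulled from the cached TEI XML without another GROBID call. A rough sketch using the helper from `utils/xml_utils.py`; the filename here is hypothetical, in the pipeline it is derived from the `file_o` column:
+
+```python
+import os
+from utils.xml_utils import check_abstract_in_xml
+
+# Hypothetical example file in the GROBID output directory
+xml_path = os.path.join("fred_xml_output", "example_study.grobid.tei.xml")
+
+if os.path.exists(xml_path):
+    found, abstract = check_abstract_in_xml(xml_path)
+    if found:
+        print(abstract[:200])  # reuse the cached GROBID output instead of reprocessing the PDF
+else:
+    print("No cached XML - the pipeline would send the PDF to GROBID")
+```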
+
+## Troubleshooting
+
+### Common Issues
+
+#### 1. Missing OpenAI API Key
+```
+Error: OpenAI API key not set
+```
+**Solution**: Export your API key as shown in the Setup section.
+
+#### 2. GROBID Processing Failures
+```
+Error processing PDF: Connection timeout
+```
+**Solution**: Check the GROBID server status or use an alternative server.
+
+#### 3. Memory Issues with Large Datasets
+```
+MemoryError
+```
+**Solution**: Reduce `max_workers` or process the data in batches.
+
+#### 4. Invalid JSON Output from LLM
+```
+Error: Invalid json output
+```
+**Solution**: The model may be returning malformed JSON. Check model compatibility or adjust the prompt.
+
+### Debug Mode
+
+Enable verbose logging:
+
+```python
+import logging
+logging.basicConfig(level=logging.DEBUG)
+```
+
+## Advanced Configuration
+
+### Custom GROBID Server
+
+```python
+client = GrobidClient(grobid_server="http://your-grobid-server:8070")
+```
+
+### Using Different LLM Models
+
+```python
+# For cheaper processing
+llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.0)
+
+# For better accuracy
+llm = ChatOpenAI(model="gpt-4o", temperature=0.0)
+
+# For Anthropic Claude (requires the langchain-anthropic package)
+from langchain_anthropic import ChatAnthropic
+llm = ChatAnthropic(model="claude-3-sonnet-20240229")
+```
+
+### Batch Processing
+
+Process data in chunks:
+
+```python
+chunk_size = 100
+for i in range(0, len(df), chunk_size):
+    chunk_df = df.iloc[i:i+chunk_size]
+    # Process chunk_df
+```
+
+## Contributing
+
+### Code Style
+- Follow PEP 8
+- Use type hints (Python 3.12+ syntax)
+- Add docstrings to functions
+- Keep functions focused and testable
+
+### Testing
+
+Run with a small subset first:
+
+```python
+df = df.iloc[:10]  # Test with 10 records
+```
+
+## License
+
+This project is open source and available under the MIT License.
+
+## Citation
+
+If you use this pipeline in your research, please cite:
+
+```
+[Add citation information]
+```
+
+## Support
+
+For issues, questions, or contributions, please create an issue on GitHub.
+
+## Acknowledgments
+
+- GROBID for PDF processing
+- Crossref for the metadata API
+- OpenAI for LLM capabilities
+- LangChain for LLM orchestration
+
+## Changelog
+
+### Version 1.0.0 (2025-10-23)
+- Initial release
+- Parallel processing implementation
+- Reference matching validation
+- Central claim validation
+- Abstract extraction from Crossref and PDFs
diff --git a/fred-data-validation/main.py b/fred-data-validation/main.py
new file mode 100644
index 0000000..89be169
--- /dev/null
+++ b/fred-data-validation/main.py
@@ -0,0 +1,144 @@
+import os
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from threading import Lock
+
+import pandas as pd
+from crossref.restful import Works, Etiquette
+from grobid_client.grobid_client import GrobidClient
+
+from validators.reference_validation import setup_reference_validation_chain
+from validators.claim_validation import setup_central_claim_validation_chain
+from utils.row_processor import process_single_row
+
+if __name__ == "__main__":
+    source_directory = 'fred_pdfs/'
+    my_etiquette = Etiquette('FRED_DATA', 'v.1.0', 'https://github.com/forrtproject/FReD-data', 'ksiziva+fredData@gmail.com')
+    works = Works(etiquette=my_etiquette)
+    df = pd.read_excel('2025-10-22_COSdata_validated.xlsx', sheet_name='Sheet 1')
+
+    print(f"Total records: {len(df)}")
+    print(f"Columns: {df.columns.tolist()}")
+
+    # Setup validation chains
+    print("\nSetting up validation chains...")
+    try:
+        reference_chain = setup_reference_validation_chain()
+        print("✓ Reference validation chain setup complete")
+    except Exception as e:
+        print(f"✗ Error setting up reference chain: {e}")
+        reference_chain = None
+
+    try:
+        claim_chain = setup_central_claim_validation_chain()
+        print("✓ Central claim validation chain setup complete")
+    except Exception as e:
+        print(f"✗ Error setting up claim chain: {e}")
+        claim_chain = None
+
+    # Add new columns for results
+    df['reference_match'] = ''
+    df['ref_match_confidence'] = 0.0
+    df['ref_match_evidence'] = ''
+    df['ref_match_explanation'] = ''
+    df['abstract_source'] = ''
+    df['has_abstract'] = False
+    df['abstract_text'] = ''
+    df['is_central_claim'] = False
+    df['claim_confidence'] = 0.0
+    df['claim_match_type'] = ''
+    df['claim_key_evidence'] = ''
+    df['claim_concerns'] = ''
+    df['claim_explanation'] = ''
+
+    abstracts_data = []
+
+    if 'ref_o' not in df.columns or 'ref_r' not in df.columns:
+        print("Required columns 'ref_o' or 'ref_r' not found!")
+        print("Available columns:", df.columns.tolist())
+    else:
+        client = GrobidClient(grobid_server="https://kermitt2-grobid.hf.space/")
+        stats_lock = Lock()
+        stats = {
+            'validations_completed': 0,
+            'claim_validations': 0,
+            'with_abstract': 0,
+            'abstracts_from_pdf': 0,
+            'errors': 0
+        }
+
+        print(f"\nProcessing {len(df)} records in parallel...")
+        max_workers = min(10, os.cpu_count() or 1)
+
+        with ThreadPoolExecutor(max_workers=max_workers) as executor:
+            future_to_row = {
+                executor.submit(
+                    process_single_row,
+                    (index, row),
+                    reference_chain,
+                    claim_chain,
+                    works,
+                    client,
+                    source_directory
+                ): index
+                for index, row in df.iterrows()
+            }
+
+            completed = 0
+            for future in as_completed(future_to_row):
+                completed += 1
+                index = future_to_row[future]
+                try:
+                    result = future.result()
+                    df.at[result['index'], 'reference_match'] = result['reference_match']
+                    df.at[result['index'], 'ref_match_confidence'] = result['ref_match_confidence']
+                    df.at[result['index'], 'ref_match_evidence'] = result['ref_match_evidence']
+                    df.at[result['index'], 'ref_match_explanation'] = result['ref_match_explanation']
+                    df.at[result['index'], 'abstract_source'] = result['abstract_source']
+                    df.at[result['index'], 'has_abstract'] = result['has_abstract']
+                    df.at[result['index'], 'abstract_text'] = result['abstract_text']
+                    df.at[result['index'], 'is_central_claim'] = result['is_central_claim']
+                    df.at[result['index'], 'claim_confidence'] = result['claim_confidence']
+                    df.at[result['index'], 'claim_match_type'] = result['claim_match_type']
+                    df.at[result['index'], 'claim_key_evidence'] = result['claim_key_evidence']
+                    df.at[result['index'], 'claim_concerns'] = result['claim_concerns']
+                    df.at[result['index'], 'claim_explanation'] = result['claim_explanation']
+
+                    if result['abstract_data']:
+                        with stats_lock:
+                            abstracts_data.append(result['abstract_data'])
+
+                    with stats_lock:
+                        if result['reference_match']:
+                            stats['validations_completed'] += 1
+                        if result['is_central_claim'] or result['claim_confidence'] > 0:
+                            stats['claim_validations'] += 1
+                        if result['abstract_source'] == 'crossref':
+                            stats['with_abstract'] += 1
+                        elif result['abstract_source'] == 'pdf':
+                            stats['abstracts_from_pdf'] += 1
+                        if result['error']:
+                            stats['errors'] += 1
+                except Exception as e:
+                    print(f"Error retrieving result for row {index}: {e}")
+                    with stats_lock:
+                        stats['errors'] += 1
+
+                if completed % 10 == 0:
+                    print(f"\nProgress: {completed}/{len(df)} records completed")
+
+        output_filename = '2025-10-22_COSdata_combined_validation_parallel.xlsx'
+        try:
+            df.to_excel(output_filename, index=False)
+            print(f"\n✓ Results saved to: {output_filename}")
+        except Exception as e:
+            print(f"✗ Error saving results: {e}")
+
+        if abstracts_data:
+            abstracts_df = pd.DataFrame(abstracts_data)
+            abstracts_csv_filename = '2025-10-22_abstracts_parallel.csv'
+            try:
+                abstracts_df.to_csv(abstracts_csv_filename, index=False, encoding='utf-8')
+                print(f"✓ Abstracts saved to: {abstracts_csv_filename}")
+                print(f"  Total abstracts collected: {len(abstracts_df)}")
+            except Exception as e:
+                print(f"✗ Error saving abstracts CSV: {e}")
+
+        print("\nProcessing statistics:")
+        for k, v in stats.items():
+            print(f"  {k}: {v}")
diff --git a/fred-data-validation/requirements.txt b/fred-data-validation/requirements.txt
new file mode 100644
index 0000000..e53e34d
--- /dev/null
+++ b/fred-data-validation/requirements.txt
@@ -0,0 +1,7 @@
+pandas>=2.2.0
+openpyxl>=3.1.0
+crossrefapi>=1.7.0
+grobid-client-python>=0.0.10
+langchain-core>=1.0.0
+langchain-openai>=1.0.0
+pydantic>=2.2.0
diff --git a/fred-data-validation/utils/__init__.py b/fred-data-validation/utils/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/fred-data-validation/utils/row_processor.py b/fred-data-validation/utils/row_processor.py
new file mode 100644
index 0000000..6636bc5
--- /dev/null
+++ b/fred-data-validation/utils/row_processor.py
@@ -0,0 +1,171 @@
+import os
+import shutil
+import tempfile
+import traceback
+
+import pandas as pd
+
+from utils.xml_utils import check_abstract_in_xml
+from validators.reference_validation import validate_reference_match
+from validators.claim_validation import validate_central_claim
+
+
+def process_single_row(
+    row_data: tuple,
+    reference_chain,
+    claim_chain,
+    works,
+    client,
+    source_directory: str
+) -> dict:
+    """
+    Process a single row - designed to be run in parallel.
+    Returns a dictionary with all results for this row.
+    """
+    index, row = row_data
+    result = {
+        'index': index,
+        'reference_match': '',
+        'ref_match_confidence': 0.0,
+        'ref_match_evidence': '',
+        'ref_match_explanation': '',
+        'abstract_source': '',
+        'has_abstract': False,
+        'abstract_text': '',
+        'is_central_claim': False,
+        'claim_confidence': 0.0,
+        'claim_match_type': '',
+        'claim_key_evidence': '',
+        'claim_concerns': '',
+        'claim_explanation': '',
+        'abstract_data': None,
+        'error': None
+    }
+
+    temp_dir = None
+
+    try:
+        print(f"[Row {index + 1}] Starting processing...")
+
+        # STEP 1: Reference matching
+        if (pd.notna(row['ref_o']) and str(row['ref_o']).strip() and
+                pd.notna(row['ref_r']) and str(row['ref_r']).strip()):
+
+            ref_o_text = str(row['ref_o']).strip()
+            ref_r_text = str(row['ref_r']).strip()
+
+            if reference_chain:
+                reference_match, confidence, evidence, explanation = validate_reference_match(
+                    reference_chain, ref_o_text, ref_r_text
+                )
+
+                result['reference_match'] = reference_match
+                result['ref_match_confidence'] = confidence
+                result['ref_match_evidence'] = evidence
+                result['ref_match_explanation'] = explanation
+
+                print(f"[Row {index + 1}] Reference: {reference_match.upper()} ({confidence:.2f})")
+
+                # STEP 2: If NOT explicit, fetch abstract and validate claim
+                if reference_match != "explicit":
+                    abstract_text = ""
+                    abstract_source = "none"
+                    crossref_result = None
+
+                    # Try Crossref
+                    try:
+                        crossref_result = works.doi(row['doi_o'])
+                        if 'abstract' in crossref_result and crossref_result['abstract'] and crossref_result['abstract'].strip():
+                            abstract_text = crossref_result['abstract'].strip()
+                            abstract_source = "crossref"
+                            print(f"[Row {index + 1}] Abstract from Crossref")
+                    except Exception as crossref_error:
+                        print(f"[Row {index + 1}] Crossref error: {crossref_error}")
+
+                    # Try PDF if no Crossref abstract
+                    if (not abstract_text and 'file_o' in row and
+                            pd.notna(row['file_o']) and str(row['file_o']).strip()):
+
+                        try:
+                            pdf_filename = str(row['file_o']).strip()
+                            xml_filename = pdf_filename.replace('.pdf', '.grobid.tei.xml')
+                            xml_path = os.path.join('fred_xml_output', xml_filename)
+
+                            if os.path.exists(xml_path):
+                                abstract_found, abstract_content = check_abstract_in_xml(xml_path)
+                                if abstract_found:
+                                    abstract_text = abstract_content
+                                    abstract_source = "pdf"
+                                    print(f"[Row {index + 1}] Abstract from existing XML")
+                            else:
+                                temp_dir = tempfile.mkdtemp()
+                                try:
+                                    source_file_path = os.path.join(source_directory, pdf_filename)
+                                    if os.path.exists(source_file_path):
+                                        temp_file_path = os.path.join(temp_dir, pdf_filename)
+                                        shutil.copy2(source_file_path, temp_file_path)
+                                        client.process("processFulltextDocument", temp_dir, "fred_xml_output")
+
+                                        if os.path.exists(xml_path):
+                                            abstract_found, abstract_content = check_abstract_in_xml(xml_path)
+                                            if abstract_found:
+                                                abstract_text = abstract_content
+                                                abstract_source = "pdf"
+                                                print(f"[Row {index + 1}] Abstract from new XML")
+                                finally:
+                                    if temp_dir and os.path.exists(temp_dir):
+                                        shutil.rmtree(temp_dir)
+                                        temp_dir = None
+                        except Exception as pdf_error:
+                            print(f"[Row {index + 1}] PDF error: {pdf_error}")
+
+                    # Update results
+                    result['abstract_source'] = abstract_source
+                    result['has_abstract'] = bool(abstract_text)
+                    result['abstract_text'] = abstract_text
+
+                    # Store abstract data for CSV
+                    if abstract_text and 'doi_o' in row and pd.notna(row['doi_o']):
+                        result['abstract_data'] = {
+                            'doi_o': str(row['doi_o']).strip(),
+                            'abstract': abstract_text,
+                            'source': abstract_source
+                        }
+
+                    # Validate central claim
+                    if (abstract_text and claim_chain and
+                            pd.notna(row['claim_text_o']) and str(row['claim_text_o']).strip()):
+
+                        claim_text = str(row['claim_text_o']).strip()
+
+                        # Get title
+                        title = "Title not available"
+                        if crossref_result and 'title' in crossref_result:
+                            if isinstance(crossref_result['title'], list):
+                                title = crossref_result['title'][0]
+                            else:
+                                title = str(crossref_result['title'])
+
+                        is_central, claim_conf, match_type, claim_evidence, concerns, claim_explanation = validate_central_claim(
+                            claim_chain, claim_text, abstract_text, title, row['doi_o']
+                        )
+
+                        result['is_central_claim'] = is_central
+                        result['claim_confidence'] = claim_conf
+                        result['claim_match_type'] = match_type
+                        result['claim_key_evidence'] = claim_evidence
+                        result['claim_concerns'] = concerns
+                        result['claim_explanation'] = claim_explanation
+
+                        status = "CENTRAL" if is_central else "NOT CENTRAL"
+                        print(f"[Row {index + 1}] Claim: {status} ({match_type}, {claim_conf:.2f})")
+                else:
+                    print(f"[Row {index + 1}] Explicit match - skipping claim validation")
+
+        print(f"[Row {index + 1}] ✓ Complete")
+
+    except Exception as e:
+        error_msg = f"Error: {str(e)}\n{traceback.format_exc()}"
+        result['error'] = error_msg
+        print(f"[Row {index + 1}] ✗ {error_msg}")
+        if temp_dir and os.path.exists(temp_dir):
+            shutil.rmtree(temp_dir)
+
+    return result
diff --git a/fred-data-validation/utils/xml_utils.py b/fred-data-validation/utils/xml_utils.py
new file mode 100644
index 0000000..27a7b29
--- /dev/null
+++ b/fred-data-validation/utils/xml_utils.py
@@ -0,0 +1,75 @@
+import xml.etree.ElementTree as ET
+
+
+def check_abstract_in_xml(xml_file_path):
+    """Check if the XML contains abstract or summary content"""
+    try:
+        tree = ET.parse(xml_file_path)
+        root = tree.getroot()
+
+        abstract_found = False
+        abstract_content = ""
+
+        namespaces = {'tei': 'http://www.tei-c.org/ns/1.0'}
+
+        # First pass: namespaced TEI elements
+        for _, ns_uri in namespaces.items():
+            xpath_patterns = [
+                f".//{{{ns_uri}}}abstract",
+                f".//{{{ns_uri}}}div[@type='abstract']",
+                f".//{{{ns_uri}}}div[@subtype='abstract']",
+                f".//{{{ns_uri}}}summary",
+                f".//{{{ns_uri}}}div[@type='summary']"
+            ]
+            for pattern in xpath_patterns:
+                elements = root.findall(pattern)
+                for element in elements:
+                    text_content = get_element_text(element)
+                    if text_content and text_content.strip():
+                        abstract_found = True
+                        abstract_content = text_content.strip()
+                        break
+                if abstract_found:
+                    break
+            if abstract_found:
+                break
+
+        # Second pass: fall back to un-namespaced elements
+        if not abstract_found:
+            xpath_patterns = [
+                ".//abstract",
+                ".//div[@type='abstract']",
+                ".//div[@subtype='abstract']",
+                ".//summary",
+                ".//div[@type='summary']"
+            ]
+            for pattern in xpath_patterns:
+                elements = root.findall(pattern)
+                for element in elements:
+                    text_content = get_element_text(element)
+                    if text_content and text_content.strip():
+                        abstract_found = True
+                        abstract_content = text_content.strip()
+                        break
+                if abstract_found:
+                    break
+
+        return abstract_found, abstract_content
+
+    except Exception as e:
+        print(f"Error parsing XML {xml_file_path}: {e}")
+        return False, ""
+
+
+def get_element_text(element):
+    """Recursively extract all text content from an element and its children"""
+    text_parts = []
+
+    if element.text:
+        text_parts.append(element.text.strip())
+
+    for child in element:
+        child_text = get_element_text(child)
+        if child_text:
+            text_parts.append(child_text)
+        if child.tail:
+            text_parts.append(child.tail.strip())
+
+    return ' '.join(filter(None, text_parts))
diff --git a/fred-data-validation/validators/__init__.py b/fred-data-validation/validators/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/fred-data-validation/validators/claim_validation.py b/fred-data-validation/validators/claim_validation.py
new file mode 100644
index 0000000..40e6c70
--- /dev/null
+++ b/fred-data-validation/validators/claim_validation.py
@@ -0,0 +1,71 @@
+from typing import Tuple
+
+from pydantic import BaseModel, Field
+from langchain_openai import ChatOpenAI
+from langchain_core.prompts import ChatPromptTemplate
+
+
+class CentralClaimValidation(BaseModel):
+    """Structure for central claim validation results"""
+    is_central_claim: bool = Field(description="Whether the claim is central")
+    confidence: float = Field(ge=0.0, le=1.0)
+    match_type: str = Field(description="How the claim maps: 'exact', 'construct_mapping', 'peripheral', or 'unclear'")
+    key_evidence: str = Field(description="Key sentence(s) from the title/abstract that support or contradict centrality")
+    concerns: str = Field(description="Methodological or mapping concerns (if any)")
+    explanation: str = Field(description="Detailed reasoning for the decision")
+
+
+def setup_central_claim_validation_chain():
+    """Setup LangChain for central claim validation"""
+    llm = ChatOpenAI(model="gpt-5-mini", temperature=0.0)
+    llm_structured = llm.with_structured_output(CentralClaimValidation)
+
+    prompt = ChatPromptTemplate.from_messages([
+        ("system", """You are an expert in research methodology.
+    Your task: Decide whether the provided claim from an original study is a CENTRAL CLAIM of that article based on the TITLE and ABSTRACT.
+
+    Definition — Central Claim:
+    - Central claims are the MAIN research questions or PRIMARY FINDINGS that the article emphasizes
+    - They are usually referenced in the TITLE and explicitly mentioned in the ABSTRACT
+    - The abstract usually already mentions results regarding these central claims
+    - Central claims can be tested with specific methods
+
+    Evaluation Guidance:
+    - Focus ONLY on TITLE and ABSTRACT
+    - Be conservative: Return true ONLY when there is clear alignment
+
+    Output: Return ONLY a valid JSON object matching the schema. No extra text."""),
+
+        ("human", """
+    ORIGINAL STUDY:
+    Title: {title}
+    DOI: {doi}
+    Abstract: {abstract}
+
+    CLAIM (from original paper): {claim}
+
+    Task: Is this claim a central claim of the article based on title and abstract?
+    Return only the JSON object.""")
+    ])
+
+    chain = prompt | llm_structured
+    return chain
+
+
+def validate_central_claim(
+        chain, claim: str, abstract: str,
+        title: str, doi: str
+) -> Tuple[bool, float, str, str, str, str]:
+    """Validate if the claim is central to the article"""
+    try:
+        raw_result = chain.invoke({"claim": claim, "abstract": abstract, "title": title, "doi": doi})
+        if isinstance(raw_result, CentralClaimValidation):
+            parsed = raw_result
+        elif isinstance(raw_result, dict):
+            parsed = CentralClaimValidation.model_validate(raw_result)
+        elif isinstance(raw_result, str):
+            parsed = CentralClaimValidation.model_validate_json(raw_result)
+        else:
+            parsed = CentralClaimValidation.model_validate(dict(raw_result))
+        return parsed.is_central_claim, parsed.confidence, parsed.match_type, parsed.key_evidence, parsed.concerns, parsed.explanation
+    except Exception as e:
+        return False, 0.0, "error", f"Error: {e}", "Processing error", ""
diff --git a/fred-data-validation/validators/reference_validation.py b/fred-data-validation/validators/reference_validation.py
new file mode 100644
index 0000000..7d40e8d
--- /dev/null
+++ b/fred-data-validation/validators/reference_validation.py
@@ -0,0 +1,64 @@
+from typing import Tuple
+
+from pydantic import BaseModel, Field
+from langchain_openai import ChatOpenAI
+from langchain_core.prompts import ChatPromptTemplate
+
+
+class ReferenceMatchValidation(BaseModel):
+    """Structure for reference matching validation results"""
+    reference_match: str = Field(description="Classification: 'explicit', 'implicit', or 'unclear'")
+    confidence: float = Field(ge=0.0, le=1.0)
+    key_evidence: str = Field(description="Specific text from ref_r that supports the classification")
+    explanation: str = Field(description="Detailed reasoning for the classification decision")
+
+
+def setup_reference_validation_chain():
+    """Setup LangChain for reference matching validation"""
+    llm = ChatOpenAI(model="gpt-5-mini", temperature=0.0)
+    llm_structured = llm.with_structured_output(ReferenceMatchValidation)
+
+    prompt = ChatPromptTemplate.from_messages([
+        ("system", """You are an expert in academic citation analysis.
+    Your task: Determine if ref_r (replication reference) clearly indicates that it addresses ref_o (original reference).
+
+    Classification rules:
+    1. EXPLICIT: The title/text of ref_r contains the author name(s) AND/OR publication year from ref_o.
+    2. IMPLICIT: The title/text of ref_r does NOT mention author/year BUT does contain the specific, unambiguous topic, effect name, or key construct from ref_o.
+    3. UNCLEAR: The title/text of ref_r does NOT mention authors/year AND does NOT mention the specific topic from ref_o.
+
+    Be conservative: only classify as explicit/implicit when there is clear textual evidence.
+
+    Output: Return ONLY a valid JSON object matching the schema. No extra text."""),
+
+        ("human", """
+    ORIGINAL REFERENCE (ref_o):
+    {ref_o}
+
+    REPLICATION REFERENCE (ref_r):
+    {ref_r}
+
+    Task: Does ref_r's text clearly indicate it addresses ref_o?
+
+    Classify as: 'explicit', 'implicit', or 'unclear'
+    Return only the JSON object.""")
+    ])
+
+    chain = prompt | llm_structured
+    return chain
+
+
+def validate_reference_match(chain, ref_o: str, ref_r: str) -> Tuple[str, float, str, str]:
+    """Validate if ref_r clearly addresses ref_o"""
+    try:
+        raw_result = chain.invoke({"ref_o": ref_o, "ref_r": ref_r})
+        if isinstance(raw_result, ReferenceMatchValidation):
+            parsed = raw_result
+        elif isinstance(raw_result, dict):
+            parsed = ReferenceMatchValidation.model_validate(raw_result)
+        elif isinstance(raw_result, str):
+            parsed = ReferenceMatchValidation.model_validate_json(raw_result)
+        else:
+            parsed = ReferenceMatchValidation.model_validate(dict(raw_result))
+        return parsed.reference_match, parsed.confidence, parsed.key_evidence, parsed.explanation
+    except Exception as e:
+        return "error", 0.0, f"Error: {e}", "Processing error"