Skip to content

Commit e44229a

Browse files
committed
pre-commit passed
1 parent e525fac commit e44229a

File tree

4 files changed

+38
-28
lines changed

4 files changed

+38
-28
lines changed

.pre-commit-config.yaml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -25,4 +25,5 @@ repos:
2525
rev: v4.0.0-alpha.8
2626
hooks:
2727
- id: prettier
28+
files: '.*\.(md|markdown|json|yaml|yml|js|jsx|css|html)$'
2829
exclude: .venv

datafog/services/spark_service.py

Lines changed: 26 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -23,51 +23,59 @@ class SparkService:
2323

2424
def __init__(self, master=None):
2525
self.master = master
26-
26+
2727
# Ensure pyspark is installed first
2828
self.ensure_installed("pyspark")
29-
29+
3030
# Now import necessary modules after ensuring pyspark is installed
3131
try:
3232
from pyspark.sql import DataFrame, SparkSession
3333
from pyspark.sql.functions import udf
3434
from pyspark.sql.types import ArrayType, StringType
35-
35+
3636
# Assign fields
3737
self.SparkSession = SparkSession
3838
self.DataFrame = DataFrame
3939
self.udf = udf
4040
self.ArrayType = ArrayType
4141
self.StringType = StringType
42-
42+
4343
# Create the spark session
4444
self.spark = self.create_spark_session()
4545
except ImportError as e:
46-
raise ImportError(f"Failed to import PySpark modules: {e}. "
47-
f"Make sure PySpark is installed correctly.")
46+
raise ImportError(
47+
f"Failed to import PySpark modules: {e}. "
48+
f"Make sure PySpark is installed correctly."
49+
)
4850

4951
def create_spark_session(self):
5052
# Check if we're running in a test environment
51-
in_test_env = 'PYTEST_CURRENT_TEST' in os.environ or 'TOX_ENV_NAME' in os.environ
52-
53+
in_test_env = (
54+
"PYTEST_CURRENT_TEST" in os.environ or "TOX_ENV_NAME" in os.environ
55+
)
56+
5357
# Set Java system properties to handle security manager issues
5458
# This is needed for newer Java versions
55-
os.environ['PYSPARK_SUBMIT_ARGS'] = '--conf spark.driver.allowMultipleContexts=true pyspark-shell'
56-
59+
os.environ["PYSPARK_SUBMIT_ARGS"] = (
60+
"--conf spark.driver.allowMultipleContexts=true pyspark-shell"
61+
)
62+
5763
# Create a builder with the app name
5864
builder = self.SparkSession.builder.appName("datafog")
59-
65+
6066
# Add configuration to work around security manager issues
6167
builder = builder.config("spark.driver.allowMultipleContexts", "true")
62-
builder = builder.config("spark.driver.extraJavaOptions", "-Djava.security.manager=allow")
63-
68+
builder = builder.config(
69+
"spark.driver.extraJavaOptions", "-Djava.security.manager=allow"
70+
)
71+
6472
# If master is specified, use it
6573
if self.master:
6674
builder = builder.master(self.master)
6775
# Otherwise, if we're in a test environment, use local mode
6876
elif in_test_env:
6977
builder = builder.master("local[1]")
70-
78+
7179
# Create and return the session
7280
return builder.getOrCreate()
7381

@@ -86,6 +94,7 @@ def ensure_installed(self, package_name):
8694
print(f"{package_name} installed successfully.")
8795
except subprocess.CalledProcessError as e:
8896
print(f"Failed to install {package_name}: {e}")
89-
raise ImportError(f"Could not install {package_name}. "
90-
f"Please install it manually with 'pip install {package_name}'.")
91-
97+
raise ImportError(
98+
f"Could not install {package_name}. "
99+
f"Please install it manually with 'pip install {package_name}'."
100+
)

notes/story-1.7-tkt.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -24,4 +24,4 @@ Or directly with pytest:
2424

2525
```bash
2626
pytest -m "integration"
27-
```
27+
```

tests/test_spark_integration.py

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -14,11 +14,11 @@ def spark_service():
1414
"""Create a shared SparkService instance for all tests."""
1515
# Initialize SparkService with explicit local mode
1616
service = SparkService(master="local[1]")
17-
17+
1818
yield service
19-
19+
2020
# Clean up after all tests
21-
if hasattr(service, 'spark') and service.spark is not None:
21+
if hasattr(service, "spark") and service.spark is not None:
2222
service.spark.stop()
2323

2424

@@ -30,15 +30,15 @@ def sample_json_data():
3030
{"name": "Jane Smith", "email": "jane.smith@example.com", "age": 25},
3131
{"name": "Bob Johnson", "email": "bob.johnson@example.com", "age": 40},
3232
]
33-
33+
3434
# Create a temporary file
3535
with tempfile.NamedTemporaryFile(suffix=".json", delete=False, mode="w") as f:
3636
for item in data:
3737
f.write(json.dumps(item) + "\n")
3838
temp_file = f.name
39-
39+
4040
yield temp_file
41-
41+
4242
# Clean up the temporary file after the test
4343
if os.path.exists(temp_file):
4444
os.remove(temp_file)
@@ -51,7 +51,7 @@ def test_spark_service_initialization(spark_service):
5151
assert spark_service.spark is not None
5252
assert spark_service.spark.sparkContext.appName == "datafog"
5353
assert spark_service.spark.sparkContext.master.startswith("local")
54-
54+
5555
# Verify that the necessary Spark classes are available
5656
assert spark_service.DataFrame is not None
5757
assert spark_service.SparkSession is not None
@@ -63,16 +63,16 @@ def test_spark_read_json(spark_service, sample_json_data):
6363
"""Test that SparkService can read JSON data in local mode."""
6464
# Read the JSON data
6565
result = spark_service.read_json(sample_json_data)
66-
66+
6767
# Verify the result
6868
assert len(result) == 3, f"Expected 3 rows, got {len(result)}"
69-
69+
7070
# PySpark Row objects have a __contains__ method and can be accessed like dictionaries
7171
# but they're not actually dictionaries
7272
assert all(hasattr(item, "name") for item in result), "Missing 'name' field"
7373
assert all(hasattr(item, "email") for item in result), "Missing 'email' field"
7474
assert all(hasattr(item, "age") for item in result), "Missing 'age' field"
75-
75+
7676
# Verify specific values
7777
names = [item.name for item in result]
7878
assert "John Doe" in names, f"Expected 'John Doe' in {names}"

0 commit comments

Comments
 (0)