-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmodel_analysis.py
More file actions
116 lines (97 loc) · 4.42 KB
/
model_analysis.py
File metadata and controls
116 lines (97 loc) · 4.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
"""
Model Analysis Script - Prints dataset insights and saves a JSON summary
"""
import pandas as pd
import numpy as np
import json
import joblib
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
# ---------------------------------------------------------------------------
# Load the raw dataset, strip bookkeeping columns, encode the categorical
# columns as integers, and load the persisted models.
# ---------------------------------------------------------------------------
print("Loading training data and models...")

df = pd.read_csv('StudentPerformanceFactors.csv')

# Identifier / bookkeeping columns removed before analysis. Labels that are
# not present in the CSV are skipped instead of raising.
# NOTE(review): the original comment says this preprocessing mirrors
# train_advanced.py - confirm the two stay in sync.
columns_to_drop = [
    'Student_ID', 'Student_Name', 'Enrollment_Number', 'Academic_Year',
    'Admission_Date', 'Data_Entry_Date', 'Enrollment_Status',
    'Previous_Scores_Semester_Wise', 'Section',
]
df_clean = df.drop(columns=columns_to_drop, errors='ignore')

# Shared scales, referenced by several columns below.
low_medium_high = {'Low': 0, 'Medium': 1, 'High': 2}
no_yes = {'No': 0, 'Yes': 1}

# Column name -> {category label: integer code}.
mappings = {
    'Parental_Involvement': low_medium_high,
    'Access_to_Resources': low_medium_high,
    'Motivation_Level': low_medium_high,
    'Family_Income': low_medium_high,
    'Teacher_Quality': low_medium_high,
    'Parental_Education_Level': {'High School': 0, 'College': 1, 'Postgraduate': 2},
    'Distance_from_Home': {'Near': 0, 'Moderate': 1, 'Far': 2},
    'Extracurricular_Activities': no_yes,
    'Internet_Access': no_yes,
    'Learning_Disabilities': no_yes,
    'School_Type': {'Public': 0, 'Private': 1},
    'Gender': {'Male': 0, 'Female': 1},
    'Peer_Influence': {'Negative': 0, 'Neutral': 1, 'Positive': 2},
}

# Encode on a copy so df_clean keeps the original string labels. Only
# columns that actually exist are encoded; values missing from a column's
# mapping become NaN (pandas Series.map semantics).
df_processed = df_clean.copy()
encodable_columns = [name for name in mappings if name in df_processed.columns]
for column_name in encodable_columns:
    df_processed[column_name] = df_processed[column_name].map(mappings[column_name])

# NOTE(review): joblib.load unpickles arbitrary objects - only load trusted
# files. Neither object is referenced again in the visible part of this
# script, but the loads still require both files to exist.
models = joblib.load('all_models.pkl')
model = joblib.load('student_performance_model.pkl')
# ---------------------------------------------------------------------------
# Correlation analysis: pairwise correlations between all numeric columns
# (including the integer-encoded categoricals), ranked against Exam_Score.
# ---------------------------------------------------------------------------
print("\nGenerating correlation analysis...")
numeric_cols = df_processed.select_dtypes(include=[np.number]).columns
correlation_matrix = df_processed[numeric_cols].corr()

# Sorted from highest to lowest signed correlation, so strongly negative
# features end up at the bottom. The series still contains Exam_Score's own
# entry so later code that reads `top_correlations` sees the same data.
top_correlations = correlation_matrix['Exam_Score'].sort_values(ascending=False)

print("\nTop 10 Features Correlated with Exam Score:")
# Remove the target by label rather than assuming it sorted into position 0:
# a feature tied with it at 1.0 could otherwise push Exam_Score into the
# printed list and drop a genuine feature.
top_feature_correlations = top_correlations.drop(labels='Exam_Score').head(10)
for i, (feat, corr) in enumerate(top_feature_correlations.items(), 1):
    print(f" {i}. {feat}: {corr:.4f}")
# ---------------------------------------------------------------------------
# Grade-level analysis: exam score mean / standard deviation / row count
# for each year 1-4, computed on the raw (unencoded) frame. Years with no
# matching rows are skipped silently.
# ---------------------------------------------------------------------------
print("\nGrade Level Performance Analysis:")
for grade in (1, 2, 3, 4):
    cohort = df.loc[df['Grade_Level'] == grade]
    if cohort.empty:
        continue
    cohort_scores = cohort['Exam_Score']
    print(
        f" Year {grade}: Mean={cohort_scores.mean():.2f}, "
        f"Std={cohort_scores.std():.2f}, N={len(cohort)}"
    )
# ---------------------------------------------------------------------------
# Demographics analysis: exam score mean / std / count per Gender and per
# School_Type, taken from the raw frame so group labels stay readable.
# ---------------------------------------------------------------------------
print("\nDemographics Analysis:")
score_stats = ['mean', 'std', 'count']

gender_analysis = df.groupby('Gender')['Exam_Score'].agg(score_stats)
# One print call with a newline separator emits the heading and the table
# on consecutive lines.
print(" By Gender:", gender_analysis, sep="\n")

school_analysis = df.groupby('School_Type')['Exam_Score'].agg(score_stats)
print("\n By School Type:", school_analysis, sep="\n")
# ---------------------------------------------------------------------------
# Behavioral patterns: compare average study hours, attendance and sleep
# between high scorers (>= 80) and low scorers (< 60). Students scoring
# 60-79 belong to neither group.
# ---------------------------------------------------------------------------
print("\nBehavioral Patterns:")
high_performers = df[df['Exam_Score'] >= 80]
low_performers = df[df['Exam_Score'] < 60]

# (heading, group) pairs, reported in this order with identical metrics.
performer_groups = (
    (" High Performers (Score >= 80)", high_performers),
    ("\n Low Performers (Score < 60)", low_performers),
)
for heading, group in performer_groups:
    print(f"{heading}: {len(group)} students")
    print(f" Avg Hours Studied: {group['Hours_Studied'].mean():.2f}")
    print(f" Avg Attendance: {group['Attendance'].mean():.2f}%")
    print(f" Avg Sleep Hours: {group['Sleep_Hours'].mean():.2f}")
# ---------------------------------------------------------------------------
# Save analysis summary: collect the headline numbers and write them to
# analysis_summary.json as strictly valid JSON.
# ---------------------------------------------------------------------------


def _json_number(value):
    """Return *value* as a float, or None when it is NaN or infinite.

    pandas yields NaN for the mean of an empty selection, the std of a
    single row, or an undefined correlation. json.dump would write those as
    the bare token ``NaN``, which is not valid JSON, so they are stored as
    ``null`` instead.
    """
    number = float(value)
    return number if np.isfinite(number) else None


# Filter each year once and reuse the selection for both count and mean.
grade_levels = {}
for year in [1, 2, 3, 4]:
    year_scores = df.loc[df['Grade_Level'] == year, 'Exam_Score']
    grade_levels[f'year_{year}'] = {
        'count': int(len(year_scores)),
        'mean_score': _json_number(year_scores.mean()),
    }

# Drop the target by label instead of assuming it sorted into position 0
# (a feature tied at 1.0 could otherwise displace it).
top_feature_correlations = top_correlations.drop(labels='Exam_Score').head(10)

analysis_summary = {
    'total_students': len(df),
    'mean_score': _json_number(df['Exam_Score'].mean()),
    'std_score': _json_number(df['Exam_Score'].std()),
    'high_performers': len(high_performers),
    'low_performers': len(low_performers),
    'grade_levels': grade_levels,
    'top_correlations': {
        feat: _json_number(corr)
        for feat, corr in top_feature_correlations.items()
    },
}

with open('analysis_summary.json', 'w', encoding='utf-8') as f:
    # allow_nan=False makes any remaining non-finite value fail loudly
    # rather than producing a file that strict JSON parsers reject.
    json.dump(analysis_summary, f, indent=4, allow_nan=False)

print("\n✅ Analysis complete. Results saved to 'analysis_summary.json'")