-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcron.php
More file actions
192 lines (163 loc) · 7.61 KB
/
cron.php
File metadata and controls
192 lines (163 loc) · 7.61 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
<?php
// Prevent direct web access
if (php_sapi_name() !== 'cli') {
die('This script can only be run from the command line.');
}
require 'boot.php';
require_once 'vendor/j4mie/idiorm/idiorm.php';
require_once 'app/controllers/ETL.php';
require_once 'func.php';
// --- Database Configuration for Cron ---
// The cron environment doesn't seem to pick up the DB config automatically.
// We will explicitly configure it here using the values from config.php.
if (file_exists('config.php')) {
require_once 'config.php';
if (isset($config) && !empty($config['database_host'])) {
ORM::configure('mysql:host=' . $config['database_host'] . ';dbname=' . $config['database_dbname']);
ORM::configure('username', $config['database_user']);
ORM::configure('password', $config['database_password']);
} else {
die("Database configuration not found or is empty in config.php. Cron job cannot continue.\n");
}
} else {
die("config.php not found. Cron job cannot continue.\n");
}
// Set a default timezone if not set in php.ini
date_default_timezone_set('UTC');
echo "Cron job started at " . date('Y-m-d H:i:s') . "\n";
/**
* Sends an email notification for a failed ETL job.
*
* @param string $to_email The recipient email address.
* @param object $saved_query The saved query object.
* @param string $error_message The error message from the failed job.
* @return bool True if email was sent successfully, false otherwise.
*/
function sendFailureNotification($to_email, $saved_query, $error_message) {
if (empty($to_email) || !filter_var($to_email, FILTER_VALIDATE_EMAIL)) {
return false;
}
$subject = "ETL Job Failed: " . $saved_query->query_name;
$message = "Hello,\n\n";
$message .= "This is an automated notification to inform you that a scheduled ETL job has failed.\n\n";
$message .= "Job Details:\n";
$message .= "- Query Name: " . $saved_query->query_name . "\n";
$message .= "- Query ID: " . $saved_query->id . "\n";
$message .= "- Failure Time: " . date('Y-m-d H:i:s') . "\n";
$message .= "- Error Message: " . $error_message . "\n\n";
$message .= "Please check the ETL configuration and resolve the issue.\n\n";
$message .= "Best regards,\n";
$message .= "QueryCloud ETL System";
$headers = "From: noreply@querycloud.local\r\n";
$headers .= "Reply-To: noreply@querycloud.local\r\n";
$headers .= "X-Mailer: PHP/" . phpversion();
return mail($to_email, $subject, $message, $headers);
}
/**
* Checks if a scheduled ETL job is due to run based on its specific configuration.
*
* @param array $etl_config The decoded etl_config from the database.
* @param DateTime $now The current time.
* @return bool True if the job is due, false otherwise.
*/
function isJobDue($etl_config, DateTime $now) {
$schedule_type = $etl_config['schedule_type'] ?? 'inactive';
if ($schedule_type === 'inactive') {
return false;
}
$last_run_at = null;
if (isset($etl_config['last_run_at'])) {
try {
$last_run_at = new DateTime($etl_config['last_run_at']);
} catch (Exception $e) {
echo "Skipping job due to invalid last_run_at date format: " . $etl_config['last_run_at'] . "\n";
return false;
}
}
switch ($schedule_type) {
case 'minutely':
$interval = (int)($etl_config['schedule_interval'] ?? 5);
if (!$last_run_at) return true; // First run
$next_run_at = (clone $last_run_at)->add(new DateInterval("PT{$interval}M"));
return $now >= $next_run_at;
case 'hourly':
$allowed_hours = $etl_config['schedule_hours'] ?? [];
if (empty($allowed_hours)) return false;
$current_hour = (int)$now->format('G'); // 0-23 format
if (!in_array($current_hour, $allowed_hours)) {
return false; // Not a scheduled hour
}
if (!$last_run_at) return true; // Never run before, and it's a valid hour
// Check if the last run was in a different hour. This prevents multiple runs in the same hour.
return $last_run_at->format('Y-m-d H') !== $now->format('Y-m-d H');
case 'daily':
$allowed_days = $etl_config['schedule_days'] ?? [];
if (empty($allowed_days)) return false;
$current_day = (int)$now->format('j'); // 1-31 format
if (!in_array($current_day, $allowed_days)) {
return false; // Not a scheduled day
}
// Check if time is past midnight (it always will be if cron runs after 00:00)
if (!$last_run_at) return true; // Never run before, and it's a valid day
// Check if the last run was on a different day. Prevents multiple runs on the same day.
return $last_run_at->format('Y-m-d') !== $now->format('Y-m-d');
case 'weekly':
if (!$last_run_at) return true; // First run
$next_run_at = (clone $last_run_at)->add(new DateInterval('P1W'));
return $now >= $next_run_at;
default:
return false;
}
}
// --- Main Execution Logic ---
$now = new DateTime();
$saved_queries = ORM::for_table('saved_queries')->where_not_null('etl_config')->find_many();
echo "Found " . count($saved_queries) . " queries with ETL config.\n";
foreach ($saved_queries as $saved_query) {
$etl_config = json_decode($saved_query->etl_config, true);
if (json_last_error() !== JSON_ERROR_NONE || !is_array($etl_config)) {
echo "Skipping query ID {$saved_query->id} due to invalid ETL config JSON.\n";
continue;
}
if (isJobDue($etl_config, $now)) {
echo "Query ID {$saved_query->id} ('{$saved_query->query_name}') is due. Starting ETL process.\n";
$log = ORM::for_table('etl_logs')->create();
$log->saved_query_id = $saved_query->id;
$log->execution_time = date('Y-m-d H:i:s');
$log->status = 'running';
$log->message = 'ETL process started.';
$log->save();
// Execute the refactored ETL job logic
$result = ETL::executeEtlJob($saved_query);
$current_execution_time = date('Y-m-d H:i:s');
// Update the log with the result
if ($result['status'] === 'success' || $result['status'] === 'info') {
$log->status = 'success';
// Update the 'last_run_at' timestamp ONLY on successful execution
$etl_config['last_run_at'] = $current_execution_time;
$saved_query->etl_config = json_encode($etl_config);
$saved_query->save();
echo "ETL for query ID {$saved_query->id} completed successfully.\n";
} else { // 'error'
$log->status = 'failed';
echo "ETL for query ID {$saved_query->id} failed: " . $result['message'] . "\n";
// Send email notification if configured
$notification_email = $etl_config['notification_email'] ?? '';
if (!empty($notification_email)) {
echo "Sending failure notification to: {$notification_email}\n";
$email_sent = sendFailureNotification($notification_email, $saved_query, $result['message']);
if ($email_sent) {
echo "Email notification sent successfully.\n";
} else {
echo "Failed to send email notification.\n";
}
}
}
$log->message = $result['message'];
$log->ended_at = $current_execution_time;
$log->save();
echo "Finished processing for query ID {$saved_query->id}.\n\n";
}
}
echo "Cron job finished at " . date('Y-m-d H:i:s') . "\n";
?>