// schedule-dedupe.ts
import { CronExpressionParser } from 'cron-parser'
import type { ScheduleType } from '@/types'
import { dedup } from '@/lib/shared-utils'

/**
 * Shape of an existing schedule record as consumed by the duplicate-detection
 * helpers in this module. Every field is optional; missing or malformed values
 * are normalized away by `toSignature`.
 */
export type ScheduleLike = {
  id?: string
  name?: string | null
  agentId?: string | null
  taskPrompt?: string | null
  scheduleType?: ScheduleType | string | null
  cron?: string | null
  intervalMs?: number | null
  runAt?: number | null
  status?: string | null
  updatedAt?: number | null
  createdAt?: number | null
  createdByAgentId?: string | null
  createdInSessionId?: string | null
}

/** A schedule that is about to be created or updated and must be checked for duplicates. */
export interface ScheduleDuplicateCandidate {
  id?: string | null
  agentId?: string | null
  taskPrompt?: string | null
  scheduleType?: ScheduleType | string | null
  cron?: string | null
  intervalMs?: number | null
  runAt?: number | null
  createdByAgentId?: string | null
  createdInSessionId?: string | null
}

/** Options accepted by `findDuplicateSchedule` and `findEquivalentSchedules`. */
export interface FindDuplicateScheduleOptions {
  /** Schedule id to skip; when empty, the candidate's own id is used instead. */
  ignoreId?: string | null
  /** Statuses eligible for matching; defaults to `active` and `paused` when empty or omitted. */
  includeStatuses?: string[]
  /**
   * Restricts matches by creator. A non-empty `sessionId` additionally enables
   * fuzzy (non-exact) matching.
   */
  creatorScope?: {
    agentId?: string | null
    sessionId?: string | null
  } | null
}

/** Normalized, comparison-ready view of a schedule. */
interface ScheduleSignature {
  id: string
  agentId: string
  taskPrompt: string
  promptTokens: string[]
  scheduleType: ScheduleType
  cron: string
  intervalMs: number | null
  runAt: number | null
}

type ScheduleMatchKind = 'exact' | 'fuzzy'

/** Words dropped from prompts before token-based (fuzzy) comparison. */
const PROMPT_STOPWORDS = new Set([
  'a',
  'an',
  'and',
  'any',
  'at',
  'back',
  'by',
  'check',
  'for',
  'from',
  'if',
  'in',
  'into',
  'me',
  'my',
  'of',
  'on',
  'once',
  'please',
  'remind',
  'report',
  'task',
  'the',
  'this',
  'to',
  'up',
  'update',
  'updates',
  'with',
])

/** Tokens of two characters or fewer are discarded unless listed here. */
const SHORT_TOKEN_ALLOWLIST = new Set(['ai', 'uk', 'us', 'eu'])

/** Two `once` schedules within this window count as the same cadence family (fuzzy match). */
const ONCE_MATCH_WINDOW_MS = 15 * 60 * 1000

/** Two `once` schedules within this window count as the same cadence (exact match). */
const ONCE_EXACT_TOLERANCE_MS = 1000

/** Statuses considered when the caller does not supply `includeStatuses`. */
const DEFAULT_INCLUDE_STATUSES = ['active', 'paused']

/**
 * Named cadence buckets. An interval falls into a bucket when it is within
 * `toleranceMs` of the bucket's nominal `ms`. Checked in order, first hit wins.
 */
const CADENCE_FAMILIES: ReadonlyArray<{ label: string; ms: number; toleranceMs: number }> = [
  { label: '15m', ms: 15 * 60 * 1000, toleranceMs: 60 * 1000 },
  { label: '30m', ms: 30 * 60 * 1000, toleranceMs: 2 * 60 * 1000 },
  { label: 'hourly', ms: 60 * 60 * 1000, toleranceMs: 5 * 60 * 1000 },
  { label: '6h', ms: 6 * 60 * 60 * 1000, toleranceMs: 15 * 60 * 1000 },
  { label: '12h', ms: 12 * 60 * 60 * 1000, toleranceMs: 30 * 60 * 1000 },
  { label: 'daily', ms: 24 * 60 * 60 * 1000, toleranceMs: 60 * 60 * 1000 },
  { label: 'weekly', ms: 7 * 24 * 60 * 60 * 1000, toleranceMs: 2 * 60 * 60 * 1000 },
]

/** Returns the trimmed string, or `''` for any non-string input. */
function normalizeString(value: unknown): string {
  return typeof value === 'string' ? value.trim() : ''
}

/** Trims, collapses whitespace runs to single spaces, and lower-cases a prompt. */
function normalizePrompt(value: unknown): string {
  const text = normalizeString(value)
  if (!text) return ''
  return text.replace(/\s+/g, ' ').trim().toLowerCase()
}

/**
 * Crude suffix stripping so simple inflections compare equal:
 * `-ies` -> `-y`, then `-ing`, `-ed`, and a trailing `-s` (but not `-ss`),
 * each guarded by a minimum length. Only the first applicable rule is applied.
 */
function normalizePromptToken(token: string): string {
  if (token.length > 4 && token.endsWith('ies')) return `${token.slice(0, -3)}y`
  if (token.length > 5 && token.endsWith('ing')) return token.slice(0, -3)
  if (token.length > 4 && token.endsWith('ed')) return token.slice(0, -2)
  if (token.length > 3 && token.endsWith('s') && !token.endsWith('ss')) return token.slice(0, -1)
  return token
}

/**
 * Splits a prompt into comparison tokens: lower-cased `[a-z0-9]` runs, stemmed
 * by `normalizePromptToken`, with stopwords and very short tokens removed.
 * Duplicates are preserved; order follows the prompt.
 */
function tokenizePrompt(value: unknown): string[] {
  const normalized = normalizePrompt(value).replace(/[^a-z0-9]+/g, ' ')
  if (!normalized) return []

  const tokens: string[] = []
  for (const piece of normalized.split(' ')) {
    const word = piece.trim()
    if (!word) continue
    // Check the raw word first: stemming turns the stopword "this" into "thi",
    // which would otherwise slip past the post-stemming stopword check below.
    if (PROMPT_STOPWORDS.has(word)) continue
    const token = normalizePromptToken(word)
    if (token.length <= 2 && !SHORT_TOKEN_ALLOWLIST.has(token)) continue
    // Check again after stemming so inflected forms ("checking", "tasks") are dropped too.
    if (PROMPT_STOPWORDS.has(token)) continue
    tokens.push(token)
  }
  return tokens
}

/** Trims a cron expression and collapses whitespace runs between its fields. */
function normalizeCron(value: unknown): string {
  const cron = normalizeString(value)
  if (!cron) return ''
  return cron.replace(/\s+/g, ' ').trim()
}

/**
 * Coerces a number or base-10 numeric string to a positive integer (truncating
 * any fraction). Returns `null` for anything else, including zero and negatives.
 */
function normalizePositiveInt(value: unknown): number | null {
  let parsed: number
  if (typeof value === 'number') parsed = value
  else if (typeof value === 'string') parsed = Number.parseInt(value, 10)
  else parsed = Number.NaN

  if (!Number.isFinite(parsed)) return null
  const intVal = Math.trunc(parsed)
  return intVal > 0 ? intVal : null
}

/** Accepts `cron`, `once` or `interval`; anything else falls back to `interval`. */
function normalizeScheduleType(value: unknown): ScheduleType {
  if (value === 'cron' || value === 'once' || value === 'interval') return value
  return 'interval'
}

/** Builds the normalized signature used for all comparisons in this module. */
function toSignature(raw: ScheduleLike | ScheduleDuplicateCandidate): ScheduleSignature {
  return {
    id: normalizeString(raw.id),
    agentId: normalizeString(raw.agentId),
    taskPrompt: normalizePrompt(raw.taskPrompt),
    promptTokens: tokenizePrompt(raw.taskPrompt),
    scheduleType: normalizeScheduleType(raw.scheduleType),
    cron: normalizeCron(raw.cron),
    intervalMs: normalizePositiveInt(raw.intervalMs),
    runAt: normalizePositiveInt(raw.runAt),
  }
}

/** Serializes the cadence part of a signature as `<type>:<value>`. */
function cadenceKey(signature: ScheduleSignature): string {
  if (signature.scheduleType === 'cron') return `cron:${signature.cron || ''}`
  if (signature.scheduleType === 'interval') return `interval:${signature.intervalMs ?? ''}`
  if (signature.scheduleType === 'once') return `once:${signature.runAt ?? ''}`
  return signature.scheduleType
}

/**
 * Returns a stable string key identifying a schedule by agent, normalized
 * prompt, schedule type and cadence.
 *
 * @returns The key, or `''` when the agent id or prompt is missing or the
 *   cadence is incomplete (empty cron, missing interval, or missing run time).
 */
export function getScheduleSignatureKey(input: ScheduleLike | ScheduleDuplicateCandidate): string {
  const signature = toSignature(input)
  if (!signature.agentId || !signature.taskPrompt) return ''
  // Comparing a signature with itself is a completeness check on its cadence fields.
  if (!sameCadence(signature, signature)) return ''
  return `${signature.agentId}::${signature.taskPrompt}::${signature.scheduleType}::${cadenceKey(signature)}`
}

/**
 * True when both signatures have the same schedule type and the same concrete
 * cadence: identical non-empty cron, identical non-null interval, or `once`
 * run times no more than `ONCE_EXACT_TOLERANCE_MS` apart.
 */
function sameCadence(a: ScheduleSignature, b: ScheduleSignature): boolean {
  if (a.scheduleType !== b.scheduleType) return false
  if (a.scheduleType === 'cron') return a.cron !== '' && a.cron === b.cron
  if (a.scheduleType === 'interval') return a.intervalMs != null && a.intervalMs === b.intervalMs
  if (a.scheduleType === 'once') {
    if (a.runAt == null || b.runAt == null) return false
    return Math.abs(a.runAt - b.runAt) <= ONCE_EXACT_TOLERANCE_MS
  }
  return false
}

/**
 * Estimates the period of a cron expression as the gap between its first two
 * occurrences after a fixed reference date, so the result does not depend on
 * when the function is called. Only that first gap is measured, so an
 * irregular expression is represented by a single sample.
 *
 * @returns The gap in milliseconds, or `null` when the expression is empty,
 *   cannot be parsed/iterated, or yields a non-positive gap.
 */
function tryResolveCronIntervalMs(cron: string): number | null {
  if (!cron) return null
  try {
    const interval = CronExpressionParser.parse(cron, {
      currentDate: new Date('2026-01-01T00:00:00.000Z'),
    })
    const first = interval.next().getTime()
    const second = interval.next().getTime()
    const diff = second - first
    return diff > 0 ? diff : null
  } catch {
    // Best effort: an unparsable expression simply has no cadence family.
    return null
  }
}

/**
 * Maps an interval to a cadence-family label. Intervals close to a well-known
 * period get that period's label; anything else is labelled by its length
 * rounded to whole minutes. Returns `''` for a missing or non-positive interval.
 */
function cadenceFamilyFromMs(intervalMs: number | null): string {
  if (intervalMs == null || intervalMs <= 0) return ''

  for (const family of CADENCE_FAMILIES) {
    if (Math.abs(intervalMs - family.ms) <= family.toleranceMs) return family.label
  }

  return `interval:${Math.round(intervalMs / 60_000)}m`
}

/** Cadence-family label for a signature, or `''` when it cannot be determined. */
function cadenceFamily(signature: ScheduleSignature): string {
  if (signature.scheduleType === 'once') return signature.runAt != null ? 'once' : ''
  if (signature.scheduleType === 'interval') return cadenceFamilyFromMs(signature.intervalMs)
  if (signature.scheduleType === 'cron') return cadenceFamilyFromMs(tryResolveCronIntervalMs(signature.cron))
  return ''
}

/**
 * Looser variant of `sameCadence`: also true for two `once` schedules within
 * `ONCE_MATCH_WINDOW_MS` of each other, and for recurring schedules (cron or
 * interval, in any combination) that resolve to the same cadence family.
 * A `once` schedule never matches a recurring one.
 */
function sameCadenceFamily(a: ScheduleSignature, b: ScheduleSignature): boolean {
  if (sameCadence(a, b)) return true
  if (a.scheduleType === 'once' && b.scheduleType === 'once') {
    if (a.runAt == null || b.runAt == null) return false
    return Math.abs(a.runAt - b.runAt) <= ONCE_MATCH_WINDOW_MS
  }
  if (a.scheduleType === 'once' || b.scheduleType === 'once') return false
  const aFamily = cadenceFamily(a)
  const bFamily = cadenceFamily(b)
  return aFamily !== '' && aFamily === bFamily
}

/** Counts the distinct tokens that appear in both lists. */
function countTokenOverlap(a: string[], b: string[]): number {
  if (!a.length || !b.length) return 0
  const aIsSmaller = a.length <= b.length
  const smaller = aIsSmaller ? a : b
  const largerSet = new Set(aIsSmaller ? b : a)
  let overlap = 0
  for (const token of new Set(smaller)) {
    if (largerSet.has(token)) overlap += 1
  }
  return overlap
}

/**
 * Decides whether two prompts are similar enough to describe the same task,
 * based on their distinct tokens:
 * - when the smaller token set has one or two tokens, all of them must appear
 *   in the other set;
 * - otherwise at least two tokens must be shared, covering at least 67% of the
 *   smaller set, and either the Jaccard similarity is at least 0.5 or the
 *   overlap misses at most one token of the larger set.
 */
function hasFuzzyPromptMatch(a: ScheduleSignature, b: ScheduleSignature): boolean {
  if (!a.promptTokens.length || !b.promptTokens.length) return false
  // NOTE(review): `dedup` comes from shared-utils; presumably it returns the
  // distinct values of the array - confirm against its implementation.
  const uniqueA = dedup(a.promptTokens)
  const uniqueB = dedup(b.promptTokens)
  const overlap = countTokenOverlap(uniqueA, uniqueB)
  if (overlap === 0) return false

  const smallerSize = Math.min(uniqueA.length, uniqueB.length)
  const largerSize = Math.max(uniqueA.length, uniqueB.length)
  if (smallerSize <= 2) return overlap === smallerSize

  const coverage = overlap / smallerSize
  const jaccard = overlap / new Set([...uniqueA, ...uniqueB]).size
  return overlap >= 2 && coverage >= 0.67 && (jaccard >= 0.5 || overlap >= Math.max(2, largerSize - 1))
}

/**
 * True when the schedule's status (trimmed, lower-cased, defaulting to
 * `active` when missing or blank) is in `includeStatuses`.
 */
function isEligibleStatus(status: unknown, includeStatuses: Set<string>): boolean {
  const normalized = normalizeString(status).toLowerCase() || 'active'
  return includeStatuses.has(normalized)
}

/**
 * Checks an existing schedule against the requested creator scope. A scope
 * field only excludes a schedule when both the scope and the schedule carry a
 * value for it and the values differ; schedules without creator metadata
 * always pass.
 */
function matchesCreatorScope(
  schedule: ScheduleLike,
  scope: FindDuplicateScheduleOptions['creatorScope'],
): boolean {
  if (!scope) return true
  const scopeAgent = normalizeString(scope.agentId)
  const scopeSession = normalizeString(scope.sessionId)
  if (!scopeAgent && !scopeSession) return true

  const existingAgent = normalizeString(schedule.createdByAgentId)
  const existingSession = normalizeString(schedule.createdInSessionId)

  if (scopeAgent && existingAgent && scopeAgent !== existingAgent) return false
  if (scopeSession && existingSession && scopeSession !== existingSession) return false
  return true
}

/** Sort comparator: most recently updated first, falling back to `createdAt`, then 0. */
function compareUpdatedDesc(a: ScheduleLike, b: ScheduleLike): number {
  const timestampOf = (schedule: ScheduleLike): number => {
    if (typeof schedule.updatedAt === 'number') return schedule.updatedAt
    if (typeof schedule.createdAt === 'number') return schedule.createdAt
    return 0
  }
  return timestampOf(b) - timestampOf(a)
}

/**
 * Returns the best existing schedule equivalent to the candidate, or `null`
 * when there is none. See `findEquivalentSchedules` for matching and ordering.
 */
export function findDuplicateSchedule(
  schedules: Record<string, ScheduleLike>,
  candidateRaw: ScheduleDuplicateCandidate,
  opts: FindDuplicateScheduleOptions = {},
): ScheduleLike | null {
  return findEquivalentSchedules(schedules, candidateRaw, opts)[0] ?? null
}

/**
 * Finds existing schedules that duplicate the candidate.
 *
 * An existing schedule is considered only when it has an id, is not the
 * ignored id, has an eligible status, passes the creator scope, and belongs to
 * the same agent as the candidate. It then matches either:
 * - exactly: identical normalized prompt and the same cadence; or
 * - fuzzily: similar prompt tokens and the same cadence family. Fuzzy matching
 *   is enabled only when `opts.creatorScope.sessionId` is non-empty.
 *
 * @param schedules Existing schedules; only the values are inspected.
 * @param candidateRaw The schedule being checked.
 * @param opts Matching options.
 * @returns Matches with exact ones first, each group ordered most recently
 *   updated first. Empty when the candidate has no agent id or no prompt.
 */
export function findEquivalentSchedules(
  schedules: Record<string, ScheduleLike>,
  candidateRaw: ScheduleDuplicateCandidate,
  opts: FindDuplicateScheduleOptions = {},
): ScheduleLike[] {
  const candidate = toSignature(candidateRaw)
  if (!candidate.agentId) return []
  if (!candidate.taskPrompt) return []

  const ignoreId = normalizeString(opts.ignoreId || candidate.id)
  const requestedStatuses = opts.includeStatuses?.length ? opts.includeStatuses : DEFAULT_INCLUDE_STATUSES
  // Normalize requested statuses the same way isEligibleStatus normalizes the
  // stored status (trim + lower-case), so padded entries still match and a
  // non-string entry cannot throw.
  const statuses = new Set(requestedStatuses.map((status) => normalizeString(status).toLowerCase()))
  const creatorScope = opts.creatorScope || null
  const fuzzyEnabled = Boolean(normalizeString(opts.creatorScope?.sessionId))

  const matches: Array<{ existing: ScheduleLike; kind: ScheduleMatchKind }> = []
  for (const existing of Object.values(schedules)) {
    // Defensive: the record may come from untyped storage.
    if (!existing || typeof existing !== 'object') continue

    const signature = toSignature(existing)
    if (!signature.id) continue
    if (ignoreId && signature.id === ignoreId) continue
    if (!isEligibleStatus(existing.status, statuses)) continue
    if (!matchesCreatorScope(existing, creatorScope)) continue
    if (signature.agentId !== candidate.agentId) continue

    if (signature.taskPrompt === candidate.taskPrompt && sameCadence(signature, candidate)) {
      matches.push({ existing, kind: 'exact' })
      continue
    }

    if (
      fuzzyEnabled
      && hasFuzzyPromptMatch(signature, candidate)
      && sameCadenceFamily(signature, candidate)
    ) {
      matches.push({ existing, kind: 'fuzzy' })
    }
  }

  matches.sort((a, b) => {
    if (a.kind !== b.kind) return a.kind === 'exact' ? -1 : 1
    return compareUpdatedDesc(a.existing, b.existing)
  })

  return matches.map((entry) => entry.existing)
}