@@ -1298,7 +1298,11 @@ export class Doc2Vec {
12981298 // Get the last run date from the database
12991299 const startDate = config . start_date || `${ new Date ( ) . getFullYear ( ) } -01-01` ;
13001300 const lastRunDate = await DatabaseManager . getLastRunDate ( dbConnection , `zendesk_tickets_${ config . zendesk_subdomain } ` , `${ startDate } T00:00:00Z` , logger ) ;
1301-
1301+
1302+ // Status filter applied client-side (includes 'closed' by default so tickets
1303+ // transitioning to closed get updated rather than left stale)
1304+ const statusFilter = new Set ( config . ticket_status || [ 'new' , 'open' , 'pending' , 'hold' , 'solved' , 'closed' ] ) ;
1305+
13021306 const fetchWithRetry = async ( url : string , retries = 3 ) : Promise < any > => {
13031307 for ( let attempt = 0 ; attempt < retries ; attempt ++ ) {
13041308 try {
@@ -1308,16 +1312,19 @@ export class Doc2Vec {
13081312 'Content-Type' : 'application/json' ,
13091313 } ,
13101314 } ) ;
1311-
1312- if ( response . status === 429 ) {
1313- const retryAfter = parseInt ( response . headers [ 'retry-after' ] || '60' ) ;
1314- logger . warn ( `Rate limited, waiting ${ retryAfter } s before retry` ) ;
1315+ return response . data ;
1316+ } catch ( error : any ) {
1317+ // 403 is a permissions error — retrying won't help
1318+ if ( error . response ?. status === 403 ) throw error ;
1319+
1320+ if ( error . response ?. status === 429 ) {
1321+ const retryAfter = parseInt ( error . response . headers ?. [ 'retry-after' ] || '60' , 10 ) ;
1322+ logger . warn ( `Rate limited by Zendesk, waiting ${ retryAfter } s before retry` ) ;
13151323 await new Promise ( res => setTimeout ( res , retryAfter * 1000 ) ) ;
1324+ attempt -- ; // Don't burn a retry on rate-limit waits
13161325 continue ;
13171326 }
1318-
1319- return response . data ;
1320- } catch ( error : any ) {
1327+
13211328 logger . error ( `Zendesk API error (attempt ${ attempt + 1 } ):` , error . message ) ;
13221329 if ( attempt === retries - 1 ) throw error ;
13231330 await new Promise ( res => setTimeout ( res , 2000 * ( attempt + 1 ) ) ) ;
@@ -1367,118 +1374,96 @@ export class Doc2Vec {
13671374 const processTicket = async ( ticket : any ) : Promise < void > => {
13681375 const ticketId = ticket . id ;
13691376 const url = `https://${ config . zendesk_subdomain } .zendesk.com/agent/tickets/${ ticketId } ` ;
1370-
1377+
1378+ // Deleted tickets — remove their chunks and stop
1379+ if ( ticket . status === 'deleted' ) {
1380+ logger . info ( `Ticket #${ ticketId } was deleted in Zendesk — removing its chunks` ) ;
1381+ if ( dbConnection . type === 'sqlite' ) {
1382+ DatabaseManager . removeChunksByUrlSQLite ( dbConnection . db , url , logger ) ;
1383+ } else {
1384+ await DatabaseManager . removeChunksByUrlQdrant ( dbConnection , url , logger ) ;
1385+ }
1386+ return ;
1387+ }
1388+
1389+ // Skip tickets whose status is outside the configured filter
1390+ if ( ! statusFilter . has ( ticket . status ) ) {
1391+ logger . debug ( `Ticket #${ ticketId } has status '${ ticket . status } ' outside configured filter — skipping` ) ;
1392+ return ;
1393+ }
1394+
13711395 logger . info ( `Processing ticket #${ ticketId } ` ) ;
1372-
1396+
13731397 // Fetch ticket comments
13741398 const commentsUrl = `${ baseUrl } /tickets/${ ticketId } /comments.json` ;
13751399 const commentsData = await fetchWithRetry ( commentsUrl ) ;
13761400 const comments = commentsData ?. comments || [ ] ;
1377-
1401+
13781402 // Generate markdown for the ticket
13791403 const markdown = generateMarkdownForTicket ( ticket , comments ) ;
1380-
1404+
13811405 // Chunk the markdown content
13821406 const ticketConfig = {
13831407 ...config ,
13841408 product_name : config . product_name || `zendesk_${ config . zendesk_subdomain } ` ,
13851409 max_size : config . max_size || Infinity
13861410 } ;
1387-
1411+
13881412 const chunks = await this . contentProcessor . chunkMarkdown ( markdown , ticketConfig , url ) ;
13891413 logger . info ( `Ticket #${ ticketId } : Created ${ chunks . length } chunks` ) ;
1390-
1391- // Process and store each chunk
1392- for ( const chunk of chunks ) {
1393- const chunkHash = Utils . generateHash ( chunk . content ) ;
1394- const chunkId = chunk . metadata . chunk_id . substring ( 0 , 8 ) + '...' ;
1395-
1396- if ( dbConnection . type === 'sqlite' ) {
1397- const { checkHashStmt } = DatabaseManager . prepareSQLiteStatements ( dbConnection . db ) ;
1398- const existing = checkHashStmt . get ( chunk . metadata . chunk_id ) as { hash : string } | undefined ;
1399-
1400- if ( existing && existing . hash === chunkHash ) {
1401- logger . info ( `Skipping unchanged chunk: ${ chunkId } ` ) ;
1402- continue ;
1403- }
14041414
1405- const embeddings = await this . createEmbeddings ( [ chunk . content ] ) ;
1406- if ( embeddings . length ) {
1407- DatabaseManager . insertVectorsSQLite ( dbConnection . db , chunk , embeddings [ 0 ] , logger , chunkHash ) ;
1408- logger . debug ( `Stored chunk ${ chunkId } in SQLite` ) ;
1409- } else {
1410- logger . error ( `Embedding failed for chunk: ${ chunkId } ` ) ;
1411- }
1412- } else if ( dbConnection . type === 'qdrant' ) {
1413- try {
1414- let pointId : string ;
1415- try {
1416- pointId = chunk . metadata . chunk_id ;
1417- if ( ! Utils . isValidUuid ( pointId ) ) {
1418- pointId = Utils . hashToUuid ( chunk . metadata . chunk_id ) ;
1419- }
1420- } catch ( e ) {
1421- pointId = crypto . randomUUID ( ) ;
1422- }
1423-
1424- const existingPoints = await dbConnection . client . retrieve ( dbConnection . collectionName , {
1425- ids : [ pointId ] ,
1426- with_payload : true ,
1427- with_vector : false ,
1428- } ) ;
1429-
1430- if ( existingPoints . length > 0 && existingPoints [ 0 ] . payload && existingPoints [ 0 ] . payload . hash === chunkHash ) {
1431- logger . info ( `Skipping unchanged chunk: ${ chunkId } ` ) ;
1432- continue ;
1433- }
1434-
1435- const embeddings = await this . createEmbeddings ( [ chunk . content ] ) ;
1436- if ( embeddings . length ) {
1437- await DatabaseManager . storeChunkInQdrant ( dbConnection , chunk , embeddings [ 0 ] , chunkHash ) ;
1438- logger . debug ( `Stored chunk ${ chunkId } in Qdrant (${ dbConnection . collectionName } )` ) ;
1439- } else {
1440- logger . error ( `Embedding failed for chunk: ${ chunkId } ` ) ;
1441- }
1442- } catch ( error ) {
1443- logger . error ( `Error processing chunk in Qdrant:` , error ) ;
1444- }
1445- }
1446- }
1415+ // Use processChunksForUrl which performs a URL-level diff:
1416+ // deletes all existing chunks for this URL before reinserting,
1417+ // so stale chunks from previous versions are never left behind.
1418+ await this . processChunksForUrl ( chunks , url , dbConnection , logger ) ;
14471419 } ;
14481420
14491421 logger . info ( `Fetching Zendesk tickets updated since ${ lastRunDate } ` ) ;
1450-
1451- // Build query parameters
1452- const statusFilter = config . ticket_status || [ 'new' , 'open' , 'pending' , 'hold' , 'solved' ] ;
1453- const query = `updated>${ lastRunDate . split ( 'T' ) [ 0 ] } status:${ statusFilter . join ( ',status:' ) } ` ;
1454-
1455- let nextPage = `${ baseUrl } /search.json?query=${ encodeURIComponent ( query ) } &sort_by=updated_at&sort_order=asc` ;
1422+
1423+ // Build query parameters — use the status filter for the search query
1424+ const statusList = Array . from ( statusFilter ) ;
1425+ const query = `updated>${ lastRunDate . split ( 'T' ) [ 0 ] } status:${ statusList . join ( ',status:' ) } ` ;
1426+
1427+ let nextPage : string | null = `${ baseUrl } /search.json?query=${ encodeURIComponent ( query ) } &sort_by=updated_at&sort_order=asc` ;
14561428 let totalTickets = 0 ;
1457-
1429+ let failedTickets = 0 ;
1430+
14581431 while ( nextPage ) {
14591432 const data = await fetchWithRetry ( nextPage ) ;
14601433 const tickets = data . results || [ ] ;
1461-
1434+
14621435 logger . info ( `Processing batch of ${ tickets . length } tickets` ) ;
1463-
1436+
14641437 for ( const ticket of tickets ) {
1465- await processTicket ( ticket ) ;
1466- totalTickets ++ ;
1438+ try {
1439+ await processTicket ( ticket ) ;
1440+ totalTickets ++ ;
1441+ } catch ( error : any ) {
1442+ failedTickets ++ ;
1443+ logger . error ( `Failed to process ticket #${ ticket . id } , will retry next run: ${ error . message } ` ) ;
1444+ }
14671445 }
1468-
1469- nextPage = data . next_page ;
1470-
1446+
1447+ nextPage = data . next_page || null ;
1448+
14711449 if ( nextPage ) {
14721450 logger . debug ( `Fetching next page: ${ nextPage } ` ) ;
14731451 // Rate limiting: wait between requests
14741452 await new Promise ( res => setTimeout ( res , 1000 ) ) ;
14751453 }
14761454 }
14771455
1478- // Update the last run date in the database
1479- await DatabaseManager . updateLastRunDate ( dbConnection , `zendesk_tickets_${ config . zendesk_subdomain } ` , logger , this . embeddingDimension , syncStartDate ) ;
1480-
1481- logger . info ( `Successfully processed ${ totalTickets } tickets` ) ;
1456+ // Only advance the watermark when all tickets succeeded
1457+ if ( failedTickets === 0 ) {
1458+ await DatabaseManager . updateLastRunDate ( dbConnection , `zendesk_tickets_${ config . zendesk_subdomain } ` , logger , this . embeddingDimension , syncStartDate ) ;
1459+ logger . info ( `Successfully processed ${ totalTickets } tickets` ) ;
1460+ } else {
1461+ logger . warn (
1462+ `Run completed with ${ failedTickets } ticket failure(s). ` +
1463+ `Watermark NOT advanced — failed tickets will be retried next run. ` +
1464+ `Successfully processed: ${ totalTickets } .`
1465+ ) ;
1466+ }
14821467 }
14831468
14841469 private async fetchAndProcessZendeskArticles ( config : ZendeskSourceConfig , dbConnection : DatabaseConnection , logger : Logger ) : Promise < void > {
0 commit comments