Skip to content

Commit c8d21ee

Browse files
authored
Fixing zendesk bugs (#68)
* Fixing zendesk bugs Signed-off-by: Denis Jannot <denis.jannot@solo.io> * Updating version Signed-off-by: Denis Jannot <denis.jannot@solo.io> --------- Signed-off-by: Denis Jannot <denis.jannot@solo.io>
1 parent 433b284 commit c8d21ee

3 files changed

Lines changed: 295 additions & 89 deletions

File tree

doc2vec.ts

Lines changed: 72 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -1298,7 +1298,11 @@ export class Doc2Vec {
12981298
// Get the last run date from the database
12991299
const startDate = config.start_date || `${new Date().getFullYear()}-01-01`;
13001300
const lastRunDate = await DatabaseManager.getLastRunDate(dbConnection, `zendesk_tickets_${config.zendesk_subdomain}`, `${startDate}T00:00:00Z`, logger);
1301-
1301+
1302+
// Status filter applied client-side (includes 'closed' by default so tickets
1303+
// transitioning to closed get updated rather than left stale)
1304+
const statusFilter = new Set(config.ticket_status || ['new', 'open', 'pending', 'hold', 'solved', 'closed']);
1305+
13021306
const fetchWithRetry = async (url: string, retries = 3): Promise<any> => {
13031307
for (let attempt = 0; attempt < retries; attempt++) {
13041308
try {
@@ -1308,16 +1312,19 @@ export class Doc2Vec {
13081312
'Content-Type': 'application/json',
13091313
},
13101314
});
1311-
1312-
if (response.status === 429) {
1313-
const retryAfter = parseInt(response.headers['retry-after'] || '60');
1314-
logger.warn(`Rate limited, waiting ${retryAfter}s before retry`);
1315+
return response.data;
1316+
} catch (error: any) {
1317+
// 403 is a permissions error — retrying won't help
1318+
if (error.response?.status === 403) throw error;
1319+
1320+
if (error.response?.status === 429) {
1321+
const retryAfter = parseInt(error.response.headers?.['retry-after'] || '60', 10);
1322+
logger.warn(`Rate limited by Zendesk, waiting ${retryAfter}s before retry`);
13151323
await new Promise(res => setTimeout(res, retryAfter * 1000));
1324+
attempt--; // Don't burn a retry on rate-limit waits
13161325
continue;
13171326
}
1318-
1319-
return response.data;
1320-
} catch (error: any) {
1327+
13211328
logger.error(`Zendesk API error (attempt ${attempt + 1}):`, error.message);
13221329
if (attempt === retries - 1) throw error;
13231330
await new Promise(res => setTimeout(res, 2000 * (attempt + 1)));
@@ -1367,118 +1374,96 @@ export class Doc2Vec {
13671374
const processTicket = async (ticket: any): Promise<void> => {
13681375
const ticketId = ticket.id;
13691376
const url = `https://${config.zendesk_subdomain}.zendesk.com/agent/tickets/${ticketId}`;
1370-
1377+
1378+
// Deleted tickets — remove their chunks and stop
1379+
if (ticket.status === 'deleted') {
1380+
logger.info(`Ticket #${ticketId} was deleted in Zendesk — removing its chunks`);
1381+
if (dbConnection.type === 'sqlite') {
1382+
DatabaseManager.removeChunksByUrlSQLite(dbConnection.db, url, logger);
1383+
} else {
1384+
await DatabaseManager.removeChunksByUrlQdrant(dbConnection, url, logger);
1385+
}
1386+
return;
1387+
}
1388+
1389+
// Skip tickets whose status is outside the configured filter
1390+
if (!statusFilter.has(ticket.status)) {
1391+
logger.debug(`Ticket #${ticketId} has status '${ticket.status}' outside configured filter — skipping`);
1392+
return;
1393+
}
1394+
13711395
logger.info(`Processing ticket #${ticketId}`);
1372-
1396+
13731397
// Fetch ticket comments
13741398
const commentsUrl = `${baseUrl}/tickets/${ticketId}/comments.json`;
13751399
const commentsData = await fetchWithRetry(commentsUrl);
13761400
const comments = commentsData?.comments || [];
1377-
1401+
13781402
// Generate markdown for the ticket
13791403
const markdown = generateMarkdownForTicket(ticket, comments);
1380-
1404+
13811405
// Chunk the markdown content
13821406
const ticketConfig = {
13831407
...config,
13841408
product_name: config.product_name || `zendesk_${config.zendesk_subdomain}`,
13851409
max_size: config.max_size || Infinity
13861410
};
1387-
1411+
13881412
const chunks = await this.contentProcessor.chunkMarkdown(markdown, ticketConfig, url);
13891413
logger.info(`Ticket #${ticketId}: Created ${chunks.length} chunks`);
1390-
1391-
// Process and store each chunk
1392-
for (const chunk of chunks) {
1393-
const chunkHash = Utils.generateHash(chunk.content);
1394-
const chunkId = chunk.metadata.chunk_id.substring(0, 8) + '...';
1395-
1396-
if (dbConnection.type === 'sqlite') {
1397-
const { checkHashStmt } = DatabaseManager.prepareSQLiteStatements(dbConnection.db);
1398-
const existing = checkHashStmt.get(chunk.metadata.chunk_id) as { hash: string } | undefined;
1399-
1400-
if (existing && existing.hash === chunkHash) {
1401-
logger.info(`Skipping unchanged chunk: ${chunkId}`);
1402-
continue;
1403-
}
14041414

1405-
const embeddings = await this.createEmbeddings([chunk.content]);
1406-
if (embeddings.length) {
1407-
DatabaseManager.insertVectorsSQLite(dbConnection.db, chunk, embeddings[0], logger, chunkHash);
1408-
logger.debug(`Stored chunk ${chunkId} in SQLite`);
1409-
} else {
1410-
logger.error(`Embedding failed for chunk: ${chunkId}`);
1411-
}
1412-
} else if (dbConnection.type === 'qdrant') {
1413-
try {
1414-
let pointId: string;
1415-
try {
1416-
pointId = chunk.metadata.chunk_id;
1417-
if (!Utils.isValidUuid(pointId)) {
1418-
pointId = Utils.hashToUuid(chunk.metadata.chunk_id);
1419-
}
1420-
} catch (e) {
1421-
pointId = crypto.randomUUID();
1422-
}
1423-
1424-
const existingPoints = await dbConnection.client.retrieve(dbConnection.collectionName, {
1425-
ids: [pointId],
1426-
with_payload: true,
1427-
with_vector: false,
1428-
});
1429-
1430-
if (existingPoints.length > 0 && existingPoints[0].payload && existingPoints[0].payload.hash === chunkHash) {
1431-
logger.info(`Skipping unchanged chunk: ${chunkId}`);
1432-
continue;
1433-
}
1434-
1435-
const embeddings = await this.createEmbeddings([chunk.content]);
1436-
if (embeddings.length) {
1437-
await DatabaseManager.storeChunkInQdrant(dbConnection, chunk, embeddings[0], chunkHash);
1438-
logger.debug(`Stored chunk ${chunkId} in Qdrant (${dbConnection.collectionName})`);
1439-
} else {
1440-
logger.error(`Embedding failed for chunk: ${chunkId}`);
1441-
}
1442-
} catch (error) {
1443-
logger.error(`Error processing chunk in Qdrant:`, error);
1444-
}
1445-
}
1446-
}
1415+
// Use processChunksForUrl which performs a URL-level diff:
1416+
// deletes all existing chunks for this URL before reinserting,
1417+
// so stale chunks from previous versions are never left behind.
1418+
await this.processChunksForUrl(chunks, url, dbConnection, logger);
14471419
};
14481420

14491421
logger.info(`Fetching Zendesk tickets updated since ${lastRunDate}`);
1450-
1451-
// Build query parameters
1452-
const statusFilter = config.ticket_status || ['new', 'open', 'pending', 'hold', 'solved'];
1453-
const query = `updated>${lastRunDate.split('T')[0]} status:${statusFilter.join(',status:')}`;
1454-
1455-
let nextPage = `${baseUrl}/search.json?query=${encodeURIComponent(query)}&sort_by=updated_at&sort_order=asc`;
1422+
1423+
// Build query parameters — use the status filter for the search query
1424+
const statusList = Array.from(statusFilter);
1425+
const query = `updated>${lastRunDate.split('T')[0]} status:${statusList.join(',status:')}`;
1426+
1427+
let nextPage: string | null = `${baseUrl}/search.json?query=${encodeURIComponent(query)}&sort_by=updated_at&sort_order=asc`;
14561428
let totalTickets = 0;
1457-
1429+
let failedTickets = 0;
1430+
14581431
while (nextPage) {
14591432
const data = await fetchWithRetry(nextPage);
14601433
const tickets = data.results || [];
1461-
1434+
14621435
logger.info(`Processing batch of ${tickets.length} tickets`);
1463-
1436+
14641437
for (const ticket of tickets) {
1465-
await processTicket(ticket);
1466-
totalTickets++;
1438+
try {
1439+
await processTicket(ticket);
1440+
totalTickets++;
1441+
} catch (error: any) {
1442+
failedTickets++;
1443+
logger.error(`Failed to process ticket #${ticket.id}, will retry next run: ${error.message}`);
1444+
}
14671445
}
1468-
1469-
nextPage = data.next_page;
1470-
1446+
1447+
nextPage = data.next_page || null;
1448+
14711449
if (nextPage) {
14721450
logger.debug(`Fetching next page: ${nextPage}`);
14731451
// Rate limiting: wait between requests
14741452
await new Promise(res => setTimeout(res, 1000));
14751453
}
14761454
}
14771455

1478-
// Update the last run date in the database
1479-
await DatabaseManager.updateLastRunDate(dbConnection, `zendesk_tickets_${config.zendesk_subdomain}`, logger, this.embeddingDimension, syncStartDate);
1480-
1481-
logger.info(`Successfully processed ${totalTickets} tickets`);
1456+
// Only advance the watermark when all tickets succeeded
1457+
if (failedTickets === 0) {
1458+
await DatabaseManager.updateLastRunDate(dbConnection, `zendesk_tickets_${config.zendesk_subdomain}`, logger, this.embeddingDimension, syncStartDate);
1459+
logger.info(`Successfully processed ${totalTickets} tickets`);
1460+
} else {
1461+
logger.warn(
1462+
`Run completed with ${failedTickets} ticket failure(s). ` +
1463+
`Watermark NOT advanced — failed tickets will be retried next run. ` +
1464+
`Successfully processed: ${totalTickets}.`
1465+
);
1466+
}
14821467
}
14831468

14841469
private async fetchAndProcessZendeskArticles(config: ZendeskSourceConfig, dbConnection: DatabaseConnection, logger: Logger): Promise<void> {

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "doc2vec",
3-
"version": "2.7.0",
3+
"version": "2.8.0",
44
"type": "commonjs",
55
"description": "",
66
"main": "dist/doc2vec.js",

0 commit comments

Comments
 (0)