From 71d1d54f33b8251d5c8ac7e07008a7067c3328bf Mon Sep 17 00:00:00 2001 From: Waleed Latif Date: Wed, 8 Apr 2026 00:18:23 -0700 Subject: [PATCH 1/6] improvement(kb): deferred content fetching and metadata-based hashes for connectors --- apps/sim/connectors/asana/asana.ts | 8 +- apps/sim/connectors/fireflies/fireflies.ts | 63 ++++++---------- .../google-calendar/google-calendar.ts | 12 ++- .../sim/connectors/google-docs/google-docs.ts | 74 ++++++++----------- .../connectors/google-sheets/google-sheets.ts | 43 ++++++----- apps/sim/connectors/hubspot/hubspot.ts | 15 ++-- apps/sim/connectors/intercom/intercom.ts | 23 ++---- apps/sim/connectors/jira/jira.ts | 44 +++++++---- apps/sim/connectors/linear/linear.ts | 62 ++++++++-------- apps/sim/connectors/outlook/outlook.ts | 66 ++++++++++++----- apps/sim/connectors/reddit/reddit.ts | 46 +++++------- apps/sim/connectors/salesforce/salesforce.ts | 49 ++++++++---- apps/sim/connectors/servicenow/servicenow.ts | 26 +++---- apps/sim/connectors/webflow/webflow.ts | 13 ++-- apps/sim/connectors/wordpress/wordpress.ts | 10 +-- apps/sim/connectors/zendesk/zendesk.ts | 70 ++++++++++-------- 16 files changed, 324 insertions(+), 300 deletions(-) diff --git a/apps/sim/connectors/asana/asana.ts b/apps/sim/connectors/asana/asana.ts index e6febeb8210..25009ec5c4a 100644 --- a/apps/sim/connectors/asana/asana.ts +++ b/apps/sim/connectors/asana/asana.ts @@ -2,7 +2,7 @@ import { createLogger } from '@sim/logger' import { AsanaIcon } from '@/components/icons' import { fetchWithRetry, VALIDATE_RETRY_OPTIONS } from '@/lib/knowledge/documents/utils' import type { ConnectorConfig, ExternalDocument, ExternalDocumentList } from '@/connectors/types' -import { computeContentHash, joinTagArray, parseTagDate } from '@/connectors/utils' +import { joinTagArray, parseTagDate } from '@/connectors/utils' const logger = createLogger('AsanaConnector') @@ -240,7 +240,6 @@ export const asanaConnector: ConnectorConfig = { for (const task of result.data) { const content = buildTaskContent(task) - const contentHash = await computeContentHash(content) const tagNames = task.tags?.map((t) => t.name).filter(Boolean) || [] documents.push({ @@ -249,7 +248,7 @@ export const asanaConnector: ConnectorConfig = { content, mimeType: 'text/plain', sourceUrl: task.permalink_url || undefined, - contentHash, + contentHash: `asana:${task.gid}:${task.modified_at ?? ''}`, metadata: { project: currentProjectGid, assignee: task.assignee?.name, @@ -315,7 +314,6 @@ export const asanaConnector: ConnectorConfig = { if (!task) return null const content = buildTaskContent(task) - const contentHash = await computeContentHash(content) const tagNames = task.tags?.map((t) => t.name).filter(Boolean) || [] return { @@ -324,7 +322,7 @@ export const asanaConnector: ConnectorConfig = { content, mimeType: 'text/plain', sourceUrl: task.permalink_url || undefined, - contentHash, + contentHash: `asana:${task.gid}:${task.modified_at ?? ''}`, metadata: { assignee: task.assignee?.name, completed: task.completed, diff --git a/apps/sim/connectors/fireflies/fireflies.ts b/apps/sim/connectors/fireflies/fireflies.ts index cfb9d0f24a2..c09cccb426d 100644 --- a/apps/sim/connectors/fireflies/fireflies.ts +++ b/apps/sim/connectors/fireflies/fireflies.ts @@ -2,7 +2,7 @@ import { createLogger } from '@sim/logger' import { FirefliesIcon } from '@/components/icons' import { fetchWithRetry, VALIDATE_RETRY_OPTIONS } from '@/lib/knowledge/documents/utils' import type { ConnectorConfig, ExternalDocument, ExternalDocumentList } from '@/connectors/types' -import { computeContentHash, parseTagDate } from '@/connectors/utils' +import { parseTagDate } from '@/connectors/utils' const logger = createLogger('FirefliesConnector') @@ -196,17 +196,6 @@ export const firefliesConnector: ConnectorConfig = { id name } - sentences { - index - speaker_name - text - } - summary { - keywords - action_items - overview - short_summary - } } }`, variables @@ -214,32 +203,27 @@ export const firefliesConnector: ConnectorConfig = { const transcripts = (data.transcripts || []) as FirefliesTranscript[] - const documents: ExternalDocument[] = await Promise.all( - transcripts.map(async (transcript) => { - const content = formatTranscriptContent(transcript) - const contentHash = await computeContentHash(content) - - const meetingDate = transcript.date ? new Date(transcript.date).toISOString() : undefined - const speakerNames = transcript.speakers?.map((s) => s.name).filter(Boolean) ?? [] - - return { - externalId: transcript.id, - title: transcript.title || 'Untitled Meeting', - content, - mimeType: 'text/plain' as const, - sourceUrl: transcript.transcript_url || undefined, - contentHash, - metadata: { - hostEmail: transcript.host_email, - duration: transcript.duration, - meetingDate, - participants: transcript.participants, - speakers: speakerNames, - keywords: transcript.summary?.keywords, - }, - } - }) - ) + const documents: ExternalDocument[] = transcripts.map((transcript) => { + const meetingDate = transcript.date ? new Date(transcript.date).toISOString() : undefined + const speakerNames = transcript.speakers?.map((s) => s.name).filter(Boolean) ?? [] + + return { + externalId: transcript.id, + title: transcript.title || 'Untitled Meeting', + content: '', + contentDeferred: true, + mimeType: 'text/plain' as const, + sourceUrl: transcript.transcript_url || undefined, + contentHash: `fireflies:${transcript.id}:${transcript.date ?? ''}:${transcript.duration ?? ''}`, + metadata: { + hostEmail: transcript.host_email, + duration: transcript.duration, + meetingDate, + participants: transcript.participants, + speakers: speakerNames, + }, + } + }) const totalFetched = ((syncContext?.totalDocsFetched as number) ?? 0) + documents.length if (syncContext) syncContext.totalDocsFetched = totalFetched @@ -296,7 +280,7 @@ export const firefliesConnector: ConnectorConfig = { if (!transcript) return null const content = formatTranscriptContent(transcript) - const contentHash = await computeContentHash(content) + const contentHash = `fireflies:${transcript.id}:${transcript.date ?? ''}:${transcript.duration ?? ''}` const meetingDate = transcript.date ? new Date(transcript.date).toISOString() : undefined const speakerNames = transcript.speakers?.map((s) => s.name).filter(Boolean) ?? [] @@ -305,6 +289,7 @@ export const firefliesConnector: ConnectorConfig = { externalId: transcript.id, title: transcript.title || 'Untitled Meeting', content, + contentDeferred: false, mimeType: 'text/plain', sourceUrl: transcript.transcript_url || undefined, contentHash, diff --git a/apps/sim/connectors/google-calendar/google-calendar.ts b/apps/sim/connectors/google-calendar/google-calendar.ts index 1fbda8d1a3b..d107cb0569a 100644 --- a/apps/sim/connectors/google-calendar/google-calendar.ts +++ b/apps/sim/connectors/google-calendar/google-calendar.ts @@ -2,7 +2,7 @@ import { createLogger } from '@sim/logger' import { GoogleCalendarIcon } from '@/components/icons' import { fetchWithRetry, VALIDATE_RETRY_OPTIONS } from '@/lib/knowledge/documents/utils' import type { ConnectorConfig, ExternalDocument, ExternalDocumentList } from '@/connectors/types' -import { computeContentHash, parseTagDate } from '@/connectors/utils' +import { parseTagDate } from '@/connectors/utils' const logger = createLogger('GoogleCalendarConnector') @@ -195,14 +195,12 @@ function getTimeRange(sourceConfig: Record): { timeMin: string; /** * Converts a CalendarEvent to an ExternalDocument. */ -async function eventToDocument(event: CalendarEvent): Promise { +function eventToDocument(event: CalendarEvent): ExternalDocument | null { if (event.status === 'cancelled') return null const content = eventToContent(event) if (!content.trim()) return null - const contentHash = await computeContentHash(content) - const startTime = event.start?.dateTime || event.start?.date || '' const attendeeCount = event.attendees?.filter((a) => !a.resource).length || 0 @@ -212,7 +210,7 @@ async function eventToDocument(event: CalendarEvent): Promise { - try { - const content = await fetchDocContent(accessToken, file.id) - if (!content.trim()) { - logger.info(`Skipping empty document: ${file.name} (${file.id})`) - return null - } - - const contentHash = await computeContentHash(content) - - return { - externalId: file.id, - title: file.name || 'Untitled', - content, - mimeType: 'text/plain', - sourceUrl: file.webViewLink || `https://docs.google.com/document/d/${file.id}/edit`, - contentHash, - metadata: { - modifiedTime: file.modifiedTime, - createdTime: file.createdTime, - owners: file.owners?.map((o) => o.displayName || o.emailAddress).filter(Boolean), - }, - } - } catch (error) { - logger.warn(`Failed to extract content from document: ${file.name} (${file.id})`, { - error: error instanceof Error ? error.message : String(error), - }) - return null +function fileToStub(file: DriveFile): ExternalDocument { + return { + externalId: file.id, + title: file.name || 'Untitled', + content: '', + contentDeferred: true, + mimeType: 'text/plain', + sourceUrl: file.webViewLink || `https://docs.google.com/document/d/${file.id}/edit`, + contentHash: `gdocs:${file.id}:${file.modifiedTime ?? ''}`, + metadata: { + modifiedTime: file.modifiedTime, + createdTime: file.createdTime, + owners: file.owners?.map((o) => o.displayName || o.emailAddress).filter(Boolean), + }, } } @@ -246,18 +229,11 @@ export const googleDocsConnector: ConnectorConfig = { const maxDocs = sourceConfig.maxDocs ? Number(sourceConfig.maxDocs) : 0 const previouslyFetched = (syncContext?.totalDocsFetched as number) ?? 0 - const CONCURRENCY = 5 - const documents: ExternalDocument[] = [] - for (let i = 0; i < files.length; i += CONCURRENCY) { - if (maxDocs > 0 && previouslyFetched + documents.length >= maxDocs) break - const batch = files.slice(i, i + CONCURRENCY) - const results = await Promise.all(batch.map((file) => fileToDocument(accessToken, file))) - documents.push(...(results.filter(Boolean) as ExternalDocument[])) - } + let documents = files.map(fileToStub) if (maxDocs > 0) { const remaining = maxDocs - previouslyFetched if (documents.length > remaining) { - documents.splice(remaining) + documents = documents.slice(0, remaining) } } @@ -300,7 +276,17 @@ export const googleDocsConnector: ConnectorConfig = { if (file.trashed) return null if (file.mimeType !== 'application/vnd.google-apps.document') return null - return fileToDocument(accessToken, file) + try { + const content = await fetchDocContent(accessToken, file.id) + if (!content.trim()) return null + + return { ...fileToStub(file), content, contentDeferred: false } + } catch (error) { + logger.warn(`Failed to extract content from document: ${file.name} (${file.id})`, { + error: error instanceof Error ? error.message : String(error), + }) + return null + } }, validateConfig: async ( diff --git a/apps/sim/connectors/google-sheets/google-sheets.ts b/apps/sim/connectors/google-sheets/google-sheets.ts index 9b70ea9a941..2fb1c1f2865 100644 --- a/apps/sim/connectors/google-sheets/google-sheets.ts +++ b/apps/sim/connectors/google-sheets/google-sheets.ts @@ -2,7 +2,7 @@ import { createLogger } from '@sim/logger' import { GoogleSheetsIcon } from '@/components/icons' import { fetchWithRetry, VALIDATE_RETRY_OPTIONS } from '@/lib/knowledge/documents/utils' import type { ConnectorConfig, ExternalDocument, ExternalDocumentList } from '@/connectors/types' -import { computeContentHash, parseTagDate } from '@/connectors/utils' +import { parseTagDate } from '@/connectors/utils' const logger = createLogger('GoogleSheetsConnector') @@ -168,7 +168,6 @@ async function sheetToDocument( return null } - const contentHash = await computeContentHash(content) const rowCount = dataRows.length return { @@ -177,7 +176,7 @@ async function sheetToDocument( content, mimeType: 'text/plain', sourceUrl: `https://docs.google.com/spreadsheets/d/${spreadsheetId}/edit#gid=${sheet.sheetId}`, - contentHash, + contentHash: `gsheets:${spreadsheetId}:${sheet.sheetId}:${modifiedTime ?? ''}`, metadata: { spreadsheetId, spreadsheetTitle, @@ -259,22 +258,24 @@ export const googleSheetsConnector: ConnectorConfig = { sheetCount: sheets.length, }) - const documents: ExternalDocument[] = [] - for (let i = 0; i < sheets.length; i += CONCURRENCY) { - const batch = sheets.slice(i, i + CONCURRENCY) - const results = await Promise.all( - batch.map((sheet) => - sheetToDocument( - accessToken, - spreadsheetId, - metadata.properties.title, - sheet, - modifiedTime - ) - ) - ) - documents.push(...(results.filter(Boolean) as ExternalDocument[])) - } + const documents: ExternalDocument[] = sheets.map((sheet) => ({ + externalId: `${spreadsheetId}__sheet__${sheet.sheetId}`, + title: `${metadata.properties.title} - ${sheet.title}`, + content: '', + contentDeferred: true, + mimeType: 'text/plain', + sourceUrl: `https://docs.google.com/spreadsheets/d/${spreadsheetId}/edit#gid=${sheet.sheetId}`, + contentHash: `gsheets:${spreadsheetId}:${sheet.sheetId}:${modifiedTime ?? ''}`, + metadata: { + spreadsheetId, + spreadsheetTitle: metadata.properties.title, + sheetTitle: sheet.title, + sheetId: sheet.sheetId, + rowCount: sheet.gridProperties?.rowCount, + columnCount: sheet.gridProperties?.columnCount, + ...(modifiedTime ? { modifiedTime } : {}), + }, + })) return { documents, @@ -324,13 +325,15 @@ export const googleSheetsConnector: ConnectorConfig = { return null } - return sheetToDocument( + const doc = await sheetToDocument( accessToken, spreadsheetId, metadata.properties.title, sheetEntry.properties, modifiedTime ) + if (!doc) return null + return { ...doc, contentDeferred: false } }, validateConfig: async ( diff --git a/apps/sim/connectors/hubspot/hubspot.ts b/apps/sim/connectors/hubspot/hubspot.ts index 0a8a669284d..2395784e80d 100644 --- a/apps/sim/connectors/hubspot/hubspot.ts +++ b/apps/sim/connectors/hubspot/hubspot.ts @@ -2,7 +2,7 @@ import { createLogger } from '@sim/logger' import { HubspotIcon } from '@/components/icons' import { fetchWithRetry, VALIDATE_RETRY_OPTIONS } from '@/lib/knowledge/documents/utils' import type { ConnectorConfig, ExternalDocument, ExternalDocumentList } from '@/connectors/types' -import { computeContentHash, parseTagDate } from '@/connectors/utils' +import { parseTagDate } from '@/connectors/utils' const logger = createLogger('HubSpotConnector') @@ -140,16 +140,15 @@ function buildRecordContent(objectType: string, properties: Record, objectType: string, portalId: string -): Promise { +): ExternalDocument { const id = record.id as string const properties = (record.properties || {}) as Record const content = buildRecordContent(objectType, properties) - const contentHash = await computeContentHash(content) const title = buildRecordTitle(objectType, properties) const lastModified = @@ -161,7 +160,7 @@ async function recordToDocument( content, mimeType: 'text/plain', sourceUrl: `https://app.hubspot.com/contacts/${portalId}/record/${objectType}/${id}`, - contentHash, + contentHash: `hubspot:${id}:${lastModified ?? ''}`, metadata: { objectType, owner: properties.hubspot_owner_id || undefined, @@ -260,8 +259,8 @@ export const hubspotConnector: ConnectorConfig = { const paging = data.paging as { next?: { after?: string } } | undefined const nextCursor = paging?.next?.after - const documents: ExternalDocument[] = await Promise.all( - results.map((record) => recordToDocument(record, objectType, portalId)) + const documents: ExternalDocument[] = results.map((record) => + recordToDocument(record, objectType, portalId) ) const previouslyFetched = (syncContext?.totalDocsFetched as number) ?? 0 @@ -322,7 +321,7 @@ export const hubspotConnector: ConnectorConfig = { } const record = await response.json() - return recordToDocument(record, objectType, portalId) + return recordToDocument(record as Record, objectType, portalId) }, validateConfig: async ( diff --git a/apps/sim/connectors/intercom/intercom.ts b/apps/sim/connectors/intercom/intercom.ts index 906607e2487..e7e011d83a3 100644 --- a/apps/sim/connectors/intercom/intercom.ts +++ b/apps/sim/connectors/intercom/intercom.ts @@ -2,7 +2,7 @@ import { createLogger } from '@sim/logger' import { IntercomIcon } from '@/components/icons' import { fetchWithRetry, VALIDATE_RETRY_OPTIONS } from '@/lib/knowledge/documents/utils' import type { ConnectorConfig, ExternalDocument, ExternalDocumentList } from '@/connectors/types' -import { computeContentHash, htmlToPlainText, parseTagDate } from '@/connectors/utils' +import { htmlToPlainText, parseTagDate } from '@/connectors/utils' const logger = createLogger('IntercomConnector') @@ -309,7 +309,6 @@ export const intercomConnector: ConnectorConfig = { const content = formatArticle(article) if (!content.trim()) continue - const contentHash = await computeContentHash(content) const updatedAt = new Date(article.updated_at * 1000).toISOString() documents.push({ @@ -318,7 +317,7 @@ export const intercomConnector: ConnectorConfig = { content, mimeType: 'text/plain', sourceUrl: `https://app.intercom.com/a/apps/_/articles/articles/${article.id}/show`, - contentHash, + contentHash: `intercom:article-${article.id}:${article.updated_at}`, metadata: { type: 'article', state: article.state, @@ -337,28 +336,23 @@ export const intercomConnector: ConnectorConfig = { const conversations = await fetchConversations(accessToken, maxItems, conversationState) for (const conversation of conversations) { - const detail = await fetchConversationDetail(accessToken, conversation.id) - const content = formatConversation(detail) - if (!content.trim()) continue - - const contentHash = await computeContentHash(content) const updatedAt = new Date(conversation.updated_at * 1000).toISOString() const tags = conversation.tags?.tags?.map((t) => t.name) || [] documents.push({ externalId: `conversation-${conversation.id}`, title: conversation.title || `Conversation #${conversation.id}`, - content, + content: '', + contentDeferred: true, mimeType: 'text/plain', sourceUrl: `https://app.intercom.com/a/apps/_/inbox/inbox/all/conversations/${conversation.id}`, - contentHash, + contentHash: `intercom:conversation-${conversation.id}:${conversation.updated_at}`, metadata: { type: 'conversation', state: conversation.state, tags: tags.join(', '), updatedAt, createdAt: new Date(conversation.created_at * 1000).toISOString(), - messageCount: (detail.conversation_parts?.total_count ?? 0) + 1, }, }) } @@ -383,7 +377,6 @@ export const intercomConnector: ConnectorConfig = { const content = formatArticle(article) if (!content.trim()) return null - const contentHash = await computeContentHash(content) const updatedAt = new Date(article.updated_at * 1000).toISOString() return { @@ -392,7 +385,7 @@ export const intercomConnector: ConnectorConfig = { content, mimeType: 'text/plain', sourceUrl: `https://app.intercom.com/a/apps/_/articles/articles/${article.id}/show`, - contentHash, + contentHash: `intercom:article-${article.id}:${article.updated_at}`, metadata: { type: 'article', state: article.state, @@ -410,7 +403,6 @@ export const intercomConnector: ConnectorConfig = { const content = formatConversation(detail) if (!content.trim()) return null - const contentHash = await computeContentHash(content) const updatedAt = new Date(detail.updated_at * 1000).toISOString() const tags = detail.tags?.tags?.map((t) => t.name) || [] @@ -418,9 +410,10 @@ export const intercomConnector: ConnectorConfig = { externalId, title: detail.title || `Conversation #${detail.id}`, content, + contentDeferred: false, mimeType: 'text/plain', sourceUrl: `https://app.intercom.com/a/apps/_/inbox/inbox/all/conversations/${detail.id}`, - contentHash, + contentHash: `intercom:conversation-${detail.id}:${detail.updated_at}`, metadata: { type: 'conversation', state: detail.state, diff --git a/apps/sim/connectors/jira/jira.ts b/apps/sim/connectors/jira/jira.ts index aaa1218a58f..ea883f77a03 100644 --- a/apps/sim/connectors/jira/jira.ts +++ b/apps/sim/connectors/jira/jira.ts @@ -2,7 +2,7 @@ import { createLogger } from '@sim/logger' import { JiraIcon } from '@/components/icons' import { fetchWithRetry, VALIDATE_RETRY_OPTIONS } from '@/lib/knowledge/documents/utils' import type { ConnectorConfig, ExternalDocument, ExternalDocumentList } from '@/connectors/types' -import { computeContentHash, joinTagArray, parseTagDate } from '@/connectors/utils' +import { joinTagArray, parseTagDate } from '@/connectors/utils' import { extractAdfText, getJiraCloudId } from '@/tools/jira/utils' const logger = createLogger('JiraConnector') @@ -33,16 +33,12 @@ function buildIssueContent(fields: Record): string { } /** - * Converts a Jira issue API response to an ExternalDocument. + * Extracts common metadata fields from a Jira issue into an ExternalDocument + * stub with deferred content. The contentHash is metadata-based so it is + * identical whether produced during listing or full fetch. */ -async function issueToDocument( - issue: Record, - domain: string -): Promise { +function issueToStub(issue: Record, domain: string): ExternalDocument { const fields = (issue.fields || {}) as Record - const content = buildIssueContent(fields) - const contentHash = await computeContentHash(content) - const key = issue.key as string const issueType = fields.issuetype as Record | undefined const status = fields.status as Record | undefined @@ -51,14 +47,16 @@ async function issueToDocument( const reporter = fields.reporter as Record | undefined const project = fields.project as Record | undefined const labels = Array.isArray(fields.labels) ? (fields.labels as string[]) : [] + const updated = (fields.updated as string) ?? '' return { externalId: String(issue.id), title: `${key}: ${(fields.summary as string) || 'Untitled'}`, - content, + content: '', + contentDeferred: true, mimeType: 'text/plain', sourceUrl: `https://${domain}/browse/${key}`, - contentHash, + contentHash: `jira:${issue.id}:${updated}`, metadata: { key, issueType: issueType?.name, @@ -74,6 +72,22 @@ async function issueToDocument( } } +/** + * Converts a fully-fetched Jira issue (with description and comments) into an + * ExternalDocument with resolved content. + */ +function issueToFullDocument(issue: Record, domain: string): ExternalDocument { + const stub = issueToStub(issue, domain) + const fields = (issue.fields || {}) as Record + const content = buildIssueContent(fields) + + return { + ...stub, + content, + contentDeferred: false, + } +} + export const jiraConnector: ConnectorConfig = { id: 'jira', name: 'Jira', @@ -162,7 +176,7 @@ export const jiraConnector: ConnectorConfig = { params.append('maxResults', String(Math.min(PAGE_SIZE, remaining))) params.append( 'fields', - 'summary,description,comment,issuetype,status,priority,assignee,reporter,project,labels,created,updated' + 'summary,issuetype,status,priority,assignee,reporter,project,labels,created,updated' ) const url = `https://api.atlassian.com/ex/jira/${cloudId}/rest/api/3/search?${params.toString()}` @@ -190,9 +204,7 @@ export const jiraConnector: ConnectorConfig = { const issues = (data.issues || []) as Record[] const total = (data.total as number) ?? 0 - const documents: ExternalDocument[] = await Promise.all( - issues.map((issue) => issueToDocument(issue, domain)) - ) + const documents: ExternalDocument[] = issues.map((issue) => issueToStub(issue, domain)) const nextStart = startAt + issues.length const hasMore = nextStart < total && (maxIssues <= 0 || nextStart < maxIssues) @@ -239,7 +251,7 @@ export const jiraConnector: ConnectorConfig = { } const issue = await response.json() - return issueToDocument(issue, domain) + return issueToFullDocument(issue, domain) }, validateConfig: async ( diff --git a/apps/sim/connectors/linear/linear.ts b/apps/sim/connectors/linear/linear.ts index fbcb2dde60e..efe391c7c00 100644 --- a/apps/sim/connectors/linear/linear.ts +++ b/apps/sim/connectors/linear/linear.ts @@ -3,7 +3,7 @@ import { LinearIcon } from '@/components/icons' import type { RetryOptions } from '@/lib/knowledge/documents/utils' import { fetchWithRetry, VALIDATE_RETRY_OPTIONS } from '@/lib/knowledge/documents/utils' import type { ConnectorConfig, ExternalDocument, ExternalDocumentList } from '@/connectors/types' -import { computeContentHash, joinTagArray, parseTagDate } from '@/connectors/utils' +import { joinTagArray, parseTagDate } from '@/connectors/utils' const logger = createLogger('LinearConnector') @@ -278,36 +278,34 @@ export const linearConnector: ConnectorConfig = { const nodes = (issuesConn.nodes || []) as Record[] const pageInfo = issuesConn.pageInfo as Record - const documents: ExternalDocument[] = await Promise.all( - nodes.map(async (issue) => { - const content = buildIssueContent(issue) - const contentHash = await computeContentHash(content) + const documents: ExternalDocument[] = nodes.map((issue) => { + const content = buildIssueContent(issue) + const contentHash = `linear:${issue.id}:${issue.updatedAt}` - const labelNodes = ((issue.labels as Record)?.nodes || []) as Record< - string, - unknown - >[] + const labelNodes = ((issue.labels as Record)?.nodes || []) as Record< + string, + unknown + >[] - return { - externalId: issue.id as string, - title: `${(issue.identifier as string) || ''}: ${(issue.title as string) || 'Untitled'}`, - content, - mimeType: 'text/plain' as const, - sourceUrl: (issue.url as string) || undefined, - contentHash, - metadata: { - identifier: issue.identifier, - state: (issue.state as Record)?.name, - priority: issue.priorityLabel, - assignee: (issue.assignee as Record)?.name, - labels: labelNodes.map((l) => l.name as string), - team: (issue.team as Record)?.name, - project: (issue.project as Record)?.name, - lastModified: issue.updatedAt, - }, - } - }) - ) + return { + externalId: issue.id as string, + title: `${(issue.identifier as string) || ''}: ${(issue.title as string) || 'Untitled'}`, + content, + mimeType: 'text/plain' as const, + sourceUrl: (issue.url as string) || undefined, + contentHash, + metadata: { + identifier: issue.identifier, + state: (issue.state as Record)?.name, + priority: issue.priorityLabel, + assignee: (issue.assignee as Record)?.name, + labels: labelNodes.map((l) => l.name as string), + team: (issue.team as Record)?.name, + project: (issue.project as Record)?.name, + lastModified: issue.updatedAt, + }, + } + }) const hasNextPage = Boolean(pageInfo.hasNextPage) const endCursor = (pageInfo.endCursor as string) || undefined @@ -335,7 +333,7 @@ export const linearConnector: ConnectorConfig = { if (!issue) return null const content = buildIssueContent(issue) - const contentHash = await computeContentHash(content) + const contentHash = `linear:${issue.id}:${issue.updatedAt}` const labelNodes = ((issue.labels as Record)?.nodes || []) as Record< string, @@ -346,7 +344,7 @@ export const linearConnector: ConnectorConfig = { externalId: issue.id as string, title: `${(issue.identifier as string) || ''}: ${(issue.title as string) || 'Untitled'}`, content, - mimeType: 'text/plain', + mimeType: 'text/plain' as const, sourceUrl: (issue.url as string) || undefined, contentHash, metadata: { @@ -379,7 +377,6 @@ export const linearConnector: ConnectorConfig = { } try { - // Verify the token works by fetching teams const data = await linearGraphQL(accessToken, TEAMS_QUERY, undefined, VALIDATE_RETRY_OPTIONS) const teamsConn = data.teams as Record const teams = (teamsConn.nodes || []) as Record[] @@ -391,7 +388,6 @@ export const linearConnector: ConnectorConfig = { } } - // If teamId specified, verify it exists const teamId = sourceConfig.teamId as string | undefined if (teamId) { const found = teams.some((t) => t.id === teamId) diff --git a/apps/sim/connectors/outlook/outlook.ts b/apps/sim/connectors/outlook/outlook.ts index 1c800701caa..e79a42e3c8e 100644 --- a/apps/sim/connectors/outlook/outlook.ts +++ b/apps/sim/connectors/outlook/outlook.ts @@ -2,14 +2,37 @@ import { createLogger } from '@sim/logger' import { OutlookIcon } from '@/components/icons' import { fetchWithRetry, VALIDATE_RETRY_OPTIONS } from '@/lib/knowledge/documents/utils' import type { ConnectorConfig, ExternalDocument, ExternalDocumentList } from '@/connectors/types' -import { computeContentHash, htmlToPlainText, parseTagDate } from '@/connectors/utils' +import { htmlToPlainText, parseTagDate } from '@/connectors/utils' const logger = createLogger('OutlookConnector') const GRAPH_API_BASE = 'https://graph.microsoft.com/v1.0/me' const DEFAULT_MAX_CONVERSATIONS = 500 const MESSAGES_PER_PAGE = 50 -const MESSAGE_FIELDS = [ +/** + * Fields requested when listing messages (no body — deferred to getDocument). + */ +const LIST_MESSAGE_FIELDS = [ + 'id', + 'conversationId', + 'subject', + 'from', + 'toRecipients', + 'receivedDateTime', + 'sentDateTime', + 'categories', + 'importance', + 'inferenceClassification', + 'hasAttachments', + 'webLink', + 'isDraft', + 'parentFolderId', +].join(',') + +/** + * Fields requested when fetching full message content in getDocument. + */ +const FULL_MESSAGE_FIELDS = [ 'id', 'conversationId', 'subject', @@ -84,7 +107,7 @@ function buildInitialUrl(sourceConfig: Record): string { const params = new URLSearchParams({ $top: String(MESSAGES_PER_PAGE), - $select: MESSAGE_FIELDS, + $select: LIST_MESSAGE_FIELDS, }) // Build $filter clauses @@ -353,7 +376,6 @@ export const outlookConnector: ConnectorConfig = { const headers: Record = { Authorization: `Bearer ${accessToken}`, Accept: 'application/json', - Prefer: 'outlook.body-content-type="text"', } const response = await fetchWithRetry(url, { method: 'GET', headers }) @@ -407,8 +429,8 @@ export const outlookConnector: ConnectorConfig = { } } - // Phase 2: Group conversations into documents - logger.info('Grouping Outlook messages into conversations', { + // Phase 2: Build lightweight stubs — content is deferred to getDocument + logger.info('Building Outlook conversation stubs', { totalMessages: syncContext?._totalMessagesFetched, totalConversations: Object.keys(conversations).length, }) @@ -433,23 +455,26 @@ export const outlookConnector: ConnectorConfig = { const documents: ExternalDocument[] = [] for (const [convId, msgs] of limited) { - const result = formatConversation(convId, msgs) - if (!result) continue + if (msgs.length === 0) continue - const contentHash = await computeContentHash(result.content) + const lastDate = msgs.reduce((max, m) => { + const d = m.receivedDateTime || '' + return d > max ? d : max + }, '') - // Use the first message's webLink as the source URL + const subject = msgs[0].subject || 'No Subject' const firstWithLink = msgs.find((m) => m.webLink) - const sourceUrl = firstWithLink?.webLink || `https://outlook.office.com/mail/inbox` + const sourceUrl = firstWithLink?.webLink || 'https://outlook.office.com/mail/inbox' documents.push({ externalId: convId, - title: result.subject, - content: result.content, + title: subject, + content: '', + contentDeferred: true, mimeType: 'text/plain', sourceUrl, - contentHash, - metadata: result.metadata, + contentHash: `outlook:${convId}:${lastDate}:${msgs.length}`, + metadata: {}, }) } @@ -465,7 +490,7 @@ export const outlookConnector: ConnectorConfig = { // Fetch messages for this conversation const params = new URLSearchParams({ $filter: `conversationId eq '${externalId.replace(/'/g, "''")}'`, - $select: MESSAGE_FIELDS, + $select: FULL_MESSAGE_FIELDS, $top: '50', }) @@ -493,16 +518,21 @@ export const outlookConnector: ConnectorConfig = { const result = formatConversation(externalId, messages) if (!result) return null - const contentHash = await computeContentHash(result.content) + const lastDate = messages.reduce((max, m) => { + const d = m.receivedDateTime || '' + return d > max ? d : max + }, '') + const firstWithLink = messages.find((m) => m.webLink) return { externalId, title: result.subject, content: result.content, + contentDeferred: false, mimeType: 'text/plain', sourceUrl: firstWithLink?.webLink || 'https://outlook.office.com/mail/inbox', - contentHash, + contentHash: `outlook:${externalId}:${lastDate}:${messages.length}`, metadata: result.metadata, } } catch (error) { diff --git a/apps/sim/connectors/reddit/reddit.ts b/apps/sim/connectors/reddit/reddit.ts index b74a8d4b0b3..ce636ad1be7 100644 --- a/apps/sim/connectors/reddit/reddit.ts +++ b/apps/sim/connectors/reddit/reddit.ts @@ -2,7 +2,7 @@ import { createLogger } from '@sim/logger' import { RedditIcon } from '@/components/icons' import { fetchWithRetry, VALIDATE_RETRY_OPTIONS } from '@/lib/knowledge/documents/utils' import type { ConnectorConfig, ExternalDocument, ExternalDocumentList } from '@/connectors/types' -import { computeContentHash, parseTagDate } from '@/connectors/utils' +import { parseTagDate } from '@/connectors/utils' const logger = createLogger('RedditConnector') @@ -338,29 +338,23 @@ export const redditConnector: ConnectorConfig = { afterToken ) - const documents: ExternalDocument[] = [] - - for (const post of posts) { - const content = await formatPostContent(accessToken, post, COMMENTS_PER_POST) - const contentHash = await computeContentHash(content) - - documents.push({ - externalId: post.id, - title: post.title, - content, - mimeType: 'text/plain', - sourceUrl: `https://www.reddit.com${post.permalink}`, - contentHash, - metadata: { - author: post.author, - score: post.score, - commentCount: post.num_comments, - flair: post.link_flair_text ?? undefined, - postDate: new Date(post.created_utc * 1000).toISOString(), - subreddit: post.subreddit, - }, - }) - } + const documents: ExternalDocument[] = posts.map((post) => ({ + externalId: post.id, + title: post.title, + content: '', + contentDeferred: true, + mimeType: 'text/plain', + sourceUrl: `https://www.reddit.com${post.permalink}`, + contentHash: `reddit:${post.id}:${post.created_utc}`, + metadata: { + author: post.author, + score: post.score, + commentCount: post.num_comments, + flair: post.link_flair_text ?? undefined, + postDate: new Date(post.created_utc * 1000).toISOString(), + subreddit: post.subreddit, + }, + })) const totalCollected = collectedSoFar + documents.length const hasMore = after !== null && totalCollected < maxPosts @@ -397,15 +391,15 @@ export const redditConnector: ConnectorConfig = { const comments = data.length >= 2 ? extractComments(data[1] as RedditListing, COMMENTS_PER_POST) : [] const content = await formatPostContent(accessToken, post, COMMENTS_PER_POST, comments) - const contentHash = await computeContentHash(content) return { externalId: post.id, title: post.title, content, + contentDeferred: false, mimeType: 'text/plain', sourceUrl: `https://www.reddit.com${post.permalink}`, - contentHash, + contentHash: `reddit:${post.id}:${post.created_utc}`, metadata: { author: post.author, score: post.score, diff --git a/apps/sim/connectors/salesforce/salesforce.ts b/apps/sim/connectors/salesforce/salesforce.ts index c829187c699..c090ad91559 100644 --- a/apps/sim/connectors/salesforce/salesforce.ts +++ b/apps/sim/connectors/salesforce/salesforce.ts @@ -2,7 +2,7 @@ import { createLogger } from '@sim/logger' import { SalesforceIcon } from '@/components/icons' import { fetchWithRetry, VALIDATE_RETRY_OPTIONS } from '@/lib/knowledge/documents/utils' import type { ConnectorConfig, ExternalDocument, ExternalDocumentList } from '@/connectors/types' -import { computeContentHash, htmlToPlainText, parseTagDate } from '@/connectors/utils' +import { htmlToPlainText, parseTagDate } from '@/connectors/utils' const logger = createLogger('SalesforceConnector') @@ -12,7 +12,14 @@ const PAGE_SIZE = 200 /** SOQL field lists per object type. */ const OBJECT_FIELDS: Record = { - KnowledgeArticleVersion: ['Id', 'Title', 'Summary', 'LastModifiedDate', 'ArticleNumber'], + KnowledgeArticleVersion: [ + 'Id', + 'Title', + 'Summary', + 'LastModifiedDate', + 'ArticleNumber', + 'PublishStatus', + ], Case: ['Id', 'Subject', 'Description', 'Status', 'LastModifiedDate', 'CaseNumber'], Account: ['Id', 'Name', 'Description', 'Industry', 'LastModifiedDate'], Opportunity: [ @@ -146,36 +153,52 @@ function getRecordStatus(objectType: string, record: Record): s } /** - * Converts a Salesforce record to an ExternalDocument. + * Creates a lightweight stub for a Salesforce record with metadata-based hash. + * Content is deferred and fetched later via getDocument only for new/changed docs. */ -async function recordToDocument( +function recordToStub( record: Record, objectType: string, instanceUrl: string -): Promise { +): ExternalDocument { const id = record.Id as string - const content = buildRecordContent(objectType, record) - const contentHash = await computeContentHash(content) const title = buildRecordTitle(objectType, record) - + const lastModified = (record.LastModifiedDate as string) || '' const baseUrl = instanceUrl.replace(`/services/data/${API_VERSION}/`, '') return { externalId: id, title, - content, + content: '', + contentDeferred: true, mimeType: 'text/plain', sourceUrl: `${baseUrl}/${id}`, - contentHash, + contentHash: `salesforce:${id}:${lastModified}`, metadata: { objectType, - lastModified: (record.LastModifiedDate as string) || undefined, + lastModified: lastModified || undefined, recordNumber: getRecordNumber(objectType, record), status: getRecordStatus(objectType, record), }, } } +/** + * Builds a full ExternalDocument with content from a Salesforce record. + */ +function recordToDocument( + record: Record, + objectType: string, + instanceUrl: string +): ExternalDocument { + const stub = recordToStub(record, objectType, instanceUrl) + return { + ...stub, + content: buildRecordContent(objectType, record), + contentDeferred: false, + } +} + export const salesforceConnector: ConnectorConfig = { id: 'salesforce', name: 'Salesforce', @@ -257,8 +280,8 @@ export const salesforceConnector: ConnectorConfig = { const records = (data.records || []) as Record[] const nextRecordsUrl = data.nextRecordsUrl as string | undefined - const documents: ExternalDocument[] = await Promise.all( - records.map((record) => recordToDocument(record, objectType, instanceUrl)) + const documents: ExternalDocument[] = records.map((record) => + recordToStub(record, objectType, instanceUrl) ) const previouslyFetched = (syncContext?.totalDocsFetched as number) ?? 0 diff --git a/apps/sim/connectors/servicenow/servicenow.ts b/apps/sim/connectors/servicenow/servicenow.ts index 3aefb66fbdc..ebd33e79056 100644 --- a/apps/sim/connectors/servicenow/servicenow.ts +++ b/apps/sim/connectors/servicenow/servicenow.ts @@ -2,7 +2,7 @@ import { createLogger } from '@sim/logger' import { ServiceNowIcon } from '@/components/icons' import { fetchWithRetry, VALIDATE_RETRY_OPTIONS } from '@/lib/knowledge/documents/utils' import type { ConnectorConfig, ExternalDocument, ExternalDocumentList } from '@/connectors/types' -import { computeContentHash, htmlToPlainText, parseTagDate } from '@/connectors/utils' +import { htmlToPlainText, parseTagDate } from '@/connectors/utils' const logger = createLogger('ServiceNowConnector') @@ -184,15 +184,13 @@ function priorityLabel(priority: string | undefined): string { /** * Converts a KB article record to an ExternalDocument. */ -async function kbArticleToDocument( - article: KBArticle, - instanceUrl: string -): Promise { +function kbArticleToDocument(article: KBArticle, instanceUrl: string): ExternalDocument { const title = rawValue(article.short_description) || rawValue(article.number) || article.sys_id const articleText = rawValue(article.text) || rawValue(article.wiki) || '' const content = htmlToPlainText(articleText) - const contentHash = await computeContentHash(content) const sysId = rawValue(article.sys_id as unknown as string) || article.sys_id + const updatedOn = rawValue(article.sys_updated_on) || '' + const contentHash = `servicenow:${sysId}:${updatedOn}` const sourceUrl = `${instanceUrl}/kb_view.do?sys_kb_id=${sysId}` return { @@ -218,10 +216,7 @@ async function kbArticleToDocument( /** * Converts an incident record to an ExternalDocument. */ -async function incidentToDocument( - incident: Incident, - instanceUrl: string -): Promise { +function incidentToDocument(incident: Incident, instanceUrl: string): ExternalDocument { const number = rawValue(incident.number) const shortDesc = rawValue(incident.short_description) const title = number ? `${number}: ${shortDesc || 'Untitled'}` : shortDesc || incident.sys_id @@ -258,8 +253,9 @@ async function incidentToDocument( } const content = parts.join('\n') - const contentHash = await computeContentHash(content) const sysId = rawValue(incident.sys_id as unknown as string) || incident.sys_id + const updatedOn = rawValue(incident.sys_updated_on) || '' + const contentHash = `servicenow:${sysId}:${updatedOn}` const sourceUrl = `${instanceUrl}/incident.do?sys_id=${sysId}` return { @@ -478,8 +474,8 @@ export const servicenowConnector: ConnectorConfig = { const documents: ExternalDocument[] = [] for (const record of result) { const doc = isKB - ? await kbArticleToDocument(record as unknown as KBArticle, instanceUrl) - : await incidentToDocument(record as unknown as Incident, instanceUrl) + ? kbArticleToDocument(record as unknown as KBArticle, instanceUrl) + : incidentToDocument(record as unknown as Incident, instanceUrl) if (doc.content.trim()) { documents.push(doc) @@ -532,8 +528,8 @@ export const servicenowConnector: ConnectorConfig = { const record = result[0] const doc = isKB - ? await kbArticleToDocument(record as unknown as KBArticle, instanceUrl) - : await incidentToDocument(record as unknown as Incident, instanceUrl) + ? kbArticleToDocument(record as unknown as KBArticle, instanceUrl) + : incidentToDocument(record as unknown as Incident, instanceUrl) return doc.content.trim() ? doc : null } catch (error) { diff --git a/apps/sim/connectors/webflow/webflow.ts b/apps/sim/connectors/webflow/webflow.ts index c398ed815f1..b0ee6932fac 100644 --- a/apps/sim/connectors/webflow/webflow.ts +++ b/apps/sim/connectors/webflow/webflow.ts @@ -2,7 +2,7 @@ import { createLogger } from '@sim/logger' import { WebflowIcon } from '@/components/icons' import { fetchWithRetry, VALIDATE_RETRY_OPTIONS } from '@/lib/knowledge/documents/utils' import type { ConnectorConfig, ExternalDocument, ExternalDocumentList } from '@/connectors/types' -import { computeContentHash, htmlToPlainText, parseTagDate } from '@/connectors/utils' +import { htmlToPlainText, parseTagDate } from '@/connectors/utils' const logger = createLogger('WebflowConnector') @@ -194,8 +194,8 @@ export const webflowConnector: ConnectorConfig = { } const items = data.items || [] - let documents: ExternalDocument[] = await Promise.all( - items.map((item) => itemToDocument(item, currentCollectionId, collectionName)) + let documents: ExternalDocument[] = items.map((item) => + itemToDocument(item, currentCollectionId, collectionName) ) if (maxItems > 0) { @@ -373,13 +373,14 @@ export const webflowConnector: ConnectorConfig = { /** * Converts a Webflow CMS item to an ExternalDocument. */ -async function itemToDocument( +function itemToDocument( item: WebflowItem, collectionId: string, collectionName: string -): Promise { +): ExternalDocument { const plainText = itemToPlainText(item, collectionName) - const contentHash = await computeContentHash(plainText) + const lastModified = item.lastUpdated || item.lastPublished || item.createdOn || '' + const contentHash = `webflow:${item.id}:${lastModified}` const title = extractItemTitle(item) const slug = (item.fieldData?.slug as string) || '' diff --git a/apps/sim/connectors/wordpress/wordpress.ts b/apps/sim/connectors/wordpress/wordpress.ts index 9f835e52e06..eb079f104a9 100644 --- a/apps/sim/connectors/wordpress/wordpress.ts +++ b/apps/sim/connectors/wordpress/wordpress.ts @@ -2,7 +2,7 @@ import { createLogger } from '@sim/logger' import { WordpressIcon } from '@/components/icons' import { fetchWithRetry, VALIDATE_RETRY_OPTIONS } from '@/lib/knowledge/documents/utils' import type { ConnectorConfig, ExternalDocument, ExternalDocumentList } from '@/connectors/types' -import { computeContentHash, htmlToPlainText, joinTagArray, parseTagDate } from '@/connectors/utils' +import { htmlToPlainText, joinTagArray, parseTagDate } from '@/connectors/utils' const logger = createLogger('WordPressConnector') @@ -59,10 +59,10 @@ function extractTagNames(tags: Record): string[] { /** * Converts a WordPress post to an ExternalDocument. */ -async function postToDocument(post: WordPressPost): Promise { +function postToDocument(post: WordPressPost): ExternalDocument { const plainText = htmlToPlainText(post.content) const fullContent = `# ${post.title}\n\n${plainText}` - const contentHash = await computeContentHash(fullContent) + const contentHash = `wordpress:${post.ID}:${post.modified || ''}` const categories = extractCategoryNames(post.categories) const tags = extractTagNames(post.tags) @@ -182,7 +182,7 @@ export const wordpressConnector: ConnectorConfig = { const data = (await response.json()) as WordPressPostsResponse const posts = data.posts || [] - const documents = await Promise.all(posts.map(postToDocument)) + const documents = posts.map(postToDocument) const totalFetched = totalDocsFetched + documents.length if (syncContext) syncContext.totalDocsFetched = totalFetched @@ -226,7 +226,7 @@ export const wordpressConnector: ConnectorConfig = { } const post = (await response.json()) as WordPressPost - return await postToDocument(post) + return postToDocument(post) } catch (error) { logger.warn('Failed to get WordPress document', { externalId, diff --git a/apps/sim/connectors/zendesk/zendesk.ts b/apps/sim/connectors/zendesk/zendesk.ts index d42d016366a..6bdf57e6108 100644 --- a/apps/sim/connectors/zendesk/zendesk.ts +++ b/apps/sim/connectors/zendesk/zendesk.ts @@ -2,7 +2,7 @@ import { createLogger } from '@sim/logger' import { ZendeskIcon } from '@/components/icons' import { fetchWithRetry, VALIDATE_RETRY_OPTIONS } from '@/lib/knowledge/documents/utils' import type { ConnectorConfig, ExternalDocument, ExternalDocumentList } from '@/connectors/types' -import { computeContentHash, htmlToPlainText, joinTagArray, parseTagDate } from '@/connectors/utils' +import { htmlToPlainText, joinTagArray, parseTagDate } from '@/connectors/utils' const logger = createLogger('ZendeskConnector') @@ -207,14 +207,11 @@ function formatTicketContent(ticket: ZendeskTicket, comments: ZendeskComment[]): } /** - * Converts an article to an ExternalDocument. + * Converts an article to an ExternalDocument with inline content. + * Articles return body inline from the list API so no deferral is needed. */ -async function articleToDocument( - article: ZendeskArticle, - subdomain: string -): Promise { +function articleToDocument(article: ZendeskArticle, subdomain: string): ExternalDocument { const content = htmlToPlainText(article.body || '') - const contentHash = await computeContentHash(content) return { externalId: `article-${article.id}`, @@ -222,7 +219,7 @@ async function articleToDocument( content, mimeType: 'text/plain', sourceUrl: article.html_url || `https://${subdomain}.zendesk.com/hc/articles/${article.id}`, - contentHash, + contentHash: `zendesk:article:${article.id}:${article.updated_at}`, metadata: { type: 'article', articleId: article.id, @@ -238,23 +235,50 @@ async function articleToDocument( } /** - * Converts a ticket (with comments) to an ExternalDocument. + * Creates a deferred stub for a ticket. Content is not fetched here because + * each ticket requires a separate comments API call. Full content is fetched + * lazily via getDocument only for new/changed documents. + */ +function ticketToStub(ticket: ZendeskTicket, subdomain: string): ExternalDocument { + return { + externalId: `ticket-${ticket.id}`, + title: `Ticket #${ticket.id}: ${ticket.subject}`, + content: '', + contentDeferred: true, + mimeType: 'text/plain', + sourceUrl: `https://${subdomain}.zendesk.com/agent/tickets/${ticket.id}`, + contentHash: `zendesk:ticket:${ticket.id}:${ticket.updated_at}`, + metadata: { + type: 'ticket', + ticketId: ticket.id, + status: ticket.status, + priority: ticket.priority, + tags: ticket.tags, + createdAt: ticket.created_at, + updatedAt: ticket.updated_at, + }, + } +} + +/** + * Converts a ticket (with comments) to a full ExternalDocument. + * Used by getDocument to resolve deferred ticket stubs. */ -async function ticketToDocument( +function ticketToDocument( ticket: ZendeskTicket, comments: ZendeskComment[], subdomain: string -): Promise { +): ExternalDocument { const content = formatTicketContent(ticket, comments) - const contentHash = await computeContentHash(content) return { externalId: `ticket-${ticket.id}`, title: `Ticket #${ticket.id}: ${ticket.subject}`, content, + contentDeferred: false, mimeType: 'text/plain', sourceUrl: `https://${subdomain}.zendesk.com/agent/tickets/${ticket.id}`, - contentHash, + contentHash: `zendesk:ticket:${ticket.id}:${ticket.updated_at}`, metadata: { type: 'ticket', ticketId: ticket.id, @@ -375,8 +399,7 @@ export const zendeskConnector: ConnectorConfig = { for (const article of articles) { if (!article.body?.trim()) continue - const doc = await articleToDocument(article, subdomain) - documents.push(doc) + documents.push(articleToDocument(article, subdomain)) } } @@ -391,21 +414,8 @@ export const zendeskConnector: ConnectorConfig = { ) logger.info(`Fetched ${tickets.length} tickets from Zendesk`) - const BATCH_SIZE = 5 - for (let i = 0; i < tickets.length; i += BATCH_SIZE) { - const batch = tickets.slice(i, i + BATCH_SIZE) - const batchResults = await Promise.all( - batch.map(async (ticket) => { - const comments = await fetchTicketComments( - subdomain, - accessToken, - sourceConfig, - ticket.id - ) - return ticketToDocument(ticket, comments, subdomain) - }) - ) - documents.push(...batchResults) + for (const ticket of tickets) { + documents.push(ticketToStub(ticket, subdomain)) } } From c73e75c068cd5a4156149b94081ccbe280b758eb Mon Sep 17 00:00:00 2001 From: Waleed Latif Date: Wed, 8 Apr 2026 00:27:27 -0700 Subject: [PATCH 2/6] fix(kb): remove message count from outlook contentHash to prevent list/get divergence --- apps/sim/connectors/outlook/outlook.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/sim/connectors/outlook/outlook.ts b/apps/sim/connectors/outlook/outlook.ts index e79a42e3c8e..6586778bff7 100644 --- a/apps/sim/connectors/outlook/outlook.ts +++ b/apps/sim/connectors/outlook/outlook.ts @@ -473,7 +473,7 @@ export const outlookConnector: ConnectorConfig = { contentDeferred: true, mimeType: 'text/plain', sourceUrl, - contentHash: `outlook:${convId}:${lastDate}:${msgs.length}`, + contentHash: `outlook:${convId}:${lastDate}`, metadata: {}, }) } @@ -532,7 +532,7 @@ export const outlookConnector: ConnectorConfig = { contentDeferred: false, mimeType: 'text/plain', sourceUrl: firstWithLink?.webLink || 'https://outlook.office.com/mail/inbox', - contentHash: `outlook:${externalId}:${lastDate}:${messages.length}`, + contentHash: `outlook:${externalId}:${lastDate}`, metadata: result.metadata, } } catch (error) { From 03a27566484866b1b884a257cf79931b44f84378 Mon Sep 17 00:00:00 2001 From: Waleed Latif Date: Wed, 8 Apr 2026 00:41:50 -0700 Subject: [PATCH 3/6] fix(kb): increase outlook getDocument message limit from 50 to 250 --- apps/sim/connectors/outlook/outlook.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/sim/connectors/outlook/outlook.ts b/apps/sim/connectors/outlook/outlook.ts index 6586778bff7..a0c5836a220 100644 --- a/apps/sim/connectors/outlook/outlook.ts +++ b/apps/sim/connectors/outlook/outlook.ts @@ -491,7 +491,7 @@ export const outlookConnector: ConnectorConfig = { const params = new URLSearchParams({ $filter: `conversationId eq '${externalId.replace(/'/g, "''")}'`, $select: FULL_MESSAGE_FIELDS, - $top: '50', + $top: '250', }) const url = `${GRAPH_API_BASE}/messages?${params.toString()}` From b5e33dd68d38540ac68b13f7c70aaf9ab901ccb6 Mon Sep 17 00:00:00 2001 From: Waleed Latif Date: Wed, 8 Apr 2026 00:42:34 -0700 Subject: [PATCH 4/6] fix(kb): skip outlook messages without conversationId to prevent broken stubs --- apps/sim/connectors/outlook/outlook.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/apps/sim/connectors/outlook/outlook.ts b/apps/sim/connectors/outlook/outlook.ts index a0c5836a220..7e32461060d 100644 --- a/apps/sim/connectors/outlook/outlook.ts +++ b/apps/sim/connectors/outlook/outlook.ts @@ -407,7 +407,8 @@ export const outlookConnector: ConnectorConfig = { continue } - const convId = msg.conversationId || msg.id + if (!msg.conversationId) continue + const convId = msg.conversationId if (!conversations[convId]) { conversations[convId] = [] } From 44b23c2b473b00eb97e95662653b1f251a381ac6 Mon Sep 17 00:00:00 2001 From: Waleed Latif Date: Wed, 8 Apr 2026 00:48:56 -0700 Subject: [PATCH 5/6] fix(kb): scope outlook getDocument to same folder as listDocuments to prevent hash divergence --- apps/sim/connectors/outlook/outlook.ts | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/apps/sim/connectors/outlook/outlook.ts b/apps/sim/connectors/outlook/outlook.ts index 7e32461060d..d36635d6d9b 100644 --- a/apps/sim/connectors/outlook/outlook.ts +++ b/apps/sim/connectors/outlook/outlook.ts @@ -488,14 +488,25 @@ export const outlookConnector: ConnectorConfig = { externalId: string ): Promise => { try { - // Fetch messages for this conversation + // Scope to the same folder as listDocuments so contentHash stays consistent + const folder = (sourceConfig.folder as string) || 'inbox' + const basePath = + folder === 'all' + ? `${GRAPH_API_BASE}/messages` + : `${GRAPH_API_BASE}/mailFolders/${WELL_KNOWN_FOLDERS[folder] || folder}/messages` + + const filterParts = [ + `conversationId eq '${externalId.replace(/'/g, "''")}'`, + 'isDraft eq false', + ] + const params = new URLSearchParams({ - $filter: `conversationId eq '${externalId.replace(/'/g, "''")}'`, + $filter: filterParts.join(' and '), $select: FULL_MESSAGE_FIELDS, $top: '250', }) - const url = `${GRAPH_API_BASE}/messages?${params.toString()}` + const url = `${basePath}?${params.toString()}` const response = await fetchWithRetry(url, { method: 'GET', From b1d5279745175b57df22bbeb8d4806a6e48a3cea Mon Sep 17 00:00:00 2001 From: Waleed Latif Date: Wed, 8 Apr 2026 00:57:02 -0700 Subject: [PATCH 6/6] fix(kb): add missing connector sync cron job to Helm values The connector sync endpoint existed but had no cron job configured to trigger it, meaning scheduled syncs would never fire. Co-Authored-By: Claude Opus 4.6 --- helm/sim/values.yaml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/helm/sim/values.yaml b/helm/sim/values.yaml index b7b32cb4c4e..2e596c69bcf 100644 --- a/helm/sim/values.yaml +++ b/helm/sim/values.yaml @@ -985,6 +985,15 @@ cronjobs: successfulJobsHistoryLimit: 3 failedJobsHistoryLimit: 1 + connectorSync: + enabled: true + name: connector-sync + schedule: "*/5 * * * *" + path: "/api/knowledge/connectors/sync" + concurrencyPolicy: Forbid + successfulJobsHistoryLimit: 3 + failedJobsHistoryLimit: 1 + # Global CronJob settings image: repository: curlimages/curl