/**
 * Script to sync database with MinIO
 *
 * Removes document references from database for files that no longer exist in MinIO
 */
const { S3Client, ListObjectsV2Command, HeadObjectCommand } = require('@aws-sdk/client-s3');
const { Client } = require('pg');
require('dotenv').config();

const MINIO_ENDPOINT = process.env.AWS_S3_ENDPOINT || 'http://localhost:9000';
const BUCKET_NAME = 'xpeditis-documents';

// Initialize MinIO client
const s3Client = new S3Client({
  region: 'us-east-1',
  endpoint: MINIO_ENDPOINT,
  credentials: {
    accessKeyId: process.env.AWS_ACCESS_KEY_ID || 'minioadmin',
    secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY || 'minioadmin',
  },
  forcePathStyle: true,
});

async function syncDatabase() {
  const pgClient = new Client({
    host: process.env.DATABASE_HOST || 'localhost',
    port: process.env.DATABASE_PORT || 5432,
    user: process.env.DATABASE_USER || 'xpeditis',
    password: process.env.DATABASE_PASSWORD || 'xpeditis_dev_password',
    database: process.env.DATABASE_NAME || 'xpeditis_dev',
  });

  try {
    await pgClient.connect();
    console.log('āœ… Connected to database\n');

    // Get all MinIO files
    console.log('šŸ“‹ Listing files in MinIO...');
    let allMinioFiles = [];
    let continuationToken = null;

    do {
      const command = new ListObjectsV2Command({
        Bucket: BUCKET_NAME,
        ContinuationToken: continuationToken,
      });
      const response = await s3Client.send(command);

      if (response.Contents) {
        allMinioFiles = allMinioFiles.concat(response.Contents.map(f => f.Key));
      }

      continuationToken = response.NextContinuationToken;
    } while (continuationToken);

    console.log(`   Found ${allMinioFiles.length} files in MinIO\n`);

    // Create a set for faster lookup
    const minioFilesSet = new Set(allMinioFiles);

    // Get all bookings with documents
    const result = await pgClient.query(
      `SELECT id, documents
       FROM csv_bookings
       WHERE documents IS NOT NULL
         AND jsonb_array_length(documents::jsonb) > 0`
    );

    console.log(`šŸ“„ Found ${result.rows.length} bookings with documents in database\n`);

    let updatedCount = 0;
    let removedDocsCount = 0;
    let emptyBookingsCount = 0;

    for (const row of result.rows) {
      const bookingId = row.id;
      const documents = row.documents;

      // Filter documents to keep only those that exist in MinIO
      const validDocuments = [];
      const missingDocuments = [];

      for (const doc of documents) {
        if (!doc.filePath) {
          missingDocuments.push(doc);
          continue;
        }

        // Extract the S3 key from the URL
        try {
          const url = new URL(doc.filePath);
          const pathname = url.pathname;
          // Remove leading slash and bucket name
          const key = pathname.substring(1).replace(`${BUCKET_NAME}/`, '');

          if (minioFilesSet.has(key)) {
            validDocuments.push(doc);
          } else {
            missingDocuments.push(doc);
          }
        } catch (error) {
          console.error(`   āš ļø Invalid URL for booking ${bookingId}: ${doc.filePath}`);
          missingDocuments.push(doc);
        }
      }

      if (missingDocuments.length > 0) {
        console.log(`\nšŸ“¦ Booking: ${bookingId.substring(0, 8)}...`);
        console.log(`   Total documents: ${documents.length}`);
        console.log(`   Valid documents: ${validDocuments.length}`);
        console.log(`   Missing documents: ${missingDocuments.length}`);

        missingDocuments.forEach(doc => {
          console.log(`      āŒ ${doc.fileName || 'Unknown'}`);
        });

        // Update the database
        await pgClient.query(
          `UPDATE csv_bookings SET documents = $1 WHERE id = $2`,
          [JSON.stringify(validDocuments), bookingId]
        );

        updatedCount++;
        removedDocsCount += missingDocuments.length;

        if (validDocuments.length === 0) {
          emptyBookingsCount++;
          console.log(`   āš ļø This booking now has NO documents`);
        }
      }
    }

    console.log(`\nšŸ“Š Summary:`);
    console.log(`   Bookings updated: ${updatedCount}`);
    console.log(`   Documents removed from DB: ${removedDocsCount}`);
    console.log(`   Bookings with no documents: ${emptyBookingsCount}`);
    console.log(`\nāœ… Database synchronized with MinIO`);
  } catch (error) {
    console.error('āŒ Error:', error);
    throw error;
  } finally {
    await pgClient.end();
    console.log('\nšŸ‘‹ Disconnected from database');
  }
}

syncDatabase()
  .then(() => {
    console.log('\nāœ… Script completed successfully');
    process.exit(0);
  })
  .catch((error) => {
    console.error('\nāŒ Script failed:', error);
    process.exit(1);
  });
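// Usage sketch (the file name below is an assumption, not part of the original script):
// with the database and MinIO credentials set via the environment variables read above
// (or a local .env file), the script can be run directly with Node, e.g.
//
//   node sync-database-with-minio.js
//
// It only removes document references from csv_bookings rows whose files are missing
// in the xpeditis-documents bucket; it does not delete anything from MinIO itself.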