From 8defb21c1a79b357f8becdbd7c736d1bc0cf1481 Mon Sep 17 00:00:00 2001 From: Paurikova2 Date: Wed, 21 May 2025 15:44:29 +0300 Subject: [PATCH 01/33] get name and size from metadata and header of file, avoid input stream using --- .../dspace/content/BitstreamServiceImpl.java | 9 + .../content/PreviewContentServiceImpl.java | 392 ++++++++++++++++-- .../content/service/BitstreamService.java | 13 + .../service/PreviewContentService.java | 8 +- .../storage/bitstore/BitStoreService.java | 11 + .../bitstore/BitstreamStorageServiceImpl.java | 8 + .../storage/bitstore/DSBitStoreService.java | 2 +- .../storage/bitstore/S3BitStoreService.java | 23 + .../service/BitstreamStorageService.java | 13 + 9 files changed, 432 insertions(+), 47 deletions(-) diff --git a/dspace-api/src/main/java/org/dspace/content/BitstreamServiceImpl.java b/dspace-api/src/main/java/org/dspace/content/BitstreamServiceImpl.java index 6b6b14a1def1..abe892a058af 100644 --- a/dspace-api/src/main/java/org/dspace/content/BitstreamServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/content/BitstreamServiceImpl.java @@ -7,6 +7,7 @@ */ package org.dspace.content; +import java.io.File; import java.io.IOException; import java.io.InputStream; import java.sql.SQLException; @@ -315,6 +316,14 @@ public InputStream retrieve(Context context, Bitstream bitstream) return bitstreamStorageService.retrieve(context, bitstream); } + @Override + public File retrieveFile(Context context, Bitstream bitstream) + throws IOException, SQLException, AuthorizeException { + // Maybe should return AuthorizeException?? 
+ authorizeService.authorizeAction(context, bitstream, Constants.READ); + return bitstreamStorageService.retrieveFile(context, bitstream); + } + @Override public boolean isRegisteredBitstream(Bitstream bitstream) { return bitstreamStorageService.isRegisteredBitstream(bitstream.getInternalId()); diff --git a/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java b/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java index 371d5abf30b7..3e2efd36d365 100644 --- a/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java @@ -8,10 +8,13 @@ package org.dspace.content; import java.io.BufferedReader; +import java.io.File; +import java.io.FileInputStream; import java.io.IOException; -import java.io.InputStream; import java.io.InputStreamReader; +import java.io.RandomAccessFile; import java.io.UnsupportedEncodingException; +import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.InvalidPathException; @@ -34,6 +37,7 @@ import org.apache.commons.compress.archivers.tar.TarArchiveEntry; import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; +import org.apache.tools.ant.taskdefs.Tar; import org.dspace.app.util.Util; import org.dspace.authorize.AuthorizeException; import org.dspace.authorize.MissingLicenseAgreementException; @@ -83,6 +87,25 @@ public class PreviewContentServiceImpl implements PreviewContentService { @Autowired BitstreamService bitstreamService; + private static class EOCDRecord { + long totalEntries; + long centralDirectoryOffset; + + EOCDRecord(long totalEntries, long centralDirectoryOffset) { + this.totalEntries = totalEntries; + this.centralDirectoryOffset = centralDirectoryOffset; + } + } + + private static class TarHeader { + final String fileName; + final long fileSize; + + TarHeader(String fileName, long fileSize) { + this.fileName = 
fileName; + this.fileSize = fileSize; + } + } @Override public PreviewContent create(Context context, Bitstream bitstream, String name, String content, @@ -153,14 +176,14 @@ public boolean canPreview(Context context, Bitstream bitstream) throws SQLExcept @Override public List getFilePreviewContent(Context context, Bitstream bitstream) throws Exception { - InputStream inputStream = null; + File file = null; List fileInfos = null; try { - inputStream = bitstreamService.retrieve(context, bitstream); + file = bitstreamService.retrieveFile(context, bitstream); } catch (MissingLicenseAgreementException e) { /* Do nothing */ } - if (Objects.nonNull(inputStream)) { - fileInfos = processInputStreamToFilePreview(context, bitstream, inputStream); + if (Objects.nonNull(file)) { + fileInfos = processFileToFilePreview(context, bitstream, file); } return fileInfos; } @@ -187,8 +210,8 @@ public FileInfo createFileInfo(PreviewContent pc) { } @Override - public List processInputStreamToFilePreview(Context context, Bitstream bitstream, - InputStream inputStream) + public List processFileToFilePreview(Context context, Bitstream bitstream, + File file) throws Exception { List fileInfos = new ArrayList<>(); String bitstreamMimeType = bitstream.getFormat(context).getMIMEType(); @@ -198,10 +221,10 @@ public List processInputStreamToFilePreview(Context context, Bitstream "database. 
This could cause the ZIP file to be previewed as a text file, potentially leading" + " to a database error."); } - String data = getFileContent(inputStream, true); + String data = getFileContent(file, true); fileInfos.add(new FileInfo(data, false)); } else if (bitstreamMimeType.equals("text/html")) { - String data = getFileContent(inputStream, false); + String data = getFileContent(file, false); fileInfos.add(new FileInfo(data, false)); } else { String data = ""; @@ -212,7 +235,7 @@ public List processInputStreamToFilePreview(Context context, Bitstream String mimeType = bitstream.getFormat(context).getMIMEType(); if (archiveTypes.containsKey(mimeType)) { - data = extractFile(inputStream, archiveTypes.get(mimeType)); + data = extractFile(file, archiveTypes.get(mimeType)); fileInfos = FileTreeViewGenerator.parse(data); } } @@ -309,38 +332,322 @@ private void addFilePath(List filePaths, String path, long size) { /** * Processes a TAR file, extracting its entries and adding their paths to the provided list. 
* @param filePaths the list to populate with the extracted file paths - * @param inputStream the TAR file data + * @param file the TAR file data * @throws IOException if an I/O error occurs while reading the TAR file */ - private void processTarFile(List filePaths, InputStream inputStream) throws IOException { - try (TarArchiveInputStream tis = new TarArchiveInputStream(inputStream)) { - TarArchiveEntry entry; - while ((entry = tis.getNextTarEntry()) != null) { - if (!entry.isDirectory()) { - // Add the file path and its size (from the TAR entry) - addFilePath(filePaths, entry.getName(), entry.getSize()); + private void processTarFile(List filePaths, File file) throws IOException { + try (RandomAccessFile raf = new RandomAccessFile(file, "r")) { + long fileSize = raf.length(); + byte[] buffer = new byte[512]; // TAR header size is always 512 bytes + long currentPos = 0; + + while (currentPos < fileSize) { + // Read the next 512-byte header + raf.seek(currentPos); + raf.readFully(buffer); + + // Parse the header to extract file metadata + TarHeader header = parseTarHeader(buffer); + + if (header == null || header.fileName.isEmpty()) { + break; // End of archive (empty header) + } + + // Handle the file metadata + long fileContentSize = header.fileSize; + String fileName = header.fileName; + + // Move to the file content position + currentPos += 512; // Move past the header + byte[] fileContent = new byte[(int) fileContentSize]; + raf.readFully(fileContent); // Read the file content + + // Add the file to the list (or process it further) + addFilePath(filePaths, fileName, fileContentSize); + + // Move to the next file's header (file content is padded to 512-byte boundary) + currentPos += (fileContentSize + 511) / 512 * 512; + } + } + } + + /** + * Parse the 512-byte TAR header. 
+ * + * @param headerBytes the header block (512 bytes) + * @return a TarHeader object containing file metadata + */ + private TarHeader parseTarHeader(byte[] headerBytes) { + // Extract the file name (first 100 bytes) + String fileName = new String(headerBytes, 0, 100, StandardCharsets.US_ASCII).trim(); + + // If the file name is empty, we've reached the end of the archive + if (fileName.isEmpty()) { + return null; + } + + // Extract the file size (octal value in bytes 124-135) + String sizeStr = new String(headerBytes, 124, 12, StandardCharsets.US_ASCII).trim(); + long fileSize = Long.parseLong(sizeStr, 8); // TAR file sizes are stored in octal + + return new TarHeader(fileName, fileSize); + } + + /** + * Parses a ZIP file and extracts the names and sizes of its entries. + * Handles standard ZIP and ZIP64 formats for large files or archives with many entries. + * + * @param filePaths the list to populate with entry names + * @param file the ZIP file to read + * @throws IOException if the file is invalid or cannot be read + */ + public void processZipFile(List filePaths, File file) throws IOException { + try (RandomAccessFile raf = new RandomAccessFile(file, "r")) { + EOCDRecord eocd = findEOCD(raf); + if (eocd == null) { + throw new IOException("End of Central Directory not found. 
Not a valid ZIP file: " + file.getName()); + } + + // Seek to the Central Directory offset + raf.seek(eocd.centralDirectoryOffset); + + // Loop through all entries in the Central Directory + for (long i = 0; i < eocd.totalEntries; i++) { + long currentEntryStart = raf.getFilePointer(); // Track entry position + + int signature = readIntLE(raf); + if (signature != 0x02014b50) { // Central directory file header + throw new IOException("Invalid central directory signature at entry " + i + + " (offset: " + currentEntryStart + ")"); + } + + raf.skipBytes(2); // Version made by + raf.skipBytes(2); // Version needed to extract + int generalPurposeBitFlag = readShortLE(raf); + + raf.skipBytes(2); // Compression method + raf.skipBytes(2); // File modification time + raf.skipBytes(2); // File modification date + raf.skipBytes(4); // CRC-32 + + // Read compressed/uncompressed sizes (can be -1 if ZIP64 is used) + int compressedSize32 = readIntLE(raf); + int uncompressedSize32 = readIntLE(raf); + + int fileNameLength = readShortLE(raf); + int extraFieldLength = readShortLE(raf); + int fileCommentLength = readShortLE(raf); + + raf.skipBytes(2); // Disk number start + raf.skipBytes(2); // Internal file attributes + raf.skipBytes(4); // External file attributes + + int relativeOffset32 = readIntLE(raf); // Relative offset of local header + + // Read file name + byte[] nameBytes = new byte[fileNameLength]; + raf.readFully(nameBytes); + + // Determine character set (bit 11 = UTF-8) + Charset charset = (generalPurposeBitFlag & (1 << 11)) != 0 + ? StandardCharsets.UTF_8 + : Charset.forName("IBM437"); + String name = new String(nameBytes, charset); + + // Default values for final sizes and offset (use 64-bit to avoid overflow) + long finalCompressedSize = compressedSize32 & 0xFFFFFFFFL; + long finalUncompressedSize = uncompressedSize32 & 0xFFFFFFFFL; + long finalRelativeOffset = relativeOffset32 & 0xFFFFFFFFL; + + // Read extra fields (e.g. 
ZIP64 extended information) + long afterFileNamePos = raf.getFilePointer(); + byte[] extraFieldBytes = new byte[extraFieldLength]; + if (extraFieldLength > 0) { + raf.readFully(extraFieldBytes); } + + // ZIP64 is used when sizes or offsets are too large for 32-bit integers + if (compressedSize32 == -1 || uncompressedSize32 == -1 || relativeOffset32 == -1) { + int pointer = 0; + while (pointer + 4 <= extraFieldLength) { + int headerId = (extraFieldBytes[pointer] & 0xFF) | ((extraFieldBytes[pointer + 1] & 0xFF) << 8); + int dataSize = (extraFieldBytes[pointer + 2] & 0xFF) | ((extraFieldBytes[pointer + 3] & 0xFF) << 8); + + if (pointer + 4 + dataSize > extraFieldLength) { + System.err.println("Warning: Malformed extra field with ID 0x" + Integer.toHexString(headerId)); + break; + } + + if (headerId == 0x0001) { // ZIP64 Extended Information + int offset = pointer + 4; + int bytesRead = 0; + + if (uncompressedSize32 == -1 && bytesRead + 8 <= dataSize) { + finalUncompressedSize = parseLongLE(extraFieldBytes, offset + bytesRead); + bytesRead += 8; + } + if (compressedSize32 == -1 && bytesRead + 8 <= dataSize) { + finalCompressedSize = parseLongLE(extraFieldBytes, offset + bytesRead); + bytesRead += 8; + } + if (relativeOffset32 == -1 && bytesRead + 8 <= dataSize) { + finalRelativeOffset = parseLongLE(extraFieldBytes, offset + bytesRead); + bytesRead += 8; + } + break; + } + + pointer += (4 + dataSize); + } + } + + // Skip comment field + raf.seek(afterFileNamePos + extraFieldLength); + raf.skipBytes(fileCommentLength); + + // Pass extracted file entry to callback + addFilePath(filePaths, name, finalUncompressedSize); } } } /** - * Processes a ZIP file, extracting its entries and adding their paths to the provided list. - * @param filePaths the list to populate with the extracted file paths - * @param inputStream the ZIP file data - * @throws IOException if an I/O error occurs while reading the ZIP file + * Reads a 4-byte little-endian integer from a stream. 
+ * + * @param raf the RandomAccessFile to read from + * @return the 32-bit integer value read (interpreted in little-endian order) + * @throws IOException if the stream ends unexpectedly before reading 4 bytes */ - private void processZipFile(List filePaths, InputStream inputStream) throws IOException { - try (ZipInputStream zipInputStream = new ZipInputStream(inputStream)) { - ZipEntry entry; - while ((entry = zipInputStream.getNextEntry()) != null) { - if (!entry.isDirectory()) { - // Add the file path and its size (from the ZIP entry) - long fileSize = entry.getSize(); - addFilePath(filePaths, entry.getName(), fileSize); + private int readIntLE(RandomAccessFile raf) throws IOException { + int b1 = raf.read(); if (b1 == -1) throw new IOException("Unexpected EOF while reading int (byte 1)"); + int b2 = raf.read(); if (b2 == -1) throw new IOException("Unexpected EOF while reading int (byte 2)"); + int b3 = raf.read(); if (b3 == -1) throw new IOException("Unexpected EOF while reading int (byte 3)"); + int b4 = raf.read(); if (b4 == -1) throw new IOException("Unexpected EOF while reading int (byte 4)"); + return (b1 & 0xFF) | ((b2 & 0xFF) << 8) | ((b3 & 0xFF) << 16) | ((b4 & 0xFF) << 24); + } + + /** + * Reads a 2-byte little-endian short from a stream. + * + * @param raf the RandomAccessFile to read from + * @return the 16-bit integer value read (interpreted in little-endian order) + * @throws IOException if the stream ends unexpectedly before reading 2 bytes + */ + private int readShortLE(RandomAccessFile raf) throws IOException { + int b1 = raf.read(); if (b1 == -1) throw new IOException("Unexpected EOF while reading short (byte 1)"); + int b2 = raf.read(); if (b2 == -1) throw new IOException("Unexpected EOF while reading short (byte 2)"); + return (b1 & 0xFF) | ((b2 & 0xFF) << 8); + } + + /** + * Reads an 8-byte little-endian long from a stream. 
+ * + * @param raf the RandomAccessFile to read from + * @return the 64-bit long value read (interpreted in little-endian order) + * @throws IOException if the stream ends unexpectedly before reading 8 bytes + */ + private long readLongLE(RandomAccessFile raf) throws IOException { + long b1 = raf.read(); if (b1 == -1) throw new IOException("Unexpected EOF while reading long (byte 1)"); + long b2 = raf.read(); if (b2 == -1) throw new IOException("Unexpected EOF while reading long (byte 2)"); + long b3 = raf.read(); if (b3 == -1) throw new IOException("Unexpected EOF while reading long (byte 3)"); + long b4 = raf.read(); if (b4 == -1) throw new IOException("Unexpected EOF while reading long (byte 4)"); + long b5 = raf.read(); if (b5 == -1) throw new IOException("Unexpected EOF while reading long (byte 5)"); + long b6 = raf.read(); if (b6 == -1) throw new IOException("Unexpected EOF while reading long (byte 6)"); + long b7 = raf.read(); if (b7 == -1) throw new IOException("Unexpected EOF while reading long (byte 7)"); + long b8 = raf.read(); if (b8 == -1) throw new IOException("Unexpected EOF while reading long (byte 8)"); + + return (b1 & 0xFFL) | + ((b2 & 0xFFL) << 8) | + ((b3 & 0xFFL) << 16) | + ((b4 & 0xFFL) << 24) | + ((b5 & 0xFFL) << 32) | + ((b6 & 0xFFL) << 40) | + ((b7 & 0xFFL) << 48) | + ((b8 & 0xFFL) << 56); + } + + /** + * Reads an 8-byte little-endian long from a byte array. 
+ * + * @param bytes the byte array containing the long value + * @param offset the starting index in the array + * @return the 64-bit long value parsed (interpreted in little-endian order) + * @throws IndexOutOfBoundsException if there are not enough bytes from offset + */ + private long parseLongLE(byte[] bytes, int offset) { + return (long) (bytes[offset] & 0xFF) | + ((long) (bytes[offset + 1] & 0xFF) << 8) | + ((long) (bytes[offset + 2] & 0xFF) << 16) | + ((long) (bytes[offset + 3] & 0xFF) << 24) | + ((long) (bytes[offset + 4] & 0xFF) << 32) | + ((long) (bytes[offset + 5] & 0xFF) << 40) | + ((long) (bytes[offset + 6] & 0xFF) << 48) | + ((long) (bytes[offset + 7] & 0xFF) << 56); + } + + /** + * Finds the End Of Central Directory (EOCD) record by scanning backward in the file. + * Supports standard and ZIP64 formats. + * + * @param raf the RandomAccessFile positioned at the start of the ZIP file + * @return an EOCDRecord containing the total number of entries and central directory offset, or null if not found + * @throws IOException if an I/O error occurs or if the EOCD or ZIP64 EOCD structure is invalid + */ + private EOCDRecord findEOCD(RandomAccessFile raf) throws IOException { + long fileLength = raf.length(); + // Scan up to 64KB + 20 bytes (ZIP64 EOCD Locator) + 56 bytes (ZIP64 EOCD) for safety + long scanRange = Math.min(fileLength, 65536L + 20L + 56L); + byte[] buffer = new byte[(int) scanRange]; // Cast to int is safe because scanRange is capped + + raf.seek(fileLength - scanRange); + raf.readFully(buffer); + + // First, search for the standard EOCD signature (0x06054b50) backwards + for (int i = buffer.length - 4; i >= 0; i--) { + if ((buffer[i] & 0xFF) == 0x50 && + (buffer[i + 1] & 0xFF) == 0x4b && + (buffer[i + 2] & 0xFF) == 0x05 && + (buffer[i + 3] & 0xFF) == 0x06) { + + if (i + 22 > buffer.length) continue; // Avoid out-of-bounds read + + int totalEntriesOnDisk16 = (buffer[i + 8] & 0xFF) | ((buffer[i + 9] & 0xFF) << 8); + int totalEntries16 = 
(buffer[i + 10] & 0xFF) | ((buffer[i + 11] & 0xFF) << 8); + int cdOffset32 = (buffer[i + 16] & 0xFF) | + ((buffer[i + 17] & 0xFF) << 8) | + ((buffer[i + 18] & 0xFF) << 16) | + ((buffer[i + 19] & 0xFF) << 24); + + boolean isZip64 = totalEntries16 == 0xFFFF || cdOffset32 == -1 || totalEntriesOnDisk16 == 0xFFFF; + + if (isZip64) { + int zip64LocatorStart = i - 20; + if (zip64LocatorStart >= 0 && + (buffer[zip64LocatorStart] & 0xFF) == 0x50 && + (buffer[zip64LocatorStart + 1] & 0xFF) == 0x4b && + (buffer[zip64LocatorStart + 2] & 0xFF) == 0x06 && + (buffer[zip64LocatorStart + 3] & 0xFF) == 0x07) { + + long zip64EOCDOffset = parseLongLE(buffer, zip64LocatorStart + 8); + raf.seek(zip64EOCDOffset); + + if (readIntLE(raf) != 0x06064b50) { + throw new IOException("Invalid ZIP64 EOCD signature."); + } + + raf.skipBytes(8 + 2 + 2 + 4 + 4 + 8); // Skip ahead + long totalEntries64 = readLongLE(raf); + raf.skipBytes(8); + long cdOffset64 = readLongLE(raf); + + return new EOCDRecord(totalEntries64, cdOffset64); + } + } else { + return new EOCDRecord(totalEntries16, cdOffset32); } } } + return null; // Standard EOCD signature not found in the scanned range } /** @@ -378,43 +685,43 @@ private String buildXmlResponse(List filePaths) { /** * Processes file data based on the specified file type (tar or zip), * and returns an XML representation of the file paths. 
- * @param inputStream the InputStream containing the file data + * @param file the InputStream containing the file data * @param fileType the type of file to extract ("tar" or "zip") * @return an XML string representing the extracted file paths */ - private String extractFile(InputStream inputStream, String fileType) throws Exception { + private String extractFile(File file, String fileType) throws Exception { List filePaths = new ArrayList<>(); // Process the file based on its type if (ARCHIVE_TYPE_TAR.equals(fileType)) { - processTarFile(filePaths, inputStream); + processTarFile(filePaths, file); } else { - processZipFile(filePaths, inputStream); + processZipFile(filePaths, file); } return buildXmlResponse(filePaths); } /** * Read input stream and return content as String - * @param inputStream to read + * @param file to read * @return content of the inputStream as a String * @throws IOException */ - private String getFileContent(InputStream inputStream, boolean cutResult) throws IOException { + private String getFileContent(File file, boolean cutResult) throws IOException { StringBuilder content = new StringBuilder(); - // Generate the preview content in the UTF-8 encoding - BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8)); - try { + + try (BufferedReader reader = new BufferedReader( + new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8))) { + String line; while ((line = reader.readLine()) != null) { content.append(line).append("\n"); } - } catch (UnsupportedEncodingException e) { - log.error("UnsupportedEncodingException during creating the preview content because: ", e); + } catch (IOException e) { log.error("IOException during creating the preview content because: ", e); + throw e; // Optional: rethrow if you want the exception to propagate } - reader.close(); return cutResult ? 
ensureMaxLength(content.toString()) : content.toString(); } @@ -439,3 +746,4 @@ private static String ensureMaxLength(String input) { } } } + diff --git a/dspace-api/src/main/java/org/dspace/content/service/BitstreamService.java b/dspace-api/src/main/java/org/dspace/content/service/BitstreamService.java index 8effabf28435..b48174b68339 100644 --- a/dspace-api/src/main/java/org/dspace/content/service/BitstreamService.java +++ b/dspace-api/src/main/java/org/dspace/content/service/BitstreamService.java @@ -7,6 +7,7 @@ */ package org.dspace.content.service; +import java.io.File; import java.io.IOException; import java.io.InputStream; import java.sql.SQLException; @@ -166,6 +167,18 @@ public Bitstream register(Context context, int assetstore, String bitstreamPath) public InputStream retrieve(Context context, Bitstream bitstream) throws IOException, SQLException, AuthorizeException; + /** + * Retrieve the contents of the bitstream + * + * @param context DSpace context object + * @param bitstream DSpace bitstream + * @return a File from which the bitstream can be read. + * @throws IOException if IO error + * @throws SQLException if database error + * @throws AuthorizeException if authorization error + */ + public File retrieveFile(Context context, Bitstream bitstream) throws IOException, SQLException, AuthorizeException; + /** * Determine if this bitstream is registered (available elsewhere on * filesystem than in assetstore). 
More about registered items: diff --git a/dspace-api/src/main/java/org/dspace/content/service/PreviewContentService.java b/dspace-api/src/main/java/org/dspace/content/service/PreviewContentService.java index 8a27f61844c7..481eec9b813d 100644 --- a/dspace-api/src/main/java/org/dspace/content/service/PreviewContentService.java +++ b/dspace-api/src/main/java/org/dspace/content/service/PreviewContentService.java @@ -7,7 +7,7 @@ */ package org.dspace.content.service; -import java.io.InputStream; +import java.io.File; import java.sql.SQLException; import java.util.List; import java.util.Map; @@ -143,13 +143,13 @@ PreviewContent create(Context context, Bitstream bitstream, String name, String FileInfo createFileInfo(PreviewContent pc); /** - * Convert InputStream of the ZIP file into FileInfo classes. + * Convert File of the ZIP file into FileInfo classes. * * @param context DSpace context object * @param bitstream previewing bitstream - * @param inputStream content of the zip file + * @param file content of the zip file * @return List of FileInfo classes where is wrapped ZIP file content */ - List processInputStreamToFilePreview(Context context, Bitstream bitstream, InputStream inputStream) + List processFileToFilePreview(Context context, Bitstream bitstream, File file) throws Exception; } diff --git a/dspace-api/src/main/java/org/dspace/storage/bitstore/BitStoreService.java b/dspace-api/src/main/java/org/dspace/storage/bitstore/BitStoreService.java index 5a02ad1d5617..d29be58d6f43 100644 --- a/dspace-api/src/main/java/org/dspace/storage/bitstore/BitStoreService.java +++ b/dspace-api/src/main/java/org/dspace/storage/bitstore/BitStoreService.java @@ -7,6 +7,7 @@ */ package org.dspace.storage.bitstore; +import java.io.File; import java.io.IOException; import java.io.InputStream; import java.util.List; @@ -45,6 +46,16 @@ public interface BitStoreService { */ public InputStream get(Bitstream bitstream) throws IOException; + /** + * Retrieve the bits for bitstream + * + * 
@param bitstream DSpace Bitstream object + * @return The File + * @throws java.io.IOException If a problem occurs while retrieving the bits, or if no + * asset with ID exists in the store + */ + public File getFile(Bitstream bitstream) throws IOException; + /** * Store a stream of bits. * diff --git a/dspace-api/src/main/java/org/dspace/storage/bitstore/BitstreamStorageServiceImpl.java b/dspace-api/src/main/java/org/dspace/storage/bitstore/BitstreamStorageServiceImpl.java index 3539496b1466..1a018ef7ad1c 100644 --- a/dspace-api/src/main/java/org/dspace/storage/bitstore/BitstreamStorageServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/storage/bitstore/BitstreamStorageServiceImpl.java @@ -7,6 +7,7 @@ */ package org.dspace.storage.bitstore; +import java.io.File; import java.io.IOException; import java.io.InputStream; import java.sql.SQLException; @@ -214,6 +215,13 @@ public InputStream retrieve(Context context, Bitstream bitstream) return this.getStore(storeNumber).get(bitstream); } + @Override + public File retrieveFile(Context context, Bitstream bitstream) + throws IOException { + Integer storeNumber = bitstream.getStoreNumber(); + return this.getStore(storeNumber).getFile(bitstream); + } + @Override public void cleanup(boolean deleteDbRecords, boolean verbose) throws SQLException, IOException, AuthorizeException { Context context = new Context(Context.Mode.BATCH_EDIT); diff --git a/dspace-api/src/main/java/org/dspace/storage/bitstore/DSBitStoreService.java b/dspace-api/src/main/java/org/dspace/storage/bitstore/DSBitStoreService.java index 6fef7365e482..52154ed7e365 100644 --- a/dspace-api/src/main/java/org/dspace/storage/bitstore/DSBitStoreService.java +++ b/dspace-api/src/main/java/org/dspace/storage/bitstore/DSBitStoreService.java @@ -208,7 +208,7 @@ private synchronized static void deleteParents(File file) { * @return The corresponding file in the file system, or null * @throws IOException If a problem occurs while determining the file */ - protected 
File getFile(Bitstream bitstream) throws IOException { + public File getFile(Bitstream bitstream) throws IOException { // Check that bitstream is not null if (bitstream == null) { return null; diff --git a/dspace-api/src/main/java/org/dspace/storage/bitstore/S3BitStoreService.java b/dspace-api/src/main/java/org/dspace/storage/bitstore/S3BitStoreService.java index c621aa6efce9..789faa622cc7 100644 --- a/dspace-api/src/main/java/org/dspace/storage/bitstore/S3BitStoreService.java +++ b/dspace-api/src/main/java/org/dspace/storage/bitstore/S3BitStoreService.java @@ -305,6 +305,29 @@ public InputStream get(Bitstream bitstream) throws IOException { } } + @Override + public File getFile(Bitstream bitstream) throws IOException { + String key = getFullKey(bitstream.getInternalId()); + // Strip -R from bitstream key if it's registered + if (isRegisteredBitstream(key)) { + key = key.substring(REGISTERED_FLAG.length()); + } + try { + File tempFile = File.createTempFile("s3-disk-copy-" + UUID.randomUUID(), "temp"); + tempFile.deleteOnExit(); + + GetObjectRequest getObjectRequest = new GetObjectRequest(bucketName, key); + + Download download = tm.download(getObjectRequest, tempFile); + download.waitForCompletion(); + + return tempFile; + } catch (AmazonClientException | InterruptedException e) { + log.error("get(" + key + ")", e); + throw new IOException(e); + } + } + /** * Store a stream of bits. 
* diff --git a/dspace-api/src/main/java/org/dspace/storage/bitstore/service/BitstreamStorageService.java b/dspace-api/src/main/java/org/dspace/storage/bitstore/service/BitstreamStorageService.java index 7f5ed8f9129f..d98be808d8d6 100644 --- a/dspace-api/src/main/java/org/dspace/storage/bitstore/service/BitstreamStorageService.java +++ b/dspace-api/src/main/java/org/dspace/storage/bitstore/service/BitstreamStorageService.java @@ -7,6 +7,7 @@ */ package org.dspace.storage.bitstore.service; +import java.io.File; import java.io.IOException; import java.io.InputStream; import java.sql.SQLException; @@ -126,6 +127,18 @@ public UUID register(Context context, Bitstream bitstream, int assetstore, Strin public InputStream retrieve(Context context, Bitstream bitstream) throws SQLException, IOException; + /** + * Retrieve the file of the bitstream with ID. If the bitstream does not + * exist, or is marked deleted, returns null. + * + * @param context The current context + * @param bitstream The bitstream to retrieve + * @return The file, or null + * @throws IOException If a problem occurs while retrieving the bits + * @throws SQLException If a problem occurs accessing the RDBMS + */ + public File retrieveFile(Context context, Bitstream bitstream) throws SQLException, IOException; + /** * Clean up the bitstream storage area. This method deletes any bitstreams * which are more than 1 hour old and marked deleted. 
The deletions cannot From 6d55c9547d3116b91140ab5cc4d71c74da6798df Mon Sep 17 00:00:00 2001 From: Paurikova2 Date: Wed, 21 May 2025 16:01:38 +0300 Subject: [PATCH 02/33] checkstyle violations --- .../content/PreviewContentServiceImpl.java | 53 +++++++++++-------- .../bitstore/BitstreamStorageServiceImpl.java | 2 +- 2 files changed, 33 insertions(+), 22 deletions(-) diff --git a/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java b/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java index 3e2efd36d365..7d1a68715432 100644 --- a/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java @@ -32,12 +32,7 @@ import java.util.function.Function; import java.util.regex.Matcher; import java.util.regex.Pattern; -import java.util.zip.ZipEntry; -import java.util.zip.ZipInputStream; -import org.apache.commons.compress.archivers.tar.TarArchiveEntry; -import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; -import org.apache.tools.ant.taskdefs.Tar; import org.dspace.app.util.Util; import org.dspace.authorize.AuthorizeException; import org.dspace.authorize.MissingLicenseAgreementException; @@ -471,10 +466,12 @@ public void processZipFile(List filePaths, File file) throws IOException int pointer = 0; while (pointer + 4 <= extraFieldLength) { int headerId = (extraFieldBytes[pointer] & 0xFF) | ((extraFieldBytes[pointer + 1] & 0xFF) << 8); - int dataSize = (extraFieldBytes[pointer + 2] & 0xFF) | ((extraFieldBytes[pointer + 3] & 0xFF) << 8); + int dataSize = (extraFieldBytes[pointer + 2] & 0xFF) | + ((extraFieldBytes[pointer + 3] & 0xFF) << 8); if (pointer + 4 + dataSize > extraFieldLength) { - System.err.println("Warning: Malformed extra field with ID 0x" + Integer.toHexString(headerId)); + System.err.println("Warning: Malformed extra field with ID 0x" + + Integer.toHexString(headerId)); break; } @@ -519,10 +516,14 @@ public 
void processZipFile(List filePaths, File file) throws IOException * @throws IOException if the stream ends unexpectedly before reading 4 bytes */ private int readIntLE(RandomAccessFile raf) throws IOException { - int b1 = raf.read(); if (b1 == -1) throw new IOException("Unexpected EOF while reading int (byte 1)"); - int b2 = raf.read(); if (b2 == -1) throw new IOException("Unexpected EOF while reading int (byte 2)"); - int b3 = raf.read(); if (b3 == -1) throw new IOException("Unexpected EOF while reading int (byte 3)"); - int b4 = raf.read(); if (b4 == -1) throw new IOException("Unexpected EOF while reading int (byte 4)"); + int b1 = raf.read(); + if (b1 == -1) throw new IOException("Unexpected EOF while reading int (byte 1)"); + int b2 = raf.read(); + if (b2 == -1) throw new IOException("Unexpected EOF while reading int (byte 2)"); + int b3 = raf.read(); + if (b3 == -1) throw new IOException("Unexpected EOF while reading int (byte 3)"); + int b4 = raf.read(); + if (b4 == -1) throw new IOException("Unexpected EOF while reading int (byte 4)"); return (b1 & 0xFF) | ((b2 & 0xFF) << 8) | ((b3 & 0xFF) << 16) | ((b4 & 0xFF) << 24); } @@ -534,8 +535,10 @@ private int readIntLE(RandomAccessFile raf) throws IOException { * @throws IOException if the stream ends unexpectedly before reading 2 bytes */ private int readShortLE(RandomAccessFile raf) throws IOException { - int b1 = raf.read(); if (b1 == -1) throw new IOException("Unexpected EOF while reading short (byte 1)"); - int b2 = raf.read(); if (b2 == -1) throw new IOException("Unexpected EOF while reading short (byte 2)"); + int b1 = raf.read(); + if (b1 == -1) throw new IOException("Unexpected EOF while reading short (byte 1)"); + int b2 = raf.read(); + if (b2 == -1) throw new IOException("Unexpected EOF while reading short (byte 2)"); return (b1 & 0xFF) | ((b2 & 0xFF) << 8); } @@ -547,14 +550,22 @@ private int readShortLE(RandomAccessFile raf) throws IOException { * @throws IOException if the stream ends unexpectedly 
before reading 8 bytes */ private long readLongLE(RandomAccessFile raf) throws IOException { - long b1 = raf.read(); if (b1 == -1) throw new IOException("Unexpected EOF while reading long (byte 1)"); - long b2 = raf.read(); if (b2 == -1) throw new IOException("Unexpected EOF while reading long (byte 2)"); - long b3 = raf.read(); if (b3 == -1) throw new IOException("Unexpected EOF while reading long (byte 3)"); - long b4 = raf.read(); if (b4 == -1) throw new IOException("Unexpected EOF while reading long (byte 4)"); - long b5 = raf.read(); if (b5 == -1) throw new IOException("Unexpected EOF while reading long (byte 5)"); - long b6 = raf.read(); if (b6 == -1) throw new IOException("Unexpected EOF while reading long (byte 6)"); - long b7 = raf.read(); if (b7 == -1) throw new IOException("Unexpected EOF while reading long (byte 7)"); - long b8 = raf.read(); if (b8 == -1) throw new IOException("Unexpected EOF while reading long (byte 8)"); + long b1 = raf.read(); + if (b1 == -1) throw new IOException("Unexpected EOF while reading long (byte 1)"); + long b2 = raf.read(); + if (b2 == -1) throw new IOException("Unexpected EOF while reading long (byte 2)"); + long b3 = raf.read(); + if (b3 == -1) throw new IOException("Unexpected EOF while reading long (byte 3)"); + long b4 = raf.read(); + if (b4 == -1) throw new IOException("Unexpected EOF while reading long (byte 4)"); + long b5 = raf.read(); + if (b5 == -1) throw new IOException("Unexpected EOF while reading long (byte 5)"); + long b6 = raf.read(); + if (b6 == -1) throw new IOException("Unexpected EOF while reading long (byte 6)"); + long b7 = raf.read(); + if (b7 == -1) throw new IOException("Unexpected EOF while reading long (byte 7)"); + long b8 = raf.read(); + if (b8 == -1) throw new IOException("Unexpected EOF while reading long (byte 8)"); return (b1 & 0xFFL) | ((b2 & 0xFFL) << 8) | diff --git a/dspace-api/src/main/java/org/dspace/storage/bitstore/BitstreamStorageServiceImpl.java 
b/dspace-api/src/main/java/org/dspace/storage/bitstore/BitstreamStorageServiceImpl.java index 1a018ef7ad1c..85da914644df 100644 --- a/dspace-api/src/main/java/org/dspace/storage/bitstore/BitstreamStorageServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/storage/bitstore/BitstreamStorageServiceImpl.java @@ -217,7 +217,7 @@ public InputStream retrieve(Context context, Bitstream bitstream) @Override public File retrieveFile(Context context, Bitstream bitstream) - throws IOException { + throws IOException { Integer storeNumber = bitstream.getStoreNumber(); return this.getStore(storeNumber).getFile(bitstream); } From d1fa49733b85dacca0d41d1bf7a3b37648d7d3fb Mon Sep 17 00:00:00 2001 From: Paurikova2 Date: Wed, 21 May 2025 17:12:50 +0300 Subject: [PATCH 03/33] remove temp file, checkstyle, do not load full file --- .../content/PreviewContentServiceImpl.java | 125 ++++++++---------- .../storage/bitstore/S3BitStoreService.java | 2 +- 2 files changed, 59 insertions(+), 68 deletions(-) diff --git a/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java b/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java index 7d1a68715432..9ff65e08f9db 100644 --- a/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java @@ -14,6 +14,8 @@ import java.io.InputStreamReader; import java.io.RandomAccessFile; import java.io.UnsupportedEncodingException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; import java.nio.file.Files; @@ -64,6 +66,7 @@ public class PreviewContentServiceImpl implements PreviewContentService { private final String ARCHIVE_TYPE_ZIP = "zip"; private final String ARCHIVE_TYPE_TAR = "tar"; + // This constant is used to limit the length of the preview content stored in the database to prevent // the database from being overloaded with large 
amounts of data. private static final int MAX_PREVIEW_COUNT_LENGTH = 2000; @@ -169,16 +172,30 @@ public boolean canPreview(Context context, Bitstream bitstream) throws SQLExcept } @Override - public List getFilePreviewContent(Context context, Bitstream bitstream) - throws Exception { - File file = null; + public List getFilePreviewContent(Context context, Bitstream bitstream) throws Exception { List fileInfos = null; + File file = null; + try { - file = bitstreamService.retrieveFile(context, bitstream); - } catch (MissingLicenseAgreementException e) { /* Do nothing */ } + file = bitstreamService.retrieveFile(context, bitstream); // Retrieve the file - if (Objects.nonNull(file)) { - fileInfos = processFileToFilePreview(context, bitstream, file); + if (Objects.nonNull(file)) { + fileInfos = processFileToFilePreview(context, bitstream, file); + } + } catch (MissingLicenseAgreementException e) { + log.error("Missing license agreement: ", e); + throw e; + } catch (IOException e) { + log.error("IOException during file processing: ", e); + throw e; + } finally { + // Ensure the file is deleted + if (file != null && file.exists()) { + boolean deleted = file.delete(); // Delete the file to avoid leaks + if (!deleted) { + log.warn("Failed to delete temporary file: " + file.getAbsolutePath()); + } + } } return fileInfos; } @@ -335,33 +352,25 @@ private void processTarFile(List filePaths, File file) throws IOExceptio long fileSize = raf.length(); byte[] buffer = new byte[512]; // TAR header size is always 512 bytes long currentPos = 0; - while (currentPos < fileSize) { // Read the next 512-byte header raf.seek(currentPos); raf.readFully(buffer); - // Parse the header to extract file metadata TarHeader header = parseTarHeader(buffer); - if (header == null || header.fileName.isEmpty()) { break; // End of archive (empty header) } - // Handle the file metadata long fileContentSize = header.fileSize; String fileName = header.fileName; - // Move to the file content position 
currentPos += 512; // Move past the header - byte[] fileContent = new byte[(int) fileContentSize]; - raf.readFully(fileContent); // Read the file content - - // Add the file to the list (or process it further) + // Add the file to the list (only metadata needed) addFilePath(filePaths, fileName, fileContentSize); - - // Move to the next file's header (file content is padded to 512-byte boundary) - currentPos += (fileContentSize + 511) / 512 * 512; + // Skip payload and align to next header + currentPos += fileContentSize; // skip payload + currentPos = ((currentPos + 511) / 512) * 512; // align to 512-byte boundary } } } @@ -516,15 +525,12 @@ public void processZipFile(List filePaths, File file) throws IOException * @throws IOException if the stream ends unexpectedly before reading 4 bytes */ private int readIntLE(RandomAccessFile raf) throws IOException { - int b1 = raf.read(); - if (b1 == -1) throw new IOException("Unexpected EOF while reading int (byte 1)"); - int b2 = raf.read(); - if (b2 == -1) throw new IOException("Unexpected EOF while reading int (byte 2)"); - int b3 = raf.read(); - if (b3 == -1) throw new IOException("Unexpected EOF while reading int (byte 3)"); - int b4 = raf.read(); - if (b4 == -1) throw new IOException("Unexpected EOF while reading int (byte 4)"); - return (b1 & 0xFF) | ((b2 & 0xFF) << 8) | ((b3 & 0xFF) << 16) | ((b4 & 0xFF) << 24); + byte[] bytes = new byte[4]; + raf.readFully(bytes); // Zabezpečí načítanie všetkých 4 bajtov alebo hodí EOFException + + return ByteBuffer.wrap(bytes) + .order(ByteOrder.LITTLE_ENDIAN) + .getInt(); } /** @@ -534,12 +540,15 @@ private int readIntLE(RandomAccessFile raf) throws IOException { * @return the 16-bit integer value read (interpreted in little-endian order) * @throws IOException if the stream ends unexpectedly before reading 2 bytes */ - private int readShortLE(RandomAccessFile raf) throws IOException { - int b1 = raf.read(); - if (b1 == -1) throw new IOException("Unexpected EOF while reading short 
(byte 1)"); - int b2 = raf.read(); - if (b2 == -1) throw new IOException("Unexpected EOF while reading short (byte 2)"); - return (b1 & 0xFF) | ((b2 & 0xFF) << 8); + private short readShortLE(RandomAccessFile raf) throws IOException { + byte[] buffer = new byte[2]; + if (raf.read(buffer) != 2) { + throw new IOException("Unexpected EOF while reading 2-byte little-endian short"); + } + + return ByteBuffer.wrap(buffer) + .order(ByteOrder.LITTLE_ENDIAN) + .getShort(); } /** @@ -550,31 +559,14 @@ private int readShortLE(RandomAccessFile raf) throws IOException { * @throws IOException if the stream ends unexpectedly before reading 8 bytes */ private long readLongLE(RandomAccessFile raf) throws IOException { - long b1 = raf.read(); - if (b1 == -1) throw new IOException("Unexpected EOF while reading long (byte 1)"); - long b2 = raf.read(); - if (b2 == -1) throw new IOException("Unexpected EOF while reading long (byte 2)"); - long b3 = raf.read(); - if (b3 == -1) throw new IOException("Unexpected EOF while reading long (byte 3)"); - long b4 = raf.read(); - if (b4 == -1) throw new IOException("Unexpected EOF while reading long (byte 4)"); - long b5 = raf.read(); - if (b5 == -1) throw new IOException("Unexpected EOF while reading long (byte 5)"); - long b6 = raf.read(); - if (b6 == -1) throw new IOException("Unexpected EOF while reading long (byte 6)"); - long b7 = raf.read(); - if (b7 == -1) throw new IOException("Unexpected EOF while reading long (byte 7)"); - long b8 = raf.read(); - if (b8 == -1) throw new IOException("Unexpected EOF while reading long (byte 8)"); - - return (b1 & 0xFFL) | - ((b2 & 0xFFL) << 8) | - ((b3 & 0xFFL) << 16) | - ((b4 & 0xFFL) << 24) | - ((b5 & 0xFFL) << 32) | - ((b6 & 0xFFL) << 40) | - ((b7 & 0xFFL) << 48) | - ((b8 & 0xFFL) << 56); + byte[] buffer = new byte[8]; + if (raf.read(buffer) != 8) { + throw new IOException("Unexpected EOF while reading 8-byte little-endian long"); + } + + return ByteBuffer.wrap(buffer) + .order(ByteOrder.LITTLE_ENDIAN) 
+ .getLong(); } /** @@ -586,14 +578,9 @@ private long readLongLE(RandomAccessFile raf) throws IOException { * @throws IndexOutOfBoundsException if there are not enough bytes from offset */ private long parseLongLE(byte[] bytes, int offset) { - return (long) (bytes[offset] & 0xFF) | - ((long) (bytes[offset + 1] & 0xFF) << 8) | - ((long) (bytes[offset + 2] & 0xFF) << 16) | - ((long) (bytes[offset + 3] & 0xFF) << 24) | - ((long) (bytes[offset + 4] & 0xFF) << 32) | - ((long) (bytes[offset + 5] & 0xFF) << 40) | - ((long) (bytes[offset + 6] & 0xFF) << 48) | - ((long) (bytes[offset + 7] & 0xFF) << 56); + return ByteBuffer.wrap(bytes, offset, 8) + .order(ByteOrder.LITTLE_ENDIAN) + .getLong(); } /** @@ -725,6 +712,10 @@ private String getFileContent(File file, boolean cutResult) throws IOException { String line; while ((line = reader.readLine()) != null) { + if (cutResult && content.length() > MAX_PREVIEW_COUNT_LENGTH) { + content.append(" . . ."); + break; + } content.append(line).append("\n"); } diff --git a/dspace-api/src/main/java/org/dspace/storage/bitstore/S3BitStoreService.java b/dspace-api/src/main/java/org/dspace/storage/bitstore/S3BitStoreService.java index 789faa622cc7..1d553e5dbd21 100644 --- a/dspace-api/src/main/java/org/dspace/storage/bitstore/S3BitStoreService.java +++ b/dspace-api/src/main/java/org/dspace/storage/bitstore/S3BitStoreService.java @@ -323,7 +323,7 @@ public File getFile(Bitstream bitstream) throws IOException { return tempFile; } catch (AmazonClientException | InterruptedException e) { - log.error("get(" + key + ")", e); + log.error("getFile(" + key + ")", e); throw new IOException(e); } } From c52090c1d068eb0dbb0fb00f8ec7827c9b4fd5ca Mon Sep 17 00:00:00 2001 From: Paurikova2 Date: Wed, 21 May 2025 17:25:22 +0300 Subject: [PATCH 04/33] add { } after if --- .../java/org/dspace/content/PreviewContentServiceImpl.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git 
a/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java b/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java index 9ff65e08f9db..769d02db4623 100644 --- a/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java @@ -607,7 +607,9 @@ private EOCDRecord findEOCD(RandomAccessFile raf) throws IOException { (buffer[i + 2] & 0xFF) == 0x05 && (buffer[i + 3] & 0xFF) == 0x06) { - if (i + 22 > buffer.length) continue; // Avoid out-of-bounds read + if (i + 22 > buffer.length) { + continue; // Avoid out-of-bounds read + } int totalEntriesOnDisk16 = (buffer[i + 8] & 0xFF) | ((buffer[i + 9] & 0xFF) << 8); int totalEntries16 = (buffer[i + 10] & 0xFF) | ((buffer[i + 11] & 0xFF) << 8); From 8e576a72ed51befdcbfdd61d830a975bc0b41951 Mon Sep 17 00:00:00 2001 From: Paurikova2 Date: Wed, 21 May 2025 18:10:19 +0300 Subject: [PATCH 05/33] added check for max preview file --- .../content/PreviewContentServiceImpl.java | 29 +++++++++++++++---- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java b/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java index 769d02db4623..11f4bf1aa726 100644 --- a/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java @@ -353,6 +353,10 @@ private void processTarFile(List filePaths, File file) throws IOExceptio byte[] buffer = new byte[512]; // TAR header size is always 512 bytes long currentPos = 0; while (currentPos < fileSize) { + if (filePaths.size() >= maxPreviewCount) { + filePaths.add("... 
(too many files)"); + break; + } // Read the next 512-byte header raf.seek(currentPos); raf.readFully(buffer); @@ -382,17 +386,29 @@ private void processTarFile(List filePaths, File file) throws IOExceptio * @return a TarHeader object containing file metadata */ private TarHeader parseTarHeader(byte[] headerBytes) { - // Extract the file name (first 100 bytes) - String fileName = new String(headerBytes, 0, 100, StandardCharsets.US_ASCII).trim(); + // Extract null-terminated file name from first 100 bytes + int nameEnd = 0; + while (nameEnd < 100 && headerBytes[nameEnd] != 0) { + nameEnd++; + } + String fileName = new String(headerBytes, 0, nameEnd, StandardCharsets.US_ASCII); // If the file name is empty, we've reached the end of the archive if (fileName.isEmpty()) { return null; } - // Extract the file size (octal value in bytes 124-135) - String sizeStr = new String(headerBytes, 124, 12, StandardCharsets.US_ASCII).trim(); - long fileSize = Long.parseLong(sizeStr, 8); // TAR file sizes are stored in octal + // Extract and sanitize octal file size from bytes 124–135 + String sizeStr = new String(headerBytes, 124, 12, StandardCharsets.US_ASCII) + .replace("\0", "").trim(); + + long fileSize; + try { + fileSize = sizeStr.isEmpty() ? 
0L : Long.parseLong(sizeStr, 8); + } catch (NumberFormatException nfe) { + log.warn("Malformed TAR size '{}', treating as 0", sizeStr, nfe); + fileSize = 0L; + } return new TarHeader(fileName, fileSize); } @@ -417,6 +433,9 @@ public void processZipFile(List filePaths, File file) throws IOException // Loop through all entries in the Central Directory for (long i = 0; i < eocd.totalEntries; i++) { + if (filePaths.size() >= maxPreviewCount) { + break; + } long currentEntryStart = raf.getFilePointer(); // Track entry position int signature = readIntLE(raf); From 1e1ea9f1157e5ba6ce8d4e6dede1719cb47fa26a Mon Sep 17 00:00:00 2001 From: Paurikova2 Date: Tue, 27 May 2025 13:03:24 +0300 Subject: [PATCH 06/33] used ZipFile and TarArchived for filepreview generating --- .../content/PreviewContentServiceImpl.java | 350 ++---------------- 1 file changed, 27 insertions(+), 323 deletions(-) diff --git a/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java b/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java index 11f4bf1aa726..73c4dd7abb27 100644 --- a/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java @@ -7,16 +7,14 @@ */ package org.dspace.content; +import java.io.BufferedInputStream; import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.IOException; +import java.io.InputStream; import java.io.InputStreamReader; -import java.io.RandomAccessFile; import java.io.UnsupportedEncodingException; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; -import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.InvalidPathException; @@ -25,6 +23,7 @@ import java.sql.SQLException; import java.util.ArrayList; import java.util.Arrays; +import java.util.Enumeration; import java.util.Hashtable; import java.util.Iterator; import 
java.util.List; @@ -34,7 +33,11 @@ import java.util.function.Function; import java.util.regex.Matcher; import java.util.regex.Pattern; +import java.util.zip.ZipEntry; +import java.util.zip.ZipFile; +import org.apache.commons.compress.archivers.tar.TarArchiveEntry; +import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; import org.dspace.app.util.Util; import org.dspace.authorize.AuthorizeException; import org.dspace.authorize.MissingLicenseAgreementException; @@ -85,26 +88,6 @@ public class PreviewContentServiceImpl implements PreviewContentService { @Autowired BitstreamService bitstreamService; - private static class EOCDRecord { - long totalEntries; - long centralDirectoryOffset; - - EOCDRecord(long totalEntries, long centralDirectoryOffset) { - this.totalEntries = totalEntries; - this.centralDirectoryOffset = centralDirectoryOffset; - } - } - - private static class TarHeader { - final String fileName; - final long fileSize; - - TarHeader(String fileName, long fileSize) { - this.fileName = fileName; - this.fileSize = fileSize; - } - } - @Override public PreviewContent create(Context context, Bitstream bitstream, String name, String content, boolean isDirectory, String size, Map subPreviewContents) @@ -348,325 +331,46 @@ private void addFilePath(List filePaths, String path, long size) { * @throws IOException if an I/O error occurs while reading the TAR file */ private void processTarFile(List filePaths, File file) throws IOException { - try (RandomAccessFile raf = new RandomAccessFile(file, "r")) { - long fileSize = raf.length(); - byte[] buffer = new byte[512]; // TAR header size is always 512 bytes - long currentPos = 0; - while (currentPos < fileSize) { + try (InputStream fis = new FileInputStream(file); + BufferedInputStream bis = new BufferedInputStream(fis); + TarArchiveInputStream tarInput = new TarArchiveInputStream(bis)) { + + TarArchiveEntry entry; + while ((entry = tarInput.getNextTarEntry()) != null) { if (filePaths.size() >= 
maxPreviewCount) { filePaths.add("... (too many files)"); break; } - // Read the next 512-byte header - raf.seek(currentPos); - raf.readFully(buffer); - // Parse the header to extract file metadata - TarHeader header = parseTarHeader(buffer); - if (header == null || header.fileName.isEmpty()) { - break; // End of archive (empty header) - } - // Handle the file metadata - long fileContentSize = header.fileSize; - String fileName = header.fileName; - // Move to the file content position - currentPos += 512; // Move past the header - // Add the file to the list (only metadata needed) - addFilePath(filePaths, fileName, fileContentSize); - // Skip payload and align to next header - currentPos += fileContentSize; // skip payload - currentPos = ((currentPos + 511) / 512) * 512; // align to 512-byte boundary - } - } - } - /** - * Parse the 512-byte TAR header. - * - * @param headerBytes the header block (512 bytes) - * @return a TarHeader object containing file metadata - */ - private TarHeader parseTarHeader(byte[] headerBytes) { - // Extract null-terminated file name from first 100 bytes - int nameEnd = 0; - while (nameEnd < 100 && headerBytes[nameEnd] != 0) { - nameEnd++; - } - String fileName = new String(headerBytes, 0, nameEnd, StandardCharsets.US_ASCII); - - // If the file name is empty, we've reached the end of the archive - if (fileName.isEmpty()) { - return null; - } - - // Extract and sanitize octal file size from bytes 124–135 - String sizeStr = new String(headerBytes, 124, 12, StandardCharsets.US_ASCII) - .replace("\0", "").trim(); + if (!entry.isDirectory()) { + String name = entry.getName(); + long size = entry.getSize(); + addFilePath(filePaths, name, size); + } - long fileSize; - try { - fileSize = sizeStr.isEmpty() ? 
0L : Long.parseLong(sizeStr, 8); - } catch (NumberFormatException nfe) { - log.warn("Malformed TAR size '{}', treating as 0", sizeStr, nfe); - fileSize = 0L; + // Skip file contents efficiently + tarInput.skip(entry.getSize()); + } } - - return new TarHeader(fileName, fileSize); } /** * Parses a ZIP file and extracts the names and sizes of its entries. - * Handles standard ZIP and ZIP64 formats for large files or archives with many entries. * * @param filePaths the list to populate with entry names * @param file the ZIP file to read * @throws IOException if the file is invalid or cannot be read */ public void processZipFile(List filePaths, File file) throws IOException { - try (RandomAccessFile raf = new RandomAccessFile(file, "r")) { - EOCDRecord eocd = findEOCD(raf); - if (eocd == null) { - throw new IOException("End of Central Directory not found. Not a valid ZIP file: " + file.getName()); - } - - // Seek to the Central Directory offset - raf.seek(eocd.centralDirectoryOffset); - - // Loop through all entries in the Central Directory - for (long i = 0; i < eocd.totalEntries; i++) { - if (filePaths.size() >= maxPreviewCount) { - break; - } - long currentEntryStart = raf.getFilePointer(); // Track entry position - - int signature = readIntLE(raf); - if (signature != 0x02014b50) { // Central directory file header - throw new IOException("Invalid central directory signature at entry " + i + - " (offset: " + currentEntryStart + ")"); - } - - raf.skipBytes(2); // Version made by - raf.skipBytes(2); // Version needed to extract - int generalPurposeBitFlag = readShortLE(raf); - - raf.skipBytes(2); // Compression method - raf.skipBytes(2); // File modification time - raf.skipBytes(2); // File modification date - raf.skipBytes(4); // CRC-32 - - // Read compressed/uncompressed sizes (can be -1 if ZIP64 is used) - int compressedSize32 = readIntLE(raf); - int uncompressedSize32 = readIntLE(raf); - - int fileNameLength = readShortLE(raf); - int extraFieldLength = 
readShortLE(raf); - int fileCommentLength = readShortLE(raf); - - raf.skipBytes(2); // Disk number start - raf.skipBytes(2); // Internal file attributes - raf.skipBytes(4); // External file attributes - - int relativeOffset32 = readIntLE(raf); // Relative offset of local header - - // Read file name - byte[] nameBytes = new byte[fileNameLength]; - raf.readFully(nameBytes); - - // Determine character set (bit 11 = UTF-8) - Charset charset = (generalPurposeBitFlag & (1 << 11)) != 0 - ? StandardCharsets.UTF_8 - : Charset.forName("IBM437"); - String name = new String(nameBytes, charset); - - // Default values for final sizes and offset (use 64-bit to avoid overflow) - long finalCompressedSize = compressedSize32 & 0xFFFFFFFFL; - long finalUncompressedSize = uncompressedSize32 & 0xFFFFFFFFL; - long finalRelativeOffset = relativeOffset32 & 0xFFFFFFFFL; - - // Read extra fields (e.g. ZIP64 extended information) - long afterFileNamePos = raf.getFilePointer(); - byte[] extraFieldBytes = new byte[extraFieldLength]; - if (extraFieldLength > 0) { - raf.readFully(extraFieldBytes); - } - - // ZIP64 is used when sizes or offsets are too large for 32-bit integers - if (compressedSize32 == -1 || uncompressedSize32 == -1 || relativeOffset32 == -1) { - int pointer = 0; - while (pointer + 4 <= extraFieldLength) { - int headerId = (extraFieldBytes[pointer] & 0xFF) | ((extraFieldBytes[pointer + 1] & 0xFF) << 8); - int dataSize = (extraFieldBytes[pointer + 2] & 0xFF) | - ((extraFieldBytes[pointer + 3] & 0xFF) << 8); - - if (pointer + 4 + dataSize > extraFieldLength) { - System.err.println("Warning: Malformed extra field with ID 0x" - + Integer.toHexString(headerId)); - break; - } - - if (headerId == 0x0001) { // ZIP64 Extended Information - int offset = pointer + 4; - int bytesRead = 0; - - if (uncompressedSize32 == -1 && bytesRead + 8 <= dataSize) { - finalUncompressedSize = parseLongLE(extraFieldBytes, offset + bytesRead); - bytesRead += 8; - } - if (compressedSize32 == -1 && bytesRead 
+ 8 <= dataSize) { - finalCompressedSize = parseLongLE(extraFieldBytes, offset + bytesRead); - bytesRead += 8; - } - if (relativeOffset32 == -1 && bytesRead + 8 <= dataSize) { - finalRelativeOffset = parseLongLE(extraFieldBytes, offset + bytesRead); - bytesRead += 8; - } - break; - } - - pointer += (4 + dataSize); - } - } - - // Skip comment field - raf.seek(afterFileNamePos + extraFieldLength); - raf.skipBytes(fileCommentLength); - - // Pass extracted file entry to callback - addFilePath(filePaths, name, finalUncompressedSize); - } - } - } - - /** - * Reads a 4-byte little-endian integer from a stream. - * - * @param raf the RandomAccessFile to read from - * @return the 32-bit integer value read (interpreted in little-endian order) - * @throws IOException if the stream ends unexpectedly before reading 4 bytes - */ - private int readIntLE(RandomAccessFile raf) throws IOException { - byte[] bytes = new byte[4]; - raf.readFully(bytes); // Zabezpečí načítanie všetkých 4 bajtov alebo hodí EOFException - - return ByteBuffer.wrap(bytes) - .order(ByteOrder.LITTLE_ENDIAN) - .getInt(); - } - - /** - * Reads a 2-byte little-endian short from a stream. - * - * @param raf the RandomAccessFile to read from - * @return the 16-bit integer value read (interpreted in little-endian order) - * @throws IOException if the stream ends unexpectedly before reading 2 bytes - */ - private short readShortLE(RandomAccessFile raf) throws IOException { - byte[] buffer = new byte[2]; - if (raf.read(buffer) != 2) { - throw new IOException("Unexpected EOF while reading 2-byte little-endian short"); - } - - return ByteBuffer.wrap(buffer) - .order(ByteOrder.LITTLE_ENDIAN) - .getShort(); - } - - /** - * Reads an 8-byte little-endian long from a stream. 
- * - * @param raf the RandomAccessFile to read from - * @return the 64-bit long value read (interpreted in little-endian order) - * @throws IOException if the stream ends unexpectedly before reading 8 bytes - */ - private long readLongLE(RandomAccessFile raf) throws IOException { - byte[] buffer = new byte[8]; - if (raf.read(buffer) != 8) { - throw new IOException("Unexpected EOF while reading 8-byte little-endian long"); - } - - return ByteBuffer.wrap(buffer) - .order(ByteOrder.LITTLE_ENDIAN) - .getLong(); - } - - /** - * Reads an 8-byte little-endian long from a byte array. - * - * @param bytes the byte array containing the long value - * @param offset the starting index in the array - * @return the 64-bit long value parsed (interpreted in little-endian order) - * @throws IndexOutOfBoundsException if there are not enough bytes from offset - */ - private long parseLongLE(byte[] bytes, int offset) { - return ByteBuffer.wrap(bytes, offset, 8) - .order(ByteOrder.LITTLE_ENDIAN) - .getLong(); - } - - /** - * Finds the End Of Central Directory (EOCD) record by scanning backward in the file. - * Supports standard and ZIP64 formats. 
- * - * @param raf the RandomAccessFile positioned at the start of the ZIP file - * @return an EOCDRecord containing the total number of entries and central directory offset, or null if not found - * @throws IOException if an I/O error occurs or if the EOCD or ZIP64 EOCD structure is invalid - */ - private EOCDRecord findEOCD(RandomAccessFile raf) throws IOException { - long fileLength = raf.length(); - // Scan up to 64KB + 20 bytes (ZIP64 EOCD Locator) + 56 bytes (ZIP64 EOCD) for safety - long scanRange = Math.min(fileLength, 65536L + 20L + 56L); - byte[] buffer = new byte[(int) scanRange]; // Cast to int is safe because scanRange is capped - - raf.seek(fileLength - scanRange); - raf.readFully(buffer); - - // First, search for the standard EOCD signature (0x06054b50) backwards - for (int i = buffer.length - 4; i >= 0; i--) { - if ((buffer[i] & 0xFF) == 0x50 && - (buffer[i + 1] & 0xFF) == 0x4b && - (buffer[i + 2] & 0xFF) == 0x05 && - (buffer[i + 3] & 0xFF) == 0x06) { - - if (i + 22 > buffer.length) { - continue; // Avoid out-of-bounds read - } - - int totalEntriesOnDisk16 = (buffer[i + 8] & 0xFF) | ((buffer[i + 9] & 0xFF) << 8); - int totalEntries16 = (buffer[i + 10] & 0xFF) | ((buffer[i + 11] & 0xFF) << 8); - int cdOffset32 = (buffer[i + 16] & 0xFF) | - ((buffer[i + 17] & 0xFF) << 8) | - ((buffer[i + 18] & 0xFF) << 16) | - ((buffer[i + 19] & 0xFF) << 24); - - boolean isZip64 = totalEntries16 == 0xFFFF || cdOffset32 == -1 || totalEntriesOnDisk16 == 0xFFFF; - - if (isZip64) { - int zip64LocatorStart = i - 20; - if (zip64LocatorStart >= 0 && - (buffer[zip64LocatorStart] & 0xFF) == 0x50 && - (buffer[zip64LocatorStart + 1] & 0xFF) == 0x4b && - (buffer[zip64LocatorStart + 2] & 0xFF) == 0x06 && - (buffer[zip64LocatorStart + 3] & 0xFF) == 0x07) { - - long zip64EOCDOffset = parseLongLE(buffer, zip64LocatorStart + 8); - raf.seek(zip64EOCDOffset); - - if (readIntLE(raf) != 0x06064b50) { - throw new IOException("Invalid ZIP64 EOCD signature."); - } - - raf.skipBytes(8 + 2 + 2 
+ 4 + 4 + 8); // Skip ahead - long totalEntries64 = readLongLE(raf); - raf.skipBytes(8); - long cdOffset64 = readLongLE(raf); - - return new EOCDRecord(totalEntries64, cdOffset64); - } - } else { - return new EOCDRecord(totalEntries16, cdOffset32); + try (ZipFile zipFile = new ZipFile(file)) { + Enumeration entries = zipFile.entries(); + while (entries.hasMoreElements()) { + ZipEntry entry = entries.nextElement(); + if (!entry.isDirectory()) { + addFilePath(filePaths, entry.getName(), entry.getSize()); } } } - return null; // Standard EOCD signature not found in the scanned range } /** From 1ed0c68072eef997299e2196baa38448d5d6c616 Mon Sep 17 00:00:00 2001 From: Paurikova2 Date: Tue, 27 May 2025 13:11:06 +0300 Subject: [PATCH 07/33] added removed lines --- .../java/org/dspace/content/PreviewContentServiceImpl.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java b/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java index 73c4dd7abb27..1aaa64f624e7 100644 --- a/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java @@ -48,6 +48,7 @@ import org.dspace.core.Constants; import org.dspace.core.Context; import org.dspace.services.ConfigurationService; +import org.dspace.storage.bitstore.S3BitStoreService; import org.dspace.util.FileInfo; import org.dspace.util.FileTreeViewGenerator; import org.slf4j.Logger; @@ -69,7 +70,6 @@ public class PreviewContentServiceImpl implements PreviewContentService { private final String ARCHIVE_TYPE_ZIP = "zip"; private final String ARCHIVE_TYPE_TAR = "tar"; - // This constant is used to limit the length of the preview content stored in the database to prevent // the database from being overloaded with large amounts of data. 
private static final int MAX_PREVIEW_COUNT_LENGTH = 2000; @@ -88,6 +88,7 @@ public class PreviewContentServiceImpl implements PreviewContentService { @Autowired BitstreamService bitstreamService; + @Override public PreviewContent create(Context context, Bitstream bitstream, String name, String content, boolean isDirectory, String size, Map subPreviewContents) From 9fea2e2e28aaa4a33324bb942b5059847e678d80 Mon Sep 17 00:00:00 2001 From: Paurikova2 Date: Tue, 27 May 2025 14:13:54 +0300 Subject: [PATCH 08/33] used 7z for zip and tar files --- .../content/PreviewContentServiceImpl.java | 100 +++++++++--------- 1 file changed, 49 insertions(+), 51 deletions(-) diff --git a/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java b/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java index 1aaa64f624e7..7ef6cd61968b 100644 --- a/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java @@ -172,14 +172,6 @@ public List getFilePreviewContent(Context context, Bitstream bitstream } catch (IOException e) { log.error("IOException during file processing: ", e); throw e; - } finally { - // Ensure the file is deleted - if (file != null && file.exists()) { - boolean deleted = file.delete(); // Delete the file to avoid leaks - if (!deleted) { - log.warn("Failed to delete temporary file: " + file.getAbsolutePath()); - } - } } return fileInfos; } @@ -326,51 +318,61 @@ private void addFilePath(List filePaths, String path, long size) { } /** - * Processes a TAR file, extracting its entries and adding their paths to the provided list. 
- * @param filePaths the list to populate with the extracted file paths - * @param file the TAR file data - * @throws IOException if an I/O error occurs while reading the TAR file - */ - private void processTarFile(List filePaths, File file) throws IOException { - try (InputStream fis = new FileInputStream(file); - BufferedInputStream bis = new BufferedInputStream(fis); - TarArchiveInputStream tarInput = new TarArchiveInputStream(bis)) { - - TarArchiveEntry entry; - while ((entry = tarInput.getNextTarEntry()) != null) { - if (filePaths.size() >= maxPreviewCount) { - filePaths.add("... (too many files)"); - break; - } - - if (!entry.isDirectory()) { - String name = entry.getName(); - long size = entry.getSize(); - addFilePath(filePaths, name, size); - } - - // Skip file contents efficiently - tarInput.skip(entry.getSize()); - } - } - } - - /** - * Parses a ZIP file and extracts the names and sizes of its entries. + * Parses a ZIP and TAR file and extracts the names and sizes of its entries. 
* * @param filePaths the list to populate with entry names * @param file the ZIP file to read * @throws IOException if the file is invalid or cannot be read */ - public void processZipFile(List filePaths, File file) throws IOException { - try (ZipFile zipFile = new ZipFile(file)) { - Enumeration entries = zipFile.entries(); - while (entries.hasMoreElements()) { - ZipEntry entry = entries.nextElement(); - if (!entry.isDirectory()) { - addFilePath(filePaths, entry.getName(), entry.getSize()); + public void process7zFile(List filePaths, File file) throws IOException { + ProcessBuilder processBuilder = new ProcessBuilder("7z", "l", file.getAbsolutePath()); + processBuilder.redirectErrorStream(true); + + Process process = processBuilder.start(); + Pattern pattern = Pattern.compile( + "^(\\d{4}-\\d{2}-\\d{2})\\s+(\\d{2}:\\d{2}:\\d{2})\\s+\\S+\\s+(\\d+)\\s+\\d+\\s+(.+)$" + ); + + try (BufferedReader reader = new BufferedReader(new InputStreamReader(process.getInputStream()))) { + String line; + boolean inFileList = false; + + while ((line = reader.readLine()) != null) { + line = line.trim(); + + if (line.isEmpty()) continue; + + // Detect listing block between dashed lines + if (line.matches("^-+")) { + inFileList = !inFileList; + continue; } + + if (inFileList) { + // Skip final summary lines like "7 files" + if (line.matches(".*\\d+ files$")) continue; + + Matcher matcher = pattern.matcher(line); + if (matcher.matches()) { + try { + long size = Long.parseLong(matcher.group(3)); + String fileName = matcher.group(4); + addFilePath(filePaths, fileName, size); + } catch (NumberFormatException ignored) { + // Skip invalid lines + } + } + } + } + + int exitCode = process.waitFor(); + if (exitCode != 0) { + throw new IOException("7z command failed with exit code " + exitCode); } + + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); // Restore interrupt flag + throw new IOException("7z process interrupted", e); } } @@ -416,11 +418,7 @@ private String 
buildXmlResponse(List filePaths) { private String extractFile(File file, String fileType) throws Exception { List filePaths = new ArrayList<>(); // Process the file based on its type - if (ARCHIVE_TYPE_TAR.equals(fileType)) { - processTarFile(filePaths, file); - } else { - processZipFile(filePaths, file); - } + process7zFile(filePaths, file); return buildXmlResponse(filePaths); } From 76be5b3e34c4e3f78b829060d752e1da4534a02d Mon Sep 17 00:00:00 2001 From: Paurikova2 Date: Tue, 27 May 2025 16:51:40 +0300 Subject: [PATCH 09/33] removed 7z and used zip and tar entry --- .../content/PreviewContentServiceImpl.java | 90 +++++++++---------- 1 file changed, 41 insertions(+), 49 deletions(-) diff --git a/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java b/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java index 7ef6cd61968b..4d785c26b3cf 100644 --- a/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java @@ -318,61 +318,49 @@ private void addFilePath(List filePaths, String path, long size) { } /** - * Parses a ZIP and TAR file and extracts the names and sizes of its entries. + * Processes a TAR file, extracting its entries and adding their paths to the provided list. + * @param filePaths the list to populate with the extracted file paths + * @param file the TAR file data + * @throws IOException if an I/O error occurs while reading the TAR file + */ + private void processTarFile(List filePaths, File file) throws IOException { + try (InputStream fis = new FileInputStream(file); + BufferedInputStream bis = new BufferedInputStream(fis); + TarArchiveInputStream tarInput = new TarArchiveInputStream(bis)) { + + TarArchiveEntry entry; + while ((entry = tarInput.getNextTarEntry()) != null) { + if (filePaths.size() >= maxPreviewCount) { + filePaths.add("... 
(too many files)"); + break; + } + if (!entry.isDirectory()) { + String name = entry.getName(); + long size = entry.getSize(); + addFilePath(filePaths, name, size); + } + // Skip file contents efficiently + tarInput.skip(entry.getSize()); + } + } + } + + /** + * Parses a ZIP file and extracts the names and sizes of its entries. * * @param filePaths the list to populate with entry names * @param file the ZIP file to read * @throws IOException if the file is invalid or cannot be read */ - public void process7zFile(List filePaths, File file) throws IOException { - ProcessBuilder processBuilder = new ProcessBuilder("7z", "l", file.getAbsolutePath()); - processBuilder.redirectErrorStream(true); - - Process process = processBuilder.start(); - Pattern pattern = Pattern.compile( - "^(\\d{4}-\\d{2}-\\d{2})\\s+(\\d{2}:\\d{2}:\\d{2})\\s+\\S+\\s+(\\d+)\\s+\\d+\\s+(.+)$" - ); - - try (BufferedReader reader = new BufferedReader(new InputStreamReader(process.getInputStream()))) { - String line; - boolean inFileList = false; - - while ((line = reader.readLine()) != null) { - line = line.trim(); - - if (line.isEmpty()) continue; - - // Detect listing block between dashed lines - if (line.matches("^-+")) { - inFileList = !inFileList; - continue; - } - - if (inFileList) { - // Skip final summary lines like "7 files" - if (line.matches(".*\\d+ files$")) continue; - - Matcher matcher = pattern.matcher(line); - if (matcher.matches()) { - try { - long size = Long.parseLong(matcher.group(3)); - String fileName = matcher.group(4); - addFilePath(filePaths, fileName, size); - } catch (NumberFormatException ignored) { - // Skip invalid lines - } - } + public void processZipFile(List filePaths, File file) throws IOException { + try (ZipFile zipFile = new ZipFile(file)) { + Enumeration entries = zipFile.entries(); + while (entries.hasMoreElements()) { + ZipEntry entry = entries.nextElement(); + if (!entry.isDirectory()) { + addFilePath(filePaths, entry.getName(), entry.getSize()); } } - - int 
exitCode = process.waitFor(); - if (exitCode != 0) { - throw new IOException("7z command failed with exit code " + exitCode); - } - - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); // Restore interrupt flag - throw new IOException("7z process interrupted", e); } } @@ -418,7 +406,11 @@ private String buildXmlResponse(List filePaths) { private String extractFile(File file, String fileType) throws Exception { List filePaths = new ArrayList<>(); // Process the file based on its type - process7zFile(filePaths, file); + if (ARCHIVE_TYPE_TAR.equals(fileType)) { + processTarFile(filePaths, file); + } else { + processZipFile(filePaths, file); + } return buildXmlResponse(filePaths); } From 09bcf799ba8b4bbb73cd1ea3368e00256cbd07c7 Mon Sep 17 00:00:00 2001 From: Paurikova2 Date: Tue, 27 May 2025 18:44:18 +0300 Subject: [PATCH 10/33] checkstyle violations --- .../main/java/org/dspace/content/PreviewContentServiceImpl.java | 1 - 1 file changed, 1 deletion(-) diff --git a/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java b/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java index 4d785c26b3cf..6357d80571c7 100644 --- a/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java @@ -48,7 +48,6 @@ import org.dspace.core.Constants; import org.dspace.core.Context; import org.dspace.services.ConfigurationService; -import org.dspace.storage.bitstore.S3BitStoreService; import org.dspace.util.FileInfo; import org.dspace.util.FileTreeViewGenerator; import org.slf4j.Logger; From 8f07fe2ef3cfc1a70c32887e3dfaf78fc2b9afe6 Mon Sep 17 00:00:00 2001 From: Paurikova2 Date: Fri, 6 Jun 2025 08:01:07 +0200 Subject: [PATCH 11/33] improved file preview generating speed, used string builder, xml builder, authorization only if required --- .../dspace/content/BitstreamServiceImpl.java | 6 +- .../content/PreviewContentServiceImpl.java
| 91 +++++++++++-------- .../dao/impl/PreviewContentDAOImpl.java | 13 +-- .../content/service/BitstreamService.java | 3 +- .../service/PreviewContentService.java | 3 +- .../scripts/filepreview/FilePreview.java | 2 +- .../MetadataBitstreamRestRepository.java | 2 +- 7 files changed, 68 insertions(+), 52 deletions(-) diff --git a/dspace-api/src/main/java/org/dspace/content/BitstreamServiceImpl.java b/dspace-api/src/main/java/org/dspace/content/BitstreamServiceImpl.java index abe892a058af..453880a7a049 100644 --- a/dspace-api/src/main/java/org/dspace/content/BitstreamServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/content/BitstreamServiceImpl.java @@ -317,10 +317,12 @@ public InputStream retrieve(Context context, Bitstream bitstream) } @Override - public File retrieveFile(Context context, Bitstream bitstream) + public File retrieveFile(Context context, Bitstream bitstream, boolean authorization) throws IOException, SQLException, AuthorizeException { // Maybe should return AuthorizeException?? 
- authorizeService.authorizeAction(context, bitstream, Constants.READ); + if (authorization) { + authorizeService.authorizeAction(context, bitstream, Constants.READ); + } return bitstreamStorageService.retrieveFile(context, bitstream); } diff --git a/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java b/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java index 6357d80571c7..de3be3736e46 100644 --- a/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java @@ -14,28 +14,28 @@ import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; +import java.io.StringWriter; import java.io.UnsupportedEncodingException; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.InvalidPathException; -import java.nio.file.Path; import java.nio.file.Paths; import java.sql.SQLException; import java.util.ArrayList; import java.util.Arrays; import java.util.Enumeration; import java.util.Hashtable; -import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Objects; import java.util.UUID; import java.util.function.Function; -import java.util.regex.Matcher; -import java.util.regex.Pattern; import java.util.zip.ZipEntry; import java.util.zip.ZipFile; +import javax.xml.stream.XMLOutputFactory; +import javax.xml.stream.XMLStreamWriter; + import org.apache.commons.compress.archivers.tar.TarArchiveEntry; import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; import org.dspace.app.util.Util; @@ -72,6 +72,7 @@ public class PreviewContentServiceImpl implements PreviewContentService { // This constant is used to limit the length of the preview content stored in the database to prevent // the database from being overloaded with large amounts of data. 
private static final int MAX_PREVIEW_COUNT_LENGTH = 2000; + int estimatedFileCount = 200; // Configured ZIP file preview limit (default: 1000) - if the ZIP file contains more files, it will be truncated @Value("${file.preview.zip.limit.length:1000}") @@ -138,7 +139,7 @@ public List findAll(Context context) throws SQLException { } @Override - public boolean canPreview(Context context, Bitstream bitstream) throws SQLException, AuthorizeException { + public boolean canPreview(Context context, Bitstream bitstream, boolean authorization) throws SQLException, AuthorizeException { try { // Check it is allowed by configuration boolean isAllowedByCfg = configurationService.getBooleanProperty("file.preview.enabled", true); @@ -147,7 +148,9 @@ public boolean canPreview(Context context, Bitstream bitstream) throws SQLExcept } // Check it is allowed by license - authorizeService.authorizeAction(context, bitstream, Constants.READ); + if (authorization) { + authorizeService.authorizeAction(context, bitstream, Constants.READ); + } return true; } catch (MissingLicenseAgreementException e) { return false; @@ -160,7 +163,7 @@ public List getFilePreviewContent(Context context, Bitstream bitstream File file = null; try { - file = bitstreamService.retrieveFile(context, bitstream); // Retrieve the file + file = bitstreamService.retrieveFile(context, bitstream, false); // Retrieve the file if (Objects.nonNull(file)) { fileInfos = processFileToFilePreview(context, bitstream, file); @@ -219,10 +222,8 @@ public List processFileToFilePreview(Context context, Bitstream bitstr "application/zip", ARCHIVE_TYPE_ZIP, "application/x-tar", ARCHIVE_TYPE_TAR ); - - String mimeType = bitstream.getFormat(context).getMIMEType(); - if (archiveTypes.containsKey(mimeType)) { - data = extractFile(file, archiveTypes.get(mimeType)); + if (archiveTypes.containsKey(bitstreamMimeType)) { + data = extractFile(file, archiveTypes.get(bitstreamMimeType)); fileInfos = FileTreeViewGenerator.parse(data); } } @@ -305,15 
+306,16 @@ private Hashtable createSubMap(Map sourceMap, Funct * @param size the size of the file or directory */ private void addFilePath(List filePaths, String path, long size) { - String fileInfo = ""; try { - Path filePath = Paths.get(path); - boolean isDir = Files.isDirectory(filePath); - fileInfo = (isDir ? path + "/|" : path + "|") + size; + boolean isDir = Files.isDirectory(Paths.get(path)); + StringBuilder sb = new StringBuilder(path.length() + 16); + sb.append(path); + sb.append(isDir ? "/|" : "|"); + sb.append(size); + filePaths.add(sb.toString()); } catch (NullPointerException | InvalidPathException | SecurityException e) { log.error(String.format("Failed to add file path. Path: '%s', Size: %d", path, size), e); } - filePaths.add(fileInfo); } /** @@ -355,6 +357,10 @@ public void processZipFile(List filePaths, File file) throws IOException try (ZipFile zipFile = new ZipFile(file)) { Enumeration entries = zipFile.entries(); while (entries.hasMoreElements()) { + if (filePaths.size() >= maxPreviewCount) { + filePaths.add("... 
(too many files)"); + break; + } ZipEntry entry = entries.nextElement(); if (!entry.isDirectory()) { addFilePath(filePaths, entry.getName(), entry.getSize()); @@ -369,30 +375,39 @@ public void processZipFile(List filePaths, File file) throws IOException * @return an XML string representation of the file paths */ private String buildXmlResponse(List filePaths) { - // Is a folder regex - String folderRegex = "/|\\d+"; - Pattern pattern = Pattern.compile(folderRegex); - - StringBuilder sb = new StringBuilder(); - sb.append(""); - Iterator iterator = filePaths.iterator(); - int fileCounter = 0; - while (iterator.hasNext() && fileCounter < maxPreviewCount) { - String filePath = iterator.next(); - // Check if the file is a folder - Matcher matcher = pattern.matcher(filePath); - if (!matcher.matches()) { - // It is a file - fileCounter++; + StringWriter stringWriter = new StringWriter(); + try { + XMLOutputFactory factory = XMLOutputFactory.newInstance(); + XMLStreamWriter writer = factory.createXMLStreamWriter(stringWriter); + + writer.writeStartDocument("UTF-8", "1.0"); + writer.writeStartElement("root"); + + int count = 0; + for (String filePath : filePaths) { + if (count >= maxPreviewCount) { + writer.writeStartElement("element"); + writer.writeCharacters("...too many files...|0"); + writer.writeEndElement(); + break; + } + writer.writeStartElement("element"); + writer.writeCharacters(filePath); + writer.writeEndElement(); + count++; } - sb.append("").append(filePath).append(""); - } - if (fileCounter > maxPreviewCount) { - sb.append("...too many files...|0"); + writer.writeEndElement(); // + writer.writeEndDocument(); + writer.flush(); + writer.close(); + + } catch (Exception e) { + log.error("Failed to build XML response", e); + return "Failed to generate preview"; } - sb.append(""); - return sb.toString(); + + return stringWriter.toString(); } /** @@ -403,7 +418,7 @@ private String buildXmlResponse(List filePaths) { * @return an XML string representing the 
extracted file paths */ private String extractFile(File file, String fileType) throws Exception { - List filePaths = new ArrayList<>(); + List filePaths = new ArrayList<>(estimatedFileCount); // Process the file based on its type if (ARCHIVE_TYPE_TAR.equals(fileType)) { processTarFile(filePaths, file); diff --git a/dspace-api/src/main/java/org/dspace/content/dao/impl/PreviewContentDAOImpl.java b/dspace-api/src/main/java/org/dspace/content/dao/impl/PreviewContentDAOImpl.java index bd4470d56a2b..e306bb5ec4f3 100644 --- a/dspace-api/src/main/java/org/dspace/content/dao/impl/PreviewContentDAOImpl.java +++ b/dspace-api/src/main/java/org/dspace/content/dao/impl/PreviewContentDAOImpl.java @@ -41,14 +41,11 @@ public List findByBitstream(Context context, UUID bitstreamId) t @Override public List hasPreview(Context context, Bitstream bitstream) throws SQLException { - // select only data from the previewcontent table whose ID is not a child in the preview2preview table - Query query = getHibernateSession(context).createNativeQuery( - "SELECT pc.* FROM previewcontent pc " + - "JOIN bitstream b ON pc.bitstream_id = b.uuid " + - "WHERE b.uuid = :bitstream_id " + - "AND NOT EXISTS (SELECT 1 FROM preview2preview p2p WHERE pc.previewcontent_id = p2p.child_id)", - PreviewContent.class - ); + String sql = + "SELECT 1 FROM previewcontent pc\n" + + "WHERE pc.bitstream_id = :bitstream_id\n" + + "LIMIT 1"; + Query query = getHibernateSession(context).createNativeQuery(sql, PreviewContent.class); query.setParameter("bitstream_id", bitstream.getID()); query.setHint("org.hibernate.cacheable", Boolean.TRUE); return findMany(context, query); diff --git a/dspace-api/src/main/java/org/dspace/content/service/BitstreamService.java b/dspace-api/src/main/java/org/dspace/content/service/BitstreamService.java index b48174b68339..8834fc44d87a 100644 --- a/dspace-api/src/main/java/org/dspace/content/service/BitstreamService.java +++ 
b/dspace-api/src/main/java/org/dspace/content/service/BitstreamService.java @@ -172,12 +172,13 @@ public InputStream retrieve(Context context, Bitstream bitstream) * * @param context DSpace context object * @param bitstream DSpace bitstream + * @param authorization true if authorization is required else false * @return a File from which the bitstream can be read. * @throws IOException if IO error * @throws SQLException if database error * @throws AuthorizeException if authorization error */ - public File retrieveFile(Context context, Bitstream bitstream) throws IOException, SQLException, AuthorizeException; + public File retrieveFile(Context context, Bitstream bitstream, boolean authorization) throws IOException, SQLException, AuthorizeException; /** * Determine if this bitstream is registered (available elsewhere on diff --git a/dspace-api/src/main/java/org/dspace/content/service/PreviewContentService.java b/dspace-api/src/main/java/org/dspace/content/service/PreviewContentService.java index 481eec9b813d..2804c54829ae 100644 --- a/dspace-api/src/main/java/org/dspace/content/service/PreviewContentService.java +++ b/dspace-api/src/main/java/org/dspace/content/service/PreviewContentService.java @@ -104,9 +104,10 @@ PreviewContent create(Context context, Bitstream bitstream, String name, String * * @param context DSpace context object * @param bitstream check if this bitstream could be previewed + * @param authorization true if authorization is required else false * @return true if the bitstream could be previewed, false otherwise */ - boolean canPreview(Context context, Bitstream bitstream) throws SQLException, AuthorizeException; + boolean canPreview(Context context, Bitstream bitstream, boolean authorization) throws SQLException, AuthorizeException; /** * Return converted ZIP file content into FileInfo classes. 
diff --git a/dspace-api/src/main/java/org/dspace/scripts/filepreview/FilePreview.java b/dspace-api/src/main/java/org/dspace/scripts/filepreview/FilePreview.java index a35a2ac6f035..9616cc162d9e 100644 --- a/dspace-api/src/main/java/org/dspace/scripts/filepreview/FilePreview.java +++ b/dspace-api/src/main/java/org/dspace/scripts/filepreview/FilePreview.java @@ -147,7 +147,7 @@ private void generateItemFilePreviews(Context context, UUID itemUUID) throws Exc for (Bundle bundle : bundles) { List bitstreams = bundle.getBitstreams(); for (Bitstream bitstream : bitstreams) { - boolean canPreview = previewContentService.canPreview(context, bitstream); + boolean canPreview = previewContentService.canPreview(context, bitstream, false); if (!canPreview) { return; } diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/MetadataBitstreamRestRepository.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/MetadataBitstreamRestRepository.java index 1e216a6ae501..4b31e0af8f13 100644 --- a/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/MetadataBitstreamRestRepository.java +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/MetadataBitstreamRestRepository.java @@ -106,7 +106,7 @@ public Page findByHandle(@Parameter(value = "handl for (Bitstream bitstream : bitstreams) { String url = previewContentService.composePreviewURL(context, item, bitstream, contextPath); List fileInfos = new ArrayList<>(); - boolean canPreview = previewContentService.canPreview(context, bitstream); + boolean canPreview = previewContentService.canPreview(context, bitstream, false); if (canPreview) { try { List prContents = previewContentService.hasPreview(context, bitstream); From 0ba1947592c8614140b3941a50e137434a4fe2aa Mon Sep 17 00:00:00 2001 From: Paurikova2 Date: Fri, 6 Jun 2025 08:27:39 +0200 Subject: [PATCH 12/33] checkstyle, return boolean from hasPreview and previews from getPreview, replaced return with continue ---
.../dspace/content/BitstreamServiceImpl.java | 1 - .../content/PreviewContentServiceImpl.java | 12 ++++++---- .../dspace/content/dao/PreviewContentDAO.java | 12 +++++++++- .../dao/impl/PreviewContentDAOImpl.java | 24 +++++++++++++++---- .../content/service/BitstreamService.java | 3 ++- .../service/PreviewContentService.java | 14 +++++++++-- .../scripts/filepreview/FilePreview.java | 9 ++++--- .../MetadataBitstreamRestRepository.java | 4 ++-- 8 files changed, 59 insertions(+), 20 deletions(-) diff --git a/dspace-api/src/main/java/org/dspace/content/BitstreamServiceImpl.java b/dspace-api/src/main/java/org/dspace/content/BitstreamServiceImpl.java index 453880a7a049..76c3b3b425ab 100644 --- a/dspace-api/src/main/java/org/dspace/content/BitstreamServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/content/BitstreamServiceImpl.java @@ -319,7 +319,6 @@ public InputStream retrieve(Context context, Bitstream bitstream) @Override public File retrieveFile(Context context, Bitstream bitstream, boolean authorization) throws IOException, SQLException, AuthorizeException { - // Maybe should return AuthorizeException?? 
if (authorization) { authorizeService.authorizeAction(context, bitstream, Constants.READ); } diff --git a/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java b/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java index de3be3736e46..832aae829b8a 100644 --- a/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java @@ -32,7 +32,6 @@ import java.util.function.Function; import java.util.zip.ZipEntry; import java.util.zip.ZipFile; - import javax.xml.stream.XMLOutputFactory; import javax.xml.stream.XMLStreamWriter; @@ -88,7 +87,6 @@ public class PreviewContentServiceImpl implements PreviewContentService { @Autowired BitstreamService bitstreamService; - @Override public PreviewContent create(Context context, Bitstream bitstream, String name, String content, boolean isDirectory, String size, Map subPreviewContents) @@ -129,17 +127,23 @@ public List findByBitstream(Context context, UUID bitstreamId) t } @Override - public List hasPreview(Context context, Bitstream bitstream) throws SQLException { + public boolean hasPreview(Context context, Bitstream bitstream) throws SQLException { return previewContentDAO.hasPreview(context, bitstream); } + @Override + public List getPreview(Context context, Bitstream bitstream) throws SQLException { + return previewContentDAO.getPreview(context, bitstream); + } + @Override public List findAll(Context context) throws SQLException { return previewContentDAO.findAll(context, PreviewContent.class); } @Override - public boolean canPreview(Context context, Bitstream bitstream, boolean authorization) throws SQLException, AuthorizeException { + public boolean canPreview(Context context, Bitstream bitstream, boolean authorization) + throws SQLException, AuthorizeException { try { // Check it is allowed by configuration boolean isAllowedByCfg = 
configurationService.getBooleanProperty("file.preview.enabled", true); diff --git a/dspace-api/src/main/java/org/dspace/content/dao/PreviewContentDAO.java b/dspace-api/src/main/java/org/dspace/content/dao/PreviewContentDAO.java index 3531271ff518..079b1d0fd8b0 100644 --- a/dspace-api/src/main/java/org/dspace/content/dao/PreviewContentDAO.java +++ b/dspace-api/src/main/java/org/dspace/content/dao/PreviewContentDAO.java @@ -33,6 +33,16 @@ public interface PreviewContentDAO extends GenericDAO { */ List findByBitstream(Context context, UUID bitstreamId) throws SQLException; + /** + * Returns true if the bitstream has associated preview content. + * + * @param context DSpace context + * @param bitstream The bitstream to get bitstream UUID + * @return List of found preview content + * @throws SQLException If a database error occurs + */ + boolean hasPreview(Context context, Bitstream bitstream) throws SQLException; + /** * Find all preview content based on bitstream that are the root directory. 
* @@ -41,5 +51,5 @@ public interface PreviewContentDAO extends GenericDAO { * @return List of found preview content * @throws SQLException If a database error occurs */ - List hasPreview(Context context, Bitstream bitstream) throws SQLException; + List getPreview(Context context, Bitstream bitstream) throws SQLException; } diff --git a/dspace-api/src/main/java/org/dspace/content/dao/impl/PreviewContentDAOImpl.java b/dspace-api/src/main/java/org/dspace/content/dao/impl/PreviewContentDAOImpl.java index e306bb5ec4f3..0e08c750b242 100644 --- a/dspace-api/src/main/java/org/dspace/content/dao/impl/PreviewContentDAOImpl.java +++ b/dspace-api/src/main/java/org/dspace/content/dao/impl/PreviewContentDAOImpl.java @@ -40,14 +40,30 @@ public List findByBitstream(Context context, UUID bitstreamId) t } @Override - public List hasPreview(Context context, Bitstream bitstream) throws SQLException { + public boolean hasPreview(Context context, Bitstream bitstream) throws SQLException { String sql = - "SELECT 1 FROM previewcontent pc\n" + - "WHERE pc.bitstream_id = :bitstream_id\n" + - "LIMIT 1"; + "SELECT COUNT(*) FROM previewcontent pc " + + "WHERE pc.bitstream_id = :bitstream_id"; Query query = getHibernateSession(context).createNativeQuery(sql, PreviewContent.class); query.setParameter("bitstream_id", bitstream.getID()); query.setHint("org.hibernate.cacheable", Boolean.TRUE); + query.setMaxResults(1); + long count = ((Number) query.getSingleResult()).longValue(); + return count > 0; + } + + @Override + public List getPreview(Context context, Bitstream bitstream) throws SQLException { + // select only data from the previewcontent table whose ID is not a child in the preview2preview table + Query query = getHibernateSession(context).createNativeQuery( + "SELECT pc.* FROM previewcontent pc " + + "JOIN bitstream b ON pc.bitstream_id = b.uuid " + + "WHERE b.uuid = :bitstream_id " + + "AND NOT EXISTS (SELECT 1 FROM preview2preview p2p WHERE pc.previewcontent_id = p2p.child_id)", + 
PreviewContent.class + ); + query.setParameter("bitstream_id", bitstream.getID()); + query.setHint("org.hibernate.cacheable", Boolean.TRUE); return findMany(context, query); } } diff --git a/dspace-api/src/main/java/org/dspace/content/service/BitstreamService.java b/dspace-api/src/main/java/org/dspace/content/service/BitstreamService.java index 8834fc44d87a..ffbde758dfea 100644 --- a/dspace-api/src/main/java/org/dspace/content/service/BitstreamService.java +++ b/dspace-api/src/main/java/org/dspace/content/service/BitstreamService.java @@ -178,7 +178,8 @@ public InputStream retrieve(Context context, Bitstream bitstream) * @throws SQLException if database error * @throws AuthorizeException if authorization error */ - public File retrieveFile(Context context, Bitstream bitstream, boolean authorization) throws IOException, SQLException, AuthorizeException; + public File retrieveFile(Context context, Bitstream bitstream, boolean authorization) + throws IOException, SQLException, AuthorizeException; /** * Determine if this bitstream is registered (available elsewhere on diff --git a/dspace-api/src/main/java/org/dspace/content/service/PreviewContentService.java b/dspace-api/src/main/java/org/dspace/content/service/PreviewContentService.java index 2804c54829ae..b88b38e0d0a0 100644 --- a/dspace-api/src/main/java/org/dspace/content/service/PreviewContentService.java +++ b/dspace-api/src/main/java/org/dspace/content/service/PreviewContentService.java @@ -82,6 +82,15 @@ PreviewContent create(Context context, Bitstream bitstream, String name, String */ List findByBitstream(Context context, UUID bitstream_id) throws SQLException; + /** + * Returns true if the bitstream has associated preview content. 
+ * + * @param context DSpace context + * @param bitstream The bitstream to get bitstream UUID + * @throws SQLException If a database error occurs + */ + boolean hasPreview(Context context, Bitstream bitstream) throws SQLException; + /** * Find all preview content based on bitstream that are the root directory. * @@ -89,7 +98,7 @@ PreviewContent create(Context context, Bitstream bitstream, String name, String * @param bitstream The bitstream to get bitstream UUID * @throws SQLException If a database error occurs */ - List hasPreview(Context context, Bitstream bitstream) throws SQLException; + List getPreview(Context context, Bitstream bitstream) throws SQLException; /** * Find all preview contents from database. @@ -107,7 +116,8 @@ PreviewContent create(Context context, Bitstream bitstream, String name, String * @param authorization true if authorization is required else false * @return true if the bitstream could be previewed, false otherwise */ - boolean canPreview(Context context, Bitstream bitstream, boolean authorization) throws SQLException, AuthorizeException; + boolean canPreview(Context context, Bitstream bitstream, boolean authorization) + throws SQLException, AuthorizeException; /** * Return converted ZIP file content into FileInfo classes. 
diff --git a/dspace-api/src/main/java/org/dspace/scripts/filepreview/FilePreview.java b/dspace-api/src/main/java/org/dspace/scripts/filepreview/FilePreview.java index 9616cc162d9e..49b2564147cd 100644 --- a/dspace-api/src/main/java/org/dspace/scripts/filepreview/FilePreview.java +++ b/dspace-api/src/main/java/org/dspace/scripts/filepreview/FilePreview.java @@ -149,19 +149,18 @@ private void generateItemFilePreviews(Context context, UUID itemUUID) throws Exc for (Bitstream bitstream : bitstreams) { boolean canPreview = previewContentService.canPreview(context, bitstream, false); if (!canPreview) { - return; + continue; } - List prContents = previewContentService.hasPreview(context, bitstream); // Generate new content if we didn't find any - if (!prContents.isEmpty()) { - return; + if (previewContentService.hasPreview(context, bitstream)) { + continue; } List fileInfos = previewContentService.getFilePreviewContent(context, bitstream); // Do not store HTML content in the database because it could be longer than the limit // of the database column if (StringUtils.equals("text/html", bitstream.getFormat(context).getMIMEType())) { - return; + continue; } for (FileInfo fi : fileInfos) { diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/MetadataBitstreamRestRepository.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/MetadataBitstreamRestRepository.java index 4b31e0af8f13..f5f5db26ca3c 100644 --- a/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/MetadataBitstreamRestRepository.java +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/MetadataBitstreamRestRepository.java @@ -109,9 +109,8 @@ public Page findByHandle(@Parameter(value = "handl boolean canPreview = previewContentService.canPreview(context, bitstream, false); if (canPreview) { try { - List prContents = previewContentService.hasPreview(context, bitstream); // Generate new content if we didn't find any - if 
(prContents.isEmpty()) { + if (!previewContentService.hasPreview(context, bitstream)) { boolean allowComposePreviewContent = configurationService.getBooleanProperty ("create.file-preview.on-item-page-load", false); if (allowComposePreviewContent) { @@ -126,6 +125,7 @@ public Page findByHandle(@Parameter(value = "handl } } } else { + List prContents = previewContentService.getPreview(context, bitstream); for (PreviewContent pc : prContents) { fileInfos.add(previewContentService.createFileInfo(pc)); } From 719fa7755d491cfbacf7fe2372d122182e3c064d Mon Sep 17 00:00:00 2001 From: Paurikova2 Date: Fri, 6 Jun 2025 09:01:37 +0200 Subject: [PATCH 13/33] fix test, better doc, checkstyle --- .../org/dspace/content/PreviewContentServiceImpl.java | 9 +++++---- .../java/org/dspace/content/dao/PreviewContentDAO.java | 2 +- .../dspace/content/dao/impl/PreviewContentDAOImpl.java | 2 +- .../dspace/content/service/PreviewContentService.java | 2 ++ .../java/org/dspace/scripts/filepreview/FilePreview.java | 1 - .../org/dspace/app/rest/PreviewContentServiceImplIT.java | 2 +- 6 files changed, 10 insertions(+), 8 deletions(-) diff --git a/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java b/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java index 832aae829b8a..def93ac08279 100644 --- a/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java @@ -433,10 +433,11 @@ private String extractFile(File file, String fileType) throws Exception { } /** - * Read input stream and return content as String - * @param file to read - * @return content of the inputStream as a String - * @throws IOException + * Read file content and return as String + * @param file the file to read + * @param cutResult whether to limit the content length + * @return content of the file as a String + * @throws IOException if an error occurs reading the file */ private String 
getFileContent(File file, boolean cutResult) throws IOException { StringBuilder content = new StringBuilder(); diff --git a/dspace-api/src/main/java/org/dspace/content/dao/PreviewContentDAO.java b/dspace-api/src/main/java/org/dspace/content/dao/PreviewContentDAO.java index 079b1d0fd8b0..596357b4a9b1 100644 --- a/dspace-api/src/main/java/org/dspace/content/dao/PreviewContentDAO.java +++ b/dspace-api/src/main/java/org/dspace/content/dao/PreviewContentDAO.java @@ -38,7 +38,7 @@ public interface PreviewContentDAO extends GenericDAO { * * @param context DSpace context * @param bitstream The bitstream to get bitstream UUID - * @return List of found preview content + * @return True if preview content exists, false otherwise * @throws SQLException If a database error occurs */ boolean hasPreview(Context context, Bitstream bitstream) throws SQLException; diff --git a/dspace-api/src/main/java/org/dspace/content/dao/impl/PreviewContentDAOImpl.java b/dspace-api/src/main/java/org/dspace/content/dao/impl/PreviewContentDAOImpl.java index 0e08c750b242..b86fb2179bcd 100644 --- a/dspace-api/src/main/java/org/dspace/content/dao/impl/PreviewContentDAOImpl.java +++ b/dspace-api/src/main/java/org/dspace/content/dao/impl/PreviewContentDAOImpl.java @@ -44,7 +44,7 @@ public boolean hasPreview(Context context, Bitstream bitstream) throws SQLExcept String sql = "SELECT COUNT(*) FROM previewcontent pc " + "WHERE pc.bitstream_id = :bitstream_id"; - Query query = getHibernateSession(context).createNativeQuery(sql, PreviewContent.class); + Query query = getHibernateSession(context).createNativeQuery(sql); query.setParameter("bitstream_id", bitstream.getID()); query.setHint("org.hibernate.cacheable", Boolean.TRUE); query.setMaxResults(1); diff --git a/dspace-api/src/main/java/org/dspace/content/service/PreviewContentService.java b/dspace-api/src/main/java/org/dspace/content/service/PreviewContentService.java index b88b38e0d0a0..842112413e8d 100644 --- 
a/dspace-api/src/main/java/org/dspace/content/service/PreviewContentService.java +++ b/dspace-api/src/main/java/org/dspace/content/service/PreviewContentService.java @@ -87,6 +87,7 @@ PreviewContent create(Context context, Bitstream bitstream, String name, String * * @param context DSpace context * @param bitstream The bitstream to get bitstream UUID + * @return True if preview content exists, false otherwise * @throws SQLException If a database error occurs */ boolean hasPreview(Context context, Bitstream bitstream) throws SQLException; @@ -96,6 +97,7 @@ PreviewContent create(Context context, Bitstream bitstream, String name, String * * @param context DSpace context * @param bitstream The bitstream to get bitstream UUID + * @return List of preview contents * @throws SQLException If a database error occurs */ List getPreview(Context context, Bitstream bitstream) throws SQLException; diff --git a/dspace-api/src/main/java/org/dspace/scripts/filepreview/FilePreview.java b/dspace-api/src/main/java/org/dspace/scripts/filepreview/FilePreview.java index 49b2564147cd..ba0b0ef0bf12 100644 --- a/dspace-api/src/main/java/org/dspace/scripts/filepreview/FilePreview.java +++ b/dspace-api/src/main/java/org/dspace/scripts/filepreview/FilePreview.java @@ -22,7 +22,6 @@ import org.dspace.content.Bitstream; import org.dspace.content.Bundle; import org.dspace.content.Item; -import org.dspace.content.PreviewContent; import org.dspace.content.factory.ContentServiceFactory; import org.dspace.content.service.ItemService; import org.dspace.content.service.PreviewContentService; diff --git a/dspace-server-webapp/src/test/java/org/dspace/app/rest/PreviewContentServiceImplIT.java b/dspace-server-webapp/src/test/java/org/dspace/app/rest/PreviewContentServiceImplIT.java index 48bf84e010a4..c79b1c87f31f 100644 --- a/dspace-server-webapp/src/test/java/org/dspace/app/rest/PreviewContentServiceImplIT.java +++ b/dspace-server-webapp/src/test/java/org/dspace/app/rest/PreviewContentServiceImplIT.java 
@@ -125,7 +125,7 @@ public void testFindByBitstream() throws Exception { @Test public void testFindRootByBitstream() throws Exception { List previewContentList = - previewContentService.hasPreview(context, bitstream1); + previewContentService.getPreview(context, bitstream1); Assert.assertEquals(previewContentList.size(), 1); Assert.assertEquals(previewContent1.getID(), previewContentList.get(0).getID()); } From 2d45b6cb003d92c88ac733ddfe1f73fb0195e12a Mon Sep 17 00:00:00 2001 From: Paurikova2 Date: Fri, 6 Jun 2025 09:34:51 +0200 Subject: [PATCH 14/33] removed empty lines --- .../java/org/dspace/content/PreviewContentServiceImpl.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java b/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java index def93ac08279..080abbd263b9 100644 --- a/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java @@ -453,7 +453,6 @@ private String getFileContent(File file, boolean cutResult) throws IOException { } content.append(line).append("\n"); } - } catch (IOException e) { log.error("IOException during creating the preview content because: ", e); throw e; // Optional: rethrow if you want the exception to propagate @@ -482,5 +481,4 @@ private static String ensureMaxLength(String input) { return input; } } -} - +} \ No newline at end of file From a23fc5003cb42c0c2f728d7807bf51cc13419b1a Mon Sep 17 00:00:00 2001 From: Paurikova2 Date: Fri, 6 Jun 2025 09:49:27 +0200 Subject: [PATCH 15/33] removed maxresult limit --- .../java/org/dspace/content/dao/impl/PreviewContentDAOImpl.java | 1 - 1 file changed, 1 deletion(-) diff --git a/dspace-api/src/main/java/org/dspace/content/dao/impl/PreviewContentDAOImpl.java b/dspace-api/src/main/java/org/dspace/content/dao/impl/PreviewContentDAOImpl.java index b86fb2179bcd..b407f82b72e9 100644 --- 
a/dspace-api/src/main/java/org/dspace/content/dao/impl/PreviewContentDAOImpl.java +++ b/dspace-api/src/main/java/org/dspace/content/dao/impl/PreviewContentDAOImpl.java @@ -47,7 +47,6 @@ public boolean hasPreview(Context context, Bitstream bitstream) throws SQLExcept Query query = getHibernateSession(context).createNativeQuery(sql); query.setParameter("bitstream_id", bitstream.getID()); query.setHint("org.hibernate.cacheable", Boolean.TRUE); - query.setMaxResults(1); long count = ((Number) query.getSingleResult()).longValue(); return count > 0; } From 058824f41f4d72f57eea96c2bb13242599fc7249 Mon Sep 17 00:00:00 2001 From: Paurikova2 Date: Fri, 6 Jun 2025 09:55:53 +0200 Subject: [PATCH 16/33] fix long conversion --- .../java/org/dspace/content/dao/impl/PreviewContentDAOImpl.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dspace-api/src/main/java/org/dspace/content/dao/impl/PreviewContentDAOImpl.java b/dspace-api/src/main/java/org/dspace/content/dao/impl/PreviewContentDAOImpl.java index b407f82b72e9..5fc8471bca14 100644 --- a/dspace-api/src/main/java/org/dspace/content/dao/impl/PreviewContentDAOImpl.java +++ b/dspace-api/src/main/java/org/dspace/content/dao/impl/PreviewContentDAOImpl.java @@ -47,7 +47,7 @@ public boolean hasPreview(Context context, Bitstream bitstream) throws SQLExcept Query query = getHibernateSession(context).createNativeQuery(sql); query.setParameter("bitstream_id", bitstream.getID()); query.setHint("org.hibernate.cacheable", Boolean.TRUE); - long count = ((Number) query.getSingleResult()).longValue(); + long count = (long) query.getSingleResult(); return count > 0; } From 5831fdab26f34dd986b6e37b62c26808fb0e1a65 Mon Sep 17 00:00:00 2001 From: Paurikova2 Date: Fri, 6 Jun 2025 10:57:26 +0200 Subject: [PATCH 17/33] fix problem with hibernate session --- .../org/dspace/content/dao/impl/PreviewContentDAOImpl.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git 
a/dspace-api/src/main/java/org/dspace/content/dao/impl/PreviewContentDAOImpl.java b/dspace-api/src/main/java/org/dspace/content/dao/impl/PreviewContentDAOImpl.java index 5fc8471bca14..e631efa6ec7a 100644 --- a/dspace-api/src/main/java/org/dspace/content/dao/impl/PreviewContentDAOImpl.java +++ b/dspace-api/src/main/java/org/dspace/content/dao/impl/PreviewContentDAOImpl.java @@ -47,8 +47,7 @@ public boolean hasPreview(Context context, Bitstream bitstream) throws SQLExcept Query query = getHibernateSession(context).createNativeQuery(sql); query.setParameter("bitstream_id", bitstream.getID()); query.setHint("org.hibernate.cacheable", Boolean.TRUE); - long count = (long) query.getSingleResult(); - return count > 0; + return count(query) > 0; } @Override From e8001e71c42ccea2e0c79cdebbcbcde61e817c58 Mon Sep 17 00:00:00 2001 From: Paurikova2 Date: Fri, 6 Jun 2025 11:22:54 +0200 Subject: [PATCH 18/33] fix problem with hibernate session --- .../dspace/content/dao/impl/PreviewContentDAOImpl.java | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/dspace-api/src/main/java/org/dspace/content/dao/impl/PreviewContentDAOImpl.java b/dspace-api/src/main/java/org/dspace/content/dao/impl/PreviewContentDAOImpl.java index e631efa6ec7a..2e83994957f2 100644 --- a/dspace-api/src/main/java/org/dspace/content/dao/impl/PreviewContentDAOImpl.java +++ b/dspace-api/src/main/java/org/dspace/content/dao/impl/PreviewContentDAOImpl.java @@ -41,12 +41,10 @@ public List findByBitstream(Context context, UUID bitstreamId) t @Override public boolean hasPreview(Context context, Bitstream bitstream) throws SQLException { - String sql = - "SELECT COUNT(*) FROM previewcontent pc " + - "WHERE pc.bitstream_id = :bitstream_id"; - Query query = getHibernateSession(context).createNativeQuery(sql); + Query query = createQuery(context, + "SELECT COUNT(pc) FROM " + PreviewContent.class.getSimpleName() + + " pc WHERE pc.bitstream.id = :bitstream_id"); query.setParameter("bitstream_id", 
bitstream.getID()); - query.setHint("org.hibernate.cacheable", Boolean.TRUE); return count(query) > 0; } From 316434bab7cc3d42e753168b94cfe79566459ffd Mon Sep 17 00:00:00 2001 From: Paurikova2 Date: Fri, 6 Jun 2025 12:08:00 +0200 Subject: [PATCH 19/33] fix .tar.gz generating --- .../java/org/dspace/content/PreviewContentServiceImpl.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java b/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java index 080abbd263b9..9f03885b3e6a 100644 --- a/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java @@ -37,6 +37,7 @@ import org.apache.commons.compress.archivers.tar.TarArchiveEntry; import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; +import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; import org.dspace.app.util.Util; import org.dspace.authorize.AuthorizeException; import org.dspace.authorize.MissingLicenseAgreementException; @@ -331,7 +332,8 @@ private void addFilePath(List filePaths, String path, long size) { private void processTarFile(List filePaths, File file) throws IOException { try (InputStream fis = new FileInputStream(file); BufferedInputStream bis = new BufferedInputStream(fis); - TarArchiveInputStream tarInput = new TarArchiveInputStream(bis)) { + InputStream gzipIn = new GzipCompressorInputStream(bis); + TarArchiveInputStream tarInput = new TarArchiveInputStream(gzipIn)) { TarArchiveEntry entry; while ((entry = tarInput.getNextTarEntry()) != null) { From d6468d5bda14f6fee0737251cf9bdf2b24384fe9 Mon Sep 17 00:00:00 2001 From: Paurikova2 Date: Fri, 6 Jun 2025 12:50:41 +0200 Subject: [PATCH 20/33] removed fix for .tar.gz - not work --- .../java/org/dspace/content/PreviewContentServiceImpl.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff 
--git a/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java b/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java index 9f03885b3e6a..080abbd263b9 100644 --- a/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java @@ -37,7 +37,6 @@ import org.apache.commons.compress.archivers.tar.TarArchiveEntry; import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; -import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; import org.dspace.app.util.Util; import org.dspace.authorize.AuthorizeException; import org.dspace.authorize.MissingLicenseAgreementException; @@ -332,8 +331,7 @@ private void addFilePath(List filePaths, String path, long size) { private void processTarFile(List filePaths, File file) throws IOException { try (InputStream fis = new FileInputStream(file); BufferedInputStream bis = new BufferedInputStream(fis); - InputStream gzipIn = new GzipCompressorInputStream(bis); - TarArchiveInputStream tarInput = new TarArchiveInputStream(gzipIn)) { + TarArchiveInputStream tarInput = new TarArchiveInputStream(bis)) { TarArchiveEntry entry; while ((entry = tarInput.getNextTarEntry()) != null) { From 7f2374b85a0ef3d557fe3335be0721e8e82f8175 Mon Sep 17 00:00:00 2001 From: Paurikova2 Date: Fri, 6 Jun 2025 13:06:53 +0200 Subject: [PATCH 21/33] skip fully entry for tar --- .../content/PreviewContentServiceImpl.java | 30 +++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java b/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java index 080abbd263b9..b2efc5cc19b0 100644 --- a/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java @@ -37,6 +37,7 @@ import 
org.apache.commons.compress.archivers.tar.TarArchiveEntry; import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; +import org.apache.commons.compress.utils.IOUtils; import org.dspace.app.util.Util; import org.dspace.authorize.AuthorizeException; import org.dspace.authorize.MissingLicenseAgreementException; @@ -331,8 +332,10 @@ private void addFilePath(List filePaths, String path, long size) { private void processTarFile(List filePaths, File file) throws IOException { try (InputStream fis = new FileInputStream(file); BufferedInputStream bis = new BufferedInputStream(fis); + // Use the constructor that accepts LongFileMode TarArchiveInputStream tarInput = new TarArchiveInputStream(bis)) { + TarArchiveEntry entry; while ((entry = tarInput.getNextTarEntry()) != null) { if (filePaths.size() >= maxPreviewCount) { @@ -344,9 +347,32 @@ private void processTarFile(List filePaths, File file) throws IOExceptio long size = entry.getSize(); addFilePath(filePaths, name, size); } - // Skip file contents efficiently - tarInput.skip(entry.getSize()); + // Fully skip entry content to handle large files correctly + skipFully(tarInput, entry.getSize()); + } + } + } + + /** + * Fully skips the specified number of bytes from the input stream, + * ensuring that all bytes are skipped even if InputStream.skip() skips less. 
+ * + * @param in the input stream to skip bytes from + * @param bytesToSkip the number of bytes to skip + * @throws IOException if an I/O error occurs or the end of stream is reached before skipping all bytes + */ + private void skipFully(InputStream in, long bytesToSkip) throws IOException { + long remaining = bytesToSkip; + while (remaining > 0) { + long skipped = in.skip(remaining); + if (skipped <= 0) { + // If skip returns 0 or less, try to read a byte to move forward + if (in.read() == -1) { + throw new IOException("Unexpected end of stream while skipping"); + } + skipped = 1; } + remaining -= skipped; } } From 252f59ca67c46ef03aca2a4036902040e49d40bd Mon Sep 17 00:00:00 2001 From: Paurikova2 Date: Mon, 9 Jun 2025 09:23:21 +0200 Subject: [PATCH 22/33] added indexes for speed up queries --- .../V7.6_2024.08.05__Added_Preview_Tables.sql | 18 ++++++++++++++++++ .../V7.6_2024.08.05__Added_Preview_Tables.sql | 18 ++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/h2/V7.6_2024.08.05__Added_Preview_Tables.sql b/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/h2/V7.6_2024.08.05__Added_Preview_Tables.sql index 068f80f9430a..8ba58e0f20fe 100644 --- a/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/h2/V7.6_2024.08.05__Added_Preview_Tables.sql +++ b/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/h2/V7.6_2024.08.05__Added_Preview_Tables.sql @@ -76,3 +76,21 @@ ALTER TABLE preview2preview ALTER TABLE preview2preview ADD CONSTRAINT preview2preview_child_fk FOREIGN KEY (child_id) REFERENCES previewcontent(previewcontent_id) ON DELETE CASCADE; + +-- =================================================================== +-- PERFORMANCE INDEXES +-- =================================================================== + +-- +-- Index to speed up queries filtering previewcontent by bitstream_id, +-- used in hasPreview() and getPreview() JOIN 
with bitstream table. +-- +CREATE INDEX idx_previewcontent_bitstream_id +ON previewcontent (bitstream_id); + +-- +-- Index to optimize NOT EXISTS subquery in getPreview(), +-- checking for existence of child_id in preview2preview. +-- +CREATE INDEX idx_preview2preview_child_id +ON preview2preview (child_id); \ No newline at end of file diff --git a/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/postgres/V7.6_2024.08.05__Added_Preview_Tables.sql b/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/postgres/V7.6_2024.08.05__Added_Preview_Tables.sql index 57919fbfa8e6..7ea6a2b020a6 100644 --- a/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/postgres/V7.6_2024.08.05__Added_Preview_Tables.sql +++ b/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/postgres/V7.6_2024.08.05__Added_Preview_Tables.sql @@ -86,3 +86,21 @@ ALTER TABLE preview2preview ALTER TABLE preview2preview ADD CONSTRAINT preview2preview_child_fk FOREIGN KEY (child_id) REFERENCES previewcontent(previewcontent_id) ON DELETE CASCADE; + +-- =================================================================== +-- PERFORMANCE INDEXES +-- =================================================================== + +-- +-- Index to speed up queries filtering previewcontent by bitstream_id, +-- used in hasPreview() and getPreview() JOIN with bitstream table. +-- +CREATE INDEX idx_previewcontent_bitstream_id +ON previewcontent (bitstream_id); + +-- +-- Index to optimize NOT EXISTS subquery in getPreview(), +-- checking for existence of child_id in preview2preview. 
+-- +CREATE INDEX idx_preview2preview_child_id +ON preview2preview (child_id); From 3c408237bd47c57bfc115f9763afa9ec5247f84f Mon Sep 17 00:00:00 2001 From: Paurikova2 Date: Mon, 9 Jun 2025 09:44:53 +0200 Subject: [PATCH 23/33] removed unused improt --- .../main/java/org/dspace/content/PreviewContentServiceImpl.java | 1 - 1 file changed, 1 deletion(-) diff --git a/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java b/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java index b2efc5cc19b0..819e2d2149fd 100644 --- a/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java @@ -37,7 +37,6 @@ import org.apache.commons.compress.archivers.tar.TarArchiveEntry; import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; -import org.apache.commons.compress.utils.IOUtils; import org.dspace.app.util.Util; import org.dspace.authorize.AuthorizeException; import org.dspace.authorize.MissingLicenseAgreementException; From e13b55d16e87084c6173c40d26afb43512474ac0 Mon Sep 17 00:00:00 2001 From: Paurikova2 Date: Mon, 9 Jun 2025 14:16:08 +0200 Subject: [PATCH 24/33] fix comments, added indexes to separated sql file --- .../content/PreviewContentServiceImpl.java | 3 ++- .../content/service/BitstreamService.java | 4 ++-- .../storage/bitstore/BitStoreService.java | 4 ++-- .../V7.6_2024.08.05__Added_Preview_Tables.sql | 20 +------------------ ...06.09__Added_Indexes_To_Preview_Tables.sql | 17 ++++++++++++++++ .../V7.6_2024.08.05__Added_Preview_Tables.sql | 20 +------------------ ...06.09__Added_Indexes_To_Preview_Tables.sql | 17 ++++++++++++++++ 7 files changed, 42 insertions(+), 43 deletions(-) create mode 100644 dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/h2/V7.6_2025.06.09__Added_Indexes_To_Preview_Tables.sql create mode 100644 
dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/postgres/V7.6_2025.06.09__Added_Indexes_To_Preview_Tables.sql diff --git a/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java b/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java index 819e2d2149fd..3fb3d50a831d 100644 --- a/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java @@ -71,7 +71,8 @@ public class PreviewContentServiceImpl implements PreviewContentService { // This constant is used to limit the length of the preview content stored in the database to prevent // the database from being overloaded with large amounts of data. private static final int MAX_PREVIEW_COUNT_LENGTH = 2000; - int estimatedFileCount = 200; + // Initial capacity for the list of extracted file paths, set to 200 based on typical archive file counts. + private int estimatedFileCount = 200; // Configured ZIP file preview limit (default: 1000) - if the ZIP file contains more files, it will be truncated @Value("${file.preview.zip.limit.length:1000}") diff --git a/dspace-api/src/main/java/org/dspace/content/service/BitstreamService.java b/dspace-api/src/main/java/org/dspace/content/service/BitstreamService.java index ffbde758dfea..2cf03ea5e4c8 100644 --- a/dspace-api/src/main/java/org/dspace/content/service/BitstreamService.java +++ b/dspace-api/src/main/java/org/dspace/content/service/BitstreamService.java @@ -155,7 +155,7 @@ public Bitstream register(Context context, int assetstore, String bitstreamPath) public void setFormat(Context context, Bitstream bitstream, BitstreamFormat bitstreamFormat) throws SQLException; /** - * Retrieve the contents of the bitstream + * Retrieve the content of the bitstream. 
* * @param context DSpace context object * @param bitstream DSpace bitstream @@ -168,7 +168,7 @@ public InputStream retrieve(Context context, Bitstream bitstream) throws IOException, SQLException, AuthorizeException; /** - * Retrieve the contents of the bitstream + * Retrieve the content of the bitstream. * * @param context DSpace context object * @param bitstream DSpace bitstream diff --git a/dspace-api/src/main/java/org/dspace/storage/bitstore/BitStoreService.java b/dspace-api/src/main/java/org/dspace/storage/bitstore/BitStoreService.java index d29be58d6f43..716b60d03f1f 100644 --- a/dspace-api/src/main/java/org/dspace/storage/bitstore/BitStoreService.java +++ b/dspace-api/src/main/java/org/dspace/storage/bitstore/BitStoreService.java @@ -37,7 +37,7 @@ public interface BitStoreService { public String generateId(); /** - * Retrieve the bits for bitstream + * Retrieve the content of the given bitstream as a stream. * * @param bitstream DSpace Bitstream object * @return The stream of bits @@ -47,7 +47,7 @@ public interface BitStoreService { public InputStream get(Bitstream bitstream) throws IOException; /** - * Retrieve the bits for bitstream + * Retrieve the content of the given bitstream as a File. 
* * @param bitstream DSpace Bitstream object * @return The File diff --git a/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/h2/V7.6_2024.08.05__Added_Preview_Tables.sql b/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/h2/V7.6_2024.08.05__Added_Preview_Tables.sql index 8ba58e0f20fe..6b2e1c54b154 100644 --- a/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/h2/V7.6_2024.08.05__Added_Preview_Tables.sql +++ b/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/h2/V7.6_2024.08.05__Added_Preview_Tables.sql @@ -75,22 +75,4 @@ ALTER TABLE preview2preview -- ALTER TABLE preview2preview - ADD CONSTRAINT preview2preview_child_fk FOREIGN KEY (child_id) REFERENCES previewcontent(previewcontent_id) ON DELETE CASCADE; - --- =================================================================== --- PERFORMANCE INDEXES --- =================================================================== - --- --- Index to speed up queries filtering previewcontent by bitstream_id, --- used in hasPreview() and getPreview() JOIN with bitstream table. --- -CREATE INDEX idx_previewcontent_bitstream_id -ON previewcontent (bitstream_id); - --- --- Index to optimize NOT EXISTS subquery in getPreview(), --- checking for existence of child_id in preview2preview. 
--- -CREATE INDEX idx_preview2preview_child_id -ON preview2preview (child_id); \ No newline at end of file + ADD CONSTRAINT preview2preview_child_fk FOREIGN KEY (child_id) REFERENCES previewcontent(previewcontent_id) ON DELETE CASCADE; \ No newline at end of file diff --git a/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/h2/V7.6_2025.06.09__Added_Indexes_To_Preview_Tables.sql b/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/h2/V7.6_2025.06.09__Added_Indexes_To_Preview_Tables.sql new file mode 100644 index 000000000000..95750d1c605f --- /dev/null +++ b/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/h2/V7.6_2025.06.09__Added_Indexes_To_Preview_Tables.sql @@ -0,0 +1,17 @@ +-- =================================================================== +-- PERFORMANCE INDEXES +-- =================================================================== + +-- +-- Index to speed up queries filtering previewcontent by bitstream_id, +-- used in hasPreview() and getPreview() JOIN with bitstream table. +-- +CREATE INDEX idx_previewcontent_bitstream_id +ON previewcontent (bitstream_id); + +-- +-- Index to optimize NOT EXISTS subquery in getPreview(), +-- checking for existence of child_id in preview2preview. 
+-- +CREATE INDEX idx_preview2preview_child_id +ON preview2preview (child_id); \ No newline at end of file diff --git a/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/postgres/V7.6_2024.08.05__Added_Preview_Tables.sql b/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/postgres/V7.6_2024.08.05__Added_Preview_Tables.sql index 7ea6a2b020a6..0243b95445ec 100644 --- a/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/postgres/V7.6_2024.08.05__Added_Preview_Tables.sql +++ b/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/postgres/V7.6_2024.08.05__Added_Preview_Tables.sql @@ -85,22 +85,4 @@ ALTER TABLE preview2preview -- ALTER TABLE preview2preview - ADD CONSTRAINT preview2preview_child_fk FOREIGN KEY (child_id) REFERENCES previewcontent(previewcontent_id) ON DELETE CASCADE; - --- =================================================================== --- PERFORMANCE INDEXES --- =================================================================== - --- --- Index to speed up queries filtering previewcontent by bitstream_id, --- used in hasPreview() and getPreview() JOIN with bitstream table. --- -CREATE INDEX idx_previewcontent_bitstream_id -ON previewcontent (bitstream_id); - --- --- Index to optimize NOT EXISTS subquery in getPreview(), --- checking for existence of child_id in preview2preview. 
--- -CREATE INDEX idx_preview2preview_child_id -ON preview2preview (child_id); + ADD CONSTRAINT preview2preview_child_fk FOREIGN KEY (child_id) REFERENCES previewcontent(previewcontent_id) ON DELETE CASCADE; \ No newline at end of file diff --git a/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/postgres/V7.6_2025.06.09__Added_Indexes_To_Preview_Tables.sql b/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/postgres/V7.6_2025.06.09__Added_Indexes_To_Preview_Tables.sql new file mode 100644 index 000000000000..95750d1c605f --- /dev/null +++ b/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/postgres/V7.6_2025.06.09__Added_Indexes_To_Preview_Tables.sql @@ -0,0 +1,17 @@ +-- =================================================================== +-- PERFORMANCE INDEXES +-- =================================================================== + +-- +-- Index to speed up queries filtering previewcontent by bitstream_id, +-- used in hasPreview() and getPreview() JOIN with bitstream table. +-- +CREATE INDEX idx_previewcontent_bitstream_id +ON previewcontent (bitstream_id); + +-- +-- Index to optimize NOT EXISTS subquery in getPreview(), +-- checking for existence of child_id in preview2preview. 
+-- +CREATE INDEX idx_preview2preview_child_id +ON preview2preview (child_id); \ No newline at end of file From 62693ed329539b864eb2535fbb12f7acaaeb7ad0 Mon Sep 17 00:00:00 2001 From: Paurikova2 Date: Mon, 9 Jun 2025 15:09:09 +0200 Subject: [PATCH 25/33] added license header --- .../V7.6_2025.06.09__Added_Indexes_To_Preview_Tables.sql | 8 ++++++++ .../V7.6_2025.06.09__Added_Indexes_To_Preview_Tables.sql | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/h2/V7.6_2025.06.09__Added_Indexes_To_Preview_Tables.sql b/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/h2/V7.6_2025.06.09__Added_Indexes_To_Preview_Tables.sql index 95750d1c605f..b0f95661c0c1 100644 --- a/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/h2/V7.6_2025.06.09__Added_Indexes_To_Preview_Tables.sql +++ b/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/h2/V7.6_2025.06.09__Added_Indexes_To_Preview_Tables.sql @@ -1,3 +1,11 @@ +-- +-- The contents of this file are subject to the license and copyright +-- detailed in the LICENSE and NOTICE files at the root of the source +-- tree and available online at +-- +-- http://www.dspace.org/license/ +-- + -- =================================================================== -- PERFORMANCE INDEXES -- =================================================================== diff --git a/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/postgres/V7.6_2025.06.09__Added_Indexes_To_Preview_Tables.sql b/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/postgres/V7.6_2025.06.09__Added_Indexes_To_Preview_Tables.sql index 95750d1c605f..b0f95661c0c1 100644 --- a/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/postgres/V7.6_2025.06.09__Added_Indexes_To_Preview_Tables.sql +++ 
b/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/postgres/V7.6_2025.06.09__Added_Indexes_To_Preview_Tables.sql @@ -1,3 +1,11 @@ +-- +-- The contents of this file are subject to the license and copyright +-- detailed in the LICENSE and NOTICE files at the root of the source +-- tree and available online at +-- +-- http://www.dspace.org/license/ +-- + -- =================================================================== -- PERFORMANCE INDEXES -- =================================================================== From 6805000e9b630c42c80e2cc3988548e5cb039ee8 Mon Sep 17 00:00:00 2001 From: Paurikova2 Date: Tue, 10 Jun 2025 07:14:30 +0200 Subject: [PATCH 26/33] named constant by upper case --- .../java/org/dspace/content/PreviewContentServiceImpl.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java b/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java index 3fb3d50a831d..3e28fb4c66cd 100644 --- a/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java @@ -72,7 +72,7 @@ public class PreviewContentServiceImpl implements PreviewContentService { // the database from being overloaded with large amounts of data. private static final int MAX_PREVIEW_COUNT_LENGTH = 2000; // Initial capacity for the list of extracted file paths, set to 200 based on typical archive file counts. 
- private int estimatedFileCount = 200; + private int ESTIMATED_FILE_COUNT = 200; // Configured ZIP file preview limit (default: 1000) - if the ZIP file contains more files, it will be truncated @Value("${file.preview.zip.limit.length:1000}") @@ -448,7 +448,7 @@ private String buildXmlResponse(List filePaths) { * @return an XML string representing the extracted file paths */ private String extractFile(File file, String fileType) throws Exception { - List filePaths = new ArrayList<>(estimatedFileCount); + List filePaths = new ArrayList<>(ESTIMATED_FILE_COUNT); // Process the file based on its type if (ARCHIVE_TYPE_TAR.equals(fileType)) { processTarFile(filePaths, file); From edeeca0200eb9c4c26a03384ca4ec6f7e3659eca Mon Sep 17 00:00:00 2001 From: Paurikova2 Date: Tue, 10 Jun 2025 07:15:27 +0200 Subject: [PATCH 27/33] added empty lines --- .../sqlmigration/h2/V7.6_2024.08.05__Added_Preview_Tables.sql | 3 ++- .../postgres/V7.6_2024.08.05__Added_Preview_Tables.sql | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/h2/V7.6_2024.08.05__Added_Preview_Tables.sql b/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/h2/V7.6_2024.08.05__Added_Preview_Tables.sql index 6b2e1c54b154..963d509e0897 100644 --- a/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/h2/V7.6_2024.08.05__Added_Preview_Tables.sql +++ b/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/h2/V7.6_2024.08.05__Added_Preview_Tables.sql @@ -75,4 +75,5 @@ ALTER TABLE preview2preview -- ALTER TABLE preview2preview - ADD CONSTRAINT preview2preview_child_fk FOREIGN KEY (child_id) REFERENCES previewcontent(previewcontent_id) ON DELETE CASCADE; \ No newline at end of file + ADD CONSTRAINT preview2preview_child_fk FOREIGN KEY (child_id) REFERENCES previewcontent(previewcontent_id) ON DELETE CASCADE; + \ No newline at end of file diff --git 
a/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/postgres/V7.6_2024.08.05__Added_Preview_Tables.sql b/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/postgres/V7.6_2024.08.05__Added_Preview_Tables.sql index 0243b95445ec..57919fbfa8e6 100644 --- a/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/postgres/V7.6_2024.08.05__Added_Preview_Tables.sql +++ b/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/postgres/V7.6_2024.08.05__Added_Preview_Tables.sql @@ -85,4 +85,4 @@ ALTER TABLE preview2preview -- ALTER TABLE preview2preview - ADD CONSTRAINT preview2preview_child_fk FOREIGN KEY (child_id) REFERENCES previewcontent(previewcontent_id) ON DELETE CASCADE; \ No newline at end of file + ADD CONSTRAINT preview2preview_child_fk FOREIGN KEY (child_id) REFERENCES previewcontent(previewcontent_id) ON DELETE CASCADE; From 5fbdf5d74f2d7ba0e1be48cdfd958b0ea6d5a1b5 Mon Sep 17 00:00:00 2001 From: Paurikova2 Date: Tue, 10 Jun 2025 07:19:34 +0200 Subject: [PATCH 28/33] removed empty lines --- .../sqlmigration/h2/V7.6_2024.08.05__Added_Preview_Tables.sql | 3 +-- .../postgres/V7.6_2024.08.05__Added_Preview_Tables.sql | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/h2/V7.6_2024.08.05__Added_Preview_Tables.sql b/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/h2/V7.6_2024.08.05__Added_Preview_Tables.sql index 963d509e0897..6b2e1c54b154 100644 --- a/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/h2/V7.6_2024.08.05__Added_Preview_Tables.sql +++ b/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/h2/V7.6_2024.08.05__Added_Preview_Tables.sql @@ -75,5 +75,4 @@ ALTER TABLE preview2preview -- ALTER TABLE preview2preview - ADD CONSTRAINT preview2preview_child_fk FOREIGN KEY (child_id) REFERENCES previewcontent(previewcontent_id) ON DELETE CASCADE; - \ No newline at end of 
file + ADD CONSTRAINT preview2preview_child_fk FOREIGN KEY (child_id) REFERENCES previewcontent(previewcontent_id) ON DELETE CASCADE; \ No newline at end of file diff --git a/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/postgres/V7.6_2024.08.05__Added_Preview_Tables.sql b/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/postgres/V7.6_2024.08.05__Added_Preview_Tables.sql index 57919fbfa8e6..0243b95445ec 100644 --- a/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/postgres/V7.6_2024.08.05__Added_Preview_Tables.sql +++ b/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/postgres/V7.6_2024.08.05__Added_Preview_Tables.sql @@ -85,4 +85,4 @@ ALTER TABLE preview2preview -- ALTER TABLE preview2preview - ADD CONSTRAINT preview2preview_child_fk FOREIGN KEY (child_id) REFERENCES previewcontent(previewcontent_id) ON DELETE CASCADE; + ADD CONSTRAINT preview2preview_child_fk FOREIGN KEY (child_id) REFERENCES previewcontent(previewcontent_id) ON DELETE CASCADE; \ No newline at end of file From 06d8f0c9b99b621127baf55c5f66ae292d5116ec Mon Sep 17 00:00:00 2001 From: Paurikova2 Date: Tue, 10 Jun 2025 08:57:37 +0200 Subject: [PATCH 29/33] set static attribute to static final --- .../main/java/org/dspace/content/PreviewContentServiceImpl.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java b/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java index 3e28fb4c66cd..f61839fe446e 100644 --- a/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java @@ -72,7 +72,7 @@ public class PreviewContentServiceImpl implements PreviewContentService { // the database from being overloaded with large amounts of data. 
private static final int MAX_PREVIEW_COUNT_LENGTH = 2000; // Initial capacity for the list of extracted file paths, set to 200 based on typical archive file counts. - private int ESTIMATED_FILE_COUNT = 200; + private static final int ESTIMATED_FILE_COUNT = 200; // Configured ZIP file preview limit (default: 1000) - if the ZIP file contains more files, it will be truncated @Value("${file.preview.zip.limit.length:1000}") From 95e7dfb5d26828499f83d1afd8c9c9f2d80f84ae Mon Sep 17 00:00:00 2001 From: Paurikova2 Date: Tue, 10 Jun 2025 09:53:12 +0200 Subject: [PATCH 30/33] set public method to private, interupt threat in exception --- .../java/org/dspace/content/PreviewContentServiceImpl.java | 4 ++-- .../java/org/dspace/storage/bitstore/S3BitStoreService.java | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java b/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java index f61839fe446e..810421410611 100644 --- a/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java @@ -383,7 +383,7 @@ private void skipFully(InputStream in, long bytesToSkip) throws IOException { * @param file the ZIP file to read * @throws IOException if the file is invalid or cannot be read */ - public void processZipFile(List filePaths, File file) throws IOException { + private void processZipFile(List filePaths, File file) throws IOException { try (ZipFile zipFile = new ZipFile(file)) { Enumeration entries = zipFile.entries(); while (entries.hasMoreElements()) { @@ -406,8 +406,8 @@ public void processZipFile(List filePaths, File file) throws IOException */ private String buildXmlResponse(List filePaths) { StringWriter stringWriter = new StringWriter(); + XMLOutputFactory factory = XMLOutputFactory.newInstance(); try { - XMLOutputFactory factory = XMLOutputFactory.newInstance(); XMLStreamWriter 
writer = factory.createXMLStreamWriter(stringWriter); writer.writeStartDocument("UTF-8", "1.0"); diff --git a/dspace-api/src/main/java/org/dspace/storage/bitstore/S3BitStoreService.java b/dspace-api/src/main/java/org/dspace/storage/bitstore/S3BitStoreService.java index 1d553e5dbd21..85445444dffc 100644 --- a/dspace-api/src/main/java/org/dspace/storage/bitstore/S3BitStoreService.java +++ b/dspace-api/src/main/java/org/dspace/storage/bitstore/S3BitStoreService.java @@ -323,6 +323,9 @@ public File getFile(Bitstream bitstream) throws IOException { return tempFile; } catch (AmazonClientException | InterruptedException e) { + if (e instanceof InterruptedException) { + Thread.currentThread().interrupt(); + } log.error("getFile(" + key + ")", e); throw new IOException(e); } From 35222920a94c782ce284ef2d24ebd837779450fe Mon Sep 17 00:00:00 2001 From: Paurikova2 Date: Tue, 10 Jun 2025 10:22:16 +0200 Subject: [PATCH 31/33] inicialized fileInfo, refactorization of code based on copilot review --- .../java/org/dspace/content/PreviewContentServiceImpl.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java b/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java index 810421410611..676e76cbb874 100644 --- a/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java @@ -164,7 +164,7 @@ public boolean canPreview(Context context, Bitstream bitstream, boolean authoriz @Override public List getFilePreviewContent(Context context, Bitstream bitstream) throws Exception { - List fileInfos = null; + List fileInfos = new ArrayList<>(); File file = null; try { @@ -407,8 +407,9 @@ private void processZipFile(List filePaths, File file) throws IOExceptio private String buildXmlResponse(List filePaths) { StringWriter stringWriter = new StringWriter(); XMLOutputFactory factory = 
XMLOutputFactory.newInstance(); + XMLStreamWriter writer = null; try { - XMLStreamWriter writer = factory.createXMLStreamWriter(stringWriter); + writer = factory.createXMLStreamWriter(stringWriter); writer.writeStartDocument("UTF-8", "1.0"); writer.writeStartElement("root"); @@ -443,7 +444,7 @@ private String buildXmlResponse(List filePaths) { /** * Processes file data based on the specified file type (tar or zip), * and returns an XML representation of the file paths. - * @param file the InputStream containing the file data + * @param file the file data * @param fileType the type of file to extract ("tar" or "zip") * @return an XML string representing the extracted file paths */ From 70668432bc27927c513e380d1a3fd4cbf3b3751f Mon Sep 17 00:00:00 2001 From: Paurikova2 Date: Tue, 10 Jun 2025 12:52:04 +0200 Subject: [PATCH 32/33] restore sql files, restore vanilla comments --- .../java/org/dspace/content/service/BitstreamService.java | 4 ++-- .../java/org/dspace/storage/bitstore/BitStoreService.java | 4 ++-- .../sqlmigration/h2/V7.6_2024.08.05__Added_Preview_Tables.sql | 2 +- .../postgres/V7.6_2024.08.05__Added_Preview_Tables.sql | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/dspace-api/src/main/java/org/dspace/content/service/BitstreamService.java b/dspace-api/src/main/java/org/dspace/content/service/BitstreamService.java index 2cf03ea5e4c8..2973dc9a3c1f 100644 --- a/dspace-api/src/main/java/org/dspace/content/service/BitstreamService.java +++ b/dspace-api/src/main/java/org/dspace/content/service/BitstreamService.java @@ -155,7 +155,7 @@ public Bitstream register(Context context, int assetstore, String bitstreamPath) public void setFormat(Context context, Bitstream bitstream, BitstreamFormat bitstreamFormat) throws SQLException; /** - * Retrieve the content of the bitstream. + * Retrieve the contents of the bitstream. 
* * @param context DSpace context object * @param bitstream DSpace bitstream @@ -168,7 +168,7 @@ public InputStream retrieve(Context context, Bitstream bitstream) throws IOException, SQLException, AuthorizeException; /** - * Retrieve the content of the bitstream. + * Retrieve the contents of the bitstream. * * @param context DSpace context object * @param bitstream DSpace bitstream diff --git a/dspace-api/src/main/java/org/dspace/storage/bitstore/BitStoreService.java b/dspace-api/src/main/java/org/dspace/storage/bitstore/BitStoreService.java index 716b60d03f1f..d29be58d6f43 100644 --- a/dspace-api/src/main/java/org/dspace/storage/bitstore/BitStoreService.java +++ b/dspace-api/src/main/java/org/dspace/storage/bitstore/BitStoreService.java @@ -37,7 +37,7 @@ public interface BitStoreService { public String generateId(); /** - * Retrieve the content of the given bitstream as a stream. + * Retrieve the bits for bitstream * * @param bitstream DSpace Bitstream object * @return The stream of bits @@ -47,7 +47,7 @@ public interface BitStoreService { public InputStream get(Bitstream bitstream) throws IOException; /** - * Retrieve the content of the given bitstream as a File. 
+ * Retrieve the bits for bitstream * * @param bitstream DSpace Bitstream object * @return The File diff --git a/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/h2/V7.6_2024.08.05__Added_Preview_Tables.sql b/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/h2/V7.6_2024.08.05__Added_Preview_Tables.sql index 6b2e1c54b154..068f80f9430a 100644 --- a/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/h2/V7.6_2024.08.05__Added_Preview_Tables.sql +++ b/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/h2/V7.6_2024.08.05__Added_Preview_Tables.sql @@ -75,4 +75,4 @@ ALTER TABLE preview2preview -- ALTER TABLE preview2preview - ADD CONSTRAINT preview2preview_child_fk FOREIGN KEY (child_id) REFERENCES previewcontent(previewcontent_id) ON DELETE CASCADE; \ No newline at end of file + ADD CONSTRAINT preview2preview_child_fk FOREIGN KEY (child_id) REFERENCES previewcontent(previewcontent_id) ON DELETE CASCADE; diff --git a/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/postgres/V7.6_2024.08.05__Added_Preview_Tables.sql b/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/postgres/V7.6_2024.08.05__Added_Preview_Tables.sql index 0243b95445ec..57919fbfa8e6 100644 --- a/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/postgres/V7.6_2024.08.05__Added_Preview_Tables.sql +++ b/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/postgres/V7.6_2024.08.05__Added_Preview_Tables.sql @@ -85,4 +85,4 @@ ALTER TABLE preview2preview -- ALTER TABLE preview2preview - ADD CONSTRAINT preview2preview_child_fk FOREIGN KEY (child_id) REFERENCES previewcontent(previewcontent_id) ON DELETE CASCADE; \ No newline at end of file + ADD CONSTRAINT preview2preview_child_fk FOREIGN KEY (child_id) REFERENCES previewcontent(previewcontent_id) ON DELETE CASCADE; From dd26d98518da7f29936aad45e4160e9f6e9eae39 Mon Sep 17 00:00:00 2001 From: Paurikova2 Date: Tue, 10 Jun 2025 
13:08:47 +0200 Subject: [PATCH 33/33] removed interruption of thread from exception --- .../java/org/dspace/storage/bitstore/S3BitStoreService.java | 3 --- 1 file changed, 3 deletions(-) diff --git a/dspace-api/src/main/java/org/dspace/storage/bitstore/S3BitStoreService.java b/dspace-api/src/main/java/org/dspace/storage/bitstore/S3BitStoreService.java index 85445444dffc..1d553e5dbd21 100644 --- a/dspace-api/src/main/java/org/dspace/storage/bitstore/S3BitStoreService.java +++ b/dspace-api/src/main/java/org/dspace/storage/bitstore/S3BitStoreService.java @@ -323,9 +323,6 @@ public File getFile(Bitstream bitstream) throws IOException { return tempFile; } catch (AmazonClientException | InterruptedException e) { - if (e instanceof InterruptedException) { - Thread.currentThread().interrupt(); - } log.error("getFile(" + key + ")", e); throw new IOException(e); }