diff --git a/dspace-api/src/main/java/org/dspace/content/BitstreamServiceImpl.java b/dspace-api/src/main/java/org/dspace/content/BitstreamServiceImpl.java index 6b6b14a1def1..76c3b3b425ab 100644 --- a/dspace-api/src/main/java/org/dspace/content/BitstreamServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/content/BitstreamServiceImpl.java @@ -7,6 +7,7 @@ */ package org.dspace.content; +import java.io.File; import java.io.IOException; import java.io.InputStream; import java.sql.SQLException; @@ -315,6 +316,15 @@ public InputStream retrieve(Context context, Bitstream bitstream) return bitstreamStorageService.retrieve(context, bitstream); } + @Override + public File retrieveFile(Context context, Bitstream bitstream, boolean authorization) + throws IOException, SQLException, AuthorizeException { + if (authorization) { + authorizeService.authorizeAction(context, bitstream, Constants.READ); + } + return bitstreamStorageService.retrieveFile(context, bitstream); + } + @Override public boolean isRegisteredBitstream(Bitstream bitstream) { return bitstreamStorageService.isRegisteredBitstream(bitstream.getInternalId()); diff --git a/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java b/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java index 371d5abf30b7..676e76cbb874 100644 --- a/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/content/PreviewContentServiceImpl.java @@ -7,30 +7,33 @@ */ package org.dspace.content; +import java.io.BufferedInputStream; import java.io.BufferedReader; +import java.io.File; +import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; +import java.io.StringWriter; import java.io.UnsupportedEncodingException; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.InvalidPathException; -import java.nio.file.Path; import 
java.nio.file.Paths; import java.sql.SQLException; import java.util.ArrayList; import java.util.Arrays; +import java.util.Enumeration; import java.util.Hashtable; -import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Objects; import java.util.UUID; import java.util.function.Function; -import java.util.regex.Matcher; -import java.util.regex.Pattern; import java.util.zip.ZipEntry; -import java.util.zip.ZipInputStream; +import java.util.zip.ZipFile; +import javax.xml.stream.XMLOutputFactory; +import javax.xml.stream.XMLStreamWriter; import org.apache.commons.compress.archivers.tar.TarArchiveEntry; import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; @@ -68,6 +71,8 @@ public class PreviewContentServiceImpl implements PreviewContentService { // This constant is used to limit the length of the preview content stored in the database to prevent // the database from being overloaded with large amounts of data. private static final int MAX_PREVIEW_COUNT_LENGTH = 2000; + // Initial capacity for the list of extracted file paths, set to 200 based on typical archive file counts. 
+ private static final int ESTIMATED_FILE_COUNT = 200; // Configured ZIP file preview limit (default: 1000) - if the ZIP file contains more files, it will be truncated @Value("${file.preview.zip.limit.length:1000}") @@ -83,7 +88,6 @@ public class PreviewContentServiceImpl implements PreviewContentService { @Autowired BitstreamService bitstreamService; - @Override public PreviewContent create(Context context, Bitstream bitstream, String name, String content, boolean isDirectory, String size, Map subPreviewContents) @@ -124,17 +128,23 @@ public List findByBitstream(Context context, UUID bitstreamId) t } @Override - public List hasPreview(Context context, Bitstream bitstream) throws SQLException { + public boolean hasPreview(Context context, Bitstream bitstream) throws SQLException { return previewContentDAO.hasPreview(context, bitstream); } + @Override + public List getPreview(Context context, Bitstream bitstream) throws SQLException { + return previewContentDAO.getPreview(context, bitstream); + } + @Override public List findAll(Context context) throws SQLException { return previewContentDAO.findAll(context, PreviewContent.class); } @Override - public boolean canPreview(Context context, Bitstream bitstream) throws SQLException, AuthorizeException { + public boolean canPreview(Context context, Bitstream bitstream, boolean authorization) + throws SQLException, AuthorizeException { try { // Check it is allowed by configuration boolean isAllowedByCfg = configurationService.getBooleanProperty("file.preview.enabled", true); @@ -143,7 +153,9 @@ public boolean canPreview(Context context, Bitstream bitstream) throws SQLExcept } // Check it is allowed by license - authorizeService.authorizeAction(context, bitstream, Constants.READ); + if (authorization) { + authorizeService.authorizeAction(context, bitstream, Constants.READ); + } return true; } catch (MissingLicenseAgreementException e) { return false; @@ -151,16 +163,22 @@ public boolean canPreview(Context context, Bitstream 
bitstream) throws SQLExcept } @Override - public List getFilePreviewContent(Context context, Bitstream bitstream) - throws Exception { - InputStream inputStream = null; - List fileInfos = null; + public List getFilePreviewContent(Context context, Bitstream bitstream) throws Exception { + List fileInfos = new ArrayList<>(); + File file = null; + try { - inputStream = bitstreamService.retrieve(context, bitstream); - } catch (MissingLicenseAgreementException e) { /* Do nothing */ } + file = bitstreamService.retrieveFile(context, bitstream, false); // Retrieve the file - if (Objects.nonNull(inputStream)) { - fileInfos = processInputStreamToFilePreview(context, bitstream, inputStream); + if (Objects.nonNull(file)) { + fileInfos = processFileToFilePreview(context, bitstream, file); + } + } catch (MissingLicenseAgreementException e) { + log.error("Missing license agreement: ", e); + throw e; + } catch (IOException e) { + log.error("IOException during file processing: ", e); + throw e; } return fileInfos; } @@ -187,8 +205,8 @@ public FileInfo createFileInfo(PreviewContent pc) { } @Override - public List processInputStreamToFilePreview(Context context, Bitstream bitstream, - InputStream inputStream) + public List processFileToFilePreview(Context context, Bitstream bitstream, + File file) throws Exception { List fileInfos = new ArrayList<>(); String bitstreamMimeType = bitstream.getFormat(context).getMIMEType(); @@ -198,10 +216,10 @@ public List processInputStreamToFilePreview(Context context, Bitstream "database. 
This could cause the ZIP file to be previewed as a text file, potentially leading" + " to a database error."); } - String data = getFileContent(inputStream, true); + String data = getFileContent(file, true); fileInfos.add(new FileInfo(data, false)); } else if (bitstreamMimeType.equals("text/html")) { - String data = getFileContent(inputStream, false); + String data = getFileContent(file, false); fileInfos.add(new FileInfo(data, false)); } else { String data = ""; @@ -209,10 +227,8 @@ public List processInputStreamToFilePreview(Context context, Bitstream "application/zip", ARCHIVE_TYPE_ZIP, "application/x-tar", ARCHIVE_TYPE_TAR ); - - String mimeType = bitstream.getFormat(context).getMIMEType(); - if (archiveTypes.containsKey(mimeType)) { - data = extractFile(inputStream, archiveTypes.get(mimeType)); + if (archiveTypes.containsKey(bitstreamMimeType)) { + data = extractFile(file, archiveTypes.get(bitstreamMimeType)); fileInfos = FileTreeViewGenerator.parse(data); } } @@ -295,49 +311,89 @@ private Hashtable createSubMap(Map sourceMap, Funct * @param size the size of the file or directory */ private void addFilePath(List filePaths, String path, long size) { - String fileInfo = ""; try { - Path filePath = Paths.get(path); - boolean isDir = Files.isDirectory(filePath); - fileInfo = (isDir ? path + "/|" : path + "|") + size; + boolean isDir = Files.isDirectory(Paths.get(path)); + StringBuilder sb = new StringBuilder(path.length() + 16); + sb.append(path); + sb.append(isDir ? "/|" : "|"); + sb.append(size); + filePaths.add(sb.toString()); } catch (NullPointerException | InvalidPathException | SecurityException e) { log.error(String.format("Failed to add file path. Path: '%s', Size: %d", path, size), e); } - filePaths.add(fileInfo); } /** * Processes a TAR file, extracting its entries and adding their paths to the provided list. 
* @param filePaths the list to populate with the extracted file paths
-     * @param inputStream the TAR file data
+     * @param file the TAR archive on disk whose entries are listed
      * @throws IOException if an I/O error occurs while reading the TAR file
      */
-    private void processTarFile(List<String> filePaths, InputStream inputStream) throws IOException {
-        try (TarArchiveInputStream tis = new TarArchiveInputStream(inputStream)) {
+    private void processTarFile(List<String> filePaths, File file) throws IOException {
+        try (InputStream fis = new FileInputStream(file);
+             BufferedInputStream bis = new BufferedInputStream(fis);
+             // Entries are read sequentially from the buffered file stream
+             TarArchiveInputStream tarInput = new TarArchiveInputStream(bis)) {
+
+
             TarArchiveEntry entry;
-            while ((entry = tis.getNextTarEntry()) != null) {
+            while ((entry = tarInput.getNextTarEntry()) != null) {
                 if (!entry.isDirectory()) {
-                    // Add the file path and its size (from the TAR entry)
-                    addFilePath(filePaths, entry.getName(), entry.getSize());
+                    // Only file entries count towards the limit; the extra marker element makes
+                    // buildXmlResponse() emit its "too many files" element
+                    if (filePaths.size() >= maxPreviewCount) {
+                        filePaths.add("... (too many files)");
+                        break;
+                    }
+                    addFilePath(filePaths, entry.getName(), entry.getSize());
+                }
+                // Fully skip entry content to handle large files correctly
+                skipFully(tarInput, entry.getSize());
+            }
+        }
+    }
+
+    /**
+     * Fully skips the specified number of bytes from the input stream,
+     * ensuring that all bytes are skipped even if InputStream.skip() skips less.
+     *
+     * @param in the input stream to skip bytes from
+     * @param bytesToSkip the number of bytes to skip
+     * @throws IOException if an I/O error occurs or the end of stream is reached before skipping all bytes
+     */
+    private void skipFully(InputStream in, long bytesToSkip) throws IOException {
+        long remaining = bytesToSkip;
+        while (remaining > 0) {
+            long skipped = in.skip(remaining);
+            if (skipped <= 0) {
+                // If skip returns 0 or less, try to read a byte to move forward
+                if (in.read() == -1) {
+                    throw new IOException("Unexpected end of stream while skipping");
                 }
+                skipped = 1;
             }
+            remaining -= skipped;
         }
     }
 
     /**
-     * Processes a ZIP file, extracting its entries and adding their paths to the provided list.
-     * @param filePaths the list to populate with the extracted file paths
-     * @param inputStream the ZIP file data
-     * @throws IOException if an I/O error occurs while reading the ZIP file
+     * Lists the file entries of a ZIP archive; directory entries are skipped.
+     *
+     * @param filePaths the list to populate, one element per file entry (name and size)
+     * @param file the ZIP file to read
+     * @throws IOException if the file is invalid or cannot be read
      */
-    private void processZipFile(List<String> filePaths, InputStream inputStream) throws IOException {
-        try (ZipInputStream zipInputStream = new ZipInputStream(inputStream)) {
-            ZipEntry entry;
-            while ((entry = zipInputStream.getNextEntry()) != null) {
+    private void processZipFile(List<String> filePaths, File file) throws IOException {
+        try (ZipFile zipFile = new ZipFile(file)) {
+            Enumeration<? extends ZipEntry> entries = zipFile.entries();
+            while (entries.hasMoreElements()) {
+                ZipEntry entry = entries.nextElement();
                 if (!entry.isDirectory()) {
-                    // Add the file path and its size (from the ZIP entry)
-                    long fileSize = entry.getSize();
-                    addFilePath(filePaths, entry.getName(), fileSize);
+                    if (filePaths.size() >= maxPreviewCount) {
+                        filePaths.add("... (too many files)");
+                        break;
+                    }
+                    addFilePath(filePaths, entry.getName(), entry.getSize());
                 }
             }
         }
@@ -349,72 +405,86 @@ private void processZipFile(List<String> filePaths, InputStream inputStream) thr
      * @return an XML string representation of the file paths
      */
     private String buildXmlResponse(List<String> filePaths) {
-        // Is a folder regex
-        String folderRegex = "/|\\d+";
-        Pattern pattern = Pattern.compile(folderRegex);
-
-        StringBuilder sb = new StringBuilder();
-        sb.append("");
-        Iterator<String> iterator = filePaths.iterator();
-        int fileCounter = 0;
-        while (iterator.hasNext() && fileCounter < maxPreviewCount) {
-            String filePath = iterator.next();
-            // Check if the file is a folder
-            Matcher matcher = pattern.matcher(filePath);
-            if (!matcher.matches()) {
-                // It is a file
-                fileCounter++;
+        StringWriter stringWriter = new StringWriter();
+        XMLOutputFactory factory = XMLOutputFactory.newInstance();
+        XMLStreamWriter writer = null;
+        try {
+            writer = factory.createXMLStreamWriter(stringWriter);
+
+            writer.writeStartDocument("UTF-8", "1.0");
+            writer.writeStartElement("root");
+
+            int count = 0;
+            for (String filePath : filePaths) {
+                if (count >= maxPreviewCount) {
+                    writer.writeStartElement("element");
+                    writer.writeCharacters("...too many files...|0");
+                    writer.writeEndElement();
+                    break;
+                }
+                writer.writeStartElement("element");
+                writer.writeCharacters(filePath);
+                writer.writeEndElement();
+                count++;
             }
-            sb.append("").append(filePath).append("");
-        }
-        if (fileCounter > maxPreviewCount) {
-            sb.append("...too many files...|0");
+            writer.writeEndElement(); // </root>
+            writer.writeEndDocument();
+            writer.flush();
+            writer.close();
+
+        } catch (Exception e) {
+            log.error("Failed to build XML response", e);
+            return "Failed to generate preview";
         }
-        sb.append("");
-        return
sb.toString(); + + return stringWriter.toString(); } /** * Processes file data based on the specified file type (tar or zip), * and returns an XML representation of the file paths. - * @param inputStream the InputStream containing the file data + * @param file the file data * @param fileType the type of file to extract ("tar" or "zip") * @return an XML string representing the extracted file paths */ - private String extractFile(InputStream inputStream, String fileType) throws Exception { - List filePaths = new ArrayList<>(); + private String extractFile(File file, String fileType) throws Exception { + List filePaths = new ArrayList<>(ESTIMATED_FILE_COUNT); // Process the file based on its type if (ARCHIVE_TYPE_TAR.equals(fileType)) { - processTarFile(filePaths, inputStream); + processTarFile(filePaths, file); } else { - processZipFile(filePaths, inputStream); + processZipFile(filePaths, file); } return buildXmlResponse(filePaths); } /** - * Read input stream and return content as String - * @param inputStream to read - * @return content of the inputStream as a String - * @throws IOException + * Read file content and return as String + * @param file the file to read + * @param cutResult whether to limit the content length + * @return content of the file as a String + * @throws IOException if an error occurs reading the file */ - private String getFileContent(InputStream inputStream, boolean cutResult) throws IOException { + private String getFileContent(File file, boolean cutResult) throws IOException { StringBuilder content = new StringBuilder(); - // Generate the preview content in the UTF-8 encoding - BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8)); - try { + + try (BufferedReader reader = new BufferedReader( + new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8))) { + String line; while ((line = reader.readLine()) != null) { + if (cutResult && content.length() > MAX_PREVIEW_COUNT_LENGTH) 
{ + content.append(" . . ."); + break; + } content.append(line).append("\n"); } - } catch (UnsupportedEncodingException e) { - log.error("UnsupportedEncodingException during creating the preview content because: ", e); } catch (IOException e) { log.error("IOException during creating the preview content because: ", e); + throw e; // Optional: rethrow if you want the exception to propagate } - reader.close(); return cutResult ? ensureMaxLength(content.toString()) : content.toString(); } @@ -438,4 +508,4 @@ private static String ensureMaxLength(String input) { return input; } } -} +} \ No newline at end of file diff --git a/dspace-api/src/main/java/org/dspace/content/dao/PreviewContentDAO.java b/dspace-api/src/main/java/org/dspace/content/dao/PreviewContentDAO.java index 3531271ff518..596357b4a9b1 100644 --- a/dspace-api/src/main/java/org/dspace/content/dao/PreviewContentDAO.java +++ b/dspace-api/src/main/java/org/dspace/content/dao/PreviewContentDAO.java @@ -33,6 +33,16 @@ public interface PreviewContentDAO extends GenericDAO { */ List findByBitstream(Context context, UUID bitstreamId) throws SQLException; + /** + * Returns true if the bitstream has associated preview content. + * + * @param context DSpace context + * @param bitstream The bitstream to get bitstream UUID + * @return True if preview content exists, false otherwise + * @throws SQLException If a database error occurs + */ + boolean hasPreview(Context context, Bitstream bitstream) throws SQLException; + /** * Find all preview content based on bitstream that are the root directory. 
* @@ -41,5 +51,5 @@ public interface PreviewContentDAO extends GenericDAO { * @return List of found preview content * @throws SQLException If a database error occurs */ - List hasPreview(Context context, Bitstream bitstream) throws SQLException; + List getPreview(Context context, Bitstream bitstream) throws SQLException; } diff --git a/dspace-api/src/main/java/org/dspace/content/dao/impl/PreviewContentDAOImpl.java b/dspace-api/src/main/java/org/dspace/content/dao/impl/PreviewContentDAOImpl.java index bd4470d56a2b..2e83994957f2 100644 --- a/dspace-api/src/main/java/org/dspace/content/dao/impl/PreviewContentDAOImpl.java +++ b/dspace-api/src/main/java/org/dspace/content/dao/impl/PreviewContentDAOImpl.java @@ -40,7 +40,16 @@ public List findByBitstream(Context context, UUID bitstreamId) t } @Override - public List hasPreview(Context context, Bitstream bitstream) throws SQLException { + public boolean hasPreview(Context context, Bitstream bitstream) throws SQLException { + Query query = createQuery(context, + "SELECT COUNT(pc) FROM " + PreviewContent.class.getSimpleName() + + " pc WHERE pc.bitstream.id = :bitstream_id"); + query.setParameter("bitstream_id", bitstream.getID()); + return count(query) > 0; + } + + @Override + public List getPreview(Context context, Bitstream bitstream) throws SQLException { // select only data from the previewcontent table whose ID is not a child in the preview2preview table Query query = getHibernateSession(context).createNativeQuery( "SELECT pc.* FROM previewcontent pc " + diff --git a/dspace-api/src/main/java/org/dspace/content/service/BitstreamService.java b/dspace-api/src/main/java/org/dspace/content/service/BitstreamService.java index 8effabf28435..2973dc9a3c1f 100644 --- a/dspace-api/src/main/java/org/dspace/content/service/BitstreamService.java +++ b/dspace-api/src/main/java/org/dspace/content/service/BitstreamService.java @@ -7,6 +7,7 @@ */ package org.dspace.content.service; +import java.io.File; import java.io.IOException; 
import java.io.InputStream; import java.sql.SQLException; @@ -154,7 +155,7 @@ public Bitstream register(Context context, int assetstore, String bitstreamPath) public void setFormat(Context context, Bitstream bitstream, BitstreamFormat bitstreamFormat) throws SQLException; /** - * Retrieve the contents of the bitstream + * Retrieve the contents of the bitstream. * * @param context DSpace context object * @param bitstream DSpace bitstream @@ -166,6 +167,20 @@ public Bitstream register(Context context, int assetstore, String bitstreamPath) public InputStream retrieve(Context context, Bitstream bitstream) throws IOException, SQLException, AuthorizeException; + /** + * Retrieve the contents of the bitstream. + * + * @param context DSpace context object + * @param bitstream DSpace bitstream + * @param authorization true if authorization is required else false + * @return a File from which the bitstream can be read. + * @throws IOException if IO error + * @throws SQLException if database error + * @throws AuthorizeException if authorization error + */ + public File retrieveFile(Context context, Bitstream bitstream, boolean authorization) + throws IOException, SQLException, AuthorizeException; + /** * Determine if this bitstream is registered (available elsewhere on * filesystem than in assetstore). 
More about registered items: diff --git a/dspace-api/src/main/java/org/dspace/content/service/PreviewContentService.java b/dspace-api/src/main/java/org/dspace/content/service/PreviewContentService.java index 8a27f61844c7..842112413e8d 100644 --- a/dspace-api/src/main/java/org/dspace/content/service/PreviewContentService.java +++ b/dspace-api/src/main/java/org/dspace/content/service/PreviewContentService.java @@ -7,7 +7,7 @@ */ package org.dspace.content.service; -import java.io.InputStream; +import java.io.File; import java.sql.SQLException; import java.util.List; import java.util.Map; @@ -82,14 +82,25 @@ PreviewContent create(Context context, Bitstream bitstream, String name, String */ List findByBitstream(Context context, UUID bitstream_id) throws SQLException; + /** + * Returns true if the bitstream has associated preview content. + * + * @param context DSpace context + * @param bitstream The bitstream to get bitstream UUID + * @return True if preview content exists, false otherwise + * @throws SQLException If a database error occurs + */ + boolean hasPreview(Context context, Bitstream bitstream) throws SQLException; + /** * Find all preview content based on bitstream that are the root directory. * * @param context DSpace context * @param bitstream The bitstream to get bitstream UUID + * @return List of preview contents * @throws SQLException If a database error occurs */ - List hasPreview(Context context, Bitstream bitstream) throws SQLException; + List getPreview(Context context, Bitstream bitstream) throws SQLException; /** * Find all preview contents from database. 
@@ -104,9 +115,11 @@ PreviewContent create(Context context, Bitstream bitstream, String name, String * * @param context DSpace context object * @param bitstream check if this bitstream could be previewed + * @param authorization true if authorization is required else false * @return true if the bitstream could be previewed, false otherwise */ - boolean canPreview(Context context, Bitstream bitstream) throws SQLException, AuthorizeException; + boolean canPreview(Context context, Bitstream bitstream, boolean authorization) + throws SQLException, AuthorizeException; /** * Return converted ZIP file content into FileInfo classes. @@ -143,13 +156,13 @@ PreviewContent create(Context context, Bitstream bitstream, String name, String FileInfo createFileInfo(PreviewContent pc); /** - * Convert InputStream of the ZIP file into FileInfo classes. + * Convert File of the ZIP file into FileInfo classes. * * @param context DSpace context object * @param bitstream previewing bitstream - * @param inputStream content of the zip file + * @param file content of the zip file * @return List of FileInfo classes where is wrapped ZIP file content */ - List processInputStreamToFilePreview(Context context, Bitstream bitstream, InputStream inputStream) + List processFileToFilePreview(Context context, Bitstream bitstream, File file) throws Exception; } diff --git a/dspace-api/src/main/java/org/dspace/scripts/filepreview/FilePreview.java b/dspace-api/src/main/java/org/dspace/scripts/filepreview/FilePreview.java index a35a2ac6f035..ba0b0ef0bf12 100644 --- a/dspace-api/src/main/java/org/dspace/scripts/filepreview/FilePreview.java +++ b/dspace-api/src/main/java/org/dspace/scripts/filepreview/FilePreview.java @@ -22,7 +22,6 @@ import org.dspace.content.Bitstream; import org.dspace.content.Bundle; import org.dspace.content.Item; -import org.dspace.content.PreviewContent; import org.dspace.content.factory.ContentServiceFactory; import org.dspace.content.service.ItemService; import 
org.dspace.content.service.PreviewContentService; @@ -147,21 +146,20 @@ private void generateItemFilePreviews(Context context, UUID itemUUID) throws Exc for (Bundle bundle : bundles) { List bitstreams = bundle.getBitstreams(); for (Bitstream bitstream : bitstreams) { - boolean canPreview = previewContentService.canPreview(context, bitstream); + boolean canPreview = previewContentService.canPreview(context, bitstream, false); if (!canPreview) { - return; + continue; } - List prContents = previewContentService.hasPreview(context, bitstream); // Generate new content if we didn't find any - if (!prContents.isEmpty()) { - return; + if (previewContentService.hasPreview(context, bitstream)) { + continue; } List fileInfos = previewContentService.getFilePreviewContent(context, bitstream); // Do not store HTML content in the database because it could be longer than the limit // of the database column if (StringUtils.equals("text/html", bitstream.getFormat(context).getMIMEType())) { - return; + continue; } for (FileInfo fi : fileInfos) { diff --git a/dspace-api/src/main/java/org/dspace/storage/bitstore/BitStoreService.java b/dspace-api/src/main/java/org/dspace/storage/bitstore/BitStoreService.java index 5a02ad1d5617..d29be58d6f43 100644 --- a/dspace-api/src/main/java/org/dspace/storage/bitstore/BitStoreService.java +++ b/dspace-api/src/main/java/org/dspace/storage/bitstore/BitStoreService.java @@ -7,6 +7,7 @@ */ package org.dspace.storage.bitstore; +import java.io.File; import java.io.IOException; import java.io.InputStream; import java.util.List; @@ -45,6 +46,16 @@ public interface BitStoreService { */ public InputStream get(Bitstream bitstream) throws IOException; + /** + * Retrieve the bits for bitstream + * + * @param bitstream DSpace Bitstream object + * @return The File + * @throws java.io.IOException If a problem occurs while retrieving the bits, or if no + * asset with ID exists in the store + */ + public File getFile(Bitstream bitstream) throws IOException; + /** * 
Store a stream of bits. * diff --git a/dspace-api/src/main/java/org/dspace/storage/bitstore/BitstreamStorageServiceImpl.java b/dspace-api/src/main/java/org/dspace/storage/bitstore/BitstreamStorageServiceImpl.java index 3539496b1466..85da914644df 100644 --- a/dspace-api/src/main/java/org/dspace/storage/bitstore/BitstreamStorageServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/storage/bitstore/BitstreamStorageServiceImpl.java @@ -7,6 +7,7 @@ */ package org.dspace.storage.bitstore; +import java.io.File; import java.io.IOException; import java.io.InputStream; import java.sql.SQLException; @@ -214,6 +215,13 @@ public InputStream retrieve(Context context, Bitstream bitstream) return this.getStore(storeNumber).get(bitstream); } + @Override + public File retrieveFile(Context context, Bitstream bitstream) + throws IOException { + Integer storeNumber = bitstream.getStoreNumber(); + return this.getStore(storeNumber).getFile(bitstream); + } + @Override public void cleanup(boolean deleteDbRecords, boolean verbose) throws SQLException, IOException, AuthorizeException { Context context = new Context(Context.Mode.BATCH_EDIT); diff --git a/dspace-api/src/main/java/org/dspace/storage/bitstore/DSBitStoreService.java b/dspace-api/src/main/java/org/dspace/storage/bitstore/DSBitStoreService.java index 6fef7365e482..52154ed7e365 100644 --- a/dspace-api/src/main/java/org/dspace/storage/bitstore/DSBitStoreService.java +++ b/dspace-api/src/main/java/org/dspace/storage/bitstore/DSBitStoreService.java @@ -208,7 +208,7 @@ private synchronized static void deleteParents(File file) { * @return The corresponding file in the file system, or null * @throws IOException If a problem occurs while determining the file */ - protected File getFile(Bitstream bitstream) throws IOException { + public File getFile(Bitstream bitstream) throws IOException { // Check that bitstream is not null if (bitstream == null) { return null; diff --git 
a/dspace-api/src/main/java/org/dspace/storage/bitstore/S3BitStoreService.java b/dspace-api/src/main/java/org/dspace/storage/bitstore/S3BitStoreService.java
index c621aa6efce9..1d553e5dbd21 100644
--- a/dspace-api/src/main/java/org/dspace/storage/bitstore/S3BitStoreService.java
+++ b/dspace-api/src/main/java/org/dspace/storage/bitstore/S3BitStoreService.java
@@ -305,6 +305,44 @@ public InputStream get(Bitstream bitstream) throws IOException {
         }
     }
 
+    /**
+     * Download the bits of a bitstream from S3 into a temporary file on local disk.
+     * The file is registered with {@code deleteOnExit()} and is deleted immediately if the
+     * download fails; on success the caller should delete it once it is no longer needed.
+     *
+     * @param bitstream DSpace Bitstream object
+     * @return the temporary file holding a local copy of the S3 object
+     * @throws java.io.IOException if the temporary file cannot be created, or the download
+     *                             fails or is interrupted
+     */
+    @Override
+    public File getFile(Bitstream bitstream) throws IOException {
+        String key = getFullKey(bitstream.getInternalId());
+        // Strip -R from bitstream key if it's registered
+        if (isRegisteredBitstream(key)) {
+            key = key.substring(REGISTERED_FLAG.length());
+        }
+        // createTempFile() may throw IOException itself; nothing to clean up in that case
+        File tempFile = File.createTempFile("s3-disk-copy-" + UUID.randomUUID(), "temp");
+        tempFile.deleteOnExit();
+        try {
+            Download download = tm.download(new GetObjectRequest(bucketName, key), tempFile);
+            download.waitForCompletion();
+            return tempFile;
+        } catch (AmazonClientException | InterruptedException e) {
+            if (e instanceof InterruptedException) {
+                // Restore the interrupt status so callers can still observe the interruption
+                Thread.currentThread().interrupt();
+            }
+            log.error("getFile(" + key + ")", e);
+            // Do not keep a partial download around until the JVM exits
+            if (!tempFile.delete()) {
+                log.warn("getFile(" + key + "): could not delete temporary file " + tempFile);
+            }
+            throw new IOException(e);
+        }
+    }
+
     /**
      * Store a stream of bits.
* diff --git a/dspace-api/src/main/java/org/dspace/storage/bitstore/service/BitstreamStorageService.java b/dspace-api/src/main/java/org/dspace/storage/bitstore/service/BitstreamStorageService.java index 7f5ed8f9129f..d98be808d8d6 100644 --- a/dspace-api/src/main/java/org/dspace/storage/bitstore/service/BitstreamStorageService.java +++ b/dspace-api/src/main/java/org/dspace/storage/bitstore/service/BitstreamStorageService.java @@ -7,6 +7,7 @@ */ package org.dspace.storage.bitstore.service; +import java.io.File; import java.io.IOException; import java.io.InputStream; import java.sql.SQLException; @@ -126,6 +127,18 @@ public UUID register(Context context, Bitstream bitstream, int assetstore, Strin public InputStream retrieve(Context context, Bitstream bitstream) throws SQLException, IOException; + /** + * Retrieve the file of the bitstream with ID. If the bitstream does not + * exist, or is marked deleted, returns null. + * + * @param context The current context + * @param bitstream The bitstream to retrieve + * @return The file, or null + * @throws IOException If a problem occurs while retrieving the bits + * @throws SQLException If a problem occurs accessing the RDBMS + */ + public File retrieveFile(Context context, Bitstream bitstream) throws SQLException, IOException; + /** * Clean up the bitstream storage area. This method deletes any bitstreams * which are more than 1 hour old and marked deleted. 
The deletions cannot diff --git a/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/h2/V7.6_2025.06.09__Added_Indexes_To_Preview_Tables.sql b/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/h2/V7.6_2025.06.09__Added_Indexes_To_Preview_Tables.sql new file mode 100644 index 000000000000..b0f95661c0c1 --- /dev/null +++ b/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/h2/V7.6_2025.06.09__Added_Indexes_To_Preview_Tables.sql @@ -0,0 +1,25 @@ +-- +-- The contents of this file are subject to the license and copyright +-- detailed in the LICENSE and NOTICE files at the root of the source +-- tree and available online at +-- +-- http://www.dspace.org/license/ +-- + +-- =================================================================== +-- PERFORMANCE INDEXES +-- =================================================================== + +-- +-- Index to speed up queries filtering previewcontent by bitstream_id, +-- used in hasPreview() and getPreview() JOIN with bitstream table. +-- +CREATE INDEX idx_previewcontent_bitstream_id +ON previewcontent (bitstream_id); + +-- +-- Index to optimize NOT EXISTS subquery in getPreview(), +-- checking for existence of child_id in preview2preview. 
+-- +CREATE INDEX idx_preview2preview_child_id +ON preview2preview (child_id); \ No newline at end of file diff --git a/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/postgres/V7.6_2025.06.09__Added_Indexes_To_Preview_Tables.sql b/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/postgres/V7.6_2025.06.09__Added_Indexes_To_Preview_Tables.sql new file mode 100644 index 000000000000..b0f95661c0c1 --- /dev/null +++ b/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/postgres/V7.6_2025.06.09__Added_Indexes_To_Preview_Tables.sql @@ -0,0 +1,25 @@ +-- +-- The contents of this file are subject to the license and copyright +-- detailed in the LICENSE and NOTICE files at the root of the source +-- tree and available online at +-- +-- http://www.dspace.org/license/ +-- + +-- =================================================================== +-- PERFORMANCE INDEXES +-- =================================================================== + +-- +-- Index to speed up queries filtering previewcontent by bitstream_id, +-- used in hasPreview() and getPreview() JOIN with bitstream table. +-- +CREATE INDEX idx_previewcontent_bitstream_id +ON previewcontent (bitstream_id); + +-- +-- Index to optimize NOT EXISTS subquery in getPreview(), +-- checking for existence of child_id in preview2preview. 
+-- +CREATE INDEX idx_preview2preview_child_id +ON preview2preview (child_id); \ No newline at end of file diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/MetadataBitstreamRestRepository.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/MetadataBitstreamRestRepository.java index 87f7b0c40140..3056dd8e12ed 100644 --- a/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/MetadataBitstreamRestRepository.java +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/MetadataBitstreamRestRepository.java @@ -107,15 +107,14 @@ public Page findByHandle(@Parameter(value = "handl for (Bitstream bitstream : bitstreams) { String url = previewContentService.composePreviewURL(context, item, bitstream, contextPath); List fileInfos = new ArrayList<>(); - boolean canPreview = previewContentService.canPreview(context, bitstream); + boolean canPreview = previewContentService.canPreview(context, bitstream, false); String mimeType = bitstream.getFormat(context).getMIMEType(); // HTML content could be longer than the limit, so we do not store it in the DB. // It has to be generated even if property is false. 
if (StringUtils.equals(mimeType, TEXT_HTML_MIME_TYPE) || canPreview) { try { - List prContents = previewContentService.hasPreview(context, bitstream); // Generate new content if we didn't find any - if (prContents.isEmpty()) { + if (!previewContentService.hasPreview(context, bitstream)) { boolean allowComposePreviewContent = configurationService.getBooleanProperty ("create.file-preview.on-item-page-load", false); if (allowComposePreviewContent) { @@ -131,6 +130,7 @@ public Page findByHandle(@Parameter(value = "handl } } } else { + List prContents = previewContentService.getPreview(context, bitstream); for (PreviewContent pc : prContents) { fileInfos.add(previewContentService.createFileInfo(pc)); } diff --git a/dspace-server-webapp/src/test/java/org/dspace/app/rest/PreviewContentServiceImplIT.java b/dspace-server-webapp/src/test/java/org/dspace/app/rest/PreviewContentServiceImplIT.java index 48bf84e010a4..c79b1c87f31f 100644 --- a/dspace-server-webapp/src/test/java/org/dspace/app/rest/PreviewContentServiceImplIT.java +++ b/dspace-server-webapp/src/test/java/org/dspace/app/rest/PreviewContentServiceImplIT.java @@ -125,7 +125,7 @@ public void testFindByBitstream() throws Exception { @Test public void testFindRootByBitstream() throws Exception { List previewContentList = - previewContentService.hasPreview(context, bitstream1); + previewContentService.getPreview(context, bitstream1); Assert.assertEquals(previewContentList.size(), 1); Assert.assertEquals(previewContent1.getID(), previewContentList.get(0).getID()); }