class SPDXParsingError(Exception):
    """Raised when an SPDX document could not be parsed.

    The error messages that led to the failure are available as ``messages``.
    """

    messages: List[str]

    def __init__(self, message):
        """Store the error message(s).

        :param message: a single message, or a list/tuple of messages such as
            the errors collected while parsing.
        """
        # Keep ``args`` exactly as the default ``Exception`` construction
        # would set them.
        super().__init__(message)
        # A sequence of messages is stored flat; wrapping it again would give
        # ``[[...]]``, which contradicts the ``List[str]`` annotation. The copy
        # decouples the exception from later changes to the caller's list.
        if isinstance(message, (list, tuple)):
            self.messages = list(message)
        else:
            self.messages = [message]
class AnnotationParser:
    """Build ``Annotation`` objects from the annotation dicts of an SPDX JSON document."""

    logger: Logger

    def __init__(self, logger):
        self.logger = logger

    def parse_annotation(self, annotation: Dict, spdx_id: Optional[str] = None) -> Optional[Annotation]:
        """Create an ``Annotation`` from a single annotation dict.

        :param annotation: dict read for the keys "SPDXID", "annotationType",
            "annotator", "annotationDate" and "annotationComment".
        :param spdx_id: fallback id, used when the dict has no (truthy) "SPDXID".
        :return: the new ``Annotation``, or ``None`` if ``Annotation`` raised a
            ``ValueError``; in that case the error is appended to the logger.
        """
        spdx_id = annotation.get("SPDXID") or spdx_id
        annotation_type = annotation.get("annotationType")
        annotator = annotation.get("annotator")
        annotation_date = annotation.get("annotationDate")
        annotation_comment = annotation.get("annotationComment")
        try:
            # Return directly instead of rebinding ``annotation``: reusing the
            # parameter name handed the raw input dict back after an error.
            return Annotation(spdx_id, annotation_type, annotator, annotation_date, annotation_comment)
        except ValueError as err:
            self.logger.append(f'Error while parsing annotation: {err.args[0]}')
            return None

    def parse_annotations(self, annotations_dict_list: Optional[List[Dict]],
                          spdx_id: Optional[str] = None) -> List[Annotation]:
        """Parse every dict of ``annotations_dict_list``.

        ``None`` (e.g. the section is missing from the document) is treated like
        an empty list. Entries that could not be parsed are left out of the
        result; their errors have already been logged by ``parse_annotation``.
        """
        parsed = (self.parse_annotation(annotation_dict, spdx_id)
                  for annotation_dict in annotations_dict_list or [])
        return [annotation for annotation in parsed if annotation is not None]
class CreationInfoParser:
    """Read the document-level fields and the creation info of an SPDX JSON document."""

    logger: Logger

    def __init__(self, logger: Logger):
        self.logger = logger

    def parse_creation_info(self, creation_info_dict: Dict) -> CreationInfo:
        """Create a ``CreationInfo`` from the "creationInfo" dict.

        "creators" and "created" are passed to the constructor; "comment" and
        "licenseListVersion" are only set on the result when the key is present.
        """
        result = CreationInfo(creation_info_dict.get("creators"), creation_info_dict.get("created"))
        optional_fields = (("comment", "comment"), ("licenseListVersion", "license_list_version"))
        for json_key, attribute_name in optional_fields:
            if json_key in creation_info_dict:
                setattr(result, attribute_name, creation_info_dict.get(json_key))
        return result

    def parse(self, doc_dict: Dict) -> Tuple[str, str, str, str, CreationInfo]:
        """Return ``(spdx_version, spdx_id, name, document_namespace, creation_info)``.

        The first four values are looked up with ``dict.get`` and are therefore
        ``None`` when the key is missing.
        """
        document_keys = ("spdxVersion", "SPDXID", "name", "documentNamespace")
        document_values = [doc_dict.get(key) for key in document_keys]
        parsed_creation_info = self.parse_creation_info(doc_dict.get("creationInfo"))
        return (*document_values, parsed_creation_info)
class ExtractedLicensingParser:
    """Create ``ExtractedLicensingInfo`` objects for an SPDX JSON document."""

    logger: Logger

    def __init__(self, logger: Logger):
        self.logger = logger

    def parse_extracted_licensing_info(self, extracted_licensing_info_dict: Dict) -> ExtractedLicensingInfo:
        """Return a new ``ExtractedLicensingInfo`` for one dict.

        NOTE(review): the content of ``extracted_licensing_info_dict`` is not
        read yet; an argument-less ``ExtractedLicensingInfo()`` is returned for
        every input.
        """
        # The parameter is left untouched instead of being overwritten with the result.
        return ExtractedLicensingInfo()

    def parse_extracted_licensing_infos(self, extracted_licensing_info_dicts: List[Dict]) -> List[ExtractedLicensingInfo]:
        """Parse every dict of the list; ``None`` is treated like an empty list."""
        return [self.parse_extracted_licensing_info(info_dict)
                for info_dict in extracted_licensing_info_dicts or []]
class FileParser:
    """Build ``File`` objects from the file dicts of an SPDX JSON document."""

    logger: Logger

    def __init__(self, logger: Logger):
        self.logger = logger

    def parse_file(self, file_dict: Dict) -> Optional[File]:
        """Create a ``File`` from the "name" and "SPDXID" entries of ``file_dict``.

        :return: the new ``File``, or ``None`` if ``File`` raised a
            ``ValueError``; its first argument is then appended to the logger.
        """
        name = file_dict.get("name")
        spdx_id = file_dict.get("SPDXID")
        try:
            return File(name, spdx_id)
        except ValueError as error:
            self.logger.append(error.args[0])
            return None

    def parse_files(self, file_dict_list) -> List[File]:
        """Parse every dict of ``file_dict_list``.

        ``None`` is treated like an empty list. Files that failed to parse are
        skipped (their error is already logged), so the result never contains
        ``None`` entries.
        """
        parsed_files = (self.parse_file(file_dict) for file_dict in file_dict_list or [])
        return [file for file in parsed_files if file is not None]
class JsonParser:
    """Parse an SPDX document in JSON format into a ``Document``.

    All element parsers share one ``Logger``; the errors they collect are
    raised together as an ``SPDXParsingError`` at the end of ``parse``.
    """

    logger: Logger
    creation_info_parser: CreationInfoParser
    package_parser: PackageParser
    file_parser: FileParser
    snippet_parser: SnippetParser
    extracted_licenses_parser: ExtractedLicensingParser
    relationship_parser: RelationshipParser
    annotation_parser: AnnotationParser
    review_parser: ReviewParser

    def __init__(self):
        self.logger = Logger()
        self.creation_info_parser = CreationInfoParser(self.logger)
        self.package_parser = PackageParser(self.logger)
        self.file_parser = FileParser(self.logger)
        self.snippet_parser = SnippetParser(self.logger)
        self.extracted_licenses_parser = ExtractedLicensingParser(self.logger)
        self.relationship_parser = RelationshipParser(self.logger)
        self.annotation_parser = AnnotationParser(self.logger)
        self.review_parser = ReviewParser(self.logger)

    def parse(self, filename: str) -> Document:
        """Read ``filename`` and build the ``Document`` it describes.

        :param filename: path of the JSON file to read.
        :return: the parsed ``Document``.
        :raises SPDXParsingError: if the shared logger holds errors after all
            sections were parsed.
        """
        # JSON is exchanged as UTF-8; do not depend on the locale's default encoding.
        with open(filename, encoding="utf-8") as file:
            input_doc_as_dict = json.load(file)

        def section(key):
            # Sections that are absent (or null) are handled as empty lists.
            return input_doc_as_dict.get(key) or []

        spdx_version, spdx_id, name, document_namespace, creation_info = self.creation_info_parser.parse(
            input_doc_as_dict)
        document: Document = Document(spdx_version, spdx_id, name, document_namespace, creation_info)

        document.packages = self.package_parser.parse_packages(section("packages"))
        document.files = self.file_parser.parse_files(section("files"))
        document.annotations = self.annotation_parser.parse_annotations(section("annotations"), document.spdx_id)
        document.snippets = self.snippet_parser.parse_snippets(section("snippets"))
        document.relationships = self.relationship_parser.parse_relationships(section("relationships"))
        # Reviews are represented as annotations and stored alongside them.
        document.annotations.extend(self.review_parser.parse_reviews(section("revieweds")))
        # Use the list parser so a real list is stored rather than a lazy ``map`` object.
        document.extracted_licensing_info = self.extracted_licenses_parser.parse_extracted_licensing_infos(
            section("hasExtractedLicensingInfo"))

        # Check the logger shared with the element parsers; a logger created
        # locally in this method would never contain their errors.
        # NOTE(review): the shared logger is not cleared between calls, so
        # errors of an earlier parse() on the same instance are reported again.
        if self.logger.has_errors():
            raise SPDXParsingError(self.logger.get_errors())
        return document
class PackageParser:
    """Build ``Package`` objects from the package dicts of an SPDX JSON document."""

    logger: Logger

    def __init__(self, logger: Logger):
        self.logger = logger

    def parse_package(self, package: Dict) -> Package:
        """Create a ``Package`` from the "name" entry of ``package``."""
        # Return directly; the parameter is not rebound to the result.
        return Package(package.get("name"))

    def parse_packages(self, packages_dict_list: List[Dict]) -> List[Package]:
        """Parse every dict of the list; ``None`` is treated like an empty list."""
        return [self.parse_package(package_dict) for package_dict in packages_dict_list or []]
def set_optional_property(property_name: Union[str, "RangeType"],
                          parse_object: Dict) -> Optional[Union[str, int, Dict, List, bool]]:
    """Return the value stored under ``property_name``, or ``None`` if the key is absent.

    :param property_name: key to look up in ``parse_object``.
    :param parse_object: dict the value is read from.
    """
    # ``Optional`` takes exactly one type argument, so the alternatives are
    # wrapped in ``Union``; ``Optional[str, int, ...]`` raises ``TypeError`` as
    # soon as the function is defined.
    # ``RangeType`` is a forward reference because snippet_parser imports this
    # function, so evaluating the name here needs a circular import.
    return parse_object.get(property_name)
from typing import Optional


class RelationshipParser:
    """Create ``Relationship`` objects for the relationship dicts of an SPDX JSON document."""

    logger: Logger

    def __init__(self, logger):
        self.logger = logger

    def parse_relationship(self, relationship: Dict) -> Relationship:
        """Return a new ``Relationship`` for one relationship dict.

        NOTE(review): the content of ``relationship`` is not read yet; an
        argument-less ``Relationship()`` is returned for every input.
        """
        # The parameter is left untouched instead of being overwritten with the result.
        return Relationship()

    def parse_relationships(self, relationship_dict_list: List[Dict]) -> List[Relationship]:
        """Parse every dict of the list; ``None`` is treated like an empty list."""
        return [self.parse_relationship(relationship_dict)
                for relationship_dict in relationship_dict_list or []]


class ReviewParser:  # TODO: use AnnotationParser directly, with annotation_type as an argument?
    """Convert the review dicts of an SPDX JSON document into ``Annotation`` objects."""

    logger: Logger

    def __init__(self, logger: Logger):
        self.logger = logger

    def parse_review(self, review: Dict) -> Optional[Annotation]:
        """Create an ``Annotation`` of type ``AnnotationType.REVIEW`` from one review dict.

        "reviewDate" becomes the annotation date and "reviewer" the annotator.

        :return: the new ``Annotation``, or ``None`` if ``Annotation`` raised a
            ``ValueError``; its first argument is then appended to the logger.
        """
        annotation_date = review.get("reviewDate")
        annotator = review.get("reviewer")
        try:
            return Annotation(annotation_type=AnnotationType.REVIEW, annotator=annotator,
                              annotation_date=annotation_date)
        except ValueError as error:
            self.logger.append(error.args[0])
            # No annotation exists when construction failed; returning the
            # never-assigned local would raise UnboundLocalError.
            return None

    def parse_reviews(self, review_dicts_list: List[Dict]) -> List[Annotation]:
        """Parse every review dict.

        ``None`` is treated like an empty list; reviews that failed to parse are
        skipped, their error is already logged.
        """
        parsed_reviews = (self.parse_review(review_dict) for review_dict in review_dicts_list or [])
        return [review for review in parsed_reviews if review is not None]
from typing import Optional


class RangeType(Enum):
    """Kind of a snippet range: byte offsets (``BYTE``) or line numbers (``LINE``)."""

    BYTE = auto()
    LINE = auto()


class SnippetParser:
    """Build ``Snippet`` objects from the snippet dicts of an SPDX JSON document."""

    annotation_parser: AnnotationParser
    logger: Logger

    def __init__(self, logger: Logger):
        self.annotation_parser = AnnotationParser(logger)
        self.logger = logger

    def parse_snippets(self, snippet_dicts_list: List[Dict]) -> List[Snippet]:
        """Parse every snippet dict.

        ``None`` is treated like an empty list; snippets that could not be
        parsed are skipped, their error is already logged.
        """
        parsed_snippets = (self.parse_snippet(snippet_dict) for snippet_dict in snippet_dicts_list or [])
        return [snippet for snippet in parsed_snippets if snippet is not None]

    def parse_snippet(self, snippet_dict: Dict) -> Optional[Snippet]:
        """Create a ``Snippet`` from one snippet dict.

        :return: the new ``Snippet``, or ``None`` (with an error appended to the
            logger) when no valid byte range could be read from "ranges".
        """
        spdx_id = snippet_dict.get("SPDXID")
        file_spdx_id = snippet_dict.get("snippetFromFile")
        name = snippet_dict.get("name")
        ranges = self.parse_ranges(snippet_dict.get("ranges") or [])
        # The byte range is passed to the constructor; invalid ranges were
        # dropped by parse_ranges, so it can be missing here.
        if RangeType.BYTE not in ranges:
            self.logger.append(f'Error while parsing snippet {spdx_id}: no valid byte range found')
            return None
        snippet = Snippet(spdx_id, file_spdx_id, ranges[RangeType.BYTE], name)

        snippet.line_range = set_optional_property(RangeType.LINE, ranges)
        snippet.attribution_texts = set_optional_property("attributionTexts", snippet_dict)
        snippet.comment = set_optional_property("comment", snippet_dict)
        snippet.copyright_text = set_optional_property("copyrightText", snippet_dict)
        snippet.license_comment = set_optional_property("licenseComments", snippet_dict)
        snippet.concluded_license = set_optional_property("licenseConcluded", snippet_dict)
        snippet.license_info = set_optional_property("licenseInfoInSnippets", snippet_dict)

        if "annotations" in snippet_dict:
            # "annotations" is handled as a list of dicts, so the list parser is used.
            # NOTE(review): the parsed annotations are not stored anywhere
            # yet; only parsing errors reach the logger.
            self.annotation_parser.parse_annotations(snippet_dict.get("annotations") or [], snippet.spdx_id)

        return snippet

    def parse_ranges(self, ranges_from_snippet: List[Dict]) -> Dict:
        """Map each valid range of ``ranges_from_snippet`` to a ``(start, end)`` tuple.

        :return: dict keyed by ``RangeType``. Ranges that raise a ``ValueError``
            are left out and the error's first argument is appended to the logger.
        """
        ranges = {}
        for range_dict in ranges_from_snippet:
            try:
                range_type = self.validate_range_and_get_type(range_dict)
                ranges[range_type] = SnippetParser.get_start_end_tuple(range_dict, range_type)
            except ValueError as error:
                self.logger.append(error.args[0])
        return ranges

    @staticmethod
    def get_start_end_tuple(range_dict: Dict, range_type: RangeType) -> Tuple[int, int]:
        """Return ``(start, end)`` of a range as integers.

        Reads "offset" from both pointers for ``RangeType.BYTE`` and
        "lineNumber" otherwise.
        """
        pointer_key = "offset" if range_type == RangeType.BYTE else "lineNumber"
        start = int(range_dict["startPointer"][pointer_key])
        end = int(range_dict["endPointer"][pointer_key])
        return start, end

    def validate_range_and_get_type(self, range_dict: Dict) -> RangeType:
        """Return the common type of both pointers of ``range_dict``.

        :raises ValueError: if a pointer is missing, a pointer is invalid, or
            the two pointers have different types.
        """
        if ("startPointer" not in range_dict) or ("endPointer" not in range_dict):
            raise ValueError("Snippet::ranges")
        start_pointer_type = self.validate_pointer_and_get_type(range_dict["startPointer"])
        end_pointer_type = self.validate_pointer_and_get_type(range_dict["endPointer"])
        if start_pointer_type != end_pointer_type:
            raise ValueError("Snippet::ranges")
        return start_pointer_type

    @staticmethod
    def validate_pointer_and_get_type(pointer: Dict) -> RangeType:
        """Return ``BYTE`` for a pointer with "offset", ``LINE`` for one with "lineNumber".

        :raises ValueError: if the pointer has both keys or neither of them.
        """
        # A static method receives no instance; a ``self`` parameter here made
        # every ``self.validate_pointer_and_get_type(pointer)`` call fail with TypeError.
        if ("offset" in pointer) == ("lineNumber" in pointer):
            raise ValueError("Snippet::ranges")
        return RangeType.BYTE if "offset" in pointer else RangeType.LINE
class Logger:
    """Collects error messages produced while parsing."""

    messages: List[str]

    def __init__(self):
        # Must be ``__init__``: under the name ``__int__`` this never ran on
        # construction, so ``messages`` did not exist and ``append`` failed
        # with AttributeError.
        self.messages = []

    def append(self, message: str):
        """Record ``message``."""
        self.messages.append(message)

    def has_errors(self):
        """Return ``True`` if at least one message was recorded."""
        return bool(self.messages)

    def get_errors(self):
        """Return the list of recorded messages."""
        return self.messages


# TODO(review): tests/parser/test_json_parser.py in this patch only holds an
# unfinished stub of ``test_set_optional_property`` (``parametrize()`` without
# arguments, no colon after the signature, an empty ``assert``); it still needs
# a real test body.