From 00174c94381f81bd77e6b5feeb55c7012dfe6551 Mon Sep 17 00:00:00 2001 From: Meret Behrens Date: Fri, 25 Nov 2022 10:29:08 +0100 Subject: [PATCH 1/4] WIP: [issue-305] add parser Signed-off-by: Meret Behrens --- src/__init__.py | 0 src/parser/__init__.py | 0 src/parser/annotationparser.py | 33 ++++++++++++ src/parser/creationinfoparser.py | 42 +++++++++++++++ src/parser/error.py | 22 ++++++++ src/parser/extractedlicensingparser.py | 25 +++++++++ src/parser/fileparser.py | 25 +++++++++ src/parser/jsonparser.py | 71 ++++++++++++++++++++++++++ src/parser/logger.py | 27 ++++++++++ src/parser/packageparser.py | 25 +++++++++ src/parser/relationshipparser.py | 25 +++++++++ src/parser/reviewparser.py | 29 +++++++++++ src/parser/snippetparser.py | 60 ++++++++++++++++++++++ 13 files changed, 384 insertions(+) create mode 100644 src/__init__.py create mode 100644 src/parser/__init__.py create mode 100644 src/parser/annotationparser.py create mode 100644 src/parser/creationinfoparser.py create mode 100644 src/parser/error.py create mode 100644 src/parser/extractedlicensingparser.py create mode 100644 src/parser/fileparser.py create mode 100644 src/parser/jsonparser.py create mode 100644 src/parser/logger.py create mode 100644 src/parser/packageparser.py create mode 100644 src/parser/relationshipparser.py create mode 100644 src/parser/reviewparser.py create mode 100644 src/parser/snippetparser.py diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/parser/__init__.py b/src/parser/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/parser/annotationparser.py b/src/parser/annotationparser.py new file mode 100644 index 000000000..86f8c349c --- /dev/null +++ b/src/parser/annotationparser.py @@ -0,0 +1,33 @@ +# Copyright (c) 2022 spdx contributors +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Dict, Optional + +from src.model.annotation import Annotation, AnnotationType +from src.parser.logger import Logger + + +class AnnotationParser: + logger: Logger + + def __init__(self, logger): + self.logger = logger + + def parse(self, annotation: Dict, spdx_id: Optional[str]) -> Annotation: + try: + spdx_id = annotation.get("SPDXID") or spdx_id + annotation_type = annotation.get("annotationType") + annotator = annotation.get("annotator") + annotation_date = annotation.get("annotationDate") + annotation_comment = annotation.get("annotationComment") + annotation = Annotation(spdx_id, annotation_type, annotator, annotation_date, annotation_comment) + except TypeError: + self.logger.append('Error while parsing annotation') + return annotation diff --git a/src/parser/creationinfoparser.py b/src/parser/creationinfoparser.py new file mode 100644 index 000000000..0a689850c --- /dev/null +++ b/src/parser/creationinfoparser.py @@ -0,0 +1,42 @@ +# Copyright (c) 2022 spdx contributors +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from typing import Dict, Tuple + +from src.model.document import CreationInfo +from src.parser.logger import Logger + + +class CreationInfoParser: + logger: Logger + + def __init__(self, logger: Logger): + self.logger = logger + + def parse_creation_info(self, creation_info_dict: Dict) -> CreationInfo: + creators = creation_info_dict.get("creators") + created = creation_info_dict.get("created") + creation_info = CreationInfo(creators, created) + if "comment" in creation_info_dict: + creation_info.comment = creation_info_dict.get("comment") + + if "licenseListVersion" in creation_info_dict: + creation_info.license_list_version = creation_info_dict.get("licenseListVersion") + + return creation_info + + def parse(self, doc_dict: Dict) -> Tuple[str, str, str, str, CreationInfo]: + spdx_version = doc_dict.get("spdxVersion") + spdx_id = doc_dict.get("SPDXID") + name = doc_dict.get("name") + document_namespace = doc_dict.get("documentNamespace") + creation_info = self.parse_creation_info(doc_dict.get("creationInfo")) + + return spdx_version, spdx_id, name, document_namespace, creation_info diff --git a/src/parser/error.py b/src/parser/error.py new file mode 100644 index 000000000..ac192eabd --- /dev/null +++ b/src/parser/error.py @@ -0,0 +1,22 @@ +# Copyright (c) 2022 spdx contributors +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from typing import List + + +class SPDXError(Exception): + pass + + +class SPDXParsingError(SPDXError): + message: List[str] + + def __init__(self, message): + self.message = message diff --git a/src/parser/extractedlicensingparser.py b/src/parser/extractedlicensingparser.py new file mode 100644 index 000000000..839d80449 --- /dev/null +++ b/src/parser/extractedlicensingparser.py @@ -0,0 +1,25 @@ +# Copyright (c) 2022 spdx contributors +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Dict + +from src.model.extracted_licensing_info import ExtractedLicensingInfo +from src.parser.logger import Logger + + +class ExtractedLicensingParser: + logger: Logger + + def __init__(self, logger: Logger): + self.logger = logger + + def parse(self, extracted_licensing_info: Dict) -> ExtractedLicensingInfo: + extracted_licensing_info = ExtractedLicensingInfo() + return extracted_licensing_info diff --git a/src/parser/fileparser.py b/src/parser/fileparser.py new file mode 100644 index 000000000..38124c05d --- /dev/null +++ b/src/parser/fileparser.py @@ -0,0 +1,25 @@ +# Copyright (c) 2022 spdx contributors +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Dict + +from spdx.file import File +from src.parser.logger import Logger + + +class FileParser: + logger: Logger + + def __init__(self, logger: Logger): + self.logger = logger + + def parse(self, file: Dict) -> File: + file = File() + return file diff --git a/src/parser/jsonparser.py b/src/parser/jsonparser.py new file mode 100644 index 000000000..d33e130cf --- /dev/null +++ b/src/parser/jsonparser.py @@ -0,0 +1,71 @@ +# Copyright (c) 2022 spdx contributors +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import json + +from src.model.document import Document +from src.parser.annotationparser import AnnotationParser +from src.parser.creationinfoparser import CreationInfoParser +from src.parser.error import SPDXParsingError +from src.parser.extractedlicensingparser import ExtractedLicensingParser +from src.parser.fileparser import FileParser +from src.parser.logger import Logger +from src.parser.packageparser import PackageParser +from src.parser.relationshipparser import RelationshipParser +from src.parser.reviewparser import ReviewParser +from src.parser.snippetparser import SnippetParser + + +class JsonParser: + logger: Logger + creation_info_parser: CreationInfoParser + package_parser: PackageParser + file_parser: FileParser + snippet_parser: SnippetParser + extracted_licenses_parser: ExtractedLicensingParser + relationship_parser: RelationshipParser + annotation_parser: AnnotationParser + review_parser: ReviewParser + + def __init__(self): + self.logger = Logger() + self.creation_info_parser = CreationInfoParser(self.logger) + self.package_parser = PackageParser(self.logger) + self.file_parser = FileParser(self.logger) + self.snippet_parser = SnippetParser(self.logger) + self.extracted_licenses_parser = ExtractedLicensingParser(self.logger) + self.relationship_parser = RelationshipParser(self.logger) + self.annotation_parser = AnnotationParser(self.logger) + self.review_parser = ReviewParser(self.logger) + + def parse(self, filename: str) -> Document: + logger = Logger() + with open(filename) as file: + input_doc_as_dict = json.load(file) + spdx_version, spdx_id, name, document_namespace, creation_info = self.creation_info_parser.parse( + input_doc_as_dict) + document: Document = Document(spdx_version, spdx_id, name, document_namespace, creation_info) + + document.packages = list(map(self.package_parser.parse, input_doc_as_dict.get("packages"))) + document.files = list(map(self.file_parser.parse, input_doc_as_dict.get("files"))) + document.annotations = 
list(map(self.annotation_parser.parse, input_doc_as_dict.get("annotations"), document.spdx_id)) + document.snippets = list(map(self.snippet_parser.parse, input_doc_as_dict.get("snippets"))) + document.relationships = list(map(self.relationship_parser.parse, input_doc_as_dict.get("relationships"))) + + review_to_annotations = list(map(self.review_parser.parse, input_doc_as_dict.get("revieweds"))) + for annotation in review_to_annotations: + document.annotations.append(annotation) + + document.extracted_licensing_info = map(self.extracted_licenses_parser.parse, + input_doc_as_dict.get("hasExtractedLicensingInfo")) + + if logger.has_errors(): + raise SPDXParsingError(logger.get_errors()) + return document diff --git a/src/parser/logger.py b/src/parser/logger.py new file mode 100644 index 000000000..e126b5ccf --- /dev/null +++ b/src/parser/logger.py @@ -0,0 +1,27 @@ +# Copyright (c) 2022 spdx contributors +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from typing import List + + +class Logger: + messages: List[str] + + def __init__(self): + self.messages = [] + + def append(self, message: str): + self.messages.append(message) + + def has_errors(self): + return bool(self.messages) + + def get_errors(self): + return self.messages diff --git a/src/parser/packageparser.py b/src/parser/packageparser.py new file mode 100644 index 000000000..ad667d775 --- /dev/null +++ b/src/parser/packageparser.py @@ -0,0 +1,25 @@ +# Copyright (c) 2022 spdx contributors +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Dict + +from src.model.package import Package +from src.parser.logger import Logger + + +class PackageParser: + logger: Logger + + def __init__(self, logger: Logger): + self.logger = logger + + def parse(self, package: Dict) -> Package: + package = Package() + return package diff --git a/src/parser/relationshipparser.py b/src/parser/relationshipparser.py new file mode 100644 index 000000000..06b652a2b --- /dev/null +++ b/src/parser/relationshipparser.py @@ -0,0 +1,25 @@ +# Copyright (c) 2022 spdx contributors +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Dict + +from src.model.relationship import Relationship +from src.parser.logger import Logger + + +class RelationshipParser: + logger: Logger + + def __init__(self, logger): + self.logger = logger + + def parse(self, relationship: Dict) -> Relationship: + relationship = Relationship() + return relationship diff --git a/src/parser/reviewparser.py b/src/parser/reviewparser.py new file mode 100644 index 000000000..e7b8c93d8 --- /dev/null +++ b/src/parser/reviewparser.py @@ -0,0 +1,29 @@ +# Copyright (c) 2022 spdx contributors +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Dict + +from src.model.annotation import Annotation, AnnotationType +from src.parser.logger import Logger + + +class ReviewParser: # use AnnotationParser directly, with annotation_type as an argument? 
+ logger: Logger + + def __init__(self, logger: Logger): + self.logger = logger + + def parse(self, review: Dict) -> Annotation: + annotation_date = review.get("reviewDate") + annotator = review.get("reviewer") + + annotation = Annotation(annotation_type=AnnotationType.REVIEW, annotator=annotator, + annotation_date=annotation_date) + return annotation diff --git a/src/parser/snippetparser.py b/src/parser/snippetparser.py new file mode 100644 index 000000000..07bb244da --- /dev/null +++ b/src/parser/snippetparser.py @@ -0,0 +1,60 @@ +# Copyright (c) 2022 spdx contributors +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from typing import Dict, Tuple + +from src.model.snippet import Snippet +from src.parser.annotationparser import AnnotationParser +from src.parser.logger import Logger + + +class SnippetParser: + annotation_parser: AnnotationParser + logger: Logger + + def __init__(self, logger: Logger): + self.annotation_parser = AnnotationParser() + self.logger = logger + + def parse_ranges(self, ranges: Dict) -> Tuple: + byte_range = () + line_range = () + return byte_range, line_range + + def parse(self, snippet_dict: Dict) -> Snippet: + spdx_id = snippet_dict.get("SPDXID") + file_spdx_id = snippet_dict.get("snippetFromFile") + name = snippet_dict.get("name") + byte_range, line_range = self.parse_ranges(snippet_dict.get("ranges")) + snippet = Snippet(spdx_id, file_spdx_id, byte_range, name) + if line_range: + snippet.line_range = line_range + if "attributionTexts" in snippet_dict: + attribution_texts = [attribution_text for attribution_text in snippet_dict.get("attributionTexts")] + snippet.attribution_texts = attribution_texts + if "comment" in snippet_dict: + comment = snippet_dict.get("comment") + snippet.comment = comment + if "copyrightText" in snippet_dict: + copyright_text = snippet_dict.get("copyrightText") + snippet.copyright_text = copyright_text + if "license_comments" in snippet_dict: + license_comment = snippet_dict.get("licenseComments") + snippet.license_comment = license_comment + if "licenseConcluded" in snippet_dict: #LicenseExpression? + concluded_license = snippet_dict.get("licenseConcluded") + snippet.concluded_license = concluded_license + if "licenInfoInSnippets" in snippet_dict: #LicenseExpression? 
+ license_infos = [license_info for license_info in snippet_dict.get("licenseInfoInSnippets")] + snippet.license_info = license_infos + if "annotations" in snippet_dict: + self.annotation_parser.parse(snippet_dict.get("annotations"), snippet.spdx_id) + + return snippet From 306a31354671d4f25ca7586fe789d7f4eb76486e Mon Sep 17 00:00:00 2001 From: Meret Behrens Date: Mon, 28 Nov 2022 11:28:24 +0100 Subject: [PATCH 2/4] refactor parser module Signed-off-by: Meret Behrens --- src/parser/error.py | 10 +- src/parser/json/__init__.py | 0 .../annotation_parser.py} | 10 +- .../creation_info_parser.py} | 0 .../extracted_licensing_parser.py} | 15 ++- src/parser/json/file_parser.py | 39 ++++++++ .../{jsonparser.py => json/json_parser.py} | 31 +++--- .../package_parser.py} | 15 ++- .../{fileparser.py => json/parser_utils.py} | 20 ++-- .../relationship_parser.py} | 10 +- .../review_parser.py} | 16 ++- src/parser/json/snippet_parser.py | 98 +++++++++++++++++++ src/parser/snippetparser.py | 60 ------------ 13 files changed, 214 insertions(+), 110 deletions(-) create mode 100644 src/parser/json/__init__.py rename src/parser/{annotationparser.py => json/annotation_parser.py} (74%) rename src/parser/{creationinfoparser.py => json/creation_info_parser.py} (100%) rename src/parser/{extractedlicensingparser.py => json/extracted_licensing_parser.py} (55%) create mode 100644 src/parser/json/file_parser.py rename src/parser/{jsonparser.py => json/json_parser.py} (67%) rename src/parser/{packageparser.py => json/package_parser.py} (66%) rename src/parser/{fileparser.py => json/parser_utils.py} (60%) rename src/parser/{relationshipparser.py => json/relationship_parser.py} (67%) rename src/parser/{reviewparser.py => json/review_parser.py} (66%) create mode 100644 src/parser/json/snippet_parser.py delete mode 100644 src/parser/snippetparser.py diff --git a/src/parser/error.py b/src/parser/error.py index ac192eabd..dc7c5ccd6 100644 --- a/src/parser/error.py +++ b/src/parser/error.py @@ -11,12 
+11,8 @@ from typing import List -class SPDXError(Exception): - pass - - -class SPDXParsingError(SPDXError): - message: List[str] +class SPDXParsingError(Exception): + messages: List[str] def __init__(self, message): - self.message = message + self.messages = [message] diff --git a/src/parser/json/__init__.py b/src/parser/json/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/parser/annotationparser.py b/src/parser/json/annotation_parser.py similarity index 74% rename from src/parser/annotationparser.py rename to src/parser/json/annotation_parser.py index 86f8c349c..dbe026f6f 100644 --- a/src/parser/annotationparser.py +++ b/src/parser/json/annotation_parser.py @@ -8,7 +8,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from typing import Dict, Optional +from typing import Dict, Optional, List from src.model.annotation import Annotation, AnnotationType from src.parser.logger import Logger @@ -20,7 +20,7 @@ class AnnotationParser: def __init__(self, logger): self.logger = logger - def parse(self, annotation: Dict, spdx_id: Optional[str]) -> Annotation: + def parse_annotation(self, annotation: Dict, spdx_id: Optional[str] = None) -> Annotation: try: spdx_id = annotation.get("SPDXID") or spdx_id annotation_type = annotation.get("annotationType") @@ -31,3 +31,9 @@ def parse(self, annotation: Dict, spdx_id: Optional[str]) -> Annotation: except TypeError: self.logger.append('Error while parsing annotation') return annotation + + def parse_annotations(self, annotations_dict_list: List[Dict], spdx_id: Optional[str] = None) -> List[Annotation]: + annotations_list = [] + for annotation_dict in annotations_dict_list: + annotations_list.append(self.parse_annotation(annotation_dict, spdx_id)) + return annotations_list diff --git a/src/parser/creationinfoparser.py b/src/parser/json/creation_info_parser.py similarity 
index 100% rename from src/parser/creationinfoparser.py rename to src/parser/json/creation_info_parser.py diff --git a/src/parser/extractedlicensingparser.py b/src/parser/json/extracted_licensing_parser.py similarity index 55% rename from src/parser/extractedlicensingparser.py rename to src/parser/json/extracted_licensing_parser.py index 839d80449..8dcb9dcbd 100644 --- a/src/parser/extractedlicensingparser.py +++ b/src/parser/json/extracted_licensing_parser.py @@ -8,7 +8,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from typing import Dict +from typing import Dict, List from src.model.extracted_licensing_info import ExtractedLicensingInfo from src.parser.logger import Logger @@ -20,6 +20,13 @@ class ExtractedLicensingParser: def __init__(self, logger: Logger): self.logger = logger - def parse(self, extracted_licensing_info: Dict) -> ExtractedLicensingInfo: - extracted_licensing_info = ExtractedLicensingInfo() - return extracted_licensing_info + def parse_extracted_licensing_info(self, extracted_licensing_info_dict: Dict) -> ExtractedLicensingInfo: + extracted_licensing_info_dict = ExtractedLicensingInfo() + return extracted_licensing_info_dict + + def parse_extracted_licensing_infos(self, extracted_licensing_info_dicts: List[Dict]) -> List[ExtractedLicensingInfo]: + extracted_licensing_infos_list = [] + for extracted_licensing_info_dict in extracted_licensing_info_dicts: + extracted_licensing_infos_list.append(self.parse_extracted_licensing_info(extracted_licensing_info_dict)) + + return extracted_licensing_infos_list diff --git a/src/parser/json/file_parser.py b/src/parser/json/file_parser.py new file mode 100644 index 000000000..b4d5f73d7 --- /dev/null +++ b/src/parser/json/file_parser.py @@ -0,0 +1,39 @@ +# Copyright (c) 2022 spdx contributors +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not 
use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Dict, List, Optional + +from spdx.file import File +from src.parser.logger import Logger + + +class FileParser: + logger: Logger + + def __init__(self, logger: Logger): + self.logger = logger + + def parse_file(self, file_dict: Dict) -> Optional[File]: + name = file_dict.get("name") + spdx_id = file_dict.get("SPDXID") + try: + file = File(name, spdx_id) + except ValueError as error: + self.logger.append(error.args[0]) + return + return file + + def parse_files(self, file_dict_list) -> List[File]: + file_list = [] + for file_dict in file_dict_list: + file_list.append(self.parse_file(file_dict)) + + return file_list + diff --git a/src/parser/jsonparser.py b/src/parser/json/json_parser.py similarity index 67% rename from src/parser/jsonparser.py rename to src/parser/json/json_parser.py index d33e130cf..0f5b27087 100644 --- a/src/parser/jsonparser.py +++ b/src/parser/json/json_parser.py @@ -11,16 +11,16 @@ import json from src.model.document import Document -from src.parser.annotationparser import AnnotationParser -from src.parser.creationinfoparser import CreationInfoParser +from src.parser.json.annotation_parser import AnnotationParser +from src.parser.json.creation_info_parser import CreationInfoParser from src.parser.error import SPDXParsingError -from src.parser.extractedlicensingparser import ExtractedLicensingParser -from src.parser.fileparser import FileParser +from src.parser.json.extracted_licensing_parser import ExtractedLicensingParser +from src.parser.json.file_parser import 
FileParser from src.parser.logger import Logger -from src.parser.packageparser import PackageParser -from src.parser.relationshipparser import RelationshipParser -from src.parser.reviewparser import ReviewParser -from src.parser.snippetparser import SnippetParser +from src.parser.json.package_parser import PackageParser +from src.parser.json.relationship_parser import RelationshipParser +from src.parser.json.review_parser import ReviewParser +from src.parser.json.snippet_parser import SnippetParser class JsonParser: @@ -53,17 +53,16 @@ def parse(self, filename: str) -> Document: input_doc_as_dict) document: Document = Document(spdx_version, spdx_id, name, document_namespace, creation_info) - document.packages = list(map(self.package_parser.parse, input_doc_as_dict.get("packages"))) - document.files = list(map(self.file_parser.parse, input_doc_as_dict.get("files"))) - document.annotations = list(map(self.annotation_parser.parse, input_doc_as_dict.get("annotations"), document.spdx_id)) - document.snippets = list(map(self.snippet_parser.parse, input_doc_as_dict.get("snippets"))) - document.relationships = list(map(self.relationship_parser.parse, input_doc_as_dict.get("relationships"))) - - review_to_annotations = list(map(self.review_parser.parse, input_doc_as_dict.get("revieweds"))) + document.packages = self.package_parser.parse_packages(input_doc_as_dict.get("packages")) + document.files = self.file_parser.parse_files(input_doc_as_dict.get("files")) + document.annotations = self.annotation_parser.parse_annotations(input_doc_as_dict.get("annotations"), document.spdx_id) + document.snippets = self.snippet_parser.parse_snippets(input_doc_as_dict.get("snippets")) + document.relationships = self.relationship_parser.parse_relationships(input_doc_as_dict.get("relationships")) + review_to_annotations = self.review_parser.parse_reviews(input_doc_as_dict.get("revieweds")) for annotation in review_to_annotations: document.annotations.append(annotation) - 
document.extracted_licensing_info = map(self.extracted_licenses_parser.parse, + document.extracted_licensing_info = map(self.extracted_licenses_parser.parse_extracted_licensing_info, input_doc_as_dict.get("hasExtractedLicensingInfo")) if logger.has_errors(): diff --git a/src/parser/packageparser.py b/src/parser/json/package_parser.py similarity index 66% rename from src/parser/packageparser.py rename to src/parser/json/package_parser.py index ad667d775..f6f38586d 100644 --- a/src/parser/packageparser.py +++ b/src/parser/json/package_parser.py @@ -8,7 +8,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from typing import Dict +from typing import Dict, List from src.model.package import Package from src.parser.logger import Logger @@ -20,6 +20,15 @@ class PackageParser: def __init__(self, logger: Logger): self.logger = logger - def parse(self, package: Dict) -> Package: - package = Package() + def parse_package(self, package: Dict) -> Package: + name = package.get("name") + + package = Package(name) return package + + def parse_packages(self, packages_dict_list: List[Dict]) -> List[Package]: + packages_list = [] + for package_dict in packages_dict_list: + packages_list.append(self.parse_package(package_dict)) + return packages_list + diff --git a/src/parser/fileparser.py b/src/parser/json/parser_utils.py similarity index 60% rename from src/parser/fileparser.py rename to src/parser/json/parser_utils.py index 38124c05d..d1692e3ec 100644 --- a/src/parser/fileparser.py +++ b/src/parser/json/parser_utils.py @@ -8,18 +8,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Dict +from typing import Dict, Optional, List, Union -from spdx.file import File -from src.parser.logger import Logger +from enum import Enum -class FileParser: - logger: Logger - - def __init__(self, logger: Logger): - self.logger = logger - - def parse(self, file: Dict) -> File: - file = File() - return file +def set_optional_property(property_name: Union[str, Enum], parse_object: Dict) -> Optional[Union[str, int, Dict, List, bool]]: + if property_name in parse_object: + property_value = parse_object.get(property_name) + return property_value + else: + return None diff --git a/src/parser/relationshipparser.py b/src/parser/json/relationship_parser.py similarity index 67% rename from src/parser/relationshipparser.py rename to src/parser/json/relationship_parser.py index 06b652a2b..abe7fb3b2 100644 --- a/src/parser/relationshipparser.py +++ b/src/parser/json/relationship_parser.py @@ -8,7 +8,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Dict +from typing import Dict, List from src.model.relationship import Relationship from src.parser.logger import Logger @@ -20,6 +20,12 @@ class RelationshipParser: def __init__(self, logger): self.logger = logger - def parse(self, relationship: Dict) -> Relationship: + def parse_relationship(self, relationship: Dict) -> Relationship: relationship = Relationship() return relationship + + def parse_relationships(self, relationship_dict_list: List[Dict]) -> List[Relationship]: + relationships_list = [] + for relationship_dict in relationship_dict_list: + relationships_list.append(self.parse_relationship(relationship_dict)) + return relationships_list diff --git a/src/parser/reviewparser.py b/src/parser/json/review_parser.py similarity index 66% rename from src/parser/reviewparser.py rename to src/parser/json/review_parser.py index e7b8c93d8..414375768 100644 --- a/src/parser/reviewparser.py +++ b/src/parser/json/review_parser.py @@ -8,7 +8,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Dict +from typing import Dict, List from src.model.annotation import Annotation, AnnotationType from src.parser.logger import Logger @@ -20,10 +20,18 @@ class ReviewParser: # direkt AnnotationParser benutzen mit annotation_type als def __init__(self, logger: Logger): self.logger = logger - def parse(self, review: Dict) -> Annotation: + def parse_review(self, review: Dict) -> Annotation: annotation_date = review.get("reviewDate") annotator = review.get("reviewer") - - annotation = Annotation(annotation_type=AnnotationType.REVIEW, annotator=annotator, + try: + annotation = Annotation(annotation_type=AnnotationType.REVIEW, annotator=annotator, annotation_date=annotation_date) + except ValueError as error: + self.logger.append(error.args[0]) return annotation + + def parse_reviews(self, review_dicts_list: List[Dict]) -> List[Annotation]: + reviews_list = [] + for review_dict in review_dicts_list: + reviews_list.append(self.parse_review(review_dict)) + return reviews_list diff --git a/src/parser/json/snippet_parser.py b/src/parser/json/snippet_parser.py new file mode 100644 index 000000000..968001b59 --- /dev/null +++ b/src/parser/json/snippet_parser.py @@ -0,0 +1,98 @@ +# Copyright (c) 2022 spdx contributors +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from enum import auto, Enum +from typing import Dict, Tuple, List + +from src.model.snippet import Snippet +from src.parser.json.annotation_parser import AnnotationParser + +from src.parser.json.parser_utils import set_optional_property +from src.parser.logger import Logger + + +class RangeType(Enum): + BYTE = auto() + LINE = auto() + + +class SnippetParser: + annotation_parser: AnnotationParser + logger: Logger + + def __init__(self, logger: Logger): + self.annotation_parser = AnnotationParser(logger) + self.logger = logger + + def parse_snippets(self, snippet_dicts_list: List[Dict]) -> List[Snippet]: + snippets_list = [] + for snippet_dict in snippet_dicts_list: + snippets_list.append(self.parse_snippet(snippet_dict)) + return snippets_list + + def parse_snippet(self, snippet_dict: Dict) -> Snippet: + spdx_id = snippet_dict.get("SPDXID") + file_spdx_id = snippet_dict.get("snippetFromFile") + name = snippet_dict.get("name") + ranges= self.parse_ranges(snippet_dict.get("ranges")) + snippet = Snippet(spdx_id, file_spdx_id, ranges[RangeType.BYTE], name) + + snippet.line_range = set_optional_property(RangeType.LINE, ranges) + snippet.attribution_texts = set_optional_property("attributionTexts", snippet_dict) + snippet.comment = set_optional_property("comment", snippet_dict) + snippet.copyright_text = set_optional_property("copyrightText", snippet_dict) + snippet.license_comment = set_optional_property("licenseComments", snippet_dict) + snippet.concluded_license = set_optional_property("licenseConcluded", snippet_dict) + snippet.license_info = set_optional_property("licenseInfoInSnippets", snippet_dict) + + if "annotations" in snippet_dict: + self.annotation_parser.parse_annotation(snippet_dict.get("annotations"), snippet.spdx_id) + + return snippet + + def parse_ranges(self, ranges_from_snippet: List[Dict]) -> Dict: + ranges = {} + for range_dict in ranges_from_snippet: + try: + range_type = self.validate_range_and_get_type(range_dict) + start_end_tuple: Tuple[int, 
int] = SnippetParser.get_start_end_tuple(range_dict, range_type) + ranges[range_type] = start_end_tuple + except ValueError as error: + self.logger.append(error.args[0]) + return ranges + + @staticmethod + def get_start_end_tuple(range_dict: Dict, range_type: RangeType) -> Tuple[int, int]: + end_pointer = range_dict["endPointer"] + start_pointer = range_dict["startPointer"] + if range_type == RangeType.BYTE: + start = int(start_pointer["offset"]) + end = int(end_pointer["offset"]) + else: + start = int(start_pointer["lineNumber"]) + end = int(end_pointer["lineNumber"]) + + return start, end + + def validate_range_and_get_type(self, range_dict: Dict) -> RangeType: + if ("startPointer" not in range_dict) or ("endPointer" not in range_dict): + raise ValueError("Snippet::ranges") + start_pointer_type = self.validate_pointer_and_get_type(range_dict["startPointer"]) + end_pointer_type = self.validate_pointer_and_get_type(range_dict["endPointer"]) + if start_pointer_type != end_pointer_type: + raise ValueError("Snippet::ranges") + return start_pointer_type + + @staticmethod + def validate_pointer_and_get_type(self, pointer: Dict) -> RangeType: + if ("offset" in pointer and "lineNumber" in pointer) or ( + "offset" not in pointer and "lineNumber" not in pointer): + raise ValueError("Snippet::ranges") + return RangeType.BYTE if "offset" in pointer else RangeType.LINE diff --git a/src/parser/snippetparser.py b/src/parser/snippetparser.py deleted file mode 100644 index 07bb244da..000000000 --- a/src/parser/snippetparser.py +++ /dev/null @@ -1,60 +0,0 @@ -# Copyright (c) 2022 spdx contributors -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from typing import Dict, Tuple - -from src.model.snippet import Snippet -from src.parser.annotationparser import AnnotationParser -from src.parser.logger import Logger - - -class SnippetParser: - annotation_parser: AnnotationParser - logger: Logger - - def __init__(self, logger: Logger): - self.annotation_parser = AnnotationParser() - self.logger = logger - - def parse_ranges(self, ranges: Dict) -> Tuple: - byte_range = () - line_range = () - return byte_range, line_range - - def parse(self, snippet_dict: Dict) -> Snippet: - spdx_id = snippet_dict.get("SPDXID") - file_spdx_id = snippet_dict.get("snippetFromFile") - name = snippet_dict.get("name") - byte_range, line_range = self.parse_ranges(snippet_dict.get("ranges")) - snippet = Snippet(spdx_id, file_spdx_id, byte_range, name) - if line_range: - snippet.line_range = line_range - if "attributionTexts" in snippet_dict: - attribution_texts = [attribution_text for attribution_text in snippet_dict.get("attributionTexts")] - snippet.attribution_texts = attribution_texts - if "comment" in snippet_dict: - comment = snippet_dict.get("comment") - snippet.comment = comment - if "copyrightText" in snippet_dict: - copyright_text = snippet_dict.get("copyrightText") - snippet.copyright_text = copyright_text - if "license_comments" in snippet_dict: - license_comment = snippet_dict.get("licenseComments") - snippet.license_comment = license_comment - if "licenseConcluded" in snippet_dict: #LicenseExpression? 
- concluded_license = snippet_dict.get("licenseConcluded") - snippet.concluded_license = concluded_license - if "licenInfoInSnippets" in snippet_dict: #LicenseExpression? - license_infos = [license_info for license_info in snippet_dict.get("licenseInfoInSnippets")] - snippet.license_info = license_infos - if "annotations" in snippet_dict: - self.annotation_parser.parse(snippet_dict.get("annotations"), snippet.spdx_id) - - return snippet From 1f36d0aea900540d4d55978428126aefd0cd8d34 Mon Sep 17 00:00:00 2001 From: Meret Behrens Date: Mon, 28 Nov 2022 11:47:33 +0100 Subject: [PATCH 3/4] wip: add test for parser Signed-off-by: Meret Behrens --- tests/parser/__init__.py | 0 tests/parser/test_json_parser.py | 16 ++++++++++++++++ 2 files changed, 16 insertions(+) create mode 100644 tests/parser/__init__.py create mode 100644 tests/parser/test_json_parser.py diff --git a/tests/parser/__init__.py b/tests/parser/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/parser/test_json_parser.py b/tests/parser/test_json_parser.py new file mode 100644 index 000000000..b68cc98a3 --- /dev/null +++ b/tests/parser/test_json_parser.py @@ -0,0 +1,16 @@ +# Copyright (c) 2022 spdx contributors +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import pytest + + +@pytest.mark.parametrize() +def test_set_optional_property() + assert From d33ea220a3f6aadf3ddb6e73c4d6fc5a1f715cf6 Mon Sep 17 00:00:00 2001 From: Meret Behrens Date: Mon, 28 Nov 2022 16:47:09 +0100 Subject: [PATCH 4/4] wip annotation_parser Signed-off-by: Meret Behrens --- src/parser/json/annotation_parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/parser/json/annotation_parser.py b/src/parser/json/annotation_parser.py index dbe026f6f..6e0b38013 100644 --- a/src/parser/json/annotation_parser.py +++ b/src/parser/json/annotation_parser.py @@ -28,8 +28,8 @@ def parse_annotation(self, annotation: Dict, spdx_id: Optional[str] = None) -> A annotation_date = annotation.get("annotationDate") annotation_comment = annotation.get("annotationComment") annotation = Annotation(spdx_id, annotation_type, annotator, annotation_date, annotation_comment) - except TypeError: - self.logger.append('Error while parsing annotation') + except ValueError as err: + self.logger.append(f'Error while parsing annotation: {err.args[0]}') return annotation def parse_annotations(self, annotations_dict_list: List[Dict], spdx_id: Optional[str] = None) -> List[Annotation]: