From 51473a3aecb2c03f44de1cca76707bb49a345dad Mon Sep 17 00:00:00 2001 From: Froilan Irizarry Date: Tue, 18 Sep 2018 11:53:32 -0400 Subject: [PATCH 1/7] Remove old Elasticsearch mapping files - Removed versions 1.0.0, 1.0.1, and 2.0.0 - Renamed mapping_201.json to mapping. From now on we will have just one mapping file - Renamed mapping_201.json in test_data to mapping.json. This file might be removed as well at some point --- .../repo/{mapping_201.json => mapping.json} | 0 indexes/repo/mapping_100.json | 223 -------- indexes/repo/mapping_101.json | 200 ------- indexes/repo/mapping_200.json | 492 ------------------ routes/utils.js | 2 +- services/indexer/repo/index.js | 2 +- services/indexer/term/index.js | 2 +- services/searcher/index.js | 2 +- .../indexer/repo/repoIndexStream.test.js | 2 +- .../{mapping_201.json => mapping.json} | 0 test/unit/utils.test.js | 2 +- 11 files changed, 6 insertions(+), 921 deletions(-) rename indexes/repo/{mapping_201.json => mapping.json} (100%) delete mode 100644 indexes/repo/mapping_100.json delete mode 100644 indexes/repo/mapping_101.json delete mode 100644 indexes/repo/mapping_200.json rename test/unit/test_data/mappings/{mapping_201.json => mapping.json} (100%) diff --git a/indexes/repo/mapping_201.json b/indexes/repo/mapping.json similarity index 100% rename from indexes/repo/mapping_201.json rename to indexes/repo/mapping.json diff --git a/indexes/repo/mapping_100.json b/indexes/repo/mapping_100.json deleted file mode 100644 index e8a7c01e..00000000 --- a/indexes/repo/mapping_100.json +++ /dev/null @@ -1,223 +0,0 @@ -{ - "repo": { - "properties": { - "repoID": { - "type": "string", - "index": "not_analyzed" - }, - "agency": { - "type": "nested", - "include_in_root": true, - "properties": { - "name": { - "type": "string", - "analyzer": "keyword_ci", - "fields": { - "_fulltext": { - "type": "string", - "analyzer": "englishfulltext" - } - } - }, - "acronym": { - "type": "string", - "analyzer": "keyword_ci", - "fields": { - 
"_fulltext": { - "type": "string", - "analyzer": "englishfulltext" - } - } - }, - "website": { - "type": "string", - "index": "not_analyzed" - }, - "codeUrl": { - "type": "string", - "index": "not_analyzed" - } - } - }, - "status": { - "type": "string", - "analyzer": "keyword_ci" - }, - "vcs": { - "type": "string", - "analyzer": "keyword_ci" - }, - "repository": { - "type": "string", - "analyzer": "keyword_ci" - }, - "name": { - "type": "string", - "analyzer": "keyword_ci" - }, - "homepage": { - "type": "string", - "analyzer": "keyword_ci" - }, - "downloadURL": { - "type": "string", - "index": "not_analyzed" - }, - "description": { - "type": "string", - "analyzer": "englishfulltext", - "fields": { - "_fulltext": { - "type": "string", - "analyzer": "englishfulltext" - } - } - }, - "events": { - "type": "string", - "analyzer": "keyword_ci", - "fields": { - "_fulltext": { - "type": "string", - "analyzer": "englishfulltext" - } - } - }, - "tags": { - "type": "string", - "analyzer": "keyword_ci", - "fields": { - "_fulltext": { - "type": "string", - "analyzer": "englishfulltext" - } - } - }, - "languages": { - "type": "string", - "analyzer": "keyword_ci", - "fields": { - "_fulltext": { - "type": "string", - "analyzer": "englishfulltext" - } - } - }, - "updated": { - "type": "nested", - "include_in_root": true, - "properties": { - "metadataLastUpdated": { - "type": "date", - "format": "strict_date_optional_time||epoch_millis" - }, - "lastCommit": { - "type": "date", - "format": "strict_date_optional_time||epoch_millis" - }, - "sourceCodeLastModified": { - "type": "date", - "format": "strict_date_optional_time||epoch_millis" - } - } - }, - "contact": { - "type": "nested", - "include_in_root": true, - "properties": { - "name": { - "type": "string", - "analyzer": "keyword_ci", - "fields": { - "_fulltext": { - "type": "string", - "analyzer": "englishfulltext" - } - } - }, - "email": { - "type": "string", - "analyzer": "keyword_ci", - "fields": { - "_fulltext": { - "type": 
"string", - "analyzer": "englishfulltext" - } - } - }, - "twitter": { - "type": "string", - "analyzer": "keyword_ci", - "fields": { - "_fulltext": { - "type": "string", - "analyzer": "englishfulltext" - } - } - }, - "phone": { - "type": "string", - "analyzer": "keyword_ci", - "fields": { - "_fulltext": { - "type": "string", - "analyzer": "englishfulltext" - } - } - } - } - }, - "partners": { - "type": "nested", - "include_in_root": true, - "properties": { - "name": { - "type": "string", - "analyzer": "keyword_ci", - "fields": { - "_fulltext": { - "type": "string", - "analyzer": "englishfulltext" - } - } - }, - "email": { - "type": "string", - "analyzer": "keyword_ci", - "fields": { - "_fulltext": { - "type": "string", - "analyzer": "englishfulltext" - } - } - } - } - }, - "license": { - "type": "string", - "analyzer": "keyword_ci", - "fields": { - "_fulltext": { - "type": "string", - "analyzer": "englishfulltext" - } - } - }, - "openSourceProject": { - "type": "byte" - }, - "governmentWideReuseProject": { - "type": "byte" - }, - "closedProject": { - "type": "byte" - }, - "exemption": { - "type": "byte" - }, - "exemptionText": { - "type": "string" - } - } - } -} diff --git a/indexes/repo/mapping_101.json b/indexes/repo/mapping_101.json deleted file mode 100644 index 1c8f26ca..00000000 --- a/indexes/repo/mapping_101.json +++ /dev/null @@ -1,200 +0,0 @@ -{ - "repo": { - "properties": { - "repoID": { - "type": "string", - "index": "not_analyzed" - }, - "agency": { - "type": "string", - "analyzer": "keyword_ci", - "fields": { - "_fulltext": { - "type": "string", - "analyzer": "englishfulltext" - } - } - }, - "status": { - "type": "string", - "analyzer": "keyword_ci" - }, - "vcs": { - "type": "string", - "analyzer": "keyword_ci" - }, - "repository": { - "type": "string", - "analyzer": "keyword_ci" - }, - "name": { - "type": "string", - "analyzer": "keyword_ci" - }, - "homepage": { - "type": "string", - "analyzer": "keyword_ci" - }, - "downloadURL": { - "type": "string", 
- "index": "not_analyzed" - }, - "description": { - "type": "string", - "analyzer": "englishfulltext", - "fields": { - "_fulltext": { - "type": "string", - "analyzer": "englishfulltext" - } - } - }, - "events": { - "type": "string", - "analyzer": "keyword_ci", - "fields": { - "_fulltext": { - "type": "string", - "analyzer": "englishfulltext" - } - } - }, - "tags": { - "type": "string", - "analyzer": "keyword_ci", - "fields": { - "_fulltext": { - "type": "string", - "analyzer": "englishfulltext" - } - } - }, - "languages": { - "type": "string", - "analyzer": "keyword_ci", - "fields": { - "_fulltext": { - "type": "string", - "analyzer": "englishfulltext" - } - } - }, - "updated": { - "type": "nested", - "include_in_root": true, - "properties": { - "metadataLastUpdated": { - "type": "date", - "format": "strict_date_optional_time||epoch_millis" - }, - "lastCommit": { - "type": "date", - "format": "strict_date_optional_time||epoch_millis" - }, - "sourceCodeLastModified": { - "type": "date", - "format": "strict_date_optional_time||epoch_millis" - } - } - }, - "contact": { - "type": "nested", - "include_in_root": true, - "properties": { - "name": { - "type": "string", - "analyzer": "keyword_ci", - "fields": { - "_fulltext": { - "type": "string", - "analyzer": "englishfulltext" - } - } - }, - "email": { - "type": "string", - "analyzer": "keyword_ci", - "fields": { - "_fulltext": { - "type": "string", - "analyzer": "englishfulltext" - } - } - }, - "twitter": { - "type": "string", - "analyzer": "keyword_ci", - "fields": { - "_fulltext": { - "type": "string", - "analyzer": "englishfulltext" - } - } - }, - "phone": { - "type": "string", - "analyzer": "keyword_ci", - "fields": { - "_fulltext": { - "type": "string", - "analyzer": "englishfulltext" - } - } - } - } - }, - "partners": { - "type": "nested", - "include_in_root": true, - "properties": { - "name": { - "type": "string", - "analyzer": "keyword_ci", - "fields": { - "_fulltext": { - "type": "string", - "analyzer": 
"englishfulltext" - } - } - }, - "email": { - "type": "string", - "analyzer": "keyword_ci", - "fields": { - "_fulltext": { - "type": "string", - "analyzer": "englishfulltext" - } - } - } - } - }, - "license": { - "type": "string", - "analyzer": "keyword_ci", - "fields": { - "_fulltext": { - "type": "string", - "analyzer": "englishfulltext" - } - } - }, - "openSourceProject": { - "type": "byte" - }, - "governmentWideReuseProject": { - "type": "byte" - }, - "closedProject": { - "type": "byte" - }, - "exemption": { - "type": "byte" - }, - "exemptionText": { - "type": "string" - } - } - } - } - \ No newline at end of file diff --git a/indexes/repo/mapping_200.json b/indexes/repo/mapping_200.json deleted file mode 100644 index f7f67978..00000000 --- a/indexes/repo/mapping_200.json +++ /dev/null @@ -1,492 +0,0 @@ -{ - "repo": { - "properties": { - "repoID": { - "type": "string", - "index": "not_analyzed" - }, - "agency": { - "type": "object", - "properties": { - "name": { - "type": "string", - "analyzer": "keyword_ci", - "fields": { - "_fulltext": { - "type": "string", - "analyzer": "englishfulltext" - } - } - }, - "acronym": { - "type": "string", - "analyzer": "keyword_ci", - "fields": { - "_fulltext": { - "type": "string", - "analyzer": "keyword_ci" - } - } - }, - "website": { - "type": "string", - "index": "not_analyzed" - }, - "codeUrl": { - "type": "string", - "index": "not_analyzed" - }, - "requirements": { - "type": "nested", - "properties": { - "agencyWidePolicy": { - "type": "float", - "index": "not_analyzed" - }, - "openSourceRequirement": { - "type": "float", - "index": "not_analyzed" - }, - "inventoryRequirement": { - "type": "float", - "index": "not_analyzed" - }, - "schemaFormat": { - "type": "float", - "index": "not_analyzed" - }, - "overallCompliance": { - "type": "float", - "index": "not_analyzed" - } - } - } - } - }, - "measurementType": { - "type": "object", - "properties": { - "method": { - "type": "string", - "analyzer": "keyword_ci", - "fields": { - 
"_fulltext": { - "type": "string", - "analyzer": "englishfulltext" - } - } - }, - "ifOther": { - "type": "string", - "analyzer": "englishfulltext", - "fields": { - "_fulltext": { - "type": "string", - "analyzer": "englishfulltext" - } - } - } - } - }, - "status": { - "type": "string", - "analyzer": "keyword_ci" - }, - "vcs": { - "type": "string", - "analyzer": "keyword_ci" - }, - "repositoryURL": { - "type": "string", - "analyzer": "keyword_ci" - }, - "name": { - "type": "string", - "analyzer": "keyword_ci", - "fields": { - "_fulltext": { - "type": "string", - "analyzer": "englishfulltext" - } - } - }, - "version": { - "type": "string", - "analyzer": "keyword_ci" - }, - "organization": { - "type": "string", - "analyzer": "keyword_ci", - "fields": { - "_fulltext": { - "type": "string", - "analyzer": "englishfulltext" - } - } - }, - "homepageURL": { - "type": "string", - "analyzer": "keyword_ci" - }, - "downloadURL": { - "type": "string", - "index": "not_analyzed" - }, - "description": { - "type": "string", - "analyzer": "englishfulltext" - }, - "events": { - "type": "string", - "analyzer": "keyword_ci", - "fields": { - "_fulltext": { - "type": "string", - "analyzer": "englishfulltext" - } - } - }, - "tags": { - "type": "string", - "analyzer": "keyword_ci", - "fields": { - "_fulltext": { - "type": "string", - "analyzer": "englishfulltext" - } - } - }, - "languages": { - "type": "string", - "analyzer": "keyword_ci", - "fields": { - "_fulltext": { - "type": "string", - "analyzer": "englishfulltext" - } - } - }, - "contact": { - "type": "object", - "include_in_root": true, - "properties": { - "name": { - "type": "string", - "analyzer": "keyword_ci", - "fields": { - "_fulltext": { - "type": "string", - "analyzer": "englishfulltext" - } - } - }, - "email": { - "type": "string", - "analyzer": "keyword_ci", - "fields": { - "_fulltext": { - "type": "string", - "analyzer": "englishfulltext" - } - } - }, - "twitter": { - "type": "string", - "analyzer": "keyword_ci", - 
"fields": { - "_fulltext": { - "type": "string", - "analyzer": "englishfulltext" - } - } - }, - "phone": { - "type": "string", - "analyzer": "keyword_ci", - "fields": { - "_fulltext": { - "type": "string", - "analyzer": "englishfulltext" - } - } - } - } - }, - "partners": { - "type": "nested", - "include_in_root": true, - "properties": { - "name": { - "type": "string", - "analyzer": "keyword_ci", - "fields": { - "_fulltext": { - "type": "string", - "analyzer": "englishfulltext" - } - } - }, - "email": { - "type": "string", - "analyzer": "keyword_ci", - "fields": { - "_fulltext": { - "type": "string", - "analyzer": "englishfulltext" - } - } - } - } - }, - "permissions": { - "type": "object", - "properties": { - "licenses": { - "type": "nested", - "properties": { - "name": { - "type": "string", - "analyzer": "keyword_ci", - "fields": { - "_fulltext": { - "type": "string", - "analyzer": "englishfulltext" - } - } - }, - "URL": { - "type": "string", - "index": "not_analyzed" - } - } - }, - "usageType": { - "type": "string", - "analyzer": "keyword_ci", - "fields": { - "_fulltext": { - "type": "string", - "analyzer": "englishfulltext" - } - } - }, - "exemptionText": { - "type": "string", - "analyzer": "englishfulltext" - } - } - }, - "laborHours": { - "type": "integer", - "index": "not_analyzed" - }, - "relatedCode": { - "type": "nested", - "properties": { - "name": { - "type": "string", - "analyzer": "keyword_ci", - "fields": { - "_fulltext": { - "type": "string", - "analyzer": "englishfulltext" - } - } - }, - "URL": { - "type": "string", - "index": "not_analyzed" - } - } - }, - "reusedCode": { - "type": "nested", - "properties": { - "name": { - "type": "string", - "analyzer": "keyword_ci", - "fields": { - "_fulltext": { - "type": "string", - "analyzer": "englishfulltext" - } - } - }, - "URL": { - "type": "string", - "index": "not_analyzed" - } - } - }, - "disclaimerURL": { - "type": "string", - "index": "not_analyzed" - }, - "disclaimerText": { - "type": "string", - 
"analyzer": "englishfulltext" - }, - "date": { - "type": "nested", - "include_in_root": true, - "properties": { - "created": { - "type": "date", - "ignore_malformed": true - }, - "lastModified": { - "type": "date", - "ignore_malformed": true - }, - "metadataLastUpdated": { - "type": "date", - "ignore_malformed": true - } - } - } - } - }, - "status": { - "properties": { - "last_data_harvest": { - "type": "date" - }, - "version": { - "type": "string", - "index": "not_analyzed" - }, - "agency": { - "type": "object", - "properties": { - "name": { - "type": "string", - "analyzer": "keyword_ci", - "fields": { - "_fulltext": { - "type": "string", - "analyzer": "englishfulltext" - } - } - }, - "acronym": { - "type": "string", - "analyzer": "keyword_ci", - "fields": { - "_fulltext": { - "type": "string", - "analyzer": "keyword_ci" - } - } - }, - "website": { - "type": "string", - "index": "not_analyzed" - }, - "codeUrl": { - "type": "string", - "index": "not_analyzed" - }, - "requirements": { - "type": "object", - "properties": { - "agencyWidePolicy": { - "type": "float", - "index": "not_analyzed" - }, - "openSourceRequirement": { - "type": "float", - "index": "not_analyzed" - }, - "inventoryRequirement": { - "type": "float", - "index": "not_analyzed" - }, - "schemaFormat": { - "type": "float", - "index": "not_analyzed" - }, - "overallCompliance": { - "type": "float", - "index": "not_analyzed" - } - } - } - } - }, - "issues": { - "type": "nested", - "properties": { - "organization": { - "type": "string" - }, - "project_name": { - "type": "string" - }, - "errors": { - "type": "nested", - "properties": { - "keyword": { - "type": "string" - }, - "dataPath": { - "type": "string" - }, - "schemaPath": { - "type": "string" - }, - "params": { - "type": "object" - }, - "message": { - "type": "string" - } - } - }, - "warning": { - "type": "nested", - "properties": { - "keyword": { - "type": "string" - }, - "dataPath": { - "type": "string" - }, - "schemaPath": { - "type": "string" - 
}, - "params": { - "type": "object" - }, - "message": { - "type": "string" - } - } - }, - "enhancements": { - "type": "nested", - "properties": { - "keyword": { - "type": "string" - }, - "dataPath": { - "type": "string" - }, - "schemaPath": { - "type": "string" - }, - "params": { - "type": "object" - }, - "message": { - "type": "string" - } - } - } - } - }, - "fallback_used": { - "type": "boolean" - } - } - } -} \ No newline at end of file diff --git a/routes/utils.js b/routes/utils.js index 5319ccee..84f70545 100644 --- a/routes/utils.js +++ b/routes/utils.js @@ -4,7 +4,7 @@ const git = require("git-rev"); const pkg = require("../package.json"); const Jsonfile = require("jsonfile"); const Utils = require('../utils'); -const repoMapping = require('../indexes/repo/mapping_201.json'); +const repoMapping = require('../indexes/repo/mapping.json'); const searchPropsByType = Utils.getFlattenedMappingPropertiesByType(repoMapping["repo"]); diff --git a/services/indexer/repo/index.js b/services/indexer/repo/index.js index f4d70c4e..8ad8dc2b 100644 --- a/services/indexer/repo/index.js +++ b/services/indexer/repo/index.js @@ -6,7 +6,7 @@ const AbstractIndexer = require("../abstract_indexer"); const AgencyJsonStream = require("../repo/AgencyJsonStream"); const RepoIndexerStream = require("../repo/RepoIndexStream"); -const ES_MAPPING = require("../../../indexes/repo/mapping_201.json"); +const ES_MAPPING = require("../../../indexes/repo/mapping.json"); const ES_SETTINGS = require("../../../indexes/repo/settings.json"); const ES_PARAMS = { diff --git a/services/indexer/term/index.js b/services/indexer/term/index.js index 0f084c76..dd35c4e8 100644 --- a/services/indexer/term/index.js +++ b/services/indexer/term/index.js @@ -6,7 +6,7 @@ const getConfig = require('../../../config'); // NOTE: dependent on elasticsearch repos being indexed -const ES_REPO_MAPPING = require("../../../indexes/repo/mapping_201.json"); +const ES_REPO_MAPPING = require("../../../indexes/repo/mapping.json"); 
const ES_REPO_SETTINGS = require("../../../indexes/repo/settings.json"); const ES_REPO_PARAMS = { "esAlias": "repos", diff --git a/services/searcher/index.js b/services/searcher/index.js index 4ebea044..368a1803 100644 --- a/services/searcher/index.js +++ b/services/searcher/index.js @@ -3,7 +3,7 @@ const Bodybuilder = require("bodybuilder"); const moment = require("moment"); const Utils = require("../../utils"); const Logger = require("../../utils/logger"); -const repoMapping = require("../../indexes/repo/mapping_201.json"); +const repoMapping = require("../../indexes/repo/mapping.json"); const DATE_FORMAT = "YYYY-MM-DD"; const REPO_RESULT_SIZE_MAX = 10000; diff --git a/test/unit/services/indexer/repo/repoIndexStream.test.js b/test/unit/services/indexer/repo/repoIndexStream.test.js index c896135c..a0c141b9 100644 --- a/test/unit/services/indexer/repo/repoIndexStream.test.js +++ b/test/unit/services/indexer/repo/repoIndexStream.test.js @@ -16,7 +16,7 @@ describe('Index given repo', function(done) { let indexer; let mockAdapter = new MockAdapter(); - const ES_MAPPING = require("../../../../../indexes/repo/mapping_201.json"); + const ES_MAPPING = require("../../../../../indexes/repo/mapping.json"); const ES_SETTINGS = require("../../../../../indexes/repo/settings.json"); const ES_PARAMS = { "esAlias": "repos", diff --git a/test/unit/test_data/mappings/mapping_201.json b/test/unit/test_data/mappings/mapping.json similarity index 100% rename from test/unit/test_data/mappings/mapping_201.json rename to test/unit/test_data/mappings/mapping.json diff --git a/test/unit/utils.test.js b/test/unit/utils.test.js index 7e53dd94..86757939 100644 --- a/test/unit/utils.test.js +++ b/test/unit/utils.test.js @@ -14,7 +14,7 @@ describe('Testing Utils module', function () { describe('flatten mapping properties', function () { let mappings; before(function () { - mappings = require('./test_data/mappings/mapping_201.json'); + mappings = require('./test_data/mappings/mapping.json'); }) 
it('should return a flattened versions of the passed mapping object using getFlattenedMappingProperties', function () { const expected = { From 152ab578ffd93c0cf295db2997391f36172acaa5 Mon Sep 17 00:00:00 2001 From: Froilan Irizarry Date: Tue, 18 Sep 2018 17:17:55 -0400 Subject: [PATCH 2/7] Elasticsearch mappings and filter refactor - Elasticsearch mappings were updated to be 100% compatible with new features in Elasticsearch 5.x and newer - Text fields were removed from the fields properties following best practices. - String fields were changed to text and keyword types where appropriate. This was done because the string type has been removed. - keyword_ci analyzer was removed since it is no longer needed. All fields that used this analyzer were changed to the keyword type. - the field _fulltext was removed in favor of the keyword field. Now fields that need full text search are of text type from the start with keyword fields where appropriate. - Changed the filter construction code to take into account the new changes to the mappings. - Search and filter properties do not use string as a type. 
These are constructed using keyword and text types --- indexes/repo/mapping.json | 404 +++++++++++-------------------------- indexes/repo/settings.json | 11 +- routes/utils.js | 4 +- services/searcher/index.js | 24 +-- 4 files changed, 144 insertions(+), 299 deletions(-) diff --git a/indexes/repo/mapping.json b/indexes/repo/mapping.json index 765d29fc..3388a8f6 100644 --- a/indexes/repo/mapping.json +++ b/indexes/repo/mapping.json @@ -2,62 +2,56 @@ "repo": { "properties": { "repoID": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "normalizer": "lowercase" }, "agency": { "type": "object", "properties": { "name": { - "type": "string", - "analyzer": "keyword_ci", + "type": "text", + "analyzer": "englishfulltext", "fields": { - "_fulltext": { - "type": "string", - "analyzer": "englishfulltext" + "keyword": { + "type": "keyword", + "normalizer": "lowercase" } } }, "acronym": { - "type": "string", - "analyzer": "keyword_ci", + "type": "keyword", "fields": { - "_fulltext": { - "type": "string", - "analyzer": "keyword_ci" + "keyword": { + "type": "keyword", + "normalizer": "lowercase" } } }, "website": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "normalizer": "lowercase" }, "codeUrl": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "normalizer": "lowercase" }, "requirements": { "type": "nested", "properties": { "agencyWidePolicy": { - "type": "float", - "index": "not_analyzed" + "type": "float" }, "openSourceRequirement": { - "type": "float", - "index": "not_analyzed" + "type": "float" }, "inventoryRequirement": { - "type": "float", - "index": "not_analyzed" + "type": "float" }, "schemaFormat": { - "type": "float", - "index": "not_analyzed" + "type": "float" }, "overallCompliance": { - "type": "float", - "index": "not_analyzed" + "type": "float" } } } @@ -67,106 +61,98 @@ "type": "object", "properties": { "method": { - "type": "string", - "analyzer": "keyword_ci", - "fields": { - "_fulltext": { - 
"type": "string", - "analyzer": "englishfulltext" - } - } + "type": "keyword" }, "ifOther": { - "type": "string", + "type": "text", "analyzer": "englishfulltext", "fields": { - "_fulltext": { - "type": "string", - "analyzer": "englishfulltext" + "keyword": { + "type": "keyword", + "normalizer": "lowercase" } } } } }, "status": { - "type": "string", - "analyzer": "keyword_ci" + "type": "keyword", + "normalizer": "lowercase" }, "vcs": { - "type": "string", - "analyzer": "keyword_ci" + "type": "keyword", + "normalizer": "lowercase" }, "repositoryURL": { - "type": "string", - "analyzer": "keyword_ci" + "type": "keyword" }, "targetOperatingSystems": { - "type": "string", - "analyzer": "keyword_ci" + "type": "keyword", + "normalizer": "lowercase" }, "name": { - "type": "string", - "analyzer": "keyword_ci", + "type": "text", + "analyzer": "englishfulltext", "fields": { - "_fulltext": { - "type": "string", - "analyzer": "englishfulltext" + "keyword": { + "type": "keyword", + "normalizer": "lowercase" } } }, "version": { - "type": "string", - "analyzer": "keyword_ci" + "type": "keyword", + "normalizer": "lowercase" }, "organization": { - "type": "string", - "analyzer": "keyword_ci", + "type": "text", + "analyzer": "englishfulltext", "fields": { - "_fulltext": { - "type": "string", - "analyzer": "englishfulltext" + "keyword": { + "type": "keyword", + "normalizer": "lowercase" } } }, "homepageURL": { - "type": "string", - "analyzer": "keyword_ci" + "type": "keyword", + "normalizer": "lowercase" }, "downloadURL": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "normalizer": "lowercase" }, "description": { - "type": "string", + "type": "text", "analyzer": "englishfulltext" }, "events": { - "type": "string", - "analyzer": "keyword_ci", + "type": "text", + "analyzer": "englishfulltext", "fields": { - "_fulltext": { - "type": "string", - "analyzer": "englishfulltext" + "keyword": { + "type": "keyword", + "normalizer": "lowercase" } } }, "tags": { - "type": 
"string", - "analyzer": "keyword_ci", + "type": "text", + "analyzer": "englishfulltext", "fields": { - "_fulltext": { - "type": "string", - "analyzer": "englishfulltext" + "keyword": { + "type": "keyword", + "normalizer": "lowercase" } } }, "languages": { - "type": "string", - "analyzer": "keyword_ci", + "type": "text", + "analyzer": "englishfulltext", "fields": { "_fulltext": { - "type": "string", - "analyzer": "englishfulltext" + "type": "keyword", + "normalizer": "lowercase" } } }, @@ -175,42 +161,42 @@ "include_in_root": true, "properties": { "name": { - "type": "string", - "analyzer": "keyword_ci", + "type": "text", + "analyzer": "englishfulltext", "fields": { - "_fulltext": { - "type": "string", - "analyzer": "englishfulltext" + "keyword": { + "type": "keyword", + "normalizer": "lowercase" } } }, "email": { - "type": "string", - "analyzer": "keyword_ci", + "type": "text", + "analyzer": "englishfulltext", "fields": { - "_fulltext": { - "type": "string", - "analyzer": "englishfulltext" + "keyword": { + "type": "keyword", + "normalizer": "lowercase" } } }, "twitter": { - "type": "string", - "analyzer": "keyword_ci", + "type": "text", + "analyzer": "englishfulltext", "fields": { - "_fulltext": { - "type": "string", - "analyzer": "englishfulltext" + "keyword": { + "type": "keyword", + "normalizer": "lowercase" } } }, "phone": { - "type": "string", - "analyzer": "keyword_ci", + "type": "text", + "analyzer": "englishfulltext", "fields": { - "_fulltext": { - "type": "string", - "analyzer": "englishfulltext" + "keyword": { + "type": "keyword", + "normalizer": "lowercase" } } } @@ -221,22 +207,20 @@ "include_in_root": true, "properties": { "name": { - "type": "string", - "analyzer": "keyword_ci", + "type": "text", + "analyzer": "englishfulltext", "fields": { - "_fulltext": { - "type": "string", - "analyzer": "englishfulltext" + "keyword": { + "type": "keyword" } } }, "email": { - "type": "string", - "analyzer": "keyword_ci", + "type": "text", + "analyzer": 
"englishfulltext", "fields": { - "_fulltext": { - "type": "string", - "analyzer": "englishfulltext" + "keyword": { + "type": "keyword" } } } @@ -249,57 +233,56 @@ "type": "nested", "properties": { "name": { - "type": "string", - "analyzer": "keyword_ci", + "type": "text", + "analyzer": "englishfulltext", "fields": { - "_fulltext": { - "type": "string", - "analyzer": "englishfulltext" + "keyword": { + "type": "keyword", + "normalizer": "lowercase" } } }, "URL": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "normalizer": "lowercase" } } }, "usageType": { - "type": "string", - "analyzer": "keyword_ci", + "type": "text", + "analyzer": "englishfulltext", "fields": { - "_fulltext": { - "type": "string", - "analyzer": "englishfulltext" + "keyword": { + "type": "keyword", + "normalizer": "lowercase" } } }, "exemptionText": { - "type": "string", + "type": "text", "analyzer": "englishfulltext" } } }, "laborHours": { - "type": "integer", - "index": "not_analyzed" + "type": "integer" }, "relatedCode": { "type": "nested", "properties": { "name": { - "type": "string", - "analyzer": "keyword_ci", + "type": "text", + "analyzer": "englishfulltext", "fields": { - "_fulltext": { - "type": "string", - "analyzer": "englishfulltext" + "keyword": { + "type": "keyword", + "normalizer": "lowercase" } } }, "URL": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "normalizer": "lowercase" } } }, @@ -307,27 +290,27 @@ "type": "nested", "properties": { "name": { - "type": "string", - "analyzer": "keyword_ci", + "type": "text", + "analyzer": "englishfulltext", "fields": { - "_fulltext": { - "type": "string", - "analyzer": "englishfulltext" + "keyword": { + "type": "keyword", + "normalizer": "lowercase" } } }, "URL": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "normalizer": "lowercase" } } }, "disclaimerURL": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "normalizer": "lowercase" }, 
"disclaimerText": { - "type": "string", + "type": "text", "analyzer": "englishfulltext" }, "additionalInformation": { @@ -353,148 +336,5 @@ } } } - }, - "status": { - "properties": { - "last_data_harvest": { - "type": "date" - }, - "version": { - "type": "string", - "index": "not_analyzed" - }, - "agency": { - "type": "object", - "properties": { - "name": { - "type": "string", - "analyzer": "keyword_ci", - "fields": { - "_fulltext": { - "type": "string", - "analyzer": "englishfulltext" - } - } - }, - "acronym": { - "type": "string", - "analyzer": "keyword_ci", - "fields": { - "_fulltext": { - "type": "string", - "analyzer": "keyword_ci" - } - } - }, - "website": { - "type": "string", - "index": "not_analyzed" - }, - "codeUrl": { - "type": "string", - "index": "not_analyzed" - }, - "requirements": { - "type": "object", - "properties": { - "agencyWidePolicy": { - "type": "float", - "index": "not_analyzed" - }, - "openSourceRequirement": { - "type": "float", - "index": "not_analyzed" - }, - "inventoryRequirement": { - "type": "float", - "index": "not_analyzed" - }, - "schemaFormat": { - "type": "float", - "index": "not_analyzed" - }, - "overallCompliance": { - "type": "float", - "index": "not_analyzed" - } - } - } - } - }, - "issues": { - "type": "nested", - "properties": { - "organization": { - "type": "string" - }, - "project_name": { - "type": "string" - }, - "errors": { - "type": "nested", - "properties": { - "keyword": { - "type": "string" - }, - "dataPath": { - "type": "string" - }, - "schemaPath": { - "type": "string" - }, - "params": { - "type": "object" - }, - "message": { - "type": "string" - } - } - }, - "warning": { - "type": "nested", - "properties": { - "keyword": { - "type": "string" - }, - "dataPath": { - "type": "string" - }, - "schemaPath": { - "type": "string" - }, - "params": { - "type": "object" - }, - "message": { - "type": "string" - } - } - }, - "enhancements": { - "type": "nested", - "properties": { - "keyword": { - "type": "string" - }, - 
"dataPath": { - "type": "string" - }, - "schemaPath": { - "type": "string" - }, - "params": { - "type": "object" - }, - "message": { - "type": "string" - } - } - } - } - }, - "fallback_used": { - "type": "boolean" - } - } } } \ No newline at end of file diff --git a/indexes/repo/settings.json b/indexes/repo/settings.json index 25611df0..7787d2c3 100644 --- a/indexes/repo/settings.json +++ b/indexes/repo/settings.json @@ -1,5 +1,12 @@ { "analysis": { + "normalizer": { + "lowercase": { + "type": "custom", + "char_filter": [], + "filter": ["lowercase"] + } + }, "filter": { "english_stop": { "type": "stop", @@ -25,10 +32,6 @@ } }, "analyzer": { - "keyword_ci": { - "filter": "lowercase", - "tokenizer": "keyword" - }, "englishhtml": { "tokenizer": "standard", "char_filter": ["html_strip"], diff --git a/routes/utils.js b/routes/utils.js index 84f70545..f1d4fb8a 100644 --- a/routes/utils.js +++ b/routes/utils.js @@ -119,7 +119,9 @@ function getInvalidRepoQueryParams (queryParams) { let without = _.without(queryParams, "from", "size", "sort", "q", "include", "exclude"); return without.filter((queryParam) => { - if (_.includes(searchPropsByType["string"], queryParam)) { + if (_.includes(searchPropsByType["keyword"], queryParam)) { + return false; + } else if (_.includes(searchPropsByType["text"], queryParam)) { return false; } else if (queryParam.endsWith("_gte") || queryParam.endsWith("_lte")) { let paramWithoutOp = queryParam.substring(0, queryParam.length - 4); diff --git a/services/searcher/index.js b/services/searcher/index.js index 368a1803..0b16965a 100644 --- a/services/searcher/index.js +++ b/services/searcher/index.js @@ -102,17 +102,17 @@ class Searcher { _addFullTextQuery(body, searchQuery) { const searchFields = [ - "name^10", - "name._fulltext^5", + "name^5", + "name.keyword^10", "description^2", "agency.acronym", - "agency.name^5", - "agency.name._fulltext", + "agency.name", + "agency.name.keyword^5", "permissions.usageType", "tags^3", - "tags._fulltext", - 
"languages^3", - "languages._fulltext" + "tags.keyword^3", + "languages", + "languages.keyword^3" ]; body.query("multi_match", 'fields', searchFields, {"query": searchQuery}, {"type": "best_fields"}); @@ -122,16 +122,16 @@ class Searcher { if (filter instanceof Array) { filter.forEach((filterElement) => { logger.info(filterElement); - body.orFilter("term", field, filterElement.toLowerCase()); + body.orFilter("term", `${field}.keyword`, filterElement.toLowerCase()); }); } else { - body.filter("term", field, filter.toLowerCase()); + body.filter("term", `${field}.keyword`, filter.toLowerCase()); } } _addStringFilters(body, queryParams) { - searchPropsByType['string'].forEach((field) => { + searchPropsByType['keyword'].forEach((field) => { if(queryParams[field]) { this._addStringFilter(body, field, queryParams[field]); } @@ -250,9 +250,9 @@ class Searcher { sortOptions.mode = item; } }); - body.sort(sortField, sortOptions); + body.sort(`${sortField}.keyword`, sortOptions); } else { - body.sort(sortField, 'asc'); + body.sort(`${sortField}.keyword`, 'asc'); } }); } From 049e50f74e6054e053ea8f2e98042181ea584c29 Mon Sep 17 00:00:00 2001 From: Froilan Irizarry Date: Tue, 18 Sep 2018 17:22:27 -0400 Subject: [PATCH 3/7] Change default Elasticsearch URL to the default URL expected by Elasticsearch 5.x and above --- config/index.js | 4 ++-- test/unit/config-loading.test.js | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/config/index.js b/config/index.js index f8c5dd55..e91620be 100644 --- a/config/index.js +++ b/config/index.js @@ -18,9 +18,9 @@ function getElasticsearchUri(cloudFoundryEnv) { return elasticSearchCredentials.uri ? elasticSearchCredentials.uri - : 'http://localhost:9200'; + : 'http://elastic:changeme@localhost:9200'; } - return process.env.ES_URI ? process.env.ES_URI : 'http://localhost:9200'; + return process.env.ES_URI ? 
process.env.ES_URI : 'http://elastic:changeme@localhost:9200'; } /** diff --git a/test/unit/config-loading.test.js b/test/unit/config-loading.test.js index 06211ef5..aff3eb9c 100644 --- a/test/unit/config-loading.test.js +++ b/test/unit/config-loading.test.js @@ -12,7 +12,7 @@ describe('Load config', function() { '2.0.0' ]; - config.ES_HOST.should.be.equal('http://localhost:9200'); + config.ES_HOST.should.be.equal('http://elastic:changeme@localhost:9200'); config.USE_HSTS.should.be.equal(false); config.HSTS_MAX_AGE.should.be.equal(31536000); config.HSTS_PRELOAD.should.be.equal(false); From c217d5dbf07ef30d3e28daa3b5ade9d3de43a4a4 Mon Sep 17 00:00:00 2001 From: Froilan Irizarry Date: Tue, 18 Sep 2018 21:15:24 -0400 Subject: [PATCH 4/7] Update status index mapping and settings to Elasticsearch 5.x and above format --- indexes/status/mapping.json | 90 ++++++++++++++++++++---------------- indexes/status/settings.json | 7 +++ 2 files changed, 57 insertions(+), 40 deletions(-) diff --git a/indexes/status/mapping.json b/indexes/status/mapping.json index 03ddb12d..3a264a09 100644 --- a/indexes/status/mapping.json +++ b/indexes/status/mapping.json @@ -5,62 +5,55 @@ "type": "date" }, "version": { - "type": "string", - "index": "not_analyzed" + "type": "keyword" }, "agency": { "type": "object", "properties": { "name": { - "type": "string", - "analyzer": "keyword_ci", + "type": "text", + "analyzer": "englishfulltext", "fields": { - "_fulltext": { - "type": "string", - "analyzer": "englishfulltext" + "keyword": { + "type": "keyword", + "normalizer": "lowercase" } } }, "acronym": { - "type": "string", - "analyzer": "keyword_ci", + "type": "keyword", "fields": { - "_fulltext": { - "type": "string", - "analyzer": "keyword_ci" + "keyword": { + "type": "keyword", + "normalizer": "lowercase" } } }, "website": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "normalizer": "lowercase" }, "codeUrl": { - "type": "string", - "index": "not_analyzed" + "type": 
"keyword", + "normalizer": "lowercase" }, "requirements": { "type": "object", "properties": { "agencyWidePolicy": { - "type": "float", - "index": "not_analyzed" + "type": "float" }, "openSourceRequirement": { - "type": "float", - "index": "not_analyzed" + "type": "float" }, "inventoryRequirement": { - "type": "float", - "index": "not_analyzed" + "type": "float" }, "schemaFormat": { - "type": "float", - "index": "not_analyzed" + "type": "float" }, "overallCompliance": { - "type": "float", - "index": "not_analyzed" + "type": "float" } } } @@ -70,28 +63,43 @@ "type": "nested", "properties": { "organization": { - "type": "string" + "type": "text", + "analyzer": "englishfulltext", + "fields": { + "keyword": { + "type": "keyword", + "normalizer": "lowercase" + } + } }, "project_name": { - "type": "string" + "type": "text", + "analyzer": "englishfulltext", + "fields": { + "keyword": { + "type": "keyword", + "normalizer": "lowercase" + } + } }, "errors": { "type": "nested", "properties": { "keyword": { - "type": "string" + "type": "keyword" }, "dataPath": { - "type": "string" + "type": "keyword" }, "schemaPath": { - "type": "string" + "type": "keyword" }, "params": { "type": "object" }, "message": { - "type": "string" + "type": "text", + "analyzer": "englishfulltext" } } }, @@ -99,19 +107,20 @@ "type": "nested", "properties": { "keyword": { - "type": "string" + "type": "keyword" }, "dataPath": { - "type": "string" + "type": "keyword" }, "schemaPath": { - "type": "string" + "type": "keyword" }, "params": { "type": "object" }, "message": { - "type": "string" + "type": "text", + "analyzer": "englishfulltext" } } }, @@ -119,19 +128,20 @@ "type": "nested", "properties": { "keyword": { - "type": "string" + "type": "keyword" }, "dataPath": { - "type": "string" + "type": "keyword" }, "schemaPath": { - "type": "string" + "type": "keyword" }, "params": { "type": "object" }, "message": { - "type": "string" + "type": "text", + "analyzer": "englishfulltext" } } } diff --git 
a/indexes/status/settings.json b/indexes/status/settings.json index 37af98ba..6612d394 100644 --- a/indexes/status/settings.json +++ b/indexes/status/settings.json @@ -1,6 +1,13 @@ { "index.mapping.total_fields.limit": 2000, "analysis": { + "normalizer": { + "lowercase": { + "type": "custom", + "char_filter": [], + "filter": ["lowercase"] + } + }, "filter": { "english_stop": { "type": "stop", From a19d9da2517d056cf19fb56b8d575fe41ed5b344 Mon Sep 17 00:00:00 2001 From: Froilan Irizarry Date: Tue, 18 Sep 2018 21:22:27 -0400 Subject: [PATCH 5/7] Updated Elasticsearch mappings and settings for terms index to be compatible with ES 5.x and above --- indexes/term/mapping.json | 12 ++++++------ indexes/term/settings.json | 11 +++++++---- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/indexes/term/mapping.json b/indexes/term/mapping.json index 896c8c5d..d91ec37d 100644 --- a/indexes/term/mapping.json +++ b/indexes/term/mapping.json @@ -2,21 +2,21 @@ "term": { "properties": { "term_key": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "normalizer": "lowercase" }, "term": { - "type": "string", - "analyzer": "keyword_ci", + "type": "keyword", + "normalizer": "lowercase", "copy_to": "term_suggest" }, "term_suggest": { - "type": "string", + "type": "text", "analyzer": "autocomplete_index", "search_analyzer": "autocomplete_search" }, "term_type": { - "type": "string" + "type": "keyword" }, "count": { "type": "long" diff --git a/indexes/term/settings.json b/indexes/term/settings.json index ede28278..dd9355ae 100644 --- a/indexes/term/settings.json +++ b/indexes/term/settings.json @@ -1,5 +1,12 @@ { "analysis": { + "normalizer": { + "lowercase": { + "type": "custom", + "char_filter": [], + "filter": ["lowercase"] + } + }, "filter": { "autocomplete_filter": { "type": "edge_ngram", @@ -30,10 +37,6 @@ "lowercase", "asciifolding" ] - }, - "keyword_ci": { - "tokenizer": "keyword", - "filter": "lowercase" } } } From 
fbbb898e8d479bc944cb6fd8105f700ef7ab35be Mon Sep 17 00:00:00 2001 From: Froilan Irizarry Date: Tue, 18 Sep 2018 21:33:25 -0400 Subject: [PATCH 6/7] Updated Elasticsearch docker image for integration tests --- .circleci/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index b09d83f1..aaed7c0a 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -68,7 +68,7 @@ jobs: working_directory: ~/code-gov-api docker: - image: circleci/node:8.9.3-stretch - - image: elasticsearch:2.4 + - image: docker.elastic.co/elasticsearch/elasticsearch:5.6.12 environment: NODE_ENV: "testing" steps: From 6c7948a5c83319cb66aca72460a7dbd868004930 Mon Sep 17 00:00:00 2001 From: Froilan Irizarry Date: Tue, 18 Sep 2018 21:46:16 -0400 Subject: [PATCH 7/7] Added sleep before running index command as a test to wait for elasticsearch to be up --- .circleci/config.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index aaed7c0a..17a2caca 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -95,7 +95,10 @@ jobs: - ./node_modules - run: name: get-data - command: npm run index + command: | + set -x + sleep 45 + npm run index - run: name: run-test command: npm run integration-test