-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdb.txt
More file actions
165 lines (154 loc) · 4.56 KB
/
db.txt
File metadata and controls
165 lines (154 loc) · 4.56 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
create table meta(
Run varchar(20),
AssayType varchar(50),
BioProject varchar(20),
BioSample varchar(20),
AvgSpotLen integer,
Bases bigint,
BioSampleModel varchar(50),
Bytes bigint,
CenterName varchar(100),
Experiment varchar(50),
geo_loc_name_country varchar(50),
geo_loc_name_country_continent varchar(50),
Host varchar(50),
Age varchar(100),
Sex varchar(100),
LibrarySource varchar(50),
Instrument varchar(50),
IsolationSource_EnvMedium varchar(50),
LibraryName varchar(100),
LibraryLayout varchar(50),
LibrarySelection varchar(20),
ReleaseDate varchar(50),
SampleName varchar(50),
SRAStudy varchar(50),
Organism varchar(100),
HostDisease varchar(100),
Description varchar(5000),
PMID varchar(20)
);
create table bioproject(
BioProject varchar(20),
SRA varchar(20),
Grp varchar(1000),
SubGroup varchar(5000),
IsolationSource varchar(100),
Biome varchar(60),
AssayType varchar(60),
Instrument varchar(200),
LibraryLayout varchar(20),
Year varchar(50),
ProcessedRuns integer,
VariableRegion varchar(50),
Country varchar(200),
PMID varchar(50),
primary key (BioProject)
);
create table run(
Run varchar(20),
Experiment varchar(20),
BioSample varchar(20),
BioProject varchar(20),
Bases bigint,
Bytes bigint,
Country varchar(60),
AssayType varchar(20),
Biome varchar(30),
IsolationSource varchar(100),
LibraryLayout varchar(20),
VariableRegion varchar(50),
Instrument varchar(100),
Year integer,
Grp varchar(200),
SubGroup varchar(500),
Gender varchar(30),
Age varchar(30),
SmokingStatus varchar(30),
Therapeutics varchar(1000),
Comorbidity varchar(1000),
ProcessedReads float,
primary key (Run),
constraint fk_bioproject foreign key (BioProject) references bioproject(BioProject),
constraint fk_subgroup foreign key (SubGroup) references disease(SubGroup)
);
create table disease(
SubGroup varchar(500),
Grp varchar(200),
primary key (SubGroup)
);
create table taxa(
Taxa varchar(100),
NCBITaxaID varchar(20),
Domain varchar(50),
TaxaLevel varchar(50),
BiofilmFormation varchar(500),
BiofilmFormationEvidence varchar(100),
GramStain varchar(500),
GramStainEvidence varchar(100),
SporeFormation varchar(500),
SporeFormationEvidence varchar(100),
Aerophilicity varchar(500),
AerophilicityEvidence varchar(100),
GenomeSize varchar(500),
GenomeSizeEvidence varchar(100),
CodingGenes varchar(500),
CodingGenesEvidence varchar(100),
AntimicrobialResistance varchar(500),
AntimicrobialResistanceEvidence varchar(100),
AntimicrobialSensitivity varchar(500),
AntimicrobialSensitivityEvidence varchar(100),
Shape varchar(500),
ShapeEvidence varchar(100),
Ploidy varchar(100),
Mycelium varchar(100),
InfectsTheLungs varchar(1000),
InfectsTheLungsEvidence varchar(500),
Pathogenic varchar(100),
Host varchar(100),
GeneticMaterial varchar(100),
Capsid varchar(100),
Envelop varchar(100),
Virion varchar(100),
primary key (Taxa)
);
create table abundance_subgroup (
Taxa varchar(100),
Sample varchar(500),
Abundance double,
SubGroup varchar(500),
BioProject varchar(20),
constraint fk_sg_abundance_taxa foreign key (Taxa) references taxa(Taxa),
constraint fk_sg_abundance_bioproject foreign key (BioProject) references bioproject(BioProject),
constraint fk_sg_abundance_subgroup foreign key (SubGroup) references disease(SubGroup)
);
create table abundance_biome (
Taxa varchar(100),
Sample varchar(500),
Abundance double,
Biome varchar(500),
BioProject varchar(20),
constraint fk_bm_abundance_taxa foreign key (Taxa) references taxa(Taxa),
constraint fk_bm_abundance_bioproject foreign key (BioProject) references bioproject(BioProject)
);
create table data_submission (
TKey varchar(100) primary key,
BioProject varchar(20),
Grp varchar(500),
PMID varchar(100),
Country varchar(100),
Email varchar(100),
IsolationSourceBiome varchar(200),
Link varchar(100)
);
SET GLOBAL local_infile=1;
quit
sudo mysql --local-infile=1
use mdpd;
load data local infile '/home/sudipto/data/projects/mdpd/input/input.csv' into table meta ignore 1 lines;
load data local infile '/home/sudipto/data/projects/mdpd/input/input_disease.csv' into table disease ignore 1 lines;
load data local infile '/home/sudipto/data/projects/mdpd/input/input_bioproject.csv' into table bioproject ignore 1 lines;
load data local infile '/home/sudipto/data/projects/mdpd/input/input_run.csv' into table run ignore 1 lines;
load data local infile '/home/sudipto/data/projects/mdpd/input/input_taxa.csv' into table taxa ignore 1 lines;
load data local infile '/home/sudipto/data/projects/mdpd/input/input_abundance_subgroup.csv' into table abundance_subgroup ignore 1 lines;
load data local infile '/home/sudipto/data/projects/mdpd/input/input_abundance_biome.csv' into table abundance_biome ignore 1 lines;