This repository was archived by the owner on Jul 28, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathmain.py
More file actions
46 lines (37 loc) · 1.37 KB
/
main.py
File metadata and controls
46 lines (37 loc) · 1.37 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import os
import sys
from pathlib import Path
# Base directory for the relative paths below: the current working directory,
# so `polls/rse_model` and `data/model` are resolved relative to wherever the
# script is launched from.
dir_path = Path(os.getcwd())
# Make the rse_watch package importable: polls/rse_model is appended to
# sys.path, which has to happen before the rse_watch imports further down.
sys.path.append(str(dir_path / 'polls/rse_model'))
# Directory passed to Config / DebugConfig in main().
model_directory = dir_path / "data/model"
from rse_watch.pdf_parser import run as run_parser
from rse_watch.indexer import run as run_indexer
from rse_watch.conf import Config, DebugConfig
from time import time
import argparse
def main(argv=None):
    """Parse command-line options and run the requested initialization tasks.

    Command-line options:
        --mode {final,debug}: "final" (default) builds a ``Config``, "debug"
            builds a ``DebugConfig``; both receive ``model_directory``.
        --task {parse,model,both}: "parse" runs only ``run_parser``, "model"
            runs only ``run_indexer``, "both" (default) runs the parser and
            then the indexer.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process command line. When ``None`` (the default) argparse reads
            ``sys.argv[1:]``, which is the original behavior.

    Raises:
        SystemExit: Raised by argparse on ``--help`` (code 0) or on invalid
            arguments (code 2).
    """
    # Local import so this block does not depend on a change to the file's
    # import section. A monotonic clock is used for the duration because the
    # wall clock can jump (NTP sync, manual changes) while the tasks run.
    from time import perf_counter

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--mode",
        default="final",
        choices=["final", "debug"],
        help="Whether to parse all dpefs or only a subset.",
    )
    parser.add_argument(
        "--task",
        default="both",
        choices=["parse", "model", "both"],
        help="Whether to parse pdfs, train the BM25+embedding model, or both.",
    )
    args = parser.parse_args(argv)

    # `choices` restricts --mode to "final" or "debug", so anything that is
    # not "debug" is "final".
    config_class = DebugConfig if args.mode == "debug" else Config
    config = config_class(model_directory)

    started = perf_counter()
    print("Begin Initialization.")
    if args.task in ("parse", "both"):
        run_parser(config)
    if args.task in ("model", "both"):
        run_indexer(config, create_from_scratch=True)
    print("Finished initialization")
    print("Took {} seconds to initialize.".format(int(perf_counter() - started)))
# Run the initialization only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()