Skip to content

Commit aa84f59

Browse files
authored
Merge pull request #98 from NeotomaDB/93-docker-compose-for-article-relevance
Docker compose for article relevance
2 parents 19f9caf + cf2e23b commit aa84f59

File tree

3 files changed

+40
-5
lines changed

3 files changed

+40
-5
lines changed

docker-compose.yml

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,28 @@
11
version: "3.9"
22
services:
3+
article-relevance-prediction:
4+
image: metaextractor-article-relevance:v0.0.1
5+
build:
6+
dockerfile: ./docker/article-relevance/Dockerfile
7+
context: .
8+
environment:
9+
- N_RECENT=10
10+
- MIN_DATE=
11+
- MAX_DATE=
12+
- TERM=
13+
- AUTO_MIN_DATE=False
14+
- AUTO_CHECK_DUP=False
15+
# Must match the container side of the outputs volume mount (/output) so results persist on the host.
- OUTPUT_PATH=/output/
16+
- SEND_XDD=False
17+
- DOI_FILE_PATH=/raw/gdd_api_return.json
18+
- MODEL_PATH=/models/logistic_regression_model.joblib
19+
20+
volumes:
21+
- ./data/article-relevance/outputs:/output
22+
- ./data/article-relevance/processed/prediction_parquet:/parquet
23+
- ./data/article-relevance/raw:/raw
24+
- ./models/article-relevance:/models
25+
326
data-review-tool:
427
image: metaextractor-data-review-tool:v0.0.1
528
build:

docker/article-relevance/Dockerfile

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
# Use the official Python 3.10 image as the base image
22
FROM python:3.10
33

4+
ENV DOI_PATH="/raw"
5+
ENV PARQUET_PATH="/parquet"
46
# Set the working directory inside the container
57
WORKDIR /app/
68

@@ -13,9 +15,6 @@ RUN pip install --no-cache-dir -r requirements.txt
1315
# Copy the application source folder (src) into the container
1416
COPY src ./src
1517

16-
# Copy the model folder into the container
17-
COPY models/article-relevance ./models/article-relevance
18-
1918
# Copy the shell script to the container
2019
COPY docker/article-relevance/run-prediction.sh .
2120

@@ -24,7 +23,10 @@ RUN chmod +x run-prediction.sh
2423

2524
# Create the mount-point directories and declare them as volumes
2625
RUN mkdir -p /output
27-
VOLUME ["/output"]
26+
RUN mkdir -p /raw
27+
RUN mkdir -p /parquet
28+
RUN mkdir -p /models
29+
VOLUME ["/output", "/parquet", "/raw", "/models"]
2830

2931
# Set the entry point for the Docker container
3032
ENTRYPOINT ["./run-prediction.sh"]

src/article_relevance/relevance_prediction_parquet.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -422,9 +422,19 @@ def main():
422422
opt = docopt(__doc__)
423423

424424
doi_list_file_path = opt["--doi_file_path"]
425-
model_path = opt['--model_path']
426425
output_path = opt['--output_path']
427426
send_xdd = opt['--send_xdd']
427+
428+
# # /models directory is a mounted volume, containing the model object
429+
# models = os.listdir("/models")
430+
# models = [f for f in models if f.endswith(".joblib")]
431+
432+
# if models:
433+
# model_path = os.path.join("/models", models[0])
434+
# else:
435+
# model_path = ""
436+
437+
model_path = opt['--model_path']
428438

429439
metadata_df = crossref_extract(doi_list_file_path)
430440

0 commit comments

Comments
 (0)