Skip to content

Commit 8042e60

Browse files
committed
Loading models hosted on Hugging Face
1 parent 8a7d6d3 commit 8042e60

File tree

4 files changed

+16
-21
lines changed

4 files changed

+16
-21
lines changed

docker-compose.yml

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,6 @@ services:
1414
build:
1515
dockerfile: ./docker/entity-extraction-pipeline/Dockerfile
1616
context: .
17-
args:
18-
HF_NER_MODEL_NAME: "roberta-finetuned-v3"
19-
SPACY_NER_MODEL_NAME: "spacy-transformer-v3"
2017
ports:
2118
- "5000:5000"
2219
volumes:

docker/entity-extraction-pipeline/Dockerfile

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10,25 +10,18 @@ COPY docker/entity-extraction-pipeline/requirements.txt .
1010
# Install the required Python packages
1111
RUN pip install --no-cache-dir -r requirements.txt
1212
RUN python -m nltk.downloader stopwords
13+
RUN pip install https://huggingface.co/finding-fossils/metaextractor-spacy/resolve/main/en_metaextractor_spacy-any-py3-none-any.whl
1314

1415
# Copy the src folder into the container
1516
COPY src ./src
1617

17-
# Build args
18-
ARG HF_NER_MODEL_NAME
19-
ARG SPACY_NER_MODEL_NAME
20-
2118
# Set env variables used when running the container
22-
ENV HF_NER_MODEL_NAME=${HF_NER_MODEL_NAME}
23-
ENV SPACY_NER_MODEL_NAME=${SPACY_NER_MODEL_NAME}
24-
ENV USE_NER_MODEL_TYPE=huggingface
19+
ENV HF_NER_MODEL_NAME="finding-fossils/metaextractor"
20+
ENV SPACY_NER_MODEL_NAME="en_metaextractor_spacy"
21+
ENV USE_NER_MODEL_TYPE="huggingface"
2522
ENV MAX_ARTICLES=-1
2623
ENV MAX_SENTENCES=-1
2724

28-
# Copy in the model defined by the env variable NER_MODEL_NAME from models folder
29-
COPY models/ner/${HF_NER_MODEL_NAME} ./models/ner/${HF_NER_MODEL_NAME}
30-
COPY models/ner/${SPACY_NER_MODEL_NAME} ./models/ner/${SPACY_NER_MODEL_NAME}
31-
3225
# Non-root user setup inspired by: https://stackoverflow.com/questions/66349101/docker-non-root-user-does-not-have-writing-permissions-when-using-volumes
3326
# Create a non-root user that owns the input/outputs directory by default
3427
RUN useradd -r extraction-user # no specific user ID

src/entity_extraction/spacy_entity_extraction.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,13 @@ def spacy_extract_all(
3232
"""
3333

3434
if ner_model == None:
35-
logger.info("Empty model passed, return 0 labels.")
36-
return []
35+
try:
36+
import en_metaextractor_spacy
37+
ner_model = en_metaextractor_spacy.load()
38+
except:
39+
logger.error(f"Spacy model en_metaextractor_spacy not found.")
40+
logger.info("Empty model passed, return 0 labels.")
41+
return []
3742

3843
entities = []
3944
doc = ner_model(text)

src/pipeline/entity_extraction_pipeline.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,8 @@
3434
load_dotenv(find_dotenv())
3535

3636
# Get the NER model names and the model type from environment variables
37-
HF_NER_MODEL_NAME = os.getenv("HF_NER_MODEL_NAME", "roberta-finetuned-v3")
38-
SPACY_NER_MODEL_NAME = os.getenv("SPACY_NER_MODEL_NAME", "spacy-transformer-v3")
37+
HF_NER_MODEL_NAME = os.getenv("HF_NER_MODEL_NAME", "finding-fossils/metaextractor")
38+
SPACY_NER_MODEL_NAME = os.getenv("SPACY_NER_MODEL_NAME", "en_metaextractor_spacy")
3939
USE_NER_MODEL_TYPE = os.getenv("USE_NER_MODEL_TYPE", "huggingface")
4040

4141
logger = get_logger(__name__)
@@ -286,7 +286,7 @@ def recreate_original_sentences_with_labels(row):
286286
def extract_entities(
287287
article_text_data: pd.DataFrame,
288288
model_type: str = "huggingface",
289-
model_path: str = os.path.join("models", "ner", "roberta-finetuned-v3"),
289+
model_path: str = "finding-fossils/metaextractor",
290290
) -> pd.DataFrame:
291291
"""
292292
Extracts the entities from the article text data.
@@ -574,10 +574,10 @@ def main():
574574

575575
if USE_NER_MODEL_TYPE == "huggingface":
576576
logger.info(f"Using HuggingFace model {HF_NER_MODEL_NAME}")
577-
model_path = os.path.join("models", "ner", HF_NER_MODEL_NAME)
577+
model_path = HF_NER_MODEL_NAME
578578
elif USE_NER_MODEL_TYPE == "spacy":
579579
logger.info(f"Using Spacy model {SPACY_NER_MODEL_NAME}")
580-
model_path = os.path.join("models", "ner", SPACY_NER_MODEL_NAME)
580+
model_path = SPACY_NER_MODEL_NAME
581581
else:
582582
raise ValueError(
583583
f"Model type {USE_NER_MODEL_TYPE} not supported. Please set MODEL_TYPE to either 'huggingface' or 'spacy'."

0 commit comments

Comments
 (0)