-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrequirements.txt
More file actions
41 lines (34 loc) · 1.06 KB
/
requirements.txt
File metadata and controls
41 lines (34 loc) · 1.06 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# Core data collection dependencies
# HTTP client libraries (requests, curl_cffi) and a progress-bar library (tqdm).
# Every entry in this file is pinned to an exact version (==) so that installs
# are reproducible; bump a pin deliberately rather than loosening it.
requests==2.32.5
tqdm==4.67.1
curl_cffi==0.13.0
# Data processing dependencies
# Tabular data handling (pandas) and the array library it builds on (numpy).
# NOTE(review): the numpy 2.3 series is believed to require Python >= 3.11 —
# confirm against the interpreter version this project targets.
pandas==2.3.3
numpy==2.3.5
# Text processing and analysis
# Text repair and pattern matching (ftfy, regex), language identification
# (langid), HTML/XML parsing (beautifulsoup4, lxml), and linguistic tooling
# (nltk, spacy).
ftfy==6.3.1
regex==2025.11.3
langid==1.1.6
beautifulsoup4==4.14.2
lxml==6.0.2
nltk==3.9.2
spacy==3.8.11
# spaCy English pipelines (small and large). They are installed as wheels taken
# directly from the spacy-models GitHub releases using "name @ URL" direct
# references. The sha256 fragment at the end of each URL records the expected
# digest of that wheel.
# Both models are version 3.8.0 while spacy itself is pinned to 3.8.11.
# NOTE(review): when moving spacy to a new minor series, update both model
# URLs and their sha256 digests in the same change.
en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl#sha256=1932429db727d4bff3deed6b34cfc05df17794f4a52eeb26cf8928f7c1a0fb85
en_core_web_lg @ https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.8.0/en_core_web_lg-3.8.0-py3-none-any.whl#sha256=293e9547a655b25499198ab15a525b05b9407a75f10255e405e8c3854329ab63
# NLP and embeddings
# Sentence/text embedding models (sentence-transformers, transformers) and the
# tensor library they run on (torch), plus torchvision.
# NOTE(review): each torchvision release is built against one specific torch
# release — bump torch and torchvision together and check the published
# compatibility table when doing so.
sentence-transformers==5.2.0
transformers==4.57.3
torch==2.9.1
torchvision==0.24.1
# Scientific computing and visualization
# Numerical routines (scipy), classical machine learning (scikit-learn), and
# plotting (matplotlib, with seaborn layered on top of it).
scipy==1.16.3
scikit-learn==1.8.0
matplotlib==3.10.7
seaborn==0.13.2
# Topic modeling
# turftopic is the only topic-modeling library pinned in this file.
# NOTE(review): presumably it depends on the sentence-transformers and
# scikit-learn versions pinned elsewhere in this file — re-check compatibility
# whenever those pins move.
turftopic==0.23.1
# PII anonymization (optional)
# NOTE(review): although labelled optional, these two packages are listed in
# this file, so `pip install -r requirements.txt` installs them like every
# other entry. Move them to a separate requirements file if they should be
# opt-in.
# The analyzer and anonymizer are pinned to the same version (2.2.360); keep
# the two pins in step when upgrading.
presidio_analyzer==2.2.360
presidio_anonymizer==2.2.360
# Environment and utilities
# python-dotenv reads key=value pairs from a .env file into environment
# variables.
python-dotenv==1.2.1