# data-engineering/pyproject.toml at main · NYCPlanning/data-engineering

# 215 lines (191 loc) · 5.26 KB
# Core SQLFluff settings: dialect, templater, excluded rules and linting limits.
[tool.sqlfluff.core]
verbose = 1
dialect = "postgres"
templater = "placeholder"
# Rules to exclude from linting.
exclude_rules = [
    "ambiguous.column_count",
    "references.qualification",
    "structure.unused_cte",
    "structure.column_order",
    "structure.subquery",
    "references.keywords",
    "references.consistent",
    "references.special_chars",
    "layout.cte_newline",
]
# The standard max_line_length is 80 in line with the convention of
# other tools and several style guides. Many projects however prefer
# something a little longer.
# Set to zero or negative to disable checks.
max_line_length = 120
# CPU processes to use while linting.
# The default is "single threaded" to allow easy debugging, but this
# is often undesirable at scale.
# If positive, just implies number of processes.
# If negative or zero, implies number_of_cpus - specified_number.
# e.g. -1 means use all processors but one. 0 means all cpus.
processes = -1
large_file_skip_byte_limit = 40000
# Maximum parse depth (grammar + bracket nesting). Prevents DoS from deeply
# nested SQL. Set to 0 or empty to disable. Default 255 (Oracle WHERE
# subquery limit).
max_parse_depth = 302 # this limit is driven by products/developments/sql/_now.sql
# Stand-in values for the placeholder templater, which [tool.sqlfluff.core]
# selects via `templater = "placeholder"`. Each key other than `param_style`
# names a placeholder and gives the text substituted for it during linting.
[tool.sqlfluff.templater.placeholder]
# NOTE(review): presumably matches `:name` placeholders with optional
# surrounding quotes - confirm against the SQLFluff templater docs.
param_style = "colon_optional_quotes"
# Versions and capture dates
VERSION = "2023-01-01"
VERSION_PREV = "2022-01-01"
CAPTURE_DATE = "2024-01-01"
CAPTURE_DATE_PREV = "2023-07-01"
# Table, schema and geometry identifiers
TABLE = "dummy_table_name"
build_schema = "public"
GEOM = "geom_2263"
ccp_v = "fisa_2000"
# Flags and SQL fragments
CONDO = "true"
MAPPED = "true"
CONDITION = "WHERE a.geom IS NOT NULL"
# Column lists (literal strings, so the embedded double quotes need no escaping)
internal_columns = '"Job_Number","Job_Type"'
external_columns = '"Job_Number","Job_Type"'
# Variables made available to the jinja templater.
# NOTE(review): [tool.sqlfluff.core] selects the placeholder templater, so this
# context presumably only applies where jinja is chosen explicitly - confirm.
[tool.sqlfluff.templater.jinja.context]
# Column and join settings
source_column = "bct2020"
output_column = "bct2020"
join_table = "dcp_ct2020"
geom_join_column = "boroct2020"
# List-like values are stored as strings, not TOML arrays.
additional_column_mappings = '[("geoid", "centract2020")]'
group_by = '["boro", "bct2020", "centract2020"]'
years = "['2010', '2011']"
# Scalar settings
decade = "2020"
geom = "CD"
CAPTURE_DATE = "2024-01-01"
# Filler values: any string is enough for these when linting.
geography_type = "anything"
geography_name = "anything"
geography_id = "anything"
table_name = "anything"
# Long-line rule: comment lines and comment clauses are ignored when checking
# against max_line_length.
[tool.sqlfluff.rules.layout.long_lines]
ignore_comment_lines = true
ignore_comment_clauses = true

# Keyword capitalisation rule: the policy is upper case.
[tool.sqlfluff.rules.capitalisation.keywords]
capitalisation_policy = "upper"

# Settings for the dbt templater; same dialect as [tool.sqlfluff.core].
[tool.sqlfluff.templater.dbt]
dialect = "postgres"
# pydocstyle checks that are switched off for this repository.
[tool.pydocstyle]
ignore = [
    "D100", # missing docstring in public module
    "D101", # missing docstring in public class
    "D102", # missing docstring in public method
    "D107", # missing docstring in __init__
    "D104", # missing docstring in public package
    "D213", # multi-line summary should start at the second line
    "D407", # missing dashed underline after section
    "D413", # missing blank line after last section
]
# Ruff lint rule selection.
# NOTE(review): the array key was missing from this table; `select` is inferred
# from the "Default rulesets" / "Additional rulesets" comments - confirm it
# should not be `extend-select`.
[tool.ruff.lint]
select = [
    # Default rulesets
    "E4", # Subset of pycodestyle
    "F",  # Pyflakes
    # Additional rulesets
    "I",  # Sort imports properly
]
# Rule codes ignored for specific file names. The keys must stay quoted because
# they contain a dot, which a bare TOML key would treat as a nested table.
[tool.ruff.lint.per-file-ignores]
# F401 = unused import, E402 = module-level import not at top of file.
"__init__.py" = ["F401", "E402"]
"conftest.py" = ["E402"]
"setup_dev_bucket.py" = ["E402"]
# Third-party modules with no stubs available: do not report missing imports.
# The patterns are listed as an array, one per line, instead of a single
# comma-joined string, so each entry can be added, removed or diffed on its own.
[[tool.mypy.overrides]]
module = [
    "geopandas.*",
    "geosupport.*",
    "cerberus.*",
    "osgeo.*",
    "diagrams.*",
    "usaddress.*",
    "plotly.*",
    "folium.*",
    "streamlit_folium.*",
    "st_aggrid.*",
    "numerize.*",
    "moto.*",
    "matplotlib.*",
    "contextily",
    "leafmap.*",
    "shapely",
    "socrata.*",
    "faker.*",
    "ruamel.*",
    "pyogrio.*",
    "ijson.*",
]
ignore_missing_imports = true
# pytest options: strict xfail handling, strict marker checking, and the list
# of markers registered for this repository.
[tool.pytest.ini_options]
addopts = "--strict-markers"
xfail_strict = true
markers = [
    "end_to_end",
]
# coverage.py measurement settings.
[tool.coverage.run]
# Measure branch coverage (https://coverage.readthedocs.io/en/latest/branch.html)
branch = true
# Test code itself is left out of the measurement.
omit = [
    "dcpy/test/*",
    "dcpy/test_integration/*",
]
# coverage.py reporting settings.
[tool.coverage.report]
include_namespace_packages = true
show_missing = true
# Regexes for lines to exclude from consideration
exclude_lines = [
    # Have to re-enable the standard pragma
    "pragma: no cover",
    # Don't complain if non-runnable code isn't run:
    # (each `.` matches either quote character around __main__)
    "if __name__ == .__main__.:",
    "@app.command",
]
# Build backend and the packages required to build this project.
[build-system]
build-backend = "setuptools.build_meta"
requires = [
    "setuptools",
    "setuptools-scm",
]
# Core package metadata. These keys belong to the [project] table; without the
# header they would be read as part of whichever table precedes them.
[project]
name = "dcpy"
version = "0.0.1"
description = "DCP Data Engineering's internal python package"
requires-python = ">=3.11"
# Runtime dependencies, sorted case-insensitively, one requirement per line.
dependencies = [
    "beautifulsoup4",
    "boto3",
    "cloudpathlib[all]",
    "duckdb",
    "faker",
    "geoalchemy2",
    "geopandas",
    "ijson",
    "Jinja2",
    "leafmap",
    "lxml",
    "openpyxl",
    "pandas",
    "paramiko",
    "psycopg2-binary",
    "pyarrow",
    "pydantic",
    "pydantic-xml",
    "pyogrio",
    "pytest-xdist",
    "python-dateutil",
    "python-dotenv",
    "pytz",
    "PyYAML",
    "requests",
    "rich",
    "shapely",
    "simplejson",
    "socrata-py",
    "sqlalchemy",
    "tabulate",
    "typer",
    "urllib3",
    "usaddress",
    "xlrd",
]
# Optional dependency groups (extras), installable as `dcpy[<extra>]`.
[project.optional-dependencies]
# Core utilities - database, s3, data processing (no GDAL)
# NOTE(review): the extra's name was missing; `utils` is inferred from the
# "dcpy[utils]" requirement in product-metadata below - confirm.
utils = [
    "boto3",
    "cloudpathlib[all]",
    "duckdb",
    "pandas",
    "psycopg2-binary",
    "pyarrow",
    "python-dateutil",
    "pytz",
    "sqlalchemy",
    "tabulate",
    "urllib3",
    "paramiko",
]
# Product metadata - standalone metadata reading/writing
product-metadata = [
    "dcpy[utils]",
    "pydantic",
    "PyYAML",
    "openpyxl",
    "beautifulsoup4",
    "css-inline",
    "lxml",
    "Jinja2",
]
# Package discovery: take everything matching dcpy*, except the test packages.
[tool.setuptools.packages.find]
include = [
    "dcpy*",
]
exclude = [
    "dcpy.test*",
]
# Console entry points, written as `command = "module:callable"`.
[project.scripts]
dcpy = "dcpy.__main__:cli"
library = "dcpy.library.cli:run"
# Non-Python files shipped inside the package.
# NOTE(review): the key naming the owning package was missing; `dcpy` is
# inferred from the project name and the `dcpy.library` module path used in
# [project.scripts] - confirm. The globs are then relative to that package.
[tool.setuptools.package-data]
dcpy = [
    "library/templates/*.yml",
    "lifecycle/package/resources/**",
    "product_metadata/writers/resources/**",
    "connectors/edm/bytes/site_map.json",
]
# Provide feedback

# Saved searches

# Use saved searches to filter your results more quickly

# FilesExpand file tree

# pyproject.toml

# Latest commit

# History

# pyproject.toml

# File metadata and controls