Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 53 additions & 1 deletion dataikuapi/iac/workflows/discovery/catalog_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ def _generate_navigation_menu(self, metadata: EnhancedBlockMetadata) -> str:
- [Dependencies](#dependencies)
- [Usage](#usage)
- [Flow Diagram](#flow-diagram)
- [Technical Details](#technical-details) _(Coming soon)_
- [Technical Details](#technical-details)
"""

def _generate_datasets_section(self, datasets: List[DatasetDetail]) -> str:
Expand Down Expand Up @@ -401,6 +401,51 @@ def _generate_flow_diagram(self, flow_graph: Optional[Dict[str, Any]]) -> str:

return "\n".join(mermaid_lines) + "\n"

def _generate_technical_details(self, metadata: EnhancedBlockMetadata) -> str:
    """
    Generate Technical Details section with dataset schemas.

    Creates a collapsible section for each dataset showing:
    - Dataset name in the ``<summary>`` line
    - Sample column names as a markdown table (the Type cell is a "-" placeholder)
    - Link to the full schema JSON file (``schemas/<block_id>_<dataset>.schema.json``)

    Args:
        metadata: EnhancedBlockMetadata with dataset_details

    Returns:
        Formatted markdown string with schema details, or empty string if no datasets

    Example:
        >>> writer = CatalogWriter()
        >>> ds = DatasetDetail(name="DS1", schema_summary={"sample": ["col1", "col2"]})
        >>> meta = EnhancedBlockMetadata(block_id="BLK", dataset_details=[ds])
        >>> section = writer._generate_technical_details(meta)
        >>> print("Schema: DS1" in section)
        True
    """
    # Early return if no datasets
    if not metadata.dataset_details:
        return ""

    # Collect fragments and join once instead of growing a string with +=.
    parts = ["## Technical Details\n\n", "### Dataset Schemas\n\n"]

    for ds in metadata.dataset_details:
        parts.append(f"<details>\n<summary>Schema: {ds.name}</summary>\n\n")
        # The table header is emitted even when there are no sample columns.
        parts.append("| Column | Type |\n|---|---|\n")

        # Tolerate a missing/None schema_summary or a None "sample" entry.
        schema_summary = ds.schema_summary or {}
        for col in schema_summary.get("sample") or []:
            # A raw "|" would be parsed as a cell separator and break the row.
            cell = str(col).replace("|", "\\|")
            parts.append(f"| {cell} | - |\n")

        # Add link to full schema JSON file
        parts.append(
            f"\n[Download Full Schema (JSON)](schemas/{metadata.block_id}_{ds.name}.schema.json)\n"
        )
        parts.append("</details>\n\n")

    return "".join(parts)

def generate_wiki_article(self, metadata: BlockMetadata) -> str:
"""
Generate wiki article from block metadata.
Expand Down Expand Up @@ -489,6 +534,13 @@ def generate_wiki_article(self, metadata: BlockMetadata) -> str:
sections.append(self._generate_flow_diagram(metadata.flow_graph))
sections.append("")

# 5.6 Technical Details (if EnhancedBlockMetadata)
if isinstance(metadata, EnhancedBlockMetadata):
tech_details = self._generate_technical_details(metadata)
if tech_details:
sections.append(tech_details)
sections.append("")

# 6. Contains Section
sections.append("## Contains")
sections.append("")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -304,9 +304,9 @@ def test_generate_navigation_menu_basic(self):
assert "Datasets (2)" in nav
assert "Recipes (1)" in nav

# Verify future sections
# Verify all sections
assert "[Flow Diagram](#flow-diagram)" in nav
assert "_(Coming soon)_" in nav
assert "[Technical Details](#technical-details)" in nav

def test_generate_navigation_menu_zero_items(self):
"""Test navigation menu with no datasets or recipes."""
Expand Down Expand Up @@ -423,5 +423,6 @@ def test_navigation_menu_no_coming_soon(self):
)
assert "- [Flow Diagram](#flow-diagram) _(Coming soon)_" not in nav

# Technical Details should still have "Coming soon" (future phase)
assert "- [Technical Details](#technical-details) _(Coming soon)_" in nav
# Technical Details should not have "Coming soon" (implemented in P9-F001)
assert "- [Technical Details](#technical-details)\n" in nav
assert "- [Technical Details](#technical-details) _(Coming soon)_" not in nav
194 changes: 194 additions & 0 deletions tests/iac/workflows/discovery/unit/test_catalog_writer_tech.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
"""Unit tests for CatalogWriter technical details generation (Phase 9)."""


class TestGenerateTechnicalDetailsSection:
    """Tests for _generate_technical_details method (P9-F001)."""

    @staticmethod
    def _dataset(name, ds_type, schema_summary):
        """Build a DatasetDetail with blank connection/format and the given schema summary."""
        from dataikuapi.iac.workflows.discovery.models import DatasetDetail

        return DatasetDetail(
            name=name,
            type=ds_type,
            connection="",
            format_type="",
            schema_summary=schema_summary,
        )

    @staticmethod
    def _render(block_id, datasets):
        """Wrap datasets in a zone-type EnhancedBlockMetadata and render the section."""
        from dataikuapi.iac.workflows.discovery.catalog_writer import CatalogWriter
        from dataikuapi.iac.workflows.discovery.models import EnhancedBlockMetadata

        block_meta = EnhancedBlockMetadata(
            block_id=block_id,
            version="1.0.0",
            type="zone",
            source_project="TEST",
            dataset_details=datasets,
        )
        return CatalogWriter()._generate_technical_details(block_meta)

    def test_generate_technical_details_basic(self):
        """Test basic technical details section generation."""
        rendered = self._render(
            "BLK",
            [self._dataset("DS1", "S3", {"sample": ["col1", "col2"]})],
        )

        expected_fragments = (
            "## Technical Details",
            "### Dataset Schemas",
            "Schema: DS1",
            "| col1 |",
            "| col2 |",
            "BLK_DS1.schema.json",
            "<details>",
            "</details>",
        )
        for fragment in expected_fragments:
            assert fragment in rendered

    def test_generate_technical_details_empty(self):
        """Test technical details with no datasets."""
        rendered = self._render("BLK", [])

        assert rendered == ""

    def test_generate_technical_details_no_sample(self):
        """Test technical details when schema has no sample columns."""
        # Empty schema_summary: there is no "sample" key at all.
        rendered = self._render(
            "BLK",
            [self._dataset("DS_NO_SAMPLE", "PostgreSQL", {})],
        )

        for fragment in (
            "Schema: DS_NO_SAMPLE",
            "BLK_DS_NO_SAMPLE.schema.json",
            "| Column | Type |",
        ):
            assert fragment in rendered

    def test_generate_technical_details_multiple_datasets(self):
        """Test technical details with multiple datasets."""
        inputs = self._dataset("INPUTS", "S3", {"sample": ["id", "name"]})
        outputs = self._dataset("OUTPUTS", "Snowflake", {"sample": ["id", "prediction"]})

        rendered = self._render("ML_BLOCK", [inputs, outputs])

        for fragment in (
            "Schema: INPUTS",
            "Schema: OUTPUTS",
            "| id |",
            "| name |",
            "| prediction |",
            "ML_BLOCK_INPUTS.schema.json",
            "ML_BLOCK_OUTPUTS.schema.json",
        ):
            assert fragment in rendered

    def test_generate_technical_details_special_characters(self):
        """Test handling of special characters in column names."""
        column_names = ["col_with_underscore", "col-with-dash"]

        rendered = self._render(
            "BLK",
            [self._dataset("SPECIAL", "S3", {"sample": column_names})],
        )

        assert "col_with_underscore" in rendered
        assert "col-with-dash" in rendered

    def test_generate_technical_details_formatting(self):
        """Test markdown formatting is correct."""
        rendered = self._render(
            "BLK",
            [self._dataset("FMT_TEST", "S3", {"sample": ["col1"]})],
        )

        # Verify markdown structure: heading first, closing tag last.
        assert rendered.startswith("## Technical Details\n")
        for fragment in (
            "<details>\n<summary>",
            "</summary>\n",
            "| Column | Type |",
            "[Download Full Schema (JSON)]",
        ):
            assert fragment in rendered
        assert rendered.endswith("</details>\n\n")
Loading