invisible-threads/create_final_export.py at main · baboonzero/invisible-threads

125 lines (111 loc) · 5.33 KB
"""Create final thread export with manually curated names and descriptions."""
import json
from datetime import datetime
# Load the cleaned thread data that the rest of the script curates.
# Read mode is the default for open(), so only the encoding is spelled out.
with open('insights_first/data/threads_v2_clean.json', encoding='utf-8') as source:
    data = json.load(source)
# Manual curation based on actual insight content.
#
# Keys are the machine-generated thread names (matched by substring further
# down in this script); each value is the curated replacement with:
#   name          - the human-readable thread title
#   core_claim    - a one-sentence statement of the thread's thesis
#   why_connected - why the grouped insights belong together
THREAD_NAMES = {
    "FOCUS_ON_CUSTOMER_SUCCESS": {
        "name": "Treat customers as partners, not transactions",
        "core_claim": "Companies that prioritize genuine customer relationships and invest in customer success outperform those focused on short-term sales.",
        "why_connected": "All insights emphasize personal engagement and long-term customer value over transactional relationships",
    },
    "PROJECT_TIMELINE_RESTRICTIONS": {
        "name": "Small teams with hard deadlines ship better",
        "core_claim": "Constraining team size and imposing strict time limits forces prioritization and prevents scope creep.",
        "why_connected": "Each insight shows how constraints (time, team size, roles) improve outcomes",
    },
    "Effective Communication Across Contexts": {
        "name": "Communication quality determines career impact",
        "core_claim": "Your ability to communicate clearly—whether giving feedback, pitching ideas, or managing up—is the primary driver of your professional effectiveness.",
        "why_connected": "All insights connect communication skills to measurable career and organizational outcomes",
    },
    "BIG_IMPACT_GOALS": {
        "name": "Strategy is about what you won't do",
        "core_claim": "Effective product strategy requires ruthless prioritization—defining impact through what you explicitly choose not to pursue.",
        "why_connected": "Each insight emphasizes prioritization and saying no as the key to strategic clarity",
    },
    "FOCUS_ON_HIGH_IMPACT_FEATURES": {
        "name": "Fewer features, higher quality",
        "core_claim": "Building 5 excellent features beats building 15 mediocre ones—users value depth over breadth.",
        "why_connected": "All insights advocate for quality and focus over feature quantity",
    },
    "DATA_OVEREMPHASIS_AND_GUT_FEELING": {
        "name": "Data informs but judgment decides",
        "core_claim": "Over-reliance on quantitative data leads to poor decisions; the best leaders balance data with qualitative insight and informed intuition.",
        "why_connected": "Each insight challenges data-only decision making",
    },
    "Mission-driven product strategy": {
        "name": "Mission clarity drives hard tradeoffs",
        "core_claim": "When mission is clear, difficult product decisions become obvious—even when they mean leaving opportunities on the table.",
        "why_connected": "All insights show mission driving non-obvious strategic choices",
    },
    "Productivity Improvements vs. Constant Connectivity": {
        "name": "Real productivity requires disconnection",
        "core_claim": "Constant connectivity and smartphone dependence reduce rather than enhance meaningful output.",
        "why_connected": "All insights argue against always-on work culture",
    },
}
# Pair every loaded thread with its curated entry and build the export rows.
# Threads without a curation are reported and left out of the export.
threads = list(data['threads'].values())
final_threads = []
for t in threads:
    old_name = t.get('thread_name', '')
    # Find matching curation. The match is a case-sensitive substring test in
    # either direction, and the first key that matches wins.
    # An empty name is never matched: '' is a substring of every key, so it
    # would otherwise silently pick up the first curation instead of warning.
    curation = None
    if old_name:
        for key, value in THREAD_NAMES.items():
            if key in old_name or old_name in key:
                curation = value
                break
    if curation is None:
        print(f"WARNING: No curation found for: {old_name}")
        continue
    final_thread = {
        # Provisional id; ids are reassigned after the list is sorted.
        'thread_id': len(final_threads),
        'thread_name': curation['name'],
        'core_claim': curation['core_claim'],
        'why_connected': curation['why_connected'],
        # size, num_episodes, episodes and insights are required on the input
        # thread (KeyError if absent); category and coherence are optional.
        'size': t['size'],
        'num_episodes': t['num_episodes'],
        'episodes': t['episodes'],
        'category': t.get('category', 'other'),
        'coherence': t.get('coherence', 0),
        'insights': t['insights'],
    }
    final_threads.append(final_thread)
    print(f"✓ {old_name} → \"{curation['name']}\"")
# Order threads by episode coverage, largest first; threads covering the same
# number of episodes are ordered by insight count, also largest first.
final_threads.sort(
    key=lambda thread: (thread['num_episodes'], thread['size']),
    reverse=True,
)
# Rewrite thread_id so that it equals each thread's position after sorting.
for position, thread in enumerate(final_threads):
    thread['thread_id'] = position
# Print a human-readable summary of the curated threads.
total_insights = sum(t['size'] for t in final_threads)
print(f"\n{'='*70}")
print("FINAL THREADS")
print('='*70)
for t in final_threads:
    print(f"\n{t['thread_name']}")
    print(f"  {t['size']} insights from {t['num_episodes']} episodes")
    # Only the first 80 characters of the claim are shown in the summary.
    print(f"  Core claim: {t['core_claim'][:80]}...")

# Assemble the document that is written to disk: run metadata plus the threads.
output = {
    'metadata': {
        'timestamp': datetime.now().isoformat(),
        'approach': 'topic-based threading with manual curation',
        'threads_found': len(final_threads),
        'total_insights': total_insights,
        # NOTE(review): 465 is presumably the total number of insights in the
        # source pool - confirm, and update it if the pool changes.
        'coverage_pct': round(total_insights/465*100, 1),
        # Static notes; nothing in this part of the script verifies them.
        'quality_notes': [
            'All insights have novelty >= 7/10',
            'All insights deduplicated (1 per episode per thread)',
            'Thread names manually curated for clarity',
            'All threads span 3+ different episodes',
        ],
    },
    'threads': final_threads,
}
# Write the export to disk, then report what was saved and where.
output_file = 'insights_first/data/threads_final.json'
with open(output_file, mode='w', encoding='utf-8') as sink:
    # ensure_ascii=False writes non-ASCII characters as-is instead of \u escapes.
    json.dump(output, sink, ensure_ascii=False, indent=2)

print(f"\n{'='*70}")
print(f"Saved {len(final_threads)} threads with {total_insights} insights to:")
print(f"  {output_file}")
Provide feedback

Saved searches

Use saved searches to filter your results more quickly

FilesExpand file tree

create_final_export.py

Latest commit

History

create_final_export.py

File metadata and controls