您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符

test_analyze_sources.py 7.5KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204
  1. """Tests for analyze_sources.py"""
  2. import json
  3. import os
  4. import tempfile
  5. from pathlib import Path
  6. from unittest.mock import patch
  7. import pytest
  8. # Add parent dir to path so we can import the script
  9. import sys
  10. sys.path.insert(0, str(Path(__file__).parent.parent))
  11. from analyze_sources import (
  12. resolve_inputs,
  13. detect_doc_type,
  14. suggest_groups,
  15. analyze,
  16. INCLUDE_EXTENSIONS,
  17. SKIP_DIRS,
  18. )
  19. @pytest.fixture
  20. def temp_dir():
  21. """Create a temp directory with sample files."""
  22. with tempfile.TemporaryDirectory() as d:
  23. # Create sample files
  24. (Path(d) / "product-brief-foo.md").write_text("# Product Brief\nContent here")
  25. (Path(d) / "product-brief-foo-discovery-notes.md").write_text("# Discovery\nNotes")
  26. (Path(d) / "architecture-doc.md").write_text("# Architecture\nDesign here")
  27. (Path(d) / "research-report.md").write_text("# Research\nFindings")
  28. (Path(d) / "random.txt").write_text("Some text content")
  29. (Path(d) / "image.png").write_bytes(b"\x89PNG")
  30. # Create a subdirectory with more files
  31. sub = Path(d) / "subdir"
  32. sub.mkdir()
  33. (sub / "prd-v2.md").write_text("# PRD\nRequirements")
  34. # Create a skip directory
  35. skip = Path(d) / "node_modules"
  36. skip.mkdir()
  37. (skip / "junk.md").write_text("Should be skipped")
  38. yield d
  39. class TestResolveInputs:
  40. def test_single_file(self, temp_dir):
  41. f = str(Path(temp_dir) / "product-brief-foo.md")
  42. result = resolve_inputs([f])
  43. assert len(result) == 1
  44. assert result[0].name == "product-brief-foo.md"
  45. def test_folder_recursion(self, temp_dir):
  46. result = resolve_inputs([temp_dir])
  47. names = {f.name for f in result}
  48. assert "product-brief-foo.md" in names
  49. assert "prd-v2.md" in names
  50. assert "random.txt" in names
  51. def test_folder_skips_excluded_dirs(self, temp_dir):
  52. result = resolve_inputs([temp_dir])
  53. names = {f.name for f in result}
  54. assert "junk.md" not in names
  55. def test_folder_skips_non_text_files(self, temp_dir):
  56. result = resolve_inputs([temp_dir])
  57. names = {f.name for f in result}
  58. assert "image.png" not in names
  59. def test_glob_pattern(self, temp_dir):
  60. pattern = str(Path(temp_dir) / "product-brief-*.md")
  61. result = resolve_inputs([pattern])
  62. assert len(result) == 2
  63. names = {f.name for f in result}
  64. assert "product-brief-foo.md" in names
  65. assert "product-brief-foo-discovery-notes.md" in names
  66. def test_deduplication(self, temp_dir):
  67. f = str(Path(temp_dir) / "product-brief-foo.md")
  68. result = resolve_inputs([f, f, f])
  69. assert len(result) == 1
  70. def test_mixed_inputs(self, temp_dir):
  71. file_path = str(Path(temp_dir) / "architecture-doc.md")
  72. folder_path = str(Path(temp_dir) / "subdir")
  73. result = resolve_inputs([file_path, folder_path])
  74. names = {f.name for f in result}
  75. assert "architecture-doc.md" in names
  76. assert "prd-v2.md" in names
  77. def test_nonexistent_path(self):
  78. result = resolve_inputs(["/nonexistent/path/file.md"])
  79. assert len(result) == 0
  80. class TestDetectDocType:
  81. @pytest.mark.parametrize("filename,expected", [
  82. ("product-brief-foo.md", "product-brief"),
  83. ("product_brief_bar.md", "product-brief"),
  84. ("foo-discovery-notes.md", "discovery-notes"),
  85. ("foo-discovery_notes.md", "discovery-notes"),
  86. ("architecture-overview.md", "architecture-doc"),
  87. ("my-prd.md", "prd"),
  88. ("research-report-q4.md", "research-report"),
  89. ("foo-distillate.md", "distillate"),
  90. ("changelog.md", "changelog"),
  91. ("readme.md", "readme"),
  92. ("api-spec.md", "specification"),
  93. ("design-doc-v2.md", "design-doc"),
  94. ("meeting-notes-2026.md", "meeting-notes"),
  95. ("brainstorm-session.md", "brainstorming"),
  96. ("user-interview-notes.md", "interview-notes"),
  97. ("random-file.md", "unknown"),
  98. ])
  99. def test_detection(self, filename, expected):
  100. assert detect_doc_type(filename) == expected
  101. class TestSuggestGroups:
  102. def test_groups_brief_with_discovery_notes(self, temp_dir):
  103. files = [
  104. Path(temp_dir) / "product-brief-foo.md",
  105. Path(temp_dir) / "product-brief-foo-discovery-notes.md",
  106. ]
  107. groups = suggest_groups(files)
  108. # Should produce one group with both files
  109. paired = [g for g in groups if len(g["files"]) > 1]
  110. assert len(paired) == 1
  111. filenames = {f["filename"] for f in paired[0]["files"]}
  112. assert "product-brief-foo.md" in filenames
  113. assert "product-brief-foo-discovery-notes.md" in filenames
  114. def test_standalone_files(self, temp_dir):
  115. files = [
  116. Path(temp_dir) / "architecture-doc.md",
  117. Path(temp_dir) / "research-report.md",
  118. ]
  119. groups = suggest_groups(files)
  120. assert len(groups) == 2
  121. for g in groups:
  122. assert len(g["files"]) == 1
  123. def test_mixed_grouped_and_standalone(self, temp_dir):
  124. files = [
  125. Path(temp_dir) / "product-brief-foo.md",
  126. Path(temp_dir) / "product-brief-foo-discovery-notes.md",
  127. Path(temp_dir) / "architecture-doc.md",
  128. ]
  129. groups = suggest_groups(files)
  130. paired = [g for g in groups if len(g["files"]) > 1]
  131. standalone = [g for g in groups if len(g["files"]) == 1]
  132. assert len(paired) == 1
  133. assert len(standalone) == 1
  134. class TestAnalyze:
  135. def test_basic_analysis(self, temp_dir):
  136. f = str(Path(temp_dir) / "product-brief-foo.md")
  137. output_file = str(Path(temp_dir) / "output.json")
  138. analyze([f], output_file)
  139. result = json.loads(Path(output_file).read_text())
  140. assert result["status"] == "ok"
  141. assert result["summary"]["total_files"] == 1
  142. assert result["files"][0]["doc_type"] == "product-brief"
  143. assert result["files"][0]["estimated_tokens"] > 0
  144. def test_routing_single_small_input(self, temp_dir):
  145. f = str(Path(temp_dir) / "product-brief-foo.md")
  146. output_file = str(Path(temp_dir) / "output.json")
  147. analyze([f], output_file)
  148. result = json.loads(Path(output_file).read_text())
  149. assert result["routing"]["recommendation"] == "single"
  150. def test_routing_fanout_many_files(self, temp_dir):
  151. # Create enough files to trigger fan-out (> 3 files)
  152. for i in range(5):
  153. (Path(temp_dir) / f"doc-{i}.md").write_text("x" * 1000)
  154. output_file = str(Path(temp_dir) / "output.json")
  155. analyze([temp_dir], output_file)
  156. result = json.loads(Path(output_file).read_text())
  157. assert result["routing"]["recommendation"] == "fan-out"
  158. def test_folder_analysis(self, temp_dir):
  159. output_file = str(Path(temp_dir) / "output.json")
  160. analyze([temp_dir], output_file)
  161. result = json.loads(Path(output_file).read_text())
  162. assert result["status"] == "ok"
  163. assert result["summary"]["total_files"] >= 4 # at least the base files
  164. assert len(result["groups"]) > 0
  165. def test_no_files_found(self):
  166. output_file = "/tmp/test_analyze_empty.json"
  167. analyze(["/nonexistent/path"], output_file)
  168. result = json.loads(Path(output_file).read_text())
  169. assert result["status"] == "error"
  170. os.unlink(output_file)
  171. def test_stdout_output(self, temp_dir, capsys):
  172. f = str(Path(temp_dir) / "product-brief-foo.md")
  173. analyze([f])
  174. captured = capsys.readouterr()
  175. result = json.loads(captured.out)
  176. assert result["status"] == "ok"