# Source code for hbllmutils.meta.code.pydoc_generation

"""
Module for generating Python documentation (pydoc) using LLM-based code analysis.

This module provides functionality to create specialized LLM tasks for generating
comprehensive Python documentation in reStructuredText format. It leverages detailed
source code analysis to produce high-quality docstrings that include:

- Functional analysis for modules, classes, methods, and functions
- Parameter descriptions with type annotations
- Return value documentation
- Exception documentation
- Usage examples

The generated documentation follows reStructuredText conventions and can be used
directly in Python source files for tools like Sphinx.

The module contains the following main components:

* :func:`create_pydoc_generation_task` - Factory function for creating pydoc generation tasks

.. note::
   This module requires access to the source file system to analyze code structure
   and dependencies. Ensure proper file permissions when processing source files.

.. warning::
   The generated documentation quality depends on the LLM model capabilities.
   More advanced models typically produce better structured and more accurate
   documentation.

Example::

    >>> from hbllmutils.model import load_llm_model
    >>> from hbllmutils.meta.code.pydoc_generation import create_pydoc_generation_task
    >>> 
    >>> # Create a pydoc generation task with GPT-4
    >>> model = load_llm_model('gpt-4')
    >>> task = create_pydoc_generation_task(model, show_module_directory_tree=True)
    >>> 
    >>> # Generate documentation for a Python file
    >>> documented_code = task.ask_then_parse(input_content='mymodule.py')
    >>> print(documented_code)
    # Output will contain original code with comprehensive pydoc
    >>> 
    >>> # Save the documented code back to file
    >>> with open('mymodule.py', 'w') as f:
    ...     f.write(documented_code)

"""

import os
from typing import Optional, Iterable

from .task import PythonDetailedCodeGenerationLLMTask, PythonCodeGenerationLLMTask
from ...history import LLMHistory
from ...model import LLMModelTyping, load_llm_model
from ...template import PromptTemplate


def create_pydoc_generation_task(
        model: LLMModelTyping,
        show_module_directory_tree: bool = False,
        skip_when_error: bool = True,
        force_ast_check: bool = True,
        ignore_modules: Optional[Iterable[str]] = None,
        no_ignore_modules: Optional[Iterable[str]] = None
) -> PythonCodeGenerationLLMTask:
    """
    Build an LLM task that rewrites Python source with reStructuredText pydoc.

    The system prompt is read from ``pydoc_generation.md``, which is expected to
    sit in the same directory as this module. The file is loaded through
    :class:`PromptTemplate`, rendered without any variables, and installed as the
    system prompt of a fresh :class:`LLMHistory`. The model argument is resolved
    with :func:`load_llm_model`, and everything else is forwarded unchanged to
    :class:`PythonDetailedCodeGenerationLLMTask`.

    :param model: Model specification handed to :func:`load_llm_model`
        (for example a model name, a model instance, or ``None`` for the
        configured default).
    :type model: LLMModelTyping
    :param show_module_directory_tree: Forwarded to the task; when ``True`` the
        module's directory tree is included in the analysis prompt.
        Defaults to ``False``.
    :type show_module_directory_tree: bool
    :param skip_when_error: Forwarded to the task; when ``True`` imports that
        fail to load during dependency analysis are skipped instead of aborting
        the analysis. Defaults to ``True``.
    :type skip_when_error: bool
    :param force_ast_check: Forwarded to the task; when ``True`` the generated
        code is validated with Python's AST parser. Defaults to ``True``.
    :type force_ast_check: bool
    :param ignore_modules: Forwarded to the task; module names that should
        always be ignored during dependency analysis.
    :type ignore_modules: Optional[Iterable[str]]
    :param no_ignore_modules: Forwarded to the task; module names that should
        never be ignored during dependency analysis.
    :type no_ignore_modules: Optional[Iterable[str]]

    :return: A configured task; call ``ask_then_parse()`` on it with a source
        file to obtain the documented code.
    :rtype: PythonCodeGenerationLLMTask

    :raises FileNotFoundError: If ``pydoc_generation.md`` cannot be found next
        to this module (presumably surfaced by ``PromptTemplate.from_file``).
    :raises ValueError: If the model specification cannot be resolved
        (presumably surfaced by :func:`load_llm_model`).
    :raises TypeError: If ``model`` is not a supported model specification
        (presumably surfaced by :func:`load_llm_model`).

    .. warning::
       For modules with many dependencies the analysis prompt can grow large;
       keep the selected model's context window in mind.

    Example::

        >>> from hbllmutils.meta.code.pydoc_generation import create_pydoc_generation_task
        >>>
        >>> task = create_pydoc_generation_task('gpt-4', show_module_directory_tree=True)
        >>> documented_code = task.ask_then_parse(input_content='mypackage/module.py')
        >>> with open('mypackage/module.py', 'w') as f:
        ...     f.write(documented_code)

    """
    # The prompt template ships alongside this module, so resolve it relative
    # to this file rather than the current working directory.
    module_dir = os.path.dirname(__file__)
    template_path = os.path.join(module_dir, 'pydoc_generation.md')
    rendered_prompt = PromptTemplate.from_file(template_path).render()

    # Resolve the model before creating the history so the evaluation order
    # stays: template -> model -> history -> task.
    resolved_model = load_llm_model(model)
    seeded_history = LLMHistory().with_system_prompt(rendered_prompt)

    return PythonDetailedCodeGenerationLLMTask(
        model=resolved_model,
        code_name='Code For Task',
        description_text='This is the source code for you to generate new code with pydoc',
        history=seeded_history,
        show_module_directory_tree=show_module_directory_tree,
        skip_when_error=skip_when_error,
        force_ast_check=force_ast_check,
        ignore_modules=ignore_modules,
        no_ignore_modules=no_ignore_modules,
    )