"""Unit tests for ``ConfluenceReader`` using a patched ``atlassian.Confluence``."""
import unittest
from unittest.mock import patch

import pytest

from loader_hub.confluence.base import ConfluenceReader, Document


@pytest.fixture
def mock_confluence():
    """Patch ``atlassian.Confluence`` and yield the resulting mock."""
    with patch("atlassian.Confluence") as patched_client:
        yield patched_client


CONFLUENCE_BASE_URL = "https://example.atlassian.com/wiki"
MOCK_OAUTH = {
    "client_id": "your_client_id",
    "token": {
        "access_token": "your_access_token",
        "token_type": "Bearer",
    },
}


def _make_page(page_id, title, content, **fields):
    """Build a page payload dict; ``fields`` are placed between id and title."""
    page = {"id": page_id}
    page.update(fields)
    page["title"] = title
    page["body"] = {"storage": {"value": content}}
    return page


def _reader_backed_by(client):
    """Create an oauth2-configured reader whose ``confluence`` is ``client``."""
    reader = ConfluenceReader(base_url=CONFLUENCE_BASE_URL, oauth2=MOCK_OAUTH)
    reader.confluence = client
    return reader


def _assert_pages_123_and_456(documents):
    """Check that ``documents`` holds exactly pages 123 and 456, in order."""
    expected = (("123", "Page 123"), ("456", "Page 456"))
    assert len(documents) == 2
    assert all(isinstance(doc, Document) for doc in documents)
    for doc, (doc_id, title) in zip(documents, expected):
        assert doc.doc_id == doc_id
        assert doc.extra_info == {"title": title}


def _assert_never_called(client, *method_names):
    """Check that each named method on ``client`` has a call count of zero."""
    for method_name in method_names:
        assert getattr(client, method_name).call_count == 0


class TestConfluenceReader:
    """Tests for constructing ``ConfluenceReader`` and for ``load_data``."""

    def test_confluence_reader_initialization(self, mock_confluence):
        """The client is built from oauth2, or from env credentials without it."""
        # Test with oauth2
        ConfluenceReader(base_url=CONFLUENCE_BASE_URL, oauth2=MOCK_OAUTH)
        mock_confluence.assert_called_once_with(
            url=CONFLUENCE_BASE_URL, oauth2=MOCK_OAUTH, cloud=True
        )

        # Test without oauth2
        credentials = {
            "CONFLUENCE_USERNAME": "user",
            "CONFLUENCE_API_TOKEN": "api_token",
        }
        with unittest.mock.patch.dict("os.environ", credentials):
            ConfluenceReader(base_url=CONFLUENCE_BASE_URL)
            mock_confluence.assert_called_with(
                url=CONFLUENCE_BASE_URL,
                username="user",
                password="api_token",
                cloud=True,
            )

    def test_confluence_reader_load_data_invalid_args(self, mock_confluence):
        """Calling ``load_data`` with no selector is expected to raise ValueError."""
        reader = _reader_backed_by(mock_confluence)

        expected_message = (
            "Must specify at least one among `space_key`, `page_ids`, "
            "`label`, `cql` parameters."
        )
        with pytest.raises(ValueError, match=expected_message):
            reader.load_data()

    def test_confluence_reader_load_data_by_page_ids(self, mock_confluence):
        """Two page ids yield two documents via ``get_page_by_id`` only."""
        # The body text is never asserted on; only ids and titles are checked.
        mock_confluence.get_page_by_id.side_effect = [
            _make_page("123", "Page 123", "\n\nContent 123\n\n"),
            _make_page("456", "Page 456", "\n\nContent 456\n\n"),
        ]
        reader = _reader_backed_by(mock_confluence)

        mock_page_ids = ["123", "456"]
        documents = reader.load_data(page_ids=mock_page_ids)

        _assert_pages_123_and_456(documents)

        assert mock_confluence.get_page_by_id.call_count == 2
        _assert_never_called(
            mock_confluence,
            "get_all_pages_from_space",
            "get_all_pages_by_label",
            "cql",
            "get_page_child_by_type",
        )

    def test_confluence_reader_load_data_by_space_id(self, mock_confluence):
        """A space key triggers one space listing call with start=0, limit=50."""
        # one response with two pages
        mock_confluence.get_all_pages_from_space.return_value = [
            _make_page(
                "123",
                "Page 123",
                "\n\nContent 123\n\n",
                type="page",
                status="current",
            ),
            _make_page(
                "456",
                "Page 456",
                "\n\nContent 456\n\n",
                type="page",
                status="current",
            ),
        ]
        reader = _reader_backed_by(mock_confluence)

        mock_space_key = "spaceId123"
        documents = reader.load_data(space_key=mock_space_key)

        space_listing = mock_confluence.get_all_pages_from_space
        assert space_listing.call_count == 1
        assert space_listing.call_args[0][0] == "spaceId123"
        assert space_listing.call_args[1]["start"] == 0
        assert space_listing.call_args[1]["limit"] == 50

        _assert_pages_123_and_456(documents)

        _assert_never_called(
            mock_confluence,
            "get_page_by_id",
            "get_all_pages_by_label",
            "cql",
            "get_page_child_by_type",
        )

    def test_confluence_reader_load_data_by_space_id_pagination(self, mock_confluence):
        """With limit=1 the space listing is called three times for two pages."""
        # two api responses with one page each
        mock_confluence.get_all_pages_from_space.side_effect = [
            [
                _make_page(
                    "123",
                    "Page 123",
                    "\n\nContent 123\n\n",
                    type="page",
                    status="current",
                ),
            ],
            [
                _make_page(
                    "456",
                    "Page 456",
                    "\n\nContent 456\n\n",
                    type="page",
                    status="current",
                ),
            ],
            [],
        ]
        reader = _reader_backed_by(mock_confluence)

        mock_space_key = "spaceId123"
        mock_limit = 1  # fetch one page at a time
        documents = reader.load_data(space_key=mock_space_key, limit=mock_limit)

        assert mock_confluence.get_all_pages_from_space.call_count == 3

        _assert_pages_123_and_456(documents)

        _assert_never_called(
            mock_confluence,
            "get_page_by_id",
            "get_all_pages_by_label",
            "cql",
            "get_page_child_by_type",
        )