diff --git a/.github/workflows/gh-pages.yml b/.github/workflows/gh-pages.yml
index 592e9635..19f98567 100644
--- a/.github/workflows/gh-pages.yml
+++ b/.github/workflows/gh-pages.yml
@@ -17,29 +17,6 @@ jobs:
     env:
       GH_PAGES: 1
       DEBUG: 1
-      GRAPHRAG_LLM_TYPE: "azure_openai_chat"
-      GRAPHRAG_EMBEDDING_TYPE: "azure_openai_embedding"
-      GRAPHRAG_API_KEY: ${{ secrets.OPENAI_API_KEY }}
-      GRAPHRAG_API_BASE: ${{ secrets.GRAPHRAG_API_BASE }}
-      GRAPHRAG_API_VERSION: ${{ secrets.GRAPHRAG_API_VERSION }}
-      GRAPHRAG_LLM_DEPLOYMENT_NAME: ${{ secrets.GRAPHRAG_LLM_DEPLOYMENT_NAME }}
-      GRAPHRAG_EMBEDDING_DEPLOYMENT_NAME: ${{ secrets.GRAPHRAG_EMBEDDING_DEPLOYMENT_NAME }}
-      GRAPHRAG_CACHE_TYPE: "blob"
-      GRAPHRAG_CACHE_CONNECTION_STRING: ${{ secrets.BLOB_STORAGE_CONNECTION_STRING }}
-      GRAPHRAG_CACHE_CONTAINER_NAME: "cicache"
-      GRAPHRAG_CACHE_BASE_DIR": "cache"
-      GRAPHRAG_LLM_MODEL: gpt-3.5-turbo-16k
-      GRAPHRAG_EMBEDDING_MODEL: text-embedding-ada-002
-      # We have Windows + Linux runners in 3.10 and 3.11, so we need to divide the rate limits by 4
-      GRAPHRAG_LLM_TPM: 45_000 # 180,000 / 4
-      GRAPHRAG_LLM_RPM: 270 # 1,080 / 4
-      GRAPHRAG_EMBEDDING_TPM: 87_500 # 350,000 / 4
-      GRAPHRAG_EMBEDDING_RPM: 525 # 2,100 / 4
-      GRAPHRAG_CHUNK_SIZE: 1200
-      GRAPHRAG_CHUNK_OVERLAP: 0
-      # Azure AI Search config
-      AZURE_AI_SEARCH_URL_ENDPOINT: ${{ secrets.AZURE_AI_SEARCH_URL_ENDPOINT }}
-      AZURE_AI_SEARCH_API_KEY: ${{ secrets.AZURE_AI_SEARCH_API_KEY }}
 
     steps:
       - uses: actions/checkout@v4
diff --git a/.github/workflows/issues-autoresolve.yml b/.github/workflows/issues-autoresolve.yml
index b88f7255..b077a44d 100644
--- a/.github/workflows/issues-autoresolve.yml
+++ b/.github/workflows/issues-autoresolve.yml
@@ -5,6 +5,8 @@ on:
 
 permissions:
   actions: write
+  issues: write
+  pull-requests: write
 
 jobs:
   close-issues:
diff --git a/.github/workflows/javascript-ci.yml b/.github/workflows/javascript-ci.yml
deleted file mode 100644
index e1d45c6f..00000000
--- a/.github/workflows/javascript-ci.yml
+++ /dev/null
@@ -1,30 +0,0 @@
-name: JavaScript CI
-on:
-  push:
-    branches: [main]
-  pull_request:
-    branches: [main]
-
-env:
-  NODE_VERSION: 18.x
-
-jobs:
-  javascript-ci:
-    runs-on: ubuntu-latest
-    strategy:
-      fail-fast: false
-    steps:
-      - name: Use Node ${{ env.NODE_VERSION }}
-        uses: actions/setup-node@v4
-        with:
-          node-version: ${{ env.NODE_VERSION }}
-
-      - uses: actions/checkout@v4
-
-      - run: yarn install
-        working-directory: docsite
-        name: Install Dependencies
-
-      - run: yarn build
-        working-directory: docsite
-        name: Build Docsite
\ No newline at end of file
diff --git a/.github/workflows/python-ci.yml b/.github/workflows/python-ci.yml
index b27dcdc0..9529bf32 100644
--- a/.github/workflows/python-ci.yml
+++ b/.github/workflows/python-ci.yml
@@ -26,28 +26,6 @@ jobs:
       fail-fast: false # Continue running all jobs even if one fails
     env:
       DEBUG: 1
-      GRAPHRAG_LLM_TYPE: "azure_openai_chat"
-      GRAPHRAG_EMBEDDING_TYPE: "azure_openai_embedding"
-      GRAPHRAG_API_KEY: ${{ secrets.OPENAI_API_KEY }}
-      GRAPHRAG_API_BASE: ${{ secrets.GRAPHRAG_API_BASE }}
-      GRAPHRAG_API_VERSION: ${{ secrets.GRAPHRAG_API_VERSION }}
-      GRAPHRAG_LLM_DEPLOYMENT_NAME: ${{ secrets.GRAPHRAG_LLM_DEPLOYMENT_NAME }}
-      GRAPHRAG_EMBEDDING_DEPLOYMENT_NAME: ${{ secrets.GRAPHRAG_EMBEDDING_DEPLOYMENT_NAME }}
-      GRAPHRAG_CACHE_CONTAINER_NAME: "cicache"
-      GRAPHRAG_CACHE_BASE_DIR": "cache"
-      GRAPHRAG_LLM_MODEL: ${{ secrets.GRAPHRAG_LLM_MODEL }}
-      GRAPHRAG_EMBEDDING_MODEL: ${{ secrets.GRAPHRAG_EMBEDDING_MODEL }}
-      GRAPHRAG_ENTITY_EXTRACTION_ENCODING_MODEL: ${{ secrets.GRAPHRAG_ENTITY_EXTRACTION_ENCODING_MODEL }}
-      # We have Windows + Linux runners in 3.10 and 3.11, so we need to divide the rate limits by 4
-      GRAPHRAG_LLM_TPM: 45_000 # 180,000 / 4
-      GRAPHRAG_LLM_RPM: 270 # 1,080 / 4
-      GRAPHRAG_EMBEDDING_TPM: 87_500 # 350,000 / 4
-      GRAPHRAG_EMBEDDING_RPM: 525 # 2,100 / 4
-      GRAPHRAG_CHUNK_SIZE: 1200
-      GRAPHRAG_CHUNK_OVERLAP: 0
-      # Azure AI Search config
-      AZURE_AI_SEARCH_URL_ENDPOINT: ${{ secrets.AZURE_AI_SEARCH_URL_ENDPOINT }}
-      AZURE_AI_SEARCH_API_KEY: ${{ secrets.AZURE_AI_SEARCH_API_KEY }}
 
     runs-on: ${{ matrix.os }}
     steps:
@@ -65,7 +43,7 @@ jobs:
               - '**/*.toml'
               - '**/*.ipynb'
               - '.github/workflows/python*.yml'
-              - 'tests/smoke/*'
+              - 'tests/**/*'
 
       - name: Set up Python ${{ matrix.python-version }}
         uses: actions/setup-python@v5
@@ -92,14 +70,6 @@ jobs:
         run: |
           poetry build
 
-      - name: Install Azurite
-        id: azuright
-        uses: potatoqualitee/azuright@v1.1
-
       - name: Unit Test
         run: |
-          poetry run poe test_unit
-
-      - name: Integration Test
-        run: |
-          poetry run poe test_integration
+          poetry run poe test_unit
\ No newline at end of file
diff --git a/.github/workflows/python-integration-tests.yml b/.github/workflows/python-integration-tests.yml
new file mode 100644
index 00000000..eb58b139
--- /dev/null
+++ b/.github/workflows/python-integration-tests.yml
@@ -0,0 +1,75 @@
+name: Python Integration Tests
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+permissions:
+  contents: read
+  pull-requests: read
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  # Only run for the latest commit
+  cancel-in-progress: true
+
+env:
+  POETRY_VERSION: 1.8.3
+
+jobs:
+  python-ci:
+    strategy:
+      matrix:
+        python-version: ["3.10"]
+        os: [ubuntu-latest, windows-latest]
+      fail-fast: false # Continue running all jobs even if one fails
+    env:
+      DEBUG: 1
+
+    runs-on: ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: dorny/paths-filter@v3
+        id: changes
+        with:
+          filters: |
+            python:
+              - 'graphrag/**/*'
+              - 'poetry.lock'
+              - 'pyproject.toml'
+              - '**/*.py'
+              - '**/*.toml'
+              - '**/*.ipynb'
+              - '.github/workflows/python*.yml'
+              - 'tests/integration/**/*'
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install Poetry
+        uses: abatilo/actions-poetry@v3.0.0
+        with:
+          poetry-version: ${{ env.POETRY_VERSION }}
+
+      - name: Install dependencies
+        shell: bash
+        run: |
+          poetry self add setuptools wheel
+          poetry run python -m pip install gensim
+          poetry install
+
+      - name: Build
+        run: |
+          poetry build
+
+      - name: Install Azurite
+        id: azuright
+        uses: potatoqualitee/azuright@v1.1
+
+      - name: Integration Test
+        run: |
+          poetry run poe test_integration
diff --git a/.github/workflows/python-notebook-tests.yml b/.github/workflows/python-notebook-tests.yml
index 9ec1bed7..60241005 100644
--- a/.github/workflows/python-notebook-tests.yml
+++ b/.github/workflows/python-notebook-tests.yml
@@ -21,7 +21,7 @@ jobs:
   python-ci:
     strategy:
       matrix:
-        python-version: ["3.11"]
+        python-version: ["3.10"]
         os: [ubuntu-latest, windows-latest]
       fail-fast: false # Continue running all jobs even if one fails
     env:
diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml
index 499f1ad0..f9fcce22 100644
--- a/.github/workflows/python-publish.yml
+++ b/.github/workflows/python-publish.yml
@@ -1,4 +1,4 @@
-name: Python Publish
+name: Python Publish (pypi)
 on:
   release:
     types: [created]
diff --git a/.github/workflows/python-smoke-tests.yml b/.github/workflows/python-smoke-tests.yml
index 47b975db..45c9ca8e 100644
--- a/.github/workflows/python-smoke-tests.yml
+++ b/.github/workflows/python-smoke-tests.yml
@@ -21,7 +21,7 @@ jobs:
   python-ci:
     strategy:
       matrix:
-        python-version: ["3.10", "3.11"] # add 3.12 once gensim supports it. TODO: watch this issue - https://github.com/piskvorky/gensim/issues/3510
+        python-version: ["3.10"]
         os: [ubuntu-latest, windows-latest]
       fail-fast: false # Continue running all jobs even if one fails
     env:
@@ -65,7 +65,7 @@ jobs:
               - '**/*.toml'
               - '**/*.ipynb'
               - '.github/workflows/python*.yml'
-              - 'tests/smoke/*'
+              - 'tests/smoke/**/*'
 
       - name: Set up Python ${{ matrix.python-version }}
         uses: actions/setup-python@v5
diff --git a/pyproject.toml b/pyproject.toml
index 40651657..af2d1c7b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -12,7 +12,9 @@ authors = [
     "Gaudy Blanco Meneses ",
     "Ha Trinh ",
     "Jonathan Larson ",
+    "Josh Bradley ",
     "Kate Lytvynets ",
+    "Kenny Zhang ",
     "Mónica Carvajal",
     "Nathan Evans ",
     "Rodrigo Racanicci ",
diff --git a/tests/integration/_pipeline/megapipeline.yml b/tests/integration/_pipeline/megapipeline.yml
index bc48af0d..e8f51d26 100644
--- a/tests/integration/_pipeline/megapipeline.yml
+++ b/tests/integration/_pipeline/megapipeline.yml
@@ -39,14 +39,6 @@ workflows:
 
   - name: create_base_documents
 
-  # - name: create_final_community_reports
-  #   config:
-  #     create_community_reports:
-  #       <<: *llm_parallel_config
-  #       strategy:
-  #         type: graph_intelligence
-  #         llm: *llm_config
-
   - name: create_final_communities
   - name: create_final_text_units
     config:
diff --git a/tests/unit/indexing/storage/__init__.py b/tests/integration/storage/__init__.py
similarity index 100%
rename from tests/unit/indexing/storage/__init__.py
rename to tests/integration/storage/__init__.py
diff --git a/tests/unit/indexing/storage/test_blob_pipeline_storage.py b/tests/integration/storage/test_blob_pipeline_storage.py
similarity index 100%
rename from tests/unit/indexing/storage/test_blob_pipeline_storage.py
rename to tests/integration/storage/test_blob_pipeline_storage.py
diff --git a/tests/unit/indexing/storage/test_file_pipeline_storage.py b/tests/integration/storage/test_file_pipeline_storage.py
similarity index 100%
rename from tests/unit/indexing/storage/test_file_pipeline_storage.py
rename to tests/integration/storage/test_file_pipeline_storage.py