Ci streamline (#988)

* Remove excess vars from gh-pages build
* Delete redundant javascript ci
* Pull apart testing CI
* Clean up integration tests build
* Move storage tests to integration CI
* Take py 3.10 out of smoke tests matrix
* Use minimum supported python version for most tests
* Re-run main CI on any test change
* Add Josh and Kenny to author list
* Update auto-resolve perms
2025-11-14 17:13:31 +00:00 · 2024-08-21 14:16:15 -07:00 · 2024-08-21 14:16:15 -07:00 · f5b4d2fea5
commit f5b4d2fea5
parent 98cabba38b
13 changed files with 85 additions and 97 deletions
--- a/.github/workflows/gh-pages.yml
+++ b/.github/workflows/gh-pages.yml
@ -17,29 +17,6 @@ jobs:
    env:
      GH_PAGES: 1
      DEBUG: 1
-      GRAPHRAG_LLM_TYPE: "azure_openai_chat"
-      GRAPHRAG_EMBEDDING_TYPE: "azure_openai_embedding"
-      GRAPHRAG_API_KEY: ${{ secrets.OPENAI_API_KEY }}
-      GRAPHRAG_API_BASE: ${{ secrets.GRAPHRAG_API_BASE }}
-      GRAPHRAG_API_VERSION: ${{ secrets.GRAPHRAG_API_VERSION }}
-      GRAPHRAG_LLM_DEPLOYMENT_NAME: ${{ secrets.GRAPHRAG_LLM_DEPLOYMENT_NAME }}
-      GRAPHRAG_EMBEDDING_DEPLOYMENT_NAME: ${{ secrets.GRAPHRAG_EMBEDDING_DEPLOYMENT_NAME }}
-      GRAPHRAG_CACHE_TYPE: "blob"
-      GRAPHRAG_CACHE_CONNECTION_STRING: ${{ secrets.BLOB_STORAGE_CONNECTION_STRING }}
-      GRAPHRAG_CACHE_CONTAINER_NAME: "cicache"
-      GRAPHRAG_CACHE_BASE_DIR": "cache"
-      GRAPHRAG_LLM_MODEL: gpt-3.5-turbo-16k
-      GRAPHRAG_EMBEDDING_MODEL: text-embedding-ada-002
-      # We have Windows + Linux runners in 3.10 and 3.11, so we need to divide the rate limits by 4
-      GRAPHRAG_LLM_TPM: 45_000 # 180,000 / 4
-      GRAPHRAG_LLM_RPM: 270 # 1,080 / 4
-      GRAPHRAG_EMBEDDING_TPM: 87_500 # 350,000 / 4
-      GRAPHRAG_EMBEDDING_RPM: 525 # 2,100 / 4
-      GRAPHRAG_CHUNK_SIZE: 1200
-      GRAPHRAG_CHUNK_OVERLAP: 0
-      # Azure AI Search config
-      AZURE_AI_SEARCH_URL_ENDPOINT: ${{ secrets.AZURE_AI_SEARCH_URL_ENDPOINT }}
-      AZURE_AI_SEARCH_API_KEY: ${{ secrets.AZURE_AI_SEARCH_API_KEY }}

    steps:
      - uses: actions/checkout@v4
--- a/.github/workflows/issues-autoresolve.yml
+++ b/.github/workflows/issues-autoresolve.yml
@ -5,6 +5,8 @@ on:

 permissions:
  actions: write
+  issues: write
+  pull-requests: write

 jobs:
  close-issues:
--- a/.github/workflows/javascript-ci.yml
+++ b/.github/workflows/javascript-ci.yml
@ -1,30 +0,0 @@
-name: JavaScript CI
-on:
-  push:
-    branches: [main]
-  pull_request:
-    branches: [main]
-
-env:
-  NODE_VERSION: 18.x
-
-jobs:
-  javascript-ci:
-    runs-on: ubuntu-latest
-    strategy:
-      fail-fast: false
-    steps:
-      - name: Use Node ${{ env.NODE_VERSION }}
-        uses: actions/setup-node@v4
-        with:
-          node-version: ${{ env.NODE_VERSION }}
-
-      - uses: actions/checkout@v4
-
-      - run: yarn install
-        working-directory: docsite
-        name: Install Dependencies
-
-      - run: yarn build
-        working-directory: docsite
-        name: Build Docsite
--- a/.github/workflows/python-ci.yml
+++ b/.github/workflows/python-ci.yml
@ -26,28 +26,6 @@ jobs:
      fail-fast: false # Continue running all jobs even if one fails
    env:
      DEBUG: 1
-      GRAPHRAG_LLM_TYPE: "azure_openai_chat"
-      GRAPHRAG_EMBEDDING_TYPE: "azure_openai_embedding"
-      GRAPHRAG_API_KEY: ${{ secrets.OPENAI_API_KEY }}
-      GRAPHRAG_API_BASE: ${{ secrets.GRAPHRAG_API_BASE }}
-      GRAPHRAG_API_VERSION: ${{ secrets.GRAPHRAG_API_VERSION }}
-      GRAPHRAG_LLM_DEPLOYMENT_NAME: ${{ secrets.GRAPHRAG_LLM_DEPLOYMENT_NAME }}
-      GRAPHRAG_EMBEDDING_DEPLOYMENT_NAME: ${{ secrets.GRAPHRAG_EMBEDDING_DEPLOYMENT_NAME }}
-      GRAPHRAG_CACHE_CONTAINER_NAME: "cicache"
-      GRAPHRAG_CACHE_BASE_DIR": "cache"
-      GRAPHRAG_LLM_MODEL: ${{ secrets.GRAPHRAG_LLM_MODEL }}
-      GRAPHRAG_EMBEDDING_MODEL: ${{ secrets.GRAPHRAG_EMBEDDING_MODEL }}
-      GRAPHRAG_ENTITY_EXTRACTION_ENCODING_MODEL: ${{ secrets.GRAPHRAG_ENTITY_EXTRACTION_ENCODING_MODEL }}
-      # We have Windows + Linux runners in 3.10 and 3.11, so we need to divide the rate limits by 4
-      GRAPHRAG_LLM_TPM: 45_000 # 180,000 / 4
-      GRAPHRAG_LLM_RPM: 270 # 1,080 / 4
-      GRAPHRAG_EMBEDDING_TPM: 87_500 # 350,000 / 4
-      GRAPHRAG_EMBEDDING_RPM: 525 # 2,100 / 4
-      GRAPHRAG_CHUNK_SIZE: 1200
-      GRAPHRAG_CHUNK_OVERLAP: 0
-      # Azure AI Search config
-      AZURE_AI_SEARCH_URL_ENDPOINT: ${{ secrets.AZURE_AI_SEARCH_URL_ENDPOINT }}
-      AZURE_AI_SEARCH_API_KEY: ${{ secrets.AZURE_AI_SEARCH_API_KEY }}

    runs-on: ${{ matrix.os }}
    steps:
@ -65,7 +43,7 @@ jobs:
              - '**/*.toml'
              - '**/*.ipynb'
              - '.github/workflows/python*.yml'
-              - 'tests/smoke/*'
+              - 'tests/**/*'

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
@ -92,14 +70,6 @@ jobs:
        run: |
          poetry build

-      - name: Install Azurite
-        id: azuright
-        uses: potatoqualitee/azuright@v1.1
-
      - name: Unit Test
        run: |
          poetry run poe test_unit
-
-      - name: Integration Test
-        run: |
-          poetry run poe test_integration
--- a/.github/workflows/python-integration-tests.yml
+++ b/.github/workflows/python-integration-tests.yml
@ -0,0 +1,75 @@
+name: Python Integration Tests
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+permissions:
+  contents: read
+  pull-requests: read
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  # Only run for the latest commit
+  cancel-in-progress: true
+
+env:
+  POETRY_VERSION: 1.8.3
+
+jobs:
+  python-ci:
+    strategy:
+      matrix:
+        python-version: ["3.10"]
+        os: [ubuntu-latest, windows-latest]
+      fail-fast: false # Continue running all jobs even if one fails
+    env:
+      DEBUG: 1
+
+    runs-on: ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: dorny/paths-filter@v3
+        id: changes
+        with:
+          filters: |
+            python:
+              - 'graphrag/**/*'
+              - 'poetry.lock'
+              - 'pyproject.toml'
+              - '**/*.py'
+              - '**/*.toml'
+              - '**/*.ipynb'
+              - '.github/workflows/python*.yml'
+              - 'tests/integration/**/*'
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install Poetry
+        uses: abatilo/actions-poetry@v3.0.0
+        with:
+          poetry-version: $POETRY_VERSION
+
+      - name: Install dependencies
+        shell: bash
+        run: |
+          poetry self add setuptools wheel
+          poetry run python -m pip install gensim
+          poetry install
+
+      - name: Build
+        run: |
+          poetry build
+
+      - name: Install Azurite
+        id: azuright
+        uses: potatoqualitee/azuright@v1.1
+
+      - name: Integration Test
+        run: |
+          poetry run poe test_integration
--- a/.github/workflows/python-notebook-tests.yml
+++ b/.github/workflows/python-notebook-tests.yml
@ -21,7 +21,7 @@ jobs:
  python-ci:
    strategy:
      matrix:
-        python-version: ["3.11"]
+        python-version: ["3.10"]
        os: [ubuntu-latest, windows-latest]
      fail-fast: false # Continue running all jobs even if one fails
    env:
--- a/.github/workflows/python-publish.yml
+++ b/.github/workflows/python-publish.yml
@ -1,4 +1,4 @@
-name: Python Publish
+name: Python Publish (pypi)
 on:
  release:
    types: [created]
--- a/.github/workflows/python-smoke-tests.yml
+++ b/.github/workflows/python-smoke-tests.yml
@ -21,7 +21,7 @@ jobs:
  python-ci:
    strategy:
      matrix:
-        python-version: ["3.10", "3.11"] # add 3.12 once gensim supports it. TODO: watch this issue - https://github.com/piskvorky/gensim/issues/3510
+        python-version: ["3.10"]
        os: [ubuntu-latest, windows-latest]
      fail-fast: false # Continue running all jobs even if one fails
    env:
@ -65,7 +65,7 @@ jobs:
              - '**/*.toml'
              - '**/*.ipynb'
              - '.github/workflows/python*.yml'
-              - 'tests/smoke/*'
+              - 'tests/smoke/**/*'

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
--- a/pyproject.toml
+++ b/pyproject.toml
@ -12,7 +12,9 @@ authors = [
    "Gaudy Blanco Meneses <gaudyb@microsoft.com>",
    "Ha Trinh <trinhha@microsoft.com>",
    "Jonathan Larson <jolarso@microsoft.com>",
+    "Josh Bradley <joshbradley@microsoft.com>",
    "Kate Lytvynets <kalytv@microsoft.com>",
+    "Kenny Zhang <zhangken@microsoft.com>",
    "Mónica Carvajal",
    "Nathan Evans <naevans@microsoft.com>",
    "Rodrigo Racanicci <rracanicci@microsoft.com>",
--- a/tests/integration/_pipeline/megapipeline.yml
+++ b/tests/integration/_pipeline/megapipeline.yml
@ -39,14 +39,6 @@ workflows:

  - name: create_base_documents

-  # - name: create_final_community_reports
-  #   config:
-  #     create_community_reports:
-  #       <<: *llm_parallel_config
-  #       strategy:
-  #         type: graph_intelligence
-  #         llm: *llm_config
-
  - name: create_final_communities
  - name: create_final_text_units
    config:
--- a/tests/unit/indexing/storage/init.py
+++ b/tests/unit/indexing/storage/init.py
--- a/tests/unit/indexing/storage/test_blob_pipeline_storage.py
+++ b/tests/unit/indexing/storage/test_blob_pipeline_storage.py
--- a/tests/unit/indexing/storage/test_file_pipeline_storage.py
+++ b/tests/unit/indexing/storage/test_file_pipeline_storage.py