mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-06-26 22:00:13 +00:00
docs: sync code to deepset workspace
This commit is contained in:
parent
bcaef53cbc
commit
de2ad76c0c
194
.github/utils/deepset_sync.py
vendored
Normal file
194
.github/utils/deepset_sync.py
vendored
Normal file
@ -0,0 +1,194 @@
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "requests",
|
||||
# ]
|
||||
# ///
|
||||
"""
|
||||
Sync files to a Deepset workspace: upload changed files under path-derived names and delete removed ones.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import argparse
|
||||
import requests
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def transform_filename(filepath: Path) -> str:
    """
    Flatten a file path into a single file name.

    Every forward slash and backslash in the string form of ``filepath`` is
    replaced by an underscore; all other characters are left untouched.
    """
    # One translation table handles both separator styles in a single pass
    separators_to_underscore = str.maketrans({"/": "_", "\\": "_"})
    return str(filepath).translate(separators_to_underscore)
|
||||
|
||||
|
||||
def upload_file_to_deepset(filepath: Path, api_key: str, workspace: str, timeout: float = 60.0) -> bool:
    """
    Upload a single file to Deepset API.

    The file is read as UTF-8 text and posted as multipart form data: a JSON
    ``meta`` part carrying the original path and a ``text`` part carrying the
    content. The remote name is produced by ``transform_filename`` and the
    request is sent with ``write_mode=OVERWRITE``.

    :param filepath: Local file to upload.
    :param api_key: Deepset API key, sent as a bearer token.
    :param workspace: Name of the target Deepset workspace.
    :param timeout: Seconds to wait for the server before giving up.
    :returns: True if the upload succeeded, False if the file could not be
        read or the request failed.
    """
    # Read file content
    try:
        content = filepath.read_text(encoding="utf-8")
    except (OSError, UnicodeError) as e:
        print(f"Error reading file {filepath}: {e}")
        return False

    # Transform filename
    transformed_name = transform_filename(filepath)

    # Prepare metadata
    metadata: dict[str, str] = {"original_file_path": str(filepath)}

    # Prepare API request
    url = f"https://api.cloud.deepset.ai/api/v1/workspaces/{workspace}/files"
    params: dict[str, str] = {"file_name": transformed_name, "write_mode": "OVERWRITE"}

    headers: dict[str, str] = {
        "accept": "application/json",
        "authorization": f"Bearer {api_key}",
    }

    # Prepare multipart form data; a `None` filename makes each entry a plain form field
    files: dict[str, tuple[None, str, str]] = {
        "meta": (None, json.dumps(metadata), "application/json"),
        "text": (None, content, "text/plain"),
    }

    try:
        # requests applies no timeout by default, so an unresponsive server
        # would otherwise block the CI job indefinitely.
        response = requests.post(url, params=params, headers=headers, files=files, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.HTTPError:
        # Only raise_for_status() raises HTTPError, so `response` is always bound here
        print(f"Failed to upload {filepath}: HTTP {response.status_code}")
        print(f" Response: {response.text}")
        return False
    except requests.exceptions.RequestException as e:
        # Connection errors, timeouts and other transport-level failures
        print(f"Failed to upload {filepath}: {e}")
        return False
    else:
        print(f"Successfully uploaded: {filepath} as {transformed_name}")
        return True
|
||||
|
||||
|
||||
def delete_files_from_deepset(
    filepaths: list[Path], api_key: str, workspace: str, timeout: float = 60.0
) -> bool:
    """
    Delete multiple files from Deepset API.

    Each path is converted with ``transform_filename`` and all resulting names
    are sent in a single DELETE request as a JSON body of the form
    ``{"names": [...]}``.

    :param filepaths: Local paths whose remote counterparts should be deleted.
    :param api_key: Deepset API key, sent as a bearer token.
    :param workspace: Name of the target Deepset workspace.
    :param timeout: Seconds to wait for the server before giving up.
    :returns: True if the request succeeded or there was nothing to delete,
        False if the request failed.
    """
    # Nothing to delete counts as success and avoids a pointless request
    if not filepaths:
        return True

    # Transform filenames
    transformed_names: list[str] = [transform_filename(fp) for fp in filepaths]

    # Prepare API request
    url = f"https://api.cloud.deepset.ai/api/v1/workspaces/{workspace}/files"

    headers: dict[str, str] = {
        "accept": "application/json",
        "authorization": f"Bearer {api_key}",
        "content-type": "application/json",
    }

    data: dict[str, list[str]] = {"names": transformed_names}

    try:
        # requests applies no timeout by default, so an unresponsive server
        # would otherwise block the CI job indefinitely.
        response = requests.delete(url, headers=headers, json=data, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.HTTPError:
        # Only raise_for_status() raises HTTPError, so `response` is always bound here
        print(f"Failed to delete files: HTTP {response.status_code}")
        print(f" Response: {response.text}")
        return False
    except requests.exceptions.RequestException as e:
        # Connection errors, timeouts and other transport-level failures
        print(f"Failed to delete files: {e}")
        return False
    else:
        print(f"Successfully deleted {len(transformed_names)} file(s):")
        for original, transformed in zip(filepaths, transformed_names):
            print(f" - {original} (as {transformed})")
        return True
|
||||
|
||||
|
||||
def main() -> None:
    """
    Main function to process and upload/delete files.

    Reads the ``--changed`` and ``--deleted`` file lists from the command line
    and the ``DEEPSET_API_KEY`` / ``DEEPSET_WORKSPACE`` settings from the
    environment, deletes the removed files, uploads the changed ones and
    prints a summary.

    Exits with status 1 if a required environment variable is missing or if
    any upload or the deletion request failed, and with status 0 if there is
    nothing to process.
    """
    # Parse command line arguments
    parser = argparse.ArgumentParser(
        description="Upload/delete Markdown files to/from Deepset"
    )
    parser.add_argument(
        "--changed", nargs="*", default=[], help="Changed or added files"
    )
    parser.add_argument("--deleted", nargs="*", default=[], help="Deleted files")
    args = parser.parse_args()

    # Get environment variables
    api_key: str | None = os.environ.get("DEEPSET_API_KEY")
    workspace: str | None = os.environ.get("DEEPSET_WORKSPACE")

    if not api_key:
        print("Error: DEEPSET_API_KEY environment variable not set")
        sys.exit(1)

    # Without this check a missing workspace would be formatted into the
    # request URLs as the literal string "None".
    if not workspace:
        print("Error: DEEPSET_WORKSPACE environment variable not set")
        sys.exit(1)

    # Process arguments and convert to Path objects, dropping blank entries
    changed_files: list[Path] = [Path(f.strip()) for f in args.changed if f.strip()]
    deleted_files: list[Path] = [Path(f.strip()) for f in args.deleted if f.strip()]

    if not changed_files and not deleted_files:
        print("No Markdown files to process")
        sys.exit(0)

    print(f"Processing files in Deepset workspace: {workspace}")
    print("-" * 50)

    # Track results
    upload_success: int = 0
    upload_failed: list[Path] = []
    delete_success: bool = False

    # Handle deletions first
    if deleted_files:
        print(f"\nDeleting {len(deleted_files)} file(s)...")
        delete_success = delete_files_from_deepset(deleted_files, api_key, workspace)

    # Upload changed/new files
    if changed_files:
        print(f"\nUploading {len(changed_files)} file(s)...")
        for filepath in changed_files:
            # Missing paths are reported and skipped; they are not counted as failures
            if not filepath.exists():
                print(f"Skipping non-existent file: {filepath}")
                continue
            if upload_file_to_deepset(filepath, api_key, workspace):
                upload_success += 1
            else:
                upload_failed.append(filepath)

    # Summary
    print("-" * 50)
    print("Processing Summary:")
    if changed_files:
        print(
            f" Uploads - Successful: {upload_success}, Failed: {len(upload_failed)}"
        )
    if deleted_files:
        print(
            f" Deletions - {'Successful' if delete_success else 'Failed'}: {len(deleted_files)} file(s)"
        )

    if upload_failed:
        print("\nFailed uploads:")
        for f in upload_failed:
            print(f" - {f}")

    # Exit with error if any operation failed
    if upload_failed or (deleted_files and not delete_success):
        sys.exit(1)

    print("\nAll operations completed successfully!")


if __name__ == "__main__":
    main()
|
55
.github/workflows/sync_code_to_deepset.yml
vendored
Normal file
55
.github/workflows/sync_code_to_deepset.yml
vendored
Normal file
@ -0,0 +1,55 @@
|
||||
# Syncs Python sources under haystack/ to the "haystack-code" Deepset workspace
# on every push to main: added/modified files are uploaded, deleted files removed.
name: Upload Documentation to Deepset

on:
  push:
    branches:
      - main

jobs:
  upload-files:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0 # Fetch all history for proper diff

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.12'

      - name: Install uv
        run: |
          pip install uv

      - name: Get changed files
        id: changed-files
        uses: tj-actions/changed-files@v46
        with:
          files: |
            haystack/**/*.py
          separator: ' '

      - name: Upload files to Deepset
        if: steps.changed-files.outputs.any_changed == 'true' || steps.changed-files.outputs.any_deleted == 'true'
        env:
          DEEPSET_API_KEY: ${{ secrets.DEEPSET_API_KEY }}
          DEEPSET_WORKSPACE: haystack-code
          # File lists are handed over as environment variables rather than being
          # interpolated into the script text, so a crafted file name cannot inject
          # shell commands.
          ADDED_FILES: ${{ steps.changed-files.outputs.added_files }}
          MODIFIED_FILES: ${{ steps.changed-files.outputs.modified_files }}
          DELETED_FILES: ${{ steps.changed-files.outputs.deleted_files }}
        run: |
          # Run the script with changed (added + modified) and deleted files.
          # The variables are intentionally unquoted: each holds a space-separated
          # list that must be word-split into separate arguments, and an empty
          # list expands to nothing.
          uv run --no-project --no-config --no-cache .github/utils/deepset_sync.py \
            --changed $ADDED_FILES $MODIFIED_FILES \
            --deleted $DELETED_FILES
|
Loading…
x
Reference in New Issue
Block a user