add all dot files back (#25)

This commit is contained in:
Josh Bradley 2024-06-26 16:01:41 -04:00 committed by GitHub
parent 651ffd651c
commit 46d5c22707
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 22149 additions and 0 deletions

34
.azdo/azure-pipeline.yaml Normal file
View File

@ -0,0 +1,34 @@
---
# CI pipeline: on pushes to main, run PoliCheck, build the backend and
# frontend docker images, export python dependencies to requirements.txt
# and register everything with Component Governance.
trigger:
  - main

pool:
  vmImage: ubuntu-latest

steps:
  - task: PoliCheck@2
    displayName: 'Run PoliCheck'
    inputs:
      targetType: 'F'
      targetArgument: '$(Build.SourcesDirectory)'
      result: 'PoliCheck.xml'
      toolVersion: Latest

  - script: docker build -t graphrag:backend -f docker/Dockerfile-backend .
    displayName: 'Build backend docker image'

  - script: docker build -t graphrag:frontend -f docker/Dockerfile-frontend .
    displayName: 'Build frontend docker image'

  # Component Governance does not support pyproject.toml yet.
  # For that reason, use toml-to-requirements to export the
  # dependencies into a requirements.txt file.
  - script: |
      pip install toml-to-requirements
      toml-to-req --toml-file pyproject.toml --poetry --optional-lists dev,test,backend,frontend
      # toml-to-req is not perfect. It will leave wildcard characters in the requirements.txt file which we remove
      sed -i 's/\*//g' requirements.txt
    displayName: 'Export python dependencies to requirements.txt'

  # Scans the generated requirements.txt plus the two images built above.
  - task: ComponentGovernanceComponentDetection@0
    displayName: 'Component Governance - Component Detection'
    inputs:
      scanType: 'Register'
      verbosity: 'Verbose'
      alertWarningLevel: 'High'
      dockerImagesToScan: graphrag:backend,graphrag:frontend

87
.devcontainer/Dockerfile Normal file
View File

@ -0,0 +1,87 @@
# Development container image: python 3.10 plus the Azure CLI, bicep, kubectl,
# helm, yq, docker and poetry, with a passwordless-sudo non-root user.
FROM python:3.10

# avoid common warnings
ENV DEBIAN_FRONTEND=noninteractive
ENV PIP_ROOT_USER_ACTION=ignore
ENV PIP_DISABLE_PIP_VERSION_CHECK=1

# configure environment
ARG ENVNAME="GraphRAG"
ARG USERNAME=vscode
ARG USER_UID=1001
ARG USER_GID=$USER_UID
ARG WORKDIR=/${ENVNAME}

# install python, pip, git, and other required tools
RUN apt-get update && apt-get install -y \
    ca-certificates \
    libicu-dev \
    git \
    curl \
    sudo \
    pre-commit \
    wget \
    jq \
    apt-transport-https \
    lsb-release \
    gnupg \
    software-properties-common

# install Azure CLI
RUN curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash
RUN az bicep install

# install kubectl
RUN curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl" \
    && install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl

# install helm
RUN curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 \
    && chmod 700 get_helm.sh \
    && ./get_helm.sh \
    && rm ./get_helm.sh

# install yq
RUN wget https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 -O /usr/bin/yq \
    && chmod +x /usr/bin/yq

# install docker
RUN curl -fsSL https://get.docker.com -o install-docker.sh \
    && sh install-docker.sh \
    && rm install-docker.sh

# cleanup to keep the image size down
# (-y on every apt command so the non-interactive build can never stall on a
# confirmation prompt; the package lists are removed last)
RUN apt-get purge -y --auto-remove \
    && apt-get autoremove -y \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# have poetry create the virtual environment inside the project directory
# (<project>/.venv), which is the location added to PATH further down
ENV POETRY_VIRTUALENVS_IN_PROJECT=true

# set up non-root user
# The group is created explicitly so that USER_GID is honoured, and the user
# gets USER_UID (previously USER_GID was passed as the uid by mistake).
RUN groupadd --gid ${USER_GID} ${USERNAME} \
    && useradd -ms /bin/bash --uid ${USER_UID} --gid ${USER_GID} ${USERNAME} \
    && echo "${USERNAME}:${USERNAME}" | chpasswd \
    # add user to sudo group and docker group
    && adduser ${USERNAME} sudo \
    && echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers \
    && usermod -aG docker ${USERNAME}

# switch to non-root user
USER ${USERNAME}

# install poetry
RUN curl -sSL https://install.python-poetry.org | python3 -

# add the local bin to the PATH for the non-root user
ENV PATH="/home/${USERNAME}/.local/bin:${PATH}"

# Add venv to beginning of path so we don't have to activate it
# NOTE(review): this path is hard-coded while WORKDIR defaults to /GraphRAG;
# it presumably matches the workspace mount point - confirm.
ENV PATH=/graphrag-accelerator/.venv/bin:$PATH

# copy the project files into the container and set ownership
COPY --chown=${USERNAME}:${USER_GID} . ${WORKDIR}
COPY entrypoint.sh /usr/local/bin/entrypoint.sh

# Create directories for vscode server and extensions
RUN mkdir -p /home/$USERNAME/.vscode-server/extensions \
    && chown -R $USER_UID:$USER_GID /home/$USERNAME/.vscode-server

ENTRYPOINT [ "/usr/local/bin/entrypoint.sh" ]
CMD ["bash"]

View File

@ -0,0 +1,85 @@
// Dev container definition for the graphrag-accelerator workspace.
{
    "name": "graphrag-accelerator",
    "build": {
        "dockerfile": "./Dockerfile",
        "args": {
            // NOTE(review): confirm the Dockerfile declares a matching
            // ARG DOCKER_GROUP_ID; otherwise this build arg has no effect.
            "DOCKER_GROUP_ID": "${localEnv:DOCKER_GROUP_ID}"
        }
    },
    "forwardPorts": [ 7071 ],
    "runArgs": [
        "--network", "host" // use host networking so that the dev container can access the API when running the container locally
    ],
    "remoteUser": "vscode",
    "remoteEnv": {
        // We add the .venv to the beginning of the path env in the Dockerfile
        // so that we use the proper python, however vscode rewrites/overwrites
        // the PATH in the image and puts /usr/local/bin in front of our .venv
        // path. This fixes that issue.
        "PATH": "${containerEnv:PATH}",
        // Add src folder to PYTHONPATH so that we can import modules that
        // are in the source dir.
        // (Previously ended in ":$PATH"; remoteEnv only substitutes
        // ${containerEnv:...} references, so that added a literal "$PATH"
        // entry, and executable search paths do not belong here anyway.)
        "PYTHONPATH": "/graphrag-accelerator/backend/"
    },
    "mounts": [
        // Keep command history
        "type=volume,source=graphrag-bashhistory,target=/home/vscode/command_history",
        "type=volume,source=graphrag-devcontainer-vscode-server,target=/home/vscode/.vscode-server/extensions",
        // Mounts the login details from the host machine so azcli works seamlessly in the container
        // "type=bind,source=${localEnv:HOME}/.azure,target=/home/vscode/.azure",
        // Mounts the ssh details from the host machine - this allows the container to connect to ssh hosts
        "type=bind,source=${localEnv:HOME}/.ssh,target=/home/vscode/.ssh",
        // Mount docker socket for docker builds
        "type=bind,source=/var/run/docker.sock,target=/var/run/docker.sock"
    ],
    "customizations": {
        "vscode": {
            // Set *default* container specific settings.json values on container create.
            "settings": {
                "python.pythonPath": "/graphrag-accelerator/.venv/bin/python",
                "python.defaultInterpreterPath": "/graphrag-accelerator/.venv/bin/python",
                "python.languageServer": "Pylance",
                "files.watcherExclude": {
                    "**/.git/objects/**": true,
                    "**/.git/subtree-cache/**": true,
                    "**/node_modules/*/**": true,
                    "**/.python_packages/*/**": true
                },
                "files.associations": {
                    // The value is a language identifier; "[jsonc]" (with
                    // brackets) is the syntax for language-scoped settings
                    // keys, not a language id.
                    "*.workbook": "jsonc"
                },
                "ruff.interpreter": [
                    "/graphrag-accelerator/.venv/bin/python"
                ],
                "ruff.lint.args": [
                    "--config",
                    "/graphrag-accelerator/pyproject.toml"
                ],
                "ruff.lint.run": "onType"
            },
            // Add the IDs of extensions you want installed when the container is created.
            "extensions": [
                "donjayamanne.githistory",
                "codezombiech.gitignore",
                "GitHub.copilot",
                "GitHub.copilot-chat",
                "ms-azuretools.vscode-docker",
                "ms-azuretools.vscode-bicep",
                "ms-dotnettools.vscode-dotnet-runtime",
                "ms-kubernetes-tools.vscode-kubernetes-tools",
                "ms-python.python",
                "ms-python.vscode-pylance",
                "ms-toolsai.datawrangler",
                "ms-toolsai.jupyter",
                "ms-toolsai.jupyter-keymap",
                "ms-toolsai.vscode-jupyter-cell-tags",
                "ms-toolsai.vscode-jupyter-slideshow",
                "ziyasal.vscode-open-in-github",
                "charliermarsh.ruff"
            ]
        }
    },
    // Runs the setup script (docker socket permissions, poetry install, git hooks)
    // once the container has been created.
    "postCreateCommand": "bash /usr/local/bin/entrypoint.sh",
    "workspaceMount": "source=${localWorkspaceFolder},target=/graphrag-accelerator,type=bind,consistency=cached",
    "workspaceFolder": "/graphrag-accelerator"
}

21
.devcontainer/entrypoint.sh Executable file
View File

@ -0,0 +1,21 @@
#!/bin/bash
# Dev container setup script.
#
# Opens up the docker socket, installs the project's python dependencies with
# poetry and configures git / pre-commit for the workspace. When arguments are
# supplied (for example the image CMD when this script runs as the container
# ENTRYPOINT) they are executed once the setup has finished.

################################
### Docker configuration ###
################################
sudo chmod 666 /var/run/docker.sock

################################
### Dependency configuration ###
################################
# Install graphrag dependencies
# Stop here if the workspace is missing rather than running poetry in
# whatever directory the script happened to start in.
cd /graphrag-accelerator || {
    echo "entrypoint.sh: /graphrag-accelerator not found" >&2
    exit 1
}
poetry install --no-interaction -v

#########################
### Git configuration ###
#########################
git config --global --add safe.directory /graphrag-accelerator
pre-commit install

#########################
### Command hand-off  ###
#########################
# Run the command passed by the caller, if any. Invoked without arguments
# (e.g. "bash /usr/local/bin/entrypoint.sh") this is a no-op.
if [ "$#" -gt 0 ]; then
    exec "$@"
fi

20
.dockerignore Normal file
View File

@ -0,0 +1,20 @@
# Files and directories excluded from the docker build context.

# Notebooks
notebooks

# Version control and repository metadata
.git
.github

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Python bytecode and tool caches
**/__pycache__
*.pyc
*.pyo
*.pyd
**/.pytest_cache
**/.ruff_cache

# OS artifacts
**/.DS_Store

2
.gitattributes vendored Normal file
View File

@ -0,0 +1,2 @@
# Let Git detect which files are text and normalize their line endings,
# so all text files are checked into the repo with LF
* text=auto

167
.gitignore vendored Normal file
View File

@ -0,0 +1,167 @@
# ignore vscode config files
.vscode/
# ignore all logfiles
logs
logs/*
# ignore example directory created by HelloWorld.ipynb
example_files/
files/
.scripts/
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# env files (matches any name containing ".env", e.g. .env, .env.local)
*.env*
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
# Bicep parameters
# (the broader pattern below is disabled; only main.parameters.json is ignored)
# **\.parameters\.json
main.parameters.json
# helm entries
# OSX leaves these everywhere on SMB shares
._*
# Chart dependencies
**/charts/*.tgz
**/Chart.lock
# NOTE(review): presumably an editor local-history folder - confirm
.history

34
.pre-commit-config.yaml Normal file
View File

@ -0,0 +1,34 @@
---
# pre-commit hooks: notebook output stripping, generic file hygiene,
# ruff lint/format and secret detection. Paths matching the top-level
# exclude pattern are skipped by every hook.
exclude: 'tests/data'

repos:
  - repo: https://github.com/kynan/nbstripout
    rev: '0.7.1'
    hooks:
      - id: nbstripout

  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: 'v4.6.0'
    hooks:
      - id: requirements-txt-fixer
      - id: mixed-line-ending
      - id: trailing-whitespace
      - id: check-json
        exclude: 'devcontainer.json'
      - id: pretty-format-json
        args:
          - '--autofix'
        exclude: 'tests/|devcontainer.json|^.*\.ipynb$'
      - id: end-of-file-fixer
        files: '\.(py|sh|bash|conf|yaml|yml|toml|ini)$'

  - repo: https://github.com/astral-sh/ruff-pre-commit
    rev: 'v0.4.6'
    hooks:
      # Run the linter.
      - id: ruff
        types_or: [python, pyi, jupyter]
      # Run the formatter.
      - id: ruff-format
        types_or: [python, pyi, jupyter]

  - repo: https://github.com/Yelp/detect-secrets
    rev: 'v1.5.0'
    hooks:
      - id: detect-secrets
        args: ['--baseline', '.secrets.baseline']

21699
.secrets.baseline Normal file

File diff suppressed because it is too large Load Diff