Skip to content

Commit 58ad8a9

Browse files
committed
feat: Initial indexing and scraping versions
1 parent a2f2d31 commit 58ad8a9

31 files changed

+160624
-1
lines changed

.devcontainer/devcontainer.json

Lines changed: 50 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,50 @@
1+
{
2+
"name": "Development",
3+
"image": "mcr.microsoft.com/devcontainers/python:1-3.11-bookworm",
4+
"forwardPorts": [3000, 8000],
5+
"init": true,
6+
"remoteEnv": {
7+
"HOMEBREW_AUTO_UPDATE_SECS": "604800"
8+
},
9+
"features": {
10+
"ghcr.io/devcontainers/features/common-utils:2": {
11+
"configureZshAsDefaultShell": true
12+
},
13+
"ghcr.io/devcontainers/features/azure-cli:1": {},
14+
"ghcr.io/jlaundry/devcontainer-features/azure-functions-core-tools:1": {},
15+
"ghcr.io/devcontainers/features/docker-in-docker:2": {}
16+
},
17+
"onCreateCommand": "cd /workspaces/scrape-it-now && python3 -m venv .venv",
18+
"updateContentCommand": "cd /workspaces/scrape-it-now && . .venv/bin/activate && make upgrade install",
19+
"customizations": {
20+
"vscode": {
21+
"settings": {
22+
"python.defaultInterpreterPath": ".venv/bin/python"
23+
},
24+
"extensions": [
25+
"bierner.markdown-mermaid",
26+
"bradlc.vscode-tailwindcss",
27+
"DavidAnson.vscode-markdownlint",
28+
"EditorConfig.EditorConfig",
29+
"github.vscode-github-actions",
30+
"mechatroner.rainbow-csv",
31+
"mikestead.dotenv",
32+
"ms-azuretools.vscode-azurefunctions",
33+
"ms-azuretools.vscode-bicep",
34+
"ms-python.black-formatter",
35+
"ms-python.debugpy",
36+
"ms-python.isort",
37+
"ms-python.pylint",
38+
"ms-python.python",
39+
"ms-python.vscode-pylance",
40+
"ms-toolsai.jupyter",
41+
"ms-vscode.azurecli",
42+
"ms-vscode.makefile-tools",
43+
"qwtel.sqlite-viewer",
44+
"redhat.vscode-yaml",
45+
"visualstudioexptteam.vscodeintellicode",
46+
"wholroyd.jinja"
47+
]
48+
}
49+
}
50+
}

.editorconfig

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,18 @@
1+
# EditorConfig is awesome: https://EditorConfig.org
2+
3+
# top-most EditorConfig file
4+
root = true
5+
6+
[*]
7+
charset = utf-8
8+
end_of_line = lf
9+
indent_style = space
10+
insert_final_newline = true
11+
trim_trailing_whitespace = true
12+
13+
[*.py]
14+
indent_size = 4
15+
16+
[Makefile]
17+
indent_size = 4
18+
indent_style = tab

.env.example

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,18 @@
1+
# OpenAI
2+
AZURE_OPENAI_API_KEY=xxx
3+
AZURE_OPENAI_CHAT_DEPLOYMENT_NAME=gpt-4o-2024-05-13
4+
AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME=text-embedding-3-large-1
5+
AZURE_OPENAI_EMBEDDING_DIMENSIONS=3072
6+
AZURE_OPENAI_ENDPOINT=https://xxx.openai.azure.com
7+
8+
# AI Search
9+
AZURE_SEARCH_API_KEY=xxx
10+
AZURE_SEARCH_ENDPOINT=https://xxx.search.windows.net
11+
AZURE_SEARCH_INDEX_NAME=learn
12+
13+
# Blob Storage
14+
AZURE_STORAGE_CONNECTION_STRING=DefaultEndpointsProtocol=https;AccountName=xxx;AccountKey=xxx;EndpointSuffix=core.windows.net
15+
AZURE_STORAGE_CONTAINER=learn-scraping
16+
17+
# Application Insights
18+
APPLICATIONINSIGHTS_CONNECTION_STRING=xxx

.github/dependabot.yml

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,17 @@
1+
version: 2
2+
updates:
3+
- package-ecosystem: github-actions
4+
directory: ""
5+
target-branch: develop
6+
schedule:
7+
interval: weekly
8+
- package-ecosystem: pip
9+
directory: ""
10+
target-branch: develop
11+
schedule:
12+
interval: weekly
13+
- package-ecosystem: "devcontainers"
14+
directory: ""
15+
target-branch: develop
16+
schedule:
17+
interval: weekly

.github/workflows/codeql.yml

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
name: "CodeQL"
2+
3+
on:
4+
push:
5+
branches:
6+
- develop
7+
- main
8+
pull_request:
9+
branches:
10+
- develop
11+
- main
12+
13+
jobs:
14+
analyze:
15+
name: Analyze (${{ matrix.language }})
16+
permissions:
17+
actions: read
18+
contents: read
19+
packages: read
20+
security-events: write
21+
runs-on: ubuntu-22.04
22+
timeout-minutes: 360
23+
strategy:
24+
fail-fast: false
25+
matrix:
26+
include:
27+
- build-mode: none
28+
language: python
29+
steps:
30+
- name: Checkout repository
31+
uses: actions/checkout@v4
32+
33+
- name: Initialize CodeQL
34+
uses: github/codeql-action/init@v3
35+
with:
36+
build-mode: ${{ matrix.build-mode }}
37+
languages: ${{ matrix.language }}
38+
39+
- name: Perform CodeQL Analysis
40+
uses: github/codeql-action/analyze@v3
41+
with:
42+
category: "/language:${{matrix.language}}"

0 commit comments

Comments
 (0)