-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathexport.sh
More file actions
executable file
·111 lines (94 loc) · 3.39 KB
/
export.sh
File metadata and controls
executable file
·111 lines (94 loc) · 3.39 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
#!/bin/bash
# =============================================================================
# Clean Project Export for LLM Analysis (Final Directory Fix)
# =============================================================================
# Abort on the first unhandled command failure.
set -e

# Export destination, relative to the directory the script is started from.
OUTPUT_DIR="documentation/llm"
OUTPUT_FILE="$OUTPUT_DIR/dump.txt"
# Invocation directory; recorded in the export header.
PROJECT_PATH="$(pwd)"

# Ensure we are in a git work tree.
# `git rev-parse --is-inside-work-tree` exits 0 but prints "false" when run
# inside the .git directory or a bare repository, so the printed value is
# checked rather than the exit status alone.
if [[ "$(git rev-parse --is-inside-work-tree 2> /dev/null)" != "true" ]]; then
  # Diagnostics go to stderr so they never mix with captured stdout.
  echo "Error: This script must be run inside a Git repository." >&2
  exit 1
fi

mkdir -p "$OUTPUT_DIR"
# Console banner announcing the run.
printf '%s\n' \
  "==============================================" \
  " Generating Clean Project Export" \
  "=============================================="

# Create (or truncate) the output file and write the export header into it.
# The here-document is unquoted on purpose so the timestamp and project path
# are expanded; its final empty line is the blank separator after the header.
cat > "$OUTPUT_FILE" << EOF
===============================================================================
PROJECT EXPORT (GIT TRACKED ONLY)
Generated: $(date)
Project Path: $PROJECT_PATH
===============================================================================

EOF
# 1. Directory Structure (Using Python for a reliable tree)

#######################################
# Render a list of slash-separated paths as a box-drawing tree.
# Arguments: none
# Inputs:    NUL-delimited paths on stdin (empty entries are ignored)
# Outputs:   one line per path component on stdout; siblings are sorted
#            by name and every nesting level is indented by four columns
# Returns:   exit status of python3
#######################################
render_tree() {
  # PYTHONIOENCODING pins stdout to UTF-8 so the box-drawing characters can
  # be printed regardless of the caller's locale.
  # The program is single-quoted for the shell, so it must not contain any
  # single quote characters.
  PYTHONIOENCODING=utf-8 python3 -c '
import sys

# Nested dicts: each key is a path component, each value its children.
tree = {}
raw = sys.stdin.buffer.read().decode("utf-8", "replace")
for path in raw.split("\0"):
    if not path:
        continue
    node = tree
    for part in path.split("/"):
        node = node.setdefault(part, {})


def print_tree(node, indent=""):
    entries = sorted(node.items())
    for i, (name, children) in enumerate(entries):
        is_last = i == len(entries) - 1
        print(indent + ("└── " if is_last else "├── ") + name)
        # Both continuation prefixes are four columns wide, matching the
        # width of the connectors above, so deeper levels stay aligned.
        print_tree(children, indent + ("    " if is_last else "│   "))


print_tree(tree)
'
}

echo "Generating directory structure..."
{
  echo "DIRECTORY STRUCTURE:"
  echo "==================="
  # -z makes git print raw NUL-terminated names; without it, names holding
  # non-ASCII or special characters are emitted C-quoted and would show up
  # mangled in the tree.
  git ls-files -z | render_tree
  echo ""
} >> "$OUTPUT_FILE"
# 2. Collect and Process Files
printf '%s\n' "Collecting and cleaning file contents..."

# Append the heading that introduces the per-file dumps, followed by a blank
# separator line.
printf '%s\n' \
  "FILE CONTENTS:" \
  "==============" \
  "" >> "$OUTPUT_FILE"
#######################################
# Report whether a file should be treated as binary and kept out of the dump.
# Arguments: $1 - path to an existing, readable file
# Returns:   0 if the file is binary (known binary extension, MIME sniffing
#            reports "binary", or it contains a NUL byte), 1 otherwise
#######################################
is_binary_file() {
  local path=$1

  # Skip specific binary extensions
  if [[ "$path" =~ \.(ico|png|jpg|jpeg|gif|dll|exe|pdb|bin|zip|tar|gz|7z|ttf|woff|woff2)$ ]]; then
    return 0
  fi

  # Content-based binary check. --brief keeps the file name out of the
  # output, so a text file whose name merely contains "binary"
  # (e.g. binary_search.py) is not matched by accident.
  if file --brief --mime -- "$path" | grep -q "binary"; then
    return 0
  fi

  # Null byte check (prevents "Unsupported Encoding" errors in LLM front
  # ends). Portable replacement for GNU-only `grep -P`: the file contains a
  # NUL byte exactly when deleting NUL bytes changes its content.
  if ! LC_ALL=C tr -d '\000' < "$path" | cmp -s - "$path"; then
    return 0
  fi

  return 1
}

#######################################
# Format a byte count as kilobytes with two decimals (truncated).
# Arguments: $1 - non-negative integer number of bytes
# Outputs:   e.g. "1.50" for 1536, written to stdout without a newline
#######################################
format_size_kb() {
  local bytes=$1
  # Integer arithmetic only: no dependency on bc, and values below 1 KB keep
  # their leading zero.
  printf '%d.%02d' "$((bytes / 1024))" "$((bytes % 1024 * 100 / 1024))"
}

# Walk every tracked file. -z together with `read -d ''` passes names through
# verbatim (no C-quoting by git, no whitespace trimming by read).
git ls-files -z | while IFS= read -r -d '' tracked_path; do
  # Skip the export script itself and the output file
  if [[ "$tracked_path" == "export.sh" || "$tracked_path" == "$OUTPUT_FILE" ]]; then
    continue
  fi

  # Tracked entries that are missing from the working tree, unreadable, or
  # not regular files (e.g. submodule directories) cannot be dumped.
  [[ -f "$tracked_path" && -r "$tracked_path" ]] || continue

  if is_binary_file "$tracked_path"; then
    continue
  fi

  # GNU stat first, then the BSD/macOS spelling; fall back to 0 if both fail.
  size_bytes=$(stat -c%s "$tracked_path" 2> /dev/null \
    || stat -f%z "$tracked_path" 2> /dev/null \
    || echo "0")

  # Skip large files (>500KB)
  if ((size_bytes > 512000)); then
    continue
  fi

  {
    printf '%s\n' \
      "================================================================================" \
      "FILE: $tracked_path" \
      "SIZE: $(format_size_kb "$size_bytes") KB" \
      "================================================================================" \
      ""
    # tr -d removes non-printable control characters that break LLM parsers;
    # tab, newline and carriage return are kept. LC_ALL=C makes tr work on
    # raw bytes so invalid multibyte sequences cannot make it fail.
    LC_ALL=C tr -d '\000-\010\013\014\016-\037' < "$tracked_path"
    printf '\n\n'
  } >> "$OUTPUT_FILE"

  echo "Processed: $tracked_path"
done
# Blank separator line, then the location of the finished export.
printf '\n%s\n' "Export Complete: $OUTPUT_FILE"