Skip to content

Commit 8ba8e46

Browse files
authored
Add unicode_filter.c
1 parent eda4802 commit 8ba8e46

File tree

1 file changed

+190
-0
lines changed

1 file changed

+190
-0
lines changed

tools/unicode_filter.c

Lines changed: 190 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,190 @@
1+
// unicode_filter.c
2+
// Removes a Unicode code point range from a font's cmap table in place.
// Supports cmap subtable formats 4 and 12.
3+
4+
#include <stdio.h>
5+
#include <stdlib.h>
6+
#include <stdint.h>
7+
#include <string.h>
8+
#include <fcntl.h>
9+
#include <unistd.h>
10+
#include <sys/mman.h>
11+
#include <sys/stat.h>
12+
13+
// Capacity of the fixed-size ranges[] array inside RangeList.
#define MAX_RANGES 128
14+
15+
/*
 * One span of code points, stored as a pair of 32-bit bounds.
 * should_delete() compares both bounds inclusively.
 */
typedef struct
{
    uint32_t start;   /* first code point of the span */
    uint32_t end;     /* last code point of the span  */
} Range;
19+
20+
typedef struct {
21+
Range ranges[MAX_RANGES];
22+
int count;
23+
} RangeList;
24+
25+
// Big-endian read/write helpers
26+
/* Read the two bytes at b as a big-endian 16-bit value (b[0] is the high byte). */
static uint16_t r16(const uint8_t *b)
{
    unsigned hi = b[0];
    unsigned lo = b[1];

    return (uint16_t)((hi << 8) | lo);
}
27+
/*
 * Read the four bytes at b as a big-endian 32-bit value (b[0] is the most
 * significant byte).
 *
 * Each byte is widened to uint32_t before shifting: shifting the promoted
 * (signed) int left by 24 would be undefined behaviour whenever b[0] >= 0x80.
 */
static uint32_t r32(const uint8_t *b)
{
    return ((uint32_t)b[0] << 24) |
           ((uint32_t)b[1] << 16) |
           ((uint32_t)b[2] << 8)  |
            (uint32_t)b[3];
}
28+
/* Store v at b as two bytes in big-endian order (high byte first). */
static void w16(uint8_t *b, uint16_t v)
{
    b[1] = (uint8_t)(v & 0xFF);
    b[0] = (uint8_t)(v >> 8);
}
29+
/* Store v at b as four bytes in big-endian order (most significant byte first). */
static void w32(uint8_t *b, uint32_t v)
{
    /* Fill from the last byte backwards, peeling off the low 8 bits each step. */
    for (int k = 3; k >= 0; k--) {
        b[k] = (uint8_t)(v & 0xFF);
        v >>= 8;
    }
}
30+
31+
// checksum
32+
/*
 * Sum `len` bytes of `data` as a sequence of big-endian 32-bit words,
 * wrapping modulo 2^32.
 *
 * If `len` is not a multiple of four, the final partial word is completed
 * with zero bytes; bytes beyond `len` are never read.
 */
uint32_t checksum(uint8_t *data,uint32_t len){
    uint32_t padded = (len+3)&~3;   /* len rounded up to a multiple of 4 */
    uint32_t total = 0;
    uint32_t pos = 0;

    while (pos < padded) {
        if (pos + 3 < len) {
            /* A complete word is available. */
            total += r32(data + pos);
        } else {
            /* Trailing partial word: copy what exists, leave the rest zero. */
            uint8_t word[4] = {0, 0, 0, 0};
            for (uint32_t k = 0; k < 4 && pos + k < len; k++) {
                word[k] = data[pos + k];
            }
            total += r32(word);
        }
        pos += 4;
    }

    return total;
}
50+
51+
// check if this range overlaps any delete range
52+
/*
 * Report whether the inclusive span [start, end] intersects any entry in
 * `list`.
 *
 * Returns 1 on the first entry that shares at least one value with the span,
 * and 0 when no entry does (including when the list is empty).
 */
int should_delete(uint32_t start,uint32_t end,RangeList *list){
    int idx = 0;

    while (idx < list->count) {
        const Range *candidate = &list->ranges[idx];

        /* Two inclusive spans intersect when each begins no later than the other ends. */
        if (end >= candidate->start && start <= candidate->end) {
            return 1;
        }
        idx++;
    }

    return 0;
}
59+
60+
// format12 safe rebuild
61+
/*
 * Drop groups from a format 12 subtable, in place.
 *
 * Reads the group count at offset 12 and walks the 12-byte group records
 * that start at offset 16. A record whose first two 32-bit fields
 * (start and end code) intersect any range in `list` is discarded; the
 * survivors are packed towards the front, preserving their order. Finally
 * the group count at offset 12 is overwritten with the number kept.
 *
 * Only the group count is rewritten; no other header field is touched, and
 * bytes after the last surviving record are left as they were.
 */
void rebuild_format12_safe(uint8_t *sub,RangeList *list){
    uint8_t *const groups = sub + 16;
    const uint32_t total = r32(sub + 12);
    uint32_t kept = 0;

    for (uint32_t idx = 0; idx < total; idx++) {
        uint8_t *record = groups + idx*12;
        uint32_t first = r32(record);
        uint32_t last = r32(record + 4);

        if (!should_delete(first, last, list)) {
            /* Slide the record down only once a gap has opened up. */
            if (kept != idx) {
                memcpy(groups + kept*12, record, 12);
            }
            kept++;
        }
    }

    w32(sub + 12, kept);
}
83+
84+
// format4 rebuild
85+
/*
 * Unmap segments of a format 4 subtable, in place.
 *
 * Layout read here: segCountX2 at offset 6, then from offset 14 the parallel
 * arrays endCode[segCount], a 2-byte reservedPad, startCode[segCount],
 * idDelta[segCount] and idRangeOffset[segCount]. The glyphIdArray that
 * follows is not modified.
 *
 * Every segment whose [startCode, endCode] span intersects a range in `list`
 * is unmapped as a whole. Segments whose startCode is 0xFFFF (the terminating
 * segment) are left alone.
 *
 * Why not simply zero idDelta and idRangeOffset: with idRangeOffset == 0 the
 * glyph index is (code + idDelta) mod 65536, so zeroing both maps every code
 * point c in the segment to glyph ID c rather than to "no glyph". Instead the
 * segment is collapsed to the single code point endCode and given
 * idDelta = -endCode, so that one remaining code point resolves to glyph 0
 * and the former members below it fall into a gap between segments. endCode
 * itself is untouched, so the arrays stay sorted and the segment count and
 * search fields remain valid.
 *
 * The arrays are addressed as bytes through r16()/w16(); nothing requires
 * `sub` to be 2-byte aligned.
 */
void rebuild_format4_safe(uint8_t *sub,RangeList *list){
    uint16_t segCount = r16(sub+6)/2;

    uint8_t *endCode       = sub + 14;
    uint8_t *startCode     = endCode + segCount*2 + 2;   /* +2 skips reservedPad */
    uint8_t *idDelta       = startCode + segCount*2;
    uint8_t *idRangeOffset = idDelta + segCount*2;

    for(int i=0;i<segCount;i++){
        uint32_t start = r16(startCode + i*2);
        uint32_t end   = r16(endCode + i*2);

        if(start == 0xFFFF) continue;   /* terminating segment */

        if(!should_delete(start,end,list)) continue;

        /* Collapse to [end, end] and map that single code point to glyph 0. */
        w16(startCode + i*2,(uint16_t)end);
        w16(idDelta + i*2,(uint16_t)(0x10000u - end));
        w16(idRangeOffset + i*2,0);
    }
}
115+
116+
// fix checksumAdjustment in head
117+
/*
 * Recompute the whole-file checksum adjustment stored in the 'head' table.
 *
 * buf  - start of the font image; the table count is read at offset 4 and
 *        16-byte table records start at offset 12.
 * size - number of valid bytes at buf.
 *
 * The 4-byte field at offset 8 of the 'head' table is zeroed, the checksum
 * of the entire buffer is taken, and the field is set to
 * 0xB1B0AFBA - checksum.
 *
 * Returns without modifying anything when the buffer is too small to hold
 * the table directory, when there is no 'head' record, or when the 'head'
 * record's offset does not leave room for the adjustment field inside the
 * buffer (previously such input caused an out-of-bounds write).
 */
void fix_font_checksum(uint8_t *buf,uint32_t size){
    if(size<12) return;

    uint16_t numTables=r16(buf+4);
    if((uint32_t)numTables*16 > size-12) return;   /* directory would overrun buf */

    uint8_t *head=NULL;

    for(int i=0;i<numTables;i++){
        uint8_t *rec=buf+12+i*16;
        if(!memcmp(rec,"head",4)){
            uint32_t off=r32(rec+8);
            /* Bytes head+8 .. head+11 must lie inside the buffer. */
            if(off>size || size-off<12) return;
            head=buf+off;
            break;
        }
    }

    if(!head) return;

    /* The field must be zero while the file-wide sum is computed. */
    w32(head+8,0);
    uint32_t sum=checksum(buf,size);
    uint32_t adj=0xB1B0AFBA - sum;
    w32(head+8,adj);
}
136+
137+
int main(int argc,char **argv){
138+
if(argc<3){
139+
printf("Usage: %s font.ttf U+XXXX-U+YYYY\n",argv[0]);
140+
return 1;
141+
}
142+
143+
RangeList list;
144+
list.count=1;
145+
sscanf(argv[2],"U+%x-U+%x",&list.ranges[0].start,&list.ranges[0].end);
146+
147+
int fd=open(argv[1],O_RDWR);
148+
struct stat st;
149+
fstat(fd,&st);
150+
151+
uint8_t *buf=mmap(NULL,st.st_size,PROT_READ|PROT_WRITE,MAP_SHARED,fd,0);
152+
153+
uint16_t numTables=r16(buf+4);
154+
uint32_t cmap_offset=0;
155+
uint8_t *cmap_record=NULL;
156+
157+
for(int i=0;i<numTables;i++){
158+
uint8_t *rec=buf+12+i*16;
159+
if(!memcmp(rec,"cmap",4)){
160+
cmap_offset=r32(rec+8);
161+
cmap_record=rec;
162+
}
163+
}
164+
165+
uint16_t numSub=r16(buf+cmap_offset+2);
166+
for(int i=0;i<numSub;i++){
167+
uint8_t *rec=buf+cmap_offset+4+i*8;
168+
uint32_t off=r32(rec+4);
169+
uint8_t *sub=buf+cmap_offset+off;
170+
uint16_t format=r16(sub);
171+
172+
if(format==12)
173+
rebuild_format12_safe(sub,&list);
174+
if(format==4)
175+
rebuild_format4_safe(sub,&list);
176+
}
177+
178+
uint32_t cmap_len=r32(cmap_record+12);
179+
uint32_t sum=checksum(buf+cmap_offset,cmap_len);
180+
w32(cmap_record+4,sum);
181+
182+
fix_font_checksum(buf,st.st_size);
183+
184+
msync(buf,st.st_size,MS_SYNC);
185+
munmap(buf,st.st_size);
186+
close(fd);
187+
188+
printf("SAFE CMAP REBUILD COMPLETE\n");
189+
return 0;
190+
}

0 commit comments

Comments
 (0)