@@ -553,13 +553,46 @@ def process_html(self, title, display_name, ocw_xml, seq, handle_broken_xml=Fals
553
553
vert = etree .SubElement (seq ,'vertical' )
554
554
vert .set ('display_name' ,dn )
555
555
self .add_contents_to_vert (a , vert )
556
+ self .process_edx_xml_for_local_pdf_links (vert , seq )
556
557
else :
557
558
self .do_href (p )
558
559
intro .append (p )
559
560
if len (intro )== 0 :
560
561
intro .getparent ().remove (intro ) # remove intro if empty
561
562
else :
562
563
self .process_html_intro_for_table_of_pdf_files (intro , seq )
564
+ self .process_edx_xml_for_local_pdf_links (intro , seq )
565
+
566
def process_edx_xml_for_local_pdf_links(self, xml, seq):
    '''
    Process an edX XML block, and see if any links are local PDF files
    which haven't yet been processed.  For each such link, call
    self.add_pdf_vertical (intended to generate a PDF vertical with an
    embedded PDF viewer) and mark the link as processed.

    xml = element whose descendant <a> elements are scanned; its
          display_name attribute is used only in the summary message
    seq = element handed through unchanged to self.add_pdf_vertical

    A link counts as a local PDF when it has an href which ends in
    ".pdf" (case-insensitive) and does not start with "http".  Links
    already carrying pdf_processed="1" are counted but not added again.

    Returns None; progress is reported via print.
    '''
    n_found = 0     # local PDF links seen (including already-processed ones)
    n_added = 0     # links for which a new vertical was requested
    n_links = 0     # all <a> elements seen
    dn = xml.get('display_name')
    for aelem in xml.findall('.//a'):
        n_links += 1
        href = aelem.get('href')
        # Single-argument print(...) emits the same text under Python 2
        # (parenthesized expression) and Python 3 (function call).
        print("            link: %s (%s)" % (aelem.text, href))
        if not href:
            continue
        if not href.lower().endswith(".pdf"):
            continue
        if href.startswith("http"):
            continue
        n_found += 1
        # XML attribute values are strings, so the marker must be compared
        # with "1"; comparing with the integer 1 never matches and caused
        # already-processed links to be added a second time.
        if aelem.get('pdf_processed') == "1":
            continue
        title = aelem.text or ("File %s" % os.path.basename(href))
        self.add_pdf_vertical(title, href, aelem, seq)
        aelem.set("pdf_processed", "1")  # so it isn't done again
        n_added += 1
    # The summary is always printed (the previous guard was "if 1 or n_found",
    # which is always true).
    print("            [%s] Found %s links, %s are local PDF, %d new ones added as vertical pages" % (dn, n_links, n_found, n_added))
595
+
563
596
564
597
def process_html_intro_for_table_of_pdf_files (self , intro_xml , seq ):
565
598
'''
@@ -601,6 +634,7 @@ def process_html_intro_for_table_of_pdf_files(self, intro_xml, seq):
601
634
href = aelem .get ('href' )
602
635
if href and href .lower ().endswith ("pdf" ):
603
636
self .add_pdf_vertical (rowtext , href , aelem , seq )
637
+ aelem .set ("pdf_processed" , "1" ) # so it isn't done again
604
638
nadded += 1
605
639
summary = table .get ('summary' )
606
640
print " Found table '%s' of PDFs, with %d rows: added %d pdf vertical pages" % (summary , nrows , nadded )
0 commit comments