Skip to content

Commit 0fe945f

Browse files
committed
feat: auto writer
1 parent 681169a commit 0fe945f

1 file changed

Lines changed: 119 additions & 0 deletions

File tree

writer.py

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
from openai import OpenAI
2+
from bs4 import BeautifulSoup
3+
import requests
4+
import random
5+
import time
6+
import datetime
7+
import os
8+
9+
# Output directory for today's post, named after the current local date.
path_to = f'src/content/blog/{datetime.datetime.now().strftime("%Y-%m-%d")}'

# Treat the day as "done" only when the article file itself exists. The
# directory is created before any generation happens, so checking the
# directory alone would make a failed run block every retry on the same day.
if os.path.exists(f"{path_to}/index.md"):
    print("Article already generated today.")
    # ``exit`` is a helper installed by the ``site`` module for interactive
    # use; raising SystemExit works in every interpreter configuration.
    raise SystemExit(0)
else:
    os.makedirs(path_to, exist_ok=True)
    print(f"Created directory {path_to}")

start = time.time()
print("Connecting to LLM API ...")
# NOTE(review): the original looked up "DS_APIKEY$"; the trailing "$" looks
# like a typo, so the plain name is tried first and the original spelling is
# kept as a fallback -- confirm which variable the deployment actually sets.
deepseek = OpenAI(
    base_url="https://api.deepseek.com",
    api_key=os.environ.get("DS_APIKEY") or os.environ.get("DS_APIKEY$"),
)
print(f"Initialized LLM API. ({time.time() - start:.1f}s)")
22+
23+
def generate(context, provider, model):
    """Run one chat completion and return the reply text, stripped.

    Args:
        context: List of chat messages passed as ``messages``.
        provider: Client object exposing ``chat.completions.create``.
        model: Model name passed as ``model``.

    Returns:
        The ``content`` of the first choice with surrounding whitespace
        removed.

    Raises:
        ValueError: If the first choice carries no text content.
    """
    completion = provider.chat.completions.create(
        model=model,
        messages=context,
    )
    content = completion.choices[0].message.content
    # The content field may be None; fail with a clear message instead of an
    # AttributeError on ``None.strip()``.
    if content is None:
        raise ValueError(f"Model {model!r} returned no text content.")
    return content.strip()
29+
30+
def scrape_website(url, css_selector, timeout=10):
    """Fetch *url* and return the elements matching *css_selector*.

    Args:
        url: Address of the page to download.
        css_selector: CSS selector handed to ``BeautifulSoup.select``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely.

    Returns:
        The list of matched elements, or an empty list when the request
        fails or the response status is not 200.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as error:
        # Network failures are reported the same way as a non-200 status.
        print(f"Request to {url} failed: {error}")
        return []
    if response.status_code != 200:
        return []
    soup = BeautifulSoup(response.content, "html.parser")
    return soup.select(css_selector)
37+
38+
# Text of every ".titleline" element found on the Hacker News front page.
topics = [
    topic.get_text(strip=True)
    for topic in scrape_website("https://news.ycombinator.com/", ".titleline")
]
if not topics:
    # An empty scrape would otherwise fail later with an obscure error from
    # the random module.
    raise SystemExit("No topics could be scraped from Hacker News; aborting.")

# Sample without replacement so no headline is repeated in the prompt, and
# clamp the lower bound so fewer than five headlines is not an error.
sample_size = random.randint(min(5, len(topics)), len(topics))
topics_text = "\n".join(random.sample(topics, k=sample_size))
print(f"Scraped {len(topics)} topics from Hacker News.")
41+
42+
def extract_topic(topics):
    """Ask the "deepseek-chat" model for a single blog topic.

    Args:
        topics: Text that is embedded into the user prompt.

    Returns:
        The text returned by ``generate`` for the two-message conversation.
    """
    system_prompt = "你在为一篇技术博客确定一个主题。直接用中文输出主题。"
    user_prompt = f"阅读以下是 HackerNews 的热门文章,然后写一个可以用于技术博客的主题。这个主题应当是一个通用、普通的技术,不能是一个事件或其它东西。\n\n{topics}\n\n只需要一个主题,直接输出。"
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    # ``deepseek`` is the module-level client; reading it needs no ``global``.
    return generate(messages, deepseek, "deepseek-chat")
48+
49+
def outline(topic):
    """Ask the "deepseek-reasoner" model for an article outline on *topic*.

    Args:
        topic: Topic text that is embedded into the prompt.

    Returns:
        The text returned by ``generate`` for the single user message.
    """
    prompt = f"我要写一篇关于「{topic}」的博客文章。帮我列一个详细的文章提纲。"
    messages = [{"role": "user", "content": prompt}]
    # ``deepseek`` is the module-level client; reading it needs no ``global``.
    return generate(messages, deepseek, "deepseek-reasoner")
54+
55+
def write_from_outline(outline):
    """Ask the "deepseek-reasoner" model to write an article from an outline.

    Args:
        outline: Outline text placed at the start of the prompt.

    Returns:
        The text returned by ``generate`` for the single user message.
    """
    prompt = f"{outline}\n\n根据这个提纲中关于技术知识的部分,写出一篇技术博客文章。文章中避免出现图片,避免使用列表。每一段出现的代码都进行较为详细的解读。在讲述内容时尽量使用段落的语言,语言风格可以略偏专业,但保持清晰。使用markdown输出,使用latex公式,标题尽量只用一级标题 `#` 和二级标题 `##`,不要用分割线。直接输出正文。"
    messages = [{"role": "user", "content": prompt}]
    # ``deepseek`` is the module-level client; reading it needs no ``global``.
    return generate(messages, deepseek, "deepseek-reasoner")
60+
61+
def summary(article):
    """Ask the "deepseek-chat" model for a short introduction to *article*.

    Args:
        article: Article text that is embedded into the user prompt.

    Returns:
        The text returned by ``generate`` for the two-message conversation.
    """
    system_prompt = "你是一个技术博客简介写作者,简介不一定需要涵盖文章的全部内容,能起到一定的提示作用即可。直接输出简介。"
    user_prompt = f"给这篇文章写一个15字的简短介绍:\n\n{article}"
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    # ``deepseek`` is the module-level client; reading it needs no ``global``.
    return generate(messages, deepseek, "deepseek-chat")
67+
68+
# Stage 1: pick a topic from the sampled headlines.
start = time.time()
print("Generating topic ...")
topic = extract_topic(topics_text)
topic_seconds = time.time() - start
print(f"Determined topic ({topic_seconds:.1f}s): {topic}")

# Stage 2: turn the topic into an outline.
start = time.time()
print("Generating outline ...")
outline_result = outline(topic)
outline_seconds = time.time() - start
print(f"Outline generated ({outline_seconds:.1f}s).")

# Stage 3: write the article from the outline.
start = time.time()
print("Generating article ...")
article = write_from_outline(outline_result)
article_seconds = time.time() - start
print(f"Article generated ({article_seconds:.1f}s).")

# Stage 4: produce the short description used in the front matter.
start = time.time()
print("Generating summary ...")
summary_result = summary(article)
summary_seconds = time.time() - start
print(f"Summary ({summary_seconds:.1f}s): {summary_result}")
87+
88+
def _yaml_quote(value):
    """Return *value* as a single-line YAML double-quoted scalar."""
    # Collapse newlines/whitespace runs, then escape the two characters that
    # are special inside YAML double quotes (backslashes are common in LaTeX).
    flattened = " ".join(str(value).split())
    escaped = flattened.replace("\\", "\\\\").replace('"', '\\"')
    return f'"{escaped}"'


def _is_horizontal_rule(line):
    """Return True when *line* consists only of three or more dashes."""
    stripped = line.strip()
    return len(stripped) >= 3 and set(stripped) == {"-"}


def _build_markdown(article, author, description, date_text, fallback_title):
    """Build the post: YAML front matter followed by the article body.

    The first line starting with "# " supplies the title; that line and
    everything before it are left out of the body. When no such line exists
    the whole article is kept and *fallback_title* is used instead of writing
    an empty file.

    Returns:
        A ``(title, metadata, markdown)`` tuple, where ``markdown`` is the
        full file content including ``metadata``.
    """
    article_lines = article.split("\n")
    title = fallback_title
    body_start = 0
    for index, line in enumerate(article_lines):
        if line.startswith("# "):
            title = line[1:].strip()
            body_start = index + 1
            break

    metadata = "\n".join([
        "---",
        f"title: {_yaml_quote(title)}",
        f"author: {_yaml_quote(author)}",
        f'date: "{date_text}"',
        f"description: {_yaml_quote(description)}",
        "latex: true",
        "pdf: true",
        "---",
    ]) + "\n"

    body = []
    in_code_fence = False
    for line in article_lines[body_start:]:
        if line.lstrip().startswith(("```", "~~~")):
            in_code_fence = not in_code_fence
        elif not in_code_fence and _is_horizontal_rule(line):
            # Drop divider lines, but never touch code samples or table
            # separators such as "---|---".
            continue
        body.append(line + "\n")
    return title, metadata, metadata + "".join(body)


author = random.choice(["杨其臻", "杨子凡", "叶家炜", "黄京"])
title, metadata, markdown_file = _build_markdown(
    article,
    author,
    summary_result,
    datetime.datetime.now().strftime("%b %d, %Y"),
    topic,
)
print(f"Detected title: {title}")
print(f"Injecting metadata:\n{metadata.strip()}")

with open(f"{path_to}/index.md", "w", encoding="utf-8") as f:
    f.write(markdown_file)

print(f"Markdown file generated at {path_to}/index.md")

0 commit comments

Comments
 (0)