# 编辑代码 ("edit code") — stray editor text, commented out so Python can execute this file

import os
import requests
import datetime
# import pytz
# import schedule
import time

# Followin "recommended news" feed endpoint (queried via POST with a JSON payload).
url = "https://api.followin.io/feed/list/recommended/news"
# url = "https://api.binance.com/api/v3/klines?symbol=BTCUSDT&interval=5m&limit=20"
# Output directory served to web readers.
Data_folder = "/www/wwwroot/llmweb/xinwen1lian"
# Archive directory keeping a second copy of every scraped file.
Data_folder2 = "/www/pachongFile/followinFile"

# Browser-like request headers so the API treats us as a normal client.
headers = {
    "Accept": "application/json, text/plain, */*",
    "Content-Type": "application/json",
    "Origin": "https://followin.io",
    "Referer": "https://followin.io/",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36"
}

def write(file_path, content):
    """Append every news item in *content* to the text file at *file_path*.

    Parameters
    ----------
    file_path : str
        Path of the UTF-8 text file to append to.
    content : dict
        Parsed JSON API response; items are taken from
        ``content["data"]["list"]`` and each item is expected to carry
        "title", "publish_time" (epoch milliseconds) and "content" keys.
    """
    with open(file_path, "a", encoding="UTF-8") as fp:
        for item in content["data"]["list"]:
            fp.write(item["title"] + "\n")
            # publish_time is epoch milliseconds; render in local time
            ts = datetime.datetime.fromtimestamp(item["publish_time"] / 1000)
            fp.write("时间:" + ts.strftime("%Y-%m-%d %H:%M") + "\n")
            # collapse the body onto one line; blank line separates entries
            fp.write(item["content"].replace("\n", "") + "\n\n")
    # The "with" statement closes the file; the original's trailing
    # fp.close() (which relied on fp leaking out of the with) was removed.

def param(page_index, cursor=None):
    """Build the POST payload for one results page of the feed API.

    Parameters
    ----------
    page_index : int
        1-based page number.  The very first page carries no cursor.
    cursor : optional
        Pagination cursor returned by the previous response.  When omitted
        on pages > 1, falls back to the module-level ``last_cursor`` global
        (this preserves the original behaviour for existing callers).

    Returns
    -------
    dict
        JSON-serializable request payload.
    """
    payload = {
        "count": 20,
        "last_source": "algo",
        "only_important": False,
        "page": page_index,
    }
    if page_index != 1:
        # Only follow-up pages send the previous content id.
        payload["last_cursor"] = last_cursor if cursor is None else cursor
    return payload
def run():
    """Fetch one feed page and append its items to both output files.

    Reads the module-level globals ``page_index``, ``file_path`` and
    ``file_path2`` set by the main loop.

    Returns
    -------
    The ``last_cursor`` pagination value from the response, which the
    caller keeps for the next request.
    """
    payload = param(page_index)
    # Timeout added so a stalled connection cannot hang the scraper forever;
    # the original call would block indefinitely.
    response = requests.post(url, json=payload, headers=headers, timeout=30)
    content = response.json()
    write(file_path, content)   # copy served to web readers
    write(file_path2, content)  # archive copy
    return content["data"]["last_cursor"]
    
# Write a small marker file first: this proves the output directory is
# writable before the scrape loop starts, and seeds f_path so the loop's
# delete-previous-file step has something to point at.
f_path = os.path.join(Data_folder, "begin.txt")
with open(f_path, "a", encoding="UTF-8") as marker:
    marker.write("hello")
    
# Main loop: scrape 20 pages per round, then sleep one hour.
# (The original comments contradicted each other — "15 minutes" and
# "30 minutes" — while the code actually sleeps 60*60 seconds.)
while True:
    # Compute the timestamp once so both file names always match
    # (calling now() twice could straddle a second boundary).
    stamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
    file_path = os.path.join(Data_folder, "followin{}.txt".format(stamp))    # file served to readers
    file_path2 = os.path.join(Data_folder2, "followin{}.txt".format(stamp))  # archive copy
    page_index = 1
    # Run the scraper over the most recent pages.
    for index in range(1, 21):  # how many pages to fetch per round
        last_cursor = run()
        page_index += 1
    # After a successful round, delete the previous round's file.
    # Guard with exists(): the old "if f_path:" was always truthy and an
    # already-removed file would have crashed the loop with FileNotFoundError.
    if f_path and os.path.exists(f_path):
        os.remove(f_path)
    f_path = file_path  # remember the newest file so it gets removed next round
    time.sleep(60 * 60)  # sleep one hour between rounds