

AlgeoMath: A Roundup of My Automation Code So Far


(pandas) AlgeoMath: log in > search > save titles and links to a CSV

1. Log in to AlgeoMath and click Create Document / Create Shape


import time

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains

driver = webdriver.Chrome(r'C:\chromedriver.exe')  # raw string so the backslash is not treated as an escape

time.sleep(0.5)
driver.get("https://www.algeomath.kr/login.do?returnUrl=%2Fmain.do")
#driver.execute_script('''window.open('https://www.algeomath.kr/login.do?returnUrl=%2Fmain.do',"blank");''')
driver.maximize_window()
time.sleep(0.5)

action = ActionChains(driver)




time.sleep(0.5)

#action.send_keys('your_id').perform()
driver.find_element_by_name('sui_user_id').send_keys('your_id')  # type the user ID


time.sleep(0.5)
driver.find_element_by_name('sui_pass_word').send_keys('your_password')  # type the password



driver.find_element_by_xpath('//*[@id="login-form-submit"]').click() #로그인 버튼클릭
time.sleep(0.5)
driver.find_element_by_xpath('//*[@id="header"]/div/div/div[2]/div/div[2]/a').click() #문서만들기 클릭
time.sleep(5)

#driver.find_element_by_xpath('//*[@id="openRcntFolder"]').click() # 왜 안눌러질까?
#driver.find_element_by_xpath('//*[@id="header"]/div/div/div[2]/div/div[1]/a').click() #도형만들기 클릭

1-2. Opening an AlgeoMath document

I don't know why it works here but fails when I log in fresh and navigate over.
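My guess is timing: right after a fresh login the page is still loading, so the element isn't there yet when the click fires. A minimal sketch using Selenium's explicit waits instead of fixed sleeps (reusing the openRcntFolder id from the code below; the 10-second timeout is an arbitrary choice):

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# wait up to 10 seconds for the button to become clickable, then click it
wait = WebDriverWait(driver, 10)
open_btn = wait.until(EC.element_to_be_clickable((By.ID, 'openRcntFolder')))
open_btn.click()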

import time

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains

driver = webdriver.Chrome(r'C:\chromedriver.exe')

time.sleep(0.5)
driver.get("https://www.algeomath.kr/algeomath/doc/make.do")
#driver.execute_script('''window.open('https://www.algeomath.kr/login.do?returnUrl=%2Fmain.do',"blank");''')
driver.maximize_window()
time.sleep(0.5)

#action = ActionChains(driver)

driver.find_element_by_xpath('//*[@id="openRcntFolder"]').click()
time.sleep(0.5)

3. After logging in, search for "거북이" (turtle) in the main search box, grab the titles, and save them as a CSV file

import time

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
from bs4 import BeautifulSoup
import pandas as pd

driver = webdriver.Chrome(r'C:\chromedriver.exe')

time.sleep(0.5)
driver.get("https://www.algeomath.kr/login.do?returnUrl=%2Fmain.do")
#driver.execute_script('''window.open('https://www.algeomath.kr/login.do?returnUrl=%2Fmain.do',"blank");''')
driver.maximize_window()
time.sleep(0.5)

#action = ActionChains(driver)

time.sleep(0.5)

#action.send_keys('your_id').perform()
driver.find_element_by_name('sui_user_id').send_keys('your_id')

time.sleep(0.5)
driver.find_element_by_name('sui_pass_word').send_keys('your_password')

driver.find_element_by_xpath('//*[@id="login-form-submit"]').click() #로그인 버튼클릭
time.sleep(0.5)

# search for "거북이"
driver.find_element_by_id("keyword").send_keys("거북이")
ActionChains(driver).send_keys(Keys.ENTER).perform()

time.sleep(0.5)



req = driver.page_source
soup = BeautifulSoup(req, 'html.parser')
total = soup.select(".title")

title_list=[]

for title in total:
    temp=[]
    temp.append(title.text.strip())
    title_list.append(temp)


titleresults = pd.DataFrame(title_list)
titleresults.to_csv('title.csv')

print("csv파일 저장완료")

3-2. Reading multiple pages

Still to do: take the page count as user input, or detect the last page automatically (a sketch follows).
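One way to stop at the last page, assuming the "next" button disappears from the pagination once the final results page is reached (the XPath is the one used in the code below):

from selenium.common.exceptions import NoSuchElementException

def go_next_page_or_stop():
    # returns False once the "next" button can no longer be found,
    # which we take to mean the last results page has been reached
    try:
        next_btn = driver.find_element_by_xpath('//*[@id="contents"]/div/nav[2]/ul/li[13]/a')
    except NoSuchElementException:
        return False
    next_btn.click()
    time.sleep(0.5)  # let the next page load
    return True

while True:
    # ... parse the current page here ...
    if not go_next_page_or_stop():
        break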

import time

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains # actions driven by the mouse or keyboard
from bs4 import BeautifulSoup
import pandas as pd

maximum = 0
page = 1


driver = webdriver.Chrome(r'C:\chromedriver.exe')

time.sleep(0.5)
driver.get("https://www.algeomath.kr/login.do?returnUrl=%2Fmain.do")

# full screen
driver.maximize_window()


# log in
time.sleep(0.5)
driver.find_element_by_name('sui_user_id').send_keys('your_id')
time.sleep(0.5)
driver.find_element_by_name('sui_pass_word').send_keys('your_password')
driver.find_element_by_xpath('//*[@id="login-form-submit"]').click() # click the login button
time.sleep(0.5)


# search for "피타고라스" (Pythagoras)
driver.find_element_by_id("keyword").send_keys("피타고라스")
ActionChains(driver).send_keys(Keys.ENTER).perform()

time.sleep(0.5)


# click through to the next page of search results
def go_next_page():
    next_btn = driver.find_element_by_xpath('//*[@id="contents"]/div/nav[2]/ul/li[13]/a')
    ActionChains(driver).click(next_btn).perform()
    time.sleep(0.5)  # give the next results page a moment to load


title_list=[]



for i in range(1,6):
    source = driver.page_source
    soup = BeautifulSoup(source, 'html.parser')

    go_next_page()

    total = soup.select(".title")


    for title in total:
        temp=[]
        temp.append(title.text.strip())
        title_list.append(temp) 
        print(temp)
        print('\n')

"""
while 1:
    page_list = soup.findAll("a",{"onclick":"selectPage_func("+str(page)+")"})
    if not page_list:
        maximum = page-1
        break
    page = page + 1
print("총"+str(maximum)+"개의 페이지")
"""


titleresults = pd.DataFrame(title_list)
titleresults.to_csv('title.csv')

print("csv파일 저장완료")

4. Putting the links into the CSV too (most complete version)

  1. Maximize the window
  2. Type the AlgeoMath ID and password and click the login button
  3. Search from the home screen
  4. Page through the results, saving titles and links to a CSV

The problem is that the value of the button for moving to the next results page keeps changing.

  • Maybe I should work from the URL instead, swapping in the pgno number and the keyword
  • Total result count / 10 should give the page count, but I can't get hold of the value ㅜㅜ (a sketch follows below)
  • I don't know why the top row reads (,0) when I open the CSV (also answered below)
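The (,0) header turns out to come from pd.DataFrame(title_list, onclick_list): the second positional argument of pd.DataFrame is the index, so the links become an unnamed index and the lone title column gets the default label 0. Building the frame from a dict fixes the header, and math.ceil covers the page count; a minimal sketch with made-up sample data (the 10-results-per-page divisor is my assumption from the bullet above):

import math
import pandas as pd

titles = ['example title 1', 'example title 2']  # hypothetical sample rows
links = ['https://www.algeomath.kr/algeomath/app/key/aaa/view.do',   # 'aaa' and 'bbb' are
         'https://www.algeomath.kr/algeomath/app/key/bbb/view.do']   # placeholder keys

df = pd.DataFrame({'title': titles, 'link': links})  # named columns instead of a misplaced index
df.to_csv('title.csv', index=False)                  # index=False drops the 0,1,2,... row labels

total_results = 47                     # hypothetical total hit count
pages = math.ceil(total_results / 10)  # 10 hits per page -> 5 pages here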
import time

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains # actions driven by the mouse or keyboard
from bs4 import BeautifulSoup
import pandas as pd

maximum = 0
page = 1


driver = webdriver.Chrome(r'C:\chromedriver.exe')

time.sleep(0.5)
driver.get("https://www.algeomath.kr/login.do?returnUrl=%2Fmain.do")

# full screen
driver.maximize_window()


# log in
time.sleep(0.5)
driver.find_element_by_name('sui_user_id').send_keys('your_id')
time.sleep(0.5)
driver.find_element_by_name('sui_pass_word').send_keys('your_password')
driver.find_element_by_xpath('//*[@id="login-form-submit"]').click() # click the login button
time.sleep(0.5)


# search for "피타고라스" (Pythagoras)
driver.find_element_by_id("keyword").send_keys("피타고라스")
ActionChains(driver).send_keys(Keys.ENTER).perform()

time.sleep(0.5)


# click through to the next page of search results
def go_next_page():
    next_btn = driver.find_element_by_xpath('//*[@id="contents"]/div/nav[2]/ul/li[13]/a')
    ActionChains(driver).click(next_btn).perform()
    time.sleep(0.5)  # give the next results page a moment to load


title_list=[]
onclick_list=[]

for i in range(1,6):
    source = driver.page_source
    soup = BeautifulSoup(source, 'html.parser')

    go_next_page()

    titleclass = soup.select(".title>a")


    for title in titleclass:
        temp=[]
        temp.append(title.text.strip())
        title_list.append(temp) 
        onclick_list.append("https://www.algeomath.kr/algeomath/app/key/"+title.get('onclick').split('\'')[3]+"/view.do")
        print(temp)
        print('\n')


# build named columns; passing onclick_list as the second positional
# argument makes it the index and produces the odd (,0) header
titleresults = pd.DataFrame({'title': [t[0] for t in title_list], 'link': onclick_list})
titleresults.to_csv('title.csv', index=False)

print("csv파일 저장완료")

4-2. Reading the CSV file back

'''    
import time
import csv  # needed for csv.writer below
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains # actions driven by the mouse or keyboard
from bs4 import BeautifulSoup
maximum = 0
page = 1


driver = webdriver.Chrome(r'C:\chromedriver.exe')

time.sleep(0.5)
driver.get("https://www.algeomath.kr/login.do?returnUrl=%2Fmain.do")

# full screen
driver.maximize_window()


# log in
time.sleep(0.5)
driver.find_element_by_name('sui_user_id').send_keys('your_id')
time.sleep(0.5)
driver.find_element_by_name('sui_pass_word').send_keys('your_password')
driver.find_element_by_xpath('//*[@id="login-form-submit"]').click() # click the login button
time.sleep(0.5)


# search for "피타고라스" (Pythagoras)
driver.find_element_by_id("keyword").send_keys("피타고라스")
ActionChains(driver).send_keys(Keys.ENTER).perform()

time.sleep(0.5)


# click through to the next page of search results
def go_next_page():
    next_btn = driver.find_element_by_xpath('//*[@id="contents"]/div/nav[2]/ul/li[13]/a')
    ActionChains(driver).click(next_btn).perform()
    time.sleep(0.5)  # give the next results page a moment to load


title_list=[]
onclick_list=[]

for i in range(1,6):
    source = driver.page_source
    soup = BeautifulSoup(source, 'html.parser')

    go_next_page()

    titleclass = soup.select(".title>a")


    for title in titleclass:
        temp=[]
        temp.append(title.text.strip())
        title_list.append(temp) 
        onclick_list.append("https://www.algeomath.kr/algeomath/app/key/"+title.get('onclick').split('\'')[3]+"/view.do")
        print(temp)
        print('\n')



#titleresults = pd.DataFrame(title_list,onclick_list)

#titleresults.to_csv('title.csv')
f = open('title.csv', 'w', encoding='utf-8', newline='')
wr = csv.writer(f)
wr.writerow([1, title_list])    # dumps the whole list into one row...
wr.writerow([2, onclick_list])  # ...which is why rows and columns come out misaligned
f.close()


print("CSV file saved")

'''

The block above tried saving the CSV a different way, but the rows and columns don't line up: each writerow call dumps an entire list into a single row. A fixed sketch follows, and after it comes the plain code that reads the CSV back.
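A minimal sketch of the row-per-record version, pairing each title with its link via zip (the sample data is made up):

import csv

titles = ['example title 1', 'example title 2']  # hypothetical sample data
links = ['link-1', 'link-2']

with open('title.csv', 'w', encoding='utf-8', newline='') as f:
    wr = csv.writer(f)
    wr.writerow(['title', 'link'])         # header row
    for title, link in zip(titles, links):
        wr.writerow([title, link])         # one row per record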

import csv
import pandas as pd

f=open('title.csv','r',encoding='utf-8')
rdr = csv.reader(f)
for line in rdr:
    print(line)
f.close()

4-3. Taking the search keyword as input

"검색어를 입력해주세요(두글자 이상)":

import time

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains # actions driven by the mouse or keyboard
from bs4 import BeautifulSoup
import pandas as pd

maximum = 0
page = 1

keyword_input = input('Please enter a search term (at least two characters): ')  # leaving this commented out makes send_keys(keyword_input) below fail with a NameError

driver = webdriver.Chrome(r'C:\chromedriver.exe')

time.sleep(0.5)
driver.get("https://www.algeomath.kr/login.do?returnUrl=%2Fmain.do")

# full screen
driver.maximize_window()


# log in
time.sleep(0.5)
driver.find_element_by_name('sui_user_id').send_keys('your_id')
time.sleep(0.5)
driver.find_element_by_name('sui_pass_word').send_keys('your_password')
driver.find_element_by_xpath('//*[@id="login-form-submit"]').click() # click the login button
time.sleep(0.5)


# search for the keyword entered above
driver.find_element_by_id("keyword").send_keys(keyword_input)
ActionChains(driver).send_keys(Keys.ENTER).perform()

time.sleep(0.5)


# click through to the next page of search results
def go_next_page():
    next_btn = driver.find_element_by_xpath('//*[@id="contents"]/div/nav[2]/ul/li[13]/a')
    ActionChains(driver).click(next_btn).perform()
    time.sleep(0.5)  # give the next results page a moment to load


title_list=[]
onclick_list=[]

for i in range(0,3):
    source = driver.page_source
    soup = BeautifulSoup(source, 'html.parser')


    titleclass = soup.select(".title>a")


    for title in titleclass:
        temp=[]
        temp.append(title.text.strip())
        title_list.append(temp) 
        onclick_list.append("https://www.algeomath.kr/algeomath/app/key/"+title.get('onclick').split('\'')[3]+"/view.do")
        print(temp)
        print('\n')

    go_next_page()

titleresults = pd.DataFrame({'title': [t[0] for t in title_list], 'link': onclick_list})  # named columns, as in section 4
titleresults.to_csv(keyword_input + '.csv', index=False)

print("csv파일 저장완료")

4-4. An attempt to find the last page

import time

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains # actions driven by the mouse or keyboard
from bs4 import BeautifulSoup
import pandas as pd

maximum = 0
page = 1


driver = webdriver.Chrome(r'C:\chromedriver.exe')

time.sleep(0.5)
driver.get("https://www.algeomath.kr/login.do?returnUrl=%2Fmain.do")

# full screen
driver.maximize_window()


# log in
time.sleep(0.5)
driver.find_element_by_name('sui_user_id').send_keys('your_id')
time.sleep(0.5)
driver.find_element_by_name('sui_pass_word').send_keys('your_password')
driver.find_element_by_xpath('//*[@id="login-form-submit"]').click() # click the login button
time.sleep(0.5)


# search for "피타고라스" (Pythagoras)
driver.find_element_by_id("keyword").send_keys("피타고라스")
ActionChains(driver).send_keys(Keys.ENTER).perform()

time.sleep(0.5)


# click through to the next page of search results
def go_next_page():
    next_btn = driver.find_element_by_xpath('//*[@id="contents"]/div/nav[2]/ul/li[13]/a')
    ActionChains(driver).click(next_btn).perform()
    time.sleep(0.5)  # give the next results page a moment to load


title_list=[]
onclick_list=[]

source = driver.page_source
soup = BeautifulSoup(source, 'html.parser')

# CSS classes must be chained with dots: '.nav-link app active' would look for
# <app> and <active> descendant tags instead of one element with all three classes
result_count = soup.select('.nav-link.app.active')
print(result_count)
#print(result_count.split("[\\(\\)]"))

for i in range(1,6):
    source = driver.page_source
    soup = BeautifulSoup(source, 'html.parser')

    go_next_page()

    titleclass = soup.select(".title>a")


    for title in titleclass:
        temp=[]
        temp.append(title.text.strip())
        title_list.append(temp) 
        onclick_list.append("https://www.algeomath.kr/algeomath/app/key/"+title.get('onclick').split('\'')[3]+"/view.do")
       # print(temp)
       # print('\n')


titleresults = pd.DataFrame({'title': [t[0] for t in title_list], 'link': onclick_list})  # named columns, as in section 4
titleresults.to_csv('title.csv', index=False)

print("csv파일 저장완료")

Saving every modoom post to a CSV with pandas (most complete version #2)

import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains # actions driven by the mouse or keyboard
from bs4 import BeautifulSoup
import pandas as pd

SCROLL_PAUSE_TIME = 2

titlelist=[]
namelist=[]
linklist=[]    

driver = webdriver.Chrome(r'C:\chromedriver.exe')
time.sleep(0.5)
driver.get("https://www.algeomath.kr/modoom/4f21351254834cdd94f4af5e4cacb6ce/main.do") # the modoom (group) link
driver.maximize_window() # full screen



# log in
# time.sleep(0.5)
# driver.find_element_by_name('sui_user_id').send_keys('your_id')
# time.sleep(0.5)
# driver.find_element_by_name('sui_pass_word').send_keys('your_password')
# driver.find_element_by_xpath('//*[@id="login-form-submit"]').click() # click the login button
# time.sleep(0.5)


def crawling():
    source = driver.page_source
    soup = BeautifulSoup(source, 'html.parser')
    title_select=soup.select('.title>a')
    name_select=soup.select('.name>a')

    for titles in title_select:
        title=(titles.text.strip())
        titlelist.append(title)
#         print(title)
        link=("https://www.algeomath.kr/algeomath/app/key/"+titles.get('onclick').split('\'')[3]+"/view.do")
#         print(link)
        linklist.append(link)

    for names in name_select:
        name=(names.text.strip())
        namelist.append(name)



def scroll_down_end():
    last_height = driver.execute_script("return document.body.scrollHeight")

    while True:
        # scroll all the way to the bottom
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

        # pause
        time.sleep(SCROLL_PAUSE_TIME)

        # stop once there is nothing more to load
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height

scroll_down_end()
crawling()

print(titlelist)
print(linklist)
print(namelist)


# TODO: search for a modoom (input)
#       show the list of matching modooms
#       pick one of them (input)
#       grab the links, titles, and authors of the documents in that modoom


# why wouldn't namelist go in? ㅠㅠ -> pd.DataFrame(data, index) only takes the data
# and the index positionally, so a dict is needed to give each list its own column
results = pd.DataFrame({'title': titlelist, 'link': linklist, 'name': namelist})
results.to_csv('modoom_pandas.csv', index=False)

print("csv파일 저장완료")

# data = pd.read_csv('modoom_pandas.csv')
# data