본문 바로가기

JS/NodeJS

취업 사이트 크롤링 Crawling 조회

MongoDB에 저장한 값을 조회한다.

https://github.com/copyNdpaste/recruitment-information_Node-JS


index.ejs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
<!DOCTYPE html>
<%# Page shell: assembles the document from the partials under template/.
    Partials are pulled in with the include() function form, which works in
    both EJS 2.x and 3.x (the older include directive form was removed in EJS 3). %>
<html lang="ko">
  <head>
      <%- include('template/head.ejs') %>
  </head>
  <body id="page-top">
    <%- include('template/nav.ejs') %>
    <div class="container-fluid p-0">
        <%- include('template/about.ejs') %>
        <%- include('template/howtouse.ejs') %>
        <%- include('template/ITnewbie.ejs') %><!-- entry-level (new hire) listings -->
    </div>
    <%# No extension given here, as in the original; EJS appends .ejs when resolving the include. %>
    <%- include('template/forjs') %>
  </body>
</html>
cs


ITnewbie.ejs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
<%# "IT Rookie" section: a show-all button (#itnewbie), a keyword search box
    (.search + #searchbtn) and a collapsible panel (#collapse). The client script
    writes the result table into #ITnewbieInfo. Ids and classes are referenced
    from that script, so they must not be renamed here. %>
<section class="recruit-section p-3 p-lg-5 d-flex flex-column" id="ITnewbie">
<div class="my-auto">
  <h1 class="mb-0"><span class="text-primary">IT Rookie</span> Recruitment Info</h1>
  <div class="recruit-item d-flex flex-column flex-md-row mb-5">
    <div class="recruit-content mr-auto">
      <h3 class="mb-0">IT Developer</h3>
      <div class="subheading mb-3">for rookie</div>
        <p>
            <%# aria-controls must name the element that is actually toggled (#collapse). %>
            <input class="btn btn-primary" type="button" data-toggle="collapse" data-target="#collapse" aria-expanded="false" aria-controls="collapse" id="itnewbie" value="모든 정보 보기"/>
        </p>
        <p>
          <input class="search" type="text" aria-label="검색어" value="">
          <button class="btn btn-primary" type="button" data-toggle="collapse" data-target="#collapse" aria-expanded="false" aria-controls="collapse" id="searchbtn">검색</button>
        </p>
        <div class="row">
            <div class="col">
                <div class="collapse multi-collapse" id="collapse">
                    <div class="card card-body">
                        <div id="ITnewbieInfo">
 
                        </div>
                    </div>
                </div>
            </div>
        </div>
    </div>
  </div>
</div>
</section>
cs

나머지 ejs 생략.. github에 있음


ajaxITnewbie.js


페이지에서 버튼을 클릭하면 MongoDB에서 값을 가져와 화면에 표시한다.

버튼이 눌리면 Ajax 요청을 보낸다. url에는 서버의 path를 지정하며, 서버에서는 그 path에 매핑된 함수가 실행된다. 해당 함수들은 아래 findjob.js 파일에 있다.


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
/**
 * Client-side behavior for the "IT Rookie" recruitment section.
 *
 * Two buttons post to the server and render the returned job postings as a
 * table inside #ITnewbieInfo:
 *   - #itnewbie  -> POST /process/ITnewbie        (all postings)
 *   - #searchbtn -> POST /process/ITnewbieSearch  (postings matching .search)
 *
 * Posting values are escaped before being placed into markup, because they are
 * concatenated into an HTML string and assigned with .html().
 */

/**
 * Escapes a value for use inside HTML text or a double-quoted attribute.
 *
 * @param {*} value Any value; null and undefined become an empty string.
 * @returns {string} The escaped text.
 */
function escapeJobHtml(value){
    if(value===undefined||value===null){
        return '';
    }
    return String(value)
        .replace(/&/g,'&amp;')
        .replace(/</g,'&lt;')
        .replace(/>/g,'&gt;')
        .replace(/"/g,'&quot;')
        .replace(/'/g,'&#39;');
}

/**
 * Returns a link that is safe to use as an href.
 *
 * Only http(s) URLs and root-relative paths are kept; anything else
 * (for example a javascript: URL) is replaced with "#".
 *
 * @param {*} link The posting's link value.
 * @returns {string} The original link or "#".
 */
function sanitizeJobLink(link){
    var url=(link===undefined||link===null)?'':String(link).trim();
    return /^(https?:\/\/|\/)/i.test(url)?url:'#';
}

/**
 * Builds the result table markup for a list of job postings.
 *
 * Entries without a title are skipped. The entries of `field` are joined with
 * "/" inside a properly closed <p class="field">.
 *
 * @param {*} results Expected to be an array of postings; anything else
 *     renders as an empty table.
 * @returns {string} HTML for the whole table.
 */
function renderJobTable(results){
    var jobs=Array.isArray(results)?results:[];
    var html=
        '<table class="table table-sm">'
            +'<thead>'
            +'<tr>'
                +'<th scope="col">기업</th>'
                +'<th scope="col">제목</th>'
                +'<th scope="col">경력/학력/지역</th>'
                +'<th scope="col">마감</th>'
                +'<th scope="col">출처</th>'
            +'</tr>'
            +'</thead>'
            +'<tbody>';
    for(var i=0;i<jobs.length;i++){
        var job=jobs[i];
        if(!job||!job.title){
            continue;
        }
        // `field` may be missing or a single value; normalize to an array.
        var fields=[];
        if(Array.isArray(job.field)){
            fields=job.field;
        }else if(job.field!==undefined&&job.field!==null){
            fields=[job.field];
        }
        var fieldText=fields.map(function(item){
            return escapeJobHtml(item);
        }).join('/');
        html+='<tr>'
            +'<td>'+escapeJobHtml(job.companyname)+'</td>'
            // rel=noopener keeps the opened page from reaching back via window.opener.
            +'<td><a href="'+escapeJobHtml(sanitizeJobLink(job.link))+'" target="_blank" rel="noopener noreferrer">'+escapeJobHtml(job.title)+'</a>'
            +'<p class="field">'+fieldText+'</p>'
            +'</td><td class="else">'+escapeJobHtml(job.career)+'/'+escapeJobHtml(job.levOfEdu)+'/'+escapeJobHtml(job.area)+'</td>'
            +'<td class="deadline">'+escapeJobHtml(job.deadline)+'</td>'
            +'<td class="from">'+escapeJobHtml(job.from)+'</td>'
            +'</tr>';
    }
    html+='</tbody>'+'</table>';
    return html;
}

/**
 * Posts to `url` and renders the response into #ITnewbieInfo.
 *
 * @param {string} url Server path to post to.
 * @param {Object} [data] Optional form data sent with the request.
 */
function requestJobs(url,data){
    var options={
        url:url,
        type:'post',
        success:function(results){
            $("#ITnewbieInfo").html(renderJobTable(results));
        },
        error:function(err){
            console.log('err:',err);
        }
    };
    if(data!==undefined){
        options.data=data;
    }
    $.ajax(options);
}

// "Show all" button: flip its label according to the current aria-expanded
// state, then load every posting.
$("#itnewbie").on('click',function(){
    var isExpanded=$(this).attr('aria-expanded')==='true';
    $(this).val(isExpanded?'모든 정보 보기':'접기');
    requestJobs('/process/ITnewbie');
});

/**
 * Re-triggers a click on the search button. Scheduled by the search handler
 * when the button was already marked expanded at click time.
 */
var aria_ex=function(){
    $("#searchbtn").click();
};

// Search button: load postings matching the keyword typed into .search.
$("#searchbtn").on('click',function(){
    var keyword=$('.search').val();
    if($(this).attr('aria-expanded')==='true'){
        // NOTE(review): presumably this click also collapses the panel via the
        // button's data-toggle, so a second click is scheduled to reopen it -
        // confirm against the collapse plugin in use.
        setTimeout(aria_ex,400);
        $('#itnewbie').val('모든 정보 보기');
    }else{
        $('#itnewbie').val('접기');
    }
    requestJobs('/process/ITnewbieSearch',{keyword:keyword});
});
cs


findjob.js


요청 path에 매핑된 함수가 실행되어 MongoDB에 접근해 데이터를 추출한다. 첫 번째 함수는 find로 전체 문서를 조회한다.

두 번째 함수는 사용자가 입력한 검색어에 해당하는 결과를 반환한다. aggregate로 pipeline을 만들어 field, companyname 등 여러 필드 중 하나라도 검색어와 일치하는 문서를 찾는다. JavaScript 정규식은 new RegExp로 만들며, 대소문자 구분 없이 검색하려면 옵션으로 'i'를 준다. 그러면 사용자가 DBA, dba, DbA 중 무엇을 입력해도 검색된다.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
/**
 * Route handler: responds with every job posting.
 *
 * Reads the database object stored on the app under the 'database' key and
 * runs JobModel.find({}) with a callback.
 *
 * @param {Object} req Request; only req.app.get('database') is used.
 * @param {Object} res Response; receives the result list, or a 500 status
 *     with an error object when the query fails.
 */
var ITnewbie=function(req,res){
    var database=req.app.get('database');
    console.log('ITnewbie 호출됨');
    database.JobModel.find({},function(err,results){
        if(err){
            // Report the failure instead of sending an empty 200 response.
            console.error('ITnewbie query failed:',err);
            res.status(500).send({error:'Failed to load job postings'});
            return;
        }
        res.send(results);
    });
};
/**
 * Route handler: responds with the job postings in which any searchable field
 * contains the posted keyword, ignoring case.
 *
 * The keyword is matched literally: regular-expression metacharacters are
 * escaped, so input such as "c++" or "(" cannot raise a SyntaxError or inject
 * a pattern. A missing or non-string keyword is treated as an empty string,
 * which matches every posting.
 *
 * @param {Object} req Request; uses req.app.get('database') and req.body.keyword.
 * @param {Object} res Response; receives the matching postings, or a 500
 *     status with an error object when the query fails.
 */
var ITnewbieSearch=function(req,res){
    var searchableFields=['field','companyname','title','career','levOfEdu','area','deadline','from'];
    var database=req.app.get('database');
    var rawKeyword=req.body?req.body.keyword:undefined; // sent as POST data by the Ajax call
    var keyword=typeof rawKeyword==='string'?rawKeyword:'';
    console.log(keyword);
    console.log('ITnewbieSearch 호출됨');

    // Escape metacharacters so the keyword is searched as plain text.
    var pattern=new RegExp(keyword.replace(/[.*+?^${}()|[\]\\]/g,'\\$&'),'i');
    var conditions=searchableFields.map(function(name){
        var condition={};
        condition[name]={$regex:pattern};
        return condition;
    });

    database.JobModel.aggregate([{$match:{$or:conditions}}],function(err,results){
        if(err){
            console.error('ITnewbieSearch query failed:',err);
            res.status(500).send({error:'Failed to search job postings'});
            return;
        }
        res.send(results);
    });
};
 
// Expose both handlers on this module's exports under their own names.
Object.assign(module.exports,{
    ITnewbie:ITnewbie,
    ITnewbieSearch:ITnewbieSearch
});
cs




사이트 크롤링 -> http://oneshottenkill.tistory.com/311